From f41b9cdf0268213b9d1c911aa7836d9dc9948194 Mon Sep 17 00:00:00 2001
From: Siraaj Khandkar
Date: Wed, 28 Nov 2018 17:15:50 -0500
Subject: [PATCH] Add shell-equivalent as an executable script

---
 README.md |  6 +++---
 dups.sh   | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100755 dups.sh

diff --git a/README.md b/README.md
index 04b84ec..28ed81a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 dups
 ====
 
-Find duplicate files in given directory trees. Where "duplicate" is defined as
-having the same (and non-0) file size and MD5 hash digest.
+Find duplicate files in N given directory trees. Where "duplicate" is defined
+as having the same (and non-0) file size and MD5 hash digest.
 
-It is roughly equivalent to the following one-liner:
+It is roughly equivalent to the following one-liner (included as `dups.sh`):
 ```sh
 find . -type f -print0 | xargs -0 -P 6 -I % md5sum % | awk '{digest = $1; sub("^" $1 " +", ""); path = $0; paths[digest, ++cnt[digest]] = path} END {for (digest in cnt) {n = cnt[digest]; if (n > 1) {print(digest, n); for (i=1; i<=n; i++) {printf " %s\n", paths[digest, i]} } } }'
 ```
diff --git a/dups.sh b/dups.sh
new file mode 100755
index 0000000..bd282e5
--- /dev/null
+++ b/dups.sh
@@ -0,0 +1,35 @@
+#! /bin/sh
+#
+# dups.sh - list files with identical content under the given directory trees.
+#
+# Usage: dups.sh DIR...
+#
+# Every regular file is hashed with md5sum; each digest shared by more than
+# one file is printed as "<digest> <count>" followed by the matching paths.
+# Relies on nproc and md5sum, and on find -print0 / xargs -0 -P support.
+
+# "$@" is quoted so directory names containing whitespace or glob characters
+# reach find as single, unmodified arguments.
+find "$@" -type f -print0 \
+| xargs -0 -P "$(nproc)" md5sum \
+| awk '
+    {
+        digest = $1
+        # An md5sum line is "<digest> <mode-char><path>". Skip exactly the
+        # digest plus those two separator characters, so that leading blanks
+        # in a file name are preserved.
+        path = substr($0, length(digest) + 3)
+        paths[digest, ++count[digest]] = path
+    }
+
+    END {
+        for (digest in count) {
+            n = count[digest]
+            if (n > 1) {
+                print(digest, n)
+                for (i=1; i<=n; i++) {
+                    printf " %s\n", paths[digest, i]
+                }
+            }
+        }
+    }'
-- 
2.20.1