Add shell-equivalent as an executable script
authorSiraaj Khandkar <siraaj@khandkar.net>
Wed, 28 Nov 2018 22:15:50 +0000 (17:15 -0500)
committerSiraaj Khandkar <siraaj@khandkar.net>
Wed, 28 Nov 2018 22:18:05 +0000 (17:18 -0500)
README.md
dups.sh [new file with mode: 0755]

index 04b84ec..28ed81a 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 dups
 ====
 
-Find duplicate files in given directory trees. Where "duplicate" is defined as
-having the same (and non-0) file size and MD5 hash digest.
+Find duplicate files in N given directory trees, where "duplicate" is defined
+as having the same (non-zero) file size and MD5 hash digest.
 
-It is roughly equivalent to the following one-liner:
+It is roughly equivalent to the following one-liner (included as `dups.sh`):
 ```sh
 find . -type f -print0 | xargs -0 -P 6 -I % md5sum % | awk '{digest = $1;  sub("^" $1 " +", ""); path = $0; paths[digest, ++cnt[digest]] = path} END {for (digest in cnt) {n = cnt[digest]; if (n > 1) {print(digest, n); for (i=1; i<=n; i++) {printf "    %s\n", paths[digest, i]} } } }'
 ```
diff --git a/dups.sh b/dups.sh
new file mode 100755 (executable)
index 0000000..bd282e5
--- /dev/null
+++ b/dups.sh
@@ -0,0 +1,23 @@
+#! /bin/sh
+
+find $@ -type f -print0 \
+| xargs -0 -P $(nproc) md5sum \
+| awk '
+    {
+        digest = $1
+        sub("^" $1 " +", "")
+        path = $0
+        paths[digest, ++count[digest]] = path
+    }
+
+    END {
+        for (digest in count) {
+            n = count[digest]
+            if (n > 1) {
+                print(digest, n)
+                for (i=1; i<=n; i++) {
+                    printf "    %s\n", paths[digest, i]
+                }
+            }
+        }
+    }'
This page took 0.018438 seconds and 4 git commands to generate.