X-Git-Url: https://git.xandkar.net/?p=dups.git;a=blobdiff_plain;f=README.md;h=f273b75a882a5692178ef60d2efd7e28a87e7e92;hp=a6e6c54804ace51f30c790c11ec56ad992fbbdb5;hb=4d53b6c0d5f9c3a8d42d6a531e927560a0bf64ac;hpb=b839d582481df4861b7bdf123f404dcf13ee5bbd

diff --git a/README.md b/README.md
index a6e6c54..f273b75 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ having the same MD5 hash digest.
 
 It is roughly equivalent to the following one-liner:
 ```sh
-find . -type f -exec md5sum '{}' \; | awk '{paths[$1, ++cnt[$1]] = $2} END {for (path in cnt) {n = cnt[path]; if (n > 1) {print(path, n); for (i=1; i<=n; i++) {print(" ", paths[path, i])} } } }'
+find . -type f -exec md5sum '{}' \; | awk '{digest = $1; path = $2; paths[digest, ++count[digest]] = path} END {for (digest in count) {n = count[digest]; if (n > 1) {print(digest, n); for (i=1; i<=n; i++) {print " ", paths[digest, i]} } } }'
 ```
 
 which, when indented, looks like:
@@ -14,15 +14,18 @@ which, when indented, looks like:
 find . -type f -exec md5sum '{}' \; \
 | awk '
     {
-        paths[$1, ++cnt[$1]] = $2
+        digest = $1
+        path = $2
+        paths[digest, ++count[digest]] = path
     }
+
     END {
-        for (path in cnt) {
-            n = cnt[path]
+        for (digest in count) {
+            n = count[digest]
             if (n > 1) {
-                print(path, n)
+                print(digest, n)
                 for (i=1; i<=n; i++) {
-                    print(" ", paths[path, i])
+                    print " ", paths[digest, i]
                 }
             }
         }