From e1d6490a6bfaaa959b975ea8f0532c131d8951bc Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Mon, 26 Mar 2012 17:04:35 -0700
Subject: [PATCH 1/9] modified UFDFF.py to work with Python3000.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 UltraFastDuplicateFilesFinder.py | 89 +++++++++++++++++---------------
 fileWalker.py                    |  3 ++
 2 files changed, 49 insertions(+), 43 deletions(-)
 create mode 100644 fileWalker.py

diff --git a/UltraFastDuplicateFilesFinder.py b/UltraFastDuplicateFilesFinder.py
index 8e7fe78..a7b479f 100644
--- a/UltraFastDuplicateFilesFinder.py
+++ b/UltraFastDuplicateFilesFinder.py
@@ -105,46 +105,49 @@ def humanize_size(size):
         if hsize > 0.5:
             return '%.2f %s' % (hsize, suffix)
 
-
-# we start here by checking all files
-for filename in sys.stdin:
-    filename = filename.strip()
-
-    check_file(filename)
-    totalfiles += 1
-    totalsize += os.path.getsize(filename)
-
-# print the report
-print '%10s   %s' % ('size', 'filename')
-
-for h, f in hashlist.iteritems():
-    if hashcount[h] < 2:
-        # present one time, skip
-        continue
-    
-    # reference file    
-    refsize = os.path.getsize(f[0])
-    refmd5 = get_file_hash(f[0])
-    print '%10d   %s' % (refsize, f[0])
-    
-    
-    for filename in f[1:]:
-        # and its copies
-        size = os.path.getsize(filename)
-        md5 = get_file_hash(filename)
-
-        status = ' '
-        msg = ''
-        if md5 != refmd5:
-            status = '!'
-            msg = ' partial match only!'
-
-        print '%10d %s %s%s' % (size, status, filename, msg)
-        dupsize += size
-    dupfiles += 1
-    print
-
-# final summary
-print '%d files checked (%s), %d duplicates (%s).' % (
-    totalfiles, humanize_size(totalsize), dupfiles, humanize_size(dupsize))
-
+def main():
+    # we start here by checking all files
+    for filename in sys.stdin:
+        filename = filename.strip()
+
+        check_file(filename)
+        totalfiles += 1
+        totalsize += os.path.getsize(filename)
+
+    # print the report
+    print( '%10s   %s' % ('size', 'filename') )
+
+    for h, f in hashlist.iteritems():
+        if hashcount[h] < 2:
+            # present one time, skip
+            continue
+        
+        # reference file    
+        refsize = os.path.getsize(f[0])
+        refmd5 = get_file_hash(f[0])
+        print( '%10d   %s' % (refsize, f[0]))
+        
+        
+        for filename in f[1:]:
+            # and its copies
+            size = os.path.getsize(filename)
+            md5 = get_file_hash(filename)
+
+            status = ' '
+            msg = ''
+            if md5 != refmd5:
+                status = '!'
+                msg = ' partial match only!'
+
+            print( '%10d %s %s%s' % (size, status, filename, msg))
+            dupsize += size
+        dupfiles += 1
+        print()
+
+    # final summary
+    print( '%d files checked (%s), %d duplicates (%s).' % (
+        totalfiles, humanize_size(totalsize), dupfiles, humanize_size(dupsize)))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fileWalker.py b/fileWalker.py
new file mode 100644
index 0000000..8a43d6c
--- /dev/null
+++ b/fileWalker.py
@@ -0,0 +1,3 @@
+import os, sys
+import UltraFastDuplicateFilesFinder as ff
+

From 5ec3b17390d938014e0baeb2e6b90afbad510967 Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Mon, 26 Mar 2012 17:24:18 -0700
Subject: [PATCH 2/9] added a test folder, dirName input and file walker
 creation.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 fileWalker.py | 37 +++++++++++++++++++++++++++++++++++
 test/README   | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 test/README_2 | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 test/README_3 | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 196 insertions(+)
 create mode 100644 test/README
 create mode 100644 test/README_2
 create mode 100644 test/README_3

diff --git a/fileWalker.py b/fileWalker.py
index 8a43d6c..28de5ca 100644
--- a/fileWalker.py
+++ b/fileWalker.py
@@ -1,3 +1,40 @@
 import os, sys
 import UltraFastDuplicateFilesFinder as ff
 
+testPath = os.path.join( os.path.curdir )
+
+
+
+
+
+
+
+def getDirName(dirName=None):
+    if dirName:
+        if os.path.isdir( os.path.normpath( dirName )):
+            return dirName
+    if sys.argv[1:]:
+        out = sys.argv[1]
+        if os.path.isdir( os.path.normpath( out )):
+            return out
+    
+    while 1:
+        inp = input("which folder would you like to find the duplicates in?\n")
+        if not inp: # mainly for debugging
+            global testPath
+            inp = testPath
+            break
+        if os.path.isdir( os.path.normpath( inp )):
+            break
+    return inp
+    
+
+def main(dirName=None):
+    root = getDirName(dirName)
+    walker = os.walk(root)
+
+    for j in walker:
+        print(j)
+
+if __name__ == '__main__':
+    main()
diff --git a/test/README b/test/README
new file mode 100644
index 0000000..07a6777
--- /dev/null
+++ b/test/README
@@ -0,0 +1,53 @@
+Ultra Fast Duplicate Files Finder
+=================================
+  by Gautier Portet <kassoulet gmail com>
+
+
+Takes a list of file from stdin.
+And print the duplicate ones.
+
+
+example use:
+
+  find ~/ -size +10M | ./UltraFastDuplicateFilesFinder.py
+
+to find duplicates in your home folder, all files more than 10MB.
+
+UltraFastDuplicateFilesFinder compares only the very beginning of the files.
+Its sufficient for most uses, but use with caution.
+
+But this way is quite useful to detect duplicates within corrupted media files...
+
+
+this is public domain.
+
+
+
+------------------------------------------------------------------------------
+example run, took less than a second to answer...
+
+
+gautier@quad:~/code/tmp$ find /home -size +10M | ./duplicate.py
+      size   filename
+  12467906   /home/gautier/Photos/pict4614.mov
+  12467906   /home/gautier/Photos/Videos/PICT4614.MOV
+
+  13068570   /home/gautier/Photos/pict4588.mov
+  13068570   /home/gautier/Photos/Videos/PICT4588.MOV
+
+[...]
+
+  20865498   /home/gautier/Photos/pict4695.mov
+  20865498   /home/gautier/Photos/Videos/PICT4695.MOV
+
+  28270824   /home/gautier/tmp/tsunami 1 œ ǒǑ.flac
+  28270824   /home/gautier/tmp/tsunami-1.flac
+
+136 files checked (22.75 GiB), 8 duplicates (153.45 MiB).
+
+
+
+
+
+
+
diff --git a/test/README_2 b/test/README_2
new file mode 100644
index 0000000..07a6777
--- /dev/null
+++ b/test/README_2
@@ -0,0 +1,53 @@
+Ultra Fast Duplicate Files Finder
+=================================
+  by Gautier Portet <kassoulet gmail com>
+
+
+Takes a list of file from stdin.
+And print the duplicate ones.
+
+
+example use:
+
+  find ~/ -size +10M | ./UltraFastDuplicateFilesFinder.py
+
+to find duplicates in your home folder, all files more than 10MB.
+
+UltraFastDuplicateFilesFinder compares only the very beginning of the files.
+Its sufficient for most uses, but use with caution.
+
+But this way is quite useful to detect duplicates within corrupted media files...
+
+
+this is public domain.
+
+
+
+------------------------------------------------------------------------------
+example run, took less than a second to answer...
+
+
+gautier@quad:~/code/tmp$ find /home -size +10M | ./duplicate.py
+      size   filename
+  12467906   /home/gautier/Photos/pict4614.mov
+  12467906   /home/gautier/Photos/Videos/PICT4614.MOV
+
+  13068570   /home/gautier/Photos/pict4588.mov
+  13068570   /home/gautier/Photos/Videos/PICT4588.MOV
+
+[...]
+
+  20865498   /home/gautier/Photos/pict4695.mov
+  20865498   /home/gautier/Photos/Videos/PICT4695.MOV
+
+  28270824   /home/gautier/tmp/tsunami 1 œ ǒǑ.flac
+  28270824   /home/gautier/tmp/tsunami-1.flac
+
+136 files checked (22.75 GiB), 8 duplicates (153.45 MiB).
+
+
+
+
+
+
+
diff --git a/test/README_3 b/test/README_3
new file mode 100644
index 0000000..07a6777
--- /dev/null
+++ b/test/README_3
@@ -0,0 +1,53 @@
+Ultra Fast Duplicate Files Finder
+=================================
+  by Gautier Portet <kassoulet gmail com>
+
+
+Takes a list of file from stdin.
+And print the duplicate ones.
+
+
+example use:
+
+  find ~/ -size +10M | ./UltraFastDuplicateFilesFinder.py
+
+to find duplicates in your home folder, all files more than 10MB.
+
+UltraFastDuplicateFilesFinder compares only the very beginning of the files.
+Its sufficient for most uses, but use with caution.
+
+But this way is quite useful to detect duplicates within corrupted media files...
+
+
+this is public domain.
+
+
+
+------------------------------------------------------------------------------
+example run, took less than a second to answer...
+
+
+gautier@quad:~/code/tmp$ find /home -size +10M | ./duplicate.py
+      size   filename
+  12467906   /home/gautier/Photos/pict4614.mov
+  12467906   /home/gautier/Photos/Videos/PICT4614.MOV
+
+  13068570   /home/gautier/Photos/pict4588.mov
+  13068570   /home/gautier/Photos/Videos/PICT4588.MOV
+
+[...]
+
+  20865498   /home/gautier/Photos/pict4695.mov
+  20865498   /home/gautier/Photos/Videos/PICT4695.MOV
+
+  28270824   /home/gautier/tmp/tsunami 1 œ ǒǑ.flac
+  28270824   /home/gautier/tmp/tsunami-1.flac
+
+136 files checked (22.75 GiB), 8 duplicates (153.45 MiB).
+
+
+
+
+
+
+

From 10e5cd893d181a4c8f34d4f822c28a9deaeff0df Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Tue, 27 Mar 2012 18:13:10 -0700
Subject: [PATCH 3/9] made the new module interface nicely with the original
 module to get the hashlist back and made optional filter for hidden folders.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 UltraFastDuplicateFilesFinder.py | 11 ++++++-----
 fileWalker.py                    | 34 ++++++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/UltraFastDuplicateFilesFinder.py b/UltraFastDuplicateFilesFinder.py
index a7b479f..1850d16 100644
--- a/UltraFastDuplicateFilesFinder.py
+++ b/UltraFastDuplicateFilesFinder.py
@@ -46,7 +46,7 @@ def get_file_hash(filename, limit_size=None, buffer_size=BUFFER_SIZE):
     """
     # open file
     try:
-        f = file(filename, "rb")
+        f = open(filename, "rb")
     except IOError:
         return 'NONE'
 
@@ -105,9 +105,10 @@ def humanize_size(size):
         if hsize > 0.5:
             return '%.2f %s' % (hsize, suffix)
 
-def main():
+def main(dirWalker=sys.stdin):
+    global totalsize, totalfiles, dupfiles, dupsize 
     # we start here by checking all files
-    for filename in sys.stdin:
+    for filename in dirWalker:
         filename = filename.strip()
 
         check_file(filename)
@@ -117,7 +118,7 @@ def main():
     # print the report
     print( '%10s   %s' % ('size', 'filename') )
 
-    for h, f in hashlist.iteritems():
+    for h, f in hashlist.items():
         if hashcount[h] < 2:
             # present one time, skip
             continue
@@ -147,7 +148,7 @@ def main():
     # final summary
     print( '%d files checked (%s), %d duplicates (%s).' % (
         totalfiles, humanize_size(totalsize), dupfiles, humanize_size(dupsize)))
-
+    return hashlist
 
 if __name__ == '__main__':
     main()
diff --git a/fileWalker.py b/fileWalker.py
index 28de5ca..6bae9ca 100644
--- a/fileWalker.py
+++ b/fileWalker.py
@@ -5,9 +5,22 @@
 
 
 
-
-
-
+def walkerAdapter(walker, hiddenFolders=False):
+    for curDir, dirList, fileList in walker:
+        for filename in fileList:
+            filepath = os.path.join( curDir, filename )
+            if not hiddenFolders and folderIsHidden(filepath):
+                continue
+            yield filepath
+
+def folderIsHidden(filepath):
+    par = filepath
+    while 1:
+        par, cd = os.path.split(par)
+        if cd.startswith('.') and not cd == '.':
+            return True
+        if not par:
+            break
 
 def getDirName(dirName=None):
     if dirName:
@@ -27,14 +40,23 @@ def getDirName(dirName=None):
         if os.path.isdir( os.path.normpath( inp )):
             break
     return inp
+
+
+def delete_duplicates(hashlist):
     
+    pass
 
 def main(dirName=None):
     root = getDirName(dirName)
     walker = os.walk(root)
-
-    for j in walker:
-        print(j)
+    walker = walkerAdapter(walker)
+    hashlist = ff.main(walker)
+    inp = input('would you like to remove all duplicates?\n')
+    if inp.strip() in ['yes', 'y', 'ya']:
+        delete_duplicates(hashlist)
+##    for j in hashlist.items():
+##        if len(j[1]) >1:
+##            print(j)
 
 if __name__ == '__main__':
     main()

From cd1c581dfcfd3be58b012b70b49a992aaf99f802 Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Tue, 27 Mar 2012 19:10:11 -0700
Subject: [PATCH 4/9] implemented the deleter function, with the option for it
 to be interactive.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 fileWalker.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fileWalker.py b/fileWalker.py
index 6bae9ca..5e4f4e3 100644
--- a/fileWalker.py
+++ b/fileWalker.py
@@ -42,8 +42,16 @@ def getDirName(dirName=None):
     return inp
 
 
-def delete_duplicates(hashlist):
-    
+def delete_duplicates(hashlist, interactive=True):
+    for fl in hashlist.values():
+        for filename in fl[1:]:
+            print('removing:', filename)
+            if interactive:
+                inp = input('?').strip()
+                if not inp in ['yes', 'y', 'ya']:
+                    continue
+            os.remove(filename)
+        pass
     pass
 
 def main(dirName=None):
@@ -51,9 +59,13 @@ def main(dirName=None):
     walker = os.walk(root)
     walker = walkerAdapter(walker)
     hashlist = ff.main(walker)
-    inp = input('would you like to remove all duplicates?\n')
-    if inp.strip() in ['yes', 'y', 'ya']:
+    inp = input('would you like to remove all duplicates?\n').strip()
+    if inp in ['int', 'yi']:
+        delete_duplicates(hashlist, True)
+    elif inp in ['yes', 'y', 'ya']:
         delete_duplicates(hashlist)
+    else:
+        print('no file was deleted')
 ##    for j in hashlist.items():
 ##        if len(j[1]) >1:
 ##            print(j)

From 2ef7addb4e629a9b4ed7f8e96ef637857377c575 Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Tue, 27 Mar 2012 19:14:35 -0700
Subject: [PATCH 5/9] finished the deleter function with more verbose output
 than earlier.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 fileWalker.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fileWalker.py b/fileWalker.py
index 5e4f4e3..185ac30 100644
--- a/fileWalker.py
+++ b/fileWalker.py
@@ -42,14 +42,17 @@ def getDirName(dirName=None):
     return inp
 
 
-def delete_duplicates(hashlist, interactive=True):
+def delete_duplicates(hashlist, interactive=True, verbose=True):
     for fl in hashlist.values():
+        if len(fl) > 1:
+            print('keeping:', fl[0])
         for filename in fl[1:]:
-            print('removing:', filename)
+            print('duplicate:', filename)
             if interactive:
                 inp = input('?').strip()
                 if not inp in ['yes', 'y', 'ya']:
                     continue
+            print('deleting:', filename)
             os.remove(filename)
         pass
     pass

From 96f6d593a084f9e1edb91403db266a71aba7f9cd Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Tue, 27 Mar 2012 19:49:51 -0700
Subject: [PATCH 6/9] debugged a bit more, especially the folder input
 function, and fixed some bugs.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 fileWalker.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fileWalker.py b/fileWalker.py
index 185ac30..89d01ec 100644
--- a/fileWalker.py
+++ b/fileWalker.py
@@ -17,9 +17,10 @@ def folderIsHidden(filepath):
     par = filepath
     while 1:
         par, cd = os.path.split(par)
+##        print(par, ':', cd)
         if cd.startswith('.') and not cd == '.':
             return True
-        if not par:
+        if not par or os.path.ismount(par):
             break
 
 def getDirName(dirName=None):
@@ -32,12 +33,14 @@ def getDirName(dirName=None):
             return out
     
     while 1:
-        inp = input("which folder would you like to find the duplicates in?\n")
+        inp = print("which folder would you like to find the duplicates in?")
+##        inp = print("(make sure the path you insert has double \\ in between folders.")
+        inp = input()
         if not inp: # mainly for debugging
             global testPath
             inp = testPath
             break
-        if os.path.isdir( os.path.normpath( inp )):
+        if os.path.isdir( inp ):
             break
     return inp
 

From 1aa8a0f71e36e066ef1fb7ea423b3668cb1bec6c Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Tue, 27 Mar 2012 20:07:18 -0700
Subject: [PATCH 7/9] made it faster (it now checks only the beginning of a
 file)

Signed-off-by: sesas <gabreyla@gmail.com>
---
 UltraFastDuplicateFilesFinder.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/UltraFastDuplicateFilesFinder.py b/UltraFastDuplicateFilesFinder.py
index 1850d16..f993de2 100644
--- a/UltraFastDuplicateFilesFinder.py
+++ b/UltraFastDuplicateFilesFinder.py
@@ -78,7 +78,7 @@ def check_file(filename):
     Compare the given file to our lists of hashes
     """    
     # compute md5
-    h = get_file_hash(filename)
+    h = get_file_hash(filename, CHUNK_SIZE//2)
     
     # increase count
     i = hashcount.get(h, 0)
@@ -110,7 +110,8 @@ def main(dirWalker=sys.stdin):
     # we start here by checking all files
     for filename in dirWalker:
         filename = filename.strip()
-
+        if not totalfiles % 100:
+            print(filename)
         check_file(filename)
         totalfiles += 1
         totalsize += os.path.getsize(filename)
@@ -121,6 +122,7 @@ def main(dirWalker=sys.stdin):
     for h, f in hashlist.items():
         if hashcount[h] < 2:
             # present one time, skip
+            
             continue
         
         # reference file    

From 418bc012086e6428d3ceef8901e0a75bcf50f29f Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Wed, 28 Mar 2012 13:36:49 -0700
Subject: [PATCH 8/9] modified readme to reflect the extensions made.

Signed-off-by: sesas <gabreyla@gmail.com>
---
 README        | 31 ++++++++++++++++++++++++++++++-
 test/README   | 31 ++++++++++++++++++++++++++++++-
 test/README_2 | 31 ++++++++++++++++++++++++++++++-
 test/README_3 | 31 ++++++++++++++++++++++++++++++-
 4 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/README b/README
index 07a6777..cddcda5 100644
--- a/README
+++ b/README
@@ -1,8 +1,37 @@
-Ultra Fast Duplicate Files Finder
+=================================
+original Ultra Fast Duplicate Files Finder
 =================================
   by Gautier Portet <kassoulet gmail com>
+forked and extended:
+  by Gabriel Reyla <gabreyla@gmail.com> 
 
+The extended version works on Windows too:
+------------------------------------------------------------------------------
+$ python fileWalker.py
+which folder would you like to find the duplicates in?
+.
+.\fileWalker.py
+      size   filename
+      1306   .\README
+      1306   .\test\README
+      1306   .\test\README_2
+      1306   .\test\README_3
+
+7 files checked (15.24 KiB), 1 duplicates (3.83 KiB).
+would you like to remove all duplicates?
+yes
+keeping: .\README
+duplicate: .\test\README
+?y
+deleting: .\test\README
+duplicate: .\test\README_2
+?n
+duplicate: .\test\README_3
+?n
+>>> 
+------------------------------------------------------------------------------
 
+On Unix systems the program can be used as follow:
 Takes a list of file from stdin.
 And print the duplicate ones.
 
diff --git a/test/README b/test/README
index 07a6777..cddcda5 100644
--- a/test/README
+++ b/test/README
@@ -1,8 +1,37 @@
-Ultra Fast Duplicate Files Finder
+=================================
+original Ultra Fast Duplicate Files Finder
 =================================
   by Gautier Portet <kassoulet gmail com>
+forked and extended:
+  by Gabriel Reyla <gabreyla@gmail.com> 
 
+The extended version works on Windows too:
+------------------------------------------------------------------------------
+$ python fileWalker.py
+which folder would you like to find the duplicates in?
+.
+.\fileWalker.py
+      size   filename
+      1306   .\README
+      1306   .\test\README
+      1306   .\test\README_2
+      1306   .\test\README_3
+
+7 files checked (15.24 KiB), 1 duplicates (3.83 KiB).
+would you like to remove all duplicates?
+yes
+keeping: .\README
+duplicate: .\test\README
+?y
+deleting: .\test\README
+duplicate: .\test\README_2
+?n
+duplicate: .\test\README_3
+?n
+>>> 
+------------------------------------------------------------------------------
 
+On Unix systems the program can be used as follow:
 Takes a list of file from stdin.
 And print the duplicate ones.
 
diff --git a/test/README_2 b/test/README_2
index 07a6777..cddcda5 100644
--- a/test/README_2
+++ b/test/README_2
@@ -1,8 +1,37 @@
-Ultra Fast Duplicate Files Finder
+=================================
+original Ultra Fast Duplicate Files Finder
 =================================
   by Gautier Portet <kassoulet gmail com>
+forked and extended:
+  by Gabriel Reyla <gabreyla@gmail.com> 
 
+The extended version works on Windows too:
+------------------------------------------------------------------------------
+$ python fileWalker.py
+which folder would you like to find the duplicates in?
+.
+.\fileWalker.py
+      size   filename
+      1306   .\README
+      1306   .\test\README
+      1306   .\test\README_2
+      1306   .\test\README_3
+
+7 files checked (15.24 KiB), 1 duplicates (3.83 KiB).
+would you like to remove all duplicates?
+yes
+keeping: .\README
+duplicate: .\test\README
+?y
+deleting: .\test\README
+duplicate: .\test\README_2
+?n
+duplicate: .\test\README_3
+?n
+>>> 
+------------------------------------------------------------------------------
 
+On Unix systems the program can be used as follow:
 Takes a list of file from stdin.
 And print the duplicate ones.
 
diff --git a/test/README_3 b/test/README_3
index 07a6777..cddcda5 100644
--- a/test/README_3
+++ b/test/README_3
@@ -1,8 +1,37 @@
-Ultra Fast Duplicate Files Finder
+=================================
+original Ultra Fast Duplicate Files Finder
 =================================
   by Gautier Portet <kassoulet gmail com>
+forked and extended:
+  by Gabriel Reyla <gabreyla@gmail.com> 
 
+The extended version works on Windows too:
+------------------------------------------------------------------------------
+$ python fileWalker.py
+which folder would you like to find the duplicates in?
+.
+.\fileWalker.py
+      size   filename
+      1306   .\README
+      1306   .\test\README
+      1306   .\test\README_2
+      1306   .\test\README_3
+
+7 files checked (15.24 KiB), 1 duplicates (3.83 KiB).
+would you like to remove all duplicates?
+yes
+keeping: .\README
+duplicate: .\test\README
+?y
+deleting: .\test\README
+duplicate: .\test\README_2
+?n
+duplicate: .\test\README_3
+?n
+>>> 
+------------------------------------------------------------------------------
 
+On Unix systems the program can be used as follow:
 Takes a list of file from stdin.
 And print the duplicate ones.
 

From dfa6bd0ae27e0130479e0d7136bc086fe3f2177a Mon Sep 17 00:00:00 2001
From: sesas <gabreyla@gmail.com>
Date: Wed, 28 Mar 2012 14:44:06 -0700
Subject: [PATCH 9/9] added a startup script so that the others are compiled to
 .pyc

Signed-off-by: sesas <gabreyla@gmail.com>
---
 UltraFastDuplicateFilesFinder.py | 4 ++--
 duplicate.py                     | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 duplicate.py

diff --git a/UltraFastDuplicateFilesFinder.py b/UltraFastDuplicateFilesFinder.py
index f993de2..58c7bbc 100644
--- a/UltraFastDuplicateFilesFinder.py
+++ b/UltraFastDuplicateFilesFinder.py
@@ -110,8 +110,8 @@ def main(dirWalker=sys.stdin):
     # we start here by checking all files
     for filename in dirWalker:
         filename = filename.strip()
-        if not totalfiles % 100:
-            print(filename)
+        if not totalfiles % 500:
+            print('files processed:', totalfiles, filename)
         check_file(filename)
         totalfiles += 1
         totalsize += os.path.getsize(filename)
diff --git a/duplicate.py b/duplicate.py
new file mode 100644
index 0000000..c3a937b
--- /dev/null
+++ b/duplicate.py
@@ -0,0 +1,4 @@
+import fileWalker
+
+if __name__ == '__main__':
+    fileWalker.main()