#!/usr/bin/python
"""Duplicatrix: find duplicated files under a directory tree.

Every file below the starting directory is hashed with SHA-1. Files that
share a digest are listed in a ``duplicated.txt`` report written to the
current working directory (which is the scanned directory).
"""
__author__ = "Luis Ezcurdia (3zcurdia)"
__email__ = "ing.ezcurdia@gmail.com"

import os
import sys
import hashlib

try:
    import magic
except ImportError:
    # Only the report generation needs libmagic; keep the module importable
    # so search() can still be used, and let main() bail out cleanly.
    magic = None
    print("To run this script you will need pymagic")

# Read size used while hashing so large files are never loaded in one piece.
_CHUNK_SIZE = 1024 * 1024


def _sha1_of_file(file_path):
    """Return the hexadecimal SHA-1 digest of the file at *file_path*."""
    digest = hashlib.sha1()
    with open(file_path, "rb") as handle:
        for chunk in iter(lambda: handle.read(_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def search(path):
    """Walk *path* recursively and group files that have identical content.

    Args:
        path: Directory whose tree is scanned.

    Returns:
        A ``(duplicates, count)`` tuple. ``duplicates`` maps a SHA-1 hex
        digest to the list of file paths sharing that digest (only digests
        seen at least twice are present); ``count`` is ``len(duplicates)``,
        i.e. the number of duplicate groups.
    """
    print("Searching on route : %s ..." % path)
    print("This will take a while.. so go and get a coffee.")

    files_by_hash = {}
    # "root" instead of reusing "path": the walk must not shadow the argument.
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                key = _sha1_of_file(file_path)
            except (IOError, OSError) as err:
                # A broken symlink or a permission error should not abort
                # a scan that may already have hashed thousands of files.
                sys.stderr.write(
                    "Skipping unreadable file %s: %s\n" % (file_path, err)
                )
                continue
            files_by_hash.setdefault(key, []).append(file_path)

    duplicates = {
        key: paths for key, paths in files_by_hash.items() if len(paths) > 1
    }
    print("%d Files found" % len(duplicates))
    return duplicates, len(duplicates)


def _write_report(duplex, duplex_count, magic_square,
                  report_path="duplicated.txt"):
    """Write the duplicate groups in *duplex* to *report_path*."""
    with open(report_path, "w") as report:
        report.write("Files duplicated: " + str(duplex_count) + "\n")
        for key in duplex:
            paths = duplex[key]
            report.write(("=" * 40) + "\n")
            # The MIME/type description is taken from the first file only;
            # all files in a group have identical content.
            report.write(
                "sha1: " + key
                + "\tDuplicated: " + str(len(paths))
                + "\tMime Type:" + str(magic_square.file(paths[0]))
                + "\n"
            )
            for item in paths:
                report.write(item + "\n")


def main():
    """Scan ``sys.argv[1]`` (or the current directory) and write the report."""
    print("Duplicatrix v0.1")
    if magic is None:
        # The missing-dependency message was already printed at import time.
        sys.exit(1)
    magic_square = magic.open(magic.MAGIC_NONE)
    magic_square.load()
    if len(sys.argv) > 1:
        os.chdir(sys.argv[1])
    duplex, duplex_count = search(os.getcwd())
    if duplex_count > 0:
        print("Generating Report: duplicated.txt")
        _write_report(duplex, duplex_count, magic_square)


if __name__ == "__main__":
    main()
duplicatrix.py
d
Nombre: duplicatrix.py
Autor: @3zcurdia
Descripción: Script que busca archivos duplicados en el directorio donde se ejecuta
Suscribirse a:
Enviar comentarios (Atom)
0 comentarios:
Publicar un comentario