删除相同图片的Python实现代码

jopen 12年前
  1. 原理:读取图片的二进制内容,使用MD5或SHA-1计算散列值,作为图片的唯一编码,并与图片字典做比对;若字典中已存在该编码,则说明图片重复,将其移除(下面的实现是把重复图片移动到备份目录,而不是直接删除).
  2. Python实现:
    import os
    import re
    import hashlib
    from time import time

    # Directory tree that is scanned for duplicate pictures.
    rootPath = 'F:/Image/照片'
    # Directory that duplicate pictures are moved into.
    backupPath = 'F:/Image/backup'
    # Maps the MD5 hex digest of a picture's content to the path of the first
    # picture seen with that content.
    picDic = {}
    # Full-path pattern selecting the files that are treated as pictures.
    regular = re.compile(r'^(.*)\.(jpg|jpeg|bmp|gif|png|JPG|JPEG|BMP|GIF|PNG)$')


    def _file_md5(path, chunk_size=1 << 20):
        """Return the hex MD5 digest of the file at *path*, read in chunks."""
        digest = hashlib.md5()
        # `with` closes the file even if a read fails; chunked reads avoid
        # loading a large picture into memory in one piece.
        with open(path, 'rb') as pic:
            for chunk in iter(lambda: pic.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()


    def _backup_name(path):
        """Return the backup file name for *path*: MD5 of the path plus its extension."""
        # hashlib only accepts bytes, so text paths are encoded first.
        raw = path if isinstance(path, bytes) else path.encode('utf-8')
        # splitext looks at the last path component only, so a dot in a
        # directory name cannot leak into the extension.
        return hashlib.md5(raw).hexdigest() + os.path.splitext(path)[1]


    def RemoverRePic(dirPath, backupDir=None, seen=None):
        """Move duplicate pictures found under *dirPath* into a backup directory.

        Pictures are the files whose path matches ``regular``. Two pictures are
        duplicates when the MD5 digests of their contents are equal. The first
        picture seen with a given content is recorded in *seen* and left in
        place; every later one is renamed into *backupDir*. Sub-directories are
        processed recursively, and entries are visited in sorted order so the
        copy that is kept does not depend on the order ``os.listdir`` returns.

        Args:
            dirPath: Directory to scan.
            backupDir: Directory that receives the duplicates. Defaults to the
                module-level ``backupPath``. It is created on the first move
                if it does not exist yet.
            seen: Dict mapping content digest to the path of the picture kept
                for that content. Defaults to the module-level ``picDic``.

        Returns:
            The number of pictures moved, including those in sub-directories.
        """
        if backupDir is None:
            backupDir = backupPath
        if seen is None:
            seen = picDic
        backupAbs = os.path.normcase(os.path.abspath(backupDir))

        quantity = 0
        for name in sorted(os.listdir(dirPath)):
            childPath = dirPath + '/' + name
            if os.path.isdir(childPath):
                # Never descend into the backup directory itself, otherwise the
                # files just moved there would be moved (and counted) again.
                if os.path.normcase(os.path.abspath(childPath)) == backupAbs:
                    continue
                # `+=` accumulates; the original `=+` overwrote the count.
                quantity += RemoverRePic(childPath, backupDir, seen)
            elif regular.match(childPath):
                picMd5 = _file_md5(childPath)
                if picMd5 not in seen:
                    seen[picMd5] = childPath
                elif seen[picMd5] != childPath:
                    # A path already recorded as the kept copy is not a
                    # duplicate of itself (matters when a tree is rescanned).
                    if not os.path.isdir(backupDir):
                        os.makedirs(backupDir)
                    os.rename(childPath,
                              os.path.join(backupDir, _backup_name(childPath)))
                    quantity += 1
        return quantity


    if __name__ == '__main__':
        t = time()
        print('start:')
        print(t)
        print(RemoverRePic(rootPath))
        print('end:')
        print(time() - t)