删除相同图片的Python实现代码
jopen
12年前
- 原理:读取图片的二进制内容,用MD5或SHA-1计算散列值,作为图片的唯一标识;将该标识与已记录的图片字典做比对,若已存在则说明图片重复,将其移除(下面的代码是把重复图片移动到备份目录,而不是直接删除).
- Python实现:
# Find byte-identical images under a directory tree and move every duplicate
# into a backup directory, keeping the first copy encountered in place.
import hashlib
import os
import re
from time import time

# Directory tree that is scanned when the file is run as a script.
rootPath = 'F:/Image/照片'
# Default directory that duplicate images are moved into.
backupPath = 'F:/Image/backup'
# Maps the MD5 hex digest of an image's content to the first path seen with it.
picDic = {}
# Paths ending in one of these extensions (all-lower or all-upper case only)
# are treated as images.
regular = re.compile(r'^(.*)\.(jpg|jpeg|bmp|gif|png|JPG|JPEG|BMP|GIF|PNG)$')


def _file_md5(path, chunk_size=65536):
    """Return the hex MD5 digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    # The context manager closes the file even if a read fails.
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def RemoverRePic(dirPath, backupDir=None):
    """Move duplicate images found under *dirPath* into a backup directory.

    The tree is walked recursively. Every file whose path matches ``regular``
    is hashed; the first path seen for a given content digest is recorded in
    the module-level ``picDic`` and left in place, and any other file with the
    same digest is renamed into the backup directory. The backup file name is
    the MD5 hex digest of the duplicate's path plus its original extension.

    Args:
        dirPath: Directory to scan.
        backupDir: Directory that receives duplicates. Defaults to the
            module-level ``backupPath``. It is created if it does not exist.

    Returns:
        The number of files moved, including those in sub-directories.

    Raises:
        OSError: If a directory cannot be listed, a file cannot be read, or a
            duplicate cannot be renamed into the backup directory.
    """
    if backupDir is None:
        backupDir = backupPath

    quantity = 0
    # Sorted so that which copy is kept does not depend on listing order.
    for name in sorted(os.listdir(dirPath)):
        # Plain '/' concatenation is kept from the original so the recorded
        # paths (and the backup names derived from them) keep the same form.
        childPath = dirPath + '/' + name

        if os.path.isdir(childPath):
            # '+=' accumulates; the original '=+' overwrote the running total.
            quantity += RemoverRePic(childPath, backupDir)
            continue
        if not regular.match(childPath):
            continue

        picMd5 = _file_md5(childPath)
        firstSeen = picDic.setdefault(picMd5, childPath)
        if firstSeen == childPath:
            # First occurrence, or a re-scan of the copy already recorded:
            # a file is never a duplicate of itself.
            continue

        # splitext looks only at the final path component, so a dot in a
        # directory name cannot leak into the extension.
        extension = os.path.splitext(childPath)[1]
        newName = hashlib.md5(os.fsencode(childPath)).hexdigest() + extension
        os.makedirs(backupDir, exist_ok=True)
        os.rename(childPath, backupDir + '/' + newName)
        quantity += 1

    return quantity


if __name__ == '__main__':
    t = time()
    print('start:')
    print(t)
    print(RemoverRePic(rootPath))
    print('end:')
    print(time() - t)