Python3 计算文件(含大文件)的 SHA1 和 MD5 值
小文件
import hashlib
import base64
# Hash a small file in one shot: the whole file is read into memory, so this
# approach only suits files that comfortably fit in RAM.
filePath = "test.txt"
with open(filePath, "rb") as f:
    fileData = f.read()

sha1 = hashlib.sha1()
sha1.update(fileData)

# Base64 text form of the raw digest bytes.
fileHash = base64.b64encode(sha1.digest()).decode("utf-8")
print(fileHash)
# Hexadecimal form, taken straight from the hash object instead of decoding
# the Base64 string back to bytes first.
print(sha1.hexdigest())
在 Linux 中,可以使用命令 `sha1sum test.txt` 进行验证:其输出应与上面第二行打印的十六进制值一致(第一行打印的是 Base64 编码)。
大文件
import hashlib
import base64
# Hash a large file incrementally so that memory use is bounded by the chunk
# size rather than by the size of the file.
filePath = "test.txt"
chunkSize = 64 * 1024  # bytes requested per read
sha1 = hashlib.sha1()
with open(filePath, "rb") as f:
    # iter(callable, sentinel) keeps calling f.read() until it returns b"",
    # which is what a file opened in binary mode yields at end-of-file.
    for fileData in iter(lambda: f.read(chunkSize), b""):
        sha1.update(fileData)

# Base64 text form of the raw digest bytes.
fileHash = base64.b64encode(sha1.digest()).decode("utf-8")
print(fileHash)
# Hexadecimal form, taken straight from the hash object instead of decoding
# the Base64 string back to bytes first.
print(sha1.hexdigest())
同理,计算 MD5 只需把 `sha1 = hashlib.sha1()` 替换为 `md5 = hashlib.md5()`,并把后面用到的 `sha1` 相应替换为 `md5` 即可。
此外,十六进制结果也可以直接通过 `sha1.hexdigest()` 或 `md5.hexdigest()` 获取,无需先做 Base64 编码再解码。
参考: python 计算大文件的md5、sha1值
补充
计算字符串的 SHA1(返回值为摘要的 Base64 编码,而非十六进制)
def get_sha1_from_text(text):
    """Return the Base64-encoded SHA-1 digest of ``text``.

    Args:
        text: A ``str`` (hashed as its UTF-8 encoding) or a bytes-like
            object (``bytes``, ``bytearray``, ``memoryview``; hashed as-is).

    Returns:
        The SHA-1 digest encoded as a Base64 ``str`` (not hexadecimal), or
        ``None`` when ``text`` has an unsupported type or hashing fails,
        e.g. a ``str`` containing lone surrogates that cannot be encoded
        as UTF-8.
    """
    # Imported locally so the error path below cannot itself fail with a
    # NameError when the module has not imported ``traceback``.
    import traceback

    try:
        if isinstance(text, str):
            data = text.encode('utf-8')
        elif isinstance(text, (bytes, bytearray, memoryview)):
            data = text
        else:
            # Unsupported input type: signal failure explicitly instead of
            # relying on an undefined local further down.
            return None
        # The data is already fully in memory, so one update over the whole
        # buffer yields the same digest as feeding it slice by slice, without
        # creating the intermediate slice copies.
        digest = hashlib.sha1(data).digest()
        return base64.b64encode(digest).decode('utf-8')
    except Exception:
        # Best-effort contract: report the failure and signal it with None.
        traceback.print_exc()
        return None