#单位度量衡转换
# 单位换算：将带单位的大小字符串统一换算为 GB
#单位度量衡转换
def GB_MB_TB(msg):
    """Convert a human-readable size string to a number of gigabytes.

    The text must start with a non-negative number (integer or decimal)
    followed, optionally after whitespace, by one of the units ``KB``,
    ``MB``, ``GB`` or ``TB``. Decimal factors are used: 1 TB = 1000 GB,
    1 MB = 1/1000 GB, 1 KB = 1/1,000,000 GB.

    Args:
        msg: Size text such as ``"3.03GB"``, ``"700MB"`` or ``"1.5 TB"``.

    Returns:
        The size expressed in GB as a ``float``, or ``None`` when ``msg``
        is empty, is not a string (e.g. ``None`` or a pandas NaN), or does
        not start with a recognised "<number><unit>" pair.
    """
    import re  # local import so the function works on its own

    # Missing values and empty strings carry no size information.
    if not isinstance(msg, str) or not msg:
        return None

    # A real alternation group is required here: a character class such as
    # [GB|MB|TB] would match any single one of those letters (or "|").
    # The unit is captured right after the number so that unit-like text
    # later in the string cannot be mistaken for the actual unit.
    parsed = re.match(r"^(\d+(?:\.\d+)?)\s*(KB|MB|GB|TB)", msg)
    if parsed is None:
        return None

    # (multiplier, divisor) relative to GB. Small units are divided rather
    # than multiplied by 0.001 so results stay exactly value / 1000.
    gb_ratio = {
        "KB": (1, 1_000_000),
        "MB": (1, 1000),
        "GB": (1, 1),
        "TB": (1000, 1),
    }
    amount, unit = parsed.groups()
    multiplier, divisor = gb_ratio[unit]
    return float(amount) * multiplier / divisor
# Quick check of the converter: this call evaluates to 3.03 (already in GB).
# The value is discarded unless run in an interactive cell that echoes it.
GB_MB_TB('3.03GB')
# 种子大小清洗：统一换算为 GB
# Derive a numeric 'size' column (in GB) from the raw size text in '大小'.
# Casting to float makes the column numeric; entries the converter could
# not parse (None) end up as NaN.
df['size'] = df['大小'].apply(GB_MB_TB).astype('float')
# 数据类型转换
# Cast the counter columns (comments, seeds, downloads, completions) to
# integers in one vectorised call.
df[['评论数', '种子数', '下载数','完成数']] = df[['评论数', '种子数', '下载数','完成数']].astype('int')

浙公网安备 33010602011771号