# 2020-03-15: Removing punctuation from a string in Python 3.6
# Sample text mixing ASCII letters/digits, Chinese characters, and punctuation;
# presumably intended as test input for remove_punctuation (verify against caller).
line = "python3.6下进行去!@#$%^&*()除标点测试,:!大家好,:!&》啥都不是!@#¥%……&*(-、||" # Python 3 does not support the ur string prefix; use r instead
def remove_punctuation(line: str) -> str:
    """Return *line* with everything except letters, digits, and CJK text removed.

    A character is kept only if it is an ASCII letter (a-z, A-Z), an ASCII
    digit (0-9), or a code point in the range U+4E00..U+9FA5 (CJK unified
    ideographs). Every other character -- ASCII and full-width punctuation,
    whitespace, underscores, symbols -- is deleted.

    Args:
        line: The text to clean.

    Returns:
        A new string containing only the kept characters, in their
        original order. An empty input yields an empty string.
    """
    # Imported locally so the function is self-contained: the surrounding
    # snippet has no import section that brings ``re`` into scope.
    import re

    # Negated character class: match (and drop) anything NOT in the keep-set.
    return re.sub(r"[^a-zA-Z0-9\u4e00-\u9fa5]", "", line)
# title = href.get('title')  # gb2312 GB18030
# dirName = title.encode("latin1").decode("gbk")  # folder name
浙公网安备 33010602011771号