# 提取中文英文和数字 -- extract Chinese characters, English letters, and digits from a text file

def change_text(path, out_path='new.tt'):
    """Strip a text file down to Chinese characters, ASCII alphanumerics and basic punctuation.

    Reads the UTF-8 file at ``path`` and removes every character that is not
    one of:

    * a CJK ideograph in the range U+4E00-U+9FA5,
    * an ASCII digit ``0-9`` or letter ``A-Z`` / ``a-z``,
    * an ASCII comma or period, a fullwidth comma (U+FF0C) or an
      ideographic full stop (U+3002).

    The cleaned text is printed to stdout and written, UTF-8 encoded, to
    ``out_path``.

    Args:
        path: Path of the UTF-8 encoded input file.
        out_path: Path of the output file. Defaults to ``'new.tt'`` in the
            current working directory.

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
        UnicodeDecodeError: If the input file is not valid UTF-8.
    """
    # Imported locally so the function does not depend on a module-level import.
    import re

    # Negated whitelist: anything NOT listed here is removed. Spaces, '\r' and
    # '\n' are outside the whitelist, so no separate whitespace pass is needed.
    # The digit range is exactly 0-9; a wider range would let punctuation such
    # as ':;<=>?@' slip through.
    # NOTE(review): U+FF0C (fullwidth comma) is assumed to be the intended
    # partner of U+3002 in the original pattern -- confirm.
    unwanted = re.compile(r'[^\u4e00-\u9fa50-9A-Za-z,.\uff0c\u3002]')

    with open(path, 'r', encoding='utf-8') as f_read:
        raw_text = f_read.read()

    cleaned = unwanted.sub('', raw_text)
    print(cleaned)

    # Write with an explicit encoding: the platform default may be unable to
    # represent the Chinese characters that were just extracted.
    with open(out_path, 'w', encoding='utf-8') as f_write:
        f_write.write(cleaned)

if __name__ == '__main__':
    # Script entry point: clean the file 'raw.tt' from the working directory.
    change_text('raw.tt')
# posted @ 2019-08-27 17:23  FromZeroToOne  阅读(220)  评论(0)  编辑  收藏  举报