代码改变世界

Clean Text

2021-05-20 13:31  DataBases  阅读(25)  评论(0)  编辑  收藏  举报
import re
import string
# Translation table that deletes every ASCII punctuation character; built once
# at import time so each call is a single C-level pass over the string.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(s: str) -> str:
    """
    Strip ASCII punctuation from a string and normalise its whitespace.

    :param s: string
    :return: cleaned string with every character in ``string.punctuation``
        removed, runs of whitespace collapsed to a single space, and no
        leading or trailing whitespace
    """
    # Remove punctuation first: a standalone punctuation token such as the
    # "-" in "a - b" would otherwise leave a double space behind.
    without_punctuation = s.translate(_PUNCTUATION_TABLE)
    # split() with no arguments splits on any whitespace run and drops empty
    # tokens, so re-joining yields single-space-separated, trimmed text.
    return " ".join(without_punctuation.split())