Python 去掉字符串中的表情等特殊字符

import emoji
# Strip emoji and other special characters from the string.
# NOTE(review): emoji.demojize presumably rewrites each emoji as a textual
# alias rather than deleting it -- confirm against the emoji package docs.
jieshao="包括表情的字符串"
jieshao=emoji.demojize(jieshao)

 

    def filter_emoji(self, desstr, restr='[emoji]'):
        """Replace every character outside the Basic Multilingual Plane.

        Each code point in the range U+10000..U+10FFFF (where most emoji
        live) is substituted individually, so a run of N such characters
        yields N copies of ``restr``.

        Args:
            desstr: The text to clean.
            restr: Replacement inserted for each matched character;
                pass ``''`` to delete the characters outright.

        Returns:
            ``desstr`` with every matched character replaced by ``restr``.
        """
        # Imported locally because the visible file never imports ``re`` at
        # module level; without this the method fails with NameError.
        import re

        try:
            co = re.compile(u'[\U00010000-\U0010ffff]')
        except re.error:
            # Fallback for interpreters that reject the astral range above:
            # match a high surrogate followed by a low surrogate instead.
            co = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
        return co.sub(restr, desstr)

 

# Example call of filter_emoji on the same string; the `self` receiver means
# this line only works inside a method of the class that defines filter_emoji.
jieshao=self.filter_emoji(jieshao)
# Repair mojibake: every character of `s` carries one byte of UTF-8 data.
s = '\xe9\x9d\x92\xe8\x9b\x99\xe7\x8e\x8b\xe5\xad\x90'
# raw_unicode_escape turns each of these characters back into the single
# byte with the same value, recovering the original byte sequence.
ss = s.encode('raw_unicode_escape')
print(ss)  # Output: b'\xe9\x9d\x92\xe8\x9b\x99\xe7\x8e\x8b\xe5\xad\x90'
# Decoding the recovered bytes as UTF-8 yields the intended text.
sss = ss.decode('utf-8')
print(sss)
如果字符串中是正常中文与这种乱码混合在一起，则逐个字符循环检查，把无法解码的乱码字符删除掉：

# Text that mixes valid characters with stray bytes that are not valid UTF-8.
mys = '宝石 \x9e\x9d\x99'


def _is_decodable(ch):
    """Return True if a single character survives the encode/decode round trip."""
    try:
        ch.encode('raw_unicode_escape').decode('utf-8')
    except UnicodeError:
        # A lone byte in 0x80-0xFF is not valid UTF-8 by itself: treat as garbage.
        return False
    return True


# Keep only the characters that decode cleanly; the garbage ones are dropped.
sl = [ch for ch in mys if _is_decodable(ch)]
# Join the filtered list (not the original string) so the removal takes effect.
mys = ''.join(sl)
print(mys)

 

posted @ 2021-11-09 05:46  myrj  阅读(1022)  评论(0)  编辑  收藏  举报