# -*- coding: utf-8 -*-
import re
def clear_str(string):
    """
    Clean up a string.

    The input is first passed through ``strQ2B`` (full-width to half-width
    conversion) and the result is stripped of leading/trailing whitespace.
    """
    return strQ2B(string).strip()
def handle_space(string, repl=None):
    """
    Replace runs of blank-like tokens in ``string``.

    A "run" is one or more consecutive occurrences of any of: a whitespace
    character (``\\s``), a literal ``?``, an underscore, the literal text
    ``<br>``, or one of the literal space characters embedded in the pattern.
    Each run is replaced by ``repl``; when ``repl`` is falsy (the default
    ``None``, or an empty string) the run is removed entirely.
    """
    replacement = repl or ''
    blank_run = r'((\s)|( )|(\?)|(_)|(<br>)|( )|( )|( ))+'
    return re.sub(blank_run, replacement, string)
def handle_re_match(matched):
    """
    Substitution callback for a regex match with a named group ``value``.

    The text captured by ``value`` is cleaned with ``handle_space`` and
    returned wrapped as ``'>' + cleaned + '<'``.
    """
    inner_text = matched['value']
    cleaned = handle_space(inner_text)
    return '>' + cleaned + '<'
def handle_html_space(html):
    """
    Strip blank characters from every text segment of an HTML string.

    Newlines are removed first, then each shortest ``>...<`` span is rewritten
    by ``handle_re_match``, and finally the whole result is passed through
    ``clear_str``.
    """
    single_line = html.replace('\n', '')
    rewritten = re.sub('>(?P<value>.*?)<', handle_re_match, single_line)
    return clear_str(rewritten)
def strQ2B(ustring):
    """Convert full-width characters in ``ustring`` to half-width.

    The ideographic space (code point 12288, U+3000) becomes an ASCII space.
    Code points 65281..65374 (U+FF01..U+FF5E) are shifted down by 65248
    (0xFEE0), which maps them onto ``!``..``~``. Every other character is
    returned unchanged.

    Returns a new string; an empty input yields an empty string.
    """
    def _to_half_width(uchar):
        # Map a single character to its half-width counterpart, if it has one.
        code_point = ord(uchar)
        if code_point == 12288:  # full-width space maps directly to ' '
            return ' '
        if 65281 <= code_point <= 65374:  # other full-width forms: fixed offset
            return chr(code_point - 65248)
        return uchar

    # Join once instead of growing a string with += inside the loop.
    return "".join(_to_half_width(uchar) for uchar in ustring)