Python字符串编码及正则表达式使用
编码与解码
# Encode one string into bytes with two different codecs.
text = "你好,燕闪硕!我在测试!"
utf8_bytes = text.encode("utf-8")
gbk_bytes = text.encode("gbk")
print(f"UTF-8编码: {utf8_bytes}")
print(f"GBK编码: {gbk_bytes}")

# Decode bytes back into a string using the codec that produced them.
decoded_text = utf8_bytes.decode("utf-8")
print(f"解码后: {decoded_text}")

# Decoding GBK bytes as UTF-8 fails; catch the specific error and report it.
try:
    result = gbk_bytes.decode("utf-8")
except UnicodeDecodeError as e:
    print(f"解码错误: {e}")

# errors="ignore" silently drops the bytes that cannot be decoded instead of
# raising, so the result is lossy.
text_with_error = gbk_bytes.decode("utf-8", errors="ignore")
print(f"忽略错误解码: {text_with_error}")
正则表达式与字符串
import re

text = "我的电话是177-6901-8325,邮箱是792326016@qq.com"

# Find phone numbers written as 3-4-4 digit groups separated by hyphens.
phone_pattern = r'\d{3}-\d{4}-\d{4}'
phones = re.findall(phone_pattern, text)
print(f"电话号码: {phones}")

# Mask sensitive information.
# NOTE(review): this pattern covers only the first two digit groups, so the
# last four digits of a 3-4-4 number stay visible — confirm that is intended.
censored = re.sub(r'\d{3}-\d{4}', 'XXX-XXXX', text)
print(f"脱敏后: {censored}")

# Split on any one of several single-character delimiters.
complex_text = "苹果,香蕉;橙子|葡萄"
items = re.split(r'[,;|]', complex_text)
print(f"分割结果: {items}")
# Regular expression for common e-mail address shapes:
# local part, "@", domain labels, then a dot and a TLD of two or more letters.
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

# Sample text containing valid addresses plus a line of malformed ones.
text = """
联系方式:
张三的工作邮箱:zhangsan.company@example.com
李四的个人邮箱:lisi_123@gmail.com
测试邮箱:test.user+label@sub.domain.co.uk
无效示例:user@.com, @example.com, user@com
"""

# findall returns every non-overlapping match, in order of appearance.
found_emails = re.findall(email_regex, text)
print("找到的邮箱地址:")
for email in found_emails:
    print(f" - {email}")
# Extract the host part of a URL.
url = "https://www.baidu.com/index?name=test"

# Capture every character after "//" up to (not including) the next "/".
domain_pattern = r"//([^/]+)"

# search() returns the first match or None; group(1) is the captured text.
result = re.search(domain_pattern, url)
if result:
    print("提取的域名:", result.group(1))  # www.baidu.com
在线正则测试工具 regex101.com
石家庄的.net程序员
浙公网安备 33010602011771号