Python字符串编码及正则表达式使用

编码与解码

# Encode one piece of text under two codecs so the byte strings can be compared.
text = "你好,燕闪硕!我在测试!"
utf8_bytes, gbk_bytes = (text.encode(codec) for codec in ("utf-8", "gbk"))

print(f"UTF-8编码: {utf8_bytes}")
print(f"GBK编码: {gbk_bytes}")

# Decoding with the codec that produced the bytes restores the original text.
decoded_text = str(utf8_bytes, "utf-8")
print(f"解码后: {decoded_text}")

# A strict decode with the wrong codec raises UnicodeDecodeError; report it
# instead of letting the script stop.
try:
    result = str(gbk_bytes, "utf-8")
except UnicodeDecodeError as err:
    print(f"解码错误: {err}")

# The "ignore" error handler drops every byte that cannot be decoded, so the
# call succeeds but the resulting text is lossy.
text_with_error = str(gbk_bytes, "utf-8", "ignore")
print(f"忽略错误解码: {text_with_error}")

正则表达式与字符串

import re

# Sample text containing one phone number and one e-mail address.
text = "我的电话是177-6901-8325,邮箱是792326016@qq.com"

# Find phone numbers written as 3-4-4 digit groups joined by hyphens.
phone_pattern = r'\d{3}-\d{4}-\d{4}'
phones = re.findall(phone_pattern, text)
print(f"电话号码: {phones}")

# Mask sensitive data: the pattern covers only the first two digit groups,
# so the last four digits of the phone number stay visible.
censored = re.sub(r'\d{3}-\d{4}', 'XXX-XXXX', text)
print(f"脱敏后: {censored}")

# Split on any single delimiter from the character class in one pass.
complex_text = "苹果,香蕉;橙子|葡萄"
items = re.split(r'[,;|]', complex_text)
print(f"分割结果: {items}")


# Regular expression for common e-mail addresses: a local part, "@", a domain,
# then a dot followed by a top-level label of at least two letters.
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

# Sample text with three well-formed addresses and three malformed ones that
# the pattern must reject.
text = """
联系方式:
张三的工作邮箱:zhangsan.company@example.com
李四的个人邮箱:lisi_123@gmail.com
测试邮箱:test.user+label@sub.domain.co.uk
无效示例:user@.com, @example.com, user@com
"""

# findall returns every non-overlapping match as a list of strings.
found_emails = re.findall(email_regex, text)
print("找到的邮箱地址:")
for email in found_emails:
    print(f" - {email}")

# Extract the host part of a URL.
url = "https://www.baidu.com/index?name=test"
# Pattern: capture the run of non-slash characters that follows "//".
domain_pattern = r"//([^/]+)"
# search() yields the first match (or None); group 1 is the captured host.
result = re.compile(domain_pattern).search(url)
if result is not None:
    print("提取的域名:", result[1])  # www.baidu.com

 

在线正则测试工具:https://regex101.com

 

 

 







posted @ 2025-12-05 10:29  燕闪硕  阅读(4)  评论(0)    收藏  举报