pandas中,对某列应用正则表达式
1.str.contains(),类似re.search(),检查是否包含特定字符串
import pandas as pd
# Sample data: one text column to search.
data = dict(col1=['apple', 'banana', 'cherry', 'pineapple', 'grape'])
df = pd.DataFrame(data)
# Flag every row whose value contains the letter 'a' anywhere in the
# string; the pattern is treated as a regular expression.
pattern = r'a'
df = df.assign(contains_a=df['col1'].str.contains(pattern))
print(df)
col1 contains_a
0 apple True
1 banana True
2 cherry False
3 pineapple True
4 grape True
2.str.match(),类似re.match(),检查是否以特定字符串开始
import pandas as pd
# Sample data: one text column to test.
data = dict(col1=['apple', 'banana', 'cherry', 'pineapple', 'grape'])
df = pd.DataFrame(data)
# Flag rows whose value starts with 'a'. str.match anchors only at the
# beginning of each string (it is not a full-string match), so the
# explicit '^' just spells out what str.match already does.
pattern = r'^a'  # matches strings that begin with 'a'
col1_text = df['col1'].str
df['starts_with_a'] = col1_text.match(pattern)
print(df)
col1 starts_with_a
0 apple True
1 banana False
2 cherry False
3 pineapple False
4 grape False
3.str.replace(),类似re.sub(),查找特定字符串,并替换
import pandas as pd
# Sample data: one text column to rewrite.
data = dict(col1=['apple', 'banana', 'cherry', 'pineapple', 'grape'])
df = pd.DataFrame(data)
# Substitute '@' for every 'a'; regex=True makes pandas treat the
# pattern as a regular expression rather than a literal string.
replaced = df['col1'].str.replace(pat=r'a', repl='@', regex=True)
df = df.assign(replaced=replaced)
print(df)
col1 replaced
0 apple @pple
1 banana b@n@n@
2 cherry cherry
3 pineapple pine@pple
4 grape gr@pe
4.str.extract(),类似re.search()后取match.groups(),提取每个字符串中第一个匹配的捕获组内容
import pandas as pd
# Sample data: names followed by a run of digits.
data = {'col1': ['apple123', 'banana456', 'cherry789', 'pineapple000', 'grape987']}
df = pd.DataFrame(data)
# Extract the digit run captured by the first match in each string.
pattern = r'(\d+)'  # one capture group: one or more digits
# expand=False returns a Series for a single-group pattern, so the
# assignment below does not rely on pandas coercing a one-column
# DataFrame (the expand=True default) into a column.
df['numbers'] = df['col1'].str.extract(pattern, expand=False)
print(df)
col1 numbers
0 apple123 123
1 banana456 456
2 cherry789 789
3 pineapple000 000
4 grape987 987
5.str.findall(),类似re.findall(),查找所有匹配的字符串,返回列表
import pandas as pd
# Sample data: names followed by a run of digits.
data = dict(col1=['apple123', 'banana456', 'cherry789', 'pineapple000', 'grape987'])
df = pd.DataFrame(data)
# Collect every digit run found in each string; each cell of the new
# column holds a list of the matches.
pattern = r'(\d+)'  # one or more digits
all_numbers = df['col1'].str.findall(pattern)
df = df.assign(all_numbers=all_numbers)
print(df)
col1 all_numbers
0 apple123 [123]
1 banana456 [456]
2 cherry789 [789]
3 pineapple000 [000]
4 grape987 [987]

浙公网安备 33010602011771号