python模糊匹配之fuzzywuzzy

fuzzywuzzy 是 Python 下的一个模糊匹配模块。使用前需要先安装 fuzzywuzzy(例如:pip install fuzzywuzzy)。

示例:

 1 from fuzzywuzzy import fuzz
 2 from fuzzywuzzy import process
 3 
 4 state_to_code = {"VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
 5                  "KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
 6                  "NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
 7                  "FEDERATED STATES OF MICRONESIA": "FM",
 8                  "Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
 9                  "Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
10                  "MASSACHUSETTS": "MA",
11                  "PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
12                  "NEW MEXICO": "NM",
13                  "MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
14                  "Armed Forces Middle East": "AE",
15                  "NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
16                  "MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
17                  "WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
18                  "LOUISIANA": "LA",
19                  "NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
20                  "NORTH DAKOTA": "ND",
21                  "Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
22                  "RHODE ISLAND": "RI",
23                  "DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
24                  }
25 def studyfuzzy():
26     process.extractOne("Minnesotta", choices=state_to_code.keys())
27     process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=80)
28     process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=96)
29 
30     state_to_code.keys()
31     state_to_code.values()
32     state_to_code.viewkeys()
33     state_to_code.viewvalues()
34     state_to_code.viewitems()
35     process.extractOne("AlaBAMMazzz", choices=state_to_code.keys(), score_cutoff=80)
36     process.extractOne("AlaBAMMazzz",choices=state_to_code.keys())
In[6]: from fuzzywuzzy import fuzz

In[7]: from fuzzywuzzy import process

In[8]: state_to_code = {"VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
                 "KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
                 "NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
                 "FEDERATED STATES OF MICRONESIA": "FM",
                 "Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
                 "Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
                 "MASSACHUSETTS": "MA",
                 "PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
                 "NEW MEXICO": "NM",
                 "MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
                 "Armed Forces Middle East": "AE",
                 "NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
                 "MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
                 "WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
                 "LOUISIANA": "LA",
                 "NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
                 "NORTH DAKOTA": "ND",
                 "Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
                 "RHODE ISLAND": "RI",
                 "DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
                 }

 

In[19]: process.extractOne("Minnesotta", choices=state_to_code.keys())

Out[19]: ('MINNESOTA', 95)
In[20]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=80)

Out[20]: ('MINNESOTA', 95)
In[21]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=96)

(最佳匹配得分 95 低于 score_cutoff=96,extractOne 返回 None,因此没有 Out 输出。)

In[22]: process.extractOne("AlaBAMMazzz", choices=state_to_code.keys(), score_cutoff=80)

(最佳匹配得分 78 低于 score_cutoff=80,extractOne 返回 None,因此没有 Out 输出。)

In[23]: process.extractOne("AlaBAMMazzz",choices=state_to_code.keys())

Out[23]: ('ALABAMA', 78)

 

posted on 2017-07-21 09:24  老段的博客  阅读(9963)  评论(0)  编辑  收藏  举报