记录一下(拆分地址写入数据库)
import time import pymysql import cx_Oracle import os import re def fun(code, fb_flag): l = len(code) list1 = [] for string in code: list1.append(string) if fb_flag == 0: return None, None, None if fb_flag == 1: code1 = code.split('-') if(code1[0] != ''): a9 = code1[0] a10 = code1[1] a11 = code1[2] return a9+'-', a10+'-', a11 else: if(len(code1[2]) <= 2): a9 = code1[1] a10 = None a11 = None return '-'+a9+'-', a10, a11 else: a9 = None a10 = None a11 = code return a9, a10, a11 # for j in list1: # if(j >= '0' or j <= '9' or j == '-'): # code1 = code.split('-') # a9 = code1[0] # a10 = code1[1] # a11 = code1[2] # if a9 == '': # a9 = None # a10 = None # a11 = code # return a9, a10, a11 # return a9+'-', a10+'-', a11 if fb_flag == 3: if code[0] == '-': if len(code) == 2 or len(code) == 3: a9 = None a10 = None a11 = code return a9, a10, a11 if(len(code) == 4): a9 = None a10 = code[:2] a11 = code[2:] return a9, a10, a11 if(len(code) == 5): a9 = None a10 = code[:3] a11 = code[3:] return a9, a10, a11 if code[0] != '': code1 = code.split('-') a9 = None a10 = code1[0] a11 = code1[1] if(len(a10) >= 3 or len(a11) >= 3): a10 = None a11 = code return a9, a10, a11 return a9, a10+'-', a11 if fb_flag == 4: l = len(code) if (code[l-1] == '元' and code[l-2] == '单' or code[l-1] == '建' and code[l-2] == '公'): code1 = code.split('单元') a9 = code1[0] a10 = None a11 = None return a9+'单元', a10, a11 if (code[l-1] == '层'): code1 = code.split('单元') a9 = code1[0] a10 = code1[1] a11 = None return a9+'单元', a10, a11 if(code[l-1] == '楼'): code1 = code.split('单元') a9 = code1[0] a10 = code1[1] a11 = None return a9+'单元', a10, a11 if('单元' in code and '层' in code): code1 = code.split('单元') # print(code1) a9 = code1[0] code2 = code1[1] code3 = code2.split('层') # print('code3:', code3[0]) # 防止越界 a10 = code3[0] if(len(code3) == 1): a10 = None a11 = code3[0] return a9, a10, a11 if code3[1] == '': a11 = None return a9+'单元', a10+'层', a11 else: a11 = code3[1] # print() # print(a11) return a9+'单元', a10+'层', a11 if('单元' in code and '楼' in code): code1 = code.split('单元') a9 = code1[0] code2 = code1[1] code3 = code2.split('楼') a10 = code3[0] if(len(code3) == 1): # 防越界 a10 = None a11 = code3[0] return a9, a10, a11 if code3[1] == '': a11 = None return a9+'单元', a10+'楼', a11 else: a11 = code3[1] return a9+'单元', a10+'楼', a11 if(code[l-1] == '0' and code[l-2] == '层'): code1 = code.split('单元') a9 = code[0] code2 = code1[1] code3 = code2.split('0') a10 = code3[0] a11 = '0' return a9+'单元', a10, a11 if(code[l-1] == '左' or code[l-1] == '右' or code[l-1] == '上' or code[l-1] == '下'): code1 = code.split('单元') a9 = code1[0] code2 = code1[1] if('层' in code2): code2 = code2.split('层') a10 = code2[0] a11 = code[l-1] return a9+'单元', a10+'层', a11 else: a10 = None a11 = code[l-1] return a9+'单元', a10, a11 if('+' in code and '层' in code and '单元' in code): code1 = code.split('单元') a9 = code1[0] code2 = code1[1] code3 = code2.split('层') a10 = code3[0] a11 = code3[1] return a9+'单元', a10+'层', a11 if('(' in code or '(' in code): code1 = code.split('单元') a9 = code1[0] if('层' in code): a10 = code1[1] a11 = None return a9+'单元', a10, a11 else: a10 = None a11 = code1[1] return a9+'单元', a10, a11 for n in range(len(list1)-2): if(list1[n].isdigit() and list1[n+1].isdigit() and list1[n+2].isdigit()): try: code1 = code.split('单元') except Exception as e: print(code1) a9 = code1[0] addr = code1[1] for info in addr: if info.isdigit(): info else: addr = addr.replace(info, ' ') addr = addr.split() for i in addr: if len(i) == 3: a10 = i[0] a11 = i[1:3] return a9+'单元', a10, a11 if len(i) == 4: a10 = i[:2] a11 = i[2:4] return a9+'单元', a10, a11 else: a10 = None a11 = code1[1] return a9, a10, a11 else: code = code.split('单元') a9 = code[0] a11 = code[1] a10 = None list2 = [] # for i in a11: # list2.append(i) # for k in list2: # if(k >= '0' or k <= '9' and k == '-'): # if(a11[0] == '-'): # a11 = a11.split('-') # a10 = '-'+a11[1] # a11 = '-'+a11[2] # return a9,a10,a11 # else: # a11 = a11.split('-') # a10 = a11[0]+'-' # a11 = a11[1] # return a9,a10,a11 if code[1] == '地下室': a11 = None return a9+'单元', a10, a11 return a9+'单元', a10, a11 if fb_flag == 12: ll = len(code) if code.isdigit(): if(len(code) == 1 or len(code) == 2): a9 = None a10 = None a11 = code return a9, a10, a11 if(len(code) == 3): a9 = None a10 = code[0] a11 = code[1:3] return a9, a10, a11 if(len(code) == 4): a9 = None a10 = code[:2] a11 = code[2:5] return a9, a10, a11 if('层' in code): a9 = None if(code[ll-1] == '层'): a10 = code a11 = None return a9, a10, a11 else: code1 = code.split('层') a10 = code1[0] a11 = code1[1] return a9, a10+'层', a11 if('楼' in code): a9 = None if(code[ll-1] == '楼'): a10 = code a11 = None return a9, a10, a11 else: code1 = code.split('楼') a10 = code1[0] a11 = code1[1] return a9, a10+'楼', a11 if(code[ll-1] == '栋'): return None, None, None else: a9 = None, a10 = None a11 = code return a9, a10, a11 return None, None, None def get_oracle_conn(): # os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.ZHS16GBK' os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8' db = cx_Oracle.connect('gistar','res#pwd','192.168.205.58:1521/lndb',nencoding='utf8') return db def get_mysql_conn(): os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8' db = pymysql.connect("192.168.207.122","root","linux123","five_address") return db def match(address,addr): address = address.strip() res = address.find(addr) if res > -1: end = res + len(addr) return str(res) + ',' + str(end) return None def get_match1(address): # 1-1-1 # 只由数字和 - 组成 # 只在最后 # 有些地址有换行 \r address = address.strip() num_dash = 0 res = [] for tmp in address: # if tmp.isdigit() or tmp.isalpha(): if tmp.isdigit(): res.append(tmp) elif tmp == '-': num_dash += 1 res.append(tmp) else: num_dash = 0 res = [] length = len(address) front = length - len(res) if num_dash == 2: return address[:front],address[front:],1 elif num_dash > 2: return address[:front],address[front:],2 elif num_dash == 1: return address[:front],address[front:],3 else: return address,'',0 def get_match11(address): # 有些地址有换行 \r address = address.strip() flag = 0 index = [] for i,tmp in enumerate(address): if tmp.isdigit() or tmp == '-': if flag == 0: tmp_index = i flag = 1 else: if flag == 1: index.append([tmp_index,i]) flag = 0 if len(index) == 0: return address,'',0 # get_match1 之后不会有数字结尾 # 防止 get_match1 出错 考虑数字结尾的情况 if flag: # index.append([tmp_index,len(address)]) return address[:tmp_index],address[tmp_index:],12 start = index[-1][0] end = index[-1][1] if '号' in address[end:] or '巷' in address[end:]: return address,'',0 else: return address[:start],address[start:],12 def get_match4(address): # 有些地址有换行 \r address = address.strip() match = '单元' res = re.search(match,address) if not res: return address,'',0 start,end = res.span() # 判断单元前的数字 # 1.没有 # 2.有若干个 digit = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9} index = start for i in range(start-1,-1,-1): if address[i].isdigit() or address[i] == '-' or address[i] in digit.keys(): index = i else: break return address[:index],address[index:],4 def match91011(address): address = address.strip() res = get_match4(address) if res[2] > 0: return res else: res = get_match1(address) if res[2] > 0: return res else: res = get_match11(address) return res def write_data(): command = 'select id,code ,regionid,street_name,road_name,\ number_plate_name,unit,layer,room, a9, a10, a11 from data_2 where code is not null' # command = 'select id,code from ADDRESSINFO_SPLIT \ # where code is not null' # command = "select id,code,street_name,road_name,\ # number_plate_name,unit,layer,\ # room from ADDRESSINFO_SPLIT \ # where premises_network is \ # null and code like '%]%' " db = get_mysql_conn() cur = db.cursor() # command = "select id,code from ADDRESSINFO_SPLIT" cur.execute(command) while 1: data_all = cur.fetchone() if not data_all: break _,code,flag=match91011(data_all[1]) fun(code, int(flag)) def set_sql_command(): a1 = "select id,code,regionid,street_name,road_name,number_plate_name,unit,\ layer,room from addressinfo_split where code is not null" return a1 def write_mysql(data): # print(data) print('writing ',time.asctime( time.localtime(time.time()) )) db = pymysql.connect("192.168.207.122","root","linux123","five_address" ) # db = get_oracle_db() cursor = db.cursor() sql1 = "insert into data_2 \ (id,code,regionid,street_name,road_name,\ number_plate_name,unit,layer,room,a9,a10,a11) \ values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" # sql1 = "insert into addressinfo_crf2 values(:id,\ # :code,:regionid,:street_name,:road_name,\ # :number_plate_name,:unit,:layer,:room,\ # :front_addr,:back_addr,:fb_flag,:a4,:a5,:a6)" cursor.executemany(sql1,data) db.commit() # cursor.close() db.close() print(time.asctime( time.localtime(time.time()) )) def main(): sql = set_sql_command() db = get_oracle_conn() cur = db.cursor() cur.execute(sql) a10w = 0 data_all = [] while 1: data = cur.fetchone() if not data: break data = list(data) # print(len(data)) tmp = match91011(data[1]) tmp1 = fun(tmp[1], tmp[2]) # print(len(tmp1)) # print(tmp1) if tmp1[0] == (None,): list_1 = list(tmp1) list_1[0] = None tmp1 = tuple(list_1) else: tmp1 # global x # if type(tmp1[0]) == tuple: # x = tmp1[0][0] # x = str(x) # else: # x = 'None' # l_1 = list(tmp1) # l_1[0] = x # tmp1 = tuple(l_1) # print(tmp1[0], type(tmp1[0])) # print(tmp1, type(tmp1[0])) # print(tmp1) # print(len(tmp1)) for i in tmp1: data.append(i) # print(len(data)) data_all.append(data) a10w += 1 if a10w == 10000: a10w = 0 write_mysql(data_all) data_all = [] # print(data_all) write_mysql(data_all)