import time
import pymysql
import cx_Oracle
import os
import re
def fun(code, fb_flag):
l = len(code)
list1 = []
for string in code:
list1.append(string)
if fb_flag == 0:
return None, None, None
if fb_flag == 1:
code1 = code.split('-')
if(code1[0] != ''):
a9 = code1[0]
a10 = code1[1]
a11 = code1[2]
return a9+'-', a10+'-', a11
else:
if(len(code1[2]) <= 2):
a9 = code1[1]
a10 = None
a11 = None
return '-'+a9+'-', a10, a11
else:
a9 = None
a10 = None
a11 = code
return a9, a10, a11
# for j in list1:
# if(j >= '0' or j <= '9' or j == '-'):
# code1 = code.split('-')
# a9 = code1[0]
# a10 = code1[1]
# a11 = code1[2]
# if a9 == '':
# a9 = None
# a10 = None
# a11 = code
# return a9, a10, a11
# return a9+'-', a10+'-', a11
if fb_flag == 3:
if code[0] == '-':
if len(code) == 2 or len(code) == 3:
a9 = None
a10 = None
a11 = code
return a9, a10, a11
if(len(code) == 4):
a9 = None
a10 = code[:2]
a11 = code[2:]
return a9, a10, a11
if(len(code) == 5):
a9 = None
a10 = code[:3]
a11 = code[3:]
return a9, a10, a11
if code[0] != '':
code1 = code.split('-')
a9 = None
a10 = code1[0]
a11 = code1[1]
if(len(a10) >= 3 or len(a11) >= 3):
a10 = None
a11 = code
return a9, a10, a11
return a9, a10+'-', a11
if fb_flag == 4:
l = len(code)
if (code[l-1] == '元' and code[l-2] == '单' or code[l-1] == '建' and code[l-2] == '公'):
code1 = code.split('单元')
a9 = code1[0]
a10 = None
a11 = None
return a9+'单元', a10, a11
if (code[l-1] == '层'):
code1 = code.split('单元')
a9 = code1[0]
a10 = code1[1]
a11 = None
return a9+'单元', a10, a11
if(code[l-1] == '楼'):
code1 = code.split('单元')
a9 = code1[0]
a10 = code1[1]
a11 = None
return a9+'单元', a10, a11
if('单元' in code and '层' in code):
code1 = code.split('单元')
# print(code1)
a9 = code1[0]
code2 = code1[1]
code3 = code2.split('层')
# print('code3:', code3[0]) # 防止越界
a10 = code3[0]
if(len(code3) == 1):
a10 = None
a11 = code3[0]
return a9, a10, a11
if code3[1] == '':
a11 = None
return a9+'单元', a10+'层', a11
else:
a11 = code3[1]
# print()
# print(a11)
return a9+'单元', a10+'层', a11
if('单元' in code and '楼' in code):
code1 = code.split('单元')
a9 = code1[0]
code2 = code1[1]
code3 = code2.split('楼')
a10 = code3[0]
if(len(code3) == 1): # 防越界
a10 = None
a11 = code3[0]
return a9, a10, a11
if code3[1] == '':
a11 = None
return a9+'单元', a10+'楼', a11
else:
a11 = code3[1]
return a9+'单元', a10+'楼', a11
if(code[l-1] == '0' and code[l-2] == '层'):
code1 = code.split('单元')
a9 = code[0]
code2 = code1[1]
code3 = code2.split('0')
a10 = code3[0]
a11 = '0'
return a9+'单元', a10, a11
if(code[l-1] == '左' or code[l-1] == '右' or code[l-1] == '上' or code[l-1] == '下'):
code1 = code.split('单元')
a9 = code1[0]
code2 = code1[1]
if('层' in code2):
code2 = code2.split('层')
a10 = code2[0]
a11 = code[l-1]
return a9+'单元', a10+'层', a11
else:
a10 = None
a11 = code[l-1]
return a9+'单元', a10, a11
if('+' in code and '层' in code and '单元' in code):
code1 = code.split('单元')
a9 = code1[0]
code2 = code1[1]
code3 = code2.split('层')
a10 = code3[0]
a11 = code3[1]
return a9+'单元', a10+'层', a11
if('(' in code or '(' in code):
code1 = code.split('单元')
a9 = code1[0]
if('层' in code):
a10 = code1[1]
a11 = None
return a9+'单元', a10, a11
else:
a10 = None
a11 = code1[1]
return a9+'单元', a10, a11
for n in range(len(list1)-2):
if(list1[n].isdigit() and list1[n+1].isdigit() and list1[n+2].isdigit()):
try:
code1 = code.split('单元')
except Exception as e:
print(code1)
a9 = code1[0]
addr = code1[1]
for info in addr:
if info.isdigit():
info
else:
addr = addr.replace(info, ' ')
addr = addr.split()
for i in addr:
if len(i) == 3:
a10 = i[0]
a11 = i[1:3]
return a9+'单元', a10, a11
if len(i) == 4:
a10 = i[:2]
a11 = i[2:4]
return a9+'单元', a10, a11
else:
a10 = None
a11 = code1[1]
return a9, a10, a11
else:
code = code.split('单元')
a9 = code[0]
a11 = code[1]
a10 = None
list2 = []
# for i in a11:
# list2.append(i)
# for k in list2:
# if(k >= '0' or k <= '9' and k == '-'):
# if(a11[0] == '-'):
# a11 = a11.split('-')
# a10 = '-'+a11[1]
# a11 = '-'+a11[2]
# return a9,a10,a11
# else:
# a11 = a11.split('-')
# a10 = a11[0]+'-'
# a11 = a11[1]
# return a9,a10,a11
if code[1] == '地下室':
a11 = None
return a9+'单元', a10, a11
return a9+'单元', a10, a11
if fb_flag == 12:
ll = len(code)
if code.isdigit():
if(len(code) == 1 or len(code) == 2):
a9 = None
a10 = None
a11 = code
return a9, a10, a11
if(len(code) == 3):
a9 = None
a10 = code[0]
a11 = code[1:3]
return a9, a10, a11
if(len(code) == 4):
a9 = None
a10 = code[:2]
a11 = code[2:5]
return a9, a10, a11
if('层' in code):
a9 = None
if(code[ll-1] == '层'):
a10 = code
a11 = None
return a9, a10, a11
else:
code1 = code.split('层')
a10 = code1[0]
a11 = code1[1]
return a9, a10+'层', a11
if('楼' in code):
a9 = None
if(code[ll-1] == '楼'):
a10 = code
a11 = None
return a9, a10, a11
else:
code1 = code.split('楼')
a10 = code1[0]
a11 = code1[1]
return a9, a10+'楼', a11
if(code[ll-1] == '栋'):
return None, None, None
else:
a9 = None,
a10 = None
a11 = code
return a9, a10, a11
return None, None, None
def get_oracle_conn():
# os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.ZHS16GBK'
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
db = cx_Oracle.connect('gistar','res#pwd','192.168.205.58:1521/lndb',nencoding='utf8')
return db
def get_mysql_conn():
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
db = pymysql.connect("192.168.207.122","root","linux123","five_address")
return db
def match(address,addr):
address = address.strip()
res = address.find(addr)
if res > -1:
end = res + len(addr)
return str(res) + ',' + str(end)
return None
def get_match1(address):
# 1-1-1
# 只由数字和 - 组成
# 只在最后
# 有些地址有换行 \r
address = address.strip()
num_dash = 0
res = []
for tmp in address:
# if tmp.isdigit() or tmp.isalpha():
if tmp.isdigit():
res.append(tmp)
elif tmp == '-':
num_dash += 1
res.append(tmp)
else:
num_dash = 0
res = []
length = len(address)
front = length - len(res)
if num_dash == 2:
return address[:front],address[front:],1
elif num_dash > 2:
return address[:front],address[front:],2
elif num_dash == 1:
return address[:front],address[front:],3
else:
return address,'',0
def get_match11(address):
# 有些地址有换行 \r
address = address.strip()
flag = 0
index = []
for i,tmp in enumerate(address):
if tmp.isdigit() or tmp == '-':
if flag == 0:
tmp_index = i
flag = 1
else:
if flag == 1:
index.append([tmp_index,i])
flag = 0
if len(index) == 0:
return address,'',0
# get_match1 之后不会有数字结尾
# 防止 get_match1 出错 考虑数字结尾的情况
if flag:
# index.append([tmp_index,len(address)])
return address[:tmp_index],address[tmp_index:],12
start = index[-1][0]
end = index[-1][1]
if '号' in address[end:] or '巷' in address[end:]:
return address,'',0
else:
return address[:start],address[start:],12
def get_match4(address):
# 有些地址有换行 \r
address = address.strip()
match = '单元'
res = re.search(match,address)
if not res:
return address,'',0
start,end = res.span()
# 判断单元前的数字
# 1.没有
# 2.有若干个
digit = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9}
index = start
for i in range(start-1,-1,-1):
if address[i].isdigit() or address[i] == '-' or address[i] in digit.keys():
index = i
else:
break
return address[:index],address[index:],4
def match91011(address):
address = address.strip()
res = get_match4(address)
if res[2] > 0:
return res
else:
res = get_match1(address)
if res[2] > 0:
return res
else:
res = get_match11(address)
return res
def write_data():
command = 'select id,code ,regionid,street_name,road_name,\
number_plate_name,unit,layer,room, a9, a10, a11 from data_2 where code is not null'
# command = 'select id,code from ADDRESSINFO_SPLIT \
# where code is not null'
# command = "select id,code,street_name,road_name,\
# number_plate_name,unit,layer,\
# room from ADDRESSINFO_SPLIT \
# where premises_network is \
# null and code like '%]%' "
db = get_mysql_conn()
cur = db.cursor()
# command = "select id,code from ADDRESSINFO_SPLIT"
cur.execute(command)
while 1:
data_all = cur.fetchone()
if not data_all:
break
_,code,flag=match91011(data_all[1])
fun(code, int(flag))
def set_sql_command():
a1 = "select id,code,regionid,street_name,road_name,number_plate_name,unit,\
layer,room from addressinfo_split where code is not null"
return a1
def write_mysql(data):
# print(data)
print('writing ',time.asctime( time.localtime(time.time()) ))
db = pymysql.connect("192.168.207.122","root","linux123","five_address" )
# db = get_oracle_db()
cursor = db.cursor()
sql1 = "insert into data_2 \
(id,code,regionid,street_name,road_name,\
number_plate_name,unit,layer,room,a9,a10,a11) \
values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
# sql1 = "insert into addressinfo_crf2 values(:id,\
# :code,:regionid,:street_name,:road_name,\
# :number_plate_name,:unit,:layer,:room,\
# :front_addr,:back_addr,:fb_flag,:a4,:a5,:a6)"
cursor.executemany(sql1,data)
db.commit()
# cursor.close()
db.close()
print(time.asctime( time.localtime(time.time()) ))
def main():
sql = set_sql_command()
db = get_oracle_conn()
cur = db.cursor()
cur.execute(sql)
a10w = 0
data_all = []
while 1:
data = cur.fetchone()
if not data:
break
data = list(data)
# print(len(data))
tmp = match91011(data[1])
tmp1 = fun(tmp[1], tmp[2])
# print(len(tmp1))
# print(tmp1)
if tmp1[0] == (None,):
list_1 = list(tmp1)
list_1[0] = None
tmp1 = tuple(list_1)
else:
tmp1
# global x
# if type(tmp1[0]) == tuple:
# x = tmp1[0][0]
# x = str(x)
# else:
# x = 'None'
# l_1 = list(tmp1)
# l_1[0] = x
# tmp1 = tuple(l_1)
# print(tmp1[0], type(tmp1[0]))
# print(tmp1, type(tmp1[0]))
# print(tmp1)
# print(len(tmp1))
for i in tmp1:
data.append(i)
# print(len(data))
data_all.append(data)
a10w += 1
if a10w == 10000:
a10w = 0
write_mysql(data_all)
data_all = []
# print(data_all)
write_mysql(data_all)