记录一下(拆分地址写入数据库)

import time
import pymysql
import cx_Oracle
import os
import re

def fun(code, fb_flag):

    l = len(code)
    list1 = []
    for string in code:
        list1.append(string)
        
    if fb_flag == 0:
        return None, None, None
    
    if fb_flag == 1:
        code1 = code.split('-')
        if(code1[0] != ''):
            a9 = code1[0]
            a10 = code1[1]
            a11 = code1[2]
            return a9+'-', a10+'-', a11
        else:
            if(len(code1[2]) <= 2):
                a9 = code1[1]
                a10 = None
                a11 = None
                return '-'+a9+'-', a10, a11
            else:
                a9 = None
                a10 = None
                a11 = code
                return a9, a10, a11
#         for j in list1:
#             if(j >= '0' or j <= '9' or j == '-'):
#                 code1 = code.split('-')
#                 a9 = code1[0]
#                 a10 = code1[1]
#                 a11 = code1[2]
#                 if a9 == '':
#                     a9 = None
#                     a10 = None
#                     a11 = code
#                     return a9, a10, a11
#                 return a9+'-', a10+'-', a11
    
    if fb_flag == 3:
        if code[0] == '-':
            if len(code) == 2 or len(code) == 3:
                a9 = None
                a10 = None
                a11 = code
                return a9, a10, a11
            if(len(code) == 4):
                a9 = None
                a10 = code[:2]
                a11 = code[2:]
                return a9, a10, a11
            if(len(code) == 5):
                a9 = None
                a10 = code[:3]
                a11 = code[3:]
                return a9, a10, a11
        if code[0] != '':
            code1 = code.split('-')
            a9 = None
            a10 = code1[0]
            a11 = code1[1]
            if(len(a10) >= 3 or len(a11) >= 3):
                a10 = None
                a11 = code
                return a9, a10, a11
            return a9, a10+'-', a11        
    
    if fb_flag == 4:
        l = len(code)
        if (code[l-1] == '' and code[l-2] == '' or code[l-1] == '' and code[l-2] == ''):
            code1 = code.split('单元')
            a9 = code1[0]
            a10 = None
            a11 = None
            return a9+'单元', a10, a11
        if (code[l-1] == ''):
            code1 = code.split('单元')
            a9 = code1[0]
            a10 = code1[1]
            a11 = None
            return a9+'单元', a10, a11
        if(code[l-1] == ''):
            code1 = code.split('单元')
            a9 = code1[0]
            a10 = code1[1]
            a11 = None
            return a9+'单元', a10, a11
        if('单元' in code and '' in code):
            code1 = code.split('单元')
#             print(code1)
            a9 = code1[0]
            code2 = code1[1]
            code3 = code2.split('')
#             print('code3:', code3[0])   # 防止越界
            a10 = code3[0]
            if(len(code3) == 1):
                a10 = None
                a11 = code3[0]
                return a9, a10, a11 
            if code3[1] == '':
                a11 = None
                return a9+'单元', a10+'', a11
            else:
                a11 = code3[1]
#                 print()
#                 print(a11)
                return a9+'单元', a10+'', a11
        if('单元' in code and '' in code):
            code1 = code.split('单元')
            a9 = code1[0]
            code2 = code1[1]
            code3 = code2.split('')
            a10 = code3[0]
            if(len(code3) == 1):   # 防越界
                a10 = None
                a11 = code3[0]
                return a9, a10, a11
            if code3[1] == '':
                a11 = None
                return a9+'单元', a10+'', a11
            else:
                a11 = code3[1]
                return a9+'单元', a10+'', a11
        if(code[l-1] == '0' and code[l-2] == ''):
            code1 = code.split('单元')
            a9 = code[0]
            code2 = code1[1]
            code3 = code2.split('0')
            a10 = code3[0]
            a11 = '0'
            return a9+'单元', a10, a11
        if(code[l-1] == '' or code[l-1] == '' or code[l-1] == '' or code[l-1] == ''):
            code1 = code.split('单元')
            a9 = code1[0]
            code2 = code1[1]
            if('' in code2):
                code2 = code2.split('')
                a10 = code2[0]
                a11 = code[l-1]
                return a9+'单元', a10+'', a11
            else:
                a10 = None
                a11 = code[l-1]
                return a9+'单元', a10, a11
            
        if('+' in code and '' in code and '单元' in code):
            code1 = code.split('单元')
            a9 = code1[0]
            code2 = code1[1]
            code3 = code2.split('')
            a10 = code3[0]
            a11 = code3[1]
            return a9+'单元', a10+'', a11
        if('(' in code or '' in code):
            code1 = code.split('单元')
            a9 = code1[0]
            if('' in code):
                a10 = code1[1]
                a11 = None
                return a9+'单元', a10, a11
            else:
                a10 = None
                a11 = code1[1]
                return a9+'单元', a10, a11             
        for n in range(len(list1)-2):
            
            if(list1[n].isdigit() and list1[n+1].isdigit() and list1[n+2].isdigit()):
                try:
                    code1 = code.split('单元')
                except Exception as e:
                    print(code1)

                a9 = code1[0]
                addr = code1[1]
                for info in addr:
                    if info.isdigit():
                        info
                    else:
                        addr = addr.replace(info, ' ')
                addr = addr.split()
                for i in addr:
                    if len(i) == 3:
                        a10 = i[0]
                        a11 = i[1:3]
                        return a9+'单元', a10, a11
                    if len(i) == 4:
                        a10 = i[:2]
                        a11 = i[2:4]
                        return a9+'单元', a10, a11
                    else:
                        a10 = None
                        a11 = code1[1]
                        return a9, a10, a11 
            
        else:
            code = code.split('单元')

            a9 = code[0]
            a11 = code[1]
            a10 = None
            list2 = []

#             for i in a11:
#                 list2.append(i)
#             for k in list2:
#                 if(k >= '0' or k <= '9' and k == '-'):
#                     if(a11[0] == '-'):
#                         a11 = a11.split('-')
#                         a10 = '-'+a11[1]
#                         a11 = '-'+a11[2]
#                         return a9,a10,a11
#                     else:
#                         a11 = a11.split('-')
#                         a10 = a11[0]+'-'
#                         a11 = a11[1]
#                         return a9,a10,a11

            if code[1] == '地下室':
                a11 = None
                return a9+'单元', a10, a11

            
            return a9+'单元', a10, a11
    
    if fb_flag == 12:
        ll = len(code)
        if code.isdigit():
            if(len(code) == 1 or len(code) == 2):
                a9 = None
                a10 = None
                a11 = code
                return a9, a10, a11
            if(len(code) == 3):
                a9 = None
                a10 = code[0]
                a11 = code[1:3]
                return a9, a10, a11
            if(len(code) == 4):
                a9 = None
                a10 = code[:2]
                a11 = code[2:5]
                return a9, a10, a11
        if('' in code):
            a9 = None
            if(code[ll-1] == ''):
                a10 = code
                a11 = None
                return a9, a10, a11
            else:
                code1 = code.split('')
                a10 = code1[0]
                a11 = code1[1]
                return a9, a10+'', a11
        if('' in code):
            a9 = None
            if(code[ll-1] == ''):
                a10 = code
                a11 = None
                return a9, a10, a11
            else:
                code1 = code.split('')
                a10 = code1[0]
                a11 = code1[1]
                return a9, a10+'', a11
        if(code[ll-1] == ''):
            return None, None, None
        else:
            a9 = None,
            a10 = None
            a11 = code
            return a9, a10, a11
    return None, None, None
        
def get_oracle_conn():
    #  os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.ZHS16GBK'
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    db = cx_Oracle.connect('gistar','res#pwd','192.168.205.58:1521/lndb',nencoding='utf8')   
    return db

def get_mysql_conn():
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    db = pymysql.connect("192.168.207.122","root","linux123","five_address")  
    return db

def match(address,addr):
    address = address.strip()
    res = address.find(addr)
    if res > -1:
        end = res + len(addr)
        return str(res) + ',' + str(end)
    return None

def get_match1(address):
    # 1-1-1
    # 只由数字和 - 组成
    # 只在最后

    # 有些地址有换行 \r
    address = address.strip()
    num_dash = 0
    res = []
    for tmp in address:
        # if tmp.isdigit() or tmp.isalpha():
        if tmp.isdigit():
            res.append(tmp)
        elif tmp == '-':           
            num_dash += 1
            res.append(tmp)
        else:
            num_dash = 0
            res = []
    length = len(address)
    front = length - len(res)
    if num_dash == 2:
        return address[:front],address[front:],1
    elif num_dash > 2:
        return address[:front],address[front:],2
    elif num_dash == 1:
        return address[:front],address[front:],3
    else:
        return address,'',0

def get_match11(address):

    # 有些地址有换行 \r
    address = address.strip()
    flag = 0
    index = []
    for i,tmp in enumerate(address):
        if tmp.isdigit() or tmp == '-':
            if flag == 0:
                tmp_index = i
                flag = 1
        else:
            if flag == 1:
                index.append([tmp_index,i])
            flag = 0

    if len(index) == 0:
        return address,'',0

    # get_match1 之后不会有数字结尾
    # 防止 get_match1 出错 考虑数字结尾的情况
    if flag:
        # index.append([tmp_index,len(address)])
        return address[:tmp_index],address[tmp_index:],12

    start = index[-1][0]
    end = index[-1][1]
    if '' in address[end:] or '' in address[end:]:
        return address,'',0
    else:
        return address[:start],address[start:],12
    
def get_match4(address):

    # 有些地址有换行 \r
    address = address.strip()
    match = '单元'
    res = re.search(match,address)
    if not res:
        return address,'',0
    start,end = res.span()
    # 判断单元前的数字
    # 1.没有
    # 2.有若干个
    digit = {'': 1, '': 2, '': 3, '': 4,         '': 5, '': 6, '': 7, '': 8, '': 9}
    index = start
    for i in range(start-1,-1,-1):
        if address[i].isdigit() or address[i] == '-'             or address[i] in digit.keys():
            index = i
        else:
            break
    return address[:index],address[index:],4


def match91011(address):
    address = address.strip()
    res = get_match4(address)
    if res[2] > 0:
        return res
    else:
        res = get_match1(address)
        if res[2] > 0:
            return res
        else:
            res = get_match11(address)
            return res
        
def write_data():
    command = 'select id,code ,regionid,street_name,road_name,\
        number_plate_name,unit,layer,room, a9, a10, a11 from data_2 where code is not null'

#     command = 'select id,code from ADDRESSINFO_SPLIT \
#             where code is not null'

#     command = "select id,code,street_name,road_name,\
#          number_plate_name,unit,layer,\
#              room from ADDRESSINFO_SPLIT \
#                  where premises_network is \
#                  null and code like '%]%' "
    db = get_mysql_conn()

    cur = db.cursor()
    # command = "select id,code from ADDRESSINFO_SPLIT"
    cur.execute(command) 
    while 1:
        data_all = cur.fetchone()
        if not data_all:
            break
        _,code,flag=match91011(data_all[1])
        fun(code, int(flag))

def set_sql_command():
    a1 = "select id,code,regionid,street_name,road_name,number_plate_name,unit,\
          layer,room from addressinfo_split where code is not null"
    return a1


def write_mysql(data):
#     print(data)
    print('writing ',time.asctime( time.localtime(time.time()) ))
    db = pymysql.connect("192.168.207.122","root","linux123","five_address" )
#     db = get_oracle_db()
    cursor = db.cursor()
    sql1 = "insert into data_2 \
        (id,code,regionid,street_name,road_name,\
        number_plate_name,unit,layer,room,a9,a10,a11) \
                    values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
#     sql1 = "insert into addressinfo_crf2 values(:id,\
#         :code,:regionid,:street_name,:road_name,\
#         :number_plate_name,:unit,:layer,:room,\
#         :front_addr,:back_addr,:fb_flag,:a4,:a5,:a6)"
    cursor.executemany(sql1,data)
    db.commit()
#     cursor.close()
    db.close()
    print(time.asctime( time.localtime(time.time()) ))
    
def main():
    sql = set_sql_command()
    db = get_oracle_conn()
    cur = db.cursor()
    cur.execute(sql)
    a10w = 0
    data_all = []
    while 1:
        data = cur.fetchone()
        if not data:
            break
        data = list(data)
#         print(len(data))
        tmp = match91011(data[1])
        tmp1 = fun(tmp[1], tmp[2])
#         print(len(tmp1))
#         print(tmp1)
        if tmp1[0] == (None,):
            list_1 = list(tmp1)
            list_1[0] = None
            tmp1 = tuple(list_1)
        else:
            tmp1

#         global x
#         if type(tmp1[0]) == tuple:
#             x = tmp1[0][0]
#             x = str(x)
#         else:
#             x = 'None'
#         l_1 = list(tmp1)
#         l_1[0] = x
#         tmp1 = tuple(l_1)
        
#         print(tmp1[0], type(tmp1[0]))
#         print(tmp1,  type(tmp1[0]))
#         print(tmp1)
#         print(len(tmp1))
        for i in tmp1:
            data.append(i)
#         print(len(data))
        data_all.append(data)
        a10w += 1
        if a10w == 10000:
            a10w = 0
            write_mysql(data_all)
            data_all = []
            
#     print(data_all)
    write_mysql(data_all)        

 

posted @ 2020-06-30 14:03  半日闲1  阅读(233)  评论(0)    收藏  举报