使用python 将 SQL脚本中的相关表 提取出来:
使用python 将 SQL脚本中的相关表 提取出来:
#!/usr/bin/env python # coding=utf-8 import datetime import re from requests.auth import HTTPBasicAuth import pymysql import pyhive import sqlalchemy import pandas pymysql.install_as_MySQLdb() from sqlalchemy.engine import create_engine from sqlalchemy.schema import * import pandas as pd import requests from requests.packages.urllib3.exceptions import InsecureRequestWarning requests.packages.urllib3.disable_warnings(InsecureRequestWarning) """ 将 sql_text 这段SQL 脚本中所使用到表提取出来 """ def extract_tables(sql_str): # pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)' pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+\.\w+|\w+\.\w+)' # 经过测试这个比较完美 list_tables=list(set(re.findall(pattern, str(sql_str), re.I))) return list_tables if __name__ == '__main__': pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)' myChannel_sit_Name="" myChannel_sit_pwd="" engine = create_engine('mysql+pymysql://username:password@localhost/database') sql = """select id,title,sql_text from tableName """ df = pd.read_sql(sql, myChannel_sit_engine) print(df.loc[:,["id","sql_text"]]) dict_={} for row_index, row in df.loc[:,["id","sql_text"]].iterrows(): print("*"*10) print(row_index) sql_text=str(row[1]).replace("\\r\\n"," ").replace("\\t","") table_list=extract_tables(sql_text) dict_[row[0]]=table_list print(dict_) inser_tmp=" INSERT INTO test_table_001 (data_api_id, extract_tables) VALUES " for k,v in dict_.items(): inser_tmp+="(\"{}\",\"{}\"), ".format(k,str(v).replace(" ",'')) inser_str=inser_tmp[:len(inser_tmp) - 2] + ";" print(inser_str) conn=conn=myChannel_sit_engine.connect() conn.execute("DELETE FROM db_apple_bbi_sit.test_table_001 ;") conn.execute(inser_str)