神通mysql模式转 mysql

import re

def extract_field_list(insert_line):
    """
    从 INSERT 语句中提取字段列表(支持字段内的括号、换行等)
    """
    start = insert_line.find('(')
    if start == -1:
        return None, None  # 没有找到字段列表

    balance = 0
    end = -1
    for i in range(start, len(insert_line)):
        if insert_line[i] == '(':
            balance += 1
        elif insert_line[i] == ')':
            balance -= 1
            if balance == 0:
                end = i
                break

    if end == -1:
        return None, None  # 括号不平衡

    fields = insert_line[start+1:end]
    rest_of_line = insert_line[end+1:]
    return fields, rest_of_line

def extract_and_transform_inserts(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # 匹配 TABLE INSERT 块
    insert_blocks = re.findall(
        r'-{20,}TABLE INSERT SQL: (.*?)-{20,}\s*(.*?)(?=(?:-{20,}TABLE DDL:)|\Z)',
        content,
        re.DOTALL | re.IGNORECASE
    )

    extracted_sql = ''
    for table_name, insert_block in insert_blocks:
        table_name = table_name.strip().lower()
        lines = insert_block.strip().splitlines()

        merged_lines = []
        buffer = ''
        for line in lines:
            line = line.strip()
            if not line:
                continue
            buffer += ' ' + line
            if line.endswith(';'):
                merged_lines.append(buffer.strip())
                buffer = ''

        insert_lines = []
        for insert_line in merged_lines:
            if insert_line.upper().startswith('INSERT'):
                # .)
                insert_line = re.sub(r'INSERT INTO\s+\w+\.', 'INSERT INTO ', insert_line, flags=re.IGNORECASE)

                # 提取表名
                table_match = re.search(r'INSERT INTO\s+(\w+)', insert_line, flags=re.IGNORECASE)
                if table_match:
                    table_name_in_sql = table_match.group(1).lower()
                    insert_line = re.sub(r'INSERT INTO\s+\w+', f'INSERT INTO {table_name_in_sql}', insert_line, count=1, flags=re.IGNORECASE)

                # 提取字段部分(使用平衡括号方法)
                fields, rest_of_line = extract_field_list(insert_line)
                if fields:
                    # 转换字段名为小写并加反引号
                    field_list = [f.strip().lower() for f in fields.split(',')]
                    fields_lower = ', '.join([f'`{f}`' for f in field_list])
                    insert_line = f'INSERT INTO {table_name_in_sql} ({fields_lower}){rest_of_line}'

                insert_lines.append(insert_line)

        if insert_lines:
            extracted_sql += f'-- INSERTS FOR {table_name}\n'
            extracted_sql += '\n'.join(insert_lines)
            extracted_sql += '\n\n'

    with open(output_file, 'w', encoding='utf-8') as f_out:
        f_out.write(extracted_sql)

    print(f"✅ 提取并转换完成,输出文件:{output_file}")

if __name__ == '__main__':
    input_file = '/Users//Downloads/utf8.sql'  # 你的原始SQL文件
    output_file = '/Users//Downloads/output_inserts1.sql'
    extract_and_transform_inserts(input_file, output_file)


import re

def parse_table_columns(create_table_sql):
    """
    解析CREATE TABLE语句中的字段定义。
    返回一个dict: {字段名: 字段定义}
    """
    columns = {}
    lines = create_table_sql.splitlines()
    inside_table = False
    for line in lines:
        line = line.strip()
        if line.upper().startswith("CREATE TABLE"):
            inside_table = True
            continue
        if inside_table:
            if line.startswith(")") or line.upper().startswith("CONSTRAINT"):
                break  # 结束字段解析
            if line.endswith(","):
                line = line[:-1]
            if line:
                # 使用正则提取列名和类型
                match = re.match(r'(\S+)\s+(.+)', line)
                if match:
                    col_name = match.group(1).strip().lower()
                    col_def = match.group(2).strip()
                    columns[col_name] = col_def
    return columns

def convert_to_mysql(sql_text):
    """
    转换SQL文本为MySQL,包含:
    - 去除双引号
    - 字段和表名转为小写
    - 类型转换
    - COMMENT转换
    """
    # 1. 去除双引号
    sql_text = re.sub(r'"', '', sql_text)

    # 2. 类型转换
    sql_text = re.sub(r'character varying\((\d+)\)', r'VARCHAR(\1)', sql_text, flags=re.IGNORECASE)
    sql_text = re.sub(r'timestamp\(0\) without time zone', r'TIMESTAMP', sql_text, flags=re.IGNORECASE)

    # 3. 提取所有CREATE TABLE
    table_defs = {}
    for match in re.finditer(r'CREATE TABLE\s+(\S+)\.(\S+)\s*\((.*?)\)\s*AUTO_INCREMENT', sql_text, re.DOTALL | re.IGNORECASE):
        schema, table, columns_text = match.group(1).lower(), match.group(2).lower(), match.group(3)
        table_name = table
        table_defs[table_name] = parse_table_columns(f"CREATE TABLE {table_name} ({columns_text})")

    # 4. COMMENT ON TABLE
    def table_comment_repl(match):
        schema, table, comment = match.group(1).lower(), match.group(2).lower(), match.group(3)
        return f"ALTER TABLE {table} COMMENT='{comment}';"
    sql_text = re.sub(
        r'COMMENT ON TABLE (\S+)\.(\S+) IS \'([^\']*)\';',
        table_comment_repl,
        sql_text,
        flags=re.IGNORECASE
    )

    # 5. COMMENT ON COLUMN
    def column_comment_repl(match):
        schema = match.group(1).lower()
        table = match.group(2).lower()
        column = match.group(3).lower()
        comment = match.group(4)
        if table in table_defs and column in table_defs[table]:
            col_def = table_defs[table][column]
            col_def = col_def.rstrip(',')
            return f"ALTER TABLE {table} MODIFY COLUMN {column} {col_def} COMMENT '{comment}';"
        else:
            return f"-- TODO: 手动添加字段定义 ALTER TABLE {table} MODIFY COLUMN {column} [type] COMMENT '{comment}';"
    sql_text = re.sub(
        r'COMMENT ON COLUMN (\S+)\.(\S+)\.(\S+) IS \'([^\']*)\';',
        column_comment_repl,
        sql_text,
        flags=re.IGNORECASE
    )

    # 6. 替换所有 CREATE TABLE 语句里的表名和字段名为小写
    def lowercase_create_table(match):
        schema = match.group(1).lower()
        table = match.group(2).lower()
        columns_text = match.group(3)
        # 对字段名进行小写转换
        new_columns_lines = []
        for line in columns_text.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith("CONSTRAINT"):
                new_columns_lines.append(line)
                continue
            parts = line.split(maxsplit=1)
            if len(parts) == 2:
                col_name = parts[0].lower()
                col_def = parts[1]
                new_columns_lines.append(f"    {col_name} {col_def}")
            else:
                new_columns_lines.append(line)
        new_columns_text = ",\n".join(new_columns_lines)
        return f"CREATE TABLE {schema}.{table} (\n{new_columns_text}\n) AUTO_INCREMENT"

    sql_text = re.sub(
        r'CREATE TABLE\s+(\S+)\.(\S+)\s*\((.*?)\)\s*AUTO_INCREMENT',
        lowercase_create_table,
        sql_text,
        flags=re.DOTALL | re.IGNORECASE
    )

    # 7. 去除BINLOG ON
    sql_text = re.sub(r'\s+BINLOG\s+ON\s*;', r';', sql_text, flags=re.IGNORECASE)

    return sql_text

if __name__ == '__main__':
    input_file = '/Users//Downloads/utf8_create_tables.sql'
    output_file = '/Users//Downloads/utf8_output_mysql.sql'

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    converted_sql = convert_to_mysql(content)

    with open(output_file, 'w', encoding='utf-8') as f_out:
        f_out.write(converted_sql)

    print(f"✅ 转换完成,输出文件:{output_file}")

替换:

timestamp without time zone  datetime 
TIMESTAMP DEFAULT (NOW())::timestamp(6) without time zone  datetime 
,;, ;

iconv -f GBK -t UTF-8.SQL > utf8.sql

posted @ 2025-05-30 15:38  尘梦  阅读(28)  评论(0)    收藏  举报