神通mysql模式转 mysql
import re
def extract_field_list(insert_line):
"""
从 INSERT 语句中提取字段列表(支持字段内的括号、换行等)
"""
start = insert_line.find('(')
if start == -1:
return None, None # 没有找到字段列表
balance = 0
end = -1
for i in range(start, len(insert_line)):
if insert_line[i] == '(':
balance += 1
elif insert_line[i] == ')':
balance -= 1
if balance == 0:
end = i
break
if end == -1:
return None, None # 括号不平衡
fields = insert_line[start+1:end]
rest_of_line = insert_line[end+1:]
return fields, rest_of_line
def extract_and_transform_inserts(input_file, output_file):
with open(input_file, 'r', encoding='utf-8') as f:
content = f.read()
# 匹配 TABLE INSERT 块
insert_blocks = re.findall(
r'-{20,}TABLE INSERT SQL: (.*?)-{20,}\s*(.*?)(?=(?:-{20,}TABLE DDL:)|\Z)',
content,
re.DOTALL | re.IGNORECASE
)
extracted_sql = ''
for table_name, insert_block in insert_blocks:
table_name = table_name.strip().lower()
lines = insert_block.strip().splitlines()
merged_lines = []
buffer = ''
for line in lines:
line = line.strip()
if not line:
continue
buffer += ' ' + line
if line.endswith(';'):
merged_lines.append(buffer.strip())
buffer = ''
insert_lines = []
for insert_line in merged_lines:
if insert_line.upper().startswith('INSERT'):
# .)
insert_line = re.sub(r'INSERT INTO\s+\w+\.', 'INSERT INTO ', insert_line, flags=re.IGNORECASE)
# 提取表名
table_match = re.search(r'INSERT INTO\s+(\w+)', insert_line, flags=re.IGNORECASE)
if table_match:
table_name_in_sql = table_match.group(1).lower()
insert_line = re.sub(r'INSERT INTO\s+\w+', f'INSERT INTO {table_name_in_sql}', insert_line, count=1, flags=re.IGNORECASE)
# 提取字段部分(使用平衡括号方法)
fields, rest_of_line = extract_field_list(insert_line)
if fields:
# 转换字段名为小写并加反引号
field_list = [f.strip().lower() for f in fields.split(',')]
fields_lower = ', '.join([f'`{f}`' for f in field_list])
insert_line = f'INSERT INTO {table_name_in_sql} ({fields_lower}){rest_of_line}'
insert_lines.append(insert_line)
if insert_lines:
extracted_sql += f'-- INSERTS FOR {table_name}\n'
extracted_sql += '\n'.join(insert_lines)
extracted_sql += '\n\n'
with open(output_file, 'w', encoding='utf-8') as f_out:
f_out.write(extracted_sql)
print(f"✅ 提取并转换完成,输出文件:{output_file}")
if __name__ == '__main__':
input_file = '/Users//Downloads/utf8.sql' # 你的原始SQL文件
output_file = '/Users//Downloads/output_inserts1.sql'
extract_and_transform_inserts(input_file, output_file)
import re
def parse_table_columns(create_table_sql):
"""
解析CREATE TABLE语句中的字段定义。
返回一个dict: {字段名: 字段定义}
"""
columns = {}
lines = create_table_sql.splitlines()
inside_table = False
for line in lines:
line = line.strip()
if line.upper().startswith("CREATE TABLE"):
inside_table = True
continue
if inside_table:
if line.startswith(")") or line.upper().startswith("CONSTRAINT"):
break # 结束字段解析
if line.endswith(","):
line = line[:-1]
if line:
# 使用正则提取列名和类型
match = re.match(r'(\S+)\s+(.+)', line)
if match:
col_name = match.group(1).strip().lower()
col_def = match.group(2).strip()
columns[col_name] = col_def
return columns
def convert_to_mysql(sql_text):
"""
转换SQL文本为MySQL,包含:
- 去除双引号
- 字段和表名转为小写
- 类型转换
- COMMENT转换
"""
# 1. 去除双引号
sql_text = re.sub(r'"', '', sql_text)
# 2. 类型转换
sql_text = re.sub(r'character varying\((\d+)\)', r'VARCHAR(\1)', sql_text, flags=re.IGNORECASE)
sql_text = re.sub(r'timestamp\(0\) without time zone', r'TIMESTAMP', sql_text, flags=re.IGNORECASE)
# 3. 提取所有CREATE TABLE
table_defs = {}
for match in re.finditer(r'CREATE TABLE\s+(\S+)\.(\S+)\s*\((.*?)\)\s*AUTO_INCREMENT', sql_text, re.DOTALL | re.IGNORECASE):
schema, table, columns_text = match.group(1).lower(), match.group(2).lower(), match.group(3)
table_name = table
table_defs[table_name] = parse_table_columns(f"CREATE TABLE {table_name} ({columns_text})")
# 4. COMMENT ON TABLE
def table_comment_repl(match):
schema, table, comment = match.group(1).lower(), match.group(2).lower(), match.group(3)
return f"ALTER TABLE {table} COMMENT='{comment}';"
sql_text = re.sub(
r'COMMENT ON TABLE (\S+)\.(\S+) IS \'([^\']*)\';',
table_comment_repl,
sql_text,
flags=re.IGNORECASE
)
# 5. COMMENT ON COLUMN
def column_comment_repl(match):
schema = match.group(1).lower()
table = match.group(2).lower()
column = match.group(3).lower()
comment = match.group(4)
if table in table_defs and column in table_defs[table]:
col_def = table_defs[table][column]
col_def = col_def.rstrip(',')
return f"ALTER TABLE {table} MODIFY COLUMN {column} {col_def} COMMENT '{comment}';"
else:
return f"-- TODO: 手动添加字段定义 ALTER TABLE {table} MODIFY COLUMN {column} [type] COMMENT '{comment}';"
sql_text = re.sub(
r'COMMENT ON COLUMN (\S+)\.(\S+)\.(\S+) IS \'([^\']*)\';',
column_comment_repl,
sql_text,
flags=re.IGNORECASE
)
# 6. 替换所有 CREATE TABLE 语句里的表名和字段名为小写
def lowercase_create_table(match):
schema = match.group(1).lower()
table = match.group(2).lower()
columns_text = match.group(3)
# 对字段名进行小写转换
new_columns_lines = []
for line in columns_text.splitlines():
line = line.strip()
if not line:
continue
if line.startswith("CONSTRAINT"):
new_columns_lines.append(line)
continue
parts = line.split(maxsplit=1)
if len(parts) == 2:
col_name = parts[0].lower()
col_def = parts[1]
new_columns_lines.append(f" {col_name} {col_def}")
else:
new_columns_lines.append(line)
new_columns_text = ",\n".join(new_columns_lines)
return f"CREATE TABLE {schema}.{table} (\n{new_columns_text}\n) AUTO_INCREMENT"
sql_text = re.sub(
r'CREATE TABLE\s+(\S+)\.(\S+)\s*\((.*?)\)\s*AUTO_INCREMENT',
lowercase_create_table,
sql_text,
flags=re.DOTALL | re.IGNORECASE
)
# 7. 去除BINLOG ON
sql_text = re.sub(r'\s+BINLOG\s+ON\s*;', r';', sql_text, flags=re.IGNORECASE)
return sql_text
if __name__ == '__main__':
input_file = '/Users//Downloads/utf8_create_tables.sql'
output_file = '/Users//Downloads/utf8_output_mysql.sql'
with open(input_file, 'r', encoding='utf-8') as f:
content = f.read()
converted_sql = convert_to_mysql(content)
with open(output_file, 'w', encoding='utf-8') as f_out:
f_out.write(converted_sql)
print(f"✅ 转换完成,输出文件:{output_file}")
替换:
timestamp without time zone datetime
TIMESTAMP DEFAULT (NOW())::timestamp(6) without time zone datetime
,;, ;
iconv -f GBK -t UTF-8.SQL > utf8.sql

浙公网安备 33010602011771号