import datetime
import json
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
# 数据库连接配置,请根据你的实际情况修改
db_config = {
'host': 'your_database_host',
'user': 'your_database_user',
'password': 'your_database_password',
'database': 'your_database_name'
}
# 使用 SQLAlchemy 创建数据库连接,并获取 Connection 对象
engine = create_engine (
f"mysql+mysqlconnector://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}")
connection = engine.connect ()
# 查询onboard_companies表中status等于1的数据
companies_query = text ("SELECT id,ap_cid,company_name FROM onboard_companies WHERE status = 1")
# 尝试读取数据到DataFrame
companies_df = pd.read_sql (companies_query, connection)
# print(companies_df)
# 查询labels表中status等于2的数据
labels_query = text ("SELECT id,name FROM labels WHERE status = 2 AND id IN (4,9)")
# 尝试读取数据到DataFrame
labels_df = pd.read_sql (labels_query, connection)
# 查询label_fields表中与labels表关联的数据
label_fields_query = text ("""
SELECT lf.id, lf.label_id, lf.name, lf.type, lf.field_type,lf.is_required, lf.options
FROM label_fields lf
INNER JOIN labels l ON lf.label_id = l.id
WHERE l.status = 2 AND l.id IN (4,9) AND lf.status = 1 AND lf.type != 7
""")
# 尝试读取数据到DataFrame
label_fields_df = pd.read_sql (label_fields_query, connection)
# 关闭数据库连接
connection.close ()
# 转换type和is_required的值
type_mapping = {
1: "Single-select dropdown list (Self-defined)",
2: "Single-select dropdown list (Company ID)",
3: "Multi-select dropdown list (Self-defined)",
4: "Multi-select dropdown list (Company ID)",
5: "Multi-select dropdown list (Trade Code)",
6: "Calendar",
8: "Free text",
}
field_type_mapping = {
1: "dropdown",
2: "checkbox",
3: "textarea",
4: "date",
5: "file",
6: "multidropdow",
}
is_required_mapping = {
0: "可选",
1: "必填",
}
# 处理options列
def process_options(options):
if pd.notna (options):
options_list = json.loads (options)
options_str = ",".join (options_list)
return options_str
return None
label_fields_df['type'] = label_fields_df['type'].map (type_mapping)
label_fields_df['field_type'] = label_fields_df['field_type'].map (field_type_mapping)
label_fields_df['is_required'] = label_fields_df['is_required'].map (is_required_mapping)
label_fields_df['options'] = label_fields_df['options'].apply (process_options)
# 合并数据
merged_df = pd.merge (labels_df, label_fields_df, left_on='id', right_on='label_id')
# 选择需要的列
result_df = merged_df[['id_x', 'name_x', 'id_y', 'name_y', 'type', 'field_type', 'is_required', 'options']]
# print(result_df)
# 创建一个新的DataFrame来存储组合后的结果
combined_data = pd.DataFrame ()
# 使用嵌套的for循环组合数据
for _, company_row in companies_df.iterrows ():
for _, result_row in result_df.iterrows ():
# 创建一个新的行,包含公司数据和结果数据
new_row = {
'company_id': company_row['id'],
'company_ap_cid': company_row['ap_cid'],
'company_name': company_row['company_name'],
'result_id_x': result_row['id_x'],
'result_name_x': result_row['name_x'],
'result_id_y': result_row['id_y'],
'result_name_y': result_row['name_y'],
'result_type': result_row['type'],
'result_field_type': result_row['field_type'],
'result_is_required': result_row['is_required'],
'result_options': result_row['options'],
'input_options': ''
}
# 将新行添加到组合后的数据框中
combined_data = pd.concat ([combined_data, pd.DataFrame ([new_row])], ignore_index=True)
# 打印结果
# print(combined_data)
combined_data.columns = ['Company Id', 'Company AP ID', 'Company Name', 'Label Id', 'Label Name', 'Label Field Id',
'Label Field Name', 'Label Field Type', 'Label Field Val Type', 'is Required', 'Optional options', 'Input options']
# print(combined_data)
# 获取当前日期和时间
current_datetime = datetime.datetime.now ()
# 把日期和事件格式化为字符串
formatted_datetime = current_datetime.strftime ("%Y%m%d_%H%M%S")
# 在当前文件夹下的 "outFiles" 文件夹中保存 Excel 文件
output_folder = "outFiles"
# 将格式化的日期时间与所需的文件名连接起来
output_file_name = f"outPutLabel_{formatted_datetime}.xlsx"
# 如果 "outFiles" 文件夹不存在,则创建它
if not os.path.exists(output_folder):
os.makedirs(output_folder)
# 拼接输出文件的完整路径
output_file_path = os.path.join(output_folder, output_file_name)
# 将数据框架写入 Excel 文件
combined_data.to_excel (output_file_path, index=False)
print(f"Excel 文件已保存到 {output_file_path}")