python excel合并

import re
import os
import openpyxl
from collections import defaultdict
from openpyxl.utils import column_index_from_string

class ORIGIN_EXCEL:
    def __init__(self, color="", product=""):
        self.color = color
        self.product = product

def read_excel_cells2(file_path, sheet_name, start_row, end_row, column_color, column_product):
    origin_excel_datas = []
    try:
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook[sheet_name]
        
        max_row = sheet.max_row
        end_row = min(int(end_row), max_row)

        for row in range(int(start_row), int(end_row) + 1):
            cell_value_color = sheet[f'{column_color}{row}'].value
            cell_value_product = sheet[f'{column_product}{row}'].value
            data = ORIGIN_EXCEL(color=str(cell_value_color), product=str(cell_value_product))
            origin_excel_datas.append(data)
    except Exception as e:
        print(f"发生错误: {e}")
    return origin_excel_datas

def read_excel_cells3(file_path, sheet_name, start_row, end_row, column_begin, column_end):
    try:
        workbook = openpyxl.load_workbook(file_path)
        sheet = workbook[sheet_name]

        column_begin = column_index_from_string(column_begin)
        column_end = column_index_from_string(column_end)

        max_row = sheet.max_row
        max_column = sheet.max_column
        end_row = min(int(end_row), max_row)
        column_end = min(column_end, max_column)

        origin_excel_datas = []

        for row in range(int(start_row), int(end_row) + 1):
            origin_excel_datas.append([])
            for column in range(column_begin, column_end + 1):
                cell_value = sheet.cell(row=row, column=column).value
                origin_excel_datas[row - int(start_row)].append(cell_value)
        return origin_excel_datas
    except Exception as e:
        print(f"发生错误: {e}")
        return None

def write_excel_cells3(file_path, sheet_name, origin_excel_datas):
    try:
        new_workbook = openpyxl.Workbook()
        new_sheet = new_workbook.active
        new_sheet.title = sheet_name

        for r_idx, row_data in enumerate(origin_excel_datas, start=1):
            for c_idx, cell_data in enumerate(row_data, start=1):
                new_sheet.cell(row=r_idx, column=c_idx, value=cell_data)
        new_workbook.save(file_path)
    except Exception as e:
        print(f"发生错误: {e}")

#名字的校正
#例如 a."BA50/50中麻灰，BASJ40-4GZHY5-4" 改名成"BA50/50中麻灰，BASJ40-4GZHY5". b. "BA50/50中麻蓝，BBSJ40-4GZHY5"则不用更名 c. "BA50/50中麻黑，BBSJ40-4GZHY5-6D"则不用更名
#特殊处理: "贝壳米，SJ40-5FfYC8-1（等外批）" 这样的-\d+(.*)类型也需要改成"贝壳米，SJ40-5FfYC8" . 当前函数  处理不了这个情况
def validate_excel_color_info(origin_excel_datas):
    for item in origin_excel_datas:
        # 校正 color 字段
        if item.color:
            match = re.search(r'(-\d+[^-]*)(?=\)|$)', item.color)
            if match:
                item.color = item.color[:match.start()]
                
def validate_excel_color_info_dst(origin_excel_datas_dst):
    for row in origin_excel_datas_dst:
        # 校正 color 字段，假设颜色信息在每行的第一个单元格
        if len(row) > 0 and isinstance(row[0], str):
            # 使用更精确的正则表达式来匹配 "-数字" 并移除它
            # 这个模式会寻找以 "-" 开始紧接着是一个或多个数字，并且后面不是数字的位置。
            match = re.search(r'-\d+(?![\d\w])', row[0])
            if match:
                # 移除匹配到的 "-数字" 部分
                row[0] = row[0][:match.start()] + row[0][match.end():]



# 统计并更正原始数据  相同color的条目合并为一项
# 如果origin_excel_datas有相同color名字,那么 color-product1   color-product2  合并为 color - float(product1)+ float(product2). 实现statics_excel_color_info 函数
def statics_excel_color_info(origin_excel_datas):
    # 使用字典来统计相同颜色的条目和它们的数量
    color_dict = defaultdict(lambda: {"total_product": 0, "items": []})
    
    for item in origin_excel_datas:
        if item.color not in color_dict:
            color_dict[item.color]["items"].append(item)
        try:
            # 尝试将 product 转换为数值并累加到 total_product
            color_dict[item.color]["total_product"] += float(item.product)
        except ValueError:
            # 如果产品产量不是数值，则忽略该行数据
            pass
    
    # 清空原始数据列表并重新填充已汇总的数据
    origin_excel_datas.clear()
    for color, info in color_dict.items():
        # 对于每个颜色，创建一个新的 ORIGIN_EXCEL 实例，并设置其 product 为总和
        new_item = ORIGIN_EXCEL(color=color, product=str(info["total_product"]))
        origin_excel_datas.append(new_item)

#如果origin_excel_datas.color 和 obj_start_merge_row_dst.color 匹配。那么origin_excel_datas.product的值填写在origin_excel_datas_dst.color的obj_merge_column_4_usr列
#如果origin_excel_datas.color 在 obj_start_merge_row_dst.color 中没有匹配项,那么obj_start_merge_row_dst新增1行。然后 origin_excel_datas.color写在obj_start_merge_row_dst新增行的第一列。 origin_excel_datas.product的值填写在origin_excel_datas_dst.color的obj_merge_column_4_usr列    
def merge_excel_color_info(origin_excel_datas, origin_excel_datas_dst, obj_start_merge_row_dst, obj_end_merge_row_dst, obj_merge_column_4_usr):
    # 创建一个颜色到产品数量的映射
    color_to_product_map = {item.color: float(item.product) for item in origin_excel_datas}
    
    # 转换输入的行号为整数
    start_merge_row_dst = int(obj_start_merge_row_dst) - 1  # 减1是因为列表索引从0开始
    end_merge_row_dst = int(obj_end_merge_row_dst)
    merge_column_idx = column_index_from_string(obj_merge_column_4_usr) - 1  # 减1是因为列表索引从0开始
    
    updated_data = []
    matched_colors = set()
    
    for idx, row in enumerate(origin_excel_datas_dst):
        if start_merge_row_dst <= idx < end_merge_row_dst:
            color = row[0] if len(row) > 0 else None
            if color in color_to_product_map:
                # 更新产品数量（假设原始数据的产品数量在指定的列）
                if len(row) > merge_column_idx:
                    try:
                        # 检查是否为 None 或空字符串，并设置默认值
                        cell_value = row[merge_column_idx]
                        if cell_value is None or str(cell_value).strip() == '':
                            original_product = 0
                        else:
                            original_product = float(cell_value)
                        
                        row[merge_column_idx] = str(original_product + color_to_product_map[color])
                    except ValueError:
                        row[merge_column_idx] = str(color_to_product_map[color])
                else:
                    row.append(str(color_to_product_map[color]))
                matched_colors.add(color)
            updated_data.append(row)
    
    # 添加未出现在最终数据中的新颜色
    for color, product in color_to_product_map.items():
        if color not in matched_colors:
            new_row = [color] + [''] * (merge_column_idx - 1) + [str(product)]
            updated_data.append(new_row)
    
    # 更新 origin_excel_datas_dst
    origin_excel_datas_dst.clear()
    origin_excel_datas_dst.extend(updated_data)



# 文件路径和表名等用户输入
file_path = input("你要处理的原始excel名字,直接换行则使用默认名字-2025年生产品种统计.xlsx：").strip() or "2025年生产品种统计.xlsx"
sheet_name = input("你要处理的原始execl的sheet页名字:").strip() or "2023.12"
start_row = input("你要统计的原始execl的行的起始行:").strip()
end_row = input("你要统计的原始execl的行的终止行:").strip()
column_color = input("你要统计的原始execl的颜色所在的列: 例如在C列，则输入C:").strip()
column_product = input("你要统计的原始execl的产量所在的列: 例如在F列，则输入F:").strip()

file_path_dst = input("你要处理的最终excel名字,直接换行则使用默认名字-2025年生产品种统计.xlsx：").strip() or "2023年11月颜色色号台账.xls23.11.15.xlsx"
sheet_name_dst = input("你要处理的最终execl的sheet页名字:").strip() or "彩纱品种"
start_row_dst = input("最终excel行的起始行:").strip()
end_row_dst = input("最终excel行的终止行:").strip()
column_begin_dst = input("最终excel列的起始列(例如A):").strip()
end_column_dst = input("最终excel列的终止列(例如M):").strip()

obj_start_merge_row_dst = input("最终excel行的起始行:").strip()
obj_end_merge_row_dst = input("最终excel行的终止行:").strip()
obj_merge_column_4_usr =  input("最终excel列的产量所在列(例如A):").strip()

file_path_temp = "abc.xlsx"

# 加载origin info
origin_excel_datas = read_excel_cells2(file_path, sheet_name, start_row, end_row, column_color, column_product)

# 调用函数读文档 加载 dst_info
origin_excel_datas_dst = read_excel_cells3(file_path_dst, sheet_name_dst, start_row_dst, end_row_dst, column_begin_dst, end_column_dst)

# 校验并更正原始信息
validate_excel_color_info(origin_excel_datas)

# 统计并更正原始数据  相同color的条目合并为一项
statics_excel_color_info(origin_excel_datas)  

#打印
for data in origin_excel_datas:
    print(f"{data.color} --- {data.product}")

# 校验并更正最终信息
validate_excel_color_info_dst(origin_excel_datas_dst)

#合并原始数据到最终数据中
merge_excel_color_info(origin_excel_datas, origin_excel_datas_dst, obj_start_merge_row_dst, obj_end_merge_row_dst, obj_merge_column_4_usr)


# 如果文件已存在，则删除临时文件
if os.path.exists(file_path_temp):
    os.remove(file_path_temp)

# 调用函数写文档 dst info
write_excel_cells3(file_path_temp, sheet_name_dst, origin_excel_datas_dst)

import reimport osimport openpyxlfrom collections import defaultdictfrom openpyxl.utils import column_index_from_string
class ORIGIN_EXCEL: def __init__(self, color="", product=""): self.color = color self.product = product
def read_excel_cells2(file_path, sheet_name, start_row, end_row, column_color, column_product): origin_excel_datas = [] try: workbook = openpyxl.load_workbook(file_path) sheet = workbook[sheet_name]     max_row = sheet.max_row end_row = min(int(end_row), max_row)
for row in range(int(start_row), int(end_row) + 1): cell_value_color = sheet[f'{column_color}{row}'].value cell_value_product = sheet[f'{column_product}{row}'].value data = ORIGIN_EXCEL(color=str(cell_value_color), product=str(cell_value_product)) origin_excel_datas.append(data) except Exception as e: print(f"发生错误: {e}") return origin_excel_datas
def read_excel_cells3(file_path, sheet_name, start_row, end_row, column_begin, column_end): try: workbook = openpyxl.load_workbook(file_path) sheet = workbook[sheet_name]
column_begin = column_index_from_string(column_begin) column_end = column_index_from_string(column_end)
max_row = sheet.max_row max_column = sheet.max_column end_row = min(int(end_row), max_row) column_end = min(column_end, max_column)
origin_excel_datas = []
for row in range(int(start_row), int(end_row) + 1): origin_excel_datas.append([]) for column in range(column_begin, column_end + 1): cell_value = sheet.cell(row=row, column=column).value origin_excel_datas[row - int(start_row)].append(cell_value) return origin_excel_datas except Exception as e: print(f"发生错误: {e}") return None
def write_excel_cells3(file_path, sheet_name, origin_excel_datas): try: new_workbook = openpyxl.Workbook() new_sheet = new_workbook.active new_sheet.title = sheet_name
for r_idx, row_data in enumerate(origin_excel_datas, start=1): for c_idx, cell_data in enumerate(row_data, start=1): new_sheet.cell(row=r_idx, column=c_idx, value=cell_data) new_workbook.save(file_path) except Exception as e: print(f"发生错误: {e}")
#名字的校正#例如 a."BA50/50中麻灰，BASJ40-4GZHY5-4" 改名成"BA50/50中麻灰，BASJ40-4GZHY5". b. "BA50/50中麻蓝，BBSJ40-4GZHY5"则不用更名 c. "BA50/50中麻黑，BBSJ40-4GZHY5-6D"则不用更名#特殊处理: "贝壳米，SJ40-5FfYC8-1（等外批）" 这样的-\d+(.*)类型也需要改成"贝壳米，SJ40-5FfYC8" . 当前函数处理不了这个情况def validate_excel_color_info(origin_excel_datas): for item in origin_excel_datas: # 校正 color 字段 if item.color: match = re.search(r'(-\d+[^-]*)(?=\)|$)', item.color) if match: item.color = item.color[:match.start()]   def validate_excel_color_info_dst(origin_excel_datas_dst): for row in origin_excel_datas_dst: # 校正 color 字段，假设颜色信息在每行的第一个单元格 if len(row) > 0 and isinstance(row[0], str): # 使用更精确的正则表达式来匹配 "-数字" 并移除它 # 这个模式会寻找以 "-" 开始紧接着是一个或多个数字，并且后面不是数字的位置。 match = re.search(r'-\d+(?![\d\w])', row[0]) if match: # 移除匹配到的 "-数字" 部分 row[0] = row[0][:match.start()] + row[0][match.end():]

# 统计并更正原始数据相同color的条目合并为一项# 如果origin_excel_datas有相同color名字,那么 color-product1 color-product2 合并为 color - float(product1)+ float(product2). 实现statics_excel_color_info 函数def statics_excel_color_info(origin_excel_datas): # 使用字典来统计相同颜色的条目和它们的数量 color_dict = defaultdict(lambda: {"total_product": 0, "items": []})     for item in origin_excel_datas: if item.color not in color_dict: color_dict[item.color]["items"].append(item) try: # 尝试将 product 转换为数值并累加到 total_product color_dict[item.color]["total_product"] += float(item.product) except ValueError: # 如果产品产量不是数值，则忽略该行数据 pass     # 清空原始数据列表并重新填充已汇总的数据 origin_excel_datas.clear() for color, info in color_dict.items(): # 对于每个颜色，创建一个新的 ORIGIN_EXCEL 实例，并设置其 product 为总和 new_item = ORIGIN_EXCEL(color=color, product=str(info["total_product"])) origin_excel_datas.append(new_item)
#如果origin_excel_datas.color 和 obj_start_merge_row_dst.color 匹配。那么origin_excel_datas.product的值填写在origin_excel_datas_dst.color的obj_merge_column_4_usr列#如果origin_excel_datas.color 在 obj_start_merge_row_dst.color 中没有匹配项,那么obj_start_merge_row_dst新增1行。然后 origin_excel_datas.color写在obj_start_merge_row_dst新增行的第一列。 origin_excel_datas.product的值填写在origin_excel_datas_dst.color的obj_merge_column_4_usr列   def merge_excel_color_info(origin_excel_datas, origin_excel_datas_dst, obj_start_merge_row_dst, obj_end_merge_row_dst, obj_merge_column_4_usr): # 创建一个颜色到产品数量的映射 color_to_product_map = {item.color: float(item.product) for item in origin_excel_datas}     # 转换输入的行号为整数 start_merge_row_dst = int(obj_start_merge_row_dst) - 1 # 减1是因为列表索引从0开始 end_merge_row_dst = int(obj_end_merge_row_dst) merge_column_idx = column_index_from_string(obj_merge_column_4_usr) - 1 # 减1是因为列表索引从0开始     updated_data = [] matched_colors = set()     for idx, row in enumerate(origin_excel_datas_dst): if start_merge_row_dst <= idx < end_merge_row_dst: color = row[0] if len(row) > 0 else None if color in color_to_product_map: # 更新产品数量（假设原始数据的产品数量在指定的列） if len(row) > merge_column_idx: try: # 检查是否为 None 或空字符串，并设置默认值 cell_value = row[merge_column_idx] if cell_value is None or str(cell_value).strip() == '': original_product = 0 else: original_product = float(cell_value)     row[merge_column_idx] = str(original_product + color_to_product_map[color]) except ValueError: row[merge_column_idx] = str(color_to_product_map[color]) else: row.append(str(color_to_product_map[color])) matched_colors.add(color) updated_data.append(row)     # 添加未出现在最终数据中的新颜色 for color, product in color_to_product_map.items(): if color not in matched_colors: new_row = [color] + [''] * (merge_column_idx - 1) + [str(product)] updated_data.append(new_row)     # 更新 origin_excel_datas_dst origin_excel_datas_dst.clear() origin_excel_datas_dst.extend(updated_data)

# 文件路径和表名等用户输入file_path = input("你要处理的原始excel名字,直接换行则使用默认名字-2025年生产品种统计.xlsx：").strip() or "2025年生产品种统计.xlsx"sheet_name = input("你要处理的原始execl的sheet页名字:").strip() or "2023.12"start_row = input("你要统计的原始execl的行的起始行:").strip()end_row = input("你要统计的原始execl的行的终止行:").strip()column_color = input("你要统计的原始execl的颜色所在的列: 例如在C列，则输入C:").strip()column_product = input("你要统计的原始execl的产量所在的列: 例如在F列，则输入F:").strip()
file_path_dst = input("你要处理的最终excel名字,直接换行则使用默认名字-2025年生产品种统计.xlsx：").strip() or "2023年11月颜色色号台账.xls23.11.15.xlsx"sheet_name_dst = input("你要处理的最终execl的sheet页名字:").strip() or "彩纱品种"start_row_dst = input("最终excel行的起始行:").strip()end_row_dst = input("最终excel行的终止行:").strip()column_begin_dst = input("最终excel列的起始列(例如A):").strip()end_column_dst = input("最终excel列的终止列(例如M):").strip()
obj_start_merge_row_dst = input("最终excel行的起始行:").strip()obj_end_merge_row_dst = input("最终excel行的终止行:").strip()obj_merge_column_4_usr = input("最终excel列的产量所在列(例如A):").strip()
file_path_temp = "abc.xlsx"
# 加载origin infoorigin_excel_datas = read_excel_cells2(file_path, sheet_name, start_row, end_row, column_color, column_product)
# 调用函数读文档加载 dst_infoorigin_excel_datas_dst = read_excel_cells3(file_path_dst, sheet_name_dst, start_row_dst, end_row_dst, column_begin_dst, end_column_dst)
# 校验并更正原始信息validate_excel_color_info(origin_excel_datas)
# 统计并更正原始数据相同color的条目合并为一项statics_excel_color_info(origin_excel_datas)
#打印for data in origin_excel_datas: print(f"{data.color} --- {data.product}")
# 校验并更正最终信息validate_excel_color_info_dst(origin_excel_datas_dst)
#合并原始数据到最终数据中merge_excel_color_info(origin_excel_datas, origin_excel_datas_dst, obj_start_merge_row_dst, obj_end_merge_row_dst, obj_merge_column_4_usr)

# 如果文件已存在，则删除临时文件if os.path.exists(file_path_temp): os.remove(file_path_temp)
# 调用函数写文档 dst infowrite_excel_cells3(file_path_temp, sheet_name_dst, origin_excel_datas_dst)

posted @ 2025-04-26 20:15 哈哈不是嘎嘎阅读(42) 评论(0) 收藏举报

刷新页面返回顶部

python excel合并

公告