openpyxl解析xlsx文件示例

#coding=utf-8
import openpyxl

def read_sheet(book_name, sheet_name):
    return openpyxl.load_workbook(book_name)[sheet_name]

def get_pai2times(sheet, pai_col, time_col):
    records = dict()
    for row in sheet.iter_rows(min_row=2, max_row = sheet.max_row):
        pai, time = row[pai_col].value, row[time_col].value
        if pai not in records:
            records[pai] = [time]
        else:
            records[pai].append(time)
    return records

def fetch_one(pai, ilist, olist):
    if not ilist:
        otime = olist[0]
        olist.remove(otime)
        return [pai, None, otime]
    if not olist:
        itime = ilist[0]
        ilist.remove(itime)
        return [pai, itime, None]

    itime = ilist[0]
    ilist.remove(itime)
    itime_nxt = ilist[0] if len(ilist) > 0 else None
    otime = None
    for curr_time in olist:
        # 没有下一次进场,出场时间比进场时间大就行
        if itime_nxt is None:
            if curr_time > itime:
                otime = curr_time
        # 有下一次进场,出场时间必须介于两次进场时间中间
        else:
            if curr_time >= itime and curr_time <= itime_nxt:
                otime = curr_time

        if otime is not None:
            olist.remove(otime)
            break
    return [pai, itime, otime]

def write_xlsx(xlsx_file, inr, outr):
    all_pai = set(inr.keys()) | set(outr.keys())

    new_xlsx = openpyxl.Workbook()
    sheet1 = new_xlsx.active
    sheet1.append(["车牌", "进场时间", "出场时间"])
    for pai in all_pai:
        ilist = [] if pai not in inr else inr[pai]
        ilist = sorted(ilist, key=lambda x: x.timestamp())

        olist = [] if pai not in outr else outr[pai]
        olist = sorted(olist, key=lambda x: x.timestamp())

        while len(ilist) > 0 or len(olist) > 0:
            row = fetch_one(pai=pai, ilist=ilist, olist=olist)
            sheet1.append(row)
    
    with open(xlsx_file, "wb") as f:
        new_xlsx.save(f)

if __name__ == '__main__':
    inr = get_pai2times( sheet=read_sheet(book_name='in.xlsx', sheet_name='Sheet1'), pai_col=7, time_col=8 )
    outr = get_pai2times( sheet=read_sheet(book_name='out.xlsx', sheet_name='Sheet1'), pai_col=7, time_col=8)
    write_xlsx(xlsx_file="total.xlsx", inr=inr, outr=outr)

 

posted @ 2024-04-08 10:23  rex4399  阅读(46)  评论(0)    收藏  举报