python: read excel

pip install xlrd
pip install xlwt
pip install xlutils
pip install xlwings
pip install XlsxWriter
pip install openpyxl
pip install pandas
pip install pandasql

 pip install win32com

pip install pyspark

import DrawSquare
import Decorators
import xlrd
import xlwt
import xlwings as xw
import xlsxwriter
import openpyxl
import pandas as pd
import pandasql

def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    print_hi('PyCharm,geovin du study')
    #skiprows=1 跳过第1行
    xlspath = r'C:\Users\geovindu\PycharmProjects\pythonProject1\2月.xls'
    xlsxpath = r'C:\Users\geovindu\PycharmProjects\pythonProject1\2月.xlsx'

    xlspath1 = r'C:\Users\geovindu\PycharmProjects\pythonProject1\2月0.xls'
    xlsxpath1 = r'C:\Users\geovindu\PycharmProjects\pythonProject1\2月0.xlsx'

    xls = pd.read_excel(io=xlspath, sheet_name='Sheet1',index_col=(0,1)) #
    xlsx = pd.read_excel(io=xlsxpath, sheet_name='Sheet1',index_col=(0,1)) #
    print(xls.head())
    #xls.info()
    print(xlsx.head())
    #xlsx.info()
    print(xlsx.columns)

    print(xlsx.index)

    duxls = pd.read_excel(io=xlspath, sheet_name='Sheet1', names=["社保明细","1月缴纳明细(元)"])  #
    print(duxls.head())

    xls1 = pd.read_excel(io=xlspath1, sheet_name='Sheet1',index_col=(2,3),skiprows=1) #从第2列至第3列,省略第一行
    xlsx1 = pd.read_excel(io=xlsxpath1, sheet_name='Sheet1',index_col=(2,3),skiprows=1) #从第2列至第3列,省略第一行
    print(xls1.head())
    print(xlsx1.head())
    print(xls1.columns.names)
    for a in xls1.columns.names:
        print("name:",a)
    print(xlsx1.columns)
    print(xls1.index)
    print(xlsx1.index)
    xls1.fillna(value="No Data Found", inplace=True)
    xlsx1.fillna(value="No Data Found", inplace=True)
    print(xlsx1)
    print(xls1)
    for data in xlsx1.describe():
        print("d:",data)
    print(xlsx1.groupby(['社保明细',"1月缴纳明细(元)"]).sum())

    two=xls1.merge(xlsx1, how='left',on=['社保明细',"1月缴纳明细(元)"])
    print(two)

  

 

 

 

    df = pd.DataFrame()
    out = pd.concat([xlsx1,xls1])

    df = pd.concat([df, out])

    print("df:",df.iloc[:, 0:1])
    #df['Row_sum'] = df.apply(lambda x: x.sum(), axis=1)  # 按行求和,添加为新列
    df.loc['Col_sum'] = df.apply(lambda x: x.sum())  # 各列求和,添加新的行
    print(df)

    print(out)
    ds=out.groupby(['社保明细']).sum() #.apply(lambda x: x.sample(frac=0.3)).reset_index()
    print(out.groupby(['社保明细']).sum())
    for o in out:
        print(o)

    two=xls1.merge(xlsx1, how='left',on=['社保明细',"1月缴纳明细(元)"])
    print(two)
   # out.groupby(['社保明细'], as_index=False).agg(**{'Total Number': ('1月缴纳明细(元)', 'sum')})

    #out.groupby(["社保明细"])["1月缴纳明细(元)"].sum().reset_index()
    dataframe_sum =out.sum(axis=1, numeric_only=True)  #out["1月缴纳明细(元)"].sum()
    print(dataframe_sum)

    #print(out["社保明细"][out["社保明细"] == "养老"])

  

 

    path = r"C:\Users\geovindu\PycharmProjects\pythonProject1"
    os.chdir(path)  # 修改工作路径

    workbook = openpyxl.load_workbook('2月.xlsx')  # 返回一个workbook数据类型的值
    sheet = workbook.active  # 获取活动表
    print('当前活动表是:' + str(sheet))
    row = []
    column = []
    # 获取当前活动表有多少行
    for i in sheet.rows:
        row.append(list(i))  # i是元组类型,转为列表

    # 获取当前活动表有多少列
    for i in sheet.columns:
        column.append(list(i))  # i是元组类型,转为列表

    print('行数:' + str(len(row)))
    print('列数:' + str(len(column)))
    for rs in row:
        print(rs.index)

    # 按行获取值
    print('按行获取值')
    for i in sheet.iter_rows(min_row=2, max_row=3, min_col=1, max_col=2):
        for j in i:
            print(j.value)

    # 按列获取值
    print('按列获取值')
    for i in sheet.iter_cols(min_row=2, max_row=3, min_col=1, max_col=2):
        for j in i:
            print(j.value)

  

posted @ 2023-06-12 22:00  ®Geovin Du Dream Park™  阅读(36)  评论(0)    收藏  举报