# 数值计算_虚拟变量矩阵 (numerical computation: dummy-variable matrix)

import pandas as pd
import numpy as np
# Lift pandas' display truncation so echoed frames show every row and column.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Read the first worksheet of the source workbook into `dataf`.
source_workbook = r'C:\Users\Admin\Desktop\统计资料\data使用中\16个行业分类\程序用格式\\3月11程序用数据表.xlsx'
dataf = pd.read_excel(source_workbook, sheet_name=0)
dataf

# The last 15 columns of `dataf`.
# NOTE(review): presumably the 0/1 industry dummy columns the file title
# refers to — confirm against the workbook.
hangyefenlie = dataf.iloc[:, -15:]
hangyefenlie


# Columns from position 18 up to (not including) the last 15.
zhibiao_data = dataf.iloc[:, 18:-15]
zhibiao_data

# Transposed values as a plain ndarray: one row per `zhibiao_data` column.
# np.asarray is used instead of np.matrix, which NumPy no longer recommends.
zhuanzhishuju = np.asarray(zhibiao_data).T

# (columns of zhibiao_data) x (15 trailing columns) product: entry [r, c] is
# the sum over data rows of zhibiao_data column r times hangyefenlie column c.
fen_hangyehe = np.dot(zhuanzhishuju, np.asarray(hangyefenlie))
fen_hangyehe


# Wrap the product in a frame labelled with the 15 trailing column names;
# the row index is the default 0..n-1 range.
hangye_16zhibiao = pd.DataFrame(fen_hangyehe, columns=dataf.columns[-15:])
hangye_16zhibiao


# Summary statistics of the transposed table (one statistics column per row
# of hangye_16zhibiao).
hangye_16zhibiao.T.describe()

# Row total over every column except '总和' itself, so re-running this step
# never folds a previous total into the new one.
hangye_16zhibiao['总和'] = hangye_16zhibiao.drop(
    columns='总和', errors='ignore'
).sum(axis=1)
hangye_16zhibiao



# One share ('占比') label per existing column, in the same order.
names = [col + '占比' for col in hangye_16zhibiao.columns]
names

# Widen the table with the share columns as NaN placeholders. reindex keeps
# the existing rows and values and appends the new labels on the right,
# without concatenating an empty frame.
hangye_16zhibiao_1 = hangye_16zhibiao.reindex(
    columns=list(hangye_16zhibiao.columns) + names
)


# Fill every share column with "value / row total" for all rows at once.
# Positions 0-15 are the value columns (position 15 is the divisor);
# positions 16-31 are the share columns written here.
value_part = hangye_16zhibiao_1.iloc[:, :16].astype(float)
share_labels = hangye_16zhibiao_1.columns[16:32]
hangye_16zhibiao_1[share_labels] = value_part.div(
    value_part.iloc[:, 15], axis=0
).to_numpy()

# Remaining NaN cells (e.g. 0/0 when a row total is zero) are stored as 0
# before the table is exported.
hangye_16zhibiao_1.fillna(value=0, inplace=True)
hangye_16zhibiao_1.to_csv('分行业分年度汇总.csv', encoding='GBK')



# Sum runs of three consecutive rows of hangye_16zhibiao_1 into a 19-row
# table with the same columns.
# NOTE(review): the windows start at rows 0, 1, ..., 18 and therefore
# overlap (rows i..i+2). Confirm this is intended rather than
# non-overlapping groups of three.
window_count = 19
window_size = 3
year_3_zong_19zhibiao = pd.DataFrame(
    np.zeros((window_count, hangye_16zhibiao_1.shape[1])),
    columns=hangye_16zhibiao_1.columns,
)
for i in range(window_count):
    year_3_zong = hangye_16zhibiao_1.iloc[i:i + window_size, :]
    year_3_zong_19zhibiao.iloc[i, :] = year_3_zong.sum(axis=0)
year_3_zong_19zhibiao

# The summed share columns are not meaningful, so recompute them from the
# summed values: positions 0-15 divided by position 15, written to 16-31.
summed_values = year_3_zong_19zhibiao.iloc[:, :16].astype(float)
summed_share_labels = year_3_zong_19zhibiao.columns[16:32]
year_3_zong_19zhibiao[summed_share_labels] = summed_values.div(
    summed_values.iloc[:, 15], axis=0
).to_numpy()

# Remaining NaN cells (e.g. 0/0) are stored as 0.
year_3_zong_19zhibiao.fillna(value=0, inplace=True)




# posted @ 2021-03-10 21:53  kuanleung  阅读(9)  评论(0)    收藏  举报  来源