import pandas as pd
import numpy as np
import openpyxl
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
def SSA(area,area_obj,industry,time,input_var,data):
time_list = sorted(list(set(list(data[time]))))
time_span = len(time_list)
industry_list =sorted(list(set(list(data[industry]))))
industry_num = len(industry_list)
area_list = sorted(list(set(list(data[area])) - set(area_obj)))
area_num = len(area_list)
data_units = data[data[area]!=area_obj[0]]
data_obj = data[data[area]==area_obj[0]]
R = pd.DataFrame(np.zeros((time_span,industry_num)),columns=industry_list)
R['年份'] = pd.DataFrame(time_list)
R['地区'] = area_obj[0]
for i in industry_list:
for j in time_list[1:]:
base_period = j-1
current_period = j
data_R_zong = []
R_zong = [0]
for i in time_list:
data_R_zong_1 = sum(data_obj.loc[data_obj[time]==i,input_var])
data_R_zong.append(data_R_zong_1)
for i in range(1,len(data_R_zong)):
R_zong_1 = (data_R_zong[i]-data_R_zong[i-1])/data_R_zong[i-1]
R_zong.append(R_zong_1)
RT = pd.DataFrame(R_zong,columns= ['总增长率'])
RT['地区'] = area_obj[0]
RT['时间'] = pd.DataFrame(time_list)
industry_list_name = industry_list + ['time','area']
r = pd.DataFrame([],columns=industry_list_name)
for i in area_list:
ri = pd.DataFrame(np.zeros((time_span,industry_num)),index = time_list,columns=industry_list)
ri['time'] = 2010
ri['area'] = 0
for j_index,j in enumerate(industry_list):
for k_index,k in enumerate(time_list[1:]):
base_period = int(k-1)
current_period = int(k)
ri['area'] = i
ri.iloc[k_index+1,-2] = k
ri.iloc[k_index+1,j_index] = (float(data_units.loc[(data_units[area] ==i) & (data_units[industry] == j) & (data_units[time] == current_period),input_var]) - float(data_units.loc[(data_units[area] ==i) & (data_units[industry] == j) & (data_units[time] == base_period),input_var])+0.000001)/(float((data_units.loc[(data_units[area] ==i) & (data_units[industry] == j) & (data_units[time] == base_period),input_var]))+0.000001)
r = r.append(ri)
rh = pd.DataFrame([],columns=['地区','时间','产业类别','增长率'])
r_p = pd.DataFrame(np.zeros((len(r),4)),index = r.index,columns=['地区','时间','产业类别','增长率'])
for i_index, i in enumerate(industry_list):
r_p['地区'] = r['area']
r_p['时间'] = r['time']
r_p['增长率'] = r.iloc[:,i_index]
r_p['产业类别'] = i
rh = rh.append(r_p)
ri_zong_pd = pd.DataFrame([],columns=['时间','地区','增长率'])
for i in area_list:
data_ri_zong=[]
ri_zong = pd.DataFrame(np.zeros((time_span,3)),index = time_list,columns=['时间','地区','增长率'])
ri_zong['地区'] = i
for j in time_list:
data_ri_zong_1 = sum(data_units.loc[(data_units[area] == i) & (data_units[time] == j) ,input_var])
data_ri_zong.append(data_ri_zong_1)
ri_zong['时间']=pd.DataFrame(time_list,index = ri_zong.index)
for k in range(1,time_span):
ri_zong_1 = (data_ri_zong[k] - data_ri_zong[k-1])/data_ri_zong[k-1]
ri_zong.loc[time_list[k],'增长率'] = ri_zong_1
ri_zong_pd = ri_zong_pd.append(ri_zong)
result = pd.DataFrame(np.zeros((len(data_units),6)),index = data_units.index,columns=['地区','年份','作业方式','N','P','D'])
result['地区'] = data_units[area]
result['年份'] = data_units[time]
result['作业方式'] = data_units[industry]
for i in area_list:
for j in time_list:
for k in industry_list:
result.loc[(result['地区'] == i) & (result['年份'] == j) & (result['作业方式'] == k),'N'] = float(data_units.loc[(data_units[area] == i) & (data_units[time] == j) & (data_units[industry] == k),input_var]) * float(RT.loc[RT['时间']==j,'总增长率'])
result.loc[(result['地区'] == i) & (result['年份'] == j) & (result['作业方式'] == k),'P'] = float(data_units.loc[(data_units[area] == i) & (data_units[time] == j) & (data_units[industry] == k),input_var]) * (float(ri_zong_pd.loc[(ri_zong_pd['地区'] == i) & (ri_zong_pd['时间'] == j),'增长率']) - float(RT.loc[RT['时间']==j,'总增长率']))
result.loc[(result['地区'] == i) & (result['年份'] == j) & (result['作业方式'] == k),'D'] = float(data_units.loc[(data_units[area] == i) & (data_units[time] == j) & (data_units[industry] == k),input_var]) * (float(rh.loc[(rh['地区'] == i)&(rh['时间'] == j)&(rh['产业类别'] == k),'增长率']) - float(ri_zong_pd.loc[(ri_zong_pd['地区'] == i) & (ri_zong_pd['时间'] == j),'增长率']))
return(result)
SSA(area = 'dmu',area_obj = [0],industry = '作业编码',time = '年份',input_var = 'TE',data = data_b)