数据处理
% Load a cell range from one worksheet of an .xlsx workbook into an array.
input_file='Returns_short_interest_data.xlsx';
input_sheet='GW variables';
% Read cells K1225:K1728 of the sheet named in input_sheet.
Rfree_lag=xlsread(input_file,input_sheet,'k1225:k1728');
时间序列数据
%% Time-series data
% Read an index price file, build percentage log returns and their first
% lag, then keep the 2000-01-01 .. 2020-12-31 sample window.
% NOTE(review): userpath changes MATLAB's user folder setting rather than
% the current folder; readtable is presumably expected to locate the file
% through that folder -- confirm.
userpath('E:\workfile\mobile and PC\data\index');
filename = 'A_SHall.csv';
SHa = readtable(filename);

% Return = 100 * first difference of log(close); the first row has no
% predecessor, so it is NaN.
SHa.ret = vertcat(nan, 100 * diff(log(SHa.close)));

% First-order lag of the return series.
SHa.ret_l1 = vertcat(nan, SHa.ret(1:end-1));

% Start from the full table and drop every row that is NOT inside the
% closed date window (same rows kept as selecting the in-window rows).
SHa_sample = SHa;
SHa_sample(~(SHa.date(:) >= '2000-01-01' & SHa.date(:) <= '2020-12-31'), :) = [];

% Delete the listed columns (4 and 11) from X.
X(:, [4 11]) = [];
% Merge: combine Mdata (left) with RV (right) using the key variable 'ym'.
df = join(Mdata,RV,'key','ym'); % NOTE: the right table's key variable must contain every value that appears in the left table's key variable
% Descriptive statistics of GW.
% The five statistics are stacked as rows and transposed once, so the
% result has the columns: mean, median, 1st percentile, 99th percentile,
% standard deviation.
stats_GW = [ mean(GW); ...
             median(GW); ...
             prctile(GW, 1); ...
             prctile(GW, 99); ...
             std(GW) ]';

% Print a two-line header followed by the statistics matrix.
disp('Predictor variables, summary stats');
disp('Mean, median, 1st percentile, 99th percentile, std dev');
disp(stats_GW);
% Correlations
n_GW = size(GW,2);

% First-order autocorrelation of every column of GW.
% These are kept in their own vector: writing them into rho_GW(i) would put
% them in the first column of the N-by-N matrix below, where the pairwise
% loop overwrites every one of them.
rho_GW_ac = nan(n_GW,1);
for i = 1:n_GW
    rho_GW_ac(i) = corr(GW(2:end,i), GW(1:end-1,i));
end

% Pairwise correlations between the columns of GW. Only the lower triangle
% (including the diagonal) is filled; the upper triangle stays NaN.
rho_GW = nan(n_GW);
for i = 1:n_GW
    for j = i:n_GW
        rho_GW(j,i) = corr(GW(:,i), GW(:,j));
    end
end

GW_standardize = zscore(GW_adjust); % standardize (z-score)
% Robust trend test for log(EWSI).
% Compute_z_lambda_md is defined outside this snippet; the meaning of its
% second argument (1) is not visible here -- TODO confirm.
z_lambda_md_cv = [1.645, 1.96, 2.58];
z_lambda_md = Compute_z_lambda_md(log_EWSI, 1);

disp('Harvey et al (2007) z-statistics (10%, 5%, 1%)');
disp(z_lambda_md);
disp('Critical values');
disp(z_lambda_md_cv);

% Deviation of log(EWSI) from a linear trend.
% Regressors: a column of ones (intercept) and the time index 1..length.
X_linear = [ones(length(log_EWSI), 1), transpose(1:length(log_EWSI))];

% ols is defined outside this snippet; its result is expected to expose the
% regression residuals in the field .resid.
results_linear = ols(log_EWSI, X_linear);

% Extract the residuals and standardize them.
SII = zscore(results_linear.resid);
% Date handling: yyyy-mm-dd date -> numeric yyyymm.
% yyyymmdd yields the numeric yyyymmdd value; dividing by 100 and flooring
% drops the two day digits.
USREC.ym = floor(yyyymmdd(USREC.DATE) ./ 100);

% Date handling: numeric yyyymm -> separate year and month.
da_sample.yyyy = floor(yyyymm ./ 100);   % year  (leading digits)
da_sample.mm = mod(yyyymm(:,1), 100);    % month (last two digits)
% Row weights: divide every element by its row total so each row sums to 1.
weight = bsxfun(@rdivide,weight,sum(weight,2));
矩阵运算
参考：https://blog.csdn.net/fen_0108/article/details/140000898
面板数据
% Panel data
% Distinct entity identifiers, taken from the Stkcd variable itself.
% unique(SH_A(:,1)) would return a one-variable table, which cannot be
% indexed as id(i) or compared with == below.
% NOTE(review): assumes Stkcd is the first column of SH_A -- confirm.
id = unique(SH_A.Stkcd);

% Rows that belong to the i-th entity.
% NOTE(review): i is expected to be the index of an enclosing loop over id.
data_i = SH_A(SH_A.Stkcd==id(i),:);

% If the column being filtered is a cell array of text, match with strcmp
% instead of ==.
data_i = data(strcmp(data.Nnindcd(:), 'A'),:);
% Write an array (summary statistics etc.) to an Excel file.
% xlwrite is not defined in this snippet; here it is called with
% (file, data, sheet name, cell reference).
output_file='Returns_short_interest_results.xlsx';
output_sheet='Predictor correlations';
xlwrite(output_file,predictor_correlation,output_sheet,'b2');
% Convert an array to a table, adding row names and column names.
% NOTE(review): the variable name `table` shadows MATLAB's built-in table
% function while it is in the workspace -- consider renaming.
% Round to 4 decimal places before converting.
table = round(table,4);
% Column names are copied from xtable; row names label the seven statistics.
table = array2table(table,'VariableNames',xtable.Properties.VariableNames,...
'RowNames',{'mean','median','min','max','std','skew','kurt'}); % xtable is a table
% Write the table to an xlsx file.
output_file='name.xlsx';
writetable(table,output_file);
循环tips
在循环中输出当前进度（当前 i、p、s 对应的取值）
% Loop progress: format the current rollwin(i), lambda(p) and s as 'a--b--c'.
% There is no trailing semicolon, so the formatted string is echoed to the
% command window.
sprintf('%d--%d--%d',rollwin(i),lambda(p),s)

浙公网安备 33010602011771号