需求:对 tmp_jf_user_02 数据集中每个 city_code 下的观测(observation)做简单随机抽样,各 city_code 的样本量见 samplesize 数据集;
最终将各 city_code 的抽样结果汇总生成 random_user 数据集,其中只包含 serialnumber 和 city_code 两个变量。
/* Build the SAMPLESIZE lookup table: one row per city_code with the
   number of observations (sample) to draw for that city. */
data samplesize;
    input city_code $ sample;
    datalines;
CZ 820
DG 13762
FS 8636
GZ 29472
;
run;
/* Pre-create RANDOM_USER as an empty (zero-observation) dataset that only
   carries the attributes of the two output variables, so that per-city
   samples can be added to it later.
   - LENGTH/INFORMAT/FORMAT declare the variables; no INPUT statement is
     used because there is no INFILE/DATALINES source to read from.
   - The second FORMAT now targets city_code (it previously re-formatted
     serialnumber as $2., which truncated its displayed value). */
data random_user;
    length   serialnumber $12 city_code $2;
    informat serialnumber $12. city_code $2.;
    format   serialnumber $12. city_code $2.;
    /* Touch both variables so the log has no "uninitialized" notes. */
    call missing(serialnumber, city_code);
    /* Stop before the implicit OUTPUT: the dataset is created with 0 rows. */
    stop;
run;
/*
 * %sample(city, seed=0)
 *
 * Draws a simple random sample (without replacement) from the rows of
 * TMP_JF_USER_02 whose city_code equals &city and appends the sampled
 * serialnumber / city_code pairs to RANDOM_USER.
 *
 * Parameters
 *   city  - city_code value to sample (e.g. CZ); also used as the suffix
 *           of the intermediate dataset RANDOM_<city>.
 *   seed  - optional random seed passed to PROC SURVEYSELECT. The default
 *           0 lets SAS derive the seed from the clock (same as omitting
 *           SEED=); pass a positive integer for a reproducible sample.
 *
 * Reads   : SAMPLESIZE (city_code, sample), TMP_JF_USER_02
 * Writes  : RANDOM_<city> (full sample), RANDOM_USER (accumulated result)
 *
 * Notes
 *   - The sample size is taken from the first SAMPLESIZE row matching the
 *     city with a positive size; SAMPLESIZE is expected to hold one row
 *     per city.
 *   - If the city has no usable entry in SAMPLESIZE, or the sampling step
 *     fails, a warning is logged and RANDOM_USER is left untouched.
 */
%macro sample(city, seed=0);
    /* Keep the work variable out of the caller's / global symbol table. */
    %local sample;
    %let sample=;

    /* Look the sample size up directly; CALL SYMPUTX strips blanks and
       avoids the implicit numeric-to-character conversion of SYMPUT. */
    data _null_;
        set samplesize(where=(city_code="&city" and sample > 0));
        call symputx("sample", sample, "L");
        stop;
    run;

    %if %length(&sample) = 0 %then %do;
        %put WARNING: sample(&city): no positive sample size found in SAMPLESIZE, city skipped.;
        %return;
    %end;

    /* SELECTALL: when fewer than &sample rows exist for the city, take all
       of them instead of failing the step. */
    proc surveyselect data=tmp_jf_user_02(where=(city_code="&city"))
        method=srs
        n=&sample
        seed=&seed
        selectall
        out=random_&city;
    run;

    /* SYSERR above 4 means the procedure ended with an error; do not
       append a missing or stale RANDOM_<city> in that case. */
    %if &syserr > 4 %then %do;
        %put WARNING: sample(&city): PROC SURVEYSELECT failed, nothing appended to RANDOM_USER.;
        %return;
    %end;

    /* Append only the new rows rather than rewriting RANDOM_USER on every
       call. FORCE tolerates attribute differences between the two tables
       (and the extra variables dropped by KEEP= are never sent). */
    proc append base=random_user
                data=random_&city(keep=serialnumber city_code)
                force;
    run;
%mend sample;
/* Run the per-city sampling macro once for each city_code; every call
   adds that city's sampled rows to RANDOM_USER. */
%sample(CZ)
%sample(DG)
%sample(FS)
%sample(GZ)