SAS 有用的10个函数
*****(0) Simulate two datasets for demonstration********************;
*****(0.1) Simulate a dataset for two SSN entries*******************;
data ssn_data;
do i=1 to 1000;
ssn1=ceil((ranuni(1234)*1E9));
ssn2=ssn1;
if ssn1 le ceil((ranuni(1000)*1E9))then call missing(ssn1);
if ssn2 le ceil((rannor(2000)*1E9))then call missing(ssn2);
drop i;
output;
end;
format ssn1 ssn2 ssn11.;
run;
*****(0.2) Simulate a dataset for hospital visits ******************;
data hospital_data;
input id visit treat1 treat2 treat3 cost;
format cost dollar8.2;
cards;
1 0 0 0 520
2 1 0 0 320
3 0 1 0 650
1 1 0 0 560
2 1 0 0 360
1 1 0 0 500
2 0 0 1 350
;
run;
****(1) MONOTONIC: specify row numbers******************************;
proc sql noprint;
create table ssn_data1 as
select *
from ssn_data
where monotonic() between 501 and 800;
quit;
****(2) COUNT/N/NMISS: find total and missing values****************;
proc sql;
select count(*) as n 'Total number of the observations',
count(ssn1) as m_ssn1 'Number of the non-missing values for ssn1',
nmiss(ssn1) as nm_ssn1 'Number of the missing values for ssn1',
n(ssn2) as m_ssn1 'Number of the non-missing values for ssn2',
nmiss(ssn2) as nm_ssn2 'Number of the missing values for ssn2'
from ssn_data;
quit;
****(3) COALESCE: combine values among columns**********************;
proc sql;
select monotonic() as obs, coalesce(ssn1, ssn2) as ssn format = ssn11.
from ssn_data;
quit;
****(4) MISSING: return Boolean for missing value*******************;
proc sql;
select monotonic() as obs,
(
case sum(missing(ssn1), missing(ssn2))
when 0 then 'No missing'
when 1 then 'One missing value'
else'Both missing values'
end
) as status 'Missing status'
from ssn_data;
quit;
****(5)SPEDIS/SOUNDEX: fuzz matching*********************************;
****(5.1)SPEDIS: find spelling mistakes******************************;
proc sql;
select a.ssn1 as x, monotonic(a.ssn1) as x_obs,
b.ssn1 as y, monotonic(b.ssn1) as y_obs
from ssn_data as a, ssn_data as b
where(x gt y) and (spedis(put(x,z11.),put(y,z11.)) le 15);
quit;
****(5.2)SOUNDEX: find phonic similarity*****************************;
proc sql;
select a.name as name1, b.name as name2
from sashelp.class as a, sashelp.class as b
where soundex(name1)=soundex(name2) and(name1 gt name2);
quit;
****(6)RANUNI: simple random sampling********************************;
proc sql outobs=30;
select *
from ssn_data
order by ranuni(1234);
quit;
****(7)MAX: find the maximum value for each column******************;
proc sql;
select id, max(treat1) as effect1 'Effect after Treatment 1',
max(treat2) as effect2 'Effect after Treatment 2',
max(treat3) as effect3 'Effect after Treatment 3'
from hospital_data
group by id;
quit;
proc sql;
select id, ifc(max(treat1)=1,'Yes','No') as overall_effect
length=3 'Any effect after treatment 1',
sum(cost) as sum_cost format=dollar8.2 'Total cost',
ifn(calculated sum_cost ge 1000,calculated sum_cost*0.85,calculated sum_cost*1) as discounted_cost
format=dollar8.2 'Total cost after discount if any'
from hospital_data
group by id;
quit;
proc sql;
select count(unique(make)) as u_make 'Number of the car makers',
count(unique(origin)) as u_origin 'Number of the car origins',
count(unique(type)) as u_type 'Number of the car types'
from sashelp.cars;
quit;
****(10)PUT: create an filter by user-defined format***************;
proc format;
value range
40000-high='High'
26000-< 40000='Medium'
other ='Low';
run;
proc sql;
select model,make,msrp,msrp as range 'Price Range' format=range.
from sashelp.cars
where put(msrp, range.) in('High', 'Medium');
quit;

浙公网安备 33010602011771号