SAS 有用的10个函数

*****(0) Simulate two datasets for demonstration********************;
*****(0.1) Simulate a dataset for two SSN entries*******************;
data ssn_data;
    do i=1 to 1000;
        ssn1=ceil((ranuni(1234)*1E9));
        ssn2=ssn1;
        if ssn1 le ceil((ranuni(1000)*1E9))then call missing(ssn1);
        if ssn2 le ceil((rannor(2000)*1E9))then call missing(ssn2);
        drop i;
        output;
    end;
    format ssn1 ssn2 ssn11.;
run;

*****(0.2) Simulate a dataset for hospital visits ******************;
data hospital_data;
    input id visit treat1 treat2 treat3 cost;
    format cost dollar8.2;
cards;
1 0 0 0 520
2 1 0 0 320
3 0 1 0 650
1 1 0 0 560
2 1 0 0 360
1 1 0 0 500
2 0 0 1 350
;
run;

****(1) MONOTONIC: specify row numbers******************************;
proc sql noprint;
    create table ssn_data1 as
    select *
    from ssn_data
    where monotonic() between 501 and 800;
quit;

****(2) COUNT/N/NMISS: find total and missing values****************;
proc sql;
    select count(*) as n 'Total number of the observations',
    count(ssn1) as m_ssn1 'Number of the non-missing values for ssn1',
    nmiss(ssn1) as nm_ssn1 'Number of the missing values for ssn1',
    n(ssn2) as m_ssn1 'Number of the non-missing values for ssn2',
    nmiss(ssn2) as nm_ssn2 'Number of the missing values for ssn2'
    from ssn_data;
quit;

****(3) COALESCE: combine values among columns**********************;
proc sql;
    select monotonic() as obs, coalesce(ssn1, ssn2) as ssn format = ssn11.
    from ssn_data;
quit;

****(4) MISSING: return Boolean for missing value*******************;
proc sql;
    select monotonic() as obs,
    (
    case sum(missing(ssn1), missing(ssn2))
        when 0 then 'No missing'
        when 1 then 'One missing value'
        else'Both missing values'
    end
    ) as status 'Missing status'
    from ssn_data;
quit;
****(5)SPEDIS/SOUNDEX: fuzz matching*********************************;
****(5.1)SPEDIS: find spelling mistakes******************************;
proc sql;
    select a.ssn1 as x, monotonic(a.ssn1) as x_obs,
    b.ssn1 as y, monotonic(b.ssn1) as y_obs
    from ssn_data as a, ssn_data as b
    where(x gt y) and (spedis(put(x,z11.),put(y,z11.)) le 15);
quit;
****(5.2)SOUNDEX: find phonic similarity*****************************;
proc sql;
    select a.name as name1, b.name as name2
    from sashelp.class as a, sashelp.class as b
    where soundex(name1)=soundex(name2) and(name1 gt name2);
quit;
****(6)RANUNI: simple random sampling********************************;
proc sql outobs=30;
    select *
    from ssn_data
    order by ranuni(1234);
quit;
****(7)MAX: find the maximum value for each column******************;
proc sql;
    select id, max(treat1) as effect1 'Effect after Treatment 1',
    max(treat2) as effect2 'Effect after Treatment 2',
    max(treat3) as effect3 'Effect after Treatment 3'
    from hospital_data
    group by id;
quit;
proc sql;
    select id, ifc(max(treat1)=1,'Yes','No') as overall_effect
    length=3 'Any effect after treatment 1',
    sum(cost) as sum_cost format=dollar8.2 'Total cost',
    ifn(calculated sum_cost ge 1000,calculated sum_cost*0.85,calculated sum_cost*1) as discounted_cost
    format=dollar8.2 'Total cost after discount if any'
    from hospital_data
    group by id;
quit;

proc sql;
    select count(unique(make)) as u_make 'Number of the car makers',
    count(unique(origin)) as u_origin 'Number of the car origins',
    count(unique(type)) as u_type 'Number of the car types'
    from sashelp.cars;
quit;

****(10)PUT: create an filter by user-defined format***************;
proc format;
    value range
    40000-high='High'
    26000-< 40000='Medium'
    other ='Low';
run;
proc sql;
    select model,make,msrp,msrp as range 'Price Range' format=range.
    from sashelp.cars
    where put(msrp, range.) in('High', 'Medium');
quit;


posted @ 2019-12-26 17:39  虾米WD  阅读(773)  评论(0)    收藏  举报