SAS数据处理
引入数据
结构化数据
A包含B使用LIBNAME,比如文件夹包含多个SAS table,Excel文件包含多个Sheet等
*BASE是仅读SAS table的engine;
LIBNAME mylib BASE "path";
...
*需要使用其他lib时;
LIBNAME mylib CLEAR;
读入excel时需要引入的选项:
OPTIONS VALIDVARNAME=V7;
LIBNAME mylib XLSX 'pathname/filename';
PROC CONTENTS DATA=mylib.sheetname;
非结构化数据
【PROC IMPORT】读取的是COPY而非实时数据
PROC IMPORT DATAFILE='path/filename' DBMS=filetype OUT=outtable REPLACE;
GUESSINGROWS=20; *n|MAX;
RUN;
读取数据集合(excel)
PROC IMPORT DATAFILE='path/filename' DBMS=XLSX OUT=outtable REPLACE;
sheet=sheet1;
RUN;
查看数据表属性
PROC CONTENTS DATA='filepath';
RUN;
> OUT: Alphabetic list of variables and attributes
Data explore
PROC PRINT
用VAR语句选择要显示的列,并按VAR中列出的顺序显示(不会对行排序)
PROC PRINT DATA=lib.table (OBS=10);
VAR col1 col2;
RUN;
PROC MEANS
仅可对numeric列使用
PROC MEANS DATA=lib.table;
VAR col3 col4;
RUN;
PROC UNIVARIATE
PROC UNIVARIATE DATA=lib.table;
VAR col1 col2 col3;
RUN;
PROC FREQ
ODS GRAPHICS ON; *开启ODS图形输出,PLOTS=选项依赖它;
ODS NOPROCTITLE;
PROC FREQ DATA=lib.table ORDER=freq NLEVELS; *NLEVELS输出各列不同取值的个数;
WHERE col1=something;
TABLES col2 col3 / nocum PLOTS=FREQPLOT(orient=horizontal scale=percent); *nocum移除累计频数;
FORMAT startdate MONNAME.; *按月分组;
RUN;
ODS PROCTITLE;
/* by month */
proc freq data=pg1.storm_summary order=freq;
tables StartDate;
FORMAT StartDate monname.;
run;
排序
【如果不用OUT=指定输出表,PROC SORT会直接覆盖(改变)输入表】
PROC SORT DATA=lib.table OUT=lib.newtable;
BY DESCENDING col1 col2;
RUN;
移除重复
PROC SORT DATA=lib.table OUT=lib.newtable NODUPKEY DUPOUT=removed_rows;
BY col; *改用 BY _ALL_ 可移除所有列完全重复的行;
RUN;
定义格式
FORMAT或LENGTH

浙公网安备 33010602011771号