SAS数据处理

引入数据

结构化数据

当一个容器(A)中包含多个表(B)时,使用LIBNAME建立逻辑库来访问,比如文件夹包含多个SAS table,Excel文件包含多个Sheet等

/* BASE is the engine for native SAS tables. The engine name is positional: */
/* LIBNAME libref engine "path" -- there is no ENGINE= keyword.             */
LIBNAME mylib BASE "path";
/* ... steps that use mylib.tablename ... */
/* Release the libref when it is no longer needed, e.g. before using another library. */
LIBNAME mylib CLEAR;

读入excel时需要引入的选项:

/* VALIDVARNAME=V7 makes SAS convert column headers into valid V7 variable names */
/* (for example, spaces are replaced with underscores).                         */
OPTIONS VALIDVARNAME=V7;
/* The XLSX engine exposes each worksheet of the workbook as a table in the library. */
LIBNAME mylib XLSX 'pathname/filename';
/* A sheet is referenced as libref.sheetname, not as a quoted string. */
PROC CONTENTS DATA=mylib.sheetname;
RUN;
/* Clear the libref so the connection to the workbook is released. */
LIBNAME mylib CLEAR;

非结构化数据

【PROC IMPORT】读取的是COPY而非实时数据

/* Import an external file into a SAS table (a copy of the data, not a live link). */
proc import
    out=outtable                /* destination table */
    datafile='path/filename'    /* source file */
    dbms=filetype               /* file type identifier */
    replace;                    /* overwrite outtable if it already exists */
  guessingrows=20;              /* rows scanned to infer column attributes; accepts n or MAX */
run;

读取数据集合(excel)

/* Import a single worksheet of an Excel workbook into a SAS table. */
proc import
    out=outtable
    datafile='path/filename'
    dbms=xlsx
    replace;
  sheet=sheet1;    /* worksheet to read */
run;

查看数据表属性

/* Report the table attributes, including the alphabetic list of variables. */
PROC CONTENTS DATA='filepath';
RUN;

> OUT: Alphabetic list of variables and attributes

Data explore

PROC PRINT

用VAR语句选择要显示的列,并按VAR中列出的顺序排列各列(不会对行排序)

/* Print the first 10 rows, showing only col1 and col2 in that order. */
proc print data=lib.table(obs=10);
    var col1 col2;
run;

PROC MEANS

仅可对numeric列使用

/* Summary statistics (by default N, mean, standard deviation, minimum, maximum) */
/* for the numeric columns named in VAR. No OBS= restriction is applied, so the  */
/* statistics describe the whole table rather than only its first rows.          */
PROC MEANS DATA=lib.table;
  VAR col3 col4;
RUN;

PROC UNIVARIATE

/* Detailed distribution statistics for each analysis variable listed in VAR. */
PROC UNIVARIATE DATA=lib.table;
    VAR col1 col2 col3;  /* the semicolon is required, otherwise RUN is read as a variable name */
RUN;

PROC FREQ

ODS GRAPHICS ON;   /* enable graphical output so that PLOTS= can produce charts */
ODS NOPROCTITLE;   /* suppress the procedure title in the output */

/* ORDER=FREQ lists the most frequent values first.                 */
/* NLEVELS adds a table with the number of distinct values per column. */
PROC FREQ DATA=lib.table ORDER=FREQ NLEVELS;
    WHERE col1=something;
    /* NOCUM removes the cumulative frequency and cumulative percent columns. */
    TABLES col2 col3 / NOCUM PLOTS=FREQPLOT(ORIENT=HORIZONTAL SCALE=PERCENT);
    /* MONNAME. groups a date column by month name. It only has an effect */
    /* when the formatted column is also listed in the TABLES statement.  */
    FORMAT startdate MONNAME.;
RUN;

ODS PROCTITLE;     /* restore procedure titles for later steps */
/* Frequency of StartDate values grouped by month name, most frequent first. */
PROC FREQ ORDER=FREQ DATA=pg1.storm_summary;
    FORMAT StartDate MONNAME.;   /* display dates as month names so rows group by month */
    TABLES StartDate;
RUN;

排序

【如果不声明新表,会直接改变输入数据】

/* Sort into a new table. Without OUT= the input table itself is replaced. */
proc sort out=lib.newtable data=lib.table;
    by descending col1 col2;   /* DESCENDING applies only to col1, col2 is sorted ascending */
run;

移除重复

/* Keep one row per BY value. The removed duplicate rows are written to removed_rows. */
proc sort data=lib.table
          out=lib.newtable
          dupout=removed_rows
          nodupkey;
    by col;   /* use BY _ALL_ to remove only rows that are duplicated in every column */
run;

定义格式

FORMAT或LENGTH

posted @ 2021-10-04 14:00  潼二  阅读(227)  评论(0)    收藏  举报