Home Projects Resources About

SAS

Import Data

/* Read a CSV file into the data set my_data.
   DATAFILE= takes the quoted path (or a fileref) directly, without parentheses.
   REPLACE allows the step to be re-run when my_data already exists;
   without it PROC IMPORT cancels the import instead of overwriting. */
PROC IMPORT DATAFILE='my_file.csv'
            OUT=my_data
            DBMS=CSV
            REPLACE;
    GETNAMES=YES;   /* take variable names from the first row of the file */
RUN;

Initial Look at Data

/* List every observation and variable of my_data */
PROC PRINT DATA=my_data;
RUN;

/* Show variable names and types: restrict PROC CONTENTS output
   to its Variables table, then restore the default selection list */
ODS SELECT VARIABLES;

PROC CONTENTS DATA=my_data;
RUN;

ODS SELECT DEFAULT;

/* Detailed statistical summary of one analysis variable (myCol2),
   produced separately for each level of a category variable (myCol1) */
PROC UNIVARIATE DATA=my_data;
    VAR   myCol2;   /* analysis variable */
    CLASS myCol1;   /* grouping variable */
RUN;

/* Brief summary of a chosen set of statistics for one analysis variable
   (myCol2), grouped by a category variable (myCol1).
   The statistic keywords appear in the output in the order listed here. */
PROC MEANS DATA=my_data
           MIN Q1 MEDIAN Q3 MAX
           MEAN STD VAR
           RANGE QRANGE SKEWNESS;
    VAR   myCol2;   /* analysis variable */
    CLASS myCol1;   /* grouping variable */
RUN;

/* Display moments for a particular variable */
/* Keep only the Moments table. The unqualified name matches that table
   whatever the VAR variable is, so no variable-specific path is needed. */
ODS SELECT MOMENTS;
PROC UNIVARIATE DATA=my_data;
    VAR  myCol2;    /* analysis variable */
    FREQ myCol1;    /* myCol1 holds how many times each row occurs;
                       remove this statement for one-row-per-observation data */
RUN;
/* Restore the full selection list (ALL is the documented keyword; OFF is not one) */
ODS SELECT ALL;

/* Correlation matrix for the listed numeric variables */
PROC CORR DATA=my_data;
    VAR myCol1
        myCol2
        myCol3;
RUN;

Clean and Transform Data

/* Create my_subset holding only the rows of my_data where myCol1 equals 'This' */
DATA my_subset;
    SET my_data;
    WHERE myCol1 = 'This';   /* filter applied as rows are read from my_data */
RUN;

/* Create a data set that excludes a subset of the data:
   the subsetting IF keeps only rows where myCol1 is not 'This' */
DATA my_subset;
    SET my_data;
    IF myCol1 NE 'This';
RUN;

/* Sort my_data in place (no OUT= data set) in ascending order of mycol1 */
PROC SORT DATA=my_data;
    BY mycol1;
RUN;

Visualise Data

/* Histogram of myCol1 with a density curve overlaid on the same axes */
PROC SGPLOT DATA=my_data;
    HISTOGRAM myCol1;   /* drawn first, so the curve sits on top */
    DENSITY   myCol1;
RUN;

/* Box plot of myCol1, one box per level of myCol2 */
PROC SGPLOT DATA=my_data;
    VBOX myCol1 / GROUP=myCol2;
    TITLE 'My title here';
    /* LABEL must name the variable it relabels (variable='text');
       the label of the analysis variable becomes the axis label */
    LABEL myCol1='My axis label here';
RUN;

/* Scatter plot whose marker colour is driven by a third NUMERICAL variable (myCol3) */
PROC SGPLOT DATA=my_data;
    SCATTER X=myCol1 Y=myCol2 /
            MARKERATTRS=(SYMBOL=CIRCLEFILLED SIZE=14)
            COLORRESPONSE=myCol3;
RUN;

/* Scatter plot whose marker colour is driven by a third CATEGORICAL variable (myCol3) */
PROC SGPLOT DATA=my_data;
    SCATTER X=myCol1 Y=myCol2 / GROUP=myCol3;
RUN;

/* Vertical bar chart of frequency counts per level of myCol1,
   with myCol2 supplying the frequency of each row */
PROC SGPLOT DATA=my_data;
    YAXIS GRID;                  /* grid lines on the vertical axis */
    VBAR myCol1 / FREQ=myCol2;
RUN;

/* Vertical bar chart where bar height comes from a numerical variable (myCol2) */
TITLE 'My title here';
PROC SGPLOT DATA=my_data;
    YAXIS GRID;                      /* grid lines on the vertical axis */
    VBAR myCol1 / RESPONSE=myCol2;
RUN;

/* Stacked bar chart: one bar per level of myCol1, split into segments by myCol2 */
TITLE 'My title here';
PROC SGPLOT DATA=my_data;
    YAXIS GRID;                    /* grid lines on the vertical axis */
    VBAR myCol1 / GROUP=myCol2;
RUN;

/* Horizontal bar chart of frequency counts per level of myCol1,
   with myCol2 supplying the frequency of each row */
PROC SGPLOT DATA=my_data;
    HBAR myCol1 / FREQ=myCol2;
    /* For HBAR the counts run along the X axis, so the grid lines belong
       there (YAXIS is the category axis in a horizontal bar chart) */
    XAXIS GRID;
RUN;

/* Dot plot of the frequency of each level of myCol1, ordered by descending frequency */
/* Show only the frequency plot, not the frequency table */
ODS SELECT FREQPLOT;
PROC FREQ DATA=my_data ORDER=FREQ;
    TABLES myCol1 / PLOTS=FREQPLOT(TYPE=DOTPLOT);
    /* WEIGHT is a statement that takes a variable name with no '=':
       each row counts myCol2 times */
    WEIGHT myCol2;
RUN;
/* Restore the default selection list */
ODS SELECT DEFAULT;


Publish Analysis

/* Send the output of the enclosed code to an RTF document */
ODS RTF FILE='H:\Documents\...';    /* open the RTF destination */

/* some SAS code here */

ODS RTF CLOSE;                      /* close the destination so the file is written */

/* Send the output of the enclosed code to a PDF document */
ODS PDF FILE='H:\Documents\...';    /* open the PDF destination */

/* some SAS code here */

ODS PDF CLOSE;                      /* close the destination so the file is written */