/* SAS */
/* Import Data */
/* Read a CSV file into the data set my_data.
   DATAFILE= takes a quoted path (or a fileref) directly, without parentheses.
   GETNAMES=YES takes the variable names from the first row of the file. */
PROC IMPORT DATAFILE='my_file.csv' DBMS=CSV OUT=my_data;
    GETNAMES=YES;
RUN;
/* Initial Look at Data */
/* List the observations of my_data */
proc print data=my_data;
run;
/* View variable names and types */
/* Limit ODS output to the Variables table produced by PROC CONTENTS */
ODS SELECT VARIABLES;
PROC CONTENTS DATA=my_data;
RUN;
/* Reset the selection list. ALL is the keyword that re-enables every output
   object; DEFAULT is not an ODS SELECT keyword and would be read as an object
   name, hiding the output of the next procedure. */
ODS SELECT ALL;
/* Detailed statistical summary of one analysis variable (myCol2),
   reported separately for each level of a category variable (myCol1) */
proc univariate data=my_data;
    class myCol1;
    var   myCol2;
run;
/* Brief summary restricted to the listed statistics, for one analysis
   variable (myCol2), reported for each level of a category variable (myCol1) */
proc means data=my_data
           min q1 median q3 max
           mean std var range qrange skewness;
    class myCol1;
    var   myCol2;
run;
/* Display moments for a particular variable */
/* Keep only the Moments table from PROC UNIVARIATE. The short object name is
   enough; the full path would be Univariate.<variable>.Moments. */
ODS SELECT MOMENTS;
PROC UNIVARIATE DATA=my_data;
    /* FREQ names a numeric variable holding the number of times each row
       occurs; drop this statement when every row is a single observation */
    FREQ myCol1;
    VAR myCol2;
RUN;
/* Reset the selection list so later procedures show all of their output
   (OFF is not an ODS SELECT keyword) */
ODS SELECT ALL;
/* Correlation matrix for the listed numeric variables */
proc corr data=my_data;
    var myCol1
        myCol2
        myCol3;
run;
/* Clean and Transform Data */
/* Create a data set that keeps only the rows where myCol1 equals 'This';
   the filter is applied as rows are read from my_data */
data my_subset;
    set my_data (where=(myCol1 = 'This'));
run;
/* Create a data set that leaves out the rows where myCol1 equals 'This' */
data my_subset;
    set my_data;
    if myCol1 = 'This' then delete;
run;
/* Sort my_data by mycol1 (ascending); with no OUT= the sorted rows replace my_data */
proc sort data=my_data;
    by mycol1;
run;
/* Visualise Data */
/* Histogram of myCol1 with a density curve overlaid on the same axes */
proc sgplot data=my_data;
    histogram myCol1;
    density   myCol1;
run;
/* Box plot per category: one box of myCol1 for each level of myCol2 */
PROC SGPLOT DATA=my_data;
    VBOX myCol1 / GROUP=myCol2;
    TITLE 'My title here';
    /* LABEL needs variable='text'; the label of the analysis variable is
       what SGPLOT shows on its axis */
    LABEL myCol1 = 'My axis label here';
RUN;
/* Scatter plot whose marker colour follows a third NUMERICAL variable (myCol3) */
proc sgplot data=my_data;
    scatter x=myCol1 y=myCol2 /
            colorresponse=myCol3
            markerattrs=(symbol=CIRCLEFILLED size=14);
run;
/* Scatter plot whose marker colour follows a third CATEGORICAL variable (myCol3) */
proc sgplot data=my_data;
    scatter x=myCol1 y=myCol2 /
            group=myCol3;
run;
/* Vertical bar chart of frequency counts per level of myCol1;
   myCol2 supplies the frequency of each row */
proc sgplot data=my_data;
    vbar myCol1 / freq=myCol2;
    yaxis grid;
run;
/* Vertical bar chart whose bar height comes from a numeric variable (myCol2) */
proc sgplot data=my_data;
    vbar myCol1 / response=myCol2;
    yaxis grid;
    title 'My title here';
run;
/* Stacked bar chart: each myCol1 bar is split into segments by myCol2 */
proc sgplot data=my_data;
    vbar myCol1 / group=myCol2;
    yaxis grid;
    title 'My title here';
run;
/* Horizontal bar plot: frequency counts per level of myCol1,
   with myCol2 supplying the frequency of each row */
PROC SGPLOT DATA=my_data;
    HBAR myCol1 / FREQ=myCol2;
    /* For HBAR the counts run along the X axis, so the grid lines go there
       (the vertical-bar examples use YAXIS for the same reason) */
    XAXIS GRID;
RUN;
/* Dot plot */
/* Show only the frequency plot, not the frequency table.
   The plot is produced through ODS Graphics, which must be enabled. */
ODS SELECT FREQPLOT;
PROC FREQ DATA=my_data ORDER=FREQ;
    TABLES myCol1 / PLOTS=FREQPLOT(TYPE=DOTPLOT);
    /* WEIGHT is a statement that names a variable (no equals sign);
       myCol2 supplies the count contributed by each row */
    WEIGHT myCol2;
RUN;
/* Reset the selection list so later procedures show all of their output
   (DEFAULT is not an ODS SELECT keyword) */
ODS SELECT ALL;
/* Publish Analysis */
/* Send the output of the enclosed steps to an RTF file, then close the file */
ods rtf file='H:\Documents\...';
/* some SAS code here */
ods rtf close;
/* Send the output of the enclosed steps to a PDF file, then close the file */
ods pdf file='H:\Documents\...';
/* some SAS code here */
ods pdf close;