










Study with the several resources on Docsity
Earn points by helping other students or get them with a premium plan
Prepare for your exams
Study with the several resources on Docsity
Earn points to download
Earn points by helping other students or get them with a premium plan
Construct frequency, relative frequency, cumulative frequency, and cumulative relative frequency tables;. • Obtain standard summary statistics (eg – mode, ...
Typology: Schemes and Mind Maps
1 / 18
This page cannot be seen from the preview
Don't miss anything!











# Clean the three analysis variables of `relate100obs`, convert the two
# praise items to labelled factors, keep only those variables, and save
# the result to "relatenew100.Rdata".
#
# `relate100obs` must already exist in the workspace with columns
# `m_praise`, `f_praise` and `age`.
library(Hmisc)

analysis_vars <- c("m_praise", "f_praise", "age")

# The codes -1, -2, -4 and -5 are treated as missing and recoded to NA.
# `%in%` never yields NA, so values that are already NA are left alone.
missing_codes <- c(-1, -2, -4, -5)
relate100obs[analysis_vars] <- lapply(
  relate100obs[analysis_vars],
  function(x) {
    x[x %in% missing_codes] <- NA
    x
  }
)

# Convert the praise items to factors. The levels and labels (including
# the "." and ".s" entries) are kept exactly as in the original script.
praise_levels <- c(0, 1, 2, 3, 4, ".", ".s")
praise_labels <- c("never", "rarely", "sometimes", "usually", "always",
                   ".", ".s")
relate100obs$m_praise <- factor(relate100obs$m_praise,
                                levels = praise_levels,
                                labels = praise_labels)
relate100obs$f_praise <- factor(relate100obs$f_praise,
                                levels = praise_levels,
                                labels = praise_labels)

# Attach the variable labels AFTER the factor conversion: factor() builds
# a new vector and does not carry over the "label" attribute, so labels
# set beforehand would be dropped for the two praise items.
label(relate100obs$m_praise) <- "m_praise: Mother praises R for doing well"
label(relate100obs$f_praise) <- "f_praise: Father praises R for doing well"
label(relate100obs$age) <- "age: Age of R (years)"

# Keep only the analysis variables and save the cleaned data frame.
relate100obs <- relate100obs[, analysis_vars]
save(relate100obs, file = "relatenew100.Rdata")
str(bplong) 'data.frame': 240 obs. of 5 variables: $ patient: int 1 1 2 2 3 3 4 4 5 5 ... $ sex : Factor w/ 2 levels "Male","Female": 1 1 1 1 1 1 1 1 1 1 ... $ agegrp : Factor w/ 3 levels "30-45","46-59",..: 1 1 1 1 1 1 1 1 1 1 ... $ when : Factor w/ 2 levels "Before","After": 1 2 1 2 1 2 1 2 1 2 ... $ bp : int 143 153 163 170 153 168 153 142 146 141 ...
names(bplong) [1] "patient" "sex" "agegrp" "when" "bp"
nrow(bplong) [1] 240
1.2 Data Set Description Commands { Package } command Example Check structure of dataframe {base} str(dataframename) str(bplong) Obtain dimensions of dataframe {base} nrow(dataframename) # observations, n ncol(dataframename) # variables, p dim(dataframename) # rows and columns nrow(bplong) Obtain variable names {base} names(dataframename) names(bplong) Obtain variable value labels of a variable {base} levels(dataframename$variablename) levels(bplong$sex) Reorder the variables (columns) to be alphabetical {base} dataframename <- dataframename[sort(names(dataframename))] bplong<-bplong[sort(names(bplong))] Obtain summary statistics of every variable {base} summary(dataframename) summary(Isoproterenol) Nicer looking summary stats of every variable {stargazer} stargazer(dataframename,type="text", median=TRUE) library(stargazer) stargazer(Isoproterenol,type="text", median=TRUE) What could go wrong: If the input is not a dataframe (for example, it is a tibble), you will only get a header. Solution: dataframename <- as.data.frame(dataframename) Show n and variable type for every variable {psych} print(describeFast(dataframename),short=FALSE) library(psych) print(describeFast(Isoproterenol),short=FALSE) Consider this really nice dataset summary. {summarytools} print(dfSummary(dataframename)) library(summarytools) print(dfSummary(Isoproterenol))
1.3 Listing Individual Observations { Package } command Example List first 6 observations – EVERY variable {base} head(dataframename) head(Isoproterenol) List last 6 observations – EVERY variable {base} tail(dataframename) tail(Isoproterenol) List selection of observations (rows) EVERY variable (column) {base} dataframename[ROW1:ROWLAST, ] ivf[1:6, ] What could go wrong (1) must use square brackets now (2) must have comma and blank where columns would be List selection of variables (columns) EVERY observation (row) {base} dataframename[ , COLUMN1:COLUMNLAST] ivf[ , 1:6] What could go wrong (1) must use square brackets now (2) must have blank and comma where rows would be List first # observations of specific selection of variables {base} dataframename[ROW1:ROWLAST, c("var1", "var2")] ivf[1:6, c("matage", "gestwks", "sex")]
library(summarytools) descr(bplong$bp,transpose=T) Descriptive Statistics bplong$bp N: 240 Mean Std.Dev Min Q1 Median Q3 Max MAD IQR CV
bp 153.90 13.08 125.00 144.00 152.00 162.50 185.00 13.34 18.25 0. Table: Table continues below Skewness SE.Skewness Kurtosis N.Valid Pct.Valid
bp 0.30 0.16 -0.45 240.00 100.
2.2 One Discrete Variable { Package } command Example So you have it Quick descriptives on every variable (2 ways) {stargazer} stargazer(dataframename, type="text", median=TRUE) stargazer(dataframename, type="text", title="YOUR TITLE", out="TABLENAME.txt") library(stargazer) stargazer(framingham, type="text", median=TRUE) stargazer(lung_demo,type="text",title="Table 1: Descriptives of Lung Study",out="table1.txt") Frequency/Relative Frequency Table {summarytools} freq(dataframename$discretevariable) library(summarytools) freq(wws1000$race) Frequency/Relative Frequency Table Output in order of frequencies (largest first) {summarytools} freq(dataframename$discretevariable, order="freq") library(summarytools) freq(wws1000$race, order="freq") Brute force frequency/relative frequency table ntot <- length(discretevar) # sample size var_freq <- table(discretevar) # frequencies var_relfreq <- var_freq/ntot # rel. freqs var_cum <- cumsum(var_freq) # cum. freqs var_cumrel <- cumsum(var_relfreq) # cum rel freq # Create table using cbind() tablename <- cbind(var_freq, var_relfreq, var_cum, var_cumrel) # Label columns colnames(tablename) <- c("Freq", "Rel Freq", "Cum Freq", "Cum Rel Freq") # Display table tablename ntot <- length(los) los_freq <- table(los) los_relfreq <- los_freq/ntot los_cum <- cumsum(los_freq) los_cumrel <- cumsum(los_relfreq) # Create q1table q1table <- cbind(los_freq, los_relfreq, los_cum, los_cumrel) # Label columns colnames(q1table) <- c("Freq", "Rel Freq", "Cum Freq", "Cum Rel Freq") # Display table q1table
# Restore the objects saved in "wws1000.Rdata" (read from the current
# working directory). The next statement expects this to provide a data
# frame named `wws1000`.
load(file = "wws1000.Rdata")

# Reorder the columns alphabetically by variable name.
wws1000 <- wws1000[sort(names(wws1000))]
summary(wws1000) age ccity collgrad currexp everworked fwt Min. :21.00 Min. :0.000 Min. :0.000 Min. : 0.000 Min. :0.000 Min. :0. 1st Qu.:34.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.:2. Median :37.00 Median :0.000 Median :0.000 Median : 3.000 Median :1.000 Median :4. Mean :36.28 Mean :0.297 Mean :0.241 Mean : 5.115 Mean :0.972 Mean :4. 3rd Qu.:40.00 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.: 8.000 3rd Qu.:1.000 3rd Qu.:7. Max. :83.00 Max. :1.000 Max. :1.000 Max. :26.000 Max. :1.000 Max. :9. grade grade4 hours idcode industry kidage1 kidage Min. : 4.00 Min. :1.000 Min. : 1.0 Min. : 1 Min. : 1.000 Min. : 0.0 0 Min. : 0. 1st Qu.:12.00 1st Qu.:2.000 1st Qu.:35.0 1st Qu.:1258 1st Qu.: 6.000 1st Qu.: 8.00 1st Qu.: 5. Median :12.00 Median :2.000 Median :40.0 Median :2606 Median : 8.000 Median :10.00 Median : 7. Mean :13.12 Mean :2.533 Mean :37.4 Mean :2591 Mean : 8.089 Mean :10.35 Mean : 7. 3rd Qu.:15.00 3rd Qu.:3.000 3rd Qu.:40. 0 3rd Qu.:3931 3rd Qu.:11.000 3rd Qu.:13.00 3rd Qu.: 9. Max. :18.00 Max. :4.000 Max. :80.0 Max. :5159 Max. :12.000 Max. :21.00 Max. :14. NA's :2 NA's :2 NA's :2 NA's :9 NA's :235 NA's : kidage3 married marriedyrs metro networth nevermarried numkids Min. :0.00 Min. :0.00 Min. : 0.000 Min. :0.000 Min. :-7000.0 Min. :0.000 Min. :0. 1st Qu.:1.00 1st Qu.:0.00 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.:-2774.1 1st Qu.:0.000 1 st Qu.:1. Median :3.00 Median :1.00 Median : 2.000 Median :1.000 Median : - 651.4 Median :0.000 Median :2. Mean :3.43 Mean :0.64 Mean : 3.558 Mean :0.704 Mean : 817.9 Mean :0.104 Mean :1. 3rd Qu.:5.00 3rd Qu.:1.00 3rd Qu.: 7.000 3rd Qu.:1.000 3rd Qu.: 2585.3 3rd Qu.:0.000 3rd Qu.:2. Max. :7.00 Max. :1.00 Max. :11.000 Max. :1.000 Max. :33198.1 Max. :1.000 Max. :3. NA's : occupation prevexp race south unempins union Min. : 1.000 Min. : 0.000 Min. :1.000 Min. :0.000 Min. : 0.00 Min. :0. 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.:1.000 1st Qu.:0.000 1st Qu.: 0.00 1st Qu.:0. 
Median : 3.0 00 Median : 5.000 Median :1.000 Median :0.000 Median : 0.00 Median :0. Mean : 4.593 Mean : 6.031 Mean :1.275 Mean :0.422 Mean : 30.12 Mean :0. 3rd Qu.: 6.000 3rd Qu.: 9.000 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.: 0.00 3rd Qu.:0. Max. :13.000 Max. :25.000 Max. :3.000 Max. :1.00 0 Max. :298.00 Max. :1. NA's :6 NA's :6 NA's : uniondues wage wage2 yrschool Min. : 0.000 Min. : 0.0 Min. : 0.000 Min. : 8. 1st Qu.: 0.000 1st Qu.: 4.2 1st Qu.: 4.228 1st Qu.:12. Median : 0.000 Median : 6.4 Median : 6.345 Median :12.0 0 Mean : 5.473 Mean : 387.8 Mean : 7.821 Mean :13. 3rd Qu.:10.000 3rd Qu.: 9.6 3rd Qu.: 9.633 3rd Qu.:15. Max. :29.000 Max. :380000.0 Max. :40.200 Max. :18. NA's :3 NA's :
library(summarytools) ctable(wws1000$race,wws1000$numkids,prop="r", totals=TRUE) Cross-Tabulation / Row Proportions Variables: race * numkids Data Frame: wws1000
numkids 0 1 2 3 Total race 1 183 (24.83%) 169 (22.93%) 204 (27.68%) 181 (24.56%) 737 (100.00%) 2 49 (19.52%) 70 (27.89%) 71 (28.29%) 61 (24.30%) 251 (100.00%) 3 3 (25.00%) 5 (41.67%) 2 (16.67%) 2 (16.67%) 12 (100.00%) Total 235 (23.50%) 244 (24.40%) 277 (27.70%) 244 (24.40%) 1000 (100.00%)
library(stargazer) stargazer(wws1000[c("age","uniondues","wage")], type="text", summary.stat=c("n","mean","sd","min","max")) ==================================================== Statistic N Mean St. Dev. Min Max
age 1,000 36.276 5.625 21 83 uniondues 997 5.473 8.953 0.000 29. wage 1,000 387.816 12,016.410 0.000 380,000.
# Packages used by the filtered summary that follows: tidyverse provides
# filter(), stargazer provides the summary table.
library(tidyverse)
library(stargazer)

# Names of the variables to summarise.
myvars <- c("age", "uniondues", "wage")
stargazer(filter(wws1000[, myvars],wws1000$race==1), type = "text", summary.stat = c("n","min","mean", "max", "sd")) ========================================== Statistic N Min Mean Max St. Dev.
age 737 21 36.578 83 5. uniondues 736 0.000 5.355 29.000 8. wage 737 1.032 8.184 40.198 6.
3.2 Two Discrete Variables { Package } command Example So you have it Quick descriptives on every variable (2 ways) {stargazer} stargazer(dataframename, type="text", median=TRUE) stargazer(dataframename, type="text", title="YOUR TITLE", out="TABLENAME.txt") library(stargazer) stargazer(framingham, type="text", median=TRUE) stargazer(lung_demo,type="text",title="Table 1: Descriptives of Lung Study",out="table1.txt") Two Way Crosstab - Method I {summarytools} Show counts only with(dataframe, ctable(rowvar,colvar,prop="n", totals=TRUE)) Row Percents with(dataframe, ctable(rowvar,colvar,prop="r", totals=TRUE)) Column Percents with(dataframe, ctable(rowvar,colvar,prop="c", totals=TRUE)) 2x2 table COHORT: Row %, RR, and OR with(bplong, ctable(sex,when,prop="r",OR=TRUE,RR=TRUE)) 2x2 table CASE-CONTROL: Col % and OR with(bplong, ctable(sex,when,prop="c",OR=TRUE,RR=FALSE)) library(summarytools) with(wws1000,ctable(race,numkids,prop="r", totals=TRUE)) Two Way Crosstab – Method II {gmodels} CrossTable(dataframe$rvar,dataframe$cvar,digits=2, prop.r=TRUE,prop.c=FALSE,prop.t=FALSE, prop.chisq=FALSE, dnn=c("RowTitle","ColumnTitle")) library(gmodels) CrossTable(wws1000$race,wws1000$numkids,digits=2, prop.r=TRUE,prop.c=FALSE,prop.t=FALSE, prop.chisq=FALSE, dnn=c("Race","Number of Children"))
3.3 One Discrete Variable and Multiple Continuous Variables { Package } command Example So you have it Quick descriptives on every variable (2 ways) {stargazer} stargazer(dataframename, type="text", median=TRUE) stargazer(dataframename, type="text", title="YOUR TITLE", out="TABLENAME.txt") library(stargazer) stargazer(framingham, type="text", median=TRUE) stargazer(lung_demo,type="text",title="Table 1: Descriptives of Lung Study",out="table1.txt") Using package summarytools {summarytools} with(dataframe,stby(data=continuousvar, INDICES=discretevar, FUN=descr,stats=c("statistic", "statistic"))) library(summarytools) with(wws1000, stby(data = age, INDICES =race, FUN = descr, stats = c("mean", "sd", "min", "med", "max"))) Using package FSA {FSA} Summarize(continuousvar~discretevar, data=dataframe,na.rm=TRUE) library(FSA) Summarize(age~race,data=wws1000,na.rm=TRUE) Using package Rmisc Note: ci half width of 95% CI {Rmisc} summarySE(data=dataframe,measurevar="continuousvar", groupvars=c("discretevar"),na.rm=TRUE) library(Rmisc) summarySE(data=wws1000,measurevar="age", groupvars=c("race"),na.rm=TRUE)