An R Function and Procedures Reference
Following is an alphabetical list of most of the functions used in these tutorials, and a few choice others. If you've been looking at R help pages, you've probably discovered by now that the examples there can be positively Martian! So I've tried to supply simple examples for most of these functions. The output produced by the commands in the examples is not shown. The arguments given in the function syntax attempt to convey what type of objects the function works on, and are not necessarily the correct argument names, for which you should see the help pages. The data sets used in the examples are all built in. Get info about them with help(dataset_name) or summary(dataset_name).
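For example, to get acquainted with one of those built-in data sets: > help(women) # the help page for the data set (?women does the same thing) > summary(women) # a quick numerical overview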
abline(a=intercept, b=slope) - draw a line on an existing graph abline(h=y_value) - draw a horizontal line on an existing graph abline(v=x_value) - draw a vertical line on an existing graph abline(lm_model) - plot the regression line on an existing scatterplot > plot(women) # a graph to work with > lm.out = lm(weight ~ height, data=women) # an lm_model to work with > abline(lm.out) # draw the regression line > abline(v=64) # a vertical line at x=64 > abline(h=132) # a horizontal line at y=132 > abline(a=-170, b=5, lty="dashed") # line with arbitrary intercept and slope abs(value or vector) - absolute value > abs(-9) > abs(c(-9,10,11,-20,3)) acf(vector) - autocorrelation function (draws an autocorrelation plot) > acf(rivers) > acf(sunspots, lag.max=150) addmargins(contingency_table) - print contingency table with marginal sums > HairEyeColor > addmargins(HairEyeColor) aggregate(formula, data, function) - split variable into groups and calculate function > summary(warpbreaks) # a data frame > aggregate(breaks ~ wool, data=warpbreaks, mean) > aggregate(breaks ~ tension, data=warpbreaks, median) > aggregate(breaks ~ wool + tension, data=warpbreaks, FUN=mean) all(logical_vector) - are all the values in this vector TRUE? (see any) > all(c(T,T,T,T,T,T,F,T,T,T)) anova(lm_model) - view the ANOVA table for a stored linear regression model anova(lm_model1, lm_model2) - test two stored regression models against each other > lm.out = lm(mpg ~ hp + wt, data=mtcars) # an lm_model > lm.out2 = lm(mpg ~ wt, data=mtcars) # another lm_model (subset of first) > anova(lm.out) # sequential ANOVA > anova(lm.out2, lm.out) # hp contributes significantly any(logical_vector) - are any of the values in this vector TRUE? (see all) > any(c(F,F,F,T,F,F,F,F,F,F)) > any(c(4<2, 6>9, 8>=4)) aov(formula, data) - ANOVA using the given model formula and dataframe (Type I SS if unbalanced) > summary(warpbreaks) # a data frame > aov.out = aov(breaks ~ wool * tension, data=warpbreaks) > summary(aov.out) apropos("string") - return all function names of which "string" is part > apropos("mean") args(function_name) - sometimes shows arguments taken by a function (not always!)
> args(plot) # fairly useless > args(plot.default) # unless you know the tricks array(data, dim) - create an array of given dimensions from data > array(data=1:20, dim=c(2,5,2)) as.data.frame(table or matrix) - coerce a table-like object to a data frame > class(state.x77) # it's a matrix > states = as.data.frame(state.x77) > class(states) # now it's a data frame as.data.frame.table(contingency_table) - coerce contingency table to a data frame > HairEyeColor # a contingency table > as.data.frame.table(HairEyeColor) as.matrix(all_numeric_tablelike_object) - coerce to a matrix > str(USArrests) # it's an all numeric data frame > arrests = as.matrix(USArrests) > class(arrests) # not any more attach(dataframe_name) - attach a dataframe to the search path (see detach) > search() # look at the search path > attach(women) > search() # "women" is now in position 2 > detach(women) > search() # and now it's gone barplot(vector or table_name) - plot a bar graph of values in table (must be tabled first) barplot(2D_table_name, beside=T, legend=T) - side-by-side bar graph with legend > means = with(warpbreaks, tapply(breaks, tension, mean)) # a vector of means > barplot(means) > HairEyeColor[,,1] # a 2D contingency table > barplot(HairEyeColor[,,1], beside=T, legend=T) bartlett.test(formula, data) - test for homogeneity of variance (fails with 2-way classifications) > summary(warpbreaks) # a data frame > bartlett.test(breaks ~ tension, data=warpbreaks) > bartlett.test(breaks ~ tension + wool, data=warpbreaks) # fail! binom.test(x=number_of_successes, n=trials, p=null_prob) - exact binomial test > binom.test(x=60, n=100, p=.5) # 60 heads in 100 coin tosses > binom.test(x=10, n=100, p=1/36) # 10 boxcars in 100 dice rolls boxplot(vector_name) - plot a boxplot of a numeric variable boxplot(formula, data) - plot boxplots at each level of an independent variable > boxplot(rivers) > boxplot(breaks ~ tension, data=warpbreaks) > boxplot(count ~ spray, data=InsectSprays) by(DV, IV, function) - break down DV by IV and calculate function (see tapply) > attach(warpbreaks) # a data frame to work with, attached to search path > names(warpbreaks) # see the variable names > by(breaks, tension, median) # group medians (breaks by tension) > by(breaks, tension, var) # group variances > detach(warpbreaks) # DON'T leave it attached to the search path! c(data_values) - concatenate; group values into a vector > xy = c(12, 17, 22, 18); xy # create and print a numeric vector > xz = c("Fred","Barney","Wilma","Betty"); xz # character vector > zz = c(T, F, F, T); zz # logical vector cbind(vector1, vector2, vector3, ...) - bind vectors into columns of a matrix > red = c(1,2,3,4) # create some vectors (must be same length) > blue = c(5,6,7,8) > green = c(9,10,11,12) > cbind(red, blue, green) > cbind(green, red, blue) > zz = cbind(red, blue) > cbind(zz, green); cbind(green, zz) chisq.test(vector_name, p=vector_null_probs) - chi square goodness of fit test chisq.test(table_name) - chi square significance test of independence chisq.test_result$expected - examine expected frequencies from chi square test chisq.test_result$residuals - examine residuals from chi square test > obs.freqs = c(15, 15, 20, 10) > chisq.test(obs.freqs) # test of all expected freqs. 
equal > chisq.test(obs.freqs, p = c(1/8, 1/4, 1/2, 1/8)) # test of unequal EFs > eyes = HairEyeColor[,,1] # a two-way contingency table > chisq.test(eyes) # chi square test of independence (warning: at least 1 EF<5) > chisq.test(eyes) -> chi.out # save output > chi.out$expected # see the expected freqs. > chi.out$resid # see cell residuals (unsquared chi values) choose(n, k) - number of possible combinations of n things taken k at a time > choose(n=10, k=5) > choose(15,4) class(object_name) - find out what kind of object R considers this to be > class(USArrests) # a data frame > class(state.x77) # a matrix > class(rivers) # numeric values (a vector) coef(lm_model) or coefficients(lm_model) - get regression coefficients > lm.out = lm(weight ~ height + I(height^2), data=women) # an lm_model to work with > coef(lm.out) # lm.out$coef does the same thing colMeans(matrix or table) - get column means from a table-like object > colMeans(state.x77) # a matrix > colMeans(USArrests) # an all numeric data frame colnames(tablelike_object) - create or get column names (see names) > head(women) > colnames(women) > colnames(women) = c("hgt","wgt") > head(women) colors() - display the names of colors available for graphing > colors() # be prepared for a lot of output > barplot(c(15,22,17), col=c("red","wheat","blue")) > rainbow(3) # get three colors from the rainbow palette > topo.colors(4) # four colors from the topo.colors palette > barplot(c(15,22,17), col=rainbow(3)) > barplot(c(15,22,17,28), col=heat.colors(4)) confint(lm_model) - get confidence intervals for regression coefficients > lm.out = lm(weight ~ height, data=women) # an lm_model to work with > confint(lm.out) contrasts(factor_name) - set or check contrasts for a factor > data(ToothGrowth) > names(ToothGrowth) > ToothGrowth$dose = factor(ToothGrowth$dose) > contrasts(ToothGrowth$dose) > contr.treatment(3) # "treatment" contrasts for a factor with 3 levels > contr.helmert(3) # helmert contrasts for a factor with 3 levels > contr.sum(3) # sum contrasts for a factor with 3 levels > contrasts(ToothGrowth$dose) = contr.helmert(3) > contrasts(ToothGrowth$dose) cooks.distance(lm_model) - report Cook's distances from a regression model > lm.out = lm(weight ~ height, data=women) # an lm_model to work with > cooks.distance(lm.out) > plot(cooks.distance(lm.out)) > plot(cooks.distance(lm.out), type="h") cor(vector1, vector2) - Pearson's r between numeric variables cor(vector1, vector2, use="pairwise.complete") - if there are missing values cor(vector1, vector2, method="spearman") - Spearman rank correlation cor(dataframe or matrix) - get a correlation matrix if all numeric variables > names(USArrests) > with(USArrests, cor(Murder, Rape)) > with(USArrests, cor(Murder, Rape, method="spearman")) > cor(USArrests) # an all numeric data frame > cor(state.x77) # a matrix > state.x77[20,2] = NA > cor(state.x77) # effect of a missing value > cor(state.x77, use="pairwise.complete") cor.test(vector1, vector2) - significance test for correlation cor.test(vector1, vector2, alternative="greater" or "less") - one-sided tests > cor.test(~ Murder + Rape, data=USArrests) > cor.test(~ Murder + UrbanPop, data=USArrests) > cor.test(~ Murder + UrbanPop, data=USArrests, alternative="greater") cos(angle_in_radians or vector_of_angles) - cosine function > cos(pi) > cos(c(pi/3, pi/4, pi, 5.01, 25.01)) cov(dataframe or matrix) - get a covariance matrix if all numeric variables > cov(USArrests) # an all numeric data frame > cov(state.x77) # a matrix curve(function_of_x, from=value, 
to=value) - plot an equation curve(function_of_x, from=value, to=value, add=T) - add curve to existing plot > curve(sin(x), from=0, to=6.28) > plot(weight ~ height, data=women) # get a graph to draw on > lm(weight ~ height + I(height^2), data=women) > curve(261.9 - 7.35 * x + 0.083 * x^2, add=T) cut(vector, breaks) - divide vector values into intervals defined by breaks > cut(rivers, breaks=4) > table(cut(rivers, breaks=4)) data(dataset_name) - copy a built in data set to workspace > data(women) > ls() # built in data frame "women" is now in workspace > data(anorexia, package="MASS") # get data from MASS even though it is not loaded data.frame(vector1, vector2, vector3, ...) - create a dataframe from vectors > subj.names = c("Terry","Joan","Emalee") > height = c(69, 65, 67) > age = c(40, 66, 28) > own.teeth = factor(c("yes","yes","yes")) > my.data = data.frame(subj.names, height, age, own.teeth) > my.data dataframe_name[i,] - get row i of the dataframe dataframe_name[i:j,] - get rows i through j of the dataframe dataframe_name[,k] - get column k of the dataframe; or add column to dataframe dataframe_name$variable_name - get column of dataframe with given variable name > data(women) # a data frame to work with > women # see it > women[7,] # row 7 > women[7:10,] # rows 7 through 10 > women[,2] # column 2 > women$weight # also column 2 > women[,3] = (1:15)^2 # create a new column > women date() - get the current system date and time > date() dbinom(x, size, prob) - binomial probability of x successes in size trials > dbinom(x=7, size=10, prob=.5) # probability of 7 heads in 10 coin tosses > dbinom(x=3, size=10, prob=1/36) # probability of 3 snake eyes in 10 dice rolls density(vector) - kernel density smoothing of vector > density(faithful$eruptions) > plot(density(faithful$eruptions)) detach(dataframe_name) - remove dataframe from the search path (see attach) > search() # look at the search path > attach(women) > search() # "women" is now in position 2 > detach(women) > search() # and now it's gone difftime(time1, time2) - time between two times or dates > difftime("1955-09-12", "2016-02-23", units="days") # nice function, horrible help page! > difftime("1960-10-09 15:46:00", "2016-02-23 11:34:00", units="mins") dim(object_name) - get the dimensions of a table, matrix, or dataframe > dim(USArrests) # a data frame of 50 rows by 4 columns > dim(state.x77) # a matrix of 50 rows by 8 columns > dim(HairEyeColor) # a 4x4x2 contingency table dimnames(matrix or array or dataframe) - get or create dimension names > dimnames(state.x77) > my.table = array(1:20, dim=c(2,5,2)) > dimnames(my.table) = list(Sex=c("male","female"), + Pet=c("cat","dog","bird","turtle","snake"), + Married=c("yes","no")) > my.table dir() - view files in the working directory on your hard drive > dir() dnorm(value or vector, mean, sd) - normal probability density function (height of normal curve) > dnorm(120, mean=100, sd=15) > plot(dnorm(seq(from=-3,to=3,by=.1), mean=0, sd=1)) # mean=0, sd=1 are defaults dpois(x, lambda) - poisson probability function > dpois(x=3, lambda=8) # prob.
of only 3 insect legs in a candy bar if average is 8 example(function_name) - get some examples for named function (sometimes useful) > example(friedman.test) # examples from the help page (somewhat sensible in this case) exp(value or vector) - raise e to the power of value; natural antilog > exp(1.632) > exp(1:20) factor(character_vector) - declare character_vector to be a factor factor(factor_name, labels) - set or change value labels for a factor > data(ToothGrowth) > summary(ToothGrowth) # one factor, two numeric variables > factor(ToothGrowth$dose) -> ToothGrowth$dose > summary(ToothGrowth) # now dose is also a factor > factor(ToothGrowth$dose, labels=c("Low","Moderate","High")) -> ToothGrowth$dose > summary(ToothGrowth) # now the levels of dose have value labels rather than numbers factorial(value or vector) - get value factorial (x!) > factorial(10) > factorial(1:10) fisher.test(contingency_table) - Fisher's exact test > TeaTasting <- matrix(c(3, 1, 1, 3), nrow = 2, + dimnames = list(Guess = c("Milk", "Tea"), + Truth = c("Milk", "Tea"))) # lady tasting tea problem > TeaTasting # a 2x2 contingency table > fisher.test(TeaTasting, alternative = "greater") # a brazen steal from the help page fivenum(vector) - five number summary of a numeric variable > fivenum(rivers) # compare to summary(rivers) for (i in 1:n) - start a loop that will run n times; put statements to be executed inside {} > for(i in 1:5) { + print(i) + print(i^2) + } ftable(contingency_table) - convert a contingency table to a flat table > HairEyeColor # a contingency table > ftable(HairEyeColor) function() - program your own functions into R > sem = function(x) sqrt(var(x,na.rm=T)/sum(!is.na(x))) # sqrt(var/n) > sem(rivers) # a newly created function for standard error of the mean getwd() - find out what your working directory is (see setwd) > getwd() gl(n, k) - generate n levels of a factor containing k values per level > gl(n=3, k=10, labels=c("frogs","hogs","dogs")) -> ogs > ogs glm(formula, family, data) - generalized linear model > data(menarche, package="MASS") > head(menarche) # number of girls reaching menarche by age group > glm(cbind(Menarche,Total-Menarche) ~ Age, family=binomial(logit), data=menarche) head(object_name) - see the first six cases in a data object > head(rivers) # a vector > head(USArrests) # a data frame > head(state.x77) # a matrix > head(rivers, n=10) # see 10 cases help(function_name) - view help page for a function > help(mean) # ?mean does the same thing help.search("topic") - see list of functions that might have to do with "topic" > help.search("mean") # ??mean does the same thing hist(vector) - plot a histogram of a numeric variable hist(vector, breaks=number or vector_of_desired_breaks) - with specified break points > hist(rivers) > hist(rivers, breaks=20) > hist(rivers, breaks=seq(from=0, to=4000, by=250)) I() - isolate a mathematical operation from the rest of a formula > lm.out = lm(weight ~ height + I(height^2), data=women) > summary(lm.out) ifelse(test, yes, no) - if test is TRUE do yes, else do no > ifelse(rivers > 1000, print("long"), print("short")) influence.measures(lm_model) - reports various measures of influence > lm.out = lm(weight ~ height, data=women) # an lm_model to work with > influence.measures(lm.out) interaction.plot(x.factor, trace.factor, response) - profile (interaction) plot > attach(ToothGrowth) # no formula interface here so attach is easiest > names(ToothGrowth) # get variable names > interaction.plot(x.factor=dose, trace.factor=supp, response=len) > 
detach(ToothGrowth) # get it out of search path when done! IQR(vector) - get the interquartile range using quantile() type=7 IQR(vector, type=2) - get "textbook" IQR > temp = c(3,7,8,6,5,1,8,3,4,5,2,6) > sort(temp) > IQR(temp) > IQR(temp, type=2) jitter(vector) - add a small amount of random error to values of a vector > jitter(1:7) > jitter(1:7, factor=.5) kruskal.test(formula, data) - Kruskal-Wallis rank-sum oneway ANOVA > names(InsectSprays) # count is numeric, spray is a factor > boxplot(count ~ spray, data=InsectSprays) # see what it looks like > kruskal.test(count ~ spray, data=InsectSprays) ks.test(vector1, vector2) - Kolmogorov-Smirnov test comparing distributions > ks.test(rnorm(100), rnorm(100)) # same distributions > ks.test(rnorm(100), runif(100)) # different distributions legend() - add a legend to an existing plot; complicated (see help page) > margin.table(Titanic, c(2,1)) # gender by class of ticket held on Titanic > margin.table(Titanic, c(2,1)) -> data.table > barplot(data.table, beside=T, axis.lty=1, ylim=c(0,1000), col=c("blue","pink")) > ### something to draw a legend on > legend(x=1, y=900, # where to put it + legend=c("males","females"), # what it should say + fill=c("blue","pink")) # colors matching bars length(vector) - get the no. of values in a vector, including NAs > length(rivers) letters - an object containing the lower case letters of the English alphabet LETTERS - an object containing the upper case letters of the English alphabet > letters > letters[5:10] > LETTERS[5:10] levels(factor_name) - get the levels of a categorical variable > levels(InsectSprays$spray) > levels(chickwts$feed) > summary(chickwts) library("library_name") - load an optional library, for example MASS > search() # see the search path > library("MASS") > search() # MASS library has been put in position 2 > detach("package:MASS") > search() # gone lines(x, y) - draw a line on an existing graph through (x,y) points > x = 1:10 > y = x^2 > plot(x, y) # plot x,y points > lines(x=x, y=y) # connect them with lines > lines(x=c(4,8), y=c(0,100), col="red") # line from (4,0) to (8,100) list(object_names) - create a list of the named objects > my.list = list(rivers, HairEyeColor, women) > summary(my.list) > my.list lm(formula, data) - linear regression of DV on IV using least squares lm(DV ~ IV + I(IV^2) + I(IV^3) + ..., data) - polynomial regression lm(DV ~ IV1 + IV2 + IV3 + ..., data) - multiple linear regression lm_model$fitted - examine the fitted values from a stored model lm_model$residuals - examine the residuals from a stored lm model > lm.out = lm(weight ~ height, data=women) > lm.out.cubic = lm(weight ~ height + I(height^2) + I(height^3), data=women) > summary(lm.out) # see results of first regression > summary(lm.out.cubic) # see the cubic model > anova(lm.out, lm.out.cubic) # the cubic model is significantly better > lm.out$fitted # fitted values for lm.out > lm.out$residuals # residuals for lm.out > plot(lm.out$fitted, lm.out$residuals) # clearly missed curvilinear trend > rm(lm.out, lm.out.cubic) # keep it clean! > lm.out = lm(mpg ~ disp + hp + wt, data=mtcars) # multiple regression > summary(lm.out) load("pathname_of_previously_saved_file") - load a "saved" file (see save) > x = 1:7 > y = c("Bill","Fred","Diane") > z = matrix(1:9, nrow=3) # clutter up the workspace > ls() # see the mess you've made > save.image("mystuff.RData") # save workspace to working directory > rm(x,y,z) # clear the workspace > ls() # see? all gone! 
> load("mystuff.RData") # bring it back > ls() log(value or vector) - take the natural log of a number or values in a vector log(value, base) - log to any base log10(value or vector) - base-10 logs > log(8) > log(1:8) > log(1:8, base=2) > log(1:8, base=10) > log10(1:8) loglm(formula, data) - log linear analysis (this function is in the MASS library) > library("MASS") # necessary to get at the function > UCBAdmissions # UC Berkeley grad programs admissions data 1973 > loglm(~ Dept * Admit * Gender, data=UCBAdmissions) > ### this saturated model fits the data perfectly (LR X^2 = 0) > loglm(~ Dept * Admit, data=UCBAdmissions) > ### removing Gender results in a model that does not fit the data (LR X^2 = 1405, df=12, p=0) lowess(x, y) - return the coordinates of a lowess smoother for (x,y)-scatterplot > lowess(women) > plot(women, pch=2) > points(lowess(women)) ls() - list the objects in the workspace; also objects(); character(0)=empty > ls() > ls(pos=2) # show contents of position 2 on search path mad(vector) - get the median absolute deviation from the median mad(vector, center=mean(vector)) - gives a mean-centered MAD > mad(rivers) > mad(rivers, center=mean(rivers)) manova(formula, data) - multivariate analysis of variance > manova.out = manova(cbind(mpg, disp, hp, wt) ~ factor(gear), data=mtcars) > summary.aov(manova.out) > summary.manova(manova.out) > summary.manova(manova.out, test="Wilks") margin.table(contingency_table, 1) - get the row marginal sums margin.table(contingency_table, 2) - get the column marginal sums > eyes = HairEyeColor[,,1] # a two-way contingency table > eyes # see? > margin.table(eyes, margin=1) # row marginal sums > margin.table(eyes, margin=2) # column marginal sums matrix(vector, nrow=value, ncol=value) - create a nrow x ncol matrix from a vector > matrix(1:10, nrow=2) > matrix(1:10, ncol=5) > matrix(1:10, ncol=5, byrow=T) # fill across the rows max(vector) - get maximum value in a numeric variable; na.rm=T to remove missings > max(rivers) mean(vector) - get mean of a numeric variable; chokes on NAs mean(vector, na.rm=T) - mean of variable after removing missing values > mean(rivers) > rivers[200] = NA # insert a missing value > mean(rivers) # doesn't work anymore > mean(rivers, na.rm=T) # there we go! median(vector) - get median of the numeric variable; na.rm=T to remove missings > median(rivers) min(vector) - get the minimum value in a numeric variable; na.rm=T to remove missings > min(rivers) model.tables(aov_model, type="means") - get group means from a _balanced_ ANOVA (only!) > aov.out = aov(len ~ supp * factor(dose), data=ToothGrowth) > model.tables(aov.out, type="means") NA - a missing value (R's missing value code) na.omit(vector or dataframe_name) - remove missings or cases with missing values > women # a data frame > women[10,2] = NA # insert a missing value > women > na.omit(women) # remove cases with NAs > na.omit(women$weight) # works on vectors, too names() - name variables or retrieve the names of variables > names(mtcars) > my.df = data.frame(1:5,6:10,11:15) > names(my.df) = c("first","second","third") # same as colnames() in this case > my.df numeric(size) - create a numeric vector of size size > numeric(20) # filled with zeros by default objects() - see objects in the workspace; same as ls() > objects() # does the same thing as ls() oneway.test(formula, data) - one-way Welch-corrected between subjects ANOVA oneway.test(formula, data, var.equal=T) - one-way betw. 
ANOVA assuming homogeneity > names(InsectSprays) # count is numeric, spray is a factor > oneway.test(count ~ spray, data=InsectSprays) > oneway.test(count ~ spray, data=InsectSprays, var.eq=T) order() - can be used to reorder the rows in a dataframe, but syntax is twisted and the help page is useless in sorting it out, because it doesn't give a single useful example; here's one > data(women) > women # see how orderly it is > women[sample(1:15),]->women # mix it up > women # see how mixed up it is now > women[order(women$weight),] # sort on the weight variable pairs(all_numeric_dataframe or matrix) - plot a scatterplot matrix > pairs(USArrests) # plot(USArrests) will do the same in this case > pairs(state.x77) # matrix; plot() will not work here pairwise.t.test(x=vector, g=factor, p.adjust.method) - all possible pairwise t-tests > attach(InsectSprays) > names(InsectSprays) # count is numeric, spray is a factor > pairwise.t.test(x=count, g=spray) # default p.adjust is holm > pairwise.t.test(x=count, g=spray, p.adjust="bonf") # Bonferroni adjustments > pairwise.t.test(x=count, g=spray, p.adjust="none") # Fisher LSD tests > detach(InsectSprays) # don't leave it attached when done paste() - one of many string manipulation functions available > paste("subject", as.character(1:10), sep=".") > paste("subject", as.character(1:10), sep="") > paste("William", "Blackstone", "King", sep=" ") pbinom(successes, size, prob, lower.tail=T) - binomial cumulative probability function > pbinom(40, size=100, p=.5) # Pr(40 or fewer tails in 100 coin tosses) > pbinom(40, size=100, p=.5, lower=F) # Pr(more than 40 tails in 100 tosses) > pbinom(40, size=100, p=.5, lower=F) + pbinom(40, size=100, p=.5) pf(F-value, df1, df2, lower.tail=T) - F cumulative distribution function (F p-values) > pf(3.12, df1=3, df2=10) # area in lower tail > pf(3.12, df1=3, df2=10, lower=F) # area in upper tail pi - the only built in mathematical constant in R > pi > r = 10; 2 * pi * r > r = 1:20; 2 * pi * r pie(table_name) - plot a piechart of values in table (must be tabled first) > HairColorFreqs = margin.table(HairEyeColor[,,1], 1) > HairColorFreqs # see what you're dealing with > pie(HairColorFreqs) > pie(HairColorFreqs, col=c("black","brown","red","yellow")) plot(predictor_var, response_var) - scatterplot of two numeric variables plot(response_var ~ predictor_var, data) - formula interface plot(var1, var2, log="y") - plot with a logarithmic y-axis; fails if any y<=0 > plot(pressure) # pressure vs. temperature, the two numerics in the data frame > with(pressure, plot(temperature, pressure)) > plot(pressure ~ temperature, data=pressure) > plot(pressure, log="y") > plot(pressure, log="xy") plot(dataframe_name) - plot a matrix of scatterplots of numerical variables (see pairs) > plot(USArrests) # an all numeric data frame (seems to fail with matrices!)
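A few of the more common optional arguments to plot(), shown as one possible combination (see help(par) for the full list of graphical parameters): > plot(pressure, type="l", col="blue", main="pressure vs. temperature", xlab="temperature", ylab="pressure") # type="l" connects the points with lines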
plot(density(variable_name)) - kernel density smoother for numerical variable (see density) > plot(density(faithful$waiting)) plot(lm_model) - plot four regression diagnostic plots from stored lm results > lm.out = lm(weight ~ height, data=women) # get an lm_model to work with > par(mfrow=c(2,2)) # set the graphics device to plot 2 rows of 2 graphs > plot(lm.out) plot.new() - open the graphics device for plotting > plot.new() pnorm(z, lower.tail=T) - get the area under the unit normal curve that is (at or) below z > pnorm(1.28) > pnorm(1.28, lower=F) points(vector_of_x-values, vector_of_y-values) - plot points on a graph > plot(1:10, 1:10, type="n") # a blank graph to draw on > points(x=c(2,4,6,8), y=c(6,10,2,4)) > points(x=c(2,4,6,8), y=c(6,10,2,4), type="b") power.anova.test(groups, n, between.var, within.var, sig.level, power) - power for balanced anova > ### one-way balanced between subjects designs only > ### within.var is anticipated pooled variance (i.e., MSE) > ### between.var is anticipated variance of group means (i.e., MSB/n) > DV = c(c(2,5,3,4),c(3,7,5,6),c(8,6,7,4)); IV = gl(3,4) # a pilot study > aov(DV ~ IV) # SSB=15.5 on df=2, SSE=22.5 on df=9 > power.anova.test(groups=3, n=NULL, between.var=15.5/2/4, within.var=22.5/9, + sig.level=.05, power=.8) # n = 7.3 subjects per group for 80% power power.prop.test(n, p1, p2, sig.level, power) - power for two-sample proportions test > p1 = .4 # anticipated proportion in group 1 > p2 = .5 # anticipated proportion in group 2 > power.prop.test(n=NULL, p1=p1, p2=p2, sig.level=.05, power=.8) > ### n = 387.3 subjects per group for 80% power power.t.test(n, delta, sd, sig.level, power) - t-test power function > ### set exactly one of those things equal to null and R will solve for it > power.t.test(n=NULL, delta=3, sd=7, sig.level=.05, power=.8) > ### for delta (anticipated difference in means) = 3, n = 86.4 subjects per group ppois(value, lambda, lower.tail=T) - poisson cumulative probability function > ppois(3, lambda=8) # prob. of 3 or fewer insect legs in a candy bar when average is 8 > ppois(3, lambda=8, lower=F) # prob. of more than 3 when average is 8 predict(lm_model, list(values_to_predict_from)) - make predictions from a regression model > names(mtcars) > lm.out = lm(mpg ~ cyl + hp + wt, data=mtcars) # get a regression model > predict(lm.out, list(cyl=4, hp=120, wt=2)) print(anything) - does what it says > print(women) > print("Hello there!") > print(pi, digits=15) prop.table(table_name) - convert tabled frequencies to proportions of N prop.table(table_name, 1) - convert tabled freqs. to proportions by rows prop.table(table_name, 2) - convert tabled freqs. to proportions by columns > margin.table(UCBAdmissions, margin=c(1,3)) # a 2x5 contingency table > margin.table(UCBAdmissions, margin=c(1,3)) -> inornot > sum(inornot) # total no. of cases in table (4526) > prop.table(inornot) # proportions with respect to 4526 (not very useful) > prop.table(inornot, margin=1) # with respect to row marginal totals > prop.table(inornot, margin=2) # with respect to column marginal totals prop.test(number_of_hits, N, p=null_prob.) - one-proportion test prop.test(vector_of_hits, vector_of_Ns) - multiple-proportion test > addmargins(margin.table(Titanic, margin=c(4,2))) # survival by gender; Titanic > prop.test(1490, n=2201, p=.5) # hypothesis of 50% survival chance rejected > prop.test(c(367, 344), n=c(1731, 470)) # two-proportion test, men vs. 
women pt(t-value, df, lower.tail=T) - t cumulative probability function (t p-values) > pt(2.05, df=25) > pt(2.05, df=25, lower=F) > pt(2.05, df=25, lower=F) * 2 q() - quit R (same as quit) > q("yes") # save the workspace and quit qnorm(probability_in_lower_tail) - inverse normal distribution > qnorm(.95) # get z-value with 95% of normal dist. (at or) below it > qnorm(.975) qqline(variable_name) - draw the normal line on an existing qqnorm() plot qqnorm(variable_name) - plot a normal probability plot of a numerical variable qqplot(variable1, variable2) - compare distributions of two variables > qqnorm(rivers) > qqline(rivers) > qqplot(rnorm(141), rivers) # compare rivers to random normal dist. > qqplot(rexp(141), rivers) # compare rivers to random exponential dist. quantile(variable_name, probs=vector_of_probs.) - get desired quantiles quantile(variable_name, probs=c(1/4, 3/4), type=2) - get textbook Q1 and Q3 > quantile(rivers, probs=c(.25,.5,.75)) # compare to fivenum(rivers) > quantile(rivers, probs=c(1/4, 3/4), type=2) # usually same result for large vectors > ### see IQR quartz(title, width, height, pointsize) - open a graphics window (Mac only) > quartz("Q", width=5, height=5, pointsize=10) # width, height in inches > plot(pressure) quit() - quit R (same as q) > quit() > quit("no") # quit without saving the workspace range(variable_name) - reports min() and max() (see min and max) > range(rivers) rank(vector) - returns the rank (relative size or position upon sort) of values > rank(c(20, 18, 13, 25, 30, 25)) rbind(vector1, vector2, vector3, ...) - bind vectors into a matrix by rows > X = 1:5; Y = 6:10; Z = 11:15 # some vectors to bind > rbind(X, Y, Z) rbinom(n, size, prob) - generates random binomially distributed values > rbinom(n=5, size=100, prob=.5) # no. of heads in 5 trials of 100 coin tosses read.csv("file_location") - read in a csv formatted data file (special case of read.table) > write.csv(women, file="women.csv", row.names=F, quote=F) # create one to read > read.csv(file="women.csv") # read in from your working directory > write.csv(women, file="women.csv", row.names=T, quote=F) > read.csv(file="women.csv", row.names=1) # if rownames are in the first column read.table("file_location", header=T) - read in tabled data with headers > ### same as read.csv() except for white space separated values > ### reads files created by write.table() > ### the value separator can be specified in an option making it very versatile > ### read.table can be used for inline data entry as follows (best done as a script!)
> Flintstones = read.table(header=T, text=" + first.name last.name related.to season job voiced.by + Fred Flintstone Fred 1 stone.quarry Alan.Reed + Wilma Flintstone Fred 1 housewife Jean.Vander.Pyl + Pebbles Flintstone Fred 3 child Jean.Vander.Pyl + Dino none Fred 1 pet Mel.Blanc + Barney Rubble Barney 1 unknown Mel.Blanc + Betty Rubble Barney 1 housewife Bea.Benaderet + Bamm.Bamm Rubble Barney 4 child Don.Messick + Hoppy none Barney 5 pet Don.Messick + Mr Slate NA 1 boss John.Stephenson + Arnold NA NA 1 paperboy Don.Messick + ") > summary(Flintstones) # option row.names=1 might have been a good idea relevel(factor_name, ref) - changes the reference level of a factor > data(InsectSprays) > str(InsectSprays) > levels(InsectSprays$spray) > relevel(InsectSprays$spray, ref="F") > relevel(InsectSprays$spray, ref="F") -> InsectSprays$spray > levels(InsectSprays$spray) rep(data_vector, times or each) - creates a vector with repeating elements; times can also be a vector of frequencies > rep(1:5, times=3) > rep(1:5, each=3) > rep(c("Fred","Barney"), each=3) > rep(c("Fred","Barney"), c(5, 2)) remove(object_name or names) - same as rm() (see rm) > x = 7 > ls() > remove(x) > ls() require(library_names) - load a library, same as library() (see library) > require("MASS") # except unlike library(), it gives a little message rm(object_name or names) - remove an object from the workspace (see remove) > rm(list=ls()) # completely clear workspace; DON'T do it unless you really mean it! > x=1:7; y="Fred"; z=c(T,F); xx=matrix(1:6,nrow=3); yy=list(x,y,z) # mess up workspace > rm(x) # remove x > rm(y, z) # remove y and z > rm(list=ls()) # remove everything else rnorm(n, mean, sd) - generates random normally distributed values > rnorm(n=5) # mean=0, sd=1 are defaults > rnorm(n=5, mean=10, sd=5) round(value or vector, digits) - round values to digits decimal places > xyz = rnorm(5) > xyz > round(xyz, digits=3) > round(xyz, digits=0) > round(cor(state.x77), digits=3) # try it without rounding!
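One more detail about round() worth knowing: digits can also be negative, which rounds to the left of the decimal point (a small extra illustration). > round(123456.789, digits=-3) # round to the nearest thousand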
rowMeans(all_numeric_tablelike_object) - get row means from a table-like object > my.matrix = matrix(runif(16,min=10,max=20), nrow=4) # create such an object > rowMeans(my.matrix) > colMeans(my.matrix) rownames(tablelike_object) - create or get row names (see colnames) > head(state.x77) # a matrix > rownames(state.x77) > rownames(mtcars) # a data frame > rownames(UCBAdmissions) # a contingency table > my.df = data.frame(1:5, 6:10, 11:15) # create a data frame > my.df > colnames(my.df) = c("little","bigger","biggest") > rownames(my.df) = c("Bob.Smith","Fred.Skinner","Lucy.Rich","Stan.TheMan","Susan.Saugt") > my.df runif(n, min, max) - generates random uniformly distributed values > runif(n=5) # min=0, max=1 are defaults > runif(n=5, min=10, max=20) sample(vector, size, replace) - take a random sample from a given vector > sample(1:10, size=3) # sample of 3 without replacement > sample(1:10, size=20, replace=T) # sample of 20 with replacement save(R_object, file="filename") - save a data object to a file in working directory > data(mtcars) # get something to save > ls() > save(mtcars, file="mtcars.rda") # save to working directory (proprietary format) > rm(mtcars) > ls() > load(file="mtcars.rda") # read it back in with load > ls() save.image(file) - save a copy of the workspace to the working directory > save.image() # saves workspace to a file called .RData > save.image(file="mystuff.RData") # saves workspace to a named file (see load) scan(what) - most useful for entering data directly from the keyboard > ### enter these two vectors > ### 12, 16, 22, 10, 23, 18, 15, 17 > ### Fred, Barney, Wilma, Betty, Pebbles, Dino > my.vector = scan() # press Enter 1: 12 16 22 10 23 18 15 17 9: # press Enter again Read 8 items > ### you can also press enter after each data value is entered > ### leave a blank value at the end to terminate entry > Flintstones = scan(what="char") # must say so when entering character data 1: Fred 2: Barney 3: Wilma 4: Betty 5: Pebbles 6: Dino 7: Read 6 items > my.vector > Flintstones scatter.smooth(predictor_var, response_var) - scatterplot with lowess smoother > with(faithful, plot(waiting, eruptions)) # for comparison > with(faithful, scatter.smooth(waiting, eruptions)) sd(vector) - get the sample standard deviation of a numeric variable > sd(rivers) > rivers[200] = NA # uh oh! there's a missing value > sd(rivers) # fails > sd(rivers, na.rm=T) # remove missing values and calculate search() - ask R where it is looking for stuff (the search path) > search() # look at the search path > attach(women) > search() # "women" is now in position 2 > detach(women) > search() # and now it's gone sem=function(x) sd(x)/sqrt(length(x)) - quick and dirty sem function > sem(rivers) # will only work after the function is created > sem=function(x) sqrt(var(x,na.rm=T)/sum(!is.na(x))) # won't choke on NAs seq(from, to, by) - create a regular sequence of numbers > seq(from=1, to=10, by=1) # same as 1:10 > seq(from=10, to=1, by=-1) # same as 10:1 > seq(from=10, to=100, by=10) setwd() - set or change the working directory (better to use the menus) > setwd("Rspace") # if you've created this directory shapiro.test(vector) - Shapiro-Wilk test for normal distribution > shapiro.test(rivers) # not even close!
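For contrast with rivers, try the test on a sample that really is drawn from a normal distribution (the exact p-value will differ each time, since the data are random): > shapiro.test(rnorm(100))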
sin(angle_in_radians or vector of angles) - sine function > sin(pi/2) > sin(5) > sin(c(pi/4, pi/2, pi)) sort(variable_name) - sort values in ascending order sort(variable_name, decreasing=T) - sort values in descending order sort(unique(vector)) - sort only the unique (non-repeated) values > sort(rivers) # sort in ascending order > sort(rivers, decreasing=T) # sort in descending order > sort(unique(rivers)) # sort just nonrepeats source("location_of_script") - read in and execute a text file of R commands > source(file="myscript.R") # has to exist in your working directory first > file = "http://ww2.coastal.edu/kingw/statistics/R-tutorials/text/myscript.R" > source(file, echo=T) sqrt(number or vector) - take the square root of number or numbers > sqrt(120) > sqrt(1:10) stack(wide_format_dataframe, select) - convert wide to long format data frame > data(anorexia, package="MASS") > anorexia # a wide_format data frame > stack(anorexia, select=c(Prewt,Postwt)) # stack these two columns > ### we would need to add the Treat info and a subject identifier to make this useful stem(vector) - get a stem and leaf display of a numeric variable > stem(rivers) > stem(rivers, scale=.5) # fewer intervals > stem(rivers, scale=2) # more intervals step(lm_model, direction) - automated forward, backward, or stepwise regression > lm.out = lm(mpg ~ cyl + disp + hp + wt, data=mtcars) > step(lm.out, direction="backward") > step(lm.out, direction="backward") -> lm.out2 > summary(lm.out2) # cyl and hp retained even though not significant; look at AIC to see why str(object_name) - find out the structure of an R object > str(rivers) # a numeric vector > str(USArrests) # a data frame > str(state.x77) # a matrix stripchart(vector or formula, method) - one-dimensional scatterplot; method can be "jitter" or "stack" > stripchart(rivers, method="jitter", pch=1) # try it without jitter > stripchart(rivers, method="stack") # an alternative to jitter > stripchart(mpg ~ gear, data=mtcars, method="jitter", pch=1) subset(object_name, subset, select) - subset a vector, matrix, or dataframe > head(mtcars) # dats a lotta columns! > fewer.columns = subset(mtcars, select=c(mpg,cyl,hp,wt)) > head(fewer.columns) # that's better > subset(fewer.columns, subset=cyl>4) # just sixes and eights > subset(fewer.columns, subset=(cyl==4 | cyl==8)) # just fours and eights > subset(fewer.columns, subset=hp>200) # muscle cars and roadhogs > subset(rivers, subset=rivers>1000) # works with vectors, too sum(vector_name) - sum the values in a vector; na.rm is available > vector = c(6, 10, 12) > sum(vector) > vector[2] = NA # oh dear! a missing value > sum(vector) # fails > sum(vector, na.rm=T) # sum after removing missing values summary(object_name) - summarize an R object; summary stats from a variable, etc summary(aov_model) - get the anova summary table from an anova summary(lm_model) - get a detailed summary of stored linear regression results > summary(chickwts$weight) # summary of a numeric variable > summary(chickwts$feed) # summary of a factor (a frequency table) > summary(chickwts) # summary of a data frame > summary(state.x77) # summary of a matrix > aov.out = aov(weight ~ feed, data=chickwts) > summary(aov.out) # summary of an aov_model > lm.out = lm(weight ~ height, data=women) > summary(lm.out) # summary of an lm_model t(matrix) - transpose a matrix > my.matrix = matrix(1:15, nrow=3) > my.matrix > t(my.matrix) > t(my.matrix) %*% my.matrix # matrix multiplication table(vector) - create a 1D frequency table of a variable table(vector1, vector2, ...)
- create a contingency table with vector1 in rows > data(birthwt, package="MASS") # get a data frame with lots of factors (it lives in the MASS package) > str(birthwt) # but they're not declared as factors > for (i in 4:9) birthwt[,i]=factor(birthwt[,i]) # call a factor a factor! > str(birthwt) # better! > table(birthwt$ftv) > table(birthwt$race) > table(birthwt$smoke, birthwt$race) > xtabs(~ smoke + race, data=birthwt) # same result with better labels > table(birthwt$smoke, birthwt$race, birthwt$ht) > xtabs(~ smoke + race + ht, data=birthwt) tan(angle_in_radians or vector thereof) - tangent function > tan(pi/3) > tan(seq(from=0, to=2*pi, by=pi/4)) tapply(DV, IV, function) - calculate a function of a DV at each level of an IV (see by) > summary(mtcars) > attach(mtcars) # easiest as there is no formula interface > tapply(mpg, factor(am), mean) # group means > tapply(mpg, factor(am), var) # group variances > tapply(mpg, am, sd) # it doesn't really have to be a factor > tapply(mpg, list(am,gear), mean) # or a two-way classification > detach(mtcars) # don't leave it attached when done t.test(vector, mu=null_mean) - single-sample t test t.test(vector, mu=null_mean, alternative="greater" or "less") - one-sided test t.test(vector1, vector2) - two-sample t test with Welch correction t.test(vector1, vector2, var.equal=T) - no Welch correction (pooled variance t test) t.test(vector1, vector2, alternative="greater" or "less") - one-sided test (gp1 - gp2) t.test(formula, data) - two-sample t test using formula interface (from a dataframe) t.test(... , paired=T) - related samples t test > mpg = mtcars$mpg # a vector of automobile gas mileages > t.test(mpg, mu=20) # H0: mu = 20 > t.test(mpg, mu=20, alternative="greater") # H1: mu > 20 > mpg.4speeds = mtcars$mpg[mtcars$gear==4] # a vector: mpg for 4-speeds > mpg.5speeds = mtcars$mpg[mtcars$gear==5] # a vector: mpg for 5-speeds > t.test(mpg.4speeds, mpg.5speeds) # Welch-corrected t-test > t.test(mpg.4speeds, mpg.5speeds, var.eq=T) # pooled-variance t-test > t.test(mpg.4speeds, mpg.5speeds, var.eq=T, alternative="greater") # directional > data(mtcars) # a data frame > mtcars$am = factor(mtcars$am, labels=c("yes","no")) # automatic trans (yes/no) > t.test(mpg ~ am, data=mtcars) # Welch-corrected t-test > t.test(mpg ~ am, data=mtcars, var.eq=T) # pooled-variance t-test > t.test(mpg ~ am, data=mtcars, var.eq=T, alternative="less") # directional > data(sleep) # a data frame with paired scores > t.test(sleep$extra[1:10], sleep$extra[11:20], paired=T) > t.test(extra ~ group, data=sleep, paired=T) > t.test(extra ~ group, data=sleep, paired=T, alternative="greater") # wrong way! > sleep.wide = data.frame(sleep$extra[1:10], sleep$extra[11:20]) # more sensibly tabled!
> colnames(sleep.wide) = c("control","drug") > sleep.wide > with(sleep.wide, t.test(control, drug, paired=T)) text(x-coord, y-coord, "string of text") - add text to an existing plot > plot(pressure) # get a graph to write on > text(x=200, y=600, "text is centered at coords") title(main, xlab, ylab) - add titles and axis labels to an existing plot > plot(pressure, xlab="", ylab="", main="") # graph with no labels > title(main="The Main Title") > title(xlab="This is the x-axis") > title(ylab="This is the y-axis", cex.lab=1.3) TukeyHSD(aov_model, which) - Tukey HSD post hoc test on an anova model > aov.out = aov(count ~ spray, data=InsectSprays) # get an aov_model to work with > TukeyHSD(aov.out) > aov.out = aov(len ~ supp * factor(dose), data=ToothGrowth) # a factorial > TukeyHSD(aov.out, which="factor(dose)") # Tukey on dose > ### don't forget the quotes in which; I always do because they seem so unnecessary unique(vector) - get the unique values in a vector > unique(c(1,1,1,1,1,1,2,3,2,1,1,1,1)) > vector = sample(c("Fred","Barney"), size=50, replace=T) > unique(vector) update(lm_model, ~.-variable_name) - update an lm_model by deleting a variable > names(mtcars) > lm.out = lm(mpg ~ cyl + disp + hp + wt, data=mtcars) > summary(lm.out) > lm.out2 = update(lm.out, ~.-disp) > summary(lm.out2) > lm.out3 = update(lm.out2, ~.+am) # variables can also be added > summary(lm.out3) var(vector) - get the sample variance of a numeric variable > var(rivers) > var(USArrests) # variance-covariance matrix (same as cov in this case) var.test(vector1, vector2) - test for a significant difference betw. variances > var.test(rivers[1:70], rivers[71:140]) # test 1st half of rivers vector against 2nd half > var(cbind(rivers[1:70], rivers[71:140])) # check the variance-covariance matrix > summary(warpbreaks) # a data frame > var.test(breaks ~ wool, data=warpbreaks) # formula interface vector_name[i] - get the value at position i of the vector vector_name[i:j] - get values at positions i through j of the vector > rivers[100] > rivers[100:120] weighted.mean(vector, w=weights) - weighted mean of a vector > weighted.mean(c(12, 15, 10, 8, 10)) # gives ordinary mean if no weights given > weighted.mean(c(12, 15, 10, 8, 10), w=c(23, 25, 20, 21, 20)) # perhaps an unbalanced design > grades = c(3, 2.5, 3, 4, 4, 3.5) # calculation of GPA (B, C+, B, A, A, B+) > course.credits = c(1, 3, 3, 2, 1, 3) > weighted.mean(grades, w=course.credits) # compare to mean(grades) which(vector .test. value) - find out which values pass the test > ### .test.
may be < (less than), > (greater than), == (equals), != (not equal) > which(rivers > 1000) # returns case numbers that pass the test > rivers[which(rivers>1000)] # see the actual values in the vector > rivers[rivers>1000] # same result as last one > which(rivers == 1000) > which(rivers < 200) > which(rivers > 500 & rivers < 1000) # and > which(rivers < 500 | rivers > 1000) # or wilcox.test(formula, data) - commonly called the Mann-Whitney U test, for independent groups wilcox.test(formula, data, paired=T) - Wilcoxon signed-rank test for paired scores > wilcox.test(extra ~ group, data=sleep) > wilcox.test(extra ~ group, data=sleep, paired=T) # in fact, these scores are paired windows(width, height, pointsize) - open a graphics window (Windows only; see quartz for Mac) > windows(width=5, height=5, pointsize=10) # width and height in inches with(dataframe_name, function) - specify dataframe to be used with a function > with(women, mean(weight)) # mean of weight variable in women data frame > mean(women$weight) # accomplishes the same thing > attach(women); mean(weight); detach(women) # if you really want to work for it! write.csv(dataframe_name, file) - write out a data frame as a CSV file > women # a data frame to write out > write.csv(women, file="women.csv", row.names=F, quote=F) > ### it's now in your working directory as women.csv without the rownames and with text unquoted > ### use quote=T if you've been foolish enough to put commas or blank spaces in text items > ### use row.names=T if you have meaningful rownames xtabs(formula, data) - crosstabulation > X = sample(c("Wilma","Betty"), size=50, replace=T) # some data to crosstabulate > Y = sample(c("Fred","Barney"), size=50, replace=T) > xtabs(~ X + Y) > df = as.data.frame.table(UCBAdmissions) # a data frame with a Freq column > head(df) > xtabs(Freq ~ Admit + Gender, data=df) created 2016 February 23