Home‎ > ‎Comp‎ > ‎

R

Links


Installation, Startup

"C:\Program Files\R\R-2.6.1\bin\Rgui.exe" "--internet2" (uses IE internals)
install.packages("XML", repos = "http://www.omegahat.org/R")

Running with script and arguments from command line

[http://tolstoy.newcastle.edu.au/R/devel/05/09/2430.html:]
R --slave --vanilla --args "c(5,5)" "c(.5,.5)" < RScript.R
Args <- commandArgs()

Loading data

loc=paste("C:\\Documents and Settings\\",Sys.info()["login"],"\\My Documents\\R\\",sep="")
tft=read.csv(paste(loc,"tft.csv",sep=""))
tft=read.csv(file.choose())
tft=read.csv("http://...")
tft=data.frame(read.delim(loc.Thy, header=TRUE, sep="\t", quote="\"'"))
load(url(url.to.zip.file)) # e.g. http://www.test.com/data.zip

Vectors with: x=c(5,3,4,4,5)

c(1:3+1) # 2 3 4
seq(4,6); seq(1,6,2);seq(1,6,length=3) # [1] 4 5 6 [1] 1 3 5 [1] 1 3.5 6
rep(c(3,4),2);rep(c(3,4),each=2) # [1] 3 4 3 4 [1] 3 3 4 4
c(which.max(x),which.min(x)) # 1 2
match(4,x,nomatch=0) # 3
mean(x); range(x)==c(min(x),max(x)) # TRUE TRUE
c(sum(x), prod(x)) # 21 1200
median(c(x,NA),na.rm=TRUE) # 4
quantile(x) # ("0%") 3 ("25%") 4 ("50%") 4 ("75%") 5 ("100%") 5
quantile(x,probs=c(.05,.1)) # 3.2 3.4
c(var(x),sd(x)) # 0.70 0.84
rev(sort(x)) # 5 5 4 4 3
cut(x,2,labels=c("L","H")) # H L L L H
table(c(5,5,3,4,4)) # 1 2 2 (dimnames "3" "4" "5")
table(x<5) # ("FALSE") 2 ("TRUE") 3
prop.table(table(x<5)) # ("FALSE") 0.4 ("TRUE") 0.6
sample(x,4,replace=TRUE) # 4 4 3 5
diff(x) # -2 1 0 1
rank(x) # 4.5 1.0 2.5 2.5 4.5
cumsum(x) # 5  8 12 16 21
cummin(x) # 5 3 3 3 3
cummax(x) # 5 5 5 5 5
as.vector(round(scale(x),1)) # 1.0 -1.4 -0.2 -0.2  1.0

Data.frames & Tables: df=data.frame(h=c(1,2,4),w=c(3,6,9),s=c("M","F","F")) sec=data.frame(h=c(1,4,1,1,2,1,4,2))

Lookup s-value as per allocation in df, and write into sec$s:
sec$s=df$s[match(sec$h,df$h)] # c("M","F","M","M","F","M","F","F")
by(df[,1],df$s,sum) # ("F":) 6 ("M":) 1

tapply(df$h,df$s,mean) # ("F") 3 ("M") 1
tapply(df$h,df$s,mean)[df$s] # ("M") 1 ("F") 3 ("F") 3
ave(df$h,df$s,FUN=min) # 1 2 2
merge(df,dg=data.frame(x=c(1,2),s=c("F","F")),all.x=TRUE)
# df$h=c(1,2,4)        # dg$x=c(1,2)      # merge(df,dg,all.x=TRUE)$s=c("F","F","F","M","M")
# df$w=c(3,6,9)        # dg$s=c("F","F")  # merge
(df,dg,all.x=TRUE)$h=c(2,2,4,4,1)
# df$s=c("M","F","F")  #                  # merge
(df,dg,all.x=TRUE)$w=c(6,6,9,9,3)
#                                         # merge
(df,dg,all.x=TRUE)$x=c(1,2,1,2,NA)
t<-table(df$h<2,df$w>3) # t[1,2]: count of all df-rows with (NOT h<2) AND (w>3)
# -> column1: "NOT w>3" | column2: "w>3" || row1: "NOT h<2" | row2: "h<2"

Sets

union(c(1,2),c(2,3)) # 1 2 3
intersect(c(1,2),c(2,3)) # 2
setdiff(c(1,2),c(2,3)) # 1
setequal(c(1,2),c(2,3)) # FALSE
is.element(1,c(1,2)) # TRUE
1 %in% c(1,2) # TRUE

Matrices

m=matrix(c(1,2,4,3),nrow=2)
m;         m %*% c(1,2); t(m)
# 1 2      9             1 4
# 4 3      8             2 3
diag(m) # 1 3
t(solve(m,c(1,2))) # 1   0
rowSums(m) # 5 5    (old: rowsum(x), colsum(x))
colMeans(m) # 1.5 3.5

Ref cards:  

INSTALL package1: install package1
m1[,2]: column 2 of matrix m1 m1[,2:5] or m1[,c(2,3,4,5)]: columns 2–5
m1$a1: variable a1 in data frame m1
NA: missing data
is.na: true if data missing
library(mva): load (e.g.) the mva package
.packages(all.available=TRUE): names of currently attached (locally available) packages

Help

help(package=mva): help with (e.g.) package mva
apropos("topic1") and help.search("topic1"): commands relevant to topic1
example(command1): examples of command1

Input and output

source("file1"): run the commands in file1.
read.table("file1"): read in data from file1
data.entry(): spreadsheet
scan(x1): read a vector x1
download.file("url1"): from internet url.show("url1"),
read.table.url("url1"): remote input
sink("file1"): output to file1, until sink()
write(object1, "file1"): writes object1 to file1 
write.csv(df,paste(loc,"df.csv",sep=""),row.names=FALSE)
write.table(dataframe1,"file1"): writes a table
loc=paste("C:\\Documents and Settings\\",Sys.info()["login"],"\\My Documents\\R\\",sep="")
normalizePath(R.home(component="home")): directory of the R installation (in readable format)
win.print(df)

Managing variables and objects

attach(x1); detach(x1): put|remove x1 in search path
ls(); ls.str(); ls(pattern="^r.*"): lists [str] all the active objects [according to pattern].
str(object1): print useful information about object1
getwd(); setwd(dir): gets|sets working directory
rm(object1): remove object1
dim(matrix1): dimensions of matrix1
dimnames(x1): names of dimensions of x1
length(vector1): length of vector1
1:3=c(1,2,3)
rep(x1,n1): repeats the vector x1 n1 times
cbind(a1,b1,c1); rbind(a1,b1,c1): binds columns|rows into a matrix
merge(df1,df2): merge data frames
matrix(vector1,r1,c1): make vector1 into a matrix with r1 rows and c1 columns
data.frame(v1,v2): make a data frame from vectors v1 and v2
as.factor(), as.matrix(), as.vector(): conversion
is.factor(), is.matrix(), is.vector(): what it is
t(): switch rows and columns ['transpose']
which(x1==a1): returns indices of x1 where x1==a1

Control flow

for (i1 in vector1): repeat what follows
if (condition1) ...else ...: conditional

Arithmetic / Logic

%*%: matrix multiplication
%/%, ^, %%, sqrt(): integer division, power, modulus, square root
c(0,1) & c(1,1) # FALSE TRUE [i.e. element-wise]
c(0,1) && c(1,1) # FALSE [only first element]

Regular Expressions

t=tft$t[grep("<",tft$t)] # "<0.003"
gsub("[[:space:][:punct:]]","|","a and b.") # "a|and|b|"
spl<-strsplit(paste(tolower(xm.ti),collapse=" "),split=c("(--|[[:space:][:digit:]:.,;/()\"])"))
[:alnum:] Alphanumeric chars: [:alpha:] and [:digit:].
[:alpha:] Alphabetic chars: [:lower:] and [:upper:].
[:blank:] Blank chars: space and tab.
[:cntrl:] Control chars. In ASCII, these characters have octal codes 000 through 037, and 177 (DEL). In another character set, these are the equivalent characters, if any.
[:digit:] Digits: 0 1 2 3 4 5 6 7 8 9.
[:graph:] Graphical chars: [:alnum:] and [:punct:].
[:lower:] Lower-case letters in the current locale.
[:print:] Printable characters: [:alnum:], [:punct:] and space.
[:punct:] Punctuation chars: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
[:space:] Space chars: tab, newline, vertical tab, form feed, carriage return, and space. [:upper:] Upper-case letters in the current locale.
[:xdigit:] Hexadecimal digits: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f. 

Statistics

max(), min(), mean(), median(), sum(), var(): as named
summary(data.frame): prints statistics rank(), sort() rank and sort
ave(x1,y1): averages of x1 grouped by factor y1
by(): apply function to data frame by factor
apply(x1,n1,function1): apply function1 (e.g. mean) to x by rows (n1=1) or columns (n2=2)
tapply(x1,list1,function1): apply function to x1 by list1
table(): make a table
tabulate(): tabulate a vector
t=table(sel$trab < 1.5,sel$z.min > -7);t;summary(t)
fisher.test(t) # OR = 13.1 (2.7-90.0) p=0.00020

Basic statistical analysis

aov(), anova(), lm(), glm(): (generalized) linear models, anova
t.test(): t test
prop.test(), binom.test(): sign test
chisq.test(x1): chi-square test on matrix x1
fisher.test(): Fisher exact test
cor(a): show correlations
cor.test(a,b): test correlation
friedman.test(): Friedman test

GAM

library(gam)
summary(g<-gam(immu~s(z.min,4),sel,family="binomial"));
plot(g$data$z.min,g$fitted,ylim=c(0,1),cex=.5,type="p",
     xlab="z.min",ylab="predicted TRAB positivity",font.lab=2);grid()
lines(g$data$z.min[order(g$data$z.min)],
     g$fitted[order(g$data$z.min)],lty=2)
l=glm(immu~z.min,sel,family="binomial")
lines(l$data$z.min[order(l$data$z.min)],
     l$fitted[order(l$data$z.min)],lwd=3)
rug(l$data$z.min,ticksize=0.02)

Graphics [http]

plot(), barplot(), boxplot(), stem(), hist(): basic plots
matplot(): matrix plot
pairs(matrix): scatterplots
coplot(): conditional plot
stripplot(): strip plot
qqplot(): quantile-quantile plot
qqnorm(), qqline(): fit normal distribution
b<-barplot(rev(f[1:(n<-30)]),horiz=T,col="pink",border=NA,names.arg="")
text(0.2,b,rev(names(f[1:n])),adj=c(0,.5),cex=.8,font=2)
plot(z.min~log(trab),sel,type=c("p"));grid()
loe=loess(z.min~log(trab),sel,span=.75)
lines(loe$x[order(loe$x)],loe$fitted[order(loe$x)],lwd=2)
par()usr # plotting region =c(xmin,xmax,ymin,ymax) up to axes
windows(5,5);plot.new()
stripchart(list(
 round(4*(s.false<-sel$z.min[sel$immu==FALSE]))/4,
 round(4*(s.true<-sel$z.min[sel$immu==TRUE]))/4),
 pch=20,cex=1.5,
 method="stack",col=3:2,
 ylim=c(.5,2.5),
 group.names=c("TRAB-","TRAB+"))
lines(rep(median(s.true),2),c(1.7,1.9),lwd=3,col=2)
lines(rep(median(s.false),2),c(0.7,.9),lwd=3,col=3)

image, contour, kde2d, chull, persp

k=100
x=seq(5,30,length=k);x
y=exp(seq(log(0.2),log(10),length=k));y
z=matrix(rep(NA,k*k),ncol=k,nrow=k)
z=matrix(fx.gh.fail(fx.z(x[col(z)],y[row(z)])),ncol=k,nrow=k,byrow=T);z
image(x,y,z,add=T,col=rev(heat.colors(100))) # similar to the following code:
      #for(i in 1:(k-1)){for(j in 1:(k-1)){
      #    polygon(c(x[i],x[i],x[i+1],x[i+1]),c(y[j],y[j+1],y[j+1],y[j]),
      #    col=rev(heat.colors(100))[1+99*z[i,j]],border=NA)}}
#--------------------------
smp=tft[,c("f","t")]
xy=smp[chull(smp),]  # convex hull
polygon(xy,col=rgb(.5,1,1,alpha=.5),lwd=2)
#---------------------------
library(MASS)
z=kde2d(tft$f,tft$t,n=10)
image(z,col=gray(30:0/30),add=TRUE) #topo.colors(30) terrain.colors(20) heat.colors(10)
contour(z,nlevels=8,col="blue",drawlabels=FALSE,add=TRUE,lwd=2)
persp(z, phi=60, theta=0, xlab="fT4", ylab="log(TSH)", log="y",zlab="density",
    col="white", shade=.2, border=NA, main="Perspective plot")

GhostScript [src]

Sys.getenv("R_GSCMD")
Sys.setenv(R_GSCMD=paste(normalizePath(getwd()),"\\gs\\gs7.00\\bin\\gswin32c.exe",sep=""))
dev2bitmap("test.png",type="png256",height=1600,width=1600,units="px",res=300)
     #  "png16m", "pdfwrite", "jpeg"
Sys.unsetenv("R_GSCMD")

GrDevices

encoding="ISOLatin1"
pdf("characters.pdf",encoding=encoding)
par(pty="s")
plot(c(0,16), c(0,16), type="n", xlab="", ylab="", xaxs="i", yaxs="i")
grid(16, 16, lty=1)
for(i in 0:15) {for(j in 0:15){points(i+.5, j+.5, pch=i*16+j)}}
dev.off()

Conversion

as.numeric(as.character(tft$tsh))
format(as.Date("2-Apr-2007","%d-%b-%Y"),"%m/%y") # "04/07"
format(Sys.Date(),"%d-%m[%b/%B]-%Y[/%y]")
     # "20-02[/Feb/February]-2008[/08]"
format(Sys.time(),"%H.%M:%S [%I %p] day:%w/7=%j/365; week=%W/52")
     # "11.45:21 [11 AM] day:3/7=051/365; week=07/52"
as.POSIXct(strptime(paste("12-Feb-07","16:30"),c("%d-%b-%y %H:%M")))
as.POSIXct("2007-02-07 16:30")

Lme

library(nlme)
l1=lme(fixed=lt~f,random=~1|pmi,data=tft)
summary(l1)$tTable[,"p-value"]
beta=l1$coef$fixed["f"]
tft$tshi=tft$lt-beta*tft$f
hist(scale(tft$tshi))

Lattice

xyplot(z~date|pmi,data=sel,type='o',cex=0.5,col=1,lwd=2,xlab="Date", ylab="z")
xyplot(z~date|pmi,data=sel,cex=0.5,col=1,
 panel=function(...){
 panel.grid(h=-1,v=-1)
 panel.abline(h=c(-2,0,2))
 panel.xyplot(...,type="o")
 panel.loess(...)
 })
xyplot(z~date|pmi,data=sel,cex=0.5,main="title",
 panel=function(x,y,...){
 panel.grid(h=-1,v=-1)
 panel.abline(h=c(-2,0,2))
 panel.xyplot(x,y,type=c("o","smooth"),col=1)
 panel.text(mean(x),5,round(mean(y),1),cex=.8,font=2)
 })
xyplot(z~date|pmi,sel,type=c('o','l'),
 grid::grid.prompt(TRUE),
 strip=strip.custom(par.strip.text=list(cex=.7,font=2)),
 panel=function(x,y,subscripts,...){
 panel.grid(...)
 panel.xyplot(x,y,type='o',col=1,cex=.5,lwd=2)
 panel.abline(h=c(-2,0,2))
 panel.text(as.Date("01-Jul-07","%d-%b-%y"),-10,
 trab$trab[match(sel$pmi[subscripts],trab$pmi)],cex=.8,font=2)
 panel.text(as.Date("01-Dec-05","%d-%b-%y"),5,
 format(trab$date[match(sel$pmi[subscripts],trab$pmi)],"%b-%y"),cex=.8,font=2)
 panel.abline(v=trab$date[match(sel$pmi[subscripts],trab$pmi)],lty=2)
 })
xyplot(z~date|pmi,sel,
  layout=c(2,2),
  grid::grid.prompt(TRUE),
  strip=strip.custom(
        bg="#eeeeee",
        fg="red",
        par.strip.text=list(cex=.7,font=2,col="blue")),
  #main="tft$n>5 & tft$z.min < -2 & tft$f.max <= 20",
  panel=function(x,y,subscripts,...){
    panel.abline(h=c(-2,0,2),lty=c(1,2,1))
    panel.rug(x)
    panel.text(
        current.panel.limits()$xlim[2],
        current.panel.limits()$ylim[2],
        panel.number(),adj=c(1,1),cex=1,col=2,font=2)
    panel.xyplot(x,y,type="o",lwd=2)
    for(i in 1:length(x)){
    panel.text(x[i],y[i],i,adj=c(.5,.5),cex=.8,font=2);
    panel.text(
        current.panel.limits()$xlim[1],
        current.panel.limits()$ylim[2],
        paste(i,"=",sel$ft4[subscripts[i]],"/",
            sel$tsh[subscripts[i]],":",
            sel$src[subscripts[i]]),adj=c(0,i*1.1),cex=.7)}
})
windows(5,5), #(10,6), #
xyplot(lp~cc|pmi,data=pth[pth$n>3,],
  layout=c(5,4), # (1,1), #
  grid::grid.prompt(TRUE),
  main="log(PTH) vs adj Calcium",
  panel=function(x,y,subscripts,...){
    panel.xyplot(x,y,type="o",col=1,pch=20,cex=1.2)
    panel.abline(h=log(pth.ref),v=ca.ref,lty=2)
    lrect(ca.lo,log(pth.lo),ca.hi,log(pth.hi),lwd=1)
    prob<-round(summary(l<-lm(y~x))$coef[2,4],2)
    panel.text(
       current.panel.limits()$xlim[1],
       current.panel.limits()$ylim[1],
       prob,adj=c(-0.1,-0.1),font=2)
    panel.text(
       current.panel.limits()$xlim[2],
       current.panel.limits()$ylim[2],
       panel.number(),adj=c(1,1),cex=1,col=2,font=2)
    if(prob<0.1) panel.lmline(x,y,lwd=2,...)
})

Distributions

d=probability {mass|density} fx for {discrete|continuous} distributions
e.g. dbinom(3,5,0.5) p(k=3) on n=5 coin flips, dnorm(3,0,1): p(x=3) for N(0,1)
p=cumulative distribution function F(x)=P(X<=x)
q=quantiles; e.g. qnorm(p=0.0025) # -1.96 with P(X<=-1.96)=0.025
r=random numbers from the given distribution.
norm[al], unif[orm], exp[onential], binom[ial], pois[son], gamma, weibull, t, F, chisq[uared]
dunif(x,min=0,max=1,log=FALSE)
punif(q,min=0,max=1,lower.tail=TRUE,log.p=FALSE)
qunif(p,min=0,max=1,lower.tail=TRUE,log.p=FALSE)
runif(n,min=0,max=1)
plot(density(ref))
lines(x<-seq(-1,5,.1),dnorm(x,mean(ref),sd(ref)),col=2,lwd=3,lty=2)

ROC

sel=tft.trab[!duplicated(tft.trab$pmi),]
sel=sel[order(sel$z.min),]
n=length(sel[[1]]);n
true=length(sel$pmi[sel$immu==TRUE]);pos
false=length(sel$pmi[sel$immu==FALSE]);neg
for(i in 1:n){
  t=table(sel$z.min<sel$z.min[i],sel$immu==TRUE)
  sel$tp[i]=ifelse(length(t)>2,t[2,2],NA)
  sel$tn[i]=t[1,1]
  sel$fp[i]=ifelse(length(t)>2,t[2,1],NA)
  sel$fn[i]=t[1,2]
  sel$pos[i]=margin.table(t,1)["TRUE"]
  sel$neg[i]=margin.table(t,1)["FALSE"]
  sel$odds[i]=ifelse(length(t)>2,fisher.test(t)$estimate,NA)
  sel$odds.ll[i]=ifelse(length(t)>2,fisher.test(t)$conf.int[1],NA)
  sel$odds.ul[i]=ifelse(length(t)>2,fisher.test(t)$conf.int[2],NA)
}
sel$sens=sel$tp/true
sel$spec=sel$tn/false
sel$ppv=sel$tp/sel$pos
sel$npv=sel$tn/sel$neg
sel$fpr=1-sel$spec
sel$accu=(sel$tp+sel$tn)/n
#----------ROC-------------------------------------------
sel$sens[1]=0
plot(sens~fpr,sel,xlim=0:1,ylim=0:1,type='o',lwd=2,pch=20)
abline(h=0:1,v=0:1,lty=2)
abline(0,1,lty=3)
for(i in 1:length(sel$sens)){
  text(sel$fpr[i],sel$sens[i],round(sel$z.min[i],1),
  cex=0.7,adj=c(0,1))
}
sel$delta.fpr=(c(diff(sel$fpr),1-max(sel$fpr)))
sel$increm=sel$sens*sel$delta.fpr
sum(sel$increm)
text(.5,.5,paste(round(100*sum(sel$increm),1),"%",sep=""),adj=c(0,1),font=2,cex=2)
#--------------------------------------------------------

Miscellaneous

plot(NA,NA,xlim=c(-100,100),ylim=c(-100,100))
abline(h=0,v=0)
for(i in 1:100){
     x=cumsum(rnorm(N<-1000))
     y=cumsum(rnorm(N))
     clr=rainbow(10)[i]
     lines(x,y,type = "l", lwd = 1,col=clr)
     lines(-x,y,type = "l", lwd = 1,col=clr)
     lines(-x,-y,type = "l", lwd = 1,col=clr)
     lines(x,-y,type = "l", lwd = 1,col=clr)
     lines(y,x,type = "l", lwd = 1,col=clr)
     lines(y,-x,type = "l", lwd = 1,col=clr)
     lines(-y,-x,type = "l", lwd = 1,col=clr)
     lines(-y,x,type = "l", lwd = 1,col=clr)
     Sys.sleep(.1)
}

Regular Expression

st.gh[which(st.gh$z.mean> -2),grep("bef",names(st.gh))]

#-------------------------------------------------------
plot(NA,NA,xlim=lim<-c(-(w<-200),w),ylim=lim,
 axes=F,xlab="",ylab="")
for(j in 1:(N<-20)){
x=c(0,0)
clr=rainbow(N)[sample(1:N,1)]
mtext("+",3,1,adj=(j-1)/N,cex=.5,col=clr)
for(i in 1:1000){
  d=rt(2,2) # rt(n=2,df=2)
  lines(c(x[1],x[1]+d[1]),c(x[2],x[2]+d[2]),type = "l", lwd = 1,col=clr)
  x=x+d
  Sys.sleep(.005)
}
polygon(c(-w,-w,w,w),c(-w,w,w,-w),
 col=rgb(1,1,1,.1),border=NA)
}
#-----------------------------------------------------------

XML

match("XML",.packages(all=T),nomatch=0)>0
install.packages("XML", repos = "http://www.omegahat.org/R")
library(XML)
xm<-xmlTreeParse(readLines(loc),useInternalNodes=TRUE)
xm.ab<-sapply(getNodeSet(xm,"//Abstract/AbstractText"),xmlValue)
free(xm)
Subpages (1): Examples
Comments