Links
Contents - 1 Links
- 2 Installation, Startup
- 3 Running with script and arguments from command line
- 4 Loading data
- 4.1 Vectors with: x=c(5,3,4,4,5)
- 4.2 Data.frames & Tables: df=data.frame(h=c(1,2,4),w=c(3,6,9),s=c("M","F","F")) sec=data.frame(h=c(1,4,1,1,2,1,4,2))
- 4.3 Sets
- 4.4 Matrices
- 4.5 Ref cards:
- 4.6 Help
- 4.7 Input and output
- 4.8 Managing variables and objects
- 4.9 Control flow
- 4.10 Arithmetic / Logic
- 4.11 Regular Expressions
- 4.12 Statistics
- 4.13 Basic statistical analysis
- 4.14 GAM
- 4.15 Graphics [http]
- 4.16 image, contour, kde2d, chull, persp
- 4.17 GhostScript [src]
- 4.18 GrDevices
- 4.19 Conversion
- 4.20 Lme
- 4.21 Lattice
- 5 Distributions
- 6 ROC
- 7 Miscellaneous
- 8 Regular Expression
- 9 XML
Installation, Startup
"C:\Program Files\R\R-2.6.1\bin\Rgui.exe" "--internet2" (uses IE internals)
Running with script and arguments from command line
Loading data
loc=paste("C:\\Documents and Settings\\",Sys.info()["login"],"\\My Documents\\R\\",sep="")
tft=read.csv(paste(loc,"tft.csv",sep=""))
tft=read.csv(file.choose())
tft=read.csv("http://...")
tft=data.frame(read.delim(loc.Thy, header=TRUE, sep="\t", quote="\"'")) load(url(url.to.zip.file)) # e.g. http://www.test.com/data.zip
Vectors with: x=c(5,3,4,4,5)
c(1:3+1) # 2 3 4
seq(4,6); seq(1,6,2);seq(1,6,length=3) # [1] 4 5 6 [1] 1 3 5 [1] 1 3.5 6
rep(c(3,4),2);rep(c(3,4),each=2) # [1] 3 4 3 4 [1] 3 3 4 4
c(which.max(x),which.min(x)) # 1 2
match(4,x,nomatch=0) # 3
mean(x); range(x)==c(min(x),max(x)) # TRUE TRUE
c(sum(x), prod(x)) # 21 1200
median(c(x,NA),na.rm=TRUE) # 4
quantile(x) # ("0%") 3 ("25%") 4 ("50%") 4 ("75%") 5 ("100%") 5
quantile(x,probs=c(.05,.1)) # 3.2 3.4
c(var(x),sd(x)) # 0.70 0.84
rev(sort(x)) # 5 5 4 4 3
cut(x,2,labels=c("L","H")) # H L L L H
table(c(5,5,3,4,4)) # 1 2 2 (dimnames "3" "4" "5")
table(x<5) # ("FALSE") 2 ("TRUE") 3
prop.table(table(x<5)) # ("FALSE") 0.4 ("TRUE") 0.6
sample(x,4,replace=TRUE) # 4 4 3 5
diff(x) # -2 1 0 1
rank(x) # 4.5 1.0 2.5 2.5 4.5
cumsum(x) # 5 8 12 16 21
cummin(x) # 5 3 3 3 3
cummax(x) # 5 5 5 5 5
as.vector(round(scale(x),1)) # 1.0 -1.4 -0.2 -0.2 1.0
Data.frames & Tables: df=data.frame(h=c(1,2,4),w=c(3,6,9),s=c("M","F","F")) sec=data.frame(h=c(1,4,1,1,2,1,4,2))
Lookup s-value as per allocation in df, and write into sec$s: sec$s=df$s[match(sec$h,df$h)] # c("M","F","M","M","F","M","F","F") by(df[,1],df$s,sum) # ("F":) 6 ("M":) 1 tapply(df$h,df$s,mean) # ("F") 3 ("M") 1 tapply(df$h,df$s,mean)[df$s] # ("M") 1 ("F") 3 ("F") 3 ave(df$h,df$s,FUN=min) # 1 2 2 merge(df,dg=data.frame(x=c(1,2),s=c("F","F")),all.x=TRUE) # df$h=c(1,2,4) # dg$x=c(1,2) # merge(df,dg,all.x=TRUE)$s=c("F","F","F","M","M") # df$w=c(3,6,9) # dg$s=c("F","F") # merge(df,dg,all.x=TRUE)$h=c(2,2,4,4,1) # df$s=c("M","F","F") # # merge(df,dg,all.x=TRUE)$w=c(6,6,9,9,3) # # merge(df,dg,all.x=TRUE)$x=c(1,2,1,2,NA) t<-table(df$h<2,df$w>3) # t[1,2]: count of all df-rows with (NOT h<2) AND (w>3) # -> column1: "NOT w>3" | column2: "w>3" || row1: "NOT h<2" | row2: "h<2"
Sets
union(c(1,2),c(2,3)) # 1 2 3
intersect(c(1,2),c(2,3)) # 2
setdiff(c(1,2),c(2,3)) # 1
setequal(c(1,2),c(2,3)) # FALSE
is.element(1,c(1,2)) # TRUE
1 %in% c(1,2) # TRUE
Matrices
m=matrix(c(1,2,4,3),nrow=2)
m; m %*% c(1,2); t(m)
# 1 2 9 1 4
# 4 3 8 2 3
diag(m) # 1 3
t(solve(m,c(1,2))) # 1 0
rowSums(m) # 5 5 (old: rowsum(x), colsum(x))
colMeans(m) # 1.5 3.5
Ref cards:
INSTALL package1: install package1
m1[,2]: column 2 of matrix m1 m1[,2:5] or m1[,c(2,3,4,5)]: columns 2–5
m1$a1: variable a1 in data frame m1
NA: missing data
is.na: true if data missing
library(mva): load (e.g.) the mva package
.packages(all.available=TRUE): names of currently attached (locally available) packages
Help
help(package=mva): help with (e.g.) package mva
apropos("topic1") and help.search("topic1"): commands relevant to topic1
example(command1): examples of command1
Input and output
source("file1"): run the commands in file1.
read.table("file1"): read in data from file1
data.entry(): spreadsheet
scan(x1): read a vector x1
download.file("url1"): from internet url.show("url1"),
read.table.url("url1"): remote input
sink("file1"): output to file1, until sink()
write(object1, "file1"): writes object1 to file1
write.csv(df,paste(loc,"df.csv",sep=""),row.names=FALSE)
write.table(dataframe1,"file1"): writes a table
loc=paste("C:\\Documents and Settings\\",Sys.info()["login"],"\\My Documents\\R\\",sep="")
normalizePath(R.home(component="home")): directory of the R installation (in readable format)
win.print(df)
Managing variables and objects
attach(x1); detach(x1): put|remove x1 in search path
ls(); ls.str(); ls(pattern="^r.*"): lists [str] all the active objects [according to pattern].
str(object1): print useful information about object1
getwd(); setwd(dir): gets|sets working directory
rm(object1): remove object1
dim(matrix1): dimensions of matrix1
dimnames(x1): names of dimensions of x1
length(vector1): length of vector1
1:3=c(1,2,3)
rep(x1,n1): repeats the vector x1 n1 times
cbind(a1,b1,c1); rbind(a1,b1,c1): binds columns|rows into a matrix
merge(df1,df2): merge data frames
matrix(vector1,r1,c1): make vector1 into a matrix with r1 rows and c1 columns
data.frame(v1,v2): make a data frame from vectors v1 and v2
as.factor(), as.matrix(), as.vector(): conversion
is.factor(), is.matrix(), is.vector(): what it is
t(): switch rows and columns ['transpose']
which(x1==a1): returns indices of x1 where x1==a1
Control flow
for (i1 in vector1): repeat what follows
if (condition1) ...else ...: conditional
Arithmetic / Logic
%*%: matrix multiplication
%/%, ^, %%, sqrt(): integer division, power, modulus, square root
c(0,1) & c(1,1) # FALSE TRUE [i.e. element-wise]
c(0,1) && c(1,1) # FALSE [only first element]
Regular Expressions
t=tft$t[grep("<",tft$t)] # "<0.003"
gsub("[[:space:][:punct:]]","|","a and b.") # "a|and|b|"
spl<-strsplit(paste(tolower(xm.ti),collapse=" "),split=c("(--|[[:space:][:digit:]:.,;/()\"])"))
[:alnum:] Alphanumeric chars: [:alpha:] and [:digit:].
[:alpha:] Alphabetic chars: [:lower:] and [:upper:].
[:blank:] Blank chars: space and tab.
[:cntrl:] Control chars. In ASCII, these characters have octal codes 000 through 037, and 177 (DEL). In another character set, these are the equivalent characters, if any.
[:digit:] Digits: 0 1 2 3 4 5 6 7 8 9.
[:graph:] Graphical chars: [:alnum:] and [:punct:].
[:lower:] Lower-case letters in the current locale.
[:print:] Printable characters: [:alnum:], [:punct:] and space.
[:punct:] Punctuation chars: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~.
[:space:] Space chars: tab, newline, vertical tab, form feed, carriage return, and space. [:upper:] Upper-case letters in the current locale.
[:xdigit:] Hexadecimal digits: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f.
Statistics
max(), min(), mean(), median(), sum(), var(): as named
summary(data.frame): prints statistics rank(), sort() rank and sort
ave(x1,y1): averages of x1 grouped by factor y1
by(): apply function to data frame by factor
apply(x1,n1,function1): apply function1 (e.g. mean) to x by rows (n1=1) or columns (n2=2)
tapply(x1,list1,function1): apply function to x1 by list1
table(): make a table
tabulate(): tabulate a vector
t=table(sel$trab < 1.5,sel$z.min > -7);t;summary(t)
fisher.test(t) # OR = 13.1 (2.7-90.0) p=0.00020
Basic statistical analysis
aov(), anova(), lm(), glm(): (generalized) linear models, anova
t.test(): t test
prop.test(), binom.test(): sign test
chisq.test(x1): chi-square test on matrix x1
fisher.test(): Fisher exact test
cor(a): show correlations
cor.test(a,b): test correlation
friedman.test(): Friedman test
GAM
library(gam)
summary(g<-gam(immu~s(z.min,4),sel,family="binomial"));
plot(g$data$z.min,g$fitted,ylim=c(0,1),cex=.5,type="p",
xlab="z.min",ylab="predicted TRAB positivity",font.lab=2);grid()
lines(g$data$z.min[order(g$data$z.min)],
g$fitted[order(g$data$z.min)],lty=2)
l=glm(immu~z.min,sel,family="binomial")
lines(l$data$z.min[order(l$data$z.min)],
l$fitted[order(l$data$z.min)],lwd=3)
rug(l$data$z.min,ticksize=0.02)
Graphics [http]
plot(), barplot(), boxplot(), stem(), hist(): basic plots
matplot(): matrix plot
pairs(matrix): scatterplots
coplot(): conditional plot
stripplot(): strip plot
qqplot(): quantile-quantile plot
qqnorm(), qqline(): fit normal distribution
b<-barplot(rev(f[1:(n<-30)]),horiz=T,col="pink",border=NA,names.arg="")
text(0.2,b,rev(names(f[1:n])),adj=c(0,.5),cex=.8,font=2)
plot(z.min~log(trab),sel,type=c("p"));grid()
loe=loess(z.min~log(trab),sel,span=.75)
lines(loe$x[order(loe$x)],loe$fitted[order(loe$x)],lwd=2)
par()usr # plotting region =c(xmin,xmax,ymin,ymax) up to axes
windows(5,5);plot.new()
stripchart(list(
round(4*(s.false<-sel$z.min[sel$immu==FALSE]))/4,
round(4*(s.true<-sel$z.min[sel$immu==TRUE]))/4),
pch=20,cex=1.5,
method="stack",col=3:2,
ylim=c(.5,2.5),
group.names=c("TRAB-","TRAB+"))
lines(rep(median(s.true),2),c(1.7,1.9),lwd=3,col=2)
lines(rep(median(s.false),2),c(0.7,.9),lwd=3,col=3)
image, contour, kde2d, chull, persp
k=100
x=seq(5,30,length=k);x
y=exp(seq(log(0.2),log(10),length=k));y
z=matrix(rep(NA,k*k),ncol=k,nrow=k)
z=matrix(fx.gh.fail(fx.z(x[col(z)],y[row(z)])),ncol=k,nrow=k,byrow=T);z
image(x,y,z,add=T,col=rev(heat.colors(100))) # similar to the following code:
#for(i in 1:(k-1)){for(j in 1:(k-1)){
# polygon(c(x[i],x[i],x[i+1],x[i+1]),c(y[j],y[j+1],y[j+1],y[j]),
# col=rev(heat.colors(100))[1+99*z[i,j]],border=NA)}}
#--------------------------
smp=tft[,c("f","t")]
xy=smp[chull(smp),] # convex hull
polygon(xy,col=rgb(.5,1,1,alpha=.5),lwd=2)
#---------------------------
library(MASS)
z=kde2d(tft$f,tft$t,n=10)
image(z,col=gray(30:0/30),add=TRUE) #topo.colors(30) terrain.colors(20) heat.colors(10)
contour(z,nlevels=8,col="blue",drawlabels=FALSE,add=TRUE,lwd=2)
persp(z, phi=60, theta=0, xlab="fT4", ylab="log(TSH)", log="y",zlab="density",
col="white", shade=.2, border=NA, main="Perspective plot")
GhostScript [src]
Sys.getenv("R_GSCMD")
Sys.setenv(R_GSCMD=paste(normalizePath(getwd()),"\\gs\\gs7.00\\bin\\gswin32c.exe",sep=""))
dev2bitmap("test.png",type="png256",height=1600,width=1600,units="px",res=300)
# "png16m", "pdfwrite", "jpeg"
Sys.unsetenv("R_GSCMD")
GrDevices
encoding="ISOLatin1"
pdf("characters.pdf",encoding=encoding)
par(pty="s")
plot(c(0,16), c(0,16), type="n", xlab="", ylab="", xaxs="i", yaxs="i")
grid(16, 16, lty=1)
for(i in 0:15) {for(j in 0:15){points(i+.5, j+.5, pch=i*16+j)}}
dev.off()
Conversion
as.numeric(as.character(tft$tsh))
format(as.Date("2-Apr-2007","%d-%b-%Y"),"%m/%y") # "04/07"
format(Sys.Date(),"%d-%m[%b/%B]-%Y[/%y]")
# "20-02[/Feb/February]-2008[/08]"
format(Sys.time(),"%H.%M:%S [%I %p] day:%w/7=%j/365; week=%W/52")
# "11.45:21 [11 AM] day:3/7=051/365; week=07/52"
as.POSIXct(strptime(paste("12-Feb-07","16:30"),c("%d-%b-%y %H:%M")))
as.POSIXct("2007-02-07 16:30")
Lme
library(nlme)
l1=lme(fixed=lt~f,random=~1|pmi,data=tft)
summary(l1)$tTable[,"p-value"]
beta=l1$coef$fixed["f"]
tft$tshi=tft$lt-beta*tft$f
hist(scale(tft$tshi))
Lattice
xyplot(z~date|pmi,data=sel,type='o',cex=0.5,col=1,lwd=2,xlab="Date", ylab="z")
xyplot(z~date|pmi,data=sel,cex=0.5,col=1,
panel=function(...){
panel.grid(h=-1,v=-1)
panel.abline(h=c(-2,0,2))
panel.xyplot(...,type="o")
panel.loess(...)
})
xyplot(z~date|pmi,data=sel,cex=0.5,main="title",
panel=function(x,y,...){
panel.grid(h=-1,v=-1)
panel.abline(h=c(-2,0,2))
panel.xyplot(x,y,type=c("o","smooth"),col=1)
panel.text(mean(x),5,round(mean(y),1),cex=.8,font=2)
})
xyplot(z~date|pmi,sel,type=c('o','l'),
grid::grid.prompt(TRUE),
strip=strip.custom(par.strip.text=list(cex=.7,font=2)),
panel=function(x,y,subscripts,...){
panel.grid(...)
panel.xyplot(x,y,type='o',col=1,cex=.5,lwd=2)
panel.abline(h=c(-2,0,2))
panel.text(as.Date("01-Jul-07","%d-%b-%y"),-10,
trab$trab[match(sel$pmi[subscripts],trab$pmi)],cex=.8,font=2)
panel.text(as.Date("01-Dec-05","%d-%b-%y"),5,
format(trab$date[match(sel$pmi[subscripts],trab$pmi)],"%b-%y"),cex=.8,font=2)
panel.abline(v=trab$date[match(sel$pmi[subscripts],trab$pmi)],lty=2)
})
xyplot(z~date|pmi,sel,
layout=c(2,2),
grid::grid.prompt(TRUE),
strip=strip.custom(
bg="#eeeeee",
fg="red",
par.strip.text=list(cex=.7,font=2,col="blue")),
#main="tft$n>5 & tft$z.min < -2 & tft$f.max <= 20",
panel=function(x,y,subscripts,...){
panel.abline(h=c(-2,0,2),lty=c(1,2,1))
panel.rug(x)
panel.text(
current.panel.limits()$xlim[2],
current.panel.limits()$ylim[2],
panel.number(),adj=c(1,1),cex=1,col=2,font=2)
panel.xyplot(x,y,type="o",lwd=2)
for(i in 1:length(x)){
panel.text(x[i],y[i],i,adj=c(.5,.5),cex=.8,font=2);
panel.text(
current.panel.limits()$xlim[1],
current.panel.limits()$ylim[2],
paste(i,"=",sel$ft4[subscripts[i]],"/",
sel$tsh[subscripts[i]],":",
sel$src[subscripts[i]]),adj=c(0,i*1.1),cex=.7)}
})
windows(5,5), #(10,6), #
xyplot(lp~cc|pmi,data=pth[pth$n>3,],
layout=c(5,4), # (1,1), #
grid::grid.prompt(TRUE),
main="log(PTH) vs adj Calcium",
panel=function(x,y,subscripts,...){
panel.xyplot(x,y,type="o",col=1,pch=20,cex=1.2)
panel.abline(h=log(pth.ref),v=ca.ref,lty=2)
lrect(ca.lo,log(pth.lo),ca.hi,log(pth.hi),lwd=1)
prob<-round(summary(l<-lm(y~x))$coef[2,4],2)
panel.text(
current.panel.limits()$xlim[1],
current.panel.limits()$ylim[1],
prob,adj=c(-0.1,-0.1),font=2)
panel.text(
current.panel.limits()$xlim[2],
current.panel.limits()$ylim[2],
panel.number(),adj=c(1,1),cex=1,col=2,font=2)
if(prob<0.1) panel.lmline(x,y,lwd=2,...)
})
Distributions
d=probability {mass|density} fx for {discrete|continuous} distributions
e.g. dbinom(3,5,0.5) p(k=3) on n=5 coin flips, dnorm(3,0,1): p(x=3) for N(0,1)
p=cumulative distribution function F(x)=P(X<=x)
q=quantiles; e.g. qnorm(p=0.0025) # -1.96 with P(X<=-1.96)=0.025 r=random numbers from the given distribution.
norm[al], unif[orm], exp[onential], binom[ial], pois[son], gamma, weibull, t, F, chisq[uared] dunif(x,min=0,max=1,log=FALSE)punif(q,min=0,max=1,lower.tail=TRUE,log.p=FALSE) qunif(p,min=0,max=1,lower.tail=TRUE,log.p=FALSE) runif(n,min=0,max=1)
plot(density(ref))
lines(x<-seq(-1,5,.1),dnorm(x,mean(ref),sd(ref)),col=2,lwd=3,lty=2)
ROC
sel=tft.trab[!duplicated(tft.trab$pmi),]
sel=sel[order(sel$z.min),] n=length(sel[[1]]);n
true=length(sel$pmi[sel$immu==TRUE]);pos
false=length(sel$pmi[sel$immu==FALSE]);neg
for(i in 1:n){
t=table(sel$z.min<sel$z.min[i],sel$immu==TRUE)
sel$tp[i]=ifelse(length(t)>2,t[2,2],NA)
sel$tn[i]=t[1,1]
sel$fp[i]=ifelse(length(t)>2,t[2,1],NA)
sel$fn[i]=t[1,2]
sel$pos[i]=margin.table(t,1)["TRUE"]
sel$neg[i]=margin.table(t,1)["FALSE"]
sel$odds[i]=ifelse(length(t)>2,fisher.test(t)$estimate,NA)
sel$odds.ll[i]=ifelse(length(t)>2,fisher.test(t)$conf.int[1],NA)
sel$odds.ul[i]=ifelse(length(t)>2,fisher.test(t)$conf.int[2],NA)
}
sel$sens=sel$tp/true
sel$spec=sel$tn/false
sel$ppv=sel$tp/sel$pos
sel$npv=sel$tn/sel$neg
sel$fpr=1-sel$spec
sel$accu=(sel$tp+sel$tn)/n #----------ROC-------------------------------------------
sel$sens[1]=0
plot(sens~fpr,sel,xlim=0:1,ylim=0:1,type='o',lwd=2,pch=20)
abline(h=0:1,v=0:1,lty=2)
abline(0,1,lty=3)
for(i in 1:length(sel$sens)){
text(sel$fpr[i],sel$sens[i],round(sel$z.min[i],1),
cex=0.7,adj=c(0,1))
}
sel$delta.fpr=(c(diff(sel$fpr),1-max(sel$fpr)))
sel$increm=sel$sens*sel$delta.fpr
sum(sel$increm)
text(.5,.5,paste(round(100*sum(sel$increm),1),"%",sep=""),adj=c(0,1),font=2,cex=2)
#--------------------------------------------------------
Miscellaneous
plot(NA,NA,xlim=c(-100,100),ylim=c(-100,100))
abline(h=0,v=0)
for(i in 1:100){
x=cumsum(rnorm(N<-1000))
y=cumsum(rnorm(N))
clr=rainbow(10)[i]
lines(x,y,type = "l", lwd = 1,col=clr)
lines(-x,y,type = "l", lwd = 1,col=clr)
lines(-x,-y,type = "l", lwd = 1,col=clr)
lines(x,-y,type = "l", lwd = 1,col=clr)
lines(y,x,type = "l", lwd = 1,col=clr)
lines(y,-x,type = "l", lwd = 1,col=clr)
lines(-y,-x,type = "l", lwd = 1,col=clr)
lines(-y,x,type = "l", lwd = 1,col=clr)
Sys.sleep(.1)
}
Regular Expression
st.gh[which(st.gh$z.mean> -2),grep("bef",names(st.gh))]
#-------------------------------------------------------
plot(NA,NA,xlim=lim<-c(-(w<-200),w),ylim=lim,
axes=F,xlab="",ylab="")
for(j in 1:(N<-20)){
x=c(0,0)
clr=rainbow(N)[sample(1:N,1)]
mtext("+",3,1,adj=(j-1)/N,cex=.5,col=clr)
for(i in 1:1000){
d=rt(2,2) # rt(n=2,df=2)
lines(c(x[1],x[1]+d[1]),c(x[2],x[2]+d[2]),type = "l", lwd = 1,col=clr)
x=x+d
Sys.sleep(.005)
}
polygon(c(-w,-w,w,w),c(-w,w,w,-w),
col=rgb(1,1,1,.1),border=NA)
}
#-----------------------------------------------------------
XML
match("XML",.packages(all=T),nomatch=0)>0
library(XML) xm<-xmlTreeParse(readLines(loc),useInternalNodes=TRUE)
xm.ab<-sapply(getNodeSet(xm,"//Abstract/AbstractText"),xmlValue)
free(xm) |