# Getting started to Working with R
#=========================R- Training 20150205=========================
# programmer: Antony Karanja: andungu@cgiar.org
#
# Purpose: an interactive walkthrough of base R -- vectors, matrices, random
# numbers, data frames, lists, exporting/importing data files and a few
# summary statistics. Run it from the top: later sections reuse objects
# (x, z, m, Example1) created in earlier ones.
#
# Requirements: the 'foreign' package for the Stata/SPSS files and,
# optionally, the 'xlsx' package for the Excel export.
# Side effects: may change the working directory, writes Example1.* files
# into it, opens help pages (?matrix, ?qr) and clears the global environment
# on the very last line.

#============== Preamble=======================================
# Object oriented: assigning data objects
3 + 5
results <- 3 + 5
results
x <- 3 + 5
x
x <- c(1.5, 3, 8, 12, 0, 0.8)
x
X <- c("A", "B", "C", "D")
X

# Basic Commands
x <- c(1, 3, 2, 5)
x
x <- c(1, 6, 2)
x
y <- c(1, 4, 3)
length(x)
length(y)
x + y
ls()
# rm(x, y)
# ls()

# From a list (vector) to a data frame
x2 <- as.data.frame(x)
class(x)
class(X) # Case sensitive: x and X are two different objects
length(x)
length(X)
(x + 1.5)
1:6
rep(c(1, 2), 8) # replicate
x > 7; x[3]; x[1:3]
# x[c[1:3]]
x[2] <- 4.5
x[7]       # position 7 does not exist yet, so this is NA
x[7] <- 28 # assigning past the end fills positions 4 to 6 with NA
x[x > 7]
(y <- x[x > 7])
x[2] <- NA
x[10] <- 20
x
is.na(x)
!is.na(x)
x[is.na(x)]
z <- which(!is.na(x)) # positions of the non-missing values: 1, 3, 7, 10
z

# DO YOU KNOW ANY R SHORTCUTS?
# (Ctrl+R means you want to run the cursor line)
# Ctrl+L, to clear the console
# Highlighting a selection to add brackets, quotes, ...

#-------------------------------------------Matrices-------------
#
#=================================================================
m <- matrix(nrow = 2, ncol = 3, data = 3:8)
m
dim(m)
m2 <- as.data.frame(m)
is.data.frame(m2)
m3 <- as.matrix(m2)
m[2, ] # second row
m[, 3]
m[, c(1, 3)]
m[1, 1] <- 20
m
args(matrix) # check the arguments and options of the matrix command

# Nine values are supplied for the 3 x 3 matrix. Passing only 3:8 would
# recycle the six values, make rows 1 and 3 identical (determinant 0) and
# cause solve(m) further down to stop with a "singular" error.
m <- matrix(nrow = 3, ncol = 3, data = c(3:10, 12), byrow = TRUE)
m

# Operations on a matrix ##########################################
# THINGS TO NOTE ON MATRIX OPERATION: WE NOTE THE FOLLOWING
?matrix
# elementwise:    +, -, *, /
# matrix algebra: +, -, %*%, solve(): solve Ax=b
m
t(m)    # transpose of m
det(m)  # determinant of m
diag(m) # diagonal of m
qr(m)   # QR decomposition of a matrix
?qr
eigen(m) # returns eigenvalues and eigenvectors
eigen(m)$vectors
svd(m) # singular value decomposition
minv <- solve(m) # inverse of m (needs a non-singular matrix)

# matrix example
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
x
x <- matrix(c(1, 2, 3, 4), 2, 2)
matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)
sqrt(x)
x^2

# Further Example
A <- matrix(1:16, 4, 4)
A
A[2, 3]
A[c(1, 3), c(2, 4)]
A[1:3, 2:4]
A[1:2, ]
A[, 1:2]
A[1, ]
A[-c(1, 3), ]              # negative indices drop rows 1 and 3
A[-c(1, 3), -c(1, 3, 4)]
dim(A)

# generate data from distributions
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = .1)
cor(x, y)
set.seed(1303) # fixing the seed makes the random draws reproducible
rnorm(50)
set.seed(3)
y <- rnorm(100)
mean(y)
var(y)
sqrt(var(y))
sd(y)
x <- rnorm(n = 8, mean = 100, sd = 10)
args(rnorm)
# z still holds the four positions found earlier (1, 3, 7, 10), so the three
# values are recycled with a warning and x grows from length 8 to length 10.
x[z] <- c(1, 2, 3)

#====================================Data Frames===============================
#.......................Data Entry.......................................
# LISTS=VECTOR
DAY <- c(1, 2, 4, 5, 6, 7, 4, 6, 7, 3, 65, 5)

# HOW DO WE TURN FROM LIST TO DATA FRAME
DAY1 <- as.data.frame(DAY)
# or
DAY2 <- data.frame(DAY)
# or
DAY3 <- data.frame(DAY = c(1, 2, 4, 5, 6, 7, 4, 6, 7, 3, 65, 5))

# HOW DO WE COMBINE LISTS-VECTORS
DAY4 <- cbind(DAY1, DAY2) # side by side (columns)
DAY5 <- rbind(DAY1, DAY2) # stacked (rows)

# TURNING A DF (DATA FRAMES) TO MATRICES AND BACK
DAY6 <- as.matrix(DAY1)
class(DAY6)
DAY7 <- as.data.frame(DAY6)
class(DAY7)

# CONVERTING LISTS TO DF
d <- data.frame(
  day = c("Mon", "Tue", "Wed", "Thu"),
  tot = c(3, 2, 5, 0),
  wet = c(TRUE, FALSE, FALSE, TRUE)
)
d
names(d); colnames(d)
d$day
d[, 3]
mylist <- list(vect = x, mat = m, df = d)
names(mylist)
length(mylist)
mylist[[1]]
mylist$mat
# or
mylist[[2]]
mylist[[2]][1, 3]

#==================================================================================
#
#     DATA IMPORTATION AND SMART SCRIPTING FOR REPLICATION
#
#==================================================================================
# Introduction to script
# Working folders
# The training folder only exists on the author's machine; elsewhere the
# script keeps the current working directory instead of stopping with an error.
training_dir <- "/Users/Antony/Documents/Antony/RMG stuffs/RTraining_ICRAF"
if (dir.exists(training_dir)) {
  setwd(training_dir)
} else {
  message("Training folder not found; files will be written to ", getwd())
}
getwd()

# BASE
# LIBRARIES
# Require installation first (only once and on internet):
# install.packages("package"), then load it with library("package")
# foreign -- import/export of other formats (.sav, .dta etc)
library(foreign)

# Here am generating random data from
# normal distribution according to the parameters
Example1 <- data.frame(
  var1 = rnorm(n = 40, mean = 100, sd = 10),
  var2 = rnorm(n = 40, mean = 100, sd = 10),
  var3 = rnorm(n = 40, mean = 100, sd = 10),
  var4 = rnorm(n = 40, mean = 100, sd = 10)
)

# Create Stata file
write.dta(dataframe = Example1, file = "Example1.dta", version = 10)
# Create SPSS files: a data file plus the .sps syntax that reads it. The data
# file has its own name so the tab-separated export below cannot overwrite it.
write.foreign(Example1, "Example1_spss.txt", "Example1.sps", package = "SPSS")
# Create Text file
write.table(Example1, "Example1.txt", sep = "\t")
# Create an xlsx file (needs the optional 'xlsx' package)
if (requireNamespace("xlsx", quietly = TRUE)) {
  xlsx::write.xlsx(Example1, file = "Example1.xlsx", sheetName = "Data")
} else {
  message("Package 'xlsx' is not installed; skipping Example1.xlsx")
}
# Create a csv file
write.csv(Example1, file = "Example1.csv")

#----------------------------------------------------------------
####################### IMPORTING DATA###########################
#----------------------------------------------------------------
# read.csv, read.table-----Base data importation function
Data1_Example1 <- read.csv(file = "Example1.csv", header = TRUE)
Data2_Example1 <- read.table(file = "Example1.txt", header = TRUE, sep = "\t")
# read.xlsx, read.dta, read.spss --from foreign, xlsx,.. library
Data3_Example1 <- read.dta(file = "Example1.dta")
# Now load/read data of format of choice
#....................
class(Data1_Example1)
dim(Data1_Example1) # no.obs (no rows) and no variables (col)
# Data1_Example1[r,c] #replace
# Data Manipulation - Beginners

#----------------------------------------------------------------
####################### START DATA STATISTICS###########################
#----------------------------------------------------------------
mean(Example1$var2); mean(Example1$var3); mean(Example1$var4)
vapply(Example1, mean, numeric(1)) # mean of every column in one call

# Add variables
Example1$Group <- rep(c(1, 2), 20)

# Deleting
row.names(Example1) <- NULL
# Example1$var<-NULL
rm(list = ls()) # Remove all the data objects