If you remember, Tor introduced the "dplyr" package back in seminar 7. This week, Antony Ndungu (Research Method Group) continued with "dplyr" and worked through some examples. You can see his presentation below.
The source code for the 9th seminar is shown below.
#Antony Ndungu, RMG
#R Seminar
#Using dplyr
#......................................................................
# ---------------------------------------------------------------------
# Session set-up: working directory, packages and the seminar data set
# ---------------------------------------------------------------------
# The data folder below is specific to the presenter's machine. Only switch
# to it when it exists, so the script can also be run from any directory
# that already contains "datavis.csv" instead of stopping at setwd().
data_dir <- "/Users/Antony/Documents/Antony/RMG stuffs/RTraining_ICRAF/Data/"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Data folder not found; reading datavis.csv from ", getwd())
}
############################################
# library(dplyr) # install.packages("dplyr")
############################################
library(dplyr)
library(ggplot2)
###################
# IMPORT datasets #
###################
# header = TRUE: the first row of the CSV is read as the column names.
# (TRUE is spelled out because T is an ordinary variable that can be
# reassigned.)
tree <- read.csv(file = "datavis.csv", header = TRUE)
# ---- First look at the imported data --------------------------------
# Column names, obtained in two equivalent ways.
names(x = tree)
colnames(x = tree)
# First and last six rows.
head(x = tree, n = 6L)
tail(x = tree, n = 6L)

# ---- Class of the R object ------------------------------------------
class(x = tree)

# ---- Internal structure of the R object -----------------------------
# str() is the base R view; glimpse() is the dplyr counterpart.
str(object = tree)
glimpse(tree)

# ---- Class of every column ------------------------------------------
sapply(X = tree, FUN = class) # horizontal view
lapply(X = tree, FUN = class) # vertical view (one list element per column)
##############################
# LOOK FOR DUPLICATE RECORDS #
##############################
# Columns that together are used to decide whether two records are the same.
key_cols <- c("Country", "Site", "PosTopoSeq")
# duplicated() returns one TRUE/FALSE per row, flagging every row whose key
# combination already appeared further up, so ALL repeated records are kept
# here. (anyDuplicated() returns only the index of the first repeat, or 0
# when there is none, so it cannot be used as a row filter.)
is_dup <- duplicated(tree[key_cols])
duplicates <- tree[is_dup, ] # Base function
#------------------------------------------
# To Drop dups and have a non-duplicated DF
#------------------------------------------
# Keep only the first occurrence of each key combination.
tree_nodups <- tree[!is_dup, ]
##############################
# dplyr functions #
##############################
# 1.0 filter(): keep the rows that satisfy a condition.
#     Separate conditions with a comma for AND, or combine them with | for OR.
table(tree$Country)
Nicaragua <- tree %>% filter(Country == "Nicaragua")
SA <- tree %>% filter(Country == "South Africa")
# 1.1 slice(): keep rows by position (here rows 1 to 16).
Nicaragua2 <- tree %>% slice(1:16)
# 2.0 arrange(): sort rows by one or more columns.
# Printed only:
tree %>% arrange(Site, PosTopoSeq, VegStructure)
# Stored:
tree_arr <- tree %>% arrange(Site, PosTopoSeq, VegStructure)
# desc() sorts Site in descending order; this overwrites tree_arr.
tree_arr <- tree %>% arrange(desc(Site), PosTopoSeq, VegStructure)
# Base R counterparts, built on order().
tree[with(tree, order(Site, PosTopoSeq, VegStructure)), ]
tree[with(tree, order(desc(Site), PosTopoSeq, VegStructure)), ]
# Time the dplyr and base approaches to see which one is faster.
system.time(arrange(tree, Site, PosTopoSeq, VegStructure))
system.time(tree[order(tree$Site, tree$PosTopoSeq, tree$VegStructure), ])
#2.0 #### select
# select() picks COLUMNS by name; it does not test the values in a column.
tree_select <- select(
  tree, Country, SEVEREERO, avSlope, avTreeDen, Carbon, pH, Clay
)
# A row condition such as pH >= 5 is not a column selection, so
#   select(tree, Country, SEVEREERO, avSlope, avTreeDen, Carbon, pH>=5, Clay)
# stops with an error (and would halt the script when it is sourced).
# To restrict on values, filter the rows first and then select the columns.
tree_select <- tree %>%
  filter(pH >= 5) %>%
  select(Country, SEVEREERO, avSlope, avTreeDen, Carbon, pH, Clay)
# A minus sign drops columns: listed by name, or as a contiguous range.
tree_select <- select(tree, -c(Site, PosTopoSeq, VegStructure))
tree_select <- select(tree, -(Site:VegStructure))
#2.0.1 select and helper functions
# Keep variables or drop if negative sign (-)
select(tree, everything())
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
select(tree, starts_with("av", ignore.case = TRUE), starts_with("C"))
select(tree, ends_with("e"))
select(tree, contains("p"))
select(tree, matches("av"))
# 2.1 rename(): written as new_name = old_name.
tree_rename <- tree %>% rename(Slope = avSlope)
tree_rename <- tree %>% rename(Slope = avSlope, TreeDen = avTreeDen)
# 3.0 distinct(): unique rows of the whole table, then of three columns only.
tree_distinct <- tree %>% distinct()
tree_distinct <- tree %>%
  select(Country, Site, PosTopoSeq) %>%
  distinct()
# 4.0 mutate(): add computed columns to the existing ones.
tree_mute <- tree %>%
  mutate(Acidbase = 7 - pH, clay.cover = Clay / avTreeDen)
# 4.0.1 transmute(): same computations; note that this overwrites tree_mute.
tree_mute <- tree %>%
  transmute(Acidbase = 7 - pH, clay.cover = Clay / avTreeDen)
# 5.0 sample_n(): draw 10 rows at random, without replacement.
tree %>% sample_n(size = 10, replace = FALSE)
# 5.0.1 sample_frac(): draw a random fraction (0.1) of the rows.
tree %>% sample_frac(size = 0.1)
##############################
# Statistics using dplyr functions #
##############################
# Whole-table summary: row count plus mean Carbon, mean Clay and median pH,
# ignoring missing values.
summarise(
  tree,
  count = n(),
  MeanCarb = mean(Carbon, na.rm = TRUE),
  MeanClay = mean(Clay, na.rm = TRUE),
  medPh = median(pH, na.rm = TRUE)
)
#Grouped summary
# One row per Country / Site / SEVEREERO combination.
tree.summary <- tree %>%
  group_by(Country, Site, SEVEREERO) %>%
  summarise(
    count = n(),
    meanC = mean(Carbon, na.rm = TRUE),
    meanClay = mean(Clay, na.rm = TRUE),
    sdC = sd(Carbon, na.rm = TRUE),
    sdClay = sd(Clay, na.rm = TRUE),
    medPh = median(pH, na.rm = TRUE)
  ) %>%
  # summarise() removes only the last grouping level; ungroup() so later
  # verbs applied to tree.summary are not silently computed per group.
  ungroup()
##############################
# Graphics using ggplot2 #
##############################
# Plots are wrapped in print() throughout so that they are also drawn when
# the script is run with source(), not only when typed at the console.
# Scatter of Carbon against Clay; point size = tree density, colour = Site.
print(
  ggplot(tree) +
    geom_point(aes(x = Clay, y = Carbon, size = avTreeDen, colour = Site))
)
# Graphics produced using ggplot2 can be stored as objects...
p1 <- ggplot(tree) +
  geom_point(aes(x = Clay, y = Carbon, size = avTreeDen)) # Note the size argument!
print(p1)
# And then used to overlay or combine with other graphs
# for example a smoother
# The smoother is mapped to the same x variable (Clay) as the points in p1,
# so that the fitted curve and the scatter share one x-axis.
print(p1 + stat_smooth(aes(x = Clay, y = Carbon)))
## Theme the plot/graphic and store as object p2
p2 <- p1 +
  stat_smooth(aes(x = Clay, y = Carbon)) +
  theme_minimal(base_size = 16)
print(p2)
# And then overlay the means calculated above on p2, coloured by site
print(
  p2 +
    geom_point(
      data = tree.summary,
      aes(x = meanClay, y = meanC, colour = Site),
      size = 20,
      alpha = 0.5
    )
)
####################################################
Comments
No comments yet.