# Download the datavis csv (datavis.csv) and save it in your working directory.
# Faith Musili
# R brownbag seminar November 10th, 2016
# Set your working directory to the folder that contains datavis.csv;
# the file is read below using a relative path.

### Install package dplyr
# Install only when dplyr is not already available, so that re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

### Load dplyr package into R
library(dplyr)

### Read your csv data into R
data <- read.csv("datavis.csv")

# Quick inspection of the imported object
class(data)     # class of the imported object
head(data)      # first rows
tail(data)      # last rows
dim(data)       # number of rows and columns
colnames(data)  # column names
### select(): choose columns
# Keep only the Country, Site and VegStructure columns.
Select_data1 <- select(
  data,
  Country, Site, VegStructure
)
Select_data1

# Keep every column except one (here SEVEREERO is dropped).
Select_data2 <- select(data, -SEVEREERO)
head(Select_data2)

# Keep a range of columns by name: everything from Country through avSlope.
Select_data3 <- select(data, Country:avSlope)
tail(Select_data3)

# Keep every column whose name starts with the character string "C".
Select_data4 <- select(
  data,
  starts_with("C")
)
head(Select_data4)

# Keep every column whose name ends with the character string "n".
Select_data5 <- select(
  data,
  ends_with("n")
)
tail(Select_data5)

# Keep every column whose name contains the character string "H".
Select_data6 <- select(
  data,
  contains("H")
)
tail(Select_data6)
##### filter(): choose rows
# Keep the rows for sites whose vegetation structure is "Wooded grassland".
filter1 <- filter(
  data,
  VegStructure == "Wooded grassland"
)
filter1

# Keep the "Wooded grassland" rows whose Clay value is at least 72.
# NOTE(review): the original comment said "greater than 70", which does not
# match the `Clay >= 72` condition below -- confirm the intended threshold.
filter2 <- filter(
  data,
  VegStructure == "Wooded grassland",
  Clay >= 72
)
filter2
### Pipe operator: %>% ----- chains several function calls into one expression
# Select three columns, then keep only the rows where VegStructure is "Forest".
# Note: this overwrites the Select_data1 created in the select() section.
Select_data1 <- data %>%
  select(Country, Site, VegStructure) %>%
  filter(VegStructure == "Forest")
Select_data1

# arrange(): re-order the rows by a particular column (here VegStructure).
d <- data %>%
  arrange(VegStructure)

# Combine select() and arrange() in one pipeline:
# rows are ordered by Site, then by VegStructure.
data %>%
  select(Country, Site, VegStructure) %>%
  arrange(Site, VegStructure)

# Combine select(), arrange() and filter() in one pipeline:
# after ordering, keep only the rows with Clay of at least 75.
data %>%
  select(Country, Site, VegStructure, Clay) %>%
  arrange(Site, VegStructure) %>%
  filter(Clay >= 75)

# Same pipeline, but VegStructure is sorted in descending order via desc().
data %>%
  select(Country, Site, VegStructure, Clay) %>%
  arrange(Site, desc(VegStructure)) %>%
  filter(Clay >= 75)
### mutate(): add derived columns
# Add a new column Carbon_PH, the ratio of Carbon to pH, and show the
# first rows of the result.
data %>%
  mutate(Carbon_PH = Carbon / pH) %>%
  head()

# Several columns can be added in a single mutate() call.
data %>%
  mutate(
    Carbon_PH = Carbon / pH,
    avSlope_avTreeDen = avSlope / avTreeDen
  ) %>%
  head()
### summarise(): collapse the data to summary values
# Compute the average Carbon by applying mean() to the Carbon column and
# name the summary value avg_Carbon.
# NOTE(review): mean(), min() and max() return NA when the column contains
# missing values; pass na.rm = TRUE if that is not the desired outcome.
data %>%
  summarise(
    avg_Carbon = mean(Carbon)
  )

# Summarise Carbon with its mean, minimum and maximum; n() adds the row count.
data %>%
  summarise(
    avg_Carbon = mean(Carbon),
    min_Carbon = min(Carbon),
    max_Carbon = max(Carbon),
    total = n()
  )
### group_by(): split, apply, combine
# Split the data frame by a grouping variable (here Site), apply the summary
# functions to each group separately, then combine the per-group results.
data %>%
  group_by(Site) %>%
  summarise(
    avg_Carbon = mean(Carbon),
    min_Carbon = min(Carbon),
    max_Carbon = max(Carbon),
    total = n()
  )
# Comments
# No comments yet.