# Setting working directory ----
# Later reads use paths relative to this project folder ("Data/...").
# The folder is machine-specific, so only switch to it when it exists;
# otherwise stay in the current directory and warn, so the script can still
# be run from the project root on another machine.
project_dir <- "/Users/FMusili/Documents/2.2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  warning(
    "Project directory not found: ", project_dir,
    "; relative paths will resolve against ", getwd(),
    call. = FALSE
  )
}
# Creating objects ----
# Assigning to an existing name replaces its previous value.
n <- 150
n
n <- 58
n
# R is case-sensitive: `x` and `X` are two independent objects.
x <- 1.4
x
X <- 19
X
# Importing data of different formats ----
# All paths below are relative to the current working directory.

# CSV files: read.csv()
species_csv <- read.csv(file = "Data/species.csv")
class(species_csv)
dim(species_csv)
summary(species_csv)

# Tab-delimited text files (.txt): read.table()
species_txt <- read.table(file = "Data/species.txt", sep = "\t", header = TRUE)
class(species_txt)
dim(species_txt)
summary(species_txt)

# Excel files (.xlsx, .xls): read.xlsx() from the xlsx package
# Install the package only when it is missing, not on every run.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")
}
library(xlsx)
species_xlsx <- read.xlsx(file = "Data/species.xlsx", sheetName = "Sheet1")
class(species_xlsx)
dim(species_xlsx)
summary(species_xlsx)

# Stata files (.dta): read.dta() from the foreign package
library(foreign)
animals_stata <- read.dta(file = "Data/Animals.dta")
class(animals_stata)
dim(animals_stata)
summary(animals_stata)

# SPSS files: read.spss() from the foreign package
# The script never supplied an SPSS file path (it was file = ""), and an
# empty path cannot be opened. Set `spss_file` to a real file to enable
# this step; until then it is skipped instead of stopping the script.
spss_file <- ""
if (nzchar(spss_file)) {
  animals_spss <- read.spss(file = spss_file, to.data.frame = TRUE)
}

# Fixed-width files: read.fwf(), with one width per column
animal.fwf <- read.fwf(
  file = "Data/Animals_fwf.txt",
  widths = c(16, 5, 6),
  header = TRUE
)
class(animal.fwf)
dim(animal.fwf)
summary(animal.fwf)
##################Read about the other data types not demonstrated here#########
# DATA HANDLING ----
# Look at the first and last rows of the data
head(species_csv)
tail(species_csv)
colnames(species_csv)  # current column names

# Rename all columns at once
colnames(species_csv) <- c(
  "Tree_species", "Height(m)", "Nitrogen_fixing", "seed_mass"
)

# Rename a single column by position. This is demonstrated on a copy:
# renaming the first column of species_csv itself would remove the
# Tree_species name that the following steps refer to.
renamed_example <- species_csv
names(renamed_example)[1] <- "premium"
colnames(renamed_example)
colnames(species_csv)

# Data type of each column
sapply(species_csv, class)
sapply(species_txt, class)
sapply(animal.fwf, class)

# Convert the animal column to character
# NOTE(review): assumes the fixed-width file's header names a column
# `animal` - confirm against Data/Animals_fwf.txt.
animal.fwf$animal <- as.character(animal.fwf$animal)
sapply(animal.fwf, class)

# Convert the tree species column to character
species_csv$Tree_species <- as.character(species_csv$Tree_species)
sapply(species_csv, class)

# Create a new column and add it to an existing data frame
# NOTE(review): `a` has 11 values; cbind() needs that to be compatible with
# the number of rows in species_xlsx - confirm against the workbook.
a <- c(0, 9, 3, 3, 3, 3, 33, 3, 2, 2, 2)
d <- cbind(species_xlsx, a)

# Duplicate the data
my_species <- species_csv
# Subsetting data ----
# Keep only the tree species and seed mass columns
species <- species_csv[, c("Tree_species", "seed_mass")]
species

# Keep columns 2 and 3 only (selected by position)
species_num <- species_csv[, 2:3]
species_num

# Keep every column except the first
species_ <- species_txt[, -1]
species_

# Filtering rows by condition ----
# Keep the rows whose tree species is one of the listed names
wanted_species <- c("Acacia abyssinica", "Ficus ovata", "Sesbania spinosa")
Filtered_species <- subset(species_csv, Tree_species %in% wanted_species)
Filtered_species
# Checking for duplicates ----
species_csv[duplicated(species_csv), ]  # show the duplicated rows
# Drop the duplicated rows (the first occurrence of each row is kept)
species_csv <- species_csv[!duplicated(species_csv), ]
species_csv

# Replace a data point given a condition ----
# Compare against the number 10 rather than the string "10", and use
# which() so rows with a missing height are skipped.
height_is_10 <- which(species_csv$`Height(m)` == 10)
species_csv$`Height(m)`[height_is_10] <- 10.1

# Missing values (NA) ----
colSums(is.na(species_csv))  # number of missing values per column
summary(species_csv)
# Drop rows containing any NA. Row subsetting keeps the column names as they
# are; wrapping the result in data.frame() would re-check the names and turn
# `Height(m)` into `Height.m.`.
species_csv <- species_csv[complete.cases(species_csv), ]
species_csv
#----------------------------------------------------------------
# Explore other data handling methods, e.g.:
#   - merging
#   - appending/binding
#   - reshaping (wide and long)
# Comments
# No comments yet.