DATA SETUP

Clear workspace (if desired/needed)

rm(list = ls())

Check working directory, set new working directory if needed.

#setwd("C:/Users/willi/OneDrive/Documents/Towson University/Data/Cover data/Team vole")
getwd()
## [1] "C:/Users/willi/OneDrive/Documents/Towson University/GitHub_repo_Towson_Williamson/Rmd"

Install/load the package libraries. Once you have installed a package, you only need to load the library for future use.

library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'purrr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library("ggplot2")
library("agricolae")
## Warning: package 'agricolae' was built under R version 4.0.5
library("ggpubr")
## Warning: package 'ggpubr' was built under R version 4.0.5
library("dplyr")
library("nlme")
## Warning: package 'nlme' was built under R version 4.0.5
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse

Load in each years .csv files containing the data

TV_2018 <- read.csv("../Data/Toolik_TeamVole_2018_cover.csv", header = TRUE)

TV_2019 <- read.csv("../Data/Toolik_TeamVole_2019_cover.csv", header = TRUE)

TV_2021 <- read.csv("../Data/Toolik_TeamVole_2021_cover.csv", header = TRUE)

Look at the data to see what it contains do this for each year

head(TV_2018) #opens first 6 rows
##   year region site treatment quad    species       cover
## 1 2018     TL   ST        CT    1       moss 0.217948718
## 2 2018     TL   ST        CT    1     lichen 0.003205128
## 3 2018     TL   ST        CT    1     litter 0.224358974
## 4 2018     TL   ST        CT    1 frost boil 0.000000000
## 5 2018     TL   ST        CT    1       bare 0.000000000
## 6 2018     TL   ST        CT    1    And pol 0.000000000
tail(TV_2018) #opens last 6 rows
##      year region site treatment quad     species      cover
## 1547 2018     TL   PL        PR    8     Sal pul 0.16149068
## 1548 2018     TL   PL        PR    8     Vac uli 0.00000000
## 1549 2018     TL   PL        PR    8     Vac vit 0.00000000
## 1550 2018     TL   PL        PR    8 St. D. Bet. 0.00000000
## 1551 2018     TL   PL        PR    8     Sal ret 0.09937888
## 1552 2018     TL   PL        PR    8     Pyr gra 0.00000000
summary(TV_2018) # gives min/max/mean/mode/median... good for seeing if there are any glaringly wrong cover values
##       year         region              site            treatment        
##  Min.   :2018   Length:1552        Length:1552        Length:1552       
##  1st Qu.:2018   Class :character   Class :character   Class :character  
##  Median :2018   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2018                                                           
##  3rd Qu.:2018                                                           
##  Max.   :2018                                                           
##       quad        species              cover        
##  Min.   :1.00   Length:1552        Min.   :0.00000  
##  1st Qu.:2.75   Class :character   1st Qu.:0.00000  
##  Median :4.50   Mode  :character   Median :0.01146  
##  Mean   :4.50                      Mean   :0.04639  
##  3rd Qu.:6.25                      3rd Qu.:0.07231  
##  Max.   :8.00                      Max.   :0.41455
str(TV_2018) #tells us what data types (numbers, factors, etc) are in the data frame
## 'data.frame':    1552 obs. of  7 variables:
##  $ year     : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##  $ region   : chr  "TL" "TL" "TL" "TL" ...
##  $ site     : chr  "ST" "ST" "ST" "ST" ...
##  $ treatment: chr  "CT" "CT" "CT" "CT" ...
##  $ quad     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ species  : chr  "moss" "lichen" "litter" "frost boil" ...
##  $ cover    : num  0.21795 0.00321 0.22436 0 0 ...

Then merge the years together into one dataset…

all the names of columns need to be exactly the same name (capitalization and spaces included)

MyMerge       <- function(x, y){
  df            <- merge(x, y, all = TRUE)
  rownames(df)  <- df$Row.names
  df$Row.names  <- NULL
  return(df)
}
TV_ALL           <- Reduce(MyMerge, list(TV_2018, TV_2019, TV_2021)) #list datasets here that need to be merged

make sure the # of Obs in the 1st data set + the 2nd match the # of obs in the merged data set

DATA CLEANING

Check unique vales in species column to look for naming convention errors

unique(TV_ALL$species)
##  [1] "And pol"             "bare"                "Bet nan"            
##  [4] "Car big"             "Cas tet"             "Emp nig"            
##  [7] "Eri vag"             "frost boil"          "Led pal"            
## [10] "lichen"              "litter"              "moss"               
## [13] "Ped lap"             "Pol bis"             "Pyr gra"            
## [16] "Rub cha"             "Sal pul"             "Sal ret"            
## [19] "Sax pun"             "St. D. Bet."         "Vac uli"            
## [22] "Vac vit"             "Pet fri"             "Ste edw"            
## [25] "Arc alp"             "Unk"                 "dead led pal"       
## [28] "sal ret"             "unk 2"               "unk 4"              
## [31] "flower"              "Ped sp"              "unk 1"              
## [34] "winter kill"         "winter kill vac vit" "dead cas tet"       
## [37] "ped sp"              "unk 3"               "led pal winter kill"
## [40] "vac vit winter kill" "sal sp"              "arc alp"            
## [43] "calcan"              "Dead"                "dead vac vit"       
## [46] "led pal dead"        "sal ret?"            "cas tet dead"       
## [49] "dead shrub(bet)?"    "vac vit dead"        "Ped lep"            
## [52] "Sal ret "            "st. D. Sal pul"      "Stel lon"           
## [55] "tussock litter"      "UNK 1"               "UNK 2"              
## [58] "UNK 3"               "UNK 4"               "UNK 5"              
## [61] "Pet Fri "            "st. D. Cas tet"      "Mushroom "          
## [64] "ST. D. Sal pul."     "Tussock litter"      "Unk 1"              
## [67] "Unk 2"               "Unk 3 "              "Unk 4 "             
## [70] "St. D. Cas tet"      "St. D. Sal Pul"      "tussock litter "    
## [73] "Bis viv"             "Vac Oxy"             "Unk1 "

Remove any data that you will not need for the analysis

#REMOVE MUSHROOMS
TV_ALL <- TV_ALL[!TV_ALL$species == "Mushroom ", ] 

#REMOVE UNKNOWN SPECIES 
TV_ALL <- TV_ALL[!TV_ALL$species == "UNK 3", ] 
TV_ALL <- TV_ALL[!TV_ALL$species == "unk 2", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "unk", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk 2", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "unk 3", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "UNK 1", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "UNK 4", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk 3", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "unk 4", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "unk 1", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "UNK 2", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "UNK 5", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk 1", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk 4 ", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk1 ", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk 3 ", ]
TV_ALL <- TV_ALL[!TV_ALL$species == "Unk", ]

If there are naming mistakes that create duplicates (EX: “bare” vs “Bare”) then rename them using the code below

#naming convention errors 
TV_ALL$species <- recode(TV_ALL$species, "Tussock litter" = "tussock litter")
TV_ALL$species <- recode(TV_ALL$species, "tussock litter " = "tussock litter")

TV_ALL$species <- recode(TV_ALL$species, "sal ret" = "Sal ret")
TV_ALL$species <- recode(TV_ALL$species, "sal ret?" = "Sal ret")
TV_ALL$species <- recode(TV_ALL$species, "Sal ret " = "Sal ret")

TV_ALL$species <- recode(TV_ALL$species, "arc alp" = "Arc alp")

TV_ALL$species <- recode(TV_ALL$species, "Ped lep" = "Ped lap")
TV_ALL$species <- recode(TV_ALL$species, "ped sp" = "Ped sp")

TV_ALL$species <- recode(TV_ALL$species, "Pet Fri " = "Pet fri")

Group things together that are currently more specific then they need to be/than is useful (EX: winter kill identified to species level can be grouped into the larger category “winterkill”)

#group like things together that should be group more broadly 

  #winter kill
TV_ALL$species <- recode(TV_ALL$species, "winter kill vac vit" = "winterkill")
TV_ALL$species <- recode(TV_ALL$species, "led pal winter kill" = "winterkill")
TV_ALL$species <- recode(TV_ALL$species, "vac vit winter kill" = "winterkill")
TV_ALL$species <- recode(TV_ALL$species, "vac vit winter kill" = "winterkill")
TV_ALL$species <- recode(TV_ALL$species, "winter kill" = "winterkill")

  #standing dead
TV_ALL$species <- recode(TV_ALL$species, "dead vac vit" = "std")
TV_ALL$species <- recode(TV_ALL$species, "cas tet dead" = "std")
TV_ALL$species <- recode(TV_ALL$species, "st. D. Cas tet" = "std")
TV_ALL$species <- recode(TV_ALL$species, "St. D. Bet." = "std")
TV_ALL$species <- recode(TV_ALL$species, "dead led pal" = "std")
TV_ALL$species <- recode(TV_ALL$species, "led pal dead" = "std")
TV_ALL$species <- recode(TV_ALL$species, "dead shrub(bet)?" = "std")
TV_ALL$species <- recode(TV_ALL$species, "St. D. Cas tet" = "std")
TV_ALL$species <- recode(TV_ALL$species, "Dead" = "std")
TV_ALL$species <- recode(TV_ALL$species, "vac vit dead" = "std")
TV_ALL$species <- recode(TV_ALL$species, "st. D. Sal pul" = "std")
TV_ALL$species <- recode(TV_ALL$species, "ST. D. Sal pul." = "std")
TV_ALL$species <- recode(TV_ALL$species, "St. D. Sal Pul" = "std")
TV_ALL$species <- recode(TV_ALL$species, "dead cas tet" = "std")

recheck species unique values to make sure you got everything

unique(TV_ALL$species)
##  [1] "And pol"        "bare"           "Bet nan"        "Car big"       
##  [5] "Cas tet"        "Emp nig"        "Eri vag"        "frost boil"    
##  [9] "Led pal"        "lichen"         "litter"         "moss"          
## [13] "Ped lap"        "Pol bis"        "Pyr gra"        "Rub cha"       
## [17] "Sal pul"        "Sal ret"        "Sax pun"        "std"           
## [21] "Vac uli"        "Vac vit"        "Pet fri"        "Ste edw"       
## [25] "Arc alp"        "flower"         "Ped sp"         "winterkill"    
## [29] "sal sp"         "calcan"         "Stel lon"       "tussock litter"
## [33] "Bis viv"        "Vac Oxy"
#you can also use this code to export the species list to a csv so you can sort it alphabetically. This is good to do when you think you have the dataset mostly cleaned, because its easier to catch slight errors in naming conventions when you have less unique values to sort through#

   #list.species <- unique(TV_ALL$species)
  #write.csv(list.species, file = "species list.csv")

Now check unique values for the other columns to make sure there are no naming conventions there/the #of quads looks right

unique(TV_ALL$year)
## [1] 2018 2019 2021
unique(TV_ALL$region)
## [1] "TL"
unique(TV_ALL$site)
## [1] "IM" "PL" "ST"
unique(TV_ALL$treatment)
## [1] "CT" "EX" "PR" "PU"
unique(TV_ALL$quad)
## [1] 1 2 3 4 5 6 7 8

ASSIGNING FUNCTIONAL GROUPS

Vectors for assigning functional groups

bare <- c("Bare ground")
dec_shrub <- c("Deciduous shrubs")
ev_shrub <- c("Evergreen shrubs")
gram <- c("Gramminoid")
Eri_Vag <- c("Eriophorum")
lichen <- c("Lichen")
litter <- c("Litter")
moss <- c("Moss")
Forb <- c("Forb")
St.D <- c("Standing dead")

~ subset the data by species that belong to a given functional group~

bg <- subset(TV_ALL, species== "bare" | species== "frost boil")  ## Bare ground

ds <- subset(TV_ALL, species== "Bet nan" | species== "Vac uli" | species== "Sal pul" | species== "Arc alp" | species== "Sal phl" | species== "sal sp" | species== "Sal ret") #Deciduous shrubs 

es <- subset(TV_ALL, species== "Emp nig" | species== "Led pal" | species== "Vac vit" | species== "Cas tet" | species== "And pol" | species== "Vac Oxy") #Evergreen shrubs

gr <- subset(TV_ALL, species== "calcan" | species== "Car big") #Grass

eri <- subset(TV_ALL, species== "Eri vag") #Sedge

fb <- subset(TV_ALL, species== "Bis viv" | species== "flower" | species== "Ped lap" | species== "Ped sp" | species== "Pet fri" | species== "Pol bis" | species== "Pyr gra" | species== "Rub cha" | species== "Sax pun" | species== "Ste edw" | species== "Stel lon") #forbs

lich <- subset(TV_ALL, species== "lichen") #lichen

lit <- subset(TV_ALL, species== "litter" | species== "tussock litter") #litter

ms <- subset(TV_ALL, species== "moss") #moss

std <- subset(TV_ALL, species== "std" | species== "winterkill") #standing dead + winterkill

Creates a new column named “func.group” and fills it with the the name of the functional group for each of the subset groups of data created above

bg["func.group"] <-bare
ds["func.group"] <-dec_shrub
es["func.group"] <-ev_shrub
gr["func.group"] <-gram
eri["func.group"] <-Eri_Vag
fb["func.group"] <-Forb
lich["func.group"] <-lichen
lit["func.group"] <-litter
ms["func.group"] <-moss
std["func.group"] <-St.D

Merges the subset data back together, now with a new column called “func.group”

IMPORTANT: MAKE SURE THE # OF OBSERVATIONS IN “TV_ALL_FUNCGRP” IS THE SAME AS “TV_ALL_FUNCGRP”

If # of obs do not match, that means you are missing data, likely you forgot to assign a species to one of the functional group subset code (look at the unique species names in each of the datasets and see whats missing in “TV_ALL_FUNCGRP”)

MyMerge       <- function(x, y){
  df            <- merge(x, y, all = TRUE)
  rownames(df)  <- df$Row.names
  df$Row.names  <- NULL
  return(df)
}
TV_ALL_funcgrp           <- Reduce(MyMerge, list(bg, ds, es, gr, eri, fb, lich, lit, ms, std))

#use this if you want to export these data to csv at this point
write.csv(TV_ALL_funcgrp, file = "../Derived CSVs/TeamVole_relcov_funcgrp_ALL.csv")

CALCULATING AVERAGES

!!!!!! VERY IMPORTANT STEP !!!!!!!

This step sums relative cover across species of a given functional group within each individual quadrat. If you do not do this then you will be averaging across all the species and quadrats within a functional group at the same time, instead of averaging the total relative cover of a functional group across the 8 quadrats. Basically you will deflating the average cover, so make sure you do this step!!! :)

sum.func <- (TV_ALL_funcgrp) %>% group_by(year, site, treatment, quad, func.group) %>% summarise_at(vars(cover), list(cover.func = sum ), na.rm = TRUE)

Calculate average across quads within treatments

NOTE: this assumes you are averaging across 8 quads

If there is a year/treatment where we did not sample 8 quads, the you will need to subset that year/treatment and repeat the steps below on that data set separately and adjust the vector formula to divide by the appropriate number of quads sampled then merge the dataset back together

#Sum across quads in a plot
avg.quad.func <- (sum.func) %>% group_by(year, site, treatment, func.group) %>% summarise_at(vars(cover.func), list(sum.quad = sum), na.rm = TRUE)

#Vector for calculating average
Q8.avg <- c(avg.quad.func$sum.quad/8)

#creates and fills a new column with the average (by diving that sum by the number of reps specified)
avg.quad.func["avg.quad"] <-Q8.avg

NOTE: these is another way to have R automatically calculate the averages using Vars, but this was is more accurate and gives you more control

Calculate average/SD across sites

avg.func.site <- (avg.quad.func) %>% group_by(year, treatment, func.group) %>% summarise_at(vars(avg.quad), list(avg.site = mean, site.SD = sd), na.rm = TRUE)

#export this file
#write.csv(avg.func.site, file = "avg_relcov_site.csv")

Bargraph for averaged across sites

#specify factor levels for herbivore treatment and functional groups. Helps to specify the order the species and herbivore treatment are stacked within the bar graph. If you want them to present in a different order, then change their order below and rerun the code

avg.func.site$treatment <- factor(avg.func.site$treatment, levels = c("CT", "EX", "PU", "PR"))
avg.func.site$func.group <- factor(avg.func.site$func.group, levels = c("Deciduous shrubs", 
                                                                          "Evergreen shrubs", 
                                                                          "Sedge",
                                                                          "Grass",
                                                                          "Forb",
                                                                          "Lichen",
                                                                          "Moss", 
                                                                          "Litter",
                                                                          "Standing dead",
                                                                          "Bare ground"))

#Creates stacked bar graph#
ggplot(data = avg.func.site, aes(fill=func.group, x = treatment, y = avg.site)) +
  geom_bar(position="fill", stat="identity")+ #Percent stacked
  scale_fill_manual(" ", values = c("Deciduous shrubs" = "#004c6d",
                                                           "Evergreen shrubs" = "#6996b3",
                                                           "Sedge" = "#036d3f",
                                                           "Grass"="#6fa17e",
                                                           "Forb"="#c4d6c8",
                                                           "Lichen"="#f6e8c3",
                                                           "Moss"="#dfc27d",
                                                           "Litter"="#bf812d",
                                                           "Standing dead"="#8c510a",
                                                           "Bare ground" = "#543005"))+
  theme_light()+
  #theme(aspect.ratio = 9/18.5)+ #use for vertical stack only
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(color = "black", size = 8, angle = 45, hjust = 1))+
  theme(axis.text.y = element_text(color = "black", size = 6))+
  theme( axis.line = element_line(colour = "black", 
                      size = 1, linetype = "solid"))+
  scale_y_continuous(breaks= c(0, 0.50, 1))+
  theme(plot.title = element_text(hjust = .5))+
  labs(y = "Mean relative cover", x = " ")+ 
  #theme(axis.title.y = element_text(face = "bold"))+
    #theme(axis.title.x = element_text(face = "bold"))+
  #theme(legend.direction="horizontal")+
  #theme(legend.position="bottom")+
  facet_grid(. ~ year) #vertically stacked

#ggsave("toolik_cover_treatment_avrg.jpeg") #exports a jpeg image of the graph
avg.quad.func$treatment <- factor(avg.quad.func$treatment, levels = c("CT", "EX", "PU", "PR"))
avg.quad.func$func.group <- factor(avg.quad.func$func.group, levels = c("Deciduous shrubs", 
                                                                          "Evergreen shrubs", 
                                                                          "Sedge",
                                                                          "Grass",
                                                                          "Forb",
                                                                          "Lichen",
                                                                          "Moss", 
                                                                          "Litter",
                                                                          "Standing dead",
                                                                          "Bare ground"))

#Creates stacked bar graph#
ggplot(data = avg.quad.func, aes(fill=func.group, x = treatment, y = avg.quad)) +
  geom_bar(position="fill", stat="identity")+ #Percent stacked
  scale_fill_manual(" ", values = c("Deciduous shrubs" = "#004c6d",
                                                           "Evergreen shrubs" = "#6996b3",
                                                           "Sedge" = "#036d3f",
                                                           "Grass"="#6fa17e",
                                                           "Forb"="#c4d6c8",
                                                           "Lichen"="#f6e8c3",
                                                           "Moss"="#dfc27d",
                                                           "Litter"="#bf812d",
                                                           "Standing dead"="#8c510a",
                                                           "Bare ground" = "#543005"))+
  theme_light()+
  #theme(aspect.ratio = 9/18.5)+ #use for vertical stack only
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(color = "black", size = 12, angle = 45, hjust = 1))+
  theme(axis.text.y = element_text(color = "black", size = 10))+
  theme( axis.line = element_line(colour = "black", 
                      size = 1, linetype = "solid"))+
  scale_y_continuous(breaks= c(0, 0.50, 1))+
  theme(plot.title = element_text(hjust = .5))+
  labs(y = "Mean relative cover", x = " ")+ 
  #theme(axis.title.y = element_text(face = "bold"))+
    #theme(axis.title.x = element_text(face = "bold"))+
  #theme(legend.direction="horizontal")+
  #theme(legend.position="bottom")+
  facet_grid(site ~ year) #vertically stacked

#ggsave("toolik_cover_treatment_site_avrg.jpeg") #exports a jpeg image of the graph