Use getwd() to check the working directory. It will most likely be whichever folder your Rmd file is in. If you need to change it for some reason, use setwd().
Use install.packages() to download and install a new R package. You only need to do this once; after that, just use library() to load the package so you can call the functions within it. Then load the package libraries.
TIP: It’s generally considered best practice to keep the commands that load all the packages you need at the top of the document.
tidyr, dplyr, and readr are probably the most commonly used packages for data wrangling/cleaning. ggplot2 is very common for creating publication-quality figures.
Note: The order in which you load packages is important. When two packages contain functions with the same name, the function from the package loaded later masks the one from the package loaded earlier.
library("tidyr")
library("dplyr")
library("readr")
library("ggplot2")
library("ggpubr")
Load each year's .csv file containing the raw cover data.
# Read one year's raw cover sheet, discard rows whose date field is the
# placeholder "drop" (with or without a trailing space), then parse the
# remaining dates from month/day/4-digit-year text into Date values.
read_raw_cover <- function(path) {
  read.csv(path, header = TRUE) %>%
    filter(date != "drop", date != "drop ") %>%
    mutate(date = as.Date(date, format = "%m/%d/%Y"))
}

raw_2018 <- read_raw_cover("../Data/LTER_89MAT_raw_cover_2018.csv")
raw_2019 <- read_raw_cover("../Data/LTER_89MAT_raw_cover_2019.csv")
raw_2021 <- read_raw_cover("../Data/LTER_89MAT_raw_cover_2021.csv")
# Compare the treatment labels used in the 2019 and 2021 sheets.
unique(raw_2019[["treatment"]])
## [1] "EXNPNF" "CT2" "GHNP" "GHCT" "NP"
unique(raw_2021[["treatment"]])
## [1] "CT2" "EXC-NP" "GHCT" "GHNP" "NP"
Combine all years together: MAKE SURE ALL HEADINGS MATCH
#' Full outer merge of two data frames on the columns they share.
#'
#' Wraps `merge(x, y, all = TRUE)` so it can be folded over a list of data
#' frames with `Reduce()`. Rows are never discarded for lack of a match, but
#' rows that are identical in every shared column are combined into one, so
#' check the row count afterwards.
#'
#' @param x,y Data frames to combine.
#' @return The merged data frame. If the result has a column named exactly
#'   "Row.names", it is moved into the row names and dropped; otherwise the
#'   row names are reset to the automatic 1..n sequence.
MyMerge <- function(x, y) {
  df <- merge(x, y, all = TRUE)
  # Look the column up by exact name. `df$Row.names` partially matches any
  # column whose name merely starts with "Row.names" and would silently
  # overwrite the row names with that column's values.
  row_ids <- if ("Row.names" %in% names(df)) df[["Row.names"]] else NULL
  rownames(df) <- row_ids
  df[["Row.names"]] <- NULL
  df
}
# Stack the three years: merge 2018 with 2019, then merge 2021 into that
# result (the same left-to-right fold that Reduce() performs).
raw_orignial <- MyMerge(MyMerge(raw_2018, raw_2019), raw_2021)
LOGIC CHECK - Does the number of observations in raw_original (named `raw_orignial` in the code) equal the sum of the observations in the data sets you combined?
# Reshape to long format: every column other than the identifier columns
# listed below is stacked, with its column name stored in quad_num and its
# value stored in cover.
raw_long <- pivot_longer(
  raw_orignial,
  cols = -c(date, year, region, site, treatment, block, species),
  names_to = "quad_num",
  values_to = "cover"
)
LOGIC CHECK - Does the number of observations in raw_long equal the number of observations in raw_original × 8 (one row for each of the 8 pivoted quadrat columns)?
In this case we can remove unknowns, vole activity, tussock number, etc.
# List every distinct species label. Use it to decide what to eliminate, and
# rerun it after removing data to confirm the removal worked.
unique(raw_long[["species"]])
## [1] "And pol" "bare" "Bet nan"
## [4] "Car big" "Cas tet" "Emp nig"
## [7] "Eri vag" "frost boil" "Led pal"
## [10] "lichen" "litter" "moss"
## [13] "Ped lap" "Pol bis" "Rub cha"
## [16] "Sal pul" "St. D. Bet." "Vac uli"
## [19] "Vac vit" "Mushrooms" "grass"
## [22] "chopped vole litter " "latrienes (%)" "Sampling"
## [25] "Severed vole litter" "Trampling" "tussock #"
## [28] "vole hole (#)" "vole trail (%)" "Winter kill"
## [31] "Other dead" "Structure" "Trample"
## [34] "other S.D" "St.D. other" "Unk.N2"
## [37] "Calcan" "Car big/other" "Unk.1"
## [40] "# of shrooms" "Grass" "Other S.D."
## [43] "Green House Structure" "sampling" "tarp"
## [46] "trampling" "GH structure" "sampled "
## [49] "trampled" "trampled " "Distructive"
## [52] "Smapled" "Sampled" "St. D. other"
## [55] "Trampled" "# TUSS." "AND POL"
## [58] "ARC ALP" "BARE GR." "BET NAN"
## [61] "CAR BIG" "CAS TET" "DEAD BET"
## [64] "DEAD EV." "EMP NIG" "ERI VAG"
## [67] "EV. LITTER" "FR BOIL" "GRASS ex."
## [70] "LED PAL" "LICHEN" "LITTER"
## [73] "MOSS" "PED LAP" "PET FRI"
## [76] "POL BIS" "RUB CHA" "SAL PUL"
## [79] "SAL RET" "VAC ULI" "VAC VIT"
# Remove data you don't need/want.
# Rows whose species is NA are dropped too, exactly as a chain of `!=`
# comparisons inside filter() would drop them.
raw_long <- raw_long %>%
  filter(
    !is.na(species),
    !species %in% c(
      "Unk.N2", "vole hole (#)", "chopped vole litter ", "tussock #",
      "sampled ", "latrienes (%)", "vole trail (%)", "Severed vole litter",
      "trampled ", "tarp", "trampled", "sampled", "trampling", "Unk. 1",
      "Green House Structure", "GH structure", "Distructive", "Trampled",
      "Mushrooms", "Sampling", "# of shrooms", "sampling", "Smapled",
      "# TUSS.", "Structure", "Trample", "Sampled", "Trampling", "Unk.1"
    )
  )
Check the unique values in each column to make sure that there are no naming errors.
# Distinct species labels remaining at this point.
unique(raw_long[["species"]])
If there are mistakes, rename them using the code below and recheck the unique values to make sure the recode worked. Make sure you didn’t lose any observations.
Examples of naming convention corrections:
# Fix naming-convention errors in treatment names: relabel "EXC-NP" as
# "EXNPNF"; every other label passes through unchanged.
raw_long <- raw_long %>%
  mutate(treatment = recode(treatment, "EXC-NP" = "EXNPNF"))
# Fix naming-convention errors in species names. Each pair reads
# "label found in the data" = "standardized label"; labels not listed here
# are left as they are.
raw_long <- raw_long %>%
  mutate(
    species = recode(
      species,
      "Other dead" = "St D",
      "Other S.D." = "St D",
      "GRASS ex." = "grass",
      "POL BIS" = "Pol bis",
      "other S.D" = "St D",
      "St. D. other" = "St D",
      "DEAD BET" = "St D Bet",
      "LED PAL" = "Led pal",
      "RUB CHA" = "Rub cha",
      "St. D. Bet." = "St D Bet",
      "St.D. other" = "St D",
      "AND POL" = "And pol",
      "DEAD EV." = "litter",
      "LICHEN" = "lichen",
      "SAL PUL" = "Sal pul",
      "Calcan" = "Cal can",
      "ARC ALP" = "Arc alp",
      "EMP NIG" = "Emp nig",
      "LITTER" = "litter",
      "SAL RET" = "Sal ret",
      "Car big/other" = "grass",
      "BARE GR." = "bare",
      "ERI VAG" = "Eri vag",
      "MOSS" = "moss",
      "VAC ULI" = "Vac uli",
      "BET NAN" = "Bet nan",
      "EV. LITTER" = "litter",
      "PED LAP" = "Ped lap",
      "VAC VIT" = "Vac vit",
      "Grass" = "grass",
      "CAR BIG" = "Car big",
      "FR BOIL" = "Fr boil",
      "PET FRI" = "Pet fri",
      "CAS TET" = "Cas tet",
      "Winter kill" = "Win kill",
      "frost boil" = "Fr boil"
    )
  )
Sum functional cover across species within quadrats. This likely won’t change the relative cover for most species unless they were listed more than once in a quadrat, but it’s good to do to ensure accuracy after fixing naming conventions.
# Collapse repeated entries of the same species within a quadrat: total the
# cover (ignoring NA) and record how many rows were combined in n.
# summarise() peels off only the last grouping variable (quad_num), so
# raw_clean stays grouped by the remaining seven.
raw_clean <- raw_long %>%
  group_by(date, year, region, site, treatment, block, species, quad_num) %>%
  summarise(
    cover = sum(cover, na.rm = TRUE),
    n = n()
  )
## `summarise()` has grouped output by 'date', 'year', 'region', 'site', 'treatment', 'block', 'species'. You can override using the `.groups` argument.
# Total cover of each quadrat on each sampling date (all species combined),
# returned as an ungrouped table.
quad_sum <- raw_clean %>%
  group_by(date, year, region, site, treatment, block, quad_num) %>%
  summarise(sum_quad = sum(cover)) %>%
  ungroup()
## `summarise()` has grouped output by 'date', 'year', 'region', 'site', 'treatment', 'block'. You can override using the `.groups` argument.
# Attach each quadrat's total cover (sum_quad) to every species row that
# belongs to that quadrat.
cover_join <- raw_clean %>%
  left_join(
    quad_sum,
    by = c("date", "year", "region", "site", "treatment", "block", "quad_num")
  )
# Relative cover = a species' cover divided by the total cover of its quadrat.
# A quadrat whose total is 0 yields NaN (0 / 0). The raw cover and the
# quadrat total are dropped once relcov has been computed.
rel_cov_clean <- cover_join %>%
  mutate(relcov = cover / sum_quad) %>%
  select(-c(sum_quad, cover))
LOGIC CHECK: # of observations should be the same as you had in raw_clean
# Confirm that only the expected treatment labels remain.
unique(rel_cov_clean[["treatment"]])
## [1] "CT2" "NP" "GHCT" "GHNP" "EXNPNF"