PR2 version 4.13.0
Chrysophyceae

1 Init

Load the variables common to the different scripts and the necessary libraries

  # Default options applied to every knitr chunk of this document:
  # chunks are not evaluated, caching is enabled, code is not re-tidied.
  knitr::opts_chunk$set(
    eval = FALSE,
    cache = TRUE,
    # Disabled option: the path points at the 2019 diatoms update workbook,
    # not at the files used in this document.
    # cache.extra = file.info("../updates/2019 Arsenieff diatoms/pr2_diatoms_2019_11_21_LA.xlsx"),
    tidy = FALSE
  )

  # Run the shared initialisation script without echoing its code.
  source("PR2_init.R", echo = FALSE)

2 Reference

  • Andersen RA., Graf L., Malakhov Y., Yoon HS. 2017. Rediscovery of the Ochromonas type species Ochromonas triangulata (Chrysophyceae) from its type locality (Lake Veysove, Donetsk region, Ukraine). Phycologia 56:591–604. DOI: 10.2216/17-15.1.

3 Set up the files

  # Taxonomic group processed by this update and the rank of that group.
  target_group <- c("Chrysophyceae")
  target_level <- "class"

  # Directory that holds the input and output files of this update.
  dir_pr2_update <- "../updates/2020 4.13.0 Chrysophyceae Andersen"

  # Record the editor in pr2.env (object defined outside this section).
  pr2.env$editor <- "D. Vaulot"

  # Build the path of a file located inside the update directory.
  #   file_name: name of the file (or sub-directory), relative to
  #              dir_pr2_update.
  #   Returns dir_pr2_update and file_name joined with "/".
  full_path <- function(file_name) {
    str_c(dir_pr2_update, "/", file_name)
  }

  # Excel workbook containing the updated annotations.
  file_pr2_update_excel <- full_path("pr2_Chrysophyceae.xlsx")

  # Create the "taxo" sub-directory for taxonomy output; warnings (for
  # instance when the directory already exists) are suppressed.
  dir.create(full_path("taxo"), showWarnings = FALSE)

4 Read the original data and reformat

4.1 Read the data

  • Number of sequences = 168
# Read the "update" sheet of the workbook. Empty cells and "-" are read as
# NA; up to 200000 rows are used to guess the column types.
pr2_update <- read_excel(
  file_pr2_update_excel,
  sheet = "update",
  guess_max = 200000,
  na = c("", "-")
)

# Report the number of rows (one per sequence) that were read.
str_c("Number of sequences : ", nrow(pr2_update))

4.2 Compare with sequences in PR2

  # Main sheet: updated rows joined to the existing pr2 table (defined outside
  # this section). No `by` is given, so the join uses every column name the
  # two tables share. Only the removed/accession/updated columns, the
  # taxonomy ranks (domain to species), the sequence length and the
  # ambiguities are kept.
  pr2_main_updated <- left_join(pr2_update, pr2) %>%
    select(
      matches("removed|accession|updated"),
      domain:species,
      sequence_length,
      ambiguities
    )

  # Taxonomy sheet: each distinct updated species name, stored as species_9,
  # joined to the existing pr2_taxo table (defined outside this section) on
  # their shared column names.
  pr2_taxo_updated <- pr2_update %>%
    select(species_9 = species_updated) %>%
    distinct() %>%
    left_join(pr2_taxo)

  # Metadata sheet: same join as the main sheet, keeping the GenBank
  # accession, the strain number, every column whose name contains "gb" and
  # the PR2 sample type.
  pr2_metadata_updated <- left_join(pr2_update, pr2) %>%
    select(
      genbank_accession,
      strain_number,
      contains("gb"),
      pr2_sample_type
    )

  # "onglets" is French for "tabs": a named list of the three tables, in the
  # order they are passed to the writer.
  onglets <- list(
    pr2_taxo_updated = pr2_taxo_updated,
    pr2_main_updated = pr2_main_updated,
    pr2_metadata_updated = pr2_metadata_updated
  )

  # Export the list to an Excel workbook in the update directory
  # (presumably one worksheet per list element - confirm against the
  # write.xlsx implementation in use).
  write.xlsx(onglets, full_path("pr2_chrysophyceae_merged.xlsx"))

Daniel Vaulot

11 06 2020