1  Data management: metadata correction and cleansing

1.1 Reproducibility

I list below the R packages required to reproduce the analyses.

Code
## data wrangling
library(dplyr)
library(tidyr)
library(stringr)
library(readr)
library(readxl)
library(purrr)

## reporting
library(flextable)

## plotting
library(ggplot2)
library(ComplexHeatmap)
library(cowplot)
library(grid)
library(RColorBrewer)
library(igraph)

## auxiliary functions
source("R/utils.R")
today_date <- "2025-04-28"
## today_date <- format(Sys.Date(), "%Y-%m-%d")

## set the seed, for fixing generation of ComplexHeatmaps (dendogram clustering)
set.seed(20)

1.2 Barcode counts cleaning

We early discarded exp130921_in vivo.csv from the experiences

Listing 1.1: Update labelling of latest experience.
Code
exp271221 <- readr::read_csv("data/barcode-counts/exp271221.csv") 
exp271221_whole <- readr::read_csv2("old-data-backup/data-raw/barcode-counts/exp211221cor.csv") |> 
  rename(barcode_id = 1)
selected_dataset <- compare_dataframes(exp271221, exp271221_whole)

file.rename("data/barcode-counts/exp211221.csv", "data/barcode-counts/exp271221.csv") 

In Listing 1.1, we rename expression file exp211221.csv into exp271221.csv, relying on date annotations.

While the dose-response experience exp200921_dose response osim exhibits distinct biological objective, it turned out that the 4 control biological replicates are shared with classical compound response comparison exp200921.csv. However, the number of sequenced barcode IDs differ in both experiences!! In addition, note that exp200921.csv has been tagged as discarded, except for the Controls!!

Code snippets in Listing 1.2 and Listing 1.3 aim at replacing unprecise labelling of barcode IDs (no concentration, no time duration, …) in the 2022 batch experiences.

Listing 1.2: Update labelling of latest experience.
Code
## extract old colnames
exp281022_expression <- readr::read_csv("data/barcode-counts/exp281022.csv") 
## cat(colnames(exp281022_expression), file = "281022_old_labels.txt", sep = "\n")

## extract mapping old_names with new_names
mapping_old_new_281022 <- readxl::read_excel("data/Table of compounds_whole_2025-04-28.xlsx", 
                                             sheet = "Sample Mapping 281022")

labels_old_new_281022 <- setNames(object = mapping_old_new_281022$OLD_replicate_label, 
                                  nm = mapping_old_new_281022$NEW_replicate_label)

## apply change labelling
exp281022_expression <- exp281022_expression |> 
  dplyr::rename(dplyr::all_of(labels_old_new_281022))

readr::write_csv(exp281022_expression, "data/barcode-counts/exp281022.csv")
Listing 1.3: Update time course experiences.
Code
exp281022_expression_timecourse <- readr::read_csv("data/barcode-counts/exp281022_time course.csv") 
## cat(colnames(exp281022_expression_timecourse), file = "281022_timecourse_old_labels.txt", sep = "\n")

## update colnames ----

mapping_old_new_time_course <- read_excel("data/Table of compounds_whole_2025-04-28.xlsx", 
                                     sheet = "Sample Mapping Time Course")

labels_old_new_time_course <- setNames(object = mapping_old_new_time_course$OLD_replicate_label, 
                                       nm = mapping_old_new_time_course$NEW_replicate_label)

## apply change labelling
exp281022_expression_timecourse <- exp281022_expression_timecourse |> 
  dplyr::rename(dplyr::all_of(labels_old_new_time_course))

readr::write_csv(exp281022_expression_timecourse,
                 "data/barcode-counts/exp281022_time course.csv")

## To evaluate whether controls are different between experiment 281022 and 281022 Time Course
controls_exp281022 <- exp281022_expression |> 
  select(key_barcode, dplyr::matches("^ctl")) |> 
  dplyr::inner_join(exp281022_expression_timecourse |> 
                      select(key_barcode, dplyr::matches("^Ctrl")),
                    by="key_barcode")
cor(controls_exp281022$ctl2, controls_exp281022$Ctrl2) ## they strongly correlate with each other, but not a perfect match -> controls are indeed distinct

1.2.1 TODO: barcode counts inconsistencies

Things to consider:

  • Format used is French csv, instead of international CSV. CSV means for ‘comma-separated value’, maybe consider switching Excel settings.
  • I rename the first colname of each expression file as barcode_id, storing unequivocal tag.
  • Some barcode counts exhibit final line index, for which reason?

1.2.2 Overview of barcode counts

We report in Table 1.1 a summary of the barcode counts (File location, batch_id, and dimensions of the counts matrix) considered for the generation of the Heatmaps, and the construction of a drug-drug network.

Code
## 1) summarise kept batches ----
barcode_files <- list.files("./data/barcode-counts/",
                            pattern = "exp.*\\.csv",
                            full.names = TRUE)

kept_barcode_summaries <- purrr::map(barcode_files, function(filename) {
  barcode_counts <- readr::read_csv(filename, show_col_types = FALSE)
  experience_name <- filename |> basename() |> tools::file_path_sans_ext()
  experience_summary <- tibble::tibble(`Experience Name`= experience_name,
                                       Features= paste0("Barcode matrix contains: ",
                                                        nrow(barcode_counts),
                                                        " unique barcode IDs, and ",
                                                        ncol(barcode_counts)-1, " replicates."))
  return(experience_summary)
}) |>
  purrr::list_rbind()
flextable::flextable(kept_barcode_summaries) |> 
  bold(part="header")

## 2) summarise discarded batches ----
discarded_files <- list.files("./data/barcode-counts/temp discarded/",
                            pattern = "exp.*\\.csv",
                            full.names = TRUE)

## change format + barcode name
barcode_discarded_summaries <- purrr::map(discarded_files, function(filename) {
  barcode_counts <- readr::read_csv(filename, show_col_types = FALSE)
  experience_name <- filename |> basename() |> tools::file_path_sans_ext()
  experience_summary <- tibble::tibble(`Experience Name`= experience_name,
                                       Features= paste0("Barcode matrix contains: ",
                                                        nrow(barcode_counts),
                                                        " unique barcode IDs, and ",
                                                        ncol(barcode_counts)-1, " replicates."))
  return(experience_summary)
}) |>
  purrr::list_rbind()

flextable::flextable(barcode_discarded_summaries) |> 
  bold(part="header")

Experience Name

Features

exp010821

Barcode matrix contains: 869714 unique barcode IDs, and 16 replicates.

exp040821

Barcode matrix contains: 869714 unique barcode IDs, and 16 replicates.

exp070222

Barcode matrix contains: 93132 unique barcode IDs, and 68 replicates.

exp130921

Barcode matrix contains: 627539 unique barcode IDs, and 24 replicates.

exp151121

Barcode matrix contains: 844434 unique barcode IDs, and 60 replicates.

exp181021

Barcode matrix contains: 813579 unique barcode IDs, and 59 replicates.

exp200921

Barcode matrix contains: 627539 unique barcode IDs, and 40 replicates.

exp200921_dose response osim

Barcode matrix contains: 1067031 unique barcode IDs, and 20 replicates.

exp220322

Barcode matrix contains: 93132 unique barcode IDs, and 66 replicates.

exp271221

Barcode matrix contains: 844434 unique barcode IDs, and 80 replicates.

exp281022

Barcode matrix contains: 315642 unique barcode IDs, and 22 replicates.

exp281022_time course

Barcode matrix contains: 398004 unique barcode IDs, and 51 replicates.

exp300821

Barcode matrix contains: 413335 unique barcode IDs, and 28 replicates.

(a) Kept Batches

Experience Name

Features

exp130921_in vivo

Barcode matrix contains: 813579 unique barcode IDs, and 16 replicates.

exp200921_dose response osim

Barcode matrix contains: 813579 unique barcode IDs, and 20 replicates.

(b) Removed batches
Table 1.1: Overview of the Barcoding Batches.

From Table 1.1, 13 barcode experiences are included in the analysis, whereas 2 batches/experiences are filtered out.

1.3 Phenotype metadata cleaning

  • General question: what’s the difference between Run Date and Experiment Date?

Most of the R instructions reported in this section aim at rendering the Table of compounds file compliant with the tidy format, see Tip 1.1. These data-wrangling operations are split in 4 steps:

  1. In Listing 1.4, we homogenise Concentrations, Date and Duration to ISO standards, while dealing with erroneous missing values generated by fused Excel cells. Besides, we removed special, non-ascii characters that could not be processed by visualisation plots, such as \(NF-\kappa B\).

  2. In Listing 1.5, we retrieve from each individual barcode counts profile, the individual replicates ID identifying unequivocally each replicate. We set apart replicates that could be mapped back to original Table of compounds file.

  3. In Listing 1.6, we merge together in a comprehensive phenotype table the short Samples_ID (short term referring to drugs listed in Compound column), the full Compound (complete name of drugs) and map each of them to its complete list of replicates (from 2 to 8). We save the output in data-derived.

  4. Finally, in Listing 1.7 and Listing 1.8, we perform several posterior data integrity checks to verify if i) we were able to map each individual replicate ID to its original ID in Table of compounds, and ii) if the number of replicates reported in Table of compounds was consistent with the number of barcode profiles.

Listing 1.4: Coerce original Excel file reporting experimental design to tidy format.
Code
pheno_colnames <- c(Pathway = "Pathways", MoA = "...2", 
                    Compound = "Samples", Replicates = "Replicates",
                    Concentrations = "Concentrations", Date = "Experiment date",
                    `Run date` = "Run date", Duration="Duration", 
                    Kept="Kept", Comments = "Comments")

## prune irrelevant colnames ----
## data/Table of compounds_stringent_2025-04-18.xlsx
pheno_data <- readxl::read_xlsx("data/Table of compounds_whole_2025-04-28.xlsx",
                                sheet = "Experimental Design",
                                col_types = c(rep("text", 3), "numeric", 
                                              rep("text", 4), "logical", "text")) |>
  dplyr::rename(dplyr::all_of(pheno_colnames)) |>
  tidyr::fill(Pathway, MoA, .direction = "down")

pheno_data_discarded <- pheno_data |> 
  dplyr::filter(!Kept)

## Add batches and correct for fused Excel cells ----

pheno_data <- pheno_data |> 
  ## extract last 6 numbers
  dplyr::mutate(Compound = if_else(grepl("Control", MoA, ignore.case = TRUE),
                                  MoA, Compound),
                `Run date` = stringr::str_sub(`Run date`, -6, -1),
                Batch_ID= paste0("exp", Date)) |> 
  tidyr::fill(Compound, .direction = "down") |>
  ## remove unwanted replicates
  dplyr::filter(Kept) |> 
  dplyr::mutate(Batch_ID = dplyr::if_else(Comments %in% c("Dose Response"), 
                                          "exp200921_dose response osim", Batch_ID)) |> 
  dplyr::mutate(Batch_ID = dplyr::if_else(Comments %in% c("Time Course"), 
                                          "exp281022_time course", Batch_ID), 
                Pathway = if_else(is.na(Pathway), MoA, enc_to_ascii(Pathway))) |> 
  dplyr::select(- Comments, -Kept) |> 
  dplyr::distinct(.keep_all = TRUE) ## remove duplicates

## format concentrations, creating both 'Concentrations_ID' and 'Concentrations' in Moles ----
pheno_data <- pheno_data |> 
  dplyr::mutate(Concentrations=if_else(Compound=="Control", "000 uM", Concentrations)) |> 
  dplyr::mutate(Concentrations=if_else(Compound=="Osimertinb+sorafenib", "015 uM", Concentrations)) |>
  tidyr::separate_wider_delim(Concentrations, delim = " ", names = c("Concentrations Value", "Unit"), cols_remove = FALSE) |> 
  dplyr::rename(OLD_Concentrations=Concentrations) |> 
  dplyr::mutate(Unit = case_when(
    Unit %in% c("uM", "µM", "µg/ml") ~ "u",
    Unit == "nM" ~ "n", 
    Unit %in% c("mg/kg", "mM") ~ "m", 
  )) |> 
  dplyr::mutate(Concentrations=parse_number(`Concentrations Value`, trim_ws = TRUE, 
                                            locale = locale(decimal_mark = ",", grouping_mark = "."))) |> 
   dplyr::mutate(Concentrations = case_when(
    Unit == "u" ~ Concentrations *10^-6,
    Unit == "n" ~ Concentrations *10^-9, 
    Unit == "m" ~ Concentrations *10^-3)) |> 
  dplyr::mutate(`Concentrations Value`=parse_number(`Concentrations Value`, trim_ws = TRUE, 
                                            locale = locale(decimal_mark = ",", grouping_mark = ".")) |> 
                  format_concentrations()) |> 
  tidyr::unite(col ="Concentrations_ID", `Concentrations Value`, Unit, sep = "")

## format Durations, creating both 'Duration_ID' and 'Duration' as an integer value in Days ----
pheno_data <- pheno_data |> 
  ## extract last 6 numbers, as the only ones used for identification of samples downstream
  dplyr::mutate(Duration_ID = Duration,
                Duration = parse_number(Duration, trim_ws = TRUE, 
                                        locale = locale(decimal_mark = ".")), 
                Duration = dplyr::if_else(grepl("m$",  Duration_ID),
                                          Duration*30, Duration)) 
  
  • In Run date, when there was a range instead of a fixed Date, I kept only the end of the interval, as used for labelling colnames in barcode counts.

  • Homogenise units for the Concentrations colname: convert everything to moles.

  • Homogenise units for Date and Run date, using international ISO 8601 format: YYYY-MM-DD

In Listing 1.5 and Listing 1.6, we identify which raw barcode counts matrices avalaible in folder barcode-counts are not reported in the global experimental dataset, and reciprocally. Finally, we only keep experiences that are both reported in Table of compounds and present in barcode-counts folder..

Listing 1.5: Identify shared experience IDs between experimental design table (Table of compounds), and barcode-count counts profiles.
Code
## keep only relevant files 
barcodes_filenames <- list.files("./data/barcode-counts/",
                                pattern = "^exp.*\\.csv$")

barcodes_IDs <- readxl::read_xlsx("data/Table of compounds_whole_2025-04-28.xlsx",
                                       sheet="Batch Mapping") |> 
  select(Filename, Batch_ID) |> 
  filter(!Batch_ID %in% c("exp130921_in vivo")) |> 
  mutate(Filename =paste0(Filename, ".csv"), 
         Date = stringr::str_extract(Batch_ID, "(?<=^exp)[[:digit:]]{6}"))

To generate the final phenotype dataset, we only keep experiences that were reported both in Table of Compound and folder barcode-counts in Listing 1.6. Please ensure that Drugs Mapping sheet in Table of compounds is correct.

Listing 1.6: Map each compound to its full list of replicates. Generate a Batch_ID to unequivocally ientify each run of experiences, and convert all run Dates to their international format.
Code
## step 1) keep only shared Batch_ID experiences ----
barcodes_IDs <- barcodes_IDs |>
  dplyr::semi_join(pheno_data, by="Batch_ID")
## exp201021 is discarded, that's normal, we just want to keep the controls!!
pheno_data <- pheno_data |>
  dplyr::semi_join(barcodes_IDs, by="Batch_ID")

## step 2) extract individual replicate IDs directly from sample experiences ----
## Filename <- "exp281022.csv"; Batch_ID <- "exp281022"; Date <- "281022"
ID_mapping <- purrr::pmap(barcodes_IDs, function(Filename, Batch_ID, Date) {
  ## build path
  counts_path <- file.path("data","barcode-counts", 
                               paste0(Filename))
  
  ## extract replicate names (reading the header)
  replicates_ID <- strsplit(readLines(counts_path, n = 1), split = ",")[[1]] |>
    str_subset("\\S") |> ## remove empty strings and 'key'
    str_subset("barcode_id", negate = TRUE)

  ## ID: combination of letters, numbers and -, followed by '[1-4]_', starting the sample's name
  dataset_ID <- tibble::tibble(Batch_ID = Batch_ID, 
                               Date = stringr::str_extract(replicates_ID,
                                                           "(?<=_exp)[[:alnum:]]{6}"),
                               Replicates_ID = replicates_ID,
                               `Run date` = stringr::str_extract(replicates_ID,
                                                                 "(?<=_run)[[:alnum:]]{6}(?=_)"),
                               Concentrations_ID = stringr::str_extract(replicates_ID, 
                                                                        "(?<=[1-8]_)[[:digit:]]{3}[gmnux]{1}(?=_exp)")) |> 
    ## account for second notation syntax for concentrations
    dplyr::mutate(Concentrations_ID = if_else(is.na(Concentrations_ID), 
                                              stringr::str_extract(replicates_ID,
                                                                   "(?<=[1-8]_)[0-9]{1}\\.[0-9]{2}[gmnux]{1}(?=_exp)"),
                                              Concentrations_ID))
  
  ## Deal with two time notations
  if (grepl("Time",Batch_ID, ignore.case = TRUE)) {
    dataset_ID <- dataset_ID |> 
      dplyr::mutate (Samples_ID = stringr::str_extract(replicates_ID,
                                                       "(?<=_)[[[:alnum:]]\\-]+(?=[1-8]_)"), 
                     Duration_ID=stringr::str_extract(replicates_ID,
                                                       "^[[[:alnum:]]\\.]{1,3}[dm](?=_)"))
  }
  else {
      dataset_ID <- dataset_ID |> 
      dplyr::mutate (Samples_ID = stringr::str_extract(replicates_ID,
                                                       "^[[[:alnum:]]\\-]+(?=[1-8]{1}_)"), 
                     Duration_ID="9d")
  }
                                 
  return(dataset_ID)
}) |>
  purrr::list_rbind()

## deal with specific p42 and p43
ID_mapping <- ID_mapping |> 
  dplyr::mutate(Samples_ID = if_else(grepl("^p42", Replicates_ID), "p42", Samples_ID), 
                Samples_ID = if_else(grepl("^p43", Replicates_ID), "p43", Samples_ID))

## remove irrelavant samples
ID_mapping <- ID_mapping |> 
  dplyr::filter(!grepl("^p42|^p43|^CTRL", Replicates_ID))


## detect miscatech samples
## ID_mapping_missing <- ID_mapping |>
##   filter(if_any(everything(), is.na))

## step 3) map short compound IDs with full compound names ----
mapping_compounds <- readxl::read_xlsx("data/Table of compounds_whole_2025-04-28.xlsx",
                                sheet = "Drugs Mapping")
ID_mapping <- ID_mapping |> 
  inner_join(mapping_compounds, by = "Samples_ID")
## test <- ID_mapping |> anti_join(mapping_compounds, by = "Samples_ID")
## test2 <- mapping_compounds |> anti_join(ID_mapping, by = "Samples_ID")

Finally, in Listing 1.7, we map all replicates (one column in a given barcode count matrix) to its comprehensive metadata descripion (stored in Tables of compounds), using as primary and foreign keys the following 6 variables: Batch_ID, Compound, Duration_ID, Run date, Date, and Concentrations_ID.

Listing 1.7: Extract discarded replicates
Code
replicates_discarded <- dplyr::anti_join(ID_mapping, pheno_data,
                                     by = c("Batch_ID",  "Compound", "Duration_ID",
                                            "Run date", "Date", "Concentrations_ID"))

pheno_data_unmapped <- dplyr::anti_join(pheno_data,
                                     ID_mapping,
                                     by = c("Batch_ID",  "Compound", "Duration_ID",
                                            "Run date", "Date", "Concentrations_ID"))

pheno_data_unmapped |> 
  arrange(Compound, Date, Concentrations_ID) |> 
  flextable() |> 
  bold(part = "header")

Pathway

MoA

Compound

Replicates

Concentrations_ID

OLD_Concentrations

Date

Run date

Duration

Batch_ID

Concentrations

Duration_ID

Table 1.2: Identify set of experiences reported in Table of compounds that could not be mapped against their corresponding Barcode counts replicates. We check this information with an anti_join between original phenotype and counts, returning samples from Table of Compound that could not have been mapped back.

In Table 1.3, we check discrepancies between the number of barcode replicates reported in Table of compounds with the number of replicates avalaible in barcode counts matrices. And it turned out that compound: XAV-939, Date: 220322, is reported with 4 replicates, while only 2 could be found in exp220322 barcode count matrix

Listing 1.8: Secound round of data wrangling quality controls on counts versus phenotype data, focusing on divergent number of replicates.
Code
replicate_inconsistencies <- pheno_data |> 
  dplyr::inner_join(ID_mapping,
                    by = c("Batch_ID", "Date", 
                           "Compound", "Run date",
                           "Concentrations_ID", "Duration_ID")) |>
  group_by(Batch_ID, Compound, Date, Replicates, Concentrations_ID,Duration_ID) |>
  summarise(n=n()) |>
  filter(n!=Replicates)

flextable(replicate_inconsistencies) |>
  autofit() |>
  bold(part = "header")

Batch_ID

Compound

Date

Replicates

Concentrations_ID

Duration_ID

n

exp220322

XAV-939

220322

4

080u

9d

2

Table 1.3: Secound round of data wrangling quality controls on counts versus phenotype data, focusing on divergent number of replicates.

1.3.1 Save phenotype Metadata

By applying all the formatting analyses reported in Section 1.3, we generate in Table 1.4 a global metadata phenotypic table, adhering to tidy principles Tip 1.1.

  1. Each variable has its own column.
  2. Each observation has its own row – Every row corresponds to one observation.
  3. Each value has its own cell – Each cell contains a single, unique value.

This format streamlines data wrangling, and generally speaking, data analysis and visualisation. In other words, prefer simpler CSV formats for the experimental design, and avoid cell fusion in Excel documents. Finally, for the formatted tabular representation in documentations, I use flextable.

Listing 1.9: Save the final metadata annotations for barcodes, using Today’s date for historical versioning.
Code
## step 4) Use international date formats ----
pheno_data_formatted <- pheno_data |> 
  dplyr::inner_join(ID_mapping,
                    by = c("Batch_ID", "Date", 
                           "Compound", "Run date",
                           "Concentrations_ID", "Duration_ID")) |> 
  dplyr::select(Batch_ID, Pathway, MoA,
                Compound, Samples_ID, 
                Replicates, Replicates_ID,
                Concentrations, Concentrations_ID, 
                Date, `Run date`, Duration, Duration_ID) |> 
  ## convert to ISO 1860 Date format
  mutate(Date = lubridate::dmy(Date) |> format(),
         `Run date`= lubridate::dmy(`Run date`), 
  ## artifically de-duplicate controls for exp, simplify the analytical workflow 
  Replicates_ID = dplyr::if_else(Batch_ID %in% c("exp200921_dose response osim") &
                                   Compound=="Control",
                                 gsub("exp200921_", "exp200921_dose_response_", Replicates_ID),
                                 Replicates_ID))

## 2) QC: guarantee uniqueness of the replicates
pheno_data_deduplicated <- pheno_data_formatted |> 
  dplyr::distinct(Replicates_ID, .keep_all = TRUE)
all.equal(pheno_data_deduplicated, pheno_data_formatted)
##  [1] TRUE

## 3) save as .xlsx file discarded and preserved replicates

barcode_counts_summaries <- list("kept counts" = kept_barcode_summaries,
                                 "kept compounds" = pheno_data,
                                 "kept replicates" = pheno_data_formatted |> 
                                   select(Compound, Replicates_ID),
                                 "discarded counts" = barcode_discarded_summaries, 
                                 "discarded compounds"=pheno_data_discarded, 
                                 "discarded replicates" = replicates_discarded)

openxlsx::write.xlsx(x = barcode_counts_summaries,
                     file = paste0("./data/logs/barcode_metadata_overview_",
                                   today_date,".xlsx"),
                     overwrite = TRUE, asTable = TRUE)


## 4) save the tidy phenotype metatada table ----
readr::write_csv(pheno_data_formatted,
                 file = paste0("data/pheno_data_metadata_", today_date,".csv"))

flextable::flextable(head(pheno_data_formatted)) |> 
  autofit() |> 
  bold(part="header")

Batch_ID

Pathway

MoA

Compound

Samples_ID

Replicates

Replicates_ID

Concentrations

Concentrations_ID

Date

Run date

Duration

Duration_ID

exp010821

Control

Control

Control

Contro

4

Contro1_000u_exp010821_run180821_03

0

000u

2021-08-01

2021-08-18

9

9d

exp010821

Control

Control

Control

Contro

4

Contro2_000u_exp010821_run180821_04

0

000u

2021-08-01

2021-08-18

9

9d

exp010821

Control

Control

Control

Contro

4

Contro3_000u_exp010821_run180821_05

0

000u

2021-08-01

2021-08-18

9

9d

exp010821

Control

Control

Control

Contro

4

Contro4_000u_exp010821_run180821_06

0

000u

2021-08-01

2021-08-18

9

9d

exp040821

Control

Control

Control

Contro

4

Contro1_000u_exp040821_run180821_25

0

000u

2021-08-04

2021-08-18

9

9d

exp040821

Control

Control

Control

Contro

4

Contro2_000u_exp040821_run180821_26

0

000u

2021-08-04

2021-08-18

9

9d

Table 1.4

Conclusion: on a total of 544, 532 replicates are kept, and 12 replicates were removed.