2  Methods

2.1 Reproducibility

I list below the R packages required to reproduce the analyses.

Code
## data wrangling
library(dplyr)
library(tidyr)
library(stringr)
library(readr)
library(readxl)
library(purrr)

## plotting
library(ggplot2)

## reporting
library(flextable)

# automated package linting
library(xml2)
library(downlit)

## auxiliary functions
source("R/utils.R")
source("R/preprocessing.R")
source("R/differential_barcoding.R")
source("R/convert_bioc_objects.R")
today_date <- "2025-07-01"
# today_date <- format(Sys.Date(), "%Y-%m-%d")

2.2 Export to curated formats

  • Read all barcode profiles and merge them on column barcode_id.
Listing 2.1: Import all raw barcode .tsv files, and add artificial controls for experiment exp200921_dose_response.
Code
pheno_data <- readr::read_csv(paste0("./data/replicates_barcode_metadata_",
                                     today_date, ".csv"), 
                              show_col_types = FALSE) 
barcode_files <- unique(pheno_data$Filename)

## 1) artificially add duplicated metadata ----
artificial_controls_metadata <- pheno_data |>
  dplyr::filter(Compound == "Control" & Batch_ID == "exp200921") |>
  dplyr::mutate(Batch_ID = "exp200921_dose_response", 
                Replicates_ID = gsub("run111021", "run101121", Replicates_ID))
pheno_data <- pheno_data |>
  dplyr::bind_rows(artificial_controls_metadata) |> 
  # add a unique column per compound and biological condition
  dplyr::mutate(Unique_compound = paste0(Compound, "_", Batch_ID, "_",
                                        "Conc: ", Concentrations_Formatted, "_",
                                        "Days: ", Duration, sep = "_"))

barcode_counts_aggregated <- purrr::map(barcode_files, function(filename) {
  barcode_filepath <- file.path("./data/barcode-counts", filename)
  barcode_counts <- readr::read_tsv(barcode_filepath, 
                                    show_col_types = FALSE)
  
  ## 2) artificially add duplicated control samples for dose-response ----
  if (filename == "exp200921.tsv") {
    barcode_duplicated_controls <- barcode_counts |> 
      dplyr::select("barcode_id", dplyr::matches("^Contro(.*)_000u_exp200921_run111021_")) |> 
      dplyr::rename_with(
        .fn = ~ gsub("run111021", "run101121", .x),
        .cols = starts_with("Contro"))
    
    barcode_counts <- barcode_counts |> 
      dplyr::inner_join(barcode_duplicated_controls, by = "barcode_id")
  }
  
  return(barcode_counts)
}) |> 
  purrr::reduce(~ inner_join(.x, .y, by = "barcode_id"))

## 3) check that we can map each barcode replicate to its corresponding metadata information
stopifnot(identical(setdiff(pheno_data$Replicates_ID, colnames(barcode_counts_aggregated)), character(0)))
gc()
##             used  (Mb) gc trigger   (Mb)  max used   (Mb)
##  Ncells  1304414  69.7    6400028  341.8   5155219  275.4
##  Vcells 27213528 207.7  505679897 3858.1 632092217 4822.5

The resulting barcode matrix stores 37689 unique barcode IDs, and 598 unique replicates (with 4 artificially duplicated controls for dose response, Date: 20-09-2021).

2.2.1 DGEList object

  • Export to EdgeR::DGEList format.
Listing 2.2: Export to DGEList format.
Code
rm(artificial_controls_metadata)
# prepare barcode counts ----
counts <- barcode_counts_aggregated |> 
  tibble::column_to_rownames("barcode_id") |> 
  as.matrix()
## ensure consistent order
counts <- counts[, pheno_data$Replicates_ID]

rownames(pheno_data) <- pheno_data$Replicates_ID
DRB_count <- edgeR::DGEList(counts = counts, 
                            samples = pheno_data, 
                            group = pheno_data$Compound,
                            remove.zeros = TRUE)
saveRDS(DRB_count, 
        paste0("./data/processed/DGEList_before_filtering_", today_date, ".rds"))

2.2.2 SummarizedExperiment object

Listing 2.3: Export to SummarizedExperiment format.
Code
SE_object <- DGEList2SE(DRB_count)
# check that both expression matrices store the same content
stopifnot(identical(DRB_count$counts, SummarizedExperiment::assay(SE_object)))
saveRDS(SE_object, 
        paste0("./data/processed/SummarizedExperiment_before_filtering_", today_date, ".rds"))

rm(barcode_counts_aggregated, counts, pheno_data, barcode_files)

2.3 Barcode filtering

  1. Load DGEList object, removing irrelevant controls:
Code
old_DRB_count <- readRDS(paste0("./data/processed/DGEList_before_filtering_", 
                            today_date, ".rds"))
# remove Zero-controls, P42 and P43 experiences
DRB_count <- old_DRB_count[, !old_DRB_count$samples$Compound %in% c("Control - time zero", "P42", "P43", "Control - T75")]

By removing Time-Zero-controls, P42 and P43, we switch from 598 to 574 unique biological replicates.

  1. Eliminate barcodes for which the combined counts of the 4 controls per barcode are below a given threshold, here 4, in Listing 2.4:
Listing 2.4: Remove background noise.
Code
rm(old_DRB_count)
DRB_filtered_replicates <- background_noise_filtering(dgeObject = DRB_count, 
                                                      group = "Batch_ID",
                                                      compound = "Control",
                                                      thresh_background = 4)
saveRDS(DRB_filtered_replicates, 
        file = paste0("./data/processed/DRB_after_filtering_",
                          today_date, ".rds"))

2.4 Normalisation of barcoding depths

Preprocessing and normalisation functions for transcriptomics: a general overview.

  • CPM normalisation (using a different scaling factor):
Listing 2.5: CPM normalisation (with differing scaling factor, here \(100,000\))
Code
DRB_CPM_normalised_replicates <- cpm_normalisation(dgeObject = DRB_filtered_replicates,
                                                   scaling_factor = 1e5)
saveRDS(DRB_CPM_normalised_replicates, 
        file = paste0("./data/processed/DRB_CPM_normalised_replicate_",
                          today_date, ".rds"))

After filtering for background noise, and removing samples with a total of 0 barcodes,4643 unique barcode IDs are kept for 574 unique biological replicates.

  • Using edgeR::cpm normalisation, with \(\log\) factor transformation:

    • Library sizes are normalized using effective library sizes, which are the raw library totals multiplied by normalization factors usually estimated by TMM. More information available on edgeR::calcNormFactors(). The scaling normally ensures that fair comparison of expression values across samples with different sequencing depths and composition. Seems requiring first call to edgeR::calcNormFactors() to make it work!!!
    • prior.count adds a small offset before \(log\)-transformation to avoid -Inf values.
    • Besides, we \(log_2\) normalise the values such that they better fit Gaussian distributions.
Code
DRB_logCPM_normalised_replicates <- DRB_filtered_replicates
DRB_logCPM_normalised_replicates$counts <- edgeR::cpm(y = DRB_filtered_replicates, 
                                                      log = TRUE, 
                                                      prior.count = 2, 
                                                      normalized.lib.sizes = TRUE)

saveRDS(DRB_logCPM_normalised_replicates, 
        file = paste0("./data/processed/DRB_logCPM_normalised_replicate_",
                          today_date, ".rds"))

2.5 Differential barcoding

2.5.1 CPM differential analysis + binarisation

  1. Binarise Fold-changes (only those above a pre-determined threshold) per replicate:
Listing 2.6: Compute the averaged value for control replicates, then binarise for each compound. A 1 denoting a significant positive enrichment for a given barcode_id.
Code
DRB_binarised_replicates <- differential_binarisation(DRB_CPM_normalised_replicates,
                                                      group = "Batch_ID",
                                                      ref_level = "Control",
                                                      threshold_FC = 3)
saveRDS(DRB_binarised_replicates,
        file = paste0("./results/differential_analyses/DRB_binarised_replicate_",
                      today_date, ".rds"))

We removed 60 control samples (all remaining samples being associated with an effective drug).

  1. Aggregate replicates with AND operator1:
Code
# Use an all function as aggregator, but could have been a mean or sum as well.
DRB_binarised_samples <- collapseSamples(DRB_binarised_replicates,
                                         group = "Unique_compound",
                                         showReps = FALSE, 
                                         sample_colname = "Replicates_ID", 
                                         method = all)

null_barcodes <- which(rowSums(DRB_binarised_samples$counts) == 0)
if (length(null_barcodes) > 0L) {
  message(paste("We removed a total of", length(null_barcodes),
                "over a total of", nrow(DRB_binarised_samples), "barcode IDs.
                  None of them being fully differential in at least one compound."))
  DRB_binarised_samples <- DRB_binarised_samples[-null_barcodes,]
}

saveRDS(DRB_binarised_samples,
        file = paste0("./results/differential_analyses/DRB_binarised_samples_",
                      today_date, ".rds"))

2.5.2 logCPM differential analysis

logCPM DRB approach considers the following steps:

  1. Compute averaged expression per compound and batch.
  2. Compute the log fold-change (ratio on the original space, difference after \(\log_2\) transformation) fo each compound with respect to Controls.
  3. Remove barcodes not expressing a significant barcode change (\(|\log_2 (FC)| < 1\)) in at least one compoundNone of the barcodes out not removing any.
Code
DRB_logCPM_normalised_samples <- differential_barcoded_logCPM(DRB_CPM_normalised_replicates,
                                                              group = "Batch_ID",
                                                              ref_level = "Control",
                                                              threshold_LFC = log(3))
saveRDS(DRB_logCPM_normalised_samples,
        file = paste0("./results/differential_analyses/DRB_logCPM_samples_",
                      today_date, ".rds"))

2.5.3 DeSeq2 differential analysis + shrinkage

  1. We removed batches “exp281022_time_course”, “exp200921_dose_response”, “exp130921_in_vivo”, as we will use different experimental designs for these batches:
Code
selected_replicates_deseq2 <- DRB_filtered_replicates$samples |> 
  # Remove unwanted batches, and compounds
  dplyr::filter(!(Batch_ID %in% c("exp281022_time_course", "exp200921_dose_response", "exp130921_in_vivo") | 
                    Compound %in% c("Control - time zero", "P42", "P43"))) |> 
  row.names()
DRB_filtered_replicates_Deseq2 <- DRB_filtered_replicates[, selected_replicates_deseq2]
  1. In Table 2.1, we set apart three categories of compounds:
    1. For each compound available at different concentrations, we select the experiment exhibiting highest concentrations, and discard others, see Table 2.1 (b).
    2. We identify compounds differing only by the batch effect (same concentration, same treatment duration, but different batch), reported in Table 2.1 (c) -> two-fold objective: evaluate the magnitude of the batch effect with respect to the treatment effect + merge treatment effects across several batches, borrowing ideas from meta-analysis.
Code
rm(DRB_count, DRB_filtered_replicates, selected_replicates_deseq2)

# compounds in several batches, possibly with #conditions ----
compounds_with_distinct_concentrations <- DRB_filtered_replicates_Deseq2$samples |> 
  dplyr::select(-Replicates_ID) |> 
  dplyr::select(Batch_ID, Pathway, MoA, Compound, Unique_compound, 
                Concentrations_Formatted, Concentrations) |> 
  dplyr::distinct() |> 
  dplyr::group_by(Compound) |> 
  dplyr::mutate(n_batches = dplyr::n()) |> 
  dplyr::filter(n_batches > 1) |> 
  tidyr::chop(cols = c(Batch_ID, Unique_compound)) |> 
  dplyr::arrange(Compound)

# first selection, for each compound with # concentrations, select one with highest concentration
compounds_selected <- compounds_with_distinct_concentrations |> 
  dplyr::slice_max(order_by = Concentrations, n = 1, na_rm = TRUE)
compounds_with_distinct_concentrations_removed <- compounds_with_distinct_concentrations |> 
  dplyr::anti_join(y = compounds_selected, 
                   by = join_by(Compound, Concentrations_Formatted, Concentrations, n_batches, Batch_ID, Unique_compound))
samples_with_distinct_concentrations_removed <- compounds_with_distinct_concentrations_removed |>  tidyr::unchop(cols = c(Unique_compound)) |> 
  dplyr::pull(Unique_compound) |> 
  unique()
replicates_with_distinct_concentrations_removed <- DRB_filtered_replicates_Deseq2$samples |> 
    dplyr::filter(Unique_compound %in% c(samples_with_distinct_concentrations_removed)) |> 
    dplyr::pull(Replicates_ID)
DRB_filtered_replicates_Deseq2 <- DRB_filtered_replicates_Deseq2[,                                                                 setdiff(colnames(DRB_filtered_replicates_Deseq2), replicates_with_distinct_concentrations_removed)]

# set apart replicates present in # batches from those available in only one batch ----
pheno_data_DESeq2_several_batches <- compounds_selected |> 
  dplyr::filter(lengths(Batch_ID) > 1L) |> 
  dplyr::filter(Compound != "Control") |> 
  tidyr::unchop(cols = c(Batch_ID, Unique_compound))
row.names(pheno_data_DESeq2_several_batches) <- paste(pheno_data_DESeq2_several_batches$Batch_ID,
                                                      pheno_data_DESeq2_several_batches$Compound,
                                                      sep = "_")
# Table visualisations ----
flextable::flextable(compounds_with_distinct_concentrations) |> 
  flextable::bold(part = "header")

flextable::flextable(compounds_with_distinct_concentrations_removed) |> 
  flextable::bold(part = "header")

flextable::flextable(pheno_data_DESeq2_several_batches) |> 
  flextable::bold(part = "header")

Pathway

MoA

Compound

Concentrations_Formatted

Concentrations

n_batches

Batch_ID

Unique_compound

Proteasome

Proteasome inhibitor

Bortezomib

20 nM

0.00000002

2

[[character]]

[[character]]

RAS-MAPK

RAF inhibitor

CCT196969

1 uM

0.00000100

2

[[character]]

[[character]]

Chemotherapy

Alkylating agent

Cisplatin

500 nM

0.00000050

2

[[character]]

[[character]]

Chemotherapy

Alkylating agent

Cisplatin

250 nM

0.00000025

2

[[character]]

[[character]]

Control

Control

Control

000 uM

0.00000000

11

[[character]]

[[character]]

RTK

EGFR inhibitor (1st gen.)

Gefitinib

2 uM

0.00000200

3

[[character]]

[[character]]

RTK

PROTAC conjugated EGFR inhibitor

Gefitinib-PROTAC-1

10 uM

0.00001000

2

[[character]]

[[character]]

RAS-MAPK

RAF inhibitor

LY3009120

3 uM

0.00000300

2

[[character]]

[[character]]

RTK

EGFR inhibitor (3rd gen.)

Osimertinb

100 nM

0.00000010

3

[[character]]

[[character]]

Chemotherapy

Antifolate

Pemetrexed

100 nM

0.00000010

3

[[character]]

[[character]]

Chemotherapy

Antifolate

Pemetrexed

80 nM

0.00000008

3

[[character]]

[[character]]

RTK

Inhibitor of RAF, VEGFR2, VEGFR3, PDGFRβ, FLT3 and c-Kit

Sorafenib

5 uM

0.00000500

2

[[character]]

[[character]]

RAS-MAPK

MEK inhibitor

Trametinib

30 nM

0.00000003

3

[[character]]

[[character]]

RAS-MAPK

MEK inhibitor

Trametinib

40 nM

0.00000004

3

[[character]]

[[character]]

Novel compound

unknown

X13271

5 uM

0.00000500

5

[[character]]

[[character]]

Novel compound

unknown

X13271

8 uM

0.00000800

5

[[character]]

[[character]]

Novel compound

unknown

X13271

9 uM

0.00000900

5

[[character]]

[[character]]

Novel compound

unknown

X17129

8 uM

0.00000800

2

[[character]]

[[character]]

(a) Compounds present in different batches, and or different concentrations

Pathway

MoA

Compound

Concentrations_Formatted

Concentrations

n_batches

Batch_ID

Unique_compound

Chemotherapy

Alkylating agent

Cisplatin

250 nM

0.00000025

2

[[character]]

[[character]]

Chemotherapy

Antifolate

Pemetrexed

80 nM

0.00000008

3

[[character]]

[[character]]

RAS-MAPK

MEK inhibitor

Trametinib

30 nM

0.00000003

3

[[character]]

[[character]]

Novel compound

unknown

X13271

5 uM

0.00000500

5

[[character]]

[[character]]

Novel compound

unknown

X13271

8 uM

0.00000800

5

[[character]]

[[character]]

(b) Compounds discarded: when distinct concentrations were avalaible, take one with highest concentration.

Pathway

MoA

Compound

Concentrations_Formatted

Concentrations

n_batches

Batch_ID

Unique_compound

Proteasome

Proteasome inhibitor

Bortezomib

20 nM

0.00000002

2

exp200921

Bortezomib_exp200921_Conc: 20 nM_Days: 9_

Proteasome

Proteasome inhibitor

Bortezomib

20 nM

0.00000002

2

exp271221

Bortezomib_exp271221_Conc: 20 nM_Days: 9_

RAS-MAPK

RAF inhibitor

CCT196969

1 uM

0.00000100

2

exp151121

CCT196969_exp151121_Conc: 1 uM_Days: 9_

RAS-MAPK

RAF inhibitor

CCT196969

1 uM

0.00000100

2

exp300821

CCT196969_exp300821_Conc: 1 uM_Days: 9_

RTK

EGFR inhibitor (1st gen.)

Gefitinib

2 uM

0.00000200

3

exp151121

Gefitinib_exp151121_Conc: 2 uM_Days: 9_

RTK

EGFR inhibitor (1st gen.)

Gefitinib

2 uM

0.00000200

3

exp281022_t25

Gefitinib_exp281022_t25_Conc: 2 uM_Days: 9_

RTK

EGFR inhibitor (1st gen.)

Gefitinib

2 uM

0.00000200

3

exp300821

Gefitinib_exp300821_Conc: 2 uM_Days: 9_

RTK

PROTAC conjugated EGFR inhibitor

Gefitinib-PROTAC-1

10 uM

0.00001000

2

exp151121

Gefitinib-PROTAC-1_exp151121_Conc: 10 uM_Days: 9_

RTK

PROTAC conjugated EGFR inhibitor

Gefitinib-PROTAC-1

10 uM

0.00001000

2

exp281022_t25

Gefitinib-PROTAC-1_exp281022_t25_Conc: 10 uM_Days: 9_

RAS-MAPK

RAF inhibitor

LY3009120

3 uM

0.00000300

2

exp130921

LY3009120_exp130921_Conc: 3 uM_Days: 9_

RAS-MAPK

RAF inhibitor

LY3009120

3 uM

0.00000300

2

exp151121

LY3009120_exp151121_Conc: 3 uM_Days: 9_

RTK

EGFR inhibitor (3rd gen.)

Osimertinb

100 nM

0.00000010

3

exp010821

Osimertinb_exp010821_Conc: 100 nM_Days: 9_

RTK

EGFR inhibitor (3rd gen.)

Osimertinb

100 nM

0.00000010

3

exp040821

Osimertinb_exp040821_Conc: 100 nM_Days: 9_

RTK

EGFR inhibitor (3rd gen.)

Osimertinb

100 nM

0.00000010

3

exp300821

Osimertinb_exp300821_Conc: 100 nM_Days: 9_

Chemotherapy

Antifolate

Pemetrexed

100 nM

0.00000010

3

exp010821

Pemetrexed_exp010821_Conc: 100 nM_Days: 9_

Chemotherapy

Antifolate

Pemetrexed

100 nM

0.00000010

3

exp040821

Pemetrexed_exp040821_Conc: 100 nM_Days: 9_

RTK

Inhibitor of RAF, VEGFR2, VEGFR3, PDGFRβ, FLT3 and c-Kit

Sorafenib

5 uM

0.00000500

2

exp151121

Sorafenib_exp151121_Conc: 5 uM_Days: 9_

RTK

Inhibitor of RAF, VEGFR2, VEGFR3, PDGFRβ, FLT3 and c-Kit

Sorafenib

5 uM

0.00000500

2

exp300821

Sorafenib_exp300821_Conc: 5 uM_Days: 9_

Novel compound

unknown

X17129

8 uM

0.00000800

2

exp181021

X17129_exp181021_Conc: 8 uM_Days: 9_

Novel compound

unknown

X17129

8 uM

0.00000800

2

exp271221

X17129_exp271221_Conc: 8 uM_Days: 9_

(c) Final selection of compounds for batch:compound interaction evaluation
Table 2.1: Identify compounds shared across several batches
  1. We run DESeq2 differential analyses per batch, adopting the following changes compared with standard approach:
    1. Standard estimation of size factors for normalisation of distinct barcoding libary depths using the “median ratio method”
    2. Estimation of individual gene dispersions using “local” regression2
    3. Wald-test to identify DRBs for each contrast of interest, namely Compound versus Control.
Code
rm(compounds_selected, compounds_with_distinct_concentrations,
   compounds_with_distinct_concentrations_removed, replicates_with_distinct_concentrations_removed)

# Retrieve detailed differential analyses reporting by batch and compound
DRB_Reporting_Deseq2_batch_by_compound <- quantify_drug_effect_by_batch(DRB_filtered_replicates_Deseq2,
                                                                        reference_drug = "Control",
                                                                        batch_variable = "Batch_ID",
                                                                        drug_variable = "Compound",
                                                                        lfcThreshold = log(3), 
                                                                        shrinkage = TRUE)
saveRDS(DRB_Reporting_Deseq2_batch_by_compound, 
        file = paste0("./results/differential_analyses/DESeq2_detailled_differential_reporting_", today_date, ".rds"))


# Export as a SummarizedExperiment object
DRB_Deseq2_batch_by_compound_pheno_data <- DRB_filtered_replicates_Deseq2$samples |> 
  tibble::as_tibble() |> 
  dplyr::select(Batch_ID, Pathway, MoA, Compound, Unique_compound, 
                Concentrations_Formatted, Concentrations) |> 
  dplyr::filter(Compound != "Control") |> 
  dplyr::distinct()
row.names(DRB_Deseq2_batch_by_compound_pheno_data) <- paste(DRB_Deseq2_batch_by_compound_pheno_data$Batch_ID,
                                                      DRB_Deseq2_batch_by_compound_pheno_data$Compound,
                                                      sep = "_")
  
DRB_Deseq2_batch_by_compound_counts <- DRB_Reporting_Deseq2_batch_by_compound |> 
  tidyr::pivot_wider(
    id_cols = barcode_ID,
    names_from = c(Batch_ID, Compound),
    names_glue = "{Batch_ID}_{Compound}",
    values_from = log2FoldChange) |> 
  tibble::column_to_rownames("barcode_ID") |> 
  as.matrix()
DRB_Deseq2_batch_by_compound_SE <- SummarizedExperiment::SummarizedExperiment(assays = DRB_Deseq2_batch_by_compound_counts[, row.names(DRB_Deseq2_batch_by_compound_pheno_data)],
                                                                              colData = DRB_Deseq2_batch_by_compound_pheno_data)
saveRDS(DRB_Deseq2_batch_by_compound_SE, 
        file = paste0("./results/differential_analyses/DRB_DESeq2_compound_by_batch_", today_date, ".rds"))
  1. We aggregate values for samples present in different batches, using the metafor::rma function, and assuming a fixed-effects meta-model (rather than a random-effects model, congruent to the small number of batches to evaluate the heterogeneity):
Code
DRB_Reporting_Deseq2_batch_by_compound_across_batches <- DRB_Reporting_Deseq2_batch_by_compound |> 
  dplyr::semi_join(pheno_data_DESeq2_several_batches, 
                    by = join_by(Batch_ID, Compound))

DRB_aggregated_DESeq2_several_batches <- compute_meta_differential_analysis(DRB_Reporting_Deseq2_batch_by_compound_across_batches,
                                                                            batch_variable = "Batch_ID",
                                                                            drug_variable = "Compound",
                                                                            pval = 0.05)
saveRDS(DRB_aggregated_DESeq2_several_batches, 
        file = paste0("./results/differential_analyses/DESeq2_detailled_metanalysis_reporting__", today_date, ".rds"))
  1. Build a SummarizedExperiment object, merging log-fold changes resulting from meta-analysis with log fold-changes of compounds available in only one batch:
Code
# Prepare the phenotype dataset with one value per sample
DRB_Deseq2_compound_pheno_data <- SummarizedExperiment::colData(DRB_Deseq2_batch_by_compound_SE) |> 
  as.data.frame() |> 
  dplyr::select(Pathway, MoA, Compound) |> 
  dplyr::distinct()
unique_compounds_IDs <- DRB_Deseq2_compound_pheno_data |> 
  dplyr::anti_join(DRB_aggregated_DESeq2_several_batches, by = "Compound") |> 
  row.names()
row.names(DRB_Deseq2_compound_pheno_data) <- DRB_Deseq2_compound_pheno_data$Compound

# Merge barcode fold changes (meta+sample-unique)
DRB_Deseq2_compound_counts_meta <- DRB_aggregated_DESeq2_several_batches |> 
  tidyr::pivot_wider(
    id_cols = barcode_ID,
    names_from = c(Compound),
    values_from = ES_combined) |> 
  tibble::column_to_rownames("barcode_ID") |> 
  as.matrix()
DRB_Deseq2_batch_by_compound_SE_unique <- DRB_Deseq2_batch_by_compound_SE[, unique_compounds_IDs]
colnames(DRB_Deseq2_batch_by_compound_SE_unique) <- DRB_Deseq2_batch_by_compound_SE_unique$Compound
DRB_Deseq2_compound_counts <- cbind(DRB_Deseq2_compound_counts_meta,
                                    DRB_Deseq2_batch_by_compound_SE_unique |> SummarizedExperiment::assay())

# Generate the SummarizedExperiment object
DRB_Deseq2_compound_SE <- SummarizedExperiment::SummarizedExperiment(assays = DRB_Deseq2_compound_counts[, row.names(DRB_Deseq2_compound_pheno_data)],
                                                                     colData = DRB_Deseq2_compound_pheno_data)
saveRDS(DRB_Deseq2_compound_SE, 
        file = paste0("./results/differential_analyses/DRB_DESeq2_samples_", today_date, ".rds"))

  1. in other words, a barcode must be differential in all replicates of a given compound, to be consider DRB↩︎

  2. In other words, we assume the mean-variance relationship is specific to each barcode, applying “local” regression↩︎