Benchmarking CMEnt on Simulated Microarray Data

0.1 Introduction

This vignette provides an extended benchmark comparing CMEnt against multiple popular DMR detection methods on a simulated microarray dataset with known ground truth. The methods compared include:

CMEnt: Correlation-based seed-guided assembly approach
comb-p: Combines adjacent p-values using Stouffer-Liptak-Kechris correction
probeLasso: Lasso-based feature selection for DMR detection
DMRcate: Gaussian kernel smoothing approach
Bumphunter: Permutation-based regional testing

The dataset used is a simulated dataset, produced by running simulateDMRs, with default parameters, on chr21 and chr22 subset of preprocessed beta values from 656 450K illumina microarray healthy blood samples found in GEO with identifier GSE40279, with 500 simulated DMRs of varying widths and effect sizes. The simulated dataset is available for download from Zenodo (see code below).

0.2 Setup and Dependencies

try(devtools::load_all(), silent = TRUE)
suppressPackageStartupMessages({
    try(library(CMEnt), silent = TRUE)
    library(minfi)
    library(data.table)
    library(ggplot2)
    library(dplyr)
    library(GenomicRanges)
})

getBenchmarkFile <- function(filename) {
    benchmark_file <- ""
    folder <- "simulated_microarray450k"
    if (getwd() == "notebooks") {
        benchmark_file <- file.path("cached_results", folder, filename)
    } else if ("NAMESPACE" %in% dir()) {
        benchmark_file <- file.path("notebooks", "cached_results", folder, filename)
    } else {
        benchmark_file <- file.path("cached_results", folder, filename)
    }
    dir.create(dirname(benchmark_file), showWarnings = FALSE, recursive = TRUE)
    benchmark_file
}

get_method_cache_file <- function(method) {
    getBenchmarkFile(sprintf("dmrs.%s.rds", method))
}

0.2.1 Install External Dependencies

r <- getOption("repos")
if (is.null(r) || is.na(r["CRAN"]) || r["CRAN"] %in% c("@CRAN@", "")) {
  options(repos = c(CRAN = "https://cloud.r-project.org"))
}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}

if (!requireNamespace("DMRcate", quietly = TRUE)) {
    BiocManager::install("DMRcate", update = FALSE, ask = FALSE)
}

if (!requireNamespace("bumphunter", quietly = TRUE)) {
    BiocManager::install("bumphunter", update = FALSE, ask = FALSE)
}

if (!requireNamespace("doParallel", quietly = TRUE)) {
    install.packages("doParallel")
}

if (!requireNamespace("tidyr", quietly = TRUE)) {
    install.packages("tidyr")
}

if (!requireNamespace("ChAMP", quietly = TRUE)) {
    BiocManager::install("ChAMP", update = FALSE, ask = FALSE)
}

Note: comb-p requires both the Python tool itself (combined-pvalues) and bedtools, installing using new conda env if not present:

{ command -v comb-p >/dev/null 2>&1 && command -v bedtools >/dev/null 2>&1; } || { 
    echo >&2 "comb-p or bedtools not found. Installing with conda...";
    conda create -n cment_microarray_exp python=3.9
    conda activate cment_microarray_exp
    conda install -yc bioconda combined-pvalues bedtools
}

0.3 Load Data

sim_beta_path <- getBenchmarkFile("GSE40279_sim_beta_values.tsv.gz")
sim_truth_path <- getBenchmarkFile("GSE40279_sim_dmrs.tsv")
sim_pheno_path <- getBenchmarkFile("GSE40279_sim_samplesheet.tsv")
array_type <- "450K"
genome <- "hg19"
zenodo_base_url <- "https://zenodo.org/records/20592944/files/"
if (!file.exists(sim_beta_path)) {
    curl::curl_download(
        url = paste0(zenodo_base_url, basename(sim_beta_path), "?download=1"),
        destfile = sim_beta_path
    )
}
if (!file.exists(sim_truth_path)) {
    curl::curl_download(
        url = paste0(zenodo_base_url, basename(sim_truth_path), "?download=1"),
        destfile = sim_truth_path
    )
}
if (!file.exists(sim_pheno_path)) {
    curl::curl_download(
        url = paste0(zenodo_base_url, basename(sim_pheno_path), "?download=1"),
        destfile = sim_pheno_path
    )
}

truth_df <- data.table::fread(sim_truth_path)
truth_dmrs <- makeGRangesFromDataFrame(
    truth_df,
    seqnames.field = "seqnames",
    start.field = "start",
    end.field = "end",
    keep.extra.columns = TRUE
)

beta_handler <- CMEnt::getBetaHandler(sim_beta_path, array = array_type, genome = genome)

sample_ids <- beta_handler$getBetaColNames()
pheno <- data.table::fread(sim_pheno_path, data.table = FALSE)
rownames(pheno) <- pheno$V1

if (!all(sample_ids %in% rownames(pheno))) {
    stop("The simulated phenotype file does not contain all simulated beta samples.")
}
pheno <- pheno[sample_ids, , drop = FALSE]
pheno$Sample_Group <- factor(pheno$Sample_Group, levels = c("control", "case"))

beta_mat <- as.matrix(beta_handler$getBeta())
locs <- beta_handler$getBetaLocs()
truth_probe_counts <- countOverlaps(truth_dmrs, makeGRangesFromDataFrame(
    as.data.frame(locs),
    seqnames.field = "chr",
    start.field = "start",
    end.field = "end",
    keep.extra.columns = TRUE
))
truth_overlap_n <- sum(truth_probe_counts > 0L)
if (truth_overlap_n < 0.9 * length(truth_dmrs)) {
    stop(
        sprintf(
            paste(
                "Only %d/%d truth DMRs overlap probe loci under genome='%s'.",
                "This simulated microarray truth set was generated on hg19;",
                "delete stale cache files and rerun with genome='hg19'."
            ),
            truth_overlap_n,
            length(truth_dmrs),
            genome
        )
    )
}
logit2 <- function(x) log2(x) - log2(1 - x)
mvalues <- logit2(beta_mat)
pheno$casecontrol <- pheno$Sample_Group == "case"

0.4 Find Differentially Methylated Positions

dmps <- CMEnt::findDMPsArray(beta_handler,
    pheno,
    sample_group_col = "Sample_Group",
    id_col = "V1"
)
rownames(dmps) <- dmps$site_id
# sort DMPs by location
dmps <- dmps[!is.na(dmps$pval), ]
dmp_locs <- locs[rownames(locs) %in% rownames(dmps), , drop = FALSE]
dmps <- dmps[rownames(dmp_locs), , drop = FALSE]

case_idx <- which(pheno$casecontrol)
ctrl_idx <- which(!pheno$casecontrol)
dmps$delta_beta <- rowMeans(beta_mat[rownames(dmps), case_idx, drop = FALSE]) -
    rowMeans(beta_mat[rownames(dmps), ctrl_idx, drop = FALSE])

dmps$qval <- stats::p.adjust(dmps$pval, method = "fdr")

dmps_gr <- makeGRangesFromDataFrame(
    data.frame(
        chr = dmp_locs$chr,
        start = dmp_locs$start,
        end = dmp_locs$end,
        id = rownames(dmps),
        pval = dmps$pval,
        qval = dmps$qval,
        delta_beta = dmps$delta_beta
    ),
    keep.extra.columns = TRUE
)
dmps_gr <- sort(dmps_gr)
names(dmps_gr) <- rownames(dmps)

seeds_gr <- dmps_gr[mcols(dmps_gr)$qval < 0.05]
seeds <- mcols(seeds_gr)$id
cat(sprintf("Found %d significant DMPs to be used as seeds (FDR < 0.05)\n", length(seeds)))

Found 4827 significant DMPs to be used as seeds (FDR < 0.05)

0.5 Method 1: CMEnt

cment_params <- list(
    sample_group_col = "Sample_Group",
    casecontrol_col = "casecontrol",
    max_bridge_seeds_gaps = 0,
    max_bridge_extension_gaps = 1,
    min_seeds = 2,
    min_sites = 3,
    ext_site_delta_beta = 0.1,
    max_lookup_dist = 500,
    max_pval = 0.05,
    testing_mode = "parametric",
    entanglement = "weak"
)
cment_njobs <- max(1L, min(8L, parallel::detectCores(logical = TRUE) - 1L))
options("CMEnt.njobs" = cment_njobs)

0.5.1 Assessing DMPs seeds connectivity alone

# only chr21
truth_dmrs_21 <- truth_dmrs[seqnames(truth_dmrs) == "chr21", ]
seeds_gr_21 <- subset(seeds_gr, seqnames(seeds_gr) == "chr21")
seeds_21 <- mcols(seeds_gr_21)$id
seeds_beta_21 <- beta_mat[mcols(seeds_gr_21)$id, , drop = FALSE]
gt_seeds_gr_21 <- subsetByOverlaps(seeds_gr_21, truth_dmrs_21, ignore.strand = TRUE)
seeds_dmrs_cment <- do.call(CMEnt::buildDMRs, c(cment_params, list(
    beta = seeds_beta_21,
    seeds = seeds_21,
    pheno = pheno,
    verbose = 2,
    .score_dmrs = FALSE,
    extract_motifs = FALSE,
    annotate_with_genes = FALSE)))

seeds_in_truth <- subsetByOverlaps(gt_seeds_gr_21, truth_dmrs_21, ignore.strand = TRUE)

seeds_in_dmrs <- subsetByOverlaps(gt_seeds_gr_21, seeds_dmrs_cment, ignore.strand = TRUE)
seeds_in_truth_and_dmrs <- subsetByOverlaps(seeds_in_truth, seeds_dmrs_cment, ignore.strand = TRUE)
seeds_in_truth_not_dmrs <- subsetByOverlaps(seeds_in_truth, seeds_dmrs_cment, ignore.strand = TRUE, invert = TRUE)



seed_truth_ov <- findOverlaps(seeds_in_truth, truth_dmrs_21, ignore.strand = TRUE)
truth_seed_total <- tabulate(subjectHits(seed_truth_ov), nbins = length(truth_dmrs_21))
seed_truth_hit_ov <- findOverlaps(seeds_in_truth_and_dmrs, truth_dmrs_21, ignore.strand = TRUE)


truth_seed_captured <- tabulate(subjectHits(seed_truth_hit_ov), nbins = length(truth_dmrs_21))
truth_seed_capture_pct <- ifelse(truth_seed_total > 0, round(100 * truth_seed_captured / truth_seed_total, 1), NA_real_)

recoverable_truth <- truth_seed_total >= 2L
recoverable_with2captured <- recoverable_truth & truth_seed_captured >= 2L
recoverable_fully_captured <- recoverable_truth & truth_seed_captured == truth_seed_total

stage1_quality <- data.frame(
    Truth_DMRs_Total = length(truth_dmrs_21),
    Truth_DMRs_With_Seeds = sum(truth_seed_total >= 1L),
    Truth_DMRs_2plusSeeds = sum(recoverable_truth),
    Truth_DMRs_With2plusCapturedSeeds = sum(recoverable_with2captured),
    Truth_DMRs_Stage1Recoverable_FullyCapturedSeeds = sum(recoverable_fully_captured),
    Seed_Capture_Rate_Pct_in_Truth = ifelse(length(seeds_in_truth) == 0, NA_real_, round(100 * length(seeds_in_truth_and_dmrs) / length(seeds_in_truth), 1)),
    Median_Seed_Capture_Pct_Per_Truth = ifelse(sum(truth_seed_total > 0) == 0, NA_real_, stats::median(truth_seed_capture_pct[truth_seed_total > 0], na.rm = TRUE))
)

for (col in colnames(stage1_quality)) {
    cat(sprintf("- **%s**: %s\n", col, stage1_quality[[col]]))
}

seeds_beta_21_handler <- CMEnt::getBetaHandler(
    beta = seeds_beta_21,
    genome = genome,
    array = array_type
)
group_inds <- split(seq_len(nrow(pheno)), pheno$Sample_Group)
testing_mode_per_group <- rep("parametric", length(group_inds))
names(testing_mode_per_group) <- names(group_inds)
empirical_strategy_per_group <- rep("permutation", length(group_inds))
names(empirical_strategy_per_group) <- names(group_inds)

connectivity_array_21 <- CMEnt:::.buildConnectivityArray(
    beta_handler = seeds_beta_21_handler,
    pheno = pheno,
    group_inds = group_inds,
    testing_mode_per_group = testing_mode_per_group,
    empirical_strategy_per_group = empirical_strategy_per_group,
    col_names = NULL,
    max_pval = cment_params$max_pval,
    ext_site_delta_beta = NA_real_,
    max_lookup_dist = cment_params$max_lookup_dist,
    entanglement = cment_params$entanglement,
    njobs = cment_njobs,
    max_bridge_gaps = 1,
    verbose = 3
)$connectivity_array
connectivity_array_21_no_gaps <- CMEnt:::.buildConnectivityArray(
    beta_handler = seeds_beta_21_handler,
    pheno = pheno,
    group_inds = group_inds,
    testing_mode_per_group = testing_mode_per_group,
    empirical_strategy_per_group = empirical_strategy_per_group,
    col_names = NULL,
    max_pval = cment_params$max_pval,
    ext_site_delta_beta = NA_real_,
    max_lookup_dist = cment_params$max_lookup_dist,
    entanglement = cment_params$entanglement,
    njobs = cment_njobs,
    max_bridge_gaps = 0,
    verbose = 2
)$connectivity_array

truth_hits <- findOverlaps(gt_seeds_gr_21, truth_dmrs_21, ignore.strand = TRUE)

truth_id <- rep(NA_integer_, length(gt_seeds_gr_21))
truth_id[queryHits(truth_hits)] <- subjectHits(truth_hits)

gt_vec <- rep(FALSE, length(gt_seeds_gr_21))
if (length(gt_vec) > 1L) {
    gt_vec[-length(gt_vec)] <- !is.na(truth_id[-length(truth_id)]) &
        truth_id[-length(truth_id)] == truth_id[-1]
}

gt_connectivity <- data.frame(gt = gt_vec)

colnames(connectivity_array_21_no_gaps) <- paste(colnames(connectivity_array_21_no_gaps), "ngaps", sep = "_")
connectivity_array_21 <- cbind(gt_connectivity, connectivity_array_21, connectivity_array_21_no_gaps)
x <- rle(connectivity_array_21$connected); sum(x$values)

seeds_in_truth <- subsetByOverlaps(gt_seeds_gr_21, truth_dmrs_21, ignore.strand = TRUE)

seeds_in_dmrs <- subsetByOverlaps(gt_seeds_gr_21, seeds_dmrs_cment, ignore.strand = TRUE)
seeds_in_truth_and_dmrs <- subsetByOverlaps(seeds_in_truth, seeds_dmrs_cment, ignore.strand = TRUE)
seeds_in_truth_not_dmrs <- subsetByOverlaps(seeds_in_truth, seeds_dmrs_cment, ignore.strand = TRUE, invert = TRUE)



seed_truth_ov <- findOverlaps(seeds_in_truth, truth_dmrs_21, ignore.strand = TRUE)
truth_seed_total <- tabulate(subjectHits(seed_truth_ov), nbins = length(truth_dmrs_21))
seed_truth_hit_ov <- findOverlaps(seeds_in_truth_and_dmrs, truth_dmrs_21, ignore.strand = TRUE)


truth_seed_captured <- tabulate(subjectHits(seed_truth_hit_ov), nbins = length(truth_dmrs_21))
truth_seed_capture_pct <- ifelse(truth_seed_total > 0, round(100 * truth_seed_captured / truth_seed_total, 1), NA_real_)

recoverable_truth <- truth_seed_total >= 2L
recoverable_with2captured <- recoverable_truth & truth_seed_captured >= 2L
recoverable_fully_captured <- recoverable_truth & truth_seed_captured == truth_seed_total

stage1_quality <- data.frame(
    Truth_DMRs_Total = length(truth_dmrs_21),
    Truth_DMRs_With_Seeds = sum(truth_seed_total >= 1L),
    Truth_DMRs_2plusSeeds = sum(recoverable_truth),
    Truth_DMRs_With2plusCapturedSeeds = sum(recoverable_with2captured),
    Truth_DMRs_Stage1Recoverable_FullyCapturedSeeds = sum(recoverable_fully_captured),
    Seed_Capture_Rate_Pct_in_Truth = ifelse(length(seeds_in_truth) == 0, NA_real_, round(100 * length(seeds_in_truth_and_dmrs) / length(seeds_in_truth), 1)),
    Median_Seed_Capture_Pct_Per_Truth = ifelse(sum(truth_seed_total > 0) == 0, NA_real_, stats::median(truth_seed_capture_pct[truth_seed_total > 0], na.rm = TRUE))
)

for (col in colnames(stage1_quality)) {
    cat(sprintf("- **%s**: %s\n", col, stage1_quality[[col]]))
}

cment_file <- get_method_cache_file("CMEnt")

cment_cache_valid <- FALSE
if (file.exists(cment_file)) {
    ret <- readRDS(cment_file)
    cment_cache_valid <- identical(ret$params, cment_params)
    if (cment_cache_valid) {
        dmrs_cment <- ret$dmrs
        time_cment <- ret$time
    }
}
if (!cment_cache_valid) {
    cment_njobs <- max(1L, min(8L, parallel::detectCores(logical = TRUE) - 1L))
    options("CMEnt.njobs" = cment_njobs)
    cat(sprintf("CMEnt parallel jobs: %d\n", cment_njobs))
    start_time <- Sys.time()
    dmrs_cment <- do.call(CMEnt::buildDMRs,
        c(list(
            beta = beta_handler,
            seeds = seeds,
            pheno = pheno,
            verbose = 2,
            .score_dmrs = FALSE,
            extract_motifs = FALSE,
            annotate_with_genes = FALSE
        ), cment_params)
    )
    end_time <- Sys.time()
    time_cment <- end_time - start_time
    saveRDS(list(dmrs = dmrs_cment, time = time_cment, params = cment_params), cment_file)
}

cat(sprintf("CMEnt: Found %d DMRs in %d seconds\n", length(dmrs_cment), round(as.numeric(time_cment, units = "secs"))))

CMEnt: Found 539 DMRs in 101 seconds

0.6 Method 2: comb-p

combp_file <- get_method_cache_file("combp")

run_combp <- function(dmps_gr, seed = 500, dist = 500) {
    if (Sys.which("bedtools") == "") {
        warning("bedtools not found in PATH. comb-p requires bedtools to be installed.")
        warning("Install with: conda install -yc bioconda bedtools")
        return(GRanges())
    }

    if (Sys.which("comb-p") == "") {
        warning("comb-p not found in PATH.")
        warning("Install with: conda install -yc bioconda combined-pvalues")
        return(GRanges())
    }
    tmpdir <- tempdir()
    bed_file <- file.path(tmpdir, "dmps_for_combp.bed")
    bed_df <- data.frame(
        chrom = as.character(seqnames(dmps_gr)),
        start = start(dmps_gr) - 1,
        end = end(dmps_gr),
        pval = mcols(dmps_gr)$pval
    )
    bed_df <- bed_df[order(bed_df$chrom, bed_df$start), ]

    write.table(bed_df, bed_file,
        quote = FALSE, sep = "\t",
        row.names = FALSE, col.names = TRUE
    )

    cmd <- sprintf(
        "comb-p pipeline -c 4 --seed %d --dist %d -p %s --region-filter-p 0.01 --region-filter-n 3 %s",
        seed, dist, file.path(tmpdir, "output."), bed_file
    )

    tryCatch(
        {
            exit_code <- system(cmd, ignore.stdout = FALSE, ignore.stderr = FALSE)

            if (exit_code != 0) {
                warning(sprintf("comb-p exited with code %d", exit_code))
                return(GRanges())
            }

            regions_file <- paste0(file.path(tmpdir, "output."), "regions-p.bed.gz")
            if (file.exists(regions_file)) {
                regions <- fread(cmd = sprintf("zcat %s", regions_file))
                colnames(regions) <- c("chr", "start", "end", "min_p", "n_probes", "z_p", "z_sidak_p")

                regions <- regions[regions$z_sidak_p < 0.05 & regions$n_probes >= 3, ]

                dmrs_combp <- makeGRangesFromDataFrame(regions,
                    seqnames.field = "chr",
                    start.field = "start",
                    end.field = "end",
                    keep.extra.columns = TRUE
                )
                return(dmrs_combp)
            } else {
                warning("comb-p did not produce output file")
                return(GRanges())
            }
        },
        error = function(e) {
            warning(sprintf("comb-p failed: %s", e$message))
            GRanges()
        }
    )
}

if (!file.exists(combp_file)) {
    start_time <- Sys.time()
    dmrs_combp <- run_combp(dmps_gr)
    end_time <- Sys.time()
    time_combp <- end_time - start_time
    saveRDS(list(dmrs = dmrs_combp, time = time_combp), combp_file)
} else {
    ret <- readRDS(combp_file)
    dmrs_combp <- ret$dmrs
    time_combp <- ret$time
}

cat(sprintf("comb-p: Found %d DMRs in %d seconds\n", length(dmrs_combp), round(as.numeric(time_combp, units = "secs"))))

comb-p: Found 503 DMRs in 8 seconds

0.7 Method 3: probeLasso

probelasso_file <- get_method_cache_file("probeLasso")

run_probelasso_champ <- function(beta_mat, pheno, array_type = "450K", genome = "hg19") {
    if (!requireNamespace("ChAMP", quietly = TRUE)) {
        warning("ChAMP not installed. probeLasso requires ChAMP.")
        warning("Install with: BiocManager::install('ChAMP')")
        return(GRanges(seqinfo = GenomeInfoDb::Seqinfo(genome = genome)))
    }

    library(ChAMP)

    tryCatch(
        {
            result <- champ.DMR(
                beta = beta_mat,
                pheno = pheno$Sample_Group,
                arraytype = array_type,
                method = "ProbeLasso",
                minProbes = cment_params$min_sites,
                adjPvalDmr = 0.05,
                adjPvalProbe = 0.1,
                meanLassoRadius = 500,
                minDmrSep = 500,
                minDmrSize = 50,
                PDFplot = FALSE,
                Rplot = FALSE,
                resultsDir = file.path(tempdir(), "champ_probelasso")
            )

            pl_dmr <- result$ProbeLassoDMR
            if (is.null(pl_dmr) || nrow(pl_dmr) == 0) {
                return(GRanges())
            }

            # ChAMP output column names can differ by version.
            if (!"seqnames" %in% names(pl_dmr) && "dmrChrom" %in% names(pl_dmr)) {
                pl_dmr$seqnames <- pl_dmr$dmrChrom
            }
            if (!"start" %in% names(pl_dmr) && "dmrStart" %in% names(pl_dmr)) {
                pl_dmr$start <- pl_dmr$dmrStart
            }
            if (!"end" %in% names(pl_dmr) && "dmrEnd" %in% names(pl_dmr)) {
                pl_dmr$end <- pl_dmr$dmrEnd
            }

            return(makeGRangesFromDataFrame(pl_dmr,
                seqnames.field = "seqnames",
                start.field = "start",
                end.field = "end",
                seqinfo = GenomeInfoDb::Seqinfo(genome = genome),
                keep.extra.columns = TRUE
            ))
        },
        error = function(e) {
            warning(sprintf("probeLasso (ChAMP) failed: %s", e$message))
            GRanges(seqinfo = GenomeInfoDb::Seqinfo(genome = genome))
        }
    )
}

if (!file.exists(probelasso_file)) {
    start_time <- Sys.time()
    dmrs_probelasso <- run_probelasso_champ(beta_mat, pheno, array_type = "450K", genome = genome)
    end_time <- Sys.time()
    time_probelasso <- end_time - start_time
    saveRDS(list(dmrs = dmrs_probelasso, time = time_probelasso), probelasso_file)
} else {
    ret <- readRDS(probelasso_file)
    dmrs_probelasso <- ret$dmrs
    time_probelasso <- ret$time
}

cat(sprintf("probeLasso: Found %d DMRs in %d seconds\n", length(dmrs_probelasso), round(as.numeric(time_probelasso, units = "secs"))))

probeLasso: Found 289 DMRs in 18 seconds

0.8 Method 4: DMRcate

dmrcate_file <- get_method_cache_file("dmrcate")
if (!file.exists(dmrcate_file)) {
    library(DMRcate)
    start_time <- Sys.time()
    formula_str <- "~Sample_Group"
    design <- model.matrix(stats::as.formula(formula_str), data = pheno)
    coef_col <- which(colnames(design) == "Sample_Groupcase")
    colnames(design)[1] <- "(Intercept)"
    colnames(design)[coef_col] <- "case_vs_control"

    myannotation <- cpg.annotate("array", mvalues,
        what = "M", arraytype = "450K",
        analysis.type = "differential", design = design,
        coef = coef_col, fdr = 0.05
    )

    dmrcate_results <- dmrcate(myannotation, C = 2, lambda = 1000, min.cpgs = 3, betacutoff = 0.05)
    dmrs_dmrcate <- extractRanges(dmrcate_results, genome = genome)

    end_time <- Sys.time()
    time_dmrcate <- end_time - start_time
    saveRDS(list(dmrs = dmrs_dmrcate, time = time_dmrcate), dmrcate_file)
} else {
    ret <- readRDS(dmrcate_file)
    dmrs_dmrcate <- ret$dmrs
    time_dmrcate <- ret$time
}

cat(sprintf("DMRcate: Found %d DMRs in %d seconds\n", length(dmrs_dmrcate), round(as.numeric(time_dmrcate, units = "secs"))))

DMRcate: Found 482 DMRs in 7 seconds

0.9 Method 5: Bumphunter

bumphunter_file <- get_method_cache_file("bumphunter")
if (!file.exists(bumphunter_file)) {
    suppressPackageStartupMessages({
        library(bumphunter)
        library(doParallel)
    })
    start_time <- Sys.time()
    bh_cores <- max(1L, min(8L, parallel::detectCores(logical = TRUE) - 1L))
    bh_b <- 100L
    bh_max_dmrs <- 100000L
    bh_max_gap <- 2000L
    bh_min_l <- 2L
    bh_region_p <- 0.05
    formula_str <- "~Sample_Group"
    design <- model.matrix(stats::as.formula(formula_str), data = pheno)
    coef_col <- which(colnames(design) == "Sample_Groupcase")
    colnames(design)[1] <- "(Intercept)"
    colnames(design)[coef_col] <- "case_vs_control"

    doParallel::registerDoParallel(cores = bh_cores)
    on.exit({
        try(doParallel::stopImplicitCluster(), silent = TRUE)
        gc()
    }, add = TRUE)

    bh_cluster <- bumphunter::clusterMaker(
        chr = as.character(locs$chr),
        pos = locs$start,
        maxGap = bh_max_gap
    )
    run_bumphunter_fit <- function(cutoff, B) {
        bumphunter(beta_mat,
            design = design,
            chr = as.character(locs$chr),
            pos = locs$start,
            cluster = bh_cluster,
            cutoff = cutoff,
            maxGap = max_gap,
            nullMethod = "bootstrap",
            B = B,
            type = "Beta",
            coef = coef_col
        )
    }
    has_informative_bumphunter_null <- function(fit) {
        null_values <- unlist(fit$null$value, use.names = FALSE)
        null_lengths <- unlist(fit$null$length, use.names = FALSE)
        length(null_values) > 0 &&
            any(abs(null_values) > 0) &&
            length(null_lengths) > 0 &&
            any(null_lengths > 0)
    }
    max_gap <- bh_max_gap
    dmrs_cutoff <- 0.1
    dmrs_cnt <- Inf
    iter <- 0L
    max_iter <- 10L
    while (dmrs_cnt > bh_max_dmrs && iter < max_iter) {
        iter <- iter + 1L
        dmrs <- run_bumphunter_fit(cutoff = dmrs_cutoff, B = 0)
        dmrs_cnt <- nrow(dmrs$table)
        dmrs_cutoff <- dmrs_cutoff * 1.2
        rm(dmrs)
        gc()
    }
    final_cutoff <- dmrs_cutoff / 1.2
    cat(sprintf(
        "Bumphunter: cutoff=%.4f, candidate_bumps=%d, minL=%d, maxGap=%d, cores=%d, B=%d\n",
        final_cutoff, dmrs_cnt, bh_min_l, max_gap, bh_cores, bh_b
    ))
    bumphunter_results <- run_bumphunter_fit(cutoff = final_cutoff, B = bh_b)
    bh_cutoff_floor <- 0.005
    bh_bootstrap_adjust_iter <- 0L
    bh_max_bootstrap_adjust <- 6L
    while (
        nrow(bumphunter_results$table) > 0 &&
            (all(is.na(bumphunter_results$table$p.value)) ||
                !has_informative_bumphunter_null(bumphunter_results)) &&
            final_cutoff > bh_cutoff_floor &&
            bh_bootstrap_adjust_iter < bh_max_bootstrap_adjust
    ) {
        prev_cutoff <- final_cutoff
        final_cutoff <- max(final_cutoff / 2, bh_cutoff_floor)
        if (identical(final_cutoff, prev_cutoff)) {
            break
        }
        bh_bootstrap_adjust_iter <- bh_bootstrap_adjust_iter + 1L
        cat(sprintf(
            "Bumphunter: bootstrap null degenerate at cutoff=%.4f; retrying with cutoff=%.4f\n",
            prev_cutoff, final_cutoff
        ))
        bumphunter_results <- run_bumphunter_fit(cutoff = final_cutoff, B = bh_b)
    }

    dmr_table <- bumphunter_results$table
    # bumphunter already estimates region-level p-values from the bootstrap null.
    dmr_table <- dmr_table[dmr_table$L >= bh_min_l & dmr_table$p.value <= bh_region_p, ]

    dmrs_bumphunter <- makeGRangesFromDataFrame(dmr_table,
        seqnames.field = "chr",
        start.field = "start",
        end.field = "end",
        keep.extra.columns = TRUE
    )
    end_time <- Sys.time()
    time_bumphunter <- end_time - start_time
    saveRDS(list(dmrs = dmrs_bumphunter, time = time_bumphunter), bumphunter_file)
} else {
    ret <- readRDS(bumphunter_file)
    dmrs_bumphunter <- ret$dmrs
    time_bumphunter <- ret$time
}

cat(sprintf("Bumphunter: Found %d DMRs in %d seconds\n", length(dmrs_bumphunter), round(as.numeric(time_bumphunter, units = "secs"))))

Bumphunter: Found 494 DMRs in 76 seconds

0.10 Comparative Analysis

0.10.1 Basic Statistics

get_dmr_stats <- function(dmrs, method) {
    if (length(dmrs) == 0) {
        return(data.frame(
            Method = method,
            `Number of DMRs` = 0,
            `Mean DMR Width` = NA,
            `Median DMR Width` = NA,
            `Total Coverage bp` = 0,
            `Min Width` = NA,
            `Max Width` = NA
        ))
    }

    data.frame(
        Method = method,
        `Number of DMRs` = length(dmrs),
        `Mean DMR Width` = round(mean(width(dmrs)), 1),
        `Median DMR Width` = median(width(dmrs)),
        `Total Coverage bp` = sum(width(dmrs)),
        `Min Width` = min(width(dmrs)),
        `Max Width` = max(width(dmrs))
    )
}

stats_list <- list(
    get_dmr_stats(dmrs_cment, "CMEnt"),
    get_dmr_stats(dmrs_combp, "comb-p"),
    get_dmr_stats(dmrs_probelasso, "probeLasso"),
    get_dmr_stats(dmrs_dmrcate, "DMRcate"),
    get_dmr_stats(dmrs_bumphunter, "Bumphunter")
)

results_comparison <- do.call(rbind, stats_list)
knitr::kable(results_comparison, digits = 2, caption = "Basic DMR Statistics Across Methods")

Table 1: Basic DMR Statistics Across Methods
Method	Number.of.DMRs	Mean.DMR.Width	Median.DMR.Width	Total.Coverage.bp	Min.Width	Max.Width
CMEnt	539	943.6	894.0	508603	37	2907
comb-p	503	992.7	927.0	499318	45	2909
probeLasso	289	1023.1	718.0	295668	53	6998
DMRcate	482	1539.1	1430.5	741850	43	7249
Bumphunter	494	916.9	828.5	452968	43	5578

0.10.2 Pairwise Overlap Analysis

methods_list <- list(
    CMEnt = dmrs_cment,
    `comb-p` = dmrs_combp,
    probeLasso = dmrs_probelasso,
    DMRcate = dmrs_dmrcate,
    Bumphunter = dmrs_bumphunter
)

methods_list <- methods_list[sapply(methods_list, length) > 0]
method_order <- names(methods_list)

calculate_overlap <- function(gr1, gr2) {
    if (length(gr1) == 0 || length(gr2) == 0) {
        return(list(count = 0, percentage = 0))
    }
    overlap_count <- length(subsetByOverlaps(gr1, gr2))
    pct <- (overlap_count / length(gr1)) * 100
    list(count = overlap_count, percentage = pct)
}

overlap_matrix <- matrix(0, nrow = length(methods_list), ncol = length(methods_list))
pct_matrix <- matrix(0, nrow = length(methods_list), ncol = length(methods_list))
rownames(overlap_matrix) <- colnames(overlap_matrix) <- names(methods_list)
rownames(pct_matrix) <- colnames(pct_matrix) <- names(methods_list)

for (i in seq_along(methods_list)) {
    for (j in seq_along(methods_list)) {
        if (i == j) {
            overlap_matrix[i, j] <- length(methods_list[[i]])
            pct_matrix[i, j] <- 100
        } else {
            overlap_result <- calculate_overlap(methods_list[[i]], methods_list[[j]])
            overlap_matrix[i, j] <- overlap_result$count
            pct_matrix[i, j] <- overlap_result$percentage
        }
    }
}

cat("\n#### Overlap Counts (rows overlap with columns)\n")

0.10.2.1 Overlap Counts (rows overlap with columns)

knitr::kable(overlap_matrix, digits = 0, caption = "Number of DMRs from Method A (rows) overlapping with Method B (columns)")

Table 2: Number of DMRs from Method A (rows) overlapping with Method B (columns)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter
CMEnt	539	504	281	496	501
comb-p	503	503	277	493	500
probeLasso	286	283	289	279	282
DMRcate	482	482	270	482	482
Bumphunter	494	494	274	487	494

cat("\n#### Overlap Percentages\n")

0.10.2.2 Overlap Percentages

knitr::kable(pct_matrix, digits = 2, caption = "Percentage of DMRs from Method A (rows) overlapping with Method B (columns)")

Table 2: Percentage of DMRs from Method A (rows) overlapping with Method B (columns)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter
CMEnt	100.00	93.51	52.13	92.02	92.95
comb-p	100.00	100.00	55.07	98.01	99.40
probeLasso	98.96	97.92	100.00	96.54	97.58
DMRcate	100.00	100.00	56.02	100.00	100.00
Bumphunter	100.00	100.00	55.47	98.58	100.00

0.10.3 Jaccard Index and IoU

jaccard_index <- function(gr1, gr2) {
    if (length(gr1) == 0 || length(gr2) == 0) {
        return(0)
    }
    overlap_count <- length(subsetByOverlaps(gr1, gr2))
    total_unique <- length(unique(c(gr1, gr2)))
    if (total_unique == 0) {
        return(0)
    }
    overlap_count / total_unique
}

iou <- function(gr1, gr2) {
    if (length(gr1) == 0 || length(gr2) == 0) {
        return(0)
    }
    intersection <- sum(width(GenomicRanges::intersect(gr1, gr2)))
    union <- sum(width(GenomicRanges::union(gr1, gr2)))
    if (union == 0) {
        return(0)
    }
    intersection / union
}

jaccard_matrix <- matrix(0, nrow = length(methods_list), ncol = length(methods_list))
iou_matrix <- matrix(0, nrow = length(methods_list), ncol = length(methods_list))
rownames(jaccard_matrix) <- colnames(jaccard_matrix) <- names(methods_list)
rownames(iou_matrix) <- colnames(iou_matrix) <- names(methods_list)

for (i in seq_along(methods_list)) {
    for (j in seq_along(methods_list)) {
        if (i < j) {
            jaccard_matrix[i, j] <- jaccard_matrix[j, i] <- jaccard_index(methods_list[[i]], methods_list[[j]])
            iou_matrix[i, j] <- iou_matrix[j, i] <- iou(methods_list[[i]], methods_list[[j]])
        } else if (i == j) {
            jaccard_matrix[i, j] <- 1
            iou_matrix[i, j] <- 1
        }
    }
}

cat("\n#### Jaccard Index\n")

0.10.3.1 Jaccard Index

knitr::kable(jaccard_matrix, digits = 4, caption = "Jaccard Index (region count-based similarity)")

Table 3: Jaccard Index (region count-based similarity)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter
CMEnt	1.0000	0.4837	0.3394	0.6223	0.7147
comb-p	0.4837	1.0000	0.3497	0.5005	0.5015
probeLasso	0.3394	0.3497	1.0000	0.3619	0.3602
DMRcate	0.6223	0.5005	0.3619	1.0000	0.5857
Bumphunter	0.7147	0.5015	0.3602	0.5857	1.0000

cat("\n#### Intersection over Union (IoU)\n")

0.10.3.2 Intersection over Union (IoU)

knitr::kable(iou_matrix, digits = 4, caption = "IoU (genomic coverage-based similarity)")

Table 3: IoU (genomic coverage-based similarity)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter
CMEnt	1.0000	0.9538	0.1706	0.6338	0.8631
comb-p	0.9538	1.0000	0.1716	0.6553	0.8850
probeLasso	0.1706	0.1716	1.0000	0.1462	0.1744
DMRcate	0.6338	0.6553	0.1462	1.0000	0.6027
Bumphunter	0.8631	0.8850	0.1744	0.6027	1.0000

0.11 Visualizations

0.11.1 DMR Count and Coverage Comparison

viz_data <- results_comparison |>
    dplyr::select(Method, Number.of.DMRs, Total.Coverage.bp) |>
    tidyr::pivot_longer(
        cols = c(Number.of.DMRs, Total.Coverage.bp),
        names_to = "Metric", values_to = "Value"
    )

viz_data$Metric <- ifelse(viz_data$Metric == "Number.of.DMRs",
    "Number of DMRs",
    "Total Coverage (bp)"
)

p1 <- ggplot(viz_data, aes(x = Method, y = Value, fill = Method)) +
    geom_bar(stat = "identity") +
    facet_wrap(~Metric, scales = "free_y") +
    theme_minimal() +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        panel.grid.major.x = element_blank()
    ) +
    labs(
        title = "DMR Detection: Count and Coverage Comparison",
        y = "Value", x = ""
    )

print(p1)

0.12 Ground-Truth Evaluation

evaluate_against_truth <- function(pred, truth) {
    n_pred <- length(pred)
    n_truth <- length(truth)
    pred_overlap_truth <- sum(overlapsAny(pred, truth))
    truth_overlap_pred <- sum(overlapsAny(truth, pred))
    fp <- n_pred - pred_overlap_truth
    fn <- n_truth - truth_overlap_pred

    precision <- ifelse(n_pred == 0, 0, pred_overlap_truth / n_pred)
    recall <- ifelse(n_truth == 0, 0, truth_overlap_pred / n_truth)
    f1 <- ifelse(precision + recall == 0, 0, 2 * precision * recall / (precision + recall))

    pred_red <- reduce(pred)
    truth_red <- reduce(truth)
    inter <- GenomicRanges::intersect(pred_red, truth_red)

    pred_bp <- sum(width(pred_red))
    truth_bp <- sum(width(truth_red))
    inter_bp <- ifelse(length(inter) == 0, 0, sum(width(inter)))
    union_bp <- pred_bp + truth_bp - inter_bp

    precision_bp <- ifelse(pred_bp == 0, 0, inter_bp / pred_bp)
    recall_bp <- ifelse(truth_bp == 0, 0, inter_bp / truth_bp)
    f1_bp <- ifelse(precision_bp + recall_bp == 0, 0, 2 * precision_bp * recall_bp / (precision_bp + recall_bp))
    iou_bp <- ifelse(union_bp == 0, 0, inter_bp / union_bp)

    data.frame(
        TP_regions = truth_overlap_pred,
        FP_regions = fp,
        FN_regions = fn,
        Precision_regions = precision,
        Recall_regions = recall,
        F1_regions = f1,
        Predicted_bp = pred_bp,
        Truth_bp = truth_bp,
        Intersect_bp = inter_bp,
        Precision_bp = precision_bp,
        Recall_bp = recall_bp,
        F1_bp = f1_bp,
        IoU_bp = iou_bp
    )
}

truth_eval <- do.call(rbind, lapply(names(methods_list), function(method) {
    out <- evaluate_against_truth(methods_list[[method]], truth_dmrs)
    out$Method <- method
    out
}))
truth_eval$Method <- factor(truth_eval$Method, levels = method_order)
truth_eval <- truth_eval[order(truth_eval$Method), , drop = FALSE]
truth_eval$Method <- as.character(truth_eval$Method)


truth_eval <- truth_eval |>
    dplyr::select(
        Method,
        TP_regions,
        FP_regions, FN_regions,
        Precision_regions, Recall_regions, F1_regions,
        Precision_bp, Recall_bp, F1_bp, IoU_bp
    )

regions_truth_eval <- truth_eval |>
    dplyr::select(Method, TP_regions, FP_regions, FN_regions, Precision_regions, Recall_regions, F1_regions) |>
    dplyr::rename_with(~ sub("_regions$", "", .), ends_with("_regions"))

bp_truth_eval <- truth_eval |>
    dplyr::select(Method, Precision_bp, Recall_bp, F1_bp, IoU_bp) |>
    dplyr::rename_with(~ sub("_bp$", "", .), ends_with("_bp"))

knitr::kable(regions_truth_eval, digits = 4, caption = "Macro (Region-level) Performance Against Ground Truth")

Table 4: Macro (Region-level) Performance Against Ground Truth
Method	TP	FP	FN	Precision	Recall	F1
CMEnt	500	38	0	0.9295	1.000	0.9635
comb-p	500	3	0	0.9940	1.000	0.9970
probeLasso	276	7	224	0.9758	0.552	0.7051
DMRcate	493	0	7	1.0000	0.986	0.9930
Bumphunter	500	0	0	1.0000	1.000	1.0000

knitr::kable(bp_truth_eval, digits = 4, caption = "Micro (Base-pair-level) Performance Against Ground Truth")

Table 5: Micro (Base-pair-level) Performance Against Ground Truth
Method	Precision	Recall	F1	IoU
CMEnt	0.9646	0.9936	0.9789	0.9586
comb-p	0.9888	1.0000	0.9944	0.9888
probeLasso	0.3909	0.2341	0.2928	0.1715
DMRcate	0.6572	0.9875	0.7892	0.6518
Bumphunter	0.9870	0.9055	0.9445	0.8949

0.12.1 Overlap Matrices (Methods + Truth)

all_sets <- c(methods_list, list(`Ground Truth` = truth_dmrs))

calculate_overlap <- function(gr1, gr2) {
    if (length(gr1) == 0 || length(gr2) == 0) {
        return(list(count = 0, percentage = 0))
    }
    overlap_count <- sum(overlapsAny(gr1, gr2))
    pct <- overlap_count / length(gr1) * 100
    list(count = overlap_count, percentage = pct)
}

overlap_count_mat <- matrix(0, nrow = length(all_sets), ncol = length(all_sets))
overlap_pct_mat <- matrix(0, nrow = length(all_sets), ncol = length(all_sets))
rownames(overlap_count_mat) <- colnames(overlap_count_mat) <- names(all_sets)
rownames(overlap_pct_mat) <- colnames(overlap_pct_mat) <- names(all_sets)

for (i in seq_along(all_sets)) {
    for (j in seq_along(all_sets)) {
        if (i == j) {
            overlap_count_mat[i, j] <- length(all_sets[[i]])
            overlap_pct_mat[i, j] <- 100
        } else {
            ov <- calculate_overlap(all_sets[[i]], all_sets[[j]])
            overlap_count_mat[i, j] <- ov$count
            overlap_pct_mat[i, j] <- ov$percentage
        }
    }
}

knitr::kable(overlap_count_mat, digits = 0, caption = "Overlap Counts (query=row, reference=column)")

Table 6: Overlap Counts (query=row, reference=column)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter	Ground Truth
CMEnt	539	504	281	496	501	501
comb-p	503	503	277	493	500	500
probeLasso	286	283	289	279	282	282
DMRcate	482	482	270	482	482	482
Bumphunter	494	494	274	487	494	494
Ground Truth	500	500	276	493	500	500

knitr::kable(overlap_pct_mat, digits = 2, caption = "Overlap Percentages (query=row, reference=column)")

Table 7: Overlap Percentages (query=row, reference=column)
	CMEnt	comb-p	probeLasso	DMRcate	Bumphunter	Ground Truth
CMEnt	100.00	93.51	52.13	92.02	92.95	92.95
comb-p	100.00	100.00	55.07	98.01	99.40	99.40
probeLasso	98.96	97.92	100.00	96.54	97.58	97.58
DMRcate	100.00	100.00	56.02	100.00	100.00	100.00
Bumphunter	100.00	100.00	55.47	98.58	100.00	100.00
Ground Truth	100.00	100.00	55.20	98.60	100.00	100.00

0.13 Visualizations

0.13.1 Ground-Truth Metrics

plot_df <- truth_eval |>
    dplyr::select(Method, F1_regions, F1_bp, IoU_bp) |>
    tidyr::pivot_longer(
        cols = c(F1_regions, F1_bp, IoU_bp),
        names_to = "Metric",
        values_to = "Value"
    )

plot_df$Metric <- factor(
    plot_df$Metric,
    levels = c("F1_regions", "F1_bp", "IoU_bp"),
    labels = c("F1 (Region)", "F1 (Base-pair)", "IoU (Base-pair)")
)
plot_df$Method <- factor(plot_df$Method, levels = method_order)

ggplot(plot_df, aes(x = Method, y = Value, fill = Method)) +
    geom_col() +
    facet_wrap(~Metric, nrow = 1) +
    theme_minimal() +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none"
    ) +
    ylim(0, 1) +
    labs(
        title = "Method Performance vs Ground Truth",
        x = "",
        y = "Score"
    )

0.13.2 Width Distributions

width_data <- do.call(rbind, lapply(names(all_sets), function(method) {
    x <- all_sets[[method]]
    if (length(x) == 0) {
        return(NULL)
    }
    data.frame(Method = method, Width = width(x))
}))
width_data$Method <- factor(width_data$Method, levels = names(all_sets))

ggplot(width_data, aes(x = Method, y = Width, fill = Method)) +
    geom_violin(alpha = 0.7) +
    geom_boxplot(width = 0.2, alpha = 0.5, outlier.size = 0.5) +
    scale_y_log10(labels = scales::comma) +
    theme_minimal() +
    theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none"
    ) +
    labs(
        title = "DMR Width Distributions",
        x = "",
        y = "Width (bp, log scale)"
    )

0.14 Summary

cat("\n## Key Findings\n\n")

0.15 Key Findings

if (nrow(truth_eval) > 0) {
    best_f1 <- truth_eval[which.max(truth_eval$F1_regions), ]
    best_iou <- truth_eval[which.max(truth_eval$IoU_bp), ]

    cat(sprintf(
        "- Best region-level F1: **%s** (%.3f)\n",
        best_f1$Method, best_f1$F1_regions
    ))
    cat(sprintf(
        "- Best base-pair IoU: **%s** (%.3f)\n",
        best_iou$Method, best_iou$IoU_bp
    ))

    if ("CMEnt" %in% truth_eval$Method) {
        seg <- truth_eval[truth_eval$Method == "CMEnt", ]
        cat(sprintf(
            "- **CMEnt** region metrics: Precision = %.3f, Recall = %.3f, F1 = %.3f\n",
            seg$Precision_regions, seg$Recall_regions, seg$F1_regions
        ))
        cat(sprintf(
            "- **CMEnt** base-pair metrics: Precision = %.3f, Recall = %.3f, F1 = %.3f, IoU = %.3f\n",
            seg$Precision_bp, seg$Recall_bp, seg$F1_bp, seg$IoU_bp
        ))
    }
}

Best region-level F1: Bumphunter (1.000)
Best base-pair IoU: comb-p (0.989)
CMEnt region metrics: Precision = 0.929, Recall = 1.000, F1 = 0.963
CMEnt base-pair metrics: Precision = 0.965, Recall = 0.994, F1 = 0.979, IoU = 0.959

0.16 Session Info

sessionInfo()

R version 4.6.0 (2026-04-24) Platform: x86_64-pc-linux-gnu Running under: Ubuntu 24.04.4 LTS

Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0

locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

time zone: Europe/Brussels tzcode source: system (glibc)

attached base packages: [1] parallel stats4 stats graphics grDevices datasets utils
[8] methods base

other attached packages: [1] dplyr_1.2.1 ggplot2_4.0.3
[3] data.table_1.18.4 minfi_1.58.0
[5] bumphunter_1.54.0 locfit_1.5-9.12
[7] iterators_1.0.14 foreach_1.5.2
[9] Biostrings_2.80.1 XVector_0.52.0
[11] SummarizedExperiment_1.42.0 Biobase_2.72.0
[13] MatrixGenerics_1.24.0 matrixStats_1.5.0
[15] GenomicRanges_1.64.0 Seqinfo_1.2.0
[17] IRanges_2.46.0 S4Vectors_0.50.1
[19] BiocGenerics_0.58.1 generics_0.1.4
[21] CMEnt_0.99.0 testthat_3.3.2
[23] BiocStyle_2.40.0

loaded via a namespace (and not attached): [1] R.methodsS3_1.8.2
[2] dichromat_2.0-0.1
[3] ggseqlogo_0.2.2
[4] strex_2.0.1
[5] progress_1.2.3
[6] RPMM_1.25
[7] shinythemes_1.2.0
[8] nnet_7.3-20
[9] DT_0.34.0
[10] HDF5Array_1.40.0
[11] vctrs_0.7.3
[12] digest_0.6.39
[13] png_0.1-9
[14] shape_1.4.6.1
[15] proxy_0.4-29
[16] lumi_2.64.0
[17] deldir_2.0-4
[18] combinat_0.0-8
[19] permute_0.9-10
[20] magick_2.9.1
[21] MASS_7.3-65
[22] pkgdown_2.2.0
[23] reshape_0.8.10
[24] reshape2_1.4.5
[25] qvalue_2.44.0
[26] httpuv_1.6.17
[27] withr_3.0.2
[28] showtextdb_3.0
[29] xfun_0.58
[30] ellipsis_0.3.3
[31] survival_3.8-6
[32] bedr_1.1.5
[33] doRNG_1.8.6.3
[34] memoise_2.0.1
[35] BiasedUrn_2.0.12
[36] sysfonts_0.8.9
[37] GlobalOptions_0.1.4
[38] gtools_3.9.5
[39] DNAcopy_1.86.0
[40] R.oo_1.27.1
[41] Formula_1.2-5
[42] prettyunits_1.2.0
[43] KEGGREST_1.52.0
[44] promises_1.5.0
[45] otel_0.2.0
[46] httr_1.4.8
[47] restfulr_0.0.16
[48] IlluminaHumanMethylation450kanno.ilmn12.hg19_0.6.1 [49] rhdf5filters_1.24.0
[50] ps_1.9.3
[51] rhdf5_2.56.0
[52] rstudioapi_0.18.0
[53] UCSC.utils_1.8.0
[54] base64enc_0.1-6
[55] processx_3.9.0
[56] curl_7.1.0
[57] h5mread_1.4.0
[58] ChAMPdata_2.44.0
[59] ExperimentHub_3.2.0
[60] quadprog_1.5-8
[61] SparseArray_1.12.2
[62] Illumina450ProbeVariants.db_1.48.0
[63] xtable_1.8-8
[64] stringr_1.6.0
[65] desc_1.4.3
[66] doParallel_1.0.17
[67] evaluate_1.0.5
[68] S4Arrays_1.12.0
[69] BiocFileCache_3.2.0
[70] preprocessCore_1.74.0
[71] hms_1.1.4
[72] bookdown_0.46
[73] colorspace_2.1-2
[74] filelock_1.0.3
[75] JADE_2.0-4
[76] IlluminaHumanMethylationEPICanno.ilm10b4.hg19_0.6.0 [77] marray_1.90.0
[78] VennDiagram_1.8.2
[79] magrittr_2.0.5
[80] readr_2.2.0
[81] viridis_0.6.5
[82] later_1.4.8
[83] lattice_0.22-9
[84] genefilter_1.94.0
[85] bspm_0.5.8
[86] XML_3.99-0.23
[87] class_7.3-23
[88] Hmisc_5.2-5
[89] pillar_1.11.1
[90] nlme_3.1-168
[91] pwalign_1.8.0
[92] gridBase_0.4-7
[93] caTools_1.18.3
[94] compiler_4.6.0
[95] beachmat_2.28.0
[96] stringi_1.8.7
[97] shinycssloaders_1.1.0
[98] showtext_0.9-8
[99] dendextend_1.19.1
[100] devtools_2.5.2
[101] GenomicAlignments_1.48.0
[102] plyr_1.8.9
[103] crayon_1.5.3
[104] abind_1.4-8
[105] BiocIO_1.22.0
[106] org.Hs.eg.db_3.23.1
[107] bit_4.6.0
[108] codetools_0.2-20
[109] openssl_2.4.2
[110] ChAMP_2.42.0
[111] bslib_0.11.0
[112] e1071_1.7-17
[113] biovizBase_1.60.0
[114] GetoptLong_1.1.1
[115] plotly_4.12.0
[116] wateRmelon_2.18.0
[117] multtest_2.68.0
[118] mime_0.13
[119] splines_4.6.0
[120] circlize_0.4.18
[121] Rcpp_1.1.1-1.1
[122] dbplyr_2.5.2
[123] sparseMatrixStats_1.24.0
[124] IlluminaHumanMethylation450kmanifest_0.4.0
[125] interp_1.1-6
[126] knitr_1.51
[127] blob_1.3.0
[128] BiocVersion_3.23.1
[129] clue_0.3-68
[130] seqLogo_1.78.0
[131] AnnotationFilter_1.36.0
[132] fs_2.1.0
[133] checkmate_2.3.4
[134] DelayedMatrixStats_1.34.0
[135] pkgbuild_1.4.8
[136] Gviz_1.56.0
[137] tibble_3.3.1
[138] Matrix_1.7-5
[139] callr_3.8.0
[140] statmod_1.5.2
[141] tzdb_0.5.0
[142] pkgconfig_2.0.3
[143] tools_4.6.0
[144] cachem_1.1.0
[145] cigarillo_1.2.0
[146] RSQLite_3.53.1
[147] globaltest_5.66.0
[148] viridisLite_0.4.3
[149] DBI_1.3.0
[150] impute_1.86.0
[151] fastmap_1.2.0
[152] rmarkdown_2.31
[153] scales_1.4.0
[154] grid_4.6.0
[155] usethis_3.2.1
[156] Rsamtools_2.28.0
[157] AnnotationHub_4.2.0
[158] sass_0.4.10
[159] FNN_1.1.4.1
[160] BiocManager_1.30.27
[161] VariantAnnotation_1.58.0
[162] scrime_1.3.7
[163] rpart_4.1.27
[164] farver_2.1.2
[165] mgcv_1.9-4
[166] yaml_2.3.12
[167] latticeExtra_0.6-31
[168] foreign_0.8-90
[169] rtracklayer_1.72.0
[170] illuminaio_0.54.0
[171] cli_3.6.6
[172] purrr_1.2.2
[173] siggenes_1.86.0
[174] txdbmaker_1.8.0
[175] GEOquery_2.80.0
[176] lifecycle_1.0.5
[177] askpass_1.2.1
[178] IlluminaHumanMethylationEPICmanifest_0.3.0
[179] lambda.r_1.2.4
[180] sessioninfo_1.2.4
[181] backports_1.5.1
[182] methylumi_2.58.0
[183] BiocParallel_1.46.0
[184] annotate_1.90.0
[185] gtable_0.3.6
[186] rjson_0.2.23
[187] missMethyl_1.46.0
[188] limma_3.68.4
[189] edgeR_4.10.1
[190] jsonlite_2.0.0
[191] isva_1.10
[192] TFBSTools_1.50.0
[193] bitops_1.0-9
[194] bit64_4.8.2
[195] brio_1.1.5
[196] base64_2.0.2
[197] geneLenDataBase_1.48.0
[198] futile.options_1.0.1
[199] jquerylib_0.1.4
[200] R.utils_2.13.0
[201] DMRcate_3.8.0
[202] lazyeval_0.2.3
[203] shiny_1.13.0
[204] affy_1.90.0
[205] htmltools_0.5.9
[206] GO.db_3.23.1
[207] rappdirs_0.3.4
[208] formatR_1.14
[209] tinytex_0.59
[210] ROC_1.88.0
[211] ensembldb_2.36.1
[212] glue_1.8.1
[213] TFMPvalue_1.0.0
[214] httr2_1.2.2
[215] RCurl_1.98-1.19
[216] rprojroot_2.1.1
[217] mclust_6.1.2
[218] BSgenome_1.80.0
[219] jpeg_0.1-11
[220] futile.logger_1.4.9
[221] gridExtra_2.3
[222] igraph_2.3.2
[223] R6_2.6.1
[224] sva_3.60.0
[225] tidyr_1.3.2
[226] DelayedDataFrame_1.28.0
[227] labeling_0.4.3
[228] GenomicFeatures_1.64.0
[229] cluster_2.1.8.2
[230] rngtools_1.5.2
[231] pkgload_1.5.2
[232] Rhdf5lib_2.0.0
[233] beanplot_1.3.1
[234] GenomeInfoDb_1.48.0
[235] bsseq_1.48.0
[236] DirichletMultinomial_1.54.0
[237] DelayedArray_0.38.2
[238] tidyselect_1.2.1
[239] ProtGenerics_1.44.0
[240] nleqslv_3.3.7
[241] htmlTable_2.5.0
[242] xml2_1.5.2
[243] inline_0.3.21
[244] AnnotationDbi_1.74.0
[245] fastICA_1.2-7
[246] goseq_1.64.0
[247] KernSmooth_2.23-26
[248] S7_0.2.2
[249] affyio_1.82.0
[250] nor1mix_1.3-3
[251] htmlwidgets_1.6.4
[252] ComplexHeatmap_2.28.0
[253] RColorBrewer_1.1-3
[254] biomaRt_2.68.0
[255] rlang_1.2.0
[256] rentrez_1.2.4