This script isolates the combined clinic VAF panel (boxplots on top, line trajectories below), matching Figures 3A and 3B from the full analysis workflow. The code reproduces the base graphic reported in the article "Single-workflow Nanopore whole genome sequencing with adaptive sampling for accelerated and comprehensive pediatric cancer profiling".

Data setup

library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)

# Fix the RNG state so every random draw below (and therefore the resulting
# figure) is reproducible from run to run.
set.seed(2027)

# Category labels shared by both simulated tables. `clinic_bins` is listed in
# ascending VAF order; `variant_types` is index-matched to the sampling
# probabilities used when building `first_hits_long`.
clinic_bins <- c("<5%", "5-10%", "10-25%", ">25%")
variant_types <- c("SNV", "INDEL", "BND")

# Simulated time-to-detection table.
#
# One row per sample x variant x detection level (the number of supporting
# reads that counts as a "hit"). Each row receives a randomly drawn VAF bin,
# a variant type (weighted 60/25/15 % across `variant_types`) and a
# gamma-distributed detection time in hours whose mean grows with the
# detection level (shape = level / 2, scale = 3.5).
first_hits_long <- tidyr::crossing(
  sample_name = paste0("sample_", LETTERS[1:10]),
  variant_group_id = sprintf("var%03d", 1:50),
  detection_level = c(1, 5, 8, 10)
) %>%
  # The three random draws stay in this order (bin, type, time) so the RNG
  # stream, and hence the simulated values, are the same as before.
  mutate(
    clinic_vaf_bin = sample(clinic_bins, size = n(), replace = TRUE)
  ) %>%
  mutate(
    variant_type = sample(
      variant_types,
      size = n(),
      replace = TRUE,
      prob = c(0.6, 0.25, 0.15)
    )
  ) %>%
  mutate(
    # Cap at 85 h, the upper x-axis limit used by the plots.
    time_h = pmin(
      rgamma(n(), shape = detection_level / 2, scale = 3.5),
      85
    )
  )

# Simulated read-accumulation table.
#
# Full cross of 10 samples x up to 120 random "chr @ position" regions x
# variant type x VAF bin x hourly time points (0-72 h, stored in seconds).
# Every row gets a Poisson read increment; increments are then accumulated
# over time within each sample/region/type/bin trajectory.
workingframe <- tidyr::crossing(
  sample_name = paste0("sample_", LETTERS[1:10]),
  # Chromosome labels are drawn first, then positions (RNG order matters).
  var_region = paste0(
    "chr", sample(c(1:22, "X"), 120, replace = TRUE),
    " @ ", sample(1e5:9e6, 120)
  ),
  variant_type = variant_types,
  clinic_vaf_bin = clinic_bins,
  rel_time_s = 3600 * (0:72)
) %>%
  mutate(support = "variant") %>%
  mutate(
    # Mean hourly increment, drawn from a type-specific uniform range.
    step_rate = case_when(
      variant_type == "SNV" ~ runif(n(), 1.0, 2.2),
      variant_type == "INDEL" ~ runif(n(), 0.7, 1.6),
      TRUE ~ runif(n(), 0.4, 1.2)
    )
  ) %>%
  mutate(incremental = rpois(n(), lambda = step_rate)) %>%
  mutate(
    # One key per trajectory (sample x region x type x bin).
    key = interaction(
      sample_name, var_region, variant_type, clinic_vaf_bin,
      drop = TRUE
    )
  ) %>%
  # Order rows by trajectory, then by time, before accumulating.
  arrange(key, rel_time_s) %>%
  group_by(key) %>%
  mutate(cumulative_reads = cumsum(incremental)) %>%
  ungroup()

Isolated combined plot

# Combined clinic VAF panel: horizontal boxplots of time-to-detection (top)
# stacked on cumulative read trajectories (bottom), with one facet column per
# clinic VAF bin.
#
# In both panels `clinic_vaf_bin` is converted to a factor whose levels follow
# `clinic_bins`. facet_grid() orders panels by factor level; a plain character
# column would be sorted alphabetically instead, which puts ">25%" before
# "10-25%" and "5-10%". Using the same levels in both panels also guarantees
# that the top and bottom facet columns line up (the bottom strips are hidden).

# Top panel: distribution of detection times per detection level, dodged by
# variant type within each VAF bin.
p_clinic_vaf_boxplot2 <- first_hits_long %>%
  mutate(
    detection_level = as.factor(detection_level),
    clinic_vaf_bin = factor(clinic_vaf_bin, levels = clinic_bins)
  ) %>%
  ggplot(aes(
    x = time_h,
    y = detection_level,
    group = interaction(
      detection_level,
      clinic_vaf_bin,
      variant_type
    ),
    fill = variant_type
  )) +
  geom_boxplot(
    position = position_dodge(width = 0.8),
    alpha = 0.7,
    outlier.size = 1
  ) +
  scale_x_continuous(
    breaks = seq(0, 72, by = 12),
    limits = c(0, 85)
  ) +
  facet_grid(
    cols = vars(clinic_vaf_bin),
    labeller = labeller(
      clinic_vaf_bin = function(x) paste("VAF:", x)
    )
  ) +
  # The x-axis title is supplied by the bottom panel, which shares the axis.
  labs(x = NULL, y = "Detection Level (reads)") +
  theme_minimal(base_size = 12)

# Bottom panel: one line per sample/region/variant-type trajectory on a log10
# y-axis.
p_clinic_vaf_lines2 <- workingframe %>%
  # A cumulative count of 0 has no position on a log10 axis (log10(0) is
  # -Inf) and makes ggplot2 warn about infinite values, so each trajectory
  # starts at its first supporting read.
  filter(cumulative_reads > 0) %>%
  mutate(clinic_vaf_bin = factor(clinic_vaf_bin, levels = clinic_bins)) %>%
  ggplot(aes(
    x = rel_time_s / 3600,
    y = cumulative_reads,
    group = interaction(
      sample_name,
      var_region,
      variant_type
    ),
    color = variant_type
  )) +
  geom_line(alpha = 0.7) +
  scale_x_continuous(
    breaks = seq(0, 72, by = 12),
    limits = c(0, 85)
  ) +
  # Breaks include 1, 5, 8 and 10 so the detection levels shown in the top
  # panel can be read off this axis.
  scale_y_log10(
    breaks = c(1, 5, 8, 10, 50, 100, 500),
    labels = c("1", "5", "8", "10", "50", "100", "500")
  ) +
  facet_grid(
    cols = vars(clinic_vaf_bin),
    scales = "free_x"
  ) +
  labs(
    x = "Sequencing Time (hours)",
    y = "Cumulative Variant Reads"
  ) +
  theme_minimal(base_size = 12) +
  # Facet strips are already shown by the top panel.
  theme(strip.text.x = element_blank())

# Stack the two panels (patchwork `/`), giving the trajectories twice the
# height of the boxplots.
p_clinic_vaf_chr2 <- (
  p_clinic_vaf_boxplot2 + theme(plot.margin = margin(b = 0))
) /
(
  p_clinic_vaf_lines2 + theme(plot.margin = margin(t = 0))
) +
  plot_layout(heights = c(1, 2))

p_clinic_vaf_chr2

Combined Clinic VAF Plot

Session Info

## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 22.2
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Toronto
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.39   R6_2.6.1        bookdown_0.46   fastmap_1.2.0  
##  [5] xfun_0.56       blogdown_1.23   cachem_1.1.0    knitr_1.51     
##  [9] htmltools_0.5.9 rmarkdown_2.30  lifecycle_1.0.5 cli_3.6.5      
## [13] sass_0.4.10     jquerylib_0.1.4 compiler_4.5.2  tools_4.5.2    
## [17] evaluate_1.0.5  bslib_0.10.0    yaml_2.3.12     otel_0.2.0     
## [21] jsonlite_2.0.0  rlang_1.1.7