ILD Biopsy Decision Analysis

Clinical context

All patients with suspected Interstitial Lung Disease (ILD) undergo sequential biopsy:

  1. Cryobiopsy — first procedure for every patient
  2. Surgical Lung Biopsy (SLB) — performed on every patient regardless of cryobiopsy result

A Monte Carlo simulation (N = 10,000, set.seed(2024)) propagates diagnostic uncertainty through the full pathway and samples procedural complication rates from their probability distributions.


Parameters

set.seed(2024)
N <- 10000

# Cryobiopsy
p_cryo <- c(high = 0.60, low = 0.31, nil = 0.09)

# Surgical biopsy (conditional on cryo result)

# After HIGH confidence cryo: 95% + 2.5% + 2.5% = 100%
p_surg_high <- c(
  concordant    = 0.950,
  disc_uncl     = 0.025,
  disc_hc_other = 0.025
)

# After LOW confidence cryo: 60% concordant + 20% unclassifiable + 20% HC other = 100%
p_surg_low <- c(
  concordant    = 0.60,
  disc_uncl     = 0.20,
  disc_hc_other = 0.20
)

# After NONINFORMATIVE cryo: 17% concordant + 83% discordant = 100%
# Within the 83% discordant: 60% LC other + 40% HC other
p_surg_nil <- c(
  concordant    = 0.170,
  disc_lc_other = 0.83 * 0.60,   # 49.8%
  disc_hc_other = 0.83 * 0.40    # 33.2%
)

# Complication rates
p_cryo_ptx     <- 0.08   # cryobiopsy pneumothorax
p_surg_airleak <- 0.05   # surgical persistent air leak

# Surgical mortality: Beta(4, 96) → mean 4%, 95% CI ≈ 2–8%
surg_mort_alpha <- 4
surg_mort_beta  <- 96

# Label maps (used throughout)
cryo_labels <- c(high = "High confidence", low = "Low confidence", nil = "Noninformative")
surg_labels  <- c(
  concordant    = "Concordant",
  disc_uncl     = "Discordant – Unclassifiable",
  disc_hc_other = "Discordant – HC other Dx",
  disc_lc_other = "Discordant – LC other Dx"
)
surg_colors  <- c(
  concordant    = "#1A8A4A",
  disc_uncl     = "#CA6F1E",
  disc_hc_other = "#B03A2E",
  disc_lc_other = "#D68910"
)

Decision tree (theoretical probabilities)

# Theoretical expected n per leaf (for node labels)
n_high <- round(N * p_cryo["high"])
n_low  <- round(N * p_cryo["low"])
n_nil  <- round(N * p_cryo["nil"])

fmt <- function(n, p) sprintf("%.1f%%\n(exp. n≈%d)", p * 100, round(n * p))

grViz(paste0('
digraph ILD {
  graph [layout=dot, rankdir=LR, fontname="Helvetica", nodesep=0.4, ranksep=1.0]
  node  [shape=rectangle, style="filled,rounded", fontname="Helvetica", fontsize=10, margin="0.12,0.07"]
  edge  [fontname="Helvetica", fontsize=9, color="#555555"]

  ROOT [label="ALL PATIENTS\nN = 10,000\nCryobiopsy →", fillcolor="#2471A3", fontcolor=white, shape=ellipse, fontsize=11]

  C_H [label="High confidence\n60%  (n≈60,00)", fillcolor="#1A8A4A", fontcolor=white]
  C_L [label="Low confidence\n31%  (n≈31,00)", fillcolor="#D68910", fontcolor=white]
  C_N [label="Noninformative\n9%   (n≈9,00)",  fillcolor="#B03A2E", fontcolor=white]

  SH1 [label="Concordant\n95.0%", fillcolor="#2ECC71", fontcolor=white]
  SH2 [label="Discordant\nUnclassifiable\n2.5%", fillcolor="#CA6F1E", fontcolor=white]
  SH3 [label="Discordant\nHC other Dx\n2.5%", fillcolor="#E74C3C", fontcolor=white]

  SL1 [label="Concordant\n60%", fillcolor="#2ECC71", fontcolor=white]
  SL3 [label="Discordant\nUnclassifiable\n20%", fillcolor="#CA6F1E", fontcolor=white]
  SL4 [label="Discordant\nHC other Dx\n20%", fillcolor="#E74C3C", fontcolor=white]

  SN1 [label="Concordant\n17.0%",  fillcolor="#2ECC71", fontcolor=white]
  SN2 [label="Discordant\nLC other Dx\n49.8%", fillcolor="#D68910", fontcolor=white]
  SN3 [label="Discordant\nHC other Dx\n33.2%", fillcolor="#E74C3C", fontcolor=white]

  COMP [label="Complications\n• Cryo pneumothorax 8%\n• SLB mortality 4% (95%CI 2–8%)\n• SLB air leak 5%",
        fillcolor="#6C3483", fontcolor=white, shape=note, fontsize=9]

  ROOT -> C_H [label=" 60%"]
  ROOT -> C_L [label=" 31%"]
  ROOT -> C_N [label="  9%"]

  C_H -> SH1 [label=" 95.0%"]
  C_H -> SH2 [label=" 2.5%"]
  C_H -> SH3 [label=" 2.5%"]

  C_L -> SL1 [label=" 60%"]
  C_L -> SL3 [label=" 20%"]
  C_L -> SL4 [label=" 20%"]

  C_N -> SN1 [label=" 17.0%"]
  C_N -> SN2 [label=" 49.8%"]
  C_N -> SN3 [label=" 33.2%"]

  ROOT -> COMP [style=dashed, color="#6C3483", label="  all\n  patients"]
}
'))

Monte Carlo simulation

# Step 1 — cryobiopsy result
cryo <- sample(names(p_cryo), N, replace = TRUE, prob = p_cryo)

# Step 2 — cryo complication (pneumothorax)
ptx <- rbinom(N, 1, p_cryo_ptx) == 1

# Step 3 — surgical biopsy result conditional on cryo result
surg <- character(N)
surg[cryo == "high"] <- sample(names(p_surg_high), sum(cryo == "high"), TRUE, p_surg_high)
surg[cryo == "low"]  <- sample(names(p_surg_low),  sum(cryo == "low"),  TRUE, p_surg_low)
surg[cryo == "nil"]  <- sample(names(p_surg_nil),  sum(cryo == "nil"),  TRUE, p_surg_nil)

# Step 4 — surgical complications
# Per-patient mortality probability drawn from Beta prior (propagates parameter uncertainty)
mort_p  <- rbeta(N, surg_mort_alpha, surg_mort_beta)
died    <- rbinom(N, 1, mort_p) == 1
airleak <- rbinom(N, 1, p_surg_airleak) == 1

sim <- tibble(
  patient = seq_len(N),
  cryo, ptx, surg, mort_p, died, airleak
)

cat("Simulation complete:", N, "patients\n")
## Simulation complete: 10000 patients
cat("Cryo breakdown —",
    sum(cryo=="high"), "high /",
    sum(cryo=="low"),  "low /",
    sum(cryo=="nil"),  "noninformative\n")
## Cryo breakdown — 6025 high / 3089 low / 886 noninformative
cat("Complications — pneumothorax:", sum(ptx),
    "| surgical deaths:", sum(died),
    "| air leak:", sum(airleak), "\n")
## Complications — pneumothorax: 815 | surgical deaths: 401 | air leak: 533

Results

Cryobiopsy outcome distribution

cryo_colors_vec <- c(high = "#1A8A4A", low = "#D68910", nil = "#B03A2E")

sim %>%
  count(cryo) %>%
  mutate(label = cryo_labels[cryo], pct = n / N) %>%
  ggplot(aes(x = reorder(label, -pct), y = pct, fill = cryo)) +
  geom_col(show.legend = FALSE, width = 0.55) +
  geom_text(aes(label = sprintf("n = %d\n%.1f%%", n, pct * 100)),
            vjust = -0.4, size = 4) +
  scale_fill_manual(values = cryo_colors_vec) +
  scale_y_continuous(labels = percent, limits = c(0, 0.75),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Cryobiopsy results  (N = 10,000)",
       x = NULL, y = "Proportion of patients") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

Surgical biopsy outcomes by cryo group

sim %>%
  count(cryo, surg) %>%
  group_by(cryo) %>%
  mutate(
    pct      = n / sum(n),
    surg_lbl = surg_labels[surg],
    cryo_lbl = sprintf("%s cryo\n(n = %d)", cryo_labels[cryo], sum(n))
  ) %>%
  ungroup() %>%
  ggplot(aes(x = reorder(surg_lbl, pct), y = pct, fill = surg)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = sprintf("%.1f%%  (n = %d)", pct * 100, n)),
            hjust = -0.08, size = 3.2) +
  coord_flip() +
  facet_wrap(~ cryo_lbl, scales = "free_y", ncol = 1) +
  scale_y_continuous(labels = percent, limits = c(0, 1.12),
                     expand = expansion(mult = c(0, 0))) +
  scale_fill_manual(values = surg_colors) +
  labs(title = "Surgical biopsy outcomes by cryobiopsy group",
       x = NULL, y = "Proportion within cryo group") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"),
        strip.text = element_text(face = "bold", size = 11),
        panel.spacing = unit(1, "lines"))

Final diagnostic outcomes — entire cohort

final_cats <- c(
  concordant    = "Concordant",
  disc_uncl     = "Discordant – Unclassifiable",
  disc_hc_other = "Discordant – HC other Dx",
  disc_lc_other = "Discordant – LC other Dx"
)
final_colors <- c(
  "Concordant"                  = "#1A8A4A",
  "Discordant – Unclassifiable" = "#CA6F1E",
  "Discordant – HC other Dx"    = "#B03A2E",
  "Discordant – LC other Dx"    = "#D68910"
)

sim %>%
  mutate(outcome = final_cats[surg]) %>%
  count(outcome) %>%
  mutate(pct = n / N) %>%
  arrange(desc(pct)) %>%
  ggplot(aes(x = reorder(outcome, pct), y = pct, fill = outcome)) +
  geom_col(show.legend = FALSE, width = 0.65) +
  geom_text(aes(label = sprintf("n = %d   (%.1f%%)", n, pct * 100)),
            hjust = -0.05, size = 3.6) +
  coord_flip() +
  scale_y_continuous(labels = percent, limits = c(0, 0.82),
                     expand = expansion(mult = c(0, 0))) +
  scale_fill_manual(values = final_colors) +
  labs(title = "Final diagnostic outcomes — all 10,000 patients",
       subtitle = "After cryobiopsy + surgical lung biopsy",
       x = NULL, y = "Proportion of total cohort") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

Complication analysis

comp_df <- tibble(
  label     = c("Cryo\nPneumothorax", "SLB\nMortality", "SLB\nAir leak"),
  n         = c(sum(sim$ptx), sum(sim$died), sum(sim$airleak)),
  rate_theo = c(p_cryo_ptx, surg_mort_alpha / (surg_mort_alpha + surg_mort_beta), p_surg_airleak),
  fill_col  = c("#9B59B6", "#6C3483", "#A569BD")
) %>%
  mutate(
    pct  = n / N,
    ci_l = map_dbl(n, ~ binom.test(.x, N)$conf.int[1]),
    ci_h = map_dbl(n, ~ binom.test(.x, N)$conf.int[2])
  )

p_comp <- ggplot(comp_df, aes(x = label, y = pct, fill = label)) +
  geom_col(show.legend = FALSE, width = 0.5) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_h), width = 0.12, linewidth = 0.9) +
  geom_point(aes(y = rate_theo), shape = 18, size = 4, color = "black") +
  geom_text(aes(y = ci_h, label = sprintf("n=%d\n%.1f%%", n, pct * 100)),
            vjust = -0.5, size = 3.5) +
  scale_fill_manual(values = setNames(comp_df$fill_col, comp_df$label)) +
  scale_y_continuous(labels = percent, limits = c(0, 0.14),
                     expand = expansion(mult = c(0, 0.12))) +
  labs(title = "Complication rates",
       subtitle = "Bars = simulated; ◆ = theoretical rate; error bars = 95% CI (Wilson)",
       x = NULL, y = "Rate") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

# Surgical mortality prior (Beta distribution)
beta_df <- tibble(
  x    = seq(0, 0.14, length.out = 600),
  dens = dbeta(x, surg_mort_alpha, surg_mort_beta)
)
ci_lo <- qbeta(0.025, surg_mort_alpha, surg_mort_beta)
ci_hi <- qbeta(0.975, surg_mort_alpha, surg_mort_beta)

p_beta <- ggplot(beta_df, aes(x = x * 100, y = dens)) +
  geom_ribbon(data = filter(beta_df, x >= ci_lo & x <= ci_hi),
              aes(ymin = 0, ymax = dens), fill = "#6C3483", alpha = 0.25) +
  geom_line(linewidth = 1.1, color = "#6C3483") +
  geom_vline(xintercept = 4, linetype = "dashed", linewidth = 0.9) +
  geom_vline(xintercept = c(ci_lo * 100, ci_hi * 100),
             linetype = "dotted", color = "grey50", linewidth = 0.8) +
  annotate("text", x = 6, y = max(beta_df$dens) * 0.85,
           label = sprintf("Mean = 4%%\n95%% CI: %.1f%%–%.1f%%", ci_lo*100, ci_hi*100),
           size = 3.5, hjust = 0) +
  labs(title = "Surgical mortality prior",
       subtitle = "Beta(4, 96) — shaded = 95% CI",
       x = "Mortality rate (%)", y = "Density") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

# Distribution of per-patient sampled mortality probability
p_mort_mc <- ggplot(sim, aes(x = mort_p * 100)) +
  geom_histogram(bins = 60, fill = "#6C3483", alpha = 0.7, color = "white") +
  geom_vline(xintercept = 4, linetype = "dashed", linewidth = 0.9) +
  geom_vline(xintercept = c(ci_lo * 100, ci_hi * 100),
             linetype = "dotted", color = "grey50", linewidth = 0.8) +
  labs(title = "Monte Carlo: sampled mortality probabilities",
       subtitle = sprintf("Simulated deaths: n = %d (%.1f%%)", sum(sim$died), mean(sim$died)*100),
       x = "Per-patient mortality rate (%)", y = "Patients (n)") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"))

p_comp | p_beta | p_mort_mc


Summary table

sim %>%
  mutate(
    cryo_lbl = cryo_labels[cryo],
    surg_lbl = surg_labels[surg]
  ) %>%
  count(cryo_lbl, surg_lbl) %>%
  group_by(cryo_lbl) %>%
  mutate(pct_group = sprintf("%.1f%%", n / sum(n) * 100)) %>%
  ungroup() %>%
  mutate(pct_total = sprintf("%.1f%%", n / N * 100)) %>%
  rename(
    `Cryo result`          = cryo_lbl,
    `Surgical result`      = surg_lbl,
    `n`                    = n,
    `% in cryo group`      = pct_group,
    `% of total cohort`    = pct_total
  ) %>%
  kbl(caption = "Patient pathways — Monte Carlo simulation (N = 10,000)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE) %>%
  collapse_rows(columns = 1, valign = "top")
Patient pathways — Monte Carlo simulation (N = 10,000)
Cryo result Surgical result n % in cryo group % of total cohort
High confidence Concordant 5738 95.2% 57.4%
Discordant – HC other Dx 137 2.3% 1.4%
Discordant – Unclassifiable 150 2.5% 1.5%
Low confidence Concordant 1853 60.0% 18.5%
Discordant – HC other Dx 639 20.7% 6.4%
Discordant – Unclassifiable 597 19.3% 6.0%
Noninformative Concordant 145 16.4% 1.5%
Discordant – HC other Dx 328 37.0% 3.3%
Discordant – LC other Dx 413 46.6% 4.1%

Key findings

sim_final <- sim %>%
  mutate(outcome = if_else(surg == "concordant", "concordant", "discordant"))

n_conc <- sum(sim_final$outcome == "concordant")
n_disc <- sum(sim_final$outcome == "discordant")

cat(sprintf("- **Concordant diagnosis:** %d patients (%.1f%%)\n", n_conc, n_conc/N*100))
  • Concordant diagnosis: 7736 patients (77.4%)
cat(sprintf("- **Discordant / unclassifiable:** %d patients (%.1f%%)\n", n_disc, n_disc/N*100))
  • Discordant / unclassifiable: 2264 patients (22.6%)
cat(sprintf("- **Cryobiopsy pneumothorax:** %d patients (%.1f%%)\n", sum(sim$ptx), mean(sim$ptx)*100))
  • Cryobiopsy pneumothorax: 815 patients (8.2%)
cat(sprintf("- **Surgical mortality:** %d patients (%.1f%%)\n", sum(sim$died), mean(sim$died)*100))
  • Surgical mortality: 401 patients (4.0%)
cat(sprintf("- **Surgical persistent air leak:** %d patients (%.1f%%)\n", sum(sim$airleak), mean(sim$airleak)*100))
  • Surgical persistent air leak: 533 patients (5.3%)

Reproducibility: set.seed(2024) — all results are exactly reproducible.