Skip to contents

In this vignette, we will use CIBRA to identify the most impactfull mutation type. We will make use of WES data and RNA-seq data from the TCGA, specifically from colorectal cancer.

# load data
count_data <- CIBRA::TCGA_CRC_rna_data # rna count data

# genomic alteration profile
genomic_metrics <- CIBRA::genomic_alteration_profile

# remove letter to match sample names of genomic metrics
colnames(count_data) <- str_sub(colnames(count_data), end = -2)

For the refinement on mutation type, we create a definition matrix with the mutation types we want to assess compared to the reference condition. In this example, we are exploring SNV and SCNAs and their combination.

# process the genomic alteration profile for the definition matrix

# select genes of interest
goi <- c("APC", "TP53", "KRAS", "BRAF", "PIK3CA", "TTN")

definition_matrix <- genomic_metrics[goi]
rownames(definition_matrix) <- str_replace_all(rownames(definition_matrix), "-", ".")

# process the alteration definition
mutate_definition <- function(x) {
  s <- case_when(
    !str_detect(x, patter = "([gain|loss|break])|(^wt$)") ~ "snv", # SNV only
    !str_detect(x, pattern = c("mut|(^wt$)")) ~ "scna", # SCNA only
    str_detect(x, pattern = c("(mut;[gain|loss])")) ~ "snv_scna", # the combination
    TRUE ~ "wt"
  )
  return(s)
}

definition_matrix <- definition_matrix %>% mutate_all(
  mutate_definition
)

# match data between definition matrix and the count data
sample_list <- intersect(rownames(definition_matrix), colnames(count_data))

data <- count_data[sample_list]
definition_matrix <- definition_matrix[sample_list,]

Now that we have created the definition matrix and prepared the RNA count data, we can run CIBRA. We can select a subset of columns we want to asssess. CIBRA automatically assesses all definitions within the column against the control_definition provided.

# focus on the genes of interest
columns <- c("TP53", "APC", "KRAS", "BRAF", "PIK3CA", "TTN")

# set up parameters for CIBRA
control_definition <- "wt" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used

# run CIBRA
CIBRA_res <- run_CIBRA(count_data, as.data.frame(definition_matrix), columns, 
                       control_definition, confidence, iterations, 
                       parallel = TRUE)
#> [1] "snv_scna" "scna"     "snv"     
#> [1] "TP53"     "snv_scna"
#> [1] "TP53" "scna"
#> [1] "TP53" "snv"
#> [1] "snv"      "snv_scna" "scna"    
#> [1] "APC" "snv"
#> [1] "APC"      "snv_scna"
#> [1] "APC"  "scna"
#> [1] "snv"      "snv_scna" "scna"    
#> [1] "KRAS" "snv"
#> [1] "KRAS"     "snv_scna"
#> [1] "KRAS" "scna"
#> [1] "scna"     "snv_scna" "snv"     
#> [1] "BRAF" "scna"
#> [1] "BRAF"     "snv_scna"
#> [1] "BRAF" "snv"
#> [1] "scna"     "snv"      "snv_scna"
#> [1] "PIK3CA" "scna"
#> [1] "PIK3CA" "snv"
#> [1] "PIK3CA"   "snv_scna"
#> [1] "snv_scna" "snv"      "scna"    
#> [1] "TTN"      "snv_scna"
#> [1] "TTN" "snv"
#> [1] "TTN"  "scna"

From the results below, it can be observed that the most impactful definition is different for each gene and depends on what type of gene it is. For example for the tumor suppressor genes TP53 and APC, the combination of SNV and SCNA results in the highest impact score. While for the oncogene BRAF, only SNVs give the highest score.

# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations
snv_scna TP53 0.4786 0.4674 217 129 NA NA NA NA 0
scna TP53 0.3464 0.3409 66 129 NA NA NA NA 0
snv TP53 0.2713 0.2716 59 129 NA NA NA NA 0
snv APC 0.371 0.3721 216 81 NA NA NA NA 0
snv_scna APC 0.4375 0.4414 142 81 NA NA NA NA 0
scna APC 0.3405 0.4069 32 81 NA NA NA NA 0
snv KRAS 0.2848 0.2793 122 186 NA NA NA NA 0
snv_scna KRAS 0.2494 0.2252 76 186 NA NA NA NA 0
scna KRAS 0.1714 0.1093 87 186 NA NA NA NA 0
scna BRAF 0.3081 0.2862 224 181 NA NA NA NA 0
snv_scna BRAF 0.2303 0.2086 14 181 NA NA NA NA 0
snv BRAF 0.4169 0.4125 52 181 NA NA NA NA 0
scna PIK3CA 0.203 0.1616 98 242 NA NA NA NA 0
snv PIK3CA 0.2256 0.185 108 242 NA NA NA NA 0
snv_scna PIK3CA 0.1824 0.1292 23 242 NA NA NA NA 0
snv_scna TTN 0.1504 0.1601 58 143 NA NA NA NA 0
snv TTN 0.2891 0.2868 219 143 NA NA NA NA 0
scna TTN 0.2114 0.1831 51 143 NA NA NA NA 0

To also assess the significance of the score, we can perform the gamma inferred permutation test.

# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations perm_test_prop perm_test_area pvalue_prop pvalue_sign_area
snv_scna TP53 0.4786 0.4674 217 129 NA NA NA NA 0 0.0000000 0.0000000 0.0000006 0.0003451
scna TP53 0.3464 0.3409 66 129 NA NA NA NA 0 0.0017637 0.0044092 0.0005711 0.0051794
snv TP53 0.2713 0.2716 59 129 NA NA NA NA 0 0.0299824 0.0291005 0.0151134 0.0213133
snv APC 0.371 0.3721 216 81 NA NA NA NA 0 0.0000000 0.0017637 0.0001745 0.0026894
snv_scna APC 0.4375 0.4414 142 81 NA NA NA NA 0 0.0000000 0.0000000 0.0000057 0.0006082
scna APC 0.3405 0.4069 32 81 NA NA NA NA 0 0.0035273 0.0000000 0.0007534 0.0012810
snv KRAS 0.2848 0.2793 122 186 NA NA NA NA 0 0.0176367 0.0238095 0.0087699 0.0182735
snv_scna KRAS 0.2494 0.2252 76 186 NA NA NA NA 0 0.0476190 0.0582011 0.0347619 0.0526923
scna KRAS 0.1714 0.1093 87 186 NA NA NA NA 0 0.3236332 0.3209877 0.3567770 0.3887167
scna BRAF 0.3081 0.2862 224 181 NA NA NA NA 0 0.0088183 0.0220459 0.0032632 0.0159074
snv_scna BRAF 0.2303 0.2086 14 181 NA NA NA NA 0 0.0776014 0.0679012 0.0679381 0.0720578
snv BRAF 0.4169 0.4125 52 181 NA NA NA NA 0 0.0000000 0.0000000 0.0000170 0.0011358
scna PIK3CA 0.203 0.1616 98 242 NA NA NA NA 0 0.1613757 0.1569665 0.1593826 0.1678161
snv PIK3CA 0.2256 0.185 108 242 NA NA NA NA 0 0.0908289 0.1075838 0.0794117 0.1110906
snv_scna PIK3CA 0.1824 0.1292 23 242 NA NA NA NA 0 0.2513228 0.2548501 0.2761134 0.2869234
snv_scna TTN 0.1504 0.1601 58 143 NA NA NA NA 0 0.5088183 0.1613757 0.5379990 0.1722009
snv TTN 0.2891 0.2868 219 143 NA NA NA NA 0 0.0167549 0.0220459 0.0073405 0.0157162
scna TTN 0.2114 0.1831 51 143 NA NA NA NA 0 0.1305115 0.1102293 0.1243580 0.1149510