Refining alterations on mutation type • CIBRA

In this vignette, we will use CIBRA to identify the most impactfull mutation type. We will make use of WES data and RNA-seq data from the TCGA, specifically from colorectal cancer.

# libraries
library(BiocParallel)
library(stringr)
library(dplyr)
library(CIBRA)

# load data
count_data <- CIBRA::TCGA_CRC_rna_data # rna count data

# genomic alteration profile
genomic_metrics <- CIBRA::genomic_alteration_profile

# remove letter to match sample names of genomic metrics
colnames(count_data) <- str_sub(colnames(count_data), end = -2)

For the refinement on mutation type, we create a definition matrix with the mutation types we want to assess compared to the reference condition. In this example, we are exploring SNV and SCNAs and their combination.

# process the genomic alteration profile for the definition matrix

# select genes of interest
goi <- c("APC", "TP53", "KRAS", "BRAF", "PIK3CA", "TTN")

definition_matrix <- genomic_metrics[goi]
rownames(definition_matrix) <- str_replace_all(rownames(definition_matrix), "-", ".")

# process the alteration definition
mutate_definition <- function(x) {
  s <- case_when(
    !str_detect(x, patter = "([gain|loss|break])|(^wt$)") ~ "snv", # SNV only
    !str_detect(x, pattern = c("mut|(^wt$)")) ~ "scna", # SCNA only
    str_detect(x, pattern = c("(mut;[gain|loss])")) ~ "snv_scna", # the combination
    TRUE ~ "wt"
  )
  return(s)
}

definition_matrix <- definition_matrix %>% mutate_all(
  mutate_definition
)

# match data between definition matrix and the count data
sample_list <- intersect(rownames(definition_matrix), colnames(count_data))

data <- count_data[sample_list]
definition_matrix <- definition_matrix[sample_list,]

Now that we have created the definition matrix and prepared the RNA count data, we can run CIBRA. We can select a subset of columns we want to asssess. CIBRA automatically assesses all definitions within the column against the control_definition provided.

# focus on the genes of interest
columns <- c("TP53", "APC", "KRAS", "BRAF", "PIK3CA", "TTN")

# set up parameters for CIBRA
control_definition <- "wt" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used

# run CIBRA
CIBRA_res <- run_CIBRA(count_data, as.data.frame(definition_matrix), columns, 
                       control_definition, confidence, iterations, 
                       parallel = TRUE)
#> [1] "snv_scna" "scna"     "snv"     
#> [1] "TP53"     "snv_scna"
#> [1] "TP53" "scna"
#> [1] "TP53" "snv"
#> [1] "snv"      "snv_scna" "scna"    
#> [1] "APC" "snv"
#> [1] "APC"      "snv_scna"
#> [1] "APC"  "scna"
#> [1] "snv"      "snv_scna" "scna"    
#> [1] "KRAS" "snv"
#> [1] "KRAS"     "snv_scna"
#> [1] "KRAS" "scna"
#> [1] "scna"     "snv_scna" "snv"     
#> [1] "BRAF" "scna"
#> [1] "BRAF"     "snv_scna"
#> [1] "BRAF" "snv"
#> [1] "scna"     "snv"      "snv_scna"
#> [1] "PIK3CA" "scna"
#> [1] "PIK3CA" "snv"
#> [1] "PIK3CA"   "snv_scna"
#> [1] "snv_scna" "snv"      "scna"    
#> [1] "TTN"      "snv_scna"
#> [1] "TTN" "snv"
#> [1] "TTN"  "scna"

From the results below, it can be observed that the most impactful definition is different for each gene and depends on what type of gene it is. For example for the tumor suppressor genes TP53 and APC, the combination of SNV and SCNA results in the highest impact score. While for the oncogene BRAF, only SNVs give the highest score.

# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd
snv_scna	TP53	0.4786	0.4674	217	129	NA	NA	NA	NA
scna	TP53	0.3464	0.3409	66	129	NA	NA	NA	NA
snv	TP53	0.2713	0.2716	59	129	NA	NA	NA	NA
snv	APC	0.371	0.3721	216	81	NA	NA	NA	NA
snv_scna	APC	0.4375	0.4414	142	81	NA	NA	NA	NA
scna	APC	0.3405	0.4069	32	81	NA	NA	NA	NA
snv	KRAS	0.2848	0.2793	122	186	NA	NA	NA	NA
snv_scna	KRAS	0.2494	0.2252	76	186	NA	NA	NA	NA
scna	KRAS	0.1714	0.1093	87	186	NA	NA	NA	NA
scna	BRAF	0.3081	0.2862	224	181	NA	NA	NA	NA
snv_scna	BRAF	0.2303	0.2086	14	181	NA	NA	NA	NA
snv	BRAF	0.4169	0.4125	52	181	NA	NA	NA	NA
scna	PIK3CA	0.203	0.1616	98	242	NA	NA	NA	NA
snv	PIK3CA	0.2256	0.185	108	242	NA	NA	NA	NA
snv_scna	PIK3CA	0.1824	0.1292	23	242	NA	NA	NA	NA
snv_scna	TTN	0.1504	0.1601	58	143	NA	NA	NA	NA
snv	TTN	0.2891	0.2868	219	143	NA	NA	NA	NA
scna	TTN	0.2114	0.1831	51	143	NA	NA	NA	NA

To also assess the significance of the score, we can perform the gamma inferred permutation test.

# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd	perm_test_prop	perm_test_area	pvalue_prop	pvalue_sign_area
snv_scna	TP53	0.4786	0.4674	217	129	NA	NA	NA	NA	0.0000000	0.0000000	0.0000006	0.0003451
scna	TP53	0.3464	0.3409	66	129	NA	NA	NA	NA	0.0017637	0.0044092	0.0005711	0.0051794
snv	TP53	0.2713	0.2716	59	129	NA	NA	NA	NA	0.0299824	0.0291005	0.0151134	0.0213133
snv	APC	0.371	0.3721	216	81	NA	NA	NA	NA	0.0000000	0.0017637	0.0001745	0.0026894
snv_scna	APC	0.4375	0.4414	142	81	NA	NA	NA	NA	0.0000000	0.0000000	0.0000057	0.0006082
scna	APC	0.3405	0.4069	32	81	NA	NA	NA	NA	0.0035273	0.0000000	0.0007534	0.0012810
snv	KRAS	0.2848	0.2793	122	186	NA	NA	NA	NA	0.0176367	0.0238095	0.0087699	0.0182735
snv_scna	KRAS	0.2494	0.2252	76	186	NA	NA	NA	NA	0.0476190	0.0582011	0.0347619	0.0526923
scna	KRAS	0.1714	0.1093	87	186	NA	NA	NA	NA	0.3236332	0.3209877	0.3567770	0.3887167
scna	BRAF	0.3081	0.2862	224	181	NA	NA	NA	NA	0.0088183	0.0220459	0.0032632	0.0159074
snv_scna	BRAF	0.2303	0.2086	14	181	NA	NA	NA	NA	0.0776014	0.0679012	0.0679381	0.0720578
snv	BRAF	0.4169	0.4125	52	181	NA	NA	NA	NA	0.0000000	0.0000000	0.0000170	0.0011358
scna	PIK3CA	0.203	0.1616	98	242	NA	NA	NA	NA	0.1613757	0.1569665	0.1593826	0.1678161
snv	PIK3CA	0.2256	0.185	108	242	NA	NA	NA	NA	0.0908289	0.1075838	0.0794117	0.1110906
snv_scna	PIK3CA	0.1824	0.1292	23	242	NA	NA	NA	NA	0.2513228	0.2548501	0.2761134	0.2869234
snv_scna	TTN	0.1504	0.1601	58	143	NA	NA	NA	NA	0.5088183	0.1613757	0.5379990	0.1722009
snv	TTN	0.2891	0.2868	219	143	NA	NA	NA	NA	0.0167549	0.0220459	0.0073405	0.0157162
scna	TTN	0.2114	0.1831	51	143	NA	NA	NA	NA	0.1305115	0.1102293	0.1243580	0.1149510