Refining alterations on genomic location • CIBRA

Similarly to the vignette titled: “Refining alterations on mutation type”, in this vignette we will refine on the genomic location of the alteration.

library(CIBRA)
library(BiocParallel)
library(stringr)
library(dplyr)

We will explore the impact of variants affecting different exons in APC. To scan through genomic locations using CIBRA, the definition matrix can be set up in an extended format where samples can be assigned to multiple definitions. In this setting, it is important to have a sample column.

# load data
count_data <- CIBRA::TCGA_CRC_rna_data # rna count data

# genomic alteration profile
definition_matrix <- CIBRA::TCGA_APC_exon_definition_transcript_201

# remove rownames column
definition_matrix$...1 <- NULL

# remove letter to match sample names of genomic metrics
colnames(count_data) <- make.names(str_sub(colnames(count_data), end = -2))

# create barcode from sample ids
definition_matrix$samples <- substr(definition_matrix$samples, 1, 15)

# create valid names to match count data
definition_matrix = dplyr::mutate_all(definition_matrix, make.names)

# correct the colnames of the definition matrix (CIBRA takes only proper colnames)
colnames(definition_matrix) <- make.names(colnames(definition_matrix))

We will select the column with the exon definitions and exclude the sample column. In this way, we will only test the column we have selected with the definitions. Because we are running the definition matrix in the extended mode, we need to indicate the sample column name. In this way CIBRA is aware that there can be duplicate sample names. It will run CIBRA for each definition compared to the control definition.

# select columns with definition
columns = colnames(definition_matrix)
columns = columns[columns != "samples"] # exclude sample column

# set up parameters for CIBRA
control_definition <- "no.SNV" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used
  
# select column with definitions
group = definition_matrix

# run CIBRA
CIBRA_res <- run_CIBRA(data = count_data,definition_matrix = as.data.frame(definition_matrix),columns = columns,
                       control_definition = control_definition, confidence = confidence, 
                       iterations = iterations,
                       sample_column = "samples",
                       parallel = TRUE)
#>  [1] "X16"        "X12"        "X2"         "X9"         "X7"        
#>  [6] "X10"        "X13"        "X14"        "X8"         "X3"        
#> [11] "X5"         "X15"        "X6"         "X4"         "non_coding"
#> [1] "Transcript.APC.201" "X16"
#> [1] "Transcript.APC.201" "X12"               
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X2"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X9"
#> [1] "Transcript.APC.201" "X7"
#> [1] "Transcript.APC.201" "X10"
#> [1] "Transcript.APC.201" "X13"               
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X14"
#> [1] "Transcript.APC.201" "X8"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X3"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X5"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X15"
#> [1] "Transcript.APC.201" "X6"
#> [1] "Transcript.APC.201" "X4"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "non_coding"        
#> [1] "number of cases < 10"

The results are now structured as the exon number (def) and the column we selected as “col”. The number of cases is provided by the investigated column.

# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd
X16	Transcript.APC.201	0.4096	0.4179	301	113	NA	NA	NA	NA
X9	Transcript.APC.201	0.1689	0.1184	14	113	NA	NA	NA	NA
X7	Transcript.APC.201	0.2628	0.2633	24	113	NA	NA	NA	NA
X10	Transcript.APC.201	0.1461	0.0728	13	113	NA	NA	NA	NA
X14	Transcript.APC.201	0.2445	0.2406	14	113	NA	NA	NA	NA
X15	Transcript.APC.201	0.2188	0.2144	12	113	NA	NA	NA	NA
X6	Transcript.APC.201	0.2405	0.2338	20	113	NA	NA	NA	NA

# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd	perm_test_prop	perm_test_area	pvalue_prop	pvalue_sign_area
X16	Transcript.APC.201	0.4096	0.4179	301	113	NA	NA	NA	NA	0.0000000	0.0000000	0.0000248	0.0010112
X9	Transcript.APC.201	0.1689	0.1184	14	113	NA	NA	NA	NA	0.3412698	0.2839506	0.3767298	0.3392811
X7	Transcript.APC.201	0.2628	0.2633	24	113	NA	NA	NA	NA	0.0370370	0.0352734	0.0210423	0.0251315
X10	Transcript.APC.201	0.1461	0.0728	13	113	NA	NA	NA	NA	0.5476190	0.5291005	0.5774369	0.6315102
X14	Transcript.APC.201	0.2445	0.2406	14	113	NA	NA	NA	NA	0.0537919	0.0449735	0.0415007	0.0391983
X15	Transcript.APC.201	0.2188	0.2144	12	113	NA	NA	NA	NA	0.1111111	0.0643739	0.0988729	0.0646409
X6	Transcript.APC.201	0.2405	0.2338	20	113	NA	NA	NA	NA	0.0599647	0.0493827	0.0478327	0.0446955