Genome wide screen using CIBRA • CIBRA

Similar to determining the impact of a single alterations as shown in vignette: impact_assessment_genomic_alteration, a genome-wide screen only needs a more extensive definition matrix. In this example we will showcase a more extensive exploration of the definition matrix. However due to the computational time, we will limit ourself only to 6 genes instead of genome-wide.

# load functions
library(CIBRA)
library(BiocParallel)

# load transcription data and genomic alterations
count_data <- CIBRA::TCGA_CRC_rna_data
definition_matrix <- CIBRA::TCGA_CRC_non_silent_mutation_profile

We will make use of the whole definition matrix, which consist of the mutation status of every protein coding gene measured in the TCGA.

# prepare data for CIBRA

# fix sample names for intersect between the two datatypes
rownames(definition_matrix) <- stringr::str_replace_all(rownames(definition_matrix), "-", ".")

# select intersect between definition matrix and count data
sample_list <- intersect(rownames(definition_matrix), colnames(count_data))

# select data and definition matrix as the intersect
count_data <- count_data[sample_list]
definition_matrix <- definition_matrix[sample_list,]

# optional to visualize datafrane
#if (!require("DT")) install.packages('DT')
#DT::datatable(definition_matrix)

To lower the computational time, we will only select 6 genes to assess in our screen. You can remove this limitation by providing the column names of the definition matrix.

# focus on a subset of the genes, for the genome-wide screen you can replace this line with columns = colnames(definition_matrix)
columns <- c("APC", "TP53", "KRAS", "BRAF", "PIK3CA", "TTN")

# set up parameters for CIBRA
control_definition <- "WT" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used

# run CIBRA
CIBRA_res <- run_CIBRA(count_data, as.data.frame(definition_matrix), columns, 
                       control_definition, confidence, iterations, 
                       parallel = TRUE)
#> [1] "SNV"
#> [1] "APC" "SNV"
#> [1] "SNV"
#> [1] "TP53" "SNV"
#> [1] "SNV"
#> [1] "KRAS" "SNV"
#> [1] "SNV"
#> [1] "BRAF" "SNV"
#> [1] "SNV"
#> [1] "PIK3CA" "SNV"
#> [1] "SNV"
#> [1] "TTN" "SNV"

# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd
SNV	APC	0.4165	0.4228	358	113	NA	NA	NA	NA
SNV	TP53	0.429	0.4348	276	195	NA	NA	NA	NA
SNV	KRAS	0.3291	0.3297	198	273	NA	NA	NA	NA
SNV	BRAF	0.4552	0.4418	66	405	NA	NA	NA	NA
SNV	PIK3CA	0.2841	0.2716	131	340	NA	NA	NA	NA
SNV	TTN	0.282	0.2636	277	194	NA	NA	NA	NA

# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")

def	col	proportion	sign_area	investigated	control	proportion_rnd_avg	proportion_rnd_sd	sign_area_rnd_avg	sign_area_rnd_sd	perm_test_prop	perm_test_area	pvalue_prop	pvalue_sign_area
SNV	APC	0.4165	0.4228	358	113	NA	NA	NA	NA	0.0000000	0.0000000	0.0000174	0.0009098
SNV	TP53	0.429	0.4348	276	195	NA	NA	NA	NA	0.0000000	0.0000000	0.0000090	0.0007018
SNV	KRAS	0.3291	0.3297	198	273	NA	NA	NA	NA	0.0052910	0.0070547	0.0012761	0.0065368
SNV	BRAF	0.4552	0.4418	66	405	NA	NA	NA	NA	0.0000000	0.0000000	0.0000022	0.0006030
SNV	PIK3CA	0.2841	0.2716	131	340	NA	NA	NA	NA	0.0176367	0.0291005	0.0090258	0.0213133
SNV	TTN	0.282	0.2636	277	194	NA	NA	NA	NA	0.0220459	0.0352734	0.0098357	0.0249828