Skip to contents

Similar to determining the impact of a single alterations as shown in vignette: impact_assessment_genomic_alteration, a genome-wide screen only needs a more extensive definition matrix. In this example we will showcase a more extensive exploration of the definition matrix. However due to the computational time, we will limit ourself only to 6 genes instead of genome-wide.

# load functions
library(CIBRA)
library(BiocParallel)
# load transcription data and genomic alterations
count_data <- CIBRA::TCGA_CRC_rna_data
definition_matrix <- CIBRA::TCGA_CRC_non_silent_mutation_profile

We will make use of the whole definition matrix, which consist of the mutation status of every protein coding gene measured in the TCGA.

# prepare data for CIBRA

# fix sample names for intersect between the two datatypes
rownames(definition_matrix) <- stringr::str_replace_all(rownames(definition_matrix), "-", ".")

# select intersect between definition matrix and count data
sample_list <- intersect(rownames(definition_matrix), colnames(count_data))

# select data and definition matrix as the intersect
count_data <- count_data[sample_list]
definition_matrix <- definition_matrix[sample_list,]

# optional to visualize datafrane
#if (!require("DT")) install.packages('DT')
#DT::datatable(definition_matrix)

To lower the computational time, we will only select 6 genes to assess in our screen. You can remove this limitation by providing the column names of the definition matrix.

# focus on a subset of the genes, for the genome-wide screen you can replace this line with columns = colnames(definition_matrix)
columns <- c("APC", "TP53", "KRAS", "BRAF", "PIK3CA", "TTN")

# set up parameters for CIBRA
control_definition <- "WT" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used
# run CIBRA
CIBRA_res <- run_CIBRA(count_data, as.data.frame(definition_matrix), columns, 
                       control_definition, confidence, iterations, 
                       parallel = TRUE)
#> [1] "SNV"
#> [1] "APC" "SNV"
#> [1] "SNV"
#> [1] "TP53" "SNV"
#> [1] "SNV"
#> [1] "KRAS" "SNV"
#> [1] "SNV"
#> [1] "BRAF" "SNV"
#> [1] "SNV"
#> [1] "PIK3CA" "SNV"
#> [1] "SNV"
#> [1] "TTN" "SNV"
# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations
SNV APC 0.4165 0.4228 358 113 NA NA NA NA 0
SNV TP53 0.429 0.4348 276 195 NA NA NA NA 0
SNV KRAS 0.3291 0.3297 198 273 NA NA NA NA 0
SNV BRAF 0.4552 0.4418 66 405 NA NA NA NA 0
SNV PIK3CA 0.2841 0.2716 131 340 NA NA NA NA 0
SNV TTN 0.282 0.2636 277 194 NA NA NA NA 0
# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations perm_test_prop perm_test_area pvalue_prop pvalue_sign_area
SNV APC 0.4165 0.4228 358 113 NA NA NA NA 0 0.0000000 0.0000000 0.0000174 0.0009098
SNV TP53 0.429 0.4348 276 195 NA NA NA NA 0 0.0000000 0.0000000 0.0000090 0.0007018
SNV KRAS 0.3291 0.3297 198 273 NA NA NA NA 0 0.0052910 0.0070547 0.0012761 0.0065368
SNV BRAF 0.4552 0.4418 66 405 NA NA NA NA 0 0.0000000 0.0000000 0.0000022 0.0006030
SNV PIK3CA 0.2841 0.2716 131 340 NA NA NA NA 0 0.0176367 0.0291005 0.0090258 0.0213133
SNV TTN 0.282 0.2636 277 194 NA NA NA NA 0 0.0220459 0.0352734 0.0098357 0.0249828