Skip to contents

Similarly to the vignette titled: “Refining alterations on mutation type”, in this vignette we will refine on the genomic location of the alteration.

We will explore the impact of variants affecting different exons in APC. To scan through genomic locations using CIBRA, the definition matrix can be set up in an extended format where samples can be assigned to multiple definitions. In this setting, it is important to have a sample column.

# load data
count_data <- CIBRA::TCGA_CRC_rna_data # rna count data

# genomic alteration profile
definition_matrix <- CIBRA::TCGA_APC_exon_definition_transcript_201

# remove rownames column
definition_matrix$...1 <- NULL

# remove letter to match sample names of genomic metrics
colnames(count_data) <- make.names(str_sub(colnames(count_data), end = -2))

# create barcode from sample ids
definition_matrix$samples <- substr(definition_matrix$samples, 1, 15)

# create valid names to match count data
definition_matrix = dplyr::mutate_all(definition_matrix, make.names)

# correct the colnames of the definition matrix (CIBRA takes only proper colnames)
colnames(definition_matrix) <- make.names(colnames(definition_matrix))

We will select the column with the exon definitions and exclude the sample column. In this way, we will only test the column we have selected with the definitions. Because we are running the definition matrix in the extended mode, we need to indicate the sample column name. In this way CIBRA is aware that there can be duplicate sample names. It will run CIBRA for each definition compared to the control definition.

# select columns with definition
columns = colnames(definition_matrix)
columns = columns[columns != "samples"] # exclude sample column

# set up parameters for CIBRA
control_definition <- "no.SNV" # reference condition
confidence <- 0.1 # confidence threshold for the proportion calculation
# set permutation to 0 to skip permutations
iterations = 0 # number of permutation iterations to be performed per condition

register(SnowParam(4)) # set number of cores to be used
  
# select column with definitions
group = definition_matrix

# run CIBRA
CIBRA_res <- run_CIBRA(data = count_data,definition_matrix = as.data.frame(definition_matrix),columns = columns,
                       control_definition = control_definition, confidence = confidence, 
                       iterations = iterations,
                       sample_column = "samples",
                       parallel = TRUE)
#>  [1] "X16"        "X12"        "X2"         "X9"         "X7"        
#>  [6] "X10"        "X13"        "X14"        "X8"         "X3"        
#> [11] "X5"         "X15"        "X6"         "X4"         "non_coding"
#> [1] "Transcript.APC.201" "X16"
#> [1] "Transcript.APC.201" "X12"               
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X2"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X9"
#> [1] "Transcript.APC.201" "X7"
#> [1] "Transcript.APC.201" "X10"
#> [1] "Transcript.APC.201" "X13"               
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X14"
#> [1] "Transcript.APC.201" "X8"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X3"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X5"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "X15"
#> [1] "Transcript.APC.201" "X6"
#> [1] "Transcript.APC.201" "X4"                
#> [1] "number of cases < 10"
#> [1] "Transcript.APC.201" "non_coding"        
#> [1] "number of cases < 10"

The results are now structured as the exon number (def) and the column we selected as “col”. The number of cases is provided by the investigated column.

# print the report table
knitr::kable(CIBRA_res$CIBRA_results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations
X16 Transcript.APC.201 0.4096 0.4179 301 113 NA NA NA NA 0
X9 Transcript.APC.201 0.1689 0.1184 14 113 NA NA NA NA 0
X7 Transcript.APC.201 0.2628 0.2633 24 113 NA NA NA NA 0
X10 Transcript.APC.201 0.1461 0.0728 13 113 NA NA NA NA 0
X14 Transcript.APC.201 0.2445 0.2406 14 113 NA NA NA NA 0
X15 Transcript.APC.201 0.2188 0.2144 12 113 NA NA NA NA 0
X6 Transcript.APC.201 0.2405 0.2338 20 113 NA NA NA NA 0
# calculate the pvalue by comparing to a reference distribution
perm_data <- CIBRA::perm_dist_crc_tcga

# use the gamma test to also calculate a fitted p-value
CIBRA_res_stat <-  permutation_test(CIBRA_res$CIBRA_results, perm_data, test = "gamma")

# print the report table
knitr::kable(CIBRA_res_stat$results, format = "html")
def col proportion sign_area investigated control proportion_rnd_avg proportion_rnd_sd sign_area_rnd_avg sign_area_rnd_sd iterations perm_test_prop perm_test_area pvalue_prop pvalue_sign_area
X16 Transcript.APC.201 0.4096 0.4179 301 113 NA NA NA NA 0 0.0000000 0.0000000 0.0000248 0.0010112
X9 Transcript.APC.201 0.1689 0.1184 14 113 NA NA NA NA 0 0.3412698 0.2839506 0.3767298 0.3392811
X7 Transcript.APC.201 0.2628 0.2633 24 113 NA NA NA NA 0 0.0370370 0.0352734 0.0210423 0.0251315
X10 Transcript.APC.201 0.1461 0.0728 13 113 NA NA NA NA 0 0.5476190 0.5291005 0.5774369 0.6315102
X14 Transcript.APC.201 0.2445 0.2406 14 113 NA NA NA NA 0 0.0537919 0.0449735 0.0415007 0.0391983
X15 Transcript.APC.201 0.2188 0.2144 12 113 NA NA NA NA 0 0.1111111 0.0643739 0.0988729 0.0646409
X6 Transcript.APC.201 0.2405 0.2338 20 113 NA NA NA NA 0 0.0599647 0.0493827 0.0478327 0.0446955