Skip to contents

similarity_measures calculates the categorical similarity scores between two genomic alterations based on their expression profiles.

Usage

similarity_measures(
  data1,
  data2,
  plotDir = "",
  cond1 = "cond1",
  cond2 = "cond2",
  abs_perm_dist = NULL,
  perm_dplus = NULL,
  perm_dmin = NULL,
  perm_mode = FALSE
)

Arguments

data1

Data frame with the columns pval (-log10 transformed adjusted p-values from the DE analysis), fc (fold change from the DE analysis) and type (the condition name).

data2

the same as data1 for the second condition

plotDir

directory path for the figures generated

cond1

name of condition 1 to be used in the figures

cond2

name of condition 2 to be used in the figures

abs_perm_dist

absolute permutation distribution of the distance score generated as shown in vignette "CIBRA similarity score"

perm_dplus

dplus permutation distribution generated as shown in vignette "CIBRA similarity score"

perm_dmin

dmin permutation distribution generated as shown in vignette "CIBRA similarity score"

perm_mode

Boolean indicating whether similarity_measures should be calculated for a permutation distribution. In this mode, no figures are generated, and only data1 and data2 are needed as input.

Value

Returns a list containing the calculated Spearman correlation (rho), the similarity score (dplus), the anti-similarity score (dmin), the genes associated with the similarity score (dplus_genes) and the genes associated with the anti-similarity score (dmin_genes). Unless perm_mode is TRUE, it also generates figures.

Examples

# Build permutation (null) distributions of the similarity measures from the
# permutation data generated for the CIBRA impact score.
permutation_adj_pvalue <- CIBRA::permutation_adj_pvalue
permutation_foldchange <- CIBRA::permutation_foldchange

# similarity_measures() expects -log10 transformed adjusted p-values
trans_pvalues <- -log10(permutation_adj_pvalue)

# number of permutation similarity scores to generate
# (100 keeps the example fast; at least 1000 permutations are recommended)
n_perm <- 100L

perm_names <- colnames(permutation_foldchange)
gene_names <- rownames(permutation_foldchange)

# collect the results in a preallocated list instead of growing three
# vectors with c() on every iteration
perm_results <- vector("list", n_perm)

for (i in seq_len(n_perm)) {
  # draw two *different* permutation columns in a single call; two
  # independent draws could pick the same column, which would compare a
  # permutation with itself and add a trivially perfect match to the null
  # distribution
  col_pair <- sample(perm_names, 2)
  col_a <- col_pair[1]
  col_b <- col_pair[2]

  # prepare the input data for the similarity score calculation
  data1 <- data.frame(
    genes = gene_names,
    pval = trans_pvalues[[col_a]],
    fc = permutation_foldchange[[col_a]],
    type = col_a
  )
  data2 <- data.frame(
    genes = gene_names,
    pval = trans_pvalues[[col_b]],
    fc = permutation_foldchange[[col_b]],
    type = col_b
  )

  # similarity measures in permutation mode (no figures are generated)
  perm_results[[i]] <- similarity_measures(data1, data2, perm_mode = TRUE)
}

# extract the individual measures into plain vectors
rho_list <- unlist(lapply(perm_results, function(res) res$rho))
Dplus_list <- unlist(lapply(perm_results, function(res) res$dplus))
Dmin_list <- unlist(lapply(perm_results, function(res) res$dmin))
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: the standard deviation is zero
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.

# Load the fold changes and adjusted p-values for the gene of interest
# (the APC exon definitions in this example) and -log10 transform the p-values
fc <- CIBRA::APC_definition_foldchange
pvalues <- CIBRA::APC_definition_padj_values
trans_pvalues <- -log10(pvalues)

# permutation distributions generated above
perm_dplus <- Dplus_list
perm_dmin <- Dmin_list
perm_dist <- rho_list

# one-sided test for the correlation measure: drop missing values and
# work with the absolute correlations
abs_perm_dist <- abs(perm_dist[!is.na(perm_dist)])

# select the data of the exons of interest; exon 7 and exon 16 are compared here
APC_exon_7 <- data.frame(
  genes = rownames(fc),
  pval = trans_pvalues$Transcript.APC.201_X7,
  fc = fc$Transcript.APC.201_X7,
  type = "APC exon 7"
)
APC_exon_16 <- data.frame(
  genes = rownames(fc),
  pval = trans_pvalues$Transcript.APC.201_X16,
  fc = fc$Transcript.APC.201_X16,
  type = "APC exon 16"
)

# compare both exons; the condition names are the type labels with spaces
# replaced by underscores
apc_res <- similarity_measures(
  data1 = APC_exon_7,
  data2 = APC_exon_16,
  cond1 = stringr::str_replace_all(unique(APC_exon_7$type), " ", "_"),
  cond2 = stringr::str_replace_all(unique(APC_exon_16$type), " ", "_"),
  abs_perm_dist = abs_perm_dist,
  perm_dplus = perm_dplus,
  perm_dmin = perm_dmin
)
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: Setting row names on a tibble is deprecated.
#> Warning: The input is a data frame-like object, convert it to a matrix.
#> Warning: Removed 46990 rows containing missing values or values outside the scale range
#> (`geom_line()`).
#> Warning: Removed 54 rows containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 46990 rows containing missing values or values outside the scale range
#> (`geom_line()`).
#> Warning: Removed 54 rows containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 46990 rows containing missing values or values outside the scale range
#> (`geom_line()`).
#> Warning: Removed 54 rows containing missing values or values outside the scale range
#> (`geom_point()`).