library(oncoPredict)
#This script provides an example of how to control for GLDS in pre-clinical biomarker discovery.
#Specifically, this script applies glds functions to GDSCv2 data to obtain p-values and beta values for each drug-gene association.
#Controlling for GLDS is important because variability in GLDS is evident in cancer cell lines, and controlling for this variability improves cancer biomarker discovery.
#Set parameters of completeMatrix()
#_____________________________________________________________________
#nPerms=50
#trainingPtype = readRDS(file = "GDSC2_Res.rds")
#There are some NA values so prcomp() will complain when you apply the GLDS function.
#senMat=trainingPtype
#Apply completeMatrix()
#_____________________________________________________________________
#This function will create a file called complete_matrix_output.txt in your working directory.
#This file is used as input in the next function.
#completeMatrix(trainingPtype)
#Apply the glds function.
#_____________________________________________________________________
#Set parameters...
#'@param drugMat A matrix of drug sensitivity data. rownames() are pre-clinical samples, and colnames() are drug names.
#The sensitivity data used here is GDSCv2.
#Read GDSC's updated cell line information file (used later).
#cellLineDetails<-read_excel('Cell_Lines_Details.xlsx')
#GDSC's cell line details file; it is used below to translate COSMIC identifiers into cell line names.
#NOTE(review): column 1 is assumed to hold the cell line name and column 2 the COSMIC id -- confirm against the csv.
cellLineDetails <- read.csv('Cell_Lines_Details.csv')
#The response data was run through completeMatrix() beforehand because it contained NA values,
#and NA values in the response data cause a problem when prcomp() is applied.
cm <- read.table('complete_matrix_output_GDSCv2.txt', header = TRUE, row.names = 1) #No NA values at this point.
#rownames(cm) are COSMIC identifiers, which would prevent matching cell lines between the
#sensitivity data and the mutation data later on, so they are replaced by cell line names.
newRows <- substring(rownames(cm), 8) #Keep everything from the 8th character on, i.e. the numeric id that follows the 'COSMIC' prefix.
indices <- match(as.numeric(newRows), as.vector(unlist(cellLineDetails[, 2]))) #Row of the details file for each COSMIC id.
#Fail early and name the offending rows: an unmatched id gives an NA name, which
#rownames<- rejects with a generic message that does not say which sample is the problem.
if (anyNA(indices)) {
  stop(
    'No cell line name found in Cell_Lines_Details.csv for: ',
    paste(rownames(cm)[is.na(indices)], collapse = ', '),
    call. = FALSE
  )
}
newNames <- as.vector(unlist(cellLineDetails[, 1]))[indices] #The corresponding cell line names.
rownames(cm) <- newNames
#Fix the drug names in the cm object so that it's just the name of the drug (remove those extra numbers/identifiers at the end).
#gdscv2_drugs.xlsx contains the colnames of cm in the correct order, but with the extra identifiers removed.
#fix<-read_excel('gdscv2_drugs.xlsx')
#fix<-as.vector(unlist(fix[,2]))
#Cleaned drug names, read as a plain character vector; they replace the column names of cm.
#According to the file's notes, gdscv2_drugs.txt lists them in the same order as colnames(cm).
fix <- as.vector(unlist(read.table('gdscv2_drugs.txt', header = TRUE)))
#Guard against a name list that does not line up with cm: on a data frame a shorter vector
#would silently leave NA column names, mislabelling drugs further down the pipeline.
if (length(fix) != ncol(cm)) {
  stop(
    'gdscv2_drugs.txt provides ', length(fix), ' drug names but cm has ', ncol(cm), ' columns.',
    call. = FALSE
  )
}
colnames(cm) <- fix
drugMat <- as.matrix(cm) #Finally, set this object as the drugMat parameter.
#dim(drugMat) #805 samples vs. 198 drugs
#'@param markerMat A matrix containing the data for which you are looking for an association with drug sensitivity (e.g. a matrix of somatic mutation data). rownames() are
#marker names (e.g. gene names), and colnames() are samples.
#The dataset used here is GDSCv2's updated mutation data for pan-cancer. It includes both CNV and coding variant.
#mutationMat<-read.csv('GDSC2_Pan_Both.csv')
#mutationMat<-mutationMat[,c(1,6,7)] #Index to these 3 columns of interest.
#colnames(mutationMat) #"cell_line_name" "genetic_feature" "is_mutated"
#Some entries are duplicated cell line name - genetic feature combos...remove them to avoid problems with pivot_wider().
#vec<-c()
#for (i in 1:nrow(mutationMat)){
# vec[i]<-paste(mutationMat[i,1],mutationMat[i,2], sep=' ')
#}
#nonDupIndices<-match(unique(vec), vec)
#mutationMat2<-mutationMat[nonDupIndices,]
#Some gene mutation entries are blank...remove them to avoid problems with pivot_wider().
#library(tidyverse)
#good<-(mutationMat2[,2]) != ""
#mutationMat3<-mutationMat2[good,]
#mutationMat4<-mutationMat3 %>%
# pivot_wider(names_from=genetic_feature,
# values_from=is_mutated)
#rownames(mutationMat4)<-as.vector(unlist(mutationMat4[,1])) #Make cell lines the rownames...right now they are column 1.
#cols<-rownames(mutationMat4)
#mutationMat4<-as.matrix(t(mutationMat4[,-1]))
#Make sure the matrix is numeric.
#mutationMat<-mutationMat4
#mutationMat4<-apply(mutationMat4, 2, as.numeric)
#rownames(mutationMat4)<-rownames(mutationMat)
#markerMat<-mutationMat4
# replace all non-finite values with 0
#markerMat[!is.finite(markerMat)] <- 0
#colnames(markerMat)<-cols
#dim(markerMat) #1315 1389
#write.table(markerMat, file='markerMat.txt')
#Load the marker matrix that the (commented-out) preprocessing above saved with write.table().
#rownames() are marker names and colnames() are samples.
#check.names = FALSE keeps the column names exactly as stored in the file. The default
#(check.names = TRUE) runs them through make.names(), which rewrites names that start with a
#digit or contain characters such as '-', so they would no longer equal the cell line names
#used as rownames(drugMat) and those samples could not be matched.
markerMat <- as.matrix(
  read.table('markerMat.txt', header = TRUE, row.names = 1, check.names = FALSE)
)
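#'@param drugRelatedness A data frame with two columns: the drug names ("DRUG_NAME") and their target pathways ("TARGET_PATHWAY").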
#This file is GDSC's updated drug relatedness file (obtained from bulk data download/all compounds screened/compounds-annotation).
#Note: I had to change some drug names in this file so that they matched colnames of cm.
#Ex: replace - with . (small modifications like that).
#Read the compound annotation table and keep only its 3rd and 6th columns in a single step.
drugRelatedness <- read.csv("screened_compunds_rel_8.2.csv")[, c(3, 6)]
#colnames(drugRelatedness) #"DRUG_NAME" "TARGET_PATHWAY"
#glds() writes its result files (gldsPs.csv, naivePs.csv, gldsBetas.csv, naiveBetas.csv)
#relative to the working directory, so the call is run from a temporary directory.
#The results can be found afterwards under the path stored in wd.
wd <- tempdir()
savedir <- setwd(wd) #setwd() returns the previous working directory.
#Restore the original working directory once glds() has finished (or failed), so the
#rest of the session is not left pointing at the temporary directory.
#minMuts, additionalCovariateMatrix and threshold are passed to glds() unchanged; see the glds() documentation for their meaning.
tryCatch(
  glds(drugMat,
       drugRelatedness,
       markerMat,
       minMuts = 5,
       additionalCovariateMatrix = NULL,
       threshold = 0.7),
  finally = setwd(savedir)
)
#> |                                                                      |   0%
#> ... (intermediate progress-bar frames omitted) ...
#> |======================================================================| 100%
#> Warning in write.csv(results_gldsPs, file = "./gldsPs.csv", row.names = TRUE, :
#> attempt to set 'col.names' ignored
#> Warning in write.csv(results_naivePs, file = "./naivePs.csv", row.names = TRUE,
#> : attempt to set 'col.names' ignored
#> Warning in write.csv(results_gldsBetas, file = "./gldsBetas.csv", row.names =
#> TRUE, : attempt to set 'col.names' ignored
#> Warning in write.csv(results_naiveBetas, file = "./naiveBetas.csv", row.names =
#> TRUE, : attempt to set 'col.names' ignored