# #<----------------------------> # # Please include this section when distributing and/or using this code. # # Please read and abide by the terms of the included LICENSE # # # # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) # # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues # # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE # # # # PURPOSE: # # From a given vector of annotations for a particular DNA change # # this function selects the canonical variant (if present) # # by cross referencing the MANE Select and RefSeq Select sets. # # # # Logic flow: # # - If there is only one annotation; that is selected # # - If canonical transcript is not found in MANE Select + RefSeq select # # or a matching transcript ID is not found in the annotation then; # # The mutation with to the the highest position (residue number) is selected. # # - If a match for canonical isoform is found then; # # that particular mutation is selected # # # #<----------------------------> # Installing dependencies dependencies <- c("stringi", "doParallel") missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] if(length(missing_packages)) install.packages(missing_packages) rm(missing_packages,dependencies) IsolateCanonicalVariant <- function (AAchangeAnnotations){ library(doParallel) #--- Change to master when ready # importing resources refseq <- readRDS(url("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/develop/asset/RefSeqSelect_Gene_Transcript.RDS"),"rb") source("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/master/MutSiteFind.R") # initializing cluster myCluster <- makeCluster(parallel::detectCores(), type = "FORK",useXDR=F,.combine=cbind); registerDoParallel(myCluster);print(myCluster) file.create("log.txt") message(paste0("Logging processed mutations at: ",getwd(),"/log.txt")) # computation results <- foreach(MutInfo = AAchangeAnnotations,.combine = c) %dopar% { GENE <- can.isoform <- l <- l2 <- NA l=unique(unlist(stringi::stri_split_fixed( str = MutInfo, pattern = ","),use.names = F,recursive = F)) if(length(l)==1){ l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F) MUTATION <- stringi::stri_replace_first_fixed(str = l2[stringi::stri_detect_regex(str = l2, use.names = F, pattern = "^p\\.")],pattern = "p.",replacement = "") } else { GENE <- stringi::stri_sub(str = l[1],from = 1,to = stringi::stri_locate_first_fixed(str = l[1],pattern = ":")[,"end"]-1) can.isoform <- refseq$transcript_accession[refseq$gene_id==GENE] if(length(can.isoform)==0 | !any(stringi::stri_detect_fixed(str = l,pattern = can.isoform))){ # Canonical isoform not found in RefSeq, or Match for Canonical isoform not found in AAchangeAnnotations l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F) l3 <- stringi::stri_replace_all_fixed(str = l2[stringi::stri_startswith_fixed(str = l2,"p.")],pattern = "p.",replacement = "") MUTATION <- l3[which.max(MutSiteFind(l3))] } else { # Canonical isoform found l=l[grep(can.isoform,l)] l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F) MUTATION <- stringi::stri_replace_first_fixed(str = l2[stringi::stri_detect_regex(str = l2, use.names = F, pattern = "^p\\.")],pattern = "p.",replacement = "") } } if(length(MUTATION)==0){ MUTATION <- NA } cat(paste(MUTATION,"\n"),file="log.txt", append=T) return(MUTATION) } stopCluster(myCluster) return(results) }