mirror of
https://github.com/dchakro/shared_Rscripts.git
synced 2026-05-17 09:12:16 -07:00
Updates
- add IsolateCanonicalVariant.R - remove variables used to check and install dependencies
This commit is contained in:
parent
b35647d0df
commit
a1a6dd9dcf
3 changed files with 77 additions and 1 deletions
74
IsolateCanonicalVariant.R
Normal file
74
IsolateCanonicalVariant.R
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# #<---------------------------->
|
||||
# # Please include this section when distributing and/or using this code.
|
||||
# # Please read and abide by the terms of the included LICENSE
|
||||
# #
|
||||
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
|
||||
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
|
||||
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
|
||||
# #
|
||||
# # PURPOSE:
|
||||
# # From a given vector of annotations for a particular DNA change
|
||||
# # this function selects the canonical variant (if present)
|
||||
# # by cross referencing the MANE Select and RefSeq Select sets.
|
||||
# #
|
||||
# # Logic flow:
|
||||
# # - If there is only one annotation; that is selected
|
||||
# # - If canonical transcript is not found in MANE Select + RefSeq select
|
||||
# # or a matching transcript ID is not found in the annotation then;
|
||||
# # The mutation with to the the highest position (residue number) is selected.
|
||||
# # - If a match for canonical isoform is found then;
|
||||
# # that particular mutation is selected
|
||||
# #
|
||||
# #<---------------------------->
|
||||
|
||||
# Installing dependencies
|
||||
dependencies <- c("stringi", "doParallel")
|
||||
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
|
||||
if(length(missing_packages)) install.packages(missing_packages)
|
||||
rm(missing_packages,dependencies)
|
||||
|
||||
IsolateCanonicalVariant <- function (AAchangeAnnotations){
|
||||
library(doParallel)
|
||||
#--- Change to master when ready
|
||||
# importing resources
|
||||
refseq <- readRDS(url("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/develop/asset/RefSeqSelect_Gene_Transcript.RDS"),"rb")
|
||||
source("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/master/MutSiteFind.R")
|
||||
|
||||
# initializing cluster
|
||||
myCluster <- makeCluster(parallel::detectCores(), type = "FORK",useXDR=F,.combine=cbind); registerDoParallel(myCluster);print(myCluster)
|
||||
|
||||
file.create("log.txt")
|
||||
message(paste0("Logging processed mutations at: ",getwd(),"/log.txt"))
|
||||
|
||||
# computation
|
||||
results <- foreach(MutInfo = AAchangeAnnotations,.combine = c) %dopar% {
|
||||
GENE <- can.isoform <- l <- l2 <- NA
|
||||
l=unique(unlist(stringi::stri_split_fixed( str = MutInfo, pattern = ","),use.names = F,recursive = F))
|
||||
|
||||
if(length(l)==1){
|
||||
l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F)
|
||||
MUTATION <- stringi::stri_replace_first_fixed(str = l2[stringi::stri_detect_regex(str = l2, use.names = F, pattern = "^p\\.")],pattern = "p.",replacement = "")
|
||||
} else {
|
||||
GENE <- stringi::stri_sub(str = l[1],from = 1,to = stringi::stri_locate_first_fixed(str = l[1],pattern = ":")[,"end"]-1)
|
||||
can.isoform <- refseq$transcript_accession[refseq$gene_id==GENE]
|
||||
if(length(can.isoform)==0 | !any(stringi::stri_detect_fixed(str = l,pattern = can.isoform))){
|
||||
# Canonical isoform not found in RefSeq, or Match for Canonical isoform not found in AAchangeAnnotations
|
||||
l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F)
|
||||
l3 <- stringi::stri_replace_all_fixed(str = l2[stringi::stri_startswith_fixed(str = l2,"p.")],pattern = "p.",replacement = "")
|
||||
MUTATION <- l3[which.max(MutSiteFind(l3))]
|
||||
} else {
|
||||
# Canonical isoform found
|
||||
l=l[grep(can.isoform,l)]
|
||||
l2 <- unlist(stringi::stri_split_fixed( str = l, pattern = ":"),use.names = F,recursive = F)
|
||||
MUTATION <- stringi::stri_replace_first_fixed(str = l2[stringi::stri_detect_regex(str = l2, use.names = F, pattern = "^p\\.")],pattern = "p.",replacement = "")
|
||||
}
|
||||
}
|
||||
if(length(MUTATION)==0){
|
||||
MUTATION <- NA
|
||||
}
|
||||
cat(paste(MUTATION,"\n"),file="log.txt", append=T)
|
||||
return(MUTATION)
|
||||
}
|
||||
stopCluster(myCluster)
|
||||
return(results)
|
||||
}
|
||||
|
|
@ -17,6 +17,7 @@
|
|||
dependencies <- c("stringi")
|
||||
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
|
||||
if(length(missing_packages)) install.packages(missing_packages)
|
||||
rm(missing_packages,dependencies)
|
||||
|
||||
MutSiteFind <- function(MutationColumn){
|
||||
return(unlist(x = stringi::stri_extract_first_regex(str = MutationColumn,pattern = "[[:digit:]]+"), use.names = F))
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
dependencies <- c("stringi", "progress")
|
||||
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
|
||||
if(length(missing_packages)) install.packages(missing_packages)
|
||||
rm(missing_packages,dependencies)
|
||||
|
||||
unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){
|
||||
# Sanity checks
|
||||
|
|
@ -51,7 +52,7 @@ unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",",
|
|||
stop("Inconsistencies with these input parameters.\n Ensure they are correct and try again.")
|
||||
}
|
||||
# Cleanup
|
||||
rm(missing_packages,dependencies,check_paralog_sep,check_annotation_sep)
|
||||
rm(check_paralog_sep,check_annotation_sep)
|
||||
gc()
|
||||
|
||||
current.idx <- 1 # nrow(DATA)+1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue