diff --git a/IsolateCanonicalVariant.R b/IsolateCanonicalVariant.R index 328699d..8813f2e 100644 --- a/IsolateCanonicalVariant.R +++ b/IsolateCanonicalVariant.R @@ -1,26 +1,3 @@ -# #<----------------------------> -# # Please include this section when distributing and/or using this code. -# # Please read and abide by the terms of the included LICENSE -# # -# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) -# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues -# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE -# # -# # PURPOSE: -# # From a given vector of annotations for a particular DNA change -# # this function selects the canonical variant (if present) -# # by cross referencing the MANE Select and RefSeq Select sets. -# # -# # Logic flow: -# # - If there is only one annotation; that is selected -# # - If canonical transcript is not found in MANE Select + RefSeq select -# # or a matching transcript ID is not found in the annotation then; -# # The mutation with to the the highest position (residue number) is selected. -# # - If a match for canonical isoform is found then; -# # that particular mutation is selected -# # -# #<----------------------------> - # Installing dependencies dependencies <- c("stringi", "doParallel") missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] @@ -28,6 +5,31 @@ if(length(missing_packages)) install.packages(missing_packages) rm(missing_packages,dependencies) IsolateCanonicalVariant <- function (AAchangeAnnotations){ +# #<----------------------------> +# # You must include this section when: +# # Distributing, Using and/or Modifying this code. +# # Please read and abide by the terms of the included LICENSE. +# # Copyright 2020, Deepankar Chakroborty, All rights reserved. 
+# # +# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) +# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues +# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE +# #<----------------------------> + + +# # PURPOSE: +# # From a given vector of annotations for a particular DNA change +# # this function selects the canonical variant (if present) +# # by cross referencing the MANE Select and RefSeq Select sets. + +# # LOGIC FLOW: +# # - If there is only one annotation; that is selected +# # - If canonical transcript is not found in MANE Select + RefSeq select +# # or a matching transcript ID is not found in the annotation then; +# # The mutation with the highest position (residue number) is selected. +# # - If a match for canonical isoform is found then; +# # that particular mutation is selected + # importing resources library(doParallel) refseq <- readRDS(url("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/master/asset/RefSeqSelect_Gene_Transcript.RDS"),"rb") diff --git a/MutSiteFind.R b/MutSiteFind.R index 709b666..70ccda6 100644 --- a/MutSiteFind.R +++ b/MutSiteFind.R @@ -1,25 +1,30 @@ -# #<----------------------------> -# # Please include this section when distributing and/or using this code. -# # Please read and abide by the terms of the included LICENSE -# # -# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) -# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues -# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE -# # -# # PURPOSE: -# # For a given vector of amino acid changes like A123T, V256F, E746_A750del -# # this function returns c(123, 256, 746) as amino acid positions of -# # the mutated residue. -# # In case of indels, it doesn't return the range!! 
(returns only the start position) -# # -# #<----------------------------> - +# Installing missing dependencies dependencies <- c("stringi") missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] if(length(missing_packages)) install.packages(missing_packages) rm(missing_packages,dependencies) MutSiteFind <- function(MutationColumn){ +# #<----------------------------> +# # You must include this section when: +# # Distributing, Using and/or Modifying this code. +# # Please read and abide by the terms of the included LICENSE. +# # Copyright 2020, Deepankar Chakroborty, All rights reserved. +# # +# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) +# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues +# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE +# #<----------------------------> + + +# # PURPOSE: +# # For a given vector of amino acid changes like A123T, V256F, E746_A750del +# # this function returns c(123, 256, 746) as amino acid positions of +# # the mutated residue. +# # In case of indels, it doesn't return the range!! +# # (i.e. returns only the start position) + + return(unlist(x = stringi::stri_extract_first_regex(str = MutationColumn,pattern = "[[:digit:]]+"), use.names = F)) } diff --git a/ggplotBreaks.R b/ggplotBreaks.R index bbac9fc..48121ac 100644 --- a/ggplotBreaks.R +++ b/ggplotBreaks.R @@ -1,11 +1,16 @@ +ggplotBreaks <- function(range,tick,skip.steps=0){ # #<----------------------------> -# # Please include this section when distributing and/or using this code. -# # Please read and abide by the terms of the included LICENSE +# # You must include this section when: +# # Distributing, Using and/or Modifying this code. +# # Please read and abide by the terms of the included LICENSE. +# # Copyright 2020, Deepankar Chakroborty, All rights reserved. 
# # # # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) # # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues # # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE -# # +# #<----------------------------> + + # # PURPOSE: # # Returns a list of vectors containing breaks and labels # # for a continuous variable mapped to one of the axes for use with ggplot2 @@ -20,12 +25,8 @@ # # 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 # # # # $labels -# # "0" " " "20" " " "40" " " "60" " " "80" " " "100" " " -# # "120" " " "140" " " -# # -# #<----------------------------> +# # "0" " " "20" " " "40" " " "60" " " "80" " " "100" " " "120" " " "140" " " -ggplotBreaks <- function(range,tick,skip.steps=0){ if (length(range) != 2){ stop("Correct format for: range = c(min_value,max_value)") } diff --git a/unparalogMutations.R b/unparalogMutations.R index d479563..ddf3ff5 100644 --- a/unparalogMutations.R +++ b/unparalogMutations.R @@ -1,20 +1,30 @@ +# Installing missing dependencies +dependencies <- c("stringi", "progress") +missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] +if(length(missing_packages)) install.packages(missing_packages) +rm(missing_packages,dependencies) + +unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){ # #<----------------------------> -# # Please include this section when distributing and/or using this code. -# # Please read and abide by the terms of the included LICENSE +# # You must include this section when: +# # Distributing, Using and/or Modifying this code. +# # Please read and abide by the terms of the included LICENSE. +# # Copyright 2020, Deepankar Chakroborty, All rights reserved. 
# # # # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) # # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues # # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE -# # +# #<----------------------------> + + # # PURPOSE: # # In the gene column in your SNV annotation if you see something like: # # e.g. PRAMEF7;PRAMEF8 OR PRAMEF7,PRAMEF8 # # then your mutations annotations have gene paralogs. # # This script aims to de-couple those paralogs into individual their rows. # # -# ------------------------ - -### Info on what to pass as the function parameters: + +# # INFO on what to pass as the function parameters: # Assign correct paralog_separator found in the gene column of your SNV annotations # e.g. if the Gene column has entries like PRAMEF7;PRAMEF8 # then the paralog_separator is ";" # or set it to whatever separator is used by your SNV annotation software. @@ -28,14 +38,7 @@ # AnnotationColName = Column name in the SNV annotation table where the Amino acid changes are listed # <---------------> - -# Installing missing dependencies -dependencies <- c("stringi", "progress") -missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] -if(length(missing_packages)) install.packages(missing_packages) -rm(missing_packages,dependencies) - -unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){ + # Sanity checks check_paralog_sep <- !any(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator)) check_annotation_sep <- !any(stringi::stri_detect_fixed(str = DATA$AAChange.refGene, pattern = annotation_separator))