Squashed commit of the following:

commit fe050d47c2dc9e61a13d4a362abf56dabfbecbd7
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Fri Jul 31 12:57:11 2020 +0300

    Include usage instruction inside function body

    Include usage instruction inside function body, so on typing just the function name to display the R code, the instructions will appear.
    Also stated things clearly in the T&C

commit fedbca4a7aa8fb436702accdf1f9b5ec7f066d60
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:50:20 2020 +0300

    Updates

    1. Improve readability
    2. Manage differing lengths of breaks and labels.

commit fb32fb9173c66c8862adb0ad41d82db9d128777b
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:43:20 2020 +0300

    skip.steps works as expected

    skip.steps = 1, now skips 1 observation.

commit 00a8bbcf04731c010e0a5989a805274609560188
Author: Deepankar Chakroborty <deepankar.chakroborty@utu.fi>
Date:   Thu Jul 30 12:32:16 2020 +0300

    add script to calculate breaks for axes in ggplot2

    calculates breaks and labels for axes in ggplot2 with user defined gaps
This commit is contained in:
Deepankar Chakroborty 2020-07-31 12:58:00 +03:00
parent f9f647c131
commit 52554283f1
4 changed files with 72 additions and 61 deletions

View file

@ -1,26 +1,3 @@
# #<---------------------------->
# # Please include this section when distributing and/or using this code.
# # Please read and abide by the terms of the included LICENSE
# #
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
# #
# # PURPOSE:
# # From a given vector of annotations for a particular DNA change
# # this function selects the canonical variant (if present)
# # by cross referencing the MANE Select and RefSeq Select sets.
# #
# # Logic flow:
# # - If there is only one annotation; that is selected
# # - If canonical transcript is not found in MANE Select + RefSeq select
# # or a matching transcript ID is not found in the annotation then;
# # The mutation with the highest position (residue number) is selected.
# # - If a match for canonical isoform is found then;
# # that particular mutation is selected
# #
# #<---------------------------->
# Installing dependencies
dependencies <- c("stringi", "doParallel")
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
@ -28,6 +5,31 @@ if(length(missing_packages)) install.packages(missing_packages)
rm(missing_packages,dependencies)
IsolateCanonicalVariant <- function (AAchangeAnnotations){
# #<---------------------------->
# # You must include this section when:
# # Distributing, Using and/or Modifying this code.
# # Please read and abide by the terms of the included LICENSE.
# # Copyright 2020, Deepankar Chakroborty, All rights reserved.
# #
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
# #<---------------------------->
# # PURPOSE:
# # From a given vector of annotations for a particular DNA change
# # this function selects the canonical variant (if present)
# # by cross referencing the MANE Select and RefSeq Select sets.
# # LOGIC FLOW:
# # - If there is only one annotation; that is selected
# # - If canonical transcript is not found in MANE Select + RefSeq select
# # or a matching transcript ID is not found in the annotation then;
# # The mutation with the highest position (residue number) is selected.
# # - If a match for canonical isoform is found then;
# # that particular mutation is selected
# importing resources
library(doParallel)
refseq <- readRDS(url("https://gitlab.utu.fi/deecha/shared_scripts/-/raw/master/asset/RefSeqSelect_Gene_Transcript.RDS"),"rb")

View file

@ -1,25 +1,30 @@
# #<---------------------------->
# # Please include this section when distributing and/or using this code.
# # Please read and abide by the terms of the included LICENSE
# #
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
# #
# # PURPOSE:
# # For a given vector of amino acid changes like A123T, V256F, E746_A750del
# # this function returns c(123, 256, 746) as amino acid positions of
# # the mutated residue.
# # In case of indels, it doesn't return the range!! (returns only the start position)
# #
# #<---------------------------->
# Installing missing dependencies
dependencies <- c("stringi")
# Probe each package individually with system.file(), which returns "" when
# the package cannot be found, instead of scanning the whole library with
# installed.packages() (slow, and discouraged for this purpose by its own
# documentation).
missing_packages <- dependencies[
  !vapply(
    dependencies,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
]
if (length(missing_packages) > 0) install.packages(missing_packages)
# Remove the helper variables so they do not linger in the current environment.
rm(missing_packages, dependencies)
MutSiteFind <- function(MutationColumn){
  # #<---------------------------->
  # # You must include this section when:
  # # Distributing, Using and/or Modifying this code.
  # # Please read and abide by the terms of the included LICENSE.
  # # Copyright 2020, Deepankar Chakroborty, All rights reserved.
  # #
  # # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
  # # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
  # # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
  # #<---------------------------->
  # # PURPOSE:
  # # For a given vector of amino acid changes like A123T, V256F, E746_A750del
  # # this function returns the amino acid position of each mutated residue,
  # # i.e. the first run of digits found in each element.
  # # In case of indels, it doesn't return the range!!
  # # (i.e. returns only the start position)
  # #
  # # ARGUMENTS:
  # # MutationColumn : vector of amino acid change strings.
  # #
  # # RETURNS:
  # # An unnamed character vector with one element per input, holding the
  # # extracted digits (e.g. c("123", "256", "746")); NA where an element
  # # contains no digits. Wrap the result in as.integer() if numbers are needed.
  positions <- stringi::stri_extract_first_regex(
    str = MutationColumn,
    pattern = "[[:digit:]]+"
  )
  # FALSE is spelled out on purpose: the shorthand `F` is an ordinary variable
  # that can be reassigned, which would silently change this call.
  unlist(x = positions, use.names = FALSE)
}

View file

@ -1,11 +1,16 @@
ggplotBreaks <- function(range,tick,skip.steps=0){
# #<---------------------------->
# # Please include this section when distributing and/or using this code.
# # Please read and abide by the terms of the included LICENSE
# # You must include this section when:
# # Distributing, Using and/or Modifying this code.
# # Please read and abide by the terms of the included LICENSE.
# # Copyright 2020, Deepankar Chakroborty, All rights reserved.
# #
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
# #
# #<---------------------------->
# # PURPOSE:
# # Returns a list of vectors containing breaks and labels
# # for a continuous variable mapped to one of the axes for use with ggplot2
@ -20,12 +25,8 @@
# # 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150
# #
# # $labels
# # "0" " " "20" " " "40" " " "60" " " "80" " " "100" " "
# # "120" " " "140" " "
# #
# #<---------------------------->
# # "0" " " "20" " " "40" " " "60" " " "80" " " "100" " " "120" " " "140" " "
ggplotBreaks <- function(range,tick,skip.steps=0){
if (length(range) != 2){
stop("Correct format for: range = c(min_value,max_value)")
}

View file

@ -1,20 +1,30 @@
# Installing missing dependencies
dependencies <- c("stringi", "progress")
# Probe each package individually with system.file(), which returns "" when
# the package cannot be found, instead of scanning the whole library with
# installed.packages() (slow, and discouraged for this purpose by its own
# documentation).
missing_packages <- dependencies[
  !vapply(
    dependencies,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
]
if (length(missing_packages) > 0) install.packages(missing_packages)
# Remove the helper variables so they do not linger in the current environment.
rm(missing_packages, dependencies)
unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){
# #<---------------------------->
# # Please include this section when distributing and/or using this code.
# # Please read and abide by the terms of the included LICENSE
# # You must include this section when:
# # Distributing, Using and/or Modifying this code.
# # Please read and abide by the terms of the included LICENSE.
# # Copyright 2020, Deepankar Chakroborty, All rights reserved.
# #
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
# #
# #<---------------------------->
# # PURPOSE:
# # In the gene column in your SNV annotation if you see something like:
# # e.g. PRAMEF7;PRAMEF8 OR PRAMEF7,PRAMEF8
# # then your mutations annotations have gene paralogs.
# # This script aims to de-couple those paralogs into their individual rows.
# #
# ------------------------
### Info on what to pass as the function parameters:
# # INFO on what to pass as the function parameters:
# Assign correct paralog_separator found in the gene column of your SNV annotations # e.g. if the Gene column has entries like PRAMEF7;PRAMEF8
# then the paralog_separator is ";"
# or set it to whatever separator is used by your SNV annotation software.
@ -29,13 +39,6 @@
# <--------------->
# Installing missing dependencies
dependencies <- c("stringi", "progress")
# Probe each package individually with system.file(), which returns "" when
# the package cannot be found, instead of scanning the whole library with
# installed.packages() (slow, and discouraged for this purpose by its own
# documentation).
missing_packages <- dependencies[
  !vapply(
    dependencies,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1)
  )
]
if (length(missing_packages) > 0) install.packages(missing_packages)
# Remove the helper variables so they do not linger in the current environment.
rm(missing_packages, dependencies)
unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){
# Sanity checks
check_paralog_sep <- !any(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator))
check_annotation_sep <- !any(stringi::stri_detect_fixed(str = DATA$AAChange.refGene, pattern = annotation_separator))