From 584ae98360c3dd9010bc23d96fc7251f3b13cbfd Mon Sep 17 00:00:00 2001 From: dchakro <35454738+dchakro@users.noreply.github.com> Date: Wed, 31 May 2023 09:51:31 -0700 Subject: [PATCH] updates --- .Rhistory | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++ r.snippets.txt | 9 +++++++ 2 files changed, 82 insertions(+) create mode 100644 .Rhistory diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..479669f --- /dev/null +++ b/.Rhistory @@ -0,0 +1,73 @@ +unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){ +# Installing missing dependencies +dependencies <- c("stringi", "progress") +missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] +if(length(missing_packages)) install.packages(missing_packages) +# Sanity checks +check_paralog_sep <- !any(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator)) +check_annotation_sep <- !any(stringi::stri_detect_fixed(str = DATA$AAChange.refGene, pattern = annotation_separator)) +idx_Gene <- which(colnames(DATA) == GeneColName) +idx_Annotation <- which(colnames(DATA) == AnnotationColName) +if(length(idx_Gene) == 0 | length(idx_Annotation) == 0 | check_paralog_sep | check_annotation_sep) { +message(stringi::stri_c("You entered :-->", +"\nparalog_separator = ",paralog_separator, +"\nannotation_separator = ",annotation_separator, +"\nGeneColName = ", GeneColName, +"\nAnnotationColName = ",AnnotationColName)) +stop("Inconsistencies with these input parameters.\n Ensure they are correct and try again.") +} +# Cleanup +rm(missing_packages,dependencies,check_paralog_sep,check_annotation_sep) +gc() +current.idx <- nrow(DATA)+1 +paralog.idx <- which(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator)) +pb <- progress::progress_bar$new(total=length(paralog.idx),format = " [:bar] :current/:total (:percent)",); pb$tick(0) +Number_of_paralogs <- sum(stringi::stri_count_fixed(str = DATA$Gene.refGene,pattern = paralog_separator)) +message(stringi::stri_c("There are ",length(paralog.idx)," annotations with ", Number_of_paralogs," paralogs.")) +# copying structure of original DATA +DATA.add <- DATA[1,]; DATA.add <- DATA.add[-1,] +# Adding the empty rows to original table. +# These rows will be populated in the for loop +DATA.add <- dplyr::bind_rows(DATA.add,data.frame(matrix(nrow = (length(paralog.idx)+Number_of_paralogs), ncol = ncol(DATA),dimnames = list(c(),colnames(DATA))),stringsAsFactors = F)) +DATA <- dplyr::bind_rows(DATA,DATA.add) +rm(DATA.add) ; gc() +# Beginning isolation of the paralogs +for(i in paralog.idx){ +Muts <- unlist(stringi::stri_split_fixed(DATA$AAChange.refGene[i],annotation_separator),use.names = F,recursive = F) +for (gene in unlist(stringi::stri_split_fixed(DATA$Gene.refGene[i],pattern = paralog_separator),use.names = F,recursive = F)){ +DATA[current.idx,] <- DATA[i,] +DATA$Gene.refGene[current.idx] <- gene +DATA$AAChange.refGene[current.idx] <- paste0(Muts[grep(gene,Muts,fixed = T)],collapse=",") +current.idx <- current.idx+1 +} +pb$tick(1) +} +# removing the original rows with the paralogs as they are all unparalogged now +DATA <- DATA[-paralog.idx,] +rownames(DATA) <- as.character(seq(1,length(DATA[,1]))) +return (DATA) +} +dependencies <- c("stringi") +missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])] +if(length(missing_packages)) install.packages(missing_packages) +rm(missing_packages,dependencies) +MutSiteFind <- function(MutationColumn){ +# #<----------------------------> +# # Please include this section when distributing and/or using this code. +# # Please read and abide by the terms of the included LICENSE. +# # Copyright 2020, Deepankar Chakroborty +# # +# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha) +# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues +# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE +# # +# # PURPOSE: +# # For a given vector of amino acid changes like A123T, V256F, E746_A750del +# # this function returns c(123, 256, 746) as amino acid positions of +# # the mutated residue. +# # In case of indels, it doesn't return the range!! (returns only the start position) +# # +# #<----------------------------> +return(unlist(x = stringi::stri_extract_first_regex(str = MutationColumn,pattern = "[[:digit:]]+"), use.names = F)) +} +MutSiteFind diff --git a/r.snippets.txt b/r.snippets.txt index 7626464..a92ef8c 100644 --- a/r.snippets.txt +++ b/r.snippets.txt @@ -9,6 +9,15 @@ snippet beginr rm(list = ls()) gc() library(data.table) + today <- format(Sys.Date(),format = "%Y-%m-%d") + slug <- "${2:SubFolderName}" + workingDirectory <- paste0(today,"_",slug) + setwd("~/ANALYSIS/") + ifelse(test = dir.exists(workingDirectory), + yes = message(paste0(workingDirectory," exists...")), + no = dir.create(workingDirectory)) + setwd(workingDirectory) + rm(today,workingDirectory,slug) ${0} snippet comment_date