mirror of
https://github.com/dchakro/shared_Rscripts.git
synced 2026-05-17 09:12:16 -07:00
updates
This commit is contained in:
parent
79c0931dc5
commit
584ae98360
2 changed files with 82 additions and 0 deletions
73
.Rhistory
Normal file
73
.Rhistory
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
unparalog <- function(DATA, paralog_separator = ";", annotation_separator = ",", GeneColName , AnnotationColName ){
|
||||||
|
# Installing missing dependencies
|
||||||
|
dependencies <- c("stringi", "progress")
|
||||||
|
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
|
||||||
|
if(length(missing_packages)) install.packages(missing_packages)
|
||||||
|
# Sanity checks
|
||||||
|
check_paralog_sep <- !any(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator))
|
||||||
|
check_annotation_sep <- !any(stringi::stri_detect_fixed(str = DATA$AAChange.refGene, pattern = annotation_separator))
|
||||||
|
idx_Gene <- which(colnames(DATA) == GeneColName)
|
||||||
|
idx_Annotation <- which(colnames(DATA) == AnnotationColName)
|
||||||
|
if(length(idx_Gene) == 0 | length(idx_Annotation) == 0 | check_paralog_sep | check_annotation_sep) {
|
||||||
|
message(stringi::stri_c("You entered :-->",
|
||||||
|
"\nparalog_separator = ",paralog_separator,
|
||||||
|
"\nannotation_separator = ",annotation_separator,
|
||||||
|
"\nGeneColName = ", GeneColName,
|
||||||
|
"\nAnnotationColName = ",AnnotationColName))
|
||||||
|
stop("Inconsistencies with these input parameters.\n Ensure they are correct and try again.")
|
||||||
|
}
|
||||||
|
# Cleanup
|
||||||
|
rm(missing_packages,dependencies,check_paralog_sep,check_annotation_sep)
|
||||||
|
gc()
|
||||||
|
current.idx <- nrow(DATA)+1
|
||||||
|
paralog.idx <- which(stringi::stri_detect_fixed(str = DATA$Gene.refGene,pattern = paralog_separator))
|
||||||
|
pb <- progress::progress_bar$new(total=length(paralog.idx),format = " [:bar] :current/:total (:percent)",); pb$tick(0)
|
||||||
|
Number_of_paralogs <- sum(stringi::stri_count_fixed(str = DATA$Gene.refGene,pattern = paralog_separator))
|
||||||
|
message(stringi::stri_c("There are ",length(paralog.idx)," annotations with ", Number_of_paralogs," paralogs."))
|
||||||
|
# copying structure of original DATA
|
||||||
|
DATA.add <- DATA[1,]; DATA.add <- DATA.add[-1,]
|
||||||
|
# Adding the empty rows to original table.
|
||||||
|
# These rows will be populated in the for loop
|
||||||
|
DATA.add <- dplyr::bind_rows(DATA.add,data.frame(matrix(nrow = (length(paralog.idx)+Number_of_paralogs), ncol = ncol(DATA),dimnames = list(c(),colnames(DATA))),stringsAsFactors = F))
|
||||||
|
DATA <- dplyr::bind_rows(DATA,DATA.add)
|
||||||
|
rm(DATA.add) ; gc()
|
||||||
|
# Beginning isolation of the paralogs
|
||||||
|
for(i in paralog.idx){
|
||||||
|
Muts <- unlist(stringi::stri_split_fixed(DATA$AAChange.refGene[i],annotation_separator),use.names = F,recursive = F)
|
||||||
|
for (gene in unlist(stringi::stri_split_fixed(DATA$Gene.refGene[i],pattern = paralog_separator),use.names = F,recursive = F)){
|
||||||
|
DATA[current.idx,] <- DATA[i,]
|
||||||
|
DATA$Gene.refGene[current.idx] <- gene
|
||||||
|
DATA$AAChange.refGene[current.idx] <- paste0(Muts[grep(gene,Muts,fixed = T)],collapse=",")
|
||||||
|
current.idx <- current.idx+1
|
||||||
|
}
|
||||||
|
pb$tick(1)
|
||||||
|
}
|
||||||
|
# removing the original rows with the paralogs as they are all unparalogged now
|
||||||
|
DATA <- DATA[-paralog.idx,]
|
||||||
|
rownames(DATA) <- as.character(seq(1,length(DATA[,1])))
|
||||||
|
return (DATA)
|
||||||
|
}
|
||||||
|
dependencies <- c("stringi")
|
||||||
|
missing_packages <- dependencies[!(dependencies %in% installed.packages()[, "Package"])]
|
||||||
|
if(length(missing_packages)) install.packages(missing_packages)
|
||||||
|
rm(missing_packages,dependencies)
|
||||||
|
MutSiteFind <- function(MutationColumn){
|
||||||
|
# #<---------------------------->
|
||||||
|
# # Please include this section when distributing and/or using this code.
|
||||||
|
# # Please read and abide by the terms of the included LICENSE.
|
||||||
|
# # Copyright 2020, Deepankar Chakroborty
|
||||||
|
# #
|
||||||
|
# # Author : Deepankar Chakroborty (https://gitlab.utu.fi/deecha)
|
||||||
|
# # Report issues: https://gitlab.utu.fi/deecha/shared_scripts/-/issues
|
||||||
|
# # License: https://gitlab.utu.fi/deecha/shared_scripts/-/blob/master/LICENSE
|
||||||
|
# #
|
||||||
|
# # PURPOSE:
|
||||||
|
# # For a given vector of amino acid changes like A123T, V256F, E746_A750del
|
||||||
|
# # this function returns c(123, 256, 746) as amino acid positions of
|
||||||
|
# # the mutated residue.
|
||||||
|
# # In case of indels, it doesn't return the range!! (returns only the start position)
|
||||||
|
# #
|
||||||
|
# #<---------------------------->
|
||||||
|
return(unlist(x = stringi::stri_extract_first_regex(str = MutationColumn,pattern = "[[:digit:]]+"), use.names = F))
|
||||||
|
}
|
||||||
|
MutSiteFind
|
||||||
|
|
@ -9,6 +9,15 @@ snippet beginr
|
||||||
rm(list = ls())
|
rm(list = ls())
|
||||||
gc()
|
gc()
|
||||||
library(data.table)
|
library(data.table)
|
||||||
|
today <- format(Sys.Date(),format = "%Y-%m-%d")
|
||||||
|
slug <- "${2:SubFolderName}"
|
||||||
|
workingDirectory <- paste0(today,"_",slug)
|
||||||
|
setwd("~/ANALYSIS/")
|
||||||
|
ifelse(test = dir.exists(workingDirectory),
|
||||||
|
yes = message(paste0(workingDirectory," exists...")),
|
||||||
|
no = dir.create(workingDirectory))
|
||||||
|
setwd(workingDirectory)
|
||||||
|
rm(today,workingDirectory,slug)
|
||||||
${0}
|
${0}
|
||||||
|
|
||||||
snippet comment_date
|
snippet comment_date
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue