mirror of
https://github.com/dchakro/shared_Rscripts.git
synced 2026-05-17 09:12:16 -07:00
38 lines
No EOL
1.6 KiB
R
38 lines
No EOL
1.6 KiB
R
# Installing missing dependencies
#
# Checks each required package individually and installs only those that
# cannot be found. system.file() returns "" for a package that is not
# installed, which avoids scanning every installed package the way
# installed.packages() does.
dependencies <- c("stringi")
is_installed <- vapply(
  dependencies,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_packages <- dependencies[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Remove the temporary helpers so sourcing this file leaves only the
# function definition behind.
rm(missing_packages, dependencies, is_installed)
|
|
|
|
parse_IUPAC_AAchange <- function(MutationColumn){
  # #<---------------------------->
  # # You must include this section when:
  # # Distributing, Using and/or Modifying this code.
  # # Please read and abide by the terms of the included LICENSE.
  # # Copyright 2020, Deepankar Chakroborty, All rights reserved.
  # #
  # # Author : Deepankar Chakroborty (https://github.com/dchakro)
  # # Website: https://www.dchakro.com
  # # Report issues: https://github.com/dchakro/shared_Rscripts/issues
  # # License: https://github.com/dchakro/shared_Rscripts/blob/master/LICENSE
  # #<---------------------------->

  # # PURPOSE:
  # # For a given vector of amino acid changes like A123T, V256F, E746_A750del
  # # this function returns a data frame with REF, Pos and ALT amino acids.

  # # USAGE:
  # # captureDF <- parse_IUPAC_AAchange(MutationColumn)

  # # ARGUMENTS:
  # # MutationColumn: vector of amino acid change strings written with
  # #   one-letter codes. A literal "p." is removed and "*" is treated as "X".

  # # RETURNS:
  # # A data frame with one row per input element and the columns:
  # #   REF_AA: first run of upper-case one-letter amino acid codes
  # #   AAPos : first run of digits, converted to numeric
  # #   ALT_AA: last run of upper-case one-letter amino acid codes
  # # Elements without a match yield NA in the corresponding column.

  # # NOTES:
  # # Lower-case suffixes such as "del" or "fs" are not matched by the amino
  # # acid pattern. ALT_AA is therefore the last upper-case run before them
  # # (e.g. "A" for E746_A750del), and equals REF_AA when the string contains
  # # only one such run.

  # Strip the literal "p." marker. A fixed-string substitution leaves strings
  # without the marker untouched, so no prior check is needed.
  MutationColumn <- gsub("p.", "", MutationColumn, fixed = TRUE)

  # "*" is rewritten to "X" so that it is captured by the amino acid pattern.
  MutationColumn <- gsub("*", "X", MutationColumn, fixed = TRUE)

  aa_pattern <- "[ACDEFGHIKLMNPQRSTVWYX]+"
  aa_pos <- stringi::stri_extract_first_regex(
    str = MutationColumn,
    pattern = "[0-9]+"
  )
  ref_aa <- stringi::stri_extract_first_regex(
    str = MutationColumn,
    pattern = aa_pattern
  )
  alt_aa <- stringi::stri_extract_last_regex(
    str = MutationColumn,
    pattern = aa_pattern
  )

  data.frame(REF_AA = ref_aa, AAPos = as.numeric(aa_pos), ALT_AA = alt_aa)
}