shared_Rscripts/CT_GA_count.R
2020-08-25 07:44:54 +03:00

62 lines
2.1 KiB
R

CT_GA_count <- function(SampleID,Ref_Base,Alt_Base){
# #<---------------------------->
# # You must include this section when:
# # Distributing, Using and/or Modifying this code.
# # Please read and abide by the terms of the included LICENSE.
# # Copyright 2020, Deepankar Chakroborty, All rights reserved.
# #
# # Author : Deepankar Chakroborty (https://github.com/dchakro)
# # Report issues: https://github.com/dchakro/shared_Rscripts/issues
# # License: https://github.com/dchakro/shared_Rscripts/blob/master/LICENSE
# #<---------------------------->
# # PURPOSE:
# # This function takes in three vectors:
# # SampleID = Sample IDs
# # Ref_Base = Reference Base
# # Alt_Base = altered base that created the mutation.
# # And calculates the number of C > T and G > A changes are there (per sample)
# # The function returns a data frame listing the number of mutations (per sample):
# # SampleID = Sample ID
# # Total = Total number of mutations
# # CT = C > T changes
# # GA = G > A changes
# # Others = all other types of transitions and transversions combined.
MutMatrix <- data.frame(SampleID,
Ref_Base,
Alt_Base,
stringsAsFactors = F)
return.df <- data.frame(SampleID=NA,
Total=0,
CT=0,
GA=0,
Others=0)
for(SampleID in levels(MutMatrix$SampleID)){
set <- MutMatrix[ MutMatrix$SampleID == SampleID, ]
# if(dim(set)[1]==0){
# return.df <- rbind.data.frame(return.df,c(SampleID,0,0,0,0))
# next
# }
Total <- dim(set)[1]
CT <- dim(subset(set, set$Ref_Base == "C" & set$Alt_Base == "T"))[1]
GA <- dim(subset(set, set$Ref_Base == "G" & set$Alt_Base == "A"))[1]
Others=Total-CT-GA
return.df <- rbind.data.frame(return.df,list(SampleID,Total,CT,GA,Others),stringsAsFactors = F)
Total <- 0;CT <- 0;GA <- 0;Others <- 0 # re-initialize
rm(set)
}
return(return.df[-1,]) # Removes the first empty row
}