mirror of
https://github.com/dchakro/shared_Rscripts.git
synced 2026-05-18 09:14:44 -07:00
62 lines
2.1 KiB
R
62 lines
2.1 KiB
R
|
|
|
|
CT_GA_count <- function(SampleID, Ref_Base, Alt_Base) {
  # #<---------------------------->
  # # You must include this section when:
  # # Distributing, Using and/or Modifying this code.
  # # Please read and abide by the terms of the included LICENSE.
  # # Copyright 2020, Deepankar Chakroborty, All rights reserved.
  # #
  # # Author : Deepankar Chakroborty (https://github.com/dchakro)
  # # Report issues: https://github.com/dchakro/shared_Rscripts/issues
  # # License: https://github.com/dchakro/shared_Rscripts/blob/master/LICENSE
  # #<---------------------------->

  # # PURPOSE:
  # # This function takes in three parallel vectors (one element per mutation):
  # # SampleID = Sample IDs (character, factor, or anything coercible
  # #            to character)
  # # Ref_Base = Reference base
  # # Alt_Base = Altered base that created the mutation.
  #
  # # It counts, per sample, how many C > T and G > A changes there are.
  # # The function returns a data frame with one row per sample:
  # # SampleID = Sample ID (character)
  # # Total = Total number of mutations
  # # CT = C > T changes
  # # GA = G > A changes
  # # Others = all other types of transitions and transversions combined
  # #          (Total - CT - GA; mutations with a missing base land here).
  #
  # # NOTES:
  # # * If SampleID is a factor, one row is returned per factor level (in
  # #   level order), including unused levels (all counts 0).
  # # * Otherwise one row is returned per distinct non-missing ID, in order
  # #   of first appearance.
  # # * Mutations whose SampleID is NA are not attributed to any sample.
  # # * Rows of the result are numbered from 1.

  # Building the data frame keeps data.frame()'s recycling rules and its
  # error on incompatible argument lengths.
  MutMatrix <- data.frame(SampleID,
                          Ref_Base,
                          Alt_Base,
                          stringsAsFactors = FALSE)

  ids <- MutMatrix$SampleID

  # levels() is NULL for non-factors, so it cannot be used unconditionally;
  # fall back to the distinct observed IDs for character/numeric input.
  sample_ids <- if (is.factor(ids)) {
    levels(ids)
  } else {
    unique(as.character(ids[!is.na(ids)]))
  }

  if (length(sample_ids) == 0L) {
    return(data.frame(SampleID = character(0),
                      Total = numeric(0),
                      CT = numeric(0),
                      GA = numeric(0),
                      Others = numeric(0),
                      stringsAsFactors = FALSE))
  }

  # Position of each mutation's sample within sample_ids (NA for missing IDs).
  sample_index <- match(as.character(ids), sample_ids)
  ref <- as.character(MutMatrix$Ref_Base)
  alt <- as.character(MutMatrix$Alt_Base)

  # Counts the selected mutations per sample; tabulate() ignores NA indices.
  count_per_sample <- function(keep) {
    as.numeric(tabulate(sample_index[keep], nbins = length(sample_ids)))
  }

  Total <- count_per_sample(!is.na(sample_index))
  # which() drops NA comparisons, so a missing base never counts as a match.
  CT <- count_per_sample(which(ref == "C" & alt == "T"))
  GA <- count_per_sample(which(ref == "G" & alt == "A"))

  data.frame(SampleID = sample_ids,
             Total = Total,
             CT = CT,
             GA = GA,
             Others = Total - CT - GA,
             stringsAsFactors = FALSE)
}
|