mirror of
https://github.com/gladstone-institutes/Bioinformatics-Workshops.git
synced 2025-11-30 09:45:43 -08:00
Adding script with bash commands
This commit is contained in:
parent
08be3b61c3
commit
91f4d9e4b7
1 changed files with 134 additions and 0 deletions
134
whole-genome-analysis/script.sh
Normal file
134
whole-genome-analysis/script.sh
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# analysis.sh
|
||||
# C: Jul 8, 2019
|
||||
# M: Feb 26, 2020
|
||||
# A: Leandro Lima <leandro.lima@gladstone.ucsf.edu>
|
||||
|
||||
|
||||
########################################
|
||||
## Variables with tools and databases ##
|
||||
########################################
|
||||
|
||||
|
||||
REF_GENOME=/root/resources/chr19.fa
|
||||
DATA_DIR=/root/data/
|
||||
GATK_DIR=/root/tools/GenomeAnalysisTK-3.8-1-0
|
||||
|
||||
|
||||
#############
|
||||
## Mapping ##
|
||||
#############
|
||||
|
||||
|
||||
cd /root/analysis
|
||||
|
||||
|
||||
# BWA - http://bio-bwa.sourceforge.net/bwa.shtml
|
||||
|
||||
bwa mem $REF_GENOME $DATA_DIR/patient3.1.fastq.gz $DATA_DIR/patient3.2.fastq.gz > patient3.sam
|
||||
bwa mem $REF_GENOME $DATA_DIR/patient4.1.fastq.gz $DATA_DIR/patient4.2.fastq.gz > patient4.sam
|
||||
bwa mem $REF_GENOME $DATA_DIR/patient5.1.fastq.gz $DATA_DIR/patient5.2.fastq.gz > patient5.sam
|
||||
|
||||
|
||||
# Picard - https://broadinstitute.github.io/picard/command-line-overview.html#Overview
|
||||
|
||||
# SAM to BAM, with read group information
|
||||
for pat_id in patient3 patient4 patient5; do
|
||||
java -jar /root/tools/picard.jar AddOrReplaceReadGroups \
|
||||
I=$pat_id.sam \
|
||||
O=$pat_id.bam \
|
||||
RGID=4 \
|
||||
RGLB=lib1 \
|
||||
RGPL=illumina \
|
||||
RGPU=unit1 \
|
||||
RGSM=$pat_id
|
||||
done
|
||||
|
||||
|
||||
# Samtools - http://www.htslib.org/doc/samtools.html
|
||||
|
||||
# Sort
|
||||
samtools sort patient3.bam -o patient3.sort.bam
|
||||
samtools sort patient4.bam -o patient4.sort.bam
|
||||
samtools sort patient5.bam -o patient5.sort.bam
|
||||
|
||||
# Create index
|
||||
samtools index patient3.sort.bam
|
||||
samtools index patient4.sort.bam
|
||||
samtools index patient5.sort.bam
|
||||
|
||||
|
||||
|
||||
# Check file sizes
|
||||
ls -lh
|
||||
|
||||
# Remove SAM file
|
||||
|
||||
# Check some flags (number of mapped/unmapped reads, etc.)
|
||||
|
||||
|
||||
#####################
|
||||
## Variant Calling ##
|
||||
#####################
|
||||
|
||||
|
||||
# GATK Haplotype Caller - https://gatk.broadinstitute.org/hc/en-us/articles/360037225632-HaplotypeCaller
|
||||
|
||||
GATK_DIR=/root/tools/GenomeAnalysisTK-3.8-1-0
|
||||
|
||||
for pat_id in patient3 patient4 patient5; do
|
||||
java -jar $GATK_DIR/GenomeAnalysisTK.jar \
|
||||
-T HaplotypeCaller \
|
||||
-R $REF_GENOME \
|
||||
-I $pat_id.sort.bam \
|
||||
-o $pat_id.gatk.g.vcf \
|
||||
--emitRefConfidence GVCF
|
||||
done
|
||||
|
||||
|
||||
# GATK Joint Genotyping
|
||||
java -jar $GATK_DIR/GenomeAnalysisTK.jar \
|
||||
-T GenotypeGVCFs \
|
||||
-R $REF_GENOME \
|
||||
--variant patient3.gatk.g.vcf \
|
||||
--variant patient4.gatk.g.vcf \
|
||||
--variant patient5.gatk.g.vcf \
|
||||
-o all_patients.vcf
|
||||
|
||||
|
||||
################
|
||||
## Annotation ##
|
||||
################
|
||||
|
||||
|
||||
# snpEff - http://snpeff.sourceforge.net/SnpEff_manual.html
|
||||
|
||||
SNPEFF_DIR=/root/tools/snpEff
|
||||
# snpEff
|
||||
java -Xmx4g -jar $SNPEFF_DIR/snpEff.jar \
|
||||
-v -stats all_patients.html \
|
||||
GRCh38.86 all_patients.vcf > all_patients.ann.vcf
|
||||
|
||||
# Adding ID field from dbSNP
|
||||
java -jar $SNPEFF_DIR/SnpSift.jar annotate \
|
||||
-id /root/resources/dbSNP_chr19.vcf.gz \
|
||||
all_patients.ann.vcf > all_patients.dbSnp.vcf
|
||||
|
||||
rm all_patients.ann.vcf
|
||||
|
||||
|
||||
###################
|
||||
## APOE analysis ##
|
||||
###################
|
||||
|
||||
|
||||
# SnpSift extractFields - http://snpeff.sourceforge.net/SnpSift.html#Extract
|
||||
|
||||
# Extracting gene name, rsID and genotypes
|
||||
cat all_patients.dbSnp.vcf | grep CHROM | cut -f1-5,8,10- > APOE_status.txt
|
||||
java -jar /root/tools/snpEff/SnpSift.jar \
|
||||
extractFields \
|
||||
all_patients.dbSnp.vcf \
|
||||
CHROM POS ID REF ALT EFF[0].GENE GEN[*].GT \
|
||||
| grep -E 'rs429358|rs7412' >> APOE_status.txt
|
||||
|
||||
column -t APOE_status.txt
|
||||
Loading…
Add table
Add a link
Reference in a new issue