From cd68477b9d4e2216d8f28c1d6572eec1ca0a6deb Mon Sep 17 00:00:00 2001
From: Ayushi Agrawal
 <88406934+ayushi-agrawal-gladstone@users.noreply.github.com>
Date: Mon, 21 Mar 2022 11:46:20 -0700
Subject: [PATCH] Add files via upload

---
 intro-rna-seq/steps_on_wynton_session2.txt | 141 +++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 intro-rna-seq/steps_on_wynton_session2.txt

diff --git a/intro-rna-seq/steps_on_wynton_session2.txt b/intro-rna-seq/steps_on_wynton_session2.txt
new file mode 100644
index 0000000..124ed86
--- /dev/null
+++ b/intro-rna-seq/steps_on_wynton_session2.txt
@@ -0,0 +1,141 @@
+#Commands to run on wynton in session 2 of the Intro to RNA-seq data analysis workshop
+
+#login to the wynton cluster
+{local}$ ssh alice@log2.wynton.ucsf.edu
+#enter your wynton password when prompted and hit enter
+
+#once you are logged in to wynton,
+#login to the development node
+[alice@log2 ~]$ ssh dev3 
+
+#go to the Intro_to_RNA-seq_data_analysis folder that we uploaded in session 1
+[alice@dev3 ~]$ cd Intro_to_RNA-seq_data_analysis/
+
+#list the contents of the Intro_to_RNA-seq_data_analysis folder
+#the singularity container "rna_seq_container.sif" that we created in session 1 should appear in the result
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls
+
+#run fastqc on the Bacteria_GATTACA_L001_R1_001.fastq file using the singularity container
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif fastqc Bacteria_GATTACA_L001_R1_001.fastq
+
+#once the above command completes running,
+#check the output - there should be 2 output files
+#   1. Bacteria_GATTACA_L001_R1_001_fastqc.html
+#   2. Bacteria_GATTACA_L001_R1_001_fastqc.zip
+#the singularity container is just another way of running the same tool,
+#so the output files should match the ones we got when we ran fastqc without the singularity container in session 1
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls
+
+#download the fastqc html result file from wynton to the Downloads folder on local computer 
+#open a new terminal (MacOS) or command prompt (Windows) window 
+{local}$ scp alice@dt2.wynton.ucsf.edu:~/Intro_to_RNA-seq_data_analysis/Bacteria_GATTACA_L001_R1_001_fastqc.html Downloads
+
+#once the above command completes running, open the downloaded Bacteria_GATTACA_L001_R1_001_fastqc.html file 
+#inspect the fastqc html file for different QC parameters 
+#you will notice adapter content reported in the fastqc html file
+#so now, we will trim the reads in the fastq file to remove these adapters using cutadapt
+
+#go back to the terminal (MacOS) or command prompt (Windows) window where you are logged in to wynton
+#run cutadapt to trim the adapters from the Bacteria_GATTACA_L001_R1_001.fastq file using the singularity container
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif cutadapt \
+-a file:Adapter_Sequence.fasta \
+-o trimmed.fastq \
+--minimum-length 20 \
+--quality-cutoff 20 \
+Bacteria_GATTACA_L001_R1_001.fastq 
+
+#once the above command completes running,
+#check the output - there should be a trimmed.fastq output file
+#the trimmed.fastq file is the trimmed fastq file generated by cutadapt after removing the adapter content
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls
+
+#run fastqc on the trimmed.fastq file using the singularity container to check if the adapter content is gone
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif fastqc trimmed.fastq
+
+#once the above command completes running,
+#check the output - there should be 2 output files
+#   1. trimmed_fastqc.html
+#   2. trimmed_fastqc.zip
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls
+
+#go back to the terminal (MacOS) or command prompt (Windows) window where you are on your local computer
+#download the fastqc html result file from wynton to the Downloads folder on local computer 
+{local}$ scp alice@dt2.wynton.ucsf.edu:~/Intro_to_RNA-seq_data_analysis/trimmed_fastqc.html Downloads
+
+#open and inspect the trimmed_fastqc.html file - the adapter content should be gone
+#if all looks good in the html file, then run STAR to align the trimmed reads to the reference genome
+
+#Before we can map reads to the reference genome using STAR, 
+#we need to index it. This will generate a transformed version of the 
+#genome that allows STAR to efficiently map sequences to it.
+#go back to the terminal (MacOS) or command prompt (Windows) window where you are logged in to wynton
+#run the below command to make a folder to store the star index files
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ mkdir star_index
+
+#run the below command to generate the star index
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif STAR \
+--runMode genomeGenerate \
+--genomeDir ./star_index \
+--genomeFastaFiles rDNA_sequence.fasta \
+--genomeSAindexNbases 3
+
+#once the above command completes running,
+#check the contents of the star_index folder - there should be 8 output files
+#   1. chrLength.txt  
+#   2. chrNameLength.txt
+#   3. chrName.txt
+#   4. chrStart.txt
+#   5. Genome
+#   6. genomeParameters.txt
+#   7. SA
+#   8. SAindex
+#for more details on STAR go to: 
+#https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls star_index/
+
+#run the star alignment on the trimmed.fastq file using the star index generated in the previous commands
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif STAR \
+--genomeDir ./star_index \
+--readFilesIn ./trimmed.fastq
+
+#once the above command completes running,
+#check the output - there should be 5 output files
+#   1. Log.final.out - a summary of mapping statistics for the sample
+#   2. Aligned.out.sam - the aligned reads, in SAM format
+#   3. Log.out - a running log from STAR, with information about the run
+#   4. Log.progress.out - job progress with the number of processed reads, % of mapped reads etc., updated every ~1 minute
+#   5. SJ.out.tab - high confidence collapsed splice junctions in tab-delimited format. Only junctions supported by uniquely mapping reads are reported
+#for more details on STAR go to: 
+#https://physiology.med.cornell.edu/faculty/skrabanek/lab/angsd/lecture_notes/STARmanual.pdf
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls 
+
+#look at the contents of the Log.final.out file to check the mapping statistics for the sample 
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ less Log.final.out
+
+#if the mapping statistics look good then run featureCounts to generate the read count matrices
+#if the mapping statistics do not look good then troubleshoot the issues by seeking help online or contacting the Gladstone Bioinformatics Core
+#run the featureCounts on the Aligned.out.sam file
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ singularity exec rna_seq_container.sif featureCounts \
+-a rDNA.gtf \
+-t CDS \
+-o counts.txt \
+Aligned.out.sam
+
+#once the above command completes running,
+#check the output - there should be 2 output files
+#   1. counts.txt - the counts matrix with genes as rows and samples as columns. 
+#                   there might be additional columns for gene-related information (e.g., start position, end position, strand etc.)
+#   2. counts.txt.summary - tabulates how many reads were “assigned” (counted) and the reasons the others remained “unassigned”
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ ls 
+
+#look at the contents of the counts.txt.summary file to check the number of reads assigned to a gene
+#make sure most of the reads are assigned to a gene, if not then try to troubleshoot by seeking help online or contacting the Gladstone Bioinformatics Core
+[alice@dev3 Intro_to_RNA-seq_data_analysis]$ less counts.txt.summary
+
+#the counts.txt file can be used for further analysis (such as differential gene expression)
+#go back to the terminal (MacOS) or command prompt (Windows) window where you are on your local computer
+#download the counts.txt file from wynton to the Downloads folder on local computer 
+{local}$ scp alice@dt2.wynton.ucsf.edu:~/Intro_to_RNA-seq_data_analysis/counts.txt Downloads
+
+
+############## END SESSION 2 ##############