mirror of
https://github.com/gladstone-institutes/Bioinformatics-Workshops.git
synced 2025-11-30 09:45:43 -08:00
update intro to unix for Feb 2025
This commit is contained in:
parent
03f07124d1
commit
f3bcc16916
8 changed files with 96 additions and 109 deletions
|
|
@ -2923,7 +2923,7 @@ types/structures</strong> (ex. nested lists)</li>
|
|||
<section id="min-break" class="title-slide slide level1">
|
||||
<h1>10 min break</h1>
|
||||
<center>
|
||||
<div class="countdown" id="timer_d165775b" data-update-every="1" tabindex="0" style="right:0;bottom:0;margin:5%;padding:50px;font-size:5em;position: relative; width: min-content;">
|
||||
<div class="countdown" id="timer_45c35f34" data-update-every="1" tabindex="0" style="right:0;bottom:0;margin:5%;padding:50px;font-size:5em;position: relative; width: min-content;">
|
||||
<div class="countdown-controls"><button class="countdown-bump-down">−</button><button class="countdown-bump-up">+</button></div>
|
||||
<code class="countdown-time"><span class="countdown-digits minutes">10</span><span class="countdown-digits colon">:</span><span class="countdown-digits seconds">00</span></code>
|
||||
</div>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -4,7 +4,7 @@
|
|||
<meta charset="utf-8">
|
||||
<meta name="generator" content="pandoc">
|
||||
<meta name="author" content="Natalie Elphick" />
|
||||
<title>Introduction to Unix Command-line - Part 2</title>
|
||||
<title>Introduction to Unix Command-line</title>
|
||||
<meta name="apple-mobile-web-app-capable" content="yes">
|
||||
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, minimal-ui">
|
||||
|
|
@ -1878,9 +1878,10 @@ document.addEventListener('DOMContentLoaded', function(e) {
|
|||
<div class="slides">
|
||||
|
||||
<section>
|
||||
<h1 class="title">Introduction to Unix Command-line - Part 2</h1>
|
||||
<h1 class="title">Introduction to Unix Command-line</h1>
|
||||
<h1 class="subtitle">Part 2</h1>
|
||||
<h2 class="author">Natalie Elphick</h2>
|
||||
<h3 class="date">March 12th 2024</h3>
|
||||
<h3 class="date">February 11th 2025</h3>
|
||||
</section>
|
||||
|
||||
<section id="section" class="slide level2">
|
||||
|
|
@ -1892,7 +1893,7 @@ document.addEventListener('DOMContentLoaded', function(e) {
|
|||
<section id="introductions" class="slide level2">
|
||||
<h2>Introductions</h2>
|
||||
<p><strong>Natalie Elphick</strong><br />
|
||||
Bioinformatician I</p>
|
||||
Bioinformatician II</p>
|
||||
<p><br></p>
|
||||
<p><strong>Yihang Xin (TA)</strong><br />
|
||||
Software Engineer III</p>
|
||||
|
|
@ -1903,11 +1904,11 @@ Software Engineer III</p>
|
|||
<p>Run the following commands if you did not attend part 1:</p>
|
||||
<pre class="text"><code>mkdir unix_workshop</code></pre>
|
||||
<pre class="text"><code>cd unix_workshop</code></pre>
|
||||
<pre class="text"><code>curl -L -o unix_workshop_2024.tar.gz 'https://www.dropbox.com/scl/fi/o8msrl3a1k986jvjll4mv/unix_workshop_2024.tar.gz?rlkey=m7jfkvpz0iq12zdzphq7013l5&dl=0'
|
||||
<pre class="text"><code>curl -L -o unix_workshop.tar.gz 'https://www.dropbox.com/scl/fi/tdzpoivf7mienlenunqhf/unix_workshop.tar.gz?rlkey=6bfxnqgc5n4lgc9mc80ld75z4&dl=0'
|
||||
</code></pre>
|
||||
<pre class="text"><code>tar -xzf unix_workshop_2024.tar.gz</code></pre>
|
||||
<pre class="text"><code>cd unix_workshop_2024</code></pre>
|
||||
<pre class="text"><code>curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.111.refseq.tsv.gz</code></pre>
|
||||
<pre class="text"><code>tar -xzf unix_workshop.tar.gz</code></pre>
|
||||
<pre class="text"><code>cd unix_workshop</code></pre>
|
||||
<pre class="text"><code>curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.113.refseq.tsv.gz</code></pre>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
|
@ -1937,7 +1938,7 @@ du -h part_2/homo_sapiens.refseq.tsv</code></pre>
|
|||
</ul>
|
||||
<pre class="text"><code>gzip part_2/homo_sapiens.refseq.tsv
|
||||
du -h part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
<pre><code>3.2M part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
<pre><code>3.3M part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
<ul>
|
||||
<li>Compressing it makes it a 10th of the size</li>
|
||||
</ul>
|
||||
|
|
@ -1962,9 +1963,9 @@ compressed archive files</li>
|
|||
<pre class="text"><code>tar -czf part_1.tar.gz part_1
|
||||
ls -l</code></pre>
|
||||
<pre><code>total 8
|
||||
drwx---rw-@ 4 nelphick staff 128 Mar 12 09:36 part_1
|
||||
-rw-r--r-- 1 nelphick staff 803 Mar 12 12:52 part_1.tar.gz
|
||||
drwxr-xr-x@ 4 nelphick staff 128 Mar 12 12:52 part_2</code></pre>
|
||||
drwx---rw-@ 4 nelphick staff 128 Feb 10 11:16 part_1
|
||||
-rw-r--r-- 1 nelphick staff 801 Feb 10 11:16 part_1.tar.gz
|
||||
drwxr-xr-x@ 4 nelphick staff 128 Feb 10 11:16 part_2</code></pre>
|
||||
<ul>
|
||||
<li>-c: create a new archive</li>
|
||||
<li>-f: specify the name of the archive file</li>
|
||||
|
|
@ -1992,7 +1993,6 @@ can use <code>gunzip -c</code></li>
|
|||
</ul>
|
||||
<pre class="text"><code>gunzip -c part_2/homo_sapiens.refseq.tsv.gz | head</code></pre>
|
||||
<pre><code>gene_stable_id transcript_stable_id protein_stable_id xref db_name info_type source_identity xref_identity linkage_type
|
||||
ENSG00000228037 ENST00000424215 - NR_121638 RefSeq_ncRNA DIRECT - - -
|
||||
ENSG00000142611 ENST00000378391 ENSP00000367643 NP_955533 RefSeq_peptide DIRECT 100 100 -
|
||||
ENSG00000142611 ENST00000378391 ENSP00000367643 NM_199454 RefSeq_mRNA DIRECT 99 62 -
|
||||
ENSG00000142611 ENST00000270722 ENSP00000270722 NP_071397 RefSeq_peptide DIRECT 100 100 -
|
||||
|
|
@ -2000,7 +2000,8 @@ ENSG00000142611 ENST00000270722 ENSP00000270722 NM_022114 RefSeq_mRNA DIRECT
|
|||
ENSG00000157911 ENST00000288774 ENSP00000288774 NP_001361354 RefSeq_peptide INFERRED_PAIR - - -
|
||||
ENSG00000157911 ENST00000288774 ENSP00000288774 NP_001361355 RefSeq_peptide INFERRED_PAIR - - -
|
||||
ENSG00000157911 ENST00000288774 ENSP00000288774 NP_722540 RefSeq_peptide DIRECT 100 100 -
|
||||
ENSG00000157911 ENST00000288774 ENSP00000288774 NM_001374425 RefSeq_mRNA DIRECT 99 100 -</code></pre>
|
||||
ENSG00000157911 ENST00000288774 ENSP00000288774 NM_001374425 RefSeq_mRNA DIRECT 99 100 -
|
||||
ENSG00000157911 ENST00000288774 ENSP00000288774 NM_001374426 RefSeq_mRNA DIRECT 94 92 -</code></pre>
|
||||
</section></section>
|
||||
<section>
|
||||
<section id="system-variables" class="title-slide slide level1">
|
||||
|
|
@ -2110,13 +2111,13 @@ interpreter is</li>
|
|||
<li>By default, files are not executable</li>
|
||||
</ul>
|
||||
<pre class="text"><code>ls -l part_2/example_script.sh</code></pre>
|
||||
<pre><code>-rw-r--r-- 1 nelphick staff 287 Mar 12 12:52 part_2/example_script.sh</code></pre>
|
||||
<pre><code>-rw-r--r-- 1 nelphick staff 287 Feb 10 11:16 part_2/example_script.sh</code></pre>
|
||||
<ul>
|
||||
<li>We can set the execute bit like this</li>
|
||||
</ul>
|
||||
<pre class="text"><code>chmod u+x part_2/example_script.sh
|
||||
ls -l part_2/example_script.sh</code></pre>
|
||||
<pre><code>-rwxr--r-- 1 nelphick staff 287 Mar 12 12:52 part_2/example_script.sh</code></pre>
|
||||
<pre><code>-rwxr--r-- 1 nelphick staff 287 Feb 10 11:16 part_2/example_script.sh</code></pre>
|
||||
</section>
|
||||
<section id="example" class="slide level2">
|
||||
<h2>Example</h2>
|
||||
|
|
@ -2136,7 +2137,7 @@ echo "There are $u_genes unique genes in $1"</code></pre>
|
|||
<h2>Let’s run it</h2>
|
||||
<pre class="text"><code>./part_2/example_script.sh part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
<pre><code>Counting the genes in part_2/homo_sapiens.refseq.tsv.gz
|
||||
There are 33338 unique genes in part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
There are 36353 unique genes in part_2/homo_sapiens.refseq.tsv.gz</code></pre>
|
||||
</section>
|
||||
<section id="loops" class="slide level2">
|
||||
<h2>Loops</h2>
|
||||
|
|
@ -2270,7 +2271,7 @@ files</li>
|
|||
awk -F '\t' '$5 == "RefSeq_mRNA" {sum += $7; count++} \
|
||||
END {print sum / count}'
|
||||
</code></pre>
|
||||
<pre><code>64.1533</code></pre>
|
||||
<pre><code>64.2653</code></pre>
|
||||
</section>
|
||||
<section id="resources-for-learning-awk-and-sed" class="slide level2">
|
||||
<h2>Resources for learning AWK and sed</h2>
|
||||
|
|
@ -2314,18 +2315,15 @@ Line</a></li>
|
|||
</section>
|
||||
<section id="upcoming-data-science-training-program-workshops" class="slide level2">
|
||||
<h2>Upcoming Data Science Training Program Workshops</h2>
|
||||
<p><a href="https://gladstone.org/events/introduction-pathway-analysis-1">Introduction
|
||||
to Pathway Analysis</a><br />
|
||||
April 2, 2024 1:00-4:00pm PDT</p>
|
||||
<p><a href="https://gladstone.org/events/statistics-enrichment-analysis-methods-0">Statistics
|
||||
of Enrichment Analysis Methods</a><br />
|
||||
April 11-April 12, 2024 1:00-3:00pm PDT</p>
|
||||
<p><a href="https://gladstone.org/events/working-wynton">Working on
|
||||
Wynton</a><br />
|
||||
April 15, 2024 1:00-4:00pm PDT</p>
|
||||
<p><a href="https://gladstone.org/events/introduction-linear-mixed-effects-models">Introduction
|
||||
to Linear Mixed Effects Models</a><br />
|
||||
April 25-April 26, 2024 1:00-3:00pm PDT</p>
|
||||
<p><a href="https://gladstone.org/events/introduction-rna-seq-analysis-8">Introduction
|
||||
to RNA-Seq Analysis</a><br />
|
||||
February 13-February 14, 2025 1:00-4:00pm PST</p>
|
||||
<p><a href="https://gladstone.org/events/intermediate-rna-seq-analysis-using-r-6">Intermediate
|
||||
RNA-Seq Analysis Using R</a><br />
|
||||
February 20, 2025 9:00am-12:00pm PST</p>
|
||||
<p><a href="https://gladstone.org/events/introduction-statistics-experimental-design-and-hypothesis-testing-2">Introduction
|
||||
to Statistics, Experimental Design and Hypothesis Testing</a><br />
|
||||
February 24-February 25, 2025 1:00-3:00pm PST</p>
|
||||
<p><a href="https://gladstone.org/events?series=data-science-training-program">Complete
|
||||
Schedule</a></p>
|
||||
</section></section>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
---
|
||||
title: "Introduction to Unix Command-line - Part 1"
|
||||
title: "Introduction to Unix Command-line"
|
||||
subtitle: "Part 1"
|
||||
author: "Natalie Elphick"
|
||||
date: "March 11th 2024"
|
||||
date: "February 10th 2025"
|
||||
knit: (function(input, ...) {
|
||||
rmarkdown::render(
|
||||
input,
|
||||
|
|
@ -15,7 +16,7 @@ output:
|
|||
|
||||
|
||||
```{r, setup, include=FALSE}
|
||||
knitr::opts_knit$set(root.dir = './unix_workshop_2024')
|
||||
knitr::opts_knit$set(root.dir = './unix_workshop')
|
||||
```
|
||||
|
||||
|
||||
|
|
@ -32,8 +33,8 @@ Bioinformatician I
|
|||
<br>
|
||||
|
||||
|
||||
**Ayushi Agrawal (TA)**
|
||||
Bioinformatician III
|
||||
**Reuben Thomas**
|
||||
Associate Core Director
|
||||
|
||||
|
||||
# The Unix Command-line
|
||||
|
|
@ -87,17 +88,17 @@ cd unix_workshop
|
|||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results="hide", highlight=FALSE, comment=NA, echo = TRUE}
|
||||
curl -L -o unix_workshop_2024.tar.gz 'https://www.dropbox.com/scl/fi/o8msrl3a1k986jvjll4mv/unix_workshop_2024.tar.gz?rlkey=m7jfkvpz0iq12zdzphq7013l5&dl=0'
|
||||
curl -L -o unix_workshop.tar.gz 'https://www.dropbox.com/scl/fi/tdzpoivf7mienlenunqhf/unix_workshop.tar.gz?rlkey=6bfxnqgc5n4lgc9mc80ld75z4&dl=0'
|
||||
```
|
||||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results='markup', highlight=FALSE, comment=NA, echo = TRUE}
|
||||
tar -xzf unix_workshop_2024.tar.gz
|
||||
tar -xzf unix_workshop.tar.gz
|
||||
```
|
||||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results='markup', highlight=FALSE, comment=NA, echo = TRUE}
|
||||
cd unix_workshop_2024
|
||||
cd unix_workshop
|
||||
```
|
||||
|
||||
|
||||
|
|
@ -228,7 +229,7 @@ pwd
|
|||
```
|
||||
|
||||
```{r, engine='bash', eval=TRUE, results='markup', highlight=FALSE, comment=NA, echo=FALSE}
|
||||
echo "/Users/your_username/unix_workshop_2024"
|
||||
echo "/Users/your_username/unix_workshop"
|
||||
```
|
||||
|
||||
- The default working directory when you log in or open a terminal is your user home directory <span style="background-color:#242423; color:white"> ~ </span>
|
||||
|
|
@ -257,7 +258,7 @@ ls -lah part_1
|
|||
## cd: move to a directory
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results='markup', highlight=FALSE, comment=NA}
|
||||
cd unix_workshop_2024/part_1
|
||||
cd unix_workshop/part_1
|
||||
ls -l
|
||||
```
|
||||
|
||||
|
|
@ -410,7 +411,7 @@ access additional software/commands.
|
|||
- curl supports multiple protocols but the most commonly used one is HTTPS
|
||||
|
||||
```{r, engine='bash', eval=TRUE, results="hide", highlight=FALSE, comment=NA, echo = TRUE}
|
||||
curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.109.refseq.tsv.gz
|
||||
curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.113.refseq.tsv.gz
|
||||
```
|
||||
|
||||
- `-o` gives the output file name and location
|
||||
|
|
@ -575,20 +576,14 @@ https://www.surveymonkey.com/r/F75J6VZ
|
|||
|
||||
|
||||
## Upcoming Data Science Training Program Workshops
|
||||
[Introduction to RNA-Seq Analysis](https://gladstone.org/events/introduction-rna-seq-analysis-8)
|
||||
February 13-February 14, 2025 1:00-4:00pm PST
|
||||
|
||||
[Introduction to Pathway Analysis](https://gladstone.org/events/introduction-pathway-analysis-1)
|
||||
April 2, 2024 1:00-4:00pm PDT
|
||||
|
||||
[Statistics of Enrichment Analysis Methods](https://gladstone.org/events/statistics-enrichment-analysis-methods-0)
|
||||
April 11-April 12, 2024 1:00-3:00pm PDT
|
||||
|
||||
[Working on Wynton](https://gladstone.org/events/working-wynton)
|
||||
April 15, 2024 1:00-4:00pm PDT
|
||||
|
||||
[Introduction to Linear Mixed Effects Models](https://gladstone.org/events/introduction-linear-mixed-effects-models)
|
||||
April 25-April 26, 2024 1:00-3:00pm PDT
|
||||
|
||||
[Intermediate RNA-Seq Analysis Using R](https://gladstone.org/events/intermediate-rna-seq-analysis-using-r-6)
|
||||
February 20, 2025 9:00am-12:00pm PST
|
||||
|
||||
[Introduction to Statistics, Experimental Design and Hypothesis Testing](https://gladstone.org/events/introduction-statistics-experimental-design-and-hypothesis-testing-2)
|
||||
February 24-February 25, 2025 1:00-3:00pm PST
|
||||
|
||||
[Complete Schedule](https://gladstone.org/events?series=data-science-training-program)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
---
|
||||
title: "Introduction to Unix Command-line - Part 2"
|
||||
title: "Introduction to Unix Command-line"
|
||||
subtitle: "Part 2"
|
||||
author: "Natalie Elphick"
|
||||
date: "March 12th 2024"
|
||||
date: "February 11th 2025"
|
||||
knit: (function(input, ...) {
|
||||
rmarkdown::render(
|
||||
input,
|
||||
|
|
@ -13,7 +14,7 @@ output:
|
|||
css: style.css
|
||||
---
|
||||
```{r, setup, include=FALSE}
|
||||
knitr::opts_knit$set(root.dir = './unix_workshop_2024')
|
||||
knitr::opts_knit$set(root.dir = './unix_workshop')
|
||||
```
|
||||
|
||||
##
|
||||
|
|
@ -24,7 +25,7 @@ knitr::opts_knit$set(root.dir = './unix_workshop_2024')
|
|||
## Introductions
|
||||
|
||||
**Natalie Elphick**
|
||||
Bioinformatician I
|
||||
Bioinformatician II
|
||||
|
||||
<br>
|
||||
|
||||
|
|
@ -50,22 +51,22 @@ cd unix_workshop
|
|||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results="hide", highlight=FALSE, comment=NA, echo = TRUE}
|
||||
curl -L -o unix_workshop_2024.tar.gz 'https://www.dropbox.com/scl/fi/o8msrl3a1k986jvjll4mv/unix_workshop_2024.tar.gz?rlkey=m7jfkvpz0iq12zdzphq7013l5&dl=0'
|
||||
curl -L -o unix_workshop.tar.gz 'https://www.dropbox.com/scl/fi/tdzpoivf7mienlenunqhf/unix_workshop.tar.gz?rlkey=6bfxnqgc5n4lgc9mc80ld75z4&dl=0'
|
||||
|
||||
```
|
||||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results='markup', highlight=FALSE, comment=NA, echo = TRUE}
|
||||
tar -xzf unix_workshop_2024.tar.gz
|
||||
tar -xzf unix_workshop.tar.gz
|
||||
```
|
||||
|
||||
|
||||
```{r, engine='bash', eval=FALSE, results='markup', highlight=FALSE, comment=NA, echo = TRUE}
|
||||
cd unix_workshop_2024
|
||||
cd unix_workshop
|
||||
```
|
||||
|
||||
```{r, engine='bash', eval=TRUE, results="hide", highlight=FALSE, comment=NA, echo = TRUE}
|
||||
curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.111.refseq.tsv.gz
|
||||
curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.113.refseq.tsv.gz
|
||||
```
|
||||
|
||||
# File Compression
|
||||
|
|
@ -416,26 +417,21 @@ https://www.surveymonkey.com/r/DY7K5ZY
|
|||
|
||||
|
||||
## Upcoming Data Science Training Program Workshops
|
||||
[Introduction to RNA-Seq Analysis](https://gladstone.org/events/introduction-rna-seq-analysis-8)
|
||||
February 13-February 14, 2025 1:00-4:00pm PST
|
||||
|
||||
[Introduction to Pathway Analysis](https://gladstone.org/events/introduction-pathway-analysis-1)
|
||||
April 2, 2024 1:00-4:00pm PDT
|
||||
|
||||
[Statistics of Enrichment Analysis Methods](https://gladstone.org/events/statistics-enrichment-analysis-methods-0)
|
||||
April 11-April 12, 2024 1:00-3:00pm PDT
|
||||
|
||||
[Working on Wynton](https://gladstone.org/events/working-wynton)
|
||||
April 15, 2024 1:00-4:00pm PDT
|
||||
|
||||
[Introduction to Linear Mixed Effects Models](https://gladstone.org/events/introduction-linear-mixed-effects-models)
|
||||
April 25-April 26, 2024 1:00-3:00pm PDT
|
||||
|
||||
[Intermediate RNA-Seq Analysis Using R](https://gladstone.org/events/intermediate-rna-seq-analysis-using-r-6)
|
||||
February 20, 2025 9:00am-12:00pm PST
|
||||
|
||||
[Introduction to Statistics, Experimental Design and Hypothesis Testing](https://gladstone.org/events/introduction-statistics-experimental-design-and-hypothesis-testing-2)
|
||||
February 24-February 25, 2025 1:00-3:00pm PST
|
||||
|
||||
[Complete Schedule](https://gladstone.org/events?series=data-science-training-program)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
```{r, engine='bash', eval=TRUE, results='markup', highlight=FALSE, comment=NA, echo=FALSE}
|
||||
rm part_2/example_script.sh
|
||||
rm part_2/homo_sapiens.refseq.tsv*
|
||||
|
|
|
|||
BIN
intro-unix-command-line/unix_workshop.tar.gz
Normal file
BIN
intro-unix-command-line/unix_workshop.tar.gz
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue