diff --git a/docs/Intro_to_Unix_Part_1.html b/docs/Intro_to_Unix_Part_1.html index 4af4529..d478ba7 100644 --- a/docs/Intro_to_Unix_Part_1.html +++ b/docs/Intro_to_Unix_Part_1.html @@ -639,13 +639,14 @@ workshop.
Shell commands are basic instructions used to perform specific tasks.
-Basic structure of commands:
-command_name -[option(s)] [argument(s)]
command_name -[option(s)] [argument(s)]
Example:
ls -lah part_1
Here we are providing multiple options to the ls command
and the directory part_1 as an argument
ls -lah part_1
total 8
-drwx---rw-@ 4 nelphick staff 128B Apr 14 11:26 .
-drwxr-xr-x@ 5 nelphick staff 160B Apr 14 11:26 ..
--rw-r--rw-@ 1 nelphick staff 0B Apr 11 16:29 .hidden_file.txt
--rw-r--rw-@ 1 nelphick staff 60B Apr 12 15:40 list_numbers.tsv
+drwx---rw-@ 4 nelphick staff 128B Apr 16 21:10 .
+drwxr-xr-x@ 5 nelphick staff 160B Apr 16 21:10 ..
+-rw-r--r--@ 1 nelphick staff 0B Apr 11 16:29 .hidden_file.txt
+-rw-r--r--@ 1 nelphick staff 60B Apr 12 15:40 list_numbers.tsv
cd unix_workshop_2023/part_1
ls -l
total 8
--rw-r--rw-@ 1 nelphick staff 60 Apr 12 15:40 list_numbers.tsv
+-rw-r--r--@ 1 nelphick staff 60 Apr 12 15:40 list_numbers.tsv
cd ..
ls -l
total 0
-drwx---rw-@ 4 nelphick staff 128 Apr 14 11:26 part_1
-drwxr-xr-x@ 2 nelphick staff 64 Apr 14 11:26 part_2
+drwx---rw-@ 4 nelphick staff 128 Apr 16 21:10 part_1
+drwxr-xr-x@ 2 nelphick staff 64 Apr 16 21:10 part_2
If you are following along with the commands we have run so far, this is the file structure you should have:
-ls ./*
-./new_directory:
+ls *
+new_directory:
new_file1.txt
-./part_1:
+part_1:
list_numbers.tsv
-./part_2:
+part_2:
* is a wildcard so ls will list all files and
-directories in the current one
If you followed along with the commands we have run so far, you should have this directory structure:
-ls ./*
-./new_directory:
+ls *
+new_directory:
new_file1.txt
-./part_1:
+part_1:
list_numbers.csv
list_numbers.tsv
subset_list_numbers.tsv
-./part_2:
+part_2:
homo_sapiens.refseq.tsv.gz
+cat part_1/list_numbers.csv | cut -d "," -f 1 | sort -n
+1
+7
+13
+cat part_1/list_numbers.csv | cut -d "," -f 8 | sort -nu
+1
+3
+wc : count lines and words
chmod : Change the permissions of a file or directory
chown : Change the owner of a file or directory
Natalie Elphick
+Bioinformatician I
Yihang Xin (TA)
+Software Engineer II
Run the following commands if you did not attend part 1:
+mkdir unix_workshop
+cd unix_workshop
+curl -L -o unix_workshop_2023.tar.gz 'https://www.dropbox.com/s/smb12au2y82jmvq/unix_workshop_2023.tar.gz?dl=0'
+tar -xzf unix_workshop_2023.tar.gz
+cd unix_workshop_2023
+curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.109.refseq.tsv.gz
+gzip : compresses a file and replaces it with a compressed version (.gz)
+tar : create and manipulate archive files
+Archive: a single file that contains one or more files and/or folders that have been compressed
+gunzip part_2/homo_sapiens.refseq.tsv.gz
+du -h part_2/homo_sapiens.refseq.tsv
+ 26M part_2/homo_sapiens.refseq.tsv
+gzip part_2/homo_sapiens.refseq.tsv
+du -h part_2/homo_sapiens.refseq.tsv.gz
+2.7M part_2/homo_sapiens.refseq.tsv.gz
+tar -czf part_1.tar.gz part_1
+ls -l
+total 8
+drwx---rw-@ 4 nelphick staff 128 Apr 16 21:10 part_1
+-rw-r--r--@ 1 nelphick staff 814 Apr 16 21:11 part_1.tar.gz
+drwxr-xr-x@ 3 nelphick staff 96 Apr 16 21:11 part_2
+tar -xzf part_1.tar.gz
+gunzip -c
+gunzip -c part_2/homo_sapiens.refseq.tsv.gz | head
+gene_stable_id transcript_stable_id protein_stable_id xref db_name info_type source_identity xref_identity linkage_type
+ENSG00000160072 ENST00000673477 ENSP00000500094 NP_001304167 RefSeq_peptide INFERRED_PAIR - - -
+ENSG00000160072 ENST00000673477 ENSP00000500094 NP_114127 RefSeq_peptide DIRECT 100 100 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 NM_001317238 RefSeq_mRNA DIRECT 90 82 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 NM_031921 RefSeq_mRNA DIRECT 100 100 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 XM_005244806 RefSeq_mRNA_predicted DIRECT 45 94 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 XM_011542241 RefSeq_mRNA_predicted DIRECT 35 87 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 XM_011542244 RefSeq_mRNA_predicted DIRECT 90 87 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 XM_047431593 RefSeq_mRNA_predicted SEQUENCE_MATCH 90 96 -
+ENSG00000160072 ENST00000673477 ENSP00000500094 XR_001737468 RefSeq_ncRNA_predicted DIRECT - - -
+Example:
+echo $HOME
+/Users/nelphick
+$PATH to find its associated executable file
+echo $PATH
+/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/go/bin:/usr/local/mysql/bin
+$PATH like this:
+export PATH="/path/to/new/software:$PATH"
+$PATH for the current terminal session
+~/.bashrc or ~/.zshrc
+$PATH incorrectly can break system functionality
+which ls
+/bin/ls
+.sh
+nano part_2/example_script.sh
+#!/bin/bash
+#! tells the OS where the interpreter is
+which bash
+/bin/bash
+ls -l part_2/example_script.sh
+-rw-r--r--@ 1 nelphick staff 287 Apr 16 21:11 part_2/example_script.sh
+chmod u+x part_2/example_script.sh
+ls -l part_2/example_script.sh
+-rwxr--r--@ 1 nelphick staff 287 Apr 16 21:11 part_2/example_script.sh
+#!/bin/bash
+
+# This is a comment. Comments are ignored by the shell.
+
+# $1 is the first argument passed to the script
+echo "Counting the genes in $1"
+
+# count the unique genes in the file
+u_genes=$(gunzip -c $1 | cut -f 1 | sort -u | wc -l)
+
+echo "There are $u_genes unique genes in $1"
+./part_2/example_script.sh part_2/homo_sapiens.refseq.tsv.gz
+Counting the genes in part_2/homo_sapiens.refseq.tsv.gz
+There are 32538 unique genes in part_2/homo_sapiens.refseq.tsv.gz
+for i in {1..3}
+do
+
+echo $i
+
+done
+1
+2
+3
+count=0
+
+while [ $count -lt 5 ] # loop while count is less than 5
+do
+ echo $count
+ count=$((count+1))
+done
+0
+1
+2
+3
+4
+x=5
+
+if [ $x -gt 10 ] # check if x is greater than 10
+then
+ echo "x is greater than 10"
+else
+ echo "x is not greater than 10"
+fi # end if statement
+x is not greater than 10
+Example:
+sed 's/search_string/replace_string/g' input.txt > output.txt
+ssh username@remote
+username would be your user on the remote server
+and remote is the hostname or IP address of the remote
+server or computer
+scp [options] [source] [destination]
+scp /path/to/local/file.txt username@remote:/path/to/remote/directory/
+scp username@remote:/path/to/file.txt /path/to/local/directory/
+Basic command:
+awk options 'pattern {action}' input_file
+awk -F '\t' '{print $1+$2}' part_1/list_numbers.tsv
+4
+15
+17
+$1,$2 : the first and second fields
+gunzip -c part_2/homo_sapiens.refseq.tsv.gz | \
+awk -F '\t' '$5 == "RefSeq_mRNA" {sum += $7; count++} \
+END {print sum / count}'
+
+65.4642
+Linear
+Mixed Effects Modeling
+April 24-April 25, 2023 10:00am-12:00pm PDT
Machine
+Learning
+April 28, 2023 10:00am-12:00pm PDT
Advanced
+Cytoscape Automation
+May 2, 2023 1:00-4:00pm PDT
Introduction
+to RNA-Seq Analysis
+May 15-May 16, 2023 9:00am-12:00pm PDT
Natalie Elphick
-Bioinformatician I
Yihang Xin (TA)
-Software Engineer II
The software that provides access to the CLI
Open your terminal
-mkdir unix_workshop
-cd unix_workshop
-curl -L -o unix_workshop_2023.tar.gz 'https://www.dropbox.com/s/smb12au2y82jmvq/unix_workshop_2023.tar.gz?dl=0'
-tar -xzf unix_workshop_2023.tar.gz
-cd unix_workshop_2023
-echo $0
-bash
-Both bash and zsh should be able to run all of the commands in this workshop.
-Shell commands are basic instructions used to perform specific tasks.
-Basic structure of commands:
-command_name -[option(s)] [argument(s)]
Example:
-ls -lah part_1
-Here we are providing multiple options to the ls command
-and the directory part_1 as an argument
man echo
-ECHO(1) General Commands Manual ECHO(1)
-
-NAME
- echo – write arguments to the standard output
-
-SYNOPSIS
- echo [-n] [string ...]
-
-DESCRIPTION
- The echo utility writes any specified operands, separated by single blank
- (‘ ’) characters and followed by a newline (‘\n’) character, to the
- standard output.
-
- The following option is available:
-
- -n Do not print the trailing newline character. This may also be
- achieved by appending ‘\c’ to the end of the string, as is done by
- iBCS2 compatible systems. Note that this option as well as the
- effect of ‘\c’ are implementation-defined in IEEE Std 1003.1-2001
- (“POSIX.1”) as amended by Cor. 1-2002. Applications aiming for
- maximum portability are strongly encouraged to use printf(1) to
- suppress the newline character.
-
- Some shells may provide a builtin echo command which is similar or
- identical to this utility. Most notably, the builtin echo in sh(1) does
- not accept the -n option. Consult the builtin(1) manual page.
-
-EXIT STATUS
- The echo utility exits 0 on success, and >0 if an error occurs.
-
-SEE ALSO
- builtin(1), csh(1), printf(1), sh(1)
-
-STANDARDS
- The echo utility conforms to IEEE Std 1003.1-2001 (“POSIX.1”) as amended
- by Cor. 1-2002.
-
-macOS 13.2 April 12, 2003 macOS 13.2
-Use the arrow keys to navigate the manual and press q to
-close it
MAN(1) General Commands Manual MAN(1)
-
-NAME
- man, apropos, whatis – display online manual documentation pages
-
-SYNOPSIS
- man [-adho] [-t | -w] [-M manpath] [-P pager] [-S mansect]
- [-m arch[:machine]] [-p [eprtv]] [mansect] page ...
-
- man -f [-d] [-M manpath] [-P pager] [-S mansect] keyword ...
- whatis [-d] [-s mansect] keyword ...
-
- man -k [-d] [-M manpath] [-P pager] [-S mansect] keyword ...
- apropos [-d] [-s mansect] keyword ...
-
-DESCRIPTION
- The man utility finds and displays online manual documentation pages. If
- mansect is provided, man restricts the search to the specific section of
- the manual.
-
- The sections of the manual are:
- 1. General Commands Manual
- 2. System Calls Manual
- 3. Library Functions Manual
- 4. Kernel Interfaces Manual
- 5. File Formats Manual
- 6. Games Manual
- 7. Miscellaneous Information Manual
- 8. System Manager's Manual
- 9. Kernel Developer's Manual
-
- Options that man understands:
-
- -M manpath
- Forces a specific colon separated manual path instead of the
- default search path. See manpath(1). Overrides the MANPATH
- environment variable.
-
- -P pager
- Use specified pager. Defaults to “less -sR” if color support is
- enabled, or “less -s”. Overrides the MANPAGER environment
- variable, which in turn overrides the PAGER environment variable.
-
- -S mansect
- Restricts manual sections searched to the specified colon
- delimited list. Defaults to “1:8:2:3:3lua:n:4:5:6:7:9:l”.
- Overrides the MANSECT environment variable.
-
- -a Display all manual pages instead of just the first found for each
- page argument.
-
- -d Print extra debugging information. Repeat for increased
- verbosity. Does not display the manual page.
-
- -f Emulate whatis(1). Note that only a subset of options will have
- any effect when man is invoked in this mode. See the below
- description of whatis options for details.
-
- -h Display short help message and exit.
-
- -k Emulate apropos(1). Note that only a subset of options will have
- any effect when man is invoked in this mode. See the below
- description of apropos options for details.
-
- -m arch[:machine]
- Override the default architecture and machine settings allowing
- lookup of other platform specific manual pages. This option is
- accepted, but not implemented, on macOS.
-
- -o Force use of non-localized manual pages. See IMPLEMENTATION
- NOTES for how locale specific searches work. Overrides the
- LC_ALL, LC_CTYPE, and LANG environment variables.
-
- -p [eprtv]
- Use the list of given preprocessors before running nroff(1) or
- troff(1). Valid preprocessors arguments:
-
- e eqn(1)
- p pic(1)
- r refer(1)
- t tbl(1)
- v vgrind(1)
-
- Overrides the MANROFFSEQ environment variable.
-
- -t Send manual page source through troff(1) allowing transformation
- of the manual pages to other formats.
-
- -w Display the location of the manual page instead of the contents
- of the manual page.
-
- Options that apropos and whatis understand:
-
- -d Same as the -d option for man.
-
- -s Same as the -S option for man.
-
- When man is operated in apropos or whatis emulation mode, only a subset
- of its options will be honored. Specifically, -d, -M, -P, and -S have
- equivalent functionality in the apropos and whatis implementation
- provided. The MANPATH, MANSECT, and MANPAGER environment variables will
- similarly be honored.
-
-IMPLEMENTATION NOTES
- Locale Specific Searches
- The man utility supports manual pages in different locales. The search
- behavior is dictated by the first of three environment variables with a
- nonempty string: LC_ALL, LC_CTYPE, or LANG. If set, man will search for
- locale specific manual pages using the following logic:
-
- lang_country.charset
- lang.charset
- en.charset
-
- For example, if LC_ALL is set to “ja_JP.eucJP”, man will search the
- following paths when considering section 1 manual pages in
- /usr/share/man:
-
- /usr/share/man/ja_JP.eucJP/man1
- /usr/share/man/ja.eucJP/man1
- /usr/share/man/en.eucJP/man1
- /usr/share/man/man1
-
- Displaying Specific Manual Files
- The man utility also supports displaying a specific manual page if passed
- a path to the file as long as it contains a ‘/’ character.
-
-ENVIRONMENT
- The following environment variables affect the execution of man:
-
- LC_ALL, LC_CTYPE, LANG
- Used to find locale specific manual pages. Valid values
- can be found by running the locale(1) command. See
- IMPLEMENTATION NOTES for details. Influenced by the -o
- option.
-
- MACHINE_ARCH, MACHINE
- Used to find platform specific manual pages. If unset,
- the output of “sysctl hw.machine_arch” and “sysctl
- hw.machine” is used respectively. See IMPLEMENTATION
- NOTES for details. Corresponds to the -m option.
-
- MANPATH The standard search path used by man(1) may be changed by
- specifying a path in the MANPATH environment variable.
- Invalid paths, or paths without manual databases, are
- ignored. Overridden by -M. If MANPATH begins with a
- colon, it is appended to the default list; if it ends
- with a colon, it is prepended to the default list; or if
- it contains two adjacent colons, the standard search path
- is inserted between the colons. If none of these
- conditions are met, it overrides the standard search
- path.
-
- MANROFFSEQ Used to determine the preprocessors for the manual source
- before running nroff(1) or troff(1). If unset, defaults
- to tbl(1). Corresponds to the -p option.
-
- MANSECT Restricts manual sections searched to the specified colon
- delimited list. Corresponds to the -S option.
-
- MANWIDTH If set to a numeric value, used as the width manpages
- should be displayed. Otherwise, if set to a special
- value “tty”, and output is to a terminal, the pages may
- be displayed over the whole width of the screen.
-
- MANCOLOR If set, enables color support.
-
- MANPAGER Program used to display files.
-
- If unset, and color support is enabled, “less -sR” is
- used.
-
- If unset, and color support is disabled, then PAGER is
- used. If that has no value either, “less -s” is used.
-
-FILES
- /etc/man.conf
- System configuration file.
- /usr/local/etc/man.d/*.conf
- Local configuration files.
-
-EXIT STATUS
- The man utility exits 0 on success, and >0 if an error occurs.
-
-EXAMPLES
- Show the manual page for stat(2):
-
- $ man 2 stat
-
- Show all manual pages for ‘stat’.
-
- $ man -a stat
-
- List manual pages which match the regular expression either in the title
- or in the body:
-
- $ man -k '\<copy\>.*archive'
-
- Show the manual page for ls(1) and use cat(1) as pager:
-
- $ man -P cat ls
-
- Show the location of the ls(1) manual page:
-
- $ man -w ls
-
-SEE ALSO
- apropos(1), intro(1), mandoc(1), manpath(1), whatis(1), intro(2),
- intro(3), intro(3lua), intro(4), intro(5), man.conf(5), intro(6),
- intro(7), mdoc(7), intro(8), intro(9)
-
-macOS 13.2 January 9, 2021 macOS 13.2
-message="Hello, World!"
-echo $message
-Hello, World!
-Here, we assign the string “Hello, World!” to the variable
-message and use echo to print its value.
history
-clear to clear the output from the terminal
-/data/file1.txt
-file1.txt
-./file1.txt
-pwd
-/Users/your_username/unix_workshop_2023
-ls .
-part_1
-part_2
--l show more information (file permissions and size)
--a show all (hidden files)
--h file sizes in human readable format (e.g., 1K, 2G)
-ls -lah part_1
-total 8
-drwx---rw-@ 4 nelphick staff 128B Apr 14 11:26 .
-drwxr-xr-x@ 5 nelphick staff 160B Apr 14 11:26 ..
--rw-r--rw-@ 1 nelphick staff 0B Apr 11 16:29 .hidden_file.txt
--rw-r--rw-@ 1 nelphick staff 60B Apr 12 15:40 list_numbers.tsv
-cd unix_workshop_2023/part_1
-ls -l
-total 8
--rw-r--rw-@ 1 nelphick staff 60 Apr 12 15:40 list_numbers.tsv
-cd ..
-ls -l
-total 0
-drwx---rw-@ 4 nelphick staff 128 Apr 14 11:26 part_1
-drwxr-xr-x@ 2 nelphick staff 64 Apr 14 11:26 part_2
-touch new_file.txt
-mkdir new_directory
--p make parent directories if they don’t exist
-mv new_file.txt new_directory
-mv new_directory/new_file.txt new_directory/new_file1.txt
-cp new_directory/new_file1.txt new_directory/new_file2.txt
--r to copy a folder (recursive)
-rm new_directory/new_file2.txt
-du -h */*
- 0B new_directory/new_file1.txt
-4.0K part_1/list_numbers.tsv
--h - Displays the output in human readable format
-If you are following along with the commands we have run so far, this is the file structure you should have:
-ls ./*
-./new_directory:
-new_file1.txt
-
-./part_1:
-list_numbers.tsv
-
-./part_2:
-* is a wildcard so ls will list all files and
-directories in the current one
-nano new_directory/new_file1.txt
-Used to install and manage software
macOS
-WSL/Linux
-We will not install any software in this workshop but these are how you would access additional software/commands.
-curl -o part_2/homo_sapiens.refseq.tsv.gz https://ftp.ensembl.org/pub/current_tsv/homo_sapiens/Homo_sapiens.GRCh38.109.refseq.tsv.gz
--o gives the output file name and location
-scp /path/to/local/file user@remote.host:/path/to/remote/directory
-Searches the contents of the input file and returns the lines that have a match
Regular Expressions : sequence of characters that forms a search pattern
grep "7" part_1/list_numbers.tsv
-7 8 52 13 6 42 79 1
-13 4 9 82 67 71 93 3
-grep "3$" part_1/list_numbers.tsv
-1 3 6 10 11 22 0 3
-13 4 9 82 67 71 93 3
-head -n 1 part_1/list_numbers.tsv
-1 3 6 10 11 22 0 3
-tail -n 1 part_1/list_numbers.tsv
-13 4 9 82 67 71 93 3
-cat part_1/list_numbers.tsv
-1 3 6 10 11 22 0 3
-7 8 52 13 6 42 79 1
-13 4 9 82 67 71 93 3
-cut -f 1-3,6 part_1/list_numbers.tsv
-1 3 6 22
-7 8 52 42
-13 4 9 71
-By default cut expects columns to be separated by tab
-characters.
grep "3$" part_1/list_numbers.tsv | cut -f 1-3
-1 3 6
-13 4 9
-grep "3$" part_1/list_numbers.tsv | cut -f 1-3 > part_1/subset_list_numbers.tsv
-cat part_1/list_numbers.tsv | tr "\t" "," > part_1/list_numbers.csv
-cat part_1/list_numbers.csv
-1,3,6,10,11,22,0,3
-7,8,52,13,6,42,79,1
-13,4,9,82,67,71,93,3
-If you followed along with the commands we have run so far, you should have this directory structure:
-ls ./*
-./new_directory:
-new_file1.txt
-
-./part_1:
-list_numbers.csv
-list_numbers.tsv
-subset_list_numbers.tsv
-
-./part_2:
-homo_sapiens.refseq.tsv.gz
-chmod : Change the permissions of a file or directory
-chown : Change the owner of a file or directory
-df : Display information about disk usage and available space
-ps : Display information about running processes
-kill : Stop a running process
-less : View the contents of a file one page at a time
-date : prints the date and time
-curl wttr.in : check the weather
-Linear
-Mixed Effects Modeling
-April 24-April 25, 2023 10:00am-12:00pm PDT
Machine
-Learning
-April 28, 2023 10:00am-12:00pm PDT
Advanced
-Cytoscape Automation
-May 2, 2023 1:00-4:00pm PDT
Introduction
-to RNA-Seq Analysis
-May 15-May 16, 2023 9:00am-12:00pm PDT