diff --git a/docs/Working_on_Wynton_Part_1.html b/docs/Working_on_Wynton_Part_1.html index b894631..b9cdf5a 100644 --- a/docs/Working_on_Wynton_Part_1.html +++ b/docs/Working_on_Wynton_Part_1.html @@ -417,10 +417,16 @@ right: 0; } .reveal code { background-color: #1e1e1eef; -color: white !important; +color: white; font-size: 1.2em; line-height: 1.2; } +.code-small code { +background-color: #1e1e1eef; +color: white; +font-size: 1em; +line-height: 1; +} .reveal code::selection { background-color: #d97306 !important; } @@ -429,8 +435,12 @@ background-color: #d97306 !important; background-color: black; } -.reveal pre code.output::selection { -background-color: #9c0366 !important; +.code-alt code { +background-color: #ffecd0ac; +max-height: 400px !important; +font-family: 'Menlo', sans-serif; +font-size: 0.8em; +color: rgb(76, 76, 76) } .reveal pre code.output { @@ -455,26 +465,27 @@ white-space: pre !important; overflow-x: auto !important; } +.reveal p, .reveal li, .reveal h1, .reveal h2, .reveal h3, .reveal h4, .reveal h5, .reveal h6 { +font-family: "Helvetica", sans-serif; +} +.reveal h3 { +color: black; +font-size: 0.7em; +} + .reveal h2 { font-weight: bold !important; color: #9c0366; +font-size: 1.3em; } .reveal h1 { font-weight: bold !important; color: #9c0366; +font-size: 2.0em; } .reveal .slides>section:first-child h2 { -color: #333; -font-weight: normal !important; -} - -.my-title-slide h1 { -font-weight: bold; -color: #9c0366; -} -.my-title-slide h2 { -color: #333; +color: black; font-weight: normal !important; } .reveal .slides>section:first-child h1 { @@ -492,12 +503,13 @@ margin-right: 50px !important; } .reveal ul ul { font-size: 0.75em; -margin-top: 5px !important; margin-bottom: 5px !important; } .reveal ol { display: block; -margin-bottom: 20px !important; +margin-bottom: 20px; +margin-left: 75px; +margin-right: 50px } .reveal img { max-width: 60%; @@ -523,12 +535,11 @@ margin-bottom: 10px !important; font-size: 80%; } .big-picture img{ -max-width: 70%; 
-border: 1px solid black !important; +max-width: 95%; } .reveal a { -color: #0c74dc !important; +color: #0c74dc; } .reveal a:hover { @@ -569,7 +580,7 @@ document.addEventListener('DOMContentLoaded', function(e) {

Introductions

Natalie Elphick
Bioinformatician I

-

Alex Pico (TA)
+

Alex Pico
Bioinformatics Core Director

@@ -608,6 +619,7 @@ a fast local network
  • A HPC Linux environment available to all UCSF researchers for free
  • +
  • Uses the Rocky 8 linux OS
  • Includes several hundred compute nodes and a large shared storage system (Cluster specifications)
    @@ -697,16 +709,38 @@ groups
    {local}$ scp local_file.tsv alice@dt1.wynton.ucsf.edu:~/

    Names:

    dt1 and dt2

    +
  • +
    +

    Compute Nodes

    + +

    Compute Jobs

    Storage

    +
    +
    +

    The File System

    +

    BeeGFS

    BeeGFS - Tips

    +
    +
    +

    Storage Advice

    +
    @@ -867,8 +917,7 @@ connections’ is set to 1

    Globus

    @@ -886,6 +935,26 @@ storage
  • Do not use rclone for transfers to Box, follow the Wynton to UCSF Box instructions
  • +
    +
    +

    Poll 1

    +

    Which of these can you not log in to from your +computer?

    +
      +
    1. Login Nodes
    2. +
    3. Development Nodes
    4. +
    5. Data transfer Nodes
    6. +
    7. Compute Nodes
    8. +
    +
    +
    +

    Poll 2

    +

    The /wynton directory is backed up on a nightly +basis so you do not need to back up the data you store here.

    +
      +
    1. True
    2. +
    3. False
    4. +
    @@ -895,9 +964,9 @@ UCSF Box instructions

    Basics

      +
    • Check if the tool is already available in a module
    • Ensure the software you are trying to install is compatible with -Rocky linux (use a container if not)
    • -
    • Check if the tool is already available in a module
    • +Rocky 8 linux (use a container if not)
    • Always install software in a development node
    • Download a precompiled binary or install from source
    • @@ -923,11 +992,16 @@ from source
      [alice@dev1 ~]$ make
       [alice@dev1 ~]$ make install
    -
    -

    Install Nextflow for Part 2

    +
    +

    Install Nextflow

      -
    • In part 2, we will run the nextflow rna-seq pipeline
    • -
    • Run the following to install nextflow:
    • +
    • Scientific workflow system with a community maintained set of core bioinformatics analysis pipelines +
        +
      • We will cover an example RNA-seq pipeline in part 2
        +
      • +
    • +
    • These can be configured to use the Wynton compute job submission +system
    [alice@dev1 ~]$ cd ~/software
     [alice@dev1 ~]$ curl -s "https://get.sdkman.io" | bash
    @@ -935,25 +1009,32 @@ from source
     [alice@log1 ~]$ ssh dev1
     [alice@dev1 ~]$ sdk install java 17.0.6-tem
     [alice@dev1 ~]$ wget -qO- https://get.nextflow.io | bash
    -
      -
    • Let us know if you run into any errors
    • -

    Containers

    +
    +
    +

    Motivation

    +
      +
    • Compute heavy jobs (high RAM, multiple cores) should be run on +compute nodes
    • +
    • Containers allow us to make additional software available to the +compute nodes +
        +
      • Also allows the use of software that might be hard to install on +Rocky 8 Linux
      • +
      • Improves reproducibility
      • +
    • +
    +

    Compute Jobs

    Definitions

    • Virtualization: When software mimics the functions -of physical hardware to run virtual machines -
        -
      • Work around to use OS specific or legacy software that might be hard -to install
      • -
      • Improves reproducibility
      • -
    • +of physical hardware to run virtual machines
    • Containers: Implements virtualization using an image as its base
    • Images: An ordered collection of root filesystem @@ -967,8 +1048,8 @@ container runtime
    • Wynton supports Apptainer (formerly singularity) containers

    • Docker is a commonly used -container creation software, these can be turned into apptainer -containers easily

    • +image creation software, these can be turned into apptainer image files +(.sif) easily

    • apptainer run

      • Run predefined script within container
      • @@ -1000,8 +1081,8 @@ image file / __ / __/ / / /_/ / | |/ |/ / /_/ / / / / /_/ / /_/ /_/ /_/\___/_/_/\____/ |__/|__/\____/_/ /_/\__,_/ (_)
    -
    -

    Example Container - Hello World

    +
    +

    Example Container

    diff --git a/docs/Working_on_Wynton_Part_2.html b/docs/Working_on_Wynton_Part_2.html new file mode 100644 index 0000000..cdf57b3 --- /dev/null +++ b/docs/Working_on_Wynton_Part_2.html @@ -0,0 +1,1316 @@ + + + + + + + Working on Wynton - Part 2 + + + + + + + + + + + + + + + + + +
    +
    + +
    +

    Working on Wynton - Part 2

    +

    Natalie Elphick

    +

    April 16th, 2024

    +
    + +
    +

    +
    +Press the ? key for tips on navigating these slides +
    +
    +
    +

    Introductions

    +

    Natalie Elphick
    +Bioinformatician I

    +

    Alex Pico
    +Bioinformatics Core Director

    +
    +
    +

    Target Audience

    +
      +
    • Prior experience with UNIX command-line
    • +
    +
    +
    +

    Part 2:

    +
      +
    1. Custom Containers
    2. +
    3. Submitting Compute Jobs
    4. +
    5. Array Jobs
    6. +
    7. GPU Jobs
    8. +
    9. Running Pipelines
    10. +
    11. Jupyter Notebooks
    12. +
    13. RStudio Server
    14. +
    15. How to get help
    16. +
    +
    +
    +
    +

    Custom Containers

    + +
    +
    +

    Motivation

    +
      +
    • Compute heavy jobs (high RAM, multiple cores) should be run on +compute nodes
    • +
    • Containers allow us to make additional software available to the +compute nodes +
        +
      • Also allows the use of software that might be hard to install on +Rocky 8 Linux
      • +
      • Improves reproducibility
      • +
    • +
    +

    Compute Jobs

    +
    +
    +

    Dockerfile Basics

    +
      +
    • Dockerfiles contain instructions to build an image in +layers
    • +
    • Layers are added using Dockerfile instruction syntax
    • +
    • Images are built by navigating to the directory that contains the +Dockerfile and running:
    • +
    +
    docker build .
    +
    +
    +

    Dockerfile Instructions

    +
      +
    • First instruction is always FROM which specifies +the base image +
        +
      • Base images are a starting point with some basics already installed +like the OS and build tools, find them on DockerHub
      • +
    • +
    • RUN : Use before running any shell commands
    • +
    • SHELL : Set the shell
    • +
    • USER : Set the user (within the image)
    • +
    • CMD : Set the default instruction to be run by the +image
    • +
    • COPY : COPY files into the image
    • +
    +

    See the Dockerfile +documentation for a full list of instructions

    +
    +
    +

    Example Dockerfile

    +
      +
    • Click here +to download the example Dockerfile
    • +
    • Open in your preferred text editor
    • +
    +
    # Bioconductor base image gives us access to a lot of bioinformatics tools and R packages.
    +FROM bioconductor/bioconductor_docker:RELEASE_3_17
    +
    +# Shell options: with pipefail, a RUN step fails if any command in a pipeline fails
    +SHELL ["/bin/bash", "-o", "pipefail", "-c"]
    +
    +# Root permissions are required to install packages
    +USER root
    +
    +
    +# Install any UNIX packages you need
    +# First we update the package list and then install GNU make
    +# We clean up after ourselves to reduce the image size
    +RUN apt-get update && apt-get upgrade -y \
    +    && apt-get install -y --no-install-recommends make \
    +    && apt-get clean \
    +    && rm -rf /var/lib/apt/lists/*
    +
    +# Install Seurat and harmony
    +RUN Rscript -e 'install.packages(c("Seurat","harmony"))'
    +# Check if installs worked
    +RUN Rscript -e 'lapply(c("Seurat","harmony"), library, character.only = TRUE)'
    +
    +
    +# Run container as non-root to avoid permission issues
    +RUN groupadd -g 10001 notroot && \
    +   useradd -u 10000 -g notroot notroot 
    +
    +# Switch to the non-root user
    +USER notroot:notroot
    +
    +# Default command to run when the container starts
    +CMD ["/bin/bash"]
    +
    +# Copy dockerfile into the image (optional, but can be useful for reproducibility)
    +COPY Dockerfile /Dockerfile
    +
    +
    +

    Building Example Image

    +
      +
    • Do not run this during the workshop +
        +
      • It requires a lot of RAM
      • +
    • +
    • On macOS, make sure you have the Docker Desktop App running
    • +
    • We can provide an additional argument to the build +command, -t, to set the name of the docker image +
        +
      • We can add version tags after the name using “:”
      • +
    • +
    +
    docker build -t docker_hub_user/seurat-harmony:1.0 .
    +
    +
    +

    Pushing Images to DockerHub

    +
      +
    • Make sure you are signed in to your DockerHub account locally +(Docker Desktop for macOS)
    • +
    • The image name must start with your user name
    • +
    +
    docker push docker_hub_user/seurat-harmony:1.0
    +
      +
    • These can then be “pulled” on to Wynton as apptainer image files +(image must be public)
    • +
    +
    [alice@dev1 ~]$ apptainer pull docker://docker_hub_user/seurat-harmony:1.0
    +
    +
    +

    Notes on Building Custom Images

    +
      +
    • Time consuming process and uses a lot of RAM on your local +machine
    • +
    • A good base image can save you a lot of time
    • +
    • You must run apt-get update and +apt-get install in the same command +
        +
      • Otherwise you will encounter caching issues
      • +
    • +
    • Remember to use apt-get install -y +
        +
      • You will have no control over the process while it’s building
      • +
    • +
    +
    +
    +
    +

    Compute Jobs

    + +
    +
    +

    Submission Script - Basics

    + +
    #!/bin/bash           # the shell language when run outside of the job scheduler
    +#                     # lines starting with #$ are instructions to the job scheduler
    +#$ -S /bin/bash       # the shell language when run via the job scheduler [IMPORTANT]
    +#$ -cwd               # job should run in the current working directory
    +#$ -j y               # STDERR and STDOUT should be joined
    +#$ -l mem_free=1G     # job requires up to 1 GiB of RAM per slot (core)
    +#$ -l scratch=2G      # job requires up to 2 GiB of local /scratch space
    +#$ -l h_rt=1:00:00   # job requires up to 1 hour of runtime 
    +#$ -r y               # if job crashes, it should be restarted
    +
    +date
    +hostname
    +
    +## End-of-job summary, if running as a job
    +[[ -n "$JOB_ID" ]] && qstat -j "$JOB_ID"  # This is useful for debugging and usage purposes,
    +                                          # e.g. "did my job exceed its memory request?"
    +
    +
    +

    Submission Script - Apptainer

    +
      +
    • Download +this example job submission script that uses a container
    • +
    • Paths that the container needs read/write access to need to be +mounted with APPTAINER_BINDPATH
    • +
    +
    #!/bin/bash
    +#$ -S /bin/bash      # the shell language when run via the job scheduler
    +#$ -cwd               # job should run in the current working directory
    +#$ -j y               # STDERR and STDOUT should be joined
    +#$ -l mem_free=1G     # job requires up to 1 GiB of RAM per slot
    +#$ -l scratch=2G      # job requires up to 2 GiB of local /scratch space
    +#$ -l h_rt=1:00:00    # job requires up to 1 hour of runtime
    +
    +
    +# Mount the current directory to the container
    +# Any directory that needs to be accessed by the container should be mounted
    +directory=$(pwd)
    +export APPTAINER_BINDPATH="$directory"
    +
    +h=$(hostname)
    +
    +singularity run hello-world_1.0.sif figlet $h > $directory/hello.txt
    +
    +[[ -n "$JOB_ID" ]] && qstat -j "$JOB_ID"
    +
    +
    +

    Parallel Processing Jobs

    +
      +
    • By default jobs run on a single core

    • +
    • Multicore jobs must run in a SGE parallel environment (PE) and +tell SGE how many cores the job will use

    • +
    • Do not use more cores than requested

    • +
    • There are four parallel environments on Wynton:

      +
        +
      • smp: for single-host parallel jobs using Symmetric +multiprocessing (SMP)
      • +
      • mpi: for multiple-host parallel jobs based on MPI +parallelization
      • +
      • mpi_onehost: for single-host parallel jobs based on +MPI parallelization
      • +
      • mpi-8: for multi-threaded multi-host jobs based on +MPI parallelization
      • +
    • +
    +
    +
    +

    Example Parallel Job

    +
      +
    • The simplest parallel environment on Wynton is smp, +a single node with n cores
    • +
    • Download +this example smp job submission script
    • +
    +
    #!/bin/bash
    +#$ -S /bin/bash 
    +#$ -cwd
    +#$ -j y
    +#$ -pe smp 4                    # 4 cores on a single node
    +#$ -l mem_free=2G               # 2 GiB of RAM per slot (core), so 8 GiB total
    +#$ -l scratch=5G                # 5 GiB of local /scratch space
    +#$ -l h_rt=08:00:00
    +
    +
    +# Code that requires 4 cores
    +# **Specify the number of cores as ${NSLOTS}**
    +
    +
    +
    +[[ -n "$JOB_ID" ]] && qstat -j "$JOB_ID"
    +
    +
    +

    Array Jobs

    +
      +
    • This is a good option if the script you want to run operates on +discrete sets of data +
        +
      • e.g. sample or chromosome
      • +
    • +
    • Download +this example array job submission script
    • +
    +
    #!/bin/bash           
    +#$ -S /bin/bash       
    +#$ -cwd               
    +#$ -j y               
    +#$ -l mem_free=1G     
    +#$ -l scratch=2G     
    +#$ -l h_rt=1:00:00   
    +#$ -t 1-5          # Number of tasks to run in the array (each is a job with the same resource requirements above)
    +
    +params=(sample1 sample2 sample3 sample4 sample5)
    +
    +# The task ID is stored in the variable SGE_TASK_ID
    +# This variable is used to index the array of parameters
    +# The task ID is 1-indexed
    +param=${params[$SGE_TASK_ID - 1]}
    +
    +echo "Running task $SGE_TASK_ID with parameter $param"
    +
    +# Code for each task
    +
    +[[ -n "$JOB_ID" ]] && qstat -j "$JOB_ID"
    +
    +
    +

    GPU Jobs

    +
      +
    • To run a GPU job, +specify -q gpu.q (queue) as a GPU queue +
        +
      • Other GPU queues may be available to you depending on your lab
      • +
    • +
    • It is important to specify the GPU using the +SGE_GPU variable so that your job uses its assigned GPU +
        +
      • For CUDA based tools, add export +CUDA_VISIBLE_DEVICES=$SGE_GPU to your submission script
      • +
    • +
    • GPU jobs must include a runtime request or they will be removed from +the queue
    • +
    +
    +
    +

    Submitting and Querying jobs

    +
      +
    • Use qsub to submit jobs
    • +
    +
    [alice@dev1 ~]$ qsub job1.sh
    +Your job 714888 ("job1.sh") has been submitted
    +
      +
    • Use qstat to check the status of your jobs
    • +
    +
    [alice@dev1 ~]$ qstat
    +job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID 
    +-----------------------------------------------------------------------------------------------------------------
    + 714888 0.06532 job1 alice     r     03/25/2024 19:54:18 member.q@msg-hmio1                 1        
    + 714889 0.06532 job2 alice     r     03/25/2024 19:54:19 member.q@msg-hmio1                 1        
    +

    Read the querying +jobs Wynton documentation for more information.

    +
    +
    +

    Estimating Job Resources

    +
      +
    • Try to estimate the amount of RAM needed using a small test +dataset
    • +
    • Request a little more RAM than you need to avoid having your job +cancelled
    • +
    • Check on jobs you are running for the first time with qstat +-j to make sure they are not exceeding their requested resources
    • +
    +
    +
    +

    Poll 3

    +

    Anything that you can run on a compute node can be run on a +development node.

    +
      +
    1. True
    2. +
    3. False
    4. +
    +
    +
    +
    +

    Running Pipelines

    + +
    +
    +

    Nextflow RNA-seq

    + +

    RNA-seq

    +
    +
    +

    Example - RNA-seq Pipeline

    +

    Do not run this during the workshop as it will fill up the +Wynton SGE queue

    +
      +
    • Download the testing +script +
        +
      • Runs a minimal test on the RNA-seq pipeline
      • +
    • +
    • Download the config +file +
        +
      • Configures nextflow to use the SGE job scheduler and sets limits on +compute job resources for each process
      • +
    • +
    • Put these in the same directory (do not use your user home directory +for this) and run the script in a screen/tmux session
    • +
    • When not running the test, the -profile should be +apptainer
    • +
    +
    +
    +
    +

    Jupyter Notebooks

    + +
    +
    +

    Installing Jupyter Notebooks

    +
      +
    • The preferred way to install and use Jupyter +notebooks on Wynton is through pip, not conda
    • +
    +
    python3 -m pip install --user notebook
    +
      +
    • Jupyter notebooks can only be run on development nodes
    • +
    • See the Wynton python +documentation for more info on managing python environments on +Wynton
    • +
    +
    +
    +

    Running Jupyter Notebooks - Step 1

    +
      +
    • You cannot connect from outside Wynton HPC directly to a development +node +
        +
      • Instead we need to use SSH port forwarding to establish the +connection with a local web browser
      • +
    • +
    • Find an available TCP port:
    • +
    +
    [alice@dev1 ~]$ module load CBI port4me
    +[alice@dev1 ~]$ port4me --tool=jupyter
    +47467
    +

    Note the port number returned by port4me, you will need this +later.

    +
    +
    +

    Running Jupyter Notebooks - Step 2

    +
      +
    • Launch Jupyter notebook using the port number from step 1
    • +
    +
    [alice@dev1]$ jupyter notebook --no-browser --port 47467
    +[I 2024-03-20 14:48:45.693 ServerApp] jupyter_lsp | extension was successfully linked.
    +[I 2024-03-20 14:48:45.698 ServerApp] jupyter_server_terminals | extension was successfully linked.
    +[I 2024-03-20 14:48:45.703 ServerApp] jupyterlab | extension was successfully linked.
    +[I 2024-03-20 14:48:45.708 ServerApp] notebook | extension was successfully linked.
    +[I 2024-03-20 14:48:46.577 ServerApp] notebook_shim | extension was successfully linked.
    +[I 2024-03-20 14:48:46.666 ServerApp] notebook_shim | extension was successfully loaded.
    +[I 2024-03-20 14:48:46.668 ServerApp] jupyter_lsp | extension was successfully loaded.
    +[I 2024-03-20 14:48:46.669 ServerApp] jupyter_server_terminals | extension was successfully loaded.
    +[I 2024-03-20 14:48:46.675 LabApp] JupyterLab extension loaded from /wynton/home/boblab/alice/.local/lib/python3.11/site-packages/jupyterlab
    +[I 2024-03-20 14:48:46.675 LabApp] JupyterLab application directory is /wynton/home/boblab/alice/.local/share/jupyter/lab
    +[I 2024-03-20 14:48:46.677 LabApp] Extension Manager is pypi.
    +[I 2024-03-20 14:48:46.707 ServerApp] jupyterlab | extension was successfully loaded.
    +[I 2024-03-20 14:48:46.711 ServerApp] notebook | extension was successfully loaded.
    +[I 2024-03-20 14:48:46.712 ServerApp] Serving notebooks from local directory: /wynton/home/boblab/alice
    +[I 2024-03-20 14:48:46.712 ServerApp] Jupyter Server 2.13.0 is running at:
    +[I 2024-03-20 14:48:46.712 ServerApp] http://localhost:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2
    +[I 2024-03-20 14:48:46.712 ServerApp]     http://127.0.0.1:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2
    +[I 2024-03-20 14:48:46.712 ServerApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
    +[C 2024-03-20 14:48:46.725 ServerApp]
    +
    +    To access the server, open this file in a browser:
    +        file:///wynton/home/boblab/alice/.local/share/jupyter/runtime/jpserver-2853162-open.html
    +    Or copy and paste one of these URLs:
    +        http://localhost:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2
    +        http://127.0.0.1:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2
    +
    +
    +

    Running Jupyter Notebooks - Step 3

    +
      +
    • Set up SSH port forwarding on your local machine in a separate +terminal, leave both terminals open
    • +
    +
    {local}$ ssh -J alice@log1.wynton.ucsf.edu -L 47467:localhost:47467 alice@dev1
    +...
    +[alice@dev1 ~]$ 
    +

    The notebook should now be available at the URL from step 2

    +
    +
    +
    +

    RStudio Server

    + +
    +
    +

    RStudio Server

    +
      +
    • RStudio +server is already available in the CBI module
    • +
    • This allows you to set up a personal RStudio instance that only you +can access
    • +
    • Requires two separate SSH connections to the cluster:
      + +
        +
      • One to launch RStudio Server
      • +
      • One to connect to it
      • +
    • +
    +
    +
    +

    RStudio Server - Step 1

    +
      +
    • Launch your own RStudio Server instance
    • +
    +
    [alice@dev1 ~]$ module load CBI rstudio-server-controller
    +[alice@dev1 ~]$ rsc start
    +alice, your personal RStudio Server 2023.09.1-494 running R 4.3.2 is available on:
    +
    +  <http://127.0.0.1:20612>
    +
    +Importantly, if you are running from a remote machine without direct access
    +to dev1, you need to set up SSH port forwarding first, which you can do by
    +running:
    +
    +  ssh -L 20612:dev1:20612 alice@log1.wynton.ucsf.edu
    +
    +in a second terminal from your local computer.
    +
    +Any R session started times out after being idle for 120 minutes.
    +WARNING: You now have 10 minutes, until 2023-11-15 17:06:50-08:00, to
    +connect and log in to the RStudio Server before everything times out.
    +Your one-time random password for RStudio Server is: y+IWo7rfl7Z7MRCPI3Z4
    +

    Note the password and URL, they will be needed to log in to the +server instance.

    +
    +
    +

    RStudio Server - Step 2

    +
      +
    • Connect to your personal RStudio Server instance from your local +machine in a separate terminal
    • +
    +
    {local}$ ssh -L 20612:dev1:20612 alice@log1.wynton.ucsf.edu
    +alice@log1.wynton.ucsf.edu's password: XXXXXXXXXXXXXXXXXXX
    +[alice@log1 ~]$ 
    +
    +
    +

    RStudio Server - Step 3

    +
      +
    • Open RStudio Server in your local web browser
    • +
    • Open the link from step 1
    • +
    • Enter your Wynton user name
    • +
    • Enter the password from step 1
    • +
    +
    +
    +
    +

    How to Get Help

    + +
    +
    +

    Wynton Questions

    +
      +
    • Follow the Wynton question +checklist
    • +
    • Email +
    • +
    • Slack +
        +
      • ucsf-wynton
      • +
      • Sign-up using a UCSF email address
      • +
      • Email support if that does not work
      • +
    • +
    • Zoom office hours every Tuesday from 11am to 12pm +
        +
      • Zoom URL in the message-of-the-day (MOTD) that you get when you log +into Wynton
      • +
    • +
    +
    +
    +

    Bioinformatics Questions

    + +
    +
    +
    +

    End of Part 2

    + +
    +
    +

    Thank You!

    + +
    +
    +

    Upcoming Data Science Training Program Workshops

    +

    Introduction +to Linear Mixed Effects Models
    +April 25-April 26, 2024 1-3pm PDT

    +

    Single +Cell RNA-Seq Data Analysis
    +April 29-April 30, 2024 9am-4pm PDT

    +

    Single +Cell ATAC-Seq Data Analysis Part 1
    +May 6-May 7, 2024 1-4pm PDT

    +

    Complete +Schedule

    +
    +
    +
    + + + + + + + + + + + + + + diff --git a/working-on-wynton-hpc/Working_on_Wynton_Part_1.Rmd b/working-on-wynton-hpc/Working_on_Wynton_Part_1.Rmd index 5127161..4fbc490 100644 --- a/working-on-wynton-hpc/Working_on_Wynton_Part_1.Rmd +++ b/working-on-wynton-hpc/Working_on_Wynton_Part_1.Rmd @@ -16,7 +16,7 @@ output: --- ```{r, setup, include=FALSE} -library(tidyverse) + ``` ## @@ -28,7 +28,7 @@ library(tidyverse) **Natalie Elphick** Bioinformatician I -**Alex Pico (TA)** +**Alex Pico** Bioinformatics Core Director @@ -61,6 +61,7 @@ Bioinformatics Core Director ## Wynton {.small-bullets} - A HPC Linux environment available to all UCSF researchers for free +- Uses the Rocky 8 linux OS - Includes several hundred compute nodes and a large shared storage system ([Cluster specifications](https://wynton.ucsf.edu/hpc/about/specs.html)) - Funded and administered cooperatively by UCSF campus IT and key research groups @@ -136,11 +137,36 @@ echo "{local}$ scp local_file.tsv alice@dt1.wynton.ucsf.edu:~/" dt1 and dt2 + +## Compute Nodes {.small-bullets .big-picture} + +- Can **not** be logged in to directly +- Used to run non-interactive compute job scripts +- The software to run the job script is provided using a container + + +![Compute Jobs](slide_materials/compute_job_workflow.svg) + + + # Storage + +## The File System {.small-bullets} + +- A file system how information is stored and retrieved on a computer + - Consists of files and directories +- A local file system is function of the operating system and only accessible from a single computer +- A shared file system is accessible from multiple computers + + + + + + ## BeeGFS {.small-bullets} -- Wynton uses a *parallel* file system called BeeGFS +- Wynton uses a *parallel* shared file system called BeeGFS - The files are stored as "chunks" spread across many different servers - BeeGFS has multiple services that work together to manage the file system - Storage (stores the chunks) @@ -162,12 +188,11 @@ dt1 and dt2 ## 
BeeGFS - I/O patterns {.small-bullets} - Anything that requires lots of metadata operations can feel slow - e.g: lots of writes to the same directory and lots of file lookups and directory searches (**conda**) -- Users are strongly encouraged to keep the number of reads and writes to a single directory to a reasonable number -- If using conda, putting the conda application inside a Apptainer (formerly singularity) container will result in better overall file system performance +- Keep the number of reads and writes to a single directory to a reasonable number +- If using conda, putting the conda application inside a Apptainer (formerly singularity) container will result in better performance ## BeeGFS - Tips -- Some general guidelines for optimum use of BeeGFS - Prefer fewer, large files over many small ones - Distribute reading and writing over several directories - Including compute job output and error files @@ -184,6 +209,7 @@ dt1 and dt2 - User home directory - limited to 500 GiB - /wynton/**[group_name]** - User group directory - disk quota varies by group + - Use this directory for any analysis you want to share with your lab - [More information on disk quotas](https://wynton.ucsf.edu/hpc/howto/storage-size.html#file-sizes-and-disk-quotas) To check your group disk quota run: @@ -211,13 +237,22 @@ echo 'beegfs-ctl --getquota --storagepoolid=12 --gid "$(id --group)"' - Gladstone's HIVE storage server is mounted directly to Wynton under **/gladstone** - Only certain HIVE folders are accessible directly on Wynton - Files under **/gladstone** are backed up -- Naming: **/gladstone/[lab]/[share]** +- Naming: **/gladstone/[lab]** + - Directories that are shared between multiple labs can be set up by contacting Gladstone IT - For more information visit the [IT knowledge base page](https://help.gladstone.org/support/solutions/articles/14000033963) +## Storage Advice + +- Always back up anything you store under **/wynton** +- Use **/gladstone** if you have access to it 
for all of your work since it is automatically backed up +- Use the scratch directories to store temporary files so they do not count against your group or user quotas + # Data Transfer + + ## Secure Copy - scp - Local file to Wynton @@ -252,7 +287,7 @@ echo "{local}$ scp alice@dt1.wynton.ucsf.edu:/path/to/local_file.tsv /destinatio ## Globus -- [Globus](https://wynton.ucsf.edu/hpc/transfers/globus.html) is a non-profit service for moving, syncing, and sharing large amounts of data asynchronously in the background +- [Globus](https://wynton.ucsf.edu/hpc/transfers/globus.html) is a service for moving, syncing, and sharing large amounts of data - Wynton Accounts are not required to transfer data with Globus - Useful for transferring data between institutions @@ -264,12 +299,33 @@ echo "{local}$ scp alice@dt1.wynton.ucsf.edu:/path/to/local_file.tsv /destinatio - Do not use rclone for transfers to Box, follow the [Wynton to UCSF Box](https://wynton.ucsf.edu/hpc/transfers/ucsf-box.html) instructions + + +## Poll 1 + +Which of these can you **not** log in to from your computer? + +1. Login Nodes +2. Development Nodes +3. Data transfer Nodes +4. Compute Nodes + +## Poll 2 + +The **/wynton** directory is backed up on a nightly basis so do not need to back up the data you store here. + +1. True +2. 
False + + + + # Installing Software ## Basics -- Ensure the software you are trying to install is compatible with Rocky linux (use a container if not) -- Check if the tool is already available in a [module](https://wynton.ucsf.edu/hpc/software/software-modules.html) +- Check if the tool is already available in a [module](https://wynton.ucsf.edu/hpc/software/software-repositories.html#software-repositories) +- Ensure the software you are trying to install is compatible with Rocky 8 linux (use a container if not) - Always install software in a development node - Download a precompiled binary or [install from source](https://wynton.ucsf.edu/hpc/howto/install-from-source.html) @@ -297,10 +353,12 @@ echo '[alice@dev1 ~]$ make' echo '[alice@dev1 ~]$ make install' ``` -## Install Nextflow for Part 2 +## Install Nextflow + +- Scientific workflow system with a community maintained set of [core bioinformatics analysis](https://nf-co.re/) pipelines + - We will cover an example RNA-seq pipeline in part 2 +- These can be configured to use the Wynton compute job submission system -- In part 2, we will run the nextflow rna-seq pipeline -- Run the following to install nextflow: ```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} echo '[alice@dev1 ~]$ cd ~/software' @@ -312,16 +370,25 @@ echo '[alice@dev1 ~]$ wget -qO- https://get.nextflow.io | bash' ``` -- Let us know if you run into any errors + # Containers + +## Motivation {.small-bullets} + +- Compute heavy jobs (high RAM, multiple cores) should be run on compute nodes +- Containers allow us to make additional software available to the compute nodes + - Also allows the use of software that might be hard to install on Rocky 8 Linux + - Improves reproducibility + +![Compute Jobs](slide_materials/compute_job_workflow.svg) + + ## Definitions {.small-bullets} - **Virtualization:** When software mimics the functions of physical hardware to run virtual machines - - Work around to use OS specific 
or legacy software that might be hard to install - - Improves reproducibility - **Containers:** Implements virtualization using an *image* as its base - **Images:** An ordered collection of root filesystem changes and the corresponding execution parameters for use within a container runtime @@ -329,7 +396,7 @@ echo '[alice@dev1 ~]$ wget -qO- https://get.nextflow.io | bash' ## Apptainer {.small-bullets} - Wynton supports [Apptainer](https://wynton.ucsf.edu/hpc/software/apptainer.html) (formerly singularity) containers -- [Docker](https://docs.docker.com/) is a commonly used container creation software, these can be turned into apptainer containers easily +- [Docker](https://docs.docker.com/) is a commonly used image creation software, these can be turned into apptainer image files (.sif) easily - apptainer run - Run predefined script within container @@ -362,7 +429,7 @@ echo ' __ __ ____ _ __ __ __ __ ``` -## Example Container - Hello World +## Example Container - This container has **figlet** installed which creates ASCII art from text input - Try running this command to create your own using *exec* @@ -376,7 +443,8 @@ echo '[alice@dev1 ~]$ apptainer exec hello-world_1.0.sif figlet your_text' - Docker uses Dockerfiles to specify image creation - Preferred by the Gladstone Bioinformatics Core to create new images - In part 2, we will go over how to build custom container images from DockerFiles - - If you want to follow along, [install the docker engine](https://docs.docker.com/engine/install/) following the instructions for your OS + - If you want to follow along, [install the docker engine](https://docs.docker.com/engine/install/) following the instructions for your OS + - Set up a free [DockerHub](https://hub.docker.com/) account to store your images - To see the Dockerfile used to create the hello-world image, run: ```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} @@ -405,8 +473,6 @@ April 29-April 30, 2024 9am-4pm PDT May 
6-May 7, 2024 1-4pm PDT - -[Complete Schedule](https://gladstone.org/events) -Click "Data Science Training Program" +[Complete Schedule](https://gladstone.org/events?series=189) diff --git a/working-on-wynton-hpc/Working_on_Wynton_Part_2.Rmd b/working-on-wynton-hpc/Working_on_Wynton_Part_2.Rmd new file mode 100644 index 0000000..341da38 --- /dev/null +++ b/working-on-wynton-hpc/Working_on_Wynton_Part_2.Rmd @@ -0,0 +1,449 @@ +--- +title: "Working on Wynton - Part 2" +author: "Natalie Elphick" +date: "April 16th, 2024" +knit: (function(input, ...) { + rmarkdown::render( + input, + output_dir = "../docs" + ) + }) +output: + revealjs::revealjs_presentation: + theme: simple + highlight: default + css: style.css +--- + +```{r, setup, include=FALSE} + +``` + +## + +
    *Press the ? key for tips on navigating these slides*
    + +## Introductions + +**Natalie Elphick** +Bioinformatician I + +**Alex Pico** +Bioinformatics Core Director + + +## Target Audience +- Prior experience with UNIX command-line + + + +## Part 2: + +1. Custom Containers +2. Submitting Compute Jobs +3. Array Jobs +4. GPU Jobs +5. Running Pipelines +6. Jupyter Notebooks +7. RStudio Server +8. How to get help + + + + +# Custom Containers + +## Motivation {.small-bullets} + +- Compute heavy jobs (high RAM, multiple cores) should be run on compute nodes +- Containers allow us to make additional software available to the compute nodes + - Also allows the use of software that might be hard to install on Rocky 8 Linux + - Improves reproducibility + +![Compute Jobs](slide_materials/compute_job_workflow.svg) + + + + +## Dockerfile Basics + +- Dockerfiles contain instructions to build an image in **layers** +- Layers are added using Dockerfile instruction syntax +- Images are built by navigating to the directory that contains the Dockerfile and running: + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo 'docker build .' 
+``` + +## Dockerfile Instructions {.small-bullets} +- First instruction is always **FROM** which specifies the base image + - Base images are a starting point with some basics already installed like the OS and build tools, find them on [DockerHub](https://hub.docker.com/) +- **RUN** : Use before running any shell commands +- **SHELL** : Set the shell +- **USER** : Set the user (within the image) +- **CMD** : Set the default instruction to be run by the image +- **COPY** : COPY files into the image + + +See the [Dockerfile documentation](https://docs.docker.com/reference/dockerfile/) for a full list of instructions + +## Example Dockerfile {.code-alt} + +- Click [here](https://www.dropbox.com/scl/fi/mdbefp3h8ahdvxtgjypqo/Dockerfile?rlkey=7d4zd9ge1m3wwszlfy78712ky&dl=1) to download the example Dockerfile +- Open in your preferred text editor + + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +curl -s -L -o Dockerfile 'https://www.dropbox.com/scl/fi/mdbefp3h8ahdvxtgjypqo/Dockerfile?rlkey=7d4zd9ge1m3wwszlfy78712ky&dl=0' +cat Dockerfile +rm Dockerfile +``` + +## Building Example Image + +- Do not run this during the workshop + - It requires a lot of RAM +- On macOS, make sure you have the Docker Desktop App running +- We can provide an additional argument to the **build** command, -t, to set the name of the docker image + - We can add version tags after the name using ":" +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo "docker build -t docker_hub_user/seurat-harmony:1.0 ."
+``` + + +## Pushing Images to DockerHub {.small-bullets} + +- Make sure you are signed in to your DockerHub account locally (Docker Desktop for macOS) +- The image name must start with your user name + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo "docker push docker_hub_user/seurat-harmony:1.0" +``` + +- These can then be "pulled" on to Wynton as apptainer image files (image must be public) +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo "[alice@dev1 ~]$ apptainer pull docker://docker_hub_user/seurat-harmony:1.0" +``` + +## Notes on Building Custom Images {.code-small} + +- Time consuming process and uses a lot of RAM on your local machine +- A good base image can save you a lot of time +- You must run `apt-get update` and `apt-get install` in the same command + - Otherwise you will encounter caching issues +- Remember to use `apt-get install -y` + - You will have no control over the process while it's building + +# Compute Jobs + + +## Submission Script - Basics {.small-bullets .code-alt} + +- [Download](https://www.dropbox.com/scl/fi/fzp33y1ojslw005q8epuz/simple_submission_script.sh?rlkey=xmg3lqec962y3i57a1bkriosx&dl=1) this example job submission script +- Read the full Wynton [job submission guide](https://wynton.ucsf.edu/hpc/scheduler/submit-jobs.html) +- Wynton uses the [Sun Grid Engine](https://web.archive.org/web/20210826212738/https://arc.liv.ac.uk/SGE/howto/howto.html) job scheduler + + + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +curl -s -L -o submission.sh 'https://www.dropbox.com/scl/fi/fzp33y1ojslw005q8epuz/simple_submission_script.sh?rlkey=xmg3lqec962y3i57a1bkriosx&dl=0' +cat submission.sh +rm submission.sh +``` + +## Submission Script - Apptainer {.small-bullets .code-alt} + +- 
[Download](https://www.dropbox.com/scl/fi/zzl9fnfcoxu3pyrx5ffd1/apptainer_submission_script.sh?rlkey=w05e18ahw4hvbvaucac379za9&dl=1) this example job submission script that uses a container +- Paths that the container needs read/write access to need to be mounted with APPTAINER_BINDPATH + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +curl -s -L -o submission.sh 'https://www.dropbox.com/scl/fi/zzl9fnfcoxu3pyrx5ffd1/apptainer_submission_script.sh?rlkey=w05e18ahw4hvbvaucac379za9&dl=1' +cat submission.sh +rm submission.sh +``` + +## Parallel Processing Jobs {.small-bullets} + +- By default jobs run on a single core +- Multicore jobs must run in a SGE parallel environment (PE) and tell SGE how many cores the job will use +- **Do not use more cores than requested** + + +- There are four parallel environments on Wynton: + - **smp**: for single-host parallel jobs using [Symmetric multiprocessing](https://en.wikipedia.org/wiki/Symmetric_multiprocessing) (SMP) + - **mpi**: for multiple-host parallel jobs based on [MPI parallelization](https://en.wikipedia.org/wiki/Message_Passing_Interface) + - **mpi_onehost**: for single-host parallel jobs based on MPI parallelization + - **mpi-8**: for multi-threaded multi-host jobs based on MPI parallelization + +## Example Parallel Job {.small-bullets .code-alt} + +- The simplest parallel environment on Wynton is **smp**, a single node with *n* cores +- [Download](https://www.dropbox.com/scl/fi/71xo0cioh266pj3uwcdps/smp_submission_script.sh?rlkey=kw7qaz8pip6jveqv317b5swqr&dl=1) this example smp job submission script +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +curl -s -L -o submission.sh 'https://www.dropbox.com/scl/fi/71xo0cioh266pj3uwcdps/smp_submission_script.sh?rlkey=kw7qaz8pip6jveqv317b5swqr&dl=0' +cat submission.sh +rm submission.sh +``` + + +## Array Jobs {.small-bullets .code-alt} + +- This is a good option if the script you want to run 
operates on discrete sets of data + - e.g. sample or chromosome +- [Download](https://www.dropbox.com/scl/fi/upl71jeny62fxfzkxao1f/array_job_submission_script.sh?rlkey=ggkyjxx8nz400e1t96mif5t34&dl=1) this example array job submission script + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +curl -s -L -o submission.sh 'https://www.dropbox.com/scl/fi/upl71jeny62fxfzkxao1f/array_job_submission_script.sh?rlkey=ggkyjxx8nz400e1t96mif5t34&dl=0' +cat submission.sh +rm submission.sh +``` + +## GPU Jobs {.small-bullets} + +- To run a [GPU job](https://wynton.ucsf.edu/hpc/scheduler/gpu.html), specify **-q gpu.q** (queue) as a GPU queue + - Other GPU queues may be available to you depending on your lab +- It is important to specify the GPU using the **SGE_GPU** variable so that your job uses its assigned GPU + - For CUDA based tools, add **export CUDA_VISIBLE_DEVICES=$SGE_GPU** to your submission script +- GPU jobs must include a runtime request or they will be removed from the queue + + +## Submitting and Querying jobs + +- Use **qsub** to submit jobs +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '[alice@dev1 ~]$ qsub job1.sh +Your job 714888 ("job1.sh") has been submitted' +``` + + + + +- Use **qstat** to check the status of your jobs + +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '[alice@dev1 ~]$ qstat +job-ID prior name user state submit/start at queue slots ja-task-ID +----------------------------------------------------------------------------------------------------------------- + 714888 0.06532 job1 alice r 03/25/2024 19:54:18 member.q@msg-hmio1 1 + 714889 0.06532 job2 alice r 03/25/2024 19:54:19 member.q@msg-hmio1 1 +' +``` + + +Read the [querying jobs](https://wynton.ucsf.edu/hpc/scheduler/list-jobs.html) Wynton documentation for more information. 
+ + +## Estimating Job Resources + +- Try to estimate the amount of RAM needed using a small test dataset +- Request a little more RAM than you need to avoid having your job cancelled +- Check on jobs you are running for the first time with **qstat -j job_id** to make sure they are not going over the requested amount + + +## Poll 3 + +Anything that you can run on a compute node can be run on a development node. + +1. True +2. False + + + +# Running Pipelines + + +## Nextflow RNA-seq {.small-bullets .big-picture} + +- Scientific workflow system with a community maintained set of core bioinformatics [analysis pipelines](https://nf-co.re/) +- The most commonly used one is the [RNA-seq pipeline](https://nf-co.re/rnaseq/3.14.0) + +![RNA-seq](slide_materials/nf-core-rnaseq_metro_map_grey.png) + + +## Example - RNA-seq Pipeline {.small-bullets} + +**Do not run this during the workshop as it will fill up the Wynton SGE queue** + +- Download the [testing script](https://www.dropbox.com/scl/fi/3c9qdmnwg8vw9x517mo05/nextflow_example.sh?rlkey=e9nxbvpcdtdyi5w0y16z9k7bq&dl=0) + - Runs a minimal test on the RNA-seq pipeline +- Download the [config file](https://www.dropbox.com/scl/fi/befhl3z6nipn1fqcxpqth/nextflow.config?rlkey=pd8d9vup6pnvb7bbrmekayn2j&dl=0) + - Configures nextflow to use the SGE job scheduler and sets limits on compute job resources for each process +- Put these in the same directory (do not use your user home directory for this) and run the script in a screen/tmux session +- When not running the test, the **-profile** should be apptainer + + + + + + + +# Jupyter Notebooks + +## Installing Jupyter Notebooks +- The preferred way to install and use [Jupyter notebooks](https://wynton.ucsf.edu/hpc/howto/jupyter.html) on Wynton is through pip, not conda +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo 'python3 -m pip install --user notebook' +``` +- Jupyter notebooks can only be run on development nodes +- See the Wynton [python
documentation](https://wynton.ucsf.edu/hpc/howto/python.html) for more info on managing python environments on Wynton + + +## Running Jupyter Notebooks - Step 1 + +- You cannot connect from outside Wynton HPC directly to a development node + - Instead we need to use SSH port forwarding to establish the connection with a local web browser +- Find an available TCP port: +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '[alice@dev1 ~]$ module load CBI port4me +[alice@dev1 ~]$ port4me --tool=jupyter +47467' +``` + +Note the port number returned by port4me, you will need this later. + + +## Running Jupyter Notebooks - Step 2 {.code-small} +- Launch Jupyter notebook using the port number from step 1 +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '[alice@dev1]$ jupyter notebook --no-browser --port 47467 +[I 2024-03-20 14:48:45.693 ServerApp] jupyter_lsp | extension was successfully linked. +[I 2024-03-20 14:48:45.698 ServerApp] jupyter_server_terminals | extension was successfully linked. +[I 2024-03-20 14:48:45.703 ServerApp] jupyterlab | extension was successfully linked. +[I 2024-03-20 14:48:45.708 ServerApp] notebook | extension was successfully linked. +[I 2024-03-20 14:48:46.577 ServerApp] notebook_shim | extension was successfully linked. +[I 2024-03-20 14:48:46.666 ServerApp] notebook_shim | extension was successfully loaded. +[I 2024-03-20 14:48:46.668 ServerApp] jupyter_lsp | extension was successfully loaded. +[I 2024-03-20 14:48:46.669 ServerApp] jupyter_server_terminals | extension was successfully loaded. +[I 2024-03-20 14:48:46.675 LabApp] JupyterLab extension loaded from /wynton/home/boblab/alice/.local/lib/python3.11/site-packages/jupyterlab +[I 2024-03-20 14:48:46.675 LabApp] JupyterLab application directory is /wynton/home/boblab/alice/.local/share/jupyter/lab +[I 2024-03-20 14:48:46.677 LabApp] Extension Manager is 'pypi'.
+[I 2024-03-20 14:48:46.707 ServerApp] jupyterlab | extension was successfully loaded. +[I 2024-03-20 14:48:46.711 ServerApp] notebook | extension was successfully loaded. +[I 2024-03-20 14:48:46.712 ServerApp] Serving notebooks from local directory: /wynton/home/boblab/alice +[I 2024-03-20 14:48:46.712 ServerApp] Jupyter Server 2.13.0 is running at: +[I 2024-03-20 14:48:46.712 ServerApp] http://localhost:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2 +[I 2024-03-20 14:48:46.712 ServerApp] http://127.0.0.1:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2 +[I 2024-03-20 14:48:46.712 ServerApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation). +[C 2024-03-20 14:48:46.725 ServerApp] + + To access the server, open this file in a browser: + file:///wynton/home/boblab/alice/.local/share/jupyter/runtime/jpserver-2853162-open.html + Or copy and paste one of these URLs: + http://localhost:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2 + http://127.0.0.1:47467/tree?token=8e37f8d62fca6a1c9b2da429f27df5ebcec706a808c3a8f2' +``` + +## Running Jupyter Notebooks - Step 3 + +- Set up SSH port forwarding on your local machine in a separate terminal, leave both terminals open +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '{local}$ ssh -J alice@log1.wynton.ucsf.edu -L 47467:localhost:47467 alice@dev1 +...
+[alice@dev1 ~]$ ' +``` + +The notebook should now be available at the URL from step 2 + + +# RStudio Server + +## RStudio Server +- [RStudio server](https://wynton.ucsf.edu/hpc/howto/rstudio.html) is already available in the CBI module +- This allows you to set up a personal RStudio instance that only you can access +- Requires two separate SSH connections to the cluster:\ + - One to launch RStudio Server + - One to connect to it + + +## RStudio Server - Step 1 {.code-small} + +- Launch your own RStudio Server instance +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '[alice@dev1 ~]$ module load CBI rstudio-server-controller +[alice@dev1 ~]$ rsc start +alice, your personal RStudio Server 2023.09.1-494 running R 4.3.2 is available on: + + + +Importantly, if you are running from a remote machine without direct access +to dev1, you need to set up SSH port forwarding first, which you can do by +running: + + ssh -L 20612:dev1:20612 alice@log1.wynton.ucsf.edu + +in a second terminal from your local computer. + +Any R session started times out after being idle for 120 minutes. +WARNING: You now have 10 minutes, until 2023-11-15 17:06:50-08:00, to +connect and log in to the RStudio Server before everything times out. +Your one-time random password for RStudio Server is: y+IWo7rfl7Z7MRCPI3Z4' +``` + +Note the password and URL, they will be needed to log in to the server instance. 
+ + +## RStudio Server - Step 2 + +- Connect to your personal RStudio Server instance from your local machine in a separate terminal +```{r, engine='bash', eval=TRUE, results='markup',comment=NA, highlight=TRUE, echo=FALSE} +echo '{local}$ ssh -L 20612:dev1:20612 alice@log1.wynton.ucsf.edu +alice1@log1.wynton.ucsf.edu:s password: XXXXXXXXXXXXXXXXXXX +[alice@log1 ~]$ ' +``` + +## RStudio Server - Step 3 +- Open RStudio Server in your local web browser +- Open the link from step 1 +- Enter your Wynton user name +- Enter the password from step 1 + +# How to Get Help + +## Wynton Questions + +- Follow the Wynton [question checklist](https://wynton.ucsf.edu/hpc/support/index.html) +- Email + - [support@wynton.ucsf.edu](mailto:support@wynton.ucsf.edu) +- Slack + - [ucsf-wynton](https://join.slack.com/t/ucsf-wynton/signup) + - Sign-up using a UCSF email address + - Email support if that does not work +- Zoom office hours every **Tuesday at 11-12pm** + - Zoom URL in the message-of-the-day (MOTD) that you get when you log into Wynton + + + +## Bioinformatics Questions + +- Email + - [bioinformatics@gladstone.ucsf.edu](mailto:bioinformatics@gladstone.ucsf.edu) +- Slack channel #questions-about-bioinformatics + - Contact us at the email above to be added to the channel + + + +# End of Part 2 + +## Thank You! 
+ +- Please take some time to fill out the workshop survey: +[https://www.surveymonkey.com/r/F75J6VZ](https://www.surveymonkey.com/r/F75J6VZ) + + +## Upcoming Data Science Training Program Workshops + + +[Introduction to Linear Mixed Effects Models](https://gladstone.org/events/introduction-linear-mixed-effects-models) +April 25-April 26, 2024 1-3pm PDT + +[Single Cell RNA-Seq Data Analysis](https://gladstone.org/events/single-cell-rna-seq-data-analysis) +April 29-April 30, 2024 9am-4pm PDT + +[Single Cell ATAC-Seq Data Analysis Part 1](https://gladstone.org/events/single-cell-atac-seq-data-analysis-part-1-1) +May 6-May 7, 2024 1-4pm PDT + + +[Complete Schedule](https://gladstone.org/events?series=189) + + + diff --git a/working-on-wynton-hpc/slide_materials/compute_job_workflow.svg b/working-on-wynton-hpc/slide_materials/compute_job_workflow.svg new file mode 100644 index 0000000..27614ec --- /dev/null +++ b/working-on-wynton-hpc/slide_materials/compute_job_workflow.svg @@ -0,0 +1,3 @@ + + +
    User
    Compute
    Node
    Submit
    Job
    Query
    Job Status
    User
    Job Script
    Container
\ No newline at end of file diff --git a/working-on-wynton-hpc/slide_materials/nf-core-rnaseq_metro_map_grey.png b/working-on-wynton-hpc/slide_materials/nf-core-rnaseq_metro_map_grey.png new file mode 100644 index 0000000..0dbf23f Binary files /dev/null and b/working-on-wynton-hpc/slide_materials/nf-core-rnaseq_metro_map_grey.png differ diff --git a/working-on-wynton-hpc/style.css b/working-on-wynton-hpc/style.css index ae62ab5..3963e02 100644 --- a/working-on-wynton-hpc/style.css +++ b/working-on-wynton-hpc/style.css @@ -12,17 +12,21 @@ .reveal code { background-color: #1e1e1eef; /* Dark background for code chunks */ - color: white !important; /* White text for code */ + color: white; /* White text for code */ font-size: 1.2em; line-height: 1.2; } -.reveal code::selection { - background-color: #d97306 !important; /* Dark magenta background for selected text */ +.code-small code { + background-color: #1e1e1eef; /* Dark background for code chunks */ + color: white; /* White text for code */ + font-size: 1em; + line-height: 1; } - - +.reveal code::selection { + background-color: #d97306 !important; /* Dark orange background for selected text */ +} /* Specific styles for code output: background */ @@ -30,11 +34,17 @@ background-color: black; /* Black background for code outputs */ } -/* Style for text selection within code outputs */ -.reveal pre code.output::selection { - background-color: #9c0366 !important; /* Dark magenta background for selected text in outputs */ +/* Custom class for code alt display */ +.code-alt code { + background-color: #ffecd0ac; /* Light, semi-transparent background for the alternate code display */ + max-height: 400px !important; + font-family: 'Menlo', sans-serif; + font-size: 0.8em; + color: rgb(76, 76, 76) } + + /* Code output text color */ .reveal pre code.output { color: white; @@ -63,32 +73,34 @@ pre, code, kbd, samp { white-space: pre !important; overflow-x: auto !important; } + +/* Change the font family used for all text except code */ +.reveal p, .reveal li,
.reveal h1, .reveal h2, .reveal h3, .reveal h4, .reveal h5, .reveal h6 { + font-family: "Helvetica", sans-serif; +} + +.reveal h3 { + color: black; + font-size: 0.7em; +} /* Bold slide titles and change color */ .reveal h2 { font-weight: bold !important; color: #9c0366; + font-size: 1.3em; } /* Bold slide titles and change color */ .reveal h1 { font-weight: bold !important; color: #9c0366; + font-size: 2.0em; } + .reveal .slides>section:first-child h2 { - color: #333; + color: black; font-weight: normal !important; - } -/* Custom slide title */ -.my-title-slide h1 { - font-weight: bold; - color: #9c0366; - } -.my-title-slide h2 { -color: #333; -font-weight: normal !important; } - - .reveal .slides>section:first-child h1 { font-weight: bold !important; color: #9c0366; @@ -107,12 +119,13 @@ font-weight: normal !important; .reveal ul ul { font-size: 0.75em; /* Smaller font size */ - margin-top: 5px !important; margin-bottom: 5px !important; } .reveal ol { display: block; - margin-bottom: 20px !important; + margin-bottom: 20px; + margin-left: 75px; + margin-right: 50px } @@ -147,16 +160,16 @@ small { } .big-picture img{ - max-width: 70%; - border: 1px solid black !important; + max-width: 95%; + } -/* Chage link color to purple */ +/* Change link color to sky blue */ .reveal a { - color: #0c74dc !important; + color: #0c74dc; } -/* Change link color to purple on hover */ +/* Change link color to magenta on hover */ .reveal a:hover { color: #9c0366 !important; }