From 5055ca815fad4d8a13601f1e8f8ee1da8012cc3e Mon Sep 17 00:00:00 2001 From: Barbara Novak <19824106+bnovak32@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:28:33 -0800 Subject: [PATCH] update assay suffix locations --- .../Low_Biomass/Illumina/GL-DPPD-7117.md | 88 +++++++++--------- .../Low_Biomass/Nanopore/GL-DPPD-7116.md | 90 +++++++++---------- 2 files changed, 89 insertions(+), 89 deletions(-) diff --git a/Metagenomics/Low_Biomass/Illumina/GL-DPPD-7117.md b/Metagenomics/Low_Biomass/Illumina/GL-DPPD-7117.md index 0b2f6b94..2731ed88 100644 --- a/Metagenomics/Low_Biomass/Illumina/GL-DPPD-7117.md +++ b/Metagenomics/Low_Biomass/Illumina/GL-DPPD-7117.md @@ -285,11 +285,11 @@ kraken2 --db kraken2_human_db \ sample1_R1_raw.fastq.gz sample1_R2_raw.fastq.gz # rename and gzip output files -mv sample1_R_1.fastq sample1_R1_HRrm_GLlbsMetag.fastq && \ -gzip sample1_R1_HRrm_GLlbsMetag.fastq +mv sample1_R_1.fastq sample1_GLlbsMetag_R1_HRrm.fastq && \ +gzip sample1_GLlbsMetag_R1_HRrm.fastq -mv sample1_R_2.fastq sample1_R2_HRrm_GLlbsMetag.fastq && \ -gzip sample1_R2_HRrm_GLlbsMetag.fastq +mv sample1_R_2.fastq sample1_GLlbsMetag_R2_HRrm.fastq && \ +gzip sample1_GLlbsMetag_R2_HRrm.fastq ``` **Parameter Definitions:** @@ -312,7 +312,7 @@ gzip sample1_R2_HRrm_GLlbsMetag.fastq - sample1-kraken2-output.txt (kraken2 read-based output file (one line per read)) - sample1-kraken2-report.tsv (kraken2 report output file (one line per taxa, with number of reads assigned to it)) -- **sample1_raw_HRrm_GLlbsMetag.fastq.gz** (raw sample reads with human reads removed, gzipped fasta file) +- **sample1_GLlbsMetag_[R1|R2]_HRrm.fastq.gz** (raw sample reads with human reads removed, gzipped fasta file) #### 2c. 
Compile Human Read Removal QC @@ -377,7 +377,7 @@ fastp --in1 sample1_R1_raw.fastq.gz --out1 temp_sample1_R1_filtered.fastq.gz \ **Input Data:** -- *raw_HRrm_GLlbsMetag.fastq.gz (raw sample reads with human reads removed, from [Step 2b](#2b-remove-human-reads)) +- *_GLlbsMetag_R[12]_HRrm.fastq.gz (raw sample reads with human reads removed, from [Step 2b](#2b-remove-human-reads)) **Output Data:** @@ -386,8 +386,8 @@ fastp --in1 sample1_R1_raw.fastq.gz --out1 temp_sample1_R1_filtered.fastq.gz \ #### 3b. Trim polyG ```bash -fastp --in1 temp_sample1_R1_filtered.fastq.gz --out1 sample1_R1_filtered_GLlbsMetag.fastq.gz \ - --in2 temp_sample1_R2_filtered.fastq.gz --out2 sample1_R2_filtered_GLlbsMetag.fastq.gz \ +fastp --in1 temp_sample1_R1_filtered.fastq.gz --out1 sample1_GLlbsMetag_R1_filtered.fastq.gz \ + --in2 temp_sample1_R2_filtered.fastq.gz --out2 sample1_GLlbsMetag_R2_filtered.fastq.gz \ --qualified_quality_phred 20 \ --length_required 50 \ --thread 2 \ @@ -418,7 +418,7 @@ fastp --in1 temp_sample1_R1_filtered.fastq.gz --out1 sample1_R1_filtered_GLlbsMe **Output Data:** -- **\*filtered_GLlbsMetag.fastq.gz** (quality filtered and adapter trimmed, human removed reads) +- **\*_GLlbsMetag_R[12]_filtered.fastq.gz** (quality filtered and adapter trimmed, human removed reads) #### 3c. 
Filtered Data QC @@ -429,11 +429,11 @@ fastqc -o filtered_fastqc_output *filtered.fastq.gz **Parameter Definitions:** - `-o` – the output directory to store results -- `*filtered_GLlbsMetag.fastq.gz` – the input reads are specified as a positional argument, and can be given all at once with wildcards like this, or as individual arguments with spaces in between them +- `*filtered.fastq.gz` – the input reads are specified as a positional argument, and can be given all at once with wildcards like this, or as individual arguments with spaces in between them **Input data:** -- *filtered_GLlbsMetag.fastq.gz (trimmed and filtered reads, from [Step 3b](#3b-trim-polyg)) +- *filtered.fastq.gz (trimmed and filtered reads, from [Step 3b](#3b-trim-polyg)) **Output data:** @@ -479,8 +479,8 @@ multiqc --zip-data-dir \ #### 4a. Assemble Contaminants ```bash -cat /path/to/contaminant_fastq/*_R1_filtered_GLlbsMetag.fastq.gz > merged_R1.fastq.gz -cat /path/to/contaminant_fastq/*_R2_filtered_GLlbsMetag.fastq.gz > merged_R2.fastq.gz +cat /path/to/contaminant_fastq/*_GLlbsMetag_R1_filtered.fastq.gz > merged_R1.fastq.gz +cat /path/to/contaminant_fastq/*_GLlbsMetag_R2_filtered.fastq.gz > merged_R2.fastq.gz spades.py --meta \ --threads 8 \ @@ -503,7 +503,7 @@ mv spades.log blank-assembly.log **Input Data** -- *_R[12]_filtered_GLlbsMetag.fastq.gz (one or more paired-end, trimmed and filtered, HRrm reads from blank (negative control) samples, output from [Step 3b](#3b-trim-polyg)) +- *_GLlbsMetag_R[12]_filtered.fastq.gz (one or more paired-end, trimmed and filtered, HRrm reads from blank (negative control) samples, output from [Step 3b](#3b-trim-polyg)) **Output Data** @@ -522,14 +522,14 @@ bowtie2-build /path/to/contaminant_assembly/blank-scaffolds.fasta /path/to/blank bowtie2 -p NumberOfThreads \ -x /path/to/blank-index/blanks \ --very-sensitive-local \ - -1 sample1_R1_filtered_GLlbsMetag.fastq.gz \ - -2 sample2_R2_filtered_GLlbsMetag.fastq.gz \ + -1 
sample1_GLlbsMetag_R1_filtered.fastq.gz \ + -2 sample1_GLlbsMetag_R2_filtered.fastq.gz \ --un-conc-gz sample1_decontam.fastq.gz > sample1.sam 2> sample1-mapping-info.txt # rename blank removed fastq files -mv sample1_decontam.fastq.1.gz sample1_R1_decontam_GLlbsMetag.fastq.gz -mv sample1_decontam.fastq.2.gz sample1_R2_decontam_GLlbsMetag.fastq.gz +mv sample1_decontam.fastq.1.gz sample1_GLlbsMetag_R1_decontam.fastq.gz +mv sample1_decontam.fastq.2.gz sample1_GLlbsMetag_R2_decontam.fastq.gz # remove intermediate file rm -rf sample1.sam @@ -554,11 +554,11 @@ rm -rf sample1.sam **Input Data** - /path/to/contaminant_assembly/blank-scaffolds.fasta (contaminant assembly, output from [Step 4a](#4a-assemble-contaminants)) -- sample1_R[12]_filtered_GLlbsMetag.fastq.gz (filtered and trimmed reads, output from [Step 3b](#3b-trim-polyg)) +- sample1_GLlbsMetag_R[12]_filtered.fastq.gz (filtered and trimmed reads, output from [Step 3b](#3b-trim-polyg)) **Output Data** -- sample1_R[12]_decontam_GLlbsMetag.fastq.gz (decontaminated reads) +- sample1_GLlbsMetag_R[12]_decontam.fastq.gz (decontaminated reads) - sample-mapping-info.txt (bowtie2 mapping log file)
@@ -566,17 +566,17 @@ rm -rf sample1.sam #### 4c. Contaminant Removal QC ```bash -fastqc -o decontam_fastqc_output *decontam_GLlbsMetag.fastq.gz +fastqc -o decontam_fastqc_output *decontam.fastq.gz ``` **Parameter Definitions:** - `-o` – the output directory to store results -- `*decontam_GLlbsMetag.fastq.gz` – the input reads are specified as a positional argument, and can be given all at once with wildcards like this, or as individual arguments with spaces in between them +- `*decontam.fastq.gz` – the input reads are specified as a positional argument, and can be given all at once with wildcards like this, or as individual arguments with spaces in between them **Input data:** -- *decontam_GLlbsMetag.fastq.gz (decontaminated reads) +- *decontam.fastq.gz (decontaminated reads) **Output data:** @@ -673,15 +673,15 @@ kraken2 --db kraken2_${hostname}_db \ --use-names \ --output sample-kraken2-output.txt \ --report sample-kraken2-report.tsv \ - --unclassified-out sample1_R#.fastq \ - sample1_R1_decontam.fastq.gz sample1_R2_decontam.fastq.gz + --unclassified-out sample1_GLlbsMetag_R#.fastq \ + sample1_GLlbsMetag_R1_decontam.fastq.gz sample1_GLlbsMetag_R2_decontam.fastq.gz # rename and gzip output files -mv sample1_R_1.fastq sample1_R1_HostRm_GLlbsMetag.fastq && \ -gzip sample1_R1_HostRm_GLlbsMetag.fastq +mv sample1_GLlbsMetag_R_1.fastq sample1_GLlbsMetag_R1_HostRm.fastq && \ +gzip sample1_GLlbsMetag_R1_HostRm.fastq -mv sample1_R_2.fastq sample1_R2_HostRm_GLlbsMetag.fastq && \ -gzip sample1_R2_HostRm_GLlbsMetag.fastq +mv sample1_GLlbsMetag_R_2.fastq sample1_GLlbsMetag_R2_HostRm.fastq && \ +gzip sample1_GLlbsMetag_R2_HostRm.fastq ``` **Parameter Definitions:** @@ -693,18 +693,18 @@ gzip sample1_R2_HostRm_GLlbsMetag.fastq - `--output` - Specifies the name of the kraken2 read-based output file (one line per read). - `--report` - Specifies the name of the kraken2 report output file (one line per taxa, with number of reads assigned to it). 
- `--unclassified-out` - Specifies the name of the output file containing reads that were not classified, i.e non-host reads. -- `sample1_R1_decontam_GLlbsMetag.fastq.gz sample1_R2_decontam_GLlbsMetag.fastq.gz` - Positional argument specifying the input read files. +- `sample1_GLlbsMetag_R1_decontam.fastq.gz sample1_GLlbsMetag_R2_decontam.fastq.gz` - Positional argument specifying the input read files. **Input Data:** - kraken2_host_db/ (kraken2 host database directory, output from [Step 5a](#5a-build-kraken2-host-database)) -- sample_*decontam_GLlbsMetag.fastq.gz (filtered and trimmed sample reads with contaminants removed, output from [Step 4b](#4b-build-contaminant-index-and-map-reads)) +- sample_GLlbsMetag_R[12]_decontam.fastq.gz (filtered and trimmed sample reads with contaminants removed, output from [Step 4b](#4b-build-contaminant-index-and-map-reads)) **Output Data:** - sample-kraken2-output.txt (kraken2 read-based output file (one line per read)) - sample-kraken2-report.tsv (kraken2 report output file (one line per taxa, with number of reads assigned to it)) -- **sample_HostRm_GLlbsMetag.fastq.gz** (filtered and trimmed sample reads with contaminants, human, and host reads removed, gzipped fasta file) +- **sample_GLlbsMetag_HostRm.fastq.gz** (filtered and trimmed sample reads with contaminants, human, and host reads removed, gzipped fasta file) #### 5c. 
Compile Host Read Removal QC @@ -1620,8 +1620,8 @@ kaiju -f kaiju-db/nr_euk/kaiju_db_nr_euk.fmi \ -t kaiju-db/nodes.dmp \ -z NumberOfThreads \ -E 1e-05 \ - -i /path/to/sample1_R1_decontam_GLlbsMetag.fastq.gz \ - -j /path/to/sample1_R2_decontam_GLlbsMetag.fastq.gz \ + -i /path/to/sample1_GLlbsMetag_R1_decontam.fastq.gz \ + -j /path/to/sample1_GLlbsMetag_R2_decontam.fastq.gz \ -o sample_kaiju.out ``` @@ -2034,7 +2034,7 @@ kraken2 --db kraken2-db/ \ --use-names \ --output sample-kraken2-output.txt \ --report sample-kraken2-report.tsv \ - /path/to/sample1_R1_decontam_GLlbsMetag.fastq.gz /path/to/sample1_R2_decontam_GLlbsMetag.fastq.gz + /path/to/sample1_GLlbsMetag_R1_decontam.fastq.gz /path/to/sample1_GLlbsMetag_R2_decontam.fastq.gz ``` **Parameter Definitions:** @@ -2045,8 +2045,8 @@ kraken2 --db kraken2-db/ \ - `--use-names` - Specifies to add taxa names in addition to taxids. - `--output` - Specifies the name of the kraken2 read-based output file. - `--report` - Specifies the name of the kraken2 report output file. -- `sample1_R1_decontam_GLlbsMetag.fastq.gz` - Positional argument specifying the forward read input file. -- `sample1_R2_decontam_GLlbsMetag.fastq.gz` - Positional argument specifying the reverse read input file. +- `sample1_GLlbsMetag_R1_decontam.fastq.gz` - Positional argument specifying the forward read input file. +- `sample1_GLlbsMetag_R2_decontam.fastq.gz` - Positional argument specifying the reverse read input file. 
**Input Data:** @@ -2401,7 +2401,7 @@ metaphlan --install ```bash # forward and reverse reads need to be provided combined if paired-end (if not paired-end, single-end reads are provided to the --input argument next) -cat sample1_R1_decontam_GLlbsMetag.fastq.gz sample1_R2_decontam_GLlbsMetag.fastq.gz > sample1-combined.fastq.gz +cat sample1_GLlbsMetag_R1_decontam.fastq.gz sample1_GLlbsMetag_R2_decontam.fastq.gz > sample1-combined.fastq.gz humann --input sample1-combined.fastq.gz \ --output sample1-humann3-out-dir \ @@ -2431,7 +2431,7 @@ mv sample1-humann3-out-dir/sample1_humann_temp/sample1_metaphlan_bugs_list.tsv \ **Input Data:** - `/path/to/humann3-db/` (HUMAnN databases installed in [Step 9a](#9a-download-and-install-humann-databases)) -- *_R[12]_decontam_GLlbsMetag.fastq.gz or *_R[12]_HostRm_GLlbsMetag.fastq.gz (filtered and trimmed sample reads with both +- *_GLlbsMetag_R[12]_decontam.fastq.gz or *_GLlbsMetag_R[12]_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and, optionally, host reads) removed, gzipped fasta file, output from [Step 4b](#4b-build-contaminant-index-and-map-reads) or [Step 5b](#5b-remove-host-reads)) @@ -2858,7 +2858,7 @@ htmlwidgets::saveWidget(ggplotly(p), glue("Metaphlan_decontam_species_barplot_GL ### 10. 
Sample Assembly ``` -megahit -1 sample1_R1_decontam_GLlbsMetag.fastq.gz -2 sample1_R2_decontam_GLlbsMetag.fastq.gz \ +megahit -1 sample1_GLlbsMetag_R1_decontam.fastq.gz -2 sample1_GLlbsMetag_R2_decontam.fastq.gz \ -o sample1-assembly -t NumberOfThreads --min-contig-length 500 > sample1-assembly.log 2>&1 ``` @@ -2873,7 +2873,7 @@ megahit -1 sample1_R1_decontam_GLlbsMetag.fastq.gz -2 sample1_R2_decontam_GLlbsM **Input data:** -- *_R[12]_decontam_GLlbsMetag.fastq.gz or *_R[12]_HostRm_GLlbsMetag.fastq.gz (filtered and trimmed sample reads with both +- *_GLlbsMetag_R[12]_decontam.fastq.gz or *_GLlbsMetag_R[12]_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and, optionally, host reads) removed, gzipped fasta file, output from [Step 4b](#4b-build-contaminant-index-and-map-reads) or [Step 5b](#5b-remove-host-reads)) @@ -3260,8 +3260,8 @@ bowtie2-build sample1_assembly_GLlbsMetag.fasta sample1-index ```bash bowtie2 --mm --quiet --threads ${task.cpus} \ -x sample1-index \ - -1 sample1_R1_decontam_GLlbsMetag.fastq.gz \ - -2 sample1_R2_decontam_GLlbsMetag.fastq.gz \ + -1 sample1_GLlbsMetag_R1_decontam.fastq.gz \ + -2 sample1_GLlbsMetag_R2_decontam.fastq.gz \ --no-unal > sample1.sam 2> sample1-mapping-info_GLlbsMetag.txt ``` @@ -3280,7 +3280,7 @@ bowtie2 --mm --quiet --threads ${task.cpus} \ **Input Data** - sample1-index (bowti2 index files, output from [Step 15a](#15a-build-reference-index)) -- *_R[12]_decontam_GLlbsMetag.fastq.gz or *_R[12]_HostRm_GLlbsMetag.fastq.gz (filtered and trimmed sample reads with both +- *_GLlbsMetag_R[12]_decontam.fastq.gz or *_GLlbsMetag_R[12]_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and, optionally, host reads) removed, gzipped fasta file, output from [Step 4b](#4b-build-contaminant-index-and-map-reads) or [Step 5b](#5b-remove-host-reads)) diff --git a/Metagenomics/Low_Biomass/Nanopore/GL-DPPD-7116.md b/Metagenomics/Low_Biomass/Nanopore/GL-DPPD-7116.md 
index 2a2ae9e0..a8bb2178 100644 --- a/Metagenomics/Low_Biomass/Nanopore/GL-DPPD-7116.md +++ b/Metagenomics/Low_Biomass/Nanopore/GL-DPPD-7116.md @@ -376,7 +376,7 @@ filtlong --min_length 200 --min_mean_q 8 /path/to/raw_data/sample.fastq.gz > sam ```bash NanoPlot --only-report \ - --prefix sample_filtered_ \ + --prefix sample_filtered_GLlblMetag_ \ --outdir /path/to/filtered_nanoplot_output \ --threads NumberOfThreads \ --fastq \ @@ -398,7 +398,7 @@ NanoPlot --only-report \ **Output Data:** -- **/path/to/filtered_nanoplot_output/sample_filtered_NanoPlot-report_GLlblMetag.html** (NanoPlot html summary) +- **/path/to/filtered_nanoplot_output/sample_filtered_GLlblMetag_NanoPlot-report.html** (NanoPlot html summary) - /path/to/filtered_nanoplot_output/sample_filtered_NanoPlot_\_\.log (NanoPlot log file) - /path/to/filtered_nanoplot_output/sample_filtered_NanoStats.txt (text file containing basic statistics) @@ -422,7 +422,7 @@ multiqc --zip-data-dir \ **Input Data:** -- /path/to/filtered_nanoplot_output/*filtered_NanoStats.txt (NanoPlot output data, from [Step 4b](#4b-filtered-data-qc)) +- /path/to/filtered_nanoplot_output/*filtered_GLlblMetag_NanoStats.txt (NanoPlot output data, from [Step 4b](#4b-filtered-data-qc)) **Output Data:** @@ -465,7 +465,7 @@ porechop --input sample_filtered.fastq \ ```bash NanoPlot --only-report \ - --prefix sample_trimmed_ \ + --prefix sample_trimmed_GLlblMetag_ \ --outdir /path/to/trimmed_nanoplot_output \ --threads NumberOfThreads \ --fastq \ @@ -487,7 +487,7 @@ NanoPlot --only-report \ **Output Data:** -- **/path/to/trimmed_nanoplot_output/sample_trimmed_NanoPlot-report_GLlblMetag.html** (NanoPlot html summary) +- **/path/to/trimmed_nanoplot_output/sample_trimmed_GLlblMetag_NanoPlot-report.html** (NanoPlot html summary) - /path/to/trimmed_nanoplot_output/sample_trimmed_NanoPlot_\_\.log (NanoPlot log file) - /path/to/trimmed_nanoplot_output/sample_trimmed_NanoStats.txt (text file containing basic statistics) @@ -511,7 +511,7 @@ multiqc 
--zip-data-dir \ **Input Data:** -- /path/to/trimmed_nanoplot_output/*trimmed_NanoStats.txt (NanoPlot output data, output from [Step 5b](#5b-trimmed-data-qc)) +- /path/to/trimmed_nanoplot_output/*trimmed_GLlblMetag_NanoStats.txt (NanoPlot output data, output from [Step 5b](#5b-trimmed-data-qc)) **Output Data:** @@ -572,11 +572,11 @@ kraken2 --db kraken2_human_db \ --use-names \ --output sample-kraken2-output.txt \ --report sample-kraken2-report.tsv \ - --unclassified-out sample_HRrm_GLlblMetag.fastq \ + --unclassified-out sample_GLlblMetag_HRrm.fastq \ sample_trimmed_fastq.gz # gzip fastq output file -gzip sample_HRrm_GLlblMetag.fastq +gzip sample_GLlblMetag_HRrm.fastq ``` **Parameter Definitions:** @@ -599,7 +599,7 @@ gzip sample_HRrm_GLlblMetag.fastq - sample-kraken2-output.txt (kraken2 read-based output file (one line per read)) - sample-kraken2-report.tsv (kraken2 report output file (one line per taxa, with number of reads assigned to it)) -- **sample_HRrm_GLlblMetag.fastq.gz** (filtered and trimmed sample reads with human reads removed, gzipped fastq file) +- **sample_GLlblMetag_HRrm.fastq.gz** (filtered and trimmed sample reads with human reads removed, gzipped fastq file) #### 6c. 
Compile Human Read Removal QC @@ -643,7 +643,7 @@ multiqc --zip-data-dir \ flye --meta \ --threads NumberOfThreads \ --out-dir /path/to/contaminant_assembly \ - --nano-raw /path/to/blank_samples/\*_HRrm_GLlblMetag.fastq.gz + --nano-raw /path/to/blank_samples/\*_GLlblMetag_HRrm.fastq.gz # rename output mv assembly.fasta blank-assembly.fasta @@ -659,7 +659,7 @@ mv flye.log blank-flye.log **Input Data** -- *_HRrm_GLlblMetag.fastq.gz (one or more filtered, trimmed, and HRrm reads from blank (negative control) samples, output from [Step 6b](#6b-remove-human-reads)) +- *_GLlblMetag_HRrm.fastq.gz (one or more filtered, trimmed, and HRrm reads from blank (negative control) samples, output from [Step 6b](#6b-remove-human-reads)) **Output Data** @@ -683,7 +683,7 @@ minimap2 -t NumberOfThreads \ -a \ -x splice \ blanks.mmi \ - sample_HRrm_GLlblMetag.fastq.gz > sample.sam 2> sample-mapping-info.txt + sample_GLlblMetag_HRrm.fastq.gz > sample.sam 2> sample-mapping-info.txt ``` **Parameter Definitions:** @@ -694,13 +694,13 @@ minimap2 -t NumberOfThreads \ - `-d` - Specifies the output file for the index (specific to the build contaminant index command). - `/path/to/contaminant_assembly/blank-assembly.fasta` - Specifies the input file in fasta format, provided as a positional argument (specific to the build contaminant index command). - `blanks.mmi` - Specifies the index file in mmi format, provided as a positional argument (specific to the map reads command). -- `/path/to/trimmed_reads/sample_HRrm_GLlblMetag.fastq.gz` - Specifies the input file in fastq format, provided as a positional argument (specific to the map reads command). +- `/path/to/trimmed_reads/sample_GLlblMetag_HRrm.fastq.gz` - Specifies the input file in fastq format, provided as a positional argument (specific to the map reads command). - `> sample.sam` - Redirects the output of the map reads command to a separate SAM file (specific to the map reads command). 
**Input Data** - /path/to/contaminant_assembly/blank-assembly.fasta (contaminant assembly, output from [Step 7a](#7-assemble-contaminants)) -- sample_HRrm_GLlblMetag.fastq.gz (filtered, trimmed, and HRrm reads, output from [Step 6b](#6b-remove-human-reads)) +- sample_GLlblMetag_HRrm.fastq.gz (filtered, trimmed, and HRrm reads, output from [Step 6b](#6b-remove-human-reads)) **Output Data** @@ -778,7 +778,7 @@ samtools idxstats sample_sorted.bam > sample_idxstats.txt 2> sample_idxstats.lo #### 7e. Generate Decontaminated Read Files ```bash # Retain reads that do not map to contaminants -samtools fastq -t -f 4 -o sample_decontam_GLlblMetag.fastq.gz -0 sample_decontam_GLlblMetag.fastq.gz sample_sorted.bam +samtools fastq -t -f 4 -o sample_GLlblMetag_decontam.fastq.gz -0 sample_GLlblMetag_decontam.fastq.gz sample_sorted.bam ``` **Parameter Definitions:** @@ -786,8 +786,8 @@ samtools fastq -t -f 4 -o sample_decontam_GLlblMetag.fastq.gz -0 sample_decontam - `fastq` - Positional argument specifying the program for generating fastq files from a SAM/BAM file. - `-t` - Copy RG, BC, and QT tags to the FASTQ header line. - `-f 4` - Only retain unmapped reads that have been marked with the SAM "segment unmapped" FLAG (4). -- `-o sample_decontam_GLlblMetag.fastq.gz` - Send reads flagged as either read1 or read2 to the named file (.gz ending ensures compressed output) -- `-0 sample_decontam_GLlblMetag.fastq.gz` - Send reads flagged as both read1 and read2 or neither to the same named file +- `-o sample_GLlblMetag_decontam.fastq.gz` - Send reads flagged as either read1 or read2 to the named file (.gz ending ensures compressed output) +- `-0 sample_GLlblMetag_decontam.fastq.gz` - Send reads flagged as both read1 and read2 or neither to the same named file - `sample_sorted.bam` - Positional argument specifying the input BAM file. 
**Input Data:** @@ -796,7 +796,7 @@ samtools fastq -t -f 4 -o sample_decontam_GLlblMetag.fastq.gz -0 sample_decontam **Output Data:** -- **sample_decontam_GLlblMetag.fastq.gz** (filtered, trimmed, and HRrm sample reads with contaminants removed in fastq format) +- **sample_GLlblMetag_decontam.fastq.gz** (filtered, trimmed, and HRrm sample reads with contaminants removed in fastq format) #### 7f. Contaminant Removal QC @@ -806,7 +806,7 @@ NanoPlot --only-report \ --outdir /path/to/decontam_nanoplot_output \ --threads NumberOfThreads \ --fastq \ - sample_decontam_GLlblMetag.fastq.gz + sample_GLlblMetag_decontam.fastq.gz ``` **Parameter Definitions:** @@ -816,11 +816,11 @@ NanoPlot --only-report \ - `--outdir` – Specifies the output directory to store results. - `--threads` - Number of parallel processing threads to use. - `--fastq` - Specifies that the input data is in fastq format. -- `sample_decontam_GLlblMetag.fastq.gz` – The input reads, specified as a positional argument. +- `sample_GLlblMetag_decontam.fastq.gz` – The input reads, specified as a positional argument. 
**Input Data:** -- sample_decontam_GLlblMetag.fastq.gz (filtered, trimmed, and HRrm sample reads with all contaminants removed, output from [Step 7e](#7e-generate-decontaminated-read-files)) +- sample_GLlblMetag_decontam.fastq.gz (filtered, trimmed, and HRrm sample reads with all contaminants removed, output from [Step 7e](#7e-generate-decontaminated-read-files)) **Output Data:** @@ -915,11 +915,11 @@ kraken2 --db kraken2_host_db \ --use-names \ --output sample-kraken2-output.txt \ --report sample-kraken2-report.tsv \ - --unclassified-out sample_HostRm_GLlblMetag.fastq \ - sample_decontam_GLlblMetag.fastq.gz + --unclassified-out sample_GLlblMetag_HostRm.fastq \ + sample_GLlblMetag_decontam.fastq.gz # gzip fastq output file -gzip sample_HostRm_GLlblMetag.fastq +gzip sample_GLlblMetag_HostRm.fastq ``` **Parameter Definitions:** @@ -931,18 +931,18 @@ gzip sample_HostRm_GLlblMetag.fastq - `--output` - Specifies the name of the kraken2 read-based output file (one line per read). - `--report` - Specifies the name of the kraken2 report output file (one line per taxa, with number of reads assigned to it). - `--unclassified-out` - Specifies the name of the output file containing reads that were not classified, i.e non-human reads. -- `sample_decontam_GLlblMetag.fastq.gz` - Positional argument specifying the input read file. +- `sample_GLlblMetag_decontam.fastq.gz` - Positional argument specifying the input read file. 
**Input Data:** - kraken2_host_db/ (kraken2 host database directory, output from [Step 8a](#8a-build-kraken2-database)) -- sample_decontam_GLlblMetag.fastq.gz (filtered, trimmed, HRrm and contaminant-removed sample reads, output from [Step 7e](#7e-generate-decontaminated-read-files)) +- sample_GLlblMetag_decontam.fastq.gz (filtered, trimmed, HRrm and contaminant-removed sample reads, output from [Step 7e](#7e-generate-decontaminated-read-files)) **Output Data:** - sample-kraken2-output.txt (kraken2 read-based output file (one line per read)) - sample-kraken2-report.tsv (kraken2 report output file (one line per taxa, with number of reads assigned to it)) -- **sample_HostRm_GLlblMetag.fastq.gz** (filtered, trimmed, HRrm and contaminant-removed sample reads with all host reads removed, gzipped fastq file) +- **sample_GLlblMetag_HostRm.fastq.gz** (filtered, trimmed, HRrm and contaminant-removed sample reads with all host reads removed, gzipped fastq file) #### 8c. Compile Host Read Removal QC @@ -1861,7 +1861,7 @@ kaiju -f kaiju-db/nr_euk/kaiju_db_nr_euk.fmi \ -t kaiju-db/nodes.dmp \ -z NumberOfThreads \ -E 1e-05 \ - -i /path/to/sample_decontam_GLlblMetag.fastq.gz \ + -i /path/to/sample_GLlblMetag_decontam.fastq.gz \ -o sample_kaiju.out ``` @@ -1878,7 +1878,7 @@ kaiju -f kaiju-db/nr_euk/kaiju_db_nr_euk.fmi \ - kaiju-db/nr_euk/kaiju_db_nr_euk.fmi (FM-index file containing the main Kaiju database index, output from [Step 10a](#10a-build-kaiju-database)) - kaiju-db/nodes.dmp (kaiju taxonomy hierarchy nodes file, output from [Step 10a](#10a-build-kaiju-database)) -- sample_decontam_GLlblMetag.fastq.gz or sample_HostRm_GLlblMetag.fastq.gz (filtered and trimmed sample reads with both +- sample_GLlblMetag_decontam.fastq.gz or sample_GLlblMetag_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and optionally host reads) removed, gzipped fastq file, output from [Step 7e](#7e-generate-decontaminated-read-files) or [Step 
8b](#8b-remove-host-reads)) @@ -2274,7 +2274,7 @@ kraken2 --db kraken2-db/ \ --use-names \ --output sample-kraken2-output.txt \ --report sample-kraken2-report.tsv \ - /path/to/sample_decontam_GLlblMetag.fastq.gz + /path/to/sample_GLlblMetag_decontam.fastq.gz ``` **Parameter Definitions:** @@ -2285,12 +2285,12 @@ kraken2 --db kraken2-db/ \ - `--use-names` - Specifies to add taxa names in addition to taxids. - `--output` - Specifies the name of the kraken2 read-based output file. - `--report` - Specifies the name of the kraken2 report output file. -- `sample_decontam_GLlblMetag.fastq.gz` - Positional argument specifying the input file. +- `sample_GLlblMetag_decontam.fastq.gz` - Positional argument specifying the input file. **Input Data:** - kraken2-db/ (a directory containing kraken2 database files, output from [Step 11a](#11a-download-kraken2-database)) -- sample_decontam_GLlblMetag.fastq.gz or sample_HostRm_GLlblMetag.fastq.gz (filtered and trimmed sample reads with both +- sample_GLlblMetag_decontam.fastq.gz or sample_GLlblMetag_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and, optionally, host reads) removed, gzipped fasta file, output from [Step 7e](#7e-generate-decontaminated-read-files) or [Step 8b](#8b-remove-host-reads)) @@ -2613,7 +2613,7 @@ flye --meta \ --threads NumberOfThreads \ --out-dir sample/ \ --nano-hq \ - /path/to/sample_decontam_GLlblMetag.fastq.gz + /path/to/sample_GLlblMetag_decontam.fastq.gz # rename output files mv sample/assembly.fasta sample_assembly.fasta @@ -2626,11 +2626,11 @@ mv sample/flye.log sample_assembly.log - `--threads` - Number of parallel processing threads to use. - `--out-dir` - Specifies the name of the output directory. - `--nano-hq` - Specifies that input is from Oxford Nanopore high-quality reads (Guppy5+ SUP or Q20, <5% error). This skips a genome polishing step since the assembly will be polished with medaka in the next step. 
-- `/path/to/sample_decontam_GLlblMetag.fastq.gz` - Path to the input file, specified as a positional argument. +- `/path/to/sample_GLlblMetag_decontam.fastq.gz` - Path to the input file, specified as a positional argument. **Input Data** -- sample_decontam_GLlblMetag.fastq.gz or sample_HostRm_GLlblMetag.fastq.gz (filtered and trimmed sample reads with both +- sample_GLlblMetag_decontam.fastq.gz or sample_GLlblMetag_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and optionally host reads) removed, gzipped fasta file, output from [Step 7e](#7e-generate-decontaminated-read-files) or [Step 8b](#8b-remove-host-reads)) @@ -2647,7 +2647,7 @@ mv sample/flye.log sample_assembly.log ```bash medaka_consensus -t NumberOfThreads \ - -i /path/to/sample_decontam_GLlblMetag.fastq.gz \ + -i /path/to/sample_GLlblMetag_decontam.fastq.gz \ -d /path/to/assemblies/sample_assembly.fasta \ -o sample/ @@ -2663,7 +2663,7 @@ mv sample/consensus.fasta sample_polished.fasta **Input Data:** -- sample_decontam_GLlblMetag.fastq.gz or sample_HostRm_GLlblMetag.fastq.gz (filtered and trimmed sample reads with both +- sample_GLlblMetag_decontam.fastq.gz or sample_GLlblMetag_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and optionally host reads) removed, gzipped fasta file, output from [Step 7e](#7e-generate-decontaminated-read-files) or [Step 8b](#8b-remove-host-reads)) - /path/to/assemblies/sample_assembly.fasta (sample assembly, output from [Step 12](#12-sample-assembly)) @@ -3033,7 +3033,7 @@ minimap2 -a \ -x map-ont \ -t NumberOfThreads \ sample_assembly.fasta \ - sample_decontam_GLlblMetag.fastq.gz \ + sample_GLlblMetag_decontam.fastq.gz \ > sample.sam 2> sample-mapping-info.txt ``` @@ -3043,14 +3043,14 @@ minimap2 -a \ - `-x map-ont` - Specifies preset for mapping Nanopore reads to a reference. 
- `-t` - Number of parallel processing threads to use - `sample_assembly.fasta` – Assembly fasta file, provided as a positional argument. -- `sample_decontam_GLlblMetag.fastq.gz` - Input sequence data file, provided as a positional argument. +- `sample_GLlblMetag_decontam.fastq.gz` - Input sequence data file, provided as a positional argument. - `> sample.sam` - Redirects the output to a separate file. - `2> sample-mapping-info.txt` - Redirects the standar error to a separate file. **Input Data** - sample-assembly.fasta (contig-renamed assembly file, output from [Step 14a](#14a-rename-contig-headers)) -- sample_decontam_GLlblMetag.fastq.gz or sample_HostRm_GLlblMetag.fastq.gz (filtered and trimmed sample reads with both +- sample_GLlblMetag_decontam.fastq.gz or sample_GLlblMetag_HostRm.fastq.gz (filtered and trimmed sample reads with both contaminants and human reads (and optionally host reads) removed, gzipped fasta file, output from [Step 7e](#7e-generate-decontaminated-read-files) or [Step 8b](#8b-remove-host-reads)) @@ -3065,10 +3065,10 @@ minimap2 -a \ ```bash # Sort Sam, convert to bam and create index samtools sort --threads NumberOfThreads \ - -o sample_sorted_GLlblMetag.bam \ + -o sample_GLlblMetag_sorted.bam \ sample.sam > sample_sort.log 2>&1 -samtools index sample_sorted_GLlblMetag.bam sample_sorted_GLlblMetag.bam.bai +samtools index sample_GLlblMetag_sorted.bam sample_GLlblMetag_sorted.bam.bai ``` **Parameter Definitions:** @@ -3089,8 +3089,8 @@ samtools index sample_sorted_GLlblMetag.bam sample_sorted_GLlblMetag.bam.bai **Output Data:** -- **sample_sorted_GLlblMetag.bam** (sorted mapping to sample assembly, in BAM format) -- **sample_sorted_GLlblMetag.bam.bai** (index of sorted mapping to sample assembly) +- **sample_GLlblMetag_sorted.bam** (sorted mapping to sample assembly, in BAM format) +- **sample_GLlblMetag_sorted.bam.bai** (index of sorted mapping to sample assembly)
@@ -3265,7 +3265,7 @@ mv "Combined-gene-level-taxonomy-coverages.tsv Combined-gene-level-taxonomy-cove **Parameter Definitions:** -- `*-gene-coverage-annotation-and-tax_GLlbsMetag.tsv` - Positional arguments specifying the input tsv files, can be provided as a space-delimited list of files, or with wildcards like above. +- `*-gene-coverage-annotation-and-tax_GLlblMetag.tsv` - Positional arguments specifying the input tsv files, can be provided as a space-delimited list of files, or with wildcards like above. - `-o` – Specifies the output file prefix.