Command-Line Interfaces#

yasim#

yasim art#

art.py -- LLRG adapter for ART, an NGS DNA-Seq simulator

SYNOPSIS: python -m yasim art [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                              [--sequencer_name [{GA1,GA2,HS10,HS20,HS25,HSXn,HSXt,MinS,MSv1,MSv3,NS50}]] [--read_length [READ_LENGTH]]
                              [--pair_end_fragment_length_mean [PAIR_END_FRAGMENT_LENGTH_MEAN]] [--pair_end_fragment_length_std [PAIR_END_FRAGMENT_LENGTH_STD]] [--is_pair_end]
                              [--preserve_intermediate_files]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: art_illumina
                        Executable name or absolute path of art
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --sequencer_name [{GA1,GA2,HS10,HS20,HS25,HSXn,HSXt,MinS,MSv1,MSv3,NS50}]
                        [OPTIONAL] Type: str; Default: HS25
                        Name of Illumina Sequencer to Simulate: GA1 -- GenomeAnalyzer I, GA2 -- GenomeAnalyzer II, HS10 -- HiSeq 1000, HS20 -- HiSeq 2000, HS25 -- HiSeq 2500, HSXn -- HiSeqX PCR free, HSXt -- HiSeqX TruSeq, MinS -- MiniSeq TruSeq, MSv1 -- MiSeq v1, MSv3 -- MiSeq v3, NS50 -- NextSeq500 v2
                        CHOICES:
                            GA1
                            GA2
                            HS10
                            HS20
                            HS25
                            HSXn
                            HSXt
                            MinS
                            MSv1
                            MSv3
                            NS50
  --read_length [READ_LENGTH]
                        [OPTIONAL] Type: int; Default: 0
                        Read length. Sequencer -- Read Length Table: GenomeAnalyzer I -- [36, 44], GenomeAnalyzer II -- [50, 75], HiSeq 1000 -- [100], HiSeq 2000 -- [100], HiSeq 2500 -- [125, 150], HiSeqX PCR free -- [150], HiSeqX TruSeq -- [150], MiniSeq TruSeq -- [50], MiSeq v1 -- [250], MiSeq v3 -- [250], NextSeq500 v2 -- [75]
  --pair_end_fragment_length_mean [PAIR_END_FRAGMENT_LENGTH_MEAN]
                        [OPTIONAL] Type: int; Default: 0
                        [PE Only] The mean size of DNA/RNA fragments for paired-end simulations
  --pair_end_fragment_length_std [PAIR_END_FRAGMENT_LENGTH_STD]
                        [OPTIONAL] Type: int; Default: 0
                        [PE Only] The standard deviation of DNA/RNA fragment size for paired-end simulations.
  --is_pair_end
                        [OPTIONAL] Default: False
                        Whether to use Pair End (PE) Simulation
  --preserve_intermediate_files
                        [OPTIONAL] Default: False
                        Do not remove intermediate files.

yasim assemble#

assemble.py -- Assemble unassembled outputs.

SYNOPSIS: python -m yasim assemble [-h] -F [FASTAS] --simulator_name [SIMULATOR_NAME] -d [DEPTH] -o [OUT] -i [INPUT_FASTQ_DIR] [--is_pair_end]
                                   [--truncate_ratio_3p [TRUNCATE_RATIO_3P]] [--truncate_ratio_5p [TRUNCATE_RATIO_5P]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  --simulator_name [SIMULATOR_NAME]
                        [REQUIRED] Type: str; No defaults
                        Custom simulator name. Used in FASTQ tags.
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. Should be prefix path to {out}.d directory.
  -i [INPUT_FASTQ_DIR], --input_fastq_dir [INPUT_FASTQ_DIR]
                        [REQUIRED] Type: str; No defaults
                        Input transcript prefix. Should be prefix path to {out}.d directory.
  --is_pair_end
                        [OPTIONAL] Default: False
                        Whether to use Pair End (PE) Simulation
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.

yasim badread#

badread.py -- LLRG adapter for BadRead, a TGS DNA-Seq simulator

SYNOPSIS: python -m yasim badread [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                                  [--truncate_ratio_3p [TRUNCATE_RATIO_3P]] [--truncate_ratio_5p [TRUNCATE_RATIO_5P]] -m [{nanopore2018,nanopore2020,pacbio2016,verybad,verynice}]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: badread
                        Executable name or absolute path of badread
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -m [{nanopore2018,nanopore2020,pacbio2016,verybad,verynice}], --model_name [{nanopore2018,nanopore2020,pacbio2016,verybad,verynice}]
                        [REQUIRED] Type: str; No defaults
                        Badread model name
                        CHOICES:
                            nanopore2018
                            nanopore2020
                            pacbio2016
                            verybad
                            verynice

yasim dtgs#

dtgs.py -- LLRG adapter for dTGS simulator (Dumb Third-Generation Sequencing Simulator).

SYNOPSIS: python -m yasim dtgs [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] -d [DEPTH] -o [OUT] [--not_perform_assemble] [--truncate_ratio_3p [TRUNCATE_RATIO_3P]]
                               [--truncate_ratio_5p [TRUNCATE_RATIO_5P]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.

yasim dwgsim#

dwgsim.py -- LLRG adapter for DWGSIM, an NGS DNA-Seq simulator

SYNOPSIS: python -m yasim dwgsim [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                                 [--preserve_intermediate_files]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: dwgsim
                        Executable name or absolute path of dwgsim
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --preserve_intermediate_files
                        [OPTIONAL] Default: False
                        Do not remove intermediate files.

yasim generate_as_events#

generate_as_events.py -- Generate Alternative Splicing Events from Reference genome using YASIM V3 API.

SYNOPSIS: python -m yasim generate_as_events [-h] -g [GTF] -f [FASTA] -c [COMPLEXITY] -o [OUT]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -g [GTF], --gtf [GTF]
                        [REQUIRED] Type: str; No defaults
                        Path to input genomic annotation in GTF format. Can be compressed.
  -f [FASTA], --fasta [FASTA]
                        [REQUIRED] Type: str; No defaults
                        Path to input reference genome sequence in FASTA format. Can be compressed.
  -c [COMPLEXITY], --complexity [COMPLEXITY]
                        [REQUIRED] Type: int; No defaults
                        Transcriptome Complexity Index, should be an integer between 1 and 9.
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path to output genome annotation with Ground-Truth AS Events in GTF.

yasim generate_gene_depth#

generate_gene_depth.py -- Generate Gene-Level Depth using YASIM V3 API.

SYNOPSIS: python -m yasim generate_gene_depth [-h] -g [GTF] -o [OUT] [-d [MU]] [--low_cutoff [LOW_CUTOFF]] [--high_cutoff_ratio [HIGH_CUTOFF_RATIO]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -g [GTF], --gtf [GTF]
                        [REQUIRED] Type: str; No defaults
                        Path to input genomic annotation in GTF format. Can be compressed.
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path to output Gene-Level depth TSV. Can be compressed.
  -d [MU], --mu [MU]
                        [OPTIONAL] Type: float; Default: 100
                        Average depth.
  --low_cutoff [LOW_CUTOFF]
                        [OPTIONAL] Type: float; Default: 0.01
                        Depths lower than this value are set to this value.
  --high_cutoff_ratio [HIGH_CUTOFF_RATIO]
                        [OPTIONAL] Type: float; Default: 200
                        Depths higher than `mu * high_cutoff_ratio` are capped at `mu * high_cutoff_ratio`.

yasim generate_isoform_depth#

generate_isoform_depth.py -- Generate Isoform-Level Depth using YASIM V3 API.

SYNOPSIS: python -m yasim generate_isoform_depth [-h] -g [GTF] -o [OUT] -d [DEPTH] [--low_cutoff [LOW_CUTOFF]] [--high_cutoff_ratio [HIGH_CUTOFF_RATIO]] [--alpha [ALPHA]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -g [GTF], --gtf [GTF]
                        [REQUIRED] Type: str; No defaults
                        Path to input genomic annotation in GTF format. Can be compressed.
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path to output Isoform-Level Depth TSV. Can be compressed.
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Gene-Level Depth TSV. Can be compressed.
  --low_cutoff [LOW_CUTOFF]
                        [OPTIONAL] Type: float; Default: 0.01
                        Depths lower than this value are set to this value.
  --high_cutoff_ratio [HIGH_CUTOFF_RATIO]
                        [OPTIONAL] Type: float; Default: 200
                        Depths higher than `mu * high_cutoff_ratio` are capped at `mu * high_cutoff_ratio`.
  --alpha [ALPHA]
                        [OPTIONAL] Type: int; Default: 4
                        Zipf's Coefficient, larger for larger differences

yasim generate_isoform_replicates#

generate_isoform_replicates.py -- Generate Technical Replicates using YASIM V3 API.

SYNOPSIS: python -m yasim generate_isoform_replicates [-h] -d [DEPTH] [-n [NUM_REPLICATES]] [-r [RANGE]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV. Can be compressed.
  -n [NUM_REPLICATES], --num_replicates [NUM_REPLICATES]
                        [OPTIONAL] Type: int; Default: 3
                        Number of Replicates to be generated
  -r [RANGE], --range [RANGE]
                        [OPTIONAL] Type: float; Default: 0.1
                        Range of Generated Data

yasim pbsim#

pbsim.py -- LLRG adapter for PBSIM v1, a TGS DNA-Seq simulator

SYNOPSIS: python -m yasim pbsim [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                                [--truncate_ratio_3p [TRUNCATE_RATIO_3P]] [--truncate_ratio_5p [TRUNCATE_RATIO_5P]] [-c] [--preserve_intermediate_files]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: pbsim
                        Executable name or absolute path of pbsim
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -c, --ccs
                        [OPTIONAL] Default: False
                        Simulate CCS instead of CLR
  --preserve_intermediate_files
                        [OPTIONAL] Default: False
                        Do not remove intermediate files.

yasim pbsim2#

pbsim2.py -- LLRG adapter for PBSIM v2, a TGS DNA-Seq simulator

SYNOPSIS: python -m yasim pbsim2 [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                                 [--truncate_ratio_3p [TRUNCATE_RATIO_3P]] [--truncate_ratio_5p [TRUNCATE_RATIO_5P]] -m [{R103,P5C3,P4C2,P6C4,R95,R94}]
                                 [--preserve_intermediate_files]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: pbsim2
                        Executable name or absolute path of pbsim2
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -m [{R103,P5C3,P4C2,P6C4,R95,R94}], --hmm_model [{R103,P5C3,P4C2,P6C4,R95,R94}]
                        [REQUIRED] Type: str; No defaults
                        Basename or absolute path of HMM file
                        CHOICES:
                            R103
                            P5C3
                            P4C2
                            P6C4
                            R95
                            R94
  --preserve_intermediate_files
                        [OPTIONAL] Default: False
                        Do not remove intermediate files.

yasim pbsim3#

pbsim3.py -- LLRG adapter for PBSIM v3, a TGS DNA- and RNA-Seq simulator

SYNOPSIS: python -m yasim pbsim3 [-h] -F [FASTAS] [-j [JOBS]] [--simulator_name [SIMULATOR_NAME]] [-e [LLRG_EXECUTABLE_PATH]] -d [DEPTH] -o [OUT] [--not_perform_assemble]
                                 [--truncate_ratio_3p [TRUNCATE_RATIO_3P]] [--truncate_ratio_5p [TRUNCATE_RATIO_5P]] -m [HMM_MODEL] -M [{errhmm,qshmm}] [--ccs_pass [CCS_PASS]]
                                 [--ccs_path [CCS_PATH]] [--samtools_path [SAMTOOLS_PATH]] [--strategy {wgs,trans}] [--preserve_intermediate_files]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -F [FASTAS], --fastas [FASTAS]
                        [REQUIRED] Type: str; No defaults
                        Directory of transcribed cDNA sequences in FASTA format from `transcribe` step
  -j [JOBS], --jobs [JOBS]
                        [OPTIONAL] Type: int; Default: 20
                        Number of LLRGs to be executed in parallel
  --simulator_name [SIMULATOR_NAME]
                        [OPTIONAL] Type: str; Default: None
                        Custom simulator name. Used in FASTQ tags
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -e [LLRG_EXECUTABLE_PATH], --llrg_executable_path [LLRG_EXECUTABLE_PATH]
                        [OPTIONAL] Type: str; Default: pbsim3
                        Executable name or absolute path of pbsim3
  -d [DEPTH], --depth [DEPTH]
                        [REQUIRED] Type: str; No defaults
                        Path to input Isoform-Level Depth TSV generated by `generate_depth_v2` or `generate_isoform_depth` step
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Output transcript prefix. The output file would be {out}.fq for single-end and {out}_1.fq, {out}_2.fq for pair end.
                        If --not_perform_assemble is set, would NOT generate FASTQ files but a {out}.d unassembled directory
  --not_perform_assemble
                        [OPTIONAL] Default: False
                        Do NOT assemble the output of each isoform into one file.
  --truncate_ratio_3p [TRUNCATE_RATIO_3P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 3 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  --truncate_ratio_5p [TRUNCATE_RATIO_5P]
                        [OPTIONAL] Type: float; Default: 0.0
                        [Single End TGS Only] Ratio of 5 prime truncation, range from 0 (no truncation) to 1 (truncate all).
                        This step is done in assemble, so if --not_perform_assemble is set, this option would be useless.
  -m [HMM_MODEL], --hmm_model [HMM_MODEL]
                        [REQUIRED] Type: str; No defaults
                        Basename of HMM file. If you select errhmm in hmm_method, it would be one of ['ONT', 'SEQUEL', 'RSII']. If you select qshmm in hmm_method, it would be one of ['ONT', 'RSII'].
  -M [{errhmm,qshmm}], --hmm_method [{errhmm,qshmm}]
                        [REQUIRED] Type: str; No defaults
                        Whether to simulate using quality score (as PBSIM2) or error profile (new)
                        CHOICES:
                            errhmm
                            qshmm
  --ccs_pass [CCS_PASS]
                        [OPTIONAL] Type: int; Default: 1
                        CCS Multipass Settings. Use 1 for CLR and others for CCS.
  --ccs_path [CCS_PATH]
                        [OPTIONAL] Type: str; Default: ccs
                        Executable name of ccs or pbccs. Omitted if ccs_pass == 1.
  --samtools_path [SAMTOOLS_PATH]
                        [OPTIONAL] Type: str; Default: samtools
                        Executable name of samtools. Omitted if ccs_pass == 1.
  --strategy {wgs,trans}
                        [OPTIONAL] Type: PBSIM3_STRATEGY; Default: wgs
                        Whether to use transcript (trans) mode or wgs (wgs) mode
                        CHOICES:
                            wgs -- WGS mode (as PBSIM2)
                            trans -- TRANS mode
  --preserve_intermediate_files
                        [OPTIONAL] Default: False
                        Do not remove intermediate files.

yasim sample_transcript#

transcribe.py -- General-purpose stranded transcription, from reference genome to reference cDNA.

SYNOPSIS: python -m labw_utils.bioutils transcribe [-h] -f [FASTA] -g [GTF] -o [OUT] [--no_write_single_transcript]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -f [FASTA], --fasta [FASTA]
                        [REQUIRED] Type: str; No defaults
                        Path to input reference genome sequence in FASTA format. Can be compressed.
  -g [GTF], --gtf [GTF]
                        [REQUIRED] Type: str; No defaults
                        Path to input genomic annotation in GTF format. Can be compressed.
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path of Output cDNA FASTA
  --no_write_single_transcript
                        [OPTIONAL] Default: False
                        Stop splitting cDNA of each isoform into separate file

yasim self_check#

self_check.py -- Check whether YASIM installation is complete.

.. versionadded:: 3.1.6

yasim transcribe#

transcribe.py -- General-purpose stranded transcription, from reference genome to reference cDNA.

SYNOPSIS: python -m labw_utils.bioutils transcribe [-h] -f [FASTA] -g [GTF] -o [OUT] [--no_write_single_transcript]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -f [FASTA], --fasta [FASTA]
                        [REQUIRED] Type: str; No defaults
                        Path to input reference genome sequence in FASTA format. Can be compressed.
  -g [GTF], --gtf [GTF]
                        [REQUIRED] Type: str; No defaults
                        Path to input genomic annotation in GTF format. Can be compressed.
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path of Output cDNA FASTA
  --no_write_single_transcript
                        [OPTIONAL] Default: False
                        Stop splitting cDNA of each isoform into separate file

yasim_scripts#

yasim_scripts extract_quality_from_maf#

extract_quality_from_maf.py -- Extract per-base alignment status from MAF files.

This script extracts per-base alignment status from MAF files into a TSV file of one line,
which has 4 fields for Insertion, Deletion, Match and Substitution.

Synopsis: python -m yasim_scripts extract_quality_from_maf [MAF1] [[MAF2]...]

Arguments:
    [MAF1] [[MAF2]...] path to MAF files produced by PBSIM3 or LAST aligner.

.. versionadded:: 3.1.5

yasim_scripts extract_read_length_from_maf_gp#

extract_read_length_from_maf_gp.py -- Extraction of Read Length from MAF, General-Purpose

This script can be used to extract read length of all transcript IDs from transcriptomically-aligned TGS RNA-Seq MAF
for assessing read completeness.

Synopsis: python -m yasim_scripts extract_read_length_from_maf_gp [MAF1] [[MAF2]...]

Arguments:
    [MAF1] [[MAF2]...] path to MAF files produced by PBSIM3 or LAST aligner.

.. versionadded:: 3.1.5

yasim_scripts extract_read_length_from_maf_yasim#

extract_read_length_from_maf_yasim.py -- Extraction of Read Length from MAF, YASIM

This script can be used to extract read length of all transcript IDs from transcriptomically-aligned TGS RNA-Seq MAF
generated by YASIM for assessing read completeness.

Synopsis: python -m yasim_scripts extract_read_length_from_maf_yasim [MAF1] [[MAF2]...]

Arguments:
    [MAF1] [[MAF2]...] path to MAF files produced by PBSIM3 or LAST aligner.

.. versionadded:: 3.1.5

yasim_scripts featurecounts_to_depth#

featurecounts_to_depth.py -- Convert FeatureCounts Output for NGS and TGS to YASIM input depth.

SYNOPSIS: python -m yasim_scripts featurecounts_to_depth [-h] -i [INPUT] [--software [{featureCounts,Salmon}]] -o [OUT] [-f [{GENE_ID,TRANSCRIPT_ID}]] [--read_length [READ_LENGTH] |
                                                         --read_completeness [READ_COMPLETENESS]]

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -i [INPUT], --input [INPUT]
                        [REQUIRED] Type: str; No defaults
                        Path to featureCounts/Salmon output TSV
  --software [{featureCounts,Salmon}]
                        [OPTIONAL] Type: str; Default: featureCounts
                        name of quantification software
                        CHOICES:
                            featureCounts
                            Salmon
  -o [OUT], --out [OUT]
                        [REQUIRED] Type: str; No defaults
                        Path to output TSV
  -f [{GENE_ID,TRANSCRIPT_ID}], --feature_name [{GENE_ID,TRANSCRIPT_ID}]
                        [OPTIONAL] Type: str; Default: TRANSCRIPT_ID
                        Name of output feature
                        CHOICES:
                            GENE_ID
                            TRANSCRIPT_ID
  --read_length [READ_LENGTH]
                        [OPTIONAL] Type: int; Default: None
                        [For NGS Only] Read length
  --read_completeness [READ_COMPLETENESS]
                        [OPTIONAL] Type: float; Default: None
                        [For TGS Only] Mean read completeness

yasim_scripts merge_pbccs#

merge_pbccs.py -- Merge BAMs created by pbccs.

SYNOPSIS: python -m yasim_scripts merge_pbccs [-h] -o OUT [-e PBMERGE_PATH] --input_bam_glob INPUT_BAM_GLOB

OPTIONS:
  -h, --help
                        [OPTIONAL]
                        show this help message and exit
  -o OUT, --out OUT
                        [REQUIRED] Type: str; No defaults
                        Output BAM file
  -e PBMERGE_PATH, --pbmerge_path PBMERGE_PATH
                        [OPTIONAL] Type: str; Default: None
                        Path to pbmerge
  --input_bam_glob INPUT_BAM_GLOB
                        [REQUIRED] Type: str; No defaults
                        Glob expression for BAM files that will be merged.