Skip to content

Commit

Permalink
Merge pull request #6683 from khaled196/cnvkit_export
Browse files Browse the repository at this point in the history
Add CNVkit export subcommand tools to convert the CNVkit output into suitable formats
  • Loading branch information
bgruening authored Jan 20, 2025
2 parents 59b031e + fe80819 commit 5199552
Show file tree
Hide file tree
Showing 16 changed files with 631 additions and 0 deletions.
60 changes: 60 additions & 0 deletions tools/cnvkit/bed.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<tool id="cnvkit_export_bed" name="CNVkit Export BED" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export bed": writes the segments of a CNVkit
         .cns file as BED records.
         NOTE(review): the xrefs, creators, requirements, sample_sex and citations macros
         and the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are not defined in this file;
         presumably they come from the imported macros.xml. -->
    <description>Converts the Segmented copy ratio data file (*.cns) file into BED file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link the input dataset into the working directory under a fixed name.
ln -s '$input_segmented_file' ./sample.cns &&
cnvkit.py export bed
./sample.cns
## Boolean parameters expand to their flag when checked and to an empty string otherwise.
$advanced_settings.label_genes
#if $advanced_settings.sample_id
--sample-id '$advanced_settings.sample_id'
#end if
#if $advanced_settings.ploidy
--ploidy $advanced_settings.ploidy
#end if
#if str($advanced_settings.sample_sex) and $advanced_settings.sample_sex != ""
--sample-sex '$advanced_settings.sample_sex'
#end if
$advanced_settings.male_reference
## The fixed output name is collected by the from_work_dir attribute of the output below.
--output sample.cnv.bed
#if $advanced_settings.diploid_parx_genome
--diploid-parx-genome '$advanced_settings.diploid_parx_genome'
#end if
]]></command>
    <inputs>
        <param name="input_segmented_file" type="data" format="tabular" label="Segmented Copy Ratio Data File (cns file)" help="The output of the CNVkit 'segment' or 'call' sub-command" />
        <section name="advanced_settings" title="Advanced settings" expanded="false">
            <param argument="--label-genes" type="boolean" checked="false" truevalue="--label-genes" falsevalue="" label="Label Genes" help="Show gene names in the 4th column of the BED file" />
            <!-- The help text describes BED output: this exporter does not write a VCF. -->
            <param argument="--sample-id" optional="true" type="text" label="Sample ID" value="" help="Identifier to write in the 4th column of the BED file. [Default: the sample ID taken from the input file name]" />
            <param argument="--ploidy" optional="true" type="integer" label="Ploidy" min="1" value="2" help="Ploidy of the sample cells. [Default: 2]" />
            <expand macro="sample_sex"/>
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Male Reference" help="Assume inputs were normalized to a male reference" />
            <param argument="--diploid-parx-genome" optional="true" type="text" label="Diploid Parx Genome" value="" help="Considers the given human genome's PAR of chromosome X as autosomal. Example: 'grch38'" />
        </section>
    </inputs>
    <outputs>
        <data name="CNVs_BED" format="bed" label="${tool.name} on ${on_string}: CNVs BED file" from_work_dir="sample.cnv.bed" />
    </outputs>
    <tests>
        <!-- Single smoke test: custom sample ID and sample sex, all other settings at their defaults. -->
        <test expect_num_outputs="1">
            <param name="input_segmented_file" ftype="tabular" value="sample.cns" />
            <param name="sample_id" value="SampleID" />
            <param name="sample_sex" value="Female" />
            <output name="CNVs_BED" file="sample.cnv.bed" />
        </test>
    </tests>
    <help><![CDATA[
Export the segmented copy number data (from a .cns file) to BED format.
The resulting BED file describes copy number gains and losses across each segment.
]]></help>
    <expand macro="citations" />
</tool>




32 changes: 32 additions & 0 deletions tools/cnvkit/cdt.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<tool id="cnvkit_export_cdt" name="CNVkit Export CDT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export cdt": converts a CNVkit .cnr file
         to a Clustered Data Table.
         NOTE(review): the xrefs, creators, requirements and citations macros and the
         @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are not defined in this file;
         presumably they come from the imported macros.xml. -->
    <description>Convert log2 ratios to Clustered Data Table (CDT)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link the input dataset into the working directory under a fixed name.
ln -s '$input_segmented_file' ./sample.cnr &&
cnvkit.py export cdt
./sample.cnr
## The fixed output name is collected by the from_work_dir attribute of the output below.
--output sample.cnv.cdt
]]></command>
    <inputs>
        <!-- The parameter is named input_segmented_file, but the command links it as a .cnr file. -->
        <param name="input_segmented_file" type="data" format="tabular" label="Log2 Copy Ratio Data File" help="The output of the CNVkit 'fix' sub-command" />
    </inputs>
    <outputs>
        <!-- The label names the CDT format; it previously read "Nexus Basic File". -->
        <data name="CNVs_cdt" format="cdt" label="${tool.name} on ${on_string}: CNVs CDT file" from_work_dir="sample.cnv.cdt" />
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="input_segmented_file" ftype="tabular" value="sample.cnr" />
            <output name="CNVs_cdt" file="sample.cnv.cdt" />
        </test>
    </tests>
    <help><![CDATA[
This tool converts CNVkit log2 copy ratio data to the CDT format, which is compatible with Java TreeView for visualisation and hierarchical clustering.
The conversion enables efficient exploration and interpretation of genomic copy number variations.
]]></help>
    <expand macro="citations" />
</tool>
32 changes: 32 additions & 0 deletions tools/cnvkit/jtv.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<tool id="cnvkit_export_jtv" name="CNVkit Export JTV" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export jtv": converts a CNVkit .cnr file
         to Java TreeView's native format.
         NOTE(review): the xrefs, creators, requirements and citations macros and the
         @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are not defined in this file;
         presumably they come from the imported macros.xml. -->
    <description>Convert log2 ratios to Java TreeView's native format</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link the input dataset into the working directory under a fixed name.
ln -s '$input_segmented_file' ./sample.cnr &&
cnvkit.py export jtv
./sample.cnr
## The fixed output name is collected by the from_work_dir attribute of the output below.
--output sample.cnv.jtv
]]></command>
    <inputs>
        <!-- The parameter is named input_segmented_file, but the command links it as a .cnr file. -->
        <param name="input_segmented_file" type="data" format="tabular" label="Log2 Copy Ratio Data File" help="The output of the CNVkit 'fix' sub-command" />
    </inputs>
    <outputs>
        <!-- The output name CNVs_cdt is kept unchanged because the test below (and possibly
             existing workflows) reference it; only the label, which previously read
             "Nexus Basic File", is corrected to name the JTV format. -->
        <data name="CNVs_cdt" format="jtv" label="${tool.name} on ${on_string}: CNVs JTV file" from_work_dir="sample.cnv.jtv" />
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="input_segmented_file" ftype="tabular" value="sample.cnr" />
            <output name="CNVs_cdt" file="sample.cnv.jtv" />
        </test>
    </tests>
    <help><![CDATA[
This tool converts CNVkit log2 copy ratio data to Java TreeView's native format (.jtv),
enabling easy visualisation and clustering analysis with preserved settings for seamless data exploration.
]]></help>
    <expand macro="citations" />
</tool>
33 changes: 33 additions & 0 deletions tools/cnvkit/nexus_basic.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<tool id="cnvkit_export_nexus_basic"
      name="CNVkit Export Nexus Basics"
      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export nexus-basic".
         NOTE(review): the expanded macros and the @...@ version tokens are not defined
         in this file; presumably they come from the imported macros.xml. -->
    <description>Convert bin-level log2 ratios to Nexus Copy Number "basic" format</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
ln -s '$input_segmented_file' ./sample.cnr &&
cnvkit.py export nexus-basic
./sample.cnr
--output sample.cnv.tsv
]]></command>
    <inputs>
        <!-- The dataset is linked as ./sample.cnr by the command above. -->
        <param type="data"
               name="input_segmented_file"
               format="tabular"
               label="Log2 Copy Ratio Data File"
               help="The output of the CNVkit 'fix' sub-command"/>
    </inputs>
    <outputs>
        <!-- Collected from the file the command writes through its output option. -->
        <data name="CNVs_NexusBasic"
              format="tabular"
              from_work_dir="sample.cnv.tsv"
              label="${tool.name} on ${on_string}: CNVs Nexus Basic File"/>
    </outputs>
    <tests>
        <!-- One test: default conversion of the sample .cnr fixture. -->
        <test expect_num_outputs="1">
            <param name="input_segmented_file" value="sample.cnr" ftype="tabular"/>
            <output name="CNVs_NexusBasic" file="sample.cnv.tsv"/>
        </test>
    </tests>
    <help><![CDATA[
This tool converts CNVkit log2 copy ratio data to the Nexus Copy Number "basic" format,
enabling efficient visualisation and analysis of genomic alterations in Nexus software.
This conversion facilitates downstream interpretation and integration with other genomic data.
]]></help>
    <expand macro="citations"/>
</tool>
63 changes: 63 additions & 0 deletions tools/cnvkit/nexus_ogt.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<tool id="cnvkit_export_nexus_ogt" name="CNVkit Export Nexus OGT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export nexus-ogt": combines a CNVkit .cnr file
         with a VCF of SNVs and writes Nexus "Custom-OGT" output.
         NOTE(review): the xrefs, creators, requirements and citations macros and the
         @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are not defined in this file;
         presumably they come from the imported macros.xml. -->
    <description>Convert log2 ratios and b-allele freqs to Nexus "Custom-OGT" format</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link both input datasets into the working directory under fixed names.
ln -s '$input_segmented_file' ./sample.cnr &&
ln -s '$input_vcf' ./sample.vcf &&
cnvkit.py export nexus-ogt
./sample.cnr
./sample.vcf
#if $advanced_settings.sample_id
--sample-id '$advanced_settings.sample_id'
#end if
#if $advanced_settings.normal_id
--normal-id '$advanced_settings.normal_id'
#end if
## Optional numeric parameters are compared as strings: a plain truthiness test
## would treat an explicitly entered 0 / 0.0 as "not set" and silently drop it.
#if str($advanced_settings.min_variant_depth) != ''
--min-variant-depth $advanced_settings.min_variant_depth
#end if
#if str($advanced_settings.zygosity_freq) != ''
--zygosity-freq $advanced_settings.zygosity_freq
#end if
#if str($advanced_settings.min_weight) != ''
--min-weight $advanced_settings.min_weight
#end if
## The fixed output name is collected by the from_work_dir attribute of the output below.
--output sample.cnv.txt
]]></command>
    <inputs>
        <!-- The parameter is named input_segmented_file, but the command links it as a
             .cnr file and the test supplies sample.cnr, so the label names the .cnr format. -->
        <param name="input_segmented_file" type="data" format="tabular" label="Log2 Copy Ratio Data File (cnr file)" help="The output of the CNVkit 'fix' sub-command" />
        <param name="input_vcf" type="data" format="vcf" label="VCF of SNVs for the Same Sample" help="Used to calculate b-allele frequencies" />
        <section name="advanced_settings" title="Advanced settings" expanded="false">
            <!-- For this exporter the sample ID selects a sample in the input VCF; no VCF is written. -->
            <param argument="--sample-id" optional="true" type="text" label="Sample ID" value="" help="Name of the sample in the input VCF to use to extract b-allele frequencies" />
            <param argument="--normal-id" optional="true" type="text" label="Normal Id" value="" help="Corresponding normal sample ID in the input VCF" />
            <param argument="--min-variant-depth" optional="true" type="integer" label="Minimum Variant Depth" min="1" value="20" help="Minimum read depth for a SNP in the VCF to be counted. [Default: 20]" />
            <param argument="--zygosity-freq" optional="true" type="float" label="Zygosity Frequency" min="0" value="0.25" help="Ignore VCF's genotypes (GT field) and instead infer zygosity from allele frequencies. [Default if used without a number: 0.25]" />
            <param argument="--min-weight" optional="true" type="float" label="Minimum Weight" min="0" max="1" value="0.0" help="Minimum weight (between 0 and 1) for a bin to be included in the output. [Default: 0.0]" />
        </section>
    </inputs>
    <outputs>
        <!-- The label names the Nexus OGT format; it previously read "theta file". -->
        <data name="CNVs_NexusOGT" format="txt" label="${tool.name} on ${on_string}: CNVs Nexus OGT file" from_work_dir="sample.cnv.txt" />
    </outputs>
    <tests>
        <!-- Single smoke test with all advanced settings left at their defaults. -->
        <test expect_num_outputs="1">
            <param name="input_segmented_file" ftype="tabular" value="sample.cnr" />
            <param name="input_vcf" ftype="vcf" value="sample.cnv.vcf" />
            <output name="CNVs_NexusOGT" file="sample.cnv.txt" />
        </test>
    </tests>
    <help><![CDATA[
This tool converts CNVkit log2 copy ratios and B-allele frequencies to the Nexus "Custom-OGT" format,
enabling comprehensive CNV analysis with allelic imbalance insights. The output is compatible with Nexus software,
facilitating advanced genomic interpretation and integration with other datasets.
]]></help>
    <expand macro="citations" />
</tool>




35 changes: 35 additions & 0 deletions tools/cnvkit/seg.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<tool id="cnvkit_export_seg"
      name="CNVkit Export SEG"
      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="21.05">
    <!-- Galaxy wrapper around "cnvkit.py export seg".
         NOTE(review): the expanded macros and the @...@ version tokens are not defined
         in this file; presumably they come from the imported macros.xml. -->
    <description>Convert segments to Segment (SEG) format</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="creators"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
ln -s '$input_segmented_file' ./sample.cns &&
cnvkit.py export seg
./sample.cns
$advanced_settings.enumerate_chroms
--output sample.cnv.seg
]]></command>
    <inputs>
        <!-- The dataset is linked as ./sample.cns by the command above. -->
        <param type="data"
               name="input_segmented_file"
               format="tabular"
               label="Segmented copy ratio data file (cns file)"
               help=""/>
        <section name="advanced_settings" title="Advanced settings" expanded="false">
            <!-- Expands to the flag when checked and to an empty string otherwise. -->
            <param argument="--enumerate-chroms"
                   type="boolean"
                   truevalue="--enumerate-chroms"
                   falsevalue=""
                   checked="false"
                   label="Enumerate Chroms"
                   help="Replace chromosome names with sequential integer IDs"/>
        </section>
    </inputs>
    <outputs>
        <!-- Collected from the file the command writes through its output option. -->
        <data name="CNVs_SEG"
              format="seg"
              from_work_dir="sample.cnv.seg"
              label="${tool.name} on ${on_string}: CNVs SEG file"/>
    </outputs>
    <tests>
        <!-- One test: default conversion of the sample .cns fixture. -->
        <test expect_num_outputs="1">
            <param name="input_segmented_file" value="sample.cns" ftype="tabular"/>
            <output name="CNVs_SEG" file="sample.cnv.seg"/>
        </test>
    </tests>
    <help><![CDATA[
Export the segmented copy number data (from a .cns file) to the standard SEG format.
]]></help>
    <expand macro="citations"/>
</tool>
1 change: 1 addition & 0 deletions tools/cnvkit/test-data/sample.cnv.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
chrM 848 16023 SampleID 1
57 changes: 57 additions & 0 deletions tools/cnvkit/test-data/sample.cnv.cdt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
GID CLID NAME GWEIGHT sample
AID ARRY000X
EWEIGHT 1
GENE0X IMAGE:0 chrM:848-1121:- 1 -0.545691
GENE1X IMAGE:1 chrM:1394-1667:- 1 0.488494
GENE2X IMAGE:2 chrM:1667-1940:- 1 0.0200539
GENE3X IMAGE:3 chrM:2212-2485:- 1 -0.234816
GENE4X IMAGE:4 chrM:2485-2758:- 1 0.153636
GENE5X IMAGE:5 chrM:2758-3031:- 1 -1.35597
GENE6X IMAGE:6 chrM:3031-3304:- 1 0.385198
GENE7X IMAGE:7 chrM:3306-3579:- 1 -1.37983
GENE8X IMAGE:8 chrM:3579-3853:- 1 0.471839
GENE9X IMAGE:9 chrM:3853-4126:- 1 0.444955
GENE10X IMAGE:10 chrM:4126-4400:- 1 0.465611
GENE11X IMAGE:11 chrM:4401-4695:- 1 -0.583114
GENE12X IMAGE:12 chrM:4695-4990:- 1 1.13235
GENE13X IMAGE:13 chrM:4990-5284:- 1 0.209867
GENE14X IMAGE:14 chrM:5284-5579:- 1 1.18493
GENE15X IMAGE:15 chrM:5586-5655:- 1 -21.0389
GENE16X IMAGE:16 chrM:5656-5729:- 1 -19.8096
GENE17X IMAGE:17 chrM:5760-5891:- 1 0.0
GENE18X IMAGE:18 chrM:5903-6171:- 1 -0.0283011
GENE19X IMAGE:19 chrM:6171-6440:- 1 -0.212606
GENE20X IMAGE:20 chrM:6440-6708:- 1 -0.38297
GENE21X IMAGE:21 chrM:6708-6977:- 1 -0.317296
GENE22X IMAGE:22 chrM:6977-7245:- 1 -0.127011
GENE23X IMAGE:23 chrM:7245-7514:- 1 0.72831
GENE24X IMAGE:24 chrM:7517-7767:- 1 0.680162
GENE25X IMAGE:25 chrM:7767-8018:- 1 -0.510836
GENE26X IMAGE:26 chrM:8018-8269:- 1 -0.663743
GENE27X IMAGE:27 chrM:8365-8630:- 1 0.28099
GENE28X IMAGE:28 chrM:8630-8896:- 1 -0.154606
GENE29X IMAGE:29 chrM:8896-9162:- 1 0.147562
GENE30X IMAGE:30 chrM:9162-9427:- 1 0.937798
GENE31X IMAGE:31 chrM:9693-9959:- 1 0.459741
GENE32X IMAGE:32 chrM:9959-10224:- 1 -8.75948
GENE33X IMAGE:33 chrM:10224-10490:- 1 0.509209
GENE34X IMAGE:34 chrM:10490-10756:- 1 0.0655732
GENE35X IMAGE:35 chrM:10756-11022:- 1 0.238204
GENE36X IMAGE:36 chrM:11022-11287:- 1 1.20043
GENE37X IMAGE:37 chrM:11287-11553:- 1 0.224804
GENE38X IMAGE:38 chrM:11553-11819:- 1 -1.50066
GENE39X IMAGE:39 chrM:11819-12084:- 1 0.0920864
GENE40X IMAGE:40 chrM:12084-12350:- 1 -1.15941
GENE41X IMAGE:41 chrM:12350-12616:- 1 -1.25242
GENE42X IMAGE:42 chrM:12616-12882:- 1 -0.187246
GENE43X IMAGE:43 chrM:12882-13147:- 1 0.419127
GENE44X IMAGE:44 chrM:13147-13413:- 1 -0.730007
GENE45X IMAGE:45 chrM:13413-13679:- 1 -0.0213761
GENE46X IMAGE:46 chrM:13944-14210:- 1 -0.337011
GENE47X IMAGE:47 chrM:14210-14476:- 1 -0.0427224
GENE48X IMAGE:48 chrM:14746-14987:- 1 0.722701
GENE49X IMAGE:49 chrM:14987-15228:- 1 -0.105067
GENE50X IMAGE:50 chrM:15228-15470:- 1 -0.96567
GENE51X IMAGE:51 chrM:15470-15711:- 1 0.576278
GENE52X IMAGE:52 chrM:15711-15953:- 1 -1.38778
GENE53X IMAGE:53 chrM:15955-16023:- 1 -21.1972
2 changes: 2 additions & 0 deletions tools/cnvkit/test-data/sample.cnv.input
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#ID chrm start end tumorCount normalCount
start_1_848:end_1_16023 1 848 16023 14716 39185
55 changes: 55 additions & 0 deletions tools/cnvkit/test-data/sample.cnv.jtv
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
CloneID Name sample
IMAGE: chrM:848-1121:- -0.545691
IMAGE: chrM:1394-1667:- 0.488494
IMAGE: chrM:1667-1940:- 0.0200539
IMAGE: chrM:2212-2485:- -0.234816
IMAGE: chrM:2485-2758:- 0.153636
IMAGE: chrM:2758-3031:- -1.35597
IMAGE: chrM:3031-3304:- 0.385198
IMAGE: chrM:3306-3579:- -1.37983
IMAGE: chrM:3579-3853:- 0.471839
IMAGE: chrM:3853-4126:- 0.444955
IMAGE: chrM:4126-4400:- 0.465611
IMAGE: chrM:4401-4695:- -0.583114
IMAGE: chrM:4695-4990:- 1.13235
IMAGE: chrM:4990-5284:- 0.209867
IMAGE: chrM:5284-5579:- 1.18493
IMAGE: chrM:5586-5655:- -21.0389
IMAGE: chrM:5656-5729:- -19.8096
IMAGE: chrM:5760-5891:- 0.0
IMAGE: chrM:5903-6171:- -0.0283011
IMAGE: chrM:6171-6440:- -0.212606
IMAGE: chrM:6440-6708:- -0.38297
IMAGE: chrM:6708-6977:- -0.317296
IMAGE: chrM:6977-7245:- -0.127011
IMAGE: chrM:7245-7514:- 0.72831
IMAGE: chrM:7517-7767:- 0.680162
IMAGE: chrM:7767-8018:- -0.510836
IMAGE: chrM:8018-8269:- -0.663743
IMAGE: chrM:8365-8630:- 0.28099
IMAGE: chrM:8630-8896:- -0.154606
IMAGE: chrM:8896-9162:- 0.147562
IMAGE: chrM:9162-9427:- 0.937798
IMAGE: chrM:9693-9959:- 0.459741
IMAGE: chrM:9959-10224:- -8.75948
IMAGE: chrM:10224-10490:- 0.509209
IMAGE: chrM:10490-10756:- 0.0655732
IMAGE: chrM:10756-11022:- 0.238204
IMAGE: chrM:11022-11287:- 1.20043
IMAGE: chrM:11287-11553:- 0.224804
IMAGE: chrM:11553-11819:- -1.50066
IMAGE: chrM:11819-12084:- 0.0920864
IMAGE: chrM:12084-12350:- -1.15941
IMAGE: chrM:12350-12616:- -1.25242
IMAGE: chrM:12616-12882:- -0.187246
IMAGE: chrM:12882-13147:- 0.419127
IMAGE: chrM:13147-13413:- -0.730007
IMAGE: chrM:13413-13679:- -0.0213761
IMAGE: chrM:13944-14210:- -0.337011
IMAGE: chrM:14210-14476:- -0.0427224
IMAGE: chrM:14746-14987:- 0.722701
IMAGE: chrM:14987-15228:- -0.105067
IMAGE: chrM:15228-15470:- -0.96567
IMAGE: chrM:15470-15711:- 0.576278
IMAGE: chrM:15711-15953:- -1.38778
IMAGE: chrM:15955-16023:- -21.1972
2 changes: 2 additions & 0 deletions tools/cnvkit/test-data/sample.cnv.seg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ID chrom loc.start loc.end num.mark seg.mean
sample chrM 849 16023 54 -1.39012
Loading

0 comments on commit 5199552

Please sign in to comment.