From b573565877bc56467f0afe3cd903cedd8bde1532 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 24 Feb 2026 12:36:39 -0500 Subject: [PATCH 1/5] ci: remove get_branch job for pypi release (#616) * This is from over 4 years ago and I don't think we actually need this. There are times when we want to create releases off other branches (and its easy to forget to update). I think we should just remove --- .github/workflows/release.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f09a00f6..8880ce39 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,25 +4,9 @@ on: tags: - "*.*.**" jobs: - get_branch: - runs-on: ubuntu-latest - outputs: - branch_name: ${{ steps.get_branch_name.outputs.name }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Get branch name - id: get_branch_name - run: | - raw=$(git branch -r --contains ${{ github.ref }}) - branch=$(echo "$raw" | grep "origin/main" | grep -v "HEAD" | sed "s|origin/||" | xargs) - echo "name=$branch" >> "$GITHUB_OUTPUT" build: name: Build distribution runs-on: ubuntu-latest - needs: get_branch - if: needs.get_branch.outputs.branch_name == 'main' steps: - uses: actions/checkout@v4 - name: Set up Python From 31d3a8d50268862ba528add0ddef61d0a37d5d55 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Thu, 2 Apr 2026 09:57:03 -0400 Subject: [PATCH 2/5] Treat all empty/none as '.' in string fields even though pysam does not (#614) Close #610 --- src/ga4gh/vrs/extras/annotator/vcf.py | 31 ++++++++++++++++--- .../test_vcf_expected_altsonly_output.vcf | 6 ++-- .../extras/data/test_vcf_expected_output.vcf | 6 ++-- .../test_vcf_expected_output_no_vrs_attrs.vcf | 4 +-- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/ga4gh/vrs/extras/annotator/vcf.py b/src/ga4gh/vrs/extras/annotator/vcf.py index 37f0d475..3dd70dcf 100644 --- a/src/ga4gh/vrs/extras/annotator/vcf.py +++ b/src/ga4gh/vrs/extras/annotator/vcf.py @@ -41,6 +41,18 @@ class FieldName(str, Enum): ERROR_FIELD = "VRS_Error" +# String-typed INFO fields where pysam incorrectly converts None → b"" +# (empty bytes) instead of the VCF missing value ".". Integer/Float fields +# are unaffected because pysam uses proper BCF missing sentinels for those. +_STRING_FIELDS = frozenset( + { + FieldName.IDS_FIELD, + FieldName.STATES_FIELD, + FieldName.ERROR_FIELD, + } +) + + # VCF character escape map VCF_ESCAPE_MAP = str.maketrans( { @@ -293,11 +305,20 @@ def annotate( if output_vcf_path and vcf_out: for k in additional_info_fields: - # Convert "" and None values (but not 0) to None. - # Pysam outputs "." for missing values. - record.info[k.value] = [ - None if v in ("", None) else v for v in vrs_field_data[k.value] - ] + # pysam correctly converts None → "." for Integer/Float + # INFO fields, but for String fields it converts None → + # "" (empty bytes), violating the VCF spec. Work around + # by using the literal string "." for String-typed fields. + if k in _STRING_FIELDS: + record.info[k.value] = [ + "." if v in ("", None) else v + for v in vrs_field_data[k.value] + ] + else: + record.info[k.value] = [ + None if v in ("", None) else v + for v in vrs_field_data[k.value] + ] vcf_out.write(record) vcf.close() diff --git a/tests/extras/data/test_vcf_expected_altsonly_output.vcf b/tests/extras/data/test_vcf_expected_altsonly_output.vcf index 8d74d7ca..3b7f2ab5 100644 --- a/tests/extras/data/test_vcf_expected_altsonly_output.vcf +++ b/tests/extras/data/test_vcf_expected_altsonly_output.vcf @@ -240,7 +240,7 @@ chr19 82664 . C T 50 PASS platforms=2;platformnames=10X,PacBio;datasets=2;datase chr19 284350 . CA C 50 PASS platforms=4;platformnames=Illumina,10X,PacBio,CG;datasets=4;datasetnames=HiSeqPE300x,10XChromiumLR,CCS15kb_20kb,CGnormal;callsets=5;callsetnames=HiSeqPE300xGATK,10XLRGATK,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes;datasetsmissingcall=CCS15kb_20kb,IonExome,SolidSE75bp;callable=CS_HiSeqPE300xGATK_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;difficultregion=GRCh38_AllHomopolymers_gt6bp_imperfectgt10bp_slop5,GRCh38_SimpleRepeat_imperfecthomopolgt10_slop5;VRS_Allele_IDs=ga4gh:VA.a04jFsNg0bS0RMIWjKWSbwJS4_vp7S6x;VRS_Starts=284350;VRS_Ends=284366;VRS_States=AAAAAAAAAAAAAAA;VRS_Lengths=15;VRS_RepeatSubunitLengths=1 GT:PS:DP:ADALL:AD:GQ 0/1:.:422:117,101:81,75:356 chr19 289464 . T TCACGCCTGTAATCC 50 PASS platforms=4;platformnames=Illumina,PacBio,CG,10X;datasets=4;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR;callsets=6;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,CCS15kb_20kbDV,10XLRGATK;datasetsmissingcall=IonExome,SolidSE75bp;callable=CS_HiSeqPE300xGATK_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.ySvDptXfHB_9WEfu78v32DzBXJfwGgO7;VRS_Starts=289464;VRS_Ends=289466;VRS_States=CACGCCTGTAATCCCA;VRS_Lengths=.;VRS_RepeatSubunitLengths=. GT:PS:DP:ADALL:AD:GQ 0/1:.:518:94,98:116,137:785 chr19 28946400 . T C 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.uV5O4M9zpiwk6sftOd-EDvtw_pkSAvdf;VRS_Starts=28946399;VRS_Ends=28946400;VRS_States=C;VRS_Lengths=.;VRS_RepeatSubunitLengths=. GT:PS:DP:ADALL:AD:GQ 1/1:.:874:0,275:115,378:502 -chr19 490414 . ACT A 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_10XLRGATK_filt;VRS_Allele_IDs=ga4gh:VA.lok7a3lot_cvUyw626otpJi4yxk0X07v;VRS_Starts=490414;VRS_Ends=490416;VRS_States;VRS_Lengths=0;VRS_RepeatSubunitLengths=2 GT:PS:DP:ADALL:AD:GQ 0/1:.:821:163,158:239,220:1004 -chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1;VRS_Starts=.,54220023;VRS_Ends=.,54220024;VRS_States=,A;VRS_Lengths=.,.;VRS_RepeatSubunitLengths=.,. GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 +chr19 490414 . ACT A 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_10XLRGATK_filt;VRS_Allele_IDs=ga4gh:VA.lok7a3lot_cvUyw626otpJi4yxk0X07v;VRS_Starts=490414;VRS_Ends=490416;VRS_States=.;VRS_Lengths=0;VRS_RepeatSubunitLengths=2 GT:PS:DP:ADALL:AD:GQ 0/1:.:821:163,158:239,220:1004 +chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=.,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1;VRS_Starts=.,54220023;VRS_Ends=.,54220024;VRS_States=.,A;VRS_Lengths=.,.;VRS_RepeatSubunitLengths=.,. GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 chr19 54220999 . A T 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Error=Reference mismatch at GRCh38:chr19 position 54220998-54220999 (input gave 'A' but correct ref is 'T') GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 -chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu,;VRS_Starts=54221653,.;VRS_Ends=54221654,.;VRS_States=A,;VRS_Lengths=.,.;VRS_RepeatSubunitLengths=.,. GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 +chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu,.;VRS_Starts=54221653,.;VRS_Ends=54221654,.;VRS_States=A,.;VRS_Lengths=.,.;VRS_RepeatSubunitLengths=.,. GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 diff --git a/tests/extras/data/test_vcf_expected_output.vcf b/tests/extras/data/test_vcf_expected_output.vcf index fd781c5a..da94feb4 100644 --- a/tests/extras/data/test_vcf_expected_output.vcf +++ b/tests/extras/data/test_vcf_expected_output.vcf @@ -240,7 +240,7 @@ chr19 82664 . C T 50 PASS platforms=2;platformnames=10X,PacBio;datasets=2;datase chr19 284350 . CA C 50 PASS platforms=4;platformnames=Illumina,10X,PacBio,CG;datasets=4;datasetnames=HiSeqPE300x,10XChromiumLR,CCS15kb_20kb,CGnormal;callsets=5;callsetnames=HiSeqPE300xGATK,10XLRGATK,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes;datasetsmissingcall=CCS15kb_20kb,IonExome,SolidSE75bp;callable=CS_HiSeqPE300xGATK_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;difficultregion=GRCh38_AllHomopolymers_gt6bp_imperfectgt10bp_slop5,GRCh38_SimpleRepeat_imperfecthomopolgt10_slop5;VRS_Allele_IDs=ga4gh:VA.xgtXGA3ZkV1WgMc6eD9l64fX27S_TScW,ga4gh:VA.a04jFsNg0bS0RMIWjKWSbwJS4_vp7S6x;VRS_Starts=284349,284350;VRS_Ends=284351,284366;VRS_States=CA,AAAAAAAAAAAAAAA;VRS_Lengths=2,15;VRS_RepeatSubunitLengths=2,1 GT:PS:DP:ADALL:AD:GQ 0/1:.:422:117,101:81,75:356 chr19 289464 . T TCACGCCTGTAATCC 50 PASS platforms=4;platformnames=Illumina,PacBio,CG,10X;datasets=4;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR;callsets=6;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,CCS15kb_20kbDV,10XLRGATK;datasetsmissingcall=IonExome,SolidSE75bp;callable=CS_HiSeqPE300xGATK_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.nqqTUy-a2gssemOmJb4CJv-HNuFAmGrO,ga4gh:VA.ySvDptXfHB_9WEfu78v32DzBXJfwGgO7;VRS_Starts=289463,289464;VRS_Ends=289464,289466;VRS_States=T,CACGCCTGTAATCCCA;VRS_Lengths=1,.;VRS_RepeatSubunitLengths=1,. GT:PS:DP:ADALL:AD:GQ 0/1:.:518:94,98:116,137:785 chr19 28946400 . T C 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.yPr2pVvJeWHDHarhzAvOCb5Cn9UMF6a5,ga4gh:VA.uV5O4M9zpiwk6sftOd-EDvtw_pkSAvdf;VRS_Starts=28946399,28946399;VRS_Ends=28946400,28946400;VRS_States=T,C;VRS_Lengths=1,.;VRS_RepeatSubunitLengths=1,. GT:PS:DP:ADALL:AD:GQ 1/1:.:874:0,275:115,378:502 -chr19 490414 . ACT A 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_10XLRGATK_filt;VRS_Allele_IDs=ga4gh:VA.aje4-hx7eihWndAwfhzNq_7CZV3bRMXf,ga4gh:VA.lok7a3lot_cvUyw626otpJi4yxk0X07v;VRS_Starts=490413,490414;VRS_Ends=490416,490416;VRS_States=ACT,;VRS_Lengths=3,0;VRS_RepeatSubunitLengths=3,2 GT:PS:DP:ADALL:AD:GQ 0/1:.:821:163,158:239,220:1004 -chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.LlmfhAC3gQlVQUwXWYiYjrn5V_K8vBz1,,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1;VRS_Starts=54220023,.,54220023;VRS_Ends=54220024,.,54220024;VRS_States=G,,A;VRS_Lengths=1,.,.;VRS_RepeatSubunitLengths=1,.,. GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 +chr19 490414 . ACT A 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_10XLRGATK_filt;VRS_Allele_IDs=ga4gh:VA.aje4-hx7eihWndAwfhzNq_7CZV3bRMXf,ga4gh:VA.lok7a3lot_cvUyw626otpJi4yxk0X07v;VRS_Starts=490413,490414;VRS_Ends=490416,490416;VRS_States=ACT,.;VRS_Lengths=3,0;VRS_RepeatSubunitLengths=3,2 GT:PS:DP:ADALL:AD:GQ 0/1:.:821:163,158:239,220:1004 +chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.LlmfhAC3gQlVQUwXWYiYjrn5V_K8vBz1,.,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1;VRS_Starts=54220023,.,54220023;VRS_Ends=54220024,.,54220024;VRS_States=G,.,A;VRS_Lengths=1,.,.;VRS_RepeatSubunitLengths=1,.,. GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 chr19 54220999 . A T 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Error=Reference mismatch at GRCh38:chr19 position 54220998-54220999 (input gave 'A' but correct ref is 'T') GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 -chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.kea5G-J1teg0iHMbgUELy-4L9lbJkgoj,ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu,;VRS_Starts=54221653,54221653,.;VRS_Ends=54221654,54221654,.;VRS_States=T,A,;VRS_Lengths=1,.,.;VRS_RepeatSubunitLengths=1,.,. GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 +chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.kea5G-J1teg0iHMbgUELy-4L9lbJkgoj,ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu,.;VRS_Starts=54221653,54221653,.;VRS_Ends=54221654,54221654,.;VRS_States=T,A,.;VRS_Lengths=1,.,.;VRS_RepeatSubunitLengths=1,.,. GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 diff --git a/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf b/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf index c81bb519..9a50b61f 100644 --- a/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf +++ b/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf @@ -236,6 +236,6 @@ chr19 284350 . CA C 50 PASS platforms=4;platformnames=Illumina,10X,PacBio,CG;dat chr19 289464 . T TCACGCCTGTAATCC 50 PASS platforms=4;platformnames=Illumina,PacBio,CG,10X;datasets=4;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR;callsets=6;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,CCS15kb_20kbDV,10XLRGATK;datasetsmissingcall=IonExome,SolidSE75bp;callable=CS_HiSeqPE300xGATK_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.nqqTUy-a2gssemOmJb4CJv-HNuFAmGrO,ga4gh:VA.ySvDptXfHB_9WEfu78v32DzBXJfwGgO7 GT:PS:DP:ADALL:AD:GQ 0/1:.:518:94,98:116,137:785 chr19 28946400 . T C 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_10XLRGATK_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_CCS15kb_20kbDV_filt,CS_CCS15kb_20kbGATK4_filt;VRS_Allele_IDs=ga4gh:VA.yPr2pVvJeWHDHarhzAvOCb5Cn9UMF6a5,ga4gh:VA.uV5O4M9zpiwk6sftOd-EDvtw_pkSAvdf GT:PS:DP:ADALL:AD:GQ 1/1:.:874:0,275:115,378:502 chr19 490414 . ACT A 50 PASS platforms=5;platformnames=Illumina,PacBio,CG,10X,Solid;datasets=5;datasetnames=HiSeqPE300x,CCS15kb_20kb,CGnormal,10XChromiumLR,SolidSE75bp;callsets=7;callsetnames=HiSeqPE300xGATK,CCS15kb_20kbDV,CCS15kb_20kbGATK4,CGnormal,HiSeqPE300xfreebayes,10XLRGATK,SolidSE75GATKHC;datasetsmissingcall=IonExome;callable=CS_HiSeqPE300xGATK_callable,CS_CCS15kb_20kbDV_callable,CS_CCS15kb_20kbGATK4_callable,CS_CGnormal_callable,CS_HiSeqPE300xfreebayes_callable;filt=CS_10XLRGATK_filt;VRS_Allele_IDs=ga4gh:VA.aje4-hx7eihWndAwfhzNq_7CZV3bRMXf,ga4gh:VA.lok7a3lot_cvUyw626otpJi4yxk0X07v GT:PS:DP:ADALL:AD:GQ 0/1:.:821:163,158:239,220:1004 -chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.LlmfhAC3gQlVQUwXWYiYjrn5V_K8vBz1,,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1 GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 +chr19 54220024 . G *,A 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.LlmfhAC3gQlVQUwXWYiYjrn5V_K8vBz1,.,ga4gh:VA.I7J3i1B36BACEUINcTwEh7uMv3I-PXT1 GT:PS:DP:ADALL:AD:GQ 1/2:.:45:0,20,25:0,20,25:99 chr19 54220999 . A T 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Error=Reference mismatch at GRCh38:chr19 position 54220998-54220999 (input gave 'A' but correct ref is 'T') GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 -chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.kea5G-J1teg0iHMbgUELy-4L9lbJkgoj,ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu, GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 +chr19 54221654 . T A,P 50 PASS platforms=1;platformnames=PacBio;datasets=1;datasetnames=CCS15kb_20kb;callsets=1;callsetnames=CCS15kb_20kbGATK4;datasetsmissingcall=HiSeqPE300x,CCS15kb_20kb,10XChromiumLR,CGnormal,IonExome,SolidSE75bp;callable=CS_CCS15kb_20kbGATK4_callable;filt=CS_CCS15kb_20kbDV_filt,CS_10XLRGATK_filt,CS_HiSeqPE300xfreebayes_filt;difficultregion=HG001.hg38.300x.bam.bilkentuniv.010920.dups,hg38.segdups_sorted_merged;VRS_Allele_IDs=ga4gh:VA.kea5G-J1teg0iHMbgUELy-4L9lbJkgoj,ga4gh:VA.Zzlc24htmBV1HZZzWYgPD2_GfMInkrZu,. GT:PS:DP:ADALL:AD:GQ 0/1:.:45:0,20,25:0,20,25:99 From 596b07c26ac56fb24171549473b48b8e4c782632 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Thu, 2 Apr 2026 21:15:16 +0200 Subject: [PATCH 3/5] fix: copies=0 producing wrong VRS object type (#621) copies=0 in _from_hgvs falls through to CopyNumberChange branch because `if copies:` treats 0 as falsy. changed to `if copies is not None:` --- src/ga4gh/vrs/extras/translator.py | 2 +- tests/extras/test_cnv_translator.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 1fe5e350..00da9bb1 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -573,7 +573,7 @@ def _from_hgvs( ) copies = kwargs.get("copies") - if copies: + if copies is not None: cnv = models.CopyNumberCount(location=location, copies=copies) else: copy_change = kwargs.get("copy_change") diff --git a/tests/extras/test_cnv_translator.py b/tests/extras/test_cnv_translator.py index 723ddfd2..a7a3fa0e 100644 --- a/tests/extras/test_cnv_translator.py +++ b/tests/extras/test_cnv_translator.py @@ -161,3 +161,15 @@ def test_from_hgvs_cn(tlr, hgvsexpr, copies, expected): """Test that _from_hgvs works correctly for copy number count""" cn = tlr._from_hgvs(hgvsexpr, copies=copies) assert cn.model_dump(exclude_none=True) == expected + + +@pytest.mark.vcr +def test_from_hgvs_cn_copies_zero(tlr): + """Test that copies=0 produces CopyNumberCount, not CopyNumberChange. + + copies=0 is a valid input (homozygous deletion), but 0 is falsy in Python + so it was previously treated as missing and fell through to CopyNumberChange. + """ + cn = tlr._from_hgvs("NC_000013.11:g.26440969_26443305del", copies=0) + assert cn.type == "CopyNumberCount" + assert cn.copies == 0 From 27d0ec43b846e06b6cd3ca7db07caa624a3d9a12 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Sat, 23 May 2026 14:51:21 -0400 Subject: [PATCH 4/5] fix: _from_vrs handle kwargs without error --- src/ga4gh/vrs/extras/translator.py | 2 +- tests/extras/test_allele_translator.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 00da9bb1..81f38306 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -154,7 +154,7 @@ def hgvs_tools(self) -> HgvsTools: """Instantiate and return an HgvsTools instance""" return HgvsTools(self.data_proxy) - def _from_vrs(self, var: dict) -> models._VariationBase | None: + def _from_vrs(self, var: dict, **kwargs) -> models._VariationBase | None: # noqa: ARG002 """Convert from dict representation of VRS JSON to VRS object""" if not isinstance(var, Mapping): return None diff --git a/tests/extras/test_allele_translator.py b/tests/extras/test_allele_translator.py index 24c461e8..59b68862 100644 --- a/tests/extras/test_allele_translator.py +++ b/tests/extras/test_allele_translator.py @@ -331,6 +331,11 @@ def test_from_invalid(tlr): ): tlr.translate_from("BRAF amplication") + with pytest.raises( + ValueError, match="Unable to parse data as beacon, gnomad, hgvs, spdi, vrs" + ): + tlr.translate_from("BRAF amplication", assembly_name="GRCh37") + @pytest.mark.vcr def test_from_beacon(tlr): From 372e293d02610b871166a1af6895aa4134c02b8b Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 8 Jun 2026 14:12:49 -0400 Subject: [PATCH 5/5] fix: handle key error in translate_to (#639) close #638 * Unsupported `fmt` in `translate_to` should raise `NotImplementedError` --- src/ga4gh/vrs/extras/translator.py | 8 +++++++- tests/extras/test_allele_translator.py | 21 ++++----------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/ga4gh/vrs/extras/translator.py b/src/ga4gh/vrs/extras/translator.py index 81f38306..91c45532 100644 --- a/src/ga4gh/vrs/extras/translator.py +++ b/src/ga4gh/vrs/extras/translator.py @@ -142,8 +142,14 @@ def translate_to(self, vo: models._VariationBase, fmt: str, **kwargs) -> list[st kwargs: ref_seq_limit Optional(int): If vo.state is a ReferenceLengthExpression, and `ref_seq_limit` is specified, and `fmt` is `spdi`, the reference sequence is included in the SPDI expression if it is below the limit Otherwise only the length of the reference sequence is included. If the limit is None, the reference sequence is always included. In all cases, the alt sequence is included. Default is 0 (never include reference sequence). + :raise NotImplementedError: If `fmt` is not supported """ - t = self.to_translators[fmt] + try: + t = self.to_translators[fmt] + except KeyError as e: + msg = f"{fmt} is not supported" + raise NotImplementedError(msg) from e + return t(vo, **kwargs) ############################################################################ diff --git a/tests/extras/test_allele_translator.py b/tests/extras/test_allele_translator.py index 59b68862..15f87a89 100644 --- a/tests/extras/test_allele_translator.py +++ b/tests/extras/test_allele_translator.py @@ -978,20 +978,7 @@ def test_normalize_microsatellite_counts(tlr, case): ) -# TODO: Readd these tests -# @pytest.mark.vcr -# def test_errors(tlr): -# with pytest.raises(ValueError): -# tlr._from_beacon("bogus") -# -# with pytest.raises(ValueError): -# tlr._from_gnomad("NM_182763.2:c.688+403C>T") -# -# with pytest.raises(ValueError): -# tlr._from_hgvs("NM_182763.2:c.688+403C>T") -# -# with pytest.raises(ValueError): -# tlr._from_hgvs("NM_182763.2:c.688_690inv") -# -# with pytest.raises(ValueError): -# tlr._from_spdi("NM_182763.2:c.688+403C>T") +@pytest.mark.vcr +def test_translate_to_invalid_fmt(tlr): + with pytest.raises(NotImplementedError, match="gnomad is not supported"): + tlr.translate_to(models.Allele.model_validate(snv_output), fmt="gnomad")