Skip to content

Commit c646f80

Browse files
committed
Merge Don't encode negative ints as reserved values (PR #766)
Add NEWS item describing the bug.
2 parents e7379a1 + 52368d6 commit c646f80

5 files changed

Lines changed: 46 additions & 14 deletions

File tree

NEWS

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
Noteworthy changes in release a.b
22
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
33

4+
* Fixed bug where some 8 or 16-bit negative integers were stored using values
5+
reserved by the BCF specification. These numbers are now promoted to the
6+
next size up, so -120 to -128 are stored using at least 16 bits, and -32760
7+
to -32768 are stored using 32 bits.
8+
9+
Note that while BCF files affected by this bug are technically incorrect,
10+
it is still possible to read them. When converting to VCF format,
11+
HTSlib (and therefore bcftools) will interpret the values as intended
12+
and write out the correct negative numbers. (#766, thanks to John Marshall)
13+
414
Noteworthy changes in release 1.9 (18th July 2018)
515
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
616

htslib/vcf.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -953,8 +953,8 @@ static inline void bcf_enc_size(kstring_t *s, int size, int type)
953953

954954
static inline int bcf_enc_inttype(long x)
955955
{
956-
if (x <= INT8_MAX && x > bcf_int8_missing) return BCF_BT_INT8;
957-
if (x <= INT16_MAX && x > bcf_int16_missing) return BCF_BT_INT16;
956+
if (x <= INT8_MAX && x >= INT8_MIN + 8) return BCF_BT_INT8;
957+
if (x <= INT16_MAX && x >= INT16_MIN + 8) return BCF_BT_INT16;
958958
return BCF_BT_INT32;
959959
}
960960

@@ -966,10 +966,10 @@ static inline void bcf_enc_int1(kstring_t *s, int32_t x)
966966
} else if (x == bcf_int32_missing) {
967967
bcf_enc_size(s, 1, BCF_BT_INT8);
968968
kputc(bcf_int8_missing, s);
969-
} else if (x <= INT8_MAX && x > bcf_int8_missing) {
969+
} else if (x <= INT8_MAX && x >= INT8_MIN + 8) {
970970
bcf_enc_size(s, 1, BCF_BT_INT8);
971971
kputc(x, s);
972-
} else if (x <= INT16_MAX && x > bcf_int16_missing) {
972+
} else if (x <= INT16_MAX && x >= INT16_MIN + 8) {
973973
int16_t z = x;
974974
bcf_enc_size(s, 1, BCF_BT_INT16);
975975
kputsn((char*)&z, 2, s);

test/test-vcf-api.c

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void write_bcf(char *fname)
6262
bcf_hdr_append(hdr, "##phasing=partial");
6363
bcf_hdr_append(hdr, "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
6464
bcf_hdr_append(hdr, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
65+
bcf_hdr_append(hdr, "##INFO=<ID=NEG,Number=.,Type=Integer,Description=\"Test Negative Numbers\">");
6566
bcf_hdr_append(hdr, "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">");
6667
bcf_hdr_append(hdr, "##INFO=<ID=AA,Number=1,Type=String,Description=\"Ancestral Allele\">");
6768
bcf_hdr_append(hdr, "##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP membership, build 129\">");
@@ -82,7 +83,7 @@ void write_bcf(char *fname)
8283

8384

8485
// Add a record
85-
// 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
86+
// 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
8687
// .. CHROM
8788
rec->rid = bcf_hdr_name2id(hdr, "20");
8889
// .. POS
@@ -101,6 +102,8 @@ void write_bcf(char *fname)
101102
bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
102103
tmpi = 14;
103104
bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
105+
tmpi = -127;
106+
bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1);
104107
float tmpf = 0.5;
105108
bcf_update_info_float(hdr, rec, "AF", &tmpf, 1);
106109
bcf_update_info_flag(hdr, rec, "DB", NULL, 1);
@@ -133,7 +136,7 @@ void write_bcf(char *fname)
133136
bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3);
134137
if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
135138

136-
// 20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./.
139+
// 20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./.
137140
bcf_clear1(rec);
138141
rec->rid = bcf_hdr_name2id(hdr, "20");
139142
rec->pos = 1110695;
@@ -143,6 +146,8 @@ void write_bcf(char *fname)
143146
bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1);
144147
tmpi = 10;
145148
bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1);
149+
tmpi = -128;
150+
bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1);
146151
float *tmpfa = (float*)malloc(2*sizeof(float));
147152
tmpfa[0] = 0.333;
148153
bcf_float_set_missing(tmpfa[1]);
@@ -291,6 +296,7 @@ void test_get_info_values(const char *fname)
291296
while (bcf_read(fp, hdr, line) == 0)
292297
{
293298
float *afs = 0;
299+
int32_t *negs = NULL;
294300
int count = 0;
295301
int ret = bcf_get_info_float(hdr, line, "AF", &afs, &count);
296302

@@ -312,6 +318,21 @@ void test_get_info_values(const char *fname)
312318
}
313319

314320
free(afs);
321+
322+
int32_t expected = (line->pos == 14369)? -127 : -128;
323+
count = 0;
324+
ret = bcf_get_info_int32(hdr, line, "NEG", &negs, &count);
325+
if (ret != 1 || negs[0] != expected)
326+
{
327+
if (ret < 0)
328+
fprintf(stderr, "NEG should be %d, got error ret=%d\n", expected, ret);
329+
else if (ret == 0)
330+
fprintf(stderr, "NEG should be %d, got no entries\n", expected);
331+
else
332+
fprintf(stderr, "NEG should be %d, got %d entries (first is %d)\n", expected, ret, negs[0]);
333+
exit(1);
334+
}
335+
free(negs);
315336
}
316337

317338
bcf_destroy(line);

test/test-vcf-api.out

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
##phasing=partial
99
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
1010
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
11+
##INFO=<ID=NEG,Number=.,Type=Integer,Description="Test Negative Numbers">
1112
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
1213
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
1314
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
@@ -20,9 +21,9 @@
2021
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
2122
##FORMAT=<ID=TS,Number=1,Type=String,Description="Test String">
2223
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
23-
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ:TS 0|0:48:1:51,51:String1 1|0:48:8:51,51:SomeOtherString2 1/1:43:5:.,.:YetAnotherString3
24-
20 14370 . G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:1:51,51:String1 1|0:8:51,51:SomeOtherString2 1/1:5:.,.:YetAnotherString3
25-
20 14370 . G A 29 PASS NS=3;DP=99;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:9:51,51:String1 1|0:9:51,51:SomeOtherString2 1/1:9:.,.:YetAnotherString3
26-
20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./.
27-
20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./.
28-
20 1110696 . G A 67 . NS=2;DP=99;AF=0.333,.;AA=T;DB GT:DP 2:9 1:9 ./.:9
24+
20 14370 rs6054257 G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:GQ:DP:HQ:TS 0|0:48:1:51,51:String1 1|0:48:8:51,51:SomeOtherString2 1/1:43:5:.,.:YetAnotherString3
25+
20 14370 . G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:1:51,51:String1 1|0:8:51,51:SomeOtherString2 1/1:5:.,.:YetAnotherString3
26+
20 14370 . G A 29 PASS NS=3;DP=99;NEG=-127;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:9:51,51:String1 1|0:9:51,51:SomeOtherString2 1/1:9:.,.:YetAnotherString3
27+
20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./.
28+
20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./.
29+
20 1110696 . G A 67 . NS=2;DP=99;NEG=-128;AF=0.333,.;AA=T;DB GT:DP 2:9 1:9 ./.:9

vcf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,13 +1742,13 @@ void bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize)
17421742
if (max < a[i]) max = a[i];
17431743
if (min > a[i]) min = a[i];
17441744
}
1745-
if (max <= INT8_MAX && min > bcf_int8_vector_end) {
1745+
if (max <= INT8_MAX && min >= INT8_MIN + 8) {
17461746
bcf_enc_size(s, wsize, BCF_BT_INT8);
17471747
for (i = 0; i < n; ++i)
17481748
if ( a[i]==bcf_int32_vector_end ) kputc(bcf_int8_vector_end, s);
17491749
else if ( a[i]==bcf_int32_missing ) kputc(bcf_int8_missing, s);
17501750
else kputc(a[i], s);
1751-
} else if (max <= INT16_MAX && min > bcf_int16_vector_end) {
1751+
} else if (max <= INT16_MAX && min >= INT16_MIN + 8) {
17521752
uint8_t *p;
17531753
bcf_enc_size(s, wsize, BCF_BT_INT16);
17541754
ks_resize(s, s->l + n * sizeof(int16_t));

0 commit comments

Comments
 (0)