Skip to content

Commit 60977f2

Browse files
committed
Pair lines with symbolic alleles by END tag
While non-symbolic variation is uniquely identified by POS,REF,ALT, symbolic alleles starting at the same position were undistinguishable. This prevented correct matching of records with the same positions and variant type but different length (INFO/END) A test case is added in bcftools in a separate commit (annotate24.*.vcf)
1 parent 7ba9ecd commit 60977f2

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

bcf_sr_sort.c

+24-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (C) 2017-2019 Genome Research Ltd.
2+
Copyright (C) 2017-2021 Genome Research Ltd.
33
44
Author: Petr Danecek <[email protected]>
55
@@ -259,6 +259,7 @@ static int cmpstringp(const void *p1, const void *p2)
259259
return strcmp(* (char * const *) p1, * (char * const *) p2);
260260
}
261261

262+
#define DEBUG_VSETS 0
262263
#if DEBUG_VSETS
263264
void debug_vsets(sr_sort_t *srt)
264265
{
@@ -280,6 +281,7 @@ void debug_vsets(sr_sort_t *srt)
280281
}
281282
#endif
282283

284+
#define DEBUG_VBUF 0
283285
#if DEBUG_VBUF
284286
void debug_vbuf(sr_sort_t *srt)
285287
{
@@ -380,13 +382,33 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
380382

381383
if ( srt->str.l ) kputc(';',&srt->str);
382384
srt->off[srt->noff++] = srt->str.l;
383-
size_t beg = srt->str.l;
385+
size_t beg = srt->str.l;
386+
int end_pos = -1;
384387
for (ivar=1; ivar<line->n_allele; ivar++)
385388
{
386389
if ( ivar>1 ) kputc(',',&srt->str);
387390
kputs(line->d.allele[0],&srt->str);
388391
kputc('>',&srt->str);
389392
kputs(line->d.allele[ivar],&srt->str);
393+
394+
// If symbolic allele, check also the END tag in case there are multiple events,
395+
// such as <DEL>s, starting at the same positions
396+
if ( line->d.allele[ivar][0]=='<' )
397+
{
398+
if ( end_pos==-1 )
399+
{
400+
bcf_info_t *end_info = bcf_get_info(reader->header,line,"END");
401+
if ( end_info )
402+
end_pos = (int)end_info->v1.i; // this is only to create a unique id, we don't mind a potential int64 overflow
403+
else
404+
end_pos = 0;
405+
}
406+
if ( end_pos )
407+
{
408+
kputc('/',&srt->str);
409+
kputw(end_pos, &srt->str);
410+
}
411+
}
390412
}
391413
if ( line->n_allele==1 )
392414
{

0 commit comments

Comments
 (0)