11/* vcfconvert.c -- convert between VCF/BCF and related formats.
22
3- Copyright (C) 2013-2021 Genome Research Ltd.
3+ Copyright (C) 2013-2023 Genome Research Ltd.
44
55 Author: Petr Danecek <pd3@sanger.ac.uk>
66
@@ -59,7 +59,7 @@ struct _args_t
5959 bcf_hdr_t * header ;
6060 void (* convert_func )(struct _args_t * );
6161 struct {
62- int total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;
62+ int total , skipped , hom_rr , het_ra , hom_aa , het_aa , missing ;
6363 } n ;
6464 kstring_t str ;
6565 int32_t * gts ;
@@ -160,7 +160,7 @@ static int _set_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr)
160160 // REF,ALT
161161 args -> str .l = 0 ;
162162 se = ++ ss ;
163- while ( se < tsv -> se && * se != '_' ) se ++ ;
163+ while ( se < tsv -> se && * se != '_' ) se ++ ;
164164 if ( * se != '_' ) return -1 ;
165165 kputsn (ss ,se - ss ,& args -> str );
166166 ss = ++ se ;
@@ -202,14 +202,14 @@ static int tsv_setter_chrom_pos_ref_alt_or_id(tsv_t *tsv, bcf1_t *rec, void *usr
202202{
203203 args_t * args = (args_t * )usr ;
204204 if ( _set_chrom_pos_ref_alt (tsv ,rec ,usr )== 0 ) return 0 ;
205- rec -> pos = -1 ; // mark the record as unset
205+ rec -> pos = CSI_COOR_EMPTY ; // mark the record as unset
206206 if ( !args -> output_vcf_ids ) return 0 ;
207207 return tsv_setter_id (tsv ,rec ,usr );
208208}
209209static int tsv_setter_chrom_pos_ref_alt_id_or_die (tsv_t * tsv , bcf1_t * rec , void * usr )
210210{
211211 args_t * args = (args_t * )usr ;
212- if ( rec -> pos != -1 )
212+ if ( rec -> pos != CSI_COOR_EMPTY )
213213 {
214214 if ( !args -> output_vcf_ids ) return 0 ;
215215 return tsv_setter_id (tsv ,rec ,usr );
@@ -269,12 +269,12 @@ static int tsv_setter_gt_gp(tsv_t *tsv, bcf1_t *rec, void *usr)
269269 if ( aa >= ab )
270270 {
271271 if ( aa >= bb ) args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (0 );
272- else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
272+ else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
273273 }
274- else if ( ab >= bb )
274+ else if ( ab >= bb )
275275 {
276276 args -> gts [2 * i + 0 ] = bcf_gt_unphased (0 );
277- args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
277+ args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
278278 }
279279 else args -> gts [2 * i + 0 ] = args -> gts [2 * i + 1 ] = bcf_gt_unphased (1 );
280280 }
@@ -293,7 +293,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
293293 else { a0 = bcf_gt_phased (0 ); a1 = bcf_gt_phased (1 ); }
294294
295295 // up is short for "unphased"
296- int nup = 0 ;
296+ int nup = 0 ;
297297 for (i = 0 ; i < nsamples ; i ++ )
298298 {
299299 char * ss = tsv -> ss + 4 * i + nup ;
@@ -324,11 +324,11 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
324324 break ;
325325 default :
326326 fprintf (stderr ,"Could not parse: [%c][%s]\n" , ss [all * 2 + up ],tsv -> ss );
327- return -1 ;
327+ return -1 ;
328328 }
329329 if ( ss [all * 2 + up + 1 ]== '*' ) up = up + 1 ;
330330 }
331-
331+
332332 if (up && up != 2 )
333333 {
334334 fprintf (stderr ,"Missing unphased marker '*': [%c][%s]" , ss [2 + up ], tsv -> ss );
@@ -356,13 +356,13 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
356356static void gensample_to_vcf (args_t * args )
357357{
358358 /*
359- * Input: IMPUTE2 output (indentation changed here for clarity):
359+ * Input: IMPUTE2 output (indentation changed here for clarity):
360360 *
361361 * 20:62116619_C_T 20:62116619 62116619 C T 0.969 0.031 0 ...
362362 * --- 20:62116698_C_A 62116698 C A 1 0 0 ...
363363 *
364364 * Second column is expected in the form of CHROM:POS_REF_ALT. We use second
365- * column because the first can be empty ("--") when filling sites from reference
365+ * column because the first can be empty ("--") when filling sites from reference
366366 * panel. When the option --vcf-ids is given, the first column is used to set the
367367 * VCF ID.
368368 *
@@ -784,7 +784,7 @@ char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname)
784784 }
785785 for (i = 0 ; i < nlines ; i ++ ) free (lines [i ]);
786786 free (lines );
787- for (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )
787+ for (i = 0 ; i < bcf_hdr_nsamples (hdr ); i ++ )
788788 if ( !sample2sex [i ] ) error ("Missing sex for sample %s in %s\n" , bcf_hdr_int2id (hdr , BCF_DT_SAMPLE , i ),sex_fname );
789789 return sample2sex ;
790790}
@@ -847,7 +847,7 @@ static void vcf_to_gensample(args_t *args)
847847 if (sample_fname ) fprintf (stderr , "Sample file: %s\n" , sample_fname );
848848
849849 // write samples file
850- if (sample_fname )
850+ if (sample_fname )
851851 {
852852 char * sample2sex = NULL ;
853853 if ( args -> sex_fname ) sample2sex = init_sample2sex (args -> header ,args -> sex_fname );
@@ -877,7 +877,7 @@ static void vcf_to_gensample(args_t *args)
877877 return ;
878878 }
879879
880- int prev_rid = -1 , prev_pos = -1 ;
880+ int prev_rid = -1 , prev_pos = CSI_COOR_EMPTY ;
881881 int no_alt = 0 , non_biallelic = 0 , filtered = 0 , ndup = 0 , nok = 0 ;
882882 BGZF * gout = bgzf_open (gen_fname , gen_compressed ? "wg" : "wu" );
883883 while ( bcf_sr_next_line (args -> files ) )
@@ -915,7 +915,7 @@ static void vcf_to_gensample(args_t *args)
915915 nok ++ ;
916916 }
917917 }
918- fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,
918+ fprintf (stderr , "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n" ,
919919 nok , no_alt + non_biallelic + filtered + ndup , no_alt , non_biallelic , filtered , ndup );
920920
921921 if ( str .m ) free (str .s );
@@ -976,7 +976,7 @@ static void vcf_to_haplegendsample(args_t *args)
976976 {
977977 char * sample2sex = NULL ;
978978 if ( args -> sex_fname ) sample2sex = init_sample2sex (args -> header ,args -> sex_fname );
979-
979+
980980 int i ;
981981 BGZF * sout = bgzf_open (sample_fname , sample_compressed ? "wg" : "wu" );
982982 str .l = 0 ;
@@ -1078,7 +1078,7 @@ static void vcf_to_hapsample(args_t *args)
10781078 kputs ("%CHROM:%POS\\_%REF\\_%FIRST_ALT %ID %POS %REF %FIRST_ALT " , & str );
10791079 else
10801080 kputs ("%CHROM %CHROM:%POS\\_%REF\\_%FIRST_ALT %POS %REF %FIRST_ALT " , & str );
1081-
1081+
10821082 if ( args -> hap2dip )
10831083 kputs ("%_GT_TO_HAP2\n" , & str );
10841084 else
@@ -1229,7 +1229,7 @@ static inline int tsv_setter_aa1(args_t *args, char *ss, char *se, int alleles[]
12291229 if ( alleles [a0 ]< 0 ) alleles [a0 ] = (* nals )++ ;
12301230 if ( alleles [a1 ]< 0 ) alleles [a1 ] = (* nals )++ ;
12311231
1232- gts [0 ] = bcf_gt_unphased (alleles [a0 ]);
1232+ gts [0 ] = bcf_gt_unphased (alleles [a0 ]);
12331233 gts [1 ] = ss [1 ] ? bcf_gt_unphased (alleles [a1 ]) : bcf_int32_vector_end ;
12341234
12351235 if ( ref == a0 && ref == a1 ) args -> n .hom_rr ++ ; // hom ref: RR
@@ -1265,7 +1265,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
12651265 }
12661266 ret = tsv_setter_aa1 (args , tsv -> ss , tsv -> se , alleles , & nals , iref , args -> gts + i * 2 );
12671267 if ( ret == -1 ) error ("Error parsing the site %s:%" PRId64 ", expected two characters\n" , bcf_hdr_id2name (args -> header ,rec -> rid ),(int64_t ) rec -> pos + 1 );
1268- if ( ret == -2 )
1268+ if ( ret == -2 )
12691269 {
12701270 // something else than a SNP
12711271 free (ref );
@@ -1275,7 +1275,7 @@ static int tsv_setter_aa(tsv_t *tsv, bcf1_t *rec, void *usr)
12751275
12761276 args -> str .l = 0 ;
12771277 kputc (ref [0 ], & args -> str );
1278- for (i = 0 ; i < 5 ; i ++ )
1278+ for (i = 0 ; i < 5 ; i ++ )
12791279 {
12801280 if ( alleles [i ]> 0 )
12811281 {
@@ -1419,7 +1419,7 @@ static void gvcf_to_vcf(args_t *args)
14191419 {
14201420 int pass = filter_test (args -> filter , line , NULL );
14211421 if ( args -> filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1 ;
1422- if ( !pass )
1422+ if ( !pass )
14231423 {
14241424 if ( bcf_write (out_fh ,hdr ,line )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> outfname );
14251425 continue ;
@@ -1667,7 +1667,7 @@ int main_vcfconvert(int argc, char *argv[])
16671667 else args -> infname = argv [optind ];
16681668 }
16691669 if ( !args -> infname ) usage ();
1670-
1670+
16711671 if ( args -> convert_func ) args -> convert_func (args );
16721672 else vcf_to_vcf (args );
16731673
0 commit comments