From dff0c487cb1a7a2e3115a2d22859fe574ac8082d Mon Sep 17 00:00:00 2001
From: LEGEAI Fabrice
Date: Fri, 19 Apr 2024 11:18:01 +0200
Subject: [PATCH] change a regex to allow gene names with underscore

---
 ogs_merge/ogs_merge | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ogs_merge/ogs_merge b/ogs_merge/ogs_merge
index 96f7002..bb72c43 100644
--- a/ogs_merge/ogs_merge
+++ b/ogs_merge/ogs_merge
@@ -489,7 +489,7 @@ class OgsMerger():
             # FIXME CDS could be more appropriate (or maybe not...)
             if not li.startswith("#") and cols[2] == 'exon':
                 cols[8] = re.sub(r'ID=([a-zA-Z0-9]+)', r'exID=\1', cols[8])  # remove already set id
-                cols[8] = re.sub(r'Parent=([a-zA-Z0-9]+)([\.0-9]+)?([-_]R[A-Z]+)?(,[a-zA-Z0-9\.\-_]*)?', r'ID=\1', cols[8])  # generate a fake id based on Parent + remove multiple parents (ie when an exon is part of multiple isoforms)
+                cols[8] = re.sub(r'Parent=([a-zA-Z0-9_]+)([\.0-9]+)?([-_]R[A-Z]+)?(,[a-zA-Z0-9\.\-_]*)?', r'ID=\1', cols[8])  # generate a fake id based on Parent + remove multiple parents (ie when an exon is part of multiple isoforms)
                 cols[8] = cols[8].rstrip(";")  # gff2bed doesn't like trailing ;
                 print('\t'.join(cols), file=base_gff_out)
         base_gff_out.close()