From 7a515bb2995be3e8af242c87a6f97b7463bc5b18 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 5 Feb 2025 10:45:41 +0000 Subject: [PATCH 1/2] Fix for regex unable to capture some exonic variants --- .../brca/providers/newcastle/newcastle_handler.rb | 9 --------- .../helpers/brca/providers/rtd/rtd_constants.rb | 12 ++++++------ .../providers/newcastle/newcastle_handler_test.rb | 10 ++++++++++ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/lib/import/brca/providers/newcastle/newcastle_handler.rb b/lib/import/brca/providers/newcastle/newcastle_handler.rb index 6fad8134..37bc4e94 100644 --- a/lib/import/brca/providers/newcastle/newcastle_handler.rb +++ b/lib/import/brca/providers/newcastle/newcastle_handler.rb @@ -1,5 +1,4 @@ require 'possibly' -# require 'import/brca/providers/newcastle/newcastle_storage_manager' module Import module Brca @@ -208,14 +207,6 @@ def no_scope?(genotype) genotype.attribute_map['genetictestscope'].scan(/Unable/i).size.positive? end - def positive_cdna?(variant) - variant.scan(CDNA_REGEX).size.positive? - end - - def positive_exonvariant?(variant) - variant.scan(EXON_VARIANT_REGEX).size.positive? - end - def pathogenic?(record) varpathclass = record.raw_fields['variantpathclass']&.downcase if (NON_PATHEGENIC_CODES.exclude? varpathclass) \ diff --git a/lib/import/helpers/brca/providers/rtd/rtd_constants.rb b/lib/import/helpers/brca/providers/rtd/rtd_constants.rb index 022bedf3..cad97ce3 100644 --- a/lib/import/helpers/brca/providers/rtd/rtd_constants.rb +++ b/lib/import/helpers/brca/providers/rtd/rtd_constants.rb @@ -77,12 +77,12 @@ module RtdConstants [0-9]+[+>_-][0-9]+[+>_-][0-9]+[ACGTdelinsup]+| [0-9]+.[0-9]+[a-z]+>[a-z]+)\s?/ix - EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s? - (?[0-9]+((to|and|-|\s)+[0-9]+)?)| - ex(on)?s?\s?(?[0-9]+((to|and|-|\s)+[0-9]+)?)\s? - (?del|dup|ins)?| - x(?[0-9+-? ]+)+(?del|dup|ins)| - ^(?del|dup|ins)\s?(?[0-9]+((to|and|-|\s)+[0-9]+)?) + EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)| + (?ex(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?ex(on)?s?[0-9ACGT]+)(?del|dup|ins)| + ex(on)?s?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?del|dup|ins)?| + (?x(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?x(on)?s?[0-9ACGT]+)(?del|dup|ins)| + x(?[0-9+-? ACGT]+)+(?del|dup|ins)| + ^(?del|dup|ins)\s?(?[0-9]+((to|and|-|\s)+[0-9ACGT]+)?) /ix # rubocop:enable Lint/MixedRegexpCaptureTypes end diff --git a/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb b/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb index 78781dc6..177d7db7 100644 --- a/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb +++ b/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb @@ -501,6 +501,16 @@ def setup assert_equal '2-24', genotypes[0].attribute_map['exonintroncodonnumber'] assert_equal 1, genotypes[0].attribute_map['variantlocation'] assert_equal 7, genotypes[0].attribute_map['gene'] + exonic_var_rec.raw_fields['genotype'] = 'exon13A-13C dup' + genotypes = @handler.process_variant_records(@genotype, exonic_var_rec) + assert_equal '13a-13c', genotypes[0].attribute_map['exonintroncodonnumber'] + exonic_var_rec.raw_fields['genotype'] = 'x14-x20del' + genotypes = @handler.process_variant_records(@genotype, exonic_var_rec) + assert_equal 'x14-x20', genotypes[0].attribute_map['exonintroncodonnumber'] + exonic_var_rec.raw_fields['genotype'] = 'ex14-ex20del' + genotypes = @handler.process_variant_records(@genotype, exonic_var_rec) + assert_equal 'ex14-ex20', genotypes[0].attribute_map['exonintroncodonnumber'] + assert_equal 3, genotypes[0].attribute_map['sequencevarianttype'] end private From 6f01e8c2a697f59ccfacd6555eb01d5fdf882635 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 5 Feb 2025 12:39:02 +0000 Subject: [PATCH 2/2] PR review comment to capture even if there is space or no space. --- lib/import/helpers/brca/providers/rtd/rtd_constants.rb | 4 ++-- .../import/brca/providers/newcastle/newcastle_handler_test.rb | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/import/helpers/brca/providers/rtd/rtd_constants.rb b/lib/import/helpers/brca/providers/rtd/rtd_constants.rb index cad97ce3..f79cc66e 100644 --- a/lib/import/helpers/brca/providers/rtd/rtd_constants.rb +++ b/lib/import/helpers/brca/providers/rtd/rtd_constants.rb @@ -78,9 +78,9 @@ module RtdConstants [0-9]+.[0-9]+[a-z]+>[a-z]+)\s?/ix EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)| - (?ex(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?ex(on)?s?[0-9ACGT]+)(?del|dup|ins)| + (?ex(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?ex(on)?s?[0-9ACGT]+)\s*(?del|dup|ins)| ex(on)?s?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?del|dup|ins)?| - (?x(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?x(on)?s?[0-9ACGT]+)(?del|dup|ins)| + (?x(on)?s?\s?[0-9ACGT]+(to|and|-|\s)?x(on)?s?[0-9ACGT]+)\s*(?del|dup|ins)| x(?[0-9+-? ACGT]+)+(?del|dup|ins)| ^(?del|dup|ins)\s?(?[0-9]+((to|and|-|\s)+[0-9ACGT]+)?) /ix diff --git a/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb b/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb index 177d7db7..f3e023db 100644 --- a/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb +++ b/test/lib/import/brca/providers/newcastle/newcastle_handler_test.rb @@ -511,6 +511,10 @@ def setup genotypes = @handler.process_variant_records(@genotype, exonic_var_rec) assert_equal 'ex14-ex20', genotypes[0].attribute_map['exonintroncodonnumber'] assert_equal 3, genotypes[0].attribute_map['sequencevarianttype'] + exonic_var_rec.raw_fields['genotype'] = 'ex14-ex20 dup' + genotypes = @handler.process_variant_records(@genotype, exonic_var_rec) + assert_equal 'ex14-ex20', genotypes[0].attribute_map['exonintroncodonnumber'] + assert_equal 4, genotypes[0].attribute_map['sequencevarianttype'] end private