From 931075e18f815822ba90f8eba7e05f754054f584 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Thu, 6 Feb 2025 16:36:39 +0000 Subject: [PATCH 1/3] Adding SCG5 to colorectal genes list --- lib/import/colorectal/core/genocolorectal.rb | 6 ++++-- .../helpers/colorectal/providers/rtd/rtd_constants.rb | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/import/colorectal/core/genocolorectal.rb b/lib/import/colorectal/core/genocolorectal.rb index 484e8279..13d71cc0 100644 --- a/lib/import/colorectal/core/genocolorectal.rb +++ b/lib/import/colorectal/core/genocolorectal.rb @@ -41,7 +41,8 @@ class Genocolorectal < Import::Germline::Genotype 'RAD51C' => 3615, 'RAD51D' => 3616, 'VHL' => 83, - 'ATM' => 451 }.freeze + 'ATM' => 451, + 'SCG5' => 5092 }.freeze COLORECTAL_REGEX = /(?APC)| (?BMPR1A)| @@ -69,7 +70,8 @@ class Genocolorectal < Import::Germline::Genotype (?RAD51C)| (?RAD51D)| (?VHL) | - (?ATM)/ix # Added by Francesco + (?ATM) | + (?SCG5)/ix # Added by Francesco # ------------------------ Interogators ------------------------------ diff --git a/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb b/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb index eedd9f59..ac084906 100644 --- a/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb +++ b/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb @@ -109,7 +109,8 @@ module RtdConstants SMAD4| STK11| GREM1| - NTHL1)/ix + NTHL1| + SCG5)/ix HNPCC = %w[MLH1 MSH2 MSH6 PMS2 EPCAM].freeze HNPCCMLPA = %w[MLH1 MSH2 MSH6 EPCAM].freeze COLORECTALCANCER = %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 From 948d121f7e303330ea3e35fb384187f121a8383f Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 7 Feb 2025 18:24:19 +0000 Subject: [PATCH 2/3] Fixed multigene variant --- .../newcastle/newcastle_handler_colorectal.rb | 72 ++++++++++++++++--- .../colorectal/providers/rtd/rtd_constants.rb | 20 ++---- .../newcastle_handler_colorectal_test.rb | 45 ++++++++++++ 3 files changed, 116 insertions(+), 21 deletions(-) diff --git a/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb b/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb index d7fc42f8..5c17bfca 100644 --- a/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb +++ b/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb @@ -12,6 +12,7 @@ def process_fields(record) # return for brca cases return if record.raw_fields['investigation code'].match(/BRCA/i) + record.raw_fields['genotype']&.gsub!('SGC5', 'SCG5') genocolorectal = Import::Colorectal::Core::Genocolorectal.new(record) genocolorectal.add_passthrough_fields(record.mapped_fields, record.raw_fields, @@ -23,7 +24,7 @@ def process_fields(record) add_test_scope(genocolorectal, record) add_test_type(genocolorectal, record) add_test_status(genocolorectal, record) - res = process_variant_records(genocolorectal, record) # Added by Francesco + res = process_variant_records(genocolorectal, record) res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end @@ -127,8 +128,7 @@ def process_fullscreen_records(genocolorectal, record, genocolorectals) genocolorectal.add_gene_colorectal(gene) if positive_rec?(genocolorectal) add_fs_negative_genes(gene, genocolorectal, genocolorectals, record) - process_variants(genocolorectal, variant) - genocolorectals.append(genocolorectal) + process_variants(genocolorectals, genocolorectal, variant) elsif gene.present? # for other status records add_fs_negative_genes(gene, genocolorectal, genocolorectals, record) genocolorectals.append(genocolorectal) @@ -138,8 +138,10 @@ def process_fullscreen_records(genocolorectal, record, genocolorectals) add_variant_class(genocolorectal, record) end - def add_fs_negative_genes(gene, genocolorectal, genocolorectals, _record) + def add_fs_negative_genes(gene, genocolorectal, genocolorectals, record) negative_genes = @genes_panel - [gene] unless @genes_panel == [gene] + variant_gene = record.raw_fields['genotype']&.scan(COLORECTAL_GENES_REGEX)&.flatten + negative_genes -= variant_gene if variant_gene.present? negative_genes&.each do |neg_gene| genocolo_other = genocolorectal.dup_colo genocolo_other.add_status(1) @@ -164,9 +166,12 @@ def process_targeted_screen(genocolorectal, record, genocolorectals) variant = record.raw_fields['genotype'] gene = get_gene(record) genocolorectal.add_gene_colorectal(gene) - process_variants(genocolorectal, variant) if positive_rec?(genocolorectal) add_variant_class(genocolorectal, record) - genocolorectals.append(genocolorectal) + if positive_rec?(genocolorectal) + process_variants(genocolorectals, genocolorectal, variant) + else + genocolorectals.append(genocolorectal) + end end def get_gene(record) @@ -212,14 +217,65 @@ def non_pathogenic?(record) false end - def process_variants(genocolorectal, variant) + def process_variants(genocolorectals, genocolorectal, variant) + genes = variant&.scan(COLORECTAL_GENES_REGEX)&.flatten + + # For variant like "het dup GREM1 and SGC5" + if genes.present? && genes.all? { |gene| %w[GREM1 SCG5].include?(gene) } && variant.scan(/dup/i).size == 1 + prepare_germ1_scg5_genos(genocolorectals, genocolorectal, variant) + elsif genes.size > 1 + process_multi_genes(genocolorectals, genocolorectal, variant, genes) + else + process_mutations(genocolorectal, variant) + genocolorectals.append(genocolorectal) + end + genocolorectals + end + + def prepare_germ1_scg5_genos(genocolorectals, genocolorectal, variant) + varianttype = variant.scan(VARIANTTYPE_REGEX) + genocolorectal_dup = genocolorectal.dup_colo + genocolorectal_dup.add_gene_colorectal('GREM1') + genocolorectal_dup.attribute_map['variantlocation'] = 5 + genocolorectal_dup.add_variant_type(varianttype.join) + genocolorectals.append(genocolorectal_dup) + genocolorectal_dup = genocolorectal.dup_colo + genocolorectal_dup.add_gene_colorectal('SCG5') + genocolorectal_dup.add_variant_type(varianttype.join) + genocolorectals.append(genocolorectal_dup) + end + + def process_multi_genes(genocolorectals, genocolorectal, variant, genes) + variant_strings = variant.split(genes[-1]) + variant_strings << '' if variant_strings.size == 1 + variant_strings[1].prepend(genes[-1]) + variant_type = variant.scan(VARIANTTYPE_REGEX) + genocolorectal.add_variant_type(variant_type.join) unless genes.size == variant_type.size + process_variant_strings(genocolorectals, genocolorectal, variant_strings) + end + + def process_variant_strings(genocolorectals, genocolorectal, variant_strings) + variant_strings.each do |variant_str| + genocolorectal_dup = genocolorectal.dup_colo + variant_str.scan(COLORECTAL_GENES_REGEX) + genocolorectal_dup.add_gene_colorectal($LAST_MATCH_INFO[:colorectal]) + process_mutations(genocolorectal_dup, variant_str) + varianttype = variant_str.scan(VARIANTTYPE_REGEX) + if genocolorectal_dup.attribute_map['sequencevarianttype'].nil? + genocolorectal_dup.add_variant_type(varianttype.join) + end + genocolorectals.append(genocolorectal_dup) + end + end + + def process_mutations(genocolorectal, variant) process_cdna_variant(genocolorectal, variant) process_exonic_variant(genocolorectal, variant) process_protein_impact(genocolorectal, variant) end def process_exonic_variant(genocolorectal, variant) - return unless variant.scan(EXON_VARIANT_REGEX).size.positive? + return unless variant.scan(EXON_REGEX).size.positive? genocolorectal.add_exon_location($LAST_MATCH_INFO[:exons]) genocolorectal.add_variant_type($LAST_MATCH_INFO[:variant]) diff --git a/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb b/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb index ac084906..726ec757 100644 --- a/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb +++ b/lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb @@ -76,19 +76,12 @@ module RtdConstants ([+>_-][0-9]+[ACGTdelinsup>]+) )\]?/ix - EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(?on)?(?s)?\s - (?[0-9]+(?-[0-9]+)?)| - ex(?on)?(?s)?\s?(?[0-9]+(?-[0-9]+)?)\s? - (?del|dup|ins)| - (?del|dup|ins)\sexon(?s)?\s - (?[0-9]+(?\sto\s[0-9]+))| - ex(on)?(s)?\s?(?[0-9]+\s?(\s?-\s?[0-9]+)?)\s? - (?del|dup|ins)?| - (?del|dup|ins)(?\s)?(?[0-9]+(?-[0-9]+)?)| - ex(?on)?(?s)?\s(?[0-9]+(?\sto\s[0-9]+)?)\s - (?del|dup|ins)| - x(?[0-9]+-?[0-9]+)\s?(?del|dup|ins)| - x(?[0-9]+-?[0-9]?)\s?(?del|dup|ins)/ix + EXON_REGEX = /(?del|dup|ins).+ex(on)?(s)?\s(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)| + ex(on)?(s)?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?del|dup|ins)| + ^(?del|dup|ins)\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)| + x(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?del|dup|ins)| + ex(on)?s?\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?del|dup|ins)?| + ex(on)?(s)?\s?(?del|dup|ins)\s?(?[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)?/ix PROTEIN_REGEX = /p\.[\[(]?(?([a-z]+[0-9]+[a-z]+([^[:alnum:]][0-9]+)?)| ([a-z]+[0-9]+[^[:alnum:]]))[)\]]?/ix @@ -115,6 +108,7 @@ module RtdConstants HNPCCMLPA = %w[MLH1 MSH2 MSH6 EPCAM].freeze COLORECTALCANCER = %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 POLD1 POLE PTEN SMAD4 STK11].freeze + VARIANTTYPE_REGEX = /delins|indel|dup|del|ins|>/ix FAPMAP = %w[APC MUTYH].freeze end end diff --git a/test/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal_test.rb b/test/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal_test.rb index 36d39c4d..5276a586 100644 --- a/test/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal_test.rb +++ b/test/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal_test.rb @@ -208,6 +208,51 @@ def setup assert_equal 2804, genotypes[4].attribute_map['gene'] end + test 'process multigene variant' do + brca_record = build_raw_record('pseudo_id1' => 'bob') + brca_record.raw_fields['genotype'] = 'EPCAM ex9-MSH2 ex2 del' + brca_record.raw_fields['investigation code'] = 'HNPCC' + @handler.add_test_scope(@genotype, brca_record) + @handler.add_test_status(@genotype, brca_record) + genotypes = @handler.process_variant_records(@genotype, brca_record) + assert_equal 5, genotypes.size + assert_equal 2744, genotypes[0].attribute_map['gene'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 2808, genotypes[1].attribute_map['gene'] + assert_equal 1, genotypes[1].attribute_map['teststatus'] + assert_equal 3394, genotypes[2].attribute_map['gene'] + assert_equal 1, genotypes[2].attribute_map['teststatus'] + assert_equal 1432, genotypes[3].attribute_map['gene'] + assert_equal 2, genotypes[3].attribute_map['teststatus'] + assert_equal 3, genotypes[3].attribute_map['sequencevarianttype'] + assert_equal '9', genotypes[3].attribute_map['exonintroncodonnumber'] + assert_equal 2804, genotypes[4].attribute_map['gene'] + assert_equal 2, genotypes[4].attribute_map['teststatus'] + assert_equal 3, genotypes[4].attribute_map['sequencevarianttype'] + assert_equal '2', genotypes[4].attribute_map['exonintroncodonnumber'] + end + + test 'process GREM1 dup SCG5 variant' do + brca_record = build_raw_record('pseudo_id1' => 'bob') + brca_record.raw_fields['genotype'] = 'het dup GREM1 and SCG5' + brca_record.raw_fields['moleculartestingtype'] = 'Predictive' + brca_record.raw_fields['investigation code'] = 'HNPCC' + + @handler.add_test_scope(@genotype, brca_record) + @handler.add_test_status(@genotype, brca_record) + genotypes = @handler.process_variant_records(@genotype, brca_record) + + assert_equal 6, genotypes.size + assert_equal 1882, genotypes[4].attribute_map['gene'] + assert_equal 2, genotypes[4].attribute_map['teststatus'] + assert_equal 4, genotypes[4].attribute_map['sequencevarianttype'] + assert_equal 5, genotypes[4].attribute_map['variantlocation'] + assert_equal 5092, genotypes[5].attribute_map['gene'] + assert_equal 2, genotypes[5].attribute_map['teststatus'] + assert_equal 4, genotypes[5].attribute_map['sequencevarianttype'] + assert_nil genotypes[5].attribute_map['variantlocation'] + end + private def clinical_json From 20609a5a3a97977b1a3a88367c2b07df11e49b2f Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 12 Feb 2025 17:17:12 +0000 Subject: [PATCH 3/3] Fixed typo --- .../providers/newcastle/newcastle_handler_colorectal.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb b/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb index 5c17bfca..97f199ec 100644 --- a/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb +++ b/lib/import/colorectal/providers/newcastle/newcastle_handler_colorectal.rb @@ -222,7 +222,7 @@ def process_variants(genocolorectals, genocolorectal, variant) # For variant like "het dup GREM1 and SGC5" if genes.present? && genes.all? { |gene| %w[GREM1 SCG5].include?(gene) } && variant.scan(/dup/i).size == 1 - prepare_germ1_scg5_genos(genocolorectals, genocolorectal, variant) + prepare_grem1_scg5_genos(genocolorectals, genocolorectal, variant) elsif genes.size > 1 process_multi_genes(genocolorectals, genocolorectal, variant, genes) else @@ -232,7 +232,7 @@ def process_variants(genocolorectals, genocolorectal, variant) genocolorectals end - def prepare_germ1_scg5_genos(genocolorectals, genocolorectal, variant) + def prepare_grem1_scg5_genos(genocolorectals, genocolorectal, variant) varianttype = variant.scan(VARIANTTYPE_REGEX) genocolorectal_dup = genocolorectal.dup_colo genocolorectal_dup.add_gene_colorectal('GREM1')