From ca76f5976bea49b8aafe263480fcefc3aebaa668 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 14 Nov 2025 16:34:44 +0000 Subject: [PATCH 01/18] Updates to run two leeds hanlder pre and post 2025 --- .../core/colorectal_handler_mapping.rb | 3 ++- .../leeds/leeds_handler_colorectal.rb | 3 +++ .../leeds/leeds_handler_new_colorectal.rb | 11 ++++++++ .../bash/Import_all_colorectal_interactive.sh | 26 +++++++++++++++++-- .../leeds/leeds_handler_colorectal_test.rb | 17 ++++++++++++ 5 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb diff --git a/lib/import/colorectal/core/colorectal_handler_mapping.rb b/lib/import/colorectal/core/colorectal_handler_mapping.rb index 64a38d45..5c9c05de 100644 --- a/lib/import/colorectal/core/colorectal_handler_mapping.rb +++ b/lib/import/colorectal/core/colorectal_handler_mapping.rb @@ -4,7 +4,8 @@ module Core # Provides the handler appropriate for the dataformat from each center class ColorectalHandlerMapping HANDLER_MAPPING = { - 'RR8' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal, + 'RR8_2' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal, + 'RR8' => Import::Colorectal::Providers::Leeds::LeedsHandlerNewColorectal, 'RNZ' => Import::Colorectal::Providers::Salisbury::SalisburyHandlerColorectal, 'RTD' => Import::Colorectal::Providers::Newcastle::NewcastleHandlerColorectal, 'RX1' => Import::Colorectal::Providers::Nottingham::NottinghamHandlerColorectal, diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb index 695c33c0..ae6fd91a 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb @@ -53,6 +53,9 @@ def populate_and_persist_genotype(record) add_varclass add_organisationcode_testresult(genocolorectal) res = 
process_variants_from_record(genocolorectal, record) + # correcting ebatch provider and registry to RR8 (from RR8_2) to allow data to persist in the database + @batch.provider = 'RR8' + @batch.registryid = 'RR8' res.map { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb new file mode 100644 index 00000000..59259a8c --- /dev/null +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -0,0 +1,11 @@ +module Import + module Colorectal + module Providers + module Leeds + # Leeds importer for colorectal for post 2025 files + class LeedsHandlerNewColorectal < Import::Germline::ProviderHandler + end + end + end + end +end \ No newline at end of file diff --git a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh index 21fb51ab..95e2a205 100755 --- a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh +++ b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh @@ -33,13 +33,35 @@ done RR8 () { PROV='RR8' IFS=$'\n' -for x in $(find $DIRPATH/$FILEPATH -not -path "*/API_BETA_RETRIEVED/*" -type f -name "*MMR*.pseudo" -o -type f -name "*other*.pseudo" -path "*/$PROV/*") +for x in $(find "$DIRPATH/$FILEPATH" \ + -type f \ + -not -path "*/API_BETA_RETRIEVED/*" \ + -path "*/$PROV/*" \ + \( -name "*MMR*.pseudo" -o -name "*Colorectal*.pseudo" -o -name "*other*.pseudo" \) \ + \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ + ) do IFS="$OIFS" $BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV done } +RR8_2 () { +PROV='RR8' +PROV_OLD_FILE='RR8_2' +IFS=$'\n' +for x in $(find "$DIRPATH/$FILEPATH" \ + -not -path "*/API_BETA_RETRIEVED/*" \ + -not -path "*/2025/*" \ + -path "*/$PROV/*" \ + \( -name 
"*MMR*.pseudo" -o -name "*other*.pseudo" \) \ + ) +do +IFS="$OIFS" +$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV_OLD_FILE +done +} + RNZ () { PROV='RNZ' IFS=$'\n' @@ -171,5 +193,5 @@ $BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(. done } -RR8; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; RJ7 +RR8; RR8_2; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; RJ7 diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb index 6273bd9e..87bb9121 100644 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb @@ -10,6 +10,23 @@ def setup @logger = Import::Log.get_logger end + test 'process_fields' do + e_batch = EBatch.create(original_filename: 'test_filea', + e_type: 'PSMOLE', + provider: 'RR8_2', + registryid: 'RR8_2') + handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.new(e_batch) + Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.any_instance.stubs(:should_process?).returns(true) + handler.process_fields(@record) + assert_difference('EBatch.count', 1) do + handler.finalize + end + # confirm batch created now has 'RR8' as provider + e_batch.reload + assert_equal 'RR8', e_batch.provider + assert_equal 'RR8', e_batch.registryid + end + test 'add_positive_teststatus' do @handler.populate_variables(@record) assert_equal 2, @handler.allocate_test_status From 2c1baf61bfbfdece4be15f691d69e633972fb15c Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Tue, 25 Nov 2025 15:30:36 +0000 Subject: [PATCH 02/18] Leeds colorectal new handler FS --- .../leeds/leeds_handler_new_format.rb | 1 - .../bash/Import_all_brca_interactive.sh | 2 +- lib/import/colorectal/core/genocolorectal.rb | 23 +- 
.../leeds/leeds_handler_new_colorectal.rb | 328 +++++++++++++++++- .../bash/Import_all_colorectal_interactive.sh | 2 +- .../colorectal/providers/rr8/constants.rb | 10 +- 6 files changed, 354 insertions(+), 12 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index e09a4b03..18204e95 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -384,7 +384,6 @@ def classify_variant_pathogenicity(variantpathclass) end def classify_protein_impact - require 'pry' case @value1 when /C1/ 1 diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index 8eb9c955..a1a56a78 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -54,7 +54,7 @@ MBIS=$1 PROV='RR8' IFS=$'\n' for x in $(find $DIRPATH/$FILEPATH -path "*/$PROV/*" -type f \ -\( -name "*BRCA*.pseudo" -o -type f -name "*Other*.pseudo" \) \ +\( -name "*BRCA*.pseudo" -o -type f -iname "*Other*.pseudo" \) \ \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ ! 
-name "bede6d1385c0ae9db4fe61fe9b07d58f86e2dc60_24.11.2021 to 31.03.2025_BRCA_DATA__2021_11_24__to__2025_03_31_b.xlsx.pseudo") do diff --git a/lib/import/colorectal/core/genocolorectal.rb b/lib/import/colorectal/core/genocolorectal.rb index 13d71cc0..727add89 100644 --- a/lib/import/colorectal/core/genocolorectal.rb +++ b/lib/import/colorectal/core/genocolorectal.rb @@ -16,11 +16,15 @@ class Genocolorectal < Import::Germline::Genotype #--------------------- Schema code mapping tables -------------------------- COLORECTAL_MAP = { 'APC' => 358, + 'BAP1' => 517, 'BMPR1A' => 577, 'EPCAM' => 1432, - 'TACSTD1' => 1432, + 'FH' => 1590, + 'FLCN' => 1603, + 'MET' => 50, 'MLH1' => 2744, 'MSH2' => 2804, + 'MSH3' => 2805, 'MSH6' => 2808, 'MUTYH' => 2850, 'PMS2' => 3394, @@ -42,14 +46,20 @@ class Genocolorectal < Import::Germline::Genotype 'RAD51D' => 3616, 'VHL' => 83, 'ATM' => 451, - 'SCG5' => 5092 }.freeze + 'SCG5' => 5092, + 'SDHB' => 68 + }.freeze COLORECTAL_REGEX = /(?APC)| + (?BAP1)| (?BMPR1A)| (?EPCAM)| - (?TACSTD1)| + (?FH)| + (?FLCN)| + (?MET)| (?MLH1)| (?MSH2)| + (?MSH3)| (?MSH6)| (?MUTYH)| (?PMS2)| @@ -71,15 +81,16 @@ class Genocolorectal < Import::Germline::Genotype (?RAD51D)| (?VHL) | (?ATM) | - (?SCG5)/ix # Added by Francesco + (?SCG5)| + (?SDHB)/ix # Added by Francesco # ------------------------ Interogators ------------------------------ def add_gene_colorectal(colorectal_input) case colorectal_input when Integer - if [1432, 358, 577, 2744, 2804, 2808, 2850, 3394, 7, 8, 79, 3186, 5019, - 3408, 5000, 62, 72, 76, 1882, 3108, 794, 83, 5019, 451].include? colorectal_input + if [1432, 358, 517, 577, 2744, 2804, 2805, 2808, 2850, 3394, 7, 8, 79, 3186, 5019, 1603, 50, 68, + 3408, 5000, 62, 72, 76, 1882, 3108, 794, 83, 5019, 451, 1590, 5092].include? 
colorectal_input @attribute_map['gene'] = colorectal_input @logger.debug "SUCCESSFUL gene parse for #{colorectal_input}" diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index 59259a8c..5ecb0156 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -4,8 +4,334 @@ module Providers module Leeds # Leeds importer for colorectal for post 2025 files class LeedsHandlerNewColorectal < Import::Germline::ProviderHandler + include Import::Helpers::Colorectal::Providers::Rr8::Constants + + def process_fields(record) + # check if should process record from Other Cancer file + return unless should_process?(record) + + genocolorectal = Import::Colorectal::Core::Genocolorectal.new(record) + genocolorectal.add_passthrough_fields(record.mapped_fields, + record.raw_fields, + PASS_THROUGH_FIELDS, + FIELD_NAME_MAPPINGS) + + populate_variables(record) + process_test_scope(genocolorectal) + setup_derived_values(record) + genotypes = [] + if genocolorectal.full_screen? + add_fs_moleculartestingtype(genocolorectal, record) + @genes_panel = get_genes_panel + res = process_fs_rec(genocolorectal, record, genotypes) + elsif genocolorectal.targeted? + add_targ_moleculartestingtype(genocolorectal) + res = [] + # res = process_targ_rec(genocolorectal, record, genotypes) + end + + res.map { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end + + def should_process?(record) + file_name = @batch.original_filename + return true unless file_name =~ /Other/ix + + fields = record.raw_fields + return false unless fields['moleculartestingtype'] == 'Familial' + + rep_scan = fields['report']&.scan(MMR_GENE_REGEX) + diag_scan = fields['diagnosis_report']&.scan(MMR_GENE_REGEX) + has_genes = rep_scan&.any? || diag_scan&.any? 
+ + return false unless has_genes + return false if fields['diagnosis_report'] =~ /ataxia/i + return false if fields['codingdnasequencechange'] =~ /BRCA/i + + true + end + + def populate_variables(record) + @report = record.raw_fields['report'] + @moltestingtype = record.raw_fields['moleculartestingtype'] + @value1 = record.raw_fields['proteinimpact'] + @value12 = record.raw_fields['zygosity'] + @report_result = record.raw_fields['genotype'] + @value2 = record.raw_fields['gene'] + @result = record.raw_fields['codingdnasequencechange'] + @diag_report = record.raw_fields['diagnosis_report'] + @comment = record.raw_fields['variantpathclass'] + @pos_gene = nil + @variantpathclass = nil + @cdna_mutations = nil + @exonic_mutations = nil + @zygosity = nil + end + + def process_test_scope(genocolorectal) + if @moltestingtype == 'Familial' + genocolorectal.add_test_scope(:targeted_mutation) + else + genocolorectal.add_test_scope(:full_screen) + end + end + + def add_fs_moleculartestingtype(genocolorectal, record) + indication_catgeory = record.raw_fields['indicationcategory'] + return unless %w[R211 R414].include? 
indication_catgeory + + genocolorectal.add_molecular_testing_type_strict(:diagnostic) + end + + def add_targ_moleculartestingtype(genocolorectal) + if @report_result.match?(/conf/i) || @report_result.match?(/R240/i) + genocolorectal.add_molecular_testing_type_strict(:diagnostic) + elsif @report_result.match?(/pred/i) || @report_result.match?(/R242/i) + genocolorectal.add_molecular_testing_type_strict(:predictive) + end + end + + def get_genes_panel + genes = [] + # Extract genes from diagnosis report + diag_report_match = @diag_report&.match(/Genes\sscreened\sin\sthe[^.]*\./im) + if diag_report_match + diag_report_text = diag_report_match[0] + scanned_genes = diag_report_text.scan(COLORECTAL_GENES_REGEX) + genes.concat(scanned_genes) if scanned_genes + end + + # Extract genes from main report + match = @report&.match(PATIENT_SCREENED_REGEX) + if match + relevant_text = match[1] + scanned_genes = relevant_text.scan(COLORECTAL_GENES_REGEX) + genes.concat(scanned_genes) if scanned_genes + end + + # Extract genes from report results + result_genes = @report_result&.scan(COLORECTAL_GENES_REGEX) + genes.concat(result_genes) if result_genes + + # Flatten and clean up detected genes + detected_genes = genes.flatten.compact.uniq + + # Use default gene panels if no genes were detected from reports + if detected_genes.empty? 
+ case @moltestingtype + when 'R209.1' # Comprehensive colorectal cancer panel + detected_genes = %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 POLD1 POLE PTEN SMAD4 STK11] + when 'R210.2' # Lynch syndrome focused panel + detected_genes = %w[MLH1 MSH2 MSH6 PMS2] + end + end + + detected_genes + end + + def setup_derived_values(record) + @zygosity = calc_zygosity + @variantpathclass = cal_variantpathclass(record) + @cdna_mutations = @result&.match(CDNA_REGEX) || @value1&.match(CDNA_REGEX) + @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) || @value1&.match(EXON_VARIANT_REGEX) + @protein_impact = @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX) + @refid = @result&.match(REF_TRANSCRIPT_ID) || @value1&.match(REF_TRANSCRIPT_ID) + end + + def process_fs_rec(genocolorectal, record, genotypes) + # priority based extracting details + return genotypes if fail_rec?(genocolorectal, genotypes) + + process_result_variant_rec(genocolorectal, record, genotypes) + process_protein_impact_variant_rec(genocolorectal, record, genotypes) + return genotypes if gene_variant_rec?(genocolorectal, record, genotypes) + return genotypes if normal_result_rec?(genocolorectal, genotypes) + return genotypes if normal_report_result?(genocolorectal, genotypes) + + first_of_report_variant_rec?(genocolorectal, record, genotypes) + end + + def fail_rec?(genocolorectal, genotypes) + return false unless @report_result =~ /fail/i && @report_result !~ /dosage/i + + process_status_genes(9, @genes_panel, genocolorectal, genotypes) + true + end + + def process_result_variant_rec(genocolorectal, record, genotypes) + return unless @value2.nil? && (@result =~ CDNA_REGEX || @result =~ EXON_REGEX || @result =~ /heterozygo/i) + + gene = @result&.scan(COLORECTAL_GENES_REGEX) + @pos_gene = gene.flatten.uniq + teststatus = case @value1 + when /C1/, /C2/ + 10 + when /C3/ + 2 + else + 2 + end + + process_variant_rec(genocolorectal, teststatus, record, genotypes) if @pos_gene.present? 
+ end + + def process_protein_impact_variant_rec(genocolorectal, record, genotypes) + unless @value2.nil? && (@value1 =~ CDNA_REGEX || @value1 =~ EXON_REGEX || @value1 =~ /\A(?:#{GENES})/i) + return + end + + gene = @value1&.scan(COLORECTAL_GENES_REGEX) + @pos_gene = gene.flatten.uniq + @pos_gene = ['PMS2'] if @value1 =~ /NM_000535.5/ + teststatus = case @value1 + when /C1/, /C2/ + 10 + else + 2 + end + + process_variant_rec(genocolorectal, teststatus, record, genotypes) if @pos_gene.present? + end + + def gene_variant_rec?(genocolorectal, record, genotypes) + return false if @value2.nil? + + scanned_genes = @value2&.scan(COLORECTAL_GENES_REGEX) + @pos_gene = scanned_genes&.flatten&.uniq + @pos_gene -= ['CHEK2'] unless @pos_gene.nil? + return false if @pos_gene.blank? + + process_variant_rec(genocolorectal, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genocolorectal, genotypes) + true + end + + def normal_result_rec?(genocolorectal, genotypes) + return false unless @result =~ /No.*detected/i + + negative_genes = @genes_panel + process_status_genes(1, negative_genes, genocolorectal, genotypes) + true + end + + def normal_report_result?(genocolorectal, genotypes) + return false unless @report_result =~ /normal/i + + process_status_genes(1, @genes_panel, genocolorectal, genotypes) + true + end + + def first_of_report_variant_rec?(genocolorectal, record, genotypes) + return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{COLORECTAL_GENES_REGEX}/ix + + @pos_gene = [$LAST_MATCH_INFO[:colorectal]] + if @pos_gene.present? 
+ process_variant_rec(genocolorectal, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genocolorectal, genotypes) + end + genotypes + end + + def process_variant_rec(genocolorectal, status, _record, genotypes) + genocolorectal_dup = genocolorectal.dup_colo + add_geneticinheritance(genocolorectal_dup) + genocolorectal_dup.add_gene_colorectal(@pos_gene[0]) + genocolorectal_dup.add_zygosity(@zygosity) + process_cdna_variant(genocolorectal_dup, @cdna_mutations) if @cdna_mutations.present? + process_protein_impact(genocolorectal_dup, @protein_impact) if @protein_impact.present? + process_exonic_variant(genocolorectal_dup, @exonic_mutations) if @exonic_mutations.present? + genocolorectal_dup.add_referencetranscriptid(@refid.to_s) if @refid.present? + genocolorectal_dup.add_variant_class(@variantpathclass) + genocolorectal_dup.add_status(status) + genotypes << genocolorectal_dup + end + + def process_status_genes(status, negative_genes, genocolorectal, genotypes) + negative_genes&.each do |gene| + genocolorectal_dup = genocolorectal.dup_colo + genocolorectal_dup.add_gene_colorectal(gene) + genocolorectal_dup.add_status(status) + genotypes << genocolorectal_dup + end + end + + def add_geneticinheritance(genocolorectal) + geneticinheritance = if @value12 =~ /mosaic/i || + @result =~ /VAF/ || @result =~ /dosage ~0\./ + 6 + else + 4 + end + genocolorectal.attribute_map['geneticinheritance'] = geneticinheritance + end + + def calc_zygosity + [@value12, @value1, @result].each do |v| + next unless v + + return 1 if v =~ /het/i + return 2 if v =~ /homo/i + end + end + + def cal_variantpathclass(_record) + varclass = classify_variant_pathogenicity + varclass || classify_protein_impact + end + + def classify_variant_pathogenicity + case @comment + when /Likely\spathogenic/i + 4 + when /Pathogenic/i + 5 + when /Uncertain\ssignificance/i + 3 + end + end + + def classify_protein_impact + case @value1 + when /C1/ + 1 + when 
/C2/ + 2 + when /\(cold\sC3\)/i + 8 + when /\(hot\sC3\)/i + 9 + when /C3/ + 3 + end + end + + def process_exonic_variant(genotype, mutation) + return if mutation[:exons].blank? + + genotype.add_exon_location(mutation[:exons]) + genotype.add_variant_type(mutation[:variant]) + @logger.debug "SUCCESSFUL exon variant parse for: #{mutation}" + end + + def process_cdna_variant(genotype, mutation) + return if mutation[:cdna].blank? + + genotype.add_gene_location(mutation[:cdna]) + @logger.debug "SUCCESSFUL cdna change parse for: #{mutation}" + end + + def process_protein_impact(genotype, mutation) + if mutation[:impact].present? + genotype.add_protein_impact(mutation[:impact]) + @logger.debug "SUCCESSFUL protein parse for: #{mutation[:impact]}" + else + @logger.debug "FAILED protein parse for: #{mutation}" + end + end end end end end -end \ No newline at end of file +end diff --git a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh index 95e2a205..3fd32670 100755 --- a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh +++ b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh @@ -37,7 +37,7 @@ for x in $(find "$DIRPATH/$FILEPATH" \ -type f \ -not -path "*/API_BETA_RETRIEVED/*" \ -path "*/$PROV/*" \ - \( -name "*MMR*.pseudo" -o -name "*Colorectal*.pseudo" -o -name "*other*.pseudo" \) \ + \( -name "*MMR*.pseudo" -o -iname "*Colorectal*.pseudo" -o -iname "*other*.pseudo" \) \ \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ ) do diff --git a/lib/import/helpers/colorectal/providers/rr8/constants.rb b/lib/import/helpers/colorectal/providers/rr8/constants.rb index 5f25a7b0..2becf682 100644 --- a/lib/import/helpers/colorectal/providers/rr8/constants.rb +++ b/lib/import/helpers/colorectal/providers/rr8/constants.rb @@ -43,17 +43,21 @@ module Constants 'instigated_date' => 'requesteddate' }.freeze GENES = 
'APC|ATM|BAP1|BMPR1A|BRCA1|BRCA2|CHEK2|EPCAM|FH|FLCN|GREM1|MET| - MLH1|MSH2|MSH6|MUTYH|NTHL1|PALB2|PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D| + MLH1|MSH2|MSH3|MSH6|MUTYH|NTHL1|PALB2|PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D| RNF43|SDHB|SMAD4|STK11|TP53|VHL'.freeze # rubocop:disable Lint/MixedRegexpCaptureTypes MMR_GENE_REGEX = /APC|BMPR1A|EPCAM|GREM1|MLH1|MSH2|MSH6|MUTYH|NTHL1|PMS2|POLD1| - POLE|PTEN|SMAD4|STK11/ix + POLE|PTEN|SMAD4|STK11|RNF43/ix + CDNA_REGEX = /c\.(?[\w+>*\-]+)?/ix PROTEIN_REGEX = /\(?p\.\(?(?\w+)\)?/ix EXON_REGEX = /(?exon(s)?[\s\-\d]+)/ix GENE_FAIL_REGEX = /(?=(?#{GENES})[\w\s]+fail)/ix NOPATH_REGEX = /.No pathogenic variant was identified./i + + PATIENT_SCREENED_REGEX = /((?:This\s+patient(?:'s\s+sample)?\s+has\s+been\s+screened| + this\s+patient\s+is\s+heterozygous\s+for)[^.]*\.)/imx EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s? (?[0-9]+(-[0-9]+)?)| ex(on)?s?\s?(?[0-9]+(-[0-9]+)?)\s? @@ -190,6 +194,8 @@ module Constants 'developing further MSH2-related cancers', 'developing MSH2-associated cancer' ].freeze + + REF_TRANSCRIPT_ID = /NM_\d{6}\.\d(?=:)/ix end end end From a3dca7c44c09aea5528e902cc57bd4c18837b50a Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Tue, 25 Nov 2025 15:59:25 +0000 Subject: [PATCH 03/18] reverting removal of TACSTD1 as Birmingham uses it --- lib/import/colorectal/core/genocolorectal.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/import/colorectal/core/genocolorectal.rb b/lib/import/colorectal/core/genocolorectal.rb index 727add89..0a9f7f0a 100644 --- a/lib/import/colorectal/core/genocolorectal.rb +++ b/lib/import/colorectal/core/genocolorectal.rb @@ -19,6 +19,7 @@ class Genocolorectal < Import::Germline::Genotype 'BAP1' => 517, 'BMPR1A' => 577, 'EPCAM' => 1432, + 'TACSTD1' => 1432, #Old symbol for EPCAM 'FH' => 1590, 'FLCN' => 1603, 'MET' => 50, @@ -54,6 +55,7 @@ class Genocolorectal < Import::Germline::Genotype (?BAP1)| (?BMPR1A)| (?EPCAM)| + (?TACSTD1)| #Old symbol for EPCAM (?FH)| (?FLCN)| (?MET)| From 
3b8f7988c8e88d8f41ce3e4a9386cec433e82760 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Tue, 2 Dec 2025 18:08:19 +0000 Subject: [PATCH 04/18] Targ rules coded --- .../leeds/leeds_handler_new_colorectal.rb | 114 +++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index 5ecb0156..d07b3537 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -20,14 +20,14 @@ def process_fields(record) process_test_scope(genocolorectal) setup_derived_values(record) genotypes = [] + if genocolorectal.full_screen? add_fs_moleculartestingtype(genocolorectal, record) @genes_panel = get_genes_panel res = process_fs_rec(genocolorectal, record, genotypes) elsif genocolorectal.targeted? add_targ_moleculartestingtype(genocolorectal) - res = [] - # res = process_targ_rec(genocolorectal, record, genotypes) + res = process_targ_rec(genocolorectal, record, genotypes) end res.map { |cur_genotype| @persister.integrate_and_store(cur_genotype) } @@ -61,6 +61,7 @@ def populate_variables(record) @result = record.raw_fields['codingdnasequencechange'] @diag_report = record.raw_fields['diagnosis_report'] @comment = record.raw_fields['variantpathclass'] + @test = record.raw_fields['karyotypingmethod'] @pos_gene = nil @variantpathclass = nil @cdna_mutations = nil @@ -257,6 +258,101 @@ def process_status_genes(status, negative_genes, genocolorectal, genotypes) end end + def process_targ_rec(genocolorectal, record, genotypes) + @pos_gene = [] + return genotypes if zygosity_variant_targ_rec?(genocolorectal, record, genotypes) + return genotypes if variant_absent_targ_rec?(genocolorectal, genotypes) + return genotypes if no_result_targ_rec?(genocolorectal, genotypes) + return genotypes if no_biallelic_targ_rec?(genocolorectal, 
genotypes) + return genotypes if cdna_het_variant_targ_rec?(genocolorectal, record, genotypes) + return genotypes if result_variant_absent_targ_rec?(genocolorectal, genotypes) + + genotypes + end + + def zygosity_variant_targ_rec?(genotype, record, genotypes) + return false unless @value12 =~ /heterozyg|homozyg|mosaic/i + + @pos_gene = @value2&.scan(MMR_GENE_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq || [] + @variantpathclass = cal_variantpathclass_targ + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def variant_absent_targ_rec?(genotype, genotypes) + return false unless @value12 =~ /variant\sabsent|not\sdetected/i + + negative_gene = @value2&.scan(MMR_GENE_REGEX) + negative_gene = negative_gene&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + def no_result_targ_rec?(genotype, genotypes) + return false unless @report_result =~ /Fail/i + + targ_gene = nil + [@value2, @test, @diag_report].each do |src| + result = src&.scan(MMR_GENE_REGEX) + if result&.flatten&.uniq&.any? + targ_gene = result&.flatten&.uniq + break + end + end + if targ_gene.size == 1 + process_status_genes(9, targ_gene, genotype, genotypes) + else + genotype_dup = genotype.dup + genotype_dup.add_status(9) + genotypes << genotype_dup + end + true + end + + def no_biallelic_targ_rec?(genotype, genotypes) + return false unless @report =~ /Biallelic.*neg/ix || @result =~ /No\sbiallelic|No\sbi-allelic/ix + + targ_gene = @report&.scan(MMR_GENE_REGEX) + targ_gene = targ_gene&.flatten&.uniq + process_status_genes(4, targ_gene, genotype, genotypes) + true + end + + def cdna_het_variant_targ_rec?(genotype, record, genotypes) + return false unless @result =~ CDNA_REGEX || @result =~ EXON_REGEX || @result =~ /het/ + + @pos_gene = [] + + [@result, @test, @report_result, @report].each do |src| + result = src&.scan(MMR_GENE_REGEX) + if result&.flatten&.uniq&.any? 
+ @pos_gene = result&.flatten&.uniq + break + end + end + @variantpathclass = classify_first_of_report + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def result_variant_absent_targ_rec?(genotype, genotypes) + return false unless @result =~ /(variant|variaint)\sabsent|no.*detected/ix + + negative_gene = [] + + [@result, @report].each do |src| + result = src&.scan(MMR_GENE_REGEX) + if result&.flatten&.uniq&.any? + negative_gene = result&.flatten&.uniq + break + end + end + + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + def add_geneticinheritance(genocolorectal) geneticinheritance = if @value12 =~ /mosaic/i || @result =~ /VAF/ || @result =~ /dosage ~0\./ @@ -281,6 +377,11 @@ def cal_variantpathclass(_record) varclass || classify_protein_impact end + def cal_variantpathclass_targ + varclass = classify_variant_pathogenicity + varclass || classify_first_of_report + end + def classify_variant_pathogenicity case @comment when /Likely\spathogenic/i @@ -307,6 +408,15 @@ def classify_protein_impact end end + def classify_first_of_report + case @report + when /Likely\spathogenic/i + 4 + when /Pathogenic/i + 5 + end + end + def process_exonic_variant(genotype, mutation) return if mutation[:exons].blank? 
From e7eed3945fd8b8d9e654231498c1c923e1a4490b Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Tue, 2 Dec 2025 18:15:05 +0000 Subject: [PATCH 05/18] rubocop fix for safenavigation --- .../leeds/leeds_handler_new_colorectal.rb | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index d07b3537..ebe99687 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -295,8 +295,9 @@ def no_result_targ_rec?(genotype, genotypes) targ_gene = nil [@value2, @test, @diag_report].each do |src| result = src&.scan(MMR_GENE_REGEX) - if result&.flatten&.uniq&.any? - targ_gene = result&.flatten&.uniq + flattened_result = result&.flatten&.uniq + if flattened_result&.any? + targ_gene = flattened_result break end end @@ -326,8 +327,9 @@ def cdna_het_variant_targ_rec?(genotype, record, genotypes) [@result, @test, @report_result, @report].each do |src| result = src&.scan(MMR_GENE_REGEX) - if result&.flatten&.uniq&.any? - @pos_gene = result&.flatten&.uniq + flattened_result = result&.flatten&.uniq + if flattened_result&.any? + @pos_gene = flattened_result break end end @@ -343,8 +345,9 @@ def result_variant_absent_targ_rec?(genotype, genotypes) [@result, @report].each do |src| result = src&.scan(MMR_GENE_REGEX) - if result&.flatten&.uniq&.any? - negative_gene = result&.flatten&.uniq + flattened_result = result&.flatten&.uniq + if flattened_result&.any? 
+ negative_gene = flattened_result break end end From 1ea5e04ade9e994cc0bd3cb0bba5a8f8800b54df Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Tue, 2 Dec 2025 18:26:09 +0000 Subject: [PATCH 06/18] genes_panel method rubocopped --- .../leeds/leeds_handler_new_colorectal.rb | 64 ++++++++++--------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index ebe99687..63527dce 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -23,7 +23,7 @@ def process_fields(record) if genocolorectal.full_screen? add_fs_moleculartestingtype(genocolorectal, record) - @genes_panel = get_genes_panel + @genes_panel = genes_panel res = process_fs_rec(genocolorectal, record, genotypes) elsif genocolorectal.targeted? add_targ_moleculartestingtype(genocolorectal) @@ -92,42 +92,48 @@ def add_targ_moleculartestingtype(genocolorectal) end end - def get_genes_panel + def genes_panel genes = [] - # Extract genes from diagnosis report + genes.concat(extract_genes_from_diagnosis_report) + genes.concat(extract_genes_from_main_report) + genes.concat(extract_genes_from_report_results) + + detected_genes = genes.flatten.compact.uniq + detected_genes.empty? ? 
default_genes_for_test_type : detected_genes + end + + def extract_genes_from_diagnosis_report diag_report_match = @diag_report&.match(/Genes\sscreened\sin\sthe[^.]*\./im) - if diag_report_match - diag_report_text = diag_report_match[0] - scanned_genes = diag_report_text.scan(COLORECTAL_GENES_REGEX) - genes.concat(scanned_genes) if scanned_genes - end + return [] unless diag_report_match + + diag_report_text = diag_report_match[0] + scanned_genes = diag_report_text.scan(COLORECTAL_GENES_REGEX) + scanned_genes || [] + end - # Extract genes from main report + def extract_genes_from_main_report match = @report&.match(PATIENT_SCREENED_REGEX) - if match - relevant_text = match[1] - scanned_genes = relevant_text.scan(COLORECTAL_GENES_REGEX) - genes.concat(scanned_genes) if scanned_genes - end + return [] unless match - # Extract genes from report results - result_genes = @report_result&.scan(COLORECTAL_GENES_REGEX) - genes.concat(result_genes) if result_genes + relevant_text = match[1] + scanned_genes = relevant_text.scan(COLORECTAL_GENES_REGEX) + scanned_genes || [] + end - # Flatten and clean up detected genes - detected_genes = genes.flatten.compact.uniq + def extract_genes_from_report_results + result_genes = @report_result&.scan(COLORECTAL_GENES_REGEX) + result_genes || [] + end - # Use default gene panels if no genes were detected from reports - if detected_genes.empty? 
- case @moltestingtype - when 'R209.1' # Comprehensive colorectal cancer panel - detected_genes = %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 POLD1 POLE PTEN SMAD4 STK11] - when 'R210.2' # Lynch syndrome focused panel - detected_genes = %w[MLH1 MSH2 MSH6 PMS2] - end + def default_genes_for_test_type + case @moltestingtype + when 'R209.1' # Comprehensive colorectal cancer panel + %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 POLD1 POLE PTEN SMAD4 STK11] + when 'R210.2' # Lynch syndrome focused panel + %w[MLH1 MSH2 MSH6 PMS2] + else + [] end - - detected_genes end def setup_derived_values(record) From bcae0216bbd416be26cc5b7d22fc6bfab50c8a3e Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 3 Dec 2025 13:39:11 +0000 Subject: [PATCH 07/18] More rubocop fixes --- .../leeds/leeds_handler_new_colorectal.rb | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index 63527dce..57e62cb4 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -139,9 +139,29 @@ def default_genes_for_test_type def setup_derived_values(record) @zygosity = calc_zygosity @variantpathclass = cal_variantpathclass(record) - @cdna_mutations = @result&.match(CDNA_REGEX) || @value1&.match(CDNA_REGEX) - @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) || @value1&.match(EXON_VARIANT_REGEX) - @protein_impact = @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX) + setup_mutation_fields + setup_reference_transcript_id + end + + def setup_mutation_fields + @cdna_mutations = extract_cdna_mutations + @exonic_mutations = extract_exonic_mutations + @protein_impact = extract_protein_impact + end + + def extract_cdna_mutations + @result&.match(CDNA_REGEX) || @value1&.match(CDNA_REGEX) + end + 
+ def extract_exonic_mutations + @result&.match(EXON_VARIANT_REGEX) || @value1&.match(EXON_VARIANT_REGEX) + end + + def extract_protein_impact + @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX) + end + + def setup_reference_transcript_id @refid = @result&.match(REF_TRANSCRIPT_ID) || @value1&.match(REF_TRANSCRIPT_ID) end @@ -173,8 +193,6 @@ def process_result_variant_rec(genocolorectal, record, genotypes) teststatus = case @value1 when /C1/, /C2/ 10 - when /C3/ - 2 else 2 end @@ -307,7 +325,7 @@ def no_result_targ_rec?(genotype, genotypes) break end end - if targ_gene.size == 1 + if targ_gene&.size == 1 process_status_genes(9, targ_gene, genotype, genotypes) else genotype_dup = genotype.dup From e6f9cc64f10fb96e4683a46c538fbc22d0e8e43e Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 11:25:19 +0000 Subject: [PATCH 08/18] tests added --- .../leeds_handler_new_colorectal_test.rb | 376 ++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb new file mode 100644 index 00000000..5fbd5f45 --- /dev/null +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb @@ -0,0 +1,376 @@ +require 'test_helper' + +class LeedsHandlerNewColorectalTest < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerNewColorectal.new(EBatch.new) + end + end + + test 'process_failed_test_record' do + failed_record = build_raw_record('pseudo_id1' => 'patient1') + failed_record.raw_fields['genotype'] = 'Fail/Results not required' + 
failed_record.raw_fields['codingdnasequencechange'] = 'No result' + failed_record.raw_fields['moleculartestingtype'] = 'R210.2' + failed_record.raw_fields['report'] = 'Unfortunately, no results were obtained from this tissue sample.' + + res = @handler.process_fields(failed_record) + assert_equal 4, res.size + res.each do |genotype| + assert_equal 9, genotype.attribute_map['teststatus'] + end + end + + test 'process_normal_result_record' do + res = @handler.process_fields(@record) + assert_equal 1, res.size + assert_equal 1, res[0].attribute_map['teststatus'] # normal + assert_equal 3394, res[0].attribute_map['gene'] # PMS2 + end + + test 'process_result_variant_rec' do + result_record = build_raw_record('pseudo_id1' => 'patient6') + result_record.raw_fields['codingdnasequencechange'] = 'PMS2 exons 1-7 deletion heterozygote' + result_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for a deletion of PMS2 exons 1-7.' + + res = @handler.process_fields(result_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] + assert_equal '1-7', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 1, genotype.attribute_map['variantgenotype'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + end + end + + test 'process_protein_impact_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = nil + gene_record.raw_fields['codingdnasequencechange'] = 'NTHL1 c.268C>T' + gene_record.raw_fields['proteinimpact'] = 'APC c.2120T>C het [C3]' + gene_record.raw_fields['zygosity'] = nil + gene_record.raw_fields['variantpathclass'] = nil + gene_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:\n\nAPC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, 
PTEN, RNF43, SMAD4, STK11' + + res = @handler.process_fields(gene_record) + assert_equal 2, res.size + + # APC variant should get c.2120T>C from proteinimpact field + protein_variant_genotype = res.find { |g| g.attribute_map['gene'] == 358 } # APC + assert_equal 'c.2120T>C', protein_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, protein_variant_genotype.attribute_map['teststatus'] + + # NTHL1 variant should get c.268C>T from codingdnasequencechange field + result_variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_equal 'c.268C>T', result_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, result_variant_genotype.attribute_map['teststatus'] + end + + test 'gene_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = 'MLH1' + gene_record.raw_fields['codingdnasequencechange'] = 'exon 16-19 deletion' + gene_record.raw_fields['proteinimpact'] = nil + gene_record.raw_fields['zygosity'] = 'Heterozygous' + gene_record.raw_fields['variantpathclass'] = 'Pathogenic' + gene_record.raw_fields['report'] = 'A germline pathogenic MLH1 copy number variant was detected in this patient sample' + gene_record.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: MLH1, MSH2, MSH6, PMS2 (all coding exons and exon-intron boundaries).' 
+ res = @handler.process_fields(gene_record) + assert_equal 4, res.size + + gene_variant_genotype = res.find { |g| g.attribute_map['gene'] == 2744 } # MLH1 + assert_equal '16-19', gene_variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 2, gene_variant_genotype.attribute_map['teststatus'] + assert_equal 5, gene_variant_genotype.attribute_map['variantpathclass'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 2744 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'normal_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' + normal_record.raw_fields['report'] = 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.' + normal_record.raw_fields['genotype'] = 'PMS2 - MLPA conf negative' + normal_record.raw_fields['moleculartestingtype'] = nil + + res = @handler.process_fields(normal_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'normal_report_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence analysis. No pathogenic variant was identified.' 
+ normal_record.raw_fields['genotype'] = 'Lynch Diag; normal' + normal_record.raw_fields['proteinimpact'] = nil + normal_record.raw_fields['gene'] = nil + normal_record.raw_fields['codingdnasequencechange'] = 'No result' + + res = @handler.process_fields(normal_record) + assert_equal 4, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'first_of_report_variant_rec' do + first_record = build_raw_record('pseudo_id1' => 'patient8') + first_record.raw_fields['codingdnasequencechange'] = 'No result' + first_record.raw_fields['proteinimpact'] = nil + first_record.raw_fields['gene'] = nil + first_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:' \ + 'APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11.This patient is heterozygous for the ' \ + 'pathogenic NTHL1 variants c.268C>T p.(Gln90Ter)' + first_record.raw_fields['moleculartestingtype'] = 'R211' + + res = @handler.process_fields(first_record) + assert_equal 16, res.size + variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_not_nil variant_genotype + assert_equal 'c.268C>T', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln90Ter', variant_genotype.attribute_map['proteinimpact'] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 3108 + + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + end + end + + # targeted tests + test 'zygosity_variant_targ_rec' do + targeted_record = build_raw_record('pseudo_id1' => 'patient10') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Heterozygous' + 
targeted_record.raw_fields['gene'] = 'MSH2' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:exon 11-16 deletion' + targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' + targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic MSH2 copy number variant' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-16', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 'NM_000251.2', genotype.attribute_map['referencetranscriptid'] + assert_equal 'Targeted Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + assert_equal 5, genotype.attribute_map['variantpathclass'] + assert_equal 4, genotype.attribute_map['geneticinheritance'] + end + + test 'process_targeted_mosaic_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient11') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Mosaic' + targeted_record.raw_fields['gene'] = 'PMS2' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000535.5:Whole gene deletion' + targeted_record.raw_fields['genotype'] = 'R443_Confirmation_NGS_MLPA_PMS2' + targeted_record.raw_fields['report'] = 'This patient shows mosaic pattern for the familial MLH1 variant' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + assert_equal 'NM_000535.5', genotype.attribute_map['referencetranscriptid'] + assert_equal 6, genotype.attribute_map['geneticinheritance'] + assert_equal 5,
genotype.attribute_map['variantpathclass'] + end + + test 'process_targeted_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient12') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:Exon 1-7 deletion' + targeted_record.raw_fields['genotype'] = 'R242_neg_MLPA' + targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic MSH2 variant.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = 'Variant NOT detected' + targeted_record.raw_fields['gene'] = 'MSH2' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + end + + test 'process_targeted_no_result' do + targeted_record = build_raw_record('pseudo_id1' => 'patient13') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Fail/Results not required' + targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + targeted_record.raw_fields['diagnosis_report'] = 'Germline heterozygous pathogenic variants in PTEN are associated with PTEN hamartoma tumour syndrome' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_equal 62, genotype.attribute_map['gene'] + end + + test 'process_targeted_no_biallelic' do + targeted_record = build_raw_record('pseudo_id1' => 'patient14') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + 
targeted_record.raw_fields['codingdnasequencechange'] = 'No biallelic presence of familial variant' + targeted_record.raw_fields['genotype'] = 'PMS2 - Biallelic (CMMRD) pred negative' + targeted_record.raw_fields['report'] = 'Sequence analysis indicates no biallelic presence of the familial pathogenic PMS2 variant c.2404C>T in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 4, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + end + + test 'process_targeted_cdna_het_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'MSH2 Exon 11-12 duplication heterozygote' + targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA +ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for the familial likely pathogenic MSH2 duplication of exons 11-12' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-12', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 4, genotype.attribute_map['sequencevarianttype'] + end + + test 'process_result_variant_absent_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' + 
targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA -ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic MLH1 exon 16-19 deletion is absent in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2744, genotype.attribute_map['gene'] # MLH1 + end + + # Tests for should_process? method + test 'should_process_other_cancer_file_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient16') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + assert @handler.send(:should_process?, record) + end + + test 'should_not_process_other_cancer_file_non_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient17') + record.raw_fields['moleculartestingtype'] = 'Predictive' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_ataxia_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient18') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['diagnosis_report'] = 'Testing for ataxia related genes' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_brca_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' 
=> 'patient19') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['codingdnasequencechange'] = 'BRCA1 c.181T>G' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + private + + def clinical_json + { sex: '2', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2010-08-05T00:00:00.000+01:00', + authoriseddate: '2010-09-17T00:00:00.000+01:00', + servicereportidentifier: 'Service Report Identifier', + sortdate: '2010-08-05T00:00:00.000+01:00', + genetictestscope: 'R210.2', + specimentype: '5', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.', + requesteddate: '2010-08-05T00:00:00.000+01:00', + age: 37 }.to_json + end + + def rawtext_clinical_json + { sex: 'M', + providercode: 'Provider Code', + referringclinicianname: 'Clinician Name', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + patienttype: 'NHS', + moleculartestingtype: 'R210.5', + indicationcategory: 'R210', + genotype: 'PMS2 - MLPA conf negative', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient', + diagnosis_report: 'Heterozygous mutations in PMS2 are linked to Lynch Syndrome with dominant inheritance. 
' \ + 'Homozygous/compound heterozygous mutations in PMS2 are linked to mismatch repair cancer syndrome.', + receiveddate: '2010-08-05 00:00:00', + karyotypingmethod: 'MLPA P008', + codingdnasequencechange: 'No deletions/duplications detected', + proteinimpact: nil, + gene: nil, + zygosity: nil, + variantpathclass: nil, + requesteddate: '2010-08-05 00:00:00', + authoriseddate: '2010-09-17 00:00:00', + specimentype: 'Blood' }.to_json + end +end From 0e1aa8d5e3bd106e7668dfad035e8ebdc5f670c1 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 11:26:04 +0000 Subject: [PATCH 09/18] fixed cdna regex --- lib/import/helpers/colorectal/providers/rr8/constants.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/import/helpers/colorectal/providers/rr8/constants.rb b/lib/import/helpers/colorectal/providers/rr8/constants.rb index 2becf682..65864321 100644 --- a/lib/import/helpers/colorectal/providers/rr8/constants.rb +++ b/lib/import/helpers/colorectal/providers/rr8/constants.rb @@ -50,7 +50,7 @@ module Constants MMR_GENE_REGEX = /APC|BMPR1A|EPCAM|GREM1|MLH1|MSH2|MSH6|MUTYH|NTHL1|PMS2|POLD1| POLE|PTEN|SMAD4|STK11|RNF43/ix - CDNA_REGEX = /c\.(?[\w+>*\-]+)?/ix + CDNA_REGEX = /c\.(?[\w.+>*\-]+)/ix PROTEIN_REGEX = /\(?p\.\(?(?\w+)\)?/ix EXON_REGEX = /(?exon(s)?[\s\-\d]+)/ix GENE_FAIL_REGEX = /(?=(?#{GENES})[\w\s]+fail)/ix From 42465d423dc0129d38ba87a92128654605551a78 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 12:03:44 +0000 Subject: [PATCH 10/18] populating variantgenotype in table --- lib/import/database_wrappers/genetic_sequence_variant.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/import/database_wrappers/genetic_sequence_variant.rb b/lib/import/database_wrappers/genetic_sequence_variant.rb index 635f15f9..546f656c 100644 --- a/lib/import/database_wrappers/genetic_sequence_variant.rb +++ b/lib/import/database_wrappers/genetic_sequence_variant.rb @@ -18,7 +18,6 @@ def initialize(genotype) genomicchange clinvarid 
cosmicid - variantgenotype variantallelefrequency variantreport raw_record From bb9ca92f0b0a372078dcac822bc33199a525d149 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 12:04:53 +0000 Subject: [PATCH 11/18] more refinements --- .../leeds/leeds_handler_new_colorectal.rb | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index 57e62cb4..ccff6bbc 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -30,7 +30,7 @@ def process_fields(record) res = process_targ_rec(genocolorectal, record, genotypes) end - res.map { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end def should_process?(record) @@ -196,8 +196,13 @@ def process_result_variant_rec(genocolorectal, record, genotypes) else 2 end + return if @pos_gene.blank? - process_variant_rec(genocolorectal, teststatus, record, genotypes) if @pos_gene.present? + @cdna_mutations = @result&.match(CDNA_REGEX) + @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) + @protein_impact = @result&.match(PROTEIN_REGEX) + + process_variant_rec(genocolorectal, teststatus, record, genotypes) end def process_protein_impact_variant_rec(genocolorectal, record, genotypes) @@ -214,8 +219,12 @@ def process_protein_impact_variant_rec(genocolorectal, record, genotypes) else 2 end + return if @pos_gene.blank? - process_variant_rec(genocolorectal, teststatus, record, genotypes) if @pos_gene.present? 
+ @cdna_mutations = @value1&.match(CDNA_REGEX) + @exonic_mutations = @value1&.match(EXON_VARIANT_REGEX) + @protein_impact = @value1&.match(PROTEIN_REGEX) + process_variant_rec(genocolorectal, teststatus, record, genotypes) end def gene_variant_rec?(genocolorectal, record, genotypes) @@ -252,6 +261,9 @@ def first_of_report_variant_rec?(genocolorectal, record, genotypes) @pos_gene = [$LAST_MATCH_INFO[:colorectal]] if @pos_gene.present? + @cdna_mutations = @report&.match(CDNA_REGEX) + @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) + @protein_impact = @report&.match(PROTEIN_REGEX) process_variant_rec(genocolorectal, 2, record, genotypes) negative_genes = @genes_panel - @pos_gene process_status_genes(1, negative_genes, genocolorectal, genotypes) @@ -397,6 +409,7 @@ def calc_zygosity return 1 if v =~ /het/i return 2 if v =~ /homo/i end + nil end def cal_variantpathclass(_record) From 800cdc6dc7c46730d9a44ea619cc12630d653e57 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 12:22:19 +0000 Subject: [PATCH 12/18] rubocop fixes --- .../leeds/leeds_handler_new_colorectal.rb | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index ccff6bbc..33c64626 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -52,6 +52,11 @@ def should_process?(record) end def populate_variables(record) + populate_raw_field_variables(record) + initialize_processing_variables + end + + def populate_raw_field_variables(record) @report = record.raw_fields['report'] @moltestingtype = record.raw_fields['moleculartestingtype'] @value1 = record.raw_fields['proteinimpact'] @@ -62,6 +67,9 @@ def populate_variables(record) @diag_report = record.raw_fields['diagnosis_report'] @comment = 
record.raw_fields['variantpathclass'] @test = record.raw_fields['karyotypingmethod'] + end + + def initialize_processing_variables @pos_gene = nil @variantpathclass = nil @cdna_mutations = nil @@ -328,15 +336,21 @@ def variant_absent_targ_rec?(genotype, genotypes) def no_result_targ_rec?(genotype, genotypes) return false unless @report_result =~ /Fail/i - targ_gene = nil - [@value2, @test, @diag_report].each do |src| - result = src&.scan(MMR_GENE_REGEX) - flattened_result = result&.flatten&.uniq - if flattened_result&.any? - targ_gene = flattened_result - break - end + targ_gene = find_target_gene_from_sources + process_failed_target_gene(targ_gene, genotype, genotypes) + true + end + + def find_target_gene_from_sources + [@value2, @test, @diag_report].each do |source| + result = source&.scan(MMR_GENE_REGEX) + gene = result&.flatten&.uniq + return gene if gene&.any? end + nil + end + + def process_failed_target_gene(targ_gene, genotype, genotypes) if targ_gene&.size == 1 process_status_genes(9, targ_gene, genotype, genotypes) else @@ -344,7 +358,6 @@ def no_result_targ_rec?(genotype, genotypes) genotype_dup.add_status(9) genotypes << genotype_dup end - true end def no_biallelic_targ_rec?(genotype, genotypes) From 4bb30d8efd2698b037d80c8552bd0ad7cf440519 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 13:33:16 +0000 Subject: [PATCH 13/18] more rubocop fixes --- .../leeds/leeds_handler_new_colorectal.rb | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index 33c64626..e6c9d043 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -206,10 +206,7 @@ def process_result_variant_rec(genocolorectal, record, genotypes) end return if @pos_gene.blank? 
- @cdna_mutations = @result&.match(CDNA_REGEX) - @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) - @protein_impact = @result&.match(PROTEIN_REGEX) - + extract_mutations_from_src(@result) process_variant_rec(genocolorectal, teststatus, record, genotypes) end @@ -221,20 +218,22 @@ def process_protein_impact_variant_rec(genocolorectal, record, genotypes) gene = @value1&.scan(COLORECTAL_GENES_REGEX) @pos_gene = gene.flatten.uniq @pos_gene = ['PMS2'] if @value1 =~ /NM_000535.5/ - teststatus = case @value1 - when /C1/, /C2/ - 10 - else - 2 - end return if @pos_gene.blank? - @cdna_mutations = @value1&.match(CDNA_REGEX) - @exonic_mutations = @value1&.match(EXON_VARIANT_REGEX) - @protein_impact = @value1&.match(PROTEIN_REGEX) + teststatus = determine_protein_impact_test_status + extract_mutations_from_src(@value1) process_variant_rec(genocolorectal, teststatus, record, genotypes) end + def determine_protein_impact_test_status + case @value1 + when /C1/, /C2/ + 10 + else + 2 + end + end + def gene_variant_rec?(genocolorectal, record, genotypes) return false if @value2.nil? @@ -269,9 +268,7 @@ def first_of_report_variant_rec?(genocolorectal, record, genotypes) @pos_gene = [$LAST_MATCH_INFO[:colorectal]] if @pos_gene.present? 
- @cdna_mutations = @report&.match(CDNA_REGEX) - @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) - @protein_impact = @report&.match(PROTEIN_REGEX) + extract_mutations_from_src(@report) process_variant_rec(genocolorectal, 2, record, genotypes) negative_genes = @genes_panel - @pos_gene process_status_genes(1, negative_genes, genocolorectal, genotypes) @@ -405,6 +402,12 @@ def result_variant_absent_targ_rec?(genotype, genotypes) true end + def extract_mutations_from_src(src) + @cdna_mutations = src&.match(CDNA_REGEX) + @exonic_mutations = src&.match(EXON_VARIANT_REGEX) + @protein_impact = src&.match(PROTEIN_REGEX) + end + def add_geneticinheritance(genocolorectal) geneticinheritance = if @value12 =~ /mosaic/i || @result =~ /VAF/ || @result =~ /dosage ~0\./ From 47d4e480b5e3deda594c54975ccebfc61e1b7ebd Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 14:53:02 +0000 Subject: [PATCH 14/18] rubocop fixes --- .../providers/leeds/leeds_handler_new_colorectal.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb index e6c9d043..7d9820f9 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb @@ -369,6 +369,13 @@ def no_biallelic_targ_rec?(genotype, genotypes) def cdna_het_variant_targ_rec?(genotype, record, genotypes) return false unless @result =~ CDNA_REGEX || @result =~ EXON_REGEX || @result =~ /het/ + find_genes_from_cdna_sources + @variantpathclass = classify_first_of_report + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def find_genes_from_cdna_sources @pos_gene = [] [@result, @test, @report_result, @report].each do |src| @@ -379,9 +386,6 @@ def cdna_het_variant_targ_rec?(genotype, record, genotypes) break end end - @variantpathclass = 
classify_first_of_report - process_variant_rec(genotype, 2, record, genotypes) - true end def result_variant_absent_targ_rec?(genotype, genotypes) From 636a96816c14c8ceb4852d0b9aafff4d9423e58e Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 17:51:20 +0000 Subject: [PATCH 15/18] Pr Review comment done on versioning the handlers --- .../core/colorectal_handler_mapping.rb | 4 +- ...ctal.rb => leeds_handler_colorectal_v1.rb} | 6 +- ...ctal.rb => leeds_handler_colorectal_v2.rb} | 8 +- .../bash/Import_all_colorectal_interactive.sh | 11 +- .../leeds/leeds_handler_colorectal_test.rb | 8 +- .../leeds/leeds_handler_colorectal_v1_test.rb | 308 ++++++++++++++ .../leeds/leeds_handler_colorectal_v2_test.rb | 376 ++++++++++++++++++ .../leeds_handler_new_colorectal_test.rb | 4 +- 8 files changed, 705 insertions(+), 20 deletions(-) rename lib/import/colorectal/providers/leeds/{leeds_handler_colorectal.rb => leeds_handler_colorectal_v1.rb} (99%) rename lib/import/colorectal/providers/leeds/{leeds_handler_new_colorectal.rb => leeds_handler_colorectal_v2.rb} (98%) create mode 100644 test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb create mode 100644 test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb diff --git a/lib/import/colorectal/core/colorectal_handler_mapping.rb b/lib/import/colorectal/core/colorectal_handler_mapping.rb index 5c9c05de..8892f6ce 100644 --- a/lib/import/colorectal/core/colorectal_handler_mapping.rb +++ b/lib/import/colorectal/core/colorectal_handler_mapping.rb @@ -4,8 +4,8 @@ module Core # Provides the handler appropriate for the dataformat from each center class ColorectalHandlerMapping HANDLER_MAPPING = { - 'RR8_2' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal, - 'RR8' => Import::Colorectal::Providers::Leeds::LeedsHandlerNewColorectal, + 'RR8_V1_PRE2025' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1, + 'RR8_V2_POST2025' => 
Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2, 'RNZ' => Import::Colorectal::Providers::Salisbury::SalisburyHandlerColorectal, 'RTD' => Import::Colorectal::Providers::Newcastle::NewcastleHandlerColorectal, 'RX1' => Import::Colorectal::Providers::Nottingham::NottinghamHandlerColorectal, diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb similarity index 99% rename from lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb rename to lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb index ae6fd91a..daf0febf 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb @@ -3,8 +3,8 @@ module Colorectal module Providers module Leeds # rubocop:disable Metrics/ClassLength - # Leeds importer for colorectal - class LeedsHandlerColorectal < Import::Germline::ProviderHandler + # Leeds importer for colorectal (pre-2025 format) + class LeedsHandlerColorectalV1 < Import::Germline::ProviderHandler include Import::Helpers::Colorectal::Providers::Rr8::Constants def initialize(batch) @@ -53,7 +53,7 @@ def populate_and_persist_genotype(record) add_varclass add_organisationcode_testresult(genocolorectal) res = process_variants_from_record(genocolorectal, record) - # correcting ebatch provider and registry to RR8 (from RR8_2) to allow data to persist in the database + # correcting ebatch provider and registry to RR8 (from RR8_V1_PRE2025) to allow data to persist in the database @batch.provider = 'RR8' @batch.registryid = 'RR8' res.map { |cur_genotype| @persister.integrate_and_store(cur_genotype) } diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb similarity index 98% rename from lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb 
rename to lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb index 7d9820f9..a120226f 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb @@ -2,8 +2,8 @@ module Import module Colorectal module Providers module Leeds - # Leeds importer for colorectal for post 2025 files - class LeedsHandlerNewColorectal < Import::Germline::ProviderHandler + # Leeds importer for colorectal (post-2025 format) + class LeedsHandlerColorectalV2 < Import::Germline::ProviderHandler include Import::Helpers::Colorectal::Providers::Rr8::Constants def process_fields(record) @@ -29,7 +29,9 @@ def process_fields(record) add_targ_moleculartestingtype(genocolorectal) res = process_targ_rec(genocolorectal, record, genotypes) end - + # correcting ebatch provider and registry to RR8 (from RR8_V2_POST2025) to allow data to persist in the database + @batch.provider = 'RR8' + @batch.registryid = 'RR8' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end diff --git a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh index 3fd32670..508c04e5 100755 --- a/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh +++ b/lib/import/colorectal/scripts/bash/Import_all_colorectal_interactive.sh @@ -30,7 +30,7 @@ $BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(. 
done } -RR8 () { +RR8_V2_POST2025 () { PROV='RR8' IFS=$'\n' for x in $(find "$DIRPATH/$FILEPATH" \ @@ -42,13 +42,12 @@ for x in $(find "$DIRPATH/$FILEPATH" \ ) do IFS="$OIFS" -$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV +$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code='RR8_V2_POST2025' done } -RR8_2 () { +RR8_V1_PRE2025 () { PROV='RR8' -PROV_OLD_FILE='RR8_2' IFS=$'\n' for x in $(find "$DIRPATH/$FILEPATH" \ -not -path "*/API_BETA_RETRIEVED/*" \ @@ -58,7 +57,7 @@ for x in $(find "$DIRPATH/$FILEPATH" \ ) do IFS="$OIFS" -$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV_OLD_FILE +$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code='RR8_V1_PRE2025' done } @@ -193,5 +192,5 @@ $BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(. done } -RR8; RR8_2; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; RJ7 +RR8_V2_POST2025; RR8_V1_PRE2025; RNZ; RTD; RX1; RCU; RGT; R0A; R1K; RPY; RP4; RTH; RQ3; REP; RJ7 diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb index 87bb9121..ec5ce8f5 100644 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb @@ -1,11 +1,11 @@ require 'test_helper' -class LeedsHandlerColorectalTest < ActiveSupport::TestCase +class LeedsHandlerColorectalV1Test < ActiveSupport::TestCase def setup @record = build_raw_record('pseudo_id1' => 'bob') @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.new(EBatch.new) + @handler = 
Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(EBatch.new) end @logger = Import::Log.get_logger end @@ -15,8 +15,8 @@ def setup e_type: 'PSMOLE', provider: 'RR8_2', registryid: 'RR8_2') - handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.new(e_batch) - Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.any_instance.stubs(:should_process?).returns(true) + handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(e_batch) + Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.any_instance.stubs(:should_process?).returns(true) handler.process_fields(@record) assert_difference('EBatch.count', 1) do handler.finalize diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb new file mode 100644 index 00000000..e8fb6a55 --- /dev/null +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb @@ -0,0 +1,308 @@ +require 'test_helper' + +class LeedsHandlerColorectalV1Test < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(EBatch.new) + end + @logger = Import::Log.get_logger + end + + test 'process_fields' do + e_batch = EBatch.create(original_filename: 'test_filea', + e_type: 'PSMOLE', + provider: 'RR8_2', + registryid: 'RR8_2') + handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(e_batch) + Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.any_instance.stubs(:should_process?).returns(true) + handler.process_fields(@record) + assert_difference('EBatch.count', 1) do + handler.finalize + end + # confirm batch created now has 'RR8' as provider + e_batch.reload + assert_equal 'RR8', 
e_batch.provider + assert_equal 'RR8', e_batch.registryid + end + + test 'add_positive_teststatus' do + @handler.populate_variables(@record) + assert_equal 2, @handler.allocate_test_status + end + + test 'add_gene_from_report' do + @handler.populate_variables(@record) + @handler.add_scope(@genotype, @record) + genotypes = @handler.process_variants_from_record(@genotype, @record) + assert_equal 'c.847C>T', genotypes[0].attribute_map['codingdnasequencechange'] + assert_equal 'p.Arg283x', genotypes[0].attribute_map['proteinimpact'] + normal_record = build_raw_record('pseudo_id1' => 'bob') + normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and ' \ + 'PMS2 mutations by sequence and dosage analysis. No pathogenic mutation was identified.' \ + '\n\n\n\nThis result does not exclude a diagnosis of Lynch syndrome.\n\nTesting for other ' \ + 'genes involved in familial bowel cancer is available if appropriate.' + @handler.populate_variables(normal_record) + @handler.add_scope(@genotype, normal_record) + assert_equal 4, @handler.process_variants_from_record(@genotype, normal_record).size + end + + test 'process_scope' do + @handler.populate_variables(@record) + @handler.add_scope(@genotype, @record) + assert_equal 'Full screen Colorectal Lynch or MMR', @genotype.attribute_map['genetictestscope'] + end + + test 'add_molecular_testingtype' do + @handler.populate_variables(@record) + @handler.add_molecular_testingtype(@genotype, @record) + assert_equal 1, @genotype.attribute_map['moleculartestingtype'] + end + + test 'varclass and teststatus' do + @handler.populate_variables(@record) + @handler.add_scope(@genotype, @record) + @handler.add_varclass + genotypes = @handler.process_variants_from_record(@genotype, @record) + assert_equal 5, genotypes[0].attribute_map['variantpathclass'] + assert_equal 2, genotypes[0].attribute_map['teststatus'] + end + + test 'normal_variant_record' do + normal_variant_record = build_raw_record('pseudo_id1' => 
'bob') + normal_variant_record.raw_fields['report'] = 'This patient is heterozygous for the sequence variant ' \ + 'c.1537A>G (p.Ile513Val) in exon 4 of APC' + normal_variant_record.raw_fields['genotype'] = 'FAP UV Class2' + @handler.populate_variables(normal_variant_record) + @handler.add_varclass + @handler.add_scope(@genotype, normal_variant_record) + genotypes = @handler.process_variants_from_record(@genotype, normal_variant_record) + assert_equal 1, genotypes.size + assert_equal 2, genotypes[0].attribute_map['variantpathclass'] + assert_equal 10, genotypes[0].attribute_map['teststatus'] + assert_equal 358, genotypes[0].attribute_map['gene'] + assert_equal 'c.1537A>G', genotypes[0].attribute_map['codingdnasequencechange'] + assert_equal '4', genotypes[0].attribute_map['exonintroncodonnumber'] + assert_equal 'p.Ile513Val', genotypes[0].attribute_map['proteinimpact'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + end + + test 'failed targeted record' do + failed_targ_record = build_raw_record('pseudo_id1' => 'bob') + failed_targ_record.raw_fields['moleculartestingtype'] = 'Carrier test' + failed_targ_record.raw_fields['genotype'] = 'Analysis failed' + failed_targ_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' + + @handler.populate_variables(failed_targ_record) + @handler.add_varclass + @handler.add_scope(@genotype, failed_targ_record) + genotypes = @handler.process_variants_from_record(@genotype, failed_targ_record) + assert_equal 1, genotypes.size + assert_nil genotypes[0].attribute_map['variantpathclass'] + assert_equal 9, genotypes[0].attribute_map['teststatus'] + assert_equal 3394, genotypes[0].attribute_map['gene'] + assert_equal 'Targeted Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + end + + test 'positive targeted record' do + pos_targ_record = build_raw_record('pseudo_id1' => 'bob') + 
pos_targ_record.raw_fields['moleculartestingtype'] = 'Predictive' + pos_targ_record.raw_fields['genotype'] = 'Pred seq class 5 +ve' + pos_targ_record.raw_fields['report'] = 'Sequence analysis indicates that this patient is heterozygous for the familial pathogenic MSH2 variant c.488T>G.' + + @handler.populate_variables(pos_targ_record) + @handler.add_varclass + @handler.add_scope(@genotype, pos_targ_record) + genotypes = @handler.process_variants_from_record(@genotype, pos_targ_record) + assert_equal 1, genotypes.size + assert_equal 5, genotypes[0].attribute_map['variantpathclass'] + assert_equal 2, genotypes[0].attribute_map['teststatus'] + assert_equal 2804, genotypes[0].attribute_map['gene'] + assert_equal 'c.488T>G.', genotypes[0].attribute_map['codingdnasequencechange'] + assert_equal 'Targeted Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + end + + test 'normal fullscreen record' do + norm_fs_record = build_raw_record('pseudo_id1' => 'bob') + norm_fs_record.raw_fields['moleculartestingtype'] = 'R210.2' + norm_fs_record.raw_fields['genotype'] = 'Lynch Diag; normal' + norm_fs_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence and dosage analysis. ' \ + 'No pathogenic variant was identified.' 
+ + @handler.populate_variables(norm_fs_record) + @handler.add_varclass + @handler.add_scope(@genotype, norm_fs_record) + genotypes = @handler.process_variants_from_record(@genotype, norm_fs_record) + + assert_equal 4, genotypes.size + assert_nil genotypes[0].attribute_map['variantpathclass'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + assert_equal 2744, genotypes[0].attribute_map['gene'] + + assert_nil genotypes[1].attribute_map['variantpathclass'] + assert_equal 1, genotypes[1].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[1].attribute_map['genetictestscope'] + assert_equal 2804, genotypes[1].attribute_map['gene'] + + assert_nil genotypes[2].attribute_map['variantpathclass'] + assert_equal 1, genotypes[2].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[2].attribute_map['genetictestscope'] + assert_equal 2808, genotypes[2].attribute_map['gene'] + + assert_nil genotypes[3].attribute_map['variantpathclass'] + assert_equal 1, genotypes[3].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[3].attribute_map['genetictestscope'] + assert_equal 3394, genotypes[3].attribute_map['gene'] + end + + test 'abnormal fs multi var single gene record' do + abnormal_fs_single_gene_rec = build_raw_record('pseudo_id1' => 'bob') + abnormal_fs_single_gene_rec.raw_fields['moleculartestingtype'] = 'R211.1' + abnormal_fs_single_gene_rec.raw_fields['genotype'] = 'Generic C4/5' + abnormal_fs_single_gene_rec.raw_fields['report'] = 'This patient has been screened for variants in the following cancer ' \ + 'predisposing genes by sequence analysis: APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, ' \ + 'POLE, PTEN, RNF43, SMAD4, STK11. This patient is heterozygous for the pathogenic NTHL1 variants c. 
c.268C>T p.(Gln90Ter) ' \ + 'and c.390C>A (p.Tyr130Ter). Assuming that the variants are in trans, this confirms a clinical diagnosis' \ + 'of NTHL1-associated polyposis (FAP3). Testing of relatives to confirm phase of these mutations may be appropriate. This ' \ + 'patient may be at risk of developing further NTHL1-associated cancers.' + + @handler.populate_variables(abnormal_fs_single_gene_rec) + @handler.add_varclass + @handler.add_scope(@genotype, abnormal_fs_single_gene_rec) + genotypes = @handler.process_variants_from_record(@genotype, abnormal_fs_single_gene_rec) + + assert_equal 17, genotypes.size # NTHL1 gets two as 2 variants + + assert_nil genotypes[0].attribute_map['variantpathclass'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + assert_equal 358, genotypes[0].attribute_map['gene'] + + assert_nil genotypes[1].attribute_map['variantpathclass'] + assert_equal 1, genotypes[1].attribute_map['teststatus'] + assert_equal 577, genotypes[1].attribute_map['gene'] + + assert_equal 5, genotypes[15].attribute_map['variantpathclass'] + assert_equal 2, genotypes[15].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[15].attribute_map['genetictestscope'] + assert_equal 3108, genotypes[15].attribute_map['gene'] + assert_equal 'c.268C>T', genotypes[15].attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln90Ter', genotypes[15].attribute_map['proteinimpact'] + + assert_equal 5, genotypes[16].attribute_map['variantpathclass'] + assert_equal 2, genotypes[16].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[16].attribute_map['genetictestscope'] + assert_equal 3108, genotypes[16].attribute_map['gene'] + assert_equal 'c.390C>A', genotypes[16].attribute_map['codingdnasequencechange'] + assert_equal 'p.Tyr130Ter', genotypes[16].attribute_map['proteinimpact'] + end + + test 
'abnormal multi gene record' do + abnormal_fs_multi_gene_rec = build_raw_record('pseudo_id1' => 'bob') + abnormal_fs_multi_gene_rec.raw_fields['moleculartestingtype'] = 'R211.1' + abnormal_fs_multi_gene_rec.raw_fields['genotype'] = 'Generic C4/5' + abnormal_fs_multi_gene_rec.raw_fields['report'] = 'This patient has been screened for variants in the following cancer ' \ + 'predisposing genes by sequence and dosage analysis: APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, ' \ + 'POLD1, POLE, PTEN, SMAD4, STK11. This patient is heterozygous for the MSH2 sequence variant c.1571G>C p.(Arg524Pro). ' \ + 'This variant is absent in population control datasets¹, but it has previously been detected in one patient with ' \ + 'Muir-Torre syndrome reported in the literature and in multiple patients reported on the ClinVar database². Functional ' \ + 'studies suggest it has a deleterious effect on protein function³. It is therefore likely to be pathogenic. This result ' \ + 'is consistent with a diagnosis of Lynch syndrome, and this patient is at risk of developing further MSH2-associated cancer. ' \ + 'This result may have important implications for relatives, and testing is now available as appropriate if these individuals are ' \ + 'referred by their local Clinical Genetics department. This patient is also heterozygous for the MSH6 variant c.899G>A p.(Arg300Gln).' 
+ + @handler.populate_variables(abnormal_fs_multi_gene_rec) + @handler.add_varclass + @handler.add_scope(@genotype, abnormal_fs_multi_gene_rec) + genotypes = @handler.process_variants_from_record(@genotype, abnormal_fs_multi_gene_rec) + + assert_equal 15, genotypes.size + + assert_nil genotypes[0].attribute_map['variantpathclass'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] + assert_equal 358, genotypes[0].attribute_map['gene'] + + assert_nil genotypes[12].attribute_map['variantpathclass'] + assert_equal 1, genotypes[12].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[12].attribute_map['genetictestscope'] + assert_equal 76, genotypes[12].attribute_map['gene'] + + assert_equal 4, genotypes[13].attribute_map['variantpathclass'] + assert_equal 2, genotypes[13].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[13].attribute_map['genetictestscope'] + assert_equal 2804, genotypes[13].attribute_map['gene'] + assert_equal 'c.1571G>C', genotypes[13].attribute_map['codingdnasequencechange'] + assert_equal 'p.Arg524Pro', genotypes[13].attribute_map['proteinimpact'] + + assert_equal 4, genotypes[14].attribute_map['variantpathclass'] + assert_equal 2, genotypes[14].attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[14].attribute_map['genetictestscope'] + assert_equal 2808, genotypes[14].attribute_map['gene'] + assert_equal 'c.899G>A', genotypes[14].attribute_map['codingdnasequencechange'] + assert_equal 'p.Arg300Gln', genotypes[14].attribute_map['proteinimpact'] + end + + test 'fs record with no gene info in genotype or report, so genes extracted from mtype' do + fail_fs_no_gene_rec = build_raw_record('pseudo_id1' => 'bob') + fail_fs_no_gene_rec.raw_fields['moleculartestingtype'] = 'Diagnostic; Lynch' + 
fail_fs_no_gene_rec.raw_fields['genotype'] = 'Fail/Results not required' + fail_fs_no_gene_rec.raw_fields['report'] = nil + + @handler.populate_variables(fail_fs_no_gene_rec) + @handler.add_varclass + @handler.add_scope(@genotype, fail_fs_no_gene_rec) + genotypes = @handler.process_variants_from_record(@genotype, fail_fs_no_gene_rec) + + assert_equal 4, genotypes.size + assert_equal [9], genotypes.collect { |g| g.attribute_map['teststatus'] }.uniq + assert_equal 2744, genotypes[0].attribute_map['gene'] + assert_equal 2804, genotypes[1].attribute_map['gene'] + assert_equal 2808, genotypes[2].attribute_map['gene'] + assert_equal 3394, genotypes[3].attribute_map['gene'] + end + + private + + def clinical_json + { sex: '1', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2010-08-05T00:00:00.000+01:00', + authoriseddate: '2010-09-17T00:00:00.000+01:00', + servicereportidentifier: 'Service Report Identifier', + sortdate: '2010-08-05T00:00:00.000+01:00', + genetictestscope: 'Diagnostic', + specimentype: '5', + report: 'Analysis showed that this patient is heterozygous for the pathogenic ' \ + 'APC mutation c.847C>T (p.Arg283X). ' \ + 'This confirms a clinical diagnosis of FAP.\n\nThis result has important implications ' \ + 'for other family members at risk and testing may be performed as appropriate.', + requesteddate: '2010-08-05T00:00:00.000+01:00', + age: 99999 }.to_json + end + + def rawtext_clinical_json + { sex: 'M', + 'reffac.name' => 'Reffac Address', + provider_address: 'Provider Address', + providercode: 'Provider Code', + referringclinicianname: 'Clinician Name', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + patienttype: 'NHS', + moleculartestingtype: 'Diagnostic', + indicationcategory: '17510', + genotype: 'Diagnostic APC +ve', + report: 'Analysis showed that this patient is heterozygous for the pathogenic ' \ + 'APC mutation c.847C>T (p.Arg283X). 
This confirms a clinical diagnosis of FAP.\n\n' \ + 'This result has important implications for other family members at risk and testing ' \ + 'may be performed as appropriate.', + receiveddate: '2010-08-05 00:00:00', + requesteddate: '2010-08-05 00:00:00', + authoriseddate: '2010-09-17 00:00:00', + specimentype: 'Blood' }.to_json + end +end diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb new file mode 100644 index 00000000..3535296c --- /dev/null +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb @@ -0,0 +1,376 @@ +require 'test_helper' + +class LeedsHandlerColorectalV2Test < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2.new(EBatch.new) + end + end + + test 'process_failed_test_record' do + failed_record = build_raw_record('pseudo_id1' => 'patient1') + failed_record.raw_fields['genotype'] = 'Fail/Results not required' + failed_record.raw_fields['codingdnasequencechange'] = 'No result' + failed_record.raw_fields['moleculartestingtype'] = 'R210.2' + failed_record.raw_fields['report'] = 'Unfortunately, no results were obtained from this tissue sample.' 
+ + res = @handler.process_fields(failed_record) + assert_equal 4, res.size + res.each do |genotype| + assert_equal 9, genotype.attribute_map['teststatus'] + end + end + + test 'process_normal_result_record' do + res = @handler.process_fields(@record) + assert_equal 1, res.size + assert_equal 1, res[0].attribute_map['teststatus'] # normal + assert_equal 3394, res[0].attribute_map['gene'] # PMS2 + end + + test 'process_result_variant_rec' do + result_record = build_raw_record('pseudo_id1' => 'patient6') + result_record.raw_fields['codingdnasequencechange'] = 'PMS2 exons 1-7 deletion heterozygote' + result_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for a deletion of PMS2 exons 1-7.' + + res = @handler.process_fields(result_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] + assert_equal '1-7', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 1, genotype.attribute_map['variantgenotype'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + end + end + + test 'process_protein_impact_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = nil + gene_record.raw_fields['codingdnasequencechange'] = 'NTHL1 c.268C>T' + gene_record.raw_fields['proteinimpact'] = 'APC c.2120T>C het [C3]' + gene_record.raw_fields['zygosity'] = nil + gene_record.raw_fields['variantpathclass'] = nil + gene_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:\n\nAPC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11' + + res = @handler.process_fields(gene_record) + assert_equal 2, res.size + + # APC variant should get c.2120T>C from proteinimpact field + protein_variant_genotype = res.find { |g| g.attribute_map['gene'] 
== 358 } # APC + assert_equal 'c.2120T>C', protein_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, protein_variant_genotype.attribute_map['teststatus'] + + # NTHL1 variant should get c.268C>T from codingdnasequencechange field + result_variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_equal 'c.268C>T', result_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, result_variant_genotype.attribute_map['teststatus'] + end + + test 'gene_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = 'MLH1' + gene_record.raw_fields['codingdnasequencechange'] = 'exon 16-19 deletion' + gene_record.raw_fields['proteinimpact'] = nil + gene_record.raw_fields['zygosity'] = 'Heterozygous' + gene_record.raw_fields['variantpathclass'] = 'Pathogenic' + gene_record.raw_fields['report'] = 'A germline pathogenic MLH1 copy number variant was detected in this patient sample' + gene_record.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: MLH1, MSH2, MSH6, PMS2 (all coding exons and exon-intron boundaries).' 
+ res = @handler.process_fields(gene_record) + assert_equal 4, res.size + + gene_variant_genotype = res.find { |g| g.attribute_map['gene'] == 2744 } # MLH1 + assert_equal '16-19', gene_variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 2, gene_variant_genotype.attribute_map['teststatus'] + assert_equal 5, gene_variant_genotype.attribute_map['variantpathclass'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 2744 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'normal_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' + normal_record.raw_fields['report'] = 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.' + normal_record.raw_fields['genotype'] = 'PMS2 - MLPA conf negative' + normal_record.raw_fields['moleculartestingtype'] = nil + + res = @handler.process_fields(normal_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'normal_report_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence analysis. No pathogenic variant was identified.' 
+ normal_record.raw_fields['genotype'] = 'Lynch Diag; normal' + normal_record.raw_fields['proteinimpact'] = nil + normal_record.raw_fields['gene'] = nil + normal_record.raw_fields['codingdnasequencechange'] = 'No result' + + res = @handler.process_fields(normal_record) + assert_equal 4, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'first_of_report_variant_rec' do + first_record = build_raw_record('pseudo_id1' => 'patient8') + first_record.raw_fields['codingdnasequencechange'] = 'No result' + first_record.raw_fields['proteinimpact'] = nil + first_record.raw_fields['gene'] = nil + first_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:' \ + 'APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11.This patient is heterozygous for the ' \ + 'pathogenic NTHL1 variants c.268C>T p.(Gln90Ter)' + first_record.raw_fields['moleculartestingtype'] = 'R211' + + res = @handler.process_fields(first_record) + assert_equal 16, res.size + variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_not_nil variant_genotype + assert_equal 'c.268C>T', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln90Ter', variant_genotype.attribute_map['proteinimpact'] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 3108 + + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + end + end + + # targeted tests + test 'zygosity_variant_targ_rec' do + targeted_record = build_raw_record('pseudo_id1' => 'patient10') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Heterozygous' + 
targeted_record.raw_fields['gene'] = 'MSH2' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:exon 11-16 deletion' + targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' + targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic MSH2 copy number variant' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-16', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 'NM_000251.2', genotype.attribute_map['referencetranscriptid'] + assert_equal 'Targeted Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + assert_equal 5, genotype.attribute_map['variantpathclass'] + assert_equal 4, genotype.attribute_map['geneticinheritance'] + end + + test 'process_targeted_mosaic_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient11') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Mosaic' + targeted_record.raw_fields['gene'] = 'PMS2' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000535.5:Whole gene deletion' + targeted_record.raw_fields['genotype'] = 'R443_Confirmation_NGS_MLPA_PMS2' + targeted_record.raw_fields['report'] = 'This patient shows mosaic pattern for the familial MLH1 variant' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + assert_equal 'NM_000535.5', genotype.attribute_map['referencetranscriptid'] + assert_equal 6, genotype.attribute_map['geneticinheritance'] + assert_equal 5, 
genotype.attribute_map['variantpathclass'] + end + + test 'process_targeted_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient12') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:Exon 1-7 deletion' + targeted_record.raw_fields['genotype'] = 'R242_neg_MLPA' + targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic MSH2 variant.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = 'Variant NOT detected' + targeted_record.raw_fields['gene'] = 'MSH2' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + end + + test 'process_targeted_no_result' do + targeted_record = build_raw_record('pseudo_id1' => 'patient13') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Fail/Results not required' + targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + targeted_record.raw_fields['diagnosis_report'] = 'Germline heterozygous pathogenic variants in PTEN are associated with PTEN hamartoma tumour syndrome' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_equal 62, genotype.attribute_map['gene'] + end + + test 'process_targeted_no_biallelic' do + targeted_record = build_raw_record('pseudo_id1' => 'patient14') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + 
targeted_record.raw_fields['codingdnasequencechange'] = 'No biallelic presence of familial variant' + targeted_record.raw_fields['genotype'] = 'PMS2 - Biallelic (CMMRD) pred negative' + targeted_record.raw_fields['report'] = 'Sequence analysis indicates no biallelic presence of the familial pathogenic PMS2 variant c.2404C>T in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 4, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + end + + test 'process_targeted_cdna_het_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'MSH2 Exon 11-12 duplication heterozygote' + targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA +ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for the familial likely pathogenic MSH2 duplication of exons 11-12' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-12', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 4, genotype.attribute_map['sequencevarianttype'] + end + + test 'process_result_variant_absent_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' + 
targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA -ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic MLH1 exon 16-19 deletion is absent in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2744, genotype.attribute_map['gene'] # MLH1 + end + + # Tests for should_process? method + test 'should_process_other_cancer_file_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient16') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + assert @handler.send(:should_process?, record) + end + + test 'should_not_process_other_cancer_file_non_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient17') + record.raw_fields['moleculartestingtype'] = 'Predictive' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_ataxia_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient18') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['diagnosis_report'] = 'Testing for ataxia related genes' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_brca_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' 
=> 'patient19') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['codingdnasequencechange'] = 'BRCA1 c.181T>G' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + private + + def clinical_json + { sex: '2', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2010-08-05T00:00:00.000+01:00', + authoriseddate: '2010-09-17T00:00:00.000+01:00', + servicereportidentifier: 'Service Report Identifier', + sortdate: '2010-08-05T00:00:00.000+01:00', + genetictestscope: 'R210.2', + specimentype: '5', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.', + requesteddate: '2010-08-05T00:00:00.000+01:00', + age: 37 }.to_json + end + + def rawtext_clinical_json + { sex: 'M', + providercode: 'Provider Code', + referringclinicianname: 'Clinician Name', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + patienttype: 'NHS', + moleculartestingtype: 'R210.5', + indicationcategory: 'R210', + genotype: 'PMS2 - MLPA conf negative', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient', + diagnosis_report: 'Heterozygous mutations in PMS2 are linked to Lynch Syndrome with dominant inheritance. 
' \ + 'Homozygous/compound heterozygous mutations in PMS2 are linked to mismatch repair cancer syndrome.', + receiveddate: '2010-08-05 00:00:00', + karyotypingmethod: 'MLPA P008', + codingdnasequencechange: 'No deletions/duplications detected', + proteinimpact: nil, + gene: nil, + zygosity: nil, + variantpathclass: nil, + requesteddate: '2010-08-05 00:00:00', + authoriseddate: '2010-09-17 00:00:00', + specimentype: 'Blood' }.to_json + end +end diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb index 5fbd5f45..3535296c 100644 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb @@ -1,11 +1,11 @@ require 'test_helper' -class LeedsHandlerNewColorectalTest < ActiveSupport::TestCase +class LeedsHandlerColorectalV2Test < ActiveSupport::TestCase def setup @record = build_raw_record('pseudo_id1' => 'bob') @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerNewColorectal.new(EBatch.new) + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2.new(EBatch.new) end end From 99362c58b7e95299c2da687de8e705b387b04edf Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 17:57:20 +0000 Subject: [PATCH 16/18] Remove older test files --- .../leeds/leeds_handler_colorectal_test.rb | 308 -------------- .../leeds_handler_new_colorectal_test.rb | 376 ------------------ 2 files changed, 684 deletions(-) delete mode 100644 test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb delete mode 100644 test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb 
b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb deleted file mode 100644 index ec5ce8f5..00000000 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb +++ /dev/null @@ -1,308 +0,0 @@ -require 'test_helper' - -class LeedsHandlerColorectalV1Test < ActiveSupport::TestCase - def setup - @record = build_raw_record('pseudo_id1' => 'bob') - @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) - @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(EBatch.new) - end - @logger = Import::Log.get_logger - end - - test 'process_fields' do - e_batch = EBatch.create(original_filename: 'test_filea', - e_type: 'PSMOLE', - provider: 'RR8_2', - registryid: 'RR8_2') - handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(e_batch) - Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.any_instance.stubs(:should_process?).returns(true) - handler.process_fields(@record) - assert_difference('EBatch.count', 1) do - handler.finalize - end - # confirm batch created now has 'RR8' as provider - e_batch.reload - assert_equal 'RR8', e_batch.provider - assert_equal 'RR8', e_batch.registryid - end - - test 'add_positive_teststatus' do - @handler.populate_variables(@record) - assert_equal 2, @handler.allocate_test_status - end - - test 'add_gene_from_report' do - @handler.populate_variables(@record) - @handler.add_scope(@genotype, @record) - genotypes = @handler.process_variants_from_record(@genotype, @record) - assert_equal 'c.847C>T', genotypes[0].attribute_map['codingdnasequencechange'] - assert_equal 'p.Arg283x', genotypes[0].attribute_map['proteinimpact'] - normal_record = build_raw_record('pseudo_id1' => 'bob') - normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and ' \ - 'PMS2 mutations by sequence and dosage analysis. No pathogenic mutation was identified.' 
\ - '\n\n\n\nThis result does not exclude a diagnosis of Lynch syndrome.\n\nTesting for other ' \ - 'genes involved in familial bowel cancer is available if appropriate.' - @handler.populate_variables(normal_record) - @handler.add_scope(@genotype, normal_record) - assert_equal 4, @handler.process_variants_from_record(@genotype, normal_record).size - end - - test 'process_scope' do - @handler.populate_variables(@record) - @handler.add_scope(@genotype, @record) - assert_equal 'Full screen Colorectal Lynch or MMR', @genotype.attribute_map['genetictestscope'] - end - - test 'add_molecular_testingtype' do - @handler.populate_variables(@record) - @handler.add_molecular_testingtype(@genotype, @record) - assert_equal 1, @genotype.attribute_map['moleculartestingtype'] - end - - test 'varclass and teststatus' do - @handler.populate_variables(@record) - @handler.add_scope(@genotype, @record) - @handler.add_varclass - genotypes = @handler.process_variants_from_record(@genotype, @record) - assert_equal 5, genotypes[0].attribute_map['variantpathclass'] - assert_equal 2, genotypes[0].attribute_map['teststatus'] - end - - test 'normal_variant_record' do - normal_variant_record = build_raw_record('pseudo_id1' => 'bob') - normal_variant_record.raw_fields['report'] = 'This patient is heterozygous for the sequence variant ' \ - 'c.1537A>G (p.Ile513Val) in exon 4 of APC' - normal_variant_record.raw_fields['genotype'] = 'FAP UV Class2' - @handler.populate_variables(normal_variant_record) - @handler.add_varclass - @handler.add_scope(@genotype, normal_variant_record) - genotypes = @handler.process_variants_from_record(@genotype, normal_variant_record) - assert_equal 1, genotypes.size - assert_equal 2, genotypes[0].attribute_map['variantpathclass'] - assert_equal 10, genotypes[0].attribute_map['teststatus'] - assert_equal 358, genotypes[0].attribute_map['gene'] - assert_equal 'c.1537A>G', genotypes[0].attribute_map['codingdnasequencechange'] - assert_equal '4', 
genotypes[0].attribute_map['exonintroncodonnumber'] - assert_equal 'p.Ile513Val', genotypes[0].attribute_map['proteinimpact'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - end - - test 'failed targeted record' do - failed_targ_record = build_raw_record('pseudo_id1' => 'bob') - failed_targ_record.raw_fields['moleculartestingtype'] = 'Carrier test' - failed_targ_record.raw_fields['genotype'] = 'Analysis failed' - failed_targ_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' - - @handler.populate_variables(failed_targ_record) - @handler.add_varclass - @handler.add_scope(@genotype, failed_targ_record) - genotypes = @handler.process_variants_from_record(@genotype, failed_targ_record) - assert_equal 1, genotypes.size - assert_nil genotypes[0].attribute_map['variantpathclass'] - assert_equal 9, genotypes[0].attribute_map['teststatus'] - assert_equal 3394, genotypes[0].attribute_map['gene'] - assert_equal 'Targeted Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - end - - test 'positive targeted record' do - pos_targ_record = build_raw_record('pseudo_id1' => 'bob') - pos_targ_record.raw_fields['moleculartestingtype'] = 'Predictive' - pos_targ_record.raw_fields['genotype'] = 'Pred seq class 5 +ve' - pos_targ_record.raw_fields['report'] = 'Sequence analysis indicates that this patient is heterozygous for the familial pathogenic MSH2 variant c.488T>G.' 
- - @handler.populate_variables(pos_targ_record) - @handler.add_varclass - @handler.add_scope(@genotype, pos_targ_record) - genotypes = @handler.process_variants_from_record(@genotype, pos_targ_record) - assert_equal 1, genotypes.size - assert_equal 5, genotypes[0].attribute_map['variantpathclass'] - assert_equal 2, genotypes[0].attribute_map['teststatus'] - assert_equal 2804, genotypes[0].attribute_map['gene'] - assert_equal 'c.488T>G', genotypes[0].attribute_map['codingdnasequencechange'] - assert_equal 'Targeted Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - end - - test 'normal fullscreen record' do - norm_fs_record = build_raw_record('pseudo_id1' => 'bob') - norm_fs_record.raw_fields['moleculartestingtype'] = 'R210.2' - norm_fs_record.raw_fields['genotype'] = 'Lynch Diag; normal' - norm_fs_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence and dosage analysis. ' \ - 'No pathogenic variant was identified.' 
- - @handler.populate_variables(norm_fs_record) - @handler.add_varclass - @handler.add_scope(@genotype, norm_fs_record) - genotypes = @handler.process_variants_from_record(@genotype, norm_fs_record) - - assert_equal 4, genotypes.size - assert_nil genotypes[0].attribute_map['variantpathclass'] - assert_equal 1, genotypes[0].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - assert_equal 2744, genotypes[0].attribute_map['gene'] - - assert_nil genotypes[1].attribute_map['variantpathclass'] - assert_equal 1, genotypes[1].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[1].attribute_map['genetictestscope'] - assert_equal 2804, genotypes[1].attribute_map['gene'] - - assert_nil genotypes[2].attribute_map['variantpathclass'] - assert_equal 1, genotypes[2].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[2].attribute_map['genetictestscope'] - assert_equal 2808, genotypes[2].attribute_map['gene'] - - assert_nil genotypes[3].attribute_map['variantpathclass'] - assert_equal 1, genotypes[3].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[3].attribute_map['genetictestscope'] - assert_equal 3394, genotypes[3].attribute_map['gene'] - end - - test 'abnormal fs multi var single gene record' do - abnormal_fs_single_gene_rec = build_raw_record('pseudo_id1' => 'bob') - abnormal_fs_single_gene_rec.raw_fields['moleculartestingtype'] = 'R211.1' - abnormal_fs_single_gene_rec.raw_fields['genotype'] = 'Generic C4/5' - abnormal_fs_single_gene_rec.raw_fields['report'] = 'This patient has been screened for variants in the following cancer ' \ - 'predisposing genes by sequence analysis: APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, ' \ - 'POLE, PTEN, RNF43, SMAD4, STK11. This patient is heterozygous for the pathogenic NTHL1 variants c. 
c.268C>T p.(Gln90Ter) ' \ - 'and c.390C>A (p.Tyr130Ter). Assuming that the variants are in trans, this confirms a clinical diagnosis' \ - 'of NTHL1-associated polyposis (FAP3). Testing of relatives to confirm phase of these mutations may be appropriate. This ' \ - 'patient may be at risk of developing further NTHL1-associated cancers.' - - @handler.populate_variables(abnormal_fs_single_gene_rec) - @handler.add_varclass - @handler.add_scope(@genotype, abnormal_fs_single_gene_rec) - genotypes = @handler.process_variants_from_record(@genotype, abnormal_fs_single_gene_rec) - - assert_equal 17, genotypes.size # NTHL1 gets two as 2 variants - - assert_nil genotypes[0].attribute_map['variantpathclass'] - assert_equal 1, genotypes[0].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - assert_equal 358, genotypes[0].attribute_map['gene'] - - assert_nil genotypes[1].attribute_map['variantpathclass'] - assert_equal 1, genotypes[1].attribute_map['teststatus'] - assert_equal 577, genotypes[1].attribute_map['gene'] - - assert_equal 5, genotypes[15].attribute_map['variantpathclass'] - assert_equal 2, genotypes[15].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[15].attribute_map['genetictestscope'] - assert_equal 3108, genotypes[15].attribute_map['gene'] - assert_equal 'c.268C>T', genotypes[15].attribute_map['codingdnasequencechange'] - assert_equal 'p.Gln90Ter', genotypes[15].attribute_map['proteinimpact'] - - assert_equal 5, genotypes[16].attribute_map['variantpathclass'] - assert_equal 2, genotypes[16].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[16].attribute_map['genetictestscope'] - assert_equal 3108, genotypes[16].attribute_map['gene'] - assert_equal 'c.390C>A', genotypes[16].attribute_map['codingdnasequencechange'] - assert_equal 'p.Tyr130Ter', genotypes[16].attribute_map['proteinimpact'] - end - - test 
'abnormal multi gene record' do - abnormal_fs_multi_gene_rec = build_raw_record('pseudo_id1' => 'bob') - abnormal_fs_multi_gene_rec.raw_fields['moleculartestingtype'] = 'R211.1' - abnormal_fs_multi_gene_rec.raw_fields['genotype'] = 'Generic C4/5' - abnormal_fs_multi_gene_rec.raw_fields['report'] = 'This patient has been screened for variants in the following cancer ' \ - 'predisposing genes by sequence and dosage analysis: APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, ' \ - 'POLD1, POLE, PTEN, SMAD4, STK11. This patient is heterozygous for the MSH2 sequence variant c.1571G>C p.(Arg524Pro). ' \ - 'This variant is absent in population control datasets¹, but it has previously been detected in one patient with ' \ - 'Muir-Torre syndrome reported in the literature and in multiple patients reported on the ClinVar database². Functional ' \ - 'studies suggest it has a deleterious effect on protein function³. It is therefore likely to be pathogenic. This result ' \ - 'is consistent with a diagnosis of Lynch syndrome, and this patient is at risk of developing further MSH2-associated cancer. ' \ - 'This result may have important implications for relatives, and testing is now available as appropriate if these individuals are ' \ - 'referred by their local Clinical Genetics department. This patient is also heterozygous for the MSH6 variant c.899G>A p.(Arg300Gln).' 
- - @handler.populate_variables(abnormal_fs_multi_gene_rec) - @handler.add_varclass - @handler.add_scope(@genotype, abnormal_fs_multi_gene_rec) - genotypes = @handler.process_variants_from_record(@genotype, abnormal_fs_multi_gene_rec) - - assert_equal 15, genotypes.size - - assert_nil genotypes[0].attribute_map['variantpathclass'] - assert_equal 1, genotypes[0].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] - assert_equal 358, genotypes[0].attribute_map['gene'] - - assert_nil genotypes[12].attribute_map['variantpathclass'] - assert_equal 1, genotypes[12].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[12].attribute_map['genetictestscope'] - assert_equal 76, genotypes[12].attribute_map['gene'] - - assert_equal 4, genotypes[13].attribute_map['variantpathclass'] - assert_equal 2, genotypes[13].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[13].attribute_map['genetictestscope'] - assert_equal 2804, genotypes[13].attribute_map['gene'] - assert_equal 'c.1571G>C', genotypes[13].attribute_map['codingdnasequencechange'] - assert_equal 'p.Arg524Pro', genotypes[13].attribute_map['proteinimpact'] - - assert_equal 4, genotypes[14].attribute_map['variantpathclass'] - assert_equal 2, genotypes[14].attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotypes[14].attribute_map['genetictestscope'] - assert_equal 2808, genotypes[14].attribute_map['gene'] - assert_equal 'c.899G>A', genotypes[14].attribute_map['codingdnasequencechange'] - assert_equal 'p.Arg300Gln', genotypes[14].attribute_map['proteinimpact'] - end - - test 'fs record with no gene info in genotype or report, so genes extracted from mtype' do - fail_fs_no_gene_rec = build_raw_record('pseudo_id1' => 'bob') - fail_fs_no_gene_rec.raw_fields['moleculartestingtype'] = 'Diagnostic; Lynch' - 
fail_fs_no_gene_rec.raw_fields['genotype'] = 'Fail/Results not required' - fail_fs_no_gene_rec.raw_fields['report'] = nil - - @handler.populate_variables(fail_fs_no_gene_rec) - @handler.add_varclass - @handler.add_scope(@genotype, fail_fs_no_gene_rec) - genotypes = @handler.process_variants_from_record(@genotype, fail_fs_no_gene_rec) - - assert_equal 4, genotypes.size - assert_equal [9], genotypes.collect { |g| g.attribute_map['teststatus'] }.uniq - assert_equal 2744, genotypes[0].attribute_map['gene'] - assert_equal 2804, genotypes[1].attribute_map['gene'] - assert_equal 2808, genotypes[2].attribute_map['gene'] - assert_equal 3394, genotypes[3].attribute_map['gene'] - end - - private - - def clinical_json - { sex: '1', - consultantcode: 'Consultant Code', - providercode: 'Provider Code', - receiveddate: '2010-08-05T00:00:00.000+01:00', - authoriseddate: '2010-09-17T00:00:00.000+01:00', - servicereportidentifier: 'Service Report Identifier', - sortdate: '2010-08-05T00:00:00.000+01:00', - genetictestscope: 'Diagnostic', - specimentype: '5', - report: 'Analysis showed that this patient is heterozygous for the pathogenic ' \ - 'APC mutation c.847C>T (p.Arg283X). ' \ - 'This confirms a clinical diagnosis of FAP.\n\nThis result has important implications ' \ - 'for other family members at risk and testing may be performed as appropriate.', - requesteddate: '2010-08-05T00:00:00.000+01:00', - age: 99999 }.to_json - end - - def rawtext_clinical_json - { sex: 'M', - 'reffac.name' => 'Reffac Address', - provider_address: 'Provider Address', - providercode: 'Provider Code', - referringclinicianname: 'Clinician Name', - consultantcode: 'Consultant Code', - servicereportidentifier: 'Service Report Identifier', - patienttype: 'NHS', - moleculartestingtype: 'Diagnostic', - indicationcategory: '17510', - genotype: 'Diagnostic APC +ve', - report: 'Analysis showed that this patient is heterozygous for the pathogenic ' \ - 'APC mutation c.847C>T (p.Arg283X). 
This confirms a clinical diagnosis of FAP.\n\n' \ - 'This result has important implications for other family members at risk and testing ' \ - 'may be performed as appropriate.', - receiveddate: '2010-08-05 00:00:00', - requesteddate: '2010-08-05 00:00:00', - authoriseddate: '2010-09-17 00:00:00', - specimentype: 'Blood' }.to_json - end -end diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb deleted file mode 100644 index 3535296c..00000000 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_new_colorectal_test.rb +++ /dev/null @@ -1,376 +0,0 @@ -require 'test_helper' - -class LeedsHandlerColorectalV2Test < ActiveSupport::TestCase - def setup - @record = build_raw_record('pseudo_id1' => 'bob') - @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) - @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2.new(EBatch.new) - end - end - - test 'process_failed_test_record' do - failed_record = build_raw_record('pseudo_id1' => 'patient1') - failed_record.raw_fields['genotype'] = 'Fail/Results not required' - failed_record.raw_fields['codingdnasequencechange'] = 'No result' - failed_record.raw_fields['moleculartestingtype'] = 'R210.2' - failed_record.raw_fields['report'] = 'Unfortunately, no results were obtained from this tissue sample.' 
- - res = @handler.process_fields(failed_record) - assert_equal 4, res.size - res.each do |genotype| - assert_equal 9, genotype.attribute_map['teststatus'] - end - end - - test 'process_normal_result_record' do - res = @handler.process_fields(@record) - assert_equal 1, res.size - assert_equal 1, res[0].attribute_map['teststatus'] # normal - assert_equal 3394, res[0].attribute_map['gene'] # PMS2 - end - - test 'process_result_variant_rec' do - result_record = build_raw_record('pseudo_id1' => 'patient6') - result_record.raw_fields['codingdnasequencechange'] = 'PMS2 exons 1-7 deletion heterozygote' - result_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for a deletion of PMS2 exons 1-7.' - - res = @handler.process_fields(result_record) - assert_equal 1, res.size - - res.each do |genotype| - assert_equal 2, genotype.attribute_map['teststatus'] - assert_equal 3394, genotype.attribute_map['gene'] - assert_equal '1-7', genotype.attribute_map['exonintroncodonnumber'] - assert_equal 1, genotype.attribute_map['variantgenotype'] - assert_equal 3, genotype.attribute_map['sequencevarianttype'] - end - end - - test 'process_protein_impact_variant_rec' do - gene_record = build_raw_record('pseudo_id1' => 'patient5') - gene_record.raw_fields['gene'] = nil - gene_record.raw_fields['codingdnasequencechange'] = 'NTHL1 c.268C>T' - gene_record.raw_fields['proteinimpact'] = 'APC c.2120T>C het [C3]' - gene_record.raw_fields['zygosity'] = nil - gene_record.raw_fields['variantpathclass'] = nil - gene_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:\n\nAPC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11' - - res = @handler.process_fields(gene_record) - assert_equal 2, res.size - - # APC variant should get c.2120T>C from proteinimpact field - protein_variant_genotype = res.find { |g| g.attribute_map['gene'] 
== 358 } # APC - assert_equal 'c.2120T>C', protein_variant_genotype.attribute_map['codingdnasequencechange'] - assert_equal 2, protein_variant_genotype.attribute_map['teststatus'] - - # NTHL1 variant should get c.268C>T from codingdnasequencechange field - result_variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 - assert_equal 'c.268C>T', result_variant_genotype.attribute_map['codingdnasequencechange'] - assert_equal 2, result_variant_genotype.attribute_map['teststatus'] - end - - test 'gene_variant_rec' do - gene_record = build_raw_record('pseudo_id1' => 'patient5') - gene_record.raw_fields['gene'] = 'MLH1' - gene_record.raw_fields['codingdnasequencechange'] = 'exon 16-19 deletion' - gene_record.raw_fields['proteinimpact'] = nil - gene_record.raw_fields['zygosity'] = 'Heterozygous' - gene_record.raw_fields['variantpathclass'] = 'Pathogenic' - gene_record.raw_fields['report'] = 'A germline pathogenic MLH1 copy number variant was detected in this patient sample' - gene_record.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: MLH1, MSH2, MSH6, PMS2 (all coding exons and exon-intron boundaries).' 
- res = @handler.process_fields(gene_record) - assert_equal 4, res.size - - gene_variant_genotype = res.find { |g| g.attribute_map['gene'] == 2744 } # MLH1 - assert_equal '16-19', gene_variant_genotype.attribute_map['exonintroncodonnumber'] - assert_equal 2, gene_variant_genotype.attribute_map['teststatus'] - assert_equal 5, gene_variant_genotype.attribute_map['variantpathclass'] - - res.each do |genotype| - next if genotype.attribute_map['gene'] == 2744 - - assert_equal 1, genotype.attribute_map['teststatus'] - end - end - - test 'normal_result_rec' do - normal_record = build_raw_record('pseudo_id1' => 'patient7') - normal_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' - normal_record.raw_fields['report'] = 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.' - normal_record.raw_fields['genotype'] = 'PMS2 - MLPA conf negative' - normal_record.raw_fields['moleculartestingtype'] = nil - - res = @handler.process_fields(normal_record) - assert_equal 1, res.size - - res.each do |genotype| - assert_equal 1, genotype.attribute_map['teststatus'] # normal - end - end - - test 'normal_report_result_rec' do - normal_record = build_raw_record('pseudo_id1' => 'patient7') - normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence analysis. No pathogenic variant was identified.' 
- normal_record.raw_fields['genotype'] = 'Lynch Diag; normal' - normal_record.raw_fields['proteinimpact'] = nil - normal_record.raw_fields['gene'] = nil - normal_record.raw_fields['codingdnasequencechange'] = 'No result' - - res = @handler.process_fields(normal_record) - assert_equal 4, res.size - - res.each do |genotype| - assert_equal 1, genotype.attribute_map['teststatus'] # normal - end - end - - test 'first_of_report_variant_rec' do - first_record = build_raw_record('pseudo_id1' => 'patient8') - first_record.raw_fields['codingdnasequencechange'] = 'No result' - first_record.raw_fields['proteinimpact'] = nil - first_record.raw_fields['gene'] = nil - first_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:' \ - 'APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11.This patient is heterozygous for the ' \ - 'pathogenic NTHL1 variants c.268C>T p.(Gln90Ter)' - first_record.raw_fields['moleculartestingtype'] = 'R211' - - res = @handler.process_fields(first_record) - assert_equal 16, res.size - variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 - assert_not_nil variant_genotype - assert_equal 'c.268C>T', variant_genotype.attribute_map['codingdnasequencechange'] - assert_equal 'p.Gln90Ter', variant_genotype.attribute_map['proteinimpact'] - assert_equal 2, variant_genotype.attribute_map['teststatus'] - - res.each do |genotype| - next if genotype.attribute_map['gene'] == 3108 - - assert_equal 1, genotype.attribute_map['teststatus'] - assert_equal 'Full screen Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] - end - end - - # targeted tests - test 'zygosity_variant_targ_rec' do - targeted_record = build_raw_record('pseudo_id1' => 'patient10') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['zygosity'] = 'Heterozygous' - 
targeted_record.raw_fields['gene'] = 'MSH2' - targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' - targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:exon 11-16 deletion' - targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' - targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic MSH2 copy number variant' - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 2, genotype.attribute_map['teststatus'] - assert_equal 2804, genotype.attribute_map['gene'] # APC - assert_equal '11-16', genotype.attribute_map['exonintroncodonnumber'] - assert_equal 'NM_000251.2', genotype.attribute_map['referencetranscriptid'] - assert_equal 'Targeted Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] - assert_equal 5, genotype.attribute_map['variantpathclass'] - assert_equal 4, genotype.attribute_map['geneticinheritance'] - end - - test 'process_targeted_mosaic_variant' do - targeted_record = build_raw_record('pseudo_id1' => 'patient11') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['zygosity'] = 'Mosaic' - targeted_record.raw_fields['gene'] = 'PMS2' - targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000535.5:Whole gene deletion' - targeted_record.raw_fields['genotype'] = 'R443_Confirmation_NGS_MLPA_PMS2' - targeted_record.raw_fields['report'] = 'This patient shows mosaic pattern for the familial MLH1 variant' - targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 2, genotype.attribute_map['teststatus'] - assert_equal 3394, genotype.attribute_map['gene'] # PMS2 - assert_equal 'NM_000535.5', genotype.attribute_map['referencetranscriptid'] - assert_equal 6, genotype.attribute_map['geneticinheritance'] - assert_equal 5, 
genotype.attribute_map['variantpathclass'] - end - - test 'process_targeted_variant_absent' do - targeted_record = build_raw_record('pseudo_id1' => 'patient12') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:Exon 1-7 deletion' - targeted_record.raw_fields['genotype'] = 'R242_neg_MLPA' - targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic MSH2 variant.' - targeted_record.raw_fields['proteinimpact'] = nil - targeted_record.raw_fields['zygosity'] = 'Variant NOT detected' - targeted_record.raw_fields['gene'] = 'MSH2' - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 1, genotype.attribute_map['teststatus'] - assert_equal 2804, genotype.attribute_map['gene'] # MSH2 - end - - test 'process_targeted_no_result' do - targeted_record = build_raw_record('pseudo_id1' => 'patient13') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['codingdnasequencechange'] = 'No result' - targeted_record.raw_fields['genotype'] = 'Fail/Results not required' - targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' - targeted_record.raw_fields['proteinimpact'] = nil - targeted_record.raw_fields['zygosity'] = nil - targeted_record.raw_fields['gene'] = nil - targeted_record.raw_fields['diagnosis_report'] = 'Germline heterozygous pathogenic variants in PTEN are associated with PTEN hamartoma tumour syndrome' - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 9, genotype.attribute_map['teststatus'] - assert_equal 62, genotype.attribute_map['gene'] - end - - test 'process_targeted_no_biallelic' do - targeted_record = build_raw_record('pseudo_id1' => 'patient14') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - 
targeted_record.raw_fields['codingdnasequencechange'] = 'No biallelic presence of familial variant' - targeted_record.raw_fields['genotype'] = 'PMS2 - Biallelic (CMMRD) pred negative' - targeted_record.raw_fields['report'] = 'Sequence analysis indicates no biallelic presence of the familial pathogenic PMS2 variant c.2404C>T in this patient.' - targeted_record.raw_fields['proteinimpact'] = nil - targeted_record.raw_fields['zygosity'] = nil - targeted_record.raw_fields['gene'] = nil - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 4, genotype.attribute_map['teststatus'] - assert_equal 3394, genotype.attribute_map['gene'] # PMS2 - end - - test 'process_targeted_cdna_het_variant' do - targeted_record = build_raw_record('pseudo_id1' => 'patient15') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['codingdnasequencechange'] = 'MSH2 Exon 11-12 duplication heterozygote' - targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA +ve' - targeted_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for the familial likely pathogenic MSH2 duplication of exons 11-12' - targeted_record.raw_fields['proteinimpact'] = nil - targeted_record.raw_fields['zygosity'] = nil - targeted_record.raw_fields['gene'] = nil - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 2, genotype.attribute_map['teststatus'] - assert_equal 2804, genotype.attribute_map['gene'] # MSH2 - assert_equal '11-12', genotype.attribute_map['exonintroncodonnumber'] - assert_equal 4, genotype.attribute_map['sequencevarianttype'] - end - - test 'process_result_variant_absent_variant' do - targeted_record = build_raw_record('pseudo_id1' => 'patient15') - targeted_record.raw_fields['moleculartestingtype'] = 'Familial' - targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' - 
targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA -ve' - targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic MLH1 exon 16-19 deletion is absent in this patient.' - targeted_record.raw_fields['proteinimpact'] = nil - targeted_record.raw_fields['zygosity'] = nil - targeted_record.raw_fields['gene'] = nil - - res = @handler.process_fields(targeted_record) - assert_equal 1, res.size - - genotype = res[0] - assert_equal 1, genotype.attribute_map['teststatus'] - assert_equal 2744, genotype.attribute_map['gene'] # MLH1 - end - - # Tests for should_process? method - test 'should_process_other_cancer_file_familial' do - @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) - - record = build_raw_record('pseudo_id1' => 'patient16') - record.raw_fields['moleculartestingtype'] = 'Familial' - record.raw_fields['report'] = 'Testing for MLH1 variants' - - assert @handler.send(:should_process?, record) - end - - test 'should_not_process_other_cancer_file_non_familial' do - @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) - - record = build_raw_record('pseudo_id1' => 'patient17') - record.raw_fields['moleculartestingtype'] = 'Predictive' - record.raw_fields['report'] = 'Testing for MLH1 variants' - - refute @handler.send(:should_process?, record) - end - - test 'should_not_process_ataxia_record' do - @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) - - record = build_raw_record('pseudo_id1' => 'patient18') - record.raw_fields['moleculartestingtype'] = 'Familial' - record.raw_fields['diagnosis_report'] = 'Testing for ataxia related genes' - record.raw_fields['report'] = 'MLH1 testing' - - refute @handler.send(:should_process?, record) - end - - test 'should_not_process_brca_record' do - @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) - - record = build_raw_record('pseudo_id1' 
=> 'patient19') - record.raw_fields['moleculartestingtype'] = 'Familial' - record.raw_fields['codingdnasequencechange'] = 'BRCA1 c.181T>G' - record.raw_fields['report'] = 'MLH1 testing' - - refute @handler.send(:should_process?, record) - end - - private - - def clinical_json - { sex: '2', - consultantcode: 'Consultant Code', - providercode: 'Provider Code', - receiveddate: '2010-08-05T00:00:00.000+01:00', - authoriseddate: '2010-09-17T00:00:00.000+01:00', - servicereportidentifier: 'Service Report Identifier', - sortdate: '2010-08-05T00:00:00.000+01:00', - genetictestscope: 'R210.2', - specimentype: '5', - report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.', - requesteddate: '2010-08-05T00:00:00.000+01:00', - age: 37 }.to_json - end - - def rawtext_clinical_json - { sex: 'M', - providercode: 'Provider Code', - referringclinicianname: 'Clinician Name', - consultantcode: 'Consultant Code', - servicereportidentifier: 'Service Report Identifier', - patienttype: 'NHS', - moleculartestingtype: 'R210.5', - indicationcategory: 'R210', - genotype: 'PMS2 - MLPA conf negative', - report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient', - diagnosis_report: 'Heterozygous mutations in PMS2 are linked to Lynch Syndrome with dominant inheritance. 
' \ - 'Homozygous/compound heterozygous mutations in PMS2 are linked to mismatch repair cancer syndrome.', - receiveddate: '2010-08-05 00:00:00', - karyotypingmethod: 'MLPA P008', - codingdnasequencechange: 'No deletions/duplications detected', - proteinimpact: nil, - gene: nil, - zygosity: nil, - variantpathclass: nil, - requesteddate: '2010-08-05 00:00:00', - authoriseddate: '2010-09-17 00:00:00', - specimentype: 'Blood' }.to_json - end -end From 1194cc6aa3c561e8c7735ae9ec229252f4aedd94 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Fri, 5 Dec 2025 18:00:00 +0000 Subject: [PATCH 17/18] Alignment fixed --- .../providers/leeds/leeds_handler_colorectal_v2.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb index a120226f..44c1b794 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2.rb @@ -29,7 +29,8 @@ def process_fields(record) add_targ_moleculartestingtype(genocolorectal) res = process_targ_rec(genocolorectal, record, genotypes) end - # correcting ebatch provider and registry to RR8 (from RR8_V2_POST2025) to allow data to persist in the database + # correcting ebatch provider and registry to RR8 (from RR8_V2_POST2025) to allow + # data to persist in the database @batch.provider = 'RR8' @batch.registryid = 'RR8' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } @@ -76,7 +77,7 @@ def initialize_processing_variables @variantpathclass = nil @cdna_mutations = nil @exonic_mutations = nil - @zygosity = nil + @zygosity = nil end def process_test_scope(genocolorectal) From 4a2657d39c60e33a32d9e132f05dcb6d0368e84d Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 8 Dec 2025 16:18:37 +0000 Subject: [PATCH 18/18] Fix broken tests --- lib/import/database_wrappers/genetic_sequence_variant.rb 
| 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/import/database_wrappers/genetic_sequence_variant.rb b/lib/import/database_wrappers/genetic_sequence_variant.rb index 546f656c..53bca2b7 100644 --- a/lib/import/database_wrappers/genetic_sequence_variant.rb +++ b/lib/import/database_wrappers/genetic_sequence_variant.rb @@ -26,9 +26,9 @@ def initialize(genotype) # Should not produce a variant record unless there actually is a variant def produce_record - # if (@field_names - ['variantpathclass']).all? - # {|x| @representative_genotype.attribute_map[x].nil?} - if @field_names.all? { |x| @representative_genotype.attribute_map[x].nil? } + # Only create a sequence variant if there are meaningful variant fields beyond just genotype + meaningful_fields = @field_names - ['variantgenotype'] + if meaningful_fields.all? { |x| @representative_genotype.attribute_map[x].nil? } nil else super()