From dd0886640226cdbe785ba4e6ae6a643131b344b0 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Thu, 11 Sep 2025 17:53:26 +0100 Subject: [PATCH 01/14] Initial commit to handle new Leeds BRCA file --- lib/import/brca/core/brca_handler_mapping.rb | 3 +- lib/import/brca/core/genotype_brca.rb | 4 +- .../leeds/leeds_handler_new_format.rb | 213 ++++++++++++++++++ .../bash/Import_all_brca_interactive.sh | 23 +- .../genetic_sequence_variant.rb | 1 - .../helpers/brca/providers/rr8/constants.rb | 7 + 6 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 lib/import/brca/providers/leeds/leeds_handler_new_format.rb diff --git a/lib/import/brca/core/brca_handler_mapping.rb b/lib/import/brca/core/brca_handler_mapping.rb index 583d87b4..f6e19321 100644 --- a/lib/import/brca/core/brca_handler_mapping.rb +++ b/lib/import/brca/core/brca_handler_mapping.rb @@ -6,7 +6,8 @@ module Core # Provides the handler appropriate for the dataformat from each center class BrcaHandlerMapping HANDLER_MAPPING = { - 'RR8' => Import::Brca::Providers::Leeds::LeedsHandlerNew, + 'RR8' => Import::Brca::Providers::Leeds::LeedsHandlerNewFormat, + 'RR8_2' => Import::Brca::Providers::Leeds::LeedsHandlerNew, 'RNZ' => Import::Brca::Providers::Salisbury::SalisburyHandler, 'RVJ' => Import::Brca::Providers::Bristol::BristolHandler, 'RTD' => Import::Brca::Providers::Newcastle::NewcastleHandler, diff --git a/lib/import/brca/core/genotype_brca.rb b/lib/import/brca/core/genotype_brca.rb index 0235fd57..8506020c 100644 --- a/lib/import/brca/core/genotype_brca.rb +++ b/lib/import/brca/core/genotype_brca.rb @@ -46,6 +46,7 @@ class GenotypeBrca < Import::Germline::Genotype 'NTHL1' => 3108, 'POLD1' => 3408, 'POLE' => 5000, + 'POT1' => 5001, 'SDHB' => 68, 'VHL' => 83 }.freeze @@ -86,6 +87,7 @@ class GenotypeBrca < Import::Germline::Genotype (?NTHL1)| (?POLD1)| (?POLE)| + (?POT1)| (?SDHB)| (?VHL)/ix # Added by Francesco @@ -114,7 +116,7 @@ def add_gene(brca_input) def process_integer_imput(brca_input) if [7, 8, 
72, 79, 451, 865, 3186, 2744, 1432, 2804, 2808, 3394, 62, 76, - 590, 2912, 3615, 3616, 2850, 54, 55, 74, 4952, 18, 20, 794].include? brca_input + 590, 2912, 3615, 3616, 2850, 54, 55, 74, 4952, 18, 20, 794, 5001].include? brca_input @attribute_map['gene'] = brca_input @logger.debug "SUCCESSFUL gene parse for #{brca_input}" elsif (1..2).cover? brca_input diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb new file mode 100644 index 00000000..7ade9523 --- /dev/null +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -0,0 +1,213 @@ +module Import + module Brca + module Providers + module Leeds + # Process Leeds-specific record details into generalized internal genotype format for > 2025 files + class LeedsHandlerNewFormat < Import::Germline::ProviderHandler + include Import::Helpers::Brca::Providers::Rr8::Constants + + def process_fields(record) + genotype = Import::Brca::Core::GenotypeBrca.new(record) + genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, + PASS_THROUGH_FIELDS) + genotype.attribute_map['organisationcode_testresult'] = '699C0' + genotype.add_test_scope(:full_screen) + add_moleculartestingtype(genotype, record) + @genes_panel = get_genes_panel(record) + genotypes = [] + @pos_gene = nil + @variantpathclass = nil + @cdna_mutations = nil + @exonic_mutations = nil + @zygosity = nil + res = process_rec(genotype, record, genotypes) + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end + + def add_moleculartestingtype(genotype, record) + indication_catgeory = record.raw_fields['indicationcategory'] + return unless %w[R207 R444].include? 
indication_catgeory + + genotype.add_molecular_testing_type_strict(:diagnostic) + end + + def get_genes_panel(record) + genes = [] + genes << record.raw_fields['diagnosis_report']&.scan(NEW_FORMAT_GENES) + genes << record.raw_fields['report']&.scan(NEW_FORMAT_GENES) + genes << record.raw_fields['moleculartestingtype']&.scan(NEW_FORMAT_GENES) + genes = genes.compact_blank + if genes.empty? && record.raw_fields['moleculartestingtype']&.scan(/R208.1/i)&.size&.positive? + genes << %w[ATM BRCA1 BRCA2 CHEK2 PALB2] + end + genes.flatten.uniq + end + + def process_rec(genotype, record, genotypes) + geno = record.raw_fields['genotype'] + + value2 = record.raw_fields['gene'] + value1 = record.raw_fields['proteinimpact'] + value12 = record.raw_fields['zygosity'] + result = record.raw_fields['codingdnasequencechange'] + + # derived values + if value12 =~ /het/i + @zygosity = 1 + elsif value12 =~ /homo/i + @zygosity = 2 + end + @variantpathclass = cal_variantpathclass(record) + @cdna_mutations = result&.match(CDNA) || value1&.match(CDNA) + @exonic_mutations = result&.match(EXON_VARIANT_REGEX) + @protein_impact = value1&.match(PROTEIN_REGEX) || result&.match(PROTEIN_REGEX) + @refid = result&.match(REF_TRANSCRIPT_ID) + # priority1 + if geno =~ /fail/i && geno !~ /dosage/i + process_status_genes(9, @genes_panel, genotype, genotypes) + elsif value2.nil? && value1 =~ CDNA_REGEX # priority 2 + gene = [] + gene << 'PALB2' if value1 =~ /ALB2/ + gene << value1&.scan(NEW_FORMAT_GENES) + @pos_gene = gene.flatten.uniq + if value1 =~ /C(1|2)/ + teststatus = 10 + elsif value1 =~ /C3/ + teststatus = 2 + elsif value1 !~ /C(1|2|3)/ + teststatus = 2 + end + + process_variant_rec(genotype, teststatus, record, genotypes) if @pos_gene.present? 
+ negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + elsif result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' # priority 3 + negative_genes = @genes_panel - [@pos_gene] + process_status_genes(1, negative_genes, genotype, genotypes) + end + + return genotypes if @pos_gene.present? + + @pos_gene = value2&.scan(NEW_FORMAT_GENES)&.flatten&.uniq # priority 4 + if @pos_gene.present? + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + return genotypes + end + + if @pos_gene.blank? # priority 5 + @pos_gene = result&.scan(NEW_FORMAT_GENES)&.flatten&.uniq + if @pos_gene.present? + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + return genotypes + end + end + + # priority 6 + if geno =~ /normal/i + process_status_genes(1, @genes_panel, genotype, genotypes) + return genotypes + end + + # priority 7 + first_of_report = record.raw_fields['report'] + return genotypes unless first_of_report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/i + + @pos_gene = $LAST_MATCH_INFO[:gene] + if @pos_gene.present? + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - [@pos_gene] + process_status_genes(1, negative_genes, genotype, genotypes) + end + genotypes + end + + def process_variant_rec(genotype, status, record, genotypes) + genotype_dup = genotype.dup + add_geneticinheritance(genotype_dup, record) + genotype_dup.add_gene(@pos_gene[0]) + genotype_dup.add_zygosity(@zygosity) + process_cdna_variant(genotype_dup, @cdna_mutations) if @cdna_mutations.present? + process_protein_impact(genotype_dup, @protein_impact) if @protein_impact.present? + process_exonic_variant(genotype_dup, @exonic_mutations) if @exonic_mutations.present? 
+ genotype_dup.add_referencetranscriptid(@refid.to_s) if @refid.present? + genotype_dup.add_variant_class(@variantpathclass) + genotype_dup.add_status(status) + genotypes << genotype_dup + end + + def add_geneticinheritance(genotype, record) + value12 = record.raw_fields['zygosity'] + value1 = record.raw_fields['proteinimpact'] + genotype.attribute_map['geneticinheritance'] = value12 =~ /mosaic/i || value1 =~ /VAF/ ? 6 : 4 + end + + def cal_variantpathclass(record) + variantpathclass = record.raw_fields['variantpathclass'] + varclass = case variantpathclass + when /Pathogenic/i + 5 + when /Likely pathogenic/i + 4 + when /Uncertain significance/i + 3 + end + + if varclass.nil? + value1 = record.raw_fields['proteinimpact'] + varclass = case value1 + when 'C1' + 1 + when 'C2' + 2 + when 'C3' + 3 + when '(cold C3)' + 8 + when '(hot C3)' + 9 + end + end + varclass + end + + def process_status_genes(status, negative_genes, genotype, genotypes) + negative_genes&.each do |gene| + genotype_dup = genotype.dup + genotype_dup.add_gene(gene) + genotype_dup.add_status(status) + genotypes << genotype_dup + end + end + + def process_exonic_variant(genotype, mutation) + return if mutation[:exons].blank? + + genotype.add_exon_location(mutation[:exons]) + genotype.add_variant_type(mutation[:variant]) + @logger.debug "SUCCESSFUL exon variant parse for: #{mutation}" + end + + def process_cdna_variant(genotype, mutation) + return if mutation[:cdna].blank? + + genotype.add_gene_location(mutation[:cdna]) + @logger.debug "SUCCESSFUL cdna change parse for: #{mutation}" + end + + def process_protein_impact(genotype, mutation) + if mutation[:impact].present? 
+ genotype.add_protein_impact(mutation[:impact]) + @logger.debug "SUCCESSFUL protein parse for: #{mutation[:impact]}" + else + @logger.debug "FAILED protein parse for: #{mutation}" + end + end + end + end + end + end +end diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index ccc194cf..0d9a9b28 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -15,7 +15,7 @@ OIFS="$IFS" IFS=$'\n' DIRPATH=$1 echo $DIRPATH -#DIRPATH=~/work/data_management_system +#DIRPATH=~/work/new_dms/data_management_system #FILEPATH="private/pseudonymised_data/updated_files/" FILEPATH=$2 echo $FILEPATH @@ -53,14 +53,30 @@ RR8 () { MBIS=$1 PROV='RR8' IFS=$'\n' +for x in $(find $DIRPATH/$FILEPATH -type f -name "*BRCA*.pseudo" -path "*/$PROV/*" \ +\( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ +! -name "bede6d1385c0ae9db4fe61fe9b07d58f86e2dc60_24.11.2021 to 31.03.2025_BRCA_DATA__2021_11_24__to__2025_03_31_b.xlsx.pseudo") +do +IFS="$OIFS" +$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV +done +} + +# To handle pseudo files before 2025 +RR8_2 () { +MBIS=$1 +PROV='RR8' +PROV_OLD_FILE='RR8_2' +IFS=$'\n' for x in $(find $DIRPATH/$FILEPATH -type f -name "*.pseudo" -path "*/$PROV/*" \ -not -path "*/2017-03-17/*" \ +-not -path "*/2025/*" \ ! -name "3a4d3dc703789864fa6d2b8f5d9fe60749205979_01.01.2013 to 30.09.2018_010113_300918.xlsx.pseudo" \ ! -name "*MMR*" \ ! 
-name "*Colorectal*") do IFS="$OIFS" -$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV +$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV_OLD_FILE done } @@ -78,7 +94,6 @@ $BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1 done } - RX1 () { MBIS=$1 PROV='RX1' @@ -256,6 +271,6 @@ done } -RTD; RQ3; RR8; RNZ; RVJ; RX1; RCU; RJ1; RGT; RPY; R0A; RJ7; RJ7_2 ; RTH; R1K; RP4; REP +RTD; RQ3; RR8; RR8_2; RNZ; RVJ; RX1; RCU; RJ1; RGT; RPY; R0A; RJ7; RJ7_2 ; RTH; R1K; RP4; REP diff --git a/lib/import/database_wrappers/genetic_sequence_variant.rb b/lib/import/database_wrappers/genetic_sequence_variant.rb index 50cc664e..635f15f9 100644 --- a/lib/import/database_wrappers/genetic_sequence_variant.rb +++ b/lib/import/database_wrappers/genetic_sequence_variant.rb @@ -15,7 +15,6 @@ def initialize(genotype) %w[geneticsequencevariantid genetic_test_result_id humangenomebuild - referencetranscriptid genomicchange clinvarid cosmicid diff --git a/lib/import/helpers/brca/providers/rr8/constants.rb b/lib/import/helpers/brca/providers/rr8/constants.rb index 2e167071..707cf1a3 100644 --- a/lib/import/helpers/brca/providers/rr8/constants.rb +++ b/lib/import/helpers/brca/providers/rr8/constants.rb @@ -123,6 +123,9 @@ module Constants EPCAM|FH|FLCN|GREM1|MET|MLH1|MSH2|MSH6|MUTYH|NTHL1|PALB2| PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D|SDHB|SMAD4|STK11|TP53|VHL'.freeze + NEW_FORMAT_GENES = /(?ATM|BRCA1|BRCA2|BRIP1|CHEK2|MLH1|MSH2|MSH6|PALB2|POT1|PTEN| + RAD51C|RAD51D|TP53)/x + # rubocop:disable Lint/MixedRegexpCaptureTypes BRCA_REGEX = /(?#{GENES})/ix @@ -134,9 +137,11 @@ module Constants HETEROZYGOUS_GENE_REGEX = /heterozygous[\w\s]+(?#{GENES})[\w\s]+/ix CDNA_REGEX = /c\.(?[\w+>*\-]+)?[\w\s.]+/ix + CDNA = /c\.(?[\w.+>*\-]+)/ix TARG_GENE_REGEX = /(?#{GENES})[\w\s]+(c\.(?[\w+>*\-]+)?[\w\s.]+|exon)/ix + GENE_CDNA_PROTEIN = 
/(?#{GENES})\s+c\.(?[\w.+>*\-]+)\s+p\.?\(?(?\w+)\)?/ix PROTEIN_REGEX = /\(?p\.\(?(?\w+)\)?/ix EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s? @@ -149,6 +154,8 @@ module Constants ex(on)?s?\s?(?[0-9]+(\sto\s[0-9]+)?)\s (?del|dup|ins)| x(?[0-9+-? ]+)+(?del|dup|ins)/ix + + REF_TRANSCRIPT_ID = /NM_\d{6}\.\d(?=:)/ix # rubocop:enable Lint/MixedRegexpCaptureTypes end end From aae211e31a3d0c48ecb856d43812981fae4b1af3 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 6 Oct 2025 13:54:33 +0100 Subject: [PATCH 02/14] fixed QA counts --- .../providers/leeds/leeds_handler_new_format.rb | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 7ade9523..5fda04fb 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -37,10 +37,20 @@ def get_genes_panel(record) genes << record.raw_fields['report']&.scan(NEW_FORMAT_GENES) genes << record.raw_fields['moleculartestingtype']&.scan(NEW_FORMAT_GENES) genes = genes.compact_blank + if genes.empty? && record.raw_fields['moleculartestingtype']&.scan(/R208.1/i)&.size&.positive? 
genes << %w[ATM BRCA1 BRCA2 CHEK2 PALB2] end - genes.flatten.uniq + + genes.flatten.uniq - exclude_genes(record) + end + + def exclude_genes(record) + exclude_genes = [] + exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}\sanalysis\shas\snot\sbeen\sperformed/ix) + exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}\stesting\shas\sbeen\sreported\spreviously/ix) + exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}[a-zA-Z0-9\s]+Li\sFraumeni\ssyndrome/ix) + exclude_genes.flatten.uniq end def process_rec(genotype, record, genotypes) @@ -72,9 +82,7 @@ def process_rec(genotype, record, genotypes) @pos_gene = gene.flatten.uniq if value1 =~ /C(1|2)/ teststatus = 10 - elsif value1 =~ /C3/ - teststatus = 2 - elsif value1 !~ /C(1|2|3)/ + else teststatus = 2 end @@ -84,6 +92,7 @@ def process_rec(genotype, record, genotypes) elsif result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' # priority 3 negative_genes = @genes_panel - [@pos_gene] process_status_genes(1, negative_genes, genotype, genotypes) + return genotypes end return genotypes if @pos_gene.present? 
From 57ab8e106620346eb165e4cc55bc950f5a93cc15 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 6 Oct 2025 15:33:22 +0100 Subject: [PATCH 03/14] rubocop fixes --- .../leeds/leeds_handler_new_format.rb | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 5fda04fb..c6aeda43 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -13,15 +13,21 @@ def process_fields(record) genotype.attribute_map['organisationcode_testresult'] = '699C0' genotype.add_test_scope(:full_screen) add_moleculartestingtype(genotype, record) - @genes_panel = get_genes_panel(record) + populate_variables(record) genotypes = [] + @genes_panel = get_genes_panel(record) + res = process_rec(genotype, record, genotypes) + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end + + def populate_variables(record) + @report = record.raw_fields['report'] + @moltestingtype = record.raw_fields['moleculartestingtype'] @pos_gene = nil @variantpathclass = nil @cdna_mutations = nil @exonic_mutations = nil @zygosity = nil - res = process_rec(genotype, record, genotypes) - res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end def add_moleculartestingtype(genotype, record) @@ -34,22 +40,21 @@ def add_moleculartestingtype(genotype, record) def get_genes_panel(record) genes = [] genes << record.raw_fields['diagnosis_report']&.scan(NEW_FORMAT_GENES) - genes << record.raw_fields['report']&.scan(NEW_FORMAT_GENES) - genes << record.raw_fields['moleculartestingtype']&.scan(NEW_FORMAT_GENES) + genes << @report&.scan(NEW_FORMAT_GENES) + genes << @moltestingtype&.scan(NEW_FORMAT_GENES) genes = genes.compact_blank - if genes.empty? && record.raw_fields['moleculartestingtype']&.scan(/R208.1/i)&.size&.positive? 
- genes << %w[ATM BRCA1 BRCA2 CHEK2 PALB2] - end + r208_matches = @moltestingtype&.scan(/R208.1/i) + genes << %w[ATM BRCA1 BRCA2 CHEK2 PALB2] if genes.empty? && r208_matches&.size&.positive? - genes.flatten.uniq - exclude_genes(record) + genes.flatten.uniq - exclude_genes end - def exclude_genes(record) + def exclude_genes exclude_genes = [] - exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}\sanalysis\shas\snot\sbeen\sperformed/ix) - exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}\stesting\shas\sbeen\sreported\spreviously/ix) - exclude_genes << record.raw_fields['report']&.scan(/#{NEW_FORMAT_GENES}[a-zA-Z0-9\s]+Li\sFraumeni\ssyndrome/ix) + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}\sanalysis\shas\snot\sbeen\sperformed/ix) + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}\stesting\shas\sbeen\sreported\spreviously/ix) + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}[a-zA-Z0-9\s]+Li\sFraumeni\ssyndrome/ix) exclude_genes.flatten.uniq end @@ -80,11 +85,11 @@ def process_rec(genotype, record, genotypes) gene << 'PALB2' if value1 =~ /ALB2/ gene << value1&.scan(NEW_FORMAT_GENES) @pos_gene = gene.flatten.uniq - if value1 =~ /C(1|2)/ - teststatus = 10 - else - teststatus = 2 - end + teststatus = if value1 =~ /C(1|2)/ + 10 + else + 2 + end process_variant_rec(genotype, teststatus, record, genotypes) if @pos_gene.present? negative_genes = @genes_panel - @pos_gene @@ -122,8 +127,7 @@ def process_rec(genotype, record, genotypes) end # priority 7 - first_of_report = record.raw_fields['report'] - return genotypes unless first_of_report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/i + return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/i @pos_gene = $LAST_MATCH_INFO[:gene] if @pos_gene.present? @@ -168,15 +172,15 @@ def cal_variantpathclass(record) if varclass.nil? 
value1 = record.raw_fields['proteinimpact'] varclass = case value1 - when 'C1' + when /C1/ 1 - when 'C2' + when /C2/ 2 - when 'C3' + when /C3/ 3 - when '(cold C3)' + when /\(cold\sC3\)/i 8 - when '(hot C3)' + when /\(hot\sC3\)/i 9 end end From 4ee4e945395b5db5106a6a5c28214de07fa972cd Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 6 Oct 2025 15:57:46 +0100 Subject: [PATCH 04/14] more rubocop fixes --- .../leeds/leeds_handler_new_format.rb | 89 ++++++++++--------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index c6aeda43..986d1e8d 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -23,6 +23,8 @@ def process_fields(record) def populate_variables(record) @report = record.raw_fields['report'] @moltestingtype = record.raw_fields['moleculartestingtype'] + @value1 = record.raw_fields['proteinimpact'] + @value12 = record.raw_fields['zygosity'] @pos_gene = nil @variantpathclass = nil @cdna_mutations = nil @@ -62,30 +64,28 @@ def process_rec(genotype, record, genotypes) geno = record.raw_fields['genotype'] value2 = record.raw_fields['gene'] - value1 = record.raw_fields['proteinimpact'] - value12 = record.raw_fields['zygosity'] result = record.raw_fields['codingdnasequencechange'] # derived values - if value12 =~ /het/i + if @value12 =~ /het/i @zygosity = 1 - elsif value12 =~ /homo/i + elsif @value12 =~ /homo/i @zygosity = 2 end @variantpathclass = cal_variantpathclass(record) - @cdna_mutations = result&.match(CDNA) || value1&.match(CDNA) + @cdna_mutations = result&.match(CDNA) || @value1&.match(CDNA) @exonic_mutations = result&.match(EXON_VARIANT_REGEX) - @protein_impact = value1&.match(PROTEIN_REGEX) || result&.match(PROTEIN_REGEX) + @protein_impact = @value1&.match(PROTEIN_REGEX) || result&.match(PROTEIN_REGEX) @refid = 
result&.match(REF_TRANSCRIPT_ID) # priority1 if geno =~ /fail/i && geno !~ /dosage/i process_status_genes(9, @genes_panel, genotype, genotypes) - elsif value2.nil? && value1 =~ CDNA_REGEX # priority 2 + elsif value2.nil? && @value1 =~ CDNA_REGEX # priority 2 gene = [] - gene << 'PALB2' if value1 =~ /ALB2/ - gene << value1&.scan(NEW_FORMAT_GENES) + gene << 'PALB2' if @value1 =~ /ALB2/ + gene << @value1&.scan(NEW_FORMAT_GENES) @pos_gene = gene.flatten.uniq - teststatus = if value1 =~ /C(1|2)/ + teststatus = if @value1 =~ /C(1|2)/ 10 else 2 @@ -102,7 +102,9 @@ def process_rec(genotype, record, genotypes) return genotypes if @pos_gene.present? - @pos_gene = value2&.scan(NEW_FORMAT_GENES)&.flatten&.uniq # priority 4 + # priority 4 + scanned_genes = value2&.scan(NEW_FORMAT_GENES) + @pos_gene = scanned_genes&.flatten&.uniq if @pos_gene.present? process_variant_rec(genotype, 2, record, genotypes) negative_genes = @genes_panel - @pos_gene @@ -110,8 +112,10 @@ def process_rec(genotype, record, genotypes) return genotypes end - if @pos_gene.blank? # priority 5 - @pos_gene = result&.scan(NEW_FORMAT_GENES)&.flatten&.uniq + # priority 5 + if @pos_gene.blank? + scanned_result = result&.scan(NEW_FORMAT_GENES) + @pos_gene = scanned_result&.flatten&.uniq if @pos_gene.present? process_variant_rec(genotype, 2, record, genotypes) negative_genes = @genes_panel - @pos_gene @@ -152,39 +156,42 @@ def process_variant_rec(genotype, status, record, genotypes) genotypes << genotype_dup end - def add_geneticinheritance(genotype, record) - value12 = record.raw_fields['zygosity'] - value1 = record.raw_fields['proteinimpact'] - genotype.attribute_map['geneticinheritance'] = value12 =~ /mosaic/i || value1 =~ /VAF/ ? 6 : 4 + def add_geneticinheritance(genotype, _record) + genotype.attribute_map['geneticinheritance'] = @value12 =~ /mosaic/i || @value1 =~ /VAF/ ? 
6 : 4 end def cal_variantpathclass(record) variantpathclass = record.raw_fields['variantpathclass'] - varclass = case variantpathclass - when /Pathogenic/i - 5 - when /Likely pathogenic/i - 4 - when /Uncertain significance/i - 3 - end - - if varclass.nil? - value1 = record.raw_fields['proteinimpact'] - varclass = case value1 - when /C1/ - 1 - when /C2/ - 2 - when /C3/ - 3 - when /\(cold\sC3\)/i - 8 - when /\(hot\sC3\)/i - 9 - end + varclass = classify_variant_pathogenicity(variantpathclass) + varclass || classify_protein_impact + end + + private + + def classify_variant_pathogenicity(variantpathclass) + case variantpathclass + when /Pathogenic/i + 5 + when /Likely\spathogenic/i + 4 + when /Uncertain\ssignificance/i + 3 + end + end + + def classify_protein_impact + case @value1 + when /C1/ + 1 + when /C2/ + 2 + when /C3/ + 3 + when /\(cold\sC3\)/i + 8 + when /\(hot\sC3\)/i + 9 end - varclass end def process_status_genes(status, negative_genes, genotype, genotypes) From 0bc3b1c6ee61b676e6bb6c9589bd90f3330e4b26 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 6 Oct 2025 17:52:52 +0100 Subject: [PATCH 05/14] rubocop fixes --- .../leeds/leeds_handler_new_format.rb | 154 +++++++++++------- 1 file changed, 94 insertions(+), 60 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 986d1e8d..25aee305 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -61,76 +61,112 @@ def exclude_genes end def process_rec(genotype, record, genotypes) - geno = record.raw_fields['genotype'] + setup_derived_values(record) + process_genotype_priorities(genotype, record, genotypes) + end - value2 = record.raw_fields['gene'] + private + + def setup_derived_values(record) result = record.raw_fields['codingdnasequencechange'] - # derived values - if @value12 =~ /het/i - @zygosity = 1 - elsif @value12 =~ 
/homo/i - @zygosity = 2 - end + @zygosity = calc_zygosity @variantpathclass = cal_variantpathclass(record) @cdna_mutations = result&.match(CDNA) || @value1&.match(CDNA) @exonic_mutations = result&.match(EXON_VARIANT_REGEX) @protein_impact = @value1&.match(PROTEIN_REGEX) || result&.match(PROTEIN_REGEX) @refid = result&.match(REF_TRANSCRIPT_ID) - # priority1 - if geno =~ /fail/i && geno !~ /dosage/i - process_status_genes(9, @genes_panel, genotype, genotypes) - elsif value2.nil? && @value1 =~ CDNA_REGEX # priority 2 - gene = [] - gene << 'PALB2' if @value1 =~ /ALB2/ - gene << @value1&.scan(NEW_FORMAT_GENES) - @pos_gene = gene.flatten.uniq - teststatus = if @value1 =~ /C(1|2)/ - 10 - else - 2 - end - - process_variant_rec(genotype, teststatus, record, genotypes) if @pos_gene.present? - negative_genes = @genes_panel - @pos_gene - process_status_genes(1, negative_genes, genotype, genotypes) - elsif result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' # priority 3 - negative_genes = @genes_panel - [@pos_gene] - process_status_genes(1, negative_genes, genotype, genotypes) - return genotypes + end + + def calc_zygosity + case @value12 + when /het/i + 1 + when /homo/i + 2 end + end + + def process_genotype_priorities(genotype, record, genotypes) + geno = record.raw_fields['genotype'] + value2 = record.raw_fields['gene'] + result = record.raw_fields['codingdnasequencechange'] + + # priority based extracting details + return genotypes if fail_rec?(geno, genotype, genotypes) + return genotypes if variant_rec?(value2, genotype, record, genotypes) + return genotypes if normal_rec?(result, genotype, genotypes) + return genotypes if from_gene?(value2, genotype, record, genotypes) + return genotypes if from_result?(result, genotype, record, genotypes) + return genotypes if from_report_result?(geno, genotype, genotypes) + + from_first_of_report?(genotype, record, genotypes) + end + + def fail_rec?(geno, genotype, genotypes) + return false unless geno 
=~ /fail/i && geno !~ /dosage/i + + process_status_genes(9, @genes_panel, genotype, genotypes) + true + end + + def variant_rec?(value2, genotype, record, genotypes) + return false unless value2.nil? && @value1 =~ CDNA_REGEX + + gene = [] + gene << 'PALB2' if @value1 =~ /ALB2/ + gene << @value1&.scan(NEW_FORMAT_GENES) + @pos_gene = gene.flatten.uniq + teststatus = @value1 =~ /C(1|2)/ ? 10 : 2 + + process_variant_rec(genotype, teststatus, record, genotypes) if @pos_gene.present? + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def normal_rec?(result, genotype, genotypes) + return false unless result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' - return genotypes if @pos_gene.present? + negative_genes = @genes_panel - [@pos_gene] + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def from_gene?(value2, genotype, record, genotypes) + return false if @pos_gene.present? - # priority 4 scanned_genes = value2&.scan(NEW_FORMAT_GENES) - @pos_gene = scanned_genes&.flatten&.uniq - if @pos_gene.present? - process_variant_rec(genotype, 2, record, genotypes) - negative_genes = @genes_panel - @pos_gene - process_status_genes(1, negative_genes, genotype, genotypes) - return genotypes - end + @pos_gene = scanned_genes&.flatten&.uniq + return false if @pos_gene.blank? - # priority 5 - if @pos_gene.blank? - scanned_result = result&.scan(NEW_FORMAT_GENES) - @pos_gene = scanned_result&.flatten&.uniq - if @pos_gene.present? 
- process_variant_rec(genotype, 2, record, genotypes) - negative_genes = @genes_panel - @pos_gene - process_status_genes(1, negative_genes, genotype, genotypes) - return genotypes - end - end + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end - # priority 6 - if geno =~ /normal/i - process_status_genes(1, @genes_panel, genotype, genotypes) - return genotypes - end + def from_result?(result, genotype, record, genotypes) + return false if @pos_gene.present? + + scanned_result = result&.scan(NEW_FORMAT_GENES) + @pos_gene = scanned_result&.flatten&.uniq + return false if @pos_gene.blank? + + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end - # priority 7 + def from_report_result?(geno, genotype, genotypes) + return false unless geno =~ /normal/i + + process_status_genes(1, @genes_panel, genotype, genotypes) + true + end + + def from_first_of_report?(genotype, record, genotypes) return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/i @pos_gene = $LAST_MATCH_INFO[:gene] @@ -163,11 +199,9 @@ def add_geneticinheritance(genotype, _record) def cal_variantpathclass(record) variantpathclass = record.raw_fields['variantpathclass'] varclass = classify_variant_pathogenicity(variantpathclass) - varclass || classify_protein_impact + varclass || classify_protein_impact(record.raw_fields['proteinimpact']) end - private - def classify_variant_pathogenicity(variantpathclass) case variantpathclass when /Pathogenic/i @@ -179,8 +213,8 @@ def classify_variant_pathogenicity(variantpathclass) end end - def classify_protein_impact - case @value1 + def classify_protein_impact(protein_impact) + case protein_impact when /C1/ 1 when /C2/ From 4d32ede0bff78611ac6b81f8789fc8c95bf4b0fe Mon Sep 17 00:00:00 2001 From: 
shilpigoel Date: Mon, 6 Oct 2025 22:42:48 +0100 Subject: [PATCH 06/14] Renaming Leeds old handler and adding test --- lib/import/brca/core/brca_handler_mapping.rb | 2 +- ...ds_handler_new.rb => leeds_handler_old.rb} | 5 ++++- test/fixtures/zproviders.yml | 2 ++ ..._new_test.rb => leeds_handler_old_test.rb} | 21 +++++++++++++++++-- 4 files changed, 26 insertions(+), 4 deletions(-) rename lib/import/brca/providers/leeds/{leeds_handler_new.rb => leeds_handler_old.rb} (98%) rename test/lib/import/brca/providers/leeds/{leeds_handler_new_test.rb => leeds_handler_old_test.rb} (93%) diff --git a/lib/import/brca/core/brca_handler_mapping.rb b/lib/import/brca/core/brca_handler_mapping.rb index f6e19321..1d778372 100644 --- a/lib/import/brca/core/brca_handler_mapping.rb +++ b/lib/import/brca/core/brca_handler_mapping.rb @@ -7,7 +7,7 @@ module Core class BrcaHandlerMapping HANDLER_MAPPING = { 'RR8' => Import::Brca::Providers::Leeds::LeedsHandlerNewFormat, - 'RR8_2' => Import::Brca::Providers::Leeds::LeedsHandlerNew, + 'RR8_2' => Import::Brca::Providers::Leeds::LeedsHandlerOld, 'RNZ' => Import::Brca::Providers::Salisbury::SalisburyHandler, 'RVJ' => Import::Brca::Providers::Bristol::BristolHandler, 'RTD' => Import::Brca::Providers::Newcastle::NewcastleHandler, diff --git a/lib/import/brca/providers/leeds/leeds_handler_new.rb b/lib/import/brca/providers/leeds/leeds_handler_old.rb similarity index 98% rename from lib/import/brca/providers/leeds/leeds_handler_new.rb rename to lib/import/brca/providers/leeds/leeds_handler_old.rb index 4dff9a05..4927a11e 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_old.rb @@ -6,7 +6,7 @@ module Brca module Providers module Leeds # Process Leeds-specific record details into generalized internal genotype format - class LeedsHandlerNew < Import::Germline::ProviderHandler + class LeedsHandlerOld < Import::Germline::ProviderHandler include 
Import::Helpers::Brca::Providers::Rr8::Constants def process_fields(record) @@ -47,6 +47,9 @@ def populate_genotype(record) process_genetictestcope(genotype, record) assign_teststatus(genotype, record) res = process_variants_from_record(genotype, record) + # correcting ebatch provider and registry to RR8 (from RR8_2) to allow data to persist in the database + @batch.provider = 'RR8' + @batch.registryid = 'RR8' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end diff --git a/test/fixtures/zproviders.yml b/test/fixtures/zproviders.yml index a6dddb88..45d7add5 100644 --- a/test/fixtures/zproviders.yml +++ b/test/fixtures/zproviders.yml @@ -8,3 +8,5 @@ zprovider_r0a: zproviderid: R0A zprovider_rj7: zproviderid: RJ7 +zprovider_rr8: + zproviderid: RR8 diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb similarity index 93% rename from test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb rename to test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb index 7425efb2..3734e5a2 100644 --- a/test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb +++ b/test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb @@ -1,15 +1,32 @@ require 'test_helper' -class LeedsHandlerNewTest < ActiveSupport::TestCase +class LeedsHandlerOldTest < ActiveSupport::TestCase def setup @record = build_raw_record('pseudo_id1' => 'bob') @genotype = Import::Brca::Core::GenotypeBrca.new(@record) @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Brca::Providers::Leeds::LeedsHandlerNew.new(EBatch.new) + @handler = Import::Brca::Providers::Leeds::LeedsHandlerOld.new(EBatch.new) end @logger = Import::Log.get_logger end + test 'process_fields' do + e_batch = EBatch.create(original_filename: 'test_filea', + e_type: 'PSMOLE', + provider: 'RR8_2', + registryid: 'RR8_2') + handler = Import::Brca::Providers::Leeds::LeedsHandlerOld.new(e_batch) + 
Import::Brca::Providers::Leeds::LeedsHandlerOld.any_instance.stubs(:should_process).returns(true) + handler.process_fields(@record) + assert_difference('EBatch.count', 1) do + handler.finalize + end + # confirm batch created now has 'RR8' as provider + e_batch.reload + assert_equal 'RR8', e_batch.provider + assert_equal 'RR8', e_batch.registryid + end + test 'process_abnormal_fs_record' do @handler.populate_variables(@record) @handler.add_moleculartestingtype(@genotype, @record) From 43bcc75b95584df884d7f43220213a2c14ce688d Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 8 Oct 2025 16:58:26 +0100 Subject: [PATCH 07/14] Leeds New format BRCA Fs final and tests added --- .../leeds/leeds_handler_new_format.rb | 40 ++- .../leeds/leeds_handler_new_format_test.rb | 278 ++++++++++++++++++ 2 files changed, 296 insertions(+), 22 deletions(-) create mode 100644 test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 25aee305..6cc63a31 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -94,13 +94,13 @@ def process_genotype_priorities(genotype, record, genotypes) # priority based extracting details return genotypes if fail_rec?(geno, genotype, genotypes) - return genotypes if variant_rec?(value2, genotype, record, genotypes) - return genotypes if normal_rec?(result, genotype, genotypes) - return genotypes if from_gene?(value2, genotype, record, genotypes) - return genotypes if from_result?(result, genotype, record, genotypes) - return genotypes if from_report_result?(geno, genotype, genotypes) + return genotypes if protein_impact_variant_rec?(value2, genotype, record, genotypes) + return genotypes if normal_result_rec?(result, genotype, genotypes) + return genotypes if gene_variant_rec?(value2, genotype, record, genotypes) + 
return genotypes if result_variant_rec?(result, genotype, record, genotypes) + return genotypes if normal_report_result?(geno, genotype, genotypes) - from_first_of_report?(genotype, record, genotypes) + first_of_report_variant_rec?(genotype, record, genotypes) end def fail_rec?(geno, genotype, genotypes) @@ -110,7 +110,7 @@ def fail_rec?(geno, genotype, genotypes) true end - def variant_rec?(value2, genotype, record, genotypes) + def protein_impact_variant_rec?(value2, genotype, record, genotypes) return false unless value2.nil? && @value1 =~ CDNA_REGEX gene = [] @@ -125,17 +125,15 @@ def variant_rec?(value2, genotype, record, genotypes) true end - def normal_rec?(result, genotype, genotypes) + def normal_result_rec?(result, genotype, genotypes) return false unless result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' - negative_genes = @genes_panel - [@pos_gene] + negative_genes = @genes_panel process_status_genes(1, negative_genes, genotype, genotypes) true end - def from_gene?(value2, genotype, record, genotypes) - return false if @pos_gene.present? - + def gene_variant_rec?(value2, genotype, record, genotypes) scanned_genes = value2&.scan(NEW_FORMAT_GENES) @pos_gene = scanned_genes&.flatten&.uniq return false if @pos_gene.blank? @@ -146,9 +144,7 @@ def from_gene?(value2, genotype, record, genotypes) true end - def from_result?(result, genotype, record, genotypes) - return false if @pos_gene.present? - + def result_variant_rec?(result, genotype, record, genotypes) scanned_result = result&.scan(NEW_FORMAT_GENES) @pos_gene = scanned_result&.flatten&.uniq return false if @pos_gene.blank? 
@@ -159,20 +155,20 @@ def from_result?(result, genotype, record, genotypes) true end - def from_report_result?(geno, genotype, genotypes) + def normal_report_result?(geno, genotype, genotypes) return false unless geno =~ /normal/i process_status_genes(1, @genes_panel, genotype, genotypes) true end - def from_first_of_report?(genotype, record, genotypes) - return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/i + def first_of_report_variant_rec?(genotype, record, genotypes) + return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/ix - @pos_gene = $LAST_MATCH_INFO[:gene] + @pos_gene = [$LAST_MATCH_INFO[:gene]] if @pos_gene.present? process_variant_rec(genotype, 2, record, genotypes) - negative_genes = @genes_panel - [@pos_gene] + negative_genes = @genes_panel - @pos_gene process_status_genes(1, negative_genes, genotype, genotypes) end genotypes @@ -204,10 +200,10 @@ def cal_variantpathclass(record) def classify_variant_pathogenicity(variantpathclass) case variantpathclass - when /Pathogenic/i - 5 when /Likely\spathogenic/i 4 + when /Pathogenic/i + 5 when /Uncertain\ssignificance/i 3 end diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb new file mode 100644 index 00000000..6bd525a8 --- /dev/null +++ b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb @@ -0,0 +1,278 @@ +require 'test_helper' + +class LeedsHandlerNewFormatTest < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Brca::Core::GenotypeBrca.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Brca::Providers::Leeds::LeedsHandlerNewFormat.new(EBatch.new) + end + @logger = Import::Log.get_logger + end + + test 'moleculartestingtype' do + @handler.add_moleculartestingtype(@genotype, @record) + assert_equal 1, 
@genotype.attribute_map['moleculartestingtype'] + end + + test 'process_failed_test_record' do + failed_record = build_raw_record('pseudo_id1' => 'patient1') + failed_record.raw_fields['genotype'] = 'R208_fail_FFPE' + failed_record.raw_fields['report'] = 'RESULT: No results were obtained for this sample.' + + res = @handler.process_fields(failed_record) + assert_equal 1, res.size + res.each do |genotype| + assert_equal 9, genotype.attribute_map['teststatus'] + end + end + + test 'process_normal_result_record' do + res = @handler.process_fields(@record) + assert_equal 1, res.size + assert_equal 1, res[0].attribute_map['teststatus'] # normal + assert_equal 3186, res[0].attribute_map['gene'] # PALB2 + end + + test 'process_variant_rec' do + variant_record = build_raw_record('pseudo_id1' => 'patient2') + variant_record.raw_fields['gene'] = nil + variant_record.raw_fields['codingdnasequencechange'] = 'No pathogenic variants detected' + variant_record.raw_fields['proteinimpact'] = 'BRCA2 c.2698A>G het (C2)' + variant_record.raw_fields['zygosity'] = nil + variant_record.raw_fields['variantpathclass'] = nil + variant_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence and dosage analysis:"\ + " \n\n\n\nBRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D.\n\n\n\nNo pathogenic variant was identified' + + res = @handler.process_fields(variant_record) + assert_equal 9, res.size + variant_genotype = res.find { |g| g.attribute_map['gene'] == 8 } # BRCA2 + assert_equal 10, variant_genotype.attribute_map['teststatus'] + assert_equal 'c.2698A>G', variant_genotype.attribute_map['codingdnasequencechange'] + assert_nil variant_genotype.attribute_map['proteinimpact'] + assert_equal 2, variant_genotype.attribute_map['variantpathclass'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 8 + + assert_equal 1, 
genotype.attribute_map['teststatus'] + end + end + + test 'process_multi_gene_panel_r208.1' do + panel_record = build_raw_record('pseudo_id1' => 'patient3') + panel_record.raw_fields['moleculartestingtype'] = 'R208.1' + panel_record.raw_fields['report'] = 'Results are normal' + panel_record.raw_fields['diagnosis_report'] = 'No variant found' + + res = @handler.process_fields(panel_record) + + # Should have all genes from R208.1 panel + expected_genes = %w[ATM BRCA1 BRCA2 CHEK2 PALB2] + assert_equal expected_genes.size, res.size + assert_equal 451, res[0].attribute_map['gene'] # ATM + assert_equal 7, res[1].attribute_map['gene'] # BRCA1 + assert_equal 8, res[2].attribute_map['gene'] # BRCA2 + assert_equal 865, res[3].attribute_map['gene'] # CHEK2 + assert_equal 3186, res[4].attribute_map['gene'] # PALB2 + end + + test 'process_gene_variant_rec' do + gene_variant_rec = build_raw_record('pseudo_id1' => 'patient4') + gene_variant_rec.raw_fields['gene'] = 'ATM' + gene_variant_rec.raw_fields['codingdnasequencechange'] = 'c.8156del' + gene_variant_rec.raw_fields['proteinimpact'] = 'p.(Arg2719fs)' + gene_variant_rec.raw_fields['zygosity'] = 'Mosaic' + gene_variant_rec.raw_fields['variantpathclass'] = 'Likely pathogenic' + gene_variant_rec.raw_fields['report'] = 'This patient has been screened for variants in the ' \ + 'following cancer predisposing genes by sequence and dosage analysis: \n\nATM*, BRCA1, BRCA2, ' \ + 'BRIP1, CHEK2*, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D.\n\n\n\nThe likely pathogenic ' \ + 'ATM variant c.8156del p.(Arg2719fs)' + + res = @handler.process_fields(gene_variant_rec) + assert_equal 11, res.size + + variant_genotype = res.find { |g| g.attribute_map['gene'] == 451 } # ATM + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 'c.8156del', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Arg2719fs', variant_genotype.attribute_map['proteinimpact'] + assert_equal 4, 
variant_genotype.attribute_map['variantpathclass'] + assert_equal 6, variant_genotype.attribute_map['geneticinheritance'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 451 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'process_result_variant_rec' do + result_variant_rec = build_raw_record('pseudo_id1' => 'patient4') + result_variant_rec.raw_fields['moleculartestingtype'] = 'R208.1' + result_variant_rec.raw_fields['genotype'] = 'No report required' + result_variant_rec.raw_fields['gene'] = nil + result_variant_rec.raw_fields['codingdnasequencechange'] = 'BRCA1 c.4065_4068del heterozygote' + result_variant_rec.raw_fields['proteinimpact'] = nil + result_variant_rec.raw_fields['zygosity'] = nil + result_variant_rec.raw_fields['variantpathclass'] = nil + result_variant_rec.raw_fields['report'] = 'Reason:Reported under a different indication.' + + res = @handler.process_fields(result_variant_rec) + assert_equal 2, res.size + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 7, variant_genotype.attribute_map['gene'] + assert_equal 'c.4065_4068del', variant_genotype.attribute_map['codingdnasequencechange'] + assert_nil variant_genotype.attribute_map['proteinimpact'] + assert_nil variant_genotype.attribute_map['variantpathclass'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + + normal_genotype = res[1] + assert_equal 1, normal_genotype.attribute_map['teststatus'] + assert_equal 3186, normal_genotype.attribute_map['gene'] + assert_nil normal_genotype.attribute_map['codingdnasequencechange'] + assert_nil normal_genotype.attribute_map['proteinimpact'] + assert_nil normal_genotype.attribute_map['variantpathclass'] + assert_nil normal_genotype.attribute_map['geneticinheritance'] + end + + test 'normal_report_result_rec' do + normal_report_result_rec = build_raw_record('pseudo_id1' => 'patient5') + normal_report_result_rec.raw_fields['genotype'] 
= 'R208_normal_Apr22' + normal_report_result_rec.raw_fields['gene'] = nil + normal_report_result_rec.raw_fields['codingdnasequencechange'] = 'No result' + normal_report_result_rec.raw_fields['proteinimpact'] = nil + normal_report_result_rec.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: BRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D ' \ + '(all coding exons and exon-intron boundaries).' + + res = @handler.process_fields(normal_report_result_rec) + assert_equal 9, res.size + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'first_of_report_variant_rec' do + first_of_report_variant_rec = build_raw_record('pseudo_id1' => 'patient6') + first_of_report_variant_rec.raw_fields['genotype'] = 'R208_ATM/CHEK2_C4/5_Apr22' + first_of_report_variant_rec.raw_fields['gene'] = nil + first_of_report_variant_rec.raw_fields['codingdnasequencechange'] = nil + first_of_report_variant_rec.raw_fields['proteinimpact'] = nil + first_of_report_variant_rec.raw_fields['report'] = 'RESULT: This individual is heterozygous for a germline pathogenic ATM truncating variant (details below). Heterozygous ATM pathogenic variants cause moderate risk1 cancer susceptibility, particularly breast cancer in females (OMIM: 607585; 114480). +IMPLICATIONS : Each of their offspring would be at 50% risk of inheriting this variant and genetic predisposition to ATM-associated cancers. Other relatives are also at increased risk.' 
+ res = @handler.process_fields(first_of_report_variant_rec) + assert_equal 2, res.size + assert_equal 451, res[0].attribute_map['gene'] + assert_equal 2, res[0].attribute_map['teststatus'] + + assert_equal 3186, res[1].attribute_map['gene'] + assert_equal 1, res[1].attribute_map['teststatus'] + end + + test 'process_exonic_deletion_variant' do + exon_record = build_raw_record('pseudo_id1' => 'patient6') + exon_record.raw_fields['indicationcategory'] = 'R207' + exon_record.raw_fields['moleculartestingtype'] = 'R207.1' + exon_record.raw_fields['genotype'] = 'R207 - BRCA Diag C4/5' + exon_record.raw_fields['gene'] = 'BRCA1' + exon_record.raw_fields['codingdnasequencechange'] = 'Deletion of exons 1-23' + exon_record.raw_fields['zygosity'] = 'Heterozygous' + exon_record.raw_fields['variantpathclass'] = 'Pathogenic' + exon_record.raw_fields['report'] = 'RESULT: This individual is heterozygous for a germline pathogenic BRCA1 copy number variant (details below).' + exon_record.raw_fields['diagnosis_report'] = '1.Genes screened in R207 panel: BRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D (all coding exons and exon-intron boundaries).' 
+ res = @handler.process_fields(exon_record) + + assert_equal 9, res.size + + variant_genotype = res.find { |g| g.attribute_map['gene'] == 7 } # BRCA1 + assert_not_nil variant_genotype + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal '1-23', variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, variant_genotype.attribute_map['sequencevarianttype'] + assert_equal 5, variant_genotype.attribute_map['variantpathclass'] + assert_equal 1, variant_genotype.attribute_map['variantgenotype'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + assert_nil variant_genotype.attribute_map['codingdnasequencechange'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 7 + + assert_equal 1, genotype.attribute_map['teststatus'] + assert_nil genotype.attribute_map['codingdnasequencechange'] + assert_nil genotype.attribute_map['proteinimpact'] + assert_nil genotype.attribute_map['variantpathclass'] + assert_nil genotype.attribute_map['geneticinheritance'] + end + end + + test 'exclude_genes_functionality' do + # Test analysis not performed exclusion + @handler.instance_variable_set(:@report, 'TP53 analysis has not been performed') + excluded1 = @handler.exclude_genes + assert_includes excluded1, 'TP53' + + # Test testing reported previously exclusion + @handler.instance_variable_set(:@report, 'TP53 testing has been reported previously') + excluded2 = @handler.exclude_genes + assert_includes excluded2, 'TP53' + + # Test Li Fraumeni syndrome exclusion + @handler.instance_variable_set(:@report, 'TP53 gene analysis for Li Fraumeni syndrome has been carried out') + excluded3 = @handler.exclude_genes + assert_includes excluded3, 'TP53' + end + + private + + def clinical_json + { sex: '2', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2019-10-25T00:00:00.000+01:00', + authoriseddate: '2019-11-25T00:00:00.000+00:00', + servicereportidentifier: 'Service Report 
Identifier', + sortdate: '2019-10-25T00:00:00.000+01:00', + genetictestscope: 'R208.2', + specimentype: '12', + report: 'RESULT\n\nNo pathogenic copy number variants were detected in the PALB2 gene.', + requesteddate: '2019-10-25T00:00:00.000+01:00', + age: 999 }.to_json + end + + def rawtext_clinical_json + { sex: 'F', + referringclinicianname: 'Clinician', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + indicationcategory: 'R207', + specimentype: 'DNA', + moleculartestingtype: 'R208.2', + requesteddate: '2021-12-08 00:00:00', + genotype: 'R208_normal', + authoriseddate: '2021-12-14 00:00:00', + provider_address: 'International Centre for Life', + name: 'Genetics Service', + report: 'RESULT\n\nNo pathogenic copy number variants were detected in the PALB2 gene.', + diagnosis_report: 'Germline heterozygous pathogenic variants in PALB2 inherited in an autosomal ' \ + 'dominant manner are associated with a 2-6 fold increased risk of breast cancer ' \ + 'in women. Men with pathogenic variants in the PALB2 gene also have an increased ' \ + 'risk for breast cancer; this risk is much smaller than the risk for women. Pathogenic ' \ + 'variants in PALB2 are also associated with an increased risk of pancreatic cancer. ' \ + 'Biallelic pathogenic variant events cause a subtype of Fanconi anaemia.\n\nReference ' \ + 'sequence: LRG_308t1 (NM_024675.3)\n\n\n\nMLPA analysis carried out using MRC Holland ' \ + 'kit P260-C1.\n\n\n\nDetected variants are assessed at the time of reporting according ' \ + 'to the ACGS best practice guidelines (http://www.acgs.uk.com/). Variant nomenclature ' \ + 'conforms to HGVS guidelines (http://www.hgvs.org). 
Sequence variants of no or unlikely ' \ + 'clinical significance are omitted from the reported results.', + patienttype: 'NHS', + providercode: 'RTD07', + receiveddate: '2025-09-29 00:00:00', + karyotypingmethod: 'MLPA P260', + codingdnasequencechange: 'No deletions/duplications detected', + proteinimpact: nil, + gene: nil, + zygosity: nil, + variantpathclass: nil }.to_json + end +end From 279fab04db47acf9221416096020bc7f27242aad Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 3 Nov 2025 15:35:33 +0000 Subject: [PATCH 08/14] Added processing of targeted cases --- .../leeds/leeds_handler_new_format.rb | 155 ++++++++++++++---- 1 file changed, 126 insertions(+), 29 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 6cc63a31..0fc8adc6 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -7,24 +7,56 @@ class LeedsHandlerNewFormat < Import::Germline::ProviderHandler include Import::Helpers::Brca::Providers::Rr8::Constants def process_fields(record) + # check if should process record from Other Cancer file + return unless should_process?(record) + genotype = Import::Brca::Core::GenotypeBrca.new(record) genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, PASS_THROUGH_FIELDS) genotype.attribute_map['organisationcode_testresult'] = '699C0' - genotype.add_test_scope(:full_screen) - add_moleculartestingtype(genotype, record) populate_variables(record) + + process_test_scope(genotype) + setup_derived_values(record) genotypes = [] - @genes_panel = get_genes_panel(record) - res = process_rec(genotype, record, genotypes) + if genotype.full_screen? + add_fs_moleculartestingtype(genotype, record) + @genes_panel = get_genes_panel(record) + res = process_fs_rec(genotype, record, genotypes) + elsif genotype.targeted? 
+ add_targ_moleculartestingtype(genotype) + res = process_targ_rec(genotype, record, genotypes) + end + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end + def should_process?(record) + file_name = @batch.original_filename + return true unless file_name =~ /Other/ix + + if record.raw_fields['moleculartestingtype'] == 'Familial' && + (record.raw_fields['report'].scan(NEW_TARG_GENES_REGEX).size.positive? || + record.raw_fields['diagnosis_report'].scan(NEW_TARG_GENES_REGEX).size.positive?) && + record.raw_fields['diagnosis_report'] !~ /BAP1/i && + record.raw_fields['codingdnasequencechange'] !~ /MUTYH/i && + record.raw_fields['genotype'] !~ /prenatal/i + return true + end + + false + end + def populate_variables(record) + record.raw_fields['gene']&.gsub!('CHEK 2', 'CHEK2') @report = record.raw_fields['report'] @moltestingtype = record.raw_fields['moleculartestingtype'] @value1 = record.raw_fields['proteinimpact'] @value12 = record.raw_fields['zygosity'] + @report_result = record.raw_fields['genotype'] + @value2 = record.raw_fields['gene'] + @result = record.raw_fields['codingdnasequencechange'] + @diag_report = record.raw_fields['diagnosis_report'] @pos_gene = nil @variantpathclass = nil @cdna_mutations = nil @@ -32,16 +64,32 @@ def populate_variables(record) @zygosity = nil end - def add_moleculartestingtype(genotype, record) + def process_test_scope(genotype) + if @moltestingtype == 'Familial' + genotype.add_test_scope(:targeted_mutation) + else + genotype.add_test_scope(:full_screen) + end + end + + def add_fs_moleculartestingtype(genotype, record) indication_catgeory = record.raw_fields['indicationcategory'] return unless %w[R207 R444].include? 
indication_catgeory genotype.add_molecular_testing_type_strict(:diagnostic) end - def get_genes_panel(record) + def add_targ_moleculartestingtype(genotype) + if @report_result.match?(/conf/i) && @report_result.match?(/R240/i) + genotype.add_molecular_testing_type_strict(:diagnostic) + elsif @report_result.match?(/pred/i) && @report_result.match?(/R242/i) + genotype.add_molecular_testing_type_strict(:predictive) + end + end + + def get_genes_panel(_record) genes = [] - genes << record.raw_fields['diagnosis_report']&.scan(NEW_FORMAT_GENES) + genes << @diag_report&.scan(NEW_FORMAT_GENES) genes << @report&.scan(NEW_FORMAT_GENES) genes << @moltestingtype&.scan(NEW_FORMAT_GENES) genes = genes.compact_blank @@ -60,22 +108,73 @@ def exclude_genes exclude_genes.flatten.uniq end - def process_rec(genotype, record, genotypes) - setup_derived_values(record) + def process_fs_rec(genotype, record, genotypes) process_genotype_priorities(genotype, record, genotypes) end + def process_targ_rec(genotype, record, genotypes) + @pos_gene = negative_gene = [] + if @value12 =~ /heterozyg|homozyg/i + @pos_gene = @value2&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq if @pos_gene.empty? 
+ process_variant_rec(genotype, 2, record, genotypes) + elsif @value12 =~ /variant\sabsent|not\sdetected/i + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + elsif @result =~ /No\sresult/i + targ_gene = @diag_report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + if targ_gene.size == 1 + process_status_genes(9, targ_gene, genotype, genotypes) + else + genotype_dup = genotype.dup + genotype_dup.add_status(9) + genotypes << genotype_dup + end + elsif @result =~ /No\sbiallelic|No\sbi-allelic/ix + targ_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + process_status_genes(4, targ_gene, genotype, genotypes) + elsif @result =~ /c\.|het/ix + @pos_gene = @result&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq if @pos_gene.empty? + process_variant_rec(genotype, 2, record, genotypes) + elsif @report_result =~ /Tumour\sresult\sconf\sseq\s\+ve/ix && @result =~ /No.*detected/ix + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + if @report&.match(CDNA) + @cdna_mutations = @report&.match(CDNA) + process_variant_rec(genotype, 2, record, genotypes) + elsif @report&.match(EXON_VARIANT_REGEX) + @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) + process_variant_rec(genotype, 2, record, genotypes) + else + process_status_genes(1, @pos_gene, genotype, genotypes) + end + elsif @report_result =~ /pos|\+ve/ix && @result =~ /variant\sabsent/i + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + elsif @report_result !~ /pos|\+ve/ix && @result =~ /variant\sabsent|no.*detected/ix + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + if negative_gene.empty? 
+ if @report_result =~ /B1/ + negative_gene << 'BRCA1' + elsif @report_result =~ /B2/ + negative_gene << 'BRCA2' + end + negative_gene = @report_result&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq + end + process_status_genes(1, negative_gene, genotype, genotypes) + end + genotypes + end + private def setup_derived_values(record) - result = record.raw_fields['codingdnasequencechange'] - @zygosity = calc_zygosity @variantpathclass = cal_variantpathclass(record) - @cdna_mutations = result&.match(CDNA) || @value1&.match(CDNA) - @exonic_mutations = result&.match(EXON_VARIANT_REGEX) - @protein_impact = @value1&.match(PROTEIN_REGEX) || result&.match(PROTEIN_REGEX) - @refid = result&.match(REF_TRANSCRIPT_ID) + @cdna_mutations = @result&.match(CDNA) || @value1&.match(CDNA) + @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) + @protein_impact = @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX) + @refid = @result&.match(REF_TRANSCRIPT_ID) end def calc_zygosity @@ -89,15 +188,13 @@ def calc_zygosity def process_genotype_priorities(genotype, record, genotypes) geno = record.raw_fields['genotype'] - value2 = record.raw_fields['gene'] - result = record.raw_fields['codingdnasequencechange'] # priority based extracting details return genotypes if fail_rec?(geno, genotype, genotypes) - return genotypes if protein_impact_variant_rec?(value2, genotype, record, genotypes) - return genotypes if normal_result_rec?(result, genotype, genotypes) - return genotypes if gene_variant_rec?(value2, genotype, record, genotypes) - return genotypes if result_variant_rec?(result, genotype, record, genotypes) + return genotypes if protein_impact_variant_rec?(genotype, record, genotypes) + return genotypes if normal_result_rec?(genotype, genotypes) + return genotypes if gene_variant_rec?(genotype, record, genotypes) + return genotypes if result_variant_rec?(genotype, record, genotypes) return genotypes if normal_report_result?(geno, genotype, genotypes) 
first_of_report_variant_rec?(genotype, record, genotypes) @@ -110,8 +207,8 @@ def fail_rec?(geno, genotype, genotypes) true end - def protein_impact_variant_rec?(value2, genotype, record, genotypes) - return false unless value2.nil? && @value1 =~ CDNA_REGEX + def protein_impact_variant_rec?(genotype, record, genotypes) + return false unless @value2.nil? && @value1 =~ CDNA_REGEX gene = [] gene << 'PALB2' if @value1 =~ /ALB2/ @@ -125,16 +222,16 @@ def protein_impact_variant_rec?(value2, genotype, record, genotypes) true end - def normal_result_rec?(result, genotype, genotypes) - return false unless result =~ /No.*detected/i || result =~ /No result - dosage fail'/i || result == '-' + def normal_result_rec?(genotype, genotypes) + return false unless @result =~ /No.*detected/i || @result =~ /No result - dosage fail'/i || @result == '-' negative_genes = @genes_panel process_status_genes(1, negative_genes, genotype, genotypes) true end - def gene_variant_rec?(value2, genotype, record, genotypes) - scanned_genes = value2&.scan(NEW_FORMAT_GENES) + def gene_variant_rec?(genotype, record, genotypes) + scanned_genes = @value2&.scan(NEW_FORMAT_GENES) @pos_gene = scanned_genes&.flatten&.uniq return false if @pos_gene.blank? @@ -144,8 +241,8 @@ def gene_variant_rec?(value2, genotype, record, genotypes) true end - def result_variant_rec?(result, genotype, record, genotypes) - scanned_result = result&.scan(NEW_FORMAT_GENES) + def result_variant_rec?(genotype, record, genotypes) + scanned_result = @result&.scan(NEW_FORMAT_GENES) @pos_gene = scanned_result&.flatten&.uniq return false if @pos_gene.blank? 
From 8f5c393de01224c92c1c3bd0157d9878b0a33829 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 3 Nov 2025 15:35:52 +0000 Subject: [PATCH 09/14] targ genes regex added --- lib/import/helpers/brca/providers/rr8/constants.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/import/helpers/brca/providers/rr8/constants.rb b/lib/import/helpers/brca/providers/rr8/constants.rb index 707cf1a3..fc2b056d 100644 --- a/lib/import/helpers/brca/providers/rr8/constants.rb +++ b/lib/import/helpers/brca/providers/rr8/constants.rb @@ -126,6 +126,8 @@ module Constants NEW_FORMAT_GENES = /(?ATM|BRCA1|BRCA2|BRIP1|CHEK2|MLH1|MSH2|MSH6|PALB2|POT1|PTEN| RAD51C|RAD51D|TP53)/x + NEW_TARG_GENES_REGEX = /(?BRCA1|BRCA2|BRIP1|CHEK2|PALB2|RAD51C|RAD51D)/x + # rubocop:disable Lint/MixedRegexpCaptureTypes BRCA_REGEX = /(?#{GENES})/ix From e0f1122c9b8bff0313b27df4aa177f641c9f33a7 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 3 Nov 2025 16:46:05 +0000 Subject: [PATCH 10/14] rubocop fixes --- .../leeds/leeds_handler_new_format.rb | 165 ++++++++++++------ 1 file changed, 116 insertions(+), 49 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 0fc8adc6..56302058 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -3,9 +3,11 @@ module Brca module Providers module Leeds # Process Leeds-specific record details into generalized internal genotype format for > 2025 files + # rubocop:disable Metrics/ClassLength class LeedsHandlerNewFormat < Import::Germline::ProviderHandler include Import::Helpers::Brca::Providers::Rr8::Constants + # rubocop:disable Metrics/MethodLength def process_fields(record) # check if should process record from Other Cancer file return unless should_process?(record) @@ -30,7 +32,9 @@ def process_fields(record) res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } 
end + # rubocop:enable Metrics/MethodLength + # rubocop:disable Metrics/AbcSize def should_process?(record) file_name = @batch.original_filename return true unless file_name =~ /Other/ix @@ -46,7 +50,9 @@ def should_process?(record) false end + # rubocop:enable Metrics/AbcSize + # rubocop:disable Metrics/AbcSize def populate_variables(record) record.raw_fields['gene']&.gsub!('CHEK 2', 'CHEK2') @report = record.raw_fields['report'] @@ -63,6 +69,7 @@ def populate_variables(record) @exonic_mutations = nil @zygosity = nil end + # rubocop:enable Metrics/AbcSize def process_test_scope(genotype) if @moltestingtype == 'Familial' @@ -113,57 +120,116 @@ def process_fs_rec(genotype, record, genotypes) end def process_targ_rec(genotype, record, genotypes) - @pos_gene = negative_gene = [] - if @value12 =~ /heterozyg|homozyg/i - @pos_gene = @value2&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq if @pos_gene.empty? - process_variant_rec(genotype, 2, record, genotypes) - elsif @value12 =~ /variant\sabsent|not\sdetected/i - negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - process_status_genes(1, negative_gene, genotype, genotypes) - elsif @result =~ /No\sresult/i - targ_gene = @diag_report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - if targ_gene.size == 1 - process_status_genes(9, targ_gene, genotype, genotypes) - else - genotype_dup = genotype.dup - genotype_dup.add_status(9) - genotypes << genotype_dup - end - elsif @result =~ /No\sbiallelic|No\sbi-allelic/ix - targ_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - process_status_genes(4, targ_gene, genotype, genotypes) - elsif @result =~ /c\.|het/ix - @pos_gene = @result&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq if @pos_gene.empty? 
+ @pos_gene = [] + return genotypes if zygosity_variant_targ_rec?(genotype, record, genotypes) + return genotypes if variant_absent_targ_rec?(genotype, genotypes) + return genotypes if no_result_targ_rec?(genotype, genotypes) + return genotypes if no_biallelic_targ_rec?(genotype, genotypes) + return genotypes if cdna_het_variant_targ_rec?(genotype, record, genotypes) + return genotypes if report_variant_targ_rec?(genotype, record, genotypes) + return genotypes if positive_variant_absent_targ_rec?(genotype, genotypes) + return genotypes if non_positive_variant_absent_targ_rec?(genotype, genotypes) + + genotypes + end + + def zygosity_variant_targ_rec?(genotype, record, genotypes) + return false unless @value12 =~ /heterozyg|homozyg/i + + @pos_gene = @value2&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + if @pos_gene.empty? + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + end + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def variant_absent_targ_rec?(genotype, genotypes) + return false unless @value12 =~ /variant\sabsent|not\sdetected/i + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + def no_result_targ_rec?(genotype, genotypes) + return false unless @result =~ /No\sresult/i + + targ_gene = @diag_report&.scan(NEW_TARG_GENES_REGEX) + targ_gene = targ_gene&.flatten&.uniq + if targ_gene.size == 1 + process_status_genes(9, targ_gene, genotype, genotypes) + else + genotype_dup = genotype.dup + genotype_dup.add_status(9) + genotypes << genotype_dup + end + true + end + + def no_biallelic_targ_rec?(genotype, genotypes) + return false unless @result =~ /No\sbiallelic|No\sbi-allelic/ix + + targ_gene = @report&.scan(NEW_TARG_GENES_REGEX) + targ_gene = targ_gene&.flatten&.uniq + process_status_genes(4, targ_gene, genotype, genotypes) + true + end + + def 
cdna_het_variant_targ_rec?(genotype, record, genotypes) + return false unless @result =~ /c\.|het/ix + + @pos_gene = @result&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + if @pos_gene.empty? + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + end + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def report_variant_targ_rec?(genotype, record, genotypes) + return false unless @report_result =~ /Tumour\sresult\sconf\sseq\s\+ve/ix && + @result =~ /No.*detected/ix + + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + @cdna_mutations = @report&.match(CDNA) + @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) + if @cdna_mutations || @exonic_mutations process_variant_rec(genotype, 2, record, genotypes) - elsif @report_result =~ /Tumour\sresult\sconf\sseq\s\+ve/ix && @result =~ /No.*detected/ix - @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - if @report&.match(CDNA) - @cdna_mutations = @report&.match(CDNA) - process_variant_rec(genotype, 2, record, genotypes) - elsif @report&.match(EXON_VARIANT_REGEX) - @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) - process_variant_rec(genotype, 2, record, genotypes) - else - process_status_genes(1, @pos_gene, genotype, genotypes) - end - elsif @report_result =~ /pos|\+ve/ix && @result =~ /variant\sabsent/i - negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - process_status_genes(1, negative_gene, genotype, genotypes) - elsif @report_result !~ /pos|\+ve/ix && @result =~ /variant\sabsent|no.*detected/ix - negative_gene = @report&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - if negative_gene.empty? 
- if @report_result =~ /B1/ - negative_gene << 'BRCA1' - elsif @report_result =~ /B2/ - negative_gene << 'BRCA2' - end - negative_gene = @report_result&.scan(NEW_TARG_GENES_REGEX)&.flatten&.uniq - end - process_status_genes(1, negative_gene, genotype, genotypes) + else + process_status_genes(1, @pos_gene, genotype, genotypes) end - genotypes + true + end + + def positive_variant_absent_targ_rec?(genotype, genotypes) + return false unless @report_result =~ /pos|\+ve/ix && @result =~ /variant\sabsent/i + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + def non_positive_variant_absent_targ_rec?(genotype, genotypes) + return false unless @report_result !~ /pos|\+ve/ix && + @result =~ /variant\sabsent|no.*detected/ix + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq || [] + if negative_gene.empty? + negative_gene = @report_result&.scan(NEW_TARG_GENES_REGEX) + negative_gene << malformed_brca_gene + negative_gene = negative_gene&.flatten&.uniq + end + process_status_genes(1, negative_gene, genotype, genotypes) + true end private @@ -354,6 +420,7 @@ def process_protein_impact(genotype, mutation) end end end + # rubocop:enable Metrics/ClassLength end end end From 69eff009d940e2891e1ecd93661819d40d837b21 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Mon, 3 Nov 2025 23:09:16 +0000 Subject: [PATCH 11/14] more fixes --- .../leeds/leeds_handler_new_format.rb | 39 ++++++++++++------- .../leeds/leeds_handler_new_format_test.rb | 5 --- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 56302058..d40f932f 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -34,23 +34,26 @@ 
def process_fields(record) end # rubocop:enable Metrics/MethodLength - # rubocop:disable Metrics/AbcSize + # rubocop:disable Metrics/CyclomaticComplexity def should_process?(record) file_name = @batch.original_filename return true unless file_name =~ /Other/ix - if record.raw_fields['moleculartestingtype'] == 'Familial' && - (record.raw_fields['report'].scan(NEW_TARG_GENES_REGEX).size.positive? || - record.raw_fields['diagnosis_report'].scan(NEW_TARG_GENES_REGEX).size.positive?) && - record.raw_fields['diagnosis_report'] !~ /BAP1/i && - record.raw_fields['codingdnasequencechange'] !~ /MUTYH/i && - record.raw_fields['genotype'] !~ /prenatal/i - return true - end + fields = record.raw_fields + return false unless fields['moleculartestingtype'] == 'Familial' + + rep_scan = fields['report']&.scan(NEW_TARG_GENES_REGEX) + diag_scan = fields['diagnosis_report']&.scan(NEW_TARG_GENES_REGEX) + has_genes = rep_scan&.any? || diag_scan&.any? - false + return false unless has_genes + return false if fields['diagnosis_report'] =~ /BAP1/i + return false if fields['codingdnasequencechange'] =~ /MUTYH/i + return false if fields['genotype'] =~ /prenatal/i + + true end - # rubocop:enable Metrics/AbcSize + # rubocop:enable Metrics/CyclomaticComplexity # rubocop:disable Metrics/AbcSize def populate_variables(record) @@ -137,10 +140,10 @@ def zygosity_variant_targ_rec?(genotype, record, genotypes) return false unless @value12 =~ /heterozyg|homozyg/i @pos_gene = @value2&.scan(NEW_TARG_GENES_REGEX) - @pos_gene = @pos_gene&.flatten&.uniq + @pos_gene = @pos_gene&.flatten&.uniq || [] if @pos_gene.empty? 
@pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) - @pos_gene = @pos_gene&.flatten&.uniq + @pos_gene = @pos_gene&.flatten&.uniq || [] end process_variant_rec(genotype, 2, record, genotypes) true @@ -183,10 +186,10 @@ def cdna_het_variant_targ_rec?(genotype, record, genotypes) return false unless @result =~ /c\.|het/ix @pos_gene = @result&.scan(NEW_TARG_GENES_REGEX) - @pos_gene = @pos_gene&.flatten&.uniq + @pos_gene = @pos_gene&.flatten&.uniq || [] if @pos_gene.empty? @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) - @pos_gene = @pos_gene&.flatten&.uniq + @pos_gene = @pos_gene&.flatten&.uniq || [] end process_variant_rec(genotype, 2, record, genotypes) true @@ -234,6 +237,12 @@ def non_positive_variant_absent_targ_rec?(genotype, genotypes) private + def malformed_brca_gene + return 'BRCA1' if @report_result =~ /\bB1\b/ + + 'BRCA2' if @report_result =~ /\bB2\b/ + end + def setup_derived_values(record) @zygosity = calc_zygosity @variantpathclass = cal_variantpathclass(record) diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb index 6bd525a8..24e5ff95 100644 --- a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb +++ b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb @@ -10,11 +10,6 @@ def setup @logger = Import::Log.get_logger end - test 'moleculartestingtype' do - @handler.add_moleculartestingtype(@genotype, @record) - assert_equal 1, @genotype.attribute_map['moleculartestingtype'] - end - test 'process_failed_test_record' do failed_record = build_raw_record('pseudo_id1' => 'patient1') failed_record.raw_fields['genotype'] = 'R208_fail_FFPE' From a989dc75727754cd6f0bd2d571f01522fed6a760 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 5 Nov 2025 09:51:51 +0000 Subject: [PATCH 12/14] targeted tests and updates --- .../leeds/leeds_handler_new_format.rb | 6 +- .../bash/Import_all_brca_interactive.sh | 3 +- 
.../leeds/leeds_handler_new_format_test.rb | 207 ++++++++++++++++++ 3 files changed, 213 insertions(+), 3 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index d40f932f..8480c315 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -90,9 +90,9 @@ def add_fs_moleculartestingtype(genotype, record) end def add_targ_moleculartestingtype(genotype) - if @report_result.match?(/conf/i) && @report_result.match?(/R240/i) + if @report_result.match?(/conf/i) || @report_result.match?(/R240/i) genotype.add_molecular_testing_type_strict(:diagnostic) - elsif @report_result.match?(/pred/i) && @report_result.match?(/R242/i) + elsif @report_result.match?(/pred/i) || @report_result.match?(/R242/i) genotype.add_molecular_testing_type_strict(:predictive) end end @@ -216,6 +216,8 @@ def positive_variant_absent_targ_rec?(genotype, genotypes) negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) negative_gene = negative_gene&.flatten&.uniq + # only process second gene + negative_gene = [negative_gene[1]] if negative_gene.size == 2 process_status_genes(1, negative_gene, genotype, genotypes) true end diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index 0d9a9b28..09920c41 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -53,7 +53,8 @@ RR8 () { MBIS=$1 PROV='RR8' IFS=$'\n' -for x in $(find $DIRPATH/$FILEPATH -type f -name "*BRCA*.pseudo" -path "*/$PROV/*" \ +for x in $(find $DIRPATH/$FILEPATH -path "*/$PROV/*" -type f \ +\( -name "*BRCA*.pseudo" -o -type f -name "*Other*.pseudo" \) \ \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ ! 
-name "bede6d1385c0ae9db4fe61fe9b07d58f86e2dc60_24.11.2021 to 31.03.2025_BRCA_DATA__2021_11_24__to__2025_03_31_b.xlsx.pseudo") do diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb index 24e5ff95..3d53c111 100644 --- a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb +++ b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb @@ -218,6 +218,213 @@ def setup assert_includes excluded3, 'TP53' end + # Targeted testing (Familial) tests + test 'process_targeted_heterozygous_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ1') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['gene'] = 'BRCA1' + targeted_record.raw_fields['codingdnasequencechange'] = 'c.5266dup' + targeted_record.raw_fields['proteinimpact'] = 'p.(Gln1756fs)' + targeted_record.raw_fields['zygosity'] = 'Heterozygous' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' + targeted_record.raw_fields['report'] = 'Testing for the familial BRCA1 variant c.5266dup.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 7, variant_genotype.attribute_map['gene'] # BRCA1 + assert_equal 'c.5266dup', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln1756fs', variant_genotype.attribute_map['proteinimpact'] + assert_equal 5, variant_genotype.attribute_map['variantpathclass'] + assert_equal 1, variant_genotype.attribute_map['variantgenotype'] + end + + test 'process_targeted_homozygous_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ2') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['gene'] = 'BRCA2' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_007294.3:Exon 13 duplication' + targeted_record.raw_fields['zygosity'] = 'Homozygous' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['genotype'] = 'Familial_conf_seq_+ve_R240' + targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 8, variant_genotype.attribute_map['gene'] + assert_equal 2, variant_genotype.attribute_map['variantgenotype'] + assert_equal 1, variant_genotype.attribute_map['moleculartestingtype'] + assert_equal 'NM_007294.3', variant_genotype.attribute_map['referencetranscriptid'] + assert_equal '13', variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 4, variant_genotype.attribute_map['sequencevarianttype'] + end + + test 'process_targeted_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ3') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_007294.3:Exon 13 duplication' + targeted_record.raw_fields['zygosity'] = 'Variant absent' + targeted_record.raw_fields['genotype'] = 'Familial testing negative' + targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic BRCA1 variant' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + negative_genotype = res[0] + assert_equal 1, negative_genotype.attribute_map['teststatus'] + assert_equal 7, negative_genotype.attribute_map['gene'] # BRCA1 + end + + test 'process_targeted_no_result' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ4') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Fail/Results not required' + targeted_record.raw_fields['diagnosis_report'] = 'Germline pathogenic variants in CHEK2 have been reported in several studies' + targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_equal 865, genotype.attribute_map['gene'] # CHEK2 + end + + test 'process_targeted_no_result_multiple_genes' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ5') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Familial testing' + targeted_record.raw_fields['diagnosis_report'] = 'Testing for BRCA1 and BRCA2 variants' + targeted_record.raw_fields['report'] = 'Unable to complete testing' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_nil genotype.attribute_map['gene'] + end + + test 'process_targeted_no_biallelic' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ6') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No bi-allelic inheritance of familial PALB2 variants' + targeted_record.raw_fields['genotype'] = 'FA familial C5 normal' + targeted_record.raw_fields['report'] = 'Sequence analysis indicates the absence of bi-allelic inheritance of the familial PALB2 variants' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 4, genotype.attribute_map['teststatus'] + assert_equal 3186, genotype.attribute_map['gene'] # PALB2 + end + + test 'process_targeted_cdna_het_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ7') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'BRCA1 exon 1 deletion heterozygote' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 MLPA pos' +
targeted_record.raw_fields['report'] = 'This patient is heterozygous for the familial likely pathogenic deletion of BRCA1 exons 1A and 1B' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] # BRCA1 + assert_equal '1', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + assert_equal 2, genotype.attribute_map['moleculartestingtype'] + end + + test 'process_targeted_report_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ8') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' + targeted_record.raw_fields['genotype'] = 'Tumour result conf seq +ve' + targeted_record.raw_fields['report'] = 'Tumour testing for BRCA2 variant c.1234A>G detected in tumour' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 8, genotype.attribute_map['gene'] # BRCA2 + assert_equal 'c.1234A>G', genotype.attribute_map['codingdnasequencechange'] + end + + test 'process_targeted_tumour_result_with_exon' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ9') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No germline variant detected' + targeted_record.raw_fields['genotype'] = 'Tumour result conf seq +ve' + targeted_record.raw_fields['report'] = 'Tumour testing for BRCA1. Deletion of exon 5 detected in tumour.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] # BRCA1 + assert_equal '5', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + assert_equal 1, genotype.attribute_map['moleculartestingtype'] + end + + test 'process_targeted_positive_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ10') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'PALB2 variant absent' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 seq pos' + targeted_record.raw_fields['report'] = 'Analysis indicates that the familial pathogenic PALB2 variant c.3116del is absent in this patient.' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 3186, genotype.attribute_map['gene'] + end + + test 'process_targeted_non_positive_variant_absent_with_brca1' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ11') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 MLPA neg' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic BRCA1 duplication of exon 13 is absent in this patient' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] + end + + test 'process_targeted_non_positive_variant_absent_with_brca2' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ12') + 
targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No variant detected' + targeted_record.raw_fields['genotype'] = 'Result B2' + targeted_record.raw_fields['report'] = 'Testing completed' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 8, genotype.attribute_map['gene'] # BRCA2 + end + private def clinical_json From 4cc176d06e2ef4231ddd1d6fcbb9724c42a13df8 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Wed, 5 Nov 2025 09:54:23 +0000 Subject: [PATCH 13/14] reverted not needed change --- lib/import/brca/scripts/bash/Import_all_brca_interactive.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index 09920c41..8eb9c955 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -15,7 +15,7 @@ OIFS="$IFS" IFS=$'\n' DIRPATH=$1 echo $DIRPATH -#DIRPATH=~/work/new_dms/data_management_system +#DIRPATH=~/work/data_management_system #FILEPATH="private/pseudonymised_data/updated_files/" FILEPATH=$2 echo $FILEPATH From b32117a0d5d41d8aeaa5406ae390b86652fa7762 Mon Sep 17 00:00:00 2001 From: shilpigoel Date: Thu, 13 Nov 2025 14:48:08 +0000 Subject: [PATCH 14/14] Review comment incorporated to fix varpathclass --- .../brca/providers/leeds/leeds_handler_new_format.rb | 11 ++++++----- lib/import/germline/genotype.rb | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index 8480c315..e09a4b03 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -369,7 
+369,7 @@ def add_geneticinheritance(genotype, _record) def cal_variantpathclass(record) variantpathclass = record.raw_fields['variantpathclass'] varclass = classify_variant_pathogenicity(variantpathclass) - varclass || classify_protein_impact(record.raw_fields['proteinimpact']) + varclass || classify_protein_impact end def classify_variant_pathogenicity(variantpathclass) @@ -383,18 +383,19 @@ def classify_variant_pathogenicity(variantpathclass) end end - def classify_protein_impact(protein_impact) - case protein_impact + def classify_protein_impact + require 'pry' + case @value1 when /C1/ 1 when /C2/ 2 - when /C3/ - 3 when /\(cold\sC3\)/i 8 when /\(hot\sC3\)/i 9 + when /C3/ + 3 end end diff --git a/lib/import/germline/genotype.rb b/lib/import/germline/genotype.rb index ac9c3370..c9aabe41 100644 --- a/lib/import/germline/genotype.rb +++ b/lib/import/germline/genotype.rb @@ -340,7 +340,7 @@ def add_variant_impact(impact) end def add_variant_class(variant) - if variant.is_a?(Integer) && variant >= 1 && variant <= 7 + if variant.is_a?(Integer) && variant >= 1 && variant <= 9 @attribute_map['variantpathclass'] = variant elsif variant.is_a?(String) if VARIANT_CLASS_MAP[variant.downcase.strip]