diff --git a/lib/import/brca/core/brca_handler_mapping.rb b/lib/import/brca/core/brca_handler_mapping.rb index 583d87b4..1d778372 100644 --- a/lib/import/brca/core/brca_handler_mapping.rb +++ b/lib/import/brca/core/brca_handler_mapping.rb @@ -6,7 +6,8 @@ module Core # Provides the handler appropriate for the dataformat from each center class BrcaHandlerMapping HANDLER_MAPPING = { - 'RR8' => Import::Brca::Providers::Leeds::LeedsHandlerNew, + 'RR8' => Import::Brca::Providers::Leeds::LeedsHandlerNewFormat, + 'RR8_2' => Import::Brca::Providers::Leeds::LeedsHandlerOld, 'RNZ' => Import::Brca::Providers::Salisbury::SalisburyHandler, 'RVJ' => Import::Brca::Providers::Bristol::BristolHandler, 'RTD' => Import::Brca::Providers::Newcastle::NewcastleHandler, diff --git a/lib/import/brca/core/genotype_brca.rb b/lib/import/brca/core/genotype_brca.rb index 0235fd57..8506020c 100644 --- a/lib/import/brca/core/genotype_brca.rb +++ b/lib/import/brca/core/genotype_brca.rb @@ -46,6 +46,7 @@ class GenotypeBrca < Import::Germline::Genotype 'NTHL1' => 3108, 'POLD1' => 3408, 'POLE' => 5000, + 'POT1' => 5001, 'SDHB' => 68, 'VHL' => 83 }.freeze @@ -86,6 +87,7 @@ class GenotypeBrca < Import::Germline::Genotype (?NTHL1)| (?POLD1)| (?POLE)| + (?POT1)| (?SDHB)| (?VHL)/ix # Added by Francesco @@ -114,7 +116,7 @@ def add_gene(brca_input) def process_integer_imput(brca_input) if [7, 8, 72, 79, 451, 865, 3186, 2744, 1432, 2804, 2808, 3394, 62, 76, - 590, 2912, 3615, 3616, 2850, 54, 55, 74, 4952, 18, 20, 794].include? brca_input + 590, 2912, 3615, 3616, 2850, 54, 55, 74, 4952, 18, 20, 794, 5001].include? brca_input @attribute_map['gene'] = brca_input @logger.debug "SUCCESSFUL gene parse for #{brca_input}" elsif (1..2).cover? 
brca_input diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb new file mode 100644 index 00000000..e09a4b03 --- /dev/null +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -0,0 +1,439 @@ +module Import + module Brca + module Providers + module Leeds + # Process Leeds-specific record details into generalized internal genotype format for > 2025 files + # rubocop:disable Metrics/ClassLength + class LeedsHandlerNewFormat < Import::Germline::ProviderHandler + include Import::Helpers::Brca::Providers::Rr8::Constants + + # rubocop:disable Metrics/MethodLength + def process_fields(record) + # check if should process record from Other Cancer file + return unless should_process?(record) + + genotype = Import::Brca::Core::GenotypeBrca.new(record) + genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, + PASS_THROUGH_FIELDS) + genotype.attribute_map['organisationcode_testresult'] = '699C0' + populate_variables(record) + + process_test_scope(genotype) + setup_derived_values(record) + genotypes = [] + if genotype.full_screen? + add_fs_moleculartestingtype(genotype, record) + @genes_panel = get_genes_panel(record) + res = process_fs_rec(genotype, record, genotypes) + elsif genotype.targeted? + add_targ_moleculartestingtype(genotype) + res = process_targ_rec(genotype, record, genotypes) + end + + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end + # rubocop:enable Metrics/MethodLength + + # rubocop:disable Metrics/CyclomaticComplexity + def should_process?(record) + file_name = @batch.original_filename + return true unless file_name =~ /Other/ix + + fields = record.raw_fields + return false unless fields['moleculartestingtype'] == 'Familial' + + rep_scan = fields['report']&.scan(NEW_TARG_GENES_REGEX) + diag_scan = fields['diagnosis_report']&.scan(NEW_TARG_GENES_REGEX) + has_genes = rep_scan&.any? || diag_scan&.any? 
+ + return false unless has_genes + return false if fields['diagnosis_report'] =~ /BAP1/i + return false if fields['codingdnasequencechange'] =~ /MUTYH/i + return false if fields['genotype'] =~ /prenatal/i + + true + end + # rubocop:enable Metrics/CyclomaticComplexity + + # rubocop:disable Metrics/AbcSize + def populate_variables(record) + record.raw_fields['gene']&.gsub!('CHEK 2', 'CHEK2') + @report = record.raw_fields['report'] + @moltestingtype = record.raw_fields['moleculartestingtype'] + @value1 = record.raw_fields['proteinimpact'] + @value12 = record.raw_fields['zygosity'] + @report_result = record.raw_fields['genotype'] + @value2 = record.raw_fields['gene'] + @result = record.raw_fields['codingdnasequencechange'] + @diag_report = record.raw_fields['diagnosis_report'] + @pos_gene = nil + @variantpathclass = nil + @cdna_mutations = nil + @exonic_mutations = nil + @zygosity = nil + end + # rubocop:enable Metrics/AbcSize + + def process_test_scope(genotype) + if @moltestingtype == 'Familial' + genotype.add_test_scope(:targeted_mutation) + else + genotype.add_test_scope(:full_screen) + end + end + + def add_fs_moleculartestingtype(genotype, record) + indication_catgeory = record.raw_fields['indicationcategory'] + return unless %w[R207 R444].include? 
indication_catgeory + + genotype.add_molecular_testing_type_strict(:diagnostic) + end + + def add_targ_moleculartestingtype(genotype) + if @report_result&.match?(/conf/i) || @report_result&.match?(/R240/i) # &. guards a nil genotype field + genotype.add_molecular_testing_type_strict(:diagnostic) + elsif @report_result&.match?(/pred/i) || @report_result&.match?(/R242/i) + genotype.add_molecular_testing_type_strict(:predictive) + end + end + + def get_genes_panel(_record) + genes = [] + genes << @diag_report&.scan(NEW_FORMAT_GENES) + genes << @report&.scan(NEW_FORMAT_GENES) + genes << @moltestingtype&.scan(NEW_FORMAT_GENES) + genes = genes.compact_blank + + r208_matches = @moltestingtype&.scan(/R208.1/i) + genes << %w[ATM BRCA1 BRCA2 CHEK2 PALB2] if genes.empty? && r208_matches&.size&.positive? + + genes.flatten.uniq - exclude_genes + end + + def exclude_genes + exclude_genes = [] + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}\sanalysis\shas\snot\sbeen\sperformed/ix) + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}\stesting\shas\sbeen\sreported\spreviously/ix) + exclude_genes << @report&.scan(/#{NEW_FORMAT_GENES}[a-zA-Z0-9\s]+Li\sFraumeni\ssyndrome/ix) + exclude_genes.flatten.uniq + end + + def process_fs_rec(genotype, record, genotypes) + process_genotype_priorities(genotype, record, genotypes) + end + + def process_targ_rec(genotype, record, genotypes) + @pos_gene = [] + return genotypes if zygosity_variant_targ_rec?(genotype, record, genotypes) + return genotypes if variant_absent_targ_rec?(genotype, genotypes) + return genotypes if no_result_targ_rec?(genotype, genotypes) + return genotypes if no_biallelic_targ_rec?(genotype, genotypes) + return genotypes if cdna_het_variant_targ_rec?(genotype, record, genotypes) + return genotypes if report_variant_targ_rec?(genotype, record, genotypes) + return genotypes if positive_variant_absent_targ_rec?(genotype, genotypes) + return genotypes if non_positive_variant_absent_targ_rec?(genotype, genotypes) + + genotypes + end + + def 
zygosity_variant_targ_rec?(genotype, record, genotypes) + return false unless @value12 =~ /heterozyg|homozyg/i + + @pos_gene = @value2&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq || [] + if @pos_gene.empty? + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq || [] + end + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def variant_absent_targ_rec?(genotype, genotypes) + return false unless @value12 =~ /variant\sabsent|not\sdetected/i + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + def no_result_targ_rec?(genotype, genotypes) + return false unless @result =~ /No\sresult/i + + targ_gene = @diag_report&.scan(NEW_TARG_GENES_REGEX) + targ_gene = targ_gene&.flatten&.uniq || [] # nil diagnosis_report yields no genes + if targ_gene.size == 1 + process_status_genes(9, targ_gene, genotype, genotypes) + else + genotype_dup = genotype.dup + genotype_dup.add_status(9) + genotypes << genotype_dup + end + true + end + + def no_biallelic_targ_rec?(genotype, genotypes) + return false unless @result =~ /No\sbiallelic|No\sbi-allelic/ix + + targ_gene = @report&.scan(NEW_TARG_GENES_REGEX) + targ_gene = targ_gene&.flatten&.uniq + process_status_genes(4, targ_gene, genotype, genotypes) + true + end + + def cdna_het_variant_targ_rec?(genotype, record, genotypes) + return false unless @result =~ /c\.|het/ix + + @pos_gene = @result&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq || [] + if @pos_gene.empty? 
+ @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq || [] + end + process_variant_rec(genotype, 2, record, genotypes) + true + end + + def report_variant_targ_rec?(genotype, record, genotypes) + return false unless @report_result =~ /Tumour\sresult\sconf\sseq\s\+ve/ix && + @result =~ /No.*detected/ix + + @pos_gene = @report&.scan(NEW_TARG_GENES_REGEX) + @pos_gene = @pos_gene&.flatten&.uniq + @cdna_mutations = @report&.match(CDNA) + @exonic_mutations = @report&.match(EXON_VARIANT_REGEX) + if @cdna_mutations || @exonic_mutations + process_variant_rec(genotype, 2, record, genotypes) + else + process_status_genes(1, @pos_gene, genotype, genotypes) + end + true + end + + def positive_variant_absent_targ_rec?(genotype, genotypes) + return false unless @report_result =~ /pos|\+ve/ix && @result =~ /variant\sabsent/i + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq || [] # nil report yields no genes + # only process second gene + negative_gene = [negative_gene[1]] if negative_gene.size == 2 + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + def non_positive_variant_absent_targ_rec?(genotype, genotypes) + return false unless @report_result !~ /pos|\+ve/ix && + @result =~ /variant\sabsent|no.*detected/ix + + negative_gene = @report&.scan(NEW_TARG_GENES_REGEX) + negative_gene = negative_gene&.flatten&.uniq || [] + if negative_gene.empty? 
+ negative_gene = @report_result&.scan(NEW_TARG_GENES_REGEX) + negative_gene << malformed_brca_gene + negative_gene = negative_gene&.flatten&.uniq + end + process_status_genes(1, negative_gene, genotype, genotypes) + true + end + + private + + def malformed_brca_gene + return 'BRCA1' if @report_result =~ /\bB1\b/ + + 'BRCA2' if @report_result =~ /\bB2\b/ + end + + def setup_derived_values(record) + @zygosity = calc_zygosity + @variantpathclass = cal_variantpathclass(record) + @cdna_mutations = @result&.match(CDNA) || @value1&.match(CDNA) + @exonic_mutations = @result&.match(EXON_VARIANT_REGEX) + @protein_impact = @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX) + @refid = @result&.match(REF_TRANSCRIPT_ID) + end + + def calc_zygosity + case @value12 + when /het/i + 1 + when /homo/i + 2 + end + end + + def process_genotype_priorities(genotype, record, genotypes) + geno = record.raw_fields['genotype'] + + # priority based extracting details + return genotypes if fail_rec?(geno, genotype, genotypes) + return genotypes if protein_impact_variant_rec?(genotype, record, genotypes) + return genotypes if normal_result_rec?(genotype, genotypes) + return genotypes if gene_variant_rec?(genotype, record, genotypes) + return genotypes if result_variant_rec?(genotype, record, genotypes) + return genotypes if normal_report_result?(geno, genotype, genotypes) + + first_of_report_variant_rec?(genotype, record, genotypes) + end + + def fail_rec?(geno, genotype, genotypes) + return false unless geno =~ /fail/i && geno !~ /dosage/i + + process_status_genes(9, @genes_panel, genotype, genotypes) + true + end + + def protein_impact_variant_rec?(genotype, record, genotypes) + return false unless @value2.nil? && @value1 =~ CDNA_REGEX + + gene = [] + gene << 'PALB2' if @value1 =~ /ALB2/ + gene << @value1&.scan(NEW_FORMAT_GENES) + @pos_gene = gene.flatten.uniq + teststatus = @value1 =~ /C(1|2)/ ? 
10 : 2 + + process_variant_rec(genotype, teststatus, record, genotypes) if @pos_gene.present? + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def normal_result_rec?(genotype, genotypes) + return false unless @result =~ /No.*detected/i || @result =~ /No result - dosage fail/i || @result == '-' + + negative_genes = @genes_panel + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def gene_variant_rec?(genotype, record, genotypes) + scanned_genes = @value2&.scan(NEW_FORMAT_GENES) + @pos_gene = scanned_genes&.flatten&.uniq + return false if @pos_gene.blank? + + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def result_variant_rec?(genotype, record, genotypes) + scanned_result = @result&.scan(NEW_FORMAT_GENES) + @pos_gene = scanned_result&.flatten&.uniq + return false if @pos_gene.blank? + + process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + true + end + + def normal_report_result?(geno, genotype, genotypes) + return false unless geno =~ /normal/i + + process_status_genes(1, @genes_panel, genotype, genotypes) + true + end + + def first_of_report_variant_rec?(genotype, record, genotypes) + return genotypes unless @report =~ /.*heterozygous\s+for.*pathogenic\s*#{NEW_FORMAT_GENES}/ix + + @pos_gene = [$LAST_MATCH_INFO[:gene]] + if @pos_gene.present? 
+ process_variant_rec(genotype, 2, record, genotypes) + negative_genes = @genes_panel - @pos_gene + process_status_genes(1, negative_genes, genotype, genotypes) + end + genotypes + end + + def process_variant_rec(genotype, status, record, genotypes) + genotype_dup = genotype.dup + add_geneticinheritance(genotype_dup, record) + genotype_dup.add_gene(@pos_gene[0]) + genotype_dup.add_zygosity(@zygosity) + process_cdna_variant(genotype_dup, @cdna_mutations) if @cdna_mutations.present? + process_protein_impact(genotype_dup, @protein_impact) if @protein_impact.present? + process_exonic_variant(genotype_dup, @exonic_mutations) if @exonic_mutations.present? + genotype_dup.add_referencetranscriptid(@refid.to_s) if @refid.present? + genotype_dup.add_variant_class(@variantpathclass) + genotype_dup.add_status(status) + genotypes << genotype_dup + end + + def add_geneticinheritance(genotype, _record) + genotype.attribute_map['geneticinheritance'] = @value12 =~ /mosaic/i || @value1 =~ /VAF/ ? 6 : 4 + end + + def cal_variantpathclass(record) + variantpathclass = record.raw_fields['variantpathclass'] + varclass = classify_variant_pathogenicity(variantpathclass) + varclass || classify_protein_impact + end + + def classify_variant_pathogenicity(variantpathclass) + case variantpathclass + when /Likely\spathogenic/i + 4 + when /Pathogenic/i + 5 + when /Uncertain\ssignificance/i + 3 + end + end + + def classify_protein_impact + # Derive the class from the C1/C2/C3 tag in the proteinimpact text (cold C3 is 8, hot C3 is 9) + case @value1 + when /C1/ + 1 + when /C2/ + 2 + when /\(cold\sC3\)/i + 8 + when /\(hot\sC3\)/i + 9 + when /C3/ + 3 + end + end + + def process_status_genes(status, negative_genes, genotype, genotypes) + negative_genes&.each do |gene| + genotype_dup = genotype.dup + genotype_dup.add_gene(gene) + genotype_dup.add_status(status) + genotypes << genotype_dup + end + end + + def process_exonic_variant(genotype, mutation) + return if mutation[:exons].blank? 
+ + genotype.add_exon_location(mutation[:exons]) + genotype.add_variant_type(mutation[:variant]) + @logger.debug "SUCCESSFUL exon variant parse for: #{mutation}" + end + + def process_cdna_variant(genotype, mutation) + return if mutation[:cdna].blank? + + genotype.add_gene_location(mutation[:cdna]) + @logger.debug "SUCCESSFUL cdna change parse for: #{mutation}" + end + + def process_protein_impact(genotype, mutation) + if mutation[:impact].present? + genotype.add_protein_impact(mutation[:impact]) + @logger.debug "SUCCESSFUL protein parse for: #{mutation[:impact]}" + else + @logger.debug "FAILED protein parse for: #{mutation}" + end + end + end + # rubocop:enable Metrics/ClassLength + end + end + end +end diff --git a/lib/import/brca/providers/leeds/leeds_handler_new.rb b/lib/import/brca/providers/leeds/leeds_handler_old.rb similarity index 98% rename from lib/import/brca/providers/leeds/leeds_handler_new.rb rename to lib/import/brca/providers/leeds/leeds_handler_old.rb index 4dff9a05..4927a11e 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_old.rb @@ -6,7 +6,7 @@ module Brca module Providers module Leeds # Process Leeds-specific record details into generalized internal genotype format - class LeedsHandlerNew < Import::Germline::ProviderHandler + class LeedsHandlerOld < Import::Germline::ProviderHandler include Import::Helpers::Brca::Providers::Rr8::Constants def process_fields(record) @@ -47,6 +47,9 @@ def populate_genotype(record) process_genetictestcope(genotype, record) assign_teststatus(genotype, record) res = process_variants_from_record(genotype, record) + # correcting ebatch provider and registry to RR8 (from RR8_2) to allow data to persist in the database + @batch.provider = 'RR8' + @batch.registryid = 'RR8' res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } end diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh 
b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index ccc194cf..8eb9c955 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -53,14 +53,31 @@ RR8 () { MBIS=$1 PROV='RR8' IFS=$'\n' +for x in $(find $DIRPATH/$FILEPATH -path "*/$PROV/*" -type f \ +\( -name "*BRCA*.pseudo" -o -type f -name "*Other*.pseudo" \) \ +\( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ +! -name "bede6d1385c0ae9db4fe61fe9b07d58f86e2dc60_24.11.2021 to 31.03.2025_BRCA_DATA__2021_11_24__to__2025_03_31_b.xlsx.pseudo") +do +IFS="$OIFS" +$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV +done +} + +# To handle pseudo files before 2025 +RR8_2 () { +MBIS=$1 +PROV='RR8' +PROV_OLD_FILE='RR8_2' +IFS=$'\n' for x in $(find $DIRPATH/$FILEPATH -type f -name "*.pseudo" -path "*/$PROV/*" \ -not -path "*/2017-03-17/*" \ +-not -path "*/2025/*" \ ! -name "3a4d3dc703789864fa6d2b8f5d9fe60749205979_01.01.2013 to 30.09.2018_010113_300918.xlsx.pseudo" \ ! -name "*MMR*" \ ! 
-name "*Colorectal*") do IFS="$OIFS" -$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV +$BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code=$PROV_OLD_FILE done } @@ -78,7 +95,6 @@ $BRAKE import:brca fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1 done } - RX1 () { MBIS=$1 PROV='RX1' @@ -256,6 +272,6 @@ done } -RTD; RQ3; RR8; RNZ; RVJ; RX1; RCU; RJ1; RGT; RPY; R0A; RJ7; RJ7_2 ; RTH; R1K; RP4; REP +RTD; RQ3; RR8; RR8_2; RNZ; RVJ; RX1; RCU; RJ1; RGT; RPY; R0A; RJ7; RJ7_2 ; RTH; R1K; RP4; REP diff --git a/lib/import/database_wrappers/genetic_sequence_variant.rb b/lib/import/database_wrappers/genetic_sequence_variant.rb index 50cc664e..635f15f9 100644 --- a/lib/import/database_wrappers/genetic_sequence_variant.rb +++ b/lib/import/database_wrappers/genetic_sequence_variant.rb @@ -15,7 +15,6 @@ def initialize(genotype) %w[geneticsequencevariantid genetic_test_result_id humangenomebuild - referencetranscriptid genomicchange clinvarid cosmicid diff --git a/lib/import/germline/genotype.rb b/lib/import/germline/genotype.rb index ac9c3370..c9aabe41 100644 --- a/lib/import/germline/genotype.rb +++ b/lib/import/germline/genotype.rb @@ -340,7 +340,7 @@ def add_variant_impact(impact) end def add_variant_class(variant) - if variant.is_a?(Integer) && variant >= 1 && variant <= 7 + if variant.is_a?(Integer) && variant >= 1 && variant <= 9 @attribute_map['variantpathclass'] = variant elsif variant.is_a?(String) if VARIANT_CLASS_MAP[variant.downcase.strip] diff --git a/lib/import/helpers/brca/providers/rr8/constants.rb b/lib/import/helpers/brca/providers/rr8/constants.rb index 2e167071..fc2b056d 100644 --- a/lib/import/helpers/brca/providers/rr8/constants.rb +++ b/lib/import/helpers/brca/providers/rr8/constants.rb @@ -123,6 +123,11 @@ module Constants EPCAM|FH|FLCN|GREM1|MET|MLH1|MSH2|MSH6|MUTYH|NTHL1|PALB2| PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D|SDHB|SMAD4|STK11|TP53|VHL'.freeze + 
NEW_FORMAT_GENES = /(?ATM|BRCA1|BRCA2|BRIP1|CHEK2|MLH1|MSH2|MSH6|PALB2|POT1|PTEN| + RAD51C|RAD51D|TP53)/x + + NEW_TARG_GENES_REGEX = /(?BRCA1|BRCA2|BRIP1|CHEK2|PALB2|RAD51C|RAD51D)/x + # rubocop:disable Lint/MixedRegexpCaptureTypes BRCA_REGEX = /(?#{GENES})/ix @@ -134,9 +139,11 @@ module Constants HETEROZYGOUS_GENE_REGEX = /heterozygous[\w\s]+(?#{GENES})[\w\s]+/ix CDNA_REGEX = /c\.(?[\w+>*\-]+)?[\w\s.]+/ix + CDNA = /c\.(?[\w.+>*\-]+)/ix TARG_GENE_REGEX = /(?#{GENES})[\w\s]+(c\.(?[\w+>*\-]+)?[\w\s.]+|exon)/ix + GENE_CDNA_PROTEIN = /(?#{GENES})\s+c\.(?[\w.+>*\-]+)\s+p\.?\(?(?\w+)\)?/ix PROTEIN_REGEX = /\(?p\.\(?(?\w+)\)?/ix EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s? @@ -149,6 +156,8 @@ module Constants ex(on)?s?\s?(?[0-9]+(\sto\s[0-9]+)?)\s (?del|dup|ins)| x(?[0-9+-? ]+)+(?del|dup|ins)/ix + + REF_TRANSCRIPT_ID = /NM_\d{6}\.\d(?=:)/ix # rubocop:enable Lint/MixedRegexpCaptureTypes end end diff --git a/test/fixtures/zproviders.yml b/test/fixtures/zproviders.yml index a6dddb88..45d7add5 100644 --- a/test/fixtures/zproviders.yml +++ b/test/fixtures/zproviders.yml @@ -8,3 +8,5 @@ zprovider_r0a: zproviderid: R0A zprovider_rj7: zproviderid: RJ7 +zprovider_rr8: + zproviderid: RR8 diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb new file mode 100644 index 00000000..3d53c111 --- /dev/null +++ b/test/lib/import/brca/providers/leeds/leeds_handler_new_format_test.rb @@ -0,0 +1,480 @@ +require 'test_helper' + +class LeedsHandlerNewFormatTest < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Brca::Core::GenotypeBrca.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Brca::Providers::Leeds::LeedsHandlerNewFormat.new(EBatch.new) + end + @logger = Import::Log.get_logger + end + + test 'process_failed_test_record' do + failed_record = build_raw_record('pseudo_id1' => 'patient1') 
+ failed_record.raw_fields['genotype'] = 'R208_fail_FFPE' + failed_record.raw_fields['report'] = 'RESULT: No results were obtained for this sample.' + + res = @handler.process_fields(failed_record) + assert_equal 1, res.size + res.each do |genotype| + assert_equal 9, genotype.attribute_map['teststatus'] + end + end + + test 'process_normal_result_record' do + res = @handler.process_fields(@record) + assert_equal 1, res.size + assert_equal 1, res[0].attribute_map['teststatus'] # normal + assert_equal 3186, res[0].attribute_map['gene'] # PALB2 + end + + test 'process_variant_rec' do + variant_record = build_raw_record('pseudo_id1' => 'patient2') + variant_record.raw_fields['gene'] = nil + variant_record.raw_fields['codingdnasequencechange'] = 'No pathogenic variants detected' + variant_record.raw_fields['proteinimpact'] = 'BRCA2 c.2698A>G het (C2)' + variant_record.raw_fields['zygosity'] = nil + variant_record.raw_fields['variantpathclass'] = nil + variant_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence and dosage analysis:"\ + " \n\n\n\nBRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D.\n\n\n\nNo pathogenic variant was identified' + + res = @handler.process_fields(variant_record) + assert_equal 9, res.size + variant_genotype = res.find { |g| g.attribute_map['gene'] == 8 } # BRCA2 + assert_equal 10, variant_genotype.attribute_map['teststatus'] + assert_equal 'c.2698A>G', variant_genotype.attribute_map['codingdnasequencechange'] + assert_nil variant_genotype.attribute_map['proteinimpact'] + assert_equal 2, variant_genotype.attribute_map['variantpathclass'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 8 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'process_multi_gene_panel_r208.1' do + panel_record = build_raw_record('pseudo_id1' => 'patient3') + 
panel_record.raw_fields['moleculartestingtype'] = 'R208.1' + panel_record.raw_fields['report'] = 'Results are normal' + panel_record.raw_fields['diagnosis_report'] = 'No variant found' + + res = @handler.process_fields(panel_record) + + # Should have all genes from R208.1 panel + expected_genes = %w[ATM BRCA1 BRCA2 CHEK2 PALB2] + assert_equal expected_genes.size, res.size + assert_equal 451, res[0].attribute_map['gene'] # ATM + assert_equal 7, res[1].attribute_map['gene'] # BRCA1 + assert_equal 8, res[2].attribute_map['gene'] # BRCA2 + assert_equal 865, res[3].attribute_map['gene'] # CHEK2 + assert_equal 3186, res[4].attribute_map['gene'] # PALB2 + end + + test 'process_gene_variant_rec' do + gene_variant_rec = build_raw_record('pseudo_id1' => 'patient4') + gene_variant_rec.raw_fields['gene'] = 'ATM' + gene_variant_rec.raw_fields['codingdnasequencechange'] = 'c.8156del' + gene_variant_rec.raw_fields['proteinimpact'] = 'p.(Arg2719fs)' + gene_variant_rec.raw_fields['zygosity'] = 'Mosaic' + gene_variant_rec.raw_fields['variantpathclass'] = 'Likely pathogenic' + gene_variant_rec.raw_fields['report'] = 'This patient has been screened for variants in the ' \ + 'following cancer predisposing genes by sequence and dosage analysis: \n\nATM*, BRCA1, BRCA2, ' \ + 'BRIP1, CHEK2*, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D.\n\n\n\nThe likely pathogenic ' \ + 'ATM variant c.8156del p.(Arg2719fs)' + + res = @handler.process_fields(gene_variant_rec) + assert_equal 11, res.size + + variant_genotype = res.find { |g| g.attribute_map['gene'] == 451 } # ATM + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 'c.8156del', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Arg2719fs', variant_genotype.attribute_map['proteinimpact'] + assert_equal 4, variant_genotype.attribute_map['variantpathclass'] + assert_equal 6, variant_genotype.attribute_map['geneticinheritance'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 
451 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'process_result_variant_rec' do + result_variant_rec = build_raw_record('pseudo_id1' => 'patient4') + result_variant_rec.raw_fields['moleculartestingtype'] = 'R208.1' + result_variant_rec.raw_fields['genotype'] = 'No report required' + result_variant_rec.raw_fields['gene'] = nil + result_variant_rec.raw_fields['codingdnasequencechange'] = 'BRCA1 c.4065_4068del heterozygote' + result_variant_rec.raw_fields['proteinimpact'] = nil + result_variant_rec.raw_fields['zygosity'] = nil + result_variant_rec.raw_fields['variantpathclass'] = nil + result_variant_rec.raw_fields['report'] = 'Reason:Reported under a different indication.' + + res = @handler.process_fields(result_variant_rec) + assert_equal 2, res.size + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 7, variant_genotype.attribute_map['gene'] + assert_equal 'c.4065_4068del', variant_genotype.attribute_map['codingdnasequencechange'] + assert_nil variant_genotype.attribute_map['proteinimpact'] + assert_nil variant_genotype.attribute_map['variantpathclass'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + + normal_genotype = res[1] + assert_equal 1, normal_genotype.attribute_map['teststatus'] + assert_equal 3186, normal_genotype.attribute_map['gene'] + assert_nil normal_genotype.attribute_map['codingdnasequencechange'] + assert_nil normal_genotype.attribute_map['proteinimpact'] + assert_nil normal_genotype.attribute_map['variantpathclass'] + assert_nil normal_genotype.attribute_map['geneticinheritance'] + end + + test 'normal_report_result_rec' do + normal_report_result_rec = build_raw_record('pseudo_id1' => 'patient5') + normal_report_result_rec.raw_fields['genotype'] = 'R208_normal_Apr22' + normal_report_result_rec.raw_fields['gene'] = nil + normal_report_result_rec.raw_fields['codingdnasequencechange'] = 'No result' + 
normal_report_result_rec.raw_fields['proteinimpact'] = nil + normal_report_result_rec.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: BRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D ' \ + '(all coding exons and exon-intron boundaries).' + + res = @handler.process_fields(normal_report_result_rec) + assert_equal 9, res.size + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'first_of_report_variant_rec' do + first_of_report_variant_rec = build_raw_record('pseudo_id1' => 'patient6') + first_of_report_variant_rec.raw_fields['genotype'] = 'R208_ATM/CHEK2_C4/5_Apr22' + first_of_report_variant_rec.raw_fields['gene'] = nil + first_of_report_variant_rec.raw_fields['codingdnasequencechange'] = nil + first_of_report_variant_rec.raw_fields['proteinimpact'] = nil + first_of_report_variant_rec.raw_fields['report'] = 'RESULT: This individual is heterozygous for a germline pathogenic ATM truncating variant (details below). Heterozygous ATM pathogenic variants cause moderate risk1 cancer susceptibility, particularly breast cancer in females (OMIM: 607585; 114480). +IMPLICATIONS : Each of their offspring would be at 50% risk of inheriting this variant and genetic predisposition to ATM-associated cancers. Other relatives are also at increased risk.' 
+ res = @handler.process_fields(first_of_report_variant_rec) + assert_equal 2, res.size + assert_equal 451, res[0].attribute_map['gene'] + assert_equal 2, res[0].attribute_map['teststatus'] + + assert_equal 3186, res[1].attribute_map['gene'] + assert_equal 1, res[1].attribute_map['teststatus'] + end + + test 'process_exonic_deletion_variant' do + exon_record = build_raw_record('pseudo_id1' => 'patient6') + exon_record.raw_fields['indicationcategory'] = 'R207' + exon_record.raw_fields['moleculartestingtype'] = 'R207.1' + exon_record.raw_fields['genotype'] = 'R207 - BRCA Diag C4/5' + exon_record.raw_fields['gene'] = 'BRCA1' + exon_record.raw_fields['codingdnasequencechange'] = 'Deletion of exons 1-23' + exon_record.raw_fields['zygosity'] = 'Heterozygous' + exon_record.raw_fields['variantpathclass'] = 'Pathogenic' + exon_record.raw_fields['report'] = 'RESULT: This individual is heterozygous for a germline pathogenic BRCA1 copy number variant (details below).' + exon_record.raw_fields['diagnosis_report'] = '1.Genes screened in R207 panel: BRCA1, BRCA2, BRIP1, MLH1, MSH2, MSH6, PALB2, RAD51C, RAD51D (all coding exons and exon-intron boundaries).' 
+ res = @handler.process_fields(exon_record) + + assert_equal 9, res.size + + variant_genotype = res.find { |g| g.attribute_map['gene'] == 7 } # BRCA1 + assert_not_nil variant_genotype + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal '1-23', variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, variant_genotype.attribute_map['sequencevarianttype'] + assert_equal 5, variant_genotype.attribute_map['variantpathclass'] + assert_equal 1, variant_genotype.attribute_map['variantgenotype'] + assert_equal 4, variant_genotype.attribute_map['geneticinheritance'] + assert_nil variant_genotype.attribute_map['codingdnasequencechange'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 7 + + assert_equal 1, genotype.attribute_map['teststatus'] + assert_nil genotype.attribute_map['codingdnasequencechange'] + assert_nil genotype.attribute_map['proteinimpact'] + assert_nil genotype.attribute_map['variantpathclass'] + assert_nil genotype.attribute_map['geneticinheritance'] + end + end + + test 'exclude_genes_functionality' do + # Test analysis not performed exclusion + @handler.instance_variable_set(:@report, 'TP53 analysis has not been performed') + excluded1 = @handler.exclude_genes + assert_includes excluded1, 'TP53' + + # Test testing reported previously exclusion + @handler.instance_variable_set(:@report, 'TP53 testing has been reported previously') + excluded2 = @handler.exclude_genes + assert_includes excluded2, 'TP53' + + # Test Li Fraumeni syndrome exclusion + @handler.instance_variable_set(:@report, 'TP53 gene analysis for Li Fraumeni syndrome has been carried out') + excluded3 = @handler.exclude_genes + assert_includes excluded3, 'TP53' + end + + # Targeted testing (Familial) tests + test 'process_targeted_heterozygous_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ1') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['gene'] = 
'BRCA1' + targeted_record.raw_fields['codingdnasequencechange'] = 'c.5266dup' + targeted_record.raw_fields['proteinimpact'] = 'p.(Gln1756fs)' + targeted_record.raw_fields['zygosity'] = 'Heterozygous' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' + targeted_record.raw_fields['report'] = 'Testing for the familial BRCA1 variant c.5266dup.' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 7, variant_genotype.attribute_map['gene'] # BRCA1 + assert_equal 'c.5266dup', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln1756fs', variant_genotype.attribute_map['proteinimpact'] + assert_equal 5, variant_genotype.attribute_map['variantpathclass'] + assert_equal 1, variant_genotype.attribute_map['variantgenotype'] + end + + test 'process_targeted_homozygous_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ2') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['gene'] = 'BRCA2' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_007294.3:Exon 13 duplication' + targeted_record.raw_fields['zygosity'] = 'Homozygous' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['genotype'] = 'Familial_conf_seq_+ve_R240' + targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + variant_genotype = res[0] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + assert_equal 8, variant_genotype.attribute_map['gene'] + assert_equal 2, variant_genotype.attribute_map['variantgenotype'] + assert_equal 1, variant_genotype.attribute_map['moleculartestingtype'] + assert_equal 'NM_007294.3', variant_genotype.attribute_map['referencetranscriptid'] + assert_equal '13', variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 4, variant_genotype.attribute_map['sequencevarianttype'] + end + + test 'process_targeted_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ3') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_007294.3:Exon 13 duplication' + targeted_record.raw_fields['zygosity'] = 'Variant absent' + targeted_record.raw_fields['genotype'] = 'Familial testing negative' + targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic BRCA1 variant' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + negative_genotype = res[0] + assert_equal 1, negative_genotype.attribute_map['teststatus'] + assert_equal 7, negative_genotype.attribute_map['gene'] # BRCA1 + end + + test 'process_targeted_no_result' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ4') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Fail/Results not required' + targeted_record.raw_fields['diagnosis_report'] = 'Germline pathogenic variants in CHEK2 have been reported in several studies' + targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_equal 865, genotype.attribute_map['gene'] # CHEK2 + end + + test 'process_targeted_no_result_multiple_genes' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ5') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Familial testing' + targeted_record.raw_fields['diagnosis_report'] = 'Testing for BRCA1 and BRCA2 variants' + targeted_record.raw_fields['report'] = 'Unable to complete testing' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_nil genotype.attribute_map['gene'] + end + + test 'process_targeted_no_biallelic' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ6') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No bi-allelic inheritance of familial PALB2 variants' + targeted_record.raw_fields['genotype'] = 'FA familial C5 normal' + targeted_record.raw_fields['report'] = 'Sequence analysis indicates the absence of bi-allelic inheritance of the familial PALB2 variants' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 4, genotype.attribute_map['teststatus'] + assert_equal 3186, genotype.attribute_map['gene'] # PALB2 + end + + test 'process_targeted_cdna_het_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ7') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'BRCA1 exon 1 deletion heterozygote' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 MLPA pos' + 
targeted_record.raw_fields['report'] = 'This patient is heterozygous for the familial likely pathogenic deletion of BRCA1 exons 1A and 1B' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] # BRCA1 + assert_equal '1', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + assert_equal 2, genotype.attribute_map['moleculartestingtype'] + end + + test 'process_targeted_report_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ8') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' + targeted_record.raw_fields['genotype'] = 'Tumour result conf seq +ve' + targeted_record.raw_fields['report'] = 'Tumour testing for BRCA2 variant c.1234A>G detected in tumour' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 8, genotype.attribute_map['gene'] # BRCA2 + assert_equal 'c.1234A>G', genotype.attribute_map['codingdnasequencechange'] + end + + test 'process_targeted_tumour_result_with_exon' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ9') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No germline variant detected' + targeted_record.raw_fields['genotype'] = 'Tumour result conf seq +ve' + targeted_record.raw_fields['report'] = 'Tumour testing for BRCA1. Deletion of exon 5 detected in tumour.' 
+ + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] # BRCA1 + assert_equal '5', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + assert_equal 1, genotype.attribute_map['moleculartestingtype'] + end + + test 'process_targeted_positive_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ10') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'PALB2 variant absent' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 seq pos' + targeted_record.raw_fields['report'] = 'Analysis indicates that the familial pathogenic PALB2 variant c.3116del is absent in this patient.' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 3186, genotype.attribute_map['gene'] + end + + test 'process_targeted_non_positive_variant_absent_with_brca1' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ11') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' + targeted_record.raw_fields['genotype'] = 'BRCA - Pred B1 C4/C5 MLPA neg' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic BRCA1 duplication of exon 13 is absent in this patient' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 7, genotype.attribute_map['gene'] + end + + test 'process_targeted_non_positive_variant_absent_with_brca2' do + targeted_record = build_raw_record('pseudo_id1' => 'patient_targ12') + 
targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No variant detected' + targeted_record.raw_fields['genotype'] = 'Result B2' + targeted_record.raw_fields['report'] = 'Testing completed' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 8, genotype.attribute_map['gene'] # BRCA2 + end + + private + + def clinical_json + { sex: '2', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2019-10-25T00:00:00.000+01:00', + authoriseddate: '2019-11-25T00:00:00.000+00:00', + servicereportidentifier: 'Service Report Identifier', + sortdate: '2019-10-25T00:00:00.000+01:00', + genetictestscope: 'R208.2', + specimentype: '12', + report: 'RESULT\n\nNo pathogenic copy number variants were detected in the PALB2 gene.', + requesteddate: '2019-10-25T00:00:00.000+01:00', + age: 999 }.to_json + end + + def rawtext_clinical_json + { sex: 'F', + referringclinicianname: 'Clinician', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + indicationcategory: 'R207', + specimentype: 'DNA', + moleculartestingtype: 'R208.2', + requesteddate: '2021-12-08 00:00:00', + genotype: 'R208_normal', + authoriseddate: '2021-12-14 00:00:00', + provider_address: 'International Centre for Life', + name: 'Genetics Service', + report: 'RESULT\n\nNo pathogenic copy number variants were detected in the PALB2 gene.', + diagnosis_report: 'Germline heterozygous pathogenic variants in PALB2 inherited in an autosomal ' \ + 'dominant manner are associated with a 2-6 fold increased risk of breast cancer ' \ + 'in women. Men with pathogenic variants in the PALB2 gene also have an increased ' \ + 'risk for breast cancer; this risk is much smaller than the risk for women. 
Pathogenic ' \ + 'variants in PALB2 are also associated with an increased risk of pancreatic cancer. ' \ + 'Biallelic pathogenic variant events cause a subtype of Fanconi anaemia.\n\nReference ' \ + 'sequence: LRG_308t1 (NM_024675.3)\n\n\n\nMLPA analysis carried out using MRC Holland ' \ + 'kit P260-C1.\n\n\n\nDetected variants are assessed at the time of reporting according ' \ + 'to the ACGS best practice guidelines (http://www.acgs.uk.com/). Variant nomenclature ' \ + 'conforms to HGVS guidelines (http://www.hgvs.org). Sequence variants of no or unlikely ' \ + 'clinical significance are omitted from the reported results.', + patienttype: 'NHS', + providercode: 'RTD07', + receiveddate: '2025-09-29 00:00:00', + karyotypingmethod: 'MLPA P260', + codingdnasequencechange: 'No deletions/duplications detected', + proteinimpact: nil, + gene: nil, + zygosity: nil, + variantpathclass: nil }.to_json + end +end diff --git a/test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb b/test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb similarity index 93% rename from test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb rename to test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb index 7425efb2..3734e5a2 100644 --- a/test/lib/import/brca/providers/leeds/leeds_handler_new_test.rb +++ b/test/lib/import/brca/providers/leeds/leeds_handler_old_test.rb @@ -1,15 +1,32 @@ require 'test_helper' -class LeedsHandlerNewTest < ActiveSupport::TestCase +class LeedsHandlerOldTest < ActiveSupport::TestCase def setup @record = build_raw_record('pseudo_id1' => 'bob') @genotype = Import::Brca::Core::GenotypeBrca.new(@record) @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Brca::Providers::Leeds::LeedsHandlerNew.new(EBatch.new) + @handler = Import::Brca::Providers::Leeds::LeedsHandlerOld.new(EBatch.new) end @logger = Import::Log.get_logger end + test 'process_fields' do + e_batch = EBatch.create(original_filename: 'test_filea', + 
e_type: 'PSMOLE', + provider: 'RR8_2', + registryid: 'RR8_2') + handler = Import::Brca::Providers::Leeds::LeedsHandlerOld.new(e_batch) + Import::Brca::Providers::Leeds::LeedsHandlerOld.any_instance.stubs(:should_process).returns(true) + handler.process_fields(@record) + assert_difference('EBatch.count', 1) do + handler.finalize + end + # confirm batch created now has 'RR8' as provider + e_batch.reload + assert_equal 'RR8', e_batch.provider + assert_equal 'RR8', e_batch.registryid + end + test 'process_abnormal_fs_record' do @handler.populate_variables(@record) @handler.add_moleculartestingtype(@genotype, @record)