diff --git a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb index e09a4b03..18204e95 100644 --- a/lib/import/brca/providers/leeds/leeds_handler_new_format.rb +++ b/lib/import/brca/providers/leeds/leeds_handler_new_format.rb @@ -384,7 +384,6 @@ def classify_variant_pathogenicity(variantpathclass) end def classify_protein_impact - require 'pry' case @value1 when /C1/ 1 diff --git a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh index 8eb9c955..a1a56a78 100755 --- a/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh +++ b/lib/import/brca/scripts/bash/Import_all_brca_interactive.sh @@ -54,7 +54,7 @@ MBIS=$1 PROV='RR8' IFS=$'\n' for x in $(find $DIRPATH/$FILEPATH -path "*/$PROV/*" -type f \ -\( -name "*BRCA*.pseudo" -o -type f -name "*Other*.pseudo" \) \ +\( -name "*BRCA*.pseudo" -o -type f -iname "*Other*.pseudo" \) \ \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \ ! 
-name "bede6d1385c0ae9db4fe61fe9b07d58f86e2dc60_24.11.2021 to 31.03.2025_BRCA_DATA__2021_11_24__to__2025_03_31_b.xlsx.pseudo") do diff --git a/lib/import/colorectal/core/colorectal_handler_mapping.rb b/lib/import/colorectal/core/colorectal_handler_mapping.rb index 64a38d45..8892f6ce 100644 --- a/lib/import/colorectal/core/colorectal_handler_mapping.rb +++ b/lib/import/colorectal/core/colorectal_handler_mapping.rb @@ -4,7 +4,8 @@ module Core # Provides the handler appropriate for the dataformat from each center class ColorectalHandlerMapping HANDLER_MAPPING = { - 'RR8' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal, + 'RR8_V1_PRE2025' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1, + 'RR8_V2_POST2025' => Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2, 'RNZ' => Import::Colorectal::Providers::Salisbury::SalisburyHandlerColorectal, 'RTD' => Import::Colorectal::Providers::Newcastle::NewcastleHandlerColorectal, 'RX1' => Import::Colorectal::Providers::Nottingham::NottinghamHandlerColorectal, diff --git a/lib/import/colorectal/core/genocolorectal.rb b/lib/import/colorectal/core/genocolorectal.rb index 13d71cc0..0a9f7f0a 100644 --- a/lib/import/colorectal/core/genocolorectal.rb +++ b/lib/import/colorectal/core/genocolorectal.rb @@ -16,11 +16,16 @@ class Genocolorectal < Import::Germline::Genotype #--------------------- Schema code mapping tables -------------------------- COLORECTAL_MAP = { 'APC' => 358, + 'BAP1' => 517, 'BMPR1A' => 577, 'EPCAM' => 1432, - 'TACSTD1' => 1432, + 'TACSTD1' => 1432, #Old symbol for EPCAM + 'FH' => 1590, + 'FLCN' => 1603, + 'MET' => 50, 'MLH1' => 2744, 'MSH2' => 2804, + 'MSH3' => 2805, 'MSH6' => 2808, 'MUTYH' => 2850, 'PMS2' => 3394, @@ -42,14 +47,21 @@ class Genocolorectal < Import::Germline::Genotype 'RAD51D' => 3616, 'VHL' => 83, 'ATM' => 451, - 'SCG5' => 5092 }.freeze + 'SCG5' => 5092, + 'SDHB' => 68 + }.freeze COLORECTAL_REGEX = /(?APC)| + (?BAP1)| (?BMPR1A)| (?EPCAM)| - (?TACSTD1)| + 
(?TACSTD1)| #Old symbol for EPCAM + (?FH)| + (?FLCN)| + (?MET)| (?MLH1)| (?MSH2)| + (?MSH3)| (?MSH6)| (?MUTYH)| (?PMS2)| @@ -71,15 +83,16 @@ class Genocolorectal < Import::Germline::Genotype (?RAD51D)| (?VHL) | (?ATM) | - (?SCG5)/ix # Added by Francesco + (?SCG5)| + (?SDHB)/ix # Added by Francesco # ------------------------ Interogators ------------------------------ def add_gene_colorectal(colorectal_input) case colorectal_input when Integer - if [1432, 358, 577, 2744, 2804, 2808, 2850, 3394, 7, 8, 79, 3186, 5019, - 3408, 5000, 62, 72, 76, 1882, 3108, 794, 83, 5019, 451].include? colorectal_input + if [1432, 358, 517, 577, 2744, 2804, 2805, 2808, 2850, 3394, 7, 8, 79, 3186, 5019, 1603, 50, 68, + 3408, 5000, 62, 72, 76, 1882, 3108, 794, 83, 5019, 451, 1590, 5092].include? colorectal_input @attribute_map['gene'] = colorectal_input @logger.debug "SUCCESSFUL gene parse for #{colorectal_input}" diff --git a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb similarity index 98% rename from lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb rename to lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb index 695c33c0..daf0febf 100644 --- a/lib/import/colorectal/providers/leeds/leeds_handler_colorectal.rb +++ b/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1.rb @@ -3,8 +3,8 @@ module Colorectal module Providers module Leeds # rubocop:disable Metrics/ClassLength - # Leeds importer for colorectal - class LeedsHandlerColorectal < Import::Germline::ProviderHandler + # Leeds importer for colorectal (pre-2025 format) + class LeedsHandlerColorectalV1 < Import::Germline::ProviderHandler include Import::Helpers::Colorectal::Providers::Rr8::Constants def initialize(batch) @@ -53,6 +53,9 @@ def populate_and_persist_genotype(record) add_varclass add_organisationcode_testresult(genocolorectal) res = process_variants_from_record(genocolorectal, record) 
module Import
  module Colorectal
    module Providers
      module Leeds
        # Leeds importer for colorectal (post-2025 format).
        #
        # Each raw record is classified as full screen or targeted from its
        # 'moleculartestingtype' raw field, expanded into one genotype per gene,
        # and handed to the persister. Raw fields are cached in instance
        # variables by populate_variables before any processing step runs.
        class LeedsHandlerColorectalV2 < Import::Germline::ProviderHandler
          include Import::Helpers::Colorectal::Providers::Rr8::Constants

          # Entry point: builds the genotypes for one record and stores them.
          #
          # @param record the raw record (responds to raw_fields / mapped_fields)
          # @return [Array, nil] the stored genotypes, or nil when the record is
          #   filtered out by should_process?
          def process_fields(record)
            # check if should process record from Other Cancer file
            return unless should_process?(record)

            genocolorectal = Import::Colorectal::Core::Genocolorectal.new(record)
            genocolorectal.add_passthrough_fields(record.mapped_fields,
                                                  record.raw_fields,
                                                  PASS_THROUGH_FIELDS,
                                                  FIELD_NAME_MAPPINGS)

            populate_variables(record)
            process_test_scope(genocolorectal)
            setup_derived_values(record)
            genotypes = []

            res = if genocolorectal.full_screen?
                    add_fs_moleculartestingtype(genocolorectal, record)
                    @genes_panel = genes_panel
                    process_fs_rec(genocolorectal, record, genotypes)
                  elsif genocolorectal.targeted?
                    add_targ_moleculartestingtype(genocolorectal)
                    process_targ_rec(genocolorectal, record, genotypes)
                  else
                    # Neither scope recognised: nothing to store, but do not crash on nil.
                    genotypes
                  end
            # correcting ebatch provider and registry to RR8 (from RR8_V2_POST2025) to allow
            # data to persist in the database
            @batch.provider = 'RR8'
            @batch.registryid = 'RR8'
            res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) }
          end

          # Decides whether a record should be imported. Records from files whose
          # name does not contain "Other" are always processed; records from
          # "Other" files must be 'Familial', mention a gene matched by
          # MMR_GENE_REGEX in the report or diagnosis report, and must not
          # mention ataxia (diagnosis report) or BRCA (codingdnasequencechange).
          def should_process?(record)
            file_name = @batch.original_filename
            return true unless file_name =~ /Other/ix

            fields = record.raw_fields
            return false unless fields['moleculartestingtype'] == 'Familial'

            rep_scan = fields['report']&.scan(MMR_GENE_REGEX)
            diag_scan = fields['diagnosis_report']&.scan(MMR_GENE_REGEX)
            has_genes = rep_scan&.any? || diag_scan&.any?

            return false unless has_genes
            return false if fields['diagnosis_report'] =~ /ataxia/i
            return false if fields['codingdnasequencechange'] =~ /BRCA/i

            true
          end

          # Caches the raw fields and resets the per-record working state.
          def populate_variables(record)
            populate_raw_field_variables(record)
            initialize_processing_variables
          end

          # Copies the raw fields used by this handler into instance variables.
          def populate_raw_field_variables(record)
            @report = record.raw_fields['report']
            @moltestingtype = record.raw_fields['moleculartestingtype']
            @value1 = record.raw_fields['proteinimpact']
            @value12 = record.raw_fields['zygosity']
            @report_result = record.raw_fields['genotype']
            @value2 = record.raw_fields['gene']
            @result = record.raw_fields['codingdnasequencechange']
            @diag_report = record.raw_fields['diagnosis_report']
            @comment = record.raw_fields['variantpathclass']
            @test = record.raw_fields['karyotypingmethod']
          end

          # Clears values derived from a previous record.
          def initialize_processing_variables
            @pos_gene = nil
            @variantpathclass = nil
            @cdna_mutations = nil
            @exonic_mutations = nil
            @zygosity = nil
          end

          # 'Familial' records are targeted tests; everything else is a full screen.
          def process_test_scope(genocolorectal)
            if @moltestingtype == 'Familial'
              genocolorectal.add_test_scope(:targeted_mutation)
            else
              genocolorectal.add_test_scope(:full_screen)
            end
          end

          # Marks full-screen records with indication category R211 or R414 as diagnostic.
          def add_fs_moleculartestingtype(genocolorectal, record)
            indication_category = record.raw_fields['indicationcategory']
            return unless %w[R211 R414].include? indication_category

            genocolorectal.add_molecular_testing_type_strict(:diagnostic)
          end

          # Derives the molecular testing type of a targeted record from the raw
          # 'genotype' field: conf/R240 => diagnostic, pred/R242 => predictive.
          # A missing field is treated as an empty string so that it simply
          # matches neither branch instead of raising NoMethodError.
          def add_targ_moleculartestingtype(genocolorectal)
            report_result = @report_result.to_s
            if report_result.match?(/conf|R240/i)
              genocolorectal.add_molecular_testing_type_strict(:diagnostic)
            elsif report_result.match?(/pred|R242/i)
              genocolorectal.add_molecular_testing_type_strict(:predictive)
            end
          end

          # Collects the genes tested for a full-screen record from the diagnosis
          # report, the main report and the 'genotype' field, falling back to the
          # default list for the test type when none are found.
          def genes_panel
            genes = []
            genes.concat(extract_genes_from_diagnosis_report)
            genes.concat(extract_genes_from_main_report)
            genes.concat(extract_genes_from_report_results)

            detected_genes = genes.flatten.compact.uniq
            detected_genes.empty? ? default_genes_for_test_type : detected_genes
          end

          # Genes listed in the "Genes screened in the ..." sentence of the diagnosis report.
          def extract_genes_from_diagnosis_report
            diag_report_match = @diag_report&.match(/Genes\sscreened\sin\sthe[^.]*\./im)
            return [] unless diag_report_match

            diag_report_match[0].scan(COLORECTAL_GENES_REGEX)
          end

          # Genes listed in the sentence of the main report matched by PATIENT_SCREENED_REGEX.
          def extract_genes_from_main_report
            match = @report&.match(PATIENT_SCREENED_REGEX)
            return [] unless match

            match[1].scan(COLORECTAL_GENES_REGEX)
          end

          # Genes mentioned in the raw 'genotype' field.
          def extract_genes_from_report_results
            @report_result&.scan(COLORECTAL_GENES_REGEX) || []
          end

          # Gene list used when no gene could be extracted from the record text.
          def default_genes_for_test_type
            case @moltestingtype
            when 'R209.1' # Comprehensive colorectal cancer panel
              %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2 POLD1 POLE PTEN SMAD4 STK11]
            when 'R210.2' # Lynch syndrome focused panel
              %w[MLH1 MSH2 MSH6 PMS2]
            else
              []
            end
          end

          # Computes zygosity, variant class, mutation matches and transcript id
          # from the cached raw fields.
          def setup_derived_values(record)
            @zygosity = calc_zygosity
            @variantpathclass = cal_variantpathclass(record)
            setup_mutation_fields
            setup_reference_transcript_id
          end

          def setup_mutation_fields
            @cdna_mutations = extract_cdna_mutations
            @exonic_mutations = extract_exonic_mutations
            @protein_impact = extract_protein_impact
          end

          # cDNA change: 'codingdnasequencechange' first, then 'proteinimpact'.
          def extract_cdna_mutations
            @result&.match(CDNA_REGEX) || @value1&.match(CDNA_REGEX)
          end

          # Exonic variant: 'codingdnasequencechange' first, then 'proteinimpact'.
          def extract_exonic_mutations
            @result&.match(EXON_VARIANT_REGEX) || @value1&.match(EXON_VARIANT_REGEX)
          end

          # Protein impact: 'proteinimpact' first, then 'codingdnasequencechange'.
          def extract_protein_impact
            @value1&.match(PROTEIN_REGEX) || @result&.match(PROTEIN_REGEX)
          end

          def setup_reference_transcript_id
            @refid = @result&.match(REF_TRANSCRIPT_ID) || @value1&.match(REF_TRANSCRIPT_ID)
          end

          # Full-screen processing. The steps are tried in priority order; the
          # two *_variant_rec steps may each append a variant genotype and do not
          # stop the chain, the predicate steps stop it when they handle the record.
          #
          # @return [Array] the genotypes list passed in
          def process_fs_rec(genocolorectal, record, genotypes)
            # priority based extracting details
            return genotypes if fail_rec?(genocolorectal, genotypes)

            process_result_variant_rec(genocolorectal, record, genotypes)
            process_protein_impact_variant_rec(genocolorectal, record, genotypes)
            return genotypes if gene_variant_rec?(genocolorectal, record, genotypes)
            return genotypes if normal_result_rec?(genocolorectal, genotypes)
            return genotypes if normal_report_result?(genocolorectal, genotypes)

            first_of_report_variant_rec?(genocolorectal, record, genotypes)
          end

          # Failed test ("fail" without "dosage" in the 'genotype' field):
          # every panel gene gets status 9.
          def fail_rec?(genocolorectal, genotypes)
            return false unless @report_result =~ /fail/i && @report_result !~ /dosage/i

            process_status_genes(9, @genes_panel, genocolorectal, genotypes)
            true
          end

          # Variant described in 'codingdnasequencechange' when the 'gene' field is empty.
          def process_result_variant_rec(genocolorectal, record, genotypes)
            return unless @value2.nil?
            return unless @result =~ CDNA_REGEX || @result =~ EXON_REGEX || @result =~ /heterozygo/i

            @pos_gene = @result.scan(COLORECTAL_GENES_REGEX).flatten.uniq
            return if @pos_gene.blank?

            teststatus = determine_protein_impact_test_status
            extract_mutations_from_src(@result)
            process_variant_rec(genocolorectal, teststatus, record, genotypes)
          end

          # Variant described in 'proteinimpact' when the 'gene' field is empty.
          def process_protein_impact_variant_rec(genocolorectal, record, genotypes)
            return unless @value2.nil?

            # GENES is a multi-line string literal, so the regexp needs the /x flag:
            # without it the embedded newline and indentation become literal
            # characters and the alternatives that follow a line break can never match.
            starts_with_gene = /\A(?:#{GENES})/ix
            return unless @value1 =~ CDNA_REGEX || @value1 =~ EXON_REGEX || @value1 =~ starts_with_gene

            @pos_gene = @value1.scan(COLORECTAL_GENES_REGEX).flatten.uniq
            @pos_gene = ['PMS2'] if @value1 =~ /NM_000535.5/
            return if @pos_gene.blank?

            teststatus = determine_protein_impact_test_status
            extract_mutations_from_src(@value1)
            process_variant_rec(genocolorectal, teststatus, record, genotypes)
          end

          # Test status for a variant: 10 when 'proteinimpact' carries a C1/C2
          # class, otherwise 2.
          def determine_protein_impact_test_status
            case @value1
            when /C1/, /C2/
              10
            else
              2
            end
          end

          # Variant whose gene is given in the 'gene' field (CHEK2 is ignored).
          # The remaining panel genes get status 1.
          def gene_variant_rec?(genocolorectal, record, genotypes)
            return false if @value2.nil?

            @pos_gene = @value2.scan(COLORECTAL_GENES_REGEX).flatten.uniq - ['CHEK2']
            return false if @pos_gene.blank?

            process_variant_rec(genocolorectal, 2, record, genotypes)
            negative_genes = @genes_panel - @pos_gene
            process_status_genes(1, negative_genes, genocolorectal, genotypes)
            true
          end

          # "No ... detected" in 'codingdnasequencechange': all panel genes get status 1.
          def normal_result_rec?(genocolorectal, genotypes)
            return false unless @result =~ /No.*detected/i

            process_status_genes(1, @genes_panel, genocolorectal, genotypes)
            true
          end

          # "normal" in the 'genotype' field: all panel genes get status 1.
          def normal_report_result?(genocolorectal, genotypes)
            return false unless @report_result =~ /normal/i

            process_status_genes(1, @genes_panel, genocolorectal, genotypes)
            true
          end

          # Last resort: a "heterozygous for ... pathogenic <gene>" statement in the
          # report text. NOTE: despite the trailing '?', this returns the
          # genotypes list (callers rely on that).
          def first_of_report_variant_rec?(genocolorectal, record, genotypes)
            # Use the MatchData directly rather than $LAST_MATCH_INFO, which is only
            # defined once the 'English' library has been required.
            match = @report&.match(/.*heterozygous\s+for.*pathogenic\s*#{COLORECTAL_GENES_REGEX}/ix)
            return genotypes unless match

            @pos_gene = [match[:colorectal]]
            if @pos_gene.present?
              extract_mutations_from_src(@report)
              process_variant_rec(genocolorectal, 2, record, genotypes)
              negative_genes = @genes_panel - @pos_gene
              process_status_genes(1, negative_genes, genocolorectal, genotypes)
            end
            genotypes
          end

          # Appends a copy of the genotype describing a variant in the first
          # positive gene, using the currently cached mutation matches.
          def process_variant_rec(genocolorectal, status, _record, genotypes)
            genocolorectal_dup = genocolorectal.dup_colo
            add_geneticinheritance(genocolorectal_dup)
            genocolorectal_dup.add_gene_colorectal(@pos_gene[0])
            genocolorectal_dup.add_zygosity(@zygosity)
            process_cdna_variant(genocolorectal_dup, @cdna_mutations) if @cdna_mutations.present?
            process_protein_impact(genocolorectal_dup, @protein_impact) if @protein_impact.present?
            process_exonic_variant(genocolorectal_dup, @exonic_mutations) if @exonic_mutations.present?
            genocolorectal_dup.add_referencetranscriptid(@refid.to_s) if @refid.present?
            genocolorectal_dup.add_variant_class(@variantpathclass)
            genocolorectal_dup.add_status(status)
            genotypes << genocolorectal_dup
          end

          # Appends one copy of the genotype per gene, each with the given status.
          # A nil gene list is tolerated and adds nothing.
          def process_status_genes(status, negative_genes, genocolorectal, genotypes)
            negative_genes&.each do |gene|
              genocolorectal_dup = genocolorectal.dup_colo
              genocolorectal_dup.add_gene_colorectal(gene)
              genocolorectal_dup.add_status(status)
              genotypes << genocolorectal_dup
            end
          end

          # Targeted processing: the first predicate that recognises the record
          # fills genotypes and ends the chain.
          #
          # @return [Array] the genotypes list passed in
          def process_targ_rec(genocolorectal, record, genotypes)
            @pos_gene = []
            return genotypes if zygosity_variant_targ_rec?(genocolorectal, record, genotypes)
            return genotypes if variant_absent_targ_rec?(genocolorectal, genotypes)
            return genotypes if no_result_targ_rec?(genocolorectal, genotypes)
            return genotypes if no_biallelic_targ_rec?(genocolorectal, genotypes)
            return genotypes if cdna_het_variant_targ_rec?(genocolorectal, record, genotypes)
            return genotypes if result_variant_absent_targ_rec?(genocolorectal, genotypes)

            genotypes
          end

          # Zygosity field states heterozygous/homozygous/mosaic: variant in the 'gene' field.
          def zygosity_variant_targ_rec?(genotype, record, genotypes)
            return false unless @value12 =~ /heterozyg|homozyg|mosaic/i

            @pos_gene = first_mmr_genes(@value2) || []
            @variantpathclass = cal_variantpathclass_targ
            process_variant_rec(genotype, 2, record, genotypes)
            true
          end

          # Zygosity field states the variant is absent: genes in the 'gene' field get status 1.
          def variant_absent_targ_rec?(genotype, genotypes)
            return false unless @value12 =~ /variant\sabsent|not\sdetected/i

            negative_gene = @value2&.scan(MMR_GENE_REGEX)&.flatten&.uniq
            process_status_genes(1, negative_gene, genotype, genotypes)
            true
          end

          # "Fail" in the 'genotype' field: status 9 for the target gene.
          def no_result_targ_rec?(genotype, genotypes)
            return false unless @report_result =~ /Fail/i

            targ_gene = find_target_gene_from_sources
            process_failed_target_gene(targ_gene, genotype, genotypes)
            true
          end

          # Genes from the first of 'gene', 'karyotypingmethod' and
          # 'diagnosis_report' that mentions any; nil when none does.
          def find_target_gene_from_sources
            first_mmr_genes(@value2, @test, @diag_report)
          end

          # Returns the unique MMR_GENE_REGEX matches of the first source that
          # contains at least one, or nil when no source does. nil sources are skipped.
          def first_mmr_genes(*sources)
            sources.each do |source|
              genes = source&.scan(MMR_GENE_REGEX)&.flatten&.uniq
              return genes if genes&.any?
            end
            nil
          end

          # Status 9 for exactly one identified gene; otherwise a single
          # gene-less genotype with status 9.
          def process_failed_target_gene(targ_gene, genotype, genotypes)
            if targ_gene&.size == 1
              process_status_genes(9, targ_gene, genotype, genotypes)
            else
              # dup_colo, as in every other genotype-copying helper of this class
              genotype_dup = genotype.dup_colo
              genotype_dup.add_status(9)
              genotypes << genotype_dup
            end
          end

          # Negative biallelic result: genes mentioned in the report get status 4.
          def no_biallelic_targ_rec?(genotype, genotypes)
            return false unless @report =~ /Biallelic.*neg/ix || @result =~ /No\sbiallelic|No\sbi-allelic/ix

            targ_gene = @report&.scan(MMR_GENE_REGEX)&.flatten&.uniq
            process_status_genes(4, targ_gene, genotype, genotypes)
            true
          end

          # Variant described in 'codingdnasequencechange' (cDNA, exon or "het").
          def cdna_het_variant_targ_rec?(genotype, record, genotypes)
            return false unless @result =~ CDNA_REGEX || @result =~ EXON_REGEX || @result =~ /het/

            find_genes_from_cdna_sources
            @variantpathclass = classify_first_of_report
            process_variant_rec(genotype, 2, record, genotypes)
            true
          end

          # Sets @pos_gene from the first of 'codingdnasequencechange',
          # 'karyotypingmethod', 'genotype' and 'report' that mentions a gene.
          def find_genes_from_cdna_sources
            @pos_gene = first_mmr_genes(@result, @test, @report_result, @report) || []
          end

          # 'codingdnasequencechange' states the variant is absent: status 1 for
          # the genes found in that field or, failing that, in the report.
          def result_variant_absent_targ_rec?(genotype, genotypes)
            return false unless @result =~ /(variant|variaint)\sabsent|no.*detected/ix

            negative_gene = first_mmr_genes(@result, @report) || []
            process_status_genes(1, negative_gene, genotype, genotypes)
            true
          end

          # Re-derives the cached mutation matches from a single source string.
          def extract_mutations_from_src(src)
            @cdna_mutations = src&.match(CDNA_REGEX)
            @exonic_mutations = src&.match(EXON_VARIANT_REGEX)
            @protein_impact = src&.match(PROTEIN_REGEX)
          end

          # geneticinheritance is 6 for mosaic / VAF / fractional-dosage results, otherwise 4.
          def add_geneticinheritance(genocolorectal)
            mosaic = @value12 =~ /mosaic/i || @result =~ /VAF/ || @result =~ /dosage ~0\./
            genocolorectal.attribute_map['geneticinheritance'] = mosaic ? 6 : 4
          end

          # First zygosity hint found in 'zygosity', 'proteinimpact' or
          # 'codingdnasequencechange': 1 for "het", 2 for "homo", nil when absent.
          def calc_zygosity
            [@value12, @value1, @result].each do |v|
              next unless v

              return 1 if v =~ /het/i
              return 2 if v =~ /homo/i
            end
            nil
          end

          # Variant class from 'variantpathclass', falling back to the C-class
          # in 'proteinimpact'.
          def cal_variantpathclass(_record)
            classify_variant_pathogenicity || classify_protein_impact
          end

          # Variant class from 'variantpathclass', falling back to the report text.
          def cal_variantpathclass_targ
            classify_variant_pathogenicity || classify_first_of_report
          end

          # "Likely pathogenic" must be tested before "Pathogenic", which it contains.
          def classify_variant_pathogenicity
            case @comment
            when /Likely\spathogenic/i
              4
            when /Pathogenic/i
              5
            when /Uncertain\ssignificance/i
              3
            end
          end

          # The "(cold C3)" / "(hot C3)" forms must be tested before the bare C3.
          def classify_protein_impact
            case @value1
            when /C1/
              1
            when /C2/
              2
            when /\(cold\sC3\)/i
              8
            when /\(hot\sC3\)/i
              9
            when /C3/
              3
            end
          end

          def classify_first_of_report
            case @report
            when /Likely\spathogenic/i
              4
            when /Pathogenic/i
              5
            end
          end

          # Records exon location and variant type from an EXON_VARIANT_REGEX match.
          def process_exonic_variant(genotype, mutation)
            return if mutation[:exons].blank?

            genotype.add_exon_location(mutation[:exons])
            genotype.add_variant_type(mutation[:variant])
            @logger.debug "SUCCESSFUL exon variant parse for: #{mutation}"
          end

          # Records the cDNA change from a CDNA_REGEX match.
          def process_cdna_variant(genotype, mutation)
            return if mutation[:cdna].blank?

            genotype.add_gene_location(mutation[:cdna])
            @logger.debug "SUCCESSFUL cdna change parse for: #{mutation}"
          end

          # Records the protein impact from a PROTEIN_REGEX match.
          def process_protein_impact(genotype, mutation)
            if mutation[:impact].present?
              genotype.add_protein_impact(mutation[:impact])
              @logger.debug "SUCCESSFUL protein parse for: #{mutation[:impact]}"
            else
              @logger.debug "FAILED protein parse for: #{mutation}"
            end
          end
        end
      end
    end
  end
end
# Leeds (RR8), post-2025 file format: files under a 2025-2039 year directory.
RR8_V2_POST2025 () {
PROV='RR8'
IFS=$'\n'
for x in $(find "$DIRPATH/$FILEPATH" \
  -type f \
  -not -path "*/API_BETA_RETRIEVED/*" \
  -path "*/$PROV/*" \
  \( -name "*MMR*.pseudo" -o -iname "*Colorectal*.pseudo" -o -iname "*other*.pseudo" \) \
  \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \
  )
do
IFS="$OIFS"
$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code='RR8_V2_POST2025'
done
}

# Leeds (RR8), pre-2025 file format.
# The year exclusion is the exact complement of the RR8_V2_POST2025 year filter,
# so that no file is imported by both handlers (excluding only "*/2025/*" would
# hand every file from 2026 onwards to both). -type f keeps directories whose
# name happens to match the patterns out of the list.
RR8_V1_PRE2025 () {
PROV='RR8'
IFS=$'\n'
for x in $(find "$DIRPATH/$FILEPATH" \
  -type f \
  -not -path "*/API_BETA_RETRIEVED/*" \
  -not \( -path "*/202[5-9]/*" -o -path "*/203[0-9]/*" \) \
  -path "*/$PROV/*" \
  \( -name "*MMR*.pseudo" -o -name "*other*.pseudo" \) \
  )
do
IFS="$OIFS"
$BRAKE import:colorectal fname="$(echo "$x" | sed -e 's:.*pseudonymised_data/\(.*\):\1:')" prov_code='RR8_V1_PRE2025'
done
}
- # {|x| @representative_genotype.attribute_map[x].nil?} - if @field_names.all? { |x| @representative_genotype.attribute_map[x].nil? } + # Only create a sequence variant if there are meaningful variant fields beyond just genotype + meaningful_fields = @field_names - ['variantgenotype'] + if meaningful_fields.all? { |x| @representative_genotype.attribute_map[x].nil? } nil else super() diff --git a/lib/import/helpers/colorectal/providers/rr8/constants.rb b/lib/import/helpers/colorectal/providers/rr8/constants.rb index 5f25a7b0..65864321 100644 --- a/lib/import/helpers/colorectal/providers/rr8/constants.rb +++ b/lib/import/helpers/colorectal/providers/rr8/constants.rb @@ -43,17 +43,21 @@ module Constants 'instigated_date' => 'requesteddate' }.freeze GENES = 'APC|ATM|BAP1|BMPR1A|BRCA1|BRCA2|CHEK2|EPCAM|FH|FLCN|GREM1|MET| - MLH1|MSH2|MSH6|MUTYH|NTHL1|PALB2|PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D| + MLH1|MSH2|MSH3|MSH6|MUTYH|NTHL1|PALB2|PMS2|POLD1|POLE|PTEN|RAD51C|RAD51D| RNF43|SDHB|SMAD4|STK11|TP53|VHL'.freeze # rubocop:disable Lint/MixedRegexpCaptureTypes MMR_GENE_REGEX = /APC|BMPR1A|EPCAM|GREM1|MLH1|MSH2|MSH6|MUTYH|NTHL1|PMS2|POLD1| - POLE|PTEN|SMAD4|STK11/ix - CDNA_REGEX = /c\.(?[\w+>*\-]+)?/ix + POLE|PTEN|SMAD4|STK11|RNF43/ix + + CDNA_REGEX = /c\.(?[\w.+>*\-]+)/ix PROTEIN_REGEX = /\(?p\.\(?(?\w+)\)?/ix EXON_REGEX = /(?exon(s)?[\s\-\d]+)/ix GENE_FAIL_REGEX = /(?=(?#{GENES})[\w\s]+fail)/ix NOPATH_REGEX = /.No pathogenic variant was identified./i + + PATIENT_SCREENED_REGEX = /((?:This\s+patient(?:'s\s+sample)?\s+has\s+been\s+screened| + this\s+patient\s+is\s+heterozygous\s+for)[^.]*\.)/imx EXON_VARIANT_REGEX = /(?del|dup|ins).+ex(on)?s?\s? (?[0-9]+(-[0-9]+)?)| ex(on)?s?\s?(?[0-9]+(-[0-9]+)?)\s? 
@@ -190,6 +194,8 @@ module Constants 'developing further MSH2-related cancers', 'developing MSH2-associated cancer' ].freeze + + REF_TRANSCRIPT_ID = /NM_\d{6}\.\d(?=:)/ix end end end diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb similarity index 94% rename from test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb rename to test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb index 6273bd9e..e8fb6a55 100644 --- a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_test.rb +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v1_test.rb @@ -1,15 +1,32 @@ require 'test_helper' -class LeedsHandlerColorectalTest < ActiveSupport::TestCase +class LeedsHandlerColorectalV1Test < ActiveSupport::TestCase def setup @record = build_raw_record('pseudo_id1' => 'bob') @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) @importer_stdout, @importer_stderr = capture_io do - @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectal.new(EBatch.new) + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(EBatch.new) end @logger = Import::Log.get_logger end + test 'process_fields' do + e_batch = EBatch.create(original_filename: 'test_filea', + e_type: 'PSMOLE', + provider: 'RR8_2', + registryid: 'RR8_2') + handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.new(e_batch) + Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV1.any_instance.stubs(:should_process?).returns(true) + handler.process_fields(@record) + assert_difference('EBatch.count', 1) do + handler.finalize + end + # confirm batch created now has 'RR8' as provider + e_batch.reload + assert_equal 'RR8', e_batch.provider + assert_equal 'RR8', e_batch.registryid + end + test 'add_positive_teststatus' do @handler.populate_variables(@record) 
assert_equal 2, @handler.allocate_test_status @@ -102,7 +119,7 @@ def setup assert_equal 5, genotypes[0].attribute_map['variantpathclass'] assert_equal 2, genotypes[0].attribute_map['teststatus'] assert_equal 2804, genotypes[0].attribute_map['gene'] - assert_equal 'c.488T>G', genotypes[0].attribute_map['codingdnasequencechange'] + assert_equal 'c.488T>G.', genotypes[0].attribute_map['codingdnasequencechange'] assert_equal 'Targeted Colorectal Lynch or MMR', genotypes[0].attribute_map['genetictestscope'] end diff --git a/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb new file mode 100644 index 00000000..3535296c --- /dev/null +++ b/test/lib/import/colorectal/providers/leeds/leeds_handler_colorectal_v2_test.rb @@ -0,0 +1,376 @@ +require 'test_helper' + +class LeedsHandlerColorectalV2Test < ActiveSupport::TestCase + def setup + @record = build_raw_record('pseudo_id1' => 'bob') + @genotype = Import::Colorectal::Core::Genocolorectal.new(@record) + @importer_stdout, @importer_stderr = capture_io do + @handler = Import::Colorectal::Providers::Leeds::LeedsHandlerColorectalV2.new(EBatch.new) + end + end + + test 'process_failed_test_record' do + failed_record = build_raw_record('pseudo_id1' => 'patient1') + failed_record.raw_fields['genotype'] = 'Fail/Results not required' + failed_record.raw_fields['codingdnasequencechange'] = 'No result' + failed_record.raw_fields['moleculartestingtype'] = 'R210.2' + failed_record.raw_fields['report'] = 'Unfortunately, no results were obtained from this tissue sample.' 
+ + res = @handler.process_fields(failed_record) + assert_equal 4, res.size + res.each do |genotype| + assert_equal 9, genotype.attribute_map['teststatus'] + end + end + + test 'process_normal_result_record' do + res = @handler.process_fields(@record) + assert_equal 1, res.size + assert_equal 1, res[0].attribute_map['teststatus'] # normal + assert_equal 3394, res[0].attribute_map['gene'] # PMS2 + end + + test 'process_result_variant_rec' do + result_record = build_raw_record('pseudo_id1' => 'patient6') + result_record.raw_fields['codingdnasequencechange'] = 'PMS2 exons 1-7 deletion heterozygote' + result_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for a deletion of PMS2 exons 1-7.' + + res = @handler.process_fields(result_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] + assert_equal '1-7', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 1, genotype.attribute_map['variantgenotype'] + assert_equal 3, genotype.attribute_map['sequencevarianttype'] + end + end + + test 'process_protein_impact_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = nil + gene_record.raw_fields['codingdnasequencechange'] = 'NTHL1 c.268C>T' + gene_record.raw_fields['proteinimpact'] = 'APC c.2120T>C het [C3]' + gene_record.raw_fields['zygosity'] = nil + gene_record.raw_fields['variantpathclass'] = nil + gene_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:\n\nAPC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11' + + res = @handler.process_fields(gene_record) + assert_equal 2, res.size + + # APC variant should get c.2120T>C from proteinimpact field + protein_variant_genotype = res.find { |g| g.attribute_map['gene'] 
== 358 } # APC + assert_equal 'c.2120T>C', protein_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, protein_variant_genotype.attribute_map['teststatus'] + + # NTHL1 variant should get c.268C>T from codingdnasequencechange field + result_variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_equal 'c.268C>T', result_variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 2, result_variant_genotype.attribute_map['teststatus'] + end + + test 'gene_variant_rec' do + gene_record = build_raw_record('pseudo_id1' => 'patient5') + gene_record.raw_fields['gene'] = 'MLH1' + gene_record.raw_fields['codingdnasequencechange'] = 'exon 16-19 deletion' + gene_record.raw_fields['proteinimpact'] = nil + gene_record.raw_fields['zygosity'] = 'Heterozygous' + gene_record.raw_fields['variantpathclass'] = 'Pathogenic' + gene_record.raw_fields['report'] = 'A germline pathogenic MLH1 copy number variant was detected in this patient sample' + gene_record.raw_fields['diagnosis_report'] = '1. Genes screened in the panel: MLH1, MSH2, MSH6, PMS2 (all coding exons and exon-intron boundaries).' 
+ res = @handler.process_fields(gene_record) + assert_equal 4, res.size + + gene_variant_genotype = res.find { |g| g.attribute_map['gene'] == 2744 } # MLH1 + assert_equal '16-19', gene_variant_genotype.attribute_map['exonintroncodonnumber'] + assert_equal 2, gene_variant_genotype.attribute_map['teststatus'] + assert_equal 5, gene_variant_genotype.attribute_map['variantpathclass'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 2744 + + assert_equal 1, genotype.attribute_map['teststatus'] + end + end + + test 'normal_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['codingdnasequencechange'] = 'No deletions/duplications detected' + normal_record.raw_fields['report'] = 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.' + normal_record.raw_fields['genotype'] = 'PMS2 - MLPA conf negative' + normal_record.raw_fields['moleculartestingtype'] = nil + + res = @handler.process_fields(normal_record) + assert_equal 1, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'normal_report_result_rec' do + normal_record = build_raw_record('pseudo_id1' => 'patient7') + normal_record.raw_fields['report'] = 'This patient has been screened for MLH1, MSH2, MSH6 and PMS2 variants by sequence analysis. No pathogenic variant was identified.' 
+ normal_record.raw_fields['genotype'] = 'Lynch Diag; normal' + normal_record.raw_fields['proteinimpact'] = nil + normal_record.raw_fields['gene'] = nil + normal_record.raw_fields['codingdnasequencechange'] = 'No result' + + res = @handler.process_fields(normal_record) + assert_equal 4, res.size + + res.each do |genotype| + assert_equal 1, genotype.attribute_map['teststatus'] # normal + end + end + + test 'first_of_report_variant_rec' do + first_record = build_raw_record('pseudo_id1' => 'patient8') + first_record.raw_fields['codingdnasequencechange'] = 'No result' + first_record.raw_fields['proteinimpact'] = nil + first_record.raw_fields['gene'] = nil + first_record.raw_fields['report'] = 'This patient has been screened for variants in the following cancer predisposing genes by sequence analysis:' \ + 'APC, BMPR1A, EPCAM*, GREM1*, MLH1, MSH2, MSH6, MUTYH, NTHL1, PMS2, POLD1, POLE, PTEN, RNF43, SMAD4, STK11.This patient is heterozygous for the ' \ + 'pathogenic NTHL1 variants c.268C>T p.(Gln90Ter)' + first_record.raw_fields['moleculartestingtype'] = 'R211' + + res = @handler.process_fields(first_record) + assert_equal 16, res.size + variant_genotype = res.find { |g| g.attribute_map['gene'] == 3108 } # NTHL1 + assert_not_nil variant_genotype + assert_equal 'c.268C>T', variant_genotype.attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln90Ter', variant_genotype.attribute_map['proteinimpact'] + assert_equal 2, variant_genotype.attribute_map['teststatus'] + + res.each do |genotype| + next if genotype.attribute_map['gene'] == 3108 + + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 'Full screen Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + end + end + + # targeted tests + test 'zygosity_variant_targ_rec' do + targeted_record = build_raw_record('pseudo_id1' => 'patient10') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Heterozygous' + 
targeted_record.raw_fields['gene'] = 'MSH2' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:exon 11-16 deletion' + targeted_record.raw_fields['genotype'] = 'R242_pos_MLPA' + targeted_record.raw_fields['report'] = 'This individual is heterozygous for the germline familial pathogenic MSH2 copy number variant' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-16', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 'NM_000251.2', genotype.attribute_map['referencetranscriptid'] + assert_equal 'Targeted Colorectal Lynch or MMR', genotype.attribute_map['genetictestscope'] + assert_equal 5, genotype.attribute_map['variantpathclass'] + assert_equal 4, genotype.attribute_map['geneticinheritance'] + end + + test 'process_targeted_mosaic_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient11') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['zygosity'] = 'Mosaic' + targeted_record.raw_fields['gene'] = 'PMS2' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000535.5:Whole gene deletion' + targeted_record.raw_fields['genotype'] = 'R443_Confirmation_NGS_MLPA_PMS2' + targeted_record.raw_fields['report'] = 'This patient shows mosaic pattern for the familial MLH1 variant' + targeted_record.raw_fields['variantpathclass'] = 'Pathogenic' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + assert_equal 'NM_000535.5', genotype.attribute_map['referencetranscriptid'] + assert_equal 6, genotype.attribute_map['geneticinheritance'] + assert_equal 5, 
genotype.attribute_map['variantpathclass'] + end + + test 'process_targeted_variant_absent' do + targeted_record = build_raw_record('pseudo_id1' => 'patient12') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'NM_000251.2:Exon 1-7 deletion' + targeted_record.raw_fields['genotype'] = 'R242_neg_MLPA' + targeted_record.raw_fields['report'] = 'Dosage analysis has shown no evidence of the familial pathogenic MSH2 variant.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = 'Variant NOT detected' + targeted_record.raw_fields['gene'] = 'MSH2' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + end + + test 'process_targeted_no_result' do + targeted_record = build_raw_record('pseudo_id1' => 'patient13') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'No result' + targeted_record.raw_fields['genotype'] = 'Fail/Results not required' + targeted_record.raw_fields['report'] = 'No results were obtained from this sample despite repeated attempts' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + targeted_record.raw_fields['diagnosis_report'] = 'Germline heterozygous pathogenic variants in PTEN are associated with PTEN hamartoma tumour syndrome' + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 9, genotype.attribute_map['teststatus'] + assert_equal 62, genotype.attribute_map['gene'] + end + + test 'process_targeted_no_biallelic' do + targeted_record = build_raw_record('pseudo_id1' => 'patient14') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + 
targeted_record.raw_fields['codingdnasequencechange'] = 'No biallelic presence of familial variant' + targeted_record.raw_fields['genotype'] = 'PMS2 - Biallelic (CMMRD) pred negative' + targeted_record.raw_fields['report'] = 'Sequence analysis indicates no biallelic presence of the familial pathogenic PMS2 variant c.2404C>T in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 4, genotype.attribute_map['teststatus'] + assert_equal 3394, genotype.attribute_map['gene'] # PMS2 + end + + test 'process_targeted_cdna_het_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'MSH2 Exon 11-12 duplication heterozygote' + targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA +ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that this patient is heterozygous for the familial likely pathogenic MSH2 duplication of exons 11-12' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 2, genotype.attribute_map['teststatus'] + assert_equal 2804, genotype.attribute_map['gene'] # MSH2 + assert_equal '11-12', genotype.attribute_map['exonintroncodonnumber'] + assert_equal 4, genotype.attribute_map['sequencevarianttype'] + end + + test 'process_result_variant_absent_variant' do + targeted_record = build_raw_record('pseudo_id1' => 'patient15') + targeted_record.raw_fields['moleculartestingtype'] = 'Familial' + targeted_record.raw_fields['codingdnasequencechange'] = 'familial variant absent' + 
targeted_record.raw_fields['genotype'] = 'Lynch Pred MLPA -ve' + targeted_record.raw_fields['report'] = 'MLPA analysis indicates that the familial pathogenic MLH1 exon 16-19 deletion is absent in this patient.' + targeted_record.raw_fields['proteinimpact'] = nil + targeted_record.raw_fields['zygosity'] = nil + targeted_record.raw_fields['gene'] = nil + + res = @handler.process_fields(targeted_record) + assert_equal 1, res.size + + genotype = res[0] + assert_equal 1, genotype.attribute_map['teststatus'] + assert_equal 2744, genotype.attribute_map['gene'] # MLH1 + end + + # Tests for should_process? method + test 'should_process_other_cancer_file_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient16') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + assert @handler.send(:should_process?, record) + end + + test 'should_not_process_other_cancer_file_non_familial' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient17') + record.raw_fields['moleculartestingtype'] = 'Predictive' + record.raw_fields['report'] = 'Testing for MLH1 variants' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_ataxia_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' => 'patient18') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['diagnosis_report'] = 'Testing for ataxia related genes' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + test 'should_not_process_brca_record' do + @handler.instance_variable_set(:@batch, stub(original_filename: 'Other_Cancer_file.txt')) + + record = build_raw_record('pseudo_id1' 
=> 'patient19') + record.raw_fields['moleculartestingtype'] = 'Familial' + record.raw_fields['codingdnasequencechange'] = 'BRCA1 c.181T>G' + record.raw_fields['report'] = 'MLH1 testing' + + refute @handler.send(:should_process?, record) + end + + private + + def clinical_json + { sex: '2', + consultantcode: 'Consultant Code', + providercode: 'Provider Code', + receiveddate: '2010-08-05T00:00:00.000+01:00', + authoriseddate: '2010-09-17T00:00:00.000+01:00', + servicereportidentifier: 'Service Report Identifier', + sortdate: '2010-08-05T00:00:00.000+01:00', + genetictestscope: 'R210.2', + specimentype: '5', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient.', + requesteddate: '2010-08-05T00:00:00.000+01:00', + age: 37 }.to_json + end + + def rawtext_clinical_json + { sex: 'M', + providercode: 'Provider Code', + referringclinicianname: 'Clinician Name', + consultantcode: 'Consultant Code', + servicereportidentifier: 'Service Report Identifier', + patienttype: 'NHS', + moleculartestingtype: 'R210.5', + indicationcategory: 'R210', + genotype: 'PMS2 - MLPA conf negative', + report: 'MLPA analysis indicates that the potential PMS2 copy number variant identified by NGS is absent in this patient', + diagnosis_report: 'Heterozygous mutations in PMS2 are linked to Lynch Syndrome with dominant inheritance. ' \ + 'Homozygous/compound heterozygous mutations in PMS2 are linked to mismatch repair cancer syndrome.', + receiveddate: '2010-08-05 00:00:00', + karyotypingmethod: 'MLPA P008', + codingdnasequencechange: 'No deletions/duplications detected', + proteinimpact: nil, + gene: nil, + zygosity: nil, + variantpathclass: nil, + requesteddate: '2010-08-05 00:00:00', + authoriseddate: '2010-09-17 00:00:00', + specimentype: 'Blood' }.to_json + end +end