diff --git a/lib/import/brca/core/brca_base.rb b/lib/import/brca/core/brca_base.rb index ef7a10fe..9e7fa3b3 100644 --- a/lib/import/brca/core/brca_base.rb +++ b/lib/import/brca/core/brca_base.rb @@ -7,7 +7,6 @@ # folder = File.expand_path('../', __dir__) # $LOAD_PATH.unshift(folder) unless $LOAD_PATH.include?(folder) - module Import module Brca module Core @@ -34,6 +33,8 @@ def load return load_manchester(tables) if 'R0A' == @batch.provider + return load_salisbury(tables) if 'RNZ' == @batch.provider + # Enumerate over the tables # Under normal circustances, there will only be one table tables.each do |_tablename, table_content| @@ -63,6 +64,27 @@ def load_manchester(tables) end end + def load_salisbury(tables) + tables.each do |_tablename, table_content| + mapped_table = table_mapping.transform(table_content) + grouped_records_by_linkage = mapped_table.to_a[1..-1].group_by do |_klass, fields, _i| + grouping = fields.values_at('pseudo_id1', 'pseudo_id2') + rawtext = JSON.parse(fields['rawtext_clinical.to_json']) + grouping << rawtext['servicereportidentifier'] + grouping << rawtext['moleculartestingtype'] + grouping << rawtext['authoriseddate'] + grouping + end + + cleaned_records = [] + # From each set of grouped records, build a normalised record + grouped_records_by_linkage.each do |_linkage, records| + cleaned_records << [records.first.first, grouped_rawtext_record_from(records)] + end + cleaned_records.each { |klass, fields| build_and_process_records(klass, fields) } + end + end + private # `records` is an array of many [klass, fields, index] diff --git a/lib/import/brca/providers/salisbury/salisbury_handler.rb b/lib/import/brca/providers/salisbury/salisbury_handler.rb index a05ee615..9c7fc0eb 100644 --- a/lib/import/brca/providers/salisbury/salisbury_handler.rb +++ b/lib/import/brca/providers/salisbury/salisbury_handler.rb @@ -1,5 +1,3 @@ -require 'possibly' - module Import module Brca module Providers @@ -13,19 +11,200 @@ def process_fields(record) genotype.add_passthrough_fields(record.mapped_fields, record.raw_fields, PASS_THROUGH_FIELDS) - - process_molecular_testing(genotype, record) add_organisationcode_testresult(genotype) - extract_teststatus(genotype, record) add_provider_code(genotype, record, ORG_CODE_MAP) - results = process_variant_record(genotype, record) - results.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + record.raw_fields.reject! do |raw_record| + raw_record['status'].match(/Variant\sfor\sAlissa\sreview/ix) + end + + # For clarity, `raw_fields` contains multiple raw records for same SRI + assign_molecular_testing_var(record) + res = process_record(genotype, record) + res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) } + end + + def assign_molecular_testing_var(record) + @mol_testing_type = record.raw_fields.pluck('moleculartestingtype')&.uniq&.first&.downcase + end + + def add_provider_code(genotype, record, org_code_map) + raw_org = record.raw_fields.pluck('providercode')&.uniq&.first&.downcase + org_code = org_code_map[raw_org] + return if org_code.blank? + + genotype.attribute_map['providercode'] = org_code + end + + def process_record(genotype, record) + process_molecular_testing(genotype) + genotypes = [] + if ROW_LEVEL.include?(@mol_testing_type) + record.raw_fields.each { |raw_record| process_row_case(genotypes, genotype, raw_record) } + elsif PANEL_LEVEL.keys.include?(@mol_testing_type) + process_panel_case(genotypes, genotype, record) + elsif HYBRID_LEVEL.include?(@mol_testing_type) + process_hybrid_case(genotypes, genotype, record) + end + + genotypes + end + + def process_row_case(genotypes, genotype, record) + genotype_new = genotype.dup + assign_status_var(record) + status = extract_teststatus_record + genotype_new.add_status(status) + extract_gene_row(genotype_new, record) + if [2, 10].include? status + handle_variant_record(genotype_new, record, genotypes) + else + genotypes << genotype_new + end + genotypes + end + + def process_panel_case(genotypes, genotype, record) + @all_genes = PANEL_LEVEL[@mol_testing_type] + @status_genes_hash = {} + + prepare_gene_status_hash(record) + + record.raw_fields.each do |raw_record| + assign_status_var(raw_record) + process_panel_record(genotypes, genotype, raw_record) unless @all_genes.empty? + end + + # Mark rest of genes in panel as 1 + return if @all_genes.blank? + + process_status_genes(@all_genes, 1, genotype, genotypes, record) + end + + def process_hybrid_case(genotypes, genotype, record) + record.raw_fields.each do |raw_record| + process_row_case(genotypes, genotype, raw_record) + end + + # check if all genes covered + genes_to_be_present = HYBRID_LEVEL[@mol_testing_type] + genes_processed = genotypes.each.collect { |a| a.attribute_map['gene'] }.uniq + + genes_to_be_added = genes_to_be_present - genes_processed + + return if genes_to_be_added.blank? + + genes_to_be_added.each do |gene| + genotype_new = genotype.dup + genotype_new.add_gene(gene) + genotype_new.add_status(1) + genotypes << genotype_new + end + end + + def process_panel_record(genotypes, genotype, raw_record) + status_genes = extract_genes(%w[test genotype], raw_record) + status_genes.each do |status_gene| + status_found = @status_genes_hash[status_gene]&.uniq + if status_found.size > 1 + process_multi_status_genes([status_gene], status_found, genotype, genotypes, raw_record) + elsif UNKNOWN_STATUS.include? @status + process_status_genes([status_gene], 4, genotype, genotypes, raw_record) + elsif FAILED_TEST.match(@status) + process_status_genes(@all_genes, 9, genotype, genotypes, raw_record) + elsif ABNORMAL_STATUS.include? @status + process_status_genes([status_gene], 10, genotype, genotypes, raw_record) + elsif NEGATIVE_STATUS.include? @status + process_status_genes([status_gene], 1, genotype, genotypes, raw_record) + elsif POSITIVE_STATUS.include?(@status) || @status.match(/^variant*/ix) + process_status_genes([status_gene], 2, genotype, genotypes, raw_record) + end + end + end + + def prepare_gene_status_hash(record) + record.raw_fields.each do |raw_record| + assign_status_var(raw_record) + + status_genes = extract_genes(%w[test genotype], raw_record) + status_genes.each do |status_gene| + if @status_genes_hash[status_gene] + @status_genes_hash[status_gene] << @status + else + @status_genes_hash[status_gene] = [@status] + end + end + end + end + + def extract_genes(fields, raw_record) + status_genes = [] + fields.each do |field| + result = raw_record[field]&.scan(BRCA_REGEX)&.flatten&.uniq + if result.present? + status_genes = result + break + end + end + status_genes end - def process_molecular_testing(genotype, record) - mol_testing_type = record.raw_fields['moleculartestingtype']&.downcase - genotype.add_molecular_testing_type_strict(TEST_TYPE_MAPPING[mol_testing_type]) - scope = TEST_SCOPE_MAPPING[mol_testing_type].presence || :no_genetictestscope + def assign_status_var(raw_record) + @status = raw_record['status']&.downcase + end + + # Use priority if more than one status is present for same gene for a given record + # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/PerceivedComplexity + def process_multi_status_genes(status_genes, status_found, genotype, genotypes, raw_record) + if status_found.intersect?(POSITIVE_STATUS) || status_found.any? { |e| e.match(/^variant/) } + process_status_genes(status_genes, 2, genotype, genotypes, raw_record) if extract_teststatus_record == 2 + elsif status_found.intersect?(ABNORMAL_STATUS) + process_status_genes(status_genes, 10, genotype, genotypes, raw_record) if extract_teststatus_record == 10 + elsif status_found.intersect?(NEGATIVE_STATUS) + process_status_genes(status_genes, 1, genotype, genotypes, raw_record) if extract_teststatus_record == 1 + elsif status_found.match(FAILED_TEST) + process_status_genes(@all_genes, 9, genotype, genotypes, raw_record) if extract_teststatus_record == 9 + elsif status_found.intersect?(UNKNOWN_STATUS) + process_status_genes(status_genes, 4, genotype, genotypes, raw_record) if extract_teststatus_record == 4 + end + end + # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/PerceivedComplexity + + def process_status_genes(genes, status, genotype, genotypes, record) + return unless genes&.all? { |gene| @all_genes.include?(gene) } + + genes.each do |gene| + @all_genes -= [gene] + genotype_new = genotype.dup + genotype_new.add_gene(gene) + genotype_new.add_status(status) + if [2, 10].include? status + handle_variant_record(genotype_new, record, genotypes) + else + genotypes << genotype_new + end + end + genotypes + end + + def handle_variant_record(genotype_new, record, genotypes) + variant = record['genotype'] + if variant.present? + if (variant.scan(CDNA_REGEX).size + + variant.scan(EXON_VARIANT_REGEX).size) > 1 + process_multi_vars(genotype_new, variant, genotypes) + else + # Assign varpath class only to single variant records + assign_variantpathclass_record(genotype_new) + process_variants(genotype_new, variant, genotypes) + end + else + genotypes << genotype_new + end + end + + def process_molecular_testing(genotype) + genotype.add_molecular_testing_type_strict(TEST_TYPE_MAPPING[@mol_testing_type]) + scope = TEST_SCOPE_MAPPING[@mol_testing_type].presence || :no_genetictestscope genotype.add_test_scope(scope) end @@ -33,42 +212,62 @@ def add_organisationcode_testresult(genotype) genotype.attribute_map['organisationcode_testresult'] = '699H0' end - def extract_teststatus(genotype, record) - status = record.raw_fields['status']&.downcase - geno_string = record.raw_fields['genotype'] - if POSITIVE_STATUS.include?(status) - genotype.add_status(:positive) - elsif NEGATIVE_STATUS.include?(status) - genotype.add_status(:negative) - elsif FAILED_TEST.match(record.raw_fields['status']) - genotype.add_status(:failed) - elsif UNKNOWN_STATUS.include?(status) - genotype.add_status(:unknown) - elsif GENO_DEPEND_STATUS.include?(status) - teststatus = geno_string.blank? ? :negative : :positive - genotype.add_status(teststatus) + def extract_teststatus_record + if POSITIVE_STATUS.include?(@status) || @status.match(/^variant*/ix) + 2 + elsif NEGATIVE_STATUS.include?(@status) + 1 + elsif FAILED_TEST.match(@status) + 9 + elsif UNKNOWN_STATUS.include?(@status) + 4 + elsif ABNORMAL_STATUS.include? @status + 10 end - @logger.debug "#{genotype.attribute_map['teststatus']} status for : #{status}" end - def process_variant_record(genotype, record) - genotypes = [] - variant = record.raw_fields['genotype'] - test_string = record.raw_fields['test'] - gene = extract_gene(test_string, variant, record) - genotype.add_gene(gene[0]) - add_fs_negative_gene(gene, genotype, genotypes) if test_string.scan(CONFIRM_SEQ_NGS).size.positive? + def assign_variantpathclass_record(genotype) + case @status + when /like(ly)?\spathogenic/ix + genotype.add_variant_class(4) + when /pathogenic/ix + genotype.add_variant_class(5) + when /likely\sbenign/ix + genotype.add_variant_class(2) + when /benign/ix + genotype.add_variant_class(1) + when /variant/ix + genotype.add_variant_class(3) + end + end - process_variants(genotype, variant) if positive_record?(genotype) && variant.present? - genotypes.append(genotype) unless test_string.scan(CONFIRM_SEQ_NGS).size.positive? && gene.blank? + def extract_gene_row(genotype, record) + gene = extract_genes(%w[test genotype moleculartestingtype], record) + return if gene.blank? - genotypes + replacements = { 'BC1' => 'BRCA1', 'BC2' => 'BRCA2' } + gene.map! { |g| replacements[g] || g } + genotype.add_gene(gene.first) end - def process_variants(genotype, variant) - process_cdna_variant(genotype, variant) - process_exonic_variant(genotype, variant) - process_protein_impact(genotype, variant) + def process_multi_vars(genotype_new, variant, genotypes) + variants = variant.split(/;|,/) + variants.each do |var| + genotype_dup = genotype_new.dup + gene = var&.scan(BRCA_REGEX)&.flatten&.uniq + if gene.present? + genotype_dup.add_gene(gene[0]) + @all_genes -= gene if @all_genes.present? + end + process_variants(genotype_dup, var, genotypes) + end + end + + def process_variants(genotype_new, variant, genotypes) + process_cdna_variant(genotype_new, variant) + process_exonic_variant(genotype_new, variant) + process_protein_impact(genotype_new, variant) + genotypes << genotype_new end def process_exonic_variant(genotype, variant) @@ -92,47 +291,6 @@ def process_protein_impact(genotype, variant) genotype.add_protein_impact($LAST_MATCH_INFO[:impact]) @logger.debug "SUCCESSFUL protein parse for: #{$LAST_MATCH_INFO[:impact]}" end - - def add_fs_negative_gene(positive_gene, genotype, genotypes) - negative_genes = %w[BRCA1 BRCA2] - positive_gene - negative_genes&.each do |brca_gene| - genotype_dup = genotype.dup - genotype_dup.add_gene(brca_gene) - genotype_dup.add_status(:negative) - genotypes.append(genotype_dup) - end - end - - def positive_record?(genotype) - genotype.attribute_map['teststatus'] == 2 - end - - # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity - def extract_gene(test_string, geno_string, record) - positive_gene = [] - positive_gene << 'BRCA1' if record.raw_fields['servicereportidentifier'] == 'W1715894' - - gene_string = test_string.scan(CONFIRM_SEQ_NGS).size.positive? ? geno_string : test_string - - case gene_string - when /BRCA1|BC1/i - positive_gene << 'BRCA1' - when /BRCA2|BC2/i - positive_gene << 'BRCA2' - when /PALB2|Variant\s+1/i - positive_gene << 'PALB2' - when /BRIP1/i - positive_gene << 'BRIP1' - when /MLH1/i - positive_gene << 'MLH1' - when /MSH6/i - positive_gene << 'MSH6' - when /MSH2/i - positive_gene << 'MSH2' - end - positive_gene - end - # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity end end end diff --git a/lib/import/helpers/brca/providers/rnz/rnz_constants.rb b/lib/import/helpers/brca/providers/rnz/rnz_constants.rb index c7d9d771..ecb95833 100644 --- a/lib/import/helpers/brca/providers/rnz/rnz_constants.rb +++ b/lib/import/helpers/brca/providers/rnz/rnz_constants.rb @@ -19,7 +19,18 @@ module RnzConstants 'palb2 data only' => :full_screen, 'palb2 mlpa only' => :no_genetictestscope, 'palb2 targetted testing' => :targeted_mutation, - 'brca ashkenazi mutations' => :aj_screen }.freeze + 'brca ashkenazi mutations' => :aj_screen, + 'breast and ovarian cancer 7-gene panel (r208)' => :full_screen, + 'breast and ovarian cancer reanalysis' => :full_screen, + 'breast and ovarian cancer targeted testing' => :targeted_mutation, + 'default' => :full_screen, + 'ovarian cancer 9 gene panel reanalysis' => :full_screen, + 'ovarian cancer 9-gene panel (r207)' => :full_screen, + 'ovarian cancer targeted testing' => :targeted_mutation, + 'ovarian cancer targeted testing profile' => :targeted_mutation, + 'prostate cancer 2-gene panel (r444)' => :full_screen, + 'prostate cancer 8-gene panel (r430)' => :full_screen, + 'prostate cancer targeted testing' => :targeted_mutation }.freeze TEST_TYPE_MAPPING = { 'brca mainstreaming - 3 gene panel (r208)' => :diagnostic, 'brca mainstreaming' => :diagnostic, @@ -39,40 +50,34 @@ module RnzConstants 'like pathogenic', 'pathogenic', 'pathogenic mutation detected', - '?single exon deletion'].freeze + '?single exon deletion', + 'pathogenic cnv'].freeze - NEGATIVE_STATUS = ['benign', - 'likely benign', - 'absent', + ABNORMAL_STATUS = ['benign', + 'likely benign'].freeze + + NEGATIVE_STATUS = ['absent', 'no variant detected', - 'no mutation detected'].freeze + 'no mutation detected', + 'normal'].freeze UNKNOWN_STATUS = ['gaps present', + 'variant - supplementary', + 'no gaps', 'completed'].freeze - GENO_DEPEND_STATUS = ['normal', - 'variant', - 'variant - not reported', + GENO_DEPEND_STATUS = ['variant - not reported', 'variant - not reported', 'variant not reported'].freeze FAILED_TEST = /Fail*+/i - BRCA_REGEX = /(?BRCA1|BRCA2|PALB2|ATM|CHEK2|TP53|MLH1|CDH1| + BRCA_REGEX = /(?BRCA1|BRCA2|PALB2|ATM|CHEK2|TP53|MLH1|CDH1|BC1|BC2| MSH2|MSH6|PMS2|STK11|PTEN|BRIP1|NBN|RAD51C|RAD51D)/ix CONFIRM_SEQ_NGS = /Confirmation\sSequencing|NGS\sResults/ix # rubocop:disable Lint/MixedRegexpCaptureTypes - CDNA_REGEX = /c\.\[?(? - ([0-9]+[+>_-][0-9][+>_-][0-9]+[+>_-][0-9][ACGTdelinsup]+)| - ([0-9]+[+>_-][0-9][+>_-][0-9]+[+>_-][0-9]+[ACGTdelinsup]+)| - ([0-9]+[+>_-][0-9]+[ACGTdelinsup][+>_-][ACGTdelinsup])| - ([0-9]+[ACGTdelinsup]+[+>_-][ACGTdelinsup])| - ([0-9]+[+>_-][0-9]+[ACGTdelinsup]+)| - ([0-9]+[+>_-][0-9]+[+>_-][0-9]+[0-9]+[ACGTdelinsup]+)| - ([0-9]+[?+>_-]+[0-9]+[?+>_-]+[ACGTdelinsup]+)| - ([0-9]+[ACGTdelinsup]+) - )\]?/ix + CDNA_REGEX = /c\.(?[\w+>*\-]+)?[\w\s.]?/ix PROTEIN_REGEX = /p\.\((?.+)\)| \(p\.(?[A-Za-z]+.+)\)| @@ -97,6 +102,53 @@ module RnzConstants 'molecular genetics department' => 'RNZ02', 'wessex clinical genetics service' => 'RNZ02' }.freeze + + ROW_LEVEL = [ + 'brca ashkenazi mutations', + 'brca mainstreaming', + 'brca mlpa only', + 'brca unaffected full screen', + 'breast cancer full screen', + 'breast cancer full screen (htsf lab)', + 'breast cancer full screen data only', + 'breast cancer predictives', + 'palb2', + 'palb2 data only', + 'palb2 mlpa only', + 'palb2 targetted testing', + 'breast and ovarian cancer targeted testing', + 'ovarian cancer targeted testing', + 'ovarian cancer targeted testing profile', + 'prostate cancer 2-gene panel (r444)', + 'prostate cancer targeted testing' + ].freeze + + PANEL_LEVEL = { + 'breast and ovarian cancer 7-gene panel (r208)' => %w[ATM BRCA1 BRCA2 CHEK2 PALB2 RAD51C RAD51D], + 'breast and ovarian cancer reanalysis' => %w[ATM BRCA1 BRCA2 CHEK2 PALB2 RAD51C RAD51D], + 'default' => %w[ATM BRCA1 BRCA2 CHEK2 PALB2 RAD51C RAD51D], + 'ovarian cancer 9 gene panel reanalysis' => %w[BRCA1 BRCA2 BRIP1 MLH1 MSH2 MSH6 PALB2 RAD51C RAD51D], + 'ovarian cancer 9-gene panel (r207)' => %w[BRCA1 BRCA2 BRIP1 MLH1 MSH2 MSH6 PALB2 RAD51C RAD51D], + 'prostate cancer 8-gene panel (r430)' => %w[ATM BRCA1 BRCA2 CHEK2 MLH1 MSH2 MSH6 PALB2] + }.freeze + + HYBRID_LEVEL = { + 'brca mainstreaming - 3 gene panel (r208)' => [3186], + 'breast and ovarian cancer 3-gene panel (r208)' => [3186], + 'ovarian cancer 8-gene panel - data only' => [3615, 3616], + 'ovarian cancer 8-gene panel (r207)' => [3615, 3616] + }.freeze + + STATUS_PANEL_VARPATHCLASS = { + 'likely pathogenic' => 4, + 'pathogenic' => 5, + 'like pathogenic' => 4, + 'pathogenic mutation detected' => 5, + 'benign' => 1, + 'likely benign' => 2, + 'variant' => 3, + 'pathogenic cnv' => 5 + }.freeze # rubocop:enable Lint/MixedRegexpCaptureTypes end end diff --git a/test/lib/import/brca/providers/salisbury/salisbury_handler_test.rb b/test/lib/import/brca/providers/salisbury/salisbury_handler_test.rb index c9dd6eb7..f89176b5 100644 --- a/test/lib/import/brca/providers/salisbury/salisbury_handler_test.rb +++ b/test/lib/import/brca/providers/salisbury/salisbury_handler_test.rb @@ -2,7 +2,7 @@ class SalisburyHandlerTest < ActiveSupport::TestCase def setup - @record = build_raw_record('pseudo_id1' => 'bob') + @record = build_raw_record(options: { 'pseudo_id1' => 'bob' }) @genotype = Import::Brca::Core::GenotypeBrca.new(@record) @importer_stdout, @importer_stderr = capture_io do @@ -12,31 +12,26 @@ def setup @logger = Import::Log.get_logger end - test 'extract_gene' do - gene = @handler.extract_gene(@record.raw_fields['test'], @record.raw_fields['genotype'], @record) - assert_equal 'BRCA2', gene[0] - end - - test 'extract_teststatus' do - @handler.extract_teststatus(@genotype, @record) - assert_equal 2, @genotype.attribute_map['teststatus'] - broken_record = build_raw_record('pseudo_id1' => 'bob') - broken_record.raw_fields['status'] = 'Failed' - @handler.extract_teststatus(@genotype, broken_record) - assert_equal 9, @genotype.attribute_map['teststatus'] + test 'extract_teststatus_record' do + @handler.assign_status_var(@record.raw_fields.first) + assert_equal 2, @handler.extract_teststatus_record + broken_record = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + broken_record.raw_fields.first['status'] = 'Failed' + @handler.assign_status_var(broken_record.raw_fields.first) + assert_equal 9, @handler.extract_teststatus_record end test 'process_variants' do - @handler.process_variants(@genotype, @record.raw_fields['genotype']) + @handler.process_variants(@genotype, @record.raw_fields.first['genotype'], []) assert_equal 'c.9382C>T', @genotype.attribute_map['codingdnasequencechange'] assert_equal 'p.Arg3128Ter', @genotype.attribute_map['proteinimpact'] assert_equal 1, @genotype.attribute_map['sequencevarianttype'] end test 'process_exonic_variants' do - exonic_record = build_raw_record('pseudo_id1' => 'bob') - exonic_record.raw_fields['genotype'] = 'exons 21-24' - @handler.process_variants(@genotype, exonic_record.raw_fields['genotype']) + exonic_record = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + exonic_record.raw_fields.first['genotype'] = 'exons 21-24' + @handler.process_variants(@genotype, exonic_record.raw_fields.first['genotype'], []) assert_equal '21-24', @genotype.attribute_map['exonintroncodonnumber'] assert_equal 10, @genotype.attribute_map['sequencevarianttype'] assert_equal 1, @genotype.attribute_map['variantlocation'] @@ -47,33 +42,17 @@ def setup assert_equal '699H0', @genotype.attribute_map['organisationcode_testresult'] end - test 'process_ngs_fs_record' do - ngs_fs_record = build_raw_record('pseudo_id1' => 'bob') - ngs_fs_record.raw_fields['moleculartestingtype'] = 'Breast and Ovarian cancer 3-gene Panel (R208)' - ngs_fs_record.raw_fields['status'] = 'No mutation detected' - ngs_fs_record.raw_fields['test'] = 'NGS results' - ngs_fs_record.raw_fields['genotype'] = nil - @handler.process_molecular_testing(@genotype, ngs_fs_record) - @handler.extract_teststatus(@genotype, ngs_fs_record) - genotypes = @handler.process_variant_record(@genotype, ngs_fs_record) - assert_equal 2, genotypes.size - assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope'] - assert_equal 1, genotypes[0].attribute_map['teststatus'] - assert_equal 7, genotypes[0].attribute_map['gene'] - assert_equal 'Full screen BRCA1 and BRCA2', genotypes[1].attribute_map['genetictestscope'] - assert_equal 1, genotypes[1].attribute_map['teststatus'] - assert_equal 8, genotypes[1].attribute_map['gene'] - end - test 'targeted_rec' do - targ_rec = build_raw_record('pseudo_id1' => 'bob') - targ_rec.raw_fields['moleculartestingtype'] = 'Breast cancer predictives' - targ_rec.raw_fields['status'] = 'Normal' - targ_rec.raw_fields['test'] = 'BC2_11J' - targ_rec.raw_fields['genotype'] = nil - @handler.process_molecular_testing(@genotype, targ_rec) - @handler.extract_teststatus(@genotype, targ_rec) - genotypes = @handler.process_variant_record(@genotype, targ_rec) + targ_rec = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + targ_rec.raw_fields.first['moleculartestingtype'] = 'Breast cancer predictives' + targ_rec.raw_fields.first['status'] = 'Normal' + targ_rec.raw_fields.first['test'] = 'BC2_11J' + targ_rec.raw_fields.first['genotype'] = nil + @handler.assign_molecular_testing_var(targ_rec) + @handler.process_molecular_testing(@genotype) + @handler.assign_status_var(targ_rec.raw_fields.first) + @handler.extract_teststatus_record + genotypes = @handler.process_record(@genotype, targ_rec) assert_equal 1, genotypes.size assert_equal 'Targeted BRCA mutation test', genotypes[0].attribute_map['genetictestscope'] assert_equal 1, genotypes[0].attribute_map['teststatus'] @@ -81,34 +60,41 @@ def setup end test 'targeted_rec_path' do - targ_rec_path = build_raw_record('pseudo_id1' => 'bob') - targ_rec_path.raw_fields['moleculartestingtype'] = 'Breast cancer predictives' - targ_rec_path.raw_fields['status'] = 'Pathogenic' - targ_rec_path.raw_fields['test'] = 'BC2_02' - targ_rec_path.raw_fields['genotype'] = 'c.51_52delAC p.(Arg18LeufsTer12)' - @handler.process_molecular_testing(@genotype, targ_rec_path) - @handler.extract_teststatus(@genotype, targ_rec_path) - genotypes = @handler.process_variant_record(@genotype, targ_rec_path) + targ_rec_path = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + targ_rec_path.raw_fields.first['moleculartestingtype'] = 'Breast cancer predictives' + targ_rec_path.raw_fields.first['status'] = 'Pathogenic' + targ_rec_path.raw_fields.first['test'] = 'BC2_02' + targ_rec_path.raw_fields.first['genotype'] = 'c.51_52delAC p.(Arg18LeufsTer12)' + @handler.assign_molecular_testing_var(targ_rec_path) + @handler.process_molecular_testing(@genotype) + @handler.assign_status_var(targ_rec_path.raw_fields.first) + @handler.extract_teststatus_record + genotypes = @handler.process_record(@genotype, targ_rec_path) assert_equal 1, genotypes.size assert_equal 'Targeted BRCA mutation test', genotypes[0].attribute_map['genetictestscope'] assert_equal 2, genotypes[0].attribute_map['teststatus'] + assert_equal 5, genotypes[0].attribute_map['variantpathclass'] assert_equal 8, genotypes[0].attribute_map['gene'] assert_equal 'c.51_52del', genotypes[0].attribute_map['codingdnasequencechange'] assert_equal 'p.Arg18LeufsTer12', genotypes[0].attribute_map['proteinimpact'] + assert_equal 5, genotypes[0].attribute_map['variantpathclass'] end test 'fs_rec_path' do - fs_rec_path = build_raw_record('pseudo_id1' => 'bob') - fs_rec_path.raw_fields['moleculartestingtype'] = 'Breast cancer full screen' - fs_rec_path.raw_fields['status'] = 'Likely pathogenic' - fs_rec_path.raw_fields['test'] = 'BRCA2 mutation analysis' - fs_rec_path.raw_fields['genotype'] = 'c.68-7T>A' - @handler.process_molecular_testing(@genotype, fs_rec_path) - @handler.extract_teststatus(@genotype, fs_rec_path) - genotypes = @handler.process_variant_record(@genotype, fs_rec_path) + fs_rec_path = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + fs_rec_path.raw_fields.first['moleculartestingtype'] = 'Breast cancer full screen' + fs_rec_path.raw_fields.first['status'] = 'Likely pathogenic' + fs_rec_path.raw_fields.first['test'] = 'BRCA2 mutation analysis' + fs_rec_path.raw_fields.first['genotype'] = 'c.68-7T>A' + @handler.assign_molecular_testing_var(fs_rec_path) + @handler.process_molecular_testing(@genotype) + @handler.assign_status_var(fs_rec_path.raw_fields.first) + @handler.extract_teststatus_record + genotypes = @handler.process_record(@genotype, fs_rec_path) assert_equal 1, genotypes.size assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope'] assert_equal 2, genotypes[0].attribute_map['teststatus'] + assert_equal 4, genotypes[0].attribute_map['variantpathclass'] assert_equal 8, genotypes[0].attribute_map['gene'] assert_equal 'c.68-7T>A', genotypes[0].attribute_map['codingdnasequencechange'] assert_nil genotypes[0].attribute_map['proteinimpact'] @@ -116,18 +102,18 @@ def setup end test 'no_scope_rec' do - no_scope_rec = build_raw_record('pseudo_id1' => 'bob') - no_scope_rec.raw_fields['moleculartestingtype'] = 'Ovarian cancer targeted testing profile' - no_scope_rec.raw_fields['status'] = 'No mutation detected' - @handler.process_molecular_testing(@genotype, no_scope_rec) + no_scope_rec = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + no_scope_rec.raw_fields.first['moleculartestingtype'] = 'BRCA MLPA only' + no_scope_rec.raw_fields.first['status'] = 'No mutation detected' + @handler.assign_molecular_testing_var(no_scope_rec) + @handler.process_molecular_testing(@genotype) assert_equal 'Unable to assign BRCA genetictestscope', @genotype.attribute_map['genetictestscope'] end test 'add_provider_code' do - prov_record = build_raw_record('pseudo_id1' => 'bob') - + prov_record = build_raw_record(options: { 'pseudo_id1' => 'bob' }) # For which codes are there - prov_record.raw_fields['providercode'] = 'Royal Cornwall Hospital Trust' + prov_record.raw_fields.first['providercode'] = 'Royal Cornwall Hospital Trust' prov_record.mapped_fields['providercode'] = 'Royal Cornwall Hospital Trust' @genotype.add_passthrough_fields(prov_record.mapped_fields, prov_record.raw_fields, Import::Helpers::Brca::Providers::Rnz::RnzConstants::PASS_THROUGH_FIELDS) @@ -135,19 +121,131 @@ def setup assert_equal 'REF12', @genotype.attribute_map['providercode'] # For which codes are not there - prov_record.raw_fields['providercode'] = 'North Devon District Hospital' + genotype = Import::Brca::Core::GenotypeBrca.new(prov_record) + prov_record.raw_fields.first['providercode'] = 'North Devon District Hospital' prov_record.mapped_fields['providercode'] = 'North Devon District Hospital' - @genotype.add_passthrough_fields(prov_record.mapped_fields, prov_record.raw_fields, - Import::Helpers::Brca::Providers::Rnz::RnzConstants::PASS_THROUGH_FIELDS) - @handler.add_provider_code(@genotype, prov_record, Import::Helpers::Brca::Providers::Rnz::RnzConstants::ORG_CODE_MAP) - assert_equal 'North Devon District Hospital', @genotype.attribute_map['providercode'] + genotype.add_passthrough_fields(prov_record.mapped_fields, prov_record.raw_fields, + Import::Helpers::Brca::Providers::Rnz::RnzConstants::PASS_THROUGH_FIELDS) + @handler.add_provider_code(genotype, prov_record, Import::Helpers::Brca::Providers::Rnz::RnzConstants::ORG_CODE_MAP) + assert_equal 'North Devon District Hospital', genotype.attribute_map['providercode'] + end + + test 'process_ngs_fs__hybrid_record' do + ngs_fs_record = build_raw_record(raw_hash: second_rawtext_clinical_hash, options: { 'pseudo_id1' => 'bob' }) + ngs_fs_record.raw_fields.first['moleculartestingtype'] = 'Breast and Ovarian cancer 3-gene Panel (R208)' + ngs_fs_record.raw_fields.first['status'] = 'No mutation detected' + ngs_fs_record.raw_fields.first['test'] = 'NGS results' + ngs_fs_record.raw_fields.first['genotype'] = nil + @handler.assign_molecular_testing_var(ngs_fs_record) + @handler.process_molecular_testing(@genotype) + @handler.assign_status_var(ngs_fs_record.raw_fields.first) + @handler.extract_teststatus_record + genotypes = @handler.process_record(@genotype, ngs_fs_record) + assert_equal 3, genotypes.size + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_nil genotypes[0].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[1].attribute_map['genetictestscope'] + assert_equal 1, genotypes[1].attribute_map['teststatus'] + assert_equal 7, genotypes[1].attribute_map['gene'] + # PALB2 is added separately + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[2].attribute_map['genetictestscope'] + assert_equal 1, genotypes[2].attribute_map['teststatus'] + assert_equal 3186, genotypes[2].attribute_map['gene'] + end + + test 'process_row_level_record' do + row_level_record = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + row_level_record.raw_fields.first['moleculartestingtype'] = 'PALB2 targetted testing' + row_level_record.raw_fields.first['status'] = 'No mutation detected' + row_level_record.raw_fields.first['test'] = 'PALB2' + row_level_record.raw_fields.first['genotype'] = nil + @handler.assign_molecular_testing_var(row_level_record) + @handler.process_molecular_testing(@genotype) + @handler.assign_status_var(row_level_record.raw_fields.first) + @handler.extract_teststatus_record + genotypes = @handler.process_record(@genotype, row_level_record) + assert_equal 1, genotypes.size + assert_equal 'Targeted BRCA mutation test', genotypes[0].attribute_map['genetictestscope'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 3186, genotypes[0].attribute_map['gene'] + end + + test 'process_panel_record' do + panel_rec = build_raw_record(raw_hash: second_rawtext_clinical_hash, options: { 'pseudo_id1' => 'bob' }) + panel_rec.raw_fields.first['moleculartestingtype'] = 'Breast and ovarian cancer 7-gene panel (R208)' + panel_rec.raw_fields.first['status'] = 'Normal' + panel_rec.raw_fields.first['test'] = 'BRCA2 dosage analysis' + panel_rec.raw_fields.first['genotype'] = nil + @handler.assign_molecular_testing_var(panel_rec) + @handler.process_molecular_testing(@genotype) + genotypes = @handler.process_record(@genotype, panel_rec) + assert_equal 7, genotypes.size + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope'] + assert_equal 1, genotypes[0].attribute_map['teststatus'] + assert_equal 8, genotypes[0].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[1].attribute_map['genetictestscope'] + assert_equal 1, genotypes[1].attribute_map['teststatus'] + assert_equal 7, genotypes[1].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[2].attribute_map['genetictestscope'] + assert_equal 1, genotypes[2].attribute_map['teststatus'] + assert_equal 451, genotypes[2].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[3].attribute_map['genetictestscope'] + assert_equal 1, genotypes[3].attribute_map['teststatus'] + assert_equal 865, genotypes[3].attribute_map['gene'] + assert_equal 3186, genotypes[4].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[5].attribute_map['genetictestscope'] + assert_equal 1, genotypes[5].attribute_map['teststatus'] + assert_equal 3615, genotypes[5].attribute_map['gene'] + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[6].attribute_map['genetictestscope'] + assert_equal 1, genotypes[6].attribute_map['teststatus'] + assert_equal 3616, genotypes[6].attribute_map['gene'] + end + + test 'process multivariant cases' do + panel_rec = build_raw_record(options: { 'pseudo_id1' => 'bob' }) + panel_rec.raw_fields.first['moleculartestingtype'] = 'Breast and ovarian cancer 7-gene panel (R208)' + panel_rec.raw_fields.first['genotype'] = 'BRCA2 c.5dupC p.(Gln74); CHEK2 c.4G>A' + panel_rec.raw_fields.first['test'] = 'Cartagenia/Congenica analysis' + @handler.assign_molecular_testing_var(panel_rec) + @handler.process_molecular_testing(@genotype) + genotypes = @handler.process_record(@genotype, panel_rec) + assert_equal 7, genotypes.size + assert_equal 'Full screen BRCA1 and BRCA2', genotypes[0].attribute_map['genetictestscope'] + assert_equal 2, genotypes[0].attribute_map['teststatus'] + assert_equal 8, genotypes[0].attribute_map['gene'] + assert_equal 'c.5dupC', genotypes[0].attribute_map['codingdnasequencechange'] + assert_equal 'p.Gln74', genotypes[0].attribute_map['proteinimpact'] + assert_nil genotypes[0].attribute_map['variantpathclass'] + assert_equal 2, genotypes[1].attribute_map['teststatus'] + assert_equal 865, genotypes[1].attribute_map['gene'] + assert_equal 'c.4G>A', genotypes[1].attribute_map['codingdnasequencechange'] + assert_nil genotypes[1].attribute_map['variantpathclass'] + assert_nil genotypes[1].attribute_map['proteinimpact'] + assert_equal 1, genotypes[2].attribute_map['teststatus'] + assert_equal 451, genotypes[2].attribute_map['gene'] + assert_equal 1, genotypes[3].attribute_map['teststatus'] + assert_equal 7, genotypes[3].attribute_map['gene'] + assert_equal 1, genotypes[4].attribute_map['teststatus'] + assert_equal 3186, genotypes[4].attribute_map['gene'] end private + def build_raw_record(raw_hash: {}, options: {}) + default_options = { 'pseudo_id1' => '', + 'pseudo_id2' => '', + 'encrypted_demog' => '', + 'clinical.to_json' => clinical_json, + 'encrypted_rawtext_demog' => '', + 'rawtext_clinical.to_json' => rawtext_clinical_json(raw_hash) } + Import::Germline::RawRecord.new(default_options.merge!(options)) + end + def clinical_json { sex: '2', consultantcode: 'Consultant Code', + providercode: 'Provider Code', receiveddate: '2017-06-20T00: 00: 00.000+01: 00', authoriseddate: '2017-07-25T00: 00: 00.000+01: 00', servicereportidentifier: 'Service Report Identifier', @@ -156,19 +254,36 @@ def clinical_json age: 999 }.to_json end - def rawtext_clinical_json + def first_rawtext_clinical_hash { sex: 'Female', providercode: 'Provider Name', consultantname: 'Consultant Name', servicereportidentifier: 'Service Report Identifier', service_level: 'NHS', - moleculartestingtype: 'Breast cancer full screen', + moleculartestingtype: 'Breast and Ovarian cancer 3-gene Panel (R208)', requesteddate: '2017-06-20 00: 00: 00', receiveddate: '2017-06-20 00: 00: 00', authoriseddate: '2017-07-25 10: 08: 18', specimentype: 'Blood', status: 'Pathogenic mutation detected', genotype: 'c.9382C>T p.(Arg3128Ter)', - test: 'BRCA2 mutation analysis' }.to_json + test: 'BRCA2 mutation analysis' } + end + + def second_rawtext_clinical_hash + { sex: 'Female', + servicereportidentifier: 'Service Report Identifier', + moleculartestingtype: 'Breast and Ovarian cancer 3-gene Panel (R208)', + specimentype: 'External DNA', + status: 'Normal', + genotype: nil, + test: 'BRCA1 dosage analysis' } + end + + def rawtext_clinical_json(raw_hash) + raw_json = [] + raw_json << first_rawtext_clinical_hash + raw_json << raw_hash if raw_hash.present? + raw_json.to_json end end