Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions lib/import/colorectal/core/genocolorectal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class Genocolorectal < Import::Germline::Genotype
'RAD51C' => 3615,
'RAD51D' => 3616,
'VHL' => 83,
'ATM' => 451 }.freeze
'ATM' => 451,
'SCG5' => 5092 }.freeze

COLORECTAL_REGEX = /(?<apc>APC)|
(?<bmpr>BMPR1A)|
Expand Down Expand Up @@ -69,7 +70,8 @@ class Genocolorectal < Import::Germline::Genotype
(?<rad51c>RAD51C)|
(?<rad51d>RAD51D)|
(?<vhl>VHL) |
(?<atm>ATM)/ix # Added by Francesco
(?<atm>ATM) |
(?<scg5>SCG5)/ix # Added by Francesco

# ------------------------ Interrogators ------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def process_fields(record)
# return for brca cases
return if record.raw_fields['investigation code'].match(/BRCA/i)

record.raw_fields['genotype']&.gsub!('SGC5', 'SCG5')
genocolorectal = Import::Colorectal::Core::Genocolorectal.new(record)
genocolorectal.add_passthrough_fields(record.mapped_fields,
record.raw_fields,
Expand All @@ -23,7 +24,7 @@ def process_fields(record)
add_test_scope(genocolorectal, record)
add_test_type(genocolorectal, record)
add_test_status(genocolorectal, record)
res = process_variant_records(genocolorectal, record) # Added by Francesco
res = process_variant_records(genocolorectal, record)
res.each { |cur_genotype| @persister.integrate_and_store(cur_genotype) }
end

Expand Down Expand Up @@ -127,8 +128,7 @@ def process_fullscreen_records(genocolorectal, record, genocolorectals)
genocolorectal.add_gene_colorectal(gene)
if positive_rec?(genocolorectal)
add_fs_negative_genes(gene, genocolorectal, genocolorectals, record)
process_variants(genocolorectal, variant)
genocolorectals.append(genocolorectal)
process_variants(genocolorectals, genocolorectal, variant)
elsif gene.present? # for other status records
add_fs_negative_genes(gene, genocolorectal, genocolorectals, record)
genocolorectals.append(genocolorectal)
Expand All @@ -138,8 +138,10 @@ def process_fullscreen_records(genocolorectal, record, genocolorectals)
add_variant_class(genocolorectal, record)
end

def add_fs_negative_genes(gene, genocolorectal, genocolorectals, _record)
def add_fs_negative_genes(gene, genocolorectal, genocolorectals, record)
negative_genes = @genes_panel - [gene] unless @genes_panel == [gene]
variant_gene = record.raw_fields['genotype']&.scan(COLORECTAL_GENES_REGEX)&.flatten
negative_genes -= variant_gene if variant_gene.present?
negative_genes&.each do |neg_gene|
genocolo_other = genocolorectal.dup_colo
genocolo_other.add_status(1)
Expand All @@ -164,9 +166,12 @@ def process_targeted_screen(genocolorectal, record, genocolorectals)
variant = record.raw_fields['genotype']
gene = get_gene(record)
genocolorectal.add_gene_colorectal(gene)
process_variants(genocolorectal, variant) if positive_rec?(genocolorectal)
add_variant_class(genocolorectal, record)
genocolorectals.append(genocolorectal)
if positive_rec?(genocolorectal)
process_variants(genocolorectals, genocolorectal, variant)
else
genocolorectals.append(genocolorectal)
end
end

def get_gene(record)
Expand Down Expand Up @@ -212,14 +217,65 @@ def non_pathogenic?(record)
false
end

def process_variants(genocolorectal, variant)
# Routes the genotype text of a positive record to the matching variant
# parser and collects the resulting genotype object(s).
#
# genocolorectals - Array the resulting genotypes are appended to.
# genocolorectal  - genotype built so far for the record.
# variant         - raw genotype text; may be nil.
#
# Returns the same genocolorectals array.
def process_variants(genocolorectals, genocolorectal, variant)
  # Nothing to parse: keep the record as built so far instead of failing on
  # nil further down.
  return genocolorectals.append(genocolorectal) if variant.nil?

  genes = variant.scan(COLORECTAL_GENES_REGEX).flatten

  # COLORECTAL_GENES_REGEX is case-insensitive, so compare upper-cased names.
  grem1_scg5_only = !genes.empty? &&
                    genes.all? { |gene| %w[GREM1 SCG5].include?(gene.to_s.upcase) }

  # For variants like "het dup GREM1 and SCG5": one duplication spanning both genes
  if grem1_scg5_only && variant.scan(/dup/i).size == 1
    prepare_grem1_scg5_genos(genocolorectals, genocolorectal, variant)
  elsif genes.size > 1
    process_multi_genes(genocolorectals, genocolorectal, variant, genes)
  else
    process_mutations(genocolorectal, variant)
    genocolorectals.append(genocolorectal)
  end
  genocolorectals
end

# Builds one genotype per gene for a single variant that names both GREM1 and
# SCG5, and appends them to genocolorectals (GREM1 first, then SCG5).
#
# Each copy comes from genocolorectal.dup_colo and receives the variant type
# keyword(s) found in the variant text. Only the GREM1 copy gets
# 'variantlocation' set to 5.
# NOTE(review): the meaning of location code 5 is not visible here - confirm.
#
# Returns the genocolorectals array.
def prepare_grem1_scg5_genos(genocolorectals, genocolorectal, variant)
  type_keywords = variant.scan(VARIANTTYPE_REGEX)

  %w[GREM1 SCG5].each do |gene_name|
    geno_copy = genocolorectal.dup_colo
    geno_copy.add_gene_colorectal(gene_name)
    geno_copy.attribute_map['variantlocation'] = 5 if gene_name == 'GREM1'
    geno_copy.add_variant_type(type_keywords.join)
    genocolorectals.append(geno_copy)
  end

  genocolorectals
end

# Splits a genotype string that names more than one gene into two fragments
# and hands them to process_variant_strings.
#
# The string is cut once, at the last occurrence of the last gene name:
# everything before it is the first fragment, the gene name and the rest are
# the second. Cutting at the last occurrence (rather than splitting on every
# occurrence) avoids gene-less fragments when the same gene text appears
# twice, and no string is modified in place.
#
# genocolorectals - Array the resulting genotypes are appended to.
# genocolorectal  - genotype the per-gene copies are derived from.
# variant         - raw genotype text.
# genes           - gene names found in variant, in order of appearance.
#
# Returns whatever process_variant_strings returns.
def process_multi_genes(genocolorectals, genocolorectal, variant, genes)
  last_gene = genes.last
  cut = variant.rindex(last_gene)
  # If the gene text is not found, fall back to the whole text plus the bare gene name.
  head = cut ? variant[0...cut] : variant
  tail = cut ? variant[cut..-1] : last_gene

  variant_types = variant.scan(VARIANTTYPE_REGEX)
  # When the keyword count differs from the gene count, the joined keyword(s)
  # are set on the base genotype before it is copied per fragment.
  genocolorectal.add_variant_type(variant_types.join) unless genes.size == variant_types.size
  process_variant_strings(genocolorectals, genocolorectal, [head, tail])
end

# Creates one genotype per fragment of a multi-gene genotype string and
# appends each to genocolorectals.
#
# For every fragment a copy of genocolorectal is given the gene named in the
# fragment and the mutation details parsed by process_mutations. If the copy
# still has no 'sequencevarianttype', the variant type keyword(s) found in
# the fragment are applied as a fallback.
#
# Returns variant_strings.
def process_variant_strings(genocolorectals, genocolorectal, variant_strings)
  variant_strings.each do |fragment|
    geno_copy = genocolorectal.dup_colo

    # scan is called for its side effect only: it leaves the gene match in
    # $LAST_MATCH_INFO, which is read on the next line.
    fragment.scan(COLORECTAL_GENES_REGEX)
    geno_copy.add_gene_colorectal($LAST_MATCH_INFO[:colorectal])

    process_mutations(geno_copy, fragment)

    type_keywords = fragment.scan(VARIANTTYPE_REGEX)
    type_missing = geno_copy.attribute_map['sequencevarianttype'].nil?
    geno_copy.add_variant_type(type_keywords.join) if type_missing

    genocolorectals.append(geno_copy)
  end
end

# Runs the three mutation parsers over the variant text, in fixed order:
# cDNA change, exonic variant, protein impact. Each parser receives the same
# genocolorectal and variant.
#
# Returns the result of the last parser (process_protein_impact).
def process_mutations(genocolorectal, variant)
  parsers = %i[process_cdna_variant process_exonic_variant process_protein_impact]
  parsers.map { |parser| send(parser, genocolorectal, variant) }.last
end

def process_exonic_variant(genocolorectal, variant)
return unless variant.scan(EXON_VARIANT_REGEX).size.positive?
return unless variant.scan(EXON_REGEX).size.positive?

genocolorectal.add_exon_location($LAST_MATCH_INFO[:exons])
genocolorectal.add_variant_type($LAST_MATCH_INFO[:variant])
Expand Down
23 changes: 9 additions & 14 deletions lib/import/helpers/colorectal/providers/rtd/rtd_constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,12 @@ module RtdConstants
([+>_-][0-9]+[ACGTdelinsup>]+)
)\]?/ix

EXON_VARIANT_REGEX = /(?<variant>del|dup|ins).+ex(?<on>on)?(?<s>s)?\s
(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)\s?
(?<variant>del|dup|ins)|
(?<variant>del|dup|ins)\sexon(?<s>s)?\s
(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+))|
ex(on)?(s)?\s?(?<exons>[0-9]+\s?(\s?-\s?[0-9]+)?)\s?
(?<variant>del|dup|ins)?|
(?<variant>del|dup|ins)(?<s>\s)?(?<exons>[0-9]+(?<dgs>-[0-9]+)?)|
ex(?<on>on)?(?<s>s)?\s(?<exons>[0-9]+(?<dgs>\sto\s[0-9]+)?)\s
(?<variant>del|dup|ins)|
x(?<exons>[0-9]+-?[0-9]+)\s?(?<variant>del|dup|ins)|
x(?<exons>[0-9]+-?[0-9]?)\s?(?<variant>del|dup|ins)/ix
# Recognises exon-level deletion / duplication / insertion descriptions in a
# genotype string. Named captures:
#   variant - the keyword del, dup or ins
#   exons   - the exon token, optionally extended to a second token joined by
#             "to", "and", "-" or whitespace
# Flags: i (case-insensitive) and x (whitespace and newlines in the pattern
# are ignored, which is why the alternatives can sit one per line and real
# spaces are written as \s).
# Alternatives are tried in order. In the last two, either the variant
# keyword or the exon token is optional, so a capture may be nil.
# NOTE(review): exon tokens also accept the letters A, C, G and T - confirm
# this is intended.
EXON_REGEX = /(?<variant>del|dup|ins).+ex(on)?(s)?\s(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)|
ex(on)?(s)?\s?(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?<variant>del|dup|ins)|
^(?<variant>del|dup|ins)\s?(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)|
x(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?<variant>del|dup|ins)|
ex(on)?s?\s?(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)\s?(?<variant>del|dup|ins)?|
ex(on)?(s)?\s?(?<variant>del|dup|ins)\s?(?<exons>[0-9ACGT]+((to|and|-|\s)+[0-9ACGT]+)?)?/ix

PROTEIN_REGEX = /p\.[\[(]?(?<impact>([a-z]+[0-9]+[a-z]+([^[:alnum:]][0-9]+)?)|
([a-z]+[0-9]+[^[:alnum:]]))[)\]]?/ix
Expand All @@ -109,11 +102,13 @@ module RtdConstants
SMAD4|
STK11|
GREM1|
NTHL1)/ix
NTHL1|
SCG5)/ix
HNPCC = %w[MLH1 MSH2 MSH6 PMS2 EPCAM].freeze
HNPCCMLPA = %w[MLH1 MSH2 MSH6 EPCAM].freeze
COLORECTALCANCER = %w[APC BMPR1A EPCAM GREM1 MLH1 MSH2 MSH6 MUTYH NTHL1 PMS2
POLD1 POLE PTEN SMAD4 STK11].freeze
# Variant type keywords (and the ">" substitution marker) looked for in
# genotype text, case-insensitively. "delins" is listed before "del" so the
# longer keyword wins when both could match at the same position.
VARIANTTYPE_REGEX = /delins|indel|dup|del|ins|>/ix
FAPMAP = %w[APC MUTYH].freeze
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,51 @@ def setup
assert_equal 2804, genotypes[4].attribute_map['gene']
end

test 'process multigene variant' do
  record = build_raw_record('pseudo_id1' => 'bob')
  record.raw_fields['genotype'] = 'EPCAM ex9-MSH2 ex2 del'
  record.raw_fields['investigation code'] = 'HNPCC'
  @handler.add_test_scope(@genotype, record)
  @handler.add_test_status(@genotype, record)
  genotypes = @handler.process_variant_records(@genotype, record)

  # Expected attributes per genotype, in output order. The last two records
  # carry the exon deletions (exon 9 and exon 2) from the genotype string.
  expected = [
    { 'gene' => 2744, 'teststatus' => 1 },
    { 'gene' => 2808, 'teststatus' => 1 },
    { 'gene' => 3394, 'teststatus' => 1 },
    { 'gene' => 1432, 'teststatus' => 2, 'sequencevarianttype' => 3, 'exonintroncodonnumber' => '9' },
    { 'gene' => 2804, 'teststatus' => 2, 'sequencevarianttype' => 3, 'exonintroncodonnumber' => '2' }
  ]

  assert_equal expected.size, genotypes.size
  expected.each_with_index do |attributes, index|
    attributes.each do |field, value|
      assert_equal value, genotypes[index].attribute_map[field]
    end
  end
end

test 'process GREM1 dup SCG5 variant' do
  record = build_raw_record('pseudo_id1' => 'bob')
  record.raw_fields['genotype'] = 'het dup GREM1 and SCG5'
  record.raw_fields['moleculartestingtype'] = 'Predictive'
  record.raw_fields['investigation code'] = 'HNPCC'

  @handler.add_test_scope(@genotype, record)
  @handler.add_test_status(@genotype, record)
  genotypes = @handler.process_variant_records(@genotype, record)

  assert_equal 6, genotypes.size

  # The two positive records come last: the first carries variantlocation 5,
  # the second (gene 5092) must not.
  first_positive = genotypes[4].attribute_map
  second_positive = genotypes[5].attribute_map

  { 'gene' => 1882, 'teststatus' => 2, 'sequencevarianttype' => 4, 'variantlocation' => 5 }.each do |field, value|
    assert_equal value, first_positive[field]
  end

  { 'gene' => 5092, 'teststatus' => 2, 'sequencevarianttype' => 4 }.each do |field, value|
    assert_equal value, second_positive[field]
  end
  assert_nil second_positive['variantlocation']
end

private

def clinical_json
Expand Down
Loading