diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/AlleleCountKMeansModel.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/AlleleCountKMeansModel.java index 6d5aa5c..694b636 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/AlleleCountKMeansModel.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/AlleleCountKMeansModel.java @@ -82,7 +82,7 @@ public IBDState ibdClass(final SlidingWindow.WindowCente return IBDState.ZERO; } if (dist1 < dist0 && dist1 < dist2) { - return IBDState.ONE; + return IBDState.ONEF; } return IBDState.TWO; } diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDMedianFilter.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDMedianFilter.java index 9dea2f3..5daffe6 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDMedianFilter.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDMedianFilter.java @@ -36,7 +36,8 @@ public IBDMedianFilter(final int windowSize) { ibdClassWindow = new ArrayDeque<>(windowSize); coords = new ArrayDeque<>(windowSize / 2); clusterCounts.put(IBDState.ZERO, 0); - clusterCounts.put(IBDState.ONE, 0); + clusterCounts.put(IBDState.ONEF, 0); + clusterCounts.put(IBDState.ONEM, 0); clusterCounts.put(IBDState.TWO, 0); } @@ -62,8 +63,8 @@ public FilteredValue next(final int start, final IBDState ibdClass) { private IBDState median() { if (clusterCounts.get(IBDState.ZERO) >= windowSize / 2) { return IBDState.ZERO; - } else if (clusterCounts.get(IBDState.ZERO) + clusterCounts.get(IBDState.ONE) >= windowSize / 2) { - return IBDState.ONE; + } else if (clusterCounts.get(IBDState.ZERO) + clusterCounts.get(IBDState.ONEF) >= windowSize / 2) { + return IBDState.ONEF; } else { return IBDState.TWO; } diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDObservation.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDObservation.java index 5e190b6..47c175c 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDObservation.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDObservation.java @@ -28,9 +28,11 @@ public enum IBDObservation { ZERO, ONE, TWO, - ZERO_OR_ONE, + ZERO_OR_ONEF, + ZERO_OR_ONEM, ZERO_OR_TWO, - ONE_OR_TWO, + ONEF_OR_TWO, + ONEM_OR_TWO, ZERO_OR_ONE_OR_TWO; private static boolean bothHom(final Genotype sib1Gt, final Genotype sib2Gt) { @@ -44,15 +46,15 @@ private static boolean bothHet(final Genotype sib1Gt, final Genotype sib2Gt) { /** * Given the four genotypes at a locus, return the type of IBD state observation * - * @param p1 parental genotype 1 - * @param p2 parental genotype 2 + * @param paternalGt paternal genotype + * @param maternalGt maternal genotype * @param s1 sibling genotype 1 * @param s2 sibling genotype 2 * @return the type of IBD observation indicated by the genotype configuration of the quartet * */ - public static IBDObservation getIBDStateObservation(final Genotype p1, final Genotype p2, final Genotype s1, final Genotype s2) { - if (bothHet(p1, p2)) { + public static IBDObservation getIBDStateObservation(final Genotype paternalGt, final Genotype maternalGt, final Genotype s1, final Genotype s2) { + if (bothHet(paternalGt, maternalGt)) { if (bothHom(s1, s2)) { if (s1.sameGenotype(s2)) { return IBDObservation.TWO; @@ -65,33 +67,40 @@ public static IBDObservation getIBDStateObservation(final Genotype p1, final Gen // one sib het, the other hom return IBDObservation.ONE; } - } else if (p1.isHet() && p2.isHom() || (p1.isHom() && p2.isHet())) { - if (bothHet(s1, s2)) { - return IBDObservation.ONE_OR_TWO; - } else if (bothHom(s1,s2)) { - return IBDObservation.ONE_OR_TWO; + } else if (paternalGt.isHet() && maternalGt.isHom() || (paternalGt.isHom() && maternalGt.isHet())) { + if (bothHet(s1, s2) || bothHom(s1, s2)) { + if (paternalGt.isHet()) { + return IBDObservation.ONEF_OR_TWO; + } else { + return IBDObservation.ONEM_OR_TWO; + } } else { // one sib het, the other hom - return IBDObservation.ZERO_OR_ONE; + if (paternalGt.isHom()) { + return IBDObservation.ZERO_OR_ONEF; + } else { + return IBDObservation.ZERO_OR_ONEM; + } } } else { - if (! bothHom(p1,p2)) { - throw new GATKException("Unexpected parental genotypes " + p1 + " and " + p2 + "; expect both to be het, both to be hom, or one het and one hom"); + if (! bothHom(paternalGt,maternalGt)) { + throw new GATKException("Unexpected parental genotypes " + paternalGt + " and " + maternalGt + "; expect both to be het, both to be hom, or one het and one hom"); } - assert(bothHom(p1,p2)); + assert(bothHom(paternalGt,maternalGt)); return IBDObservation.ZERO_OR_ONE_OR_TWO; } - } public boolean agreesWith(IBDState state) { switch(this) { case ZERO: return state == IBDState.ZERO; - case ONE: return state == IBDState.ONE; + case ONE: return state == IBDState.ONEF || state == IBDState.ONEM; case TWO: return state == IBDState.TWO; - case ZERO_OR_ONE: return state != IBDState.TWO; - case ONE_OR_TWO: return state != IBDState.ZERO; - case ZERO_OR_TWO: return state != IBDState.ONE; + case ZERO_OR_ONEF: return state == IBDState.ZERO || state == IBDState.ONEF; + case ZERO_OR_ONEM: return state == IBDState.ZERO || state == IBDState.ONEM; + case ONEF_OR_TWO: return state == IBDState.ONEF || state == IBDState.TWO; + case ONEM_OR_TWO: return state == IBDState.ONEM || state == IBDState.TWO; + case ZERO_OR_TWO: return state == IBDState.ZERO || state == IBDState.TWO; case ZERO_OR_ONE_OR_TWO: return true; } return false; diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDState.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDState.java index 25bb3ca..5d8079a 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDState.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/IBDState.java @@ -18,6 +18,7 @@ */ enum IBDState { ZERO, - ONE, + ONEF, + ONEM, TWO; } diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateHMM.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateHMM.java index e80b5eb..3a3f770 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateHMM.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateHMM.java @@ -70,14 +70,16 @@ public QuartetIBDStateHMM(final double recombinationProb, final double errorProb private void initProbs(final double recombinationProb, final double errorProb) { initialProbs = new EnumMap<>(IBDState.class); - initialProbs.put(IBDState.ZERO, Math.log10(.33)); - initialProbs.put(IBDState.ONE, Math.log10(.33)); - initialProbs.put(IBDState.TWO, Math.log10(.33)); + initialProbs.put(IBDState.ZERO, Math.log10(.25)); + initialProbs.put(IBDState.ONEF, Math.log10(.25)); + initialProbs.put(IBDState.ONEM, Math.log10(.25)); + initialProbs.put(IBDState.TWO, Math.log10(.25)); transitionProbs = new EnumMap<>(IBDState.class); - transitionProbs.put(IBDState.ZERO, new Double[]{Math.log10((1 - recombinationProb) - (errorProb / 2)), Math.log10(recombinationProb - (errorProb / 2)), Math.log10(errorProb)}); - transitionProbs.put(IBDState.ONE, new Double[]{Math.log10((recombinationProb / 2)), Math.log10(1 - recombinationProb), Math.log10(recombinationProb / 2)}); - transitionProbs.put(IBDState.TWO, new Double[]{Math.log10(errorProb), Math.log10(recombinationProb - (errorProb / 2)), Math.log10((1 - recombinationProb) - (errorProb / 2))}); + transitionProbs.put(IBDState.ZERO, new Double[]{Math.log10((1 - recombinationProb) - (errorProb / 2)), Math.log10(recombinationProb / 2 - (errorProb / 4)), Math.log10(recombinationProb / 2 - (errorProb / 4)), Math.log10(errorProb)}); + transitionProbs.put(IBDState.ONEF, new Double[]{Math.log10((recombinationProb / 2)), Math.log10(1 - recombinationProb - errorProb), Math.log10(errorProb), Math.log10(recombinationProb / 2)}); + transitionProbs.put(IBDState.ONEM, new Double[]{Math.log10((recombinationProb / 2)), Math.log10(errorProb), Math.log10(1 - recombinationProb - errorProb), Math.log10(recombinationProb / 2)}); + transitionProbs.put(IBDState.TWO, new Double[]{Math.log10(errorProb), Math.log10(recombinationProb / 2 - (errorProb / 4)), Math.log10(recombinationProb / 2 - (errorProb / 4)), Math.log10((1 - recombinationProb) - (errorProb / 2))}); } /** @@ -252,24 +254,36 @@ protected ModelResults runModel() { private Double getEmission(final IBDState s, final IBDObservation o) { switch (s) { case ZERO: - if (o == IBDObservation.ZERO || o == IBDObservation.ZERO_OR_ONE || o == IBDObservation.ZERO_OR_TWO) { + if (o == IBDObservation.ZERO || o == IBDObservation.ZERO_OR_TWO) { + return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 6); + } else if (o == IBDObservation.ZERO_OR_ONEF || o == IBDObservation.ZERO_OR_ONEM) { return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 3); } else { - return Math.log10(ERROR_EMISSION_PROBABILITY / 3); + return Math.log10(ERROR_EMISSION_PROBABILITY / 4); } - case ONE: - if (o == IBDObservation.ONE || o == IBDObservation.ZERO_OR_ONE || o == IBDObservation.ONE_OR_TWO) { + case ONEF: + if (o == IBDObservation.ONE || o == IBDObservation.ZERO_OR_ONEF || o == IBDObservation.ONEF_OR_TWO) { return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 3); } else if (o == IBDObservation.ZERO_OR_TWO) { return Math.log10(IBD1_HET_ERROR_EMISSION_PROBABILITY); } else { - return Math.log10(IDB1_OTHER_ERROR_EMISSION_PROBABILITY / 2); + return Math.log10(IDB1_OTHER_ERROR_EMISSION_PROBABILITY / 4); + } + case ONEM: + if (o == IBDObservation.ONE || o == IBDObservation.ZERO_OR_ONEM || o == IBDObservation.ONEM_OR_TWO) { + return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 3); + } else if (o == IBDObservation.ZERO_OR_TWO) { + return Math.log10(IBD1_HET_ERROR_EMISSION_PROBABILITY); + } else { + return Math.log10(IDB1_OTHER_ERROR_EMISSION_PROBABILITY / 4); } case TWO: - if (o == IBDObservation.TWO || o == IBDObservation.ZERO_OR_TWO || o == IBDObservation.ONE_OR_TWO) { + if (o == IBDObservation.TWO || o == IBDObservation.ZERO_OR_TWO) { + return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 6); + } else if (o == IBDObservation.ONEF_OR_TWO || o == IBDObservation.ONEM_OR_TWO) { return Math.log10(NON_ERROR_EMISSION_PROBABILITY / 3); } else { - return Math.log10(ERROR_EMISSION_PROBABILITY / 3); + return Math.log10(ERROR_EMISSION_PROBABILITY / 4); } } return Double.NEGATIVE_INFINITY; diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateModel.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateModel.java index 3fbe91f..9866909 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateModel.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/QuartetIBDStateModel.java @@ -86,7 +86,6 @@ public List addSite(final VariantContext vc, final GenotypesCon if (observation == IBDObservation.ZERO_OR_ONE_OR_TWO) { return result; } - if (spFile != null) { spFile.print(vc.getContig() + "\t" + vc.getStart() + "\t" + siblingPair.getName() + "\t" + observation.ordinal() + "\n"); } @@ -126,7 +125,11 @@ private boolean passesABHetThreshold(final Genotype sib1Gt, final Genotype sib2G } private boolean passesABHetThreshold(final Genotype gt) { - final double rawAB = ((double) gt.getAD()[0]) / (gt.getAD()[0] + gt.getAD()[1]); + int[] alleleReadCounts = gt.getAD(); + if (alleleReadCounts == null) { + return true; + } + final double rawAB = ((double) alleleReadCounts[0]) / (alleleReadCounts[0] + alleleReadCounts[1]); final double AB = Math.min(rawAB, 1 - rawAB); return AB > hetABThreshold; } diff --git a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/SiblingIBD.java b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/SiblingIBD.java index 8f91695..995aa7c 100644 --- a/src/main/java/org/broadinstitute/quartetibdanalysis/tools/SiblingIBD.java +++ b/src/main/java/org/broadinstitute/quartetibdanalysis/tools/SiblingIBD.java @@ -114,7 +114,6 @@ public class SiblingIBD extends VariantWalker { @Argument(fullName="pedigree", shortName="ped", doc="Pedigree file") private File pedigreeFile = null; - @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="File to which variants should be written") @@ -321,6 +320,7 @@ public void apply(final VariantContext vc, final ReadsContext readsContext, fina if (sib1Gt.isCalled() && sib2Gt.isCalled()) { final List variantIDBStatesForSibPair = model.addSite(subsetVariantContext, genotypes, siblingPair, sib1Gt, sib2Gt, overlappingCNVs,outFile != null); + if (variantIDBStatesForSibPair.size() == 0) { continue; } @@ -356,7 +356,6 @@ public void apply(final VariantContext vc, final ReadsContext readsContext, fina logger.debug("emitting " + result); } reduce(result, ibdRegionAccumulators); - } private VariantContext addIBDStatesToVC(final VariantContextBuilder builder, final VariantContext vc, final List variantIBDStates) { @@ -459,7 +458,7 @@ private void accumulateValues(final List valueList, final Map valueList, final Map> ibdStateModifications = new TreeMap<>(); final List finalValues = model.finalizeModel(vcfWriter != null); + for (final VariantIBDState variantIBDState : finalValues) { if (! ibdStateModifications.containsKey(variantIBDState.pos)) { ibdStateModifications.put(variantIBDState.pos, new ArrayList<>()); @@ -506,7 +506,7 @@ public Object onTraversalSuccess() { } final IBDRegion region = accumulator.getFinalRegion(); - if (region.state == IBDState.ONE) { + if (region.state == IBDState.ONEF || region.state == IBDState.ONEM) { ibdRegionsFile.print(siblingPair.getName() + "\t" + siblingPair.sib1.getID() + "\t" + siblingPair.sib2.getID() + "\t" + region.chr + "\t" + region.start + "\t" + region.end + "\t" + region.state + "\t" + region.paternalIndicators + "\t" + region.maternalIndicators + "\t" + region.sites + "\t" + region.siteAgreements + "\n"); } else { ibdRegionsFile.print(siblingPair.getName() + "\t" + siblingPair.sib1.getID() + "\t" + siblingPair.sib2.getID() + "\t" + region.chr + "\t" + region.start + "\t" + region.end + "\t" + region.state + "\t" + "NA" + "\t" + "NA" + "\t" + region.sites + "\t" + region.siteAgreements + "\n");