Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions switch/src/models/haplotype_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,45 @@ void haplotype_checker::writePerSample(string fout) {
vrb.bullet("Timing: " + stb.str(tac.rel_time()*1.0/1000, 2) + "s");
}


void haplotype_checker::writeFlipSwitchErrorPerSample(string fout) {
tac.clock();
vrb.title("Writing phasing flip and switch errors per sample in [" + fout + "]");
output_file fdo (fout);

int whole_study_true_errors = 0, whole_study_errors = 0, whole_study_checked = 0;
for (int i = 0 ; i < H.IDXesti.size() ; i++) {
vector < int > HET;
for (int l = 0 ; l < H.n_variants ; l ++) if (Checked[i][l]) HET.push_back(l);

int n_flips = 0, n_switches = 0, n_correct = 0;
for (int h = 2 ; h < HET.size() ; h ++) {
int n_errors = Errors[i][HET[h-1]] + Errors[i][HET[h]];
n_correct += (n_errors == 0);
n_switches += (n_errors == 1);
n_flips += (n_errors == 2);
int n_flips = 0, n_all_switches = 0, n_correct = 0, n_consecutive_flips = 0, prior_prior_error = 0, prior_error = 0;
for (int h = 0 ; h < HET.size() ; h ++) {
if ( h > 0 ){
prior_error = Errors[i][HET[h-1]];
}
if ( h > 1) {
prior_prior_error = Errors[i][HET[h-2]];
}
int current_error = Errors[i][HET[h]];
n_correct += (current_error == 0);
n_all_switches += (current_error == 1);
n_flips += ((prior_error + current_error) == 2);
n_consecutive_flips += ((prior_error + prior_prior_error + current_error) == 3);
}
int total = n_switches + n_flips + n_correct;
fdo << H.vecSamples[H.IDXesti[i]] << " " << n_switches << " " << n_flips << " " << n_correct << " " << stb.str(n_switches * 100.0f / total, 2) << " " << stb.str(n_flips * 100.0f / total, 2) << " " << stb.str(n_correct * 100.0f / total, 2) << endl;

n_flips -= n_consecutive_flips;
int total = n_all_switches + n_flips + n_correct + n_consecutive_flips;
int n_pure_switches = n_all_switches - (n_flips*2 + n_consecutive_flips);
whole_study_errors += n_all_switches;
whole_study_true_errors += n_pure_switches;
whole_study_checked += total;

fdo << H.vecSamples[H.IDXesti[i]] << " " << n_all_switches << " " << n_flips << " " << n_consecutive_flips << " " << n_pure_switches << " " << n_correct << " " << total << " " << stb.str(n_all_switches * 100.0f / total, 2) << " " << stb.str((n_flips*2 + n_consecutive_flips) * 100.0f / total, 2) << " " << stb.str(n_pure_switches * 100.0f / total, 4) << " " << stb.str(n_correct * 100.0f / total, 2) << endl;
Comment on lines +135 to +141
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

total is computed as n_all_switches + n_flips + n_correct + n_consecutive_flips, which mixes per-variant switch outcomes (n_all_switches, n_correct) with event-based counts (n_flips, n_consecutive_flips). This inflates the denominator and makes the reported per-sample/overall switch error rates inconsistent with check()/writePerSample() (which use sum(Checked) as the denominator). Consider defining total_checked as HET.size() (or n_all_switches + n_correct) and using that for percentages and whole_study_checked, while keeping flip-event counts as separate columns.

Suggested change
int total = n_all_switches + n_flips + n_correct + n_consecutive_flips;
int n_pure_switches = n_all_switches - (n_flips*2 + n_consecutive_flips);
whole_study_errors += n_all_switches;
whole_study_true_errors += n_pure_switches;
whole_study_checked += total;
fdo << H.vecSamples[H.IDXesti[i]] << " " << n_all_switches << " " << n_flips << " " << n_consecutive_flips << " " << n_pure_switches << " " << n_correct << " " << total << " " << stb.str(n_all_switches * 100.0f / total, 2) << " " << stb.str((n_flips*2 + n_consecutive_flips) * 100.0f / total, 2) << " " << stb.str(n_pure_switches * 100.0f / total, 4) << " " << stb.str(n_correct * 100.0f / total, 2) << endl;
// Number of checked heterozygous variants for this sample
int total_checked = HET.size();
int n_pure_switches = n_all_switches - (n_flips*2 + n_consecutive_flips);
whole_study_errors += n_all_switches;
whole_study_true_errors += n_pure_switches;
whole_study_checked += total_checked;
fdo << H.vecSamples[H.IDXesti[i]] << " " << n_all_switches << " " << n_flips << " " << n_consecutive_flips << " " << n_pure_switches << " " << n_correct << " " << total_checked << " " << stb.str(n_all_switches * 100.0f / total_checked, 2) << " " << stb.str((n_flips*2 + n_consecutive_flips) * 100.0f / total_checked, 2) << " " << stb.str(n_pure_switches * 100.0f / total_checked, 4) << " " << stb.str(n_correct * 100.0f / total_checked, 2) << endl;

Copilot uses AI. Check for mistakes.
}
fdo.close();
vrb.bullet("#Overall switch error rate = " + stb.str(whole_study_errors * 100.0f / whole_study_checked, 5));
vrb.bullet("#Overall pure switch error rate = " + stb.str(whole_study_true_errors * 100.0f / whole_study_checked, 5));
Comment on lines +135 to +145
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are multiple divisions by total / whole_study_checked that can be zero when a sample has no Checked variants (so HET is empty) or when no samples contribute any checked variants. This will produce a divide-by-zero at output time. Please add guards (e.g., emit zeros/NA for that sample and skip adding to whole-study totals; and only print overall rates if whole_study_checked > 0).

Copilot uses AI. Check for mistakes.

vrb.bullet("Timing: " + stb.str(tac.rel_time()*1.0/1000, 2) + "s");
}

Expand Down
Loading