From 5a8d630597cd1a6332be7879deac457314c8b932 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Mon, 16 Jun 2025 15:18:54 +0100 Subject: [PATCH 1/5] add a scaled confidence threshold for dehosting to test --- include/classify_stats.hpp | 6 ++++++ include/dehost_arguments.hpp | 2 ++ include/read_entry.hpp | 6 ++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/classify_stats.hpp b/include/classify_stats.hpp index e93f2ca..ea567b3 100644 --- a/include/classify_stats.hpp +++ b/include/classify_stats.hpp @@ -401,6 +401,7 @@ class StatsModel { uint32_t min_length_; float min_compression_; int8_t confidence_threshold_; + float confidence_probability_threshold_; uint8_t min_hits_; float host_unique_prop_lo_threshold_; float min_proportion_difference_; @@ -444,6 +445,7 @@ class StatsModel { min_length_(opt.min_length), min_compression_(opt.min_compression), confidence_threshold_(opt.confidence_threshold), + confidence_probability_threshold_(opt.confidence_probability_threshold), host_unique_prop_lo_threshold_(opt.host_unique_prop_lo_threshold), min_proportion_difference_(opt.min_proportion_difference), min_prob_difference_(opt.min_prob_difference){ @@ -496,6 +498,10 @@ class StatsModel { return confidence_threshold_; } + float confidence_probability_threshold() const { + return confidence_probability_threshold_; + } + uint8_t min_num_hits() const { return min_hits_; } diff --git a/include/dehost_arguments.hpp b/include/dehost_arguments.hpp index 26412b1..fbd0b6c 100644 --- a/include/dehost_arguments.hpp +++ b/include/dehost_arguments.hpp @@ -31,6 +31,7 @@ struct DehostArguments { uint32_t min_length { 140 }; float min_compression {0}; uint8_t confidence_threshold{2}; + float confidence_probability_threshold{5}; float host_unique_prop_lo_threshold{ 0.05 }; float min_proportion_difference { 0.04 }; float min_prob_difference{ 0 }; @@ -63,6 +64,7 @@ struct DehostArguments { ss += "\tmin_quality:\t\t\t" + std::to_string(min_quality) + "\n"; ss += "\tmin_compression:\t\t" + std::to_string(min_compression) + "\n"; ss += "\tconfidence_threshold:\t\t" + std::to_string(confidence_threshold) + "\n"; + ss += "\tconfidence_probability_threshold:\t\t" + std::to_string(confidence_probability_threshold) + "\n"; ss += "\thost_unique_prop_lo_threshold:\t" + std::to_string(host_unique_prop_lo_threshold) + "\n"; ss += "\tmin_proportion_difference:\t" + std::to_string(min_proportion_difference) + "\n"; ss += "\tmin_prob_difference:\t\t" + std::to_string(min_prob_difference) + "\n\n"; diff --git a/include/read_entry.hpp b/include/read_entry.hpp index 6950029..c645823 100644 --- a/include/read_entry.hpp +++ b/include/read_entry.hpp @@ -270,13 +270,15 @@ class ReadEntry { if (host_unique_prop > other_unique_prop and host_unique_prop - other_unique_prop > stats_model.min_proportion_difference() and host_prob > other_prob - and host_prob - other_prob > stats_model.min_prob_difference()) + and host_prob - other_prob > stats_model.min_prob_difference() + and std::max(host_prob * confidence_score_,static_cast(confidence_score_)) >= stats_model.confidence_probability_threshold()) call_ = host_index; else if (host_unique_prop < stats_model.host_unique_prop_lo_threshold() and host_unique_prop < other_unique_prop and other_unique_prop - host_unique_prop > stats_model.min_proportion_difference() and host_prob < other_prob - and other_prob - host_prob > stats_model.min_prob_difference()) + and other_prob - host_prob > stats_model.min_prob_difference() + and std::max(other_prob * 
confidence_score_,static_cast(confidence_score_)) >= stats_model.confidence_probability_threshold()) call_ = other_index; } From 6e12126d751dafb06f18379ea0d356e5e318c25a Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Fri, 20 Jun 2025 11:06:02 +0100 Subject: [PATCH 2/5] avoid segfault by reducing from max value --- include/index_arguments.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/index_arguments.hpp b/include/index_arguments.hpp index 0867ab6..b02d1c8 100644 --- a/include/index_arguments.hpp +++ b/include/index_arguments.hpp @@ -17,7 +17,7 @@ struct IndexArguments { uint8_t kmer_size { 19 }; // IBF options - mutable size_t bits {std::numeric_limits::max()}; // Allow to change bits for each partition + mutable size_t bits {std::numeric_limits::max()-2}; // Allow to change bits for each partition uint8_t num_hash {3}; double max_fpr {0.01}; From b148792d013cfb0c652246496ae68d5d35b4ef01 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Mon, 23 Jun 2025 12:32:40 +0100 Subject: [PATCH 3/5] enable classify to work for more than 2 categories --- include/classify_arguments.hpp | 6 +- include/classify_stats.hpp | 1 - include/read_entry.hpp | 136 ++++++++++++-------------- src/classify_main.cpp | 169 +++++++++++++++++++-------------- 4 files changed, 160 insertions(+), 152 deletions(-) diff --git a/include/classify_arguments.hpp b/include/classify_arguments.hpp index 8b9e6d3..994e8e0 100644 --- a/include/classify_arguments.hpp +++ b/include/classify_arguments.hpp @@ -25,8 +25,7 @@ struct ClassifyArguments { float min_quality { 10.0 }; uint32_t min_length { 140 }; float min_compression {0.15}; - uint8_t confidence_threshold{0}; - uint8_t min_hits{2}; + uint8_t confidence_threshold{2}; float min_proportion_difference { 0.00 }; // Output options @@ -58,8 +57,7 @@ struct ClassifyArguments { ss += "\tmin_quality:\t\t" + std::to_string(min_quality) + "\n"; ss += "\tmin_compression:\t\t" + std::to_string(min_compression) + "\n"; ss += "\tconfidence_threshold:\t" + std::to_string(confidence_threshold) + "\n"; - ss += "\tmin_hits:\t\t" + std::to_string(min_hits) + "\n"; - ss += "\tmin_diff:\t\t" + std::to_string(min_proportion_difference) + "\n\n"; + ss += "\tmin_proportion_diff:\t\t" + std::to_string(min_proportion_difference) + "\n\n"; ss += "\tcategory_to_extract:\t" + category_to_extract + "\n"; ss += "\tprefix:\t" + prefix + "\n\n"; diff --git a/include/classify_stats.hpp b/include/classify_stats.hpp index ea567b3..12be7cd 100644 --- a/include/classify_stats.hpp +++ b/include/classify_stats.hpp @@ -430,7 +430,6 @@ class StatsModel { min_length_(opt.min_length), min_compression_(opt.min_compression), confidence_threshold_(opt.confidence_threshold), - min_hits_(opt.min_hits), min_proportion_difference_(opt.min_proportion_difference) { for (auto i = 0; i < summary.num_categories(); ++i) { models_.emplace_back(Model(i, opt.dist)); diff --git a/include/read_entry.hpp b/include/read_entry.hpp index c645823..5889891 100644 --- a/include/read_entry.hpp +++ b/include/read_entry.hpp @@ -21,7 +21,7 @@ class ReadEntry { float compression_; uint32_t num_hashes_{0}; - std::vector::membership_agent_type::binning_bitvector> bits_;// this collects over all bins + std::vector::membership_agent_type::binning_bitvector> bits_;// this collects over all bins std::unordered_map> max_bits_; // this summarizes over categories (which may have multiple bins) std::vector counts_; std::vector unique_counts_; @@ -43,17 +43,17 @@ class ReadEntry { ~ReadEntry() = default; - ReadEntry(const 
std::string& read_id, const uint32_t& length, const float& mean_quality, const float& compression, const InputSummary &summary) : + ReadEntry(const std::string &read_id, const uint32_t &length, const float &mean_quality, const float &compression, + const InputSummary &summary) : read_id_(read_id), length_(length), mean_quality_(mean_quality), compression_(compression), counts_(summary.num_categories(), 0), - proportions_(summary.num_categories(),0), - unique_proportions_(summary.num_categories(),0), + proportions_(summary.num_categories(), 0), + unique_proportions_(summary.num_categories(), 0), unique_counts_(summary.num_categories(), 0), - probabilities_(summary.num_categories(),1) - { + probabilities_(summary.num_categories(), 1) { PLOG_DEBUG << "Initialize entry with read_id " << read_id << " and length " << length; bits_.reserve(length); @@ -63,15 +63,15 @@ class ReadEntry { PLOG_VERBOSE << "Initializing complete for read_id " << read_id; } - const std::string& read_id() const { + const std::string &read_id() const { return read_id_; } - const std::vector& proportions() const { + const std::vector &proportions() const { return proportions_; } - const std::vector& unique_proportions() const { + const std::vector &unique_proportions() const { return unique_proportions_; } @@ -94,20 +94,19 @@ class ReadEntry { // get totals in each bin seqan3::counting_vector total_bits_per_bin(summary.num_bins, 0); - for (const auto & entry : bits_) - { + for (const auto &entry: bits_) { total_bits_per_bin += entry; } PLOG_DEBUG << "Have the following total hits per bin: " << total_bits_per_bin; // identify max bin per category - std::vector index_per_category(summary.num_categories(),std::numeric_limits::max()); - for ( auto bin=0; bin < total_bits_per_bin.size(); ++bin) - { - const auto & bits_in_bin = total_bits_per_bin[bin]; - const auto & category = summary.bin_to_category.at(bin); - const auto & index = summary.category_index(category); - if (index_per_category.at(index) == std::numeric_limits::max() or total_bits_per_bin[bin] > total_bits_per_bin[index_per_category[index]]) { + std::vector index_per_category(summary.num_categories(), std::numeric_limits::max()); + for (auto bin = 0; bin < total_bits_per_bin.size(); ++bin) { + const auto &bits_in_bin = total_bits_per_bin[bin]; + const auto &category = summary.bin_to_category.at(bin); + const auto &index = summary.category_index(category); + if (index_per_category.at(index) == std::numeric_limits::max() or + total_bits_per_bin[bin] > total_bits_per_bin[index_per_category[index]]) { index_per_category.at(index) = bin; counts_.at(index) = total_bits_per_bin[bin]; PLOG_DEBUG << "Category " << category << " has max index " << +bin; @@ -121,14 +120,12 @@ class ReadEntry { // collect together the bitvector for max bits and the unique_counts std::vector found; - for (const auto & entry : bits_){ + for (const auto &entry: bits_) { found.clear(); - for (auto category=0; category < index_per_category.size(); ++category) - { - const auto & category_index = index_per_category[category]; + for (auto category = 0; category < index_per_category.size(); ++category) { + const auto &category_index = index_per_category[category]; max_bits_.at(category).emplace_back(entry[category_index]); - if (entry[category_index] == 1) - { + if (entry[category_index] == 1) { found.push_back(category); } } @@ -158,31 +155,22 @@ class ReadEntry { } void call_category(const StatsModel &stats_model) { - //TODO extend this to work with more than 2 categories - 
assert(probabilities_.size() == 2); - - double first = probabilities_.at(0); + // We want to compare based on unique_counts_ because confidence is defined based on the difference of the + // top 2 unique_counts_. However, we will reject the result if the probabilities do not support this ordering. uint8_t first_pos = 0; - double second = probabilities_.at(1); uint8_t second_pos = 1; - if (unique_counts_.at(second_pos) > unique_counts_.at(first_pos)) - { - std::swap(first, second); + if (unique_counts_.at(second_pos) > unique_counts_.at(first_pos)) { std::swap(first_pos, second_pos); } - /*for (auto i = 0; i < probabilities_.size(); ++i) { - const double &val = probabilities_.at(i); - if (val >= second) { - second = val; + for (auto i = 2; i < unique_counts_.size(); ++i) { + if (unique_counts_.at(i) > unique_counts_.at(second_pos)) { second_pos = i; - if (second >= first) - { - std::swap(first, second); + if (unique_counts_.at(second_pos) > unique_counts_.at(first_pos)) { std::swap(first_pos, second_pos); } } - }*/ + } auto raw_confidence = unique_counts_.at(first_pos) - unique_counts_.at(second_pos); if (raw_confidence > std::numeric_limits::max()) @@ -190,49 +178,46 @@ class ReadEntry { else confidence_score_ = static_cast(raw_confidence); - if (mean_quality_ < stats_model.min_quality()){ + if (mean_quality_ < stats_model.min_quality()) { return; } - if (length_ < stats_model.min_length()){ + if (length_ < stats_model.min_length()) { return; } - if (compression_ < stats_model.min_compression()){ + if (compression_ < stats_model.min_compression()) { return; } - if (second == 0 and first > 0) { + if (probabilities_.at(second_pos) == 0 and probabilities_.at(first_pos) > 0) { call_ = first_pos; } else { - /*auto ratio = first/second; - PLOG_VERBOSE << "confidence score " << ratio << " from " << first << "/" << second; - if (ratio < std::numeric_limits::max()) - confidence_score_ = static_cast(ratio); - else - confidence_score_ = std::numeric_limits::max();*/ - if (confidence_score_ > stats_model.confidence_threshold() and first > second) + if (confidence_score_ > stats_model.confidence_threshold() and + probabilities_.at(first_pos) > probabilities_.at(second_pos)) call_ = first_pos; } - const auto & first_count = counts_.at(first_pos); - const auto & second_count = counts_.at(second_pos); - if (second_count > first_count or first_count - second_count < stats_model.min_num_hits()){ - PLOG_DEBUG << read_id_ << " has first count " << +first_count << " and second count " << +second_count << " which have differences less than " << +stats_model.min_num_hits(); + const auto &first_count = counts_.at(first_pos); + const auto &second_count = counts_.at(second_pos); + if (second_count > first_count or first_count - second_count < stats_model.min_num_hits()) { + PLOG_DEBUG << read_id_ << " has first count " << +first_count << " and second count " << +second_count + << " which have differences less than " << +stats_model.min_num_hits(); call_ = std::numeric_limits::max(); // if we don't see at least this number of hits difference, then no call } - const auto & first_prop = proportions_.at(first_pos); - const auto & second_prop = proportions_.at(second_pos); - if (second_prop > first_prop or first_prop - second_prop < stats_model.min_proportion_difference()){ - PLOG_DEBUG << read_id_ << " has first proportion " << first_prop << " and second proportion " << second_prop << " which have differences less than " << stats_model.min_proportion_difference(); + const auto &first_prop = proportions_.at(first_pos); 
+ const auto &second_prop = proportions_.at(second_pos); + if (second_prop > first_prop or first_prop - second_prop < stats_model.min_proportion_difference()) { + PLOG_DEBUG << read_id_ << " has first proportion " << first_prop << " and second proportion " << second_prop + << " which have differences less than " << stats_model.min_proportion_difference(); call_ = std::numeric_limits::max(); // if we don't see at least this level of discrepancy between the proportion hitting against each database don't call } } void call_host(const StatsModel &stats_model, const uint8_t host_index) { assert(probabilities_.size() == 2); - const uint8_t other_index = 1-host_index; + const uint8_t other_index = 1 - host_index; double host_unique_prop = unique_proportions_.at(host_index); double other_unique_prop = unique_proportions_.at(other_index); @@ -241,8 +226,7 @@ class ReadEntry { auto first_pos = host_index; auto second_pos = other_index; - if (host_unique_prop < other_unique_prop) - { + if (host_unique_prop < other_unique_prop) { first_pos = other_index; second_pos = host_index; } @@ -251,19 +235,19 @@ class ReadEntry { confidence_score_ = std::numeric_limits::max(); else confidence_score_ = static_cast(raw_confidence); - if (confidence_score_ < stats_model.confidence_threshold()){ + if (confidence_score_ < stats_model.confidence_threshold()) { return; } - if (mean_quality_ < stats_model.min_quality()){ + if (mean_quality_ < stats_model.min_quality()) { return; } - if (length_ < stats_model.min_length()){ + if (length_ < stats_model.min_length()) { return; } - if (compression_ < stats_model.min_compression()){ + if (compression_ < stats_model.min_compression()) { return; } @@ -271,14 +255,16 @@ class ReadEntry { and host_unique_prop - other_unique_prop > stats_model.min_proportion_difference() and host_prob > other_prob and host_prob - other_prob > stats_model.min_prob_difference() - and std::max(host_prob * confidence_score_,static_cast(confidence_score_)) >= stats_model.confidence_probability_threshold()) + and std::max(host_prob * confidence_score_, static_cast(confidence_score_)) >= + stats_model.confidence_probability_threshold()) call_ = host_index; else if (host_unique_prop < stats_model.host_unique_prop_lo_threshold() - and host_unique_prop < other_unique_prop - and other_unique_prop - host_unique_prop > stats_model.min_proportion_difference() - and host_prob < other_prob - and other_prob - host_prob > stats_model.min_prob_difference() - and std::max(other_prob * confidence_score_,static_cast(confidence_score_)) >= stats_model.confidence_probability_threshold()) + and host_unique_prop < other_unique_prop + and other_unique_prop - host_unique_prop > stats_model.min_proportion_difference() + and host_prob < other_prob + and other_prob - host_prob > stats_model.min_prob_difference() + and std::max(other_prob * confidence_score_, static_cast(confidence_score_)) >= + stats_model.confidence_probability_threshold()) call_ = other_index; } @@ -305,7 +291,8 @@ class ReadEntry { } void print_result(const InputSummary &summary) { - std::cout << read_id_ << "\t" << num_hashes_ << "\t" << summary.category_name(call_) << "\t" << +confidence_score_ << "\t" ; + std::cout << read_id_ << "\t" << num_hashes_ << "\t" << summary.category_name(call_) << "\t" + << +confidence_score_ << "\t"; for (auto i = 0; i < summary.num_categories(); i++) { std::cout << summary.categories.at(i) << ":" << counts_.at(i) << ":" << probabilities_.at(i) << "\t"; } @@ -339,7 +326,8 @@ class ReadEntry { else std::cout << "C" << "\t"; 
std::cout.precision(6); - std::cout << read_id_ << "\t" << summary.category_name(call_) << "\t" << length_ << "\t" << num_hashes_ << "\t" << mean_quality_ << "\t" << +confidence_score_ << "\t" << compression_ << "\t"; + std::cout << read_id_ << "\t" << summary.category_name(call_) << "\t" << length_ << "\t" << num_hashes_ << "\t" + << mean_quality_ << "\t" << +confidence_score_ << "\t" << compression_ << "\t"; for (auto i = 0; i < summary.num_categories(); i++) { std::cout << summary.categories.at(i) << ":" << counts_.at(i) << ":" << proportions_.at(i) << ":" << unique_proportions_.at(i) << ":" << probabilities_.at(i) << " "; diff --git a/src/classify_main.cpp b/src/classify_main.cpp index 17b14ee..274a287 100644 --- a/src/classify_main.cpp +++ b/src/classify_main.cpp @@ -22,39 +22,29 @@ #include -void setup_classify_subcommand(CLI::App& app) -{ +void setup_classify_subcommand(CLI::App &app) { auto opt = std::make_shared(); - auto* classify_subcommand = app.add_subcommand( - "classify", "Classify read file using index."); + auto *classify_subcommand = app.add_subcommand( + "classify", "Classify read file using index."); classify_subcommand->add_option("", opt->read_file, "Fasta/q file") - ->required() - ->transform(make_absolute) - ->check(CLI::ExistingFile.description("")) - ->type_name("FILE"); + ->required() + ->transform(make_absolute) + ->check(CLI::ExistingFile.description("")) + ->type_name("FILE"); classify_subcommand->add_option("", opt->read_file2, "Paired Fasta/q file") ->transform(make_absolute) ->check(CLI::ExistingFile.description("")) ->type_name("FILE"); - classify_subcommand - ->add_option("--chunk_size", opt->chunk_size, "Read file is read in chunks of this size, to be processed in parallel within a chunk.") - ->type_name("INT") - ->capture_default_str(); - - classify_subcommand - ->add_option("-t,--threads", opt->threads, "Maximum number of threads to use.") - ->type_name("INT") - ->capture_default_str(); - classify_subcommand->add_option("--db", opt->db, "Prefix for the index.") - ->type_name("FILE") - ->required() - ->check(CLI::ExistingPath.description("")); + ->type_name("FILE") + ->required() + ->check(CLI::ExistingPath.description("")); - classify_subcommand->add_option("-e,--extract", opt->category_to_extract, "Reads from this category in the index will be extracted to file.") + classify_subcommand->add_option("-e,--extract", opt->category_to_extract, + "Reads from this category in the index will be extracted to file.") ->type_name("STRING"); classify_subcommand->add_option("-p,--prefix", opt->prefix, "Prefix for the output files.") @@ -62,26 +52,52 @@ void setup_classify_subcommand(CLI::App& app) ->check(CLI::NonexistentPath.description("")) ->default_str(""); + classify_subcommand + ->add_option("--chunk_size", opt->chunk_size, + "Read file is read in chunks of this size, to be processed in parallel within a chunk.") + ->type_name("INT") + ->capture_default_str(); + + classify_subcommand + ->add_option("--lo_hi_threshold", opt->lo_hi_threshold, + "Threshold used during model fitting stage to decide if read should be used to train lo or hi distribution.") + ->type_name("FLOAT") + ->capture_default_str(); + + classify_subcommand + ->add_option("--num_reads_to_fit", opt->num_reads_to_fit, + "Number of reads to use to train each distribution in the model.") + ->type_name("INT") + ->capture_default_str(); + classify_subcommand->add_option("-d,--dist", opt->dist, "Probability distribution to use for modelling.") ->type_name("STRING"); classify_subcommand - 
->add_option("--confidence", opt->confidence_threshold, "Minimum difference between the top 2 unique hit counts.") + ->add_option("--min_quality", opt->min_quality, "Minimum read quality to classify.") ->type_name("INT") ->capture_default_str(); classify_subcommand - ->add_option("--min_hits", opt->min_hits, "Minimum difference between the top 2 (non-unique) hit counts.") + ->add_option("--min_length", opt->min_length, "Minimum read length to classify.") ->type_name("INT") ->capture_default_str(); classify_subcommand - ->add_option("--min_length", opt->min_length, "Minimum length of read for classification.") + ->add_option("--min_compression", opt->min_compression, + "Minimum read gzip compression ratio to classify (a measure of how much information is in the read.") + ->type_name("FLOAT") + ->capture_default_str(); + + classify_subcommand + ->add_option("--confidence", opt->confidence_threshold, + "Minimum difference between the top 2 unique hit counts.") ->type_name("INT") ->capture_default_str(); classify_subcommand - ->add_option("--min_diff", opt->min_proportion_difference, "Minimum difference between the proportion of (non-unique) kmers found in each category.") + ->add_option("--min_proportion_diff", opt->min_proportion_difference, + "Minimum difference between the proportion of (non-unique) kmers found in each category.") ->type_name("FLOAT") ->capture_default_str(); @@ -89,17 +105,24 @@ void setup_classify_subcommand(CLI::App& app) ->transform(make_absolute) ->type_name("FILE"); + classify_subcommand + ->add_option("-t,--threads", opt->threads, "Maximum number of threads to use.") + ->type_name("INT") + ->capture_default_str(); + classify_subcommand->add_flag( - "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); + "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); + // Set the function that will be called when this subcommand is issued. 
classify_subcommand->callback([opt]() { classify_main(*opt); }); } -void classify_reads(const ClassifyArguments& opt, const Index& index){ +void classify_reads(const ClassifyArguments &opt, const Index &index) { PLOG_INFO << "Classifying file " << opt.read_file; - auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, seqan3::window_size{index.window_size()}); + auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, + seqan3::window_size{index.window_size()}); PLOG_VERBOSE << "Defined hash_adaptor"; auto agent = index.agent(); @@ -116,41 +139,40 @@ void classify_reads(const ClassifyArguments& opt, const Index& index){ PLOG_DEBUG << "Defined Result with " << +index.num_bins() << " bins"; - for (auto && chunk : fin | seqan3::views::chunk(opt.chunk_size)) - { + for (auto &&chunk: fin | seqan3::views::chunk(opt.chunk_size)) { // You can use a for loop: - for (auto & record : chunk) - { + for (auto &record: chunk) { records.push_back(std::move(record)); } #pragma omp parallel for firstprivate(agent, hash_adaptor) num_threads(opt.threads) shared(result) - for (auto i=0; i std::numeric_limits::max()){ + if (read_length > std::numeric_limits::max()) { PLOG_WARNING << "Ignoring read " << record.id() << " as too long!"; continue; } - if (read_length == 0){ + if (read_length == 0) { PLOG_WARNING << "Ignoring read " << record.id() << " as has zero length!"; continue; } - auto qualities = record.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); + auto qualities = record.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); auto sum = std::accumulate(qualities.begin(), qualities.end(), 0); float mean_quality = 0; if (std::ranges::size(qualities) > 0) - mean_quality = static_cast< float >( sum )/ static_cast< float >(std::ranges::size(qualities)); + mean_quality = static_cast< float >( sum ) / static_cast< float >(std::ranges::size(qualities)); PLOG_VERBOSE << "Mean quality of read " << record.id() << " is " << mean_quality; float compression_ratio = get_compression_ratio(sequence_to_string(record.sequence())); PLOG_VERBOSE << "Found compression ratio of read " << record.id() << " is " << compression_ratio; auto read = ReadEntry(read_id, read_length, mean_quality, compression_ratio, result.input_summary()); - for (auto && value : record.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } PLOG_VERBOSE << "Finished adding raw hash counts for read " << read_id; @@ -166,10 +188,11 @@ void classify_reads(const ClassifyArguments& opt, const Index& index){ } -void classify_paired_reads(const ClassifyArguments& opt, const Index& index){ +void classify_paired_reads(const ClassifyArguments &opt, const Index &index) { PLOG_INFO << "Classifying files " << opt.read_file << " and " << opt.read_file2; - auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, seqan3::window_size{index.window_size()}); + auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, + seqan3::window_size{index.window_size()}); PLOG_VERBOSE << "Defined hash_adaptor"; auto agent = index.agent(); @@ -188,50 +211,50 @@ void classify_paired_reads(const ClassifyArguments& opt, const Index& index){ PLOG_DEBUG << 
"Defined Result with " << +index.num_bins() << " bins"; - for (auto && chunk : fin1 | seqan3::views::chunk(opt.chunk_size)) - { - for (auto & record : chunk) - { + for (auto &&chunk: fin1 | seqan3::views::chunk(opt.chunk_size)) { + for (auto &record: chunk) { records1.push_back(std::move(record)); } // loop in the second file and get same amount of reads - for ( auto& record2 : fin2 | std::views::take( opt.chunk_size ) ) - { - records2.push_back( std::move( record2 )); + for (auto &record2: fin2 | std::views::take(opt.chunk_size)) { + records2.push_back(std::move(record2)); } #pragma omp parallel for firstprivate(agent, hash_adaptor) num_threads(opt.threads) shared(result) - for (auto i=0; i std::numeric_limits::max()){ + if (read_length > std::numeric_limits::max()) { PLOG_WARNING << "Ignoring read " << record1.id() << " as too long!"; continue; } - if (read_length == 0){ + if (read_length == 0) { PLOG_WARNING << "Ignoring read " << record1.id() << " as has zero length!"; continue; } - auto qualities1 = record1.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); - auto qualities2 = record2.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); + auto qualities1 = record1.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); + auto qualities2 = record2.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); auto sum = std::accumulate(qualities1.begin(), qualities1.end(), 0); sum = std::accumulate(qualities2.begin(), qualities2.end(), sum); float mean_quality = 0; if (std::ranges::size(qualities1) + std::ranges::size(qualities2) > 0) - mean_quality = static_cast< float >( sum )/ static_cast< float >(std::ranges::size(qualities1) + std::ranges::size(qualities2)); + mean_quality = static_cast< float >( sum ) / + static_cast< float >(std::ranges::size(qualities1) + std::ranges::size(qualities2)); PLOG_VERBOSE << "Mean quality of read " << record1.id() << " is " << mean_quality; auto combined_record = sequence_to_string(record1.sequence()) + sequence_to_string(record2.sequence()); @@ -239,12 +262,12 @@ void classify_paired_reads(const ClassifyArguments& opt, const Index& index){ PLOG_VERBOSE << "Found compression ratio of read " << record1.id() << " is " << compression_ratio; auto read = ReadEntry(read_id, read_length, mean_quality, compression_ratio, result.input_summary()); - for (auto && value : record1.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record1.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } - for (auto && value : record2.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record2.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } PLOG_VERBOSE << "Finished adding raw hash counts for read " << read_id; @@ -261,8 +284,7 @@ void classify_paired_reads(const ClassifyArguments& opt, const Index& index){ } -int classify_main(ClassifyArguments & opt) -{ +int classify_main(ClassifyArguments &opt) { auto log_level = plog::info; if (opt.verbosity == 1) { log_level = plog::debug; @@ -288,14 +310,14 @@ int classify_main(ClassifyArguments & opt) opt.run_extract = (opt.category_to_extract != ""); const auto categories = index.categories(); - if (opt.run_extract and opt.category_to_extract != "all" and 
std::find(categories.begin(), categories.end(), opt.category_to_extract) == categories.end()) - { + if (opt.run_extract and opt.category_to_extract != "all" and + std::find(categories.begin(), categories.end(), opt.category_to_extract) == categories.end()) { std::string options = ""; for (auto i: categories) options += i + " "; PLOG_ERROR << "Cannot extract " << opt.category_to_extract << ", please chose one of [ all " << options << "]"; return 1; - } else if (opt.run_extract){ + } else if (opt.run_extract) { if (opt.prefix == "") opt.prefix = "charon"; std::vector to_extract; @@ -304,19 +326,20 @@ int classify_main(ClassifyArguments & opt) else to_extract.push_back(opt.category_to_extract); const auto extension = get_extension(opt.read_file); - for (const auto & category : to_extract){ + for (const auto &category: to_extract) { const auto category_index = index.get_category_index(category); if (opt.is_paired) { - opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + "_1" + extension + ".gz"); - opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + "_2" + extension + ".gz"); + opt.extract_category_to_file[category_index].push_back( + opt.prefix + "_" + category + "_1" + extension + ".gz"); + opt.extract_category_to_file[category_index].push_back( + opt.prefix + "_" + category + "_2" + extension + ".gz"); } else { opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + extension + ".gz"); } } } - if (opt.dist != "gamma" and opt.dist != "beta") - { + if (opt.dist != "gamma" and opt.dist != "beta") { PLOG_ERROR << "Supported distributions are [gamma , beta]"; return 1; } From b642c96f1a965e2776a1c72e6e7b9dc75b6cc668 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Mon, 23 Jun 2025 16:23:45 +0100 Subject: [PATCH 4/5] change confidence threshold to 7 for dehost --- include/dehost_arguments.hpp | 37 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/include/dehost_arguments.hpp b/include/dehost_arguments.hpp index fbd0b6c..4371c65 100644 --- a/include/dehost_arguments.hpp +++ b/include/dehost_arguments.hpp @@ -10,41 +10,39 @@ struct DehostArguments { // IO options std::filesystem::path read_file; std::filesystem::path read_file2; - bool is_paired { false }; + bool is_paired{false}; std::string db; // Output options - bool run_extract {false}; + bool run_extract{false}; std::string category_to_extract; std::string prefix; std::unordered_map> extract_category_to_file; - uint8_t chunk_size { 100 }; + uint8_t chunk_size{100}; // Stats options - float lo_hi_threshold {0.15}; - uint16_t num_reads_to_fit {5000}; + float lo_hi_threshold{0.15}; + uint16_t num_reads_to_fit{5000}; std::string dist{"kde"}; // thresholds for filtering - float min_quality { 15.0 }; - uint32_t min_length { 140 }; - float min_compression {0}; - uint8_t confidence_threshold{2}; - float confidence_probability_threshold{5}; - float host_unique_prop_lo_threshold{ 0.05 }; - float min_proportion_difference { 0.04 }; - float min_prob_difference{ 0 }; - + float min_quality{15.0}; + uint32_t min_length{140}; + float min_compression{0}; + uint8_t confidence_threshold{7}; + float confidence_probability_threshold{0}; + float host_unique_prop_lo_threshold{0.05}; + float min_proportion_difference{0.04}; + float min_prob_difference{0}; // General options - std::string log_file {"charon.log"}; - uint8_t threads { 1 }; - uint8_t verbosity { 0 }; + std::string log_file{"charon.log"}; + uint8_t 
threads{1}; + uint8_t verbosity{0}; - std::string to_string() - { + std::string to_string() { std::string ss; ss += "\n\nDehost Arguments:\n\n"; @@ -70,7 +68,6 @@ struct DehostArguments { ss += "\tmin_prob_difference:\t\t" + std::to_string(min_prob_difference) + "\n\n"; - ss += "\tlog_file:\t\t\t" + log_file + "\n"; ss += "\tthreads:\t\t\t" + std::to_string(threads) + "\n"; ss += "\tverbosity:\t\t\t" + std::to_string(verbosity) + "\n\n"; From 96563e46fdb71b7b67557131ad85eeb453c3fd26 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Mon, 23 Jun 2025 16:25:30 +0100 Subject: [PATCH 5/5] reformat code --- CMakeLists.txt | 42 ++-- include/classify_arguments.hpp | 27 +- include/classify_main.hpp | 7 +- include/classify_stats.hpp | 23 +- include/counts.hpp | 35 ++- include/dehost_main.hpp | 7 +- include/index.hpp | 297 +++++++++++----------- include/index_arguments.hpp | 21 +- include/index_main.hpp | 16 +- include/input_stats.hpp | 84 +++---- include/input_summary.hpp | 99 ++++---- include/load_index.hpp | 2 +- include/result.hpp | 310 +++++++++++------------ include/store_index.hpp | 3 +- include/utils.hpp | 30 ++- src/dehost_main.cpp | 435 +++++++++++++++++---------------- src/index_main.cpp | 152 ++++++------ src/load_index.cpp | 3 +- src/main.cpp | 17 +- src/utils.cpp | 112 ++++----- 20 files changed, 846 insertions(+), 876 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3f0bb7..ee7dce7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ # charon # ============================================================================= set(PROJECT_NAME_STR charon) -cmake_minimum_required( VERSION 3.9 FATAL_ERROR ) +cmake_minimum_required(VERSION 3.9 FATAL_ERROR) add_custom_target(version ${CMAKE_COMMAND} -D SRC=${CMAKE_SOURCE_DIR}/version.h.in @@ -10,7 +10,7 @@ add_custom_target(version -P ${CMAKE_SOURCE_DIR}/cmake/GenerateVersion.cmake ) -project( ${PROJECT_NAME_STR} LANGUAGES CXX ) +project(${PROJECT_NAME_STR} LANGUAGES CXX) # Use C++20 set(CMAKE_CXX_STANDARD 20) @@ -22,34 +22,34 @@ set(CMAKE_CXX_EXTENSIONS OFF) # ----------------------------------------------------------------------------- # dependencies and 3rd party libraries # ----------------------------------------------------------------------------- -set (PROGRAM_SUBMODULES_DIR - "${CMAKE_CURRENT_LIST_DIR}/lib" - CACHE STRING "Directory containing submodules." +set(PROGRAM_SUBMODULES_DIR + "${CMAKE_CURRENT_LIST_DIR}/lib" + CACHE STRING "Directory containing submodules." 
) # Specify the directories where to store the built archives, libraries and executables -set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") -set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") -set (CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") -set (CMAKE_INSTALL_BINDIR "bin") +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") +set(CMAKE_INSTALL_BINDIR "bin") # Require OPENMP find_package(OpenMP REQUIRED) -find_package( ZLIB REQUIRED ) +find_package(ZLIB REQUIRED) -set (CPM_INDENT "CMake Package Manager CPM: ") -include (${PROJECT_SOURCE_DIR}/cmake/CPM.cmake) -CPMUsePackageLock (${PROJECT_SOURCE_DIR}/cmake/package-lock.cmake) +set(CPM_INDENT "CMake Package Manager CPM: ") +include(${PROJECT_SOURCE_DIR}/cmake/CPM.cmake) +CPMUsePackageLock(${PROJECT_SOURCE_DIR}/cmake/package-lock.cmake) -CPMGetPackage (seqan3) -CPMGetPackage (plog) -CPMGetPackage (gcem) -CPMGetPackage (statslib) -CPMGetPackage (bzip2) -CPMGetPackage (zlib) -CPMGetPackage (gzip) +CPMGetPackage(seqan3) +CPMGetPackage(plog) +CPMGetPackage(gcem) +CPMGetPackage(statslib) +CPMGetPackage(bzip2) +CPMGetPackage(zlib) +CPMGetPackage(gzip) -add_definitions(-D ZLIB_CONST ) +add_definitions(-D ZLIB_CONST) # ----------------------------------------------------------------------------- # install diff --git a/include/classify_arguments.hpp b/include/classify_arguments.hpp index 994e8e0..ae91dee 100644 --- a/include/classify_arguments.hpp +++ b/include/classify_arguments.hpp @@ -11,36 +11,35 @@ struct ClassifyArguments { // IO options std::filesystem::path read_file; std::filesystem::path read_file2; - bool is_paired { false }; + bool is_paired{false}; std::string db; - uint8_t chunk_size { 100 }; + uint8_t chunk_size{100}; // Stats options - float lo_hi_threshold {0.15}; - uint16_t num_reads_to_fit {5000}; + float lo_hi_threshold{0.15}; + uint16_t num_reads_to_fit{5000}; std::string dist{"beta"}; // thresholds for filtering - float min_quality { 10.0 }; - uint32_t min_length { 140 }; - float min_compression {0.15}; + float min_quality{10.0}; + uint32_t min_length{140}; + float min_compression{0.15}; uint8_t confidence_threshold{2}; - float min_proportion_difference { 0.00 }; + float min_proportion_difference{0.00}; // Output options - bool run_extract {false}; + bool run_extract{false}; std::string category_to_extract; std::string prefix; std::unordered_map> extract_category_to_file; // General options - std::string log_file {"charon.log"}; - uint8_t threads { 1 }; - uint8_t verbosity { 0 }; + std::string log_file{"charon.log"}; + uint8_t threads{1}; + uint8_t verbosity{0}; - std::string to_string() - { + std::string to_string() { std::string ss; ss += "\n\nClassify Arguments:\n\n"; diff --git a/include/classify_main.hpp b/include/classify_main.hpp index a221d94..5bac524 100644 --- a/include/classify_main.hpp +++ b/include/classify_main.hpp @@ -11,13 +11,14 @@ #include "result.hpp" class Index; + struct ClassifyArguments; -void setup_classify_subcommand(CLI::App& app); +void setup_classify_subcommand(CLI::App &app); -void classify_reads(const ClassifyArguments& opt, const Index& index); +void classify_reads(const ClassifyArguments &opt, const Index &index); -int classify_main(ClassifyArguments & opt); +int classify_main(ClassifyArguments &opt); #endif // CHARON_CLASSIFY_MAIN_H diff --git a/include/classify_stats.hpp b/include/classify_stats.hpp index 12be7cd..6b1d36b 
100644 --- a/include/classify_stats.hpp +++ b/include/classify_stats.hpp @@ -217,7 +217,7 @@ struct KDEParams { std::sort(dataset.begin(), dataset.end()); } - float quantile(const float& alpha) const { + float quantile(const float &alpha) const { int idx = std::ceil((1. - alpha) * dataset.size()); return dataset[idx]; } @@ -227,7 +227,7 @@ struct KDEParams { const auto mu = mean(dataset); const auto var = variance(dataset, mu); const auto std = sqrt(var); - const auto A = std::min(iqr/1.34, std); + const auto A = std::min(iqr / 1.34, std); h = 0.9 * A * pow(dataset.size(), -0.2); } @@ -239,16 +239,16 @@ struct KDEParams { PLOG_INFO << "Fitting KDE to data with factor " << h; } - float K(const float & x) const { - return std::exp(-std::pow(x,2)/2)/sqrt(2*3.141592653589793238463); + float K(const float &x) const { + return std::exp(-std::pow(x, 2) / 2) / sqrt(2 * 3.141592653589793238463); } - float prob(const float & x) const { + float prob(const float &x) const { float total_sum = 0; - for (const auto &xi : dataset){ + for (const auto &xi: dataset) { total_sum += K((x - xi) / h); } - return total_sum/(h*dataset.size()); + return total_sum / (h * dataset.size()); } }; @@ -267,8 +267,8 @@ class Model { GammaParams g_neg{10, 0, 0.005}; BetaParams b_pos{6, 4}; BetaParams b_neg{6, 40}; - KDEParams k_pos{default_pos_data,0.1}; - KDEParams k_neg{default_neg_data,0.001}; + KDEParams k_pos{default_pos_data, 0.1}; + KDEParams k_neg{default_neg_data, 0.001}; public: Model() = default; @@ -308,7 +308,8 @@ class Model { g_neg.fit_loc(training_data.neg); //auto ad = g_neg.calculate_anderson_darling(training_data.neg); PLOG_INFO << "Model " << +id << " using default for g_neg data with Gamma (shape:" << g_neg.shape - << ", loc: " << g_neg.loc << ", scale: " << g_neg.scale << ")";//. Anderson-darling statistic is " << ad; + << ", loc: " << g_neg.loc << ", scale: " << g_neg.scale + << ")";//. 
Anderson-darling statistic is " << ad; } } @@ -447,7 +448,7 @@ class StatsModel { confidence_probability_threshold_(opt.confidence_probability_threshold), host_unique_prop_lo_threshold_(opt.host_unique_prop_lo_threshold), min_proportion_difference_(opt.min_proportion_difference), - min_prob_difference_(opt.min_prob_difference){ + min_prob_difference_(opt.min_prob_difference) { for (auto i = 0; i < summary.num_categories(); ++i) { models_.emplace_back(Model(i, opt.dist)); training_data_.emplace_back(TrainingData(opt, i)); diff --git a/include/counts.hpp b/include/counts.hpp index 3853e60..fe43e70 100644 --- a/include/counts.hpp +++ b/include/counts.hpp @@ -10,44 +10,43 @@ // Extremly simple Counts class - represents a lower diagonal square matrix including the diagonal, // expecting [row,col]==[col,row] so can store it once not twice template -class Counts -{ +class Counts { public: - Counts(){}; - Counts(size_t size) - { + Counts() {}; + + Counts(size_t size) { assert(size > 0); mRows = size; - mData.resize(mRows * (mRows+1) / 2); + mData.resize(mRows * (mRows + 1) / 2); mData.shrink_to_fit(); } - void set_size(size_t size) - { + + void set_size(size_t size) { assert(size > 0); mRows = size; - mData.resize(mRows * (mRows+1) / 2); + mData.resize(mRows * (mRows + 1) / 2); mData.shrink_to_fit(); } - T& operator()(size_t row, size_t col) - { + + T &operator()(size_t row, size_t col) { assert(row < mRows); assert(col < mRows); - if (row < col) - { - col,row = row,col; + if (row < col) { + col, row = row, col; } return mData[row * (row + 1) / 2 + col]; } - const T& operator()(size_t row, size_t col) const - { + + const T &operator()(size_t row, size_t col) const { assert(row >= 0 && row < mRows); assert(col >= 0 && row < mRows); return mData[row * (row + 1) / 2 + col]; } - size_t rows() const noexcept - { + + size_t rows() const noexcept { return mRows; } + protected: size_t mRows; std::vector mData; diff --git a/include/dehost_main.hpp b/include/dehost_main.hpp index cab29b4..1f451b7 100644 --- a/include/dehost_main.hpp +++ b/include/dehost_main.hpp @@ -13,13 +13,14 @@ #include "result.hpp" class Index; + struct DehostArguments; -void setup_dehost_subcommand(CLI::App& app); +void setup_dehost_subcommand(CLI::App &app); -void dehost_reads(const DehostArguments& opt, const Index& index); +void dehost_reads(const DehostArguments &opt, const Index &index); -int dehost_main(DehostArguments & opt); +int dehost_main(DehostArguments &opt); #endif // CHARON_DEHOST_MAIN_H diff --git a/include/index.hpp b/include/index.hpp index 97b81d0..f1b0140 100644 --- a/include/index.hpp +++ b/include/index.hpp @@ -16,166 +16,151 @@ #include -class Index -{ - private: - uint8_t window_size_{}; - uint8_t kmer_size_{}; - double max_fpr_{}; - InputSummary summary_{}; - InputStats stats_{}; - seqan3::interleaved_bloom_filter ibf_{}; - - public: - static constexpr uint32_t version{3u}; - - Index() = default; - Index(Index const &) = default; - Index(Index &&) = default; - Index & operator=(Index const &) = default; - Index & operator=(Index &&) = default; - ~Index() = default; - - Index(const IndexArguments & arguments, const InputSummary & summary, const InputStats & stats, const seqan3::interleaved_bloom_filter& ibf): - window_size_{arguments.window_size}, - kmer_size_{arguments.kmer_size}, - max_fpr_{arguments.max_fpr}, - summary_{summary}, - stats_{stats}, - ibf_(ibf) - {} - - uint8_t window_size() const - { - return window_size_; +class Index { +private: + uint8_t window_size_{}; + uint8_t kmer_size_{}; + double 
max_fpr_{}; + InputSummary summary_{}; + InputStats stats_{}; + seqan3::interleaved_bloom_filter ibf_{}; + +public: + static constexpr uint32_t version{3u}; + + Index() = default; + + Index(Index const &) = default; + + Index(Index &&) = default; + + Index &operator=(Index const &) = default; + + Index &operator=(Index &&) = default; + + ~Index() = default; + + Index(const IndexArguments &arguments, const InputSummary &summary, const InputStats &stats, + const seqan3::interleaved_bloom_filter &ibf) : + window_size_{arguments.window_size}, + kmer_size_{arguments.kmer_size}, + max_fpr_{arguments.max_fpr}, + summary_{summary}, + stats_{stats}, + ibf_(ibf) {} + + uint8_t window_size() const { + return window_size_; + } + + uint8_t kmer_size() const { + return kmer_size_; + } + + uint8_t num_bins() const { + return summary_.num_bins; + } + + uint8_t num_categories() const { + return summary_.num_categories(); + } + + std::vector categories() const { + return summary_.categories; + } + + uint8_t get_host_index() const { + const auto index1 = summary_.category_index("host"); + const auto index2 = summary_.category_index("human"); + auto index = std::min(index1, index2); + if (index == std::numeric_limits::max()) + PLOG_ERROR << "Index does not contain 'host' or 'human' as a category "; + assert(index < std::numeric_limits::max()); + return index; + } + + uint8_t get_category_index(const std::string category) const { + const auto index = summary_.category_index(category); + if (index == std::numeric_limits::max()) + PLOG_ERROR << "Index does not contain category "; + assert(index < std::numeric_limits::max()); + return index; + } + + double max_fpr() const { + return max_fpr_; + } + + InputStats stats() const { + return stats_; + } + + InputSummary summary() const { + return summary_; + } + + std::unordered_map bin_to_category() const { + return summary_.bin_to_category; + } + + seqan3::interleaved_bloom_filter const &ibf() const { + return ibf_; + } + + seqan3::interleaved_bloom_filter::membership_agent_type agent() const { + return ibf_.membership_agent(); + } + + /*!\cond DEV + * \brief Serialisation support function. + * \tparam archive_t Type of `archive`; must satisfy seqan3::cereal_archive. + * \param[in] archive The archive being serialised from/to. + * + * \attention These functions are never called directly. 
+ * \sa https://docs.seqan.de/seqan/3.2.0/group__io.html#serialisation + */ + template + void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t &archive) { + try { + archive(window_size_); + archive(kmer_size_); + archive(max_fpr_); + archive(summary_); + archive(stats_); + archive(ibf_); } - - uint8_t kmer_size() const - { - return kmer_size_; - } - - uint8_t num_bins() const - { - return summary_.num_bins; - } - - uint8_t num_categories() const - { - return summary_.num_categories(); - } - - std::vector categories() const - { - return summary_.categories; - } - - uint8_t get_host_index() const - { - const auto index1 = summary_.category_index("host"); - const auto index2 = summary_.category_index("human"); - auto index = std::min(index1, index2); - if (index == std::numeric_limits::max()) - PLOG_ERROR << "Index does not contain 'host' or 'human' as a category "; - assert(index < std::numeric_limits::max()); - return index; - } - - uint8_t get_category_index(const std::string category) const - { - const auto index = summary_.category_index(category); - if (index == std::numeric_limits::max()) - PLOG_ERROR << "Index does not contain category "; - assert(index < std::numeric_limits::max()); - return index; - } - - double max_fpr() const - { - return max_fpr_; + // GCOVR_EXCL_START + catch (std::exception const &e) { + PLOG_ERROR << "Cannot read index: " + std::string{e.what()}; + exit(1); } - InputStats stats() const - { - return stats_; + } + + /* \brief Serialisation support function. Do not load the actual data. + * \tparam archive_t Type of `archive`; must satisfy seqan3::cereal_input_archive. + * \param[in] archive The archive being serialised from/to. + * + * \attention These functions are never called directly. + * \sa https://docs.seqan.de/seqan/3.2.0/group__io.html#serialisation + */ + template + void load_parameters(archive_t &archive) { + try { + archive(window_size_); + archive(kmer_size_); + archive(max_fpr_); + archive(summary_); + archive(stats_); + archive(ibf_); } - - InputSummary summary() const - { - return summary_; - } - - std::unordered_map bin_to_category() const - { - return summary_.bin_to_category; - } - - seqan3::interleaved_bloom_filter const & ibf() const - { - return ibf_; - } - - seqan3::interleaved_bloom_filter::membership_agent_type agent() const - { - return ibf_.membership_agent(); - } - - /*!\cond DEV - * \brief Serialisation support function. - * \tparam archive_t Type of `archive`; must satisfy seqan3::cereal_archive. - * \param[in] archive The archive being serialised from/to. - * - * \attention These functions are never called directly. - * \sa https://docs.seqan.de/seqan/3.2.0/group__io.html#serialisation - */ - template - void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) - { - try - { - archive(window_size_); - archive(kmer_size_); - archive(max_fpr_); - archive(summary_); - archive(stats_); - archive(ibf_); - } - // GCOVR_EXCL_START - catch (std::exception const & e) - { - PLOG_ERROR << "Cannot read index: " + std::string{e.what()}; - exit(1); - } - - } - - /* \brief Serialisation support function. Do not load the actual data. - * \tparam archive_t Type of `archive`; must satisfy seqan3::cereal_input_archive. - * \param[in] archive The archive being serialised from/to. - * - * \attention These functions are never called directly. 
- * \sa https://docs.seqan.de/seqan/3.2.0/group__io.html#serialisation - */ - template - void load_parameters(archive_t & archive) - { - try - { - archive(window_size_); - archive(kmer_size_); - archive(max_fpr_); - archive(summary_); - archive(stats_); - archive(ibf_); - } - // GCOVR_EXCL_START - catch (std::exception const & e) - { - PLOG_ERROR << "Cannot read index: " + std::string{e.what()}; - exit(1); - } + // GCOVR_EXCL_START + catch (std::exception const &e) { + PLOG_ERROR << "Cannot read index: " + std::string{e.what()}; + exit(1); } - //!\endcond - }; + } + //!\endcond +}; #endif // CHARON_INDEX_H diff --git a/include/index_arguments.hpp b/include/index_arguments.hpp index b02d1c8..6b26105 100644 --- a/include/index_arguments.hpp +++ b/include/index_arguments.hpp @@ -13,22 +13,21 @@ struct IndexArguments { std::string tmp_dir; // kmer/sketching - uint8_t window_size { 41 }; - uint8_t kmer_size { 19 }; + uint8_t window_size{41}; + uint8_t kmer_size{19}; // IBF options - mutable size_t bits {std::numeric_limits::max()-2}; // Allow to change bits for each partition - uint8_t num_hash {3}; - double max_fpr {0.01}; + mutable size_t bits{std::numeric_limits::max() - 2}; // Allow to change bits for each partition + uint8_t num_hash{3}; + double max_fpr{0.01}; // General options - std::string log_file {"charon.log"}; - uint8_t threads { 1 }; - uint8_t verbosity { 0 }; - bool optimize { false }; + std::string log_file{"charon.log"}; + uint8_t threads{1}; + uint8_t verbosity{0}; + bool optimize{false}; - std::string to_string() - { + std::string to_string() { std::string ss; ss += "\n\nIndex Arguments:\n\n"; diff --git a/include/index_main.hpp b/include/index_main.hpp index 6e99ef8..a323ab6 100644 --- a/include/index_main.hpp +++ b/include/index_main.hpp @@ -12,21 +12,25 @@ #include "index_arguments.hpp" class Index; + class InputStats; + class InputSummary; -void setup_index_subcommand(CLI::App& app); +void setup_index_subcommand(CLI::App &app); -InputSummary parse_input_file(const std::filesystem::path& input_file); +InputSummary parse_input_file(const std::filesystem::path &input_file); -InputStats count_and_store_hashes(const IndexArguments& opt, const InputSummary& summary); +InputStats count_and_store_hashes(const IndexArguments &opt, const InputSummary &summary); -std::unordered_map> optimize_layout(const IndexArguments& opt, InputSummary& summary, InputStats& stats); +std::unordered_map> +optimize_layout(const IndexArguments &opt, InputSummary &summary, InputStats &stats); -Index build_index(const IndexArguments& opt, const InputSummary& summary, InputStats& stats, const std::unordered_map>& bucket_to_bins_map); +Index build_index(const IndexArguments &opt, const InputSummary &summary, InputStats &stats, + const std::unordered_map> &bucket_to_bins_map); -int index_main(IndexArguments & opt); +int index_main(IndexArguments &opt); #endif // CHARON_INDEX_MAIN_H diff --git a/include/input_stats.hpp b/include/input_stats.hpp index a2ee2cb..77147b1 100644 --- a/include/input_stats.hpp +++ b/include/input_stats.hpp @@ -12,71 +12,67 @@ #include -struct InputStats -{ - //uint8_t num_bins{0}; - uint32_t num_files{0}; - std::unordered_map records_per_bin{}; - std::unordered_map hashes_per_bin{}; - - public: - InputStats() = default; - InputStats(InputStats const &) = default; - InputStats(InputStats &&) = default; - InputStats & operator=(InputStats const &) = default; - InputStats & operator=(InputStats &&) = default; - ~InputStats() = default; - - std::vector > bins_by_size() const - { - 
std::vector > sorted_pairs; - for (auto& it : hashes_per_bin) { - sorted_pairs.push_back(it); - } - std::sort(sorted_pairs.begin(), sorted_pairs.end(), [](auto &left, auto &right) { - return left.second < right.second; - }); - return sorted_pairs; - } +struct InputStats { + //uint8_t num_bins{0}; + uint32_t num_files{0}; + std::unordered_map records_per_bin{}; + std::unordered_map hashes_per_bin{}; + +public: + InputStats() = default; + + InputStats(InputStats const &) = default; + + InputStats(InputStats &&) = default; + + InputStats &operator=(InputStats const &) = default; - uint64_t max_num_hashes() - { - auto sorted_pairs = bins_by_size(); - return sorted_pairs.back().second; + InputStats &operator=(InputStats &&) = default; + + ~InputStats() = default; + + std::vector > bins_by_size() const { + std::vector > sorted_pairs; + for (auto &it: hashes_per_bin) { + sorted_pairs.push_back(it); } + std::sort(sorted_pairs.begin(), sorted_pairs.end(), [](auto &left, auto &right) { + return left.second < right.second; + }); + return sorted_pairs; + } + + uint64_t max_num_hashes() { + auto sorted_pairs = bins_by_size(); + return sorted_pairs.back().second; + } - template - void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) - { - try - { + template + void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t &archive) { + try { //archive(num_bins); archive(num_files); archive(records_per_bin); archive(hashes_per_bin); } // GCOVR_EXCL_START - catch (std::exception const & e) - { + catch (std::exception const &e) { PLOG_ERROR << "Cannot read input_stats: " + std::string{e.what()}; exit(1); } } - template - void load_parameters(archive_t & archive) - { - try - { + template + void load_parameters(archive_t &archive) { + try { //archive(num_bins); archive(num_files); archive(records_per_bin); archive(hashes_per_bin); } // GCOVR_EXCL_START - catch (std::exception const & e) - { + catch (std::exception const &e) { PLOG_ERROR << "Cannot read input_stats: " + std::string{e.what()}; exit(1); } diff --git a/include/input_summary.hpp b/include/input_summary.hpp index 32901cd..9b1abfd 100644 --- a/include/input_summary.hpp +++ b/include/input_summary.hpp @@ -13,87 +13,80 @@ #include #include -struct InputSummary -{ +struct InputSummary { uint8_t num_bins{0}; std::vector categories; std::vector> filepath_to_bin; std::unordered_map bin_to_category; - public: - InputSummary() = default; - InputSummary(InputSummary const &) = default; - InputSummary(InputSummary &&) = default; - InputSummary & operator=(InputSummary const &) = default; - InputSummary & operator=(InputSummary &&) = default; - ~InputSummary() = default; - - uint8_t num_categories() const - { - return categories.size(); - } +public: + InputSummary() = default; - uint8_t category_index(const std::string category) const - { - for (auto i=0; i::max(); - } + InputSummary(InputSummary const &) = default; - uint8_t host_category_index() const - { - auto index1 = category_index("human"); - auto index2 = category_index("host"); - auto index = std::min(index1, index2); - if (index == std::numeric_limits::max()) - PLOG_ERROR << "Neither 'human' nor 'host' appear as categories in the index"; - assert(index != std::numeric_limits::max()); - return index; - } + InputSummary(InputSummary &&) = default; + + InputSummary &operator=(InputSummary const &) = default; - std::string category_name(const uint8_t index) const - { - if (index > categories.size()) - return ""; - else - return categories.at(index); + InputSummary &operator=(InputSummary &&) = default; + + 
~InputSummary() = default; + + uint8_t num_categories() const { + return categories.size(); + } + + uint8_t category_index(const std::string category) const { + for (auto i = 0; i < categories.size(); ++i) { + if (category == categories.at(i)) + return i; } + return std::numeric_limits::max(); + } + + uint8_t host_category_index() const { + auto index1 = category_index("human"); + auto index2 = category_index("host"); + auto index = std::min(index1, index2); + if (index == std::numeric_limits::max()) + PLOG_ERROR << "Neither 'human' nor 'host' appear as categories in the index"; + assert(index != std::numeric_limits::max()); + return index; + } + + std::string category_name(const uint8_t index) const { + if (index > categories.size()) + return ""; + else + return categories.at(index); + } - template - void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive) - { - try - { + template + void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t &archive) { + try { archive(num_bins); archive(categories); archive(filepath_to_bin); archive(bin_to_category); } // GCOVR_EXCL_START - catch (std::exception const & e) - { + catch (std::exception const &e) { PLOG_ERROR << "Cannot read input_summary: " + std::string{e.what()}; exit(1); } } - template - void load_parameters(archive_t & archive) - { - try - { + template + void load_parameters(archive_t &archive) { + try { archive(num_bins); archive(categories); archive(filepath_to_bin); archive(bin_to_category); } // GCOVR_EXCL_START - catch (std::exception const & e) - { + catch (std::exception const &e) { PLOG_ERROR << "Cannot read input_summary: " + std::string{e.what()}; exit(1); } diff --git a/include/load_index.hpp b/include/load_index.hpp index 0b9ad0d..91f75a4 100644 --- a/include/load_index.hpp +++ b/include/load_index.hpp @@ -6,6 +6,6 @@ #include #include -void load_index(Index & index, std::filesystem::path const & path); +void load_index(Index &index, std::filesystem::path const &path); #endif // CHARON_LOAD_INDEX_MAIN_H \ No newline at end of file diff --git a/include/result.hpp b/include/result.hpp index e6661af..39f4a6a 100644 --- a/include/result.hpp +++ b/include/result.hpp @@ -15,15 +15,13 @@ #include "input_summary.hpp" #include "classify_stats.hpp" -struct ResultSummary -{ +struct ResultSummary { std::vector classified_counts; uint64_t unclassified_count{0}; uint64_t extracted_count{0}; - ResultSummary(const uint8_t size): - classified_counts(size,0) - {}; + ResultSummary(const uint8_t size) : + classified_counts(size, 0) {}; }; template @@ -35,195 +33,185 @@ struct ReadRecord { }; template -class Result -{ - private: - InputSummary input_summary_; - ResultSummary result_summary_; - StatsModel stats_model_; - std::vector> cached_reads_; - - bool run_extract_; - std::unordered_map>> extract_handles_; - - public: - Result() = default; - Result(Result const &) = default; - Result(Result &&) = default; - Result & operator=(Result const &) = default; - Result & operator=(Result &&) = default; - ~Result() = default; - - Result(const ClassifyArguments& opt, const InputSummary & summary): - input_summary_{summary}, - result_summary_(summary.num_categories()), - run_extract_(opt.run_extract) - { - stats_model_ = StatsModel(opt, summary); - if (opt.run_extract){ - for (const auto [category_index,extract_files] : opt.extract_category_to_file) { - for (const auto extract_file : extract_files) - extract_handles_[category_index].push_back(seqan3::sequence_file_output{extract_file}); - cached_reads_.reserve(opt.num_reads_to_fit*summary.num_categories()*4); - } - 
} +class Result { +private: + InputSummary input_summary_; + ResultSummary result_summary_; + StatsModel stats_model_; + std::vector> cached_reads_; + bool run_extract_; + std::unordered_map>> extract_handles_; - }; +public: + Result() = default; - Result(const DehostArguments& opt, const InputSummary & summary): - input_summary_{summary}, - result_summary_(summary.num_categories()), - run_extract_(opt.run_extract) - { - stats_model_ = StatsModel(opt, summary); - if (opt.run_extract){ - for (const auto [category_index,extract_files] : opt.extract_category_to_file) { - for (const auto extract_file : extract_files) - extract_handles_[category_index].push_back(seqan3::sequence_file_output{extract_file}); - cached_reads_.reserve(opt.num_reads_to_fit*summary.num_categories()*4); - } - } - }; + Result(Result const &) = default; - const InputSummary& input_summary() const - { - return input_summary_; + Result(Result &&) = default; + + Result &operator=(Result const &) = default; + + Result &operator=(Result &&) = default; + + ~Result() = default; + + Result(const ClassifyArguments &opt, const InputSummary &summary) : + input_summary_{summary}, + result_summary_(summary.num_categories()), + run_extract_(opt.run_extract) { + stats_model_ = StatsModel(opt, summary); + if (opt.run_extract) { + for (const auto [category_index, extract_files]: opt.extract_category_to_file) { + for (const auto extract_file: extract_files) + extract_handles_[category_index].push_back(seqan3::sequence_file_output{extract_file}); + cached_reads_.reserve(opt.num_reads_to_fit * summary.num_categories() * 4); + } } - const uint8_t category_index(const std::string& category) const - { - return input_summary_.category_index(category); + + }; + + Result(const DehostArguments &opt, const InputSummary &summary) : + input_summary_{summary}, + result_summary_(summary.num_categories()), + run_extract_(opt.run_extract) { + stats_model_ = StatsModel(opt, summary); + if (opt.run_extract) { + for (const auto [category_index, extract_files]: opt.extract_category_to_file) { + for (const auto extract_file: extract_files) + extract_handles_[category_index].push_back(seqan3::sequence_file_output{extract_file}); + cached_reads_.reserve(opt.num_reads_to_fit * summary.num_categories() * 4); + } } + }; - uint8_t classify_read(ReadEntry& read_entry, const bool dehost=false) - { - PLOG_VERBOSE << "Classify read " << read_entry.read_id(); - if (dehost) - read_entry.dehost(stats_model_, input_summary_.host_category_index()); - else - read_entry.classify(stats_model_); + const InputSummary &input_summary() const { + return input_summary_; + } + + const uint8_t category_index(const std::string &category) const { + return input_summary_.category_index(category); + } + + uint8_t classify_read(ReadEntry &read_entry, const bool dehost = false) { + PLOG_VERBOSE << "Classify read " << read_entry.read_id(); + if (dehost) + read_entry.dehost(stats_model_, input_summary_.host_category_index()); + else + read_entry.classify(stats_model_); #pragma omp critical(print_result) - read_entry.print_assignment_result(input_summary_); + read_entry.print_assignment_result(input_summary_); #pragma omp critical(update_result_count) - { - const auto &call = read_entry.call(); - if (call < std::numeric_limits::max()) - { - result_summary_.classified_counts[call] += 1; - } else { - result_summary_.unclassified_count += 1; - } + { + const auto &call = read_entry.call(); + if (call < std::numeric_limits::max()) { + result_summary_.classified_counts[call] += 1; + } else { + 
result_summary_.unclassified_count += 1; } - return read_entry.call(); - } + return read_entry.call(); - void extract_read(const uint8_t category_index, const record_type& record) - { + } + + void extract_read(const uint8_t category_index, const record_type &record) { #pragma omp critical(extract_read) - extract_handles_[category_index][0].push_back(record); - } + extract_handles_[category_index][0].push_back(record); + } - void extract_paired_read(const uint8_t category_index, const record_type& record, const record_type& record2) - { + void extract_paired_read(const uint8_t category_index, const record_type &record, const record_type &record2) { #pragma omp critical(extract_read) - extract_handles_[category_index][0].push_back(record); + extract_handles_[category_index][0].push_back(record); #pragma omp critical(extract_read2) - extract_handles_[category_index][1].push_back(record2); - } - - void add_read(ReadEntry& read_entry, const record_type& record, bool dehost=false){ - if (stats_model_.ready()) { - auto category_index = classify_read(read_entry, dehost); - if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()){ - extract_read(category_index, record); - } - } else { - PLOG_VERBOSE << "Add read " << read_entry.read_id() << " to training "; + extract_handles_[category_index][1].push_back(record2); + } + + void add_read(ReadEntry &read_entry, const record_type &record, bool dehost = false) { + if (stats_model_.ready()) { + auto category_index = classify_read(read_entry, dehost); + if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()) { + extract_read(category_index, record); + } + } else { + PLOG_VERBOSE << "Add read " << read_entry.read_id() << " to training "; #pragma omp critical(add_to_cache) - { - bool training_complete = false; - if (cached_reads_.size() < cached_reads_.capacity()) - { - cached_reads_.emplace_back(ReadRecord(false, read_entry, record, record)); - training_complete = stats_model_.add_read_to_training_data(read_entry.unique_proportions()); - } else { - stats_model_.force_ready(); - training_complete = true; - } - - if (training_complete) - classify_cache(dehost); + { + bool training_complete = false; + if (cached_reads_.size() < cached_reads_.capacity()) { + cached_reads_.emplace_back(ReadRecord(false, read_entry, record, record)); + training_complete = stats_model_.add_read_to_training_data(read_entry.unique_proportions()); + } else { + stats_model_.force_ready(); + training_complete = true; } + + if (training_complete) + classify_cache(dehost); } } - - void add_paired_read(ReadEntry& read_entry, const record_type& record, const record_type& record2, const bool dehost=false){ - if (stats_model_.ready()) { - auto category_index = classify_read(read_entry, dehost); - if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()){ - extract_paired_read(category_index, record, record2); - } - } else { - PLOG_VERBOSE << "Add read " << read_entry.read_id() << " to training "; + } + + void add_paired_read(ReadEntry &read_entry, const record_type &record, const record_type &record2, + const bool dehost = false) { + if (stats_model_.ready()) { + auto category_index = classify_read(read_entry, dehost); + if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()) { + extract_paired_read(category_index, record, record2); + } + } else { + PLOG_VERBOSE << "Add read " << read_entry.read_id() << " to training "; #pragma omp critical(add_to_cache) - { - bool 
training_complete = false; - if (cached_reads_.size() < cached_reads_.capacity()) - { - cached_reads_.emplace_back(ReadRecord(true, read_entry, record, record2)); - training_complete = stats_model_.add_read_to_training_data(read_entry.unique_proportions()); - } else { - stats_model_.force_ready(); - training_complete = true; - } - - if (training_complete) - classify_cache(dehost); + { + bool training_complete = false; + if (cached_reads_.size() < cached_reads_.capacity()) { + cached_reads_.emplace_back(ReadRecord(true, read_entry, record, record2)); + training_complete = stats_model_.add_read_to_training_data(read_entry.unique_proportions()); + } else { + stats_model_.force_ready(); + training_complete = true; } + + if (training_complete) + classify_cache(dehost); } } - - void classify_cache(const bool dehost=false) - { - PLOG_VERBOSE << "Classify cached reads"; - - for (auto read_record : cached_reads_) - { - auto & read_entry = read_record.read; - const auto & record = read_record.record; - auto category_index = classify_read(read_entry, dehost); - if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()){ - if (read_record.is_paired) - { - const auto & record2 = read_record.record2; - extract_paired_read(category_index, record, record2); - } else { - extract_read(category_index, record); - } + } + + void classify_cache(const bool dehost = false) { + PLOG_VERBOSE << "Classify cached reads"; + + for (auto read_record: cached_reads_) { + auto &read_entry = read_record.read; + const auto &record = read_record.record; + auto category_index = classify_read(read_entry, dehost); + if (run_extract_ and extract_handles_.find(category_index) != extract_handles_.end()) { + if (read_record.is_paired) { + const auto &record2 = read_record.record2; + extract_paired_read(category_index, record, record2); + } else { + extract_read(category_index, record); } } - cached_reads_.resize(0); - } - - void complete(const bool dehost=false) - { - classify_cache(dehost); } + cached_reads_.resize(0); + } + void complete(const bool dehost = false) { + classify_cache(dehost); + } - void print_summary() const { + void print_summary() const { - PLOG_INFO << "Results summary: "; - for (auto i = 0; i < result_summary_.classified_counts.size(); i++) { - const auto & category = input_summary_.categories.at(i); - PLOG_INFO << category << " :\t\t" << result_summary_.classified_counts.at(i); - } - PLOG_INFO << "unclassified :\t" << result_summary_.unclassified_count; + PLOG_INFO << "Results summary: "; + for (auto i = 0; i < result_summary_.classified_counts.size(); i++) { + const auto &category = input_summary_.categories.at(i); + PLOG_INFO << category << " :\t\t" << result_summary_.classified_counts.at(i); } - }; + PLOG_INFO << "unclassified :\t" << result_summary_.unclassified_count; + } +}; #endif // CHARON_RESULT_H diff --git a/include/store_index.hpp b/include/store_index.hpp index 9ffa2ce..e6f2f4c 100644 --- a/include/store_index.hpp +++ b/include/store_index.hpp @@ -9,8 +9,7 @@ #include -static inline void store_index(std::filesystem::path const & path, Index && index) -{ +static inline void store_index(std::filesystem::path const &path, Index &&index) { PLOG_INFO << "Saving index to file " << path; std::ofstream os{path, std::ios::binary}; cereal::BinaryOutputArchive oarchive{os}; diff --git a/include/utils.hpp b/include/utils.hpp index 4fdf67a..a48ab4b 100644 --- a/include/utils.hpp +++ b/include/utils.hpp @@ -14,33 +14,37 @@ class IndexArguments; -struct my_traits : 
seqan3::sequence_file_input_default_traits_dna -{ +struct my_traits : seqan3::sequence_file_input_default_traits_dna { using quality_alphabet = seqan3::phred94; }; // used to transform paths to absolute paths - designed to be used with CLI11 transform std::filesystem::path make_absolute(std::filesystem::path); -std::vector split(const std::string&, const std::string&); +std::vector split(const std::string &, const std::string &); bool ends_with(std::string str, std::string suffix); + bool starts_with(std::string str, std::string prefix); -void store_hashes( const std::string target, - const std::unordered_set< uint64_t >& hashes, - const std::string tmp_output_folder ); -std::vector< uint64_t > load_hashes( const std::string target, - const std::string tmp_output_folder ); -void delete_hashes( const std::vector& targets, const std::string tmp_output_folder ); +void store_hashes(const std::string target, + const std::unordered_set &hashes, + const std::string tmp_output_folder); + +std::vector load_hashes(const std::string target, + const std::string tmp_output_folder); -size_t bin_size_in_bits(const IndexArguments & opt, const uint64_t & num_elements); +void delete_hashes(const std::vector &targets, const std::string tmp_output_folder); -size_t max_num_hashes_for_fpr(const IndexArguments & opt); +size_t bin_size_in_bits(const IndexArguments &opt, const uint64_t &num_elements); -std::string sequence_to_string(const __type_pack_element<0, std::vector, std::string, std::vector>& input); +size_t max_num_hashes_for_fpr(const IndexArguments &opt); -float get_compression_ratio(const std::string& sequence); +std::string sequence_to_string( + const __type_pack_element<0, std::vector, std::string, std::vector> &input); + +float get_compression_ratio(const std::string &sequence); std::string get_extension(const std::filesystem::path); + #endif diff --git a/src/dehost_main.cpp b/src/dehost_main.cpp index 2cd6b75..0931157 100644 --- a/src/dehost_main.cpp +++ b/src/dehost_main.cpp @@ -20,202 +20,201 @@ #include #include -const std::vector default_pos_data = {0.603448, 0.480363, 0.411268, 0.444444, 0.392523, 0.31383 , - 0.281447, 0.53539 , 0.553792, 0.537906, 0.273994, 0.693023, +const std::vector default_pos_data = {0.603448, 0.480363, 0.411268, 0.444444, 0.392523, 0.31383, + 0.281447, 0.53539, 0.553792, 0.537906, 0.273994, 0.693023, 0.680751, 0.428571, 0.254005, 0.696133, 0.251462, 0.594203, 0.361582, 0.515789, 0.724771, 0.312821, 0.457627, 0.728614, 0.573222, 0.440476, 0.381633, 0.234127, 0.673988, 0.368421, 0.548387, 0.304688, 0.365348, 0.280899, 0.482315, 0.445283, - 0.449541, 0.59589 , 0.671362, 0.59854 , 0.445415, 0.394265, - 0.525597, 0.201365, 0.702703, 0.47505 , 0.509174, 0.692308, + 0.449541, 0.59589, 0.671362, 0.59854, 0.445415, 0.394265, + 0.525597, 0.201365, 0.702703, 0.47505, 0.509174, 0.692308, 0.230573, 0.553616, 0.653846, 0.690476, 0.432203, 0.674377, 0.298387, 0.626712, 0.589347, 0.545045, 0.624679, 0.240658, 0.254003, 0.746667, 0.357143, 0.606909, 0.344558, 0.409836, - 0.266998, 0.588235, 0.537815, 0.50641 , 0.371951, 0.415929, - 0.689266, 0.309735, 0.705645, 0.549296, 0.80754 , 0.398979, - 0.459941, 0.491647, 0.292453, 0.43871 , 0.649254, 0.309154, + 0.266998, 0.588235, 0.537815, 0.50641, 0.371951, 0.415929, + 0.689266, 0.309735, 0.705645, 0.549296, 0.80754, 0.398979, + 0.459941, 0.491647, 0.292453, 0.43871, 0.649254, 0.309154, 0.617021, 0.601671, 0.383721, 0.628571, 0.487936, 0.440367, 0.382465, 0.660297, 0.742489, 0.737143, 0.282759, 0.490196, 0.247191, 0.690994, 0.731602, 
0.666667, 0.535714, 0.322368, 0.694389, 0.273333, 0.783972, 0.307692, 0.284091, 0.661333, 0.366492, 0.733728, 0.352113, 0.803922, 0.269911, 0.366599, 0.642197, 0.662921, 0.294118, 0.234146, 0.345309, 0.364407, - 0.352941, 0.288462, 0.547619, 0.712121, 0.53211 , 0.510823, + 0.352941, 0.288462, 0.547619, 0.712121, 0.53211, 0.510823, 0.631579, 0.490132, 0.651007, 0.653846, 0.773931, 0.550265, - 0.56044 , 0.45977 , 0.445743, 0.643761, 0.232422, 0.564171, - 0.443966, 0.638356, 0.314516, 0.276398, 0.51746 , 0.745342, + 0.56044, 0.45977, 0.445743, 0.643761, 0.232422, 0.564171, + 0.443966, 0.638356, 0.314516, 0.276398, 0.51746, 0.745342, 0.366093, 0.471049, 0.205198, 0.643423, 0.581197, 0.453686, 0.587719, 0.305322, 0.473118, 0.582593, 0.595486, 0.573822, - 0.370192, 0.681661, 0.513514, 0.289655, 0.349364, 0.35873 , - 0.571429, 0.28125 , 0.785425, 0.689055, 0.675 , 0.255814, + 0.370192, 0.681661, 0.513514, 0.289655, 0.349364, 0.35873, + 0.571429, 0.28125, 0.785425, 0.689055, 0.675, 0.255814, 0.542553, 0.333333, 0.544643, 0.668831, 0.534483, 0.237838, - 0.372308, 0.548387, 0.227818, 0.33101 , 0.640931, 0.55814 , - 0.355993, 0.643443, 0.333333, 0.323288, 0.634146, 0.352 , + 0.372308, 0.548387, 0.227818, 0.33101, 0.640931, 0.55814, + 0.355993, 0.643443, 0.333333, 0.323288, 0.634146, 0.352, 0.243902, 0.333333, 0.663968, 0.246212, 0.540323, 0.565217, - 0.715847, 0.655431, 0.707736, 0.407792, 0.52924 , 0.511292, - 0.281195, 0.2875 , 0.323296, 0.230392, 0.262222, 0.371245, - 0.206025, 0.438525, 0.660377, 0.30426 , 0.261044, 0.360902, - 0.34 , 0.695 , 0.455462, 0.621891, 0.41003 , 0.349481, + 0.715847, 0.655431, 0.707736, 0.407792, 0.52924, 0.511292, + 0.281195, 0.2875, 0.323296, 0.230392, 0.262222, 0.371245, + 0.206025, 0.438525, 0.660377, 0.30426, 0.261044, 0.360902, + 0.34, 0.695, 0.455462, 0.621891, 0.41003, 0.349481, 0.656863, 0.482143, 0.379032, 0.389474, 0.658451, 0.295181, - 0.745614, 0.23622 , 0.520408, 0.570213, 0.476923, 0.336864, + 0.745614, 0.23622, 0.520408, 0.570213, 0.476923, 0.336864, 0.431227, 0.389381, 0.368601, 0.489879, 0.228682, 0.413636, - 0.511905, 0.34375 , 0.688235, 0.717131, 0.340996, 0.521739, + 0.511905, 0.34375, 0.688235, 0.717131, 0.340996, 0.521739, 0.302752, 0.724551, 0.247059, 0.634551, 0.375204, 0.585271, - 0.488372, 0.707071, 0.698305, 0.384615, 0.620743, 0.43738 , - 0.8 , 0.275 , 0.434783, 0.335079, 0.28789 , 0.242915, + 0.488372, 0.707071, 0.698305, 0.384615, 0.620743, 0.43738, + 0.8, 0.275, 0.434783, 0.335079, 0.28789, 0.242915, 0.657588, 0.664134, 0.255172, 0.636364, 0.349593, 0.849057, 0.371681, 0.535032, 0.383721, 0.597403, 0.694268, 0.588235, - 0.481013, 0.555556, 0.5 , 0.37931 , 0.259843, 0.681259, - 0.545455, 0.550725, 0.338583, 0.3085 , 0.437318, 0.540123, - 0.464481, 0.597598, 0.481928, 0.488889, 0.565543, 0.54549 , - 0.428571, 0.34891 , 0.62069 , 0.656428, 0.480392, 0.738295, - 0.475 , 0.409574, 0.5625 , 0.582677, 0.640572, 0.307263, - 0.414634, 0.641566, 0.486188, 0.679245, 0.566845, 0.53528 , - 0.48318 , 0.422096, 0.362694, 0.261866, 0.207268, 0.756184, + 0.481013, 0.555556, 0.5, 0.37931, 0.259843, 0.681259, + 0.545455, 0.550725, 0.338583, 0.3085, 0.437318, 0.540123, + 0.464481, 0.597598, 0.481928, 0.488889, 0.565543, 0.54549, + 0.428571, 0.34891, 0.62069, 0.656428, 0.480392, 0.738295, + 0.475, 0.409574, 0.5625, 0.582677, 0.640572, 0.307263, + 0.414634, 0.641566, 0.486188, 0.679245, 0.566845, 0.53528, + 0.48318, 0.422096, 0.362694, 0.261866, 0.207268, 0.756184, 0.431373, 0.781818, 0.479079, 0.505464, 0.318792, 0.209524, - 0.55268 , 0.666667, 0.365482, 
0.745455, 0.662791, 0.670455, - 0.611511, 0.562197, 0.737557, 0.315315, 0.520661, 0.35514 , - 0.476489, 0.310502, 0.511416, 0.642857, 0.378277, 0.44898 , + 0.55268, 0.666667, 0.365482, 0.745455, 0.662791, 0.670455, + 0.611511, 0.562197, 0.737557, 0.315315, 0.520661, 0.35514, + 0.476489, 0.310502, 0.511416, 0.642857, 0.378277, 0.44898, 0.665635, 0.494518, 0.487179, 0.614568, 0.528736, 0.405263, 0.275556, 0.504808, 0.264591, 0.518817, 0.587156, 0.652278, - 0.551429, 0.497797, 0.54646 , 0.546392, 0.409938, 0.447531, - 0.438247, 0.363636, 0.637631, 0.57037 , 0.276042, 0.554479, - 0.56 , 0.367521, 0.348039, 0.259259, 0.210818, 0.485477, - 0.285714, 0.39801 , 0.546921, 0.603306, 0.521739, 0.409812, - 0.509729, 0.349593, 0.54023 , 0.487805, 0.516468, 0.606383, + 0.551429, 0.497797, 0.54646, 0.546392, 0.409938, 0.447531, + 0.438247, 0.363636, 0.637631, 0.57037, 0.276042, 0.554479, + 0.56, 0.367521, 0.348039, 0.259259, 0.210818, 0.485477, + 0.285714, 0.39801, 0.546921, 0.603306, 0.521739, 0.409812, + 0.509729, 0.349593, 0.54023, 0.487805, 0.516468, 0.606383, 0.746606, 0.549133, 0.394265, 0.776722, 0.380762, 0.661348, 0.493056, 0.293147, 0.566265, 0.241573, 0.257956, 0.577519, - 0.563506, 0.694981, 0.550725, 0.48169 , 0.654391, 0.505208, + 0.563506, 0.694981, 0.550725, 0.48169, 0.654391, 0.505208, 0.241158, 0.530488, 0.223785, 0.689655, 0.507246, 0.421731, 0.449541, 0.651007, 0.272727, 0.642412, 0.398098, 0.281588, 0.429379, 0.647059, 0.716667, 0.668966, 0.727811, 0.608355, - 0.63785 , 0.428571, 0.647376, 0.549223, 0.409091, 0.479365, - 0.317333, 0.49373 , 0.706186, 0.357287, 0.534884, 0.603306, + 0.63785, 0.428571, 0.647376, 0.549223, 0.409091, 0.479365, + 0.317333, 0.49373, 0.706186, 0.357287, 0.534884, 0.603306, 0.630435, 0.506944, 0.653846, 0.385714, 0.362126, 0.439252, - 0.61 , 0.367742, 0.474638, 0.37247 , 0.819549, 0.65762 , - 0.66416 , 0.479532, 0.532338, 0.379562, 0.205524, 0.706564, - 0.466667, 0.555556, 0.70801 , 0.719397, 0.333333, 0.791667, + 0.61, 0.367742, 0.474638, 0.37247, 0.819549, 0.65762, + 0.66416, 0.479532, 0.532338, 0.379562, 0.205524, 0.706564, + 0.466667, 0.555556, 0.70801, 0.719397, 0.333333, 0.791667, 0.831707, 0.537081, 0.434343, 0.349398, 0.367537, 0.371939, - 0.78392 , 0.343612, 0.541667, 0.631016, 0.390625, 0.7 , - 0.566524, 0.305 , 0.472868, 0.693182, 0.413534, 0.653696, + 0.78392, 0.343612, 0.541667, 0.631016, 0.390625, 0.7, + 0.566524, 0.305, 0.472868, 0.693182, 0.413534, 0.653696, 0.220264, 0.397959, 0.371069, 0.214674, 0.495114, 0.719298, - 0.351955, 0.213483, 0.289796, 0.515464, 0.30599 , 0.574257, + 0.351955, 0.213483, 0.289796, 0.515464, 0.30599, 0.574257, 0.617883, 0.376226, 0.408582, 0.339426, 0.671815, 0.333333, 0.364179, 0.541126, 0.633188, 0.506045, 0.702512, 0.355014, - 0.494845, 0.66 , 0.55814 , 0.375 , 0.238889, 0.594444, - 0.597701, 0.37234 }; -const std::vector default_neg_data = {0.029661 , 0.00869565, 0.0830769 , 0.0914513 , 0.00950119, - 0.144928 , 0.0286494 , 0.0164141 , 0.0425532 , 0.032345 , - 0.0186514 , 0.0281899 , 0. , 0.0379651 , 0.0427928 , - 0.0571696 , 0.0196319 , 0.00830484, 0.0254545 , 0.0104167 , - 0.0192781 , 0.0434917 , 0.0165426 , 0.0256917 , 0.0288875 , - 0.0231214 , 0.0139555 , 0.0181997 , 0.0180437 , 0.0222222 , - 0.0177083 , 0.0252525 , 0.0381125 , 0.0156128 , 0.048603 , - 0.0373519 , 0.0379747 , 0.0123748 , 0.0380165 , 0.042953 , - 0.0284091 , 0.0450205 , 0.014876 , 0.0477674 , 0.0108873 , - 0.0485075 , 0.0283688 , 0.0397799 , 0.037058 , 0.0195178 , - 0.0285344 , 0.032835 , 0.0380864 , 0.0271024 , 0. 
, - 0.0509554 , 0.0440744 , 0.0226349 , 0.0388787 , 0.0307692 , - 0.0197628 , 0.0183486 , 0.0220588 , 0.0194175 , 0.00157233, - 0.0150637 , 0.0372737 , 0.0365034 , 0.0361446 , 0.0120919 , - 0.0185449 , 0.0505495 , 0.0140728 , 0.039557 , 0.0632184 , - 0.0312891 , 0.0134626 , 0.0209111 , 0.0341297 , 0.0286807 , - 0.0166852 , 0.0230701 , 0.0101138 , 0.0255102 , 0.0425532 , - 0.0276442 , 0.0402415 , 0.0368196 , 0.0164688 , 0.0488798 , - 0.0283768 , 0.0268714 , 0.0496157 , 0.0496806 , 0.0158311 , - 0.0191657 , 0.0465511 , 0.0460564 , 0.0228311 , 0.0272793 , - 0.0475241 , 0.0167959 , 0.0221843 , 0.0322767 , 0.0318091 , - 0.0171265 , 0.0280222 , 0.0237781 , 0.0108481 , 0.0288136 , - 0.0408627 , 0.0212766 , 0.0322148 , 0.0272989 , 0.0347826 , - 0.00402901, 0.0259009 , 0.0351459 , 0.00761905, 0.0261438 , - 0.0463215 , 0.0265372 , 0.0582691 , 0.0325203 , 0.0304878 , - 0.0283688 , 0.0220264 , 0.00813638, 0.0259434 , 0.0219928 , - 0.0142292 , 0.0324519 , 0.0206349 , 0.0231729 , 0.0179541 , - 0.0447154 , 0.0357436 , 0.0221402 , 0.0304124 , 0.0559384 , - 0.00854701, 0.0185567 , 0.0107759 , 0.016129 , 0.0297824 , - 0.0211216 , 0.0352645 , 0.046285 , 0.0254181 , 0.0440367 , - 0.0274964 , 0.0229277 , 0.0542232 , 0.00228833, 0.0342029 , - 0.0397839 , 0.0209068 , 0.0293501 , 0.0262069 , 0.0157853 , - 0.0365854 , 0.026362 , 0.0125953 , 0.0194384 , 0.0250284 , - 0.0417973 , 0.0236829 , 0.00610583, 0.0164331 , 0.026455 , - 0.047619 , 0.0228571 , 0.0458221 , 0.0965909 , 0.0348162 , - 0.0821918 , 0.0198265 , 0.026455 , 0.0153846 , 0.0615977 , - 0.0296296 , 0.034555 , 0.0374805 , 0.0253921 , 0.0186992 , - 0.0233236 , 0.0119363 , 0.0366881 , 0.00961538, 0.0453846 , - 0.0459094 , 0.0391798 , 0.0248075 , 0.0330113 , 0.00591716, - 0.0397306 , 0.0323607 , 0.0278035 , 0.0193237 , 0.0242152 , - 0.00440529, 0.0294826 , 0.0188679 , 0.0543933 , 0.0327363 , - 0.0365233 , 0.0515856 , 0.042086 , 0.012907 , 0.0109664 , - 0.0351605 , 0.0251852 , 0.0494148 , 0.0401606 , 0. 
, - 0.0369686 , 0.0411622 , 0.0257143 , 0.0416667 , 0.0400943 , - 0.0409836 , 0.00296296, 0.0714286 , 0.0270856 , 0.0117521 , - 0.0174472 , 0.0354167 , 0.0237056 , 0.0292683 , 0.0253723 , - 0.025 , 0.0163043 , 0.0229983 , 0.0418502 , 0.0259179 , - 0.0386047 , 0.0355295 , 0.0451613 , 0.0353982 , 0.0310711 , - 0.0250344 , 0.0125604 , 0.0237691 , 0.0406835 , 0.0524476 , - 0.0307692 , 0.0248619 , 0.0264085 , 0.03003 , 0.0219273 , - 0.0214827 , 0.039629 , 0.0393462 , 0.0207164 , 0.0193896 , - 0.0312947 , 0.0485252 , 0.0265004 , 0.0298322 , 0.0178571 , - 0.0333333 , 0.00632911, 0.0328767 , 0.0355556 , 0.0331825 , - 0.0197568 , 0.0413793 , 0.0215983 , 0.0182584 , 0.0211161 , - 0.0290276 , 0.00200803, 0.035284 , 0.0368509 , 0.0279971 , - 0.02334 , 0.0245637 , 0.00180505, 0.0298013 , 0.0173069 , - 0.0167254 , 0.0321569 , 0.0357616 , 0.0438791 , 0.024147 , - 0.0399344 , 0.0285962 , 0.00518623, 0.0311218 , 0.0288498 , - 0.0236686 , 0.012583 , 0.0243531 , 0.0218015 , 0.0121639 , - 0.041868 , 0.0243223 , 0.0260317 , 0.0256233 , 0.0325203 , - 0.024186 , 0.0125116 , 0.0138889 , 0.0221271 , 0.0445293 , - 0.0166667 , 0.0292373 , 0.00485437, 0.0443599 , 0.0174766 , - 0.023511 , 0.0411594 , 0.0189873 , 0.0137931 , 0.0388889 , - 0.0490798 , 0.0394737 , 0.0539568 , 0.0342466 , 0.0679612 , - 0.0250696 , 0.03125 , 0.0365034 , 0.0265252 , 0.060241 , - 0.0166008 , 0.0295602 , 0.0162665 , 0.0100925 , 0.0506757 , - 0.0150754 , 0.0737705 , 0.0371367 , 0.026362 , 0.0289958 , - 0.0297619 , 0.0301003 , 0.0219895 , 0.0134804 , 0.0175439 , - 0.0443319 , 0.0137457 , 0.0286561 , 0.0546875 , 0.0655738 , - 0.0326741 , 0.0251232 , 0.0363743 , 0.0313901 , 0.0442404 , - 0.0262935 , 0.019601 , 0.0291751 , 0.0316978 , 0.0287356 , - 0.0226586 , 0.0341207 , 0.00504356, 0.0359589 , 0.0232751 , - 0.0324374 , 0.0125962 , 0.0216401 , 0.0282589 , 0.0180505 , - 0.0345912 , 0.0228188 , 0.0260374 , 0.0195787 , 0.00486618, - 0.0508732 , 0.0280469 , 0.0224719 , 0.0320856 , 0.0444444 , - 0.0365535 , 0.018797 , 0.0759494 , 0.072 , 0.0235199 , - 0.0193548 , 0.0453202 , 0.0414972 , 0.019802 , 0.019802 , - 0.0317003 , 0.0341047 , 0.025544 , 0.0518672 , 0.0271859 , - 0.0528355 , 0.02398 , 0.030303 , 0.0219146 , 0.0397456 , - 0.010582 , 0.0218182 , 0.010846 , 0.0348174 , 0.032967 , - 0.011117 , 0.0404494 , 0.0176829 , 0.052 , 0.0307692 , - 0.0404355 , 0.0392216 , 0.00652667, 0.0440465 , 0.0325444 , - 0.0322581 , 0.0220096 , 0.030146 , 0.0356802 , 0.0349938 , - 0.0285036 , 0.0176194 , 0.0571429 , 0.0108509 , 0.0279416 , - 0.0334225 , 0.0136482 , 0.0535294 , 0.0213592 , 0.027933 , - 0.0296296 , 0.0430925 , 0.0200236 , 0.0278884 , 0.0463054 , - 0.0368098 , 0.0205479 , 0.027833 , 0.0247748 , 0.0292398 , - 0.0149489 , 0.0326087 , 0.024183 , 0.0265487 , 0.0101597 , - 0.028125 , 0.0333333 , 0.024296 , 0.0395074 , 0.0255272 , - 0.0282575 , 0.0175867 , 0.0225443 , 0.0253317 , 0.0226131 , - 0.0478723 , 0.0238837 , 0.0229253 , 0.0208153 , 0.0377277 , - 0.0350765 , 0.0526316 , 0.0409556 , 0.0325581 , 0.0384906 , - 0.0438909 , 0.0250765 , 0.0383352 , 0.021057 , 0.0140392 , - 0.0248447 , 0.0277778 , 0.0128023 , 0.0438523 , 0.0338753 , - 0.0164701 , 0.0372493 , 0.0367422 , 0.0209832 , 0.0272031 , - 0.0305114 , 0.0208228 , 0.0246575 , 0.0269278 , 0.034039 , - 0.0348558 , 0.0205266 , 0.0447761 , 0.0470446 , 0.028169 , - 0.0107239 , 0.021978 , 0.00425532, 0.0169492 , 0.0470163 , - 0.0326693 , 0.0257069 , 0.0371163 , 0.0258519 , 0.0670444 , - 0.0401786 , 0.00708215, 0.0335475 , 0.030137 , 0.0210084}; - -void setup_dehost_subcommand(CLI::App& app) -{ + 0.494845, 0.66, 
0.55814, 0.375, 0.238889, 0.594444, + 0.597701, 0.37234}; +const std::vector default_neg_data = {0.029661, 0.00869565, 0.0830769, 0.0914513, 0.00950119, + 0.144928, 0.0286494, 0.0164141, 0.0425532, 0.032345, + 0.0186514, 0.0281899, 0., 0.0379651, 0.0427928, + 0.0571696, 0.0196319, 0.00830484, 0.0254545, 0.0104167, + 0.0192781, 0.0434917, 0.0165426, 0.0256917, 0.0288875, + 0.0231214, 0.0139555, 0.0181997, 0.0180437, 0.0222222, + 0.0177083, 0.0252525, 0.0381125, 0.0156128, 0.048603, + 0.0373519, 0.0379747, 0.0123748, 0.0380165, 0.042953, + 0.0284091, 0.0450205, 0.014876, 0.0477674, 0.0108873, + 0.0485075, 0.0283688, 0.0397799, 0.037058, 0.0195178, + 0.0285344, 0.032835, 0.0380864, 0.0271024, 0., + 0.0509554, 0.0440744, 0.0226349, 0.0388787, 0.0307692, + 0.0197628, 0.0183486, 0.0220588, 0.0194175, 0.00157233, + 0.0150637, 0.0372737, 0.0365034, 0.0361446, 0.0120919, + 0.0185449, 0.0505495, 0.0140728, 0.039557, 0.0632184, + 0.0312891, 0.0134626, 0.0209111, 0.0341297, 0.0286807, + 0.0166852, 0.0230701, 0.0101138, 0.0255102, 0.0425532, + 0.0276442, 0.0402415, 0.0368196, 0.0164688, 0.0488798, + 0.0283768, 0.0268714, 0.0496157, 0.0496806, 0.0158311, + 0.0191657, 0.0465511, 0.0460564, 0.0228311, 0.0272793, + 0.0475241, 0.0167959, 0.0221843, 0.0322767, 0.0318091, + 0.0171265, 0.0280222, 0.0237781, 0.0108481, 0.0288136, + 0.0408627, 0.0212766, 0.0322148, 0.0272989, 0.0347826, + 0.00402901, 0.0259009, 0.0351459, 0.00761905, 0.0261438, + 0.0463215, 0.0265372, 0.0582691, 0.0325203, 0.0304878, + 0.0283688, 0.0220264, 0.00813638, 0.0259434, 0.0219928, + 0.0142292, 0.0324519, 0.0206349, 0.0231729, 0.0179541, + 0.0447154, 0.0357436, 0.0221402, 0.0304124, 0.0559384, + 0.00854701, 0.0185567, 0.0107759, 0.016129, 0.0297824, + 0.0211216, 0.0352645, 0.046285, 0.0254181, 0.0440367, + 0.0274964, 0.0229277, 0.0542232, 0.00228833, 0.0342029, + 0.0397839, 0.0209068, 0.0293501, 0.0262069, 0.0157853, + 0.0365854, 0.026362, 0.0125953, 0.0194384, 0.0250284, + 0.0417973, 0.0236829, 0.00610583, 0.0164331, 0.026455, + 0.047619, 0.0228571, 0.0458221, 0.0965909, 0.0348162, + 0.0821918, 0.0198265, 0.026455, 0.0153846, 0.0615977, + 0.0296296, 0.034555, 0.0374805, 0.0253921, 0.0186992, + 0.0233236, 0.0119363, 0.0366881, 0.00961538, 0.0453846, + 0.0459094, 0.0391798, 0.0248075, 0.0330113, 0.00591716, + 0.0397306, 0.0323607, 0.0278035, 0.0193237, 0.0242152, + 0.00440529, 0.0294826, 0.0188679, 0.0543933, 0.0327363, + 0.0365233, 0.0515856, 0.042086, 0.012907, 0.0109664, + 0.0351605, 0.0251852, 0.0494148, 0.0401606, 0., + 0.0369686, 0.0411622, 0.0257143, 0.0416667, 0.0400943, + 0.0409836, 0.00296296, 0.0714286, 0.0270856, 0.0117521, + 0.0174472, 0.0354167, 0.0237056, 0.0292683, 0.0253723, + 0.025, 0.0163043, 0.0229983, 0.0418502, 0.0259179, + 0.0386047, 0.0355295, 0.0451613, 0.0353982, 0.0310711, + 0.0250344, 0.0125604, 0.0237691, 0.0406835, 0.0524476, + 0.0307692, 0.0248619, 0.0264085, 0.03003, 0.0219273, + 0.0214827, 0.039629, 0.0393462, 0.0207164, 0.0193896, + 0.0312947, 0.0485252, 0.0265004, 0.0298322, 0.0178571, + 0.0333333, 0.00632911, 0.0328767, 0.0355556, 0.0331825, + 0.0197568, 0.0413793, 0.0215983, 0.0182584, 0.0211161, + 0.0290276, 0.00200803, 0.035284, 0.0368509, 0.0279971, + 0.02334, 0.0245637, 0.00180505, 0.0298013, 0.0173069, + 0.0167254, 0.0321569, 0.0357616, 0.0438791, 0.024147, + 0.0399344, 0.0285962, 0.00518623, 0.0311218, 0.0288498, + 0.0236686, 0.012583, 0.0243531, 0.0218015, 0.0121639, + 0.041868, 0.0243223, 0.0260317, 0.0256233, 0.0325203, + 0.024186, 0.0125116, 0.0138889, 0.0221271, 0.0445293, + 0.0166667, 
0.0292373, 0.00485437, 0.0443599, 0.0174766, + 0.023511, 0.0411594, 0.0189873, 0.0137931, 0.0388889, + 0.0490798, 0.0394737, 0.0539568, 0.0342466, 0.0679612, + 0.0250696, 0.03125, 0.0365034, 0.0265252, 0.060241, + 0.0166008, 0.0295602, 0.0162665, 0.0100925, 0.0506757, + 0.0150754, 0.0737705, 0.0371367, 0.026362, 0.0289958, + 0.0297619, 0.0301003, 0.0219895, 0.0134804, 0.0175439, + 0.0443319, 0.0137457, 0.0286561, 0.0546875, 0.0655738, + 0.0326741, 0.0251232, 0.0363743, 0.0313901, 0.0442404, + 0.0262935, 0.019601, 0.0291751, 0.0316978, 0.0287356, + 0.0226586, 0.0341207, 0.00504356, 0.0359589, 0.0232751, + 0.0324374, 0.0125962, 0.0216401, 0.0282589, 0.0180505, + 0.0345912, 0.0228188, 0.0260374, 0.0195787, 0.00486618, + 0.0508732, 0.0280469, 0.0224719, 0.0320856, 0.0444444, + 0.0365535, 0.018797, 0.0759494, 0.072, 0.0235199, + 0.0193548, 0.0453202, 0.0414972, 0.019802, 0.019802, + 0.0317003, 0.0341047, 0.025544, 0.0518672, 0.0271859, + 0.0528355, 0.02398, 0.030303, 0.0219146, 0.0397456, + 0.010582, 0.0218182, 0.010846, 0.0348174, 0.032967, + 0.011117, 0.0404494, 0.0176829, 0.052, 0.0307692, + 0.0404355, 0.0392216, 0.00652667, 0.0440465, 0.0325444, + 0.0322581, 0.0220096, 0.030146, 0.0356802, 0.0349938, + 0.0285036, 0.0176194, 0.0571429, 0.0108509, 0.0279416, + 0.0334225, 0.0136482, 0.0535294, 0.0213592, 0.027933, + 0.0296296, 0.0430925, 0.0200236, 0.0278884, 0.0463054, + 0.0368098, 0.0205479, 0.027833, 0.0247748, 0.0292398, + 0.0149489, 0.0326087, 0.024183, 0.0265487, 0.0101597, + 0.028125, 0.0333333, 0.024296, 0.0395074, 0.0255272, + 0.0282575, 0.0175867, 0.0225443, 0.0253317, 0.0226131, + 0.0478723, 0.0238837, 0.0229253, 0.0208153, 0.0377277, + 0.0350765, 0.0526316, 0.0409556, 0.0325581, 0.0384906, + 0.0438909, 0.0250765, 0.0383352, 0.021057, 0.0140392, + 0.0248447, 0.0277778, 0.0128023, 0.0438523, 0.0338753, + 0.0164701, 0.0372493, 0.0367422, 0.0209832, 0.0272031, + 0.0305114, 0.0208228, 0.0246575, 0.0269278, 0.034039, + 0.0348558, 0.0205266, 0.0447761, 0.0470446, 0.028169, + 0.0107239, 0.021978, 0.00425532, 0.0169492, 0.0470163, + 0.0326693, 0.0257069, 0.0371163, 0.0258519, 0.0670444, + 0.0401786, 0.00708215, 0.0335475, 0.030137, 0.0210084}; + +void setup_dehost_subcommand(CLI::App &app) { auto opt = std::make_shared(); - auto* dehost_subcommand = app.add_subcommand( - "dehost", "Dehost read file into host and other using index."); + auto *dehost_subcommand = app.add_subcommand( + "dehost", "Dehost read file into host and other using index."); dehost_subcommand->add_option("", opt->read_file, "Fasta/q file") - ->required() - ->transform(make_absolute) - ->check(CLI::ExistingFile.description("")) - ->type_name("FILE"); + ->required() + ->transform(make_absolute) + ->check(CLI::ExistingFile.description("")) + ->type_name("FILE"); dehost_subcommand->add_option("", opt->read_file2, "Paired Fasta/q file") ->transform(make_absolute) @@ -227,7 +226,8 @@ void setup_dehost_subcommand(CLI::App& app) ->required() ->check(CLI::ExistingPath.description("")); - dehost_subcommand->add_option("-e,--extract", opt->category_to_extract, "Reads from this category in the index will be extracted to file.") + dehost_subcommand->add_option("-e,--extract", opt->category_to_extract, + "Reads from this category in the index will be extracted to file.") ->type_name("STRING"); dehost_subcommand->add_option("-p,--prefix", opt->prefix, "Prefix for the output files.") @@ -236,17 +236,20 @@ void setup_dehost_subcommand(CLI::App& app) ->default_str(""); dehost_subcommand - ->add_option("--chunk_size", opt->chunk_size, 
"Read file is read in chunks of this size, to be processed in parallel within a chunk.") + ->add_option("--chunk_size", opt->chunk_size, + "Read file is read in chunks of this size, to be processed in parallel within a chunk.") ->type_name("INT") ->capture_default_str(); dehost_subcommand - ->add_option("--lo_hi_threshold", opt->lo_hi_threshold, "Threshold used during model fitting stage to decide if read should be used to train lo or hi distribution.") + ->add_option("--lo_hi_threshold", opt->lo_hi_threshold, + "Threshold used during model fitting stage to decide if read should be used to train lo or hi distribution.") ->type_name("FLOAT") ->capture_default_str(); dehost_subcommand - ->add_option("--num_reads_to_fit", opt->num_reads_to_fit, "Number of reads to use to train each distribution in the model.") + ->add_option("--num_reads_to_fit", opt->num_reads_to_fit, + "Number of reads to use to train each distribution in the model.") ->type_name("INT") ->capture_default_str(); @@ -264,26 +267,31 @@ void setup_dehost_subcommand(CLI::App& app) ->capture_default_str(); dehost_subcommand - ->add_option("--min_compression", opt->min_compression, "Minimum read gzip compression ratio to classify (a measure of how much information is in the read.") + ->add_option("--min_compression", opt->min_compression, + "Minimum read gzip compression ratio to classify (a measure of how much information is in the read.") ->type_name("FLOAT") ->capture_default_str(); dehost_subcommand - ->add_option("--confidence", opt->confidence_threshold, "Minimum difference between the top 2 unique hit counts.") + ->add_option("--confidence", opt->confidence_threshold, + "Minimum difference between the top 2 unique hit counts.") ->type_name("INT") ->capture_default_str(); dehost_subcommand - ->add_option("--host_unique_prop_lo_threshold", opt->host_unique_prop_lo_threshold, "Require non-host reads to have unique host proportion below this threshold for classification.") + ->add_option("--host_unique_prop_lo_threshold", opt->host_unique_prop_lo_threshold, + "Require non-host reads to have unique host proportion below this threshold for classification.") ->type_name("INT") ->capture_default_str(); dehost_subcommand - ->add_option("--min_proportion_diff", opt->min_proportion_difference, "Minimum difference between the proportion of (non-unique) kmers found in each category.") + ->add_option("--min_proportion_diff", opt->min_proportion_difference, + "Minimum difference between the proportion of (non-unique) kmers found in each category.") ->type_name("FLOAT") ->capture_default_str(); dehost_subcommand - ->add_option("--min_probability_diff", opt->min_prob_difference, "Minimum difference between the probability found in each category.") + ->add_option("--min_probability_diff", opt->min_prob_difference, + "Minimum difference between the probability found in each category.") ->type_name("FLOAT") ->capture_default_str(); @@ -297,16 +305,17 @@ void setup_dehost_subcommand(CLI::App& app) ->capture_default_str(); dehost_subcommand->add_flag( - "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); + "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); // Set the function that will be called when this subcommand is issued. 
dehost_subcommand->callback([opt]() { dehost_main(*opt); }); } -void dehost_reads(const DehostArguments& opt, const Index& index){ +void dehost_reads(const DehostArguments &opt, const Index &index) { PLOG_INFO << "Dehosting file " << opt.read_file; - auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, seqan3::window_size{index.window_size()}); + auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, + seqan3::window_size{index.window_size()}); PLOG_VERBOSE << "Defined hash_adaptor"; auto agent = index.agent(); @@ -323,41 +332,40 @@ void dehost_reads(const DehostArguments& opt, const Index& index){ PLOG_DEBUG << "Defined Result with " << +index.num_bins() << " bins"; - for (auto && chunk : fin | seqan3::views::chunk(opt.chunk_size)) - { + for (auto &&chunk: fin | seqan3::views::chunk(opt.chunk_size)) { // You can use a for loop: - for (auto & record : chunk) - { + for (auto &record: chunk) { records.push_back(std::move(record)); } #pragma omp parallel for firstprivate(agent, hash_adaptor) num_threads(opt.threads) shared(result) - for (auto i=0; i std::numeric_limits::max()){ + if (read_length > std::numeric_limits::max()) { PLOG_WARNING << "Ignoring read " << record.id() << " as too long!"; continue; } - if (read_length == 0){ + if (read_length == 0) { PLOG_WARNING << "Ignoring read " << record.id() << " as has zero length!"; continue; } - auto qualities = record.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); + auto qualities = record.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); auto sum = std::accumulate(qualities.begin(), qualities.end(), 0); float mean_quality = 0; if (std::ranges::size(qualities) > 0) - mean_quality = static_cast< float >( sum )/ static_cast< float >(std::ranges::size(qualities)); + mean_quality = static_cast< float >( sum ) / static_cast< float >(std::ranges::size(qualities)); PLOG_VERBOSE << "Mean quality of read " << record.id() << " is " << mean_quality; float compression_ratio = get_compression_ratio(sequence_to_string(record.sequence())); PLOG_VERBOSE << "Found compression ratio of read " << record.id() << " is " << compression_ratio; auto read = ReadEntry(read_id, read_length, mean_quality, compression_ratio, result.input_summary()); - for (auto && value : record.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } PLOG_VERBOSE << "Finished adding raw hash counts for read " << read_id; @@ -373,10 +381,11 @@ void dehost_reads(const DehostArguments& opt, const Index& index){ } -void dehost_paired_reads(const DehostArguments& opt, const Index& index){ +void dehost_paired_reads(const DehostArguments &opt, const Index &index) { PLOG_INFO << "Dehosting files " << opt.read_file << " and " << opt.read_file2; - auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, seqan3::window_size{index.window_size()}); + auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{index.kmer_size()}}, + seqan3::window_size{index.window_size()}); PLOG_VERBOSE << "Defined hash_adaptor"; auto agent = index.agent(); @@ -395,50 +404,50 @@ void dehost_paired_reads(const DehostArguments& opt, const Index& index){ PLOG_DEBUG << "Defined Result with " << 
+index.num_bins() << " bins"; - for (auto && chunk : fin1 | seqan3::views::chunk(opt.chunk_size)) - { - for (auto & record : chunk) - { + for (auto &&chunk: fin1 | seqan3::views::chunk(opt.chunk_size)) { + for (auto &record: chunk) { records1.push_back(std::move(record)); } // loop in the second file and get same amount of reads - for ( auto& record2 : fin2 | std::views::take( opt.chunk_size ) ) - { - records2.push_back( std::move( record2 )); + for (auto &record2: fin2 | std::views::take(opt.chunk_size)) { + records2.push_back(std::move(record2)); } #pragma omp parallel for firstprivate(agent, hash_adaptor) num_threads(opt.threads) shared(result) - for (auto i=0; i std::numeric_limits::max()){ + if (read_length > std::numeric_limits::max()) { PLOG_WARNING << "Ignoring read " << record1.id() << " as too long!"; continue; } - if (read_length == 0){ + if (read_length == 0) { PLOG_WARNING << "Ignoring read " << record1.id() << " as has zero length!"; continue; } - auto qualities1 = record1.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); - auto qualities2 = record2.base_qualities() | std::views::transform( [](auto quality) { return seqan3::to_phred(quality); }); + auto qualities1 = record1.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); + auto qualities2 = record2.base_qualities() | + std::views::transform([](auto quality) { return seqan3::to_phred(quality); }); auto sum = std::accumulate(qualities1.begin(), qualities1.end(), 0); sum = std::accumulate(qualities2.begin(), qualities2.end(), sum); float mean_quality = 0; if (std::ranges::size(qualities1) + std::ranges::size(qualities2) > 0) - mean_quality = static_cast< float >( sum )/ static_cast< float >(std::ranges::size(qualities1) + std::ranges::size(qualities2)); + mean_quality = static_cast< float >( sum ) / + static_cast< float >(std::ranges::size(qualities1) + std::ranges::size(qualities2)); PLOG_VERBOSE << "Mean quality of read " << record1.id() << " is " << mean_quality; auto combined_record = sequence_to_string(record1.sequence()) + sequence_to_string(record2.sequence()); @@ -446,12 +455,12 @@ void dehost_paired_reads(const DehostArguments& opt, const Index& index){ PLOG_VERBOSE << "Found compression ratio of read " << record1.id() << " is " << compression_ratio; auto read = ReadEntry(read_id, read_length, mean_quality, compression_ratio, result.input_summary()); - for (auto && value : record1.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record1.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } - for (auto && value : record2.sequence() | hash_adaptor) { - const auto & entry = agent.bulk_contains(value); + for (auto &&value: record2.sequence() | hash_adaptor) { + const auto &entry = agent.bulk_contains(value); read.update_entry(entry); } PLOG_VERBOSE << "Finished adding raw hash counts for read " << read_id; @@ -468,8 +477,7 @@ void dehost_paired_reads(const DehostArguments& opt, const Index& index){ } -int dehost_main(DehostArguments & opt) -{ +int dehost_main(DehostArguments &opt) { auto log_level = plog::info; if (opt.verbosity == 1) { log_level = plog::debug; @@ -497,14 +505,14 @@ int dehost_main(DehostArguments & opt) opt.run_extract = (opt.category_to_extract != ""); const auto categories = index.categories(); - if (opt.run_extract and opt.category_to_extract != "all" and std::find(categories.begin(), 
categories.end(), opt.category_to_extract) == categories.end()) - { + if (opt.run_extract and opt.category_to_extract != "all" and + std::find(categories.begin(), categories.end(), opt.category_to_extract) == categories.end()) { std::string options = ""; for (auto i: categories) options += i + " "; PLOG_ERROR << "Cannot extract " << opt.category_to_extract << ", please chose one of [ all " << options << "]"; return 1; - } else if (opt.run_extract){ + } else if (opt.run_extract) { if (opt.prefix == "") opt.prefix = "charon"; std::vector to_extract; @@ -514,19 +522,20 @@ int dehost_main(DehostArguments & opt) to_extract.push_back(opt.category_to_extract); const auto extension = get_extension(opt.read_file); - for (const auto & category : to_extract){ + for (const auto &category: to_extract) { const auto category_index = index.get_category_index(category); if (opt.is_paired) { - opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + "_1" + extension + ".gz"); - opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + "_2" + extension + ".gz"); + opt.extract_category_to_file[category_index].push_back( + opt.prefix + "_" + category + "_1" + extension + ".gz"); + opt.extract_category_to_file[category_index].push_back( + opt.prefix + "_" + category + "_2" + extension + ".gz"); } else { opt.extract_category_to_file[category_index].push_back(opt.prefix + "_" + category + extension + ".gz"); } } } - if (opt.dist != "gamma" and opt.dist != "beta" and opt.dist != "kde") - { + if (opt.dist != "gamma" and opt.dist != "beta" and opt.dist != "kde") { PLOG_ERROR << "Supported distributions are [gamma , beta, kde]"; return 1; } diff --git a/src/index_main.cpp b/src/index_main.cpp index 0b89d6e..ed5dba4 100644 --- a/src/index_main.cpp +++ b/src/index_main.cpp @@ -20,37 +20,37 @@ #include -void setup_index_subcommand(CLI::App& app) -{ +void setup_index_subcommand(CLI::App &app) { auto opt = std::make_shared(); - auto* index_subcommand = app.add_subcommand( - "index", "Build an index (IBF) for a number of references split into a small number of bins."); + auto *index_subcommand = app.add_subcommand( + "index", "Build an index (IBF) for a number of references split into a small number of bins."); - index_subcommand->add_option("", opt->input_file, "Tab separated file with columns for filename and category") - ->required() - ->transform(make_absolute) - ->check(CLI::ExistingFile.description("")) - ->type_name("FILE"); + index_subcommand->add_option("", opt->input_file, + "Tab separated file with columns for filename and category") + ->required() + ->transform(make_absolute) + ->check(CLI::ExistingFile.description("")) + ->type_name("FILE"); index_subcommand - ->add_option( - "-w", opt->window_size, "Window size for (w,k,s)-minimers (must be <=k).") - ->type_name("INT") - ->capture_default_str(); + ->add_option( + "-w", opt->window_size, "Window size for (w,k,s)-minimers (must be <=k).") + ->type_name("INT") + ->capture_default_str(); index_subcommand->add_option("-k", opt->kmer_size, "K-mer size for (w,k,s)-minimers.") - ->type_name("INT") - ->capture_default_str(); + ->type_name("INT") + ->capture_default_str(); index_subcommand - ->add_option("-t,--threads", opt->threads, "Maximum number of threads to use.") - ->type_name("INT") - ->capture_default_str(); + ->add_option("-t,--threads", opt->threads, "Maximum number of threads to use.") + ->type_name("INT") + ->capture_default_str(); index_subcommand->add_option("-p,--prefix", opt->prefix, "Prefix for 
the output index.") - ->type_name("FILE") - ->check(CLI::NonexistentPath.description("")) - ->default_str(""); + ->type_name("FILE") + ->check(CLI::NonexistentPath.description("")) + ->default_str(""); index_subcommand->add_option("--temp", opt->tmp_dir, "Temporary directory for index construction files.") ->type_name("DIR") @@ -64,7 +64,7 @@ void setup_index_subcommand(CLI::App& app) "--optimize", opt->optimize, "Compress the number of bins for improved classification run time"); index_subcommand->add_flag( - "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); + "-v", opt->verbosity, "Verbosity of logging. Repeat for increased verbosity"); // Set the function that will be called when this subcommand is issued. @@ -72,7 +72,7 @@ void setup_index_subcommand(CLI::App& app) } -InputSummary parse_input_file(const std::filesystem::path& input_file){ +InputSummary parse_input_file(const std::filesystem::path &input_file) { PLOG_INFO << "Parsing input file " << input_file; InputSummary summary; uint8_t next_bin = 0; @@ -86,8 +86,7 @@ InputSummary parse_input_file(const std::filesystem::path& input_file){ } std::string line; - while (std::getline(input_ifstream, line)) - { + while (std::getline(input_ifstream, line)) { if (!line.empty()) { auto parts = split(line, "\t"); if (parts.size() >= 2) { @@ -96,8 +95,9 @@ InputSummary parse_input_file(const std::filesystem::path& input_file){ summary.bin_to_category[next_bin] = name; categories.insert(name); summary.filepath_to_bin.emplace_back(std::make_pair(path, next_bin)); - if (next_bin == std::numeric_limits::max()){ - PLOG_WARNING << "User has reached the maximum number of files which is " << +next_bin << " - ignoring any additional lines!"; + if (next_bin == std::numeric_limits::max()) { + PLOG_WARNING << "User has reached the maximum number of files which is " << +next_bin + << " - ignoring any additional lines!"; break; } next_bin++; @@ -109,15 +109,16 @@ InputSummary parse_input_file(const std::filesystem::path& input_file){ summary.num_bins = next_bin; summary.categories.insert(summary.categories.end(), categories.begin(), categories.end()); - PLOG_INFO << "Found " << summary.filepath_to_bin.size() << " files corresponding to " << +summary.num_categories() << " categories"; + PLOG_INFO << "Found " << summary.filepath_to_bin.size() << " files corresponding to " << +summary.num_categories() + << " categories"; return summary; } -InputStats count_and_store_hashes(const IndexArguments& opt, const InputSummary& summary) -{ +InputStats count_and_store_hashes(const IndexArguments &opt, const InputSummary &summary) { PLOG_INFO << "Extracting hashes from files"; - const auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{opt.kmer_size}}, seqan3::window_size{opt.window_size}); + const auto hash_adaptor = seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{opt.kmer_size}}, + seqan3::window_size{opt.window_size}); PLOG_DEBUG << "Defined hash adaptor"; InputStats stats; PLOG_DEBUG << "Defined stats"; @@ -126,9 +127,9 @@ InputStats count_and_store_hashes(const IndexArguments& opt, const InputSummary& PLOG_INFO << "Maximum hashes permitted per bin for fpr rate " << opt.max_fpr << " is " << max_num_hashes; #pragma omp parallel for num_threads(opt.threads) - for (const auto pair : summary.filepath_to_bin) { - const auto& fasta_file = pair.first; - const auto& bin = pair.second; + for (const auto pair: summary.filepath_to_bin) { + const auto &fasta_file = pair.first; + const auto &bin = 
pair.second; stats.records_per_bin[bin] += 0; PLOG_DEBUG << "Adding file " << fasta_file; @@ -138,33 +139,35 @@ InputStats count_and_store_hashes(const IndexArguments& opt, const InputSummary& auto record_count = 0; std::unordered_set hashes; - for (const auto & record : fin){ + for (const auto &record: fin) { stats.records_per_bin[bin] += 1; const auto mh = record.sequence() | hash_adaptor | std::views::common; - hashes.insert( mh.begin(), mh.end() ); + hashes.insert(mh.begin(), mh.end()); record_count++; } - store_hashes( std::to_string(bin), hashes, opt.tmp_dir ); + store_hashes(std::to_string(bin), hashes, opt.tmp_dir); stats.hashes_per_bin[bin] += hashes.size(); - PLOG_INFO << "Added file " << fasta_file << " with " << record_count << " records and " << hashes.size() << " hashes to bin " << +bin; + PLOG_INFO << "Added file " << fasta_file << " with " << record_count << " records and " << hashes.size() + << " hashes to bin " << +bin; - if (stats.hashes_per_bin[bin] > max_num_hashes){ - PLOG_WARNING << "File " << fasta_file << " with " << hashes.size() << " will exceed max_fpr " << opt.max_fpr; + if (stats.hashes_per_bin[bin] > max_num_hashes) { + PLOG_WARNING + << "File " << fasta_file << " with " << hashes.size() << " will exceed max_fpr " << opt.max_fpr; } } return stats; } -std::unordered_map> optimize_layout(const IndexArguments& opt, InputSummary& summary, InputStats& stats) -{ +std::unordered_map> +optimize_layout(const IndexArguments &opt, InputSummary &summary, InputStats &stats) { std::unordered_map bin_to_bucket_map; std::unordered_map> bucket_to_bins_map; if (stats.hashes_per_bin.size() == 0) { return bucket_to_bins_map; - } else if (not opt.optimize){ - for ( auto bin=0; bin> optimize_layout(const IndexArg // update stats PLOG_INFO << "Reassign bins"; - for (const auto & pair : sorted_pairs){ - const auto& bin = pair.first; + for (const auto &pair: sorted_pairs) { + const auto &bin = pair.first; PLOG_DEBUG << "Reassign bin " << +bin; - assert( summary.bin_to_category.find(bin) != summary.bin_to_category.end() ); - const auto& category = summary.bin_to_category.at(bin); - const auto& num_hashes = pair.second; + assert(summary.bin_to_category.find(bin) != summary.bin_to_category.end()); + const auto &category = summary.bin_to_category.at(bin); + const auto &num_hashes = pair.second; auto assigned_bucket = next_bin; - if (last_bin.find(category) != last_bin.end()){ - const auto& last_bin_in_category = last_bin[category]; - const auto& num_hashes_in_last_bin = new_stats.hashes_per_bin[last_bin_in_category]; - if (num_hashes_in_last_bin + num_hashes < max_num_hashes){ + if (last_bin.find(category) != last_bin.end()) { + const auto &last_bin_in_category = last_bin[category]; + const auto &num_hashes_in_last_bin = new_stats.hashes_per_bin[last_bin_in_category]; + if (num_hashes_in_last_bin + num_hashes < max_num_hashes) { assigned_bucket = last_bin_in_category; } else { next_bin++; @@ -207,8 +210,9 @@ std::unordered_map> optimize_layout(const IndexArg bin_to_bucket_map[bin] = assigned_bucket; bucket_to_bins_map[assigned_bucket].push_back(bin); new_stats.hashes_per_bin[assigned_bucket] += num_hashes; - PLOG_INFO << "Bin " << +bin << " assigned to bucket " << +assigned_bucket << " which now has " << new_stats.hashes_per_bin[assigned_bucket] << " hashes"; - assert( stats.records_per_bin.find(bin) != stats.records_per_bin.end() ); + PLOG_INFO << "Bin " << +bin << " assigned to bucket " << +assigned_bucket << " which now has " + << new_stats.hashes_per_bin[assigned_bucket] << " 
hashes"; + assert(stats.records_per_bin.find(bin) != stats.records_per_bin.end()); new_stats.records_per_bin[assigned_bucket] += stats.records_per_bin.at(bin); } stats = new_stats; @@ -217,13 +221,13 @@ std::unordered_map> optimize_layout(const IndexArg PLOG_INFO << "Update summary"; new_summary.num_bins = next_bin; new_summary.categories = summary.categories; - for (const auto & pair : summary.filepath_to_bin){ - const auto & filepath = pair.first; - const auto & bin = pair.second; - PLOG_DEBUG << "Update summary for bin " << +bin; - const auto & bucket = bin_to_bucket_map[bin]; + for (const auto &pair: summary.filepath_to_bin) { + const auto &filepath = pair.first; + const auto &bin = pair.second; + PLOG_DEBUG << "Update summary for bin " << +bin; + const auto &bucket = bin_to_bucket_map[bin]; new_summary.filepath_to_bin.emplace_back(std::make_pair(filepath, bucket)); - assert( summary.bin_to_category.find(bin) != summary.bin_to_category.end() ); + assert(summary.bin_to_category.find(bin) != summary.bin_to_category.end()); new_summary.bin_to_category[bucket] = summary.bin_to_category.at(bin); } summary = new_summary; @@ -231,36 +235,34 @@ std::unordered_map> optimize_layout(const IndexArg return bucket_to_bins_map; } -Index build_index(const IndexArguments& opt, const InputSummary& summary, InputStats& stats, const std::unordered_map>& bucket_to_bins_map) -{ +Index build_index(const IndexArguments &opt, const InputSummary &summary, InputStats &stats, + const std::unordered_map> &bucket_to_bins_map) { const auto max_num_hashes = stats.max_num_hashes(); const auto num_bits = bin_size_in_bits(opt, max_num_hashes); PLOG_INFO << "Create new IBF with " << +summary.num_bins << " bins and " << +num_bits << " bits"; seqan3::interleaved_bloom_filter ibf{seqan3::bin_count{summary.num_bins}, - seqan3::bin_size{num_bits}, - seqan3::hash_function_count{opt.num_hash}}; + seqan3::bin_size{num_bits}, + seqan3::hash_function_count{opt.num_hash}}; #pragma omp parallel for - for ( uint8_t bucket=0; bucket -void load_index(Index & index, std::filesystem::path const & path) -{ +void load_index(Index &index, std::filesystem::path const &path) { PLOG_INFO << "Loading index from file " << path; std::ifstream is{path, std::ios::binary}; cereal::BinaryInputArchive iarchive{is}; diff --git a/src/main.cpp b/src/main.cpp index 3577b75..78ab2d7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,8 +9,7 @@ class MyFormatter : public CLI::Formatter { public: - std::string make_option_opts(const CLI::Option* opt) const override - { + std::string make_option_opts(const CLI::Option *opt) const override { std::stringstream out; if (opt->get_type_size() != 0) { @@ -26,19 +25,18 @@ class MyFormatter : public CLI::Formatter { out << " (" << get_label("Env") << ":" << opt->get_envname() << ")"; if (!opt->get_needs().empty()) { out << " " << get_label("Needs") << ":"; - for (const CLI::Option* op : opt->get_needs()) + for (const CLI::Option *op: opt->get_needs()) out << " " << op->get_name(); } if (!opt->get_excludes().empty()) { out << " " << get_label("Excludes") << ":"; - for (const CLI::Option* op : opt->get_excludes()) + for (const CLI::Option *op: opt->get_excludes()) out << " " << op->get_name(); } return out.str(); } - std::string make_option_desc(const CLI::Option* opt) const override - { + std::string make_option_desc(const CLI::Option *opt) const override { std::stringstream out; out << opt->get_description(); if (!opt->get_default_str().empty()) { @@ -48,13 +46,12 @@ class MyFormatter : public CLI::Formatter { } }; 
-int main(int argc, char* argv[])
-{
-    CLI::App app { "Charon: Categorize reads into a small number of classes." };
+int main(int argc, char *argv[]) {
+    CLI::App app{"Charon: Categorize reads into a small number of classes."};
     app.formatter(std::make_shared());
     app.add_flag_callback("-V,--version", []() {
         std::cout << "Charon version: " << SOFTWARE_VERSION << std::endl;
-        throw(CLI::Success {});
+        throw (CLI::Success{});
     });
     setup_index_subcommand(app);
     setup_classify_subcommand(app);
diff --git a/src/utils.cpp b/src/utils.cpp
index 68d0bd2..d74a9b7 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -6,10 +6,10 @@ std::filesystem::path make_absolute(std::filesystem::path path) {
     return std::filesystem::absolute(path);
 }
 
-std::vector split(const std::string& s, const std::string& delimiter){
+std::vector split(const std::string &s, const std::string &delimiter) {
     std::vector substrings;
 
-    int start, end = -1*delimiter.size();
+    int start, end = -1 * delimiter.size();
     do {
         start = end + delimiter.size();
         end = s.find(delimiter, start);
@@ -19,68 +19,60 @@ std::vector split(const std::string& s, const std::string& delimite
     return substrings;
 }
 
-bool ends_with(std::string str, std::string suffix)
-{
-    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
+bool ends_with(std::string str, std::string suffix) {
+    return str.size() >= suffix.size() && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
 }
 
-bool starts_with(std::string str, std::string prefix)
-{
+bool starts_with(std::string str, std::string prefix) {
     return str.size() >= prefix.size() && str.compare(0, prefix.size(), prefix) == 0;
 }
 
-void store_hashes( const std::string target,
-                   const std::unordered_set< uint64_t >& hashes,
-                   const std::string tmp_output_folder )
-{
-    /*
-     * store hashes from set to disk in the specified folder (or current folder ".")
-     */
-    std::filesystem::path outf{ tmp_output_folder };
-    outf += "/" + target + ".min";
-    std::ofstream outfile{ outf, std::ios::binary | std::ios::app };
-    for ( auto&& h : hashes )
-    {
-        outfile.write( reinterpret_cast< const char* >( &h ), sizeof( h ) );
-    }
-    outfile.close();
+void store_hashes(const std::string target,
+                  const std::unordered_set &hashes,
+                  const std::string tmp_output_folder) {
+    /*
+     * store hashes from set to disk in the specified folder (or current folder ".")
+     */
+    std::filesystem::path outf{tmp_output_folder};
+    outf += "/" + target + ".min";
+    std::ofstream outfile{outf, std::ios::binary | std::ios::app};
+    for (auto &&h: hashes) {
+        outfile.write(reinterpret_cast< const char * >( &h ), sizeof(h));
     }
+    outfile.close();
+}
 
-    std::vector< uint64_t > load_hashes( const std::string target,
-                                         const std::string tmp_output_folder )
-    {
-        /*
-         * load hashes file from disk and returns them in a vector
-         */
-        uint64_t hash;
-        std::vector< uint64_t > hashes;
-        std::filesystem::path file{ tmp_output_folder };
-        file += "/" + target + ".min";
-        std::ifstream infile{ file, std::ios::binary };
-        while ( infile.read( reinterpret_cast< char* >( &hash ), sizeof( hash ) ) )
-            hashes.push_back( hash );
-        return hashes;
-    }
+std::vector load_hashes(const std::string target,
+                        const std::string tmp_output_folder) {
+    /*
+     * load hashes file from disk and returns them in a vector
+     */
+    uint64_t hash;
+    std::vector hashes;
+    std::filesystem::path file{tmp_output_folder};
+    file += "/" + target + ".min";
+    std::ifstream infile{file, std::ios::binary};
+    while (infile.read(reinterpret_cast< char * >( &hash ), sizeof(hash)))
+        hashes.push_back(hash);
+    return hashes;
+}
 
-void delete_hashes( const std::vector& targets, const std::string tmp_output_folder )
-{
+void delete_hashes(const std::vector &targets, const std::string tmp_output_folder) {
     /*
     * delete hashes from disk
     */
-    for ( const auto & target: targets )
-    {
-        std::filesystem::path outf{ tmp_output_folder };
+    for (const auto &target: targets) {
+        std::filesystem::path outf{tmp_output_folder};
         outf += "/" + std::to_string(target) + ".min";
-        if ( std::filesystem::exists( outf ) )
-            std::filesystem::remove( outf );
+        if (std::filesystem::exists(outf))
+            std::filesystem::remove(outf);
     }
-    if ( std::filesystem::is_empty( tmp_output_folder ) )
+    if (std::filesystem::is_empty(tmp_output_folder))
         std::filesystem::remove(tmp_output_folder);
 }
 
-size_t bin_size_in_bits(const IndexArguments & opt, const uint64_t & num_elements)
-{
+size_t bin_size_in_bits(const IndexArguments &opt, const uint64_t &num_elements) {
     assert(opt.num_hash > 0);
     assert(opt.max_fpr > 0.0);
     assert(opt.max_fpr < 1.0);
@@ -89,15 +81,15 @@ size_t bin_size_in_bits(const IndexArguments & opt, const uint64_t & num_element
     double const denominator{std::log(1 - std::exp(std::log(opt.max_fpr) / opt.num_hash))};
     double const result{std::ceil(numerator / denominator)};
 
-    if (result > opt.bits){
-        PLOG_WARNING << "Require " << +result << " bits for max_fpr " << opt.max_fpr << " but only have " << +opt.bits << " bits available";
+    if (result > opt.bits) {
+        PLOG_WARNING << "Require " << +result << " bits for max_fpr " << opt.max_fpr << " but only have " << +opt.bits
+                     << " bits available";
         return opt.bits;
     }
     return result;
 }
 
-size_t max_num_hashes_for_fpr(const IndexArguments & opt)
-{
+size_t max_num_hashes_for_fpr(const IndexArguments &opt) {
     assert(opt.bits > 0);
     assert(opt.max_fpr > 0.0);
     assert(opt.max_fpr < 1.0);
@@ -108,30 +100,32 @@ size_t max_num_hashes_for_fpr(const IndexArguments & opt)
     return result;
 }
 
+//std::__tuple_element_t,std::vector>
-std::string sequence_to_string(const __type_pack_element<0, std::vector, std::string, std::vector>& input){
+std::string sequence_to_string(
+        const __type_pack_element<0, std::vector, std::string, std::vector> &input) {
     std::vector sequence{};
-    for (auto c : input)
+    for (auto c: input)
         sequence.push_back(seqan3::to_char(c));
     std::string str(sequence.begin(), sequence.end());
     return str;
 }
 
-float get_compression_ratio(const std::string& sequence){
-    const char * pointer = sequence.data();
+float get_compression_ratio(const std::string &sequence) {
+    const char *pointer = sequence.data();
     std::size_t initial_size = sequence.size();
     std::string compressed_data = gzip::compress(pointer, initial_size);
-    const char * compressed_pointer = compressed_data.data();
+    const char *compressed_pointer = compressed_data.data();
     std::size_t compressed_size = compressed_data.size();
     assert(compressed_size != 0);
-    return static_cast(compressed_size)/static_cast(initial_size);
+    return static_cast(compressed_size) / static_cast(initial_size);
}
 
-std::string get_extension(const std::filesystem::path read_file){
+std::string get_extension(const std::filesystem::path read_file) {
     auto ext = read_file.extension().string();
-    if (ext == ".gz"){
+    if (ext == ".gz") {
         std::filesystem::path short_read_file = read_file.stem();
         ext = short_read_file.extension().string();
     }