From 6875ad44ac62309b0d29da95200b731c573eb039 Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Sat, 18 Aug 2018 18:28:55 +0300 Subject: [PATCH 1/4] Add option to store discovered adapters and use them in subsequent runs. --- porechop/porechop.py | 60 +++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/porechop/porechop.py b/porechop/porechop.py index 45dea2c..985d6ac 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -16,16 +16,19 @@ import argparse import os +import os.path import sys import subprocess import multiprocessing import shutil import re +import json from multiprocessing.dummy import Pool as ThreadPool from collections import defaultdict from .misc import load_fasta_or_fastq, print_table, red, bold_underline, MyHelpFormatter, int_to_str from .adapters import ADAPTERS, make_full_native_barcode_adapter, make_full_rapid_barcode_adapter from .nanopore_read import NanoporeRead +from .adapters import Adapter from .version import __version__ @@ -33,22 +36,8 @@ def main(): args = get_arguments() reads, check_reads, read_type = load_reads(args.input, args.verbosity, args.print_dest, args.check_reads) + matching_sets,forward_or_reverse_barcodes = get_matching_sets(args,check_reads) - matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, - args.scoring_scheme_vals, args.print_dest, - args.adapter_threshold, args.threads) - matching_sets = exclude_end_adapters_for_rapid(matching_sets) - matching_sets = fix_up_1d2_sets(matching_sets) - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) - - if args.barcode_dir: - forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, - args.print_dest) - else: - forward_or_reverse_barcodes = None - if args.verbosity > 0: - print('\n', file=args.print_dest) if matching_sets: check_barcodes = (args.barcode_dir is not None) 
@@ -133,6 +122,9 @@ def get_arguments(): adapter_search_group = parser.add_argument_group('Adapter search settings', 'Control how the program determines which ' 'adapter sets are present') + adapter_search_group.add_argument('--adapter_storage', type=str, default=None, + help='Name of a JSON file to store discovered adapters or ' + 'to load them (and skip discovery)') adapter_search_group.add_argument('--adapter_threshold', type=float, default=90.0, help='An adapter set has to have at least this percent ' 'identity to be labelled as present and trimmed off ' @@ -219,6 +211,44 @@ def get_arguments(): return args +def get_matching_sets(args,check_reads): + if args.adapter_storage is not None and os.path.exists(args.adapter_storage): + adapter_dict = json.load(open(args.adapter_storage)) + assert adapter_dict["__version__"] == __version__, "Can only use adapter storage for version {}".format(__version__) + forward_or_reverse_barcodes = adapter_dict["forward_or_reverse_barcodes"] + matching_sets = [Adapter(name,**seqs) for name,seqs in adapter_dict["matching_sets"].items() ] + + else: + matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, + args.scoring_scheme_vals, args.print_dest, + args.adapter_threshold, args.threads) + matching_sets = exclude_end_adapters_for_rapid(matching_sets) + matching_sets = fix_up_1d2_sets(matching_sets) + display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) + matching_sets = add_full_barcode_adapter_sets(matching_sets) + + if args.barcode_dir: + forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, + args.print_dest) + else: + forward_or_reverse_barcodes = None + if args.verbosity > 0: + print('\n', file=args.print_dest) + + if args.adapter_storage is not None: + adapter_dict={"__version__":__version__, + "forward_or_reverse_barcodes":forward_or_reverse_barcodes, + "matching_sets": {} } + + for adapter in matching_sets: + 
adapter_dict["matching_sets"][adapter.name] = { + "start_sequence":adapter.start_sequence, + "end_sequence":adapter.end_sequence + } + json.dump(adapter_dict,open(args.adapter_storage,"w")) + + return matching_sets,forward_or_reverse_barcodes + def load_reads(input_file_or_directory, verbosity, print_dest, check_read_count): # If the input is a file, just load reads from that file. The check reads will just be the From 480658dd64e4eef691651fd6fda91935d9b7ebba Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 09:33:56 +0000 Subject: [PATCH 2/4] Fixed Merge conflict --- porechop/porechop.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/porechop/porechop.py b/porechop/porechop.py index a20dc56..1335ed8 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -39,25 +39,6 @@ def main(): args.check_reads) matching_sets,forward_or_reverse_barcodes = get_matching_sets(args,check_reads) -<<<<<<< HEAD -======= - matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, - args.scoring_scheme_vals, args.print_dest, - args.adapter_threshold, args.threads) - matching_sets = fix_up_1d2_sets(matching_sets) - - if args.barcode_dir: - forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, - args.print_dest) - else: - forward_or_reverse_barcodes = None - - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) - - if args.verbosity > 0: - print('\n', file=args.print_dest) ->>>>>>> upstream/master if matching_sets: check_barcodes = (args.barcode_dir is not None) From 483611a55356dbf4c4249ffb2ecbf6a009adc591 Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 11:09:30 +0000 Subject: [PATCH 3/4] Fixed more of the merge conflict --- porechop/cpp_function_wrappers.py | 2 +- porechop/porechop.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/porechop/cpp_function_wrappers.py b/porechop/cpp_function_wrappers.py index ba3f993..56026c7 100644 --- a/porechop/cpp_function_wrappers.py +++ b/porechop/cpp_function_wrappers.py @@ -21,7 +21,7 @@ SO_FILE = 'cpp_functions.so' SO_FILE_FULL = os.path.join(os.path.dirname(os.path.realpath(__file__)), SO_FILE) if not os.path.isfile(SO_FILE_FULL): - sys.exit('could not find ' + SO_FILE + ' - please reinstall') + sys.exit('could not find ' + SO_FILE_FULL + ' - please reinstall') C_LIB = CDLL(SO_FILE_FULL) C_LIB.adapterAlignment.argtypes = [c_char_p, # Read sequence diff --git a/porechop/porechop.py b/porechop/porechop.py index 1335ed8..4cf6bfc 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -223,16 +223,18 @@ def get_matching_sets(args,check_reads): matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, args.scoring_scheme_vals, args.print_dest, args.adapter_threshold, args.threads) - matching_sets = exclude_end_adapters_for_rapid(matching_sets) + matching_sets = fix_up_1d2_sets(matching_sets) - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) if args.barcode_dir: forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, args.print_dest) else: forward_or_reverse_barcodes = None + + display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) + matching_sets = add_full_barcode_adapter_sets(matching_sets) + if args.verbosity > 0: print('\n', file=args.print_dest) From 9583dead0b73c11dc7326ec0b991c7e380ca75ec Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 13:17:15 +0200 Subject: [PATCH 4/4] Added more of the seqann --- .../async_wave_execution_interface.h | 252 +++ .../seqan/align_parallel/dp_kernel_adaptor.h | 343 ++++ .../dp_parallel_execution_policies.h | 174 ++ .../seqan/align_parallel/dp_parallel_scout.h | 263 +++ .../align_parallel/dp_parallel_scout_simd.h | 362 ++++ 
.../seqan/align_parallel/dp_settings.h | 109 ++ .../include/seqan/align_parallel/dp_traits.h | 120 ++ .../align_parallel/parallel_align_interface.h | 366 ++++ .../wavefront_alignment_executor.h | 98 ++ .../wavefront_alignment_result.h | 165 ++ .../wavefront_alignment_scheduler.h | 347 ++++ .../align_parallel/wavefront_alignment_task.h | 404 +++++ ...wavefront_alignment_thread_local_storage.h | 130 ++ .../seqan/align_parallel/wavefront_task.h | 365 ++++ .../align_parallel/wavefront_task_event.h | 104 ++ .../align_parallel/wavefront_task_executor.h | 146 ++ .../align_parallel/wavefront_task_queue.h | 139 ++ .../align_parallel/wavefront_task_scheduler.h | 218 +++ .../align_parallel/wavefront_task_util.h | 557 ++++++ porechop/include/seqan/simd/simd_base.h | 390 +++++ .../include/seqan/simd/simd_base_seqan_impl.h | 154 ++ .../seqan/simd/simd_base_seqan_impl_avx2.h | 1492 +++++++++++++++++ .../seqan/simd/simd_base_seqan_impl_avx512.h | 284 ++++ .../seqan/simd/simd_base_seqan_impl_sse4.2.h | 1053 ++++++++++++ .../seqan/simd/simd_base_seqan_interface.h | 392 +++++ .../seqan/simd/simd_base_umesimd_impl.h | 655 ++++++++ 26 files changed, 9082 insertions(+) create mode 100644 porechop/include/seqan/align_parallel/async_wave_execution_interface.h create mode 100644 porechop/include/seqan/align_parallel/dp_kernel_adaptor.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_scout.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h create mode 100644 porechop/include/seqan/align_parallel/dp_settings.h create mode 100644 porechop/include/seqan/align_parallel/dp_traits.h create mode 100644 porechop/include/seqan/align_parallel/parallel_align_interface.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_executor.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_result.h create mode 100644 
porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_task.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_event.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_executor.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_queue.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_scheduler.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_util.h create mode 100644 porechop/include/seqan/simd/simd_base.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_interface.h create mode 100644 porechop/include/seqan/simd/simd_base_umesimd_impl.h diff --git a/porechop/include/seqan/align_parallel/async_wave_execution_interface.h b/porechop/include/seqan/align_parallel/async_wave_execution_interface.h new file mode 100644 index 0000000..3ebb98a --- /dev/null +++ b/porechop/include/seqan/align_parallel/async_wave_execution_interface.h @@ -0,0 +1,252 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ + +namespace seqan +{ +namespace impl +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +/* + * @class AsyncWaveAlignExecutor + * @brief Executor of the wave-front alignment mode with no SIMD vectorization. + * @headerfile + * + * Manges shared data for the wave-front execution before executing the alignments. + */ +template +class AsyncWaveAlignExecutor +{ +public: + + using TAlignmentTask = WavefrontAlignmentTask; + using TThreadLocal = typename WavefrontAlignmentTaskConfig::TThreadLocal; + using TStorage = EnumerableThreadLocal; + using TExecutor = WavefrontAlignmentExecutor; + + TSettings _settings; + // Initialize the alignment scheduler. 
+ WavefrontAlignmentScheduler _alignScheduler; + + TStorage _threadLocalStorage{}; + TExecutor _executor{}; + unsigned _alignCounter{0}; + unsigned _blockSize{}; + + template + AsyncWaveAlignExecutor(TSettings settings, + ExecutionPolicy, Serial> const & execPolicy) : + _settings(std::move(settings)), + _alignScheduler(parallelAlignments(execPolicy), numThreads(execPolicy)), + _threadLocalStorage(TThreadLocal{parallelAlignments(execPolicy)}), + _blockSize(blockSize(execPolicy)) + { + _executor.ptrTaskScheduler = &taskScheduler(_alignScheduler); + _executor.ptrThreadLocal = &_threadLocalStorage; + setCount(storageManager(_threadLocalStorage), numThreads(execPolicy)); + } +}; + +/* + * @fn AsyncWaveAlignExecutor#submit + * @brief Submits a new alignment job asynchronosly. + */ +template +inline void +submit(AsyncWaveAlignExecutor & me, + TSeqH const & seqH, + TSeqV const & seqV, + TCallable && callback) +{ + using TAlignTask = typename AsyncWaveAlignExecutor::TAlignmentTask; + + std::function f = + [&, func = TAlignTask{me._alignCounter++, seqH, seqV, me._settings, me._blockSize}](uint16_t id) mutable + { + func(id, me._executor, std::forward(callback)); + }; + scheduleTask(me._alignScheduler, f); +} + +/* + * @fn AsyncWaveAlignExecutor#wait + * @brief Explicit barrier to wait for all submitted jobs to be finished. + */ +template +inline void +wait(AsyncWaveAlignExecutor & me) +{ + notify(me._alignScheduler); + wait(me._alignScheduler); +} + +/* + * @class AsyncWaveAlignExecutorSimd + * @brief Executor of the wave-front alignment mode with SIMD vectorization. + * @headerfile + * + * Manges shared data for the wave-front execution before executing the alignments. + */ +#ifdef SEQAN_SIMD_ENABLED +template +class AsyncWaveAlignExecutorSimd +{ +public: + + // Translate dp settings into simd settings. 
+ using TSimdSettings = SimdDPSettings; + + using TAlignmentTask = WavefrontAlignmentTask>; + using TWavefrontTask = WavefrontTask; + using TSimdTaskQueue = WavefrontTaskQueue::VALUE>; + + using TThreadLocal = typename WavefrontAlignmentSimdTaskConfig::TThreadLocal; + using TStorage = EnumerableThreadLocal; + using TExecutor = WavefrontAlignmentExecutor; + + + TSimdSettings _settings; + // Initialize the alignment scheduler. + WavefrontAlignmentScheduler _alignScheduler; + + TStorage _threadLocalStorage; + TExecutor _executor{}; + TSimdTaskQueue _simdTaskQueue{}; + unsigned _alignCounter{0}; + unsigned _blockSize{}; + + template + AsyncWaveAlignExecutorSimd(TSettings const & settings, + ExecutionPolicy, Vectorial> const & execPolicy) : + _settings(settings.scoringScheme), + _alignScheduler(parallelAlignments(execPolicy), numThreads(execPolicy)), + _threadLocalStorage(TThreadLocal{parallelAlignments(execPolicy)}), + _blockSize(blockSize(execPolicy)) + { + _executor.ptrTaskScheduler = &taskScheduler(_alignScheduler); + _executor.ptrThreadLocal = &_threadLocalStorage; + setCount(storageManager(_threadLocalStorage), numThreads(execPolicy)); + } +}; + +/* + * @fn AsyncWaveAlignExecutorSimd#submit + * @brief Submits a new alignment job asynchronosly. + */ +template +inline void +submit(AsyncWaveAlignExecutorSimd & me, + TSeqH const & seqH, + TSeqV const & seqV, + TCallable && callback) +{ + using TAlignTask = typename AsyncWaveAlignExecutorSimd::TAlignmentTask; + + // Continuator for calling the alignment instance functor. + std::function f = + [&, func = TAlignTask{me._alignCounter++, seqH, seqV, me._settings, me._blockSize}](uint16_t id) mutable + { + func(id, me._executor, me._simdTaskQueue, std::forward(callback)); + }; + scheduleTask(me._alignScheduler, f); +} + +/* + * @fn AsyncWaveAlignExecutorSimd#wait + * @brief Explicit barrier to wait for all submitted jobs to be finished. 
+ */ +template +inline void +wait(AsyncWaveAlignExecutorSimd & me) +{ + notify(me._alignScheduler); + wait2(me._alignScheduler, me._simdTaskQueue); +} +#endif // SEQAN_SIMD_ENABLED + +/* + * @fn alignExecBatch + * @brief Global interface for scheduling and running all alignment jobs with wave-front model. + */ +template +inline void +alignExecBatch(ExecutionPolicy, TSimdSpec> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TSettings const & settings, + TCallable && callback) +{ + using TSeqH = typename Value::Type; + using TSeqV = typename Value::Type; + +#ifdef SEQAN_SIMD_ENABLED + using TExecutor = std::conditional_t::value, + AsyncWaveAlignExecutorSimd, + AsyncWaveAlignExecutor>; +#else + using TExecutor = AsyncWaveAlignExecutor; +#endif + TExecutor executor(settings, execPolicy); + + for (size_t i = 0u; i < length(setH); ++i) + { + submit(executor, setH[i], setV[i], std::forward(callback)); + } + wait(executor); +} + +} // namespace impl +} // namespace seqan +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ diff --git a/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h b/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h new file mode 100644 index 0000000..e4a3eb8 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h @@ -0,0 +1,343 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct CorrectLastColumn_ : False +{}; + +template <> +struct CorrectLastColumn_ : True +{}; + +template +struct CorrectLastRow_ : False +{}; + +template <> +struct CorrectLastRow_ : True +{}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InitialCol; +// ---------------------------------------------------------------------------- + +// The _computeCell function is the basic interface that is called to comute +// the score for each cell and to store the corresponding traceback. +// The MetaColumnDescriptor and the CellDescriptor describe which cell in the dp matrix +// is computed. We use this information to overload the functions in order +// to initialize from the passed buffer and to store the last row/column in the buffer. + +// Vertical initialization values are copied from buffer. 
+template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & /*cacheDiag*/, + TDPCell const & /*cacheHori*/, + TDPCell & /*cacheVert*/, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + TCellDescriptor const &, // One of FirstCell, InnerCell or LastCell. + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + current = (*scout.state.ptrVerBuffer)[scout.verticalPos].i1; + assignValue(traceMatrixNavigator, (*scout.state.ptrVerBuffer)[scout.verticalPos].i2); + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, False(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InnerCol; FirstCell +// ---------------------------------------------------------------------------- + +// Horizontal initialization values are copied from buffer for all first cells. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + FirstCell const &, // One of FirstCell, InnerCell or LastCell. 
+ DPProfile_ const &) +{ + _scoreOfCell(cacheDiag) = _scoreOfCell(cacheHori); + current = (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1; + cacheVert = current; + assignValue(traceMatrixNavigator, (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2); +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InnerCol; LastCell +// ---------------------------------------------------------------------------- + +// Values of last call are copied into the horizontal buffer for initializing next tile below. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + LastCell const & /*cellDescriptor*/, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Copy values into horizontal buffer for the tile below this tile in vertical direction. + // TODO(rrahn): We need to do this only for affine gaps? 
+ _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2 = value(traceMatrixNavigator); + } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, False(), True()); + } +} + + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol; FirstCell +// ---------------------------------------------------------------------------- + +// Horizontal initialization values are copied from buffer for all first cells. +// Vertical buffer is filled with value. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + FirstCell const &, // One of FirstCell, InnerCell or LastCell. + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + // cache previous diagonal. + _scoreOfCell(cacheDiag) = _scoreOfCell(cacheHori); + current = + front(*scout.state.ptrVerBuffer).i1 = (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1; // Copy horizontal buffer value in active cell and in + assignValue(traceMatrixNavigator, (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2); + cacheVert = current; + if (IsTracebackEnabled_::VALUE) + { + front(*scout.state.ptrVerBuffer).i2 = value(traceMatrixNavigator); // Store trace value in vertical buffer. 
+ } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol, InnerCell; +// ---------------------------------------------------------------------------- + +// Stores computed values in vertical buffer for initializing next tile right of the current. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + InnerCell const &, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Store values in vertical buffer. + _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrVerBuffer)[scout.verticalPos].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrVerBuffer)[scout.verticalPos].i2 = value(traceMatrixNavigator); + } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol, LastCell; +// ---------------------------------------------------------------------------- + +// Stores computed values in vertical buffer for initializing next tile right of the current. +// Stores computed values in horizontal buffer for initializing next tile below. 
+template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + LastCell const &, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Store values in vertical and horizontal buffer + _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1 = (*scout.state.ptrVerBuffer)[scout.verticalPos].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2 = + (*scout.state.ptrVerBuffer)[scout.verticalPos].i2 = value(traceMatrixNavigator); + } + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), True()); + } +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h b/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h new file mode 100644 index 0000000..a2885ec --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h @@ -0,0 +1,174 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== +// Policies used for parallel alignment computation. 
+// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +/*! + * @tag BlockOffsetOptimization + * @brief Optimization for vectorized wave-front execution model. + * @headerfile + * @see WavefrontExecutionPolicy + */ +struct BlockOffsetOptimization_; +using BlockOffsetOptimization = Tag; + +/*! + * @class WavefrontExecutionPolicy + * @headerfile + * @extends ExecutionPolicy + * @brief Policy to select runtime execution mode for algorithms. + * @signature template + * struct ExecutionPolicy, TVectorizationMode>; + * @tparam TWaveSpec Type specializing the wave-front threading model. + * Can be void (default) or @link BlockOffsetOptimization @endlink. + * @tparam TVectorizationMode Type specifying the vectorization model. + * Can be @link ParallelismTags#Vectorial @endlink or @link ParallelismTags#Serial @endlink (default). + * + * Special execution policy for computing sequence alignments with wave-front parallelization strategy. + * In the wave-front execution the DP matrix is partitioned into blocks which can be executed + * in parallel along the minor diagonal of the DP matrix. + * The execution policy can be further specialized if used in combination with the @link ParallelismTags#Vectorial @endlink + * execution mode (see @link WavefrontExecutionPolicy @endlink). + * + * @section Vectorization + * + * In the vectorization mode, the blocks are gathered into SIMD registers. 
+ * The @link BlockOffsetOptimization @endlink can be used to always ensure that sizeof(SIMD) / 2 many blocks + * can be packed into one SIMD register. + * This requires, that the available instruction set supports 16 bit packed SIMD operations (e.g. SSE4, AVX2) + * and the score value type (@link Score @endlink) is bigger then 16 bit. + * In the default mode, the optimization is disabled and the number of packed alignment blocks is solely determined by + * the score value type passed to the algorithm as a parameter (e.g. see @link globalAlignmentScore @endlink). + */ + template + struct WavefrontAlignment; + +template +struct ExecutionPolicy, TVectorizationSpec> : + public ExecutionPolicy +{ + /*! + *@var size_t WavefrontExecutionPolicy::blockSize + * @brief The size of the blocks to use. Defaults to 100. + */ + size_t blockSize{100}; + /*! + * @var size_t WavefrontExecutionPolicy::parallelAlignments + * @brief Number of alignments scheduled concurrently. Defaults to std::thread::hardware_concurrency(). + */ + size_t parallelAlignments{std::thread::hardware_concurrency()}; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +/*! + * @fn WavefrontExecutionPolicy#blockSize + * @brief Getter for the current block size. + * @signature size_t blockSize(exec); + * @param[in] exec The wave-front execution policy to query. + */ +template +inline auto +blockSize(ExecutionPolicy, TVectorizationSpec> const & p) +{ + return p.blockSize; +} + +/*! + * @fn WavefrontExecutionPolicy#setBlockSize + * @brief Setter for the current block size. + * @signature void setBlockSize(exec, bs); + * @param[in,out] exec The wave-front execution policy to update. 
+ * @param[in] bs The new block size to set. Must be a positive integral number greater or equal than 5. + */ +template +inline void +setBlockSize(ExecutionPolicy, TVectorizationSpec> & p, + size_t const bs) +{ + SEQAN_ASSERT_GEQ(bs, static_cast(5)); + p.blockSize = bs; +} + +/*! + * @fn WavefrontExecutionPolicy#parallelAlignments + * @brief Getter for the current number of alignments executed in parallel. + * @signature void parallelAlignments(exec); + * @param[in] exec The wave-front execution policy to update. + */ +template +inline auto +parallelAlignments(ExecutionPolicy, TVectorizationSpec> const & p) +{ + return p.parallelAlignments; +} + +/*! + * @fn WavefrontExecutionPolicy#setParallelAlignments + * @brief Setter for the current number of alignments executed in parallel. + * @signature void setParallelAlignments(exec, pa); + * @param[in,out] exec The wave-front execution policy to update. + * @param[in] pa The number of alignments to execute in parallel. Must be a positive integral number greater than 0. + */ +template +inline void +setParallelAlignments(ExecutionPolicy, TVectorizationSpec> & p, + size_t const pi) +{ + SEQAN_ASSERT_GT(pi, static_cast(0)); + p.parallelAlignments = pi; +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_scout.h b/porechop/include/seqan/align_parallel/dp_parallel_scout.h new file mode 100644 index 0000000..58a238d --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_scout.h @@ -0,0 +1,263 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ +#define INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class DPTileBuffer +// ---------------------------------------------------------------------------- + +// The structure owning the horizontal/vertical buffer. +template > +struct DPTileBuffer +{ + TBuffer horizontalBuffer; + TBuffer verticalBuffer; +}; + +// ---------------------------------------------------------------------------- +// Tag DPTiled +// ---------------------------------------------------------------------------- + +// Tag used to subclass DPScoutState and DPScout. +// T represents the buffer type. +template +struct DPTiled; + +// ---------------------------------------------------------------------------- +// Class DPScoutState_; DPTiled +// ---------------------------------------------------------------------------- + +// The overloaded DPScoutState which simply stores the pointers to the corresponding buffer. 
+template +class DPScoutState_ > +{ +public: + + using TDPCell = typename Value::Type, 1>::Type; + + TBuffer* ptrHorBuffer = nullptr; + TBuffer* ptrVerBuffer = nullptr; + TThreadContext threadContext{}; + + DPScoutState_() = default; + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer) : + ptrHorBuffer(&horBuffer), + ptrVerBuffer(&verBuffer) + {} + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer, TThreadContext pThreadContext) : + ptrHorBuffer(&horBuffer), + ptrVerBuffer(&verBuffer), + threadContext(std::move(pThreadContext)) + {} +}; + +// ---------------------------------------------------------------------------- +// Class DPScout_; DPTiled +// ---------------------------------------------------------------------------- + +// Overloaded DPScout to store the corresponding buffer for the current dp tile. +template +class DPScout_ > : + public DPScout_ +{ +public: + using TBase = DPScout_; + + DPScoutState_ > state; + + size_t horizontalPos; + size_t verticalPos; + bool forceTracking; + + DPScout_(DPScoutState_ > state, + bool pForceTracking = false) : + TBase(), + state(state), + forceTracking(pForceTracking) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ScoutSpecForSimdAlignment_ +// ---------------------------------------------------------------------------- + +template +struct ScoutSpecForAlignmentAlgorithm_ > > +{ + using Type = DPTiled; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline bool +isTrackingEnabled(DPScout_ > const & /*dpScout*/, + TIsLastColumn const & /*unused*/, + TIsLastRow const & /*unused*/) +{ + return false; +} + +template +inline bool 
+isTrackingEnabled(DPScout_ > const & dpScout, + True const & /*unused*/, + True const & /*unused*/) +{ + return (dpScout.forceTracking || (dpScout.state.threadContext.task._lastHBlock && + dpScout.state.threadContext.task._lastVBlock)); +} + +template +inline bool +isTrackingEnabled(DPScout_ > const & dpScout, + True const & /*unused*/, + False const & /*unused*/) +{ + return (dpScout.forceTracking || dpScout.state.threadContext.task._lastHBlock); +} + +template +inline bool +isTrackingEnabled(DPScout_ > const & dpScout, + False const & /*unused*/, + True const & /*unused*/) +{ + return (dpScout.forceTracking || dpScout.state.threadContext.task._lastVBlock); +} + +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() +// ---------------------------------------------------------------------------- + +template +inline void +_scoutBestScore(DPScout_ > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TBaseScout = typename DPScout_ >::TBase; + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// Tracks the new score, if it is the new maximum. 
+template +inline void +_scoutBestScore(DPScout_ > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TBaseScout = typename DPScout_ >::TBase; + if (isTrackingEnabled(dpScout, isLastColumn, isLastRow)) + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutHorizontal() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutHorizontal(DPScout_ > & scout) +{ + scout.horizontalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutVertical() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutVertical(DPScout_ > & scout) +{ + scout.verticalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _incHorizontalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incHorizontalPos(DPScout_ > & scout) +{ + ++scout.horizontalPos; +} + +// ---------------------------------------------------------------------------- +// Function _incVerticalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incVerticalPos(DPScout_ > & scout) +{ + ++scout.verticalPos; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h b/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h new file mode 100644 index 0000000..4d977e1 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h @@ -0,0 +1,362 @@ +// 
========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ +#define INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class DPScoutState_; DPTiled +// ---------------------------------------------------------------------------- + +// The overloaded DPScoutState which simply stores the pointers to the corresponding buffer. +template +class DPScoutState_ > : + public DPScoutState_ >, + public DPScoutState_ +{ +public: + + DPScoutState_() = default; + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer) : + DPScoutState_ >(horBuffer, verBuffer), + DPScoutState_() + {} + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer, TThreadContext && pThreadContext) : + DPScoutState_ >(horBuffer, verBuffer, std::move(pThreadContext)), + DPScoutState_() + {} +}; + +// ---------------------------------------------------------------------------- +// Class DPScout_; DPTiled +// ---------------------------------------------------------------------------- + +// Overloaded DPScout to store the corresponding buffer for the current dp tile. 
+template +class DPScout_ > > : + public DPScout_> +{ +public: + using TBase = DPScout_ >; + + DPScoutState_ > state; + size_t horizontalPos; + size_t verticalPos; + bool forceTracking; + + DPScout_(DPScoutState_ > & state, + bool const pForceTracking) : + TBase(static_cast&>(state)), + state(state), + forceTracking(pForceTracking) + {} + + DPScout_(DPScoutState_ > & state) : DPScout_(state, false) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ScoutSpecForSimdAlignment_ +// ---------------------------------------------------------------------------- + +template +struct ScoutSpecForAlignmentAlgorithm_ > > +{ + using Type = DPTiled >; +}; + +template +struct ScoutSpecForAlignmentAlgorithm_ > > > +{ + using Type = DPTiled > >; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function isTrackingEnabled() +// ---------------------------------------------------------------------------- + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + True const & /*unused*/, + True const & /*unused*/) +{ + // TODO(rrahn): Implement me! + return (dpScout.forceTracking); +} + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + True const & /*unused*/, + False const & /*unused*/) +{ + // TODO(rrahn): Implement me! + return (dpScout.forceTracking); +} + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + False const & /*unused*/, + True const & /*unused*/) +{ + // TODO(rrahn): Implement me! 
+ return (dpScout.forceTracking); +} + +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() +// ---------------------------------------------------------------------------- + +template +inline void +_scoutBestScore(DPScout_ > > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TScoutBase = typename DPScout_>>::TBase; + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// ---------------------------------------------------------------------------- +// Function maxHostCoordinate() +// ---------------------------------------------------------------------------- + +template +inline auto +maxHostCoordinate(DPScout_ > > const & dpScout, + TDimension const dimension) +{ + using TScoutBase = typename DPScout_ > >::TBase; + return maxHostCoordinate(static_cast(dpScout), dimension); +} + +// ---------------------------------------------------------------------------- +// Function _setSimdLane() +// ---------------------------------------------------------------------------- + +template +inline void +_setSimdLane(DPScout_ > > & dpScout, + TPosition const pos) +{ + using TScoutBase = typename DPScout_ > >::TBase; + _setSimdLane(static_cast(dpScout), pos); +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutHorizontal() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutHorizontal(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_>>>::TBase; + _preInitScoutHorizontal(static_cast(scout)); + scout.horizontalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutVertical() +// ---------------------------------------------------------------------------- + 
+template +inline void +_preInitScoutVertical(DPScout_>>> & scout) +{ + using TScoutBase = typename DPScout_>>>::TBase; + _preInitScoutVertical(static_cast(scout)); + scout.verticalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _reachedHorizontalEndPoint() +// ---------------------------------------------------------------------------- + +template +inline bool +_reachedHorizontalEndPoint(DPScout_>>> & scout, + TIter const & hIt) +{ + using TScoutBase = typename DPScout_>>>::TBase; + return _reachedHorizontalEndPoint(static_cast(scout), hIt); +} + +// ---------------------------------------------------------------------------- +// Function _reachedVerticalEndPoint() +// ---------------------------------------------------------------------------- + +template +inline bool +_reachedVerticalEndPoint(DPScout_ > > > & scout, + TIter const & vIt) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + return _reachedVerticalEndPoint(static_cast(scout), vIt); +} + +// ---------------------------------------------------------------------------- +// Function _nextHorizontalEndPos() +// ---------------------------------------------------------------------------- + +template +inline void +_nextHorizontalEndPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _nextHorizontalEndPos(static_cast(scout)); +} + +// ---------------------------------------------------------------------------- +// Function _nextVerticalEndPos() +// ---------------------------------------------------------------------------- + +template +inline void +_nextVerticalEndPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _nextVerticalEndPos(static_cast(scout)); +} + +// ---------------------------------------------------------------------------- +// Function _incHorizontalPos() +// ---------------------------------------------------------------------------- + +template +inline 
void +_incHorizontalPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _incHorizontalPos(static_cast(scout)); + ++scout.horizontalPos; +} + +// ---------------------------------------------------------------------------- +// Function _incVerticalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incVerticalPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _incVerticalPos(static_cast(scout)); + ++scout.verticalPos; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ diff --git a/porechop/include/seqan/align_parallel/dp_settings.h b/porechop/include/seqan/align_parallel/dp_settings.h new file mode 100644 index 0000000..6695391 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_settings.h @@ -0,0 +1,109 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Translates global function interface into setting struct. +template +struct DPSettings +{ + using TTraits = TDPTraits; + using TScoringScheme = TScoringScheme_; + using TBandConfig = DPBandConfig; + + TScoringScheme scoringScheme; + TBandConfig bandScheme; + + DPSettings() = default; + + explicit DPSettings(TScoringScheme score) : scoringScheme(std::move(score)) + {} +}; + +#ifdef SEQAN_SIMD_ENABLED +// Simd version of DP settings. 
+template +struct SimdDPSettings : public TDPSettings +{ + //------------------------------------------------------------------------- + // Member Types. + + using TTraits = typename TDPSettings::TTraits; + using TScoringScheme = typename TDPSettings::TScoringScheme; + using TScoreValue = typename Value::Type; + using TScoreValueSimd = typename SimdVector< + std::conditional_t::value, + int16_t, + TScoreValue>>::Type; + using TSimdScoringScheme = Score>; + + //------------------------------------------------------------------------- + // Members. + + TSimdScoringScheme simdScoringScheme; + + //------------------------------------------------------------------------- + // Constructor. + + SimdDPSettings() = default; + + explicit SimdDPSettings(TScoringScheme score) : + TDPSettings(std::move(score)), + simdScoringScheme(score) + {} +}; +#endif // SEQAN_SIMD_ENABLED +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ diff --git a/porechop/include/seqan/align_parallel/dp_traits.h b/porechop/include/seqan/align_parallel/dp_traits.h new file mode 100644 index 0000000..2606408 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_traits.h @@ -0,0 +1,120 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Traits for DP configuration. Currently used only internally. +struct DPTraits +{ + // Gocal alignment with linear gap costs. + struct GlobalLinear + { + // The algorithm to choose. + using TAlgorithmType = GlobalAlignment_<>; + // The Gaps to choos + using TGapType = LinearGaps; + // The Band to choose. + using TBandType = BandOff; + // The traceback. + using TTracebackType = TracebackOn>; + // The output to choose. + using TFormat = ArrayGaps; + }; + + // Global alignment with affine gap costs. + struct GlobalAffine : public GlobalLinear + { + using TGapType = AffineGaps; + }; + + // Global alignment with affine gap costs. + struct SemiGlobalLinear : public GlobalLinear + { + using TAlgorithmType = GlobalAlignment_>; + }; + + // Global alignment with affine gap costs. + struct SemiGlobalAffine : public GlobalAffine + { + using TAlgorithmType = GlobalAlignment_>; + }; + + // Banded global alignment with linear gap costs. + struct BandedGlobalLinear : public GlobalLinear + { + using TBandType = BandOn; + }; + + // Banded global alignment with affine gap costs. + struct BandedGlobalAffine : public BandedGlobalLinear + { + using TGapType = AffineGaps; + }; + + // Local alignment with linear gap costs. 
+ struct LocalLinear : public GlobalLinear + { + using TAlgorithmType = LocalAlignment_<>; + }; + + // Local alignment with affine gap costs. + struct LocalAffine : public LocalLinear + { + using TGapType = AffineGaps; + }; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ diff --git a/porechop/include/seqan/align_parallel/parallel_align_interface.h b/porechop/include/seqan/align_parallel/parallel_align_interface.h new file mode 100644 index 0000000..764aa2a --- /dev/null +++ b/porechop/include/seqan/align_parallel/parallel_align_interface.h @@ -0,0 +1,366 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +namespace impl +{ + +/* + * Executor class that implements the correct execution mode. 
+ */ +struct ParallelAlignmentExecutor +{ + template + auto operator()(Sequential const & /*execPolicy*/, + TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + TResult superSet; + resize(superSet, length(setH)); + + auto zipCont = makeZipView(setH, setV, superSet); +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + for (auto && pwInst : zipCont) + { + std::get<2>(pwInst) = kernel(std::get<0>(pwInst), std::get<1>(pwInst), std::forward(args)...); + } + return superSet; + } + + template + auto operator()(ExecutionPolicy const & /*execPolicy*/, + TKernel && kernel, + TSetH const & setH, + TArgs && ...args) + { +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + // Automaically chooses vectorized code, or falls back to sequential code. + return kernel(setH, std::forward(args)...); + } + + template + auto operator()(SEQAN_UNUSED ExecutionPolicy const & execPolicy, // maybe unused due to missing OMP support in clang. 
+ TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TPos = std::make_signed_t; + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + TPos chunkSize = _min(static_cast(length(setH)), static_cast(256)); + String splitter; + computeSplitters(splitter, length(setH), static_cast(length(setH)/chunkSize)); + + std::vector superSet; + superSet.resize(length(splitter)); + +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + + SEQAN_OMP_PRAGMA(parallel for num_threads(numThreads(execPolicy)) schedule(guided)) + for (TPos job = 0; job < static_cast(length(splitter)) - 1; ++job) // TODO(rrahn): Why -1; Is there a bug in computeSplitters? + { + auto infSetH = infix(setH, splitter[job], splitter[job + 1]); + auto infSetV = infix(setV, splitter[job], splitter[job + 1]); + + superSet[job] = kernel(infSetH, infSetV, std::forward(args)...); + } + // Reduce the result. 
+ TResult res; + resize(res, length(setH)); + auto it = begin(res, Standard()); + for (auto && set : superSet) + { + arrayMoveForward(begin(set, Standard()), end(set, Standard()), it); + it += length(set); + } + return res; + } + + template + auto operator()(ExecutionPolicy const & execPolicy, + TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TPos = std::make_signed_t; + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + Splitter splitter(0, length(setH), numThreads(execPolicy)); + + TResult superSet; + resize(superSet, length(setH)); + + auto zipCont = makeZipView(setH, setV, superSet); + +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + + SEQAN_OMP_PRAGMA(parallel for num_threads(length(splitter))) + for (TPos job = 0; job < static_cast(length(splitter)); ++job) + { + auto it = begin(zipCont, Standard()) + splitter[job]; + auto itEnd = begin(zipCont, Standard()) + splitter[job + 1]; + + // NOTE(marehr): auto && seqPair does not work, thus declaring the + // type explicitly, s.t. <=icpc 18.0.1 can compile the code (ticket + // #03204483) + using TSeqPair = decltype(*it); + std::for_each(it, itEnd, [&](TSeqPair && seqPair) + { + std::get<2>(seqPair) = kernel(std::get<0>(seqPair), std::get<1>(seqPair), std::forward(args)...); + }); + } + return superSet; + } +}; + +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> +inline auto +doWaveAlignment(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TAlgorithmSpec const & /*tag*/, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme, + TArgs && .../*args*/) +{ + using TScoreValue = typename Value::Type; + + // The vector containing the scores. 
+ std::vector res; + res.resize(length(setH)); + + auto dispatcher = [&res](auto && ...args) + { + alignExecBatch(std::forward(args)..., + [&res](auto const id, auto const score) + { + res[id] = score; + }); + }; + + // Differentiate between affine and linear gap costs. + // TODO(rrahn): Setup configuration cascade. + if (scoreGapOpen(scoringScheme) == scoreGapExtend(scoringScheme)) + { + struct DPConfigTraits + { + using TAlgorithmType SEQAN_UNUSED = TAlgorithmSpec; + using TGapType SEQAN_UNUSED = LinearGaps; + using TBandType SEQAN_UNUSED = BandOff; + using TTracebackType SEQAN_UNUSED = TracebackOff; + using TFormat SEQAN_UNUSED = ArrayGaps; + }; + + using TDPSettings = seqan::DPSettings; + + TDPSettings settings; + settings.scoringScheme = scoringScheme; + dispatcher(execPolicy, setH, setV, settings); + } + else + { + struct DPConfigTraits + { + using TAlgorithmType SEQAN_UNUSED = TAlgorithmSpec; + using TGapType SEQAN_UNUSED = AffineGaps; + using TBandType SEQAN_UNUSED = BandOff; + using TTracebackType SEQAN_UNUSED = TracebackOff; + using TFormat SEQAN_UNUSED = ArrayGaps; + }; + + using TDPSettings = seqan::DPSettings; + + TDPSettings settings; + settings.scoringScheme = scoringScheme; + dispatcher(execPolicy, setH, setV, settings); + } + return res; +} + +} // namespace impl + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +/* + * Wrapper functions for calling globalAlignmentScore and localAlignmentScore with an ExecutionPolicy. + * Note the parallel interfaces are documented as part of the standard documentation in seqan/align module. 
+ */ +template ::value || + std::is_same::value, + int> = 0> +inline auto +globalAlignmentScore(ExecutionPolicy const & execPolicy, + TArgs && ...args) +{ + auto kernel = [](auto && ...args) + { + return globalAlignmentScore(std::forward(args)...); + }; + return impl::ParallelAlignmentExecutor{}(execPolicy, kernel, std::forward(args)...); +} + +template ::value || + std::is_same::value, + int> = 0> +inline auto +localAlignmentScore(ExecutionPolicy const & execPolicy, + TArgs && ...args) +{ + auto kernel = [](auto && ...args) + { + return localAlignmentScore(std::forward(args)...); + }; + return impl::ParallelAlignmentExecutor{}(execPolicy, kernel, std::forward(args)...); +} + +// Wavefront execution of globalAlignmentScore w/ config. +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> + +inline auto +globalAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme, + TConfig const & /*config*/) +{ + return impl::doWaveAlignment(execPolicy, + GlobalAlignment_::Type>{}, + setH, + setV, + scoringScheme); +} + +// Wavefront execution of globalAlignmentScore w/o config. 
+template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> + +inline auto +globalAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme) +{ + return globalAlignmentScore(execPolicy, setH, setV, scoringScheme, AlignConfig<>{}); +} + +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> +inline auto +localAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TArgs && ...args) +{ + return impl::doWaveAlignment(execPolicy, LocalAlignment_<>{}, std::forward(args)...); +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h b/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h new file mode 100644 index 0000000..00db124 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h @@ -0,0 +1,98 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Executor class for an alignment task in the wave-front model. +// Stores the scheduler and the thread local storage. +template +struct WavefrontAlignmentExecutor +{ + // Shared data in parallel context. 
+ TScheduler * ptrTaskScheduler{nullptr}; + TThreadLocalStore * ptrThreadLocal{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + WavefrontAlignmentExecutor() = default; + + WavefrontAlignmentExecutor(TScheduler * _ptrScheduler, + TThreadLocalStore * _ptrTls) : + ptrTaskScheduler{_ptrScheduler}, + ptrThreadLocal(_ptrTls) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// Asynchronosly schedule a new alignment job. +template +inline void +spawn(WavefrontAlignmentExecutor & executor, + TTaskExecutor && taskExec) +{ + SEQAN_ASSERT(executor.ptrTaskScheduler != nullptr); + scheduleTask(*executor.ptrTaskScheduler, std::forward(taskExec)); +} + +// Access thread local storage. +template +inline auto & +local(WavefrontAlignmentExecutor & executor) +{ + SEQAN_ASSERT(executor.ptrThreadLocal != nullptr); + return local(*executor.ptrThreadLocal); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_result.h b/porechop/include/seqan/align_parallel/wavefront_alignment_result.h new file mode 100644 index 0000000..e2e7900 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_result.h @@ -0,0 +1,165 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// The intermediate result stored by each thread independently. +// After an alignment has been finished, the intermediate results are reduced to a global result. +template +struct WavefrontAlignmentResult +{ + // ---------------------------------------------------------------------------- + // Member Types. + + using TState = std::pair; + + // ---------------------------------------------------------------------------- + // Member Variables + + TState _maxState{std::numeric_limits::min(), typename TTraits::THostPosition{}}; + size_t _tileCol{0}; + size_t _tileRow{0}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + // ---------------------------------------------------------------------------- + // Constructors. + + // Note: Although, this could be an aggregate type, the icpc-17 crashes, + // when compiling without the defaulted constructor. 
+ WavefrontAlignmentResult() = default; + + WavefrontAlignmentResult(TState const maxState) : + _maxState(std::move(maxState)) + {} + + WavefrontAlignmentResult(TState const maxState, size_t const tileCol, size_t const tileRow) : + _maxState(std::move(maxState)), + _tileCol(tileCol), + _tileRow(tileRow) + {} + + // ---------------------------------------------------------------------------- + // Member Functions. +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +namespace impl +{ + +template +inline void +updateMax(TIntermediate & me, + TState const & state, + size_t const tileCol, + size_t const tileRow) +{ + if (state.first > me._maxState.first) + { + me._maxState = state; + me._tileCol = tileCol; + me._tileRow = tileRow; + } +} +} // namespace impl + +// Update the intermediate result if new optimum has been found. +template +inline void +updateMax(WavefrontAlignmentResult & me, + typename WavefrontAlignmentResult::TState const & state, + size_t const tileCol, + size_t const tileRow) +{ + impl::updateMax(me, state, tileCol, tileRow); +} + +template +inline void +updateMax(WavefrontAlignmentResult & lhs, + WavefrontAlignmentResult const & rhs) +{ + impl::updateMax(lhs, rhs._maxState, rhs._tileCol, rhs._tileRow); +} + +// Reset the intermediate result. +template +inline void +clear(WavefrontAlignmentResult & me) +{ + me = WavefrontAlignmentResult{}; +} + +// Get the intermediate result. +template +inline typename WavefrontAlignmentResult::TState const & +value(WavefrontAlignmentResult const & me) +{ + return me._maxState; +} + +// Swap two intermediate results. 
+template +inline void +swap(WavefrontAlignmentResult & lhs, + WavefrontAlignmentResult & rhs) +{ + // TODO (rrahn): report issue with Intel + WavefrontAlignmentResult tmp = std::move(lhs); + lhs = std::move(rhs); + rhs = std::move(tmp); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h b/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h new file mode 100644 index 0000000..b7cb246 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h @@ -0,0 +1,347 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Yet internal class. Might need some redesign to make it truly generic. +/* + * @class WavefrontAlignmentScheduler + * @headerfile + * @brief A generic scheduler allowing to execute callables with a ring buffer for the stored tasks. + * + * @signature class WavefrontAlignmentScheduler; + * + * This schedule is at the moment only used for the wave-front alignment execution but could be generalized later. + * It stores all scheduled callables in a @link ConcurrentSuspendableQueue @endlink which can hold a user defined + * number of callables at the same time. It then uses recycable ids to fill up the queue with waiting jobs. 
+ * If the queue is full and a thread tries to add a new job, it will be suspended, until resources are freed by + * the scheduler. + */ +class WavefrontAlignmentScheduler +{ +public: + + //------------------------------------------------------------------------- + // Member Types. + + using TCallable = std::function; + using TAlignmentQueue = ConcurrentQueue>; + using TRecycleList = std::list; + + //------------------------------------------------------------------------- + // Private Member Variables. + + WavefrontTaskScheduler _taskScheduler; + ThreadPool _pool; + TRecycleList _recycableIds; + TAlignmentQueue _queue; + bool _receivedEndSignal; + + std::mutex _mutexRecycleId; + unsigned _numParallelAlignments; + + std::mutex _mutexPushException; + std::vector _exceptionPointers; + + std::atomic _isValid{true}; + + std::function job = [this] () + { + while (true) + { + TCallable callable; + if (!popFront(callable, _queue)) + break; // End of thread => No writers and queue is empty. + + uint16_t id = -1; + + { // Receive id. + std::lock_guard lck(_mutexRecycleId); + SEQAN_ASSERT_NOT(_recycableIds.empty()); + id = _recycableIds.front(); + _recycableIds.pop_front(); + } + + try + { + callable(id); // invokes the alignment with assigned id. + } + catch (...) + { // Catch any exception thrown by callable. Store exception, and set *this invalid. + // We still keep running until the queue is empty. The thread is cleaned either by, + // explicit wait or by destruction of *this. + _isValid.store(false, std::memory_order_release); + { + std::lock_guard lck(_mutexPushException); + _exceptionPointers.push_back(std::current_exception()); + } + } + + // Check if task scheduler is still valid. + // If not, something went wrong, and we should not continue adding new tasks. + // So we propagate the invalid state to *this and break exceution chain. + if (!isValid(_taskScheduler)) + { + _isValid.store(false, std::memory_order_release); + } + + { // recycle id, when done. 
+ std::lock_guard lck(_mutexRecycleId); + _recycableIds.push_back(id); + } + } + unlockReading(_queue); // Notify that this reader is finished. + unlockWriting(_taskScheduler); // Notify that this writer is finished. + }; + + //------------------------------------------------------------------------- + // Constructors. + + // implicitly deleted default constructor. + + WavefrontAlignmentScheduler(size_t const numParallelAlignments, size_t const numParallelTasks) : + _taskScheduler(numParallelTasks), + _queue(numParallelAlignments), + _receivedEndSignal(false), + _numParallelAlignments(numParallelAlignments) + { + SEQAN_ASSERT_GT(numParallelAlignments, 0u); // Bad if reader is 0. + + // Setup recycable ids. + _recycableIds.resize(numParallelAlignments); + std::iota(std::begin(_recycableIds), std::end(_recycableIds), 0); + + setReaderWriterCount(_queue, numParallelAlignments, 1); + + _exceptionPointers.resize(numParallelAlignments, nullptr); + + try + { // Create the threads here, later we can try to make lazy thread creation. + for (unsigned i = 0; i < numParallelAlignments; ++i) + { + spawn(_pool, job); + } + } + catch (...) // Make sure all the spawned threads are safely stopped before re-throwing the exception. + { + unlockWriting(_queue); + waitForWriters(_taskScheduler); + join(_pool); + throw; + } + + setWriterCount(_taskScheduler, numParallelAlignments); + // Notify task scheduler, that everything was setup correctly. + for (unsigned i = 0; i < numParallelAlignments; ++i) + { + lockWriting(_taskScheduler); + } + waitForWriters(_taskScheduler); // Invoke task scheduler. + } + + // Default constructor. + WavefrontAlignmentScheduler() : WavefrontAlignmentScheduler(16, 8) + {} + + // Copy & Move C'tor + WavefrontAlignmentScheduler(WavefrontAlignmentScheduler const &) = delete; + WavefrontAlignmentScheduler(WavefrontAlignmentScheduler &&) = delete; + + ///------------------------------------------------------------------------- + // Destructor. 
+ + ~WavefrontAlignmentScheduler() + { + // Signal that no more alignments will be added. + if (!_receivedEndSignal) + unlockWriting(_queue); + + SEQAN_ASSERT(_queue.writerCount == 0); + + // Wait until all remaining threads are finished with their execution. + join(_pool); + + // In destructor of thread pool we wait for the outstanding alignments to be finished + // and then continue destruction of the remaining members and cleaning up the stack. + } + + // ------------------------------------------------------------------------ + // Member Functions. + + // Copy & Move assignment + WavefrontAlignmentScheduler& operator=(WavefrontAlignmentScheduler const &) = delete; + WavefrontAlignmentScheduler& operator=(WavefrontAlignmentScheduler &&) = delete; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template<> +struct SchedulerTraits +{ + using TTask = typename WavefrontAlignmentScheduler::TCallable; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +/* + * @fn WavefrontAlignmentScheduler#isValid + * @headerfile + * @brief Checks if scheduler is in a valid state. This means that no callable has terminated with an exception. + */ +inline bool +isValid(WavefrontAlignmentScheduler const & me) +{ + return me._isValid.load(std::memory_order_acquire); +} + +/* + * @fn WavefrontAlignmentScheduler#scheduleTask + * @headerfile + * @brief Adds a new task to the scheduler. Suspends until resources become available. + * @throws ExceptionType? + */ +// basic exception-safety guarantee. +// Throws if appendValue failed. 
+inline void +scheduleTask(WavefrontAlignmentScheduler & me, + typename SchedulerTraits::TTask && callable) +{ + if (!isValid(me)) + throw std::runtime_error("Invalid alignment scheduler!"); + + // Spins until there is enough space to add to the queue. + if (!appendValue(me._queue, std::forward(callable))) + throw std::runtime_error("Invalid alignment scheduler 2!"); +} + +inline void +scheduleTask(WavefrontAlignmentScheduler & me, + typename SchedulerTraits::TTask & callable) +{ + if (!isValid(me)) + throw std::runtime_error("Invalid alignment scheduler!"); + // Spins until there is enough space to add to the queue. + if(!appendValue(me._queue, callable)) + throw std::runtime_error("Invalid alignment scheduler 2!"); +} + +/* + * @fn WavefrontAlignmentScheduler#notify + * @headerfile + * @brief Notify the scheduler that no more jobs will follow. + */ +inline void +notify(WavefrontAlignmentScheduler & me) +{ + unlockWriting(me._queue); + me._receivedEndSignal = true; +} + +/* + * @fn WavefrontAlignmentScheduler#wait + * @headerfile + * @brief Explicit barrier on the scheduler. Suspends until all scheduled jobs have been finsihed. + * + * Note, can dead lock if notify is never called. + */ +// Only possible if some other thread is signaling the end of it. +inline void +wait(WavefrontAlignmentScheduler & me) +{ + join(me._pool); + wait(me._taskScheduler); +} + +/* + * @fn WavefrontAlignmentScheduler#wait2 + * @headerfile + * @brief Explicit barrier on the scheduler. Suspends until all scheduled jobs have been finsihed. + * + * Note, can dead lock if notify is never called. + */ +template +inline void +wait2(WavefrontAlignmentScheduler & me, TNotifiable & notifiable) +{ + join(me._pool); + notify(notifiable); + wait(me._taskScheduler); +} + +/* + * @fn WavefrontAlignmentScheduler#getExceptions + * @headerfile + * @brief Returns vector of captured exceptions if any was thrown by the callable. + * + * Note, can dead lock if notify is never called. 
+ */ +inline auto +getExceptions(WavefrontAlignmentScheduler & me) +{ + auto vec = me._exceptionPointers; + auto innerExceptions = getExceptions(me._taskScheduler); + std::copy(std::begin(innerExceptions), std::end(innerExceptions), std::back_inserter(vec)); + return vec; +} + +/* + * @fn WavefrontAlignmentScheduler#taskScheduler + * @headerfile + * @brief Returns lvalue reference to the underlying task_scheduler. + */ +inline auto& +taskScheduler(WavefrontAlignmentScheduler & me) +{ + return me._taskScheduler; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_task.h b/porechop/include/seqan/align_parallel/wavefront_alignment_task.h new file mode 100644 index 0000000..0334103 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_task.h @@ -0,0 +1,404 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Config structre for the execution of one alignment using the wave-front model. +template +struct WavefrontAlignmentTaskConfig +{ + // ---------------------------------------------------------------------------- + // Member Typedefs. + + // DPTrait type forwarding. 
+ using TDPTraits = typename TDPSettings::TTraits; + using TScoreValue = typename Value::Type; + using TAlgorithmType = typename TDPTraits::TAlgorithmType; + using TTracebackType = typename TDPTraits::TTracebackType; + using TGapType = typename TDPTraits::TGapType; + + // Wavefront Alignment Context. + using TDPCell = DPCell_; + using TBufferValue = Pair::Type>; + using TBuffer = String; + using TBlockBuffer = DPTileBuffer; + + // DP Execution Context. + using TDPProfile = DPProfile_; + using TDPCache = DPContext::Type>; + using TDPScout = DPScout_; + + // Parallel Context. + struct IntermediateTraits_ + { + using TScoreValue = decltype(maxScore(std::declval())); + using THostPosition = decltype(maxHostPosition(std::declval())); + }; + + using TDPIntermediate = WavefrontAlignmentResult; + + struct AlignThreadLocalConfig_ + { + using TIntermediate = TDPIntermediate; + using TCache = TDPCache; + + using TLocalHost = std::tuple; + }; + + using TThreadLocal = WavefrontAlignmentThreadLocalStorage; + using TAlignEvent = WavefrontTaskEvent; +}; + +#ifdef SEQAN_SIMD_ENABLED +template +struct WavefrontAlignmentSimdTaskConfig : public WavefrontAlignmentTaskConfig +{ + // ---------------------------------------------------------------------------- + // Member Typedefs. + + using TBase_ = WavefrontAlignmentTaskConfig; + + using TDPSimdCell = DPCell_; + using TDPSimdTraceValue = typename TraceBitMap_::Type; + + using TDPSimdScoreMatrix = String>; + using TDPSimdTraceMatrix = String>; + using TDPSimdCache = DPContext; + + using TDPScout_ = DPScout_ >; + using TDPIntermediate = WavefrontAlignmentResult; + + // Parallel Context. 
+ struct SimdAlignThreadLocalConfig_ + { + + using TIntermediate = TDPIntermediate; + using TCache = typename TBase_::TDPCache; + using TSimdCache = TDPSimdCache; + + using TLocalHost = std::tuple; + }; + + using TThreadLocal = WavefrontAlignmentThreadLocalStorage; + using TAlignEvent = WavefrontTaskEvent; +}; +#endif + +// Incubator to setup the alignment job. +template +struct WavefrontAlignmentTaskIncubator +{ + using TWatc = WavefrontAlignmentTaskConfigConcept; + + // ---------------------------------------------------------------------------- + // Function createBlocks() + // ---------------------------------------------------------------------------- + + template + static auto createBlocks(TSeq const & seq, size_t const blockSize) + { + using TIter = typename Iterator::Type, Standard>::Type; + String> blocks; + resize(blocks, (length(seq) + blockSize - 1) / blockSize, Exact()); + + for (unsigned id = 0; id < length(blocks); ++id) + blocks[id] = toRange(infix(seq, id * blockSize, _min(length(seq),(id + 1) * blockSize))); + return blocks; + } + + // ---------------------------------------------------------------------------- + // Function createBlockBuffer() + // ---------------------------------------------------------------------------- + + template + static auto createBlockBuffer(TSeqHBlocks const & seqHBlocks, TSeqVBlovcks const & seqVBlocks, TScore const & score) + { + using TDPCell = typename TWatc::TDPCell; + typename TWatc::TBlockBuffer buffer; + resize(buffer.horizontalBuffer, length(seqHBlocks), Exact()); + resize(buffer.verticalBuffer, length(seqVBlocks), Exact()); + + typename TWatc::TBufferValue tmp; + + using TDPMetaColH = DPMetaColumn_>; + using TDPMetaColV = DPMetaColumn_>; + + TDPCell dummyCellD; + TDPCell dummyCellH; + TDPCell dummyCellV; + tmp.i2 = _computeScore(tmp.i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + RecursionDirectionZero(), typename TWatc::TDPProfile()); + for (auto itH = begin(buffer.horizontalBuffer, 
Standard()); + itH != end(buffer.horizontalBuffer, Standard()); + ++itH) + { + resize(*itH, length(front(seqHBlocks)), Exact()); + for (auto it = begin(*itH, Standard()); it != end(*itH, Standard()); ++it) + { + it->i2 = _computeScore(it->i1, dummyCellD, tmp.i1, dummyCellV, Nothing(), Nothing(), score, + typename RecursionDirection_::Type(), + typename TWatc::TDPProfile()); + tmp.i1 = it->i1; + } + } + tmp.i1 = decltype(tmp.i1){}; + tmp.i2 = _computeScore(tmp.i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + RecursionDirectionZero(), typename TWatc::TDPProfile()); + + for (auto itV = begin(buffer.verticalBuffer, Standard()); itV != end(buffer.verticalBuffer, Standard()); ++itV) + { + resize(*itV, length(front(seqVBlocks)) + 1, Exact()); + auto it = begin(*itV, Standard()); + it->i2 = tmp.i2; + it->i1 = tmp.i1; + ++it; + for (; it != end(*itV, Standard()); ++it) + { + it->i2 = _computeScore(it->i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + typename RecursionDirection_::Type(), + typename TWatc::TDPProfile()); + _setVerticalScoreOfCell(it->i1, _verticalScoreOfCell(dummyCellV)); + tmp.i1 = it->i1; + tmp.i2 = it->i2; // TODO(rrahn): Move out of loop. + } + } + return buffer; + } + + // ---------------------------------------------------------------------------- + // Function createTaskGraph() + // ---------------------------------------------------------------------------- + + template + static auto createTaskGraph(TWavefrontTaskContext & taskContext) + { + using TDagTask = WavefrontTask; + + std::vector>> graph; + + resize(graph, length(taskContext.seqHBlocks)); + for (int i = length(taskContext.seqHBlocks); --i >= 0;) + { + resize(graph[i], length(taskContext.seqVBlocks)); + for (int j = length(taskContext.seqVBlocks); --j >= 0;) + { + using TSize = decltype(length(taskContext.seqHBlocks)); + TDagTask * successorRight = (static_cast(i + 1) < length(taskContext.seqHBlocks)) + ? 
graph[i+1][j].get() + : nullptr; + TDagTask * successorDown = (static_cast(j + 1) < length(taskContext.seqVBlocks)) + ? graph[i][j+1].get() + : nullptr; + graph[i][j] = std::make_shared(taskContext, + std::array{{successorRight, successorDown}}, + static_cast(i), static_cast(j), + static_cast(((i > 0) ? 1 : 0) + ((j > 0) ? 1 : 0)), + (static_cast(i + 1) == length(taskContext.seqHBlocks)), + (static_cast(j + 1) == length(taskContext.seqVBlocks))); + } + } + return graph; + } +}; + +// The actual alignment task that is executed by the wave-front model. +template > +class WavefrontAlignmentTask +{ +public: + + using TIncubator = WavefrontAlignmentTaskIncubator; + + using TSeqHBlocks = decltype(TIncubator::createBlocks(std::declval(), std::declval())); + using TSeqVBlocks = decltype(TIncubator::createBlocks(std::declval(), std::declval())); + using TTileBuffer = decltype(TIncubator::createBlockBuffer(std::declval(), + std::declval(), + std::declval())); + + using TTaskContext = WavefrontAlignmentContext; + + // ---------------------------------------------------------------------------- + // Member Variables. + // ---------------------------------------------------------------------------- + + size_t alignmentId{0}; + TSeqH const & seqH; + TSeqV const & seqV; + TDPSettings const & dpSettings; + size_t blockSize; + + // ---------------------------------------------------------------------------- + // Constructors. 
+ // ---------------------------------------------------------------------------- + + WavefrontAlignmentTask() = delete; + + WavefrontAlignmentTask(TSeqH const & seqH, + TSeqV const & seqV, + TDPSettings const & dpSetting, + size_t const & blockSize) : + seqH(seqH), + seqV(seqV), + dpSettings(dpSetting), + blockSize(blockSize) + {} + + + WavefrontAlignmentTask(size_t const id, + TSeqH const & seqH, + TSeqV const & seqV, + TDPSettings const & dpSetting, + size_t const & blockSize) : + alignmentId(id), + seqH(seqH), + seqV(seqV), + dpSettings(dpSetting), + blockSize(blockSize) + {} + + // ---------------------------------------------------------------------------- + // Member Functions. + // ---------------------------------------------------------------------------- + + // This function now run's in a separate thread. + template + inline void + operator()(uint16_t const instanceId, + TWavefrontExecutor & executor, + TCallback && callback) + { + // Initialize the strings. + auto seqHBlocks = TIncubator::createBlocks(seqH, blockSize); + auto seqVBlocks = TIncubator::createBlocks(seqV, blockSize); + + // Create the buffer for the matrix. + auto buffer = TIncubator::createBlockBuffer(seqHBlocks, seqVBlocks, dpSettings.scoringScheme); + + // Setup the task context and create task graph. + TTaskContext taskContext{instanceId, seqHBlocks, seqVBlocks, buffer, dpSettings}; + auto taskGraph = TIncubator::createTaskGraph(taskContext); + + // Prepare event. + WavefrontTaskEvent event; + context(*taskGraph.back().back()).ptrEvent = &event; + + // Kick off the execution. + using TWavefrontTaskExec = WavefrontTaskExecutor, TWavefrontExecutor>; + spawn(executor, TWavefrontTaskExec{taskGraph[0][0].get(), &executor}); + + // Wait for alignment to finish. + wait(event); + + // Reduce. 
+ typename TConfig::TDPIntermediate interMax{}; + auto collectAndReset = [&](auto & threadLocalStorage) + { + updateMax(interMax, intermediate(threadLocalStorage, instanceId)); + clear(intermediate(threadLocalStorage, instanceId)); + }; + combineEach(*executor.ptrThreadLocal, collectAndReset); + // Continue execution. + callback(alignmentId, interMax._maxState.first); + } + + template + inline void + operator()(uint16_t const instanceId, + TWavefrontExecutor & executor, + TSimdTaskQueue & taskQueue, + TCallback && callback) + { + // Initialize the strings. + auto seqHBlocks = TIncubator::createBlocks(seqH, blockSize); + auto seqVBlocks = TIncubator::createBlocks(seqV, blockSize); + + // Create the buffer for the matrix. + auto buffer = TIncubator::createBlockBuffer(seqHBlocks, seqVBlocks, dpSettings.scoringScheme); + + // Setup the task context and create task graph. + TTaskContext taskContext{instanceId, seqHBlocks, seqVBlocks, buffer, dpSettings}; + auto taskGraph = TIncubator::createTaskGraph(taskContext); + + // Prepare event. + WavefrontTaskEvent event; + context(*taskGraph.back().back()).ptrEvent = &event; + + // Kick off the execution. + using TWavefrontTaskExec = WavefrontTaskExecutor; + appendValue(taskQueue, *taskGraph[0][0]); + spawn(executor, TWavefrontTaskExec{&taskQueue, &executor}); + + // Wait for alignment to finish. + wait(event); + + // Reduce. 
+ typename TConfig::TDPIntermediate interMax{}; + auto collectAndReset = [&](auto & threadLocalStorage) + { + updateMax(interMax, intermediate(threadLocalStorage, instanceId)); + clear(intermediate(threadLocalStorage, instanceId)); + }; + combineEach(*executor.ptrThreadLocal, collectAndReset); + callback(alignmentId, interMax._maxState.first); + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h b/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h new file mode 100644 index 0000000..42f6aa2 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h @@ -0,0 +1,130 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ +#define SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Shared thread local storage for the parallel alignment instances. +template +class WavefrontAlignmentThreadLocalStorage +{ +public: + //------------------------------------------------------------------------- + // Member Types. 
+ + using TAlignmentLocal = typename TConfig::TLocalHost; + + //------------------------------------------------------------------------- + // Private Members. + + std::vector _multiAlignmentThreadLocal; + + //------------------------------------------------------------------------- + // Constructor. + + explicit WavefrontAlignmentThreadLocalStorage(size_t const numAlignments) : + _multiAlignmentThreadLocal(numAlignments) + {} + + // Delegating default constructor. + WavefrontAlignmentThreadLocalStorage() : WavefrontAlignmentThreadLocalStorage(1) + {} + + WavefrontAlignmentThreadLocalStorage(WavefrontAlignmentThreadLocalStorage const &) = default; + WavefrontAlignmentThreadLocalStorage(WavefrontAlignmentThreadLocalStorage &&) = default; + + //------------------------------------------------------------------------- + // Destructor. + + ~WavefrontAlignmentThreadLocalStorage() = default; + + //------------------------------------------------------------------------- + // Member Functions. + + WavefrontAlignmentThreadLocalStorage& operator=(WavefrontAlignmentThreadLocalStorage const &) = default; + WavefrontAlignmentThreadLocalStorage& operator=(WavefrontAlignmentThreadLocalStorage &&) = default; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// Gets the intermediate result for the specific alignment job. +template +inline typename TConfig::TIntermediate & +intermediate(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +// Gets the chache for the specific alignment job. 
+template +inline typename TConfig::TCache & +cache(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +// Gets the simd chache for the specific alignment job. +template +inline typename TConfig::TSimdCache & +simdCache(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task.h b/porechop/include/seqan/align_parallel/wavefront_task.h new file mode 100644 index 0000000..b9bd8a4 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task.h @@ -0,0 +1,365 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Context used per task. Access information like the infixes of the sequences for this block and other. +template +struct WavefrontAlignmentContext +{ + size_t alignmentId{0}; + TSeqHBlocks const & seqHBlocks; + TSeqVBlocks const & seqVBlocks; + TTileBuffer & tileBuffer; + TDPSettings const & dpSettings; + TEvent * ptrEvent{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. 
+ WavefrontAlignmentContext(size_t const _alignmentId, + TSeqHBlocks const & _seqHBlocks, + TSeqVBlocks const & _seqVBlocks, + TTileBuffer & _tileBuffer, + TDPSettings const & _dpSettings) : + alignmentId(_alignmentId), + seqHBlocks(_seqHBlocks), + seqVBlocks(_seqVBlocks), + tileBuffer(_tileBuffer), + dpSettings(_dpSettings) + {} +}; + +// The abstract task that is executed as separat alignment instance. +template +class WavefrontTask +{ +public: + + using TContext = TAlignmentContext; + + TContext & context; + std::array successor{{nullptr, nullptr}}; + size_t col{0}; + size_t row{0}; + std::atomic refCount{0}; + bool lastTileH{false}; + bool lastTileV{false}; + + + //------------------------------------------------------------------------- + // Constructor + WavefrontTask() = delete; + + WavefrontTask(TContext & context, std::array successor, + size_t const col, + size_t const row, + size_t const refCount, + bool const lastTileH, + bool const lastTileV) : + context(context), + successor(std::move(successor)), + col(col), row(row), + refCount(refCount), + lastTileH(lastTileH), lastTileV(lastTileV) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct TaskExecutionTraits; + +template +struct TaskExecutionTraits> +{ + using TaskContext_ = WavefrontAlignmentContext; + + using TSeqHBlocks = typename std::decay().seqHBlocks)>::type; + using TSeqVBlocks = typename std::decay().seqVBlocks)>::type; + using TWavefrontBuffer = typename std::decay().tileBuffer)>::type; + using TDPSettings = typename std::decay().dpSettings)>::type; + + using TTileBuffer = typename std::decay().horizontalBuffer[0])>::type; + using TDPScoutState = DPScoutState_>; + + // Sequence types. + using TSeqH = typename Value::Type; + using TSeqV = typename Value::Type; + + // DPTrait type forwarding. 
+ using TDPTraits = typename TDPSettings::TTraits; + using TScoreValue = typename Value::Type; + using TAlgorithmType = typename TDPTraits::TAlgorithmType; + using TTracebackType = typename TDPTraits::TTracebackType; + using TGapType = typename TDPTraits::TGapType; + + // Wavefront Alignment Context. + using TDPCell = DPCell_; + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; +}; + +template +struct SimdTaskExecutionTraits : public TaskExecutionTraits +{ + using TBase = TaskExecutionTraits; + + using TScoreValue = typename TBase::TDPSettings::TScoreValueSimd; + using TDPCell = DPCell_; + using TTraceValue = typename TraceBitMap_::Type; + using TBufferValue = Pair; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline void +setRefCount(WavefrontTask & me, size_t const count) +{ + me.refCount.store(count, std::memory_order_relaxed); +} + +template +inline unsigned +decrementRefCount(WavefrontTask & me) +{ + return --me.refCount; +} + +template +inline unsigned +incrementRefCount(WavefrontTask & me) +{ + return ++me.refCount; +} + +template +inline auto +column(TTask const & task) -> decltype(task.col) +{ + return task.col; +} + +template +inline auto +row(TTask const & task) -> decltype(task.row) +{ + return task.row; +} + +template +inline bool +inLastColumn(TTask const & task) +{ + return task.lastTileH; +} + +template +inline bool +inLastRow(TTask const & task) +{ + return task.lastTileV; +} + +template +inline bool +isLastTask(TTask const & task) +{ + return inLastColumn(task) && inLastRow(task); +} + +template +inline auto +successor(TTask & task) -> std::add_lvalue_reference_t +{ + return task.successor; +} + +template +inline auto +successor(TTask const & task) -> std::add_lvalue_reference_t> +{ + return task.successor; +} + +template +inline auto +context(TTask & 
task) -> std::add_lvalue_reference_t +{ + return task.context; +} + +template +inline auto +context(TTask const & task) -> std::add_lvalue_reference_t> +{ + return task.context; +} + +template +inline bool +isTrackTile(TTask const & task) +{ + return isLastColumn(task) && isLastRow(task); +} + +template +inline bool +isTrackTile(TTask const & task) +{ + return isLastColumn(task) && isLastRow(task); +} + +template +inline void +executeScalar(TTask & task, TDPLocalData & dpLocal) +{ + using TExecTraits = TaskExecutionTraits; + + auto & taskContext = context(task); + // Load the cache from the local data. + auto & dpCache = cache(dpLocal, taskContext.alignmentId); + auto & buffer = taskContext.tileBuffer; + + // Capture the buffer. + typename TExecTraits::TDPScoutState scoutState(buffer.horizontalBuffer[column(task)], + buffer.verticalBuffer[row(task)]); // Task local + + typename TExecTraits::TDPScout scout(scoutState); + + impl::computeTile(dpCache, scout, + taskContext.seqHBlocks[column(task)], + taskContext.seqVBlocks[row(task)], + taskContext.dpSettings.scoringScheme, + taskContext.dpSettings); + // We want to get the state here from the scout. + if(impl::AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScore(scout), maxHostPosition(scout)}, + column(task), + row(task)); + } +} + +template +inline void +printSimdBuffer(TBuffer const & buffer, size_t const l) +{ + for (auto simdHolder : buffer) + { + std::cout << "<"; + unsigned i = 0; + for (; i < l - 1; ++i) + { + std::cout << simdHolder.i1._score[i] << ", "; + } + std::cout << simdHolder.i1._score[i] << ">\n"; + } +} + +#ifdef SEQAN_SIMD_ENABLED +template +inline void +executeSimd(TTasks & tasks, TDPLocalData & dpLocal) +{ + using TTask = typename std::remove_pointer::Type>::type; + using TExecTraits = SimdTaskExecutionTraits; + + auto offset = impl::computeOffset(tasks, TExecTraits{}); + // Has to be adapted to take the correct buffer from the corresponding task. + auto simdBufferH = impl::gatherSimdBuffer(tasks, + [] (auto & task) + { + return &context(task).tileBuffer.horizontalBuffer[column(task)]; + }, + offset, + TExecTraits{}); + auto simdBufferV = impl::gatherSimdBuffer(tasks, + [] (auto & task) + { + return &context(task).tileBuffer.verticalBuffer[row(task)]; + }, + offset, + TExecTraits{}); + + // Does not really make sense. + auto & cache = simdCache(dpLocal, 0); + // Run alignment. + impl::computeSimdBatch(cache, simdBufferH, simdBufferV, tasks, dpLocal, offset, TExecTraits{}); + + // Write back into buffer. 
+ impl::scatterSimdBuffer(tasks, + simdBufferH, + [](auto & task) + { + return &context(task).tileBuffer.horizontalBuffer[column(task)]; + }, + offset, + TExecTraits{}); + impl::scatterSimdBuffer(tasks, + simdBufferV, + [](auto & task) + { + return &context(task).tileBuffer.verticalBuffer[row(task)]; + }, + offset, + TExecTraits{}); +} +#endif // SEQAN_SIMD_ENABLED + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_event.h b/porechop/include/seqan/align_parallel/wavefront_task_event.h new file mode 100644 index 0000000..ccd18dc --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_event.h @@ -0,0 +1,104 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Event to signal end of one alignment instance. 
+class WavefrontTaskEvent +{ +public: + std::mutex mutexLastTask{}; + std::condition_variable conditionLastTask{}; + bool readyLastTask{false}; + + WavefrontTaskEvent() = default; + + WavefrontTaskEvent(WavefrontTaskEvent const &) = delete; + WavefrontTaskEvent(WavefrontTaskEvent &&) = delete; + + WavefrontTaskEvent& operator=(WavefrontTaskEvent const &) = delete; + WavefrontTaskEvent& operator=(WavefrontTaskEvent &&) = delete; + + ~WavefrontTaskEvent() + { + if (!readyLastTask) + { + { + std::lock_guard lck(mutexLastTask); + readyLastTask = true; + } + conditionLastTask.notify_one(); + } + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +inline void +notify(WavefrontTaskEvent & event) +{ + std::lock_guard lck(event.mutexLastTask); + event.readyLastTask = true; + event.conditionLastTask.notify_one(); // We require a strict synchronization between waiting and notifying thread. 
+} + +inline void +wait(WavefrontTaskEvent & event) +{ + std::unique_lock lck(event.mutexLastTask); + if (!event.readyLastTask) + event.conditionLastTask.wait(lck, [&] { return event.readyLastTask; }); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_executor.h b/porechop/include/seqan/align_parallel/wavefront_task_executor.h new file mode 100644 index 0000000..ab0f933 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_executor.h @@ -0,0 +1,146 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +template +struct WavefrontTaskExecutionPolicy; + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Task executor. Manages the execution of single alignment blocks. +template +struct WavefrontTaskExecutor +{ + TResource* _ptrResource{nullptr}; + TWavefrontExecutor * _ptrWavefrontExecutor{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + WavefrontTaskExecutor() = default; + + WavefrontTaskExecutor(TResource * _resource, + TWavefrontExecutor * _wavefrontExecutor) : + _ptrResource{_resource}, + _ptrWavefrontExecutor(_wavefrontExecutor) + {} + + inline void operator()() + { + WavefrontTaskExecutionPolicy::execute(*_ptrResource, *_ptrWavefrontExecutor); + } +}; + +// Policy for no SIMD execution. 
+template +struct WavefrontTaskExecutionPolicy> +{ + + template + inline static void + execute(TResource & task, TWavefrontExecutor & wavefrontExec) + { + using TWaveTaskExec = WavefrontTaskExecutor; + + executeScalar(task, local(wavefrontExec)); + for (auto succ : successor(task)) + { + if (succ && decrementRefCount(*succ) == 0) + spawn(wavefrontExec, TWaveTaskExec{succ, &wavefrontExec}); + } + if (isLastTask(task)) + { + notify(*(context(task).ptrEvent)); + } + } +}; + +// Policy for SIMD execution. +template +struct WavefrontTaskExecutionPolicy> +{ + template + inline static void + execute(TResource & resource, TWavefrontExecutor & wavefrontExec) + { + using TWaveTaskExec = WavefrontTaskExecutor; + + typename TResource::ResultType tasks; + if (!tryPopTasks(tasks, resource)) + return; + + SEQAN_ASSERT(!empty(tasks)); + if (tasks.size() == 1) + executeScalar(*front(tasks), local(wavefrontExec)); + else + executeSimd(tasks, local(wavefrontExec)); + + for (auto task : tasks) + { + for (auto succ : successor(*task)) + { + if (succ && decrementRefCount(*succ) == 0) + { + appendValue(resource, *succ); + spawn(wavefrontExec, TWaveTaskExec{&resource, &wavefrontExec}); + } + } + if (isLastTask(*task)) + { + notify(*(context(*task).ptrEvent)); + } + } + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_queue.h b/porechop/include/seqan/align_parallel/wavefront_task_queue.h new file mode 100644 index 0000000..d64b61f --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_queue.h @@ 
-0,0 +1,139 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Central task queue used in simd mode to gather multiple blocks to gather full simd registers. +template +class WavefrontTaskQueue +{ +public: + + + // Member Types. + using TQueue = ConcurrentQueue; + using ResultType = std::vector; + using ValueType = TValue; + + // Members. + static constexpr size_t VECTOR_SIZE{VECTOR_SIZE_}; + + TQueue queue; + std::mutex mutexPopQueue; + bool hasNotified{false}; + + // Constructors. 
+ WavefrontTaskQueue() + { + lockWriting(queue); + lockReading(queue); + } + + WavefrontTaskQueue(WavefrontTaskQueue const&) = delete; + WavefrontTaskQueue(WavefrontTaskQueue &&) = delete; + + WavefrontTaskQueue& operator=(WavefrontTaskQueue const &) = delete; + WavefrontTaskQueue& operator=(WavefrontTaskQueue &&) = delete; + + ~WavefrontTaskQueue() + { + if (!hasNotified) + unlockWriting(queue); + unlockReading(queue); + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline bool +tryPopTasks(typename WavefrontTaskQueue::ResultType & tasks, + WavefrontTaskQueue & me) +{ + clear(tasks); + std::lock_guard lck(me.mutexPopQueue); + if (length(me.queue) < WavefrontTaskQueue::VECTOR_SIZE) + { + resize(tasks, 1); + if (!popFront(tasks[0], me.queue, Serial())) + { + return false; + } + } + else + { + for (size_t lane = 0u; lane < VECTOR_SIZE; ++lane) + tasks.push_back(popFront(me.queue, Serial())); + } + return true; +} + +template +inline void +appendValue(WavefrontTaskQueue & me, + TValue & newTask) +{ + appendValue(me.queue, &newTask); +} + +template +inline void +notify(WavefrontTaskQueue & me) +{ + me.hasNotified = true; + unlockWriting(me.queue); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h b/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h new file mode 100644 index 0000000..c246796 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h @@ -0,0 +1,218 @@ +// ========================================================================== +// SeqAn - The Library for 
Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Scheduler for wavefront tasks. +class WavefrontTaskScheduler +{ +public: + + //------------------------------------------------------------------------- + // Memeber Types + + using TWrapper = std::function; + using TTaskQueue = ConcurrentQueue; + + //------------------------------------------------------------------------- + // Member Variables + + ThreadPool _threadPool; + TTaskQueue _taskQueue; + + unsigned _writerCount; + + std::mutex _mutexPushException; + std::vector _exceptionPointers; + std::atomic _isValid{true}; + + std::function job = [this] () + { + lockReading(_taskQueue); + waitForFirstValue(_taskQueue); // Wait for all writers to be setup. + + std::function _dummy = [] () + { // TODO(rrahn): Could throw exception to signal something went terribly wrong. + SEQAN_ASSERT_FAIL("Trying to exceute empty wavefront task in a thread"); + }; + TWrapper task{_dummy}; + + while (true) + { + if (!popFront(task, _taskQueue)) + break; // Empty queue and no writer registered. + + try + { + task(); // Execute the task; + } + catch (...) + { // Catch exception, and signal failure. Continue running until queue is empty. 
+ { + std::lock_guard lck(_mutexPushException); + _exceptionPointers.push_back(std::current_exception()); + } + _isValid.store(false, std::memory_order_release); + } + } + unlockReading(_taskQueue); + }; + + //------------------------------------------------------------------------- + // Constructor + + WavefrontTaskScheduler(size_t const threadCount, size_t const writerCount) : + _writerCount(writerCount) + { + + for (unsigned i = 0; i < threadCount; ++i) + { + spawn(_threadPool, job); + } + setCpuAffinity(_threadPool, 0, 1); + } + + WavefrontTaskScheduler(size_t const threadCount) : WavefrontTaskScheduler(threadCount, 0) + {} + + WavefrontTaskScheduler(WavefrontTaskScheduler const &) = delete; + WavefrontTaskScheduler(WavefrontTaskScheduler &&) = delete; + + //------------------------------------------------------------------------- + // Member Functions + + WavefrontTaskScheduler& operator=(WavefrontTaskScheduler const &) = delete; + WavefrontTaskScheduler& operator=(WavefrontTaskScheduler &&) = delete; + + //------------------------------------------------------------------------- + // Destructor + + ~WavefrontTaskScheduler() + {} + // In destructor of thread pool we wait for the outstanding alignments to be finished + // and then continue destruction of the remaining members and cleaning up the stack. + // Note the number of writers must be set to 0, for the queue to stop spinning. 
+}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct SchedulerTraits; + +template <> +struct SchedulerTraits +{ + using TWrapper_ = typename WavefrontTaskScheduler::TWrapper; + using TTask = TWrapper_; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +inline void +setWriterCount(WavefrontTaskScheduler & me, size_t const count) noexcept +{ + me._writerCount = count; +} + +inline void +lockWriting(WavefrontTaskScheduler & me) noexcept +{ + lockWriting(me._taskQueue); +} + +inline void +unlockWriting(WavefrontTaskScheduler & me) noexcept +{ + unlockWriting(me._taskQueue); +} + +inline void +waitForWriters(WavefrontTaskScheduler & me) noexcept +{ + waitForWriters(me._taskQueue, me._writerCount); +} + +inline bool +isValid(WavefrontTaskScheduler & me) noexcept +{ + return me._isValid.load(std::memory_order_acquire); +} + +inline void +scheduleTask(WavefrontTaskScheduler & me, + typename SchedulerTraits::TTask task) +{ + if (!isValid(me)) + { // TODO(rrahn): Improve error handling. 
+ throw std::runtime_error("Invalid Task Scheduler"); + } + appendValue(me._taskQueue, std::move(task)); +} + +inline void +wait(WavefrontTaskScheduler & me) +{ + SEQAN_ASSERT(me._taskQueue.writerCount == 0); + + join(me._threadPool); + + SEQAN_ASSERT(empty(me._taskQueue)); + SEQAN_ASSERT(me._taskQueue.readerCount == 0); +} + +inline auto +getExceptions(WavefrontTaskScheduler & me) +{ + return me._exceptionPointers; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_util.h b/porechop/include/seqan/align_parallel/wavefront_task_util.h new file mode 100644 index 0000000..1208055 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_util.h @@ -0,0 +1,557 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ + +namespace seqan +{ +namespace impl +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// Helper meta-function to extract the correct DP Property. 
+template +struct AlgorithmProperty +{ + template + inline static bool + isTrackingEnabled(TTask const & tile) + { + return isLastColumn(tile) && isLastRow(tile); + } +}; + +template +struct AlgorithmProperty> +{ + template + inline static bool + isTrackingEnabled(TTask const & tile) + { + return (IsFreeEndGap_::VALUE && inLastColumn(tile)) || + (IsFreeEndGap_::VALUE && inLastRow(tile)) || + (inLastColumn(tile) && inLastRow(tile)); + } +}; + +template +struct AlgorithmProperty> +{ + template + inline static bool + isTrackingEnabled(TTask const & /*tile*/) + { + return true; + } +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function computeTile() +// ---------------------------------------------------------------------------- + +// Wrapper function to call alignment core for the specific block. +template +inline void +computeTile(DPContext & dpContext, + TDPScout & scout, + TSequenceH const & seqH, + TSequenceV const & seqV, + TScoringScheme const & scoringScheme, + TDPSettings const & /*settings*/) +{ + using TDPTraits = typename TDPSettings::TTraits; + + using TScoreMatrixSpec = typename DefaultScoreMatrixSpec_::Type; + + using TDPScoreMatrix = DPMatrix_; + using TDPTraceMatrix = DPMatrix_; + + using TDPScoreMatrixNavigator = DPMatrixNavigator_; + using TDPTraceMatrixNavigator = DPMatrixNavigator_, NavigateColumnWise>; + + using TDPProfile = DPProfile_; + + // Setup the score and trace matrix. 
+ TDPScoreMatrix dpScoreMatrix; + TDPTraceMatrix dpTraceMatrix; + + setLength(dpScoreMatrix, +DPMatrixDimension_::HORIZONTAL, length(seqH) + 1); + setLength(dpScoreMatrix, +DPMatrixDimension_::VERTICAL, length(seqV) + 1); + + setLength(dpTraceMatrix, +DPMatrixDimension_::HORIZONTAL, length(seqH) + 1); + setLength(dpTraceMatrix, +DPMatrixDimension_::VERTICAL, length(seqV) + 1); + + // Resue the buffer from the cache. + setHost(dpScoreMatrix, getDpScoreMatrix(dpContext)); + setHost(dpTraceMatrix, getDpTraceMatrix(dpContext)); + + resize(dpScoreMatrix); + // We do not need to allocate the memory for the trace matrix if the traceback is disabled. + if /*constexpr*/(IsTracebackEnabled_::VALUE) + { + static_assert(std::is_same::value, "Traceback not implemented!"); + resize(dpTraceMatrix); + } + + // Initialize the navigators. + TDPScoreMatrixNavigator dpScoreMatrixNavigator{dpScoreMatrix, DPBandConfig{}}; + TDPTraceMatrixNavigator dpTraceMatrixNavigator{dpTraceMatrix, DPBandConfig{}}; + + // Execute the alignment. + _computeAlignmentImpl(scout, dpScoreMatrixNavigator, dpTraceMatrixNavigator, seqH, seqV, + scoringScheme, DPBandConfig{}, TDPProfile(), NavigateColumnWise{}); +} + +#ifdef SEQAN_SIMD_ENABLED +// Some utility functions. 
+template +inline auto +doComputeOffset(TTasks const &tasks, + TScoreValueScalar const & /*scalarScore*/, + TScoreValueSimd const & /*simdScore*/) +{ + String offset; + resize(offset, length(tasks), std::numeric_limits::min(), Exact()); + + size_t pos = 0; + + for (auto task : tasks) + { + offset[pos] = front(context(*task).tileBuffer.horizontalBuffer[column(*task)]).i1._score; + ++pos; + } + + return offset; +} + +template +inline auto +doComputeOffset(TTasks const &tasks, + TScoreValue const & /*scalarScore*/, + TScoreValue const & /*simdScore*/) +{ + String offset; + resize(offset, length(tasks), 0, Exact()); + return offset; +} + +template +inline auto +computeOffset(TTasks const &tasks, TTaskTraits const & /*traits*/) +{ + using TDPSettings = typename TTaskTraits::TDPSettings; + using TScoreValueScalar = typename Value::Type; + using TScoreValueSimd = typename Value::Type; + using TDPSimdValue = typename Value::Type; + + return doComputeOffset(tasks, TScoreValueScalar{}, TDPSimdValue{}); +} + +template +inline void +loadIntoSimd(Pair & target, + TTasks const & tasks, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + LinearGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + auto zipCont = makeZipView(tasks, scoreVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + auto val = (length(buffer) > pos) ? buffer[pos] : typename std::decay::type{}; + + // We might access values out of bounds here. 
+ std::get<1>(tuple) = static_cast(val.i1._score - std::get<3>(tuple)); + std::get<2>(tuple) = val.i2; + }); + + target.i1._score = load(&scoreVec[0]); + target.i2 = load(&traceVec[0]); +} + +template +inline void +loadIntoSimd(Pair & target, + TTasks const & tasks, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + AffineGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreHorVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVerVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + auto zipCont = makeZipView(tasks, scoreVec, scoreHorVec, scoreVerVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + auto val = (length(buffer) > pos) ? buffer[pos] : typename std::decay::type{}; + using TDPCellVar = decltype(val.i1); + using TDPCell16 = DPCell_; + + // We might access values out of bounds here. + std::get<1>(tuple) = static_cast(val.i1._score - std::get<5>(tuple)); + + std::get<2>(tuple) = + (val.i1._horizontalScore <= DPCellDefaultInfinity::VALUE) ? + DPCellDefaultInfinity::VALUE : + static_cast(val.i1._horizontalScore - std::get<5>(tuple)); + std::get<3>(tuple) = + (val.i1._verticalScore <= DPCellDefaultInfinity::VALUE) ? 
+ DPCellDefaultInfinity::VALUE : + static_cast(val.i1._verticalScore - std::get<5>(tuple)); + std::get<4>(tuple) = val.i2; + }); + + target.i1._score = load(&scoreVec[0]); + target.i1._horizontalScore = load(&scoreHorVec[0]); + target.i1._verticalScore = load(&scoreVerVec[0]); + target.i2 = load(&traceVec[0]); +} + +template +inline void +storeIntoBuffer(TTasks & tasks, + Pair const & source, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + LinearGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + storeu(&scoreVec[0], source.i1._score); + storeu(&traceVec[0], source.i2); + + auto zipCont = makeZipView(tasks, scoreVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)] (auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + if (length(buffer) > pos) + { + auto & pair = buffer[pos]; + pair.i1._score = std::get<1>(tuple) + std::get<3>(tuple); + pair.i2 = std::get<2>(tuple); + } + }); +} + +template +inline void +storeIntoBuffer(TTasks & tasks, + Pair const & source, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + AffineGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreHorVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVerVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + storeu(&scoreVec[0], source.i1._score); + storeu(&scoreHorVec[0], source.i1._horizontalScore); + storeu(&scoreVerVec[0], source.i1._verticalScore); + storeu(&traceVec[0], source.i2); + + auto zipCont = makeZipView(tasks, scoreVec, scoreHorVec, scoreVerVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, 
getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + if (length(buffer) > pos) + { + auto & pair = buffer[pos]; + pair.i1._score = std::get<1>(tuple) + std::get<5>(tuple); + pair.i1._horizontalScore = std::get<2>(tuple) + std::get<5>(tuple); + pair.i1._verticalScore = std::get<3>(tuple) + std::get<5>(tuple); + pair.i2 = std::get<4>(tuple); + } + }); +} + +template +inline auto +gatherSimdBuffer(TTasks const & tasks, + TFunc && getBuffer, + TOffset const & offset, + TExecTraits const & /*traits*/) +{ + // Check for valid simd length. + SEQAN_ASSERT_EQ(LENGTH::VALUE, length(tasks)); + + String > simdSet; + + auto maxLength = length(*getBuffer(*tasks[0])); + std::for_each(begin(tasks, Standard()) + 1, end(tasks, Standard()), + [&](auto & task) + { + auto len = length(*getBuffer(*task)); + maxLength = (len > maxLength) ? len : maxLength; + }); + + resize(simdSet, maxLength, Exact()); + for (unsigned i = 0; i < length(simdSet); ++i) + { + loadIntoSimd(simdSet[i], tasks, i, std::forward(getBuffer), offset, typename TExecTraits::TGapType()); + } + return simdSet; +} + +template +inline void +scatterSimdBuffer(TTasks & tasks, + String const & simdSet, + TFunc && getBuffer, + TOffset const & offset, + TExecTraits const & /*traits*/) +{ + for (unsigned i = 0; i < length(simdSet); ++i) + { + storeIntoBuffer(tasks, simdSet[i], i, std::forward(getBuffer), offset, typename TExecTraits::TGapType()); + } +} + +// Compute tasks as simd alignment. +template +inline void +computeSimdBatch(DPContext & cache, + TSimdBufferH & bufferH, + TSimdBufferV & bufferV, + TTasks & tasks, + TDPLocal & dpLocal, + TOffset & offset, + TExecTraits const & /*traits*/) +{ + // Now what? + using TSeqH = typename TExecTraits::TSeqH; + using TSeqV = typename TExecTraits::TSeqV; + using TSimdVec = typename TExecTraits::TScoreValue; + + // Prepare sequence set. 
+ StringSet > depSetH; + StringSet > depSetV; + bool allSameLength = true; + auto ptrTask = tasks[0]; + auto lenH = length(context(*ptrTask).seqHBlocks[column(*ptrTask)]); + auto lenV = length(context(*ptrTask).seqVBlocks[row(*ptrTask)]); + + for (auto ptrTask : tasks) + { + appendValue(depSetH, context(*ptrTask).seqHBlocks[column(*ptrTask)]); + appendValue(depSetV, context(*ptrTask).seqVBlocks[row(*ptrTask)]); + if (lenH != length(context(*ptrTask).seqHBlocks[column(*ptrTask)]) || + lenV != length(context(*ptrTask).seqVBlocks[row(*ptrTask)])) + { + allSameLength = false; + } + } + + // Dummy trace set. + StringSet > trace; // We need to instantiate it, but it will not be used. + + // We can compute with one simd score, but might collect them here. + auto const & scoringScheme = context(*tasks[0]).dpSettings.simdScoringScheme; + + // Preapare and run alingment. + String > stringSimdH; + String > stringSimdV; + + if (allSameLength) + { + using TScoutState = DPScoutState_>; + TScoutState scoutState(bufferH, bufferV); + _prepareSimdAlignment(stringSimdH, stringSimdV, depSetH, depSetV, scoutState); + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; + + TDPScout dpScout(scoutState); + // We rather want to set + computeTile(cache, dpScout, stringSimdH, stringSimdV, scoringScheme, context(*tasks[0]).dpSettings); + + // Now we need to run the scout check for all tasks. + + // We want to get the state here from the scout. + for (size_t pos = 0; pos < length(tasks); ++pos) + { + auto & task = *tasks[pos]; + if (AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + _setSimdLane(dpScout, pos); + auto & taskContext = context(task); + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScoreAt(dpScout) + offset[pos], 0u}, + column(task), + row(task)); + } + } + } + else + { + using TDPSettings = std::decay_t; + using TDPTraits = typename TDPSettings::TTraits; + + using TDPProfile = DPProfile_; + + using TSimdScoutTrait = SimdAlignVariableLengthTraits; + using TScoutState = DPScoutState_>>; + + String lengthsH; + String lengthsV; + + TScoutState scoutState(bufferH, bufferV); + _prepareSimdAlignment(stringSimdH, stringSimdV, depSetH, depSetV, lengthsH, lengthsV, scoutState); + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; + + TDPScout dpScout(scoutState); + computeTile(cache, dpScout, stringSimdH, stringSimdV, scoringScheme, context(*tasks[0]).dpSettings); + // We want to get the state here from the scout. + for (size_t pos = 0; pos < length(tasks); ++pos) + { + auto & task = *tasks[pos]; + if (AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + _setSimdLane(dpScout, pos); + auto & taskContext = context(task); + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScoreAt(dpScout) + offset[pos], 0u}, + column(task), + row(task)); + } + } + } +} +#endif // SEQAN_SIMD_ENABLED +} // namespace impl +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ diff --git a/porechop/include/seqan/simd/simd_base.h b/porechop/include/seqan/simd/simd_base.h new file mode 100644 index 0000000..59f5dcd --- /dev/null +++ b/porechop/include/seqan/simd/simd_base.h @@ -0,0 +1,390 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ + +namespace seqan +{ + +// a metafunction returning the biggest supported SIMD vector +template +struct SimdVector; + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct Value const> : + public Value > +{}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +struct LENGTH const> : + public LENGTH > +{}; + +// define a concept and its models +// they allow us to define generic vector functions +SEQAN_CONCEPT(SimdMaskVectorConcept, (TSimdMaskVector)) +{ + typedef typename Reference::Type TReference; + + TSimdMaskVector a; + + SEQAN_CONCEPT_USAGE(SimdMaskVectorConcept) + { + static_assert(IsSameType::VALUE, "Type of a[] should be the same as the reference type of a."); + } +}; + +SEQAN_CONCEPT_REFINE(SimdVectorConcept, (TSimdVector), (SimdMaskVectorConcept)) +{ + SEQAN_CONCEPT_USAGE(SimdVectorConcept) + {} +}; + +template +struct SimdMaskVectorImpl { + using Type = Nothing; +}; + +/** + * SimdMaskVector is the return type of all logical operations of simd vectors + * like comparisons. 
+ * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdMaskVector = SimdMaskVector::Type; + * + * TSimdVector vec1 {2, 4, 8, 16}, vec2 {16, 8, 4, 2}; + * TSimdMaskVector cmp = vec1 > vec2; // cmp = {false, false, true, true} + * ``` + */ +template +struct SimdMaskVector : SimdMaskVectorImpl >::Type > +{ +}; + +template +struct SimdSwizzleVectorImpl; + +/** + * SimdSwizzleVector is needed for shuffleVector() as index type. + * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdSwizzleVector = SimdSwizzleVector::Type; + * + * TSimdVector vec {2, 4, 8, 16}, res; + * TSimdSwizzleVector swizzle {3, 2, 0, 2}; + * + * res = shuffleVector(vec, swizzle); // res = {16, 8, 2, 8} + * ``` + */ +template +struct SimdSwizzleVector : SimdSwizzleVectorImpl >::Type > +{}; + +/** + * ``` + * getValue(a, pos); + * + * // same as + * + * a[pos]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Value::Type) +getValue(TSimdVector const & vector, TPosition const pos); + +/** + * ``` + * value(a, pos); + * + * // same as + * + * a[pos]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Value::Type) +value(TSimdVector const & vector, TPosition const pos); + +/** + * ``` + * assignValue(a, pos, value); + * + * // same as + * + * a[pos] = value; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +assignValue(TSimdVector & vector, TPosition const pos, TValue2 const value); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +transpose(TSimdVector matrix[ROWS]); + +/** + * ``` + * clearVector(a); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = 0; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector); + +/** + * ``` + * auto c = createVector(a); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = a; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +createVector(TValue const x); + +/** + * ``` + * fillVector(a, 
1, 3, 23, 1337); + * + * // same as + * + * a[0] = 1; + * a[1] = 3; + * a[2] = 13; + * a[3] = 1337; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args); + +/** + * ``` + * auto c = cmpEq(a, b); + * + * // same as + * + * auto c = a == b; + * ``` + * + * NOTE: + * The type of c might change from unsigned to signed if auto is used + * + * ``` + * using TSimdVector = SimdVector::Type; + * TSimdVector a, b; + * + * auto c = a == b; // type of c might change to SimdVector::Type + * TSimdVector d = a == b; // has the same type + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b); + +/** + * ``` + * auto c = cmpGt(a, b); + * + * // same as + * + * auto c = a > b; + * ``` + * + * NOTE: + * The type of c might change from unsigned to signed if auto is used + * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdMaskVector = SimdMaskVector::Type; + * TSimdVector a, b; + * + * auto c = a > b; // type of c might change to SimdVector::Type + * TSimdMaskVector d = a > b; // has the same type + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) 
+operator|=(TSimdVector & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b); + +/** + * ``` + * c = andNot(a, b); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = (~a[i]) & b[i]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask); + +/** + * Unaligned store, i.e. memAddr does not need to be aligned (e.g. SEE4.2 16byte + * aligned, AVX2 32byte aligned). + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec); + +/** + * Aligned load, i.e. memAddr MUST be aligned (e.g. SEE4.2 16byte + * aligned, AVX2 32byte aligned). 
+ */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices); + +// NOTE(rmaerker): Make this function available, also if SIMD is not enabled. +template +inline SEQAN_FUNC_ENABLE_IF(Is>, TSimdVector) +createVector(TValue const x) +{ + return x; +} + +// -------------------------------------------------------------------------- +// Function print() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, std::ostream &) +print(std::ostream & stream, TSimdVector const & vector) +{ + stream << '<'; + for (int i = 0; i < LENGTH::VALUE; ++i) + stream << '\t' << vector[i]; + stream << "\t>\n"; + return stream; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl.h b/porechop/include/seqan/simd/simd_base_seqan_impl.h new file mode 100644 index 0000000..6f98254 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl.h @@ -0,0 +1,154 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ + +#include +#include + +#if defined(PLATFORM_WINDOWS_VS) + /* Microsoft C/C++-compatible compiler */ + #include +#elif defined(PLATFORM_GCC) && (defined(__x86_64__) || defined(__i386__)) + /* GCC-compatible compiler, targeting x86/x86-64 */ + #include +#elif defined(SEQAN_SIMD_ENABLED) + #pragma message "You are trying to build with -DSEQAN_SIMD_ENABLED, which might be " \ + "auto-defined if AVX or SSE was enabled (e.g. -march=native, -msse4, ...), " \ + "but we only support x86/x86-64 architectures for SIMD vectorization! " \ + "You might want to use UME::SIMD (https://github.com/edanor/umesimd) combined " \ + "with -DSEQAN_UMESIMD_ENABLED for a different SIMD backend." 
+#endif + +namespace seqan { + +#ifdef COMPILER_LINTEL +#include +#define SEQAN_VECTOR_CAST_(T, v) static_cast::type>(v) +#define SEQAN_VECTOR_CAST_LVALUE_(T, v) static_cast(v) +#else +#define SEQAN_VECTOR_CAST_(T, v) reinterpret_cast(v) +#define SEQAN_VECTOR_CAST_LVALUE_(T, v) reinterpret_cast(v) +#endif + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Useful Macros +// ============================================================================ + +#define SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +getValue(TSimdVector & vector, TPosition const pos) \ +{ \ + return vector[pos]; \ +} + +#define SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +value(TSimdVector & vector, TPosition const pos) \ +{ \ + return getValue(vector, pos); \ +} + +#define SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ +template \ +inline void \ +assignValue(TSimdVector & vector, TPosition const pos, TValue2 const value) \ +{ \ + vector[pos] = value; \ +} + +// Only include following code if simd instructions are enabled. 
+#ifdef SEQAN_SIMD_ENABLED + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// a metafunction returning the biggest supported SIMD vector +template +struct SimdVector; + +// internal struct to specialize for vector parameters +// VEC_SIZE = Vector size := sizeof(vec) +// LENGTH = number of elements := VEC_SIZE / sizeof(InnerValue::Type) +// SCALAR_TYPE = the scalar type of the vector (maybe optional, if the type +// doesn't matter for the operation) +template +struct SimdParams_ +{}; + +// internal traits meta-function to capture correct the mask type. +template +struct SimdVectorTraits +{ + using MaskType = TSimdVector; +}; + +// internal struct to specialize for matrix parameters +template +struct SimdMatrixParams_ +{}; + +#define SEQAN_DEFINE_SIMD_VECTOR_(TSimdVector, TValue, SIZEOF_VECTOR) \ + typedef TValue TSimdVector __attribute__ ((__vector_size__(SIZEOF_VECTOR))); \ + template <> struct SimdVector { typedef TSimdVector Type; }; \ + template <> struct Value { typedef TValue Type; }; \ + template <> struct Value: public Value {}; \ + template <> struct LENGTH { enum { VALUE = SIZEOF_VECTOR / sizeof(TValue) }; }; \ + template <> struct LENGTH: public LENGTH {}; \ + SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector) \ + SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector), (SimdVectorConcept)); \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector const), (SimdVectorConcept)); +#endif // SEQAN_SIMD_ENABLED + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h b/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h 
new file mode 100644 index 0000000..cae230f --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h @@ -0,0 +1,1492 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ + +namespace seqan { + +// SimdParams_<32, 32>: 256bit = 32 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32Char, char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32SChar, signed char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32UChar, unsigned char, 32) + +// SimdParams_<32, 16>: 256bit = 16 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Short, short, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UShort, unsigned short, 32) + +// SimdParams_<32, 8>: 256bit = 8 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Int, int, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UInt, unsigned int, 32) + +// SimdParams_<32, 4>: 256bit = 4 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int64, int64_t, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt64, uint64_t, 32) + +// ============================================================================ +// Functions +// ============================================================================ + +// ============================================================================ +// AVX/AVX2 wrappers (256bit vectors) +// ============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (256bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 32>) +{ + vector = 
SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi8(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 16>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi16(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 8>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi32(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 4>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi64x(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 32>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi8(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 16>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi16(std::get(args)...)); +} +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 8>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi32(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 4>) +{ + // reverse argument list 0, 1, 2, 3 -> 3, 2, 1, 0 + // NOTE(marehr): Intel linux fails to reverse argument list and only + // _mm256_set_epi64x has no reverse equivalent + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set_epi64x(std::get(args)...)); +} + +// -------------------------------------------------------------------------- +// _clearVector (256bit) +// -------------------------------------------------------------------------- + 
+template +inline void _clearVector(TSimdVector & vector, SimdParams_<32, L>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setzero_si256()); +} + +// -------------------------------------------------------------------------- +// _createVector (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi8(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi16(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi32(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_< 32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi64x(x)); +} + +// -------------------------------------------------------------------------- +// _cmpEq (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + 
return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _cmpGt (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80 = ~0x7F (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi8( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi8(~0x7F)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi8(~0x7F)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000 = ~0x7FFF (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi16( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi16(~0x7FFF)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi16(~0x7FFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80000000 = ~0x7FFFFFFF (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi32( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi32(~0x7FFFFFFF)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi32(~0x7FFFFFFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000000000000000ul = ~0x7FFFFFFFFFFFFFFFul (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi64( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a) ,_mm256_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)))); +} + +// -------------------------------------------------------------------------- +// _bitwiseOr (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseOr(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_or_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAnd (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAnd(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_and_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAndNot (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_andnot_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseNot (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), 
_mm256_setzero_si256())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); + +} +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); +} + +// -------------------------------------------------------------------------- +// _divide (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi8(a, _mm256_set1_epi8(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi16(a, _mm256_set1_epi16(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi32(a, _mm256_set1_epi32(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi64(a, _mm256_set1_epi64x(b))); +} + +// -------------------------------------------------------------------------- +// _add (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + 
SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _sub (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// 
-------------------------------------------------------------------------- +// _mult (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<32, 32>) +{ + SEQAN_SKIP_TEST; + SEQAN_ASSERT_FAIL("AVX2 intrinsics for multiplying 8 bit values not implemented!"); + return a; +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_mullo_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_mullo_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<32, 4>) +{ + SEQAN_SKIP_TEST; + SEQAN_ASSERT_FAIL("AVX2 intrinsics for multiplying 64 bit values not implemented!"); + return a; +} + +// -------------------------------------------------------------------------- +// _max (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + 
_mm256_max_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + + +// -------------------------------------------------------------------------- +// _min (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return 
SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + #if defined(__AVX512VL__) + return 
SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +// -------------------------------------------------------------------------- +// _blend (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_blendv_epi8(SEQAN_VECTOR_CAST_(const __m256i &, a), + SEQAN_VECTOR_CAST_(const __m256i &, b), + SEQAN_VECTOR_CAST_(const __m256i &, mask))); +} + +// -------------------------------------------------------------------------- +// _storeu (256bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector const & vec, SimdParams_<32, L>) +{ + _mm256_storeu_si256((__m256i*)memAddr, SEQAN_VECTOR_CAST_(const __m256i&, vec)); +} + +// ---------------------------------------------------------------------------- +// Function _load() 256bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_load_si256((__m256i const *) memAddr)); +} + +// -------------------------------------------------------------------------- +// _shiftRightLogical (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi16(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm) & _mm256_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & 
vector, const int imm, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi16(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi32(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi64(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} + +// -------------------------------------------------------------------------- +// Extend sign from integer types 256bit +// -------------------------------------------------------------------------- + +inline __m256i +seqan_mm256_i16sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xff00) + _mm256_sub_epi16( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi16(0x80) + ), + _mm256_set1_epi16(1) + ), + _mm256_set1_epi16(static_cast(0xff00u)) + ) + ); +} + +inline __m256i +seqan_mm256_i32sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffff00u) + _mm256_sub_epi32( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi32(0x80) + ), + _mm256_set1_epi32(1) + ), + _mm256_set1_epi32(static_cast(0xffffff00u)) + ) + ); +} + +inline __m256i +seqan_mm256_i32sign_extend_epis16(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffff0000u) + _mm256_sub_epi32( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x8000 (select msb) + v, + _mm256_set1_epi32(0x8000) + ), + _mm256_set1_epi32(1) 
+ ), + _mm256_set1_epi32(static_cast(0xffff0000u)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffffff00ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi64x(0x80) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffffffffff00ul)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis16(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffff0000ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x8000 (select msb) + v, + _mm256_set1_epi64x(0x8000) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffffffff0000ul)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis32(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffff0000ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80000000 (select msb) + v, + _mm256_set1_epi64x(0x80000000) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffff00000000ul)) + ) + ); +} + +// -------------------------------------------------------------------------- +// _gather (256bit) +// -------------------------------------------------------------------------- + +template +inline __m256i +seqan_mm256_i8gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + // mem: ( 0, 3, 6, 9 | 12, 15, 18, 21 | 24, 27, 30, 33 | 36, 39, 42, 45 || 48, 51, 54, 57 | 60, 63, 66, 69 | 72, 75, 78, 81 | 84, 87, 90, 93) + // idx: (31, 30, 29, 28 | 27, 26, 25, 24 | 23, 22, 21, 20 | 19, 18, 17, 16 || 15, 14, 13, 12 | 11, 10, 9, 8 | 7, 6, 5, 4 | 3, 2, 1, 0) 
+ // pack: (93, 90, 87, 84 | 81, 78, 75, 72 | 69, 66, 63, 60 | 57, 54, 51, 48 || 45, 42, 39, 36 | 33, 30, 27, 24 | 21, 18, 15, 12 | 9, 6, 3, 0) + return _mm256_packus_epi16( + // pckLow: (93, 0, 90, 0 | 87, 0, 84, 0 | 81, 0, 78, 0 | 75, 0, 72, 0 || 45, 0, 42, 0 | 39, 0, 36, 0 | 33, 0, 30, 0 | 27, 0, 24, 0) + _mm256_packus_epi16( + // mskLL: (93, 0, 0, 0 | 90, 0, 0, 0 | 87, 0, 0, 0 | 84, 0, 0, 0 || 45, 0, 0, 0 | 42, 0, 0, 0 | 39, 0, 0, 0 | 36, 0, 0, 0) + _mm256_and_si256( + // gtrLL: (93, 31, 30, 29 | 90, 93, 31, 30 | 87, 90, 93, 31 | 84, 87, 90, 93 || 45, 48, 51, 54 | 42, 45, 48, 51 | 39, 42, 45, 48 | 36, 39, 42, 45) + _mm256_i32gather_epi32( + (const int *) memAddr, + // lowlow: (31, 0, 0, 0 | 30, 0, 0, 0 | 29, 0, 0, 0 | 28, 0, 0, 0 || 15, 0, 0, 0 | 14, 0, 0, 0 | 13, 0, 0, 0 | 12, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0100000000, ~0xFF000000FFl | 0x0300000002, + ~0xFF000000FFl | 0x0100000000, ~0xFF000000FFl | 0x0300000002 + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ), + // mskLH: (81, 0, 0, 0 | 78, 0, 0, 0 | 75, 0, 0, 0 | 72, 0, 0, 0 || 33, 0, 0, 0 | 30, 0, 0, 0 | 27, 0, 0, 0 | 24, 0, 0, 0) + _mm256_and_si256( + // gtrLH: (81, 84, 87, 90 | 78, 81, 84, 87 | 75, 78, 81, 84 | 72, 75, 78, 81 || 33, 36, 39, 42 | 30, 33, 36, 39 | 27, 30, 33, 36 | 24, 27, 30, 33) + _mm256_i32gather_epi32( + (const int *) memAddr, + // lowhig: (27, 0, 0, 0 | 26, 0, 0, 0 | 25, 0, 0, 0 | 24, 0, 0, 0 || 11, 0, 0, 0 | 10, 0, 0, 0 | 9, 0, 0, 0 | 8, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0500000004, ~0xFF000000FFl | 0x0700000006, + ~0xFF000000FFl | 0x0500000004, ~0xFF000000FFl | 0x0700000006 + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ) + ), + // pckHih: (69, 0, 66, 0 | 63, 0, 60, 0 | 57, 0, 54, 0 | 51, 0, 48, 0 || 21, 0, 18, 0 | 15, 0, 12, 0 | 9, 0, 6, 0 | 3, 0, 0, 0) + _mm256_packus_epi16( + // mskHL: (69, 0, 0, 0 | 66, 0, 0, 0 | 63, 0, 0, 0 | 60, 0, 0, 0 || 21, 0, 0, 0 | 18, 0, 0, 0 | 15, 0, 0, 0 | 12, 0, 0, 0) + 
_mm256_and_si256( + // gtrHL: (69, 72, 75, 78 | 66, 69, 72, 75 | 63, 66, 69, 72 | 60, 63, 66, 69 || 21, 24, 27, 30 | 18, 21, 24, 27 | 15, 18, 21, 24 | 12, 15, 18, 21) + _mm256_i32gather_epi32( + (const int *) memAddr, + // higlow: (23, 0, 0, 0 | 22, 0, 0, 0 | 21, 0, 0, 0 | 20, 0, 0, 0 || 7, 0, 0, 0 | 6, 0, 0, 0 | 5, 0, 0, 0 | 4, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0900000008, ~0xFF000000FFl | 0x0B0000000A, + ~0xFF000000FFl | 0x0900000008, ~0xFF000000FFl | 0x0B0000000A + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ), + // mskHH: (57, 0, 0, 0 | 54, 0, 0, 0 | 51, 0, 0, 0 | 48, 0, 0, 0 || 9, 0, 0, 0 | 6, 0, 0, 0 | 3, 0, 0, 0 | 0, 0, 0, 0) + _mm256_and_si256( + // gtrHH: (57, 60, 63, 66 | 54, 57, 60, 63 | 51, 54, 57, 60 | 48, 51, 54, 57 || 9, 12, 15, 18 | 6, 9, 12, 15 | 3, 6, 9, 12 | 0, 3, 6, 9) + _mm256_i32gather_epi32( + (const int *) memAddr, + // highig: (19, 0, 0, 0 | 18, 0, 0, 0 | 17, 0, 0, 0 | 16, 0, 0, 0 || 3, 0, 0, 0 | 2, 0, 0, 0 | 1, 0, 0, 0 | 0, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0D0000000C, ~0xFF000000FFl | 0x0F0000000E, + ~0xFF000000FFl | 0x0D0000000C, ~0xFF000000FFl | 0x0F0000000E + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ) + ) + ); +} + +template +inline __m256i +seqan_mm256_i16gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + + // The cast makes sure that the max value of TValue = (u)int64_t and + // (u)int32_t will be max value of int16_t (i.e. `~0` in int16_t), because + // the resulting __m256i can only hold int16_t values. + // + // NOTE(marehr): the masking is only needed for TValue = (u)int8_t and + // (u)int16_t. It could be omitted if _mm256_packus_epi32 would be exchanged + // by _mm256_packs_epi32, because for (u)int32_t and (u)int64_t the masking + // operations are basically the identity function. + constexpr int const mask = static_cast(MaxValue::VALUE); + + // 1. 
Unpack low idx values and interleave with 0 and gather from memAddr. + // 2. Unpack high idx values and interleave with 0, than gather from memAddr. + // 3. Merge 2 8x32 vectors into 1x16 vector by signed saturation. This operation reverts the interleave by the unpack operations above. + // + // The following is an example for SimdVector idx and uint16_t + // const * memAddr: + // mem: ( 0, 0, 3, 0 | 6, 0, 9, 0 | 12, 0, 15, 0 | 18, 0, 21, 0 || 24, 0, 27, 0 | 30, 0, 33, 0 | 36, 0, 39, 0 | 42, 0, 45, 0) + // idx: (15, 0, 14, 0 | 13, 0, 12, 0 | 11, 0, 10, 0 | 9, 0, 8, 0 || 7, 0, 6, 0 | 5, 0, 4, 0 | 3, 0, 2, 0 | 1, 0, 0, 0) + // pack: (45, 0, 42, 0 | 39, 0, 36, 0 | 33, 0, 30, 0 | 27, 0, 24, 0 || 21, 0, 18, 0 | 15, 0, 12, 0 | 9, 0, 6, 0 | 3, 0, 0, 0) + return _mm256_packus_epi32( + // mskLow: (45, 0, 0, 0 | 42, 0, 0, 0 | 39, 0, 0, 0 | 36, 0, 0, 0 || 21, 0, 0, 0 | 18, 0, 0, 0 | 15, 0, 0, 0 | 12, 0, 0, 0) + _mm256_and_si256( + // gtrLow: (45, 0, 15, 0 | 42, 0, 45, 0 | 39, 0, 42, 0 | 36, 0, 39, 0 || 21, 0, 24, 0 | 18, 0, 21, 0 | 15, 0, 18, 0 | 12, 0, 15, 0) + _mm256_i32gather_epi32( + (const int *) memAddr, + // low: (15, 0, 0, 0 | 14, 0, 0, 0 | 13, 0, 0, 0 | 12, 0, 0, 0 || 7, 0, 0, 0 | 6, 0, 0, 0 | 5, 0, 0, 0 | 4, 0, 0, 0) + _mm256_unpacklo_epi16( + idx, _mm256_set1_epi16(0) + ), + SCALE + ), + _mm256_set1_epi32(mask) + ), + // mskHih: (33, 0, 0, 0 | 30, 0, 0, 0 | 27, 0, 0, 0 | 24, 0, 0, 0 || 9, 0, 0, 0 | 6, 0, 0, 0 | 3, 0, 0, 0 | 0, 0, 0, 0) + _mm256_and_si256( + // gtrHih: (33, 0, 36, 0 | 30, 0, 33, 0 | 27, 0, 30, 0 | 24, 0, 27, 0 || 9, 0, 12, 0 | 6, 0, 9, 0 | 3, 0, 6, 0 | 0, 0, 3, 0) + _mm256_i32gather_epi32( + (const int *) memAddr, + // high: (11, 0, 0, 0 | 10, 0, 0, 0 | 9, 0, 0, 0 | 8, 0, 0, 0 || 3, 0, 0, 0 | 2, 0, 0, 0 | 1, 0, 0, 0 | 0, 0, 0, 0) + _mm256_unpackhi_epi16( + idx, _mm256_set1_epi16(0) + ), + SCALE + ), + _mm256_set1_epi32(mask) + ) + ); +} + +template +inline __m256i +seqan_mm256_i32gather_epi(TValue const * memAddr, + __m256i const & idx, + 
std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + constexpr auto const mask = static_cast(MaxValue::VALUE); + + return _mm256_and_si256( + _mm256_i32gather_epi32((const int *) memAddr, idx, SCALE), + _mm256_set1_epi32(mask) + ); +} + +template +inline __m256i +seqan_mm256_i64gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + constexpr auto const mask = static_cast(MaxValue::VALUE); + + return _mm256_and_si256( + _mm256_i64gather_epi64((const long long *) memAddr, idx, SCALE), + _mm256_set1_epi64x(mask) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + seqan_mm256_i8gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 16>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. E.g., -3 = 253 in 8 bit, but would be 65533 in 16 bit. + // Use _gather(uint8_t) and extend the sign to [u]int16_t. 
+ return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i16sign_extend_epis8( + seqan_mm256_i16gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i16gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32sign_extend_epis8( + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int16_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. 
+ return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32sign_extend_epis16( + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis8( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int16_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis16( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int32_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis32( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +// -------------------------------------------------------------------------- +// _shuffleVector (256bit) +// 
-------------------------------------------------------------------------- + +inline __m256i +seqan_m256_shuffle_epi8(__m256i const & vector, __m256i const & indices) +{ + return _mm256_xor_si256( + // shuffle bytes from the lower bytes of vector + _mm256_shuffle_epi8( + // repeat twice the low bytes of vector in a new __m256i vector i.e. + // vh[127:0] = v[127:0] + // vh[255:128] = v[127:0] + _mm256_broadcastsi128_si256( + _mm256_extracti128_si256(vector, 0) + ), + // ((indices[i] << 3) & 0b1000 0000) ^ indices[i]: + // Adds the 5th bit of indices[i] as most significant bit. If the + // 5th bit is set, that means that indices[i] >= 16. + // r = _mm256_shuffle_epi8(vl, indices) will set r[i] = 0 if the + // most significant bit of indices[i] is 1. Since this bit is the + // 5th bit, r[i] = 0 if indices[i] >= 16 and r[i] = vl[indices[i]] + // if indices[i] < 16. + _mm256_xor_si256( + _mm256_and_si256( + _mm256_slli_epi16(indices, 3), + _mm256_set1_epi8(-127) // 0b1000 0000 + ), + indices + ) + ), + // shuffle bytes from the higher bytes of vector + _mm256_shuffle_epi8( + // repeat twice the higher bytes of vector in a new __m256i vector + // i.e. + // vh[127:0] = v[255:128] + // vh[255:128] = v[255:128] + _mm256_broadcastsi128_si256( + _mm256_extracti128_si256(vector, 1) + ), + // indices[i] - 16: + // r = _mm256_shuffle_epi8(vh, indices) + // will return r[i] = 0 if the most significant bit of the byte + // indices[i] is 1. Thus, indices[i] - 16 will select all high + // bytes in vh, i.e. r[i] = vh[indices[i] - 16], if indices[i] >= + // 16 and r[i] = 0 if indices[i] < 16. 
+ _mm256_sub_epi8( + indices, + _mm256_set1_epi8(16) + ) + ) + ); +} + +inline __m256i +seqan_m256_shuffle_epi16(const __m256i a, const __m256i b) +{ + // multiply by 2 + __m256i idx = _mm256_slli_epi16( + _mm256_permute4x64_epi64(b, 0b01010000), + 1 + ); + // _print(_mm256_add_epi8(idx, _mm256_set1_epi8(1))); + // _print( _mm256_unpacklo_epi8( + // idx, + // _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + // )); + return seqan_m256_shuffle_epi8( + a, + // interleave idx[15:0] = 2*indices[15], ..., 2*indices[0] + // with idx[15:0]+1 = 2*indices[15]+1, ..., 2*indices[0]+1 + // => 2*indices[15]+1, 2*indices[15], ..., 2*indices[0]+1, 2*indices[0] + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ) + ); +} + +inline __m256i +seqan_m256_shuffle_epi32(const __m256i a, const __m256i b) +{ + // multiply by 4 + __m256i idx = _mm256_slli_epi16( + _mm256_permutevar8x32_epi32(b, __m256i {0x0, 0x0, 0x1, 0x0}), + 2 + ); + return seqan_m256_shuffle_epi8( + a, + // interleave 4*indices[7]+1, 4*indices[7]+0; ..., 4*indices[0]+1, 4*indices[0]+0 + // with 4*indices[7]+3, 4*indices[7]+2; ..., 4*indices[0]+3, 4*indices[0]+2 + // => 4*indices[7]+3, 4*indices[7]+2; 4*indices[7]+1, 4*indices[7]+0; + // ... 
+ // 4*indices[0]+3, 4*indices[0]+2; 4*indices[0]+1, 4*indices[0]+0 + _mm256_unpacklo_epi16( + // interleave idx[7:0]+0 = 4*indices[7]+0; ...; 4*indices[0]+0 + // with idx[7:0]+1 = 4*indices[7]+1; ...; 4*indices[0]+1 + // => 4*indices[7]+1; 4*indices[7]+0; ...; 4*indices[0]+1; 4*indices[0]+0 + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ), + // interleave idx[7:0]+2 = 4*indices[7]+2; ...; 4*indices[0]+2 + // with idx[7:0]+3 = 4*indices[7]+3; ...; 4*indices[0]+3 + // => 4*indices[7]+3; 4*indices[7]+2; ...; 4*indices[0]+3; 4*indices[0]+2 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(2)), + _mm256_add_epi8(idx, _mm256_set1_epi8(3)) + ) + )); +} + +#define seqan_mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + +inline __m256i +seqan_m256_shuffle_epi64(const __m256i a, const __m256i b) +{ + __m128i lowidx = _mm256_extracti128_si256( + // multiply by 8 + _mm256_slli_epi16(b, 3), + 0 + ); + + __m256i idx = seqan_mm256_set_m128i( + _mm_srli_si128(lowidx, 2), + lowidx + ); + + return seqan_m256_shuffle_epi8( + a, + _mm256_unpacklo_epi32( + // interleave 8*indices[3]+1, 8*indices[3]+0; ..., 8*indices[0]+1, 8*indices[0]+0 + // with 8*indices[3]+3, 8*indices[3]+2; ..., 8*indices[0]+3, 8*indices[0]+2 + // => 8*indices[3]+3, 8*indices[3]+2; 8*indices[3]+1, 8*indices[3]+0; + // ... 
+ // 8*indices[0]+3, 8*indices[0]+2; 8*indices[0]+1, 8*indices[0]+0 + _mm256_unpacklo_epi16( + // interleave idx[3:0]+0 = 8*indices[3]+0; ...; 8*indices[0]+0 + // with idx[3:0]+1 = 8*indices[3]+1; ...; 8*indices[0]+1 + // => 8*indices[3]+1; 8*indices[3]+0; ...; 8*indices[0]+1; 8*indices[0]+0 + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ), + // interleave idx[3:0]+2 = 8*indices[3]+2; ...; 8*indices[0]+2 + // with idx[3:0]+3 = 8*indices[3]+3; ...; 8*indices[0]+3 + // => 8*indices[3]+3; 8*indices[3]+2; ...; 8*indices[0]+3; 8*indices[0]+2 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(2)), + _mm256_add_epi8(idx, _mm256_set1_epi8(3)) + ) + ), + // interleave 8*indices[3]+5, 8*indices[3]+4; ..., 8*indices[0]+5, 8*indices[0]+4 + // with 8*indices[3]+7, 8*indices[3]+6; ..., 8*indices[0]+7, 8*indices[0]+6 + // => 8*indices[3]+7, 8*indices[3]+6; 8*indices[3]+5, 8*indices[3]+4; + // ... + // 8*indices[0]+7, 8*indices[0]+6; 8*indices[0]+5, 8*indices[0]+4 + _mm256_unpacklo_epi16( + // interleave idx[3:0]+4 = 8*indices[3]+4; ...; 8*indices[0]+4 + // with idx[3:0]+5 = 8*indices[3]+5; ...; 8*indices[0]+5 + // => 8*indices[3]+5; 8*indices[3]+4; ...; 8*indices[0]+5; 8*indices[0]+4 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(4)), + _mm256_add_epi8(idx, _mm256_set1_epi8(5)) + ), + // interleave idx[3:0]+6 = 8*indices[3]+6; ...; 8*indices[0]+6 + // with idx[3:0]+7 = 8*indices[3]+7; ...; 8*indices[0]+7 + // => 8*indices[3]+7; 8*indices[3]+6; ...; 8*indices[0]+7; 8*indices[0]+6 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(6)), + _mm256_add_epi8(idx, _mm256_set1_epi8(7)) + ) + ) + ) + ); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 16>, SimdParams_<16, 16>) +{ + // copy 2nd 64bit word to 3rd, compute 2*idx + __m256i idx = _mm256_slli_epi16(_mm256_permute4x64_epi64(_mm256_castsi128_si256(SEQAN_VECTOR_CAST_(const __m128i &, 
indices)), 0x50), 1); + + // interleave with 2*idx+1 and call shuffle + return SEQAN_VECTOR_CAST_(TSimdVector1, + _mm256_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8( + idx, _mm256_set1_epi8(1) + ) + ) + ) + ); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 32>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 16>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi16( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 8>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi32( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 4>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi64( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +// -------------------------------------------------------------------------- +// _transposeMatrix (256bit) +// -------------------------------------------------------------------------- + +// emulate missing _mm256_unpacklo_epi128/_mm256_unpackhi_epi128 instructions +inline __m256i _mm256_unpacklo_epi128(__m256i const & a, __m256i const & b) +{ + return _mm256_permute2x128_si256(a, b, 0x20); +// return 
_mm256_inserti128_si256(a, _mm256_extracti128_si256(b, 0), 1); +} + +inline __m256i _mm256_unpackhi_epi128(__m256i const & a, __m256i const & b) +{ + return _mm256_permute2x128_si256(a, b, 0x31); +// return _mm256_inserti128_si256(b, _mm256_extracti128_si256(a, 1), 0); +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<32, 32, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permuted, transposed rows + static const unsigned char bitRev[] = { 0, 8, 4,12, 2,10, 6,14, 1, 9, 5,13, 3,11, 7,15, + 16,24,20,28,18,26,22,30,17,25,21,29,19,27,23,31}; + + // transpose a 32x32 byte matrix + __m256i tmp1[32]; + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i]), + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i+1]) + ); + tmp1[i+16] = _mm256_unpackhi_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i]), + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i+1]) + ); + } + __m256i tmp2[32]; + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+16] = _mm256_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_unpacklo_epi128(tmp2[2*i],tmp2[2*i+1])); + matrix[bitRev[i+16]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_unpackhi_epi128(tmp2[2*i],tmp2[2*i+1])); + } +} + +// -------------------------------------------------------------------------- +// Function _testAllZeros (256bit) +// -------------------------------------------------------------------------- + +template +SEQAN_FUNC_ENABLE_IF(Is >, 
int) +inline _testAllZeros(TSimdVector const & vector, TSimdVector const & mask, SimdParams_<32>) +{ + return _mm256_testz_si256(SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, mask)); +} + +// -------------------------------------------------------------------------- +// Function _testAllOnes (256bit) +// -------------------------------------------------------------------------- + +template +inline int _testAllOnes(TSimdVector const & vector, SimdParams_<32>) +{ + __m256i vec = SEQAN_VECTOR_CAST_(const __m256i &, vector); + return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec)); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h b/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h new file mode 100644 index 0000000..b0ab6c3 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h @@ -0,0 +1,284 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// generic SIMD interface for AVX512 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ + +namespace seqan { + +// SimdParams_<64, 64>: 512bit = 64 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64Char, char, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64SChar, signed char, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64UChar, unsigned char, 64) + +// SimdParams_<64, 32>: 512bit = 32 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32Short, short, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32UShort, unsigned short, 64) + +// SimdParams_<64, 16>: 512bit = 16 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Int, int, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UInt, unsigned int, 64) + +// SimdParams_<64, 8>: 512bit = 8 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Int64, int64_t, 64) 
+SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UInt64, uint64_t, 64) + +// ============================================================================ +// Functions +// ============================================================================ + +// ============================================================================ +// AVX512 wrappers (512bit vectors) +// ============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (512bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<64, L>) +{ + vector = createVector(std::get<0>(x)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<64, L>) +{ + using TSimdValue = typename Value::Type; + vector = TSimdVector{static_cast(std::get(args))...}; +} + +// -------------------------------------------------------------------------- +// _clearVector (512bit) +// -------------------------------------------------------------------------- + +template +inline void _clearVector(TSimdVector & vector, SimdParams_<64, L>) +{ + vector = TSimdVector{}; +} + +// -------------------------------------------------------------------------- +// _createVector (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<64, L>) +{ + using TValue_ = typename Value::Type; + return TSimdVector{} + static_cast(x); +} + +// -------------------------------------------------------------------------- +// _cmpEq (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector & a, TSimdVector & b, SimdParams_<64, L>) +{ + return a == 
b; +} + +// bad auto-vectorization for gcc +#ifndef __AVX512BW__ +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<64, 32>) +{ + auto aLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 0); + auto bLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 0); + auto cmpLow = _mm256_cmpeq_epi16(aLow, bLow); + + auto aHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 1); + auto bHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 1); + auto cmpHigh = _mm256_cmpeq_epi16(aHigh, bHigh); + + auto result = _mm512_broadcast_i64x4(cmpLow); + result = _mm512_inserti64x4(result, cmpHigh, 1); + return SEQAN_VECTOR_CAST_(TSimdVector, result); +} +#endif + +// -------------------------------------------------------------------------- +// _cmpGt (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a > b; +} + +// -------------------------------------------------------------------------- +// _bitwiseAndNot (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector & a, TSimdVector & b, SimdParams_<64, L>) +{ + return (~a & b); +} + +// -------------------------------------------------------------------------- +// _max (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a > b ? a : b; +} + +// -------------------------------------------------------------------------- +// _min (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a < b ? 
a : b; +} + +// -------------------------------------------------------------------------- +// _blend (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<64, L>) +{ + return mask ? b : a; +} + +// bad auto-vectorization for gcc +#ifndef __AVX512BW__ +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<64, 32>) +{ + auto aLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 0); + auto bLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 0); + auto maskLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, mask), 0); + auto blendLow = _mm256_blendv_epi8(aLow, bLow, maskLow); + + auto aHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 1); + auto bHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 1); + auto maskHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, mask), 1); + auto blendHigh = _mm256_blendv_epi8(aHigh, bHigh, maskHigh); + + auto result = _mm512_broadcast_i64x4(blendLow); + result = _mm512_inserti64x4(result, blendHigh, 1); + return SEQAN_VECTOR_CAST_(TSimdVector, result); +} +#endif + +// -------------------------------------------------------------------------- +// _storeu (512bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector & vec, SimdParams_<64, L>) +{ + constexpr auto length = LENGTH::VALUE; + for (unsigned i = 0; i < length; i++) + memAddr[i] = vec[i]; +} + +// ---------------------------------------------------------------------------- +// Function _load() 512bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<64, L>) 
+{ + constexpr auto length = LENGTH::VALUE; + TSimdVector result; + for (unsigned i = 0; i < length; i++) + result[i] = memAddr[i]; + return result; +} + +// -------------------------------------------------------------------------- +// _shiftRightLogical (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<64, L>) +{ + return vector >> imm; +} + +// -------------------------------------------------------------------------- +// _gather (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & /*scale*/, + SimdParams_<64, L>) +{ + constexpr auto length = LENGTH::VALUE; + TSimdVector result; + for (unsigned i = 0; i < length; i++) + result[i] = memAddr[idx[i]]; + return result; +} + +// -------------------------------------------------------------------------- +// _shuffleVector (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<64, L>, SimdParams_<64, 64>) +{ + constexpr auto length = seqan::LENGTH::VALUE; + TSimdVector1 result{}; + for(unsigned i = 0u; i < length; ++i) + result[i] = vector[indices[i]]; + return result; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h b/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h new file mode 100644 index 0000000..03b86ed --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h @@ -0,0 +1,1053 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// 
========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE4.2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ + +namespace seqan { + +// SimdParams_<8, 8>: 64bit = 8 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Char, char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8SChar, signed char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UChar, unsigned char, 8) + +// SimdParams_<8, 4>: 64bit = 4 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Short, short, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UShort, unsigned short, 8) + +// SimdParams_<8, 2>: 64bit = 2 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int, int, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt, unsigned int, 8) + +// SimdParams_<16, 16>: 128bit = 16 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Char, char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16SChar, signed char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UChar, unsigned char, 16) + +// SimdParams_<16, 8>: 128bit = 8 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Short, short, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UShort, unsigned short, 16) + +// SimdParams_<16, 4>: 128bit = 4 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int, int, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt, unsigned int, 16) + +// SimdParams_<16, 2>: 128bit = 2 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int64, int64_t, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt64, uint64_t, 16) + +// ============================================================================ +// Functions +// 
============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (128bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 16> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi8(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 8> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi16(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 4> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi32(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 2> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi64x(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 16> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi8(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 8> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi16(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 4> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi32(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, 
+ SimdParams_<16, 2> const &) +{ + // reverse argument list 0, 1 -> 1, 0 + // NOTE(marehr): Intel linux fails to reverse argument list and only + // _mm_set_epi64x has no reverse equivalent + // NOTE(rrahn): For g++-4.9 the set_epi function is a macro, which does not work with parameter pack expansion. + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set_epi64x(std::get(args)...)); +} + +// -------------------------------------------------------------------------- +// _clearVector (128bit) +// -------------------------------------------------------------------------- + +template +inline void _clearVector(TSimdVector & vector, SimdParams_<16, L>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setzero_si128()); +} + +// -------------------------------------------------------------------------- +// _createVector (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi8(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi16(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi32(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi64x(x)); +} + +// -------------------------------------------------------------------------- +// cmpEq (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, 
TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _cmpGt (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80 = ~0x7F (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi8( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi8(~0x7F)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi8(~0x7F)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000 = ~0x7FFF (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi16( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi16(~0x7FFF)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi16(~0x7FFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80000000 = ~0x7FFFFFFF (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi32( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi32(~0x7FFFFFFF)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi32(~0x7FFFFFFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000000000000000ul = ~0x7FFFFFFFFFFFFFFFul (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi64( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a) ,_mm_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)))); +} + +// -------------------------------------------------------------------------- +// _bitwiseOr (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseOr(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_or_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAnd (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAnd(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_and_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// 
_bitwiseAndNot (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_andnot_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseNot (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +// -------------------------------------------------------------------------- +// _divide (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi8(a, _mm_set1_epi8(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi16(a, _mm_set1_epi16(b))); +} + +template +inline TSimdVector _divide(TSimdVector 
const & a, int b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi32(a, _mm_set1_epi32(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi64(a, _mm_set1_epi64x(b))); +} + +// -------------------------------------------------------------------------- +// _add (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _sub (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) 
+{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _mult (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<16, 16>) +{ + SEQAN_ASSERT_FAIL("SSE intrinsics for multiplying 8 bit values not implemented!"); + return a; +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_mullo_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_mullo_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<16, 2>) +{ + SEQAN_ASSERT_FAIL("SSE intrinsics for multiplying 64 bit values not implemented!"); + return a; +} + +// -------------------------------------------------------------------------- +// _max (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const 
& b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ +#if defined(__AVX512VL__) + 
return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + + +// -------------------------------------------------------------------------- +// _min (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector 
_min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +// -------------------------------------------------------------------------- +// _blend (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_blendv_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b), + SEQAN_VECTOR_CAST_(const __m128i&, mask))); +} + +// -------------------------------------------------------------------------- +// _storeu (128bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector const & vec, SimdParams_<16, L>) +{ + _mm_storeu_si128((__m128i*)memAddr, reinterpret_cast(vec)); +} + +// ---------------------------------------------------------------------------- +// Function _load() 128bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_load_si128((__m128i const *) memAddr)); +} + +// 
-------------------------------------------------------------------------- +// _shiftRightLogical (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi16(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm) & _mm_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi16(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi32(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi64(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} + +// -------------------------------------------------------------------------- +// _gather (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & /*scale*/, + TSimdParams) +{ + TSimdVector ret; + for (auto i = 0u; i < LENGTH::VALUE; ++i) + { + ret[i] = memAddr[idx[i]]; + } + return ret; +} + +// -------------------------------------------------------------------------- +// _shuffleVector (128bit) +// -------------------------------------------------------------------------- + +inline __m128i +seqan_mm_shuffle_epi16(const __m128i a, const __m128i b) +{ + // multiply by 2 + __m128i idx = _mm_slli_epi16(b, 1); + return _mm_shuffle_epi8( + a, + // interleave idx[7:0] = 2*indices[7], ..., 2*indices[0] + // with 
idx[7:0]+1 = 2*indices[7]+1, ..., 2*indices[0]+1 + // => 2*indices[7]+1, 2*indices[7], ..., 2*indices[0]+1, 2*indices[0] + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ) + ); +} + +inline __m128i +seqan_mm_shuffle_epi32(const __m128i a, const __m128i b) +{ + // multiply by 4 + __m128i idx = _mm_slli_epi16(b, 2); + return _mm_shuffle_epi8( + a, + // interleave 4*indices[3]+1, 4*indices[3]+0; ..., 4*indices[0]+1, 4*indices[0]+0 + // with 4*indices[3]+3, 4*indices[3]+2; ..., 4*indices[0]+3, 4*indices[0]+2 + // => 4*indices[3]+3, 4*indices[3]+2; 4*indices[3]+1, 4*indices[3]+0; + // ... + // 4*indices[0]+3, 4*indices[0]+2; 4*indices[0]+1, 4*indices[0]+0 + _mm_unpacklo_epi16( + // interleave idx[3:0]+0 = 4*indices[3]+0; ...; 4*indices[0]+0 + // with idx[3:0]+1 = 4*indices[3]+1; ...; 4*indices[0]+1 + // => 4*indices[3]+1; 4*indices[3]+0; ...; 4*indices[0]+1; 4*indices[0]+0 + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ), + // interleave idx[3:0]+2 = 4*indices[3]+2; ...; 4*indices[0]+2 + // with idx[3:0]+3 = 4*indices[3]+3; ...; 4*indices[0]+3 + // => 4*indices[3]+3; 4*indices[3]+2; ...; 4*indices[0]+3; 4*indices[0]+2 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(2)), + _mm_add_epi8(idx, _mm_set1_epi8(3)) + ) + )); +} + +inline __m128i +seqan_mm_shuffle_epi64(const __m128i a, const __m128i b) +{ + // multiply by 8 + __m128i idx = _mm_slli_epi16(b, 3); + return _mm_shuffle_epi8( + a, + _mm_unpacklo_epi32( + // interleave 8*indices[1]+1, 8*indices[1]+0; ..., 8*indices[0]+1, 8*indices[0]+0 + // with 8*indices[1]+3, 8*indices[1]+2; ..., 8*indices[0]+3, 8*indices[0]+2 + // => 8*indices[1]+3, 8*indices[1]+2; 8*indices[1]+1, 8*indices[1]+0; + // ... 
+ // 8*indices[0]+3, 8*indices[0]+2; 8*indices[0]+1, 8*indices[0]+0 + _mm_unpacklo_epi16( + // interleave idx[1:0]+0 = 8*indices[1]+0; ...; 8*indices[0]+0 + // with idx[1:0]+1 = 8*indices[1]+1; ...; 8*indices[0]+1 + // => 8*indices[1]+1; 8*indices[1]+0; ...; 8*indices[0]+1; 8*indices[0]+0 + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ), + // interleave idx[1:0]+2 = 8*indices[1]+2; ...; 8*indices[0]+2 + // with idx[1:0]+3 = 8*indices[1]+3; ...; 8*indices[0]+3 + // => 8*indices[1]+3; 8*indices[1]+2; ...; 8*indices[0]+3; 8*indices[0]+2 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(2)), + _mm_add_epi8(idx, _mm_set1_epi8(3)) + ) + ), + // interleave 8*indices[1]+5, 8*indices[1]+4; ..., 8*indices[0]+5, 8*indices[0]+4 + // with 8*indices[1]+7, 8*indices[1]+6; ..., 8*indices[0]+7, 8*indices[0]+6 + // => 8*indices[1]+7, 8*indices[1]+6; 8*indices[1]+5, 8*indices[1]+4; + // ... + // 8*indices[0]+7, 8*indices[0]+6; 8*indices[0]+5, 8*indices[0]+4 + _mm_unpacklo_epi16( + // interleave idx[1:0]+4 = 8*indices[1]+4; ...; 8*indices[0]+4 + // with idx[1:0]+5 = 8*indices[1]+5; ...; 8*indices[0]+5 + // => 8*indices[1]+5; 8*indices[1]+4; ...; 8*indices[0]+5; 8*indices[0]+4 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(4)), + _mm_add_epi8(idx, _mm_set1_epi8(5)) + ), + // interleave idx[1:0]+6 = 8*indices[1]+6; ...; 8*indices[0]+6 + // with idx[1:0]+7 = 8*indices[1]+7; ...; 8*indices[0]+7 + // => 8*indices[1]+7; 8*indices[1]+6; ...; 8*indices[0]+7; 8*indices[0]+6 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(6)), + _mm_add_epi8(idx, _mm_set1_epi8(7)) + ) + ) + ) + ); +} + +template +[[deprecated("Here be dragons")]] +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 8>, SimdParams_<8, 8>) +{ +#if SEQAN_IS_32_BIT + __m128i idx = _mm_slli_epi16( + _mm_unpacklo_epi32( + _mm_cvtsi32_si128(reinterpret_cast(indices)), + _mm_cvtsi32_si128(reinterpret_cast(indices) >> 32) + ), + 1 + ); 
+#else + __m128i idx = _mm_slli_epi16(_mm_cvtsi64_si128(reinterpret_cast(indices)), 1); +#endif // SEQAN_IS_32_BIT + return SEQAN_VECTOR_CAST_(TSimdVector1, + _mm_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + _mm_unpacklo_epi8(idx, _mm_add_epi8(idx, _mm_set1_epi8(1))) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 16>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + _mm_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 8>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi16( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 4>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi32( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 2>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi64( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +// -------------------------------------------------------------------------- +// _transposeMatrix (128bit) +// -------------------------------------------------------------------------- + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<8, 8, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + 
static const unsigned char bitRev[] = {0,4,2,6,1,5,3,7}; + + // transpose a 8x8 byte matrix + __m64 tmp1[8]; + for (int i = 0; i < 4; ++i) + { + tmp1[i] = _mm_unpacklo_pi8(SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i+1])); + tmp1[i+4] = _mm_unpackhi_pi8(SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i+1])); + } + __m64 tmp2[8]; + for (int i = 0; i < 4; ++i) + { + tmp2[i] = _mm_unpacklo_pi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+4] = _mm_unpackhi_pi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 4; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpacklo_pi32(tmp2[2*i], tmp2[2*i+1])); + matrix[bitRev[i+4]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpackhi_pi32(tmp2[2*i], tmp2[2*i+1])); + } +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<16, 16, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + static const unsigned char bitRev[] = {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; + + // transpose a 16x16 byte matrix + // + // matrix = + // A0 A1 A2 ... Ae Af + // B0 B1 B2 ... Be Bf + // ... + // P0 P1 P2 ... Pe Pf + __m128i tmp1[16]; + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi8(SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i+1])); + tmp1[i+8] = _mm_unpackhi_epi8(SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i+1])); + } + // tmp1[0] = A0 B0 A1 B1 ... A7 B7 + // tmp1[1] = C0 D0 C1 D1 ... C7 D7 + // ... + // tmp1[7] = O0 P0 O1 P1 ... O7 P7 + // tmp1[8] = A8 B8 A9 B9 ... Af Bf + // ... + // tmp1[15] = O8 P8 O9 P9 ... Of Pf + __m128i tmp2[16]; + for (int i = 0; i < 8; ++i) + { + tmp2[i] = _mm_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+8] = _mm_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + // tmp2[0] = A0 B0 C0 D0 ... 
A3 B3 C3 D3 + // tmp2[1] = E0 F0 G0 H0 ... E3 F3 G3 H3 + // ... + // tmp2[3] = M0 N0 O0 P0 ... M3 N3 O3 P3 + // tmp2[4] = A8 B8 C8 D8 ... Ab Bb Cb Db + // ... + // tmp2[7] = M8 N8 O8 P8 ... Mb Nb Ob Pb + // tmp2[8] = A4 B4 C4 D4 ... A7 B7 C7 D7 + // .. + // tmp2[12] = Ac Bc Cc Dc ... Af Bf Cf Df + // ... + // tmp2[15] = Mc Nc Oc Pc ... Mf Nf Of Pf + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+8] = _mm_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + // tmp1[0] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + // ... + // tmp1[4] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + for (int i = 0; i < 8; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1])); + matrix[bitRev[i+8]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1])); + } +} + +// -------------------------------------------------------------------------- +// Function _testAllZeros (128bit) +// -------------------------------------------------------------------------- + +template +SEQAN_FUNC_ENABLE_IF(Is >, int) +inline _testAllZeros(TSimdVector const & vector, TSimdVector const & mask, SimdParams_<16>) +{ + return _mm_testz_si128(SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, mask)); +} + +// -------------------------------------------------------------------------- +// Function _testAllOnes (128bit) +// -------------------------------------------------------------------------- + +template +inline +SEQAN_FUNC_ENABLE_IF(Is >, int) +_testAllOnes(TSimdVector const & vector, SimdParams_<16>) +{ + return _mm_test_all_ones(SEQAN_VECTOR_CAST_(const __m128i &, vector)); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_interface.h b/porechop/include/seqan/simd/simd_base_seqan_interface.h new file mode 
100644 index 0000000..82c4931 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_interface.h @@ -0,0 +1,392 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ + +namespace seqan { + +template +struct SimdMaskVectorImpl +{ + using Type = typename SimdVectorTraits::VALUE>>::MaskType; +}; + +template +struct SimdSwizzleVectorImpl +{ + typedef typename SimdVector::Type Type; +}; + +// ============================================================================ +// +// INTERFACE FUNCTIONS +// - these should be used in the actual code, they will call one of the wrapper +// functions defined above based on the vector type +// +// ============================================================================ + +// -------------------------------------------------------------------------- +// Function transpose() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +transpose(TSimdVector matrix[ROWS]) +{ + typedef typename Value::Type TValue; + _transposeMatrix(matrix, SimdMatrixParams_::VALUE, BitsPerValue::VALUE>()); +} + +// -------------------------------------------------------------------------- +// Function clearVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector) +{ + typedef typename Value::Type TValue; + _clearVector(vector, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline 
SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +createVector(TValue const x) +{ + typedef typename Value::Type TIVal; + return _createVector(x, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function fillVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args) +{ + // On clang (<= 4.0) + // std::make_tuple(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) reaches the + // template recursion limit of 256 (e.g. -ftemplate-depth=256 is default) + // + // See same issue asked on http://stackoverflow.com/q/23374953 + // See also discussion to increase -ftemplate-depth to 1024 by default in + // clang https://llvm.org/bugs/show_bug.cgi?id=18417 + typedef typename Value::Type TIVal; + _fillVector(vector, std::make_tuple(args...), + std::make_index_sequence{}, + SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function cmpEq() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpEq(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator==() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpEq(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operatorGt() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is 
>, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpGt(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator>() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpGt(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function max() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _max(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function min() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _min(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator|() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseOr(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator|=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator|=(TSimdVector & a, TSimdVector const 
& b) +{ + a = a | b; + return a; +} + +// -------------------------------------------------------------------------- +// Function operator&() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseAnd(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator&=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b) +{ + a = a & b; + return a; +} + +// -------------------------------------------------------------------------- +// Function operator~() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a) +{ + typedef typename Value::Type TValue; + return _bitwiseNot(a, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator+() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _add(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator-() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _sub(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator*() +// 
-------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _mult(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator/() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _div(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function andNot +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseAndNot(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function shiftRightLogical() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm) +{ + typedef typename Value::Type TValue; + return _shiftRightLogical(vector, imm, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function blend() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask) +{ + typedef typename Value::Type TValue; + return _blend(a, b, mask, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function storeu() +// 
-------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec) +{ + typedef typename Value::Type TValue; + _storeu(memAddr, vec, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function load() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr) +{ + typedef typename Value::Type TValue; + return _load(memAddr, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function gather() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx) +{ + typedef typename Value::Type TInnerValue; + return _gather(memAddr, idx, std::integral_constant(), SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function shuffleVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices) +{ + typedef typename Value::Type TValue1; + typedef typename Value::Type TValue2; + return _shuffleVector( + vector, + indices, + SimdParams_(), + SimdParams_()); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ diff --git a/porechop/include/seqan/simd/simd_base_umesimd_impl.h b/porechop/include/seqan/simd/simd_base_umesimd_impl.h new file mode 100644 index 0000000..da2e20c --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_umesimd_impl.h @@ -0,0 +1,655 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// 
========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// SIMD implementation of umesimd +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_ + +#include "umesimd/UMESimd.h" + +namespace seqan +{ + +template +struct SimdMaskVectorImpl +{ + using Type = typename UME::SIMD::SIMDTraits::MASK_T; +}; + +template +struct SimdSwizzleVectorImpl +{ + using Type = typename UME::SIMD::SIMDTraits::SWIZZLE_T; +}; + +template +struct SimdVector +{ + typedef UME::SIMD::SIMDVec Type; +}; + +// // 64 bit +// using SimdVector8Char = UME::SIMD::SIMDVec; +using SimdVector8SChar = UME::SIMD::SIMDVec; +using SimdVector8UChar = UME::SIMD::SIMDVec; +using SimdVector4Short = UME::SIMD::SIMDVec; +using SimdVector4UShort = UME::SIMD::SIMDVec; +using SimdVector2Int = UME::SIMD::SIMDVec; +using SimdVector2UInt = UME::SIMD::SIMDVec; + +// 128 bit +// using SimdVector16Char = UME::SIMD::SIMDVec; +using SimdVector16SChar = UME::SIMD::SIMDVec; +using SimdVector16UChar = UME::SIMD::SIMDVec; +using SimdVector8Short = UME::SIMD::SIMDVec; +using SimdVector8UShort = UME::SIMD::SIMDVec; +using SimdVector4Int = UME::SIMD::SIMDVec; +using SimdVector4UInt = UME::SIMD::SIMDVec; +using SimdVector2Int64 = UME::SIMD::SIMDVec; +using SimdVector2UInt64 = UME::SIMD::SIMDVec; + +// 256 bit +// using SimdVector32Char = UME::SIMD::SIMDVec; +using SimdVector32SChar = UME::SIMD::SIMDVec; +using SimdVector32UChar = UME::SIMD::SIMDVec; +using SimdVector16Short = UME::SIMD::SIMDVec; +using SimdVector16UShort = UME::SIMD::SIMDVec; +using SimdVector8Int = UME::SIMD::SIMDVec; +using SimdVector8UInt = UME::SIMD::SIMDVec; +using SimdVector4Int64 = UME::SIMD::SIMDVec; +using SimdVector4UInt64 = UME::SIMD::SIMDVec; + +// 512 bit +// using 
SimdVector64Char = UME::SIMD::SIMDVec; +using SimdVector64SChar = UME::SIMD::SIMDVec; +using SimdVector64UChar = UME::SIMD::SIMDVec; +using SimdVector32Short = UME::SIMD::SIMDVec; +using SimdVector32UShort = UME::SIMD::SIMDVec; +using SimdVector16Int = UME::SIMD::SIMDVec; +using SimdVector16UInt = UME::SIMD::SIMDVec; +using SimdVector8Int64 = UME::SIMD::SIMDVec; +using SimdVector8UInt64 = UME::SIMD::SIMDVec; + +// ============================================================================ +// SIMDMaskVector +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVecMask), (SimdMaskVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVecMask const), (SimdMaskVectorConcept)); + +template +struct Value > +{ + typedef bool Type; +}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +inline typename Value >::Type +getValue(UME::SIMD::SIMDVecMask const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline typename Value >::Type +value(UME::SIMD::SIMDVecMask const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVecMask &vector, TPosition const pos, TValue2 const value) +{ + vector.insert(pos, value); +} + +// ============================================================================ +// SIMDSwizzle +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDSwizzle), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDSwizzle const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef uint32_t Type; +}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +inline typename Value >::Type +getValue(UME::SIMD::SIMDSwizzle const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline typename Value 
>::Type +value(UME::SIMD::SIMDSwizzle const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDSwizzle &vector, TPosition const pos, TValue2 const value) +{ + vector.insert(pos, value); +} + +// ============================================================================ +// SIMDVec_u +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_u), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_u const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_u const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_u const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_u &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +// ============================================================================ +// SIMDVec_i +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_i), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_i const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_i const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_i const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_i &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +// 
============================================================================ +// SIMDVec_f +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_f), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_f const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_f const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_f const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_f &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +} // namespace seqan + +namespace UME +{ +namespace SIMD +{ + template + inline TStream & operator<<(TStream & stream, + IntermediateIndex const & pInterIndex) + { + stream << static_cast(pInterIndex); + return stream; + } +} +} + +namespace seqan +{ + +// -------------------------------------------------------------------------- +// Function clearVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector) +{ + vector = 0; +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(And>, + Not>>>, TSimdVector) +createVector(TValue const x) +{ + return TSimdVector(static_cast(x)); +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) 
+createVector(TValue const x) +{ + return TSimdVector(x); +} + +// -------------------------------------------------------------------------- +// Function fillVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args) +{ + vector = TSimdVector(args...); +} + +// -------------------------------------------------------------------------- +// Function cmpEq() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpeq(b); +} + +// -------------------------------------------------------------------------- +// Function operator==() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpeq(b); +} + +// -------------------------------------------------------------------------- +// Function operatorGt() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpgt(b); +} + +// -------------------------------------------------------------------------- +// Function operator>() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpgt(b); +} + +// -------------------------------------------------------------------------- +// Function max() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is 
>, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b) +{ + return a.max(b); +} + +// -------------------------------------------------------------------------- +// Function min() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b) +{ + return a.min(b); +} + +// -------------------------------------------------------------------------- +// Function operator|() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b) +{ + return a.bor(b); +} + +// -------------------------------------------------------------------------- +// Function operator|=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator|=(TSimdVector & a, TSimdVector const & b) +{ + return a.bora(b); +} + +// -------------------------------------------------------------------------- +// Function operator&() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b) +{ + return a.band(b); +} + +// -------------------------------------------------------------------------- +// Function operator&=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b) +{ + return a.banda(b); +} + +// -------------------------------------------------------------------------- +// Function operator~() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a) +{ + 
return a.bnot(); +} + +// -------------------------------------------------------------------------- +// Function operator+() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b) +{ + return a.add(b); +} + +// -------------------------------------------------------------------------- +// Function operator-() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b) +{ + return a.sub(b); +} + +// -------------------------------------------------------------------------- +// Function operator*() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b) +{ + return a.mul(b); +} + +// -------------------------------------------------------------------------- +// Function operator/() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b) +{ + return a.div(b); +} + +// -------------------------------------------------------------------------- +// Function andNot +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b) +{ + return a.bandnot(b); +} + + +// -------------------------------------------------------------------------- +// Function shiftRightLogical() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm) +{ + return vector.rsh(imm); +} 
+ +// -------------------------------------------------------------------------- +// Function blend() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask) +{ + return a.blend(mask, b); +} + +// -------------------------------------------------------------------------- +// Function storeu() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec) +{ + vec.store(memAddr); +} + +// -------------------------------------------------------------------------- +// Function load() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr) +{ + return TSimdVector(memAddr); +} + +// -------------------------------------------------------------------------- +// Function gather() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(IsSameType::Type>, TSimdVector) +_gather(TValue const * memAddr, TSimdVector const & idx) +{ + using TIndexVector = typename UME::SIMD::SIMDTraits::UINT_VEC_T; + + TSimdVector a; + a.gather(memAddr, static_cast(idx)); + return a; +} + +template +inline SEQAN_FUNC_ENABLE_IF(Not::Type> >, TSimdVector) +_gather(TValue const * memAddr, TSimdVector const & idx) +{ + using TIndexVector = typename UME::SIMD::SIMDTraits::UINT_VEC_T; + + TSimdVector a; + for (auto i = 0u; i < TIndexVector::length(); ++i) + { + a[i] = memAddr[idx[i]]; + } + return a; +} + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx) +{ + return _gather(memAddr, idx); +} + +// -------------------------------------------------------------------------- +// Function 
shuffleVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices) +{ + return vector.swizzle(indices); +} + +} + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_