From 6875ad44ac62309b0d29da95200b731c573eb039 Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Sat, 18 Aug 2018 18:28:55 +0300 Subject: [PATCH 1/4] Add option to store discovered adapters and use them in subsequent runs. --- porechop/porechop.py | 60 +++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/porechop/porechop.py b/porechop/porechop.py index 45dea2c..985d6ac 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -16,16 +16,19 @@ import argparse import os +import os.path import sys import subprocess import multiprocessing import shutil import re +import json from multiprocessing.dummy import Pool as ThreadPool from collections import defaultdict from .misc import load_fasta_or_fastq, print_table, red, bold_underline, MyHelpFormatter, int_to_str from .adapters import ADAPTERS, make_full_native_barcode_adapter, make_full_rapid_barcode_adapter from .nanopore_read import NanoporeRead +from .adapters import Adapter from .version import __version__ @@ -33,22 +36,8 @@ def main(): args = get_arguments() reads, check_reads, read_type = load_reads(args.input, args.verbosity, args.print_dest, args.check_reads) + matching_sets,forward_or_reverse_barcodes = get_matching_sets(args,check_reads) - matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, - args.scoring_scheme_vals, args.print_dest, - args.adapter_threshold, args.threads) - matching_sets = exclude_end_adapters_for_rapid(matching_sets) - matching_sets = fix_up_1d2_sets(matching_sets) - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) - - if args.barcode_dir: - forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, - args.print_dest) - else: - forward_or_reverse_barcodes = None - if args.verbosity > 0: - print('\n', file=args.print_dest) if matching_sets: check_barcodes = (args.barcode_dir is not None) 
@@ -133,6 +122,9 @@ def get_arguments(): adapter_search_group = parser.add_argument_group('Adapter search settings', 'Control how the program determines which ' 'adapter sets are present') + adapter_search_group.add_argument('--adapter_storage', type=str, default=None, + help='Name of a JSON file to store discovered adapters or ' + 'to load them (and skip discovery)') adapter_search_group.add_argument('--adapter_threshold', type=float, default=90.0, help='An adapter set has to have at least this percent ' 'identity to be labelled as present and trimmed off ' @@ -219,6 +211,44 @@ def get_arguments(): return args +def get_matching_sets(args,check_reads): + if args.adapter_storage is not None and os.path.exists(args.adapter_storage): + adapter_dict = json.load(open(args.adapter_storage)) + assert adapter_dict["__version__"] == __version__, "Can only use adapter storage for version {}".format(__version__) + forward_or_reverse_barcodes = adapter_dict["forward_or_reverse_barcodes"] + matching_sets = [Adapter(name,**seqs) for name,seqs in adapter_dict["matching_sets"].items() ] + + else: + matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, + args.scoring_scheme_vals, args.print_dest, + args.adapter_threshold, args.threads) + matching_sets = exclude_end_adapters_for_rapid(matching_sets) + matching_sets = fix_up_1d2_sets(matching_sets) + display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) + matching_sets = add_full_barcode_adapter_sets(matching_sets) + + if args.barcode_dir: + forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, + args.print_dest) + else: + forward_or_reverse_barcodes = None + if args.verbosity > 0: + print('\n', file=args.print_dest) + + if args.adapter_storage is not None: + adapter_dict={"__version__":__version__, + "forward_or_reverse_barcodes":forward_or_reverse_barcodes, + "matching_sets": {} } + + for adapter in matching_sets: + 
adapter_dict["matching_sets"][adapter.name] = { + "start_sequence":adapter.start_sequence, + "end_sequence":adapter.end_sequence + } + json.dump(adapter_dict,open(args.adapter_storage,"w")) + + return matching_sets,forward_or_reverse_barcodes + def load_reads(input_file_or_directory, verbosity, print_dest, check_read_count): # If the input is a file, just load reads from that file. The check reads will just be the From 480658dd64e4eef691651fd6fda91935d9b7ebba Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 09:33:56 +0000 Subject: [PATCH 2/4] Fixed Merge conflict --- porechop/porechop.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/porechop/porechop.py b/porechop/porechop.py index a20dc56..1335ed8 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -39,25 +39,6 @@ def main(): args.check_reads) matching_sets,forward_or_reverse_barcodes = get_matching_sets(args,check_reads) -<<<<<<< HEAD -======= - matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, - args.scoring_scheme_vals, args.print_dest, - args.adapter_threshold, args.threads) - matching_sets = fix_up_1d2_sets(matching_sets) - - if args.barcode_dir: - forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, - args.print_dest) - else: - forward_or_reverse_barcodes = None - - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) - - if args.verbosity > 0: - print('\n', file=args.print_dest) ->>>>>>> upstream/master if matching_sets: check_barcodes = (args.barcode_dir is not None) From 483611a55356dbf4c4249ffb2ecbf6a009adc591 Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 11:09:30 +0000 Subject: [PATCH 3/4] Fixed more of the merge conflict --- porechop/cpp_function_wrappers.py | 2 +- porechop/porechop.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/porechop/cpp_function_wrappers.py b/porechop/cpp_function_wrappers.py index ba3f993..56026c7 100644 --- a/porechop/cpp_function_wrappers.py +++ b/porechop/cpp_function_wrappers.py @@ -21,7 +21,7 @@ SO_FILE = 'cpp_functions.so' SO_FILE_FULL = os.path.join(os.path.dirname(os.path.realpath(__file__)), SO_FILE) if not os.path.isfile(SO_FILE_FULL): - sys.exit('could not find ' + SO_FILE + ' - please reinstall') + sys.exit('could not find ' + SO_FILE_FULL + ' - please reinstall') C_LIB = CDLL(SO_FILE_FULL) C_LIB.adapterAlignment.argtypes = [c_char_p, # Read sequence diff --git a/porechop/porechop.py b/porechop/porechop.py index 1335ed8..4cf6bfc 100755 --- a/porechop/porechop.py +++ b/porechop/porechop.py @@ -223,16 +223,18 @@ def get_matching_sets(args,check_reads): matching_sets = find_matching_adapter_sets(check_reads, args.verbosity, args.end_size, args.scoring_scheme_vals, args.print_dest, args.adapter_threshold, args.threads) - matching_sets = exclude_end_adapters_for_rapid(matching_sets) + matching_sets = fix_up_1d2_sets(matching_sets) - display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) - matching_sets = add_full_barcode_adapter_sets(matching_sets) if args.barcode_dir: forward_or_reverse_barcodes = choose_barcoding_kit(matching_sets, args.verbosity, args.print_dest) else: forward_or_reverse_barcodes = None + + display_adapter_set_results(matching_sets, args.verbosity, args.print_dest) + matching_sets = add_full_barcode_adapter_sets(matching_sets) + if args.verbosity > 0: print('\n', file=args.print_dest) From 9583dead0b73c11dc7326ec0b991c7e380ca75ec Mon Sep 17 00:00:00 2001 From: Kimmo Palin Date: Wed, 23 Jan 2019 13:17:15 +0200 Subject: [PATCH 4/4] Added more of the seqann --- .../async_wave_execution_interface.h | 252 +++ .../seqan/align_parallel/dp_kernel_adaptor.h | 343 ++++ .../dp_parallel_execution_policies.h | 174 ++ .../seqan/align_parallel/dp_parallel_scout.h | 263 +++ .../align_parallel/dp_parallel_scout_simd.h | 362 ++++ 
.../seqan/align_parallel/dp_settings.h | 109 ++ .../include/seqan/align_parallel/dp_traits.h | 120 ++ .../align_parallel/parallel_align_interface.h | 366 ++++ .../wavefront_alignment_executor.h | 98 ++ .../wavefront_alignment_result.h | 165 ++ .../wavefront_alignment_scheduler.h | 347 ++++ .../align_parallel/wavefront_alignment_task.h | 404 +++++ ...wavefront_alignment_thread_local_storage.h | 130 ++ .../seqan/align_parallel/wavefront_task.h | 365 ++++ .../align_parallel/wavefront_task_event.h | 104 ++ .../align_parallel/wavefront_task_executor.h | 146 ++ .../align_parallel/wavefront_task_queue.h | 139 ++ .../align_parallel/wavefront_task_scheduler.h | 218 +++ .../align_parallel/wavefront_task_util.h | 557 ++++++ porechop/include/seqan/simd/simd_base.h | 390 +++++ .../include/seqan/simd/simd_base_seqan_impl.h | 154 ++ .../seqan/simd/simd_base_seqan_impl_avx2.h | 1492 +++++++++++++++++ .../seqan/simd/simd_base_seqan_impl_avx512.h | 284 ++++ .../seqan/simd/simd_base_seqan_impl_sse4.2.h | 1053 ++++++++++++ .../seqan/simd/simd_base_seqan_interface.h | 392 +++++ .../seqan/simd/simd_base_umesimd_impl.h | 655 ++++++++ 26 files changed, 9082 insertions(+) create mode 100644 porechop/include/seqan/align_parallel/async_wave_execution_interface.h create mode 100644 porechop/include/seqan/align_parallel/dp_kernel_adaptor.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_scout.h create mode 100644 porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h create mode 100644 porechop/include/seqan/align_parallel/dp_settings.h create mode 100644 porechop/include/seqan/align_parallel/dp_traits.h create mode 100644 porechop/include/seqan/align_parallel/parallel_align_interface.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_executor.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_result.h create mode 100644 
porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_task.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_event.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_executor.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_queue.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_scheduler.h create mode 100644 porechop/include/seqan/align_parallel/wavefront_task_util.h create mode 100644 porechop/include/seqan/simd/simd_base.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h create mode 100644 porechop/include/seqan/simd/simd_base_seqan_interface.h create mode 100644 porechop/include/seqan/simd/simd_base_umesimd_impl.h diff --git a/porechop/include/seqan/align_parallel/async_wave_execution_interface.h b/porechop/include/seqan/align_parallel/async_wave_execution_interface.h new file mode 100644 index 0000000..3ebb98a --- /dev/null +++ b/porechop/include/seqan/align_parallel/async_wave_execution_interface.h @@ -0,0 +1,252 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ + +namespace seqan +{ +namespace impl +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +/* + * @class AsyncWaveAlignExecutor + * @brief Executor of the wave-front alignment mode with no SIMD vectorization. + * @headerfile + * + * Manges shared data for the wave-front execution before executing the alignments. + */ +template +class AsyncWaveAlignExecutor +{ +public: + + using TAlignmentTask = WavefrontAlignmentTask; + using TThreadLocal = typename WavefrontAlignmentTaskConfig::TThreadLocal; + using TStorage = EnumerableThreadLocal; + using TExecutor = WavefrontAlignmentExecutor; + + TSettings _settings; + // Initialize the alignment scheduler. 
+ WavefrontAlignmentScheduler _alignScheduler; + + TStorage _threadLocalStorage{}; + TExecutor _executor{}; + unsigned _alignCounter{0}; + unsigned _blockSize{}; + + template + AsyncWaveAlignExecutor(TSettings settings, + ExecutionPolicy, Serial> const & execPolicy) : + _settings(std::move(settings)), + _alignScheduler(parallelAlignments(execPolicy), numThreads(execPolicy)), + _threadLocalStorage(TThreadLocal{parallelAlignments(execPolicy)}), + _blockSize(blockSize(execPolicy)) + { + _executor.ptrTaskScheduler = &taskScheduler(_alignScheduler); + _executor.ptrThreadLocal = &_threadLocalStorage; + setCount(storageManager(_threadLocalStorage), numThreads(execPolicy)); + } +}; + +/* + * @fn AsyncWaveAlignExecutor#submit + * @brief Submits a new alignment job asynchronosly. + */ +template +inline void +submit(AsyncWaveAlignExecutor & me, + TSeqH const & seqH, + TSeqV const & seqV, + TCallable && callback) +{ + using TAlignTask = typename AsyncWaveAlignExecutor::TAlignmentTask; + + std::function f = + [&, func = TAlignTask{me._alignCounter++, seqH, seqV, me._settings, me._blockSize}](uint16_t id) mutable + { + func(id, me._executor, std::forward(callback)); + }; + scheduleTask(me._alignScheduler, f); +} + +/* + * @fn AsyncWaveAlignExecutor#wait + * @brief Explicit barrier to wait for all submitted jobs to be finished. + */ +template +inline void +wait(AsyncWaveAlignExecutor & me) +{ + notify(me._alignScheduler); + wait(me._alignScheduler); +} + +/* + * @class AsyncWaveAlignExecutorSimd + * @brief Executor of the wave-front alignment mode with SIMD vectorization. + * @headerfile + * + * Manges shared data for the wave-front execution before executing the alignments. + */ +#ifdef SEQAN_SIMD_ENABLED +template +class AsyncWaveAlignExecutorSimd +{ +public: + + // Translate dp settings into simd settings. 
+ using TSimdSettings = SimdDPSettings; + + using TAlignmentTask = WavefrontAlignmentTask>; + using TWavefrontTask = WavefrontTask; + using TSimdTaskQueue = WavefrontTaskQueue::VALUE>; + + using TThreadLocal = typename WavefrontAlignmentSimdTaskConfig::TThreadLocal; + using TStorage = EnumerableThreadLocal; + using TExecutor = WavefrontAlignmentExecutor; + + + TSimdSettings _settings; + // Initialize the alignment scheduler. + WavefrontAlignmentScheduler _alignScheduler; + + TStorage _threadLocalStorage; + TExecutor _executor{}; + TSimdTaskQueue _simdTaskQueue{}; + unsigned _alignCounter{0}; + unsigned _blockSize{}; + + template + AsyncWaveAlignExecutorSimd(TSettings const & settings, + ExecutionPolicy, Vectorial> const & execPolicy) : + _settings(settings.scoringScheme), + _alignScheduler(parallelAlignments(execPolicy), numThreads(execPolicy)), + _threadLocalStorage(TThreadLocal{parallelAlignments(execPolicy)}), + _blockSize(blockSize(execPolicy)) + { + _executor.ptrTaskScheduler = &taskScheduler(_alignScheduler); + _executor.ptrThreadLocal = &_threadLocalStorage; + setCount(storageManager(_threadLocalStorage), numThreads(execPolicy)); + } +}; + +/* + * @fn AsyncWaveAlignExecutorSimd#submit + * @brief Submits a new alignment job asynchronosly. + */ +template +inline void +submit(AsyncWaveAlignExecutorSimd & me, + TSeqH const & seqH, + TSeqV const & seqV, + TCallable && callback) +{ + using TAlignTask = typename AsyncWaveAlignExecutorSimd::TAlignmentTask; + + // Continuator for calling the alignment instance functor. + std::function f = + [&, func = TAlignTask{me._alignCounter++, seqH, seqV, me._settings, me._blockSize}](uint16_t id) mutable + { + func(id, me._executor, me._simdTaskQueue, std::forward(callback)); + }; + scheduleTask(me._alignScheduler, f); +} + +/* + * @fn AsyncWaveAlignExecutorSimd#wait + * @brief Explicit barrier to wait for all submitted jobs to be finished. 
+ */ +template +inline void +wait(AsyncWaveAlignExecutorSimd & me) +{ + notify(me._alignScheduler); + wait2(me._alignScheduler, me._simdTaskQueue); +} +#endif // SEQAN_SIMD_ENABLED + +/* + * @fn alignExecBatch + * @brief Global interface for scheduling and running all alignment jobs with wave-front model. + */ +template +inline void +alignExecBatch(ExecutionPolicy, TSimdSpec> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TSettings const & settings, + TCallable && callback) +{ + using TSeqH = typename Value::Type; + using TSeqV = typename Value::Type; + +#ifdef SEQAN_SIMD_ENABLED + using TExecutor = std::conditional_t::value, + AsyncWaveAlignExecutorSimd, + AsyncWaveAlignExecutor>; +#else + using TExecutor = AsyncWaveAlignExecutor; +#endif + TExecutor executor(settings, execPolicy); + + for (size_t i = 0u; i < length(setH); ++i) + { + submit(executor, setH[i], setV[i], std::forward(callback)); + } + wait(executor); +} + +} // namespace impl +} // namespace seqan +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_ASYNC_WAVE_EXECUTION_INTERFACE_H_ diff --git a/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h b/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h new file mode 100644 index 0000000..e4a3eb8 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_kernel_adaptor.h @@ -0,0 +1,343 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct CorrectLastColumn_ : False +{}; + +template <> +struct CorrectLastColumn_ : True +{}; + +template +struct CorrectLastRow_ : False +{}; + +template <> +struct CorrectLastRow_ : True +{}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InitialCol; +// ---------------------------------------------------------------------------- + +// The _computeCell function is the basic interface that is called to comute +// the score for each cell and to store the corresponding traceback. +// The MetaColumnDescriptor and the CellDescriptor describe which cell in the dp matrix +// is computed. We use this information to overload the functions in order +// to initialize from the passed buffer and to store the last row/column in the buffer. + +// Vertical initialization values are copied from buffer. 
+template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & /*cacheDiag*/, + TDPCell const & /*cacheHori*/, + TDPCell & /*cacheVert*/, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + TCellDescriptor const &, // One of FirstCell, InnerCell or LastCell. + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + current = (*scout.state.ptrVerBuffer)[scout.verticalPos].i1; + assignValue(traceMatrixNavigator, (*scout.state.ptrVerBuffer)[scout.verticalPos].i2); + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, False(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InnerCol; FirstCell +// ---------------------------------------------------------------------------- + +// Horizontal initialization values are copied from buffer for all first cells. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + FirstCell const &, // One of FirstCell, InnerCell or LastCell. 
+ DPProfile_ const &) +{ + _scoreOfCell(cacheDiag) = _scoreOfCell(cacheHori); + current = (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1; + cacheVert = current; + assignValue(traceMatrixNavigator, (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2); +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); InnerCol; LastCell +// ---------------------------------------------------------------------------- + +// Values of last call are copied into the horizontal buffer for initializing next tile below. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + LastCell const & /*cellDescriptor*/, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Copy values into horizontal buffer for the tile below this tile in vertical direction. + // TODO(rrahn): We need to do this only for affine gaps? 
+ _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2 = value(traceMatrixNavigator); + } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, False(), True()); + } +} + + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol; FirstCell +// ---------------------------------------------------------------------------- + +// Horizontal initialization values are copied from buffer for all first cells. +// Vertical buffer is filled with value. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & /*seqHVal*/, + TSequenceVValue const & /*seqVVal*/, + TScoringScheme const & /*scoringScheme*/, + MetaColumnDescriptor const &, + FirstCell const &, // One of FirstCell, InnerCell or LastCell. + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + // cache previous diagonal. + _scoreOfCell(cacheDiag) = _scoreOfCell(cacheHori); + current = + front(*scout.state.ptrVerBuffer).i1 = (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1; // Copy horizontal buffer value in active cell and in + assignValue(traceMatrixNavigator, (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2); + cacheVert = current; + if (IsTracebackEnabled_::VALUE) + { + front(*scout.state.ptrVerBuffer).i2 = value(traceMatrixNavigator); // Store trace value in vertical buffer. 
+ } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol, InnerCell; +// ---------------------------------------------------------------------------- + +// Stores computed values in vertical buffer for initializing next tile right of the current. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + InnerCell const &, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Store values in vertical buffer. + _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrVerBuffer)[scout.verticalPos].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrVerBuffer)[scout.verticalPos].i2 = value(traceMatrixNavigator); + } + + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), False()); + } +} + +// ---------------------------------------------------------------------------- +// Function _computeCell(); FinalCol, LastCell; +// ---------------------------------------------------------------------------- + +// Stores computed values in vertical buffer for initializing next tile right of the current. +// Stores computed values in horizontal buffer for initializing next tile below. 
+template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + TDPCell & current, + TDPCell & cacheDiag, + TDPCell const & cacheHori, + TDPCell & cacheVert, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + MetaColumnDescriptor const &, + LastCell const &, + DPProfile_ const &) +{ + typedef DPProfile_ TDPProfile; + typedef DPMetaColumn_ > TMetaColumn; + + assignValue(traceMatrixNavigator, + _computeScore(current, cacheDiag, cacheHori, cacheVert, seqHVal, seqVVal, + scoringScheme, typename RecursionDirection_::Type(), + TDPProfile())); + // Store values in vertical and horizontal buffer + _setVerticalScoreOfCell(current, _verticalScoreOfCell(cacheVert)); + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i1 = (*scout.state.ptrVerBuffer)[scout.verticalPos].i1 = current; + if (IsTracebackEnabled_::VALUE) + { + (*scout.state.ptrHorBuffer)[scout.horizontalPos - 1].i2 = + (*scout.state.ptrVerBuffer)[scout.verticalPos].i2 = value(traceMatrixNavigator); + } + if (TrackingEnabled_::VALUE) + { + _scoutBestScore(scout, current, traceMatrixNavigator, True(), True()); + } +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_KERNEL_ADAPTOR_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h b/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h new file mode 100644 index 0000000..a2885ec --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_execution_policies.h @@ -0,0 +1,174 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== +// Policies used for parallel alignment computation. 
+// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +/*! + * @tag BlockOffsetOptimization + * @brief Optimization for vectorized wave-front execution model. + * @headerfile + * @see WavefrontExecutionPolicy + */ +struct BlockOffsetOptimization_; +using BlockOffsetOptimization = Tag; + +/*! + * @class WavefrontExecutionPolicy + * @headerfile + * @extends ExecutionPolicy + * @brief Policy to select runtime execution mode for algorithms. + * @signature template + * struct ExecutionPolicy, TVectorizationMode>; + * @tparam TWaveSpec Type specializing the wave-front threading model. + * Can be void (default) or @link BlockOffsetOptimization @endlink. + * @tparam TVectorizationMode Type specifying the vectorization model. + * Can be @link ParallelismTags#Vectorial @endlink or @link ParallelismTags#Serial @endlink (default). + * + * Special execution policy for computing sequence alignments with wave-front parallelization strategy. + * In the wave-front execution the DP matrix is partitioned into blocks which can be executed + * in parallel along the minor diagonal of the DP matrix. + * The execution policy can be further specialized if used in combination with the @link ParallelismTags#Vectorial @endlink + * execution mode (see @link WavefrontExecutionPolicy @endlink). + * + * @section Vectorization + * + * In the vectorization mode, the blocks are gathered into SIMD registers. 
+ * The @link BlockOffsetOptimization @endlink can be used to always ensure that sizeof(SIMD) / 2 many blocks + * can be packed into one SIMD register. + * This requires, that the available instruction set supports 16 bit packed SIMD operations (e.g. SSE4, AVX2) + * and the score value type (@link Score @endlink) is bigger then 16 bit. + * In the default mode, the optimization is disabled and the number of packed alignment blocks is solely determined by + * the score value type passed to the algorithm as a parameter (e.g. see @link globalAlignmentScore @endlink). + */ + template + struct WavefrontAlignment; + +template +struct ExecutionPolicy, TVectorizationSpec> : + public ExecutionPolicy +{ + /*! + *@var size_t WavefrontExecutionPolicy::blockSize + * @brief The size of the blocks to use. Defaults to 100. + */ + size_t blockSize{100}; + /*! + * @var size_t WavefrontExecutionPolicy::parallelAlignments + * @brief Number of alignments scheduled concurrently. Defaults to std::thread::hardware_concurrency(). + */ + size_t parallelAlignments{std::thread::hardware_concurrency()}; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +/*! + * @fn WavefrontExecutionPolicy#blockSize + * @brief Getter for the current block size. + * @signature size_t blockSize(exec); + * @param[in] exec The wave-front execution policy to query. + */ +template +inline auto +blockSize(ExecutionPolicy, TVectorizationSpec> const & p) +{ + return p.blockSize; +} + +/*! + * @fn WavefrontExecutionPolicy#setBlockSize + * @brief Setter for the current block size. + * @signature void setBlockSize(exec, bs); + * @param[in,out] exec The wave-front execution policy to update. 
+ * @param[in] bs The new block size to set. Must be a positive integral number greater or equal than 5. + */ +template +inline void +setBlockSize(ExecutionPolicy, TVectorizationSpec> & p, + size_t const bs) +{ + SEQAN_ASSERT_GEQ(bs, static_cast(5)); + p.blockSize = bs; +} + +/*! + * @fn WavefrontExecutionPolicy#parallelAlignments + * @brief Getter for the current number of alignments executed in parallel. + * @signature void parallelAlignments(exec); + * @param[in] exec The wave-front execution policy to update. + */ +template +inline auto +parallelAlignments(ExecutionPolicy, TVectorizationSpec> const & p) +{ + return p.parallelAlignments; +} + +/*! + * @fn WavefrontExecutionPolicy#setParallelAlignments + * @brief Setter for the current number of alignments executed in parallel. + * @signature void setParallelAlignments(exec, pa); + * @param[in,out] exec The wave-front execution policy to update. + * @param[in] pa The number of alignments to execute in parallel. Must be a positive integral number greater than 0. + */ +template +inline void +setParallelAlignments(ExecutionPolicy, TVectorizationSpec> & p, + size_t const pi) +{ + SEQAN_ASSERT_GT(pi, static_cast(0)); + p.parallelAlignments = pi; +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_PARALLEL_EXECUTION_PLOCIES_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_scout.h b/porechop/include/seqan/align_parallel/dp_parallel_scout.h new file mode 100644 index 0000000..58a238d --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_scout.h @@ -0,0 +1,263 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ +#define INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class DPTileBuffer +// ---------------------------------------------------------------------------- + +// The structure owning the horizontal/vertical buffer. +template > +struct DPTileBuffer +{ + TBuffer horizontalBuffer; + TBuffer verticalBuffer; +}; + +// ---------------------------------------------------------------------------- +// Tag DPTiled +// ---------------------------------------------------------------------------- + +// Tag used to subclass DPScoutState and DPScout. +// T represents the buffer type. +template +struct DPTiled; + +// ---------------------------------------------------------------------------- +// Class DPScoutState_; DPTiled +// ---------------------------------------------------------------------------- + +// The overloaded DPScoutState which simply stores the pointers to the corresponding buffer. 
+template +class DPScoutState_ > +{ +public: + + using TDPCell = typename Value::Type, 1>::Type; + + TBuffer* ptrHorBuffer = nullptr; + TBuffer* ptrVerBuffer = nullptr; + TThreadContext threadContext{}; + + DPScoutState_() = default; + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer) : + ptrHorBuffer(&horBuffer), + ptrVerBuffer(&verBuffer) + {} + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer, TThreadContext pThreadContext) : + ptrHorBuffer(&horBuffer), + ptrVerBuffer(&verBuffer), + threadContext(std::move(pThreadContext)) + {} +}; + +// ---------------------------------------------------------------------------- +// Class DPScout_; DPTiled +// ---------------------------------------------------------------------------- + +// Overloaded DPScout to store the corresponding buffer for the current dp tile. +template +class DPScout_ > : + public DPScout_ +{ +public: + using TBase = DPScout_; + + DPScoutState_ > state; + + size_t horizontalPos; + size_t verticalPos; + bool forceTracking; + + DPScout_(DPScoutState_ > state, + bool pForceTracking = false) : + TBase(), + state(state), + forceTracking(pForceTracking) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ScoutSpecForSimdAlignment_ +// ---------------------------------------------------------------------------- + +template +struct ScoutSpecForAlignmentAlgorithm_ > > +{ + using Type = DPTiled; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline bool +isTrackingEnabled(DPScout_ > const & /*dpScout*/, + TIsLastColumn const & /*unused*/, + TIsLastRow const & /*unused*/) +{ + return false; +} + +template +inline bool 
+isTrackingEnabled(DPScout_ > const & dpScout, + True const & /*unused*/, + True const & /*unused*/) +{ + return (dpScout.forceTracking || (dpScout.state.threadContext.task._lastHBlock && + dpScout.state.threadContext.task._lastVBlock)); +} + +template +inline bool +isTrackingEnabled(DPScout_ > const & dpScout, + True const & /*unused*/, + False const & /*unused*/) +{ + return (dpScout.forceTracking || dpScout.state.threadContext.task._lastHBlock); +} + +template +inline bool +isTrackingEnabled(DPScout_ > const & dpScout, + False const & /*unused*/, + True const & /*unused*/) +{ + return (dpScout.forceTracking || dpScout.state.threadContext.task._lastVBlock); +} + +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() +// ---------------------------------------------------------------------------- + +template +inline void +_scoutBestScore(DPScout_ > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TBaseScout = typename DPScout_ >::TBase; + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// Tracks the new score, if it is the new maximum. 
+template +inline void +_scoutBestScore(DPScout_ > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TBaseScout = typename DPScout_ >::TBase; + if (isTrackingEnabled(dpScout, isLastColumn, isLastRow)) + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutHorizontal() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutHorizontal(DPScout_ > & scout) +{ + scout.horizontalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutVertical() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutVertical(DPScout_ > & scout) +{ + scout.verticalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _incHorizontalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incHorizontalPos(DPScout_ > & scout) +{ + ++scout.horizontalPos; +} + +// ---------------------------------------------------------------------------- +// Function _incVerticalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incVerticalPos(DPScout_ > & scout) +{ + ++scout.verticalPos; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_H_ diff --git a/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h b/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h new file mode 100644 index 0000000..4d977e1 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_parallel_scout_simd.h @@ -0,0 +1,362 @@ +// 
========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ +#define INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class DPScoutState_; DPTiled +// ---------------------------------------------------------------------------- + +// The overloaded DPScoutState which simply stores the pointers to the corresponding buffer. +template +class DPScoutState_ > : + public DPScoutState_ >, + public DPScoutState_ +{ +public: + + DPScoutState_() = default; + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer) : + DPScoutState_ >(horBuffer, verBuffer), + DPScoutState_() + {} + + DPScoutState_(TBuffer & horBuffer, TBuffer & verBuffer, TThreadContext && pThreadContext) : + DPScoutState_ >(horBuffer, verBuffer, std::move(pThreadContext)), + DPScoutState_() + {} +}; + +// ---------------------------------------------------------------------------- +// Class DPScout_; DPTiled +// ---------------------------------------------------------------------------- + +// Overloaded DPScout to store the corresponding buffer for the current dp tile. 
+template +class DPScout_ > > : + public DPScout_> +{ +public: + using TBase = DPScout_ >; + + DPScoutState_ > state; + size_t horizontalPos; + size_t verticalPos; + bool forceTracking; + + DPScout_(DPScoutState_ > & state, + bool const pForceTracking) : + TBase(static_cast&>(state)), + state(state), + forceTracking(pForceTracking) + {} + + DPScout_(DPScoutState_ > & state) : DPScout_(state, false) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ScoutSpecForSimdAlignment_ +// ---------------------------------------------------------------------------- + +template +struct ScoutSpecForAlignmentAlgorithm_ > > +{ + using Type = DPTiled >; +}; + +template +struct ScoutSpecForAlignmentAlgorithm_ > > > +{ + using Type = DPTiled > >; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function isTrackingEnabled() +// ---------------------------------------------------------------------------- + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + True const & /*unused*/, + True const & /*unused*/) +{ + // TODO(rrahn): Implement me! + return (dpScout.forceTracking); +} + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + True const & /*unused*/, + False const & /*unused*/) +{ + // TODO(rrahn): Implement me! + return (dpScout.forceTracking); +} + +template +inline bool +isTrackingEnabled(DPScout_ > > const & dpScout, + False const & /*unused*/, + True const & /*unused*/) +{ + // TODO(rrahn): Implement me! 
+ return (dpScout.forceTracking); +} + +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() +// ---------------------------------------------------------------------------- + +template +inline void +_scoutBestScore(DPScout_ > > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & isLastColumn, + TIsLastRow const & isLastRow) +{ + using TScoutBase = typename DPScout_>>::TBase; + _scoutBestScore(static_cast(dpScout), activeCell, navigator, isLastColumn, isLastRow); +} + +// ---------------------------------------------------------------------------- +// Function maxHostCoordinate() +// ---------------------------------------------------------------------------- + +template +inline auto +maxHostCoordinate(DPScout_ > > const & dpScout, + TDimension const dimension) +{ + using TScoutBase = typename DPScout_ > >::TBase; + return maxHostCoordinate(static_cast(dpScout), dimension); +} + +// ---------------------------------------------------------------------------- +// Function _setSimdLane() +// ---------------------------------------------------------------------------- + +template +inline void +_setSimdLane(DPScout_ > > & dpScout, + TPosition const pos) +{ + using TScoutBase = typename DPScout_ > >::TBase; + _setSimdLane(static_cast(dpScout), pos); +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutHorizontal() +// ---------------------------------------------------------------------------- + +template +inline void +_preInitScoutHorizontal(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_>>>::TBase; + _preInitScoutHorizontal(static_cast(scout)); + scout.horizontalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _preInitScoutVertical() +// ---------------------------------------------------------------------------- + 
+template +inline void +_preInitScoutVertical(DPScout_>>> & scout) +{ + using TScoutBase = typename DPScout_>>>::TBase; + _preInitScoutVertical(static_cast(scout)); + scout.verticalPos = 0; +} + +// ---------------------------------------------------------------------------- +// Function _reachedHorizontalEndPoint() +// ---------------------------------------------------------------------------- + +template +inline bool +_reachedHorizontalEndPoint(DPScout_>>> & scout, + TIter const & hIt) +{ + using TScoutBase = typename DPScout_>>>::TBase; + return _reachedHorizontalEndPoint(static_cast(scout), hIt); +} + +// ---------------------------------------------------------------------------- +// Function _reachedVerticalEndPoint() +// ---------------------------------------------------------------------------- + +template +inline bool +_reachedVerticalEndPoint(DPScout_ > > > & scout, + TIter const & vIt) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + return _reachedVerticalEndPoint(static_cast(scout), vIt); +} + +// ---------------------------------------------------------------------------- +// Function _nextHorizontalEndPos() +// ---------------------------------------------------------------------------- + +template +inline void +_nextHorizontalEndPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _nextHorizontalEndPos(static_cast(scout)); +} + +// ---------------------------------------------------------------------------- +// Function _nextVerticalEndPos() +// ---------------------------------------------------------------------------- + +template +inline void +_nextVerticalEndPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _nextVerticalEndPos(static_cast(scout)); +} + +// ---------------------------------------------------------------------------- +// Function _incHorizontalPos() +// ---------------------------------------------------------------------------- + +template +inline 
void +_incHorizontalPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _incHorizontalPos(static_cast(scout)); + ++scout.horizontalPos; +} + +// ---------------------------------------------------------------------------- +// Function _incVerticalPos() +// ---------------------------------------------------------------------------- + +template +inline void +_incVerticalPos(DPScout_ > > > & scout) +{ + using TScoutBase = typename DPScout_ > > >::TBase; + _incVerticalPos(static_cast(scout)); + ++scout.verticalPos; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_DP_PARALLEL_DP_PARALLEL_SCOUT_SIMD_H_ diff --git a/porechop/include/seqan/align_parallel/dp_settings.h b/porechop/include/seqan/align_parallel/dp_settings.h new file mode 100644 index 0000000..6695391 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_settings.h @@ -0,0 +1,109 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Translates global function interface into setting struct. +template +struct DPSettings +{ + using TTraits = TDPTraits; + using TScoringScheme = TScoringScheme_; + using TBandConfig = DPBandConfig; + + TScoringScheme scoringScheme; + TBandConfig bandScheme; + + DPSettings() = default; + + explicit DPSettings(TScoringScheme score) : scoringScheme(std::move(score)) + {} +}; + +#ifdef SEQAN_SIMD_ENABLED +// Simd version of DP settings. 
+template +struct SimdDPSettings : public TDPSettings +{ + //------------------------------------------------------------------------- + // Member Types. + + using TTraits = typename TDPSettings::TTraits; + using TScoringScheme = typename TDPSettings::TScoringScheme; + using TScoreValue = typename Value::Type; + using TScoreValueSimd = typename SimdVector< + std::conditional_t::value, + int16_t, + TScoreValue>>::Type; + using TSimdScoringScheme = Score>; + + //------------------------------------------------------------------------- + // Members. + + TSimdScoringScheme simdScoringScheme; + + //------------------------------------------------------------------------- + // Constructor. + + SimdDPSettings() = default; + + explicit SimdDPSettings(TScoringScheme score) : + TDPSettings(std::move(score)), + simdScoringScheme(score) + {} +}; +#endif // SEQAN_SIMD_ENABLED +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_SETTINGS_H_ diff --git a/porechop/include/seqan/align_parallel/dp_traits.h b/porechop/include/seqan/align_parallel/dp_traits.h new file mode 100644 index 0000000..2606408 --- /dev/null +++ b/porechop/include/seqan/align_parallel/dp_traits.h @@ -0,0 +1,120 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Traits for DP configuration. Currently used only internally. +struct DPTraits +{ + // Gocal alignment with linear gap costs. + struct GlobalLinear + { + // The algorithm to choose. + using TAlgorithmType = GlobalAlignment_<>; + // The Gaps to choos + using TGapType = LinearGaps; + // The Band to choose. + using TBandType = BandOff; + // The traceback. + using TTracebackType = TracebackOn>; + // The output to choose. + using TFormat = ArrayGaps; + }; + + // Global alignment with affine gap costs. + struct GlobalAffine : public GlobalLinear + { + using TGapType = AffineGaps; + }; + + // Global alignment with affine gap costs. + struct SemiGlobalLinear : public GlobalLinear + { + using TAlgorithmType = GlobalAlignment_>; + }; + + // Global alignment with affine gap costs. + struct SemiGlobalAffine : public GlobalAffine + { + using TAlgorithmType = GlobalAlignment_>; + }; + + // Banded global alignment with linear gap costs. + struct BandedGlobalLinear : public GlobalLinear + { + using TBandType = BandOn; + }; + + // Banded global alignment with affine gap costs. + struct BandedGlobalAffine : public BandedGlobalLinear + { + using TGapType = AffineGaps; + }; + + // Local alignment with linear gap costs. 
+ struct LocalLinear : public GlobalLinear + { + using TAlgorithmType = LocalAlignment_<>; + }; + + // Local alignment with affine gap costs. + struct LocalAffine : public LocalLinear + { + using TGapType = AffineGaps; + }; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_DP_TRAITS_H_ diff --git a/porechop/include/seqan/align_parallel/parallel_align_interface.h b/porechop/include/seqan/align_parallel/parallel_align_interface.h new file mode 100644 index 0000000..764aa2a --- /dev/null +++ b/porechop/include/seqan/align_parallel/parallel_align_interface.h @@ -0,0 +1,366 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +namespace impl +{ + +/* + * Executor class that implements the correct execution mode. 
+ */ +struct ParallelAlignmentExecutor +{ + template + auto operator()(Sequential const & /*execPolicy*/, + TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + TResult superSet; + resize(superSet, length(setH)); + + auto zipCont = makeZipView(setH, setV, superSet); +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + for (auto && pwInst : zipCont) + { + std::get<2>(pwInst) = kernel(std::get<0>(pwInst), std::get<1>(pwInst), std::forward(args)...); + } + return superSet; + } + + template + auto operator()(ExecutionPolicy const & /*execPolicy*/, + TKernel && kernel, + TSetH const & setH, + TArgs && ...args) + { +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + // Automaically chooses vectorized code, or falls back to sequential code. + return kernel(setH, std::forward(args)...); + } + + template + auto operator()(SEQAN_UNUSED ExecutionPolicy const & execPolicy, // maybe unused due to missing OMP support in clang. 
+ TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TPos = std::make_signed_t; + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + TPos chunkSize = _min(static_cast(length(setH)), static_cast(256)); + String splitter; + computeSplitters(splitter, length(setH), static_cast(length(setH)/chunkSize)); + + std::vector superSet; + superSet.resize(length(splitter)); + +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + + SEQAN_OMP_PRAGMA(parallel for num_threads(numThreads(execPolicy)) schedule(guided)) + for (TPos job = 0; job < static_cast(length(splitter)) - 1; ++job) // TODO(rrahn): Why -1; Is there a bug in computeSplitters? + { + auto infSetH = infix(setH, splitter[job], splitter[job + 1]); + auto infSetV = infix(setV, splitter[job], splitter[job + 1]); + + superSet[job] = kernel(infSetH, infSetV, std::forward(args)...); + } + // Reduce the result. 
+ TResult res; + resize(res, length(setH)); + auto it = begin(res, Standard()); + for (auto && set : superSet) + { + arrayMoveForward(begin(set, Standard()), end(set, Standard()), it); + it += length(set); + } + return res; + } + + template + auto operator()(ExecutionPolicy const & execPolicy, + TKernel && kernel, + TSetH const & setH, + TSetV const & setV, + TArgs && ...args) + + { + SEQAN_ASSERT_EQ(length(setH), length(setV)); + + using TPos = std::make_signed_t; + using TResult = decltype(kernel(setH, setV, std::forward(args)...)); + + Splitter splitter(0, length(setH), numThreads(execPolicy)); + + TResult superSet; + resize(superSet, length(setH)); + + auto zipCont = makeZipView(setH, setV, superSet); + +#ifdef DP_PARALLEL_SHOW_PROGRESS + ::impl::dp_parallel_progress::show_progress(length(setH)); +#endif // DP_PARALLEL_SHOW_PROGRESS + + SEQAN_OMP_PRAGMA(parallel for num_threads(length(splitter))) + for (TPos job = 0; job < static_cast(length(splitter)); ++job) + { + auto it = begin(zipCont, Standard()) + splitter[job]; + auto itEnd = begin(zipCont, Standard()) + splitter[job + 1]; + + // NOTE(marehr): auto && seqPair does not work, thus declaring the + // type explicitly, s.t. <=icpc 18.0.1 can compile the code (ticket + // #03204483) + using TSeqPair = decltype(*it); + std::for_each(it, itEnd, [&](TSeqPair && seqPair) + { + std::get<2>(seqPair) = kernel(std::get<0>(seqPair), std::get<1>(seqPair), std::forward(args)...); + }); + } + return superSet; + } +}; + +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> +inline auto +doWaveAlignment(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TAlgorithmSpec const & /*tag*/, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme, + TArgs && .../*args*/) +{ + using TScoreValue = typename Value::Type; + + // The vector containing the scores. 
+ std::vector res; + res.resize(length(setH)); + + auto dispatcher = [&res](auto && ...args) + { + alignExecBatch(std::forward(args)..., + [&res](auto const id, auto const score) + { + res[id] = score; + }); + }; + + // Differentiate between affine and linear gap costs. + // TODO(rrahn): Setup configuration cascade. + if (scoreGapOpen(scoringScheme) == scoreGapExtend(scoringScheme)) + { + struct DPConfigTraits + { + using TAlgorithmType SEQAN_UNUSED = TAlgorithmSpec; + using TGapType SEQAN_UNUSED = LinearGaps; + using TBandType SEQAN_UNUSED = BandOff; + using TTracebackType SEQAN_UNUSED = TracebackOff; + using TFormat SEQAN_UNUSED = ArrayGaps; + }; + + using TDPSettings = seqan::DPSettings; + + TDPSettings settings; + settings.scoringScheme = scoringScheme; + dispatcher(execPolicy, setH, setV, settings); + } + else + { + struct DPConfigTraits + { + using TAlgorithmType SEQAN_UNUSED = TAlgorithmSpec; + using TGapType SEQAN_UNUSED = AffineGaps; + using TBandType SEQAN_UNUSED = BandOff; + using TTracebackType SEQAN_UNUSED = TracebackOff; + using TFormat SEQAN_UNUSED = ArrayGaps; + }; + + using TDPSettings = seqan::DPSettings; + + TDPSettings settings; + settings.scoringScheme = scoringScheme; + dispatcher(execPolicy, setH, setV, settings); + } + return res; +} + +} // namespace impl + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +/* + * Wrapper functions for calling globalAlignmentScore and localAlignmentScore with an ExecutionPolicy. + * Note the parallel interfaces are documented as part of the standard documentation in seqan/align module. 
+ */ +template ::value || + std::is_same::value, + int> = 0> +inline auto +globalAlignmentScore(ExecutionPolicy const & execPolicy, + TArgs && ...args) +{ + auto kernel = [](auto && ...args) + { + return globalAlignmentScore(std::forward(args)...); + }; + return impl::ParallelAlignmentExecutor{}(execPolicy, kernel, std::forward(args)...); +} + +template ::value || + std::is_same::value, + int> = 0> +inline auto +localAlignmentScore(ExecutionPolicy const & execPolicy, + TArgs && ...args) +{ + auto kernel = [](auto && ...args) + { + return localAlignmentScore(std::forward(args)...); + }; + return impl::ParallelAlignmentExecutor{}(execPolicy, kernel, std::forward(args)...); +} + +// Wavefront execution of globalAlignmentScore w/ config. +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> + +inline auto +globalAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme, + TConfig const & /*config*/) +{ + return impl::doWaveAlignment(execPolicy, + GlobalAlignment_::Type>{}, + setH, + setV, + scoringScheme); +} + +// Wavefront execution of globalAlignmentScore w/o config. 
+template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> + +inline auto +globalAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TSetH const & setH, + TSetV const & setV, + TScore const & scoringScheme) +{ + return globalAlignmentScore(execPolicy, setH, setV, scoringScheme, AlignConfig<>{}); +} + +template , Serial>::value && + !std::is_same, Parallel>::value, + int> = 0> +inline auto +localAlignmentScore(ExecutionPolicy, TVectorizationPolicy> const & execPolicy, + TArgs && ...args) +{ + return impl::doWaveAlignment(execPolicy, LocalAlignment_<>{}, std::forward(args)...); +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_ALIGN_INTERFACE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h b/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h new file mode 100644 index 0000000..00db124 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_executor.h @@ -0,0 +1,98 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Executor class for an alignment task in the wave-front model. +// Stores the scheduler and the thread local storage. +template +struct WavefrontAlignmentExecutor +{ + // Shared data in parallel context. 
+ TScheduler * ptrTaskScheduler{nullptr}; + TThreadLocalStore * ptrThreadLocal{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + WavefrontAlignmentExecutor() = default; + + WavefrontAlignmentExecutor(TScheduler * _ptrScheduler, + TThreadLocalStore * _ptrTls) : + ptrTaskScheduler{_ptrScheduler}, + ptrThreadLocal(_ptrTls) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// Asynchronosly schedule a new alignment job. +template +inline void +spawn(WavefrontAlignmentExecutor & executor, + TTaskExecutor && taskExec) +{ + SEQAN_ASSERT(executor.ptrTaskScheduler != nullptr); + scheduleTask(*executor.ptrTaskScheduler, std::forward(taskExec)); +} + +// Access thread local storage. +template +inline auto & +local(WavefrontAlignmentExecutor & executor) +{ + SEQAN_ASSERT(executor.ptrThreadLocal != nullptr); + return local(*executor.ptrThreadLocal); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_EXECUTOR_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_result.h b/porechop/include/seqan/align_parallel/wavefront_alignment_result.h new file mode 100644 index 0000000..e2e7900 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_result.h @@ -0,0 +1,165 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// The intermediate result stored by each thread independently. +// After an alignment has been finished, the intermediate results are reduced to a global result. +template +struct WavefrontAlignmentResult +{ + // ---------------------------------------------------------------------------- + // Member Types. + + using TState = std::pair; + + // ---------------------------------------------------------------------------- + // Member Variables + + TState _maxState{std::numeric_limits::min(), typename TTraits::THostPosition{}}; + size_t _tileCol{0}; + size_t _tileRow{0}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + // ---------------------------------------------------------------------------- + // Constructors. + + // Note: Although, this could be an aggregate type, the icpc-17 crashes, + // when compiling without the defaulted constructor. 
+ WavefrontAlignmentResult() = default; + + WavefrontAlignmentResult(TState const maxState) : + _maxState(std::move(maxState)) + {} + + WavefrontAlignmentResult(TState const maxState, size_t const tileCol, size_t const tileRow) : + _maxState(std::move(maxState)), + _tileCol(tileCol), + _tileRow(tileRow) + {} + + // ---------------------------------------------------------------------------- + // Member Functions. +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +namespace impl +{ + +template +inline void +updateMax(TIntermediate & me, + TState const & state, + size_t const tileCol, + size_t const tileRow) +{ + if (state.first > me._maxState.first) + { + me._maxState = state; + me._tileCol = tileCol; + me._tileRow = tileRow; + } +} +} // namespace impl + +// Update the intermediate result if new optimum has been found. +template +inline void +updateMax(WavefrontAlignmentResult & me, + typename WavefrontAlignmentResult::TState const & state, + size_t const tileCol, + size_t const tileRow) +{ + impl::updateMax(me, state, tileCol, tileRow); +} + +template +inline void +updateMax(WavefrontAlignmentResult & lhs, + WavefrontAlignmentResult const & rhs) +{ + impl::updateMax(lhs, rhs._maxState, rhs._tileCol, rhs._tileRow); +} + +// Reset the intermediate result. +template +inline void +clear(WavefrontAlignmentResult & me) +{ + me = WavefrontAlignmentResult{}; +} + +// Get the intermediate result. +template +inline typename WavefrontAlignmentResult::TState const & +value(WavefrontAlignmentResult const & me) +{ + return me._maxState; +} + +// Swap two intermediate results. 
+template +inline void +swap(WavefrontAlignmentResult & lhs, + WavefrontAlignmentResult & rhs) +{ + // TODO (rrahn): report issue with Intel + WavefrontAlignmentResult tmp = std::move(lhs); + lhs = std::move(rhs); + rhs = std::move(tmp); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_RESULT_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h b/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h new file mode 100644 index 0000000..b7cb246 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_scheduler.h @@ -0,0 +1,347 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Yet internal class. Might need some redesign to make it truly generic. +/* + * @class WavefrontAlignmentScheduler + * @headerfile + * @brief A generic scheduler allowing to execute callables with a ring buffer for the stored tasks. + * + * @signature class WavefrontAlignmentScheduler; + * + * This schedule is at the moment only used for the wave-front alignment execution but could be generalized later. + * It stores all scheduled callables in a @link ConcurrentSuspendableQueue @endlink which can hold a user defined + * number of callables at the same time. It then uses recycable ids to fill up the queue with waiting jobs. 
+ * If the queue is full and a thread tries to add a new job, it will be suspended, until resources are freed by + * the scheduler. + */ +class WavefrontAlignmentScheduler +{ +public: + + //------------------------------------------------------------------------- + // Member Types. + + using TCallable = std::function; + using TAlignmentQueue = ConcurrentQueue>; + using TRecycleList = std::list; + + //------------------------------------------------------------------------- + // Private Member Variables. + + WavefrontTaskScheduler _taskScheduler; + ThreadPool _pool; + TRecycleList _recycableIds; + TAlignmentQueue _queue; + bool _receivedEndSignal; + + std::mutex _mutexRecycleId; + unsigned _numParallelAlignments; + + std::mutex _mutexPushException; + std::vector _exceptionPointers; + + std::atomic _isValid{true}; + + std::function job = [this] () + { + while (true) + { + TCallable callable; + if (!popFront(callable, _queue)) + break; // End of thread => No writers and queue is empty. + + uint16_t id = -1; + + { // Receive id. + std::lock_guard lck(_mutexRecycleId); + SEQAN_ASSERT_NOT(_recycableIds.empty()); + id = _recycableIds.front(); + _recycableIds.pop_front(); + } + + try + { + callable(id); // invokes the alignment with assigned id. + } + catch (...) + { // Catch any exception thrown by callable. Store exception, and set *this invalid. + // We still keep running until the queue is empty. The thread is cleaned either by, + // explicit wait or by destruction of *this. + _isValid.store(false, std::memory_order_release); + { + std::lock_guard lck(_mutexPushException); + _exceptionPointers.push_back(std::current_exception()); + } + } + + // Check if task scheduler is still valid. + // If not, something went wrong, and we should not continue adding new tasks. + // So we propagate the invalid state to *this and break exceution chain. + if (!isValid(_taskScheduler)) + { + _isValid.store(false, std::memory_order_release); + } + + { // recycle id, when done. 
+ std::lock_guard lck(_mutexRecycleId); + _recycableIds.push_back(id); + } + } + unlockReading(_queue); // Notify that this reader is finished. + unlockWriting(_taskScheduler); // Notify that this writer is finished. + }; + + //------------------------------------------------------------------------- + // Constructors. + + // implicitly deleted default constructor. + + WavefrontAlignmentScheduler(size_t const numParallelAlignments, size_t const numParallelTasks) : + _taskScheduler(numParallelTasks), + _queue(numParallelAlignments), + _receivedEndSignal(false), + _numParallelAlignments(numParallelAlignments) + { + SEQAN_ASSERT_GT(numParallelAlignments, 0u); // Bad if reader is 0. + + // Setup recycable ids. + _recycableIds.resize(numParallelAlignments); + std::iota(std::begin(_recycableIds), std::end(_recycableIds), 0); + + setReaderWriterCount(_queue, numParallelAlignments, 1); + + _exceptionPointers.resize(numParallelAlignments, nullptr); + + try + { // Create the threads here, later we can try to make lazy thread creation. + for (unsigned i = 0; i < numParallelAlignments; ++i) + { + spawn(_pool, job); + } + } + catch (...) // Make sure all the spawned threads are safely stopped before re-throwing the exception. + { + unlockWriting(_queue); + waitForWriters(_taskScheduler); + join(_pool); + throw; + } + + setWriterCount(_taskScheduler, numParallelAlignments); + // Notify task scheduler, that everything was setup correctly. + for (unsigned i = 0; i < numParallelAlignments; ++i) + { + lockWriting(_taskScheduler); + } + waitForWriters(_taskScheduler); // Invoke task scheduler. + } + + // Default constructor. + WavefrontAlignmentScheduler() : WavefrontAlignmentScheduler(16, 8) + {} + + // Copy & Move C'tor + WavefrontAlignmentScheduler(WavefrontAlignmentScheduler const &) = delete; + WavefrontAlignmentScheduler(WavefrontAlignmentScheduler &&) = delete; + + ///------------------------------------------------------------------------- + // Destructor. 
+ + ~WavefrontAlignmentScheduler() + { + // Signal that no more alignments will be added. + if (!_receivedEndSignal) + unlockWriting(_queue); + + SEQAN_ASSERT(_queue.writerCount == 0); + + // Wait until all remaining threads are finished with their execution. + join(_pool); + + // In destructor of thread pool we wait for the outstanding alignments to be finished + // and then continue destruction of the remaining members and cleaning up the stack. + } + + // ------------------------------------------------------------------------ + // Member Functions. + + // Copy & Move assignment + WavefrontAlignmentScheduler& operator=(WavefrontAlignmentScheduler const &) = delete; + WavefrontAlignmentScheduler& operator=(WavefrontAlignmentScheduler &&) = delete; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template<> +struct SchedulerTraits +{ + using TTask = typename WavefrontAlignmentScheduler::TCallable; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +/* + * @fn WavefrontAlignmentScheduler#isValid + * @headerfile + * @brief Checks if scheduler is in a valid state. This means that no callable has terminated with an exception. + */ +inline bool +isValid(WavefrontAlignmentScheduler const & me) +{ + return me._isValid.load(std::memory_order_acquire); +} + +/* + * @fn WavefrontAlignmentScheduler#scheduleTask + * @headerfile + * @brief Adds a new task to the scheduler. Suspends until resources become available. + * @throws ExceptionType? + */ +// basic exception-safety guarantee. +// Throws if appendValue failed. 
+inline void +scheduleTask(WavefrontAlignmentScheduler & me, + typename SchedulerTraits::TTask && callable) +{ + if (!isValid(me)) + throw std::runtime_error("Invalid alignment scheduler!"); + + // Spins until there is enough space to add to the queue. + if (!appendValue(me._queue, std::forward(callable))) + throw std::runtime_error("Invalid alignment scheduler 2!"); +} + +inline void +scheduleTask(WavefrontAlignmentScheduler & me, + typename SchedulerTraits::TTask & callable) +{ + if (!isValid(me)) + throw std::runtime_error("Invalid alignment scheduler!"); + // Spins until there is enough space to add to the queue. + if(!appendValue(me._queue, callable)) + throw std::runtime_error("Invalid alignment scheduler 2!"); +} + +/* + * @fn WavefrontAlignmentScheduler#notify + * @headerfile + * @brief Notify the scheduler that no more jobs will follow. + */ +inline void +notify(WavefrontAlignmentScheduler & me) +{ + unlockWriting(me._queue); + me._receivedEndSignal = true; +} + +/* + * @fn WavefrontAlignmentScheduler#wait + * @headerfile + * @brief Explicit barrier on the scheduler. Suspends until all scheduled jobs have been finsihed. + * + * Note, can dead lock if notify is never called. + */ +// Only possible if some other thread is signaling the end of it. +inline void +wait(WavefrontAlignmentScheduler & me) +{ + join(me._pool); + wait(me._taskScheduler); +} + +/* + * @fn WavefrontAlignmentScheduler#wait2 + * @headerfile + * @brief Explicit barrier on the scheduler. Suspends until all scheduled jobs have been finsihed. + * + * Note, can dead lock if notify is never called. + */ +template +inline void +wait2(WavefrontAlignmentScheduler & me, TNotifiable & notifiable) +{ + join(me._pool); + notify(notifiable); + wait(me._taskScheduler); +} + +/* + * @fn WavefrontAlignmentScheduler#getExceptions + * @headerfile + * @brief Returns vector of captured exceptions if any was thrown by the callable. + * + * Note, can dead lock if notify is never called. 
+ */ +inline auto +getExceptions(WavefrontAlignmentScheduler & me) +{ + auto vec = me._exceptionPointers; + auto innerExceptions = getExceptions(me._taskScheduler); + std::copy(std::begin(innerExceptions), std::end(innerExceptions), std::back_inserter(vec)); + return vec; +} + +/* + * @fn WavefrontAlignmentScheduler#taskScheduler + * @headerfile + * @brief Returns lvalue reference to the underlying task_scheduler. + */ +inline auto& +taskScheduler(WavefrontAlignmentScheduler & me) +{ + return me._taskScheduler; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_PARALLEL_ALIGNMENT_SCHEDULER_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_task.h b/porechop/include/seqan/align_parallel/wavefront_alignment_task.h new file mode 100644 index 0000000..0334103 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_task.h @@ -0,0 +1,404 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Config structre for the execution of one alignment using the wave-front model. +template +struct WavefrontAlignmentTaskConfig +{ + // ---------------------------------------------------------------------------- + // Member Typedefs. + + // DPTrait type forwarding. 
+ using TDPTraits = typename TDPSettings::TTraits; + using TScoreValue = typename Value::Type; + using TAlgorithmType = typename TDPTraits::TAlgorithmType; + using TTracebackType = typename TDPTraits::TTracebackType; + using TGapType = typename TDPTraits::TGapType; + + // Wavefront Alignment Context. + using TDPCell = DPCell_; + using TBufferValue = Pair::Type>; + using TBuffer = String; + using TBlockBuffer = DPTileBuffer; + + // DP Execution Context. + using TDPProfile = DPProfile_; + using TDPCache = DPContext::Type>; + using TDPScout = DPScout_; + + // Parallel Context. + struct IntermediateTraits_ + { + using TScoreValue = decltype(maxScore(std::declval())); + using THostPosition = decltype(maxHostPosition(std::declval())); + }; + + using TDPIntermediate = WavefrontAlignmentResult; + + struct AlignThreadLocalConfig_ + { + using TIntermediate = TDPIntermediate; + using TCache = TDPCache; + + using TLocalHost = std::tuple; + }; + + using TThreadLocal = WavefrontAlignmentThreadLocalStorage; + using TAlignEvent = WavefrontTaskEvent; +}; + +#ifdef SEQAN_SIMD_ENABLED +template +struct WavefrontAlignmentSimdTaskConfig : public WavefrontAlignmentTaskConfig +{ + // ---------------------------------------------------------------------------- + // Member Typedefs. + + using TBase_ = WavefrontAlignmentTaskConfig; + + using TDPSimdCell = DPCell_; + using TDPSimdTraceValue = typename TraceBitMap_::Type; + + using TDPSimdScoreMatrix = String>; + using TDPSimdTraceMatrix = String>; + using TDPSimdCache = DPContext; + + using TDPScout_ = DPScout_ >; + using TDPIntermediate = WavefrontAlignmentResult; + + // Parallel Context. 
+ struct SimdAlignThreadLocalConfig_ + { + + using TIntermediate = TDPIntermediate; + using TCache = typename TBase_::TDPCache; + using TSimdCache = TDPSimdCache; + + using TLocalHost = std::tuple; + }; + + using TThreadLocal = WavefrontAlignmentThreadLocalStorage; + using TAlignEvent = WavefrontTaskEvent; +}; +#endif + +// Incubator to setup the alignment job. +template +struct WavefrontAlignmentTaskIncubator +{ + using TWatc = WavefrontAlignmentTaskConfigConcept; + + // ---------------------------------------------------------------------------- + // Function createBlocks() + // ---------------------------------------------------------------------------- + + template + static auto createBlocks(TSeq const & seq, size_t const blockSize) + { + using TIter = typename Iterator::Type, Standard>::Type; + String> blocks; + resize(blocks, (length(seq) + blockSize - 1) / blockSize, Exact()); + + for (unsigned id = 0; id < length(blocks); ++id) + blocks[id] = toRange(infix(seq, id * blockSize, _min(length(seq),(id + 1) * blockSize))); + return blocks; + } + + // ---------------------------------------------------------------------------- + // Function createBlockBuffer() + // ---------------------------------------------------------------------------- + + template + static auto createBlockBuffer(TSeqHBlocks const & seqHBlocks, TSeqVBlovcks const & seqVBlocks, TScore const & score) + { + using TDPCell = typename TWatc::TDPCell; + typename TWatc::TBlockBuffer buffer; + resize(buffer.horizontalBuffer, length(seqHBlocks), Exact()); + resize(buffer.verticalBuffer, length(seqVBlocks), Exact()); + + typename TWatc::TBufferValue tmp; + + using TDPMetaColH = DPMetaColumn_>; + using TDPMetaColV = DPMetaColumn_>; + + TDPCell dummyCellD; + TDPCell dummyCellH; + TDPCell dummyCellV; + tmp.i2 = _computeScore(tmp.i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + RecursionDirectionZero(), typename TWatc::TDPProfile()); + for (auto itH = begin(buffer.horizontalBuffer, 
Standard()); + itH != end(buffer.horizontalBuffer, Standard()); + ++itH) + { + resize(*itH, length(front(seqHBlocks)), Exact()); + for (auto it = begin(*itH, Standard()); it != end(*itH, Standard()); ++it) + { + it->i2 = _computeScore(it->i1, dummyCellD, tmp.i1, dummyCellV, Nothing(), Nothing(), score, + typename RecursionDirection_::Type(), + typename TWatc::TDPProfile()); + tmp.i1 = it->i1; + } + } + tmp.i1 = decltype(tmp.i1){}; + tmp.i2 = _computeScore(tmp.i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + RecursionDirectionZero(), typename TWatc::TDPProfile()); + + for (auto itV = begin(buffer.verticalBuffer, Standard()); itV != end(buffer.verticalBuffer, Standard()); ++itV) + { + resize(*itV, length(front(seqVBlocks)) + 1, Exact()); + auto it = begin(*itV, Standard()); + it->i2 = tmp.i2; + it->i1 = tmp.i1; + ++it; + for (; it != end(*itV, Standard()); ++it) + { + it->i2 = _computeScore(it->i1, dummyCellD, dummyCellH, dummyCellV, Nothing(), Nothing(), score, + typename RecursionDirection_::Type(), + typename TWatc::TDPProfile()); + _setVerticalScoreOfCell(it->i1, _verticalScoreOfCell(dummyCellV)); + tmp.i1 = it->i1; + tmp.i2 = it->i2; // TODO(rrahn): Move out of loop. + } + } + return buffer; + } + + // ---------------------------------------------------------------------------- + // Function createTaskGraph() + // ---------------------------------------------------------------------------- + + template + static auto createTaskGraph(TWavefrontTaskContext & taskContext) + { + using TDagTask = WavefrontTask; + + std::vector>> graph; + + resize(graph, length(taskContext.seqHBlocks)); + for (int i = length(taskContext.seqHBlocks); --i >= 0;) + { + resize(graph[i], length(taskContext.seqVBlocks)); + for (int j = length(taskContext.seqVBlocks); --j >= 0;) + { + using TSize = decltype(length(taskContext.seqHBlocks)); + TDagTask * successorRight = (static_cast(i + 1) < length(taskContext.seqHBlocks)) + ? 
graph[i+1][j].get() + : nullptr; + TDagTask * successorDown = (static_cast(j + 1) < length(taskContext.seqVBlocks)) + ? graph[i][j+1].get() + : nullptr; + graph[i][j] = std::make_shared(taskContext, + std::array{{successorRight, successorDown}}, + static_cast(i), static_cast(j), + static_cast(((i > 0) ? 1 : 0) + ((j > 0) ? 1 : 0)), + (static_cast(i + 1) == length(taskContext.seqHBlocks)), + (static_cast(j + 1) == length(taskContext.seqVBlocks))); + } + } + return graph; + } +}; + +// The actual alignment task that is executed by the wave-front model. +template > +class WavefrontAlignmentTask +{ +public: + + using TIncubator = WavefrontAlignmentTaskIncubator; + + using TSeqHBlocks = decltype(TIncubator::createBlocks(std::declval(), std::declval())); + using TSeqVBlocks = decltype(TIncubator::createBlocks(std::declval(), std::declval())); + using TTileBuffer = decltype(TIncubator::createBlockBuffer(std::declval(), + std::declval(), + std::declval())); + + using TTaskContext = WavefrontAlignmentContext; + + // ---------------------------------------------------------------------------- + // Member Variables. + // ---------------------------------------------------------------------------- + + size_t alignmentId{0}; + TSeqH const & seqH; + TSeqV const & seqV; + TDPSettings const & dpSettings; + size_t blockSize; + + // ---------------------------------------------------------------------------- + // Constructors. 
+ // ---------------------------------------------------------------------------- + + WavefrontAlignmentTask() = delete; + + WavefrontAlignmentTask(TSeqH const & seqH, + TSeqV const & seqV, + TDPSettings const & dpSetting, + size_t const & blockSize) : + seqH(seqH), + seqV(seqV), + dpSettings(dpSetting), + blockSize(blockSize) + {} + + + WavefrontAlignmentTask(size_t const id, + TSeqH const & seqH, + TSeqV const & seqV, + TDPSettings const & dpSetting, + size_t const & blockSize) : + alignmentId(id), + seqH(seqH), + seqV(seqV), + dpSettings(dpSetting), + blockSize(blockSize) + {} + + // ---------------------------------------------------------------------------- + // Member Functions. + // ---------------------------------------------------------------------------- + + // This function now run's in a separate thread. + template + inline void + operator()(uint16_t const instanceId, + TWavefrontExecutor & executor, + TCallback && callback) + { + // Initialize the strings. + auto seqHBlocks = TIncubator::createBlocks(seqH, blockSize); + auto seqVBlocks = TIncubator::createBlocks(seqV, blockSize); + + // Create the buffer for the matrix. + auto buffer = TIncubator::createBlockBuffer(seqHBlocks, seqVBlocks, dpSettings.scoringScheme); + + // Setup the task context and create task graph. + TTaskContext taskContext{instanceId, seqHBlocks, seqVBlocks, buffer, dpSettings}; + auto taskGraph = TIncubator::createTaskGraph(taskContext); + + // Prepare event. + WavefrontTaskEvent event; + context(*taskGraph.back().back()).ptrEvent = &event; + + // Kick off the execution. + using TWavefrontTaskExec = WavefrontTaskExecutor, TWavefrontExecutor>; + spawn(executor, TWavefrontTaskExec{taskGraph[0][0].get(), &executor}); + + // Wait for alignment to finish. + wait(event); + + // Reduce. 
+ typename TConfig::TDPIntermediate interMax{}; + auto collectAndReset = [&](auto & threadLocalStorage) + { + updateMax(interMax, intermediate(threadLocalStorage, instanceId)); + clear(intermediate(threadLocalStorage, instanceId)); + }; + combineEach(*executor.ptrThreadLocal, collectAndReset); + // Continue execution. + callback(alignmentId, interMax._maxState.first); + } + + template + inline void + operator()(uint16_t const instanceId, + TWavefrontExecutor & executor, + TSimdTaskQueue & taskQueue, + TCallback && callback) + { + // Initialize the strings. + auto seqHBlocks = TIncubator::createBlocks(seqH, blockSize); + auto seqVBlocks = TIncubator::createBlocks(seqV, blockSize); + + // Create the buffer for the matrix. + auto buffer = TIncubator::createBlockBuffer(seqHBlocks, seqVBlocks, dpSettings.scoringScheme); + + // Setup the task context and create task graph. + TTaskContext taskContext{instanceId, seqHBlocks, seqVBlocks, buffer, dpSettings}; + auto taskGraph = TIncubator::createTaskGraph(taskContext); + + // Prepare event. + WavefrontTaskEvent event; + context(*taskGraph.back().back()).ptrEvent = &event; + + // Kick off the execution. + using TWavefrontTaskExec = WavefrontTaskExecutor; + appendValue(taskQueue, *taskGraph[0][0]); + spawn(executor, TWavefrontTaskExec{&taskQueue, &executor}); + + // Wait for alignment to finish. + wait(event); + + // Reduce. 
+ typename TConfig::TDPIntermediate interMax{}; + auto collectAndReset = [&](auto & threadLocalStorage) + { + updateMax(interMax, intermediate(threadLocalStorage, instanceId)); + clear(intermediate(threadLocalStorage, instanceId)); + }; + combineEach(*executor.ptrThreadLocal, collectAndReset); + callback(alignmentId, interMax._maxState.first); + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_ALIGNMENT_TASK_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h b/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h new file mode 100644 index 0000000..42f6aa2 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_alignment_thread_local_storage.h @@ -0,0 +1,130 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ +#define SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Shared thread local storage for the parallel alignment instances. +template +class WavefrontAlignmentThreadLocalStorage +{ +public: + //------------------------------------------------------------------------- + // Member Types. 
+ + using TAlignmentLocal = typename TConfig::TLocalHost; + + //------------------------------------------------------------------------- + // Private Members. + + std::vector _multiAlignmentThreadLocal; + + //------------------------------------------------------------------------- + // Constructor. + + explicit WavefrontAlignmentThreadLocalStorage(size_t const numAlignments) : + _multiAlignmentThreadLocal(numAlignments) + {} + + // Delegating default constructor. + WavefrontAlignmentThreadLocalStorage() : WavefrontAlignmentThreadLocalStorage(1) + {} + + WavefrontAlignmentThreadLocalStorage(WavefrontAlignmentThreadLocalStorage const &) = default; + WavefrontAlignmentThreadLocalStorage(WavefrontAlignmentThreadLocalStorage &&) = default; + + //------------------------------------------------------------------------- + // Destructor. + + ~WavefrontAlignmentThreadLocalStorage() = default; + + //------------------------------------------------------------------------- + // Member Functions. + + WavefrontAlignmentThreadLocalStorage& operator=(WavefrontAlignmentThreadLocalStorage const &) = default; + WavefrontAlignmentThreadLocalStorage& operator=(WavefrontAlignmentThreadLocalStorage &&) = default; +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// Gets the intermediate result for the specific alignment job. +template +inline typename TConfig::TIntermediate & +intermediate(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +// Gets the chache for the specific alignment job. 
+template +inline typename TConfig::TCache & +cache(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +// Gets the simd chache for the specific alignment job. +template +inline typename TConfig::TSimdCache & +simdCache(WavefrontAlignmentThreadLocalStorage & me, + size_t const alignId) +{ + SEQAN_ASSERT_LT(alignId, me._multiAlignmentThreadLocal.size()); + return std::get(me._multiAlignmentThreadLocal[alignId]); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_ALIGN_PARALLEL_DP_THREAD_LOCAL_STORAGE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task.h b/porechop/include/seqan/align_parallel/wavefront_task.h new file mode 100644 index 0000000..b9bd8a4 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task.h @@ -0,0 +1,365 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ + +namespace seqan +{ +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Context used per task. Access information like the infixes of the sequences for this block and other. +template +struct WavefrontAlignmentContext +{ + size_t alignmentId{0}; + TSeqHBlocks const & seqHBlocks; + TSeqVBlocks const & seqVBlocks; + TTileBuffer & tileBuffer; + TDPSettings const & dpSettings; + TEvent * ptrEvent{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. 
+ WavefrontAlignmentContext(size_t const _alignmentId, + TSeqHBlocks const & _seqHBlocks, + TSeqVBlocks const & _seqVBlocks, + TTileBuffer & _tileBuffer, + TDPSettings const & _dpSettings) : + alignmentId(_alignmentId), + seqHBlocks(_seqHBlocks), + seqVBlocks(_seqVBlocks), + tileBuffer(_tileBuffer), + dpSettings(_dpSettings) + {} +}; + +// The abstract task that is executed as separat alignment instance. +template +class WavefrontTask +{ +public: + + using TContext = TAlignmentContext; + + TContext & context; + std::array successor{{nullptr, nullptr}}; + size_t col{0}; + size_t row{0}; + std::atomic refCount{0}; + bool lastTileH{false}; + bool lastTileV{false}; + + + //------------------------------------------------------------------------- + // Constructor + WavefrontTask() = delete; + + WavefrontTask(TContext & context, std::array successor, + size_t const col, + size_t const row, + size_t const refCount, + bool const lastTileH, + bool const lastTileV) : + context(context), + successor(std::move(successor)), + col(col), row(row), + refCount(refCount), + lastTileH(lastTileH), lastTileV(lastTileV) + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct TaskExecutionTraits; + +template +struct TaskExecutionTraits> +{ + using TaskContext_ = WavefrontAlignmentContext; + + using TSeqHBlocks = typename std::decay().seqHBlocks)>::type; + using TSeqVBlocks = typename std::decay().seqVBlocks)>::type; + using TWavefrontBuffer = typename std::decay().tileBuffer)>::type; + using TDPSettings = typename std::decay().dpSettings)>::type; + + using TTileBuffer = typename std::decay().horizontalBuffer[0])>::type; + using TDPScoutState = DPScoutState_>; + + // Sequence types. + using TSeqH = typename Value::Type; + using TSeqV = typename Value::Type; + + // DPTrait type forwarding. 
+ using TDPTraits = typename TDPSettings::TTraits; + using TScoreValue = typename Value::Type; + using TAlgorithmType = typename TDPTraits::TAlgorithmType; + using TTracebackType = typename TDPTraits::TTracebackType; + using TGapType = typename TDPTraits::TGapType; + + // Wavefront Alignment Context. + using TDPCell = DPCell_; + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; +}; + +template +struct SimdTaskExecutionTraits : public TaskExecutionTraits +{ + using TBase = TaskExecutionTraits; + + using TScoreValue = typename TBase::TDPSettings::TScoreValueSimd; + using TDPCell = DPCell_; + using TTraceValue = typename TraceBitMap_::Type; + using TBufferValue = Pair; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline void +setRefCount(WavefrontTask & me, size_t const count) +{ + me.refCount.store(count, std::memory_order_relaxed); +} + +template +inline unsigned +decrementRefCount(WavefrontTask & me) +{ + return --me.refCount; +} + +template +inline unsigned +incrementRefCount(WavefrontTask & me) +{ + return ++me.refCount; +} + +template +inline auto +column(TTask const & task) -> decltype(task.col) +{ + return task.col; +} + +template +inline auto +row(TTask const & task) -> decltype(task.row) +{ + return task.row; +} + +template +inline bool +inLastColumn(TTask const & task) +{ + return task.lastTileH; +} + +template +inline bool +inLastRow(TTask const & task) +{ + return task.lastTileV; +} + +template +inline bool +isLastTask(TTask const & task) +{ + return inLastColumn(task) && inLastRow(task); +} + +template +inline auto +successor(TTask & task) -> std::add_lvalue_reference_t +{ + return task.successor; +} + +template +inline auto +successor(TTask const & task) -> std::add_lvalue_reference_t> +{ + return task.successor; +} + +template +inline auto +context(TTask & 
task) -> std::add_lvalue_reference_t +{ + return task.context; +} + +template +inline auto +context(TTask const & task) -> std::add_lvalue_reference_t> +{ + return task.context; +} + +template +inline bool +isTrackTile(TTask const & task) +{ + return isLastColumn(task) && isLastRow(task); +} + +template +inline bool +isTrackTile(TTask const & task) +{ + return isLastColumn(task) && isLastRow(task); +} + +template +inline void +executeScalar(TTask & task, TDPLocalData & dpLocal) +{ + using TExecTraits = TaskExecutionTraits; + + auto & taskContext = context(task); + // Load the cache from the local data. + auto & dpCache = cache(dpLocal, taskContext.alignmentId); + auto & buffer = taskContext.tileBuffer; + + // Capture the buffer. + typename TExecTraits::TDPScoutState scoutState(buffer.horizontalBuffer[column(task)], + buffer.verticalBuffer[row(task)]); // Task local + + typename TExecTraits::TDPScout scout(scoutState); + + impl::computeTile(dpCache, scout, + taskContext.seqHBlocks[column(task)], + taskContext.seqVBlocks[row(task)], + taskContext.dpSettings.scoringScheme, + taskContext.dpSettings); + // We want to get the state here from the scout. + if(impl::AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScore(scout), maxHostPosition(scout)}, + column(task), + row(task)); + } +} + +template +inline void +printSimdBuffer(TBuffer const & buffer, size_t const l) +{ + for (auto simdHolder : buffer) + { + std::cout << "<"; + unsigned i = 0; + for (; i < l - 1; ++i) + { + std::cout << simdHolder.i1._score[i] << ", "; + } + std::cout << simdHolder.i1._score[i] << ">\n"; + } +} + +#ifdef SEQAN_SIMD_ENABLED +template +inline void +executeSimd(TTasks & tasks, TDPLocalData & dpLocal) +{ + using TTask = typename std::remove_pointer::Type>::type; + using TExecTraits = SimdTaskExecutionTraits; + + auto offset = impl::computeOffset(tasks, TExecTraits{}); + // Has to be adapted to take the correct buffer from the corresponding task. + auto simdBufferH = impl::gatherSimdBuffer(tasks, + [] (auto & task) + { + return &context(task).tileBuffer.horizontalBuffer[column(task)]; + }, + offset, + TExecTraits{}); + auto simdBufferV = impl::gatherSimdBuffer(tasks, + [] (auto & task) + { + return &context(task).tileBuffer.verticalBuffer[row(task)]; + }, + offset, + TExecTraits{}); + + // Does not really make sense. + auto & cache = simdCache(dpLocal, 0); + // Run alignment. + impl::computeSimdBatch(cache, simdBufferH, simdBufferV, tasks, dpLocal, offset, TExecTraits{}); + + // Write back into buffer. 
+ impl::scatterSimdBuffer(tasks, + simdBufferH, + [](auto & task) + { + return &context(task).tileBuffer.horizontalBuffer[column(task)]; + }, + offset, + TExecTraits{}); + impl::scatterSimdBuffer(tasks, + simdBufferV, + [](auto & task) + { + return &context(task).tileBuffer.verticalBuffer[row(task)]; + }, + offset, + TExecTraits{}); +} +#endif // SEQAN_SIMD_ENABLED + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_event.h b/porechop/include/seqan/align_parallel/wavefront_task_event.h new file mode 100644 index 0000000..ccd18dc --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_event.h @@ -0,0 +1,104 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Event to signal end of one alignment instance. 
+class WavefrontTaskEvent +{ +public: + std::mutex mutexLastTask{}; + std::condition_variable conditionLastTask{}; + bool readyLastTask{false}; + + WavefrontTaskEvent() = default; + + WavefrontTaskEvent(WavefrontTaskEvent const &) = delete; + WavefrontTaskEvent(WavefrontTaskEvent &&) = delete; + + WavefrontTaskEvent& operator=(WavefrontTaskEvent const &) = delete; + WavefrontTaskEvent& operator=(WavefrontTaskEvent &&) = delete; + + ~WavefrontTaskEvent() + { + if (!readyLastTask) + { + { + std::lock_guard lck(mutexLastTask); + readyLastTask = true; + } + conditionLastTask.notify_one(); + } + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +inline void +notify(WavefrontTaskEvent & event) +{ + std::lock_guard lck(event.mutexLastTask); + event.readyLastTask = true; + event.conditionLastTask.notify_one(); // We require a strict synchronization between waiting and notifying thread. 
+} + +inline void +wait(WavefrontTaskEvent & event) +{ + std::unique_lock lck(event.mutexLastTask); + if (!event.readyLastTask) + event.conditionLastTask.wait(lck, [&] { return event.readyLastTask; }); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EVENT_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_executor.h b/porechop/include/seqan/align_parallel/wavefront_task_executor.h new file mode 100644 index 0000000..ab0f933 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_executor.h @@ -0,0 +1,146 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +template +struct WavefrontTaskExecutionPolicy; + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Task executor. Manages the execution of single alignment blocks. +template +struct WavefrontTaskExecutor +{ + TResource* _ptrResource{nullptr}; + TWavefrontExecutor * _ptrWavefrontExecutor{nullptr}; + + //NOTE(rrahn) Bug in g++-4.9 prevents us from using as aggregate type. + WavefrontTaskExecutor() = default; + + WavefrontTaskExecutor(TResource * _resource, + TWavefrontExecutor * _wavefrontExecutor) : + _ptrResource{_resource}, + _ptrWavefrontExecutor(_wavefrontExecutor) + {} + + inline void operator()() + { + WavefrontTaskExecutionPolicy::execute(*_ptrResource, *_ptrWavefrontExecutor); + } +}; + +// Policy for no SIMD execution. 
+template +struct WavefrontTaskExecutionPolicy> +{ + + template + inline static void + execute(TResource & task, TWavefrontExecutor & wavefrontExec) + { + using TWaveTaskExec = WavefrontTaskExecutor; + + executeScalar(task, local(wavefrontExec)); + for (auto succ : successor(task)) + { + if (succ && decrementRefCount(*succ) == 0) + spawn(wavefrontExec, TWaveTaskExec{succ, &wavefrontExec}); + } + if (isLastTask(task)) + { + notify(*(context(task).ptrEvent)); + } + } +}; + +// Policy for SIMD execution. +template +struct WavefrontTaskExecutionPolicy> +{ + template + inline static void + execute(TResource & resource, TWavefrontExecutor & wavefrontExec) + { + using TWaveTaskExec = WavefrontTaskExecutor; + + typename TResource::ResultType tasks; + if (!tryPopTasks(tasks, resource)) + return; + + SEQAN_ASSERT(!empty(tasks)); + if (tasks.size() == 1) + executeScalar(*front(tasks), local(wavefrontExec)); + else + executeSimd(tasks, local(wavefrontExec)); + + for (auto task : tasks) + { + for (auto succ : successor(*task)) + { + if (succ && decrementRefCount(*succ) == 0) + { + appendValue(resource, *succ); + spawn(wavefrontExec, TWaveTaskExec{&resource, &wavefrontExec}); + } + } + if (isLastTask(*task)) + { + notify(*(context(*task).ptrEvent)); + } + } + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_EXECUTOR_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_queue.h b/porechop/include/seqan/align_parallel/wavefront_task_queue.h new file mode 100644 index 0000000..d64b61f --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_queue.h @@ 
-0,0 +1,139 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Central task queue used in simd mode to gather multiple blocks to gather full simd registers. +template +class WavefrontTaskQueue +{ +public: + + + // Member Types. + using TQueue = ConcurrentQueue; + using ResultType = std::vector; + using ValueType = TValue; + + // Members. + static constexpr size_t VECTOR_SIZE{VECTOR_SIZE_}; + + TQueue queue; + std::mutex mutexPopQueue; + bool hasNotified{false}; + + // Constructors. 
+ WavefrontTaskQueue() + { + lockWriting(queue); + lockReading(queue); + } + + WavefrontTaskQueue(WavefrontTaskQueue const&) = delete; + WavefrontTaskQueue(WavefrontTaskQueue &&) = delete; + + WavefrontTaskQueue& operator=(WavefrontTaskQueue const &) = delete; + WavefrontTaskQueue& operator=(WavefrontTaskQueue &&) = delete; + + ~WavefrontTaskQueue() + { + if (!hasNotified) + unlockWriting(queue); + unlockReading(queue); + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline bool +tryPopTasks(typename WavefrontTaskQueue::ResultType & tasks, + WavefrontTaskQueue & me) +{ + clear(tasks); + std::lock_guard lck(me.mutexPopQueue); + if (length(me.queue) < WavefrontTaskQueue::VECTOR_SIZE) + { + resize(tasks, 1); + if (!popFront(tasks[0], me.queue, Serial())) + { + return false; + } + } + else + { + for (size_t lane = 0u; lane < VECTOR_SIZE; ++lane) + tasks.push_back(popFront(me.queue, Serial())); + } + return true; +} + +template +inline void +appendValue(WavefrontTaskQueue & me, + TValue & newTask) +{ + appendValue(me.queue, &newTask); +} + +template +inline void +notify(WavefrontTaskQueue & me) +{ + me.hasNotified = true; + unlockWriting(me.queue); +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_DP_WAVEFRONT_TASK_QUEUE_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h b/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h new file mode 100644 index 0000000..c246796 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_scheduler.h @@ -0,0 +1,218 @@ +// ========================================================================== +// SeqAn - The Library for 
Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Scheduler for wavefront tasks. +class WavefrontTaskScheduler +{ +public: + + //------------------------------------------------------------------------- + // Memeber Types + + using TWrapper = std::function; + using TTaskQueue = ConcurrentQueue; + + //------------------------------------------------------------------------- + // Member Variables + + ThreadPool _threadPool; + TTaskQueue _taskQueue; + + unsigned _writerCount; + + std::mutex _mutexPushException; + std::vector _exceptionPointers; + std::atomic _isValid{true}; + + std::function job = [this] () + { + lockReading(_taskQueue); + waitForFirstValue(_taskQueue); // Wait for all writers to be setup. + + std::function _dummy = [] () + { // TODO(rrahn): Could throw exception to signal something went terribly wrong. + SEQAN_ASSERT_FAIL("Trying to exceute empty wavefront task in a thread"); + }; + TWrapper task{_dummy}; + + while (true) + { + if (!popFront(task, _taskQueue)) + break; // Empty queue and no writer registered. + + try + { + task(); // Execute the task; + } + catch (...) + { // Catch exception, and signal failure. Continue running until queue is empty. 
+ { + std::lock_guard lck(_mutexPushException); + _exceptionPointers.push_back(std::current_exception()); + } + _isValid.store(false, std::memory_order_release); + } + } + unlockReading(_taskQueue); + }; + + //------------------------------------------------------------------------- + // Constructor + + WavefrontTaskScheduler(size_t const threadCount, size_t const writerCount) : + _writerCount(writerCount) + { + + for (unsigned i = 0; i < threadCount; ++i) + { + spawn(_threadPool, job); + } + setCpuAffinity(_threadPool, 0, 1); + } + + WavefrontTaskScheduler(size_t const threadCount) : WavefrontTaskScheduler(threadCount, 0) + {} + + WavefrontTaskScheduler(WavefrontTaskScheduler const &) = delete; + WavefrontTaskScheduler(WavefrontTaskScheduler &&) = delete; + + //------------------------------------------------------------------------- + // Member Functions + + WavefrontTaskScheduler& operator=(WavefrontTaskScheduler const &) = delete; + WavefrontTaskScheduler& operator=(WavefrontTaskScheduler &&) = delete; + + //------------------------------------------------------------------------- + // Destructor + + ~WavefrontTaskScheduler() + {} + // In destructor of thread pool we wait for the outstanding alignments to be finished + // and then continue destruction of the remaining members and cleaning up the stack. + // Note the number of writers must be set to 0, for the queue to stop spinning. 
+}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct SchedulerTraits; + +template <> +struct SchedulerTraits +{ + using TWrapper_ = typename WavefrontTaskScheduler::TWrapper; + using TTask = TWrapper_; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +inline void +setWriterCount(WavefrontTaskScheduler & me, size_t const count) noexcept +{ + me._writerCount = count; +} + +inline void +lockWriting(WavefrontTaskScheduler & me) noexcept +{ + lockWriting(me._taskQueue); +} + +inline void +unlockWriting(WavefrontTaskScheduler & me) noexcept +{ + unlockWriting(me._taskQueue); +} + +inline void +waitForWriters(WavefrontTaskScheduler & me) noexcept +{ + waitForWriters(me._taskQueue, me._writerCount); +} + +inline bool +isValid(WavefrontTaskScheduler & me) noexcept +{ + return me._isValid.load(std::memory_order_acquire); +} + +inline void +scheduleTask(WavefrontTaskScheduler & me, + typename SchedulerTraits::TTask task) +{ + if (!isValid(me)) + { // TODO(rrahn): Improve error handling. 
+ throw std::runtime_error("Invalid Task Scheduler"); + } + appendValue(me._taskQueue, std::move(task)); +} + +inline void +wait(WavefrontTaskScheduler & me) +{ + SEQAN_ASSERT(me._taskQueue.writerCount == 0); + + join(me._threadPool); + + SEQAN_ASSERT(empty(me._taskQueue)); + SEQAN_ASSERT(me._taskQueue.readerCount == 0); +} + +inline auto +getExceptions(WavefrontTaskScheduler & me) +{ + return me._exceptionPointers; +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_SCHEDULER_H_ diff --git a/porechop/include/seqan/align_parallel/wavefront_task_util.h b/porechop/include/seqan/align_parallel/wavefront_task_util.h new file mode 100644 index 0000000..1208055 --- /dev/null +++ b/porechop/include/seqan/align_parallel/wavefront_task_util.h @@ -0,0 +1,557 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ +#define INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ + +namespace seqan +{ +namespace impl +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// Helper meta-function to extract the correct DP Property. 
+template +struct AlgorithmProperty +{ + template + inline static bool + isTrackingEnabled(TTask const & tile) + { + return isLastColumn(tile) && isLastRow(tile); + } +}; + +template +struct AlgorithmProperty> +{ + template + inline static bool + isTrackingEnabled(TTask const & tile) + { + return (IsFreeEndGap_::VALUE && inLastColumn(tile)) || + (IsFreeEndGap_::VALUE && inLastRow(tile)) || + (inLastColumn(tile) && inLastRow(tile)); + } +}; + +template +struct AlgorithmProperty> +{ + template + inline static bool + isTrackingEnabled(TTask const & /*tile*/) + { + return true; + } +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function computeTile() +// ---------------------------------------------------------------------------- + +// Wrapper function to call alignment core for the specific block. +template +inline void +computeTile(DPContext & dpContext, + TDPScout & scout, + TSequenceH const & seqH, + TSequenceV const & seqV, + TScoringScheme const & scoringScheme, + TDPSettings const & /*settings*/) +{ + using TDPTraits = typename TDPSettings::TTraits; + + using TScoreMatrixSpec = typename DefaultScoreMatrixSpec_::Type; + + using TDPScoreMatrix = DPMatrix_; + using TDPTraceMatrix = DPMatrix_; + + using TDPScoreMatrixNavigator = DPMatrixNavigator_; + using TDPTraceMatrixNavigator = DPMatrixNavigator_, NavigateColumnWise>; + + using TDPProfile = DPProfile_; + + // Setup the score and trace matrix. 
+ TDPScoreMatrix dpScoreMatrix; + TDPTraceMatrix dpTraceMatrix; + + setLength(dpScoreMatrix, +DPMatrixDimension_::HORIZONTAL, length(seqH) + 1); + setLength(dpScoreMatrix, +DPMatrixDimension_::VERTICAL, length(seqV) + 1); + + setLength(dpTraceMatrix, +DPMatrixDimension_::HORIZONTAL, length(seqH) + 1); + setLength(dpTraceMatrix, +DPMatrixDimension_::VERTICAL, length(seqV) + 1); + + // Resue the buffer from the cache. + setHost(dpScoreMatrix, getDpScoreMatrix(dpContext)); + setHost(dpTraceMatrix, getDpTraceMatrix(dpContext)); + + resize(dpScoreMatrix); + // We do not need to allocate the memory for the trace matrix if the traceback is disabled. + if /*constexpr*/(IsTracebackEnabled_::VALUE) + { + static_assert(std::is_same::value, "Traceback not implemented!"); + resize(dpTraceMatrix); + } + + // Initialize the navigators. + TDPScoreMatrixNavigator dpScoreMatrixNavigator{dpScoreMatrix, DPBandConfig{}}; + TDPTraceMatrixNavigator dpTraceMatrixNavigator{dpTraceMatrix, DPBandConfig{}}; + + // Execute the alignment. + _computeAlignmentImpl(scout, dpScoreMatrixNavigator, dpTraceMatrixNavigator, seqH, seqV, + scoringScheme, DPBandConfig{}, TDPProfile(), NavigateColumnWise{}); +} + +#ifdef SEQAN_SIMD_ENABLED +// Some utility functions. 
+template +inline auto +doComputeOffset(TTasks const &tasks, + TScoreValueScalar const & /*scalarScore*/, + TScoreValueSimd const & /*simdScore*/) +{ + String offset; + resize(offset, length(tasks), std::numeric_limits::min(), Exact()); + + size_t pos = 0; + + for (auto task : tasks) + { + offset[pos] = front(context(*task).tileBuffer.horizontalBuffer[column(*task)]).i1._score; + ++pos; + } + + return offset; +} + +template +inline auto +doComputeOffset(TTasks const &tasks, + TScoreValue const & /*scalarScore*/, + TScoreValue const & /*simdScore*/) +{ + String offset; + resize(offset, length(tasks), 0, Exact()); + return offset; +} + +template +inline auto +computeOffset(TTasks const &tasks, TTaskTraits const & /*traits*/) +{ + using TDPSettings = typename TTaskTraits::TDPSettings; + using TScoreValueScalar = typename Value::Type; + using TScoreValueSimd = typename Value::Type; + using TDPSimdValue = typename Value::Type; + + return doComputeOffset(tasks, TScoreValueScalar{}, TDPSimdValue{}); +} + +template +inline void +loadIntoSimd(Pair & target, + TTasks const & tasks, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + LinearGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + auto zipCont = makeZipView(tasks, scoreVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + auto val = (length(buffer) > pos) ? buffer[pos] : typename std::decay::type{}; + + // We might access values out of bounds here. 
+ std::get<1>(tuple) = static_cast(val.i1._score - std::get<3>(tuple)); + std::get<2>(tuple) = val.i2; + }); + + target.i1._score = load(&scoreVec[0]); + target.i2 = load(&traceVec[0]); +} + +template +inline void +loadIntoSimd(Pair & target, + TTasks const & tasks, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + AffineGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreHorVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVerVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + auto zipCont = makeZipView(tasks, scoreVec, scoreHorVec, scoreVerVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + auto val = (length(buffer) > pos) ? buffer[pos] : typename std::decay::type{}; + using TDPCellVar = decltype(val.i1); + using TDPCell16 = DPCell_; + + // We might access values out of bounds here. + std::get<1>(tuple) = static_cast(val.i1._score - std::get<5>(tuple)); + + std::get<2>(tuple) = + (val.i1._horizontalScore <= DPCellDefaultInfinity::VALUE) ? + DPCellDefaultInfinity::VALUE : + static_cast(val.i1._horizontalScore - std::get<5>(tuple)); + std::get<3>(tuple) = + (val.i1._verticalScore <= DPCellDefaultInfinity::VALUE) ? 
+ DPCellDefaultInfinity::VALUE : + static_cast(val.i1._verticalScore - std::get<5>(tuple)); + std::get<4>(tuple) = val.i2; + }); + + target.i1._score = load(&scoreVec[0]); + target.i1._horizontalScore = load(&scoreHorVec[0]); + target.i1._verticalScore = load(&scoreVerVec[0]); + target.i2 = load(&traceVec[0]); +} + +template +inline void +storeIntoBuffer(TTasks & tasks, + Pair const & source, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + LinearGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + storeu(&scoreVec[0], source.i1._score); + storeu(&traceVec[0], source.i2); + + auto zipCont = makeZipView(tasks, scoreVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, getBuffer = std::move(getBuffer)] (auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + if (length(buffer) > pos) + { + auto & pair = buffer[pos]; + pair.i1._score = std::get<1>(tuple) + std::get<3>(tuple); + pair.i2 = std::get<2>(tuple); + } + }); +} + +template +inline void +storeIntoBuffer(TTasks & tasks, + Pair const & source, + TPos const pos, + TFunc && getBuffer, + TOffset const & offset, + AffineGaps const & /*unsused*/) +{ + using TSimdVec = typename Value::Type; + using TVecVal = typename Value::Type; + + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreHorVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> scoreVerVec; + alignas(sizeof(TSimdVec)) std::array::VALUE> traceVec; + + storeu(&scoreVec[0], source.i1._score); + storeu(&scoreHorVec[0], source.i1._horizontalScore); + storeu(&scoreVerVec[0], source.i1._verticalScore); + storeu(&traceVec[0], source.i2); + + auto zipCont = makeZipView(tasks, scoreVec, scoreHorVec, scoreVerVec, traceVec, offset); + + std::for_each(begin(zipCont), end(zipCont), + [&, 
getBuffer = std::move(getBuffer)](auto tuple) + { + auto & buffer = *getBuffer(*std::get<0>(tuple)); + if (length(buffer) > pos) + { + auto & pair = buffer[pos]; + pair.i1._score = std::get<1>(tuple) + std::get<5>(tuple); + pair.i1._horizontalScore = std::get<2>(tuple) + std::get<5>(tuple); + pair.i1._verticalScore = std::get<3>(tuple) + std::get<5>(tuple); + pair.i2 = std::get<4>(tuple); + } + }); +} + +template +inline auto +gatherSimdBuffer(TTasks const & tasks, + TFunc && getBuffer, + TOffset const & offset, + TExecTraits const & /*traits*/) +{ + // Check for valid simd length. + SEQAN_ASSERT_EQ(LENGTH::VALUE, length(tasks)); + + String > simdSet; + + auto maxLength = length(*getBuffer(*tasks[0])); + std::for_each(begin(tasks, Standard()) + 1, end(tasks, Standard()), + [&](auto & task) + { + auto len = length(*getBuffer(*task)); + maxLength = (len > maxLength) ? len : maxLength; + }); + + resize(simdSet, maxLength, Exact()); + for (unsigned i = 0; i < length(simdSet); ++i) + { + loadIntoSimd(simdSet[i], tasks, i, std::forward(getBuffer), offset, typename TExecTraits::TGapType()); + } + return simdSet; +} + +template +inline void +scatterSimdBuffer(TTasks & tasks, + String const & simdSet, + TFunc && getBuffer, + TOffset const & offset, + TExecTraits const & /*traits*/) +{ + for (unsigned i = 0; i < length(simdSet); ++i) + { + storeIntoBuffer(tasks, simdSet[i], i, std::forward(getBuffer), offset, typename TExecTraits::TGapType()); + } +} + +// Compute tasks as simd alignment. +template +inline void +computeSimdBatch(DPContext & cache, + TSimdBufferH & bufferH, + TSimdBufferV & bufferV, + TTasks & tasks, + TDPLocal & dpLocal, + TOffset & offset, + TExecTraits const & /*traits*/) +{ + // Now what? + using TSeqH = typename TExecTraits::TSeqH; + using TSeqV = typename TExecTraits::TSeqV; + using TSimdVec = typename TExecTraits::TScoreValue; + + // Prepare sequence set. 
+ StringSet > depSetH; + StringSet > depSetV; + bool allSameLength = true; + auto ptrTask = tasks[0]; + auto lenH = length(context(*ptrTask).seqHBlocks[column(*ptrTask)]); + auto lenV = length(context(*ptrTask).seqVBlocks[row(*ptrTask)]); + + for (auto ptrTask : tasks) + { + appendValue(depSetH, context(*ptrTask).seqHBlocks[column(*ptrTask)]); + appendValue(depSetV, context(*ptrTask).seqVBlocks[row(*ptrTask)]); + if (lenH != length(context(*ptrTask).seqHBlocks[column(*ptrTask)]) || + lenV != length(context(*ptrTask).seqVBlocks[row(*ptrTask)])) + { + allSameLength = false; + } + } + + // Dummy trace set. + StringSet > trace; // We need to instantiate it, but it will not be used. + + // We can compute with one simd score, but might collect them here. + auto const & scoringScheme = context(*tasks[0]).dpSettings.simdScoringScheme; + + // Preapare and run alingment. + String > stringSimdH; + String > stringSimdV; + + if (allSameLength) + { + using TScoutState = DPScoutState_>; + TScoutState scoutState(bufferH, bufferV); + _prepareSimdAlignment(stringSimdH, stringSimdV, depSetH, depSetV, scoutState); + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; + + TDPScout dpScout(scoutState); + // We rather want to set + computeTile(cache, dpScout, stringSimdH, stringSimdV, scoringScheme, context(*tasks[0]).dpSettings); + + // Now we need to run the scout check for all tasks. + + // We want to get the state here from the scout. + for (size_t pos = 0; pos < length(tasks); ++pos) + { + auto & task = *tasks[pos]; + if (AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + _setSimdLane(dpScout, pos); + auto & taskContext = context(task); + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScoreAt(dpScout) + offset[pos], 0u}, + column(task), + row(task)); + } + } + } + else + { + using TDPSettings = std::decay_t; + using TDPTraits = typename TDPSettings::TTraits; + + using TDPProfile = DPProfile_; + + using TSimdScoutTrait = SimdAlignVariableLengthTraits; + using TScoutState = DPScoutState_>>; + + String lengthsH; + String lengthsV; + + TScoutState scoutState(bufferH, bufferV); + _prepareSimdAlignment(stringSimdH, stringSimdV, depSetH, depSetV, lengthsH, lengthsV, scoutState); + + using TScoutSpec = typename ScoutSpecForAlignmentAlgorithm_::Type; + using TDPScout = DPScout_; + + TDPScout dpScout(scoutState); + computeTile(cache, dpScout, stringSimdH, stringSimdV, scoringScheme, context(*tasks[0]).dpSettings); + // We want to get the state here from the scout. + for (size_t pos = 0; pos < length(tasks); ++pos) + { + auto & task = *tasks[pos]; + if (AlgorithmProperty::isTrackingEnabled(task)) + { + // TODO(rrahn): Implement the interface. 
+ // TODO(rrahn): Make it a member function of a policy so that we don't have to implement the specifics here + _setSimdLane(dpScout, pos); + auto & taskContext = context(task); + updateMax(intermediate(dpLocal, taskContext.alignmentId), + {maxScoreAt(dpScout) + offset[pos], 0u}, + column(task), + row(task)); + } + } + } +} +#endif // SEQAN_SIMD_ENABLED +} // namespace impl +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_PARALLEL_WAVEFRONT_TASK_UTIL_H_ diff --git a/porechop/include/seqan/simd/simd_base.h b/porechop/include/seqan/simd/simd_base.h new file mode 100644 index 0000000..59f5dcd --- /dev/null +++ b/porechop/include/seqan/simd/simd_base.h @@ -0,0 +1,390 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ + +namespace seqan +{ + +// a metafunction returning the biggest supported SIMD vector +template +struct SimdVector; + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct Value const> : + public Value > +{}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +struct LENGTH const> : + public LENGTH > +{}; + +// define a concept and its models +// they allow us to define generic vector functions +SEQAN_CONCEPT(SimdMaskVectorConcept, (TSimdMaskVector)) +{ + typedef typename Reference::Type TReference; + + TSimdMaskVector a; + + SEQAN_CONCEPT_USAGE(SimdMaskVectorConcept) + { + static_assert(IsSameType::VALUE, "Type of a[] should be the same as the reference type of a."); + } +}; + +SEQAN_CONCEPT_REFINE(SimdVectorConcept, (TSimdVector), (SimdMaskVectorConcept)) +{ + SEQAN_CONCEPT_USAGE(SimdVectorConcept) + {} +}; + +template +struct SimdMaskVectorImpl { + using Type = Nothing; +}; + +/** + * SimdMaskVector is the return type of all logical operations of simd vectors + * like comparisons. 
+ * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdMaskVector = SimdMaskVector::Type; + * + * TSimdVector vec1 {2, 4, 8, 16}, vec2 {16, 8, 4, 2}; + * TSimdMaskVector cmp = vec1 > vec2; // cmp = {false, false, true, true} + * ``` + */ +template +struct SimdMaskVector : SimdMaskVectorImpl >::Type > +{ +}; + +template +struct SimdSwizzleVectorImpl; + +/** + * SimdSwizzleVector is needed for shuffleVector() as index type. + * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdSwizzleVector = SimdSwizzleVector::Type; + * + * TSimdVector vec {2, 4, 8, 16}, res; + * TSimdSwizzleVector swizzle {3, 2, 0, 2}; + * + * res = shuffleVector(vec, swizzle); // res = {16, 8, 2, 8} + * ``` + */ +template +struct SimdSwizzleVector : SimdSwizzleVectorImpl >::Type > +{}; + +/** + * ``` + * getValue(a, pos); + * + * // same as + * + * a[pos]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Value::Type) +getValue(TSimdVector const & vector, TPosition const pos); + +/** + * ``` + * value(a, pos); + * + * // same as + * + * a[pos]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Value::Type) +value(TSimdVector const & vector, TPosition const pos); + +/** + * ``` + * assignValue(a, pos, value); + * + * // same as + * + * a[pos] = value; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +assignValue(TSimdVector & vector, TPosition const pos, TValue2 const value); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +transpose(TSimdVector matrix[ROWS]); + +/** + * ``` + * clearVector(a); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = 0; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector); + +/** + * ``` + * auto c = createVector(a); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = a; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +createVector(TValue const x); + +/** + * ``` + * fillVector(a, 
1, 3, 23, 1337); + * + * // same as + * + * a[0] = 1; + * a[1] = 3; + * a[2] = 13; + * a[3] = 1337; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args); + +/** + * ``` + * auto c = cmpEq(a, b); + * + * // same as + * + * auto c = a == b; + * ``` + * + * NOTE: + * The type of c might change from unsigned to signed if auto is used + * + * ``` + * using TSimdVector = SimdVector::Type; + * TSimdVector a, b; + * + * auto c = a == b; // type of c might change to SimdVector::Type + * TSimdVector d = a == b; // has the same type + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b); + +/** + * ``` + * auto c = cmpGt(a, b); + * + * // same as + * + * auto c = a > b; + * ``` + * + * NOTE: + * The type of c might change from unsigned to signed if auto is used + * + * ``` + * using TSimdVector = SimdVector::Type; + * using TSimdMaskVector = SimdMaskVector::Type; + * TSimdVector a, b; + * + * auto c = a > b; // type of c might change to SimdVector::Type + * TSimdMaskVector d = a > b; // has the same type + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) 
+operator|=(TSimdVector & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b); + +/** + * ``` + * c = andNot(a, b); + * + * // same as + * + * for(auto i = 0u; i < LENGTH; ++i) + * c[i] = (~a[i]) & b[i]; + * ``` + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask); + +/** + * Unaligned store, i.e. memAddr does not need to be aligned (e.g. SEE4.2 16byte + * aligned, AVX2 32byte aligned). + */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec); + +/** + * Aligned load, i.e. memAddr MUST be aligned (e.g. SEE4.2 16byte + * aligned, AVX2 32byte aligned). 
+ */ +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices); + +// NOTE(rmaerker): Make this function available, also if SIMD is not enabled. +template +inline SEQAN_FUNC_ENABLE_IF(Is>, TSimdVector) +createVector(TValue const x) +{ + return x; +} + +// -------------------------------------------------------------------------- +// Function print() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, std::ostream &) +print(std::ostream & stream, TSimdVector const & vector) +{ + stream << '<'; + for (int i = 0; i < LENGTH::VALUE; ++i) + stream << '\t' << vector[i]; + stream << "\t>\n"; + return stream; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl.h b/porechop/include/seqan/simd/simd_base_seqan_impl.h new file mode 100644 index 0000000..6f98254 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl.h @@ -0,0 +1,154 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ + +#include +#include + +#if defined(PLATFORM_WINDOWS_VS) + /* Microsoft C/C++-compatible compiler */ + #include +#elif defined(PLATFORM_GCC) && (defined(__x86_64__) || defined(__i386__)) + /* GCC-compatible compiler, targeting x86/x86-64 */ + #include +#elif defined(SEQAN_SIMD_ENABLED) + #pragma message "You are trying to build with -DSEQAN_SIMD_ENABLED, which might be " \ + "auto-defined if AVX or SSE was enabled (e.g. -march=native, -msse4, ...), " \ + "but we only support x86/x86-64 architectures for SIMD vectorization! " \ + "You might want to use UME::SIMD (https://github.com/edanor/umesimd) combined " \ + "with -DSEQAN_UMESIMD_ENABLED for a different SIMD backend." 
+#endif + +namespace seqan { + +#ifdef COMPILER_LINTEL +#include +#define SEQAN_VECTOR_CAST_(T, v) static_cast::type>(v) +#define SEQAN_VECTOR_CAST_LVALUE_(T, v) static_cast(v) +#else +#define SEQAN_VECTOR_CAST_(T, v) reinterpret_cast(v) +#define SEQAN_VECTOR_CAST_LVALUE_(T, v) reinterpret_cast(v) +#endif + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Useful Macros +// ============================================================================ + +#define SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +getValue(TSimdVector & vector, TPosition const pos) \ +{ \ + return vector[pos]; \ +} + +#define SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +value(TSimdVector & vector, TPosition const pos) \ +{ \ + return getValue(vector, pos); \ +} + +#define SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ +template \ +inline void \ +assignValue(TSimdVector & vector, TPosition const pos, TValue2 const value) \ +{ \ + vector[pos] = value; \ +} + +// Only include following code if simd instructions are enabled. 
+#ifdef SEQAN_SIMD_ENABLED + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// a metafunction returning the biggest supported SIMD vector +template +struct SimdVector; + +// internal struct to specialize for vector parameters +// VEC_SIZE = Vector size := sizeof(vec) +// LENGTH = number of elements := VEC_SIZE / sizeof(InnerValue::Type) +// SCALAR_TYPE = the scalar type of the vector (maybe optional, if the type +// doesn't matter for the operation) +template +struct SimdParams_ +{}; + +// internal traits meta-function to capture correct the mask type. +template +struct SimdVectorTraits +{ + using MaskType = TSimdVector; +}; + +// internal struct to specialize for matrix parameters +template +struct SimdMatrixParams_ +{}; + +#define SEQAN_DEFINE_SIMD_VECTOR_(TSimdVector, TValue, SIZEOF_VECTOR) \ + typedef TValue TSimdVector __attribute__ ((__vector_size__(SIZEOF_VECTOR))); \ + template <> struct SimdVector { typedef TSimdVector Type; }; \ + template <> struct Value { typedef TValue Type; }; \ + template <> struct Value: public Value {}; \ + template <> struct LENGTH { enum { VALUE = SIZEOF_VECTOR / sizeof(TValue) }; }; \ + template <> struct LENGTH: public LENGTH {}; \ + SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector) \ + SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector), (SimdVectorConcept)); \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector const), (SimdVectorConcept)); +#endif // SEQAN_SIMD_ENABLED + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h b/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h 
new file mode 100644 index 0000000..cae230f --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_avx2.h @@ -0,0 +1,1492 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ + +namespace seqan { + +// SimdParams_<32, 32>: 256bit = 32 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32Char, char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32SChar, signed char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32UChar, unsigned char, 32) + +// SimdParams_<32, 16>: 256bit = 16 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Short, short, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UShort, unsigned short, 32) + +// SimdParams_<32, 8>: 256bit = 8 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Int, int, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UInt, unsigned int, 32) + +// SimdParams_<32, 4>: 256bit = 4 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int64, int64_t, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt64, uint64_t, 32) + +// ============================================================================ +// Functions +// ============================================================================ + +// ============================================================================ +// AVX/AVX2 wrappers (256bit vectors) +// ============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (256bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 32>) +{ + vector = 
SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi8(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 16>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi16(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 8>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi32(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, SimdParams_<32, 4>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi64x(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 32>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi8(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 16>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi16(std::get(args)...)); +} +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 8>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setr_epi32(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, std::index_sequence const &, SimdParams_<32, 4>) +{ + // reverse argument list 0, 1, 2, 3 -> 3, 2, 1, 0 + // NOTE(marehr): Intel linux fails to reverse argument list and only + // _mm256_set_epi64x has no reverse equivalent + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set_epi64x(std::get(args)...)); +} + +// -------------------------------------------------------------------------- +// _clearVector (256bit) +// -------------------------------------------------------------------------- + 
+template +inline void _clearVector(TSimdVector & vector, SimdParams_<32, L>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_setzero_si256()); +} + +// -------------------------------------------------------------------------- +// _createVector (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi8(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi16(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi32(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_< 32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_set1_epi64x(x)); +} + +// -------------------------------------------------------------------------- +// _cmpEq (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + 
return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _cmpGt (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80 = ~0x7F (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi8( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi8(~0x7F)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi8(~0x7F)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000 = ~0x7FFF (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi16( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi16(~0x7FFF)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi16(~0x7FFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80000000 = ~0x7FFFFFFF (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi32( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_set1_epi32(~0x7FFFFFFF)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi32(~0x7FFFFFFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_cmpgt_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000000000000000ul = ~0x7FFFFFFFFFFFFFFFul (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpgt_epi64( + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, a) ,_mm256_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)), + _mm256_xor_si256(SEQAN_VECTOR_CAST_(const __m256i&, b), _mm256_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)))); +} + +// -------------------------------------------------------------------------- +// _bitwiseOr (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseOr(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_or_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAnd (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAnd(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_and_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAndNot (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_andnot_si256(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseNot (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), 
_mm256_setzero_si256())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); + +} +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), _mm256_setzero_si256())); +} + +// -------------------------------------------------------------------------- +// _divide (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi8(a, _mm256_set1_epi8(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi16(a, _mm256_set1_epi16(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi32(a, _mm256_set1_epi32(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_div_epi64(a, _mm256_set1_epi64x(b))); +} + +// -------------------------------------------------------------------------- +// _add (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + 
SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_add_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// -------------------------------------------------------------------------- +// _sub (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_sub_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +// 
-------------------------------------------------------------------------- +// _mult (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<32, 32>) +{ + SEQAN_SKIP_TEST; + SEQAN_ASSERT_FAIL("AVX2 intrinsics for multiplying 8 bit values not implemented!"); + return a; +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_mullo_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_mullo_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<32, 4>) +{ + SEQAN_SKIP_TEST; + SEQAN_ASSERT_FAIL("AVX2 intrinsics for multiplying 64 bit values not implemented!"); + return a; +} + +// -------------------------------------------------------------------------- +// _max (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + 
_mm256_max_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_max_epu64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + + +// -------------------------------------------------------------------------- +// _min (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, int8_t>) +{ + return 
SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 32, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu8(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 16, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu16(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 8, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu32(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, int64_t>) +{ + #if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epi64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<32, 4, uint64_t>) +{ + #if defined(__AVX512VL__) + return 
SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_min_epu64(SEQAN_VECTOR_CAST_(const __m256i&, a), + SEQAN_VECTOR_CAST_(const __m256i&, b))); + #else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); + #endif // defined(__AVX512VL__) +} + +// -------------------------------------------------------------------------- +// _blend (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm256_blendv_epi8(SEQAN_VECTOR_CAST_(const __m256i &, a), + SEQAN_VECTOR_CAST_(const __m256i &, b), + SEQAN_VECTOR_CAST_(const __m256i &, mask))); +} + +// -------------------------------------------------------------------------- +// _storeu (256bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector const & vec, SimdParams_<32, L>) +{ + _mm256_storeu_si256((__m256i*)memAddr, SEQAN_VECTOR_CAST_(const __m256i&, vec)); +} + +// ---------------------------------------------------------------------------- +// Function _load() 256bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<32, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_load_si256((__m256i const *) memAddr)); +} + +// -------------------------------------------------------------------------- +// _shiftRightLogical (256bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi16(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm) & _mm256_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & 
vector, const int imm, SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi16(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi32(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm256_srli_epi64(SEQAN_VECTOR_CAST_(const __m256i &, vector), imm)); +} + +// -------------------------------------------------------------------------- +// Extend sign from integer types 256bit +// -------------------------------------------------------------------------- + +inline __m256i +seqan_mm256_i16sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xff00) + _mm256_sub_epi16( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi16(0x80) + ), + _mm256_set1_epi16(1) + ), + _mm256_set1_epi16(static_cast(0xff00u)) + ) + ); +} + +inline __m256i +seqan_mm256_i32sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffff00u) + _mm256_sub_epi32( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi32(0x80) + ), + _mm256_set1_epi32(1) + ), + _mm256_set1_epi32(static_cast(0xffffff00u)) + ) + ); +} + +inline __m256i +seqan_mm256_i32sign_extend_epis16(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffff0000u) + _mm256_sub_epi32( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x8000 (select msb) + v, + _mm256_set1_epi32(0x8000) + ), + _mm256_set1_epi32(1) 
+ ), + _mm256_set1_epi32(static_cast(0xffff0000u)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis8(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffffff00ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80 (select msb) + v, + _mm256_set1_epi64x(0x80) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffffffffff00ul)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis16(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffff0000ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x8000 (select msb) + v, + _mm256_set1_epi64x(0x8000) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffffffff0000ul)) + ) + ); +} + +inline __m256i +seqan_mm256_i64sign_extend_epis32(__m256i const & v) +{ + return _mm256_or_si256( // extend sign (v | hi-bits) + v, + _mm256_and_si256( // select hi-bits (hi-bits = msk & 0xffffffffffff0000ul) + _mm256_sub_epi64( // msk = msb - 1 + _mm256_andnot_si256( //msb = ~v & 0x80000000 (select msb) + v, + _mm256_set1_epi64x(0x80000000) + ), + _mm256_set1_epi64x(1) + ), + _mm256_set1_epi64x(static_cast(0xffffffff00000000ul)) + ) + ); +} + +// -------------------------------------------------------------------------- +// _gather (256bit) +// -------------------------------------------------------------------------- + +template +inline __m256i +seqan_mm256_i8gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + // mem: ( 0, 3, 6, 9 | 12, 15, 18, 21 | 24, 27, 30, 33 | 36, 39, 42, 45 || 48, 51, 54, 57 | 60, 63, 66, 69 | 72, 75, 78, 81 | 84, 87, 90, 93) + // idx: (31, 30, 29, 28 | 27, 26, 25, 24 | 23, 22, 21, 20 | 19, 18, 17, 16 || 15, 14, 13, 12 | 11, 10, 9, 8 | 7, 6, 5, 4 | 3, 2, 1, 0) 
+ // pack: (93, 90, 87, 84 | 81, 78, 75, 72 | 69, 66, 63, 60 | 57, 54, 51, 48 || 45, 42, 39, 36 | 33, 30, 27, 24 | 21, 18, 15, 12 | 9, 6, 3, 0) + return _mm256_packus_epi16( + // pckLow: (93, 0, 90, 0 | 87, 0, 84, 0 | 81, 0, 78, 0 | 75, 0, 72, 0 || 45, 0, 42, 0 | 39, 0, 36, 0 | 33, 0, 30, 0 | 27, 0, 24, 0) + _mm256_packus_epi16( + // mskLL: (93, 0, 0, 0 | 90, 0, 0, 0 | 87, 0, 0, 0 | 84, 0, 0, 0 || 45, 0, 0, 0 | 42, 0, 0, 0 | 39, 0, 0, 0 | 36, 0, 0, 0) + _mm256_and_si256( + // gtrLL: (93, 31, 30, 29 | 90, 93, 31, 30 | 87, 90, 93, 31 | 84, 87, 90, 93 || 45, 48, 51, 54 | 42, 45, 48, 51 | 39, 42, 45, 48 | 36, 39, 42, 45) + _mm256_i32gather_epi32( + (const int *) memAddr, + // lowlow: (31, 0, 0, 0 | 30, 0, 0, 0 | 29, 0, 0, 0 | 28, 0, 0, 0 || 15, 0, 0, 0 | 14, 0, 0, 0 | 13, 0, 0, 0 | 12, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0100000000, ~0xFF000000FFl | 0x0300000002, + ~0xFF000000FFl | 0x0100000000, ~0xFF000000FFl | 0x0300000002 + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ), + // mskLH: (81, 0, 0, 0 | 78, 0, 0, 0 | 75, 0, 0, 0 | 72, 0, 0, 0 || 33, 0, 0, 0 | 30, 0, 0, 0 | 27, 0, 0, 0 | 24, 0, 0, 0) + _mm256_and_si256( + // gtrLH: (81, 84, 87, 90 | 78, 81, 84, 87 | 75, 78, 81, 84 | 72, 75, 78, 81 || 33, 36, 39, 42 | 30, 33, 36, 39 | 27, 30, 33, 36 | 24, 27, 30, 33) + _mm256_i32gather_epi32( + (const int *) memAddr, + // lowhig: (27, 0, 0, 0 | 26, 0, 0, 0 | 25, 0, 0, 0 | 24, 0, 0, 0 || 11, 0, 0, 0 | 10, 0, 0, 0 | 9, 0, 0, 0 | 8, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0500000004, ~0xFF000000FFl | 0x0700000006, + ~0xFF000000FFl | 0x0500000004, ~0xFF000000FFl | 0x0700000006 + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ) + ), + // pckHih: (69, 0, 66, 0 | 63, 0, 60, 0 | 57, 0, 54, 0 | 51, 0, 48, 0 || 21, 0, 18, 0 | 15, 0, 12, 0 | 9, 0, 6, 0 | 3, 0, 0, 0) + _mm256_packus_epi16( + // mskHL: (69, 0, 0, 0 | 66, 0, 0, 0 | 63, 0, 0, 0 | 60, 0, 0, 0 || 21, 0, 0, 0 | 18, 0, 0, 0 | 15, 0, 0, 0 | 12, 0, 0, 0) + 
_mm256_and_si256( + // gtrHL: (69, 72, 75, 78 | 66, 69, 72, 75 | 63, 66, 69, 72 | 60, 63, 66, 69 || 21, 24, 27, 30 | 18, 21, 24, 27 | 15, 18, 21, 24 | 12, 15, 18, 21) + _mm256_i32gather_epi32( + (const int *) memAddr, + // higlow: (23, 0, 0, 0 | 22, 0, 0, 0 | 21, 0, 0, 0 | 20, 0, 0, 0 || 7, 0, 0, 0 | 6, 0, 0, 0 | 5, 0, 0, 0 | 4, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0900000008, ~0xFF000000FFl | 0x0B0000000A, + ~0xFF000000FFl | 0x0900000008, ~0xFF000000FFl | 0x0B0000000A + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ), + // mskHH: (57, 0, 0, 0 | 54, 0, 0, 0 | 51, 0, 0, 0 | 48, 0, 0, 0 || 9, 0, 0, 0 | 6, 0, 0, 0 | 3, 0, 0, 0 | 0, 0, 0, 0) + _mm256_and_si256( + // gtrHH: (57, 60, 63, 66 | 54, 57, 60, 63 | 51, 54, 57, 60 | 48, 51, 54, 57 || 9, 12, 15, 18 | 6, 9, 12, 15 | 3, 6, 9, 12 | 0, 3, 6, 9) + _mm256_i32gather_epi32( + (const int *) memAddr, + // highig: (19, 0, 0, 0 | 18, 0, 0, 0 | 17, 0, 0, 0 | 16, 0, 0, 0 || 3, 0, 0, 0 | 2, 0, 0, 0 | 1, 0, 0, 0 | 0, 0, 0, 0) + _mm256_shuffle_epi8(idx, __m256i { + ~0xFF000000FFl | 0x0D0000000C, ~0xFF000000FFl | 0x0F0000000E, + ~0xFF000000FFl | 0x0D0000000C, ~0xFF000000FFl | 0x0F0000000E + }), + SCALE + ), + _mm256_set1_epi32(0xFF) + ) + ) + ); +} + +template +inline __m256i +seqan_mm256_i16gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + + // The cast makes sure that the max value of TValue = (u)int64_t and + // (u)int32_t will be max value of int16_t (i.e. `~0` in int16_t), because + // the resulting __m256i can only hold int16_t values. + // + // NOTE(marehr): the masking is only needed for TValue = (u)int8_t and + // (u)int16_t. It could be omitted if _mm256_packus_epi32 would be exchanged + // by _mm256_packs_epi32, because for (u)int32_t and (u)int64_t the masking + // operations are basically the identity function. + constexpr int const mask = static_cast(MaxValue::VALUE); + + // 1. 
Unpack low idx values and interleave with 0 and gather from memAddr. + // 2. Unpack high idx values and interleave with 0, than gather from memAddr. + // 3. Merge 2 8x32 vectors into 1x16 vector by signed saturation. This operation reverts the interleave by the unpack operations above. + // + // The following is an example for SimdVector idx and uint16_t + // const * memAddr: + // mem: ( 0, 0, 3, 0 | 6, 0, 9, 0 | 12, 0, 15, 0 | 18, 0, 21, 0 || 24, 0, 27, 0 | 30, 0, 33, 0 | 36, 0, 39, 0 | 42, 0, 45, 0) + // idx: (15, 0, 14, 0 | 13, 0, 12, 0 | 11, 0, 10, 0 | 9, 0, 8, 0 || 7, 0, 6, 0 | 5, 0, 4, 0 | 3, 0, 2, 0 | 1, 0, 0, 0) + // pack: (45, 0, 42, 0 | 39, 0, 36, 0 | 33, 0, 30, 0 | 27, 0, 24, 0 || 21, 0, 18, 0 | 15, 0, 12, 0 | 9, 0, 6, 0 | 3, 0, 0, 0) + return _mm256_packus_epi32( + // mskLow: (45, 0, 0, 0 | 42, 0, 0, 0 | 39, 0, 0, 0 | 36, 0, 0, 0 || 21, 0, 0, 0 | 18, 0, 0, 0 | 15, 0, 0, 0 | 12, 0, 0, 0) + _mm256_and_si256( + // gtrLow: (45, 0, 15, 0 | 42, 0, 45, 0 | 39, 0, 42, 0 | 36, 0, 39, 0 || 21, 0, 24, 0 | 18, 0, 21, 0 | 15, 0, 18, 0 | 12, 0, 15, 0) + _mm256_i32gather_epi32( + (const int *) memAddr, + // low: (15, 0, 0, 0 | 14, 0, 0, 0 | 13, 0, 0, 0 | 12, 0, 0, 0 || 7, 0, 0, 0 | 6, 0, 0, 0 | 5, 0, 0, 0 | 4, 0, 0, 0) + _mm256_unpacklo_epi16( + idx, _mm256_set1_epi16(0) + ), + SCALE + ), + _mm256_set1_epi32(mask) + ), + // mskHih: (33, 0, 0, 0 | 30, 0, 0, 0 | 27, 0, 0, 0 | 24, 0, 0, 0 || 9, 0, 0, 0 | 6, 0, 0, 0 | 3, 0, 0, 0 | 0, 0, 0, 0) + _mm256_and_si256( + // gtrHih: (33, 0, 36, 0 | 30, 0, 33, 0 | 27, 0, 30, 0 | 24, 0, 27, 0 || 9, 0, 12, 0 | 6, 0, 9, 0 | 3, 0, 6, 0 | 0, 0, 3, 0) + _mm256_i32gather_epi32( + (const int *) memAddr, + // high: (11, 0, 0, 0 | 10, 0, 0, 0 | 9, 0, 0, 0 | 8, 0, 0, 0 || 3, 0, 0, 0 | 2, 0, 0, 0 | 1, 0, 0, 0 | 0, 0, 0, 0) + _mm256_unpackhi_epi16( + idx, _mm256_set1_epi16(0) + ), + SCALE + ), + _mm256_set1_epi32(mask) + ) + ); +} + +template +inline __m256i +seqan_mm256_i32gather_epi(TValue const * memAddr, + __m256i const & idx, + 
std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + constexpr auto const mask = static_cast(MaxValue::VALUE); + + return _mm256_and_si256( + _mm256_i32gather_epi32((const int *) memAddr, idx, SCALE), + _mm256_set1_epi32(mask) + ); +} + +template +inline __m256i +seqan_mm256_i64gather_epi(TValue const * memAddr, + __m256i const & idx, + std::integral_constant const & /*scale*/) +{ + using TUnsignedValue = typename MakeUnsigned::Type; + constexpr auto const mask = static_cast(MaxValue::VALUE); + + return _mm256_and_si256( + _mm256_i64gather_epi64((const long long *) memAddr, idx, SCALE), + _mm256_set1_epi64x(mask) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + seqan_mm256_i8gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 16>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. E.g., -3 = 253 in 8 bit, but would be 65533 in 16 bit. + // Use _gather(uint8_t) and extend the sign to [u]int16_t. 
+ return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i16sign_extend_epis8( + seqan_mm256_i16gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i16gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32sign_extend_epis8( + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int16_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + // Note that memAddr is a signed integer type, thus a cast would extend the + // sign. 
+ return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32sign_extend_epis16( + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 8>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i32gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +template +inline TSimdVector +_gather(int8_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis8( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int16_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis16( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(int32_t const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64sign_extend_epis32( + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ) + ); +} + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & scale, + SimdParams_<32, 4>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector, + seqan_mm256_i64gather_epi( + memAddr, + SEQAN_VECTOR_CAST_(__m256i const &, idx), + scale + ) + ); +} + +// -------------------------------------------------------------------------- +// _shuffleVector (256bit) +// 
-------------------------------------------------------------------------- + +inline __m256i +seqan_m256_shuffle_epi8(__m256i const & vector, __m256i const & indices) +{ + return _mm256_xor_si256( + // shuffle bytes from the lower bytes of vector + _mm256_shuffle_epi8( + // repeat twice the low bytes of vector in a new __m256i vector i.e. + // vh[127:0] = v[127:0] + // vh[255:128] = v[127:0] + _mm256_broadcastsi128_si256( + _mm256_extracti128_si256(vector, 0) + ), + // ((indices[i] << 3) & 0b1000 0000) ^ indices[i]: + // Adds the 5th bit of indices[i] as most significant bit. If the + // 5th bit is set, that means that indices[i] >= 16. + // r = _mm256_shuffle_epi8(vl, indices) will set r[i] = 0 if the + // most significant bit of indices[i] is 1. Since this bit is the + // 5th bit, r[i] = 0 if indices[i] >= 16 and r[i] = vl[indices[i]] + // if indices[i] < 16. + _mm256_xor_si256( + _mm256_and_si256( + _mm256_slli_epi16(indices, 3), + _mm256_set1_epi8(-127) // 0b1000 0000 + ), + indices + ) + ), + // shuffle bytes from the higher bytes of vector + _mm256_shuffle_epi8( + // repeat twice the higher bytes of vector in a new __m256i vector + // i.e. + // vh[127:0] = v[255:128] + // vh[255:128] = v[255:128] + _mm256_broadcastsi128_si256( + _mm256_extracti128_si256(vector, 1) + ), + // indices[i] - 16: + // r = _mm256_shuffle_epi8(vh, indices) + // will return r[i] = 0 if the most significant bit of the byte + // indices[i] is 1. Thus, indices[i] - 16 will select all high + // bytes in vh, i.e. r[i] = vh[indices[i] - 16], if indices[i] >= + // 16 and r[i] = 0 if indices[i] < 16. 
+ _mm256_sub_epi8( + indices, + _mm256_set1_epi8(16) + ) + ) + ); +} + +inline __m256i +seqan_m256_shuffle_epi16(const __m256i a, const __m256i b) +{ + // multiply by 2 + __m256i idx = _mm256_slli_epi16( + _mm256_permute4x64_epi64(b, 0b01010000), + 1 + ); + // _print(_mm256_add_epi8(idx, _mm256_set1_epi8(1))); + // _print( _mm256_unpacklo_epi8( + // idx, + // _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + // )); + return seqan_m256_shuffle_epi8( + a, + // interleave idx[15:0] = 2*indices[15], ..., 2*indices[0] + // with idx[15:0]+1 = 2*indices[15]+1, ..., 2*indices[0]+1 + // => 2*indices[15]+1, 2*indices[15], ..., 2*indices[0]+1, 2*indices[0] + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ) + ); +} + +inline __m256i +seqan_m256_shuffle_epi32(const __m256i a, const __m256i b) +{ + // multiply by 4 + __m256i idx = _mm256_slli_epi16( + _mm256_permutevar8x32_epi32(b, __m256i {0x0, 0x0, 0x1, 0x0}), + 2 + ); + return seqan_m256_shuffle_epi8( + a, + // interleave 4*indices[7]+1, 4*indices[7]+0; ..., 4*indices[0]+1, 4*indices[0]+0 + // with 4*indices[7]+3, 4*indices[7]+2; ..., 4*indices[0]+3, 4*indices[0]+2 + // => 4*indices[7]+3, 4*indices[7]+2; 4*indices[7]+1, 4*indices[7]+0; + // ... 
+ // 4*indices[0]+3, 4*indices[0]+2; 4*indices[0]+1, 4*indices[0]+0 + _mm256_unpacklo_epi16( + // interleave idx[7:0]+0 = 4*indices[7]+0; ...; 4*indices[0]+0 + // with idx[7:0]+1 = 4*indices[7]+1; ...; 4*indices[0]+1 + // => 4*indices[7]+1; 4*indices[7]+0; ...; 4*indices[0]+1; 4*indices[0]+0 + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ), + // interleave idx[7:0]+2 = 4*indices[7]+2; ...; 4*indices[0]+2 + // with idx[7:0]+3 = 4*indices[7]+3; ...; 4*indices[0]+3 + // => 4*indices[7]+3; 4*indices[7]+2; ...; 4*indices[0]+3; 4*indices[0]+2 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(2)), + _mm256_add_epi8(idx, _mm256_set1_epi8(3)) + ) + )); +} + +#define seqan_mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1) + +inline __m256i +seqan_m256_shuffle_epi64(const __m256i a, const __m256i b) +{ + __m128i lowidx = _mm256_extracti128_si256( + // multiply by 8 + _mm256_slli_epi16(b, 3), + 0 + ); + + __m256i idx = seqan_mm256_set_m128i( + _mm_srli_si128(lowidx, 2), + lowidx + ); + + return seqan_m256_shuffle_epi8( + a, + _mm256_unpacklo_epi32( + // interleave 8*indices[3]+1, 8*indices[3]+0; ..., 8*indices[0]+1, 8*indices[0]+0 + // with 8*indices[3]+3, 8*indices[3]+2; ..., 8*indices[0]+3, 8*indices[0]+2 + // => 8*indices[3]+3, 8*indices[3]+2; 8*indices[3]+1, 8*indices[3]+0; + // ... 
+ // 8*indices[0]+3, 8*indices[0]+2; 8*indices[0]+1, 8*indices[0]+0 + _mm256_unpacklo_epi16( + // interleave idx[3:0]+0 = 8*indices[3]+0; ...; 8*indices[0]+0 + // with idx[3:0]+1 = 8*indices[3]+1; ...; 8*indices[0]+1 + // => 8*indices[3]+1; 8*indices[3]+0; ...; 8*indices[0]+1; 8*indices[0]+0 + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8(idx, _mm256_set1_epi8(1)) + ), + // interleave idx[3:0]+2 = 8*indices[3]+2; ...; 8*indices[0]+2 + // with idx[3:0]+3 = 8*indices[3]+3; ...; 8*indices[0]+3 + // => 8*indices[3]+3; 8*indices[3]+2; ...; 8*indices[0]+3; 8*indices[0]+2 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(2)), + _mm256_add_epi8(idx, _mm256_set1_epi8(3)) + ) + ), + // interleave 8*indices[3]+5, 8*indices[3]+4; ..., 8*indices[0]+5, 8*indices[0]+4 + // with 8*indices[3]+7, 8*indices[3]+6; ..., 8*indices[0]+7, 8*indices[0]+6 + // => 8*indices[3]+7, 8*indices[3]+6; 8*indices[3]+5, 8*indices[3]+4; + // ... + // 8*indices[0]+7, 8*indices[0]+6; 8*indices[0]+5, 8*indices[0]+4 + _mm256_unpacklo_epi16( + // interleave idx[3:0]+4 = 8*indices[3]+4; ...; 8*indices[0]+4 + // with idx[3:0]+5 = 8*indices[3]+5; ...; 8*indices[0]+5 + // => 8*indices[3]+5; 8*indices[3]+4; ...; 8*indices[0]+5; 8*indices[0]+4 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(4)), + _mm256_add_epi8(idx, _mm256_set1_epi8(5)) + ), + // interleave idx[3:0]+6 = 8*indices[3]+6; ...; 8*indices[0]+6 + // with idx[3:0]+7 = 8*indices[3]+7; ...; 8*indices[0]+7 + // => 8*indices[3]+7; 8*indices[3]+6; ...; 8*indices[0]+7; 8*indices[0]+6 + _mm256_unpacklo_epi8( + _mm256_add_epi8(idx, _mm256_set1_epi8(6)), + _mm256_add_epi8(idx, _mm256_set1_epi8(7)) + ) + ) + ) + ); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 16>, SimdParams_<16, 16>) +{ + // copy 2nd 64bit word to 3rd, compute 2*idx + __m256i idx = _mm256_slli_epi16(_mm256_permute4x64_epi64(_mm256_castsi128_si256(SEQAN_VECTOR_CAST_(const __m128i &, 
indices)), 0x50), 1); + + // interleave with 2*idx+1 and call shuffle + return SEQAN_VECTOR_CAST_(TSimdVector1, + _mm256_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + _mm256_unpacklo_epi8( + idx, + _mm256_add_epi8( + idx, _mm256_set1_epi8(1) + ) + ) + ) + ); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 32>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 16>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi16( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 8>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi32( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<32, 4>, SimdParams_<32, 32>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector1, seqan_m256_shuffle_epi64( + SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, indices) + )); +} + +// -------------------------------------------------------------------------- +// _transposeMatrix (256bit) +// -------------------------------------------------------------------------- + +// emulate missing _mm256_unpacklo_epi128/_mm256_unpackhi_epi128 instructions +inline __m256i _mm256_unpacklo_epi128(__m256i const & a, __m256i const & b) +{ + return _mm256_permute2x128_si256(a, b, 0x20); +// return 
_mm256_inserti128_si256(a, _mm256_extracti128_si256(b, 0), 1); +} + +inline __m256i _mm256_unpackhi_epi128(__m256i const & a, __m256i const & b) +{ + return _mm256_permute2x128_si256(a, b, 0x31); +// return _mm256_inserti128_si256(b, _mm256_extracti128_si256(a, 1), 0); +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<32, 32, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permuted, transposed rows + static const unsigned char bitRev[] = { 0, 8, 4,12, 2,10, 6,14, 1, 9, 5,13, 3,11, 7,15, + 16,24,20,28,18,26,22,30,17,25,21,29,19,27,23,31}; + + // transpose a 32x32 byte matrix + __m256i tmp1[32]; + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i]), + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i+1]) + ); + tmp1[i+16] = _mm256_unpackhi_epi8( + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i]), + SEQAN_VECTOR_CAST_(const __m256i &, matrix[2*i+1]) + ); + } + __m256i tmp2[32]; + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+16] = _mm256_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_unpacklo_epi128(tmp2[2*i],tmp2[2*i+1])); + matrix[bitRev[i+16]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm256_unpackhi_epi128(tmp2[2*i],tmp2[2*i+1])); + } +} + +// -------------------------------------------------------------------------- +// Function _testAllZeros (256bit) +// -------------------------------------------------------------------------- + +template +SEQAN_FUNC_ENABLE_IF(Is >, 
int) +inline _testAllZeros(TSimdVector const & vector, TSimdVector const & mask, SimdParams_<32>) +{ + return _mm256_testz_si256(SEQAN_VECTOR_CAST_(const __m256i &, vector), + SEQAN_VECTOR_CAST_(const __m256i &, mask)); +} + +// -------------------------------------------------------------------------- +// Function _testAllOnes (256bit) +// -------------------------------------------------------------------------- + +template +inline int _testAllOnes(TSimdVector const & vector, SimdParams_<32>) +{ + __m256i vec = SEQAN_VECTOR_CAST_(const __m256i &, vector); + return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec)); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX2_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h b/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h new file mode 100644 index 0000000..b0ab6c3 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_avx512.h @@ -0,0 +1,284 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// generic SIMD interface for AVX512 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ + +namespace seqan { + +// SimdParams_<64, 64>: 512bit = 64 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64Char, char, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64SChar, signed char, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector64UChar, unsigned char, 64) + +// SimdParams_<64, 32>: 512bit = 32 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32Short, short, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32UShort, unsigned short, 64) + +// SimdParams_<64, 16>: 512bit = 16 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Int, int, 64) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UInt, unsigned int, 64) + +// SimdParams_<64, 8>: 512bit = 8 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Int64, int64_t, 64) 
+SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UInt64, uint64_t, 64) + +// ============================================================================ +// Functions +// ============================================================================ + +// ============================================================================ +// AVX512 wrappers (512bit vectors) +// ============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (512bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<64, L>) +{ + vector = createVector(std::get<0>(x)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<64, L>) +{ + using TSimdValue = typename Value::Type; + vector = TSimdVector{static_cast(std::get(args))...}; +} + +// -------------------------------------------------------------------------- +// _clearVector (512bit) +// -------------------------------------------------------------------------- + +template +inline void _clearVector(TSimdVector & vector, SimdParams_<64, L>) +{ + vector = TSimdVector{}; +} + +// -------------------------------------------------------------------------- +// _createVector (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<64, L>) +{ + using TValue_ = typename Value::Type; + return TSimdVector{} + static_cast(x); +} + +// -------------------------------------------------------------------------- +// _cmpEq (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector & a, TSimdVector & b, SimdParams_<64, L>) +{ + return a == 
b; +} + +// bad auto-vectorization for gcc +#ifndef __AVX512BW__ +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<64, 32>) +{ + auto aLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 0); + auto bLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 0); + auto cmpLow = _mm256_cmpeq_epi16(aLow, bLow); + + auto aHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 1); + auto bHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 1); + auto cmpHigh = _mm256_cmpeq_epi16(aHigh, bHigh); + + auto result = _mm512_broadcast_i64x4(cmpLow); + result = _mm512_inserti64x4(result, cmpHigh, 1); + return SEQAN_VECTOR_CAST_(TSimdVector, result); +} +#endif + +// -------------------------------------------------------------------------- +// _cmpGt (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a > b; +} + +// -------------------------------------------------------------------------- +// _bitwiseAndNot (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector & a, TSimdVector & b, SimdParams_<64, L>) +{ + return (~a & b); +} + +// -------------------------------------------------------------------------- +// _max (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a > b ? a : b; +} + +// -------------------------------------------------------------------------- +// _min (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector & a, TSimdVector & b, SimdParams_<64, L, TValue>) +{ + return a < b ? 
a : b; +} + +// -------------------------------------------------------------------------- +// _blend (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<64, L>) +{ + return mask ? b : a; +} + +// bad auto-vectorization for gcc +#ifndef __AVX512BW__ +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<64, 32>) +{ + auto aLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 0); + auto bLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 0); + auto maskLow = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, mask), 0); + auto blendLow = _mm256_blendv_epi8(aLow, bLow, maskLow); + + auto aHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, a), 1); + auto bHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, b), 1); + auto maskHigh = _mm512_extracti64x4_epi64(SEQAN_VECTOR_CAST_(const __m512i&, mask), 1); + auto blendHigh = _mm256_blendv_epi8(aHigh, bHigh, maskHigh); + + auto result = _mm512_broadcast_i64x4(blendLow); + result = _mm512_inserti64x4(result, blendHigh, 1); + return SEQAN_VECTOR_CAST_(TSimdVector, result); +} +#endif + +// -------------------------------------------------------------------------- +// _storeu (512bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector & vec, SimdParams_<64, L>) +{ + constexpr auto length = LENGTH::VALUE; + for (unsigned i = 0; i < length; i++) + memAddr[i] = vec[i]; +} + +// ---------------------------------------------------------------------------- +// Function _load() 512bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<64, L>) 
+{ + constexpr auto length = LENGTH::VALUE; + TSimdVector result; + for (unsigned i = 0; i < length; i++) + result[i] = memAddr[i]; + return result; +} + +// -------------------------------------------------------------------------- +// _shiftRightLogical (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<64, L>) +{ + return vector >> imm; +} + +// -------------------------------------------------------------------------- +// _gather (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & /*scale*/, + SimdParams_<64, L>) +{ + constexpr auto length = LENGTH::VALUE; + TSimdVector result; + for (unsigned i = 0; i < length; i++) + result[i] = memAddr[idx[i]]; + return result; +} + +// -------------------------------------------------------------------------- +// _shuffleVector (512bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<64, L>, SimdParams_<64, 64>) +{ + constexpr auto length = seqan::LENGTH::VALUE; + TSimdVector1 result{}; + for(unsigned i = 0u; i < length; ++i) + result[i] = vector[indices[i]]; + return result; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_AVX512_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h b/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h new file mode 100644 index 0000000..03b86ed --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_impl_sse4.2.h @@ -0,0 +1,1053 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// 
========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE4.2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ + +namespace seqan { + +// SimdParams_<8, 8>: 64bit = 8 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Char, char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8SChar, signed char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UChar, unsigned char, 8) + +// SimdParams_<8, 4>: 64bit = 4 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Short, short, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UShort, unsigned short, 8) + +// SimdParams_<8, 2>: 64bit = 2 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int, int, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt, unsigned int, 8) + +// SimdParams_<16, 16>: 128bit = 16 elements * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Char, char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16SChar, signed char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UChar, unsigned char, 16) + +// SimdParams_<16, 8>: 128bit = 8 elements * 2 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Short, short, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UShort, unsigned short, 16) + +// SimdParams_<16, 4>: 128bit = 4 elements * 4 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int, int, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt, unsigned int, 16) + +// SimdParams_<16, 2>: 128bit = 2 elements * 8 * 8bit +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int64, int64_t, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt64, uint64_t, 16) + +// ============================================================================ +// Functions +// 
============================================================================ + +// -------------------------------------------------------------------------- +// _fillVector (128bit) +// -------------------------------------------------------------------------- + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 16> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi8(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 8> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi16(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 4> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi32(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & x, + std::index_sequence<0> const &, + SimdParams_<16, 2> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi64x(std::get<0>(x))); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 16> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi8(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 8> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi16(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, + SimdParams_<16, 4> const &) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setr_epi32(std::get(args)...)); +} + +template +inline void +_fillVector(TSimdVector & vector, + std::tuple const & args, + std::index_sequence const &, 
+ SimdParams_<16, 2> const &) +{ + // reverse argument list 0, 1 -> 1, 0 + // NOTE(marehr): Intel linux fails to reverse argument list and only + // _mm_set_epi64x has no reverse equivalent + // NOTE(rrahn): For g++-4.9 the set_epi function is a macro, which does not work with parameter pack expansion. + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_set_epi64x(std::get(args)...)); +} + +// -------------------------------------------------------------------------- +// _clearVector (128bit) +// -------------------------------------------------------------------------- + +template +inline void _clearVector(TSimdVector & vector, SimdParams_<16, L>) +{ + vector = SEQAN_VECTOR_CAST_(TSimdVector, _mm_setzero_si128()); +} + +// -------------------------------------------------------------------------- +// _createVector (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi8(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi16(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi32(x)); +} + +template +inline TSimdVector _createVector(TValue const x, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_set1_epi64x(x)); +} + +// -------------------------------------------------------------------------- +// cmpEq (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, 
TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpEq(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _cmpGt (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80 = ~0x7F (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi8( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi8(~0x7F)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi8(~0x7F)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000 = ~0x7FFF (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi16( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi16(~0x7FFF)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi16(~0x7FFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x80000000 = ~0x7FFFFFFF (prevent overflow messages). 
+ return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi32( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), _mm_set1_epi32(~0x7FFFFFFF)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi32(~0x7FFFFFFF)))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _cmpGt(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ + // There is no unsigned cmpgt, we reduce it to the signed case. + // Note that 0x8000000000000000ul = ~0x7FFFFFFFFFFFFFFFul (prevent overflow messages). + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpgt_epi64( + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, a) ,_mm_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)), + _mm_xor_si128(SEQAN_VECTOR_CAST_(const __m128i&, b), _mm_set1_epi64x(~0x7FFFFFFFFFFFFFFFul)))); +} + +// -------------------------------------------------------------------------- +// _bitwiseOr (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseOr(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_or_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseAnd (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAnd(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_and_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// 
_bitwiseAndNot (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseAndNot(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_andnot_si128(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _bitwiseNot (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +template +inline TSimdVector _bitwiseNot(TSimdVector const & a, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_cmpeq_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + _mm_setzero_si128())); +} + +// -------------------------------------------------------------------------- +// _divide (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi8(a, _mm_set1_epi8(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi16(a, _mm_set1_epi16(b))); +} + +template +inline TSimdVector _divide(TSimdVector 
const & a, int b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi32(a, _mm_set1_epi32(b))); +} + +template +inline TSimdVector _divide(TSimdVector const & a, int b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_div_epi64(a, _mm_set1_epi64x(b))); +} + +// -------------------------------------------------------------------------- +// _add (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _add(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_add_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _sub (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) 
+{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _sub(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_sub_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +// -------------------------------------------------------------------------- +// _mult (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<16, 16>) +{ + SEQAN_ASSERT_FAIL("SSE intrinsics for multiplying 8 bit values not implemented!"); + return a; +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_mullo_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_mullo_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _mult(TSimdVector const & a, TSimdVector const &/*b*/, SimdParams_<16, 2>) +{ + SEQAN_ASSERT_FAIL("SSE intrinsics for multiplying 64 bit values not implemented!"); + return a; +} + +// -------------------------------------------------------------------------- +// _max (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const 
& b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _max(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ +#if defined(__AVX512VL__) + 
return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_max_epu64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(b, a, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + + +// -------------------------------------------------------------------------- +// _min (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, int8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 16, uint8_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, int16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 8, uint16_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu16(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, int32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 4, uint32_t>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu32(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +} + +template +inline TSimdVector 
_min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, int64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epi64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +template +inline TSimdVector _min(TSimdVector const & a, TSimdVector const & b, SimdParams_<16, 2, uint64_t>) +{ +#if defined(__AVX512VL__) + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_min_epu64(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b))); +#else // defined(__AVX512VL__) + return blend(a, b, cmpGt(a, b)); +#endif // defined(__AVX512VL__) +} + +// -------------------------------------------------------------------------- +// _blend (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, + _mm_blendv_epi8(SEQAN_VECTOR_CAST_(const __m128i&, a), + SEQAN_VECTOR_CAST_(const __m128i&, b), + SEQAN_VECTOR_CAST_(const __m128i&, mask))); +} + +// -------------------------------------------------------------------------- +// _storeu (128bit) +// -------------------------------------------------------------------------- + +template +inline void _storeu(T * memAddr, TSimdVector const & vec, SimdParams_<16, L>) +{ + _mm_storeu_si128((__m128i*)memAddr, reinterpret_cast(vec)); +} + +// ---------------------------------------------------------------------------- +// Function _load() 128bit +// ---------------------------------------------------------------------------- + +template +inline TSimdVector _load(T const * memAddr, SimdParams_<16, L>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_load_si128((__m128i const *) memAddr)); +} + +// 
-------------------------------------------------------------------------- +// _shiftRightLogical (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi16(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm) & _mm_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 8>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi16(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 4>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi32(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const & vector, const int imm, SimdParams_<16, 2>) +{ + return SEQAN_VECTOR_CAST_(TSimdVector, _mm_srli_epi64(SEQAN_VECTOR_CAST_(const __m128i &, vector), imm)); +} + +// -------------------------------------------------------------------------- +// _gather (128bit) +// -------------------------------------------------------------------------- + +template +inline TSimdVector +_gather(TValue const * memAddr, + TSimdVector const & idx, + std::integral_constant const & /*scale*/, + TSimdParams) +{ + TSimdVector ret; + for (auto i = 0u; i < LENGTH::VALUE; ++i) + { + ret[i] = memAddr[idx[i]]; + } + return ret; +} + +// -------------------------------------------------------------------------- +// _shuffleVector (128bit) +// -------------------------------------------------------------------------- + +inline __m128i +seqan_mm_shuffle_epi16(const __m128i a, const __m128i b) +{ + // multiply by 2 + __m128i idx = _mm_slli_epi16(b, 1); + return _mm_shuffle_epi8( + a, + // interleave idx[7:0] = 2*indices[7], ..., 2*indices[0] + // with 
idx[7:0]+1 = 2*indices[7]+1, ..., 2*indices[0]+1 + // => 2*indices[7]+1, 2*indices[7], ..., 2*indices[0]+1, 2*indices[0] + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ) + ); +} + +inline __m128i +seqan_mm_shuffle_epi32(const __m128i a, const __m128i b) +{ + // multiply by 4 + __m128i idx = _mm_slli_epi16(b, 2); + return _mm_shuffle_epi8( + a, + // interleave 4*indices[3]+1, 4*indices[3]+0; ..., 4*indices[0]+1, 4*indices[0]+0 + // with 4*indices[3]+3, 4*indices[3]+2; ..., 4*indices[0]+3, 4*indices[0]+2 + // => 4*indices[3]+3, 4*indices[3]+2; 4*indices[3]+1, 4*indices[3]+0; + // ... + // 4*indices[0]+3, 4*indices[0]+2; 4*indices[0]+1, 4*indices[0]+0 + _mm_unpacklo_epi16( + // interleave idx[3:0]+0 = 4*indices[3]+0; ...; 4*indices[0]+0 + // with idx[3:0]+1 = 4*indices[3]+1; ...; 4*indices[0]+1 + // => 4*indices[3]+1; 4*indices[3]+0; ...; 4*indices[0]+1; 4*indices[0]+0 + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ), + // interleave idx[3:0]+2 = 4*indices[3]+2; ...; 4*indices[0]+2 + // with idx[3:0]+3 = 4*indices[3]+3; ...; 4*indices[0]+3 + // => 4*indices[3]+3; 4*indices[3]+2; ...; 4*indices[0]+3; 4*indices[0]+2 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(2)), + _mm_add_epi8(idx, _mm_set1_epi8(3)) + ) + )); +} + +inline __m128i +seqan_mm_shuffle_epi64(const __m128i a, const __m128i b) +{ + // multiply by 8 + __m128i idx = _mm_slli_epi16(b, 3); + return _mm_shuffle_epi8( + a, + _mm_unpacklo_epi32( + // interleave 8*indices[1]+1, 8*indices[1]+0; ..., 8*indices[0]+1, 8*indices[0]+0 + // with 8*indices[1]+3, 8*indices[1]+2; ..., 8*indices[0]+3, 8*indices[0]+2 + // => 8*indices[1]+3, 8*indices[1]+2; 8*indices[1]+1, 8*indices[1]+0; + // ... 
+ // 8*indices[0]+3, 8*indices[0]+2; 8*indices[0]+1, 8*indices[0]+0 + _mm_unpacklo_epi16( + // interleave idx[1:0]+0 = 8*indices[1]+0; ...; 8*indices[0]+0 + // with idx[1:0]+1 = 8*indices[1]+1; ...; 8*indices[0]+1 + // => 8*indices[1]+1; 8*indices[1]+0; ...; 8*indices[0]+1; 8*indices[0]+0 + _mm_unpacklo_epi8( + idx, + _mm_add_epi8(idx, _mm_set1_epi8(1)) + ), + // interleave idx[1:0]+2 = 8*indices[1]+2; ...; 8*indices[0]+2 + // with idx[1:0]+3 = 8*indices[1]+3; ...; 8*indices[0]+3 + // => 8*indices[1]+3; 8*indices[1]+2; ...; 8*indices[0]+3; 8*indices[0]+2 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(2)), + _mm_add_epi8(idx, _mm_set1_epi8(3)) + ) + ), + // interleave 8*indices[1]+5, 8*indices[1]+4; ..., 8*indices[0]+5, 8*indices[0]+4 + // with 8*indices[1]+7, 8*indices[1]+6; ..., 8*indices[0]+7, 8*indices[0]+6 + // => 8*indices[1]+7, 8*indices[1]+6; 8*indices[1]+5, 8*indices[1]+4; + // ... + // 8*indices[0]+7, 8*indices[0]+6; 8*indices[0]+5, 8*indices[0]+4 + _mm_unpacklo_epi16( + // interleave idx[1:0]+4 = 8*indices[1]+4; ...; 8*indices[0]+4 + // with idx[1:0]+5 = 8*indices[1]+5; ...; 8*indices[0]+5 + // => 8*indices[1]+5; 8*indices[1]+4; ...; 8*indices[0]+5; 8*indices[0]+4 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(4)), + _mm_add_epi8(idx, _mm_set1_epi8(5)) + ), + // interleave idx[1:0]+6 = 8*indices[1]+6; ...; 8*indices[0]+6 + // with idx[1:0]+7 = 8*indices[1]+7; ...; 8*indices[0]+7 + // => 8*indices[1]+7; 8*indices[1]+6; ...; 8*indices[0]+7; 8*indices[0]+6 + _mm_unpacklo_epi8( + _mm_add_epi8(idx, _mm_set1_epi8(6)), + _mm_add_epi8(idx, _mm_set1_epi8(7)) + ) + ) + ) + ); +} + +template +[[deprecated("Here be dragons")]] +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 8>, SimdParams_<8, 8>) +{ +#if SEQAN_IS_32_BIT + __m128i idx = _mm_slli_epi16( + _mm_unpacklo_epi32( + _mm_cvtsi32_si128(reinterpret_cast(indices)), + _mm_cvtsi32_si128(reinterpret_cast(indices) >> 32) + ), + 1 + ); 
+#else + __m128i idx = _mm_slli_epi16(_mm_cvtsi64_si128(reinterpret_cast(indices)), 1); +#endif // SEQAN_IS_32_BIT + return SEQAN_VECTOR_CAST_(TSimdVector1, + _mm_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + _mm_unpacklo_epi8(idx, _mm_add_epi8(idx, _mm_set1_epi8(1))) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 16>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + _mm_shuffle_epi8( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 8>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi16( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 4>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi32( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices, SimdParams_<16, 2>, SimdParams_<16, 16>) +{ + return SEQAN_VECTOR_CAST_( + TSimdVector1, + seqan_mm_shuffle_epi64( + SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, indices) + )); +} + +// -------------------------------------------------------------------------- +// _transposeMatrix (128bit) +// -------------------------------------------------------------------------- + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<8, 8, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + 
static const unsigned char bitRev[] = {0,4,2,6,1,5,3,7}; + + // transpose a 8x8 byte matrix + __m64 tmp1[8]; + for (int i = 0; i < 4; ++i) + { + tmp1[i] = _mm_unpacklo_pi8(SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i+1])); + tmp1[i+4] = _mm_unpackhi_pi8(SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m64 &, matrix[2*i+1])); + } + __m64 tmp2[8]; + for (int i = 0; i < 4; ++i) + { + tmp2[i] = _mm_unpacklo_pi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+4] = _mm_unpackhi_pi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 4; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpacklo_pi32(tmp2[2*i], tmp2[2*i+1])); + matrix[bitRev[i+4]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpackhi_pi32(tmp2[2*i], tmp2[2*i+1])); + } +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<16, 16, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + static const unsigned char bitRev[] = {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; + + // transpose a 16x16 byte matrix + // + // matrix = + // A0 A1 A2 ... Ae Af + // B0 B1 B2 ... Be Bf + // ... + // P0 P1 P2 ... Pe Pf + __m128i tmp1[16]; + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi8(SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i+1])); + tmp1[i+8] = _mm_unpackhi_epi8(SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i]), SEQAN_VECTOR_CAST_(const __m128i &, matrix[2*i+1])); + } + // tmp1[0] = A0 B0 A1 B1 ... A7 B7 + // tmp1[1] = C0 D0 C1 D1 ... C7 D7 + // ... + // tmp1[7] = O0 P0 O1 P1 ... O7 P7 + // tmp1[8] = A8 B8 A9 B9 ... Af Bf + // ... + // tmp1[15] = O8 P8 O9 P9 ... Of Pf + __m128i tmp2[16]; + for (int i = 0; i < 8; ++i) + { + tmp2[i] = _mm_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+8] = _mm_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + // tmp2[0] = A0 B0 C0 D0 ... 
A3 B3 C3 D3 + // tmp2[1] = E0 F0 G0 H0 ... E3 F3 G3 H3 + // ... + // tmp2[3] = M0 N0 O0 P0 ... M3 N3 O3 P3 + // tmp2[4] = A8 B8 C8 D8 ... Ab Bb Cb Db + // ... + // tmp2[7] = M8 N8 O8 P8 ... Mb Nb Ob Pb + // tmp2[8] = A4 B4 C4 D4 ... A7 B7 C7 D7 + // .. + // tmp2[12] = Ac Bc Cc Dc ... Af Bf Cf Df + // ... + // tmp2[15] = Mc Nc Oc Pc ... Mf Nf Of Pf + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+8] = _mm_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + // tmp1[0] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + // ... + // tmp1[4] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + for (int i = 0; i < 8; ++i) + { + matrix[bitRev[i]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1])); + matrix[bitRev[i+8]] = SEQAN_VECTOR_CAST_(TSimdVector, _mm_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1])); + } +} + +// -------------------------------------------------------------------------- +// Function _testAllZeros (128bit) +// -------------------------------------------------------------------------- + +template +SEQAN_FUNC_ENABLE_IF(Is >, int) +inline _testAllZeros(TSimdVector const & vector, TSimdVector const & mask, SimdParams_<16>) +{ + return _mm_testz_si128(SEQAN_VECTOR_CAST_(const __m128i &, vector), + SEQAN_VECTOR_CAST_(const __m128i &, mask)); +} + +// -------------------------------------------------------------------------- +// Function _testAllOnes (128bit) +// -------------------------------------------------------------------------- + +template +inline +SEQAN_FUNC_ENABLE_IF(Is >, int) +_testAllOnes(TSimdVector const & vector, SimdParams_<16>) +{ + return _mm_test_all_ones(SEQAN_VECTOR_CAST_(const __m128i &, vector)); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_IMPL_SSE4_2_H_ diff --git a/porechop/include/seqan/simd/simd_base_seqan_interface.h b/porechop/include/seqan/simd/simd_base_seqan_interface.h new file mode 
100644 index 0000000..82c4931 --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_seqan_interface.h @@ -0,0 +1,392 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: David Weese +// René Rahn +// Stefan Budach +// ========================================================================== +// generic SIMD interface for SSE3 / AVX2 +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ + +namespace seqan { + +template +struct SimdMaskVectorImpl +{ + using Type = typename SimdVectorTraits::VALUE>>::MaskType; +}; + +template +struct SimdSwizzleVectorImpl +{ + typedef typename SimdVector::Type Type; +}; + +// ============================================================================ +// +// INTERFACE FUNCTIONS +// - these should be used in the actual code, they will call one of the wrapper +// functions defined above based on the vector type +// +// ============================================================================ + +// -------------------------------------------------------------------------- +// Function transpose() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +transpose(TSimdVector matrix[ROWS]) +{ + typedef typename Value::Type TValue; + _transposeMatrix(matrix, SimdMatrixParams_::VALUE, BitsPerValue::VALUE>()); +} + +// -------------------------------------------------------------------------- +// Function clearVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector) +{ + typedef typename Value::Type TValue; + _clearVector(vector, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline 
SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +createVector(TValue const x) +{ + typedef typename Value::Type TIVal; + return _createVector(x, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function fillVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args) +{ + // On clang (<= 4.0) + // std::make_tuple(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17) reaches the + // template recursion limit of 256 (e.g. -ftemplate-depth=256 is default) + // + // See same issue asked on http://stackoverflow.com/q/23374953 + // See also discussion to increase -ftemplate-depth to 1024 by default in + // clang https://llvm.org/bugs/show_bug.cgi?id=18417 + typedef typename Value::Type TIVal; + _fillVector(vector, std::make_tuple(args...), + std::make_index_sequence{}, + SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function cmpEq() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpEq(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator==() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpEq(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operatorGt() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is 
>, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpGt(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator>() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _cmpGt(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function max() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _max(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function min() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _min(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator|() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseOr(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator|=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator|=(TSimdVector & a, TSimdVector const 
& b) +{ + a = a | b; + return a; +} + +// -------------------------------------------------------------------------- +// Function operator&() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseAnd(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator&=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b) +{ + a = a & b; + return a; +} + +// -------------------------------------------------------------------------- +// Function operator~() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a) +{ + typedef typename Value::Type TValue; + return _bitwiseNot(a, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator+() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _add(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator-() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _sub(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator*() +// 
-------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _mult(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function operator/() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _div(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function andNot +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b) +{ + typedef typename Value::Type TValue; + return _bitwiseAndNot(a, b, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function shiftRightLogical() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm) +{ + typedef typename Value::Type TValue; + return _shiftRightLogical(vector, imm, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function blend() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask) +{ + typedef typename Value::Type TValue; + return _blend(a, b, mask, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function storeu() +// 
-------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec) +{ + typedef typename Value::Type TValue; + _storeu(memAddr, vec, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function load() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr) +{ + typedef typename Value::Type TValue; + return _load(memAddr, SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function gather() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx) +{ + typedef typename Value::Type TInnerValue; + return _gather(memAddr, idx, std::integral_constant(), SimdParams_()); +} + +// -------------------------------------------------------------------------- +// Function shuffleVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices) +{ + typedef typename Value::Type TValue1; + typedef typename Value::Type TValue2; + return _shuffleVector( + vector, + indices, + SimdParams_(), + SimdParams_()); +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_SEQAN_INTERFACE_H_ diff --git a/porechop/include/seqan/simd/simd_base_umesimd_impl.h b/porechop/include/seqan/simd/simd_base_umesimd_impl.h new file mode 100644 index 0000000..da2e20c --- /dev/null +++ b/porechop/include/seqan/simd/simd_base_umesimd_impl.h @@ -0,0 +1,655 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// 
========================================================================== +// Copyright (c) 2006-2018, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Marcel Ehrhardt +// ========================================================================== +// SIMD implementation of umesimd +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_ + +#include "umesimd/UMESimd.h" + +namespace seqan +{ + +template +struct SimdMaskVectorImpl +{ + using Type = typename UME::SIMD::SIMDTraits::MASK_T; +}; + +template +struct SimdSwizzleVectorImpl +{ + using Type = typename UME::SIMD::SIMDTraits::SWIZZLE_T; +}; + +template +struct SimdVector +{ + typedef UME::SIMD::SIMDVec Type; +}; + +// // 64 bit +// using SimdVector8Char = UME::SIMD::SIMDVec; +using SimdVector8SChar = UME::SIMD::SIMDVec; +using SimdVector8UChar = UME::SIMD::SIMDVec; +using SimdVector4Short = UME::SIMD::SIMDVec; +using SimdVector4UShort = UME::SIMD::SIMDVec; +using SimdVector2Int = UME::SIMD::SIMDVec; +using SimdVector2UInt = UME::SIMD::SIMDVec; + +// 128 bit +// using SimdVector16Char = UME::SIMD::SIMDVec; +using SimdVector16SChar = UME::SIMD::SIMDVec; +using SimdVector16UChar = UME::SIMD::SIMDVec; +using SimdVector8Short = UME::SIMD::SIMDVec; +using SimdVector8UShort = UME::SIMD::SIMDVec; +using SimdVector4Int = UME::SIMD::SIMDVec; +using SimdVector4UInt = UME::SIMD::SIMDVec; +using SimdVector2Int64 = UME::SIMD::SIMDVec; +using SimdVector2UInt64 = UME::SIMD::SIMDVec; + +// 256 bit +// using SimdVector32Char = UME::SIMD::SIMDVec; +using SimdVector32SChar = UME::SIMD::SIMDVec; +using SimdVector32UChar = UME::SIMD::SIMDVec; +using SimdVector16Short = UME::SIMD::SIMDVec; +using SimdVector16UShort = UME::SIMD::SIMDVec; +using SimdVector8Int = UME::SIMD::SIMDVec; +using SimdVector8UInt = UME::SIMD::SIMDVec; +using SimdVector4Int64 = UME::SIMD::SIMDVec; +using SimdVector4UInt64 = UME::SIMD::SIMDVec; + +// 512 bit +// using 
SimdVector64Char = UME::SIMD::SIMDVec; +using SimdVector64SChar = UME::SIMD::SIMDVec; +using SimdVector64UChar = UME::SIMD::SIMDVec; +using SimdVector32Short = UME::SIMD::SIMDVec; +using SimdVector32UShort = UME::SIMD::SIMDVec; +using SimdVector16Int = UME::SIMD::SIMDVec; +using SimdVector16UInt = UME::SIMD::SIMDVec; +using SimdVector8Int64 = UME::SIMD::SIMDVec; +using SimdVector8UInt64 = UME::SIMD::SIMDVec; + +// ============================================================================ +// SIMDMaskVector +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVecMask), (SimdMaskVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVecMask const), (SimdMaskVectorConcept)); + +template +struct Value > +{ + typedef bool Type; +}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +inline typename Value >::Type +getValue(UME::SIMD::SIMDVecMask const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline typename Value >::Type +value(UME::SIMD::SIMDVecMask const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVecMask &vector, TPosition const pos, TValue2 const value) +{ + vector.insert(pos, value); +} + +// ============================================================================ +// SIMDSwizzle +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDSwizzle), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDSwizzle const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef uint32_t Type; +}; + +template +struct LENGTH > +{ + enum { VALUE = LENGTH_ }; +}; + +template +inline typename Value >::Type +getValue(UME::SIMD::SIMDSwizzle const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline typename Value 
>::Type +value(UME::SIMD::SIMDSwizzle const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDSwizzle &vector, TPosition const pos, TValue2 const value) +{ + vector.insert(pos, value); +} + +// ============================================================================ +// SIMDVec_u +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_u), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_u const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_u const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_u const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_u &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +// ============================================================================ +// SIMDVec_i +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_i), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_i const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_i const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_i const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_i &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +// 
============================================================================ +// SIMDVec_f +// ============================================================================ + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_f), (SimdVectorConcept)); + +template +SEQAN_CONCEPT_IMPL((typename UME::SIMD::SIMDVec_f const), (SimdVectorConcept)); + +template +struct Value > +{ + typedef TValue Type; +}; + +template +struct LENGTH > { + enum { VALUE = LENGTH_ }; +}; + +template +inline TValue +getValue(UME::SIMD::SIMDVec_f const & vector, TPosition const pos) +{ + return vector[pos]; +} + +template +inline TValue +value(UME::SIMD::SIMDVec_f const & vector, TPosition const pos) +{ + + return vector[pos]; +} + +template +inline void +assignValue(UME::SIMD::SIMDVec_f &vector, TPosition const pos, TValue2 const value) +{ + vector[pos] = value; +} + +} // namespace seqan + +namespace UME +{ +namespace SIMD +{ + template + inline TStream & operator<<(TStream & stream, + IntermediateIndex const & pInterIndex) + { + stream << static_cast(pInterIndex); + return stream; + } +} +} + +namespace seqan +{ + +// -------------------------------------------------------------------------- +// Function clearVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +clearVector(TSimdVector & vector) +{ + vector = 0; +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(And>, + Not>>>, TSimdVector) +createVector(TValue const x) +{ + return TSimdVector(static_cast(x)); +} + +// -------------------------------------------------------------------------- +// Function createVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) 
+createVector(TValue const x) +{ + return TSimdVector(x); +} + +// -------------------------------------------------------------------------- +// Function fillVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +fillVector(TSimdVector & vector, TValue const... args) +{ + vector = TSimdVector(args...); +} + +// -------------------------------------------------------------------------- +// Function cmpEq() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpEq (TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpeq(b); +} + +// -------------------------------------------------------------------------- +// Function operator==() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator==(TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpeq(b); +} + +// -------------------------------------------------------------------------- +// Function operatorGt() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +cmpGt (TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpgt(b); +} + +// -------------------------------------------------------------------------- +// Function operator>() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename SimdMaskVector::Type) +operator>(TSimdVector const & a, TSimdVector const & b) +{ + return a.cmpgt(b); +} + +// -------------------------------------------------------------------------- +// Function max() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is 
>, TSimdVector) +max(TSimdVector const & a, TSimdVector const & b) +{ + return a.max(b); +} + +// -------------------------------------------------------------------------- +// Function min() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +min(TSimdVector const & a, TSimdVector const & b) +{ + return a.min(b); +} + +// -------------------------------------------------------------------------- +// Function operator|() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator|(TSimdVector const & a, TSimdVector const & b) +{ + return a.bor(b); +} + +// -------------------------------------------------------------------------- +// Function operator|=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator|=(TSimdVector & a, TSimdVector const & b) +{ + return a.bora(b); +} + +// -------------------------------------------------------------------------- +// Function operator&() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator&(TSimdVector const & a, TSimdVector const & b) +{ + return a.band(b); +} + +// -------------------------------------------------------------------------- +// Function operator&=() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector &) +operator&=(TSimdVector & a, TSimdVector const & b) +{ + return a.banda(b); +} + +// -------------------------------------------------------------------------- +// Function operator~() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator~(TSimdVector const & a) +{ + 
return a.bnot(); +} + +// -------------------------------------------------------------------------- +// Function operator+() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator+(TSimdVector const & a, TSimdVector const & b) +{ + return a.add(b); +} + +// -------------------------------------------------------------------------- +// Function operator-() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator-(TSimdVector const & a, TSimdVector const & b) +{ + return a.sub(b); +} + +// -------------------------------------------------------------------------- +// Function operator*() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator*(TSimdVector const & a, TSimdVector const & b) +{ + return a.mul(b); +} + +// -------------------------------------------------------------------------- +// Function operator/() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +operator/(TSimdVector const & a, TSimdVector const & b) +{ + return a.div(b); +} + +// -------------------------------------------------------------------------- +// Function andNot +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +andNot(TSimdVector const & a, TSimdVector const & b) +{ + return a.bandnot(b); +} + + +// -------------------------------------------------------------------------- +// Function shiftRightLogical() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +shiftRightLogical(TSimdVector const & vector, const int imm) +{ + return vector.rsh(imm); +} 
+ +// -------------------------------------------------------------------------- +// Function blend() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +blend(TSimdVector const & a, TSimdVector const & b, TSimdVectorMask const & mask) +{ + return a.blend(mask, b); +} + +// -------------------------------------------------------------------------- +// Function storeu() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +storeu(T * memAddr, TSimdVector const & vec) +{ + vec.store(memAddr); +} + +// -------------------------------------------------------------------------- +// Function load() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +load(T const * memAddr) +{ + return TSimdVector(memAddr); +} + +// -------------------------------------------------------------------------- +// Function gather() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(IsSameType::Type>, TSimdVector) +_gather(TValue const * memAddr, TSimdVector const & idx) +{ + using TIndexVector = typename UME::SIMD::SIMDTraits::UINT_VEC_T; + + TSimdVector a; + a.gather(memAddr, static_cast(idx)); + return a; +} + +template +inline SEQAN_FUNC_ENABLE_IF(Not::Type> >, TSimdVector) +_gather(TValue const * memAddr, TSimdVector const & idx) +{ + using TIndexVector = typename UME::SIMD::SIMDTraits::UINT_VEC_T; + + TSimdVector a; + for (auto i = 0u; i < TIndexVector::length(); ++i) + { + a[i] = memAddr[idx[i]]; + } + return a; +} + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector) +gather(TValue const * memAddr, TSimdVector const & idx) +{ + return _gather(memAddr, idx); +} + +// -------------------------------------------------------------------------- +// Function 
shuffleVector() +// -------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSimdVector1) +shuffleVector(TSimdVector1 const & vector, TSimdVector2 const & indices) +{ + return vector.swizzle(indices); +} + +} + +#endif // SEQAN_INCLUDE_SEQAN_SIMD_SIMD_BASE_UMESIMD_IMPL_H_