-
Notifications
You must be signed in to change notification settings - Fork 298
Add shift by multiple constant #1220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
85fcf7b
991b087
116829c
923059e
8cc985d
cde846f
7119e9a
92afec4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /*************************************************************************** | ||
| * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * | ||
| * Martin Renou * | ||
| * Copyright (c) QuantStack * | ||
| * Copyright (c) Serge Guelton * | ||
| * Copyright (c) Marco Barbone * | ||
| * * | ||
| * Distributed under the terms of the BSD 3-Clause License. * | ||
| * * | ||
| * The full license is in the file LICENSE, distributed with this software. * | ||
| ****************************************************************************/ | ||
|
|
||
| #ifndef XSIMD_UTILS_SHIFTS_HPP | ||
| #define XSIMD_UTILS_SHIFTS_HPP | ||
|
|
||
| #include "../../config/xsimd_inline.hpp" | ||
| #include "../../types/xsimd_batch.hpp" | ||
| #include "../../types/xsimd_batch_constant.hpp" | ||
| #include "../../types/xsimd_traits.hpp" | ||
|
|
||
| namespace xsimd | ||
| { | ||
| namespace kernel | ||
| { | ||
| namespace utils | ||
| { | ||
| template <typename I, I offset, I length, I... Vs> | ||
| struct select_stride | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps a more hardcore C++ developer wouldn't mind, but I would add comments to these helpers so that people don't have to decipher the code. |
||
| { | ||
| static constexpr I values_array[] = { Vs... }; | ||
|
|
||
| template <typename K> | ||
| static constexpr K get(K i, K) | ||
| { | ||
| return static_cast<K>(values_array[length * i + offset]); | ||
| } | ||
| }; | ||
|
|
||
| template <typename I> | ||
| constexpr I lsb_mask(I bit_index) | ||
| { | ||
| if (bit_index == 8 * sizeof(I)) | ||
| { | ||
| return ~I { 0 }; | ||
| } | ||
| return static_cast<I>((I { 1 } << bit_index) - I { 1 }); | ||
| } | ||
|
|
||
| template <class T, class A, T V0, T... Vs> | ||
| constexpr bool all_equals(batch_constant<T, A, V0, Vs...> c) | ||
| { | ||
AntoinePrv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return (c == std::integral_constant<T, V0> {}).all(); | ||
| } | ||
|
|
||
| template <class T, class A, T... Vs> | ||
| XSIMD_INLINE batch<T, A> bitwise_lshift_as_twice_larger( | ||
| batch<T, A> const& self, batch_constant<T, A, Vs...>) noexcept | ||
| { | ||
| using T2 = widen_t<T>; | ||
|
|
||
| const auto self2 = bitwise_cast<T2>(self); | ||
|
|
||
| // Lower byte: shift as twice the size and mask bits flowing to higher byte. | ||
| constexpr auto shifts_lo = make_batch_constant<T2, select_stride<T, 0, 2, Vs...>, A>(); | ||
| constexpr auto mask_lo = lsb_mask<T2>(8 * sizeof(T)); | ||
| const auto shifted_lo = bitwise_lshift(self2, shifts_lo); | ||
| constexpr auto batch_mask_lo = make_batch_constant<T2, mask_lo, A>(); | ||
| const auto masked_lo = bitwise_and(shifted_lo, batch_mask_lo.as_batch()); | ||
|
|
||
| // Higher byte: mask bits that would flow from lower byte and shift as twice the size. | ||
| constexpr auto shifts_hi = make_batch_constant<T2, select_stride<T, 1, 2, Vs...>, A>(); | ||
| constexpr auto mask_hi = mask_lo << (8 * sizeof(T)); | ||
| constexpr auto batch_mask_hi = make_batch_constant<T2, mask_hi, A>(); | ||
| const auto masked_hi = bitwise_and(self2, batch_mask_hi.as_batch()); | ||
| const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi); | ||
|
|
||
| return bitwise_cast<T>(bitwise_or(masked_lo, shifted_hi)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -353,6 +353,43 @@ namespace xsimd | |
| return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {}); | ||
| } | ||
|
|
||
| namespace detail | ||
| { | ||
| // Detection for kernel overloads accepting ``batch_constant`` in ``bitwise_lshift`` | ||
| // directly (or in a parent register function). | ||
| // The ``batch_constant`` overload is a rare but useful optimization. | ||
| // Running the detection here is less error prone than to add a fallback to all | ||
| // architectures. | ||
|
|
||
| template <class Arch, class Batch, class BatchConstant, class = void> | ||
| struct has_bitwise_lshift_batch_const : std::false_type | ||
| { | ||
| }; | ||
|
|
||
| template <class Arch, class Batch, class BatchConstant> | ||
| struct has_bitwise_lshift_batch_const< | ||
| Arch, Batch, BatchConstant, | ||
| void_t<decltype(kernel::bitwise_lshift<Arch>( | ||
| std::declval<Batch>(), std::declval<BatchConstant>(), Arch {}))>> | ||
| : std::true_type | ||
| { | ||
| }; | ||
|
Comment on lines
+356
to
+376
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We run the detection of an optimization in the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know we've been having issues with error-prone overloads in the past, but I'm not a big fan of that approach. I'll let it pass this time, but I think we should settle on something better.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree, batch_constant versions are tedious because they break the total ordering of which dispatch is the best. In this case, I first tried implementing all functions. Coming back to it much later, it became a big effort to track what dispatches to what. This solution was much simpler because it changes the default dispatch from a lower architecture to a runtime alternative. However, I am now realizing this has the same pitfalls. I agree we should ideally stick to the default behavior. If C++17 is not too far away, this could more easily be done with a simplification of overloads.
Note that I plan to add a symmetric API for right shifts. In fact, we should probably add a placeholder API for batch_constant for all functions, even the ones with no constant implementation. That way, if something gets added, users would get the benefit without code changes. Otherwise they need to track their |
||
|
|
||
| template <class Arch, class T, T... Values> | ||
| XSIMD_INLINE batch<T, Arch> bitwise_lshift_batch_const(batch<T, Arch> const& x, batch_constant<T, Arch, Values...> shift, std::true_type) noexcept | ||
| { | ||
| // Optimized ``batch_constant`` implementation | ||
| return kernel::bitwise_lshift<Arch>(x, shift, Arch {}); | ||
| } | ||
|
|
||
| template <class Arch, class T, T... Values> | ||
| XSIMD_INLINE batch<T, Arch> bitwise_lshift_batch_const(batch<T, Arch> const& x, batch_constant<T, Arch, Values...> shift, std::false_type) noexcept | ||
| { | ||
| // Fallback to regular run-time implementation | ||
| return kernel::bitwise_lshift<Arch>(x, shift.as_batch(), Arch {}); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * @ingroup batch_bitwise | ||
| * | ||
|
|
@@ -367,17 +404,24 @@ namespace xsimd | |
| detail::static_check_supported_config<T, A>(); | ||
| return kernel::bitwise_lshift<A>(x, shift, A {}); | ||
| } | ||
| template <size_t shift, class T, class A> | ||
| XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x) noexcept | ||
| { | ||
| detail::static_check_supported_config<T, A>(); | ||
| return kernel::bitwise_lshift<shift, A>(x, A {}); | ||
| } | ||
| template <class T, class A> | ||
| XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept | ||
| { | ||
| detail::static_check_supported_config<T, A>(); | ||
| return kernel::bitwise_lshift<A>(x, shift, A {}); | ||
| } | ||
| template <size_t shift, class T, class A> | ||
| XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x) noexcept | ||
| template <class T, class A, T... Values> | ||
| XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch_constant<T, A, Values...> shift) noexcept | ||
| { | ||
| detail::static_check_supported_config<T, A>(); | ||
| return kernel::bitwise_lshift<shift, A>(x, A {}); | ||
| using has_batch_const_impl = detail::has_bitwise_lshift_batch_const<A, decltype(x), decltype(shift)>; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems to me that the dispatch here should be 'if all
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is a nice improvement to add as well, but that is not why I added this pattern: in most cases, the extra overload would forward to the dynamic version.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ack |
||
| return detail::bitwise_lshift_batch_const<A>(x, shift, has_batch_const_impl {}); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I named this `util` as `common` seems to be more used for implementing the `common` architecture.