Skip to content
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/bit_run_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t

// Prepare for inversion in NextRun.
// Clear out any preceding bits.
word_ = word_ & ~bit_util::LeastSignificantBitMask(position_);
word_ = word_ & ~bit_util::LeastSignificantBitMask<uint64_t>(position_);
}

#endif
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/util/bit_run_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class ARROW_EXPORT BitRunReader {
int64_t start_bit_offset = start_position & 63;
// Invert the word for proper use of CountTrailingZeros and
// clear bits so CountTrailingZeros can do its magic.
word_ = ~word_ & ~bit_util::LeastSignificantBitMask(start_bit_offset);
word_ = ~word_ & ~bit_util::LeastSignificantBitMask<uint64_t>(start_bit_offset);

// Go forward until the next change from unset to set.
int64_t new_bits = bit_util::CountTrailingZeros(word_) - start_bit_offset;
Expand Down Expand Up @@ -311,12 +311,12 @@ class BaseSetBitRunReader {
memcpy(reinterpret_cast<char*>(&word) + 8 - num_bytes, bitmap_, num_bytes);
// XXX MostSignificantBitmask
return (bit_util::ToLittleEndian(word) << bit_offset) &
~bit_util::LeastSignificantBitMask(64 - num_bits);
~bit_util::LeastSignificantBitMask<uint64_t>(64 - num_bits);
} else {
memcpy(&word, bitmap_, num_bytes);
bitmap_ += num_bytes;
return (bit_util::ToLittleEndian(word) >> bit_offset) &
bit_util::LeastSignificantBitMask(num_bits);
bit_util::LeastSignificantBitMask<uint64_t>(num_bits);
}
}

Expand Down
99 changes: 12 additions & 87 deletions cpp/src/arrow/util/bit_stream_utils_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

#pragma once

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>
Expand Down Expand Up @@ -249,110 +248,36 @@ inline bool BitWriter::PutAligned(T val, int num_bytes) {
return true;
}

namespace detail {

// Extracts one `num_bits`-wide value from the 64-bit word `*buffered_values`,
// starting at bit position `*bit_offset`, and stores it in `*v`.
//
// The cursor state is passed by pointer and updated in place:
//   - `*bit_offset` is advanced by `num_bits`;
//   - once `*bit_offset` reaches 64, `*byte_offset` is advanced by 8,
//     `*bit_offset` is reduced by 64, and `*buffered_values` is reloaded from
//     `buffer + *byte_offset` through detail::ReadLittleEndianWord.
//
// `max_bytes` is only used to compute `max_bytes - *byte_offset`, which is
// handed to ReadLittleEndianWord (presumably the number of bytes still
// readable from `buffer` -- confirm against that helper).
//
// NOTE(review): bit_util::TrailingBits(x, n) is assumed to keep the `n`
// lowest-order bits of `x`; its definition is not visible here.
template <typename T>
inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
// MSVC warning 4800 is silenced around the cast to T (presumably for the case
// where T is bool -- confirm).
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4800)
#endif
// Take the bits below position (*bit_offset + num_bits) and shift out the
// *bit_offset bits that precede the value.
*v = static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset + num_bits) >>
*bit_offset);
#ifdef _MSC_VER
# pragma warning(pop)
#endif
*bit_offset += num_bits;
// The value ended at or beyond the end of the current 64-bit word: move on to
// the next word.
if (*bit_offset >= 64) {
*byte_offset += 8;
*bit_offset -= 64;

*buffered_values =
detail::ReadLittleEndianWord(buffer + *byte_offset, max_bytes - *byte_offset);
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4800 4805)
#endif
// Read the bits of *v that crossed into the newly loaded *buffered_values.
if (ARROW_PREDICT_TRUE(num_bits - *bit_offset < static_cast<int>(8 * sizeof(T)))) {
// If the shift amount (num_bits - *bit_offset) is not less than the bit
// width of T (8 * sizeof(T)), *v does not need to change, and performing the
// shift anyway could trigger a runtime error for a too-large shift exponent.
*v = *v | static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset)
<< (num_bits - *bit_offset));
}
#ifdef _MSC_VER
# pragma warning(pop)
#endif
// Sanity check on the wrapped bit offset.
ARROW_DCHECK_LE(*bit_offset, 64);
}
}

} // namespace detail

// Reads a single `num_bits`-wide value into `*v` by delegating to GetBatch
// with a batch of one. Returns true only if exactly one value was produced.
template <typename T>
inline bool BitReader::GetValue(int num_bits, T* v) {
  const int values_read = GetBatch(num_bits, v, /*batch_size=*/1);
  return values_read == 1;
}

namespace internal_bit_reader {

// Maps a value type to the type used when unpacking into it: integer types map
// to their unsigned counterpart.
template <typename T>
struct unpack_detect {
  typedef typename std::make_unsigned<T>::type type;
};

// `bool` has no unsigned counterpart (std::make_unsigned rejects it), so it
// maps to itself.
template <>
struct unpack_detect<bool> {
  typedef bool type;
};

}  // namespace internal_bit_reader

template <typename T>
inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
ARROW_DCHECK(buffer_ != NULL);
ARROW_DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)) << "num_bits: " << num_bits;
constexpr uint64_t kBitsPerByte = 8;

int bit_offset = bit_offset_;
int byte_offset = byte_offset_;
uint64_t buffered_values = buffered_values_;
int max_bytes = max_bytes_;
const uint8_t* buffer = buffer_;
ARROW_DCHECK(buffer_ != NULLPTR);
ARROW_DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)) << "num_bits: " << num_bits;

const int64_t needed_bits = num_bits * static_cast<int64_t>(batch_size);
constexpr uint64_t kBitsPerByte = 8;
const int64_t remaining_bits =
static_cast<int64_t>(max_bytes - byte_offset) * kBitsPerByte - bit_offset;
static_cast<int64_t>(max_bytes_ - byte_offset_) * kBitsPerByte - bit_offset_;
if (remaining_bits < needed_bits) {
batch_size = static_cast<int>(remaining_bits / num_bits);
}

int i = 0;
if (ARROW_PREDICT_FALSE(bit_offset != 0)) {
for (; i < batch_size && bit_offset != 0; ++i) {
detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
&buffered_values);
}
}

using unpack_t = typename internal_bit_reader::unpack_detect<T>::type;

int num_unpacked = ::arrow::internal::unpack(
buffer + byte_offset, reinterpret_cast<unpack_t*>(v + i), batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;

buffered_values =
detail::ReadLittleEndianWord(buffer + byte_offset, max_bytes - byte_offset);
if constexpr (std::is_same_v<T, bool>) {
::arrow::internal::unpack(buffer_ + byte_offset_, v, batch_size, num_bits,
bit_offset_);

for (; i < batch_size; ++i) {
detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
&buffered_values);
} else {
::arrow::internal::unpack(buffer_ + byte_offset_,
reinterpret_cast<std::make_unsigned_t<T>*>(v), batch_size,
num_bits, bit_offset_);
}

bit_offset_ = bit_offset;
byte_offset_ = byte_offset;
buffered_values_ = buffered_values;
Advance(batch_size * num_bits);

return batch_size;
}
Expand Down
13 changes: 10 additions & 3 deletions cpp/src/arrow/util/bit_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,16 @@ constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }

// Returns a mask for the bit_index lower order bits.
// Precondition: 0 <= bit_index < 64. A shift by 64 or more is undefined for a
// 64-bit operand, so the all-ones mask cannot be requested here.
constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
  constexpr uint64_t kOne = 1;
  return (kOne << bit_index) - kOne;
}
// Returns a mask of type `Uint` with the `bit_index` lowest-order bits set.
//
// Valid for `bit_index` in the closed range `[0, 8 * sizeof(Uint)]` if
// `kAllowUpperBound` is true, otherwise in the half-open range
// `[0, 8 * sizeof(Uint))`.
//
// The result is always converted back to `Uint`: for types narrower than `int`
// the intermediate arithmetic is promoted to `int`, and without the conversion
// `~Uint{0}` would be returned as the `int` value -1 rather than the all-ones
// `Uint` value.
template <typename Uint, bool kAllowUpperBound = false>
constexpr Uint LeastSignificantBitMask(Uint bit_index) {
  if constexpr (kAllowUpperBound) {
    // Shifting by the full bit width is undefined, so the all-ones mask is
    // produced explicitly.
    if (bit_index == 8 * sizeof(Uint)) {
      return static_cast<Uint>(~Uint{0});
    }
  }
  return static_cast<Uint>((Uint{1} << bit_index) - Uint{1});
}

// Returns 'value' rounded up to the nearest multiple of 'factor'
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/bitmap_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class BitmapUInt64Reader {
memcpy(&word, bitmap_, num_bytes);
bitmap_ += num_bytes;
return (bit_util::ToLittleEndian(word) >> bit_offset) &
bit_util::LeastSignificantBitMask(num_bits);
bit_util::LeastSignificantBitMask<uint64_t>(num_bits);
}

const uint8_t* bitmap_;
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/arrow/util/bpacking.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,19 @@ struct UnpackDynamicFunction {
} // namespace

template <typename Uint>
int unpack(const uint8_t* in, Uint* out, int batch_size, int num_bits) {
void unpack(const uint8_t* in, Uint* out, int batch_size, int num_bits, int bit_offset) {
#if defined(ARROW_HAVE_NEON)
return unpack_neon(in, out, batch_size, num_bits);
return unpack_neon(in, out, batch_size, num_bits, bit_offset);
#else
static DynamicDispatch<UnpackDynamicFunction<Uint> > dispatch;
return dispatch.func(in, out, batch_size, num_bits);
return dispatch.func(in, out, batch_size, num_bits, bit_offset);
#endif
}

template int unpack<bool>(const uint8_t*, bool*, int, int);
template int unpack<uint8_t>(const uint8_t*, uint8_t*, int, int);
template int unpack<uint16_t>(const uint8_t*, uint16_t*, int, int);
template int unpack<uint32_t>(const uint8_t*, uint32_t*, int, int);
template int unpack<uint64_t>(const uint8_t*, uint64_t*, int, int);
template void unpack<bool>(const uint8_t*, bool*, int, int, int);
template void unpack<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
template void unpack<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
template void unpack<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
template void unpack<uint64_t>(const uint8_t*, uint64_t*, int, int, int);

} // namespace arrow::internal
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/bpacking_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace arrow::internal {
namespace {

template <typename Int>
using UnpackFunc = int (*)(const uint8_t*, Int*, int, int);
using UnpackFunc = void (*)(const uint8_t*, Int*, int, int, int);

/// Get the number of bytes associated with a packing.
constexpr int32_t GetNumBytes(int32_t num_values, int32_t bit_width) {
Expand Down Expand Up @@ -89,7 +89,7 @@ void BM_Unpack(benchmark::State& state, bool aligned, UnpackFunc<Int> unpack, bo
std::vector<Int> unpacked(num_values, 0);

for (auto _ : state) {
unpack(packed_ptr, unpacked.data(), num_values, bit_width);
unpack(packed_ptr, unpacked.data(), num_values, bit_width, /* bit_offset = */ 0);
benchmark::ClobberMemory();
}
state.SetItemsProcessed(num_values * state.iterations());
Expand Down
Loading
Loading