SERVER-100659 Generalize BSONColumnBuilder binary reopen (#44341)
Co-authored-by: Daniel Moody <dmoody256@gmail.com> GitOrigin-RevId: 37753ad80748c2015a59c2099a3b6ffb8d14291f
This commit is contained in:
parent
c6587d9cb9
commit
848dace7fc
@ -18,6 +18,7 @@ mongo_cc_library(
|
||||
"simple8b_type_util.cpp",
|
||||
],
|
||||
hdrs = [
|
||||
"binary_reopen.h",
|
||||
"bsoncolumn.h",
|
||||
"bsoncolumn.inl",
|
||||
"bsoncolumn_helpers.h",
|
||||
@ -117,6 +118,20 @@ mongo_cc_unit_test(
|
||||
],
|
||||
)
|
||||
|
||||
mongo_cc_unit_test(
|
||||
name = "binary_reopen_test",
|
||||
srcs = [
|
||||
"binary_reopen_test.cpp",
|
||||
],
|
||||
tags = [
|
||||
"mongo_unittest_seventh_group",
|
||||
"server-bsoncolumn",
|
||||
],
|
||||
deps = [
|
||||
":column",
|
||||
],
|
||||
)
|
||||
|
||||
mongo_cc_benchmark(
|
||||
name = "simple8b_bm",
|
||||
srcs = [
|
||||
|
||||
763
src/mongo/bson/column/binary_reopen.h
Normal file
763
src/mongo/bson/column/binary_reopen.h
Normal file
@ -0,0 +1,763 @@
|
||||
/**
|
||||
* Copyright (C) 2025-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mongo/bson/column/bsoncolumn_util.h"
|
||||
#include "mongo/bson/column/simple8b.h"
|
||||
#include "mongo/bson/column/simple8b_builder.h"
|
||||
|
||||
#include <boost/optional/optional.hpp>
|
||||
|
||||
namespace mongo::bsoncolumn::internal {
|
||||
|
||||
/**
|
||||
* Constant to indicate invalid index for overflow or pending RLE.
|
||||
*/
|
||||
static constexpr int kInvalidIndex = -1;
|
||||
|
||||
/**
|
||||
* Helper struct for a scanned control block.
|
||||
*
|
||||
* lastAtEndOfBlock and scaleIndex are only set for control blocks containing double data.
|
||||
*/
|
||||
struct ControlBlock {
|
||||
const char* control = nullptr;
|
||||
double lastAtEndOfBlock = 0.0;
|
||||
uint8_t scaleIndex = Simple8bTypeUtil::kMemoryAsInteger;
|
||||
};
|
||||
|
||||
using ControlBlockContainer = std::vector<ControlBlock>;
|
||||
|
||||
/**
|
||||
* Helper range of control blocks to allow for range based for loops.
|
||||
*/
|
||||
class ControlBlockRange {
|
||||
public:
|
||||
ControlBlockRange() = default;
|
||||
ControlBlockRange(ControlBlockContainer::const_iterator b,
|
||||
ControlBlockContainer::const_iterator e)
|
||||
: _begin(b), _end(e) {}
|
||||
|
||||
ControlBlockContainer::const_iterator begin() const {
|
||||
return _begin;
|
||||
}
|
||||
|
||||
ControlBlockContainer::const_iterator end() const {
|
||||
return _end;
|
||||
}
|
||||
|
||||
private:
|
||||
ControlBlockContainer::const_iterator _begin;
|
||||
ControlBlockContainer::const_iterator _end;
|
||||
};
|
||||
|
||||
/**
|
||||
* Calculated overflow point used to initialize the BSONColumnBuilder in the binary reopen
|
||||
* operation.
|
||||
*/
|
||||
template <typename T>
|
||||
struct OverflowPoint {
|
||||
explicit OverflowPoint(boost::optional<T> val) : _last(val) {}
|
||||
|
||||
/**
|
||||
* Calculated control byte for overflow/no-overflow.
|
||||
*
|
||||
* If overflow occurred, overflow() will return true and index() the position in this
|
||||
* control byte where we overflowd. Data before and including the overflow point needs to be
|
||||
* written to the buffer and data after this overflow point needs to be appended to the
|
||||
* pending state.
|
||||
*
|
||||
* If no overflow, overflow() will return false and index() return -1. No data should be
|
||||
* written to the buffer and all data in this control should be appended to the pending
|
||||
* state.
|
||||
*/
|
||||
const char* control() const {
|
||||
return _control;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scale index for the control byte returned by control().
|
||||
*/
|
||||
uint8_t scaleIndex() const {
|
||||
return _scaleIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if overflow occurred, false otherwise.
|
||||
*/
|
||||
bool overflow() const {
|
||||
return overflowIndex != bsoncolumn::internal::kInvalidIndex;
|
||||
}
|
||||
/**
|
||||
* Returns index of the simple8b block where the overflow occurred, -1 if no overflow.
|
||||
*/
|
||||
int index() const {
|
||||
return overflowIndex;
|
||||
}
|
||||
/**
|
||||
* Returns true if all values after the overflow point are identical to the value returned
|
||||
* by last().
|
||||
*/
|
||||
bool allValuesIdentical() const {
|
||||
return _allIdentical;
|
||||
}
|
||||
|
||||
/**
|
||||
* Last control byte of the binary prior to reopen.
|
||||
*/
|
||||
uint8_t lastControl() const {
|
||||
return _lastControl;
|
||||
}
|
||||
/**
|
||||
* Offset to the last control byte of the binary prior to reopen.
|
||||
*/
|
||||
uint16_t lastControlOffset() const {
|
||||
return _lastControlOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Range of control blocks after the overflow point that needs to be appended to the pending
|
||||
* state.
|
||||
*/
|
||||
const ControlBlockRange& remaining() const {
|
||||
return _remaining;
|
||||
}
|
||||
|
||||
/**
|
||||
* Last value at the overflow point. Used to decode values after overflow when they start
|
||||
* with RLE and to setup the pending state with RLE if necessary.
|
||||
*/
|
||||
const boost::optional<T>& last() const {
|
||||
return _last;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper used by OverflowState to set final OverflowPoint result.
|
||||
*/
|
||||
void setControl(const char* ctrl,
|
||||
uint8_t scale,
|
||||
ControlBlockRange remain,
|
||||
uint8_t lastControl,
|
||||
uint16_t lastControlOffset) {
|
||||
_control = ctrl;
|
||||
_scaleIndex = scale;
|
||||
_remaining = remain;
|
||||
_lastControl = lastControl;
|
||||
_lastControlOffset = lastControlOffset;
|
||||
}
|
||||
/**
|
||||
* Internal helper to set last.
|
||||
*/
|
||||
void setLast(boost::optional<T> value) {
|
||||
_last = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal helper to mark that overflow occurred, at which position and if all values after
|
||||
* the overflow are identical or not.
|
||||
*/
|
||||
void markOverflow(int index, bool allIdentical) {
|
||||
invariant(index != bsoncolumn::internal::kInvalidIndex);
|
||||
overflowIndex = index;
|
||||
_allIdentical = allIdentical;
|
||||
}
|
||||
/**
|
||||
* Internal helper to override overflow to set it back to no overflow.
|
||||
*/
|
||||
void markNoOverflow() {
|
||||
overflowIndex = bsoncolumn::internal::kInvalidIndex;
|
||||
}
|
||||
/**
|
||||
* Internal helper to explicitly set that all values are identical.
|
||||
*/
|
||||
void setAllIdentical() {
|
||||
_allIdentical = true;
|
||||
}
|
||||
|
||||
private:
|
||||
boost::optional<T> _last;
|
||||
const char* _control = nullptr;
|
||||
int overflowIndex = bsoncolumn::internal::kInvalidIndex;
|
||||
uint8_t _scaleIndex = kInvalidScaleIndex;
|
||||
uint8_t _lastControl = 0;
|
||||
uint16_t _lastControlOffset = 0;
|
||||
ControlBlockRange _remaining;
|
||||
bool _allIdentical = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper to calculate how to re-initialize the compressor from a compressed binary.
|
||||
*
|
||||
* The main difficulty with re-initializing the compressor from a compressed binary is how to
|
||||
* undo the 'finalize()/intermediate()' call where pending values are flushed out to simple8b
|
||||
* blocks in the binary. We need to undo this operation by putting back these values back into
|
||||
* the pending state. The point in the binary where we need to do this undo is called the
|
||||
* overflow point.
|
||||
*
|
||||
* For this to be efficient we need to calculate this from the end of the binary rather than the
|
||||
* beginning. In the typical case we will use a dummy Simple8bBuilder where values are added in
|
||||
* the reverse order to observe when we can no longer add values without needing to write full
|
||||
* simple8b blocks.
|
||||
*
|
||||
* The two main sources of complexity in this algorithm is how to deal with RLE and double
|
||||
* values that have been rescaled. RLE in BSONColumn is defined as a repeat of the prior value
|
||||
* so we must be able to travserse past RLE to determine if the overflow point happens before or
|
||||
* after. For rescale we have a similar problem, either the rescale can be undone and put back
|
||||
* into pending or it is required due to an incompatible value.
|
||||
*/
|
||||
template <typename T>
|
||||
class OverflowState {
|
||||
public:
|
||||
// Initialize the overflow state from the last control block.
|
||||
OverflowState(ControlBlock cb);
|
||||
|
||||
// Perform the overflow detection.
|
||||
const OverflowPoint<T>& detect(const ControlBlockContainer& controls);
|
||||
|
||||
private:
|
||||
// Helper function to handle the special case where the binary ends with RLE
|
||||
void _detectEndsRLE(ControlBlock cb);
|
||||
// Helper function to perform the regular detection logic. When overflow is detected it
|
||||
// returns a number of control blocks already processed that the overflow point refers to.
|
||||
// This can happen when we are processing RLE but discover that the RLE data belong prior to
|
||||
// the overflow point.
|
||||
int _detectRegular(ControlBlock cb);
|
||||
// Helper function when a rescale is detected. This automatically ends the overflow
|
||||
// detection but we need to calculate if the rescaled data should be included in the
|
||||
// overflow or not. Returns a binary offset to the last control byte in the case where the
|
||||
// rescaled data was only output because of the finalize/intermediate call.
|
||||
uint16_t _detectRescale(ControlBlockRange before, ControlBlockRange after);
|
||||
|
||||
OverflowPoint<T> _op;
|
||||
Simple8bBuilder<T> _overflowDetector;
|
||||
int _pendingRle = bsoncolumn::internal::kInvalidIndex;
|
||||
int _pendingRleBlocks = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
* Result from the 'findOverflow' call.
|
||||
*
|
||||
* lastValue is the last value in the simple8b causing overflow. If no overflow was detected it is
|
||||
* set to the previously known last value.
|
||||
*
|
||||
* overflowIndex is the index position of the simple8b block in the control that caused overflow.
|
||||
* Invalid if no overflow was detected.
|
||||
*
|
||||
* pendingRLEindex is the index position of the first non-RLE simple8b block when the control begins
|
||||
* with RLE and no overflow was detected.
|
||||
*/
|
||||
template <typename T>
|
||||
struct OverflowResult {
|
||||
boost::optional<T> lastValue;
|
||||
int overflowIndex;
|
||||
int pendingRLEindex;
|
||||
};
|
||||
|
||||
/**
|
||||
* Result from the 'findLastNonRLE' call.
|
||||
*
|
||||
* lastValue is the last value in the last non-RLE simple8b. Only applicable when
|
||||
* 'index' is set to a non-invalid index.
|
||||
*
|
||||
* index is the index position of the last non-RLE simple8b in this control
|
||||
*/
|
||||
template <typename T>
|
||||
struct LastNonRLEResult {
|
||||
boost::optional<T> lastValue;
|
||||
int index;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper to get a simple8b block at index from a control block
|
||||
*/
|
||||
const char* s8b(const char* control, int index);
|
||||
|
||||
/**
|
||||
* Helper to determine if the provided simple8b block is an RLE block.
|
||||
*/
|
||||
bool isRLE(const char* s8b);
|
||||
|
||||
/**
|
||||
* Estimates the last non-skip value in a control block.
|
||||
*
|
||||
* If the last block is RLE, 0 is returned.
|
||||
* If no non-skip value can be found within what could fit in a non-RLE block, 'none' is returned.
|
||||
*/
|
||||
template <typename T>
|
||||
boost::optional<T> estimateLastValue(const char* control);
|
||||
|
||||
/**
|
||||
* Finds the last non-skip value in a control block. The last block must NOT be RLE.
|
||||
*
|
||||
* If no non-skip value can be found within what could fit in a non-RLE block, 'none' is returned.
|
||||
*/
|
||||
template <typename T>
|
||||
boost::optional<T> findLastNonSkip(const char* control, int numBlocks);
|
||||
|
||||
/**
|
||||
* Finds which simple8b block in the provided control block causes overflow, searches in reverse
|
||||
* order.
|
||||
*
|
||||
* 'lastValForRLE' indicates how any encountered RLE blocks should be interpreted.
|
||||
*
|
||||
* 'overflowDetector' is appended to internally, overflow is detected when it needs to write a
|
||||
* simple8b block. The same detector may be used in multiple calls for finding overflow.
|
||||
*/
|
||||
template <typename T>
|
||||
OverflowResult<T> findOverflow(const char* control,
|
||||
boost::optional<T> lastValForRLE,
|
||||
Simple8bBuilder<T>& overflowDetector);
|
||||
|
||||
/**
|
||||
* Finds the last non-RLE simple8b block in the provided control, returns its index position and
|
||||
* last value.
|
||||
*
|
||||
* 'index' indicates position to start search that is performed in reverse order.
|
||||
*/
|
||||
template <typename T>
|
||||
LastNonRLEResult<T> findLastNonRLE(const char* control);
|
||||
template <typename T>
|
||||
LastNonRLEResult<T> findLastNonRLE(const char* control, int index);
|
||||
|
||||
|
||||
template <typename T>
|
||||
OverflowState<T>::OverflowState(ControlBlock cb)
|
||||
: _op(bsoncolumn::internal::estimateLastValue<T>(cb.control)),
|
||||
_overflowDetector(_op.last(), 0) {}
|
||||
|
||||
template <typename T>
|
||||
const OverflowPoint<T>& OverflowState<T>::detect(const ControlBlockContainer& controls) {
|
||||
using namespace bsoncolumn::internal;
|
||||
|
||||
// Setup reverse iteration.
|
||||
auto begin = controls.rbegin();
|
||||
auto it = begin;
|
||||
auto end = controls.rend();
|
||||
|
||||
// Setup some internal state, the algorithm is different if the last block is RLE.
|
||||
uint16_t lastControlOffset = 0;
|
||||
bool endsWithRLE = isRLE(s8b(controls.back().control,
|
||||
numSimple8bBlocksForControlByte(*controls.back().control) - 1));
|
||||
|
||||
// Search backwards for the overflow point.
|
||||
for (; it != end; ++it) {
|
||||
if (it->scaleIndex == controls.back().scaleIndex) {
|
||||
if (endsWithRLE) {
|
||||
// If we end with RLE, we simply search backwards for the first non-RLE value. This
|
||||
// will be the last non-RLE value in the binary and that will be our overflow point.
|
||||
_detectEndsRLE(*it);
|
||||
} else {
|
||||
// Regular case where we don't end with RLE. If RLE is encountered during the
|
||||
// iteration we need to continue to search until the next non-RLE value is
|
||||
// encountered. Depending on its value we might have to go back to where we were
|
||||
// prior to the RLE and assign that as our overflow point. _detectRegular will
|
||||
// return how many blocks we need to undo if this is the case.
|
||||
it = std::prev(it, _detectRegular(*it));
|
||||
}
|
||||
|
||||
// _detectEndsRLE or _detectRegular will internally mark for overflow if it happened. If
|
||||
// this is the case, break out of the iteration as we are done.
|
||||
if (_op.overflow()) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Special case for the double type when a control block of a different scale was
|
||||
// detected. We have a special algorithm to determine if the overflow happened in this
|
||||
// rescaled control or prior. _detectRescale will return an offset to the last
|
||||
// (rescaled) control if it can be undone and all values put back to pending.
|
||||
if constexpr (std::is_same_v<T, uint64_t>) {
|
||||
lastControlOffset =
|
||||
_detectRescale({controls.begin(), it.base()}, {it.base(), controls.end()});
|
||||
break;
|
||||
} else {
|
||||
// This cannot happen as scan() has already verified this.
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we've finished the iteration without finding an overflow
|
||||
if (it == end) {
|
||||
if (_pendingRle != bsoncolumn::internal::kInvalidIndex) {
|
||||
// We are in pending RLE without finding an overflow. We can put everything back in
|
||||
// pending if the pending RLE value is 0 which is the only allowed form of RLE in the
|
||||
// beginning of the binary.
|
||||
if (_op.last() == T{0}) {
|
||||
_pendingRleBlocks = 0;
|
||||
_op.setAllIdentical();
|
||||
} else {
|
||||
// Our pending RLE value is non-zero which means that the RLE cannot be put in
|
||||
// pending and the overflow happened after the RLE. Restore the state to this point.
|
||||
_op.markOverflow(_pendingRle, false);
|
||||
it = std::prev(it, _pendingRleBlocks + 1);
|
||||
_pendingRleBlocks = 0;
|
||||
}
|
||||
}
|
||||
// As we got to the beginning, set last to 0 which is how RLE in the beginning of the binary
|
||||
// must be interpreted.
|
||||
_op.setLast(T{0});
|
||||
// If we end with RLE but never detect overflow, all values are identical to 0.
|
||||
if (endsWithRLE) {
|
||||
_op.setAllIdentical();
|
||||
}
|
||||
}
|
||||
|
||||
// If we have found an overflow that happened in the last simple8b block of a control, we can
|
||||
// transform this to a non-overflow at the beginning of the control after (moving the iterator
|
||||
// will be done in the next if-statement).
|
||||
if (_op.overflow() && it != begin && _op.index() == kMaxNumSimple8bPerControl - 1) {
|
||||
_op.markNoOverflow();
|
||||
}
|
||||
|
||||
// If no overflow occurred, go back to the previous control as we should not add data from the
|
||||
// current control.
|
||||
if (!_op.overflow() && it != begin && lastControlOffset == 0) {
|
||||
it = std::prev(it, _pendingRleBlocks + 1);
|
||||
}
|
||||
|
||||
// Record final calculatetion
|
||||
_op.setControl(it->control,
|
||||
it->scaleIndex,
|
||||
{it.base(), controls.end()},
|
||||
// If lastControlOffset is non-zero we're in the special rescale case where we
|
||||
// need to report the final control byte from the binary.
|
||||
lastControlOffset == 0 ? *it->control : *controls.back().control,
|
||||
lastControlOffset);
|
||||
return _op;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void OverflowState<T>::_detectEndsRLE(ControlBlock cb) {
|
||||
// If the last block ends with RLE we just need to look for the last non-RLE block to
|
||||
// discover the overflow point.
|
||||
using namespace bsoncolumn::internal;
|
||||
LastNonRLEResult<T> res = findLastNonRLE<T>(cb.control);
|
||||
_op.setLast(res.lastValue);
|
||||
if (res.index != kInvalidIndex) {
|
||||
_op.markOverflow(res.index, true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int OverflowState<T>::_detectRegular(ControlBlock cb) {
|
||||
using namespace bsoncolumn::internal;
|
||||
if (_pendingRle == kInvalidIndex) {
|
||||
// If we haven't encountered an RLE block in the beginning of a control block yet then
|
||||
// continue with the regular overflow detection.
|
||||
OverflowResult<T> res = findOverflow<T>(cb.control, _op.last(), _overflowDetector);
|
||||
_op.setLast(res.lastValue);
|
||||
// If this block begins with RLE we need to remember the index position after this RLE.
|
||||
_pendingRle = res.pendingRLEindex;
|
||||
if (res.overflowIndex != kInvalidIndex) {
|
||||
_op.markOverflow(res.overflowIndex, /* allIdentical= */ _pendingRle != kInvalidIndex);
|
||||
}
|
||||
|
||||
} else {
|
||||
// When we've encountered RLE in the beginning of a control block we need to continue to
|
||||
// search for the next non-RLE block to determine where the overflow point is.
|
||||
LastNonRLEResult<T> res = findLastNonRLE<T>(cb.control);
|
||||
if (res.index == kInvalidIndex) {
|
||||
// Still no overflow, increment how many control blocks we've consumed in this state.
|
||||
++_pendingRleBlocks;
|
||||
} else if (res.lastValue == _op.last()) {
|
||||
// Last value prior to RLE matches our RLE state after RLE. We then overflow in
|
||||
// the block prior to RLE. Reset pending blocks and mark the overflow with all identical
|
||||
// values.
|
||||
_pendingRleBlocks = 0;
|
||||
_op.markOverflow(res.index, /* allIdentical= */ true);
|
||||
} else {
|
||||
// Values to not match, so the overflow happened in the pending block after the RLE,
|
||||
// we've saved this position in _pendingRle.
|
||||
_op.markOverflow(_pendingRle, /* allIdentical= */ false);
|
||||
_op.setLast(res.lastValue);
|
||||
// Return how many control blocks ago the overflow position refers to.
|
||||
auto ret = _pendingRleBlocks + 1;
|
||||
_pendingRleBlocks = 0;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
uint16_t OverflowState<T>::_detectRescale(ControlBlockRange before, ControlBlockRange after) {
|
||||
using namespace bsoncolumn::internal;
|
||||
|
||||
// Calculate last value before the rescaling event. Search backwards for the last non-RLE block
|
||||
// and get the last value from it.
|
||||
auto it = std::make_reverse_iterator(before.end());
|
||||
auto end = std::make_reverse_iterator(before.begin());
|
||||
auto blockWithOldScale = *it;
|
||||
auto blocks = numSimple8bBlocksForControlByte(*blockWithOldScale.control);
|
||||
|
||||
for (; it != end; ++it) {
|
||||
LastNonRLEResult<T> res = findLastNonRLE<T>(it->control);
|
||||
// kInvalidIndex index means that all blocks were RLE and we need to continue to next block.
|
||||
if (res.index != kInvalidIndex) {
|
||||
_op.setLast(res.lastValue);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Nothing found, 0 is used as last when the stream begins with RLE.
|
||||
if (it == end) {
|
||||
_op.setLast(T{0});
|
||||
}
|
||||
|
||||
|
||||
// If this rescaled block is full, we know that we can treat this as a no-overflow in the next
|
||||
// control as nothing more can fit in this one anyway.
|
||||
if (blocks == kMaxNumSimple8bPerControl) {
|
||||
// If we're in pending RLE, we can additionally mark all values as identical.
|
||||
if (_pendingRle != kInvalidIndex) {
|
||||
_op.setAllIdentical();
|
||||
_pendingRleBlocks = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Based on this actual last value, re-calculate if we will overflow with the data in
|
||||
// the control blocks we've already processed. Previously we used an estimated last.
|
||||
Simple8bBuilder<uint64_t> s8bBuilder(_op.last(), 0);
|
||||
for (auto&& cb : after) {
|
||||
OverflowResult<T> res = findOverflow<T>(cb.control, _op.last(), s8bBuilder);
|
||||
// If overflow is detected, we treat this as a non-overflow in the next control block. This
|
||||
// is signalled by not marking for overflow and returning 0 offset to the final control
|
||||
// block. Everything remaining will be put back into pending.
|
||||
if (res.overflowIndex != kInvalidIndex) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// RLE detected, we then know that all values are identical.
|
||||
if (res.pendingRLEindex != kInvalidIndex) {
|
||||
_op.setAllIdentical();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Next we need to see if the first value stored in the future control blocks (with a different
|
||||
// scale) can be scaled using this scale factor that we've now encountered. First, take the next
|
||||
// control block (the range is guaranteed to be non-empty).
|
||||
const auto& next = *after.begin();
|
||||
|
||||
// Encode the last value using next scale factor, this is needed to expand future deltas. This
|
||||
// is guaranteed to succeed as the scan() function has already validated this.
|
||||
auto encoded =
|
||||
Simple8bTypeUtil::encodeDouble(blockWithOldScale.lastAtEndOfBlock, next.scaleIndex);
|
||||
|
||||
// Extract the first value from the next control block. Simple8b cannot be empty, so we can
|
||||
// dereference the begin iterator without further checking.
|
||||
boost::optional<T> nextVal =
|
||||
*Simple8b<uint64_t>(next.control + 1, sizeof(uint64_t), _op.last()).begin();
|
||||
|
||||
// Skipped values can be always be scaled with any scale factor
|
||||
if (nextVal) {
|
||||
// Calculate the encoded delta of the next value and then try to encode it using our new
|
||||
// scale factor
|
||||
encoded = expandDelta(*encoded, Simple8bTypeUtil::decodeInt64(*nextVal));
|
||||
if (!Simple8bTypeUtil::encodeDouble(
|
||||
Simple8bTypeUtil::decodeDouble(*encoded, next.scaleIndex),
|
||||
blockWithOldScale.scaleIndex)) {
|
||||
// Not possible to scale this value using the last scale factor. We return 0 to signal
|
||||
// this as non-overflow in the block after the rescale, which effectively discards
|
||||
// everything before the rescale as they will never be needed.
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Rescaling was possible, all the rescaled values will then need to be written back as pending
|
||||
// values. This is signalled as an overflow in the last position of this control block. We also
|
||||
// return an offset to the last control byte of the actual rescaled control block in the binary.
|
||||
_op.markOverflow(blocks - 1, false);
|
||||
return blocks * sizeof(uint64_t) + 1 +
|
||||
std::distance(after.begin(), after.end() - 1) *
|
||||
(kMaxNumSimple8bPerControl * sizeof(uint64_t) + 1);
|
||||
}
|
||||
|
||||
inline const char* s8b(const char* control, int index) {
|
||||
return control +
|
||||
/* offset to block at index */ index * /* simple8b block size */ sizeof(uint64_t) +
|
||||
/* skip control byte*/ 1;
|
||||
}
|
||||
|
||||
inline bool isRLE(const char* s8b) {
|
||||
// Read simple8b block and mask out the selector
|
||||
return (ConstDataView(s8b).read<LittleEndian<uint64_t>>() &
|
||||
simple8b_internal::kBaseSelectorMask) == simple8b_internal::kRleSelector;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
boost::optional<T> estimateLastValue(const char* control) {
|
||||
auto numBlocks = numSimple8bBlocksForControlByte(*control);
|
||||
if (isRLE(s8b(control, numBlocks - 1))) {
|
||||
return T{0};
|
||||
}
|
||||
|
||||
// Assume that the last value in Simple8b blocks is the same as the one before the
|
||||
// first. This assumption will hold if all values are equal and RLE is eligible. If it
|
||||
// turns out to be incorrect the Simple8bBuilder will internally reset and disregard
|
||||
// RLE.
|
||||
return findLastNonSkip<T>(control, numBlocks);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
boost::optional<T> findLastNonSkip(const char* control, int numBlocks) {
|
||||
// Limit the search for a non-skip value. If we go above 60 without overflow then we consider
|
||||
// skip to be the last value for RLE as it would be the only one eligible for RLE.
|
||||
constexpr int kMaxNumSkipInNonRLEBlock = 60;
|
||||
for (int index = numBlocks - 1, numSkips = 0; index >= 0 && numSkips < kMaxNumSkipInNonRLEBlock;
|
||||
--index) {
|
||||
const char* block = s8b(control, index);
|
||||
// Abort this operation when an RLE block is found, they are handled in a separate code
|
||||
// path.
|
||||
if (isRLE(block)) {
|
||||
break;
|
||||
}
|
||||
Simple8b<T> s8b(block, sizeof(uint64_t));
|
||||
for (auto it = s8b.begin(), end = s8b.end();
|
||||
it != end && numSkips < kMaxNumSkipInNonRLEBlock;
|
||||
++it) {
|
||||
const auto& elem = *it;
|
||||
if (elem) {
|
||||
// We do not need to use the actual last value for RLE when determining overflow
|
||||
// point later. We can use the first value we discover when performing this
|
||||
// iteration. For a RLE block to be undone and put back into the pending state all
|
||||
// values need to be the same. So if a value later in this Simple8b block is
|
||||
// different from this value we cannot undo all these containing a RLE. If the
|
||||
// values are not all the same we will not fit 120 zeros in pending and the RLE
|
||||
// block will be left as-is.
|
||||
return elem;
|
||||
}
|
||||
++numSkips;
|
||||
}
|
||||
}
|
||||
// We did not find any value, so use skip as RLE. It is important that we use 'none' to
|
||||
// interpret RLE blocks going forward so we can properly undo simple8b blocks containing all
|
||||
// skip and RLE blocks.
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
OverflowResult<T> findOverflow(const char* control,
|
||||
boost::optional<T> lastValForRLE,
|
||||
Simple8bBuilder<T>& overflowDetector) {
|
||||
// Search is performed in reverse order
|
||||
int index = numSimple8bBlocksForControlByte(*control) - 1;
|
||||
for (; index >= 0; --index) {
|
||||
// Get pointer to Simple8b block at this index position
|
||||
const char* block = s8b(control, index);
|
||||
|
||||
// If this is an RLE block and if the overflow detector is in RLE mode, we need to skip to
|
||||
// the next non-RLE block and compare its last value against the values after RLE.
|
||||
if (isRLE(block)) {
|
||||
// If we are not in RLE mode then we know that overflow occurred in this RLE block,
|
||||
// return its position.
|
||||
if (!overflowDetector.rlePossible()) {
|
||||
return {lastValForRLE, index, kInvalidIndex};
|
||||
}
|
||||
|
||||
// Search for the next non-RLE block and get the last value from it.
|
||||
LastNonRLEResult<T> res = findLastNonRLE<T>(control, index - 1);
|
||||
if (res.index == kInvalidIndex) {
|
||||
// We exhausted this control block without determining where the overflow point
|
||||
// is. Return pending RLE index so we can continue this operation in the prior
|
||||
// control block. If the value we find prior to the RLE is different, then the
|
||||
// overflow happened at this 'pending RLE' index.
|
||||
return {lastValForRLE, kInvalidIndex, index};
|
||||
} else if (res.lastValue == lastValForRLE) {
|
||||
// Last value prior to RLE matches our RLE state after RLE. We then overflow in
|
||||
// the block prior to RLE.
|
||||
return {lastValForRLE, res.index, kInvalidIndex};
|
||||
}
|
||||
|
||||
// Last value prior to RLE does not match our RLE state after RLE. We then overflow in
|
||||
// the RLE block with the previous value set to the actual RLE value from the block
|
||||
// prior to RLE.
|
||||
return {res.lastValue, index, kInvalidIndex};
|
||||
}
|
||||
|
||||
// Regular non-RLE block. We extract all values and append it to our overflow detector to
|
||||
// see if they cause overflow.
|
||||
Simple8b<T> s8b(block,
|
||||
/* one block at a time */ sizeof(uint64_t),
|
||||
lastValForRLE);
|
||||
boost::optional<T> last;
|
||||
|
||||
bool overflow = false;
|
||||
auto writeFn = [&overflow](uint64_t block) mutable {
|
||||
overflow = true;
|
||||
};
|
||||
for (auto&& elem : s8b) {
|
||||
last = elem;
|
||||
if (elem) {
|
||||
overflowDetector.append(*last, writeFn);
|
||||
} else {
|
||||
overflowDetector.skip(writeFn);
|
||||
}
|
||||
}
|
||||
|
||||
// If overflow point detected, we return this index position and its calculated last value.
|
||||
if (overflow) {
|
||||
return {last, index, kInvalidIndex};
|
||||
}
|
||||
}
|
||||
|
||||
// We have depleated this control block without finding an overflow position, return invalid
|
||||
// index positions.
|
||||
return {lastValForRLE, kInvalidIndex, kInvalidIndex};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
LastNonRLEResult<T> findLastNonRLE(const char* control) {
|
||||
return findLastNonRLE<T>(control, numSimple8bBlocksForControlByte(*control) - 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
LastNonRLEResult<T> findLastNonRLE(const char* control, int index) {
|
||||
// Search is performed in reverse order
|
||||
for (; index >= 0; --index) {
|
||||
const char* block = s8b(control, index);
|
||||
if (isRLE(block)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Non-RLE block found, calculate its last value and return. Last value for RLE is unused as
|
||||
// we already know that this is not an RLE block.
|
||||
uint64_t unused = simple8b::kInvalidSimple8b;
|
||||
boost::optional<T> last = simple8b::last<T>(block, sizeof(uint64_t), unused);
|
||||
|
||||
return {last, index};
|
||||
}
|
||||
|
||||
return {T{}, index};
|
||||
}
|
||||
|
||||
} // namespace mongo::bsoncolumn::internal
|
||||
942
src/mongo/bson/column/binary_reopen_test.cpp
Normal file
942
src/mongo/bson/column/binary_reopen_test.cpp
Normal file
@ -0,0 +1,942 @@
|
||||
/**
|
||||
* Copyright (C) 2025-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/bson/column/binary_reopen.h"
|
||||
|
||||
#include "mongo/bson/column/simple8b.h"
|
||||
#include "mongo/bson/column/simple8b_builder.h"
|
||||
#include "mongo/unittest/unittest.h"
|
||||
#include "mongo/util/assert_util.h"
|
||||
|
||||
namespace mongo::bsoncolumn::internal {
|
||||
using V = boost::optional<uint64_t>;
|
||||
|
||||
class BinaryReopenTest : public unittest::Test {
|
||||
public:
|
||||
BinaryReopenTest();
|
||||
|
||||
// Generates a BSONColumn control block for a set of simple8b blocks and optionally provided
|
||||
// scale factor
|
||||
const char* control(std::vector<uint64_t> blocks,
|
||||
uint8_t scaleIndex = Simple8bTypeUtil::kMemoryAsInteger);
|
||||
|
||||
// Simple8b block constants to be used in the tests
|
||||
uint64_t block1Skip;
|
||||
uint64_t block1Zero;
|
||||
uint64_t block1One;
|
||||
uint64_t block5Two;
|
||||
uint64_t block6Skip;
|
||||
uint64_t block60Skip;
|
||||
uint64_t block60Zero;
|
||||
uint64_t block2Zero1Skip;
|
||||
uint64_t block3One1Skip;
|
||||
uint64_t block3Skip1One;
|
||||
uint64_t block6Skip1Two;
|
||||
uint64_t blockFullOne;
|
||||
uint64_t block1RLE;
|
||||
uint64_t block16RLE;
|
||||
|
||||
private:
|
||||
// Memory for generated control blocks
|
||||
std::forward_list<std::unique_ptr<char[]>> _ownedControls;
|
||||
};
|
||||
|
||||
BinaryReopenTest::BinaryReopenTest() {
|
||||
// Helper to generate a single simple8b block with the provided values.
|
||||
auto generateSimple8b = [](boost::optional<uint64_t> value,
|
||||
int count,
|
||||
boost::optional<uint64_t> value2 = boost::none,
|
||||
int count2 = 0) {
|
||||
boost::optional<uint64_t> block;
|
||||
auto writeFn = [&](uint64_t b) mutable {
|
||||
if (block) {
|
||||
FAIL("Should only write one block");
|
||||
}
|
||||
block = b;
|
||||
};
|
||||
Simple8bBuilder<uint64_t> builder;
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (value) {
|
||||
builder.append(*value, writeFn);
|
||||
} else {
|
||||
builder.skip(writeFn);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < count2; i++) {
|
||||
if (value2) {
|
||||
builder.append(*value2, writeFn);
|
||||
} else {
|
||||
builder.skip(writeFn);
|
||||
}
|
||||
}
|
||||
builder.flush(writeFn);
|
||||
ASSERT_TRUE(block.has_value());
|
||||
return *block;
|
||||
};
|
||||
|
||||
// Helper to generate a simple8b block that can fit the maximum amount of a particular value
|
||||
auto generateFullSimple8b = [](boost::optional<uint64_t> value) {
|
||||
boost::optional<uint64_t> block;
|
||||
bool written = false;
|
||||
auto writeFn = [&](uint64_t b) mutable {
|
||||
block = b;
|
||||
written = true;
|
||||
};
|
||||
// We need to disable RLE, so we generate a previous value that is different from the value
|
||||
// we're appending.
|
||||
boost::optional<uint64_t> different = value ? V{*value + 1} : V{0};
|
||||
// Initialize RLE with this value
|
||||
Simple8bBuilder<uint64_t> builder(different, 0);
|
||||
// Append until a simple8b block has been full and written out
|
||||
while (!written) {
|
||||
if (value) {
|
||||
builder.append(*value, writeFn);
|
||||
} else {
|
||||
builder.skip(writeFn);
|
||||
}
|
||||
}
|
||||
return *block;
|
||||
};
|
||||
|
||||
// Some constants used in the tests below
|
||||
block1Skip = generateSimple8b(boost::none, 1);
|
||||
block1Zero = generateSimple8b(0, 1);
|
||||
block1One = generateSimple8b(1, 1);
|
||||
block5Two = generateSimple8b(2, 5);
|
||||
block6Skip = generateSimple8b(boost::none, 6);
|
||||
block60Skip = generateSimple8b(boost::none, 60);
|
||||
block60Zero = generateSimple8b(0, 60);
|
||||
block2Zero1Skip = generateSimple8b(0, 2, boost::none, 1);
|
||||
block3One1Skip = generateSimple8b(1, 3, boost::none, 1);
|
||||
block3Skip1One = generateSimple8b(boost::none, 3, 1, 1);
|
||||
block6Skip1Two = generateSimple8b(boost::none, 6, 2, 1);
|
||||
blockFullOne = generateFullSimple8b(1);
|
||||
block1RLE = simple8b_internal::kRleSelector;
|
||||
block16RLE = simple8b_internal::kRleSelector | 0xF0;
|
||||
}
|
||||
|
||||
const char* BinaryReopenTest::control(std::vector<uint64_t> blocks, uint8_t scaleIndex) {
|
||||
// A control block contains between 1 and 16 simple8b blocks.
|
||||
ASSERT_GT(blocks.size(), 0);
|
||||
ASSERT_LTE(blocks.size(), 16);
|
||||
|
||||
// Allocate enough memory to also fit the control byte preceding the simple8b blocks.
|
||||
auto c = std::make_unique<char[]>(blocks.size() * sizeof(uint64_t) + 1);
|
||||
// Write control byte with out scale factor and number of simple8b blocks.
|
||||
*c.get() = kControlByteForScaleIndex[scaleIndex] | (blocks.size() - 1);
|
||||
// Copy simple8b data
|
||||
memcpy(c.get() + 1, blocks.data(), blocks.size() * sizeof(uint64_t));
|
||||
auto ptr = c.get();
|
||||
// Store internally to simplify memory management in the tests
|
||||
_ownedControls.push_front(std::move(c));
|
||||
return ptr;
|
||||
}
|
||||
|
||||
TEST_F(BinaryReopenTest, EstimateLastValue) {
|
||||
// Block with zeros return zero
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1Zero})), V{0});
|
||||
|
||||
// Skips before a value does not affect the last value
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block6Skip, block6Skip1Two})), V{2});
|
||||
|
||||
// Block ending with skips returns last non-skip value
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1Zero, block6Skip})), V{0});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block5Two, block6Skip})), V{2});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block3One1Skip, block6Skip})), V{1});
|
||||
|
||||
// Block ending with 60 or more skips return none even if value exists before the skips
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block5Two, block60Skip})), V{boost::none});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block5Two, block60Skip, block6Skip})),
|
||||
V{boost::none});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block5Two,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip})),
|
||||
V{boost::none});
|
||||
|
||||
// Block ending with 59 or fewer skips returns last non-skip value
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block5Two,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip})),
|
||||
V{2});
|
||||
|
||||
// Block with skips only returns none
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip})),
|
||||
V{boost::none});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block6Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip,
|
||||
block1Skip})),
|
||||
V{boost::none});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1Skip})), V{boost::none});
|
||||
|
||||
// Block with RLE returns zero regardless of what's before it
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1RLE})), V{0});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1One, block1RLE})), V{0});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1Zero, block1RLE})), V{0});
|
||||
ASSERT_EQ(estimateLastValue<uint64_t>(control({block1Skip, block1RLE})), V{0});
|
||||
}
|
||||
|
||||
TEST_F(BinaryReopenTest, FindOverflow) {
|
||||
OverflowResult<uint64_t> res;
|
||||
auto findOverflowHelper = [](const char* control, V lastVal) {
|
||||
Simple8bBuilder<uint64_t> detector(lastVal, 0);
|
||||
return findOverflow<uint64_t>(control, lastVal, detector);
|
||||
};
|
||||
|
||||
// Basic case of a single simple8b block with skip does not overflow
|
||||
res = findOverflowHelper(control({block1Skip}), V{boost::none});
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{boost::none}); // last value is unchanged when there is no overflow
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Basic case of a single simple8b block with values does not overflow
|
||||
res = findOverflowHelper(control({block5Two}), V{0});
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{0}); // last value is unchanged when there is no overflow
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Two blocks with identical values does not overflow if there is a block that could have all
|
||||
// fit in
|
||||
res = findOverflowHelper(control({block5Two, block5Two}),
|
||||
V{0}); // Different value for RLE disables RLE mode
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{0}); // last value is unchanged when there is no overflow
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Two blocks with over 60 values that are different cannot fit in a single block so we overflow
|
||||
// at index 0.
|
||||
res = findOverflowHelper(control({block5Two, block60Zero}), V{0});
|
||||
ASSERT_EQ(res.overflowIndex, 0);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Three blocks with over 60 values that are different cannot fit in a single block so we
|
||||
// overflow at index 1.
|
||||
res = findOverflowHelper(control({block5Two, block5Two, block60Zero}), V{0});
|
||||
ASSERT_EQ(res.overflowIndex, 1);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Changing the last value does not affect the overflow point as RLE is not in play
|
||||
res = findOverflowHelper(control({block5Two, block5Two, block60Zero}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, 1);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Without RLE we can only fit 30 '1' values in a single block, so overflow happens at index
|
||||
// 1 even though values are identical
|
||||
res = findOverflowHelper(control({block5Two, blockFullOne, blockFullOne}), V{0});
|
||||
ASSERT_EQ(res.overflowIndex, 1);
|
||||
ASSERT_EQ(res.lastValue, V{1}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// With RLE overflow happens in first block with a different value
|
||||
res = findOverflowHelper(control({block5Two, blockFullOne, blockFullOne}), V{1});
|
||||
ASSERT_EQ(res.overflowIndex, 0);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// No overflow if the values are all identical and RLE is in play
|
||||
res = findOverflowHelper(control({blockFullOne, blockFullOne}), V{1});
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{1}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// With RLE block and all values are identical the overflow happens before the RLE block
|
||||
res = findOverflowHelper(control({block5Two, block1RLE, block5Two}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, 0);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// With RLE block and values are different before and after RLE the overflow happens at the RLE
|
||||
// block
|
||||
res = findOverflowHelper(control({blockFullOne, block1RLE, block5Two}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, 1);
|
||||
ASSERT_EQ(res.lastValue, V{1}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// With RLE block and values are different before and after RLE the overflow happens at the last
|
||||
// RLE block
|
||||
res = findOverflowHelper(control({blockFullOne, block16RLE, block1RLE, block5Two}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, 2);
|
||||
ASSERT_EQ(res.lastValue, V{1}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
|
||||
// Only RLE returns no overflow but pending RLE at the last RLE block
|
||||
res = findOverflowHelper(control({block16RLE, block1RLE}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, 1);
|
||||
|
||||
// RLE followed by non-RLE compatible with last value returns no overflow but pending RLE at
|
||||
// the last RLE block
|
||||
res = findOverflowHelper(control({block16RLE, block1RLE, block5Two}), V{2});
|
||||
ASSERT_EQ(res.overflowIndex, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{2}); // last value in block that overflowed
|
||||
ASSERT_EQ(res.pendingRLEindex, 1);
|
||||
|
||||
// RLE followed by non-RLE not compatible with last value returns overflow at the
|
||||
// non-RLE block
|
||||
res = findOverflowHelper(control({block16RLE, block1RLE, block5Two}), V{1});
|
||||
ASSERT_EQ(res.overflowIndex, 1);
|
||||
ASSERT_EQ(res.lastValue,
|
||||
V{1}); // last value is left unchanged when it cannot be determined due to RLE
|
||||
ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
|
||||
}
|
||||
|
||||
TEST_F(BinaryReopenTest, FindLastNonRLE) {
|
||||
LastNonRLEResult<uint64_t> res;
|
||||
|
||||
// Single non-RLE returns index 0 and the last value in the block
|
||||
res = findLastNonRLE<uint64_t>(control({block1Zero}));
|
||||
ASSERT_EQ(res.index, 0);
|
||||
ASSERT_EQ(res.lastValue, V{0});
|
||||
|
||||
// Single non-RLE returns index 0 and the last value in the block
|
||||
res = findLastNonRLE<uint64_t>(control({block2Zero1Skip}));
|
||||
ASSERT_EQ(res.index, 0);
|
||||
ASSERT_EQ(res.lastValue, V{boost::none});
|
||||
|
||||
// Single non-RLE returns index 0 and the last value in the block
|
||||
res = findLastNonRLE<uint64_t>(control({block6Skip1Two}));
|
||||
ASSERT_EQ(res.index, 0);
|
||||
ASSERT_EQ(res.lastValue, V{2});
|
||||
|
||||
// Multiple non-RLE blocks returns index to last block and the last value in that block
|
||||
res = findLastNonRLE<uint64_t>(control({blockFullOne, block6Skip1Two}));
|
||||
ASSERT_EQ(res.index, 1);
|
||||
ASSERT_EQ(res.lastValue, V{2});
|
||||
|
||||
// RLE at the end is skipped. Position and last value to prior non-RLE block is returned
|
||||
res = findLastNonRLE<uint64_t>(control({blockFullOne, block1RLE}));
|
||||
ASSERT_EQ(res.index, 0);
|
||||
ASSERT_EQ(res.lastValue, V{1});
|
||||
|
||||
// RLE at the end is skipped. Position and last value to prior non-RLE block is returned
|
||||
res = findLastNonRLE<uint64_t>(control({block1RLE, blockFullOne, block16RLE, block1RLE}));
|
||||
ASSERT_EQ(res.index, 1);
|
||||
ASSERT_EQ(res.lastValue, V{1});
|
||||
|
||||
// Only RLE blocks returns invalid index and last value of 0
|
||||
res = findLastNonRLE<uint64_t>(control({block1RLE}));
|
||||
ASSERT_EQ(res.index, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{0});
|
||||
|
||||
// Only RLE blocks returns invalid index and last value of 0
|
||||
res = findLastNonRLE<uint64_t>(control({block16RLE, block1RLE}));
|
||||
ASSERT_EQ(res.index, kInvalidIndex);
|
||||
ASSERT_EQ(res.lastValue, V{0});
|
||||
|
||||
// Index parameter limits the search to before that index
|
||||
res =
|
||||
findLastNonRLE<uint64_t>(control({block6Skip1Two, blockFullOne, block16RLE, block1RLE}), 0);
|
||||
ASSERT_EQ(res.index, 0);
|
||||
ASSERT_EQ(res.lastValue, V{2});
|
||||
|
||||
// Index parameter limits the search to before that index
|
||||
res = findLastNonRLE<uint64_t>(control({blockFullOne, block6Skip1Two, blockFullOne}), 1);
|
||||
ASSERT_EQ(res.index, 1);
|
||||
ASSERT_EQ(res.lastValue, V{2});
|
||||
|
||||
// Index parameter limits the search to before that index
|
||||
res = findLastNonRLE<uint64_t>(
|
||||
control({blockFullOne, block6Skip1Two, block16RLE, block1RLE, blockFullOne}), 3);
|
||||
ASSERT_EQ(res.index, 1);
|
||||
ASSERT_EQ(res.lastValue, V{2});
|
||||
}
|
||||
|
||||
TEST_F(BinaryReopenTest, Overflow) {
|
||||
// Helper to run the overflow detection on the OverflowState class
|
||||
auto overflowHelper = [](std::vector<const char*> controls) -> OverflowPoint<uint64_t> {
|
||||
ControlBlockContainer cbs;
|
||||
// Generate the control block container. We can ignore the data used for the double type
|
||||
// (checked in OverflowScaled below)
|
||||
for (auto&& c : controls) {
|
||||
cbs.push_back({c});
|
||||
}
|
||||
OverflowState<uint64_t> overflow(cbs.back());
|
||||
return overflow.detect(cbs);
|
||||
};
|
||||
|
||||
// Helper for a control block full of RLE
|
||||
auto fullRLEControl = [&]() {
|
||||
return control({block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE});
|
||||
};
|
||||
|
||||
std::vector<const char*> controls;
|
||||
|
||||
// Single control without overflow
|
||||
controls = {control({block5Two})};
|
||||
OverflowPoint<uint64_t> point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{0}); // Last is defined as 0 when there is no overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // this is never set unless RLE is involved
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Single control with overflow at index 0
|
||||
controls = {control({block5Two, block60Zero})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 0);
|
||||
ASSERT_EQ(point.last(), V{2}); // Last value in block that caused overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // this is never set unless RLE is involved
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Two controls with overflow in the first control at the second to last index position
|
||||
controls = {control({
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
blockFullOne,
|
||||
block5Two,
|
||||
}),
|
||||
control({block5Two})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 14);
|
||||
ASSERT_EQ(point.last(), V{1}); // Last value in block that caused overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // this is never set unless RLE is involved
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// Two controls with overflow in the first control at last index position is treated as no
|
||||
// overflow with the second control returned.
|
||||
controls = {control({block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
block5Two,
|
||||
blockFullOne}),
|
||||
control({block5Two})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{1}); // Last value in block that caused overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // this is never set unless RLE is involved
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Single control with RLE only returns no overflow with last value of 0
|
||||
controls = {control({block1RLE})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{0}); // Last is defined as 0 when there is no overflow
|
||||
ASSERT_TRUE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Only RLE can span more than one control which yields the same result
|
||||
controls = {fullRLEControl(), control({block16RLE, block1RLE})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{0}); // Last is defined as 0 when there is no overflow
|
||||
ASSERT_TRUE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// RLE spanning more than one control followed by blocks containing only zeros also yields the
|
||||
// same result
|
||||
controls = {fullRLEControl(),
|
||||
fullRLEControl(),
|
||||
fullRLEControl(),
|
||||
control({block16RLE, block1RLE, block60Zero})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{0}); // Last is defined as 0 when there is no overflow
|
||||
ASSERT_TRUE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 3);
|
||||
|
||||
// Value followed by RLE spanning more than one control is overflow at the index before the RLE
|
||||
// starts
|
||||
controls = {control({block5Two,
|
||||
block5Two,
|
||||
blockFullOne,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE}),
|
||||
control({block16RLE, block1RLE})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 2);
|
||||
ASSERT_EQ(point.last(), V{1}); // Last value in block that caused overflow
|
||||
ASSERT_TRUE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// Value followed by RLE spanning more than one control is overflow at the index before the
|
||||
// RLE starts as long as the value after RLE is the same as before RLE
|
||||
controls = {control({block5Two,
|
||||
block5Two,
|
||||
blockFullOne,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE}),
|
||||
control({block16RLE, block1RLE, block1One})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 2);
|
||||
ASSERT_EQ(point.last(), V{1}); // Last value in block that caused overflow
|
||||
ASSERT_TRUE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[0]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// When value before RLE is different from the value after RLE the overflow happens at the
|
||||
// last RLE block
|
||||
controls = {control({block5Two,
|
||||
block5Two,
|
||||
blockFullOne,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE}),
|
||||
fullRLEControl(),
|
||||
control({block16RLE, block1RLE, block5Two})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[2]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 1);
|
||||
ASSERT_EQ(point.last(), V{1}); // Last value in block that caused overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[2]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// When the stream starts with RLE but the value after RLE is not zero then the overflow happens
|
||||
// at the last RLE block
|
||||
controls = {fullRLEControl(), fullRLEControl(), control({block16RLE, block1RLE, block5Two})};
|
||||
point = overflowHelper(controls);
|
||||
ASSERT_EQ(point.control(), controls[2]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 1);
|
||||
ASSERT_EQ(point.last(), V{0}); // Last value in block that caused overflow
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // this is never set unless RLE is involved
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[2]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
}
|
||||
|
||||
TEST_F(BinaryReopenTest, OverflowScaled) {
|
||||
// Helper to run the overflow detection for doubles
|
||||
auto overflowHelper = [](double base,
|
||||
std::vector<const char*> controls) -> OverflowPoint<uint64_t> {
|
||||
ControlBlockContainer cbs;
|
||||
|
||||
// Every control block needs to set lastAtEndOfBlock. We calculate this based on 'base' and
|
||||
// the control blocks provided.
|
||||
uint64_t prevNonRLE = simple8b::kSingleZero;
|
||||
auto ret =
|
||||
Simple8bTypeUtil::encodeDouble(base, scaleIndexForControlByte(*controls.front()));
|
||||
ASSERT_TRUE(ret.has_value());
|
||||
int64_t encoded = *ret;
|
||||
|
||||
for (auto&& c : controls) {
|
||||
uint8_t scaleIndex = scaleIndexForControlByte(*c);
|
||||
// Doubles uses delta encoding, so we can use a sum to get the delta for the last value.
|
||||
encoded += simple8b::sum<int64_t>(
|
||||
c + 1, numSimple8bBlocksForControlByte(*c) * sizeof(uint64_t), prevNonRLE);
|
||||
base = Simple8bTypeUtil::decodeDouble(encoded, scaleIndex);
|
||||
cbs.push_back({c, base, scaleIndex});
|
||||
}
|
||||
|
||||
OverflowState<uint64_t> overflow(cbs.back());
|
||||
return overflow.detect(cbs);
|
||||
};
|
||||
|
||||
std::vector<const char*> controls;
|
||||
|
||||
// Rescale after full block is reported as no overflow in the first control with a different
|
||||
// scale
|
||||
controls = {control({blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne},
|
||||
0),
|
||||
control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
OverflowPoint<uint64_t> point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Same but the last value before rescale is skip
|
||||
controls = {control({blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
blockFullOne,
|
||||
block3One1Skip},
|
||||
0),
|
||||
control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{boost::none});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Same but there are RLE before the rescale
|
||||
controls = {control({blockFullOne,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE},
|
||||
0),
|
||||
control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Same but there are only RLE before the rescale
|
||||
controls = {control({block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE},
|
||||
0),
|
||||
control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{0});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// RLE can be before and after the rescale
|
||||
controls = {control({blockFullOne,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE},
|
||||
0),
|
||||
control({block16RLE}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Block before rescale is not full but it is not possible to scale the first value with scale
|
||||
// factor kMemoryAsInteger with scale factor 0 so we also treat this as a no overflow but return
|
||||
// the first control after the rescale.
|
||||
controls = {control({blockFullOne, blockFullOne, blockFullOne}, 0),
|
||||
control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
|
||||
// Last value before rescale can be scaled with the next scale factor and all values fit in
|
||||
// pending without causing overflow. We then report the first control with a binary offset to
|
||||
// the control byte after the scaling.
|
||||
controls = {control({blockFullOne, blockFullOne, blockFullOne}, 1), control({block5Two}, 0)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 2);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(),
|
||||
numSimple8bBlocksForControlByte(*controls[0]) * sizeof(uint64_t) + 1);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// Like above but we have a large amount of RLE after the rescale. The result is basically the
|
||||
// same, but we report a larger offset and more values remaining.
|
||||
controls = {control({blockFullOne, blockFullOne, blockFullOne}, 1),
|
||||
control({block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE,
|
||||
block16RLE},
|
||||
0),
|
||||
control({block16RLE, block16RLE, blockFullOne}, 0)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 2);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // Values are not identical even if we have a large
|
||||
// amont of RLE because the scaling is different
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[2]);
|
||||
ASSERT_EQ(point.lastControlOffset(),
|
||||
(numSimple8bBlocksForControlByte(*controls[0]) + kMaxNumSimple8bPerControl) *
|
||||
sizeof(uint64_t) +
|
||||
2); // binary offset to the third control byte
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 2);
|
||||
|
||||
// Same but with RLE on both sides of the scaling
|
||||
controls = {control({blockFullOne, blockFullOne, block16RLE}, 1), control({block1RLE}, 0)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[0]);
|
||||
ASSERT_TRUE(point.overflow());
|
||||
ASSERT_EQ(point.index(), 2);
|
||||
ASSERT_EQ(point.last(), V{1});
|
||||
ASSERT_FALSE(point.allValuesIdentical()); // Values are not identical even if we have a large
|
||||
// amont of RLE because the scaling is different
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(),
|
||||
numSimple8bBlocksForControlByte(*controls[0]) * sizeof(uint64_t) +
|
||||
1); // binary offset to the second control byte
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
|
||||
|
||||
// Last value before rescale can be scaled with the next scale factor but all values cannot fit
|
||||
// in pending without causing overflow. This case is also treated as no overflow
|
||||
controls = {control({blockFullOne, blockFullOne, block5Two}, 1),
|
||||
control({blockFullOne, block1One}, 0)};
|
||||
point = overflowHelper(1.0, controls);
|
||||
ASSERT_EQ(point.control(), controls[1]);
|
||||
ASSERT_FALSE(point.overflow());
|
||||
ASSERT_EQ(point.index(), kInvalidIndex);
|
||||
ASSERT_EQ(point.last(), V{2});
|
||||
ASSERT_FALSE(point.allValuesIdentical());
|
||||
ASSERT_EQ(point.lastControl(), (uint8_t)*controls[1]);
|
||||
ASSERT_EQ(point.lastControlOffset(), 0);
|
||||
ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
|
||||
}
|
||||
} // namespace mongo::bsoncolumn::internal
|
||||
@ -102,13 +102,10 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
<< base64::encode(diff.data(), diff.size()));
|
||||
|
||||
// Verify binary reopen gives identical state as intermediate
|
||||
// TODO SERVER-100659: Uncomment this after reopen bug is fixed
|
||||
/*
|
||||
BSONColumnBuilder reopen(diff.data(), diff.size());
|
||||
invariant(builder.isInternalStateIdentical(reopen),
|
||||
str::stream() << "Binary reopen does not yield equivalent state. Column: "
|
||||
<< base64::encode(diff.data(), diff.size()));
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -30,42 +30,10 @@
|
||||
#include "mongo/bson/bson_validate.h"
|
||||
#include "mongo/bson/bsonelement.h"
|
||||
#include "mongo/bson/column/bsoncolumn.h"
|
||||
#include "mongo/bson/column/bsoncolumnbuilder.h"
|
||||
#include "mongo/bson/util/bsonobj_traversal.h"
|
||||
#include "mongo/util/base64.h"
|
||||
|
||||
// Returns true if the binary contains interleaved data. This function just scans the binary for an
|
||||
// interleaved start control byte, it does no validation nor decompression.
|
||||
static bool isDataInterleaved(const char* binary, size_t size) {
|
||||
using namespace mongo;
|
||||
const char* pos = binary;
|
||||
const char* end = binary + size;
|
||||
|
||||
while (pos != end) {
|
||||
uint8_t control = *pos;
|
||||
if (control == stdx::to_underlying(BSONType::eoo)) {
|
||||
// Reached the end of the binary.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bsoncolumn::isInterleavedStartControlByte(control)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (bsoncolumn::isUncompressedLiteralControlByte(control)) {
|
||||
// Scan over the entire literal.
|
||||
BSONElement literal(pos, 1, BSONElement::TrustedInitTag{});
|
||||
pos += literal.size();
|
||||
continue;
|
||||
}
|
||||
|
||||
// If there are no control bytes, scan over the simple8b block.
|
||||
uint8_t size = bsoncolumn::numSimple8bBlocksForControlByte(control) * sizeof(uint64_t);
|
||||
pos += size + 1;
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
// There are two decoding APIs. For all data that pass validation, both decoder implementations
|
||||
// must produce the same results.
|
||||
extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
@ -84,6 +52,7 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
std::vector<BSONElement> blockBasedElems = {};
|
||||
std::string blockBasedError;
|
||||
std::string iteratorError;
|
||||
std::string reopenError;
|
||||
|
||||
// Attempt to decompress using the block-based API.
|
||||
try {
|
||||
@ -102,14 +71,23 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
iteratorError = e.toString();
|
||||
}
|
||||
|
||||
// If one API failed, then both APIs must fail.
|
||||
if (!iteratorError.empty() || !blockBasedError.empty()) {
|
||||
invariant(!(iteratorError.empty() || blockBasedError.empty()),
|
||||
// Attempt to reopen using the reopen API.
|
||||
try {
|
||||
BSONColumnBuilder(Data, Size);
|
||||
} catch (const DBException& e) {
|
||||
reopenError = e.toString();
|
||||
}
|
||||
|
||||
// If one API failed, then all APIs must fail.
|
||||
if (!iteratorError.empty() || !blockBasedError.empty() || !reopenError.empty()) {
|
||||
invariant(!(iteratorError.empty() || blockBasedError.empty() || reopenError.empty()),
|
||||
str::stream() << "For the input: " << base64::encode(StringData(Data, Size))
|
||||
<< ". Iterator API returned "
|
||||
<< (iteratorError.empty() ? "results" : iteratorError)
|
||||
<< ". The block based API returned "
|
||||
<< (blockBasedError.empty() ? "results" : blockBasedError));
|
||||
<< (blockBasedError.empty() ? "results" : blockBasedError)
|
||||
<< ". The reopen API returned "
|
||||
<< (reopenError.empty() ? "results" : reopenError));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -1331,7 +1331,7 @@ TEST_F(BSONColumnTest, BuilderFuzzerReopenDiscoveredEdgeCases) {
|
||||
//
|
||||
std::vector<StringData> binariesBase64 = {
|
||||
// Pending fix of SERVER-100659
|
||||
// "gPz/////////CAAAgP7/////////AQAAAAAAAAAAYI/OxcXFxcXFAQ4AAAAAAAAB7uLi4uLi4gAuHR0dHR2dAI5xcXFxcXEAjnFxcXFxcQCOcXFxcXFxAK6rq6urq2sAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AI9ulpaWlpY2AG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAI9uXFxcXFwcAG5cXFxcXBwA7gsMDAwMHAAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAK6wr6+vrwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAI8uFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAIYuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAAA="_sd,
|
||||
"gPz/////////CAAAgP7/////////AQAAAAAAAAAAYI/OxcXFxcXFAQ4AAAAAAAAB7uLi4uLi4gAuHR0dHR2dAI5xcXFxcXEAjnFxcXFxcQCOcXFxcXFxAK6rq6urq2sAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AI9ulpaWlpY2AG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAI9uXFxcXFwcAG5cXFxcXBwA7gsMDAwMHAAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAK6wr6+vrwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAI8uFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAIYuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAAA="_sd,
|
||||
};
|
||||
|
||||
for (auto&& binaryBase64 : binariesBase64) {
|
||||
|
||||
@ -40,6 +40,9 @@ static constexpr char kInterleavedStartControlByte = (char)0xF1;
|
||||
static constexpr char kInterleavedStartArrayRootControlByte = (char)0xF2;
|
||||
static constexpr uint8_t kInvalidScaleIndex = 0xFF;
|
||||
static constexpr uint8_t kInvalidControlByte = 0xFE;
|
||||
static constexpr uint8_t kMaxNumSimple8bPerControl = 16;
|
||||
static constexpr std::array<uint8_t, Simple8bTypeUtil::kMemoryAsInteger + 1>
|
||||
kControlByteForScaleIndex = {0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0x80};
|
||||
|
||||
inline bool isUncompressedLiteralControlByte(uint8_t control) {
|
||||
return (control & 0xE0) == 0 || control == (uint8_t)stdx::to_underlying(BSONType::minKey) ||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -269,9 +269,6 @@ struct EncodingState {
|
||||
F controlBlockWriter,
|
||||
const Allocator&);
|
||||
void _initializeFromPrevious(const Allocator&);
|
||||
template <class F>
|
||||
ptrdiff_t _incrementSimple8bCount(allocator_aware::BufBuilder<Allocator>& buffer,
|
||||
F controlBlockWriter);
|
||||
|
||||
// Encoders for 64bit and 128bit types.
|
||||
std::variant<Encoder64, Encoder128> _encoder;
|
||||
@ -521,7 +518,7 @@ private:
|
||||
int lastBufLength = 0;
|
||||
// Finalized state of last control byte written out by the previous intermediate() call.
|
||||
uint8_t lastControl;
|
||||
uint8_t lastControlOffset = 0;
|
||||
uint16_t lastControlOffset = 0;
|
||||
};
|
||||
|
||||
// Internal helper to perform reopen/initialization of this class from a BSONColumn binary.
|
||||
|
||||
@ -352,6 +352,9 @@ static constexpr uint64_t kSingleSkip = 0xFFFFFFFFFFFFFFFE;
|
||||
// Constant for a simple8b block containing a single zero value.
|
||||
static constexpr uint64_t kSingleZero = 0xE;
|
||||
|
||||
// Constant for an invalid simple8b block, trying to read this will throw.
|
||||
static constexpr uint64_t kInvalidSimple8b = 0;
|
||||
|
||||
/**
|
||||
* Visits all values in sequence with provided callbacks
|
||||
* visit - a callback for receiving all non-missing values (including 0)
|
||||
@ -382,6 +385,13 @@ MONGO_COMPILER_ALWAYS_INLINE_GCC14 inline size_t visitAll(const char* buffer,
|
||||
*/
|
||||
inline size_t count(const char* buffer, size_t size);
|
||||
|
||||
/**
|
||||
* Returns the last value (can be missing) over multiple Simple8b blocks. If called with unsigned T
|
||||
* it returns the encoded value in this slot. If called with signed T it returns the decoded value.
|
||||
*/
|
||||
template <typename T>
|
||||
boost::optional<T> last(const char* buffer, size_t size, uint64_t& prevNonRLE);
|
||||
|
||||
/**
|
||||
* Calculates the sum for multiple simple8b blocks in a buffer. 'prevNonRLE' should be initialized
|
||||
* to 'kSingleSkip' when calculating sum for the first buffer. If the caller needs sum from multiple
|
||||
|
||||
@ -103,12 +103,20 @@ struct SimpleDecoder {
|
||||
}
|
||||
|
||||
// Returns value of last slot. 'kMissing' is returned for missing.
|
||||
static int64_t last(uint64_t encoded) {
|
||||
static int64_t lastDecoded(uint64_t encoded) {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
if (encoded == mask)
|
||||
return kMissing;
|
||||
return Simple8bTypeUtil::decodeInt64(encoded);
|
||||
}
|
||||
|
||||
// Returns value of last slot. 'kMissing' is returned for missing.
|
||||
static uint64_t lastEncoded(uint64_t encoded) {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
if (encoded == mask)
|
||||
return kMissing;
|
||||
return encoded;
|
||||
}
|
||||
};
|
||||
|
||||
// Table-based decoder that uses a lookup table for decoding unsigned integers into signed. Suitable
|
||||
@ -212,7 +220,7 @@ struct TableDecoder {
|
||||
}
|
||||
|
||||
// Returns value of last slot. 'kMissing' is returned for missing.
|
||||
int64_t last(uint64_t encoded) const {
|
||||
int64_t lastDecoded(uint64_t encoded) const {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
const auto& entry = table[encoded];
|
||||
if (!entry.num) {
|
||||
@ -220,6 +228,15 @@ struct TableDecoder {
|
||||
}
|
||||
return entry.decoded;
|
||||
}
|
||||
|
||||
uint64_t lastEncoded(uint64_t encoded) const {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
const auto& entry = table[encoded];
|
||||
if (!entry.num) {
|
||||
return kMissing;
|
||||
}
|
||||
return encoded;
|
||||
}
|
||||
};
|
||||
|
||||
// Table-based decoder that uses a lookup table for decoding multiple unsigned integers into signed
|
||||
@ -510,7 +527,7 @@ struct ExtendedDecoder {
|
||||
|
||||
// Returns value of last slot. 'kMissing' is returned for missing.
|
||||
template <typename T>
|
||||
T last(uint64_t encoded) const {
|
||||
T lastDecoded(uint64_t encoded) const {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
if ((encoded & mask) == mask)
|
||||
return kMissing;
|
||||
@ -525,6 +542,24 @@ struct ExtendedDecoder {
|
||||
|
||||
return Simple8bTypeUtil::decodeInt(value << numZeroes);
|
||||
}
|
||||
|
||||
// Returns value of last slot. 'kMissing' is returned for missing.
|
||||
template <typename T>
|
||||
T lastEncoded(uint64_t encoded) const {
|
||||
encoded >>= (bits * (iters - 1));
|
||||
if ((encoded & mask) == mask)
|
||||
return kMissing;
|
||||
|
||||
uint64_t count = encoded & countMask;
|
||||
T value = (encoded >> countBits) & valueMask;
|
||||
auto numZeroes = count * countScale;
|
||||
// UBSAN will complain if shift values are greater than bit length
|
||||
if constexpr (std::is_same<T, uint64_t>::value) {
|
||||
numZeroes %= 64;
|
||||
}
|
||||
|
||||
return value << numZeroes;
|
||||
}
|
||||
};
|
||||
|
||||
// Storage for all decoders that we need for our various selector types
|
||||
@ -672,55 +707,57 @@ T decodeLastSlotIgnoreSkip(uint64_t encoded) {
|
||||
case 15:
|
||||
break;
|
||||
default:
|
||||
uasserted(10065906, "Bad selector");
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T decodeLastSlot(uint64_t encoded) {
|
||||
T lastDecoded(uint64_t encoded) {
|
||||
auto selector = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
encoded >>= 4;
|
||||
switch (selector) {
|
||||
case 1:
|
||||
// Encoded and decoded value is the same for the 1 bit case
|
||||
return decoder1.last(encoded);
|
||||
case 2:
|
||||
return decoder2.last(encoded);
|
||||
return decoder2.lastDecoded(encoded);
|
||||
case 3:
|
||||
return decoder3.last(encoded);
|
||||
return decoder3.lastDecoded(encoded);
|
||||
case 4:
|
||||
return decoder4.last(encoded);
|
||||
return decoder4.lastDecoded(encoded);
|
||||
case 5:
|
||||
return decoder5.last(encoded);
|
||||
return decoder5.lastDecoded(encoded);
|
||||
case 6:
|
||||
return decoder6.last(encoded);
|
||||
return decoder6.lastDecoded(encoded);
|
||||
case 7: {
|
||||
|
||||
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
encoded >>= 4;
|
||||
switch (extended) {
|
||||
case 0:
|
||||
return decoder7.last(encoded);
|
||||
return decoder7.lastDecoded(encoded);
|
||||
case 1:
|
||||
return decoderExtended7_1.last<T>(encoded);
|
||||
return decoderExtended7_1.lastDecoded<T>(encoded);
|
||||
case 2:
|
||||
return decoderExtended7_2.last<T>(encoded);
|
||||
return decoderExtended7_2.lastDecoded<T>(encoded);
|
||||
case 3:
|
||||
return decoderExtended7_3.last<T>(encoded);
|
||||
return decoderExtended7_3.lastDecoded<T>(encoded);
|
||||
case 4:
|
||||
return decoderExtended7_4.last<T>(encoded);
|
||||
return decoderExtended7_4.lastDecoded<T>(encoded);
|
||||
case 5:
|
||||
return decoderExtended7_5.last<T>(encoded);
|
||||
return decoderExtended7_5.lastDecoded<T>(encoded);
|
||||
case 6:
|
||||
return decoderExtended7_6.last<T>(encoded);
|
||||
return decoderExtended7_6.lastDecoded<T>(encoded);
|
||||
case 7:
|
||||
return decoderExtended7_7.last<T>(encoded);
|
||||
return decoderExtended7_7.lastDecoded<T>(encoded);
|
||||
case 8:
|
||||
return decoderExtended7_8.last<T>(encoded);
|
||||
return decoderExtended7_8.lastDecoded<T>(encoded);
|
||||
case 9:
|
||||
return decoderExtended7_9.last<T>(encoded);
|
||||
return decoderExtended7_9.lastDecoded<T>(encoded);
|
||||
default:
|
||||
invariant(false); // invalid encoding
|
||||
uasserted(10065900, "Bad extended selector");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@ -730,54 +767,163 @@ T decodeLastSlot(uint64_t encoded) {
|
||||
encoded >>= 4;
|
||||
switch (extended) {
|
||||
case 0:
|
||||
return decoder8.last(encoded);
|
||||
return decoder8.lastDecoded(encoded);
|
||||
case 1:
|
||||
return decoderExtended8_1.last<T>(encoded);
|
||||
return decoderExtended8_1.lastDecoded<T>(encoded);
|
||||
case 2:
|
||||
return decoderExtended8_2.last<T>(encoded);
|
||||
return decoderExtended8_2.lastDecoded<T>(encoded);
|
||||
case 3:
|
||||
return decoderExtended8_3.last<T>(encoded);
|
||||
return decoderExtended8_3.lastDecoded<T>(encoded);
|
||||
case 4:
|
||||
return decoderExtended8_4.last<T>(encoded);
|
||||
return decoderExtended8_4.lastDecoded<T>(encoded);
|
||||
case 5:
|
||||
return decoderExtended8_5.last<T>(encoded);
|
||||
return decoderExtended8_5.lastDecoded<T>(encoded);
|
||||
case 6:
|
||||
return decoderExtended8_6.last<T>(encoded);
|
||||
return decoderExtended8_6.lastDecoded<T>(encoded);
|
||||
case 7:
|
||||
return decoderExtended8_7.last<T>(encoded);
|
||||
return decoderExtended8_7.lastDecoded<T>(encoded);
|
||||
case 8:
|
||||
return decoderExtended8_8.last<T>(encoded);
|
||||
return decoderExtended8_8.lastDecoded<T>(encoded);
|
||||
case 9:
|
||||
return decoderExtended8_9.last<T>(encoded);
|
||||
return decoderExtended8_9.lastDecoded<T>(encoded);
|
||||
case 10:
|
||||
return decoderExtended8_10.last<T>(encoded);
|
||||
return decoderExtended8_10.lastDecoded<T>(encoded);
|
||||
case 11:
|
||||
return decoderExtended8_11.last<T>(encoded);
|
||||
return decoderExtended8_11.lastDecoded<T>(encoded);
|
||||
case 12:
|
||||
return decoderExtended8_12.last<T>(encoded);
|
||||
return decoderExtended8_12.lastDecoded<T>(encoded);
|
||||
case 13:
|
||||
return decoderExtended8_13.last<T>(encoded);
|
||||
return decoderExtended8_13.lastDecoded<T>(encoded);
|
||||
default:
|
||||
invariant(false); // invalid encoding
|
||||
uasserted(10065901, "Bad extended selector");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 9:
|
||||
return decoder10.last(encoded);
|
||||
return decoder10.lastDecoded(encoded);
|
||||
case 10:
|
||||
return decoder12.last(encoded);
|
||||
return decoder12.lastDecoded(encoded);
|
||||
case 11:
|
||||
return decoder15.last(encoded);
|
||||
return decoder15.lastDecoded(encoded);
|
||||
case 12:
|
||||
return decoder20.last(encoded);
|
||||
return decoder20.lastDecoded(encoded);
|
||||
case 13:
|
||||
return decoder30.last(encoded);
|
||||
return decoder30.lastDecoded(encoded);
|
||||
case 14:
|
||||
return decoder60.last(encoded);
|
||||
return decoder60.lastDecoded(encoded);
|
||||
case 15:
|
||||
break;
|
||||
default:
|
||||
uasserted(10065905, "Bad selector");
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T lastEncoded(uint64_t encoded) {
|
||||
auto selector = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
encoded >>= 4;
|
||||
switch (selector) {
|
||||
case 1:
|
||||
// Encoded and decoded value is the same for the 1 bit case
|
||||
return decoder1.last(encoded);
|
||||
case 2:
|
||||
return decoder2.lastEncoded(encoded);
|
||||
case 3:
|
||||
return decoder3.lastEncoded(encoded);
|
||||
case 4:
|
||||
return decoder4.lastEncoded(encoded);
|
||||
case 5:
|
||||
return decoder5.lastEncoded(encoded);
|
||||
case 6:
|
||||
return decoder6.lastEncoded(encoded);
|
||||
case 7: {
|
||||
|
||||
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
encoded >>= 4;
|
||||
switch (extended) {
|
||||
case 0:
|
||||
return decoder7.lastEncoded(encoded);
|
||||
case 1:
|
||||
return decoderExtended7_1.lastEncoded<T>(encoded);
|
||||
case 2:
|
||||
return decoderExtended7_2.lastEncoded<T>(encoded);
|
||||
case 3:
|
||||
return decoderExtended7_3.lastEncoded<T>(encoded);
|
||||
case 4:
|
||||
return decoderExtended7_4.lastEncoded<T>(encoded);
|
||||
case 5:
|
||||
return decoderExtended7_5.lastEncoded<T>(encoded);
|
||||
case 6:
|
||||
return decoderExtended7_6.lastEncoded<T>(encoded);
|
||||
case 7:
|
||||
return decoderExtended7_7.lastEncoded<T>(encoded);
|
||||
case 8:
|
||||
return decoderExtended7_8.lastEncoded<T>(encoded);
|
||||
case 9:
|
||||
return decoderExtended7_9.lastEncoded<T>(encoded);
|
||||
default:
|
||||
uasserted(10065902, "Bad extended selector");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
encoded >>= 4;
|
||||
switch (extended) {
|
||||
case 0:
|
||||
return decoder8.lastEncoded(encoded);
|
||||
case 1:
|
||||
return decoderExtended8_1.lastEncoded<T>(encoded);
|
||||
case 2:
|
||||
return decoderExtended8_2.lastEncoded<T>(encoded);
|
||||
case 3:
|
||||
return decoderExtended8_3.lastEncoded<T>(encoded);
|
||||
case 4:
|
||||
return decoderExtended8_4.lastEncoded<T>(encoded);
|
||||
case 5:
|
||||
return decoderExtended8_5.lastEncoded<T>(encoded);
|
||||
case 6:
|
||||
return decoderExtended8_6.lastEncoded<T>(encoded);
|
||||
case 7:
|
||||
return decoderExtended8_7.lastEncoded<T>(encoded);
|
||||
case 8:
|
||||
return decoderExtended8_8.lastEncoded<T>(encoded);
|
||||
case 9:
|
||||
return decoderExtended8_9.lastEncoded<T>(encoded);
|
||||
case 10:
|
||||
return decoderExtended8_10.lastEncoded<T>(encoded);
|
||||
case 11:
|
||||
return decoderExtended8_11.lastEncoded<T>(encoded);
|
||||
case 12:
|
||||
return decoderExtended8_12.lastEncoded<T>(encoded);
|
||||
case 13:
|
||||
return decoderExtended8_13.lastEncoded<T>(encoded);
|
||||
default:
|
||||
uasserted(10065903, "Bad extended selector");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 9:
|
||||
return decoder10.lastEncoded(encoded);
|
||||
case 10:
|
||||
return decoder12.lastEncoded(encoded);
|
||||
case 11:
|
||||
return decoder15.lastEncoded(encoded);
|
||||
case 12:
|
||||
return decoder20.lastEncoded(encoded);
|
||||
case 13:
|
||||
return decoder30.lastEncoded(encoded);
|
||||
case 14:
|
||||
return decoder60.lastEncoded(encoded);
|
||||
case 15:
|
||||
break;
|
||||
default:
|
||||
uasserted(10065904, "Bad selector");
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
@ -925,7 +1071,7 @@ MONGO_COMPILER_ALWAYS_INLINE_GCC14 inline size_t decodeAndVisit(uint64_t encoded
|
||||
return decoder60.visitAll<T>(encoded, visit, visitZero, visitMissing);
|
||||
break;
|
||||
case simple8b_internal::kRleSelector: {
|
||||
const T lastValue = decodeLastSlot<T>(*prevNonRLE);
|
||||
const T lastValue = lastDecoded<T>(*prevNonRLE);
|
||||
size_t count = ((encoded & 0xf) + 1) * simple8b_internal::kRleMultiplier;
|
||||
if (lastValue == kMissing) {
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
@ -1168,7 +1314,7 @@ T decodeAndPrefixSum(uint64_t encoded, T& prefix, uint64_t* prevNonRLE) {
|
||||
case 14:
|
||||
return decoder60.prefixSum<T>(encoded, prefix);
|
||||
case simple8b_internal::kRleSelector: {
|
||||
T last = decodeLastSlot<T>(*prevNonRLE);
|
||||
T last = lastDecoded<T>(*prevNonRLE);
|
||||
if (last == kMissing)
|
||||
return 0;
|
||||
|
||||
@ -1229,6 +1375,29 @@ inline size_t count(const char* buffer, size_t size) {
|
||||
return numElements;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
boost::optional<T> last(const char* buffer, size_t size, uint64_t& prevNonRLE) {
|
||||
invariant(size % 8 == 0);
|
||||
const char* end = buffer + size;
|
||||
while (buffer != end) {
|
||||
uint64_t encoded = ConstDataView(buffer).read<LittleEndian<uint64_t>>();
|
||||
auto selector = encoded & simple8b_internal::kBaseSelectorMask;
|
||||
if (selector != simple8b_internal::kRleSelector) {
|
||||
prevNonRLE = encoded;
|
||||
}
|
||||
|
||||
buffer += sizeof(uint64_t);
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, uint64_t> || std::is_same_v<T, uint128_t>) {
|
||||
T encoded = lastEncoded<T>(prevNonRLE);
|
||||
return encoded == kMissing ? boost::optional<T>{} : boost::optional<T>{encoded};
|
||||
} else {
|
||||
T decoded = lastDecoded<T>(prevNonRLE);
|
||||
return decoded == kMissing ? boost::optional<T>{} : boost::optional<T>{decoded};
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T sum(const char* buffer, size_t size, uint64_t& prevNonRLE) {
|
||||
invariant(size % 8 == 0);
|
||||
|
||||
@ -71,6 +71,7 @@ public:
|
||||
// Callback to handle writing of finalized Simple-8b blocks. Machine Endian byte order, the
|
||||
// value need to be converted to Little Endian before persisting.
|
||||
explicit Simple8bBuilder(Allocator = {});
|
||||
Simple8bBuilder(boost::optional<T> val, int64_t num, Allocator = {});
|
||||
~Simple8bBuilder();
|
||||
|
||||
Simple8bBuilder(const Simple8bBuilder&) = default;
|
||||
@ -79,6 +80,20 @@ public:
|
||||
Simple8bBuilder& operator=(const Simple8bBuilder&) = default;
|
||||
Simple8bBuilder& operator=(Simple8bBuilder&&) = default;
|
||||
|
||||
/**
|
||||
* Returns the allocator used by this Simple8bBuilder.
|
||||
*/
|
||||
Allocator allocator() const {
|
||||
// There is a bug in the version of MSVC we are using that fails to perform this cast when
|
||||
// the allocator is std::allocator<void>. It is a stateless allocator so we just return a
|
||||
// new instance as a workaround.
|
||||
if constexpr (std::is_same_v<Allocator, std::allocator<void>>) {
|
||||
return Allocator{};
|
||||
} else {
|
||||
return _pendingValues.get_allocator();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends a multiple missing value to Simple8b. Should be called before any other values are
|
||||
* appended. This is intended to be used to initialize a new builder with a large series of
|
||||
@ -175,17 +190,6 @@ public:
|
||||
*/
|
||||
bool rlePossible() const;
|
||||
|
||||
/**
|
||||
* Forcibly set last value so future append/skip calls may use this to construct RLE. This
|
||||
* should not be called in normal operation.
|
||||
*/
|
||||
void setLastForRLE(boost::optional<T> val);
|
||||
|
||||
/**
|
||||
* Reset RLE state on the last value, if needed. This should not be called in normal operation.
|
||||
*/
|
||||
void resetLastForRLEIfNeeded();
|
||||
|
||||
/**
|
||||
* Initialize RLE state from another builder
|
||||
*/
|
||||
@ -499,6 +503,20 @@ bool Simple8bBuilder<T, Allocator>::PendingIterator::operator!=(
|
||||
template <typename T, class Allocator>
|
||||
Simple8bBuilder<T, Allocator>::Simple8bBuilder(Allocator allocator) : _pendingValues(allocator) {}
|
||||
|
||||
template <typename T, class Allocator>
|
||||
Simple8bBuilder<T, Allocator>::Simple8bBuilder(boost::optional<T> val,
|
||||
int64_t num,
|
||||
Allocator allocator)
|
||||
: _rleCount(num), _lastValueInPrevWord(val), _pendingValues(allocator) {
|
||||
if (val) {
|
||||
auto pendingValue = _calculatePendingValue(*val);
|
||||
invariant(pendingValue);
|
||||
invariant(_doesIntegerFitInCurrentWord(*pendingValue));
|
||||
}
|
||||
_lastValidExtensionType = 0;
|
||||
isSelectorPossible.fill(true);
|
||||
}
|
||||
|
||||
template <typename T, class Allocator>
|
||||
Simple8bBuilder<T, Allocator>::~Simple8bBuilder() = default;
|
||||
|
||||
@ -584,23 +602,6 @@ void Simple8bBuilder<T, Allocator>::flush(F&& writeFn) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, class Allocator>
|
||||
void Simple8bBuilder<T, Allocator>::setLastForRLE(boost::optional<T> val) {
|
||||
_lastValueInPrevWord = val;
|
||||
if (val) {
|
||||
auto pendingValue = _calculatePendingValue(*val);
|
||||
invariant(pendingValue);
|
||||
invariant(_doesIntegerFitInCurrentWord(*pendingValue));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, class Allocator>
|
||||
void Simple8bBuilder<T, Allocator>::resetLastForRLEIfNeeded() {
|
||||
if (!rlePossible()) {
|
||||
_lastValueInPrevWord = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, class Allocator>
|
||||
void Simple8bBuilder<T, Allocator>::initializeRLEFrom(const Simple8bBuilder<T, Allocator>& other) {
|
||||
if (other.rlePossible()) {
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "mongo/bson/column/simple8b.h"
|
||||
#include "mongo/bson/column/simple8b_type_util.h"
|
||||
#include "mongo/logv2/log.h"
|
||||
#include "mongo/platform/int128.h"
|
||||
#include "mongo/util/hex.h"
|
||||
|
||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
|
||||
@ -39,6 +40,11 @@ static constexpr int128_t add(int128_t lhs, int128_t rhs) {
|
||||
return static_cast<int128_t>(static_cast<uint128_t>(lhs) + static_cast<uint128_t>(rhs));
|
||||
}
|
||||
|
||||
struct LastResult {
|
||||
boost::optional<uint128_t> encoded;
|
||||
boost::optional<int128_t> decoded;
|
||||
};
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
using namespace mongo;
|
||||
|
||||
@ -65,16 +71,45 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
}
|
||||
}();
|
||||
|
||||
auto oldLast = [&]() -> boost::optional<LastResult> {
|
||||
try {
|
||||
boost::optional<uint128_t> last = uint128_t{0};
|
||||
Simple8b<uint128_t> s8b(Data, bufferSize);
|
||||
for (auto&& val : s8b) {
|
||||
last = val;
|
||||
}
|
||||
if (last) {
|
||||
return LastResult{last, Simple8bTypeUtil::decodeInt(*last)};
|
||||
} else {
|
||||
return LastResult{boost::optional<uint128_t>(boost::none),
|
||||
boost::optional<int128_t>(boost::none)};
|
||||
}
|
||||
|
||||
} catch (const DBException&) {
|
||||
return boost::none;
|
||||
}
|
||||
}();
|
||||
|
||||
auto sum = [&]() -> boost::optional<int128_t> {
|
||||
try {
|
||||
uint64_t prev =
|
||||
0xE; // Previous value 0, this is one simple8b value containing a zero.
|
||||
uint64_t prev = simple8b::kSingleZero;
|
||||
return simple8b::sum<int128_t>(Data, bufferSize, prev);
|
||||
} catch (const DBException&) {
|
||||
return boost::none;
|
||||
}
|
||||
}();
|
||||
|
||||
auto last = [&]() -> boost::optional<LastResult> {
|
||||
try {
|
||||
uint64_t prev1 = simple8b::kSingleZero;
|
||||
uint64_t prev2 = simple8b::kSingleZero;
|
||||
return LastResult{simple8b::last<uint128_t>(Data, bufferSize, prev1),
|
||||
simple8b::last<int128_t>(Data, bufferSize, prev2)};
|
||||
} catch (const DBException&) {
|
||||
return boost::none;
|
||||
}
|
||||
}();
|
||||
|
||||
if (sum != oldSum) {
|
||||
LOGV2_DEBUG(8384500,
|
||||
2,
|
||||
@ -91,6 +126,11 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
|
||||
// (as they'd lead to crashes), while using edge cases leading to interesting control flow
|
||||
// paths in both implementations.
|
||||
invariant(sum == oldSum);
|
||||
// simple8b::last is not required to decode everything so an invalid binary might not throw.
|
||||
if (last && oldLast) {
|
||||
invariant(last->encoded == oldLast->encoded);
|
||||
invariant(last->decoded == oldLast->decoded);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -119,18 +119,34 @@ void testSimple8b(const std::vector<boost::optional<T>>& expectedValues,
|
||||
assertValuesEqual(s8b, expectedValues);
|
||||
|
||||
make_signed_t<T> sum = 0;
|
||||
boost::optional<T> last = T{0};
|
||||
for (auto&& val : expectedValues) {
|
||||
if (val) {
|
||||
sum = add(sum, Simple8bTypeUtil::decodeInt(*val));
|
||||
}
|
||||
last = val;
|
||||
}
|
||||
|
||||
uint64_t prev = 0xE; // Tests in this file assume that the previous value was '0'. This is
|
||||
// different semantics from BSONColumn.
|
||||
uint64_t prev = simple8b::kSingleZero;
|
||||
auto s = simple8b::sum<make_signed_t<T>>(
|
||||
reinterpret_cast<const char*>(expectedBinary.data()), expectedBinary.size(), prev);
|
||||
ASSERT_EQ(s, sum);
|
||||
|
||||
// Test last
|
||||
prev = simple8b::kSingleZero;
|
||||
ASSERT_EQ(last,
|
||||
simple8b::last<T>(reinterpret_cast<const char*>(expectedBinary.data()),
|
||||
expectedBinary.size(),
|
||||
prev));
|
||||
if (last.has_value()) {
|
||||
prev = simple8b::kSingleZero;
|
||||
ASSERT_EQ(
|
||||
Simple8bTypeUtil::decodeInt(*last),
|
||||
simple8b::last<make_signed_t<T>>(
|
||||
reinterpret_cast<const char*>(expectedBinary.data()), expectedBinary.size(), prev));
|
||||
}
|
||||
|
||||
|
||||
auto testPrefixSum = [&](auto prefix) {
|
||||
make_signed_t<T> sum = prefix;
|
||||
make_signed_t<T> prefixSum = 0;
|
||||
@ -142,8 +158,7 @@ void testSimple8b(const std::vector<boost::optional<T>>& expectedValues,
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t prev = 0xE; // Tests in this file assume that the previous value was '0'. This is
|
||||
// different semantics from BSONColumn.
|
||||
uint64_t prev = simple8b::kSingleZero;
|
||||
auto ps = simple8b::prefixSum<make_signed_t<T>>(
|
||||
reinterpret_cast<const char*>(expectedBinary.data()),
|
||||
expectedBinary.size(),
|
||||
|
||||
Loading…
Reference in New Issue
Block a user