SERVER-100659 Generalize BSONColumnBuilder binary reopen (#44341)

Co-authored-by: Daniel Moody <dmoody256@gmail.com>
GitOrigin-RevId: 37753ad80748c2015a59c2099a3b6ffb8d14291f
This commit is contained in:
henrikedin 2025-11-25 14:36:36 -05:00 committed by MongoDB Bot
parent c6587d9cb9
commit 848dace7fc
14 changed files with 2305 additions and 783 deletions

View File

@ -18,6 +18,7 @@ mongo_cc_library(
"simple8b_type_util.cpp",
],
hdrs = [
"binary_reopen.h",
"bsoncolumn.h",
"bsoncolumn.inl",
"bsoncolumn_helpers.h",
@ -117,6 +118,20 @@ mongo_cc_unit_test(
],
)
# Unit test target that builds binary_reopen_test.cpp and links it against the ":column"
# target of this package.
mongo_cc_unit_test(
name = "binary_reopen_test",
srcs = [
"binary_reopen_test.cpp",
],
tags = [
"mongo_unittest_seventh_group",
"server-bsoncolumn",
],
deps = [
":column",
],
)
mongo_cc_benchmark(
name = "simple8b_bm",
srcs = [

View File

@ -0,0 +1,763 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/bson/column/bsoncolumn_util.h"
#include "mongo/bson/column/simple8b.h"
#include "mongo/bson/column/simple8b_builder.h"
#include <boost/optional/optional.hpp>
namespace mongo::bsoncolumn::internal {
/**
 * Sentinel index (-1) used to signal "no overflow position" or "no pending RLE position".
 *
 * Declared 'inline' so every translation unit that includes this header shares one definition
 * instead of getting its own internal-linkage copy.
 */
inline constexpr int kInvalidIndex = -1;
/**
* Helper struct for a scanned control block.
*
* lastAtEndOfBlock and scaleIndex are only set for control blocks containing double data.
*/
struct ControlBlock {
const char* control = nullptr;
double lastAtEndOfBlock = 0.0;
uint8_t scaleIndex = Simple8bTypeUtil::kMemoryAsInteger;
};
using ControlBlockContainer = std::vector<ControlBlock>;
/**
* Helper range of control blocks to allow for range based for loops.
*/
class ControlBlockRange {
public:
ControlBlockRange() = default;
ControlBlockRange(ControlBlockContainer::const_iterator b,
ControlBlockContainer::const_iterator e)
: _begin(b), _end(e) {}
ControlBlockContainer::const_iterator begin() const {
return _begin;
}
ControlBlockContainer::const_iterator end() const {
return _end;
}
private:
ControlBlockContainer::const_iterator _begin;
ControlBlockContainer::const_iterator _end;
};
/**
 * Calculated overflow point used to initialize the BSONColumnBuilder in the binary reopen
 * operation.
 */
template <typename T>
struct OverflowPoint {
    explicit OverflowPoint(boost::optional<T> val) : _last(val) {}

    /**
     * Calculated control byte for overflow/no-overflow.
     *
     * If overflow occurred, overflow() returns true and index() is the position in this control
     * byte where we overflowed. Data before and including the overflow point needs to be written
     * to the buffer and data after the overflow point needs to be appended to the pending state.
     *
     * If no overflow occurred, overflow() returns false and index() returns -1. No data should be
     * written to the buffer and all data in this control should be appended to the pending state.
     */
    const char* control() const {
        return _control;
    }

    /**
     * Scale index for the control byte returned by control().
     */
    uint8_t scaleIndex() const {
        return _scaleIndex;
    }

    /**
     * Returns true if overflow occurred, false otherwise.
     */
    bool overflow() const {
        return _overflowIndex != bsoncolumn::internal::kInvalidIndex;
    }

    /**
     * Returns index of the simple8b block where the overflow occurred, -1 if no overflow.
     */
    int index() const {
        return _overflowIndex;
    }

    /**
     * Returns true if all values after the overflow point are identical to the value returned
     * by last().
     */
    bool allValuesIdentical() const {
        return _allIdentical;
    }

    /**
     * Last control byte of the binary prior to reopen.
     */
    uint8_t lastControl() const {
        return _lastControl;
    }

    /**
     * Offset to the last control byte of the binary prior to reopen.
     */
    uint16_t lastControlOffset() const {
        return _lastControlOffset;
    }

    /**
     * Range of control blocks after the overflow point that need to be appended to the pending
     * state.
     */
    const ControlBlockRange& remaining() const {
        return _remaining;
    }

    /**
     * Last value at the overflow point. Used to decode values after overflow when they start
     * with RLE and to set up the pending state with RLE if necessary.
     */
    const boost::optional<T>& last() const {
        return _last;
    }

    /**
     * Internal helper used by OverflowState to set the final OverflowPoint result.
     */
    void setControl(const char* ctrl,
                    uint8_t scale,
                    ControlBlockRange remain,
                    uint8_t lastControl,
                    uint16_t lastControlOffset) {
        _control = ctrl;
        _scaleIndex = scale;
        _remaining = remain;
        _lastControl = lastControl;
        _lastControlOffset = lastControlOffset;
    }

    /**
     * Internal helper to set last.
     */
    void setLast(boost::optional<T> value) {
        _last = value;
    }

    /**
     * Internal helper to mark that overflow occurred, at which position, and whether all values
     * after the overflow are identical or not. 'index' must be a valid (non-sentinel) index.
     */
    void markOverflow(int index, bool allIdentical) {
        invariant(index != bsoncolumn::internal::kInvalidIndex);
        _overflowIndex = index;
        _allIdentical = allIdentical;
    }

    /**
     * Internal helper to override overflow and set it back to no overflow. The 'all identical'
     * flag is left untouched.
     */
    void markNoOverflow() {
        _overflowIndex = bsoncolumn::internal::kInvalidIndex;
    }

    /**
     * Internal helper to explicitly set that all values are identical.
     */
    void setAllIdentical() {
        _allIdentical = true;
    }

private:
    boost::optional<T> _last;
    const char* _control = nullptr;
    // Named with a leading underscore like every other private member of this struct.
    int _overflowIndex = bsoncolumn::internal::kInvalidIndex;
    uint8_t _scaleIndex = kInvalidScaleIndex;
    uint8_t _lastControl = 0;
    uint16_t _lastControlOffset = 0;
    ControlBlockRange _remaining;
    bool _allIdentical = false;
};
/**
 * Helper to calculate how to re-initialize the compressor from a compressed binary.
 *
 * The main difficulty with re-initializing the compressor from a compressed binary is how to
 * undo the 'finalize()/intermediate()' call where pending values are flushed out to simple8b
 * blocks in the binary. We need to undo this operation by putting these values back into the
 * pending state. The point in the binary where we need to do this undo is called the overflow
 * point.
 *
 * For this to be efficient we need to calculate this from the end of the binary rather than the
 * beginning. In the typical case we will use a dummy Simple8bBuilder where values are added in
 * the reverse order to observe when we can no longer add values without needing to write full
 * simple8b blocks.
 *
 * The two main sources of complexity in this algorithm are how to deal with RLE and double
 * values that have been rescaled. RLE in BSONColumn is defined as a repeat of the prior value
 * so we must be able to traverse past RLE to determine if the overflow point happens before or
 * after. For rescale we have a similar problem, either the rescale can be undone and put back
 * into pending or it is required due to an incompatible value.
 */
template <typename T>
class OverflowState {
public:
// Initialize the overflow state from the last control block. The last value is only estimated
// from 'cb' at this point and is refined during detect().
OverflowState(ControlBlock cb);
// Perform the overflow detection over all scanned control blocks. The returned reference refers
// to state owned by this object.
const OverflowPoint<T>& detect(const ControlBlockContainer& controls);
private:
// Helper function to handle the special case where the binary ends with RLE.
void _detectEndsRLE(ControlBlock cb);
// Helper function to perform the regular detection logic. When overflow is detected it
// returns a number of control blocks already processed that the overflow point refers to.
// This can happen when we are processing RLE but discover that the RLE data belong prior to
// the overflow point.
int _detectRegular(ControlBlock cb);
// Helper function when a rescale is detected. This automatically ends the overflow
// detection but we need to calculate if the rescaled data should be included in the
// overflow or not. Returns a binary offset to the last control byte in the case where the
// rescaled data was only output because of the finalize/intermediate call.
uint16_t _detectRescale(ControlBlockRange before, ControlBlockRange after);
// Result being built up. Must be declared before '_overflowDetector' because the constructor
// initializes the detector from '_op.last()'.
OverflowPoint<T> _op;
// Dummy builder that values are appended to in order to observe when a simple8b block would
// have to be written.
Simple8bBuilder<T> _overflowDetector;
// Index position after an RLE found at the beginning of a control block, kInvalidIndex if not
// currently in the pending RLE state.
int _pendingRle = bsoncolumn::internal::kInvalidIndex;
// Number of control blocks consumed while in the pending RLE state.
int _pendingRleBlocks = 0;
};
/**
* Result from the 'findOverflow' call.
*
* lastValue is the last value in the simple8b causing overflow. If no overflow was detected it is
* set to the previously known last value.
*
* overflowIndex is the index position of the simple8b block in the control that caused overflow.
* Invalid if no overflow was detected.
*
* pendingRLEindex is the index position of the first non-RLE simple8b block when the control begins
* with RLE and no overflow was detected.
*/
template <typename T>
struct OverflowResult {
boost::optional<T> lastValue;
int overflowIndex;
int pendingRLEindex;
};
/**
* Result from the 'findLastNonRLE' call.
*
* lastValue is the last value in the last non-RLE simple8b. Only applicable when
* 'index' is set to a non-invalid index.
*
* index is the index position of the last non-RLE simple8b in this control
*/
template <typename T>
struct LastNonRLEResult {
boost::optional<T> lastValue;
int index;
};
/**
 * Helper to get a pointer to the simple8b block at the zero-based position 'index' in a control
 * block. 'control' points at the control byte; the block data follows directly after it.
 */
const char* s8b(const char* control, int index);
/**
 * Helper to determine if the provided simple8b block is an RLE block, i.e. if its selector bits
 * equal the RLE selector.
 */
bool isRLE(const char* s8b);
/**
 * Estimates the last non-skip value in a control block.
 *
 * If the last block is RLE, 0 is returned.
 * If no non-skip value can be found within what could fit in a non-RLE block, 'none' is returned.
 */
template <typename T>
boost::optional<T> estimateLastValue(const char* control);
/**
 * Finds a non-skip value near the end of a control block. The last block must NOT be RLE.
 *
 * Blocks are visited from the last one towards the first, and the first non-skip value found
 * inside a visited block is returned. 'numBlocks' is the number of simple8b blocks in 'control'.
 *
 * If no non-skip value can be found within what could fit in a non-RLE block, 'none' is returned.
 */
template <typename T>
boost::optional<T> findLastNonSkip(const char* control, int numBlocks);
/**
 * Finds which simple8b block in the provided control block causes overflow, searches in reverse
 * order.
 *
 * 'lastValForRLE' indicates how any encountered RLE blocks should be interpreted.
 *
 * 'overflowDetector' is appended to internally, overflow is detected when it needs to write a
 * simple8b block. The same detector may be used in multiple calls for finding overflow.
 */
template <typename T>
OverflowResult<T> findOverflow(const char* control,
boost::optional<T> lastValForRLE,
Simple8bBuilder<T>& overflowDetector);
/**
 * Finds the last non-RLE simple8b block in the provided control, returns its index position and
 * last value. The search starts at the last simple8b block of the control and is performed in
 * reverse order.
 */
template <typename T>
LastNonRLEResult<T> findLastNonRLE(const char* control);
/**
 * Same as above, but 'index' indicates the position to start the search, which is performed in
 * reverse order.
 */
template <typename T>
LastNonRLEResult<T> findLastNonRLE(const char* control, int index);
/**
 * Seeds the overflow point with a last value estimated from 'cb' and primes the overflow
 * detector with that same value. '_op' is initialized first as the detector reads from it.
 */
template <typename T>
OverflowState<T>::OverflowState(ControlBlock cb)
    : _op(estimateLastValue<T>(cb.control)), _overflowDetector(_op.last(), 0) {}
/**
 * Walks 'controls' from the last control block towards the first to locate the overflow point,
 * records the outcome in '_op' and returns a reference to it.
 *
 * NOTE(review): 'controls.back()' is dereferenced unconditionally, so 'controls' is presumably
 * required to be non-empty - confirm against callers.
 */
template <typename T>
const OverflowPoint<T>& OverflowState<T>::detect(const ControlBlockContainer& controls) {
using namespace bsoncolumn::internal;
// Setup reverse iteration.
auto begin = controls.rbegin();
auto it = begin;
auto end = controls.rend();
// Setup some internal state, the algorithm is different if the last block is RLE.
uint16_t lastControlOffset = 0;
bool endsWithRLE = isRLE(s8b(controls.back().control,
numSimple8bBlocksForControlByte(*controls.back().control) - 1));
// Search backwards for the overflow point.
for (; it != end; ++it) {
if (it->scaleIndex == controls.back().scaleIndex) {
if (endsWithRLE) {
// If we end with RLE, we simply search backwards for the first non-RLE value. This
// will be the last non-RLE value in the binary and that will be our overflow point.
_detectEndsRLE(*it);
} else {
// Regular case where we don't end with RLE. If RLE is encountered during the
// iteration we need to continue to search until the next non-RLE value is
// encountered. Depending on its value we might have to go back to where we were
// prior to the RLE and assign that as our overflow point. _detectRegular will
// return how many blocks we need to undo if this is the case. As 'it' is a reverse
// iterator, std::prev moves it back towards the end of the binary.
it = std::prev(it, _detectRegular(*it));
}
// _detectEndsRLE or _detectRegular will internally mark for overflow if it happened. If
// this is the case, break out of the iteration as we are done.
if (_op.overflow()) {
break;
}
} else {
// Special case for the double type when a control block of a different scale was
// detected. We have a special algorithm to determine if the overflow happened in this
// rescaled control or prior. _detectRescale will return an offset to the last
// (rescaled) control if it can be undone and all values put back to pending.
if constexpr (std::is_same_v<T, uint64_t>) {
lastControlOffset =
_detectRescale({controls.begin(), it.base()}, {it.base(), controls.end()});
break;
} else {
// This cannot happen as scan() has already verified this.
MONGO_UNREACHABLE;
}
}
}
// Check if we've finished the iteration without finding an overflow.
if (it == end) {
if (_pendingRle != bsoncolumn::internal::kInvalidIndex) {
// We are in pending RLE without finding an overflow. We can put everything back in
// pending if the pending RLE value is 0 which is the only allowed form of RLE in the
// beginning of the binary.
if (_op.last() == T{0}) {
_pendingRleBlocks = 0;
_op.setAllIdentical();
} else {
// Our pending RLE value is non-zero which means that the RLE cannot be put in
// pending and the overflow happened after the RLE. Restore the state to this point.
_op.markOverflow(_pendingRle, false);
it = std::prev(it, _pendingRleBlocks + 1);
_pendingRleBlocks = 0;
}
}
// As we got to the beginning, set last to 0 which is how RLE in the beginning of the binary
// must be interpreted.
_op.setLast(T{0});
// If we end with RLE but never detect overflow, all values are identical to 0.
if (endsWithRLE) {
_op.setAllIdentical();
}
}
// If we have found an overflow that happened in the last simple8b block of a control, we can
// transform this to a non-overflow at the beginning of the control after (moving the iterator
// will be done in the next if-statement).
if (_op.overflow() && it != begin && _op.index() == kMaxNumSimple8bPerControl - 1) {
_op.markNoOverflow();
}
// If no overflow occurred, go back to the previous control as we should not add data from the
// current control.
if (!_op.overflow() && it != begin && lastControlOffset == 0) {
it = std::prev(it, _pendingRleBlocks + 1);
}
// Record final calculation.
_op.setControl(it->control,
it->scaleIndex,
{it.base(), controls.end()},
// If lastControlOffset is non-zero we're in the special rescale case where we
// need to report the final control byte from the binary.
lastControlOffset == 0 ? *it->control : *controls.back().control,
lastControlOffset);
return _op;
}
/**
 * Detection step used when the binary ends with RLE: the overflow point is simply the last
 * non-RLE simple8b block, so look for one in 'cb' and mark it if present.
 */
template <typename T>
void OverflowState<T>::_detectEndsRLE(ControlBlock cb) {
    const LastNonRLEResult<T> lastNonRle = findLastNonRLE<T>(cb.control);

    // The last value is recorded regardless of whether a non-RLE block was found.
    _op.setLast(lastNonRle.lastValue);

    if (lastNonRle.index == kInvalidIndex) {
        // Only RLE blocks in this control, nothing to mark yet.
        return;
    }
    _op.markOverflow(lastNonRle.index, true);
}
/**
 * Detection step used when the binary does not end with RLE.
 *
 * Returns how many control blocks ago the marked overflow position refers to. This is non-zero
 * only when a run of RLE had to be traversed and the overflow turned out to be located after it.
 */
template <typename T>
int OverflowState<T>::_detectRegular(ControlBlock cb) {
    if (_pendingRle == kInvalidIndex) {
        // No RLE at the beginning of a control block has been seen so far, run the regular
        // overflow detection on this control.
        const OverflowResult<T> found =
            findOverflow<T>(cb.control, _op.last(), _overflowDetector);
        _op.setLast(found.lastValue);

        // Remember the index position after the RLE if this control begins with RLE.
        _pendingRle = found.pendingRLEindex;

        if (found.overflowIndex != kInvalidIndex) {
            const bool allIdentical = _pendingRle != kInvalidIndex;
            _op.markOverflow(found.overflowIndex, allIdentical);
        }
        return 0;
    }

    // A previous control began with RLE. Keep looking for the next non-RLE block as its last
    // value decides on which side of the RLE the overflow point is.
    const LastNonRLEResult<T> prior = findLastNonRLE<T>(cb.control);

    if (prior.index == kInvalidIndex) {
        // This control is all RLE as well, count it and keep searching.
        ++_pendingRleBlocks;
        return 0;
    }

    if (prior.lastValue == _op.last()) {
        // The value before the RLE equals the RLE state after it, so the overflow is in the
        // block prior to the RLE and everything after it is identical.
        _pendingRleBlocks = 0;
        _op.markOverflow(prior.index, /* allIdentical= */ true);
        return 0;
    }

    // The values do not match, so the overflow happened in the pending block after the RLE whose
    // position was saved in _pendingRle.
    _op.markOverflow(_pendingRle, /* allIdentical= */ false);
    _op.setLast(prior.lastValue);

    // Report how many control blocks ago that position was found.
    const int blocksToUndo = _pendingRleBlocks + 1;
    _pendingRleBlocks = 0;
    return blocksToUndo;
}
/**
 * Handles a control block whose scale index differs from the one of the last control block.
 *
 * 'before' ends with the control block that has the differing (old) scale, 'after' contains the
 * control blocks following it.
 *
 * Returns 0 when the result should be treated as a non-overflow in the control block after the
 * rescale. Otherwise the rescale can be undone: overflow is marked in the last position of the
 * old-scale control block and the returned value is the byte offset from that control block to
 * the last control byte in 'after'.
 */
template <typename T>
uint16_t OverflowState<T>::_detectRescale(ControlBlockRange before, ControlBlockRange after) {
using namespace bsoncolumn::internal;
// Calculate last value before the rescaling event. Search backwards for the last non-RLE block
// and get the last value from it.
auto it = std::make_reverse_iterator(before.end());
auto end = std::make_reverse_iterator(before.begin());
// The first element in reverse order is the last control block of 'before'.
auto blockWithOldScale = *it;
auto blocks = numSimple8bBlocksForControlByte(*blockWithOldScale.control);
for (; it != end; ++it) {
LastNonRLEResult<T> res = findLastNonRLE<T>(it->control);
// kInvalidIndex index means that all blocks were RLE and we need to continue to next block.
if (res.index != kInvalidIndex) {
_op.setLast(res.lastValue);
break;
}
}
// Nothing found, 0 is used as last when the stream begins with RLE.
if (it == end) {
_op.setLast(T{0});
}
// If this rescaled block is full, we know that we can treat this as a no-overflow in the next
// control as nothing more can fit in this one anyway.
if (blocks == kMaxNumSimple8bPerControl) {
// If we're in pending RLE, we can additionally mark all values as identical.
if (_pendingRle != kInvalidIndex) {
_op.setAllIdentical();
_pendingRleBlocks = 0;
}
return 0;
}
// Based on this actual last value, re-calculate if we will overflow with the data in
// the control blocks we've already processed. Previously we used an estimated last.
Simple8bBuilder<uint64_t> s8bBuilder(_op.last(), 0);
for (auto&& cb : after) {
OverflowResult<T> res = findOverflow<T>(cb.control, _op.last(), s8bBuilder);
// If overflow is detected, we treat this as a non-overflow in the next control block. This
// is signalled by not marking for overflow and returning 0 offset to the final control
// block. Everything remaining will be put back into pending.
if (res.overflowIndex != kInvalidIndex) {
return 0;
}
// RLE detected, we then know that all values are identical.
if (res.pendingRLEindex != kInvalidIndex) {
_op.setAllIdentical();
break;
}
}
// Next we need to see if the first value stored in the future control blocks (with a different
// scale) can be scaled using this scale factor that we've now encountered. First, take the next
// control block (the range is guaranteed to be non-empty).
const auto& next = *after.begin();
// Encode the last value using next scale factor, this is needed to expand future deltas. This
// is guaranteed to succeed as the scan() function has already validated this.
auto encoded =
Simple8bTypeUtil::encodeDouble(blockWithOldScale.lastAtEndOfBlock, next.scaleIndex);
// Extract the first value from the next control block. Simple8b cannot be empty, so we can
// dereference the begin iterator without further checking.
boost::optional<T> nextVal =
*Simple8b<uint64_t>(next.control + 1, sizeof(uint64_t), _op.last()).begin();
// Skipped values can always be scaled with any scale factor.
if (nextVal) {
// Calculate the encoded delta of the next value and then try to encode it using our new
// scale factor
encoded = expandDelta(*encoded, Simple8bTypeUtil::decodeInt64(*nextVal));
if (!Simple8bTypeUtil::encodeDouble(
Simple8bTypeUtil::decodeDouble(*encoded, next.scaleIndex),
blockWithOldScale.scaleIndex)) {
// Not possible to scale this value using the last scale factor. We return 0 to signal
// this as non-overflow in the block after the rescale, which effectively discards
// everything before the rescale as they will never be needed.
return 0;
}
}
// Rescaling was possible, all the rescaled values will then need to be written back as pending
// values. This is signalled as an overflow in the last position of this control block. We also
// return an offset to the last control byte of the actual rescaled control block in the binary.
_op.markOverflow(blocks - 1, false);
// Size of the old-scale control block (its simple8b blocks plus its control byte), plus one
// full-sized control block for every block in 'after' except the last.
return blocks * sizeof(uint64_t) + 1 +
std::distance(after.begin(), after.end() - 1) *
(kMaxNumSimple8bPerControl * sizeof(uint64_t) + 1);
}
/**
 * Returns a pointer to the simple8b block at position 'index' within the control block that
 * starts at 'control'.
 */
inline const char* s8b(const char* control, int index) {
    // Every simple8b block is one 64-bit word wide.
    const auto blockOffset = index * sizeof(uint64_t);
    // The simple8b data begins right after the single control byte.
    const char* firstBlock = control + 1;
    return firstBlock + blockOffset;
}
/**
 * Returns true if the simple8b block at 's8b' uses the RLE selector.
 */
inline bool isRLE(const char* s8b) {
    // Load the block as a little-endian 64-bit word.
    const uint64_t encoded = ConstDataView(s8b).read<LittleEndian<uint64_t>>();
    // Keep only the selector bits and compare them against the RLE selector.
    const auto selector = encoded & simple8b_internal::kBaseSelectorMask;
    return selector == simple8b_internal::kRleSelector;
}
/**
 * Produces the initial guess for the last value of a control block: 0 when the control ends with
 * an RLE block, otherwise whatever findLastNonSkip() reports.
 */
template <typename T>
boost::optional<T> estimateLastValue(const char* control) {
    const auto blockCount = numSimple8bBlocksForControlByte(*control);
    const bool endsWithRle = isRLE(s8b(control, blockCount - 1));

    if (!endsWithRle) {
        // Guess that the value preceding the simple8b blocks equals the last value inside them.
        // That is true when every value is equal and RLE is eligible. Should the guess be wrong,
        // the Simple8bBuilder resets internally and disregards RLE.
        return findLastNonSkip<T>(control, blockCount);
    }
    return T{0};
}
/**
 * Scans the simple8b blocks of 'control' from the last towards the first and returns the first
 * non-skip value encountered, or 'none' if an RLE block is reached or too many skips are seen.
 */
template <typename T>
boost::optional<T> findLastNonSkip(const char* control, int numBlocks) {
    // Upper bound on the number of skips to look through. Beyond this, skip itself is considered
    // the last value for RLE as it would be the only one eligible for RLE.
    constexpr int kSkipBudget = 60;

    int skipsSeen = 0;
    int blockIdx = numBlocks - 1;
    while (blockIdx >= 0 && skipsSeen < kSkipBudget) {
        const char* blockPtr = s8b(control, blockIdx);

        // RLE blocks are handled by a separate code path, stop searching when one is found.
        if (isRLE(blockPtr)) {
            return boost::none;
        }

        Simple8b<T> decoded(blockPtr, sizeof(uint64_t));
        auto cur = decoded.begin();
        auto stop = decoded.end();
        while (cur != stop && skipsSeen < kSkipBudget) {
            const auto& value = *cur;
            if (value) {
                // The actual last value is not required to determine the overflow point later,
                // the first value discovered during this iteration is sufficient. Undoing an RLE
                // block back into pending requires all values to be the same. If a later value
                // in this simple8b block differs from this one, 120 zeros will not fit in pending
                // and the RLE block is left as-is.
                return value;
            }
            ++skipsSeen;
            ++cur;
        }
        --blockIdx;
    }

    // No value found, skip is used for RLE. Using 'none' here is important so that simple8b
    // blocks consisting of only skips and RLE blocks can be undone properly.
    return boost::none;
}
/**
 * Feeds the values of 'control' to 'overflowDetector' block by block in reverse order and reports
 * the index of the simple8b block at which the detector had to write, or the pending RLE index
 * when the control was exhausted while traversing RLE.
 */
template <typename T>
OverflowResult<T> findOverflow(const char* control,
                               boost::optional<T> lastValForRLE,
                               Simple8bBuilder<T>& overflowDetector) {
    // Walk the simple8b blocks from the last one towards the first.
    for (int index = numSimple8bBlocksForControlByte(*control) - 1; index >= 0; --index) {
        const char* blockPtr = s8b(control, index);

        if (!isRLE(blockPtr)) {
            // Regular block: decode it on its own and replay its values into the detector to
            // observe if that makes it write a simple8b block.
            Simple8b<T> decoder(blockPtr,
                                /* one block at a time */ sizeof(uint64_t),
                                lastValForRLE);
            boost::optional<T> last;
            bool overflowed = false;
            auto onWrite = [&overflowed](uint64_t) {
                overflowed = true;
            };
            for (auto&& value : decoder) {
                last = value;
                if (value) {
                    overflowDetector.append(*last, onWrite);
                } else {
                    overflowDetector.skip(onWrite);
                }
            }
            // Overflow detected, report this position together with its calculated last value.
            if (overflowed) {
                return {last, index, kInvalidIndex};
            }
            continue;
        }

        // RLE block. When the detector is no longer in RLE mode the overflow occurred right here.
        if (!overflowDetector.rlePossible()) {
            return {lastValForRLE, index, kInvalidIndex};
        }

        // Otherwise the last value of the next non-RLE block decides where the overflow is.
        const LastNonRLEResult<T> prior = findLastNonRLE<T>(control, index - 1);
        if (prior.index == kInvalidIndex) {
            // Control exhausted without an answer. Hand back the pending RLE index so the search
            // can continue in the prior control block. Should the value found there differ, the
            // overflow happened at this 'pending RLE' index.
            return {lastValForRLE, kInvalidIndex, index};
        }
        if (prior.lastValue == lastValForRLE) {
            // Value before the RLE equals the RLE state after it, the overflow is in the block
            // prior to the RLE.
            return {lastValForRLE, prior.index, kInvalidIndex};
        }
        // Value before the RLE differs from the RLE state after it, the overflow is in the RLE
        // block and the previous value is the actual RLE value from the block prior to the RLE.
        return {prior.lastValue, index, kInvalidIndex};
    }

    // Control block depleted without finding an overflow position.
    return {lastValForRLE, kInvalidIndex, kInvalidIndex};
}
/**
 * Convenience overload that starts the reverse search at the last simple8b block of 'control'.
 */
template <typename T>
LastNonRLEResult<T> findLastNonRLE(const char* control) {
    const int lastBlockIndex = numSimple8bBlocksForControlByte(*control) - 1;
    return findLastNonRLE<T>(control, lastBlockIndex);
}
/**
 * Searches 'control' in reverse order, starting at simple8b block 'index', for the last block
 * that is not RLE.
 *
 * Returns the index of that block together with its last value. If every visited block is RLE,
 * or 'index' is negative, the returned index is kInvalidIndex and the returned value must not be
 * relied upon.
 */
template <typename T>
LastNonRLEResult<T> findLastNonRLE(const char* control, int index) {
    // Search is performed in reverse order
    for (; index >= 0; --index) {
        const char* block = s8b(control, index);
        if (isRLE(block)) {
            continue;
        }
        // Non-RLE block found, calculate its last value and return. Last value for RLE is unused
        // as we already know that this is not an RLE block.
        uint64_t unused = simple8b::kInvalidSimple8b;
        boost::optional<T> last = simple8b::last<T>(block, sizeof(uint64_t), unused);
        return {last, index};
    }
    // Report "not found" with the sentinel itself rather than the loop counter, so the result
    // does not depend on the counter happening to end at the same value as the sentinel.
    return {T{}, kInvalidIndex};
}
} // namespace mongo::bsoncolumn::internal

View File

@ -0,0 +1,942 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/bson/column/binary_reopen.h"
#include "mongo/bson/column/simple8b.h"
#include "mongo/bson/column/simple8b_builder.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
namespace mongo::bsoncolumn::internal {
// Shorthand for an optional simple8b value where 'none' represents skip.
using V = boost::optional<uint64_t>;
// Test fixture providing pre-generated simple8b blocks and a helper to assemble them into
// BSONColumn control blocks.
class BinaryReopenTest : public unittest::Test {
public:
// Generates all simple8b block constants declared below.
BinaryReopenTest();
// Generates a BSONColumn control block for a set of simple8b blocks and optionally provided
// scale factor. The returned memory is owned by the fixture.
const char* control(std::vector<uint64_t> blocks,
uint8_t scaleIndex = Simple8bTypeUtil::kMemoryAsInteger);
// Simple8b block constants to be used in the tests. The names describe the content in order,
// e.g. 'block2Zero1Skip' contains two zeros followed by one skip.
uint64_t block1Skip;
uint64_t block1Zero;
uint64_t block1One;
uint64_t block5Two;
uint64_t block6Skip;
uint64_t block60Skip;
uint64_t block60Zero;
uint64_t block2Zero1Skip;
uint64_t block3One1Skip;
uint64_t block3Skip1One;
uint64_t block6Skip1Two;
// Block filled with as many ones as fit before the builder writes it out.
uint64_t blockFullOne;
// Blocks using the RLE selector. NOTE(review): presumably encoding 1 and 16 RLE repeat units
// respectively - confirm against the simple8b format.
uint64_t block1RLE;
uint64_t block16RLE;
private:
// Memory for generated control blocks, kept alive here so pointers returned by control() stay
// valid while the fixture exists.
std::forward_list<std::unique_ptr<char[]>> _ownedControls;
};
BinaryReopenTest::BinaryReopenTest() {
    // Produces exactly one simple8b block containing 'count' copies of 'value' followed by
    // 'count2' copies of 'value2', where 'none' means skip. The test fails if the builder writes
    // more than one block.
    auto generateSimple8b = [](boost::optional<uint64_t> value,
                               int count,
                               boost::optional<uint64_t> value2 = boost::none,
                               int count2 = 0) {
        boost::optional<uint64_t> encoded;
        auto capture = [&encoded](uint64_t b) {
            if (encoded) {
                FAIL("Should only write one block");
            }
            encoded = b;
        };

        Simple8bBuilder<uint64_t> builder;
        // Appends 'n' copies of 'v' (or 'n' skips) to the builder.
        auto appendRun = [&builder, &capture](const boost::optional<uint64_t>& v, int n) {
            for (int i = 0; i < n; ++i) {
                if (v) {
                    builder.append(*v, capture);
                } else {
                    builder.skip(capture);
                }
            }
        };
        appendRun(value, count);
        appendRun(value2, count2);
        builder.flush(capture);

        ASSERT_TRUE(encoded.has_value());
        return *encoded;
    };

    // Produces a simple8b block holding as many copies of 'value' as the builder accepts before
    // it writes a block out.
    auto generateFullSimple8b = [](boost::optional<uint64_t> value) {
        boost::optional<uint64_t> encoded;
        auto capture = [&encoded](uint64_t b) {
            encoded = b;
        };

        // RLE must not kick in, so seed the builder with a previous value that differs from the
        // value being appended.
        boost::optional<uint64_t> rleSeed = value ? V{*value + 1} : V{0};
        Simple8bBuilder<uint64_t> builder(rleSeed, 0);

        // Keep appending until the builder has written out a block.
        while (!encoded) {
            if (value) {
                builder.append(*value, capture);
            } else {
                builder.skip(capture);
            }
        }
        return *encoded;
    };

    // Single value blocks.
    block1Skip = generateSimple8b(boost::none, 1);
    block1Zero = generateSimple8b(0, 1);
    block1One = generateSimple8b(1, 1);

    // Blocks repeating a single value.
    block5Two = generateSimple8b(2, 5);
    block6Skip = generateSimple8b(boost::none, 6);
    block60Skip = generateSimple8b(boost::none, 60);
    block60Zero = generateSimple8b(0, 60);

    // Blocks mixing two different values.
    block2Zero1Skip = generateSimple8b(0, 2, boost::none, 1);
    block3One1Skip = generateSimple8b(1, 3, boost::none, 1);
    block3Skip1One = generateSimple8b(boost::none, 3, 1, 1);
    block6Skip1Two = generateSimple8b(boost::none, 6, 2, 1);

    // Block filled to capacity.
    blockFullOne = generateFullSimple8b(1);

    // RLE blocks are assembled by hand from the RLE selector.
    block1RLE = simple8b_internal::kRleSelector;
    block16RLE = simple8b_internal::kRleSelector | 0xF0;
}
// Builds an in-memory control block: one control byte (derived from 'scaleIndex' and the number
// of blocks) followed by the raw simple8b blocks. The returned pointer refers to memory owned by
// the fixture.
const char* BinaryReopenTest::control(std::vector<uint64_t> blocks, uint8_t scaleIndex) {
    // A control block holds at least 1 and at most 16 simple8b blocks.
    const size_t numBlocks = blocks.size();
    ASSERT_GT(numBlocks, 0);
    ASSERT_LTE(numBlocks, 16);

    // One extra byte up front for the control byte that precedes the simple8b data.
    const size_t payloadBytes = numBlocks * sizeof(uint64_t);
    auto buffer = std::make_unique<char[]>(payloadBytes + 1);
    char* const raw = buffer.get();

    // The control byte combines our scale factor with the simple8b block count (stored minus one).
    raw[0] = kControlByteForScaleIndex[scaleIndex] | (numBlocks - 1);
    // The simple8b blocks follow directly after the control byte.
    memcpy(raw + 1, blocks.data(), payloadBytes);

    // Hand ownership to the fixture so tests do not have to manage the buffer's lifetime.
    _ownedControls.push_front(std::move(buffer));
    return raw;
}
TEST_F(BinaryReopenTest, EstimateLastValue) {
    // Builds a control from 'blocks' and returns the estimated last value for it.
    auto lastValueOf = [&](std::vector<uint64_t> blocks) {
        return estimateLastValue<uint64_t>(control(std::move(blocks)));
    };
    // Returns 'head' extended with 'sixes' copies of block6Skip followed by 'ones' copies of
    // block1Skip, i.e. a trailing run of (6 * sixes + ones) skips.
    auto withTrailingSkips = [&](std::vector<uint64_t> head, size_t sixes, size_t ones) {
        head.insert(head.end(), sixes, block6Skip);
        head.insert(head.end(), ones, block1Skip);
        return head;
    };

    // A block of zeros yields zero.
    ASSERT_EQ(lastValueOf({block1Zero}), V{0});

    // Skips ahead of a value have no effect on the last value.
    ASSERT_EQ(lastValueOf({block6Skip, block6Skip1Two}), V{2});

    // Trailing skips are ignored; the last non-skip value is returned.
    ASSERT_EQ(lastValueOf({block1Zero, block6Skip}), V{0});
    ASSERT_EQ(lastValueOf({block5Two, block6Skip}), V{2});
    ASSERT_EQ(lastValueOf({block3One1Skip, block6Skip}), V{1});

    // With 60 or more trailing skips the result is none, even though a value precedes the skips.
    ASSERT_EQ(lastValueOf({block5Two, block60Skip}), V{boost::none});
    ASSERT_EQ(lastValueOf({block5Two, block60Skip, block6Skip}), V{boost::none});
    // 10 x 6 = 60 trailing skips.
    ASSERT_EQ(lastValueOf(withTrailingSkips({block5Two}, 10, 0)), V{boost::none});

    // With 59 or fewer trailing skips (9 x 6 + 5 x 1) the last non-skip value is still returned.
    ASSERT_EQ(lastValueOf(withTrailingSkips({block5Two}, 9, 5)), V{2});

    // Controls consisting solely of skips yield none.
    ASSERT_EQ(lastValueOf(withTrailingSkips({}, 10, 0)), V{boost::none});
    ASSERT_EQ(lastValueOf(withTrailingSkips({}, 9, 5)), V{boost::none});
    ASSERT_EQ(lastValueOf({block1Skip}), V{boost::none});

    // A trailing RLE block yields zero no matter what precedes it.
    ASSERT_EQ(lastValueOf({block1RLE}), V{0});
    ASSERT_EQ(lastValueOf({block1One, block1RLE}), V{0});
    ASSERT_EQ(lastValueOf({block1Zero, block1RLE}), V{0});
    ASSERT_EQ(lastValueOf({block1Skip, block1RLE}), V{0});
}
TEST_F(BinaryReopenTest, FindOverflow) {
    // Runs findOverflow over 'controlBlock' using a detector builder that is initialized with
    // 'lastVal' as its previous (RLE) value.
    auto runFindOverflow = [](const char* controlBlock, V lastVal) {
        Simple8bBuilder<uint64_t> detector(lastVal, 0);
        return findOverflow<uint64_t>(controlBlock, lastVal, detector);
    };

    {
        // A lone simple8b block containing a skip does not overflow.
        OverflowResult<uint64_t> res = runFindOverflow(control({block1Skip}), V{boost::none});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{boost::none});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // A lone simple8b block containing values does not overflow either.
        OverflowResult<uint64_t> res = runFindOverflow(control({block5Two}), V{0});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{0});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // Two blocks of identical values do not overflow when a single block could have held
        // all of them. The last value differs from the data, which disables RLE mode.
        OverflowResult<uint64_t> res = runFindOverflow(control({block5Two, block5Two}), V{0});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{0});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // More than 60 differing values across two blocks cannot share one block, so the
        // overflow is at index 0.
        OverflowResult<uint64_t> res = runFindOverflow(control({block5Two, block60Zero}), V{0});
        ASSERT_EQ(res.overflowIndex, 0);
        ASSERT_EQ(res.lastValue, V{2});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // Same with three blocks: the overflow moves to index 1.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block5Two, block5Two, block60Zero}), V{0});
        ASSERT_EQ(res.overflowIndex, 1);
        ASSERT_EQ(res.lastValue, V{2});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // A different last value does not move the overflow point since RLE is not in play.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block5Two, block5Two, block60Zero}), V{2});
        ASSERT_EQ(res.overflowIndex, 1);
        ASSERT_EQ(res.lastValue, V{2});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // Without RLE only 30 '1' values fit in a single block, so the overflow is at index 1
        // even though the values are identical.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block5Two, blockFullOne, blockFullOne}), V{0});
        ASSERT_EQ(res.overflowIndex, 1);
        ASSERT_EQ(res.lastValue, V{1});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // With RLE the overflow is at the first block holding a different value.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block5Two, blockFullOne, blockFullOne}), V{1});
        ASSERT_EQ(res.overflowIndex, 0);
        ASSERT_EQ(res.lastValue, V{2});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // No overflow when every value is identical and RLE is in play.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({blockFullOne, blockFullOne}), V{1});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{1});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // With an RLE block and identical values throughout, the overflow is before the RLE
        // block.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block5Two, block1RLE, block5Two}), V{2});
        ASSERT_EQ(res.overflowIndex, 0);
        ASSERT_EQ(res.lastValue, V{2});  // last value in the block that overflowed
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // When the values before and after the RLE block differ, the overflow is at the RLE
        // block.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({blockFullOne, block1RLE, block5Two}), V{2});
        ASSERT_EQ(res.overflowIndex, 1);
        ASSERT_EQ(res.lastValue, V{1});  // last value before the RLE run
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // With several consecutive RLE blocks and differing values on either side, the overflow
        // is at the last RLE block.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({blockFullOne, block16RLE, block1RLE, block5Two}), V{2});
        ASSERT_EQ(res.overflowIndex, 2);
        ASSERT_EQ(res.lastValue, V{1});  // last value before the RLE run
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
    {
        // RLE blocks only: no overflow, but a pending RLE is reported at the last RLE block.
        OverflowResult<uint64_t> res = runFindOverflow(control({block16RLE, block1RLE}), V{2});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{2});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, 1);
    }
    {
        // RLE followed by a non-RLE block compatible with the last value: no overflow, pending
        // RLE at the last RLE block.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block16RLE, block1RLE, block5Two}), V{2});
        ASSERT_EQ(res.overflowIndex, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{2});  // unchanged, there was no overflow
        ASSERT_EQ(res.pendingRLEindex, 1);
    }
    {
        // RLE followed by a non-RLE block that is not compatible with the last value: the
        // overflow is reported at index 1, the last RLE block ahead of the non-RLE block.
        OverflowResult<uint64_t> res =
            runFindOverflow(control({block16RLE, block1RLE, block5Two}), V{1});
        ASSERT_EQ(res.overflowIndex, 1);
        // The last value is left unchanged when it cannot be determined due to RLE.
        ASSERT_EQ(res.lastValue, V{1});
        ASSERT_EQ(res.pendingRLEindex, kInvalidIndex);
    }
}
TEST_F(BinaryReopenTest, FindLastNonRLE) {
    {
        // One non-RLE block: index 0 together with the last value of that block.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(control({block1Zero}));
        ASSERT_EQ(res.index, 0);
        ASSERT_EQ(res.lastValue, V{0});
    }
    {
        // One non-RLE block ending in a skip: index 0 and a last value of none.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(control({block2Zero1Skip}));
        ASSERT_EQ(res.index, 0);
        ASSERT_EQ(res.lastValue, V{boost::none});
    }
    {
        // One non-RLE block starting with skips: index 0 and the trailing value.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(control({block6Skip1Two}));
        ASSERT_EQ(res.index, 0);
        ASSERT_EQ(res.lastValue, V{2});
    }
    {
        // Several non-RLE blocks: the index of the final block and its last value.
        LastNonRLEResult<uint64_t> res =
            findLastNonRLE<uint64_t>(control({blockFullOne, block6Skip1Two}));
        ASSERT_EQ(res.index, 1);
        ASSERT_EQ(res.lastValue, V{2});
    }
    {
        // A trailing RLE block is skipped; the preceding non-RLE block is reported.
        LastNonRLEResult<uint64_t> res =
            findLastNonRLE<uint64_t>(control({blockFullOne, block1RLE}));
        ASSERT_EQ(res.index, 0);
        ASSERT_EQ(res.lastValue, V{1});
    }
    {
        // Several trailing RLE blocks are skipped; a leading RLE block does not matter.
        LastNonRLEResult<uint64_t> res =
            findLastNonRLE<uint64_t>(control({block1RLE, blockFullOne, block16RLE, block1RLE}));
        ASSERT_EQ(res.index, 1);
        ASSERT_EQ(res.lastValue, V{1});
    }
    {
        // A single RLE block: invalid index and a last value of 0.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(control({block1RLE}));
        ASSERT_EQ(res.index, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{0});
    }
    {
        // Multiple RLE blocks and nothing else: invalid index and a last value of 0.
        LastNonRLEResult<uint64_t> res =
            findLastNonRLE<uint64_t>(control({block16RLE, block1RLE}));
        ASSERT_EQ(res.index, kInvalidIndex);
        ASSERT_EQ(res.lastValue, V{0});
    }
    {
        // The index argument restricts the search to blocks at or before that index. Starting
        // at 0 only the first block is considered.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(
            control({block6Skip1Two, blockFullOne, block16RLE, block1RLE}), 0);
        ASSERT_EQ(res.index, 0);
        ASSERT_EQ(res.lastValue, V{2});
    }
    {
        // Starting at index 1 the non-RLE block at index 2 is ignored.
        LastNonRLEResult<uint64_t> res =
            findLastNonRLE<uint64_t>(control({blockFullOne, block6Skip1Two, blockFullOne}), 1);
        ASSERT_EQ(res.index, 1);
        ASSERT_EQ(res.lastValue, V{2});
    }
    {
        // Starting at an RLE block (index 3) the search continues backwards to the non-RLE block
        // at index 1; the non-RLE block at index 4 is ignored.
        LastNonRLEResult<uint64_t> res = findLastNonRLE<uint64_t>(
            control({blockFullOne, block6Skip1Two, block16RLE, block1RLE, blockFullOne}), 3);
        ASSERT_EQ(res.index, 1);
        ASSERT_EQ(res.lastValue, V{2});
    }
}
TEST_F(BinaryReopenTest, Overflow) {
    // Wraps the given control pointers in a ControlBlockContainer and runs the overflow
    // detection of OverflowState on it. Only the control pointer of each entry is populated; the
    // data used for the double type can be ignored here (it is checked in OverflowScaled).
    auto detectOverflow = [](std::vector<const char*> controls) -> OverflowPoint<uint64_t> {
        ControlBlockContainer container;
        for (const char* c : controls) {
            container.push_back({c});
        }
        OverflowState<uint64_t> state(container.back());
        return state.detect(container);
    };
    // A control whose 16 simple8b blocks are all block16RLE.
    auto fullRLEControl = [&]() {
        return control(std::vector<uint64_t>(16, block16RLE));
    };
    // A full control holding two block5Two blocks, one blockFullOne block and then 13 block16RLE
    // blocks.
    auto valuesThenRLEControl = [&]() {
        std::vector<uint64_t> blocks{block5Two, block5Two, blockFullOne};
        blocks.insert(blocks.end(), 13, block16RLE);
        return control(std::move(blocks));
    };

    {
        // One control, no overflow.
        std::vector<const char*> controls = {control({block5Two})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{0});  // last is defined as 0 when there is no overflow
        ASSERT_FALSE(point.allValuesIdentical());  // never set unless RLE is involved
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // One control that overflows at index 0.
        std::vector<const char*> controls = {control({block5Two, block60Zero})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 0);
        ASSERT_EQ(point.last(), V{2});  // last value in the block that caused the overflow
        ASSERT_FALSE(point.allValuesIdentical());  // never set unless RLE is involved
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // Two controls where the overflow is in the first control, at the second to last index
        // position (14 block5Two, then blockFullOne, then block5Two).
        std::vector<uint64_t> firstBlocks(14, block5Two);
        firstBlocks.push_back(blockFullOne);
        firstBlocks.push_back(block5Two);
        std::vector<const char*> controls = {control(std::move(firstBlocks)),
                                             control({block5Two})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 14);
        ASSERT_EQ(point.last(), V{1});  // last value in the block that caused the overflow
        ASSERT_FALSE(point.allValuesIdentical());  // never set unless RLE is involved
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // Two controls where the overflow is at the very last index position of the first
        // control (15 block5Two, then blockFullOne). This is treated as no overflow and the
        // second control is returned.
        std::vector<uint64_t> firstBlocks(15, block5Two);
        firstBlocks.push_back(blockFullOne);
        std::vector<const char*> controls = {control(std::move(firstBlocks)),
                                             control({block5Two})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{1});  // last value of the final block in the first control
        ASSERT_FALSE(point.allValuesIdentical());  // never set unless RLE is involved
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // One control containing only RLE: no overflow and a last value of 0.
        std::vector<const char*> controls = {control({block1RLE})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{0});  // last is defined as 0 when there is no overflow
        ASSERT_TRUE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // RLE alone may span more than one control; the outcome is the same apart from the
        // remaining controls.
        std::vector<const char*> controls = {fullRLEControl(), control({block16RLE, block1RLE})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{0});  // last is defined as 0 when there is no overflow
        ASSERT_TRUE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // RLE spanning several controls followed by blocks containing only zeros gives the same
        // outcome as well.
        std::vector<const char*> controls = {fullRLEControl(),
                                             fullRLEControl(),
                                             fullRLEControl(),
                                             control({block16RLE, block1RLE, block60Zero})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{0});  // last is defined as 0 when there is no overflow
        ASSERT_TRUE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 3);
    }
    {
        // Values followed by RLE that spans more than one control: the overflow is at the index
        // just before the RLE starts.
        std::vector<const char*> controls = {valuesThenRLEControl(),
                                             control({block16RLE, block1RLE})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 2);
        ASSERT_EQ(point.last(), V{1});  // last value in the block that caused the overflow
        ASSERT_TRUE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // The same holds as long as the value after the RLE equals the value before the RLE.
        std::vector<const char*> controls = {valuesThenRLEControl(),
                                             control({block16RLE, block1RLE, block1One})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 2);
        ASSERT_EQ(point.last(), V{1});  // last value in the block that caused the overflow
        ASSERT_TRUE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[0]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // When the value after the RLE differs from the value before it, the overflow is at the
        // last RLE block.
        std::vector<const char*> controls = {valuesThenRLEControl(),
                                             fullRLEControl(),
                                             control({block16RLE, block1RLE, block5Two})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[2]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 1);
        ASSERT_EQ(point.last(), V{1});  // last value ahead of the RLE run
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[2]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // When the stream starts with RLE and the value after the RLE is not zero, the overflow
        // is likewise at the last RLE block.
        std::vector<const char*> controls = {
            fullRLEControl(), fullRLEControl(), control({block16RLE, block1RLE, block5Two})};
        OverflowPoint<uint64_t> point = detectOverflow(controls);
        ASSERT_EQ(point.control(), controls[2]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 1);
        ASSERT_EQ(point.last(), V{0});  // nothing precedes the RLE run, so last is 0
        ASSERT_FALSE(point.allValuesIdentical());  // block5Two follows the RLE run
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[2]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
}
TEST_F(BinaryReopenTest, OverflowScaled) {
    // Runs the overflow detection for doubles. Each control block entry carries the last value
    // at the end of that block, which is derived here from 'base' and the deltas stored in the
    // supplied controls.
    auto detectScaledOverflow = [](double base,
                                   std::vector<const char*> controls) -> OverflowPoint<uint64_t> {
        ControlBlockContainer container;
        uint64_t prevNonRLE = simple8b::kSingleZero;
        auto initial =
            Simple8bTypeUtil::encodeDouble(base, scaleIndexForControlByte(*controls.front()));
        ASSERT_TRUE(initial.has_value());
        // NOTE(review): 'running' is encoded once with the first control's scale index and is
        // not re-encoded when a later control uses a different scale index - confirm intended.
        int64_t running = *initial;
        for (const char* c : controls) {
            uint8_t scaleIndex = scaleIndexForControlByte(*c);
            // Doubles use delta encoding, so summing the block data gives the delta up to the
            // last value of this control.
            running += simple8b::sum<int64_t>(
                c + 1, numSimple8bBlocksForControlByte(*c) * sizeof(uint64_t), prevNonRLE);
            base = Simple8bTypeUtil::decodeDouble(running, scaleIndex);
            container.push_back({c, base, scaleIndex});
        }
        OverflowState<uint64_t> state(container.back());
        return state.detect(container);
    };
    // Block list made of one blockFullOne followed by 15 block16RLE (a full control).
    auto fullOneThenRLE = [&]() {
        std::vector<uint64_t> blocks{blockFullOne};
        blocks.insert(blocks.end(), 15, block16RLE);
        return blocks;
    };

    {
        // A rescale right after a full control (16 blockFullOne) is reported as no overflow in
        // the first control that uses the different scale.
        std::vector<const char*> controls = {
            control(std::vector<uint64_t>(16, blockFullOne), 0),
            control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{1});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // As above, but the last entry before the rescale is a skip (15 blockFullOne followed by
        // block3One1Skip).
        std::vector<uint64_t> firstBlocks(15, blockFullOne);
        firstBlocks.push_back(block3One1Skip);
        std::vector<const char*> controls = {
            control(std::move(firstBlocks), 0),
            control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{boost::none});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // As above, but RLE blocks precede the rescale.
        std::vector<const char*> controls = {
            control(fullOneThenRLE(), 0),
            control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{1});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // As above, but nothing except RLE blocks precedes the rescale.
        std::vector<const char*> controls = {
            control(std::vector<uint64_t>(16, block16RLE), 0),
            control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{0});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // RLE may appear on both sides of the rescale.
        std::vector<const char*> controls = {
            control(fullOneThenRLE(), 0),
            control({block16RLE}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{1});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // The control before the rescale is not full, but the first value after the rescale
        // (scale factor kMemoryAsInteger) cannot be scaled with scale factor 0. This is also
        // treated as no overflow, returning the first control after the rescale.
        std::vector<const char*> controls = {
            control({blockFullOne, blockFullOne, blockFullOne}, 0),
            control({block5Two}, Simple8bTypeUtil::kMemoryAsInteger)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{1});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
    {
        // The last value before the rescale can be scaled with the next scale factor and all
        // values after it fit in pending. The first control is reported, together with a binary
        // offset to the control byte after the scaling.
        std::vector<const char*> controls = {
            control({blockFullOne, blockFullOne, blockFullOne}, 1), control({block5Two}, 0)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 2);
        ASSERT_EQ(point.last(), V{1});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(),
                  numSimple8bBlocksForControlByte(*controls[0]) * sizeof(uint64_t) + 1);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // As above with a large amount of RLE after the rescale. The outcome is essentially the
        // same, with a larger offset and more controls remaining.
        std::vector<const char*> controls = {
            control({blockFullOne, blockFullOne, blockFullOne}, 1),
            control(std::vector<uint64_t>(16, block16RLE), 0),
            control({block16RLE, block16RLE, blockFullOne}, 0)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 2);
        ASSERT_EQ(point.last(), V{1});
        // Values are not identical even with a large amount of RLE because the scaling differs.
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[2]));
        // Binary offset to the third control byte.
        ASSERT_EQ(point.lastControlOffset(),
                  (numSimple8bBlocksForControlByte(*controls[0]) + kMaxNumSimple8bPerControl) *
                          sizeof(uint64_t) +
                      2);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 2);
    }
    {
        // As above with RLE on both sides of the scaling.
        std::vector<const char*> controls = {
            control({blockFullOne, blockFullOne, block16RLE}, 1), control({block1RLE}, 0)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[0]);
        ASSERT_TRUE(point.overflow());
        ASSERT_EQ(point.index(), 2);
        ASSERT_EQ(point.last(), V{1});
        // Values are not identical despite the RLE because the scaling differs.
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        // Binary offset to the second control byte.
        ASSERT_EQ(point.lastControlOffset(),
                  numSimple8bBlocksForControlByte(*controls[0]) * sizeof(uint64_t) + 1);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 1);
    }
    {
        // The last value before the rescale can be scaled with the next scale factor, but the
        // values cannot all fit in pending without overflowing. This is treated as no overflow.
        std::vector<const char*> controls = {
            control({blockFullOne, blockFullOne, block5Two}, 1),
            control({blockFullOne, block1One}, 0)};
        OverflowPoint<uint64_t> point = detectScaledOverflow(1.0, controls);
        ASSERT_EQ(point.control(), controls[1]);
        ASSERT_FALSE(point.overflow());
        ASSERT_EQ(point.index(), kInvalidIndex);
        ASSERT_EQ(point.last(), V{2});
        ASSERT_FALSE(point.allValuesIdentical());
        ASSERT_EQ(point.lastControl(), static_cast<uint8_t>(*controls[1]));
        ASSERT_EQ(point.lastControlOffset(), 0);
        ASSERT_EQ(std::distance(point.remaining().begin(), point.remaining().end()), 0);
    }
}
} // namespace mongo::bsoncolumn::internal

View File

@ -102,13 +102,10 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
<< base64::encode(diff.data(), diff.size()));
// Verify binary reopen gives identical state as intermediate
// TODO SERVER-100659: Uncomment this after reopen bug is fixed
/*
BSONColumnBuilder reopen(diff.data(), diff.size());
invariant(builder.isInternalStateIdentical(reopen),
str::stream() << "Binary reopen does not yield equivalent state. Column: "
<< base64::encode(diff.data(), diff.size()));
*/
return 0;
}

View File

@ -30,42 +30,10 @@
#include "mongo/bson/bson_validate.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/column/bsoncolumn.h"
#include "mongo/bson/column/bsoncolumnbuilder.h"
#include "mongo/bson/util/bsonobj_traversal.h"
#include "mongo/util/base64.h"
// Returns true if the binary contains interleaved data. This function just scans the binary for an
// interleaved start control byte, it does no validation nor decompression.
static bool isDataInterleaved(const char* binary, size_t size) {
using namespace mongo;
const char* pos = binary;
const char* end = binary + size;
while (pos != end) {
uint8_t control = *pos;
if (control == stdx::to_underlying(BSONType::eoo)) {
// Reached the end of the binary.
return false;
}
if (bsoncolumn::isInterleavedStartControlByte(control)) {
return true;
}
if (bsoncolumn::isUncompressedLiteralControlByte(control)) {
// Scan over the entire literal.
BSONElement literal(pos, 1, BSONElement::TrustedInitTag{});
pos += literal.size();
continue;
}
// If there are no control bytes, scan over the simple8b block.
uint8_t size = bsoncolumn::numSimple8bBlocksForControlByte(control) * sizeof(uint64_t);
pos += size + 1;
}
return false;
};
// There are two decoding APIs. For all data that pass validation, both decoder implementations
// must produce the same results.
extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
@ -84,6 +52,7 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
std::vector<BSONElement> blockBasedElems = {};
std::string blockBasedError;
std::string iteratorError;
std::string reopenError;
// Attempt to decompress using the block-based API.
try {
@ -102,14 +71,23 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
iteratorError = e.toString();
}
// If one API failed, then both APIs must fail.
if (!iteratorError.empty() || !blockBasedError.empty()) {
invariant(!(iteratorError.empty() || blockBasedError.empty()),
// Attempt to reopen using the reopen API.
try {
BSONColumnBuilder(Data, Size);
} catch (const DBException& e) {
reopenError = e.toString();
}
// If one API failed, then all APIs must fail.
if (!iteratorError.empty() || !blockBasedError.empty() || !reopenError.empty()) {
invariant(!(iteratorError.empty() || blockBasedError.empty() || reopenError.empty()),
str::stream() << "For the input: " << base64::encode(StringData(Data, Size))
<< ". Iterator API returned "
<< (iteratorError.empty() ? "results" : iteratorError)
<< ". The block based API returned "
<< (blockBasedError.empty() ? "results" : blockBasedError));
<< (blockBasedError.empty() ? "results" : blockBasedError)
<< ". The reopen API returned "
<< (reopenError.empty() ? "results" : reopenError));
return 0;
}

View File

@ -1331,7 +1331,7 @@ TEST_F(BSONColumnTest, BuilderFuzzerReopenDiscoveredEdgeCases) {
//
std::vector<StringData> binariesBase64 = {
// Pending fix of SERVER-100659
// "gPz/////////CAAAgP7/////////AQAAAAAAAAAAYI/OxcXFxcXFAQ4AAAAAAAAB7uLi4uLi4gAuHR0dHR2dAI5xcXFxcXEAjnFxcXFxcQCOcXFxcXFxAK6rq6urq2sAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AI9ulpaWlpY2AG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAI9uXFxcXFwcAG5cXFxcXBwA7gsMDAwMHAAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAK6wr6+vrwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAI8uFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAIYuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAAA="_sd,
"gPz/////////CAAAgP7/////////AQAAAAAAAAAAYI/OxcXFxcXFAQ4AAAAAAAAB7uLi4uLi4gAuHR0dHR2dAI5xcXFxcXEAjnFxcXFxcQCOcXFxcXFxAK6rq6urq2sAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AM64uLi4uDgAzri4uLi4OADOuLi4uLg4AI9ulpaWlpY2AG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAG5cXFxcXBwAblxcXFxcHABuXFxcXFwcAI9uXFxcXFwcAG5cXFxcXBwA7gsMDAwMHAAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAI8uLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAC4uLi4uLg4ALi4uLi4uDgAuLi4uLi4OAK6wr6+vrwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAI8uFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAIYuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAC4XFxcXFwcALhcXFxcXBwAuFxcXFxcHAAA="_sd,
};
for (auto&& binaryBase64 : binariesBase64) {

View File

@ -40,6 +40,9 @@ static constexpr char kInterleavedStartControlByte = (char)0xF1;
static constexpr char kInterleavedStartArrayRootControlByte = (char)0xF2;
static constexpr uint8_t kInvalidScaleIndex = 0xFF;
static constexpr uint8_t kInvalidControlByte = 0xFE;
static constexpr uint8_t kMaxNumSimple8bPerControl = 16;
static constexpr std::array<uint8_t, Simple8bTypeUtil::kMemoryAsInteger + 1>
kControlByteForScaleIndex = {0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0x80};
inline bool isUncompressedLiteralControlByte(uint8_t control) {
return (control & 0xE0) == 0 || control == (uint8_t)stdx::to_underlying(BSONType::minKey) ||

File diff suppressed because it is too large Load Diff

View File

@ -269,9 +269,6 @@ struct EncodingState {
F controlBlockWriter,
const Allocator&);
void _initializeFromPrevious(const Allocator&);
template <class F>
ptrdiff_t _incrementSimple8bCount(allocator_aware::BufBuilder<Allocator>& buffer,
F controlBlockWriter);
// Encoders for 64bit and 128bit types.
std::variant<Encoder64, Encoder128> _encoder;
@ -521,7 +518,7 @@ private:
int lastBufLength = 0;
// Finalized state of last control byte written out by the previous intermediate() call.
uint8_t lastControl;
uint8_t lastControlOffset = 0;
uint16_t lastControlOffset = 0;
};
// Internal helper to perform reopen/initialization of this class from a BSONColumn binary.

View File

@ -352,6 +352,9 @@ static constexpr uint64_t kSingleSkip = 0xFFFFFFFFFFFFFFFE;
// Constant for a simple8b block containing a single zero value.
static constexpr uint64_t kSingleZero = 0xE;
// Constant for an invalid simple8b block, trying to read this will throw.
static constexpr uint64_t kInvalidSimple8b = 0;
/**
* Visits all values in sequence with provided callbacks
* visit - a callback for receiving all non-missing values (including 0)
@ -382,6 +385,13 @@ MONGO_COMPILER_ALWAYS_INLINE_GCC14 inline size_t visitAll(const char* buffer,
*/
inline size_t count(const char* buffer, size_t size);
/**
* Returns the last value (can be missing) over multiple Simple8b blocks. If called with unsigned T
* it returns the encoded value in this slot. If called with signed T it returns the decoded value.
*/
template <typename T>
boost::optional<T> last(const char* buffer, size_t size, uint64_t& prevNonRLE);
/**
* Calculates the sum for multiple simple8b blocks in a buffer. 'prevNonRLE' should be initialized
* to 'kSingleSkip' when calculating sum for the first buffer. If the caller needs sum from multiple

View File

@ -103,12 +103,20 @@ struct SimpleDecoder {
}
// Returns value of last slot. 'kMissing' is returned for missing.
static int64_t last(uint64_t encoded) {
static int64_t lastDecoded(uint64_t encoded) {
encoded >>= (bits * (iters - 1));
if (encoded == mask)
return kMissing;
return Simple8bTypeUtil::decodeInt64(encoded);
}
// Returns the bits stored in the last slot as-is, i.e. without passing them through
// Simple8bTypeUtil::decodeInt64. 'kMissing' is returned when the slot equals 'mask'.
static uint64_t lastEncoded(uint64_t encoded) {
    const uint64_t finalSlot = encoded >> (bits * (iters - 1));
    if (finalSlot != mask) {
        return finalSlot;
    }
    return kMissing;
}
};
// Table-based decoder that uses a lookup table for decoding unsigned integers into signed. Suitable
@ -212,7 +220,7 @@ struct TableDecoder {
}
// Returns value of last slot. 'kMissing' is returned for missing.
int64_t last(uint64_t encoded) const {
int64_t lastDecoded(uint64_t encoded) const {
encoded >>= (bits * (iters - 1));
const auto& entry = table[encoded];
if (!entry.num) {
@ -220,6 +228,15 @@ struct TableDecoder {
}
return entry.decoded;
}
// Returns the bits stored in the last slot without decoding them. 'kMissing' is returned when the
// lookup-table entry for that slot has a zero 'num'.
uint64_t lastEncoded(uint64_t encoded) const {
    const uint64_t finalSlot = encoded >> (bits * (iters - 1));
    if (table[finalSlot].num) {
        return finalSlot;
    }
    return kMissing;
}
};
// Table-based decoder that uses a lookup table for decoding multiple unsigned integers into signed
@ -510,7 +527,7 @@ struct ExtendedDecoder {
// Returns value of last slot. 'kMissing' is returned for missing.
template <typename T>
T last(uint64_t encoded) const {
T lastDecoded(uint64_t encoded) const {
encoded >>= (bits * (iters - 1));
if ((encoded & mask) == mask)
return kMissing;
@ -525,6 +542,24 @@ struct ExtendedDecoder {
return Simple8bTypeUtil::decodeInt(value << numZeroes);
}
// Returns the last slot in encoded form: the slot's value bits shifted left by
// (count * countScale), with no Simple8bTypeUtil decoding applied. 'kMissing' is returned when
// every bit covered by 'mask' is set in the slot.
template <typename T>
T lastEncoded(uint64_t encoded) const {
    const uint64_t slot = encoded >> (bits * (iters - 1));
    if ((slot & mask) == mask) {
        return kMissing;
    }

    const uint64_t count = slot & countMask;
    auto shift = count * countScale;
    if constexpr (std::is_same_v<T, uint64_t>) {
        // UBSAN will complain if shift values are greater than bit length
        shift %= 64;
    }

    T payload = (slot >> countBits) & valueMask;
    return payload << shift;
}
};
// Storage for all decoders that we need for our various selector types
@ -672,55 +707,57 @@ T decodeLastSlotIgnoreSkip(uint64_t encoded) {
case 15:
break;
default:
uasserted(10065906, "Bad selector");
break;
}
return 0;
}
template <typename T>
T decodeLastSlot(uint64_t encoded) {
T lastDecoded(uint64_t encoded) {
auto selector = encoded & simple8b_internal::kBaseSelectorMask;
encoded >>= 4;
switch (selector) {
case 1:
// Encoded and decoded value is the same for the 1 bit case
return decoder1.last(encoded);
case 2:
return decoder2.last(encoded);
return decoder2.lastDecoded(encoded);
case 3:
return decoder3.last(encoded);
return decoder3.lastDecoded(encoded);
case 4:
return decoder4.last(encoded);
return decoder4.lastDecoded(encoded);
case 5:
return decoder5.last(encoded);
return decoder5.lastDecoded(encoded);
case 6:
return decoder6.last(encoded);
return decoder6.lastDecoded(encoded);
case 7: {
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
encoded >>= 4;
switch (extended) {
case 0:
return decoder7.last(encoded);
return decoder7.lastDecoded(encoded);
case 1:
return decoderExtended7_1.last<T>(encoded);
return decoderExtended7_1.lastDecoded<T>(encoded);
case 2:
return decoderExtended7_2.last<T>(encoded);
return decoderExtended7_2.lastDecoded<T>(encoded);
case 3:
return decoderExtended7_3.last<T>(encoded);
return decoderExtended7_3.lastDecoded<T>(encoded);
case 4:
return decoderExtended7_4.last<T>(encoded);
return decoderExtended7_4.lastDecoded<T>(encoded);
case 5:
return decoderExtended7_5.last<T>(encoded);
return decoderExtended7_5.lastDecoded<T>(encoded);
case 6:
return decoderExtended7_6.last<T>(encoded);
return decoderExtended7_6.lastDecoded<T>(encoded);
case 7:
return decoderExtended7_7.last<T>(encoded);
return decoderExtended7_7.lastDecoded<T>(encoded);
case 8:
return decoderExtended7_8.last<T>(encoded);
return decoderExtended7_8.lastDecoded<T>(encoded);
case 9:
return decoderExtended7_9.last<T>(encoded);
return decoderExtended7_9.lastDecoded<T>(encoded);
default:
invariant(false); // invalid encoding
uasserted(10065900, "Bad extended selector");
break;
}
break;
@ -730,54 +767,163 @@ T decodeLastSlot(uint64_t encoded) {
encoded >>= 4;
switch (extended) {
case 0:
return decoder8.last(encoded);
return decoder8.lastDecoded(encoded);
case 1:
return decoderExtended8_1.last<T>(encoded);
return decoderExtended8_1.lastDecoded<T>(encoded);
case 2:
return decoderExtended8_2.last<T>(encoded);
return decoderExtended8_2.lastDecoded<T>(encoded);
case 3:
return decoderExtended8_3.last<T>(encoded);
return decoderExtended8_3.lastDecoded<T>(encoded);
case 4:
return decoderExtended8_4.last<T>(encoded);
return decoderExtended8_4.lastDecoded<T>(encoded);
case 5:
return decoderExtended8_5.last<T>(encoded);
return decoderExtended8_5.lastDecoded<T>(encoded);
case 6:
return decoderExtended8_6.last<T>(encoded);
return decoderExtended8_6.lastDecoded<T>(encoded);
case 7:
return decoderExtended8_7.last<T>(encoded);
return decoderExtended8_7.lastDecoded<T>(encoded);
case 8:
return decoderExtended8_8.last<T>(encoded);
return decoderExtended8_8.lastDecoded<T>(encoded);
case 9:
return decoderExtended8_9.last<T>(encoded);
return decoderExtended8_9.lastDecoded<T>(encoded);
case 10:
return decoderExtended8_10.last<T>(encoded);
return decoderExtended8_10.lastDecoded<T>(encoded);
case 11:
return decoderExtended8_11.last<T>(encoded);
return decoderExtended8_11.lastDecoded<T>(encoded);
case 12:
return decoderExtended8_12.last<T>(encoded);
return decoderExtended8_12.lastDecoded<T>(encoded);
case 13:
return decoderExtended8_13.last<T>(encoded);
return decoderExtended8_13.lastDecoded<T>(encoded);
default:
invariant(false); // invalid encoding
uasserted(10065901, "Bad extended selector");
break;
}
break;
}
case 9:
return decoder10.last(encoded);
return decoder10.lastDecoded(encoded);
case 10:
return decoder12.last(encoded);
return decoder12.lastDecoded(encoded);
case 11:
return decoder15.last(encoded);
return decoder15.lastDecoded(encoded);
case 12:
return decoder20.last(encoded);
return decoder20.lastDecoded(encoded);
case 13:
return decoder30.last(encoded);
return decoder30.lastDecoded(encoded);
case 14:
return decoder60.last(encoded);
return decoder60.lastDecoded(encoded);
case 15:
break;
default:
uasserted(10065905, "Bad selector");
break;
}
return 0;
}
template <typename T>
T lastEncoded(uint64_t encoded) {
auto selector = encoded & simple8b_internal::kBaseSelectorMask;
encoded >>= 4;
switch (selector) {
case 1:
// Encoded and decoded value is the same for the 1 bit case
return decoder1.last(encoded);
case 2:
return decoder2.lastEncoded(encoded);
case 3:
return decoder3.lastEncoded(encoded);
case 4:
return decoder4.lastEncoded(encoded);
case 5:
return decoder5.lastEncoded(encoded);
case 6:
return decoder6.lastEncoded(encoded);
case 7: {
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
encoded >>= 4;
switch (extended) {
case 0:
return decoder7.lastEncoded(encoded);
case 1:
return decoderExtended7_1.lastEncoded<T>(encoded);
case 2:
return decoderExtended7_2.lastEncoded<T>(encoded);
case 3:
return decoderExtended7_3.lastEncoded<T>(encoded);
case 4:
return decoderExtended7_4.lastEncoded<T>(encoded);
case 5:
return decoderExtended7_5.lastEncoded<T>(encoded);
case 6:
return decoderExtended7_6.lastEncoded<T>(encoded);
case 7:
return decoderExtended7_7.lastEncoded<T>(encoded);
case 8:
return decoderExtended7_8.lastEncoded<T>(encoded);
case 9:
return decoderExtended7_9.lastEncoded<T>(encoded);
default:
uasserted(10065902, "Bad extended selector");
break;
}
break;
}
case 8: {
auto extended = encoded & simple8b_internal::kBaseSelectorMask;
encoded >>= 4;
switch (extended) {
case 0:
return decoder8.lastEncoded(encoded);
case 1:
return decoderExtended8_1.lastEncoded<T>(encoded);
case 2:
return decoderExtended8_2.lastEncoded<T>(encoded);
case 3:
return decoderExtended8_3.lastEncoded<T>(encoded);
case 4:
return decoderExtended8_4.lastEncoded<T>(encoded);
case 5:
return decoderExtended8_5.lastEncoded<T>(encoded);
case 6:
return decoderExtended8_6.lastEncoded<T>(encoded);
case 7:
return decoderExtended8_7.lastEncoded<T>(encoded);
case 8:
return decoderExtended8_8.lastEncoded<T>(encoded);
case 9:
return decoderExtended8_9.lastEncoded<T>(encoded);
case 10:
return decoderExtended8_10.lastEncoded<T>(encoded);
case 11:
return decoderExtended8_11.lastEncoded<T>(encoded);
case 12:
return decoderExtended8_12.lastEncoded<T>(encoded);
case 13:
return decoderExtended8_13.lastEncoded<T>(encoded);
default:
uasserted(10065903, "Bad extended selector");
break;
}
break;
}
case 9:
return decoder10.lastEncoded(encoded);
case 10:
return decoder12.lastEncoded(encoded);
case 11:
return decoder15.lastEncoded(encoded);
case 12:
return decoder20.lastEncoded(encoded);
case 13:
return decoder30.lastEncoded(encoded);
case 14:
return decoder60.lastEncoded(encoded);
case 15:
break;
default:
uasserted(10065904, "Bad selector");
break;
}
return 0;
@ -925,7 +1071,7 @@ MONGO_COMPILER_ALWAYS_INLINE_GCC14 inline size_t decodeAndVisit(uint64_t encoded
return decoder60.visitAll<T>(encoded, visit, visitZero, visitMissing);
break;
case simple8b_internal::kRleSelector: {
const T lastValue = decodeLastSlot<T>(*prevNonRLE);
const T lastValue = lastDecoded<T>(*prevNonRLE);
size_t count = ((encoded & 0xf) + 1) * simple8b_internal::kRleMultiplier;
if (lastValue == kMissing) {
for (size_t i = 0; i < count; ++i) {
@ -1168,7 +1314,7 @@ T decodeAndPrefixSum(uint64_t encoded, T& prefix, uint64_t* prevNonRLE) {
case 14:
return decoder60.prefixSum<T>(encoded, prefix);
case simple8b_internal::kRleSelector: {
T last = decodeLastSlot<T>(*prevNonRLE);
T last = lastDecoded<T>(*prevNonRLE);
if (last == kMissing)
return 0;
@ -1229,6 +1375,29 @@ inline size_t count(const char* buffer, size_t size) {
return numElements;
}
// Walks 'size' bytes of Simple8b blocks starting at 'buffer' and reports the value held in the
// last slot. 'prevNonRLE' is an in/out parameter: it is overwritten with every non-RLE block that
// is encountered, and the result is read from the last slot of whatever it holds after the walk.
// If the buffer is empty or contains only RLE blocks, the result therefore comes from the
// caller-provided 'prevNonRLE'.
//
// For T = uint64_t or uint128_t the encoded slot is returned, for any other T the decoded value.
// An empty optional is returned when that slot compares equal to 'kMissing'.
template <typename T>
boost::optional<T> last(const char* buffer, size_t size, uint64_t& prevNonRLE) {
    invariant(size % 8 == 0);

    for (const char* const end = buffer + size; buffer != end; buffer += sizeof(uint64_t)) {
        const uint64_t block = ConstDataView(buffer).read<LittleEndian<uint64_t>>();
        if ((block & simple8b_internal::kBaseSelectorMask) != simple8b_internal::kRleSelector) {
            prevNonRLE = block;
        }
    }

    constexpr bool wantsEncoded = std::is_same_v<T, uint64_t> || std::is_same_v<T, uint128_t>;
    const T value = [&]() -> T {
        if constexpr (wantsEncoded) {
            return lastEncoded<T>(prevNonRLE);
        } else {
            return lastDecoded<T>(prevNonRLE);
        }
    }();

    if (value == kMissing) {
        return boost::optional<T>{};
    }
    return boost::optional<T>{value};
}
template <typename T>
T sum(const char* buffer, size_t size, uint64_t& prevNonRLE) {
invariant(size % 8 == 0);

View File

@ -71,6 +71,7 @@ public:
// Callback to handle writing of finalized Simple-8b blocks. Machine Endian byte order, the
// value need to be converted to Little Endian before persisting.
explicit Simple8bBuilder(Allocator = {});
Simple8bBuilder(boost::optional<T> val, int64_t num, Allocator = {});
~Simple8bBuilder();
Simple8bBuilder(const Simple8bBuilder&) = default;
@ -79,6 +80,20 @@ public:
Simple8bBuilder& operator=(const Simple8bBuilder&) = default;
Simple8bBuilder& operator=(Simple8bBuilder&&) = default;
/**
 * Returns the allocator this Simple8bBuilder was set up with, as reported by the pending-values
 * container.
 */
Allocator allocator() const {
    if constexpr (!std::is_same_v<Allocator, std::allocator<void>>) {
        return _pendingValues.get_allocator();
    } else {
        // The MSVC version in use has a bug where the conversion performed by the branch above
        // fails for std::allocator<void>. That allocator is stateless, so handing back a fresh
        // instance is an equivalent workaround.
        return Allocator{};
    }
}
/**
* Appends a multiple missing value to Simple8b. Should be called before any other values are
* appended. This is intended to be used to initialize a new builder with a large series of
@ -175,17 +190,6 @@ public:
*/
bool rlePossible() const;
/**
* Forcibly set last value so future append/skip calls may use this to construct RLE. This
* should not be called in normal operation.
*/
void setLastForRLE(boost::optional<T> val);
/**
* Reset RLE state on the last value, if needed. This should not be called in normal operation.
*/
void resetLastForRLEIfNeeded();
/**
* Initialize RLE state from another builder
*/
@ -499,6 +503,20 @@ bool Simple8bBuilder<T, Allocator>::PendingIterator::operator!=(
template <typename T, class Allocator>
Simple8bBuilder<T, Allocator>::Simple8bBuilder(Allocator allocator) : _pendingValues(allocator) {}
// Constructs a builder that starts out with 'val' recorded as the last value of the previous word
// and an RLE count of 'num'. 'val' may be boost::none. 'allocator' is forwarded to the
// pending-values container.
template <typename T, class Allocator>
Simple8bBuilder<T, Allocator>::Simple8bBuilder(boost::optional<T> val,
int64_t num,
Allocator allocator)
: _rleCount(num), _lastValueInPrevWord(val), _pendingValues(allocator) {
if (val) {
// Sanity-check the provided value: a pending value must be computable for it and it must
// fit in the current (still empty) word.
auto pendingValue = _calculatePendingValue(*val);
invariant(pendingValue);
invariant(_doesIntegerFitInCurrentWord(*pendingValue));
}
// NOTE(review): presumably this resets selector bookkeeping that the fit check above may have
// touched, so the statement order matters here -- confirm against
// _doesIntegerFitInCurrentWord.
_lastValidExtensionType = 0;
isSelectorPossible.fill(true);
}
template <typename T, class Allocator>
Simple8bBuilder<T, Allocator>::~Simple8bBuilder() = default;
@ -584,23 +602,6 @@ void Simple8bBuilder<T, Allocator>::flush(F&& writeFn) {
}
}
template <typename T, class Allocator>
void Simple8bBuilder<T, Allocator>::setLastForRLE(boost::optional<T> val) {
_lastValueInPrevWord = val;
if (val) {
auto pendingValue = _calculatePendingValue(*val);
invariant(pendingValue);
invariant(_doesIntegerFitInCurrentWord(*pendingValue));
}
}
template <typename T, class Allocator>
void Simple8bBuilder<T, Allocator>::resetLastForRLEIfNeeded() {
if (!rlePossible()) {
_lastValueInPrevWord = 0;
}
}
template <typename T, class Allocator>
void Simple8bBuilder<T, Allocator>::initializeRLEFrom(const Simple8bBuilder<T, Allocator>& other) {
if (other.rlePossible()) {

View File

@ -31,6 +31,7 @@
#include "mongo/bson/column/simple8b.h"
#include "mongo/bson/column/simple8b_type_util.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/int128.h"
#include "mongo/util/hex.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
@ -39,6 +40,11 @@ static constexpr int128_t add(int128_t lhs, int128_t rhs) {
return static_cast<int128_t>(static_cast<uint128_t>(lhs) + static_cast<uint128_t>(rhs));
}
// Pairs the two views of the last Simple8b slot that the fuzzer compares between implementations.
// Both members are boost::none when the last value is missing.
struct LastResult {
// Last value in its encoded (unsigned) form.
boost::optional<uint128_t> encoded;
// Last value in its decoded (signed) form.
boost::optional<int128_t> decoded;
};
extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
using namespace mongo;
@ -65,16 +71,45 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
}
}();
auto oldLast = [&]() -> boost::optional<LastResult> {
try {
boost::optional<uint128_t> last = uint128_t{0};
Simple8b<uint128_t> s8b(Data, bufferSize);
for (auto&& val : s8b) {
last = val;
}
if (last) {
return LastResult{last, Simple8bTypeUtil::decodeInt(*last)};
} else {
return LastResult{boost::optional<uint128_t>(boost::none),
boost::optional<int128_t>(boost::none)};
}
} catch (const DBException&) {
return boost::none;
}
}();
auto sum = [&]() -> boost::optional<int128_t> {
try {
uint64_t prev =
0xE; // Previous value 0, this is one simple8b value containing a zero.
uint64_t prev = simple8b::kSingleZero;
return simple8b::sum<int128_t>(Data, bufferSize, prev);
} catch (const DBException&) {
return boost::none;
}
}();
auto last = [&]() -> boost::optional<LastResult> {
try {
uint64_t prev1 = simple8b::kSingleZero;
uint64_t prev2 = simple8b::kSingleZero;
return LastResult{simple8b::last<uint128_t>(Data, bufferSize, prev1),
simple8b::last<int128_t>(Data, bufferSize, prev2)};
} catch (const DBException&) {
return boost::none;
}
}();
if (sum != oldSum) {
LOGV2_DEBUG(8384500,
2,
@ -91,6 +126,11 @@ extern "C" int LLVMFuzzerTestOneInput(const char* Data, size_t Size) {
// (as they'd lead to crashes), while using edge cases leading to interesting control flow
// paths in both implementations.
invariant(sum == oldSum);
// simple8b::last is not required to decode everything so an invalid binary might not throw.
if (last && oldLast) {
invariant(last->encoded == oldLast->encoded);
invariant(last->decoded == oldLast->decoded);
}
}

View File

@ -119,18 +119,34 @@ void testSimple8b(const std::vector<boost::optional<T>>& expectedValues,
assertValuesEqual(s8b, expectedValues);
make_signed_t<T> sum = 0;
boost::optional<T> last = T{0};
for (auto&& val : expectedValues) {
if (val) {
sum = add(sum, Simple8bTypeUtil::decodeInt(*val));
}
last = val;
}
uint64_t prev = 0xE; // Tests in this file assume that the previous value was '0'. This is
// different semantics from BSONColumn.
uint64_t prev = simple8b::kSingleZero;
auto s = simple8b::sum<make_signed_t<T>>(
reinterpret_cast<const char*>(expectedBinary.data()), expectedBinary.size(), prev);
ASSERT_EQ(s, sum);
// Test last
prev = simple8b::kSingleZero;
ASSERT_EQ(last,
simple8b::last<T>(reinterpret_cast<const char*>(expectedBinary.data()),
expectedBinary.size(),
prev));
if (last.has_value()) {
prev = simple8b::kSingleZero;
ASSERT_EQ(
Simple8bTypeUtil::decodeInt(*last),
simple8b::last<make_signed_t<T>>(
reinterpret_cast<const char*>(expectedBinary.data()), expectedBinary.size(), prev));
}
auto testPrefixSum = [&](auto prefix) {
make_signed_t<T> sum = prefix;
make_signed_t<T> prefixSum = 0;
@ -142,8 +158,7 @@ void testSimple8b(const std::vector<boost::optional<T>>& expectedValues,
}
}
uint64_t prev = 0xE; // Tests in this file assume that the previous value was '0'. This is
// different semantics from BSONColumn.
uint64_t prev = simple8b::kSingleZero;
auto ps = simple8b::prefixSum<make_signed_t<T>>(
reinterpret_cast<const char*>(expectedBinary.data()),
expectedBinary.size(),