SERVER-121534 [v8.3] Make geo matcher expressions 2dsphereIndex version aware (#50415)

GitOrigin-RevId: 344e4c482a4fcbacc4f0c9746825d05d16ba88ac
This commit is contained in:
Finley Lau 2026-03-26 12:49:39 -05:00 committed by MongoDB Bot
parent cd5753c295
commit b59bd51a17
26 changed files with 1143 additions and 53 deletions

View File

@ -37,6 +37,9 @@ selector:
- jstests/core/query/release_memory/group.js
# Running analyze on a query from this test creates a BSON document that is larger than 16 MB
- jstests/core/index/geo/geo_circle_spilling.js
# TODO SERVER-121749: CBR hits MONGO_UNREACHABLE_TASSERT(5837103) when estimating a plan whose
# FetchNode filter contains $_internalBucketGeoWithin.
- jstests/core/timeseries/geo/timeseries_2dsphere_index_version_matcher_consistency.js
# TODO SERVER-100451: $elemMatch estimation using histograms
- jstests/core/query/boolean_expression_simplification.js

View File

@ -0,0 +1,250 @@
/**
* Tests that the geo matchers use the same 2dsphere index version semantics as the query's index
* when filtering buckets. This prevents index-returned buckets from being incorrectly discarded
* when control min/max are parsed with a different geometry type order.
*
* Background: For object-type geo elements that can be parsed as both legacy point and GeoJSON,
* V3 tries legacy first; V4 tries GeoJSON first. If the matcher used the wrong version, a bucket
* correctly returned by the index could be filtered out (or vice versa). This test uses an
* "ambiguous" document {x: 0, y: 0, type: "Point", coordinates: [10, 10]} which parses as (0,0)
* under V3 and (10,10) under V4.
*
* Also confirms the geo matcher works without an index (collection scan path uses default V4
* semantics when no index version is available).
*
* @tags: [
* does_not_support_stepdowns,
* requires_pipeline_optimization,
* requires_timeseries,
* featureFlag2dsphereIndexVersion4,
* cannot_run_during_upgrade_downgrade,
* multiversion_incompatible,
* ]
*/
import {getAggPlanStage} from "jstests/libs/query/analyze_plan.js";
import {describe, it, before} from "jstests/libs/mochalite.js";
const timeFieldName = "time";
const metaFieldName = "meta";
// Polygon that contains (0,0) but not (10,10). Used with V3 index + ambiguous doc.
const polygonAroundZeroZero = {
type: "Polygon",
coordinates: [
[
[0, 0],
[0.5, 0],
[0.5, 0.5],
[0, 0.5],
[0, 0],
],
],
};
// Polygon that contains (10,10) but not (0,0). Used with V4 index or no index + ambiguous doc.
const polygonAroundTenTen = {
type: "Polygon",
coordinates: [
[
[9.5, 9.5],
[10.5, 9.5],
[10.5, 10.5],
[9.5, 10.5],
[9.5, 9.5],
],
],
};
// Ambiguous geo: V3 parses as legacy point (0,0), V4 parses as GeoJSON Point (10,10).
// BSON key order preserves "x" first, so firstElement().isNumber() is true for V3.
const ambiguousLoc = {x: 0, y: 0, type: "Point", coordinates: [10, 10]};
const now = new Date();
describe("2dsphere index version / matcher consistency", function () {
before(function () {
this.testDb = db.getSiblingDB(jsTestName());
this.coll = this.testDb.getCollection(jsTestName());
this.resetCollection = function (indexOptions = {}) {
this.coll.drop();
assert.commandWorked(
this.testDb.createCollection(this.coll.getName(), {
timeseries: {timeField: timeFieldName, metaField: metaFieldName},
}),
);
if (Object.keys(indexOptions).length > 0) {
assert.commandWorked(this.coll.createIndex({loc: "2dsphere"}, indexOptions));
}
};
});
// V3 index + polygon around (0,0). The bucket-level matcher must use V3 semantics so the
// ambiguous doc is parsed as (0,0), lies inside the polygon, and the bucket is kept.
// Expected: one result. If the matcher used V4, it would parse as (10,10) and wrongly discard.
it("V3 2dsphere index: matcher uses V3 so bucket with ambiguous loc is kept", function () {
this.resetCollection({"2dsphereIndexVersion": 3});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 1, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}}])
.toArray();
assert.eq(results.length, 1, "With V3 index, matcher must parse control as (0,0)");
assert.docEq(ambiguousLoc, results[0].loc);
});
// V4 index + polygon around (10,10). The matcher must use V4 semantics so the ambiguous doc
// is parsed as (10,10), lies inside the polygon, and the bucket is kept.
// Expected: one result. If the matcher used V3, it would parse as (0,0) and wrongly discard.
it("V4 2dsphere index: matcher uses V4 so bucket with ambiguous loc is kept", function () {
this.resetCollection({"2dsphereIndexVersion": 4});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 2, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}])
.toArray();
assert.eq(results.length, 1, "With V4 index, matcher must parse control as (10,10)");
assert.docEq(ambiguousLoc, results[0].loc);
});
// No 2dsphere index: the server cannot infer an index version, so the matcher uses default
// V4 semantics. Query uses polygon around (10,10); ambiguous doc must parse as (10,10).
// Expected: one result. Ensures collection-scan path behaves consistently (V4).
it("No index (collection scan): matcher uses default V4 semantics", function () {
this.resetCollection();
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 3, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}])
.toArray();
assert.eq(results.length, 1, "Without index, matcher defaults to V4");
assert.docEq(ambiguousLoc, results[0].loc);
});
// V4 index + polygon around (0,0). With V4, ambiguous doc is (10,10), which is outside the
// polygon. The matcher must use V4 so the bucket is correctly excluded.
// Expected: zero results. If the matcher wrongly used V3, it would keep the bucket (false positive).
it("V4 index: polygon around (0,0) must not match ambiguous doc", function () {
this.resetCollection({"2dsphereIndexVersion": 4});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 4, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}}])
.toArray();
assert.eq(results.length, 0, "V4 parses as (10,10); polygon does not contain (10,10)");
});
// V3 index + polygon around (10,10). With V3, ambiguous doc is (0,0), which is outside the
// polygon. The matcher must use V3 so the bucket is correctly excluded.
// Expected: zero results. If the matcher wrongly used V4, it would keep the bucket (false positive).
it("V3 index: polygon around (10,10) must not match ambiguous doc", function () {
this.resetCollection({"2dsphereIndexVersion": 3});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 5, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}])
.toArray();
assert.eq(results.length, 0, "V3 parses as (0,0); polygon does not contain (0,0)");
});
// $and of $geoWithin (polygon around 0,0) and a meta filter. With a V3 index, the event-level
// filter (after unpacking) must use V3 to parse the ambiguous doc as (0,0), so it matches.
// Expected: one result. Confirms the version from the index is applied to the event filter in
// $and.
it("V3 index with $and: geo + meta filter uses V3 for event filter", function () {
this.resetCollection({"2dsphereIndexVersion": 3});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 6, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([
{
$match: {
$and: [{loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}, {[metaFieldName]: {s: 1}}],
},
},
])
.toArray();
assert.eq(results.length, 1, "With V3 index, $and with geo must parse event loc as (0,0)");
assert.docEq(ambiguousLoc, results[0].loc);
});
// $and of $geoWithin (polygon around 10,10) and a meta filter. With a V4 index, the
// event-level filter must use V4 to parse the ambiguous doc as (10,10), so it matches.
// Expected: one result. Confirms the version from the index is applied to the event filter in
// $and.
it("V4 index with $and: geo + meta filter uses V4 for event filter", function () {
this.resetCollection({"2dsphereIndexVersion": 4});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 7, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([
{
$match: {
$and: [{loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}, {[metaFieldName]: {s: 1}}],
},
},
])
.toArray();
assert.eq(results.length, 1, "With V4 index, $and with geo must parse event loc as (10,10)");
assert.docEq(ambiguousLoc, results[0].loc);
});
// $or of two $geoWithin (polygon 0,0 and polygon 10,10). With V3, the ambiguous doc is (0,0),
// so only the first branch matches. The event filter for each branch must use V3.
// Expected: one result. Confirms $or uses the same index version for all branches.
it("V3 index with $or: ambiguous doc matches first branch (polygon around 0,0)", function () {
this.resetCollection({"2dsphereIndexVersion": 3});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 8, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([
{
$match: {
$or: [
{loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}},
{loc: {$geoWithin: {$geometry: polygonAroundTenTen}}},
],
},
},
])
.toArray();
assert.eq(results.length, 1, "With V3 index, $or must use V3 for event filter so first branch (0,0) matches");
assert.docEq(ambiguousLoc, results[0].loc);
});
// $or of two $geoWithin (polygon 0,0 and polygon 10,10). With V4, the ambiguous doc is (10,10),
// so only the second branch matches. The event filter for each branch must use V4.
// Expected: one result. Confirms $or uses the same index version for all branches.
it("V4 index with $or: ambiguous doc matches second branch (polygon around 10,10)", function () {
this.resetCollection({"2dsphereIndexVersion": 4});
assert.commandWorked(
this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 9, loc: ambiguousLoc}]),
);
const results = this.coll
.aggregate([
{
$match: {
$or: [
{loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}},
{loc: {$geoWithin: {$geometry: polygonAroundTenTen}}},
],
},
},
])
.toArray();
assert.eq(
results.length,
1,
"With V4 index, $or must use V4 for event filter so second branch (10,10) matches",
);
assert.docEq(ambiguousLoc, results[0].loc);
});
});

View File

@ -57,13 +57,14 @@ bool geoContains(const GeometryContainer& queryGeom,
bool geoContains(const GeometryContainer& queryGeom,
const GeoExpression::Predicate& queryPredicate,
bool skipValidation,
const BSONElement& e) {
const BSONElement& e,
boost::optional<S2IndexVersion> indexVersion) {
if (!e.isABSONObj()) {
return false;
}
GeometryContainer geometry;
if (!geometry.parseFromStorage(e, skipValidation).isOK()) {
if (!geometry.parseFromStorage(e, skipValidation, indexVersion).isOK()) {
return false;
}
@ -110,8 +111,11 @@ bool matchesGeoContainer(const GeoMatchExpression* expr, const GeometryContainer
void MatchesSingleElementEvaluator::visit(const GeoMatchExpression* expr) {
const auto& query = expr->getGeoExpression();
_result =
geoContains(query.getGeometry(), query.getPred(), expr->getCanSkipValidation(), _elem);
_result = geoContains(query.getGeometry(),
query.getPred(),
expr->getCanSkipValidation(),
_elem,
expr->get2dsphereIndexVersion());
}
void MatchesSingleElementEvaluator::visit(const TwoDPtInAnnulusExpression* expr) {
@ -213,26 +217,29 @@ bool matchesBSONObj(const InternalBucketGeoWithinMatchExpression* expr, const BS
}
// Returns true if the bucket should be unpacked and all the data within the bucket should be
// checked later.
auto parseMinMaxPoint = [](const BSONElement& elem, PointWithCRS* out) -> bool {
auto geoObj = elem.embeddedObject();
if (BSONType::array == elem.type() || geoObj.firstElement().isNumber()) {
// Legacy Point.
if (!GeoParser::parseLegacyPoint(elem, out, true).isOK()) {
return true;
}
} else {
// If the bucket contains GeoJSON objects of types other than 'Points' we cannot be sure
// whether this bucket contains data is within the provided region.
if (!geoObj.hasField(GEOJSON_TYPE) ||
geoObj[GEOJSON_TYPE].String() != GEOJSON_TYPE_POINT) {
return true;
}
// GeoJSON Point.
if (!GeoParser::parseGeoJSONPoint(geoObj, out).isOK()) {
return true;
}
auto parseMinMaxPoint = [indexVersion = expr->getIndexVersion()](const BSONElement& elem,
PointWithCRS* out) -> bool {
if (!indexVersion && elem.type() == BSONType::object &&
elem.Obj().firstElement().isNumber()) {
// Without an index version (e.g., on a mongos without catalog access), we cannot
// resolve the V3 vs V4+ parsing ambiguity for objects whose first field is numeric:
// V3 tries legacy-point first; V4+ tries GeoJSON first. Conservatively unpack the
// bucket to avoid discarding data that the index correctly returned.
return true;
}
return false;
auto geoObj = elem.embeddedObject();
GeometryContainer geometry;
Status status = geometry.parseFromStorage(elem, false, indexVersion);
if (!status.isOK()) {
return true; // Parsing from storage failed, we should unpack the bucket to be safe.
}
if (!geometry.isPoint()) {
// The stored value didn't parse as a point, so we can't do a
// bounding-box comparison on the control min/max; unpack the bucket.
return true;
}
*out = geometry.getPoint();
return false; // No need to unpack.
};
PointWithCRS minPoint;

View File

@ -31,6 +31,7 @@
#include "mongo/db/exec/matcher/match_details.h"
#include "mongo/db/geo/geometry_container.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/util/modules.h"
@ -41,7 +42,8 @@ namespace exec::matcher {
bool geoContains(const GeometryContainer& queryGeom,
const GeoExpression::Predicate& queryPredicate,
bool skipValidation,
const BSONElement& e);
const BSONElement& e,
boost::optional<S2IndexVersion> indexVersion = boost::none);
bool geoContains(const GeometryContainer& queryGeom,
const GeoExpression::Predicate& queryPredicate,

View File

@ -31,6 +31,7 @@
#include "mongo/db/exec/matcher/matcher.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/compiler/parsers/matcher/expression_geo_parser.h"
#include "mongo/db/query/compiler/parsers/matcher/expression_parser.h"
@ -79,7 +80,7 @@ public:
}
std::unique_ptr<MatchExpression> getDummyBucketGeoExpr() {
auto bucketGeoExpr = fromjson(R"(
_ownedGeoNoVersion = fromjson(R"(
{$_internalBucketGeoWithin: {
withinRegion: {
$geometry: {
@ -89,14 +90,14 @@ public:
},
field: "loc"
}})");
auto expr = MatchExpressionParser::parse(bucketGeoExpr, _expCtx);
auto expr = MatchExpressionParser::parse(_ownedGeoNoVersion, _expCtx);
ASSERT_OK(expr.getStatus());
return std::move(expr.getValue());
}
std::unique_ptr<MatchExpression> getDummyBucketGeoExprLegacy() {
auto bucketGeoExpr = fromjson(R"(
_ownedGeoLegacy = fromjson(R"(
{$_internalBucketGeoWithin: {
withinRegion: {
$box: [
@ -106,12 +107,41 @@ public:
},
field: "loc"
}})");
auto expr = MatchExpressionParser::parse(bucketGeoExpr, _expCtx);
auto expr = MatchExpressionParser::parse(_ownedGeoLegacy, _expCtx);
ASSERT_OK(expr.getStatus());
return std::move(expr.getValue());
}
std::unique_ptr<MatchExpression> getDummyBucketGeoExprWithIndexVersion(int indexVersion) {
_ownedGeoWithVersion =
BSON("$_internalBucketGeoWithin"
<< BSON("withinRegion"
<< BSON("$geometry" << BSON(
"type" << "Polygon"
<< "coordinates"
<< BSON_ARRAY(BSON_ARRAY(
BSON_ARRAY(0 << 0)
<< BSON_ARRAY(0 << 5) << BSON_ARRAY(5 << 5)
<< BSON_ARRAY(5 << 0) << BSON_ARRAY(0 << 0)))))
<< "field"
<< "loc"
<< "2dsphereIndexVersion" << indexVersion));
auto expr = MatchExpressionParser::parse(_ownedGeoWithVersion,
_expCtx,
ExtensionsCallbackNoop(),
MatchExpressionParser::kAllowAllSpecialFeatures);
ASSERT_OK(expr.getStatus());
return std::move(expr.getValue());
}
protected:
BSONObj _ownedGeoWithVersion;
BSONObj _ownedGeoNoVersion;
BSONObj _ownedGeoLegacy;
private:
boost::intrusive_ptr<ExpressionContextForTest> _expCtx = new ExpressionContextForTest();
};
@ -259,6 +289,48 @@ TEST_F(InternalBucketGeoWithinExpression, BucketContainsNonPointTypeLegacy) {
ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj));
}
TEST_F(InternalBucketGeoWithinExpression, With2dsphereIndexVersion4GeoJSONPoints) {
auto expr = getDummyBucketGeoExprWithIndexVersion(4);
auto obj = createBucketObj(BSON("loc" << BSON("type" << "Point"
<< "coordinates" << BSON_ARRAY(1 << 1)))
.firstElement(),
BSON("loc" << BSON("type" << "Point"
<< "coordinates" << BSON_ARRAY(3 << 3)))
.firstElement());
ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj));
}
TEST_F(InternalBucketGeoWithinExpression, With2dsphereIndexVersion3LegacyPoints) {
auto expr = getDummyBucketGeoExprWithIndexVersion(3);
auto obj = createBucketObj(BSON("loc" << BSON_ARRAY(1 << 1)).firstElement(),
BSON("loc" << BSON_ARRAY(3 << 3)).firstElement());
ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj));
}
TEST_F(InternalBucketGeoWithinExpression, EquivalentAndClonePreserveIndexVersion) {
auto exprWithVersion = getDummyBucketGeoExprWithIndexVersion(4);
auto exprWithoutVersion = getDummyBucketGeoExpr();
// Expressions with and without index version should not be equivalent.
ASSERT_FALSE(exprWithVersion->equivalent(exprWithoutVersion.get()));
ASSERT_FALSE(exprWithoutVersion->equivalent(exprWithVersion.get()));
// Clone should preserve index version and be equivalent to original.
auto clone = exprWithVersion->clone();
ASSERT_TRUE(exprWithVersion->equivalent(clone.get()));
auto* ibgwWithVersion =
static_cast<InternalBucketGeoWithinMatchExpression*>(exprWithVersion.get());
auto* ibgwClone = static_cast<InternalBucketGeoWithinMatchExpression*>(clone.get());
ASSERT_TRUE(ibgwWithVersion->getIndexVersion());
ASSERT_TRUE(ibgwClone->getIndexVersion());
ASSERT_EQUALS(*ibgwWithVersion->getIndexVersion(), *ibgwClone->getIndexVersion());
}
TEST(ExpressionGeoTest, Geo1) {
BSONObj query = fromjson("{loc:{$within:{$box:[{x: 4, y:4},[6,6]]}}}");

View File

@ -99,7 +99,8 @@ GeoMatchExpression::GeoMatchExpression(boost::optional<StringData> path,
: LeafMatchExpression(GEO, path, std::move(annotation)),
_rawObj(rawObj),
_query(query),
_canSkipValidation(false) {}
_canSkipValidation(false),
_2dsphereIndexVersion(boost::none) {}
/**
* Takes shared ownership of the passed-in GeoExpression.
@ -111,7 +112,8 @@ GeoMatchExpression::GeoMatchExpression(boost::optional<StringData> path,
: LeafMatchExpression(GEO, path, std::move(annotation)),
_rawObj(rawObj),
_query(query),
_canSkipValidation(false) {}
_canSkipValidation(false),
_2dsphereIndexVersion(boost::none) {}
void GeoMatchExpression::debugString(StringBuilder& debug, int indentationLevel) const {
_debugAddSpace(debug, indentationLevel);
@ -141,13 +143,15 @@ bool GeoMatchExpression::equivalent(const MatchExpression* other) const {
if (path() != realOther->path())
return false;
return SimpleBSONObjComparator::kInstance.evaluate(_rawObj == realOther->_rawObj);
return SimpleBSONObjComparator::kInstance.evaluate(_rawObj == realOther->_rawObj) &&
_2dsphereIndexVersion == realOther->_2dsphereIndexVersion;
}
std::unique_ptr<MatchExpression> GeoMatchExpression::clone() const {
std::unique_ptr<GeoMatchExpression> next =
std::make_unique<GeoMatchExpression>(path(), _query, _rawObj, _errorAnnotation);
next->_canSkipValidation = _canSkipValidation;
next->_2dsphereIndexVersion = _2dsphereIndexVersion;
if (getTag()) {
next->setTag(getTag()->clone());
}

View File

@ -40,6 +40,7 @@
#include "mongo/bson/util/builder_fwd.h"
#include "mongo/db/geo/geometry_container.h"
#include "mongo/db/geo/shapes.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/matcher/expression_visitor.h"
@ -136,6 +137,13 @@ public:
return *_query;
}
boost::optional<S2IndexVersion> get2dsphereIndexVersion() const {
return _2dsphereIndexVersion;
}
void set2dsphereIndexVersion(boost::optional<S2IndexVersion> v) {
_2dsphereIndexVersion = v;
}
void acceptVisitor(MatchExpressionMutableVisitor* visitor) final {
visitor->visit(this);
}
@ -155,10 +163,11 @@ private:
// Share ownership of our query with all of our clones
std::shared_ptr<const GeoExpression> _query;
bool _canSkipValidation;
boost::optional<S2IndexVersion> _2dsphereIndexVersion;
};
// TODO: Make a struct, turn parse stuff into something like
// TODO SERVER-122401: Make a struct, turn parse stuff into something like
// static Status parseNearQuery(const BSONObj& obj, NearQuery** out);
class GeoNearExpression {
GeoNearExpression(const GeoNearExpression&) = delete;
@ -243,7 +252,7 @@ private:
/**
* Expression which checks whether a legacy 2D index point is contained within our near
* search annulus. See nextInterval() below for more discussion.
* TODO: Make this a standard type of GEO match expression
* TODO SERVER-122399: Make this a standard type of GEO match expression
*/
class TwoDPtInAnnulusExpression : public LeafMatchExpression {
public:

View File

@ -54,7 +54,7 @@ bool InternalBucketGeoWithinMatchExpression::equivalent(const MatchExpression* e
return SimpleBSONObjComparator::kInstance.evaluate(
_geoContainer->getGeoElement().Obj() ==
other->getGeoContainer().getGeoElement().Obj()) &&
_field == other->getField();
_field == other->getField() && _indexVersion == other->_indexVersion;
}
void InternalBucketGeoWithinMatchExpression::serialize(BSONObjBuilder* builder,
@ -73,12 +73,16 @@ void InternalBucketGeoWithinMatchExpression::serialize(BSONObjBuilder* builder,
// Serialize the field which is being searched over.
bob.append(InternalBucketGeoWithinMatchExpression::kField,
opts.serializeFieldPathFromString(_field));
if (_indexVersion) {
bob.append("2dsphereIndexVersion", static_cast<int>(*_indexVersion));
}
bob.doneFast();
}
std::unique_ptr<MatchExpression> InternalBucketGeoWithinMatchExpression::clone() const {
std::unique_ptr<InternalBucketGeoWithinMatchExpression> next =
std::make_unique<InternalBucketGeoWithinMatchExpression>(_geoContainer, _field);
std::make_unique<InternalBucketGeoWithinMatchExpression>(
_geoContainer, _field, nullptr, _indexVersion);
if (getTag()) {
next->setTag(getTag()->clone());
}

View File

@ -37,6 +37,7 @@
#include "mongo/bson/util/builder_fwd.h"
#include "mongo/db/field_ref.h"
#include "mongo/db/geo/geometry_container.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_visitor.h"
#include "mongo/db/query/query_shape/serialization_options.h"
@ -80,14 +81,17 @@ public:
static constexpr StringData kWithinRegion = "withinRegion"_sd;
static constexpr StringData kField = "field"_sd;
InternalBucketGeoWithinMatchExpression(std::shared_ptr<GeometryContainer> container,
std::string field,
clonable_ptr<ErrorAnnotation> annotation = nullptr)
InternalBucketGeoWithinMatchExpression(
std::shared_ptr<GeometryContainer> container,
std::string field,
clonable_ptr<ErrorAnnotation> annotation = nullptr,
boost::optional<S2IndexVersion> indexVersion = boost::none)
: MatchExpression(MatchExpression::INTERNAL_BUCKET_GEO_WITHIN, std::move(annotation)),
_geoContainer(container),
_indexField("data." + field),
_fieldRef(_indexField),
_field(std::move(field)) {}
_field(std::move(field)),
_indexVersion(indexVersion) {}
void debugString(StringBuilder& debug, int indentationLevel) const final;
@ -127,6 +131,10 @@ public:
return *_geoContainer;
}
boost::optional<S2IndexVersion> getIndexVersion() const {
return _indexVersion;
}
StringData path() const final {
return _indexField;
}
@ -148,6 +156,7 @@ private:
std::string _indexField;
FieldRef _fieldRef;
std::string _field;
boost::optional<S2IndexVersion> _indexVersion;
};
} // namespace mongo

View File

@ -587,6 +587,7 @@ mongo_cc_library(
"//src/mongo/db/storage:spill_util",
"//src/mongo/db/storage:storage_options",
"//src/mongo/db/timeseries:catalog_helper",
"//src/mongo/db/timeseries:timeseries_2dsphere_index_version_lookup",
"//src/mongo/db/timeseries:timeseries_conversion_util",
"//src/mongo/db/timeseries:timeseries_options",
"//src/mongo/db/update:update_document_diff",

View File

@ -45,7 +45,9 @@
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/document_metadata_fields.h"
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/pipeline/accumulation_statement.h"
#include "mongo/db/pipeline/accumulator.h"
@ -635,6 +637,30 @@ boost::intrusive_ptr<Expression> rewriteGroupByField(
return ExpressionObject::create(pExpCtx.get(), std::move(fieldsAndExprs));
}
// Walks the match expression tree and sets the 2dsphere index version on any
// GeoMatchExpression nodes whose field appears in versionMap. This is needed
// because setEventFilter re-parses the filter from BSON, producing a fresh
// expression tree with no index version; the version must be re-applied from
// the catalog snapshot stored in _geo2dsphereIndexVersions.
void set2dsphereIndexVersionOnGeoPredicates(MatchExpression* expr,
const boost::optional<StringMap<int>>& versionMap) {
if (!versionMap) {
return;
}
if (expr->matchType() == MatchExpression::GEO) {
auto* geoExpr = static_cast<GeoMatchExpression*>(expr);
auto field = std::string(geoExpr->getGeoExpression().getField());
auto it = versionMap->find(field);
if (it != versionMap->end()) {
geoExpr->set2dsphereIndexVersion(static_cast<S2IndexVersion>(it->second));
}
return;
}
for (size_t i = 0; i < expr->numChildren(); ++i) {
set2dsphereIndexVersionOnGeoPredicates(expr->getChild(i), versionMap);
}
}
} // namespace
DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket(
@ -1117,6 +1143,25 @@ void DocumentSourceInternalUnpackBucket::setEventFilter(BSONObj eventFilterBson,
_eventFilterDeps = DepsTracker();
dependency_analysis::addDependencies(_sharedState->_eventFilter.get(), &_eventFilterDeps);
// The event filter was just re-parsed from raw BSON, which does not carry
// 2dsphere index version information. _geo2dsphereIndexVersions was populated
// from the index catalog at translation time (on mongod) and stored separately
// in the stage spec. Stamp the version onto any geo predicates now so that
// bucket-level geo optimization can use the correct V3 vs V4+ parsing semantics.
set2dsphereIndexVersionOnGeoPredicates(_sharedState->_eventFilter.get(),
_geo2dsphereIndexVersions);
}
void DocumentSourceInternalUnpackBucket::set2dsphereIndexVersions(
boost::optional<StringMap<int>> versions) {
_geo2dsphereIndexVersions = std::move(versions);
// If an event filter was already parsed before the version map became available
// (e.g. from BSON deserialization on a shard receiving a router-translated pipeline
// where the router lacked index info), stamp the versions onto its geo predicates now.
if (_sharedState->_eventFilter && _geo2dsphereIndexVersions) {
set2dsphereIndexVersionOnGeoPredicates(_sharedState->_eventFilter.get(),
_geo2dsphereIndexVersions);
}
}
void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& project,
@ -1181,6 +1226,16 @@ std::pair<BSONObj, bool> DocumentSourceInternalUnpackBucket::extractOrBuildProje
BucketSpec::BucketPredicate DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
const MatchExpression* matchExpr) const {
timeseries::Get2dsphereIndexVersionFn get2dsphereIndexVersion;
if (_geo2dsphereIndexVersions) {
get2dsphereIndexVersion = [&versionMap = *_geo2dsphereIndexVersions](
OperationContext*, const NamespaceString&, StringData field) {
auto it = versionMap.find(std::string(field));
return it != versionMap.end()
? boost::make_optional(static_cast<S2IndexVersion>(it->second))
: boost::optional<S2IndexVersion>();
};
}
return BucketSpec::createPredicatesOnBucketLevelField(
matchExpr,
_sharedState->_bucketUnpacker.bucketSpec(),
@ -1190,7 +1245,8 @@ BucketSpec::BucketPredicate DocumentSourceInternalUnpackBucket::createPredicates
_sharedState->_bucketUnpacker.includeMetaField(),
_assumeNoMixedSchemaData,
BucketSpec::IneligiblePredicatePolicy::kIgnore,
_fixedBuckets);
_fixedBuckets,
get2dsphereIndexVersion);
}
bool DocumentSourceInternalUnpackBucket::generateBucketLevelIdPredicates(

View File

@ -48,6 +48,7 @@
#include "mongo/db/timeseries/mixed_schema_buckets_state.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/modules.h"
#include "mongo/util/string_map.h"
#include <memory>
#include <set>
@ -262,6 +263,15 @@ public:
_sharedState->_bucketUnpacker.setIncludeMinTimeAsMetadata();
}
// Sets the 2dsphere index version map and, if an event filter was already parsed before this
// map became available (e.g. deserialized from BSON on a shard receiving a router-translated
// pipeline), stamps the versions onto its geo predicates immediately.
void set2dsphereIndexVersions(boost::optional<StringMap<int>> versions);
bool has2dsphereIndexVersions() const {
return _geo2dsphereIndexVersions.has_value();
}
void setIncludeMaxTimeAsMetadata() {
_sharedState->_bucketUnpacker.setIncludeMaxTimeAsMetadata();
}
@ -391,6 +401,12 @@ private:
// predicates in order to ensure correctness.
bool _assumeNoMixedSchemaData = false;
// Map from user-facing field name to 2dsphereIndexVersion, built at translation time from the
// collection's index catalog. boost::none means the map was not computed (e.g. for sharded
// queries translated at the router), in which case setEventFilter falls back to a catalog
// lookup.
boost::optional<StringMap<int>> _geo2dsphereIndexVersions = boost::none;
// This is true if 'bucketRoundingSeconds' and 'bucketMaxSpanSeconds' are set, equal, and
// unchanged. Then we can push down certain $match and $group queries.
bool _fixedBuckets = false;

View File

@ -382,6 +382,7 @@ mongo_cc_library(
],
deps = [
"//src/mongo/db:query_expressions",
"//src/mongo/db/index:expression_params",
"//src/mongo/db/matcher:expression_algo",
"//src/mongo/db/query/compiler/parsers/matcher:matcher_parser",
"//src/third_party/s2",

View File

@ -23,6 +23,7 @@ mongo_cc_library(
"//src/mongo/db/matcher/doc_validation",
"//src/mongo/db/query/compiler/parsers/matcher/schema:json_schema_parser",
"//src/mongo/db/query/compiler/rewrites/matcher:matcher_rewrites",
"//src/mongo/db/shard_role/shard_catalog:index_catalog",
],
)

View File

@ -81,6 +81,7 @@
#include "mongo/db/query/query_execution_knobs_gen.h"
#include "mongo/db/query/query_integration_knobs_gen.h"
#include "mongo/db/query/query_optimization_knobs_gen.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/stats/counters.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/util/assert_util.h"
@ -1203,8 +1204,27 @@ StatusWithMatchExpression parseInternalBucketGeoWithinMatchExpression(
// Parse the field.
std::string field = subobj["field"].String();
boost::optional<S2IndexVersion> indexVersion = boost::none;
if (subobj.hasField(IndexDescriptor::k2dsphereVersionFieldName)) {
BSONElement versionElem = subobj[IndexDescriptor::k2dsphereVersionFieldName];
if (!versionElem.isNumber()) {
return {ErrorCodes::TypeMismatch,
str::stream() << InternalBucketGeoWithinMatchExpression::kName
<< "'s '2dsphereIndexVersion' field must be a number"};
}
long long versionVal = versionElem.safeNumberLong();
if (versionVal != 1 && versionVal != 2 && versionVal != 3 && versionVal != 4) {
return {ErrorCodes::BadValue,
str::stream() << InternalBucketGeoWithinMatchExpression::kName
<< "'s '2dsphereIndexVersion' must be 1, 2, 3, or 4, got: "
<< versionVal};
}
indexVersion = static_cast<S2IndexVersion>(versionVal);
}
expCtx->setSbeCompatibility(SbeCompatibility::notCompatible);
return {std::make_unique<InternalBucketGeoWithinMatchExpression>(geoContainer, field)};
return {std::make_unique<InternalBucketGeoWithinMatchExpression>(
geoContainer, field, nullptr, indexVersion)};
}
StatusWithMatchExpression parseInternalSchemaAllowedProperties(
@ -1859,7 +1879,7 @@ StatusWithMatchExpression parseSubField(const BSONObj& context,
return parseMOD(name, e, expCtx);
case PathAcceptingKeyword::OPTIONS: {
// TODO: try to optimize this
// TODO SERVER-122402 try to optimize this
// we have to do this since $options can be before or after a $regex
// but we validate here
for (auto temp : context) {

View File

@ -32,6 +32,7 @@
#include "mongo/bson/json.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
@ -1099,4 +1100,128 @@ TEST(ExpressionGeoTest, RoundTripSerializeInternalBucketGeoWithin) {
}})"));
}
TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionValid) {
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
for (int version : {1, 2, 3, 4}) {
BSONObj query =
BSON("$_internalBucketGeoWithin"
<< BSON("withinRegion"
<< BSON("$geometry" << BSON(
"type" << "Polygon"
<< "coordinates"
<< BSON_ARRAY(BSON_ARRAY(
BSON_ARRAY(0 << 0)
<< BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5)
<< BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0)))))
<< "field"
<< "loc"
<< "2dsphereIndexVersion" << version));
auto result = MatchExpressionParser::parse(query,
expCtx,
ExtensionsCallbackNoop(),
MatchExpressionParser::kAllowAllSpecialFeatures);
ASSERT_OK(result.getStatus()) << "Failed for version " << version;
auto* ibgw = static_cast<InternalBucketGeoWithinMatchExpression*>(result.getValue().get());
ASSERT_TRUE(ibgw->getIndexVersion()) << "Expected index version for version " << version;
ASSERT_EQUALS(static_cast<int>(*ibgw->getIndexVersion()), version)
<< "Wrong index version for version " << version;
}
}
TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionInvalid) {
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
for (long long version : {0, 5}) {
BSONObj query =
BSON("$_internalBucketGeoWithin"
<< BSON("withinRegion"
<< BSON("$geometry" << BSON(
"type" << "Polygon"
<< "coordinates"
<< BSON_ARRAY(BSON_ARRAY(
BSON_ARRAY(0 << 0)
<< BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5)
<< BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0)))))
<< "field"
<< "loc"
<< "2dsphereIndexVersion" << version));
auto result = MatchExpressionParser::parse(query,
expCtx,
ExtensionsCallbackNoop(),
MatchExpressionParser::kAllowAllSpecialFeatures);
ASSERT_NOT_OK(result.getStatus()) << "Should fail for version " << version;
ASSERT_EQUALS(result.getStatus().code(), ErrorCodes::BadValue);
}
}
TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionTypeMismatch) {
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
// Use BSON() because JSON does not allow field names starting with a digit
// (2dsphereIndexVersion).
BSONObj query =
BSON("$_internalBucketGeoWithin"
<< BSON("withinRegion"
<< BSON("$geometry"
<< BSON("type" << "Polygon"
<< "coordinates"
<< BSON_ARRAY(BSON_ARRAY(
BSON_ARRAY(0 << 0)
<< BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5)
<< BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0)))))
<< "field"
<< "loc"
<< "2dsphereIndexVersion"
<< "not_a_number"));
auto result = MatchExpressionParser::parse(
query, expCtx, ExtensionsCallbackNoop(), MatchExpressionParser::kAllowAllSpecialFeatures);
ASSERT_NOT_OK(result.getStatus());
ASSERT_EQUALS(result.getStatus().code(), ErrorCodes::TypeMismatch);
}
TEST(ExpressionGeoTest, RoundTripSerializeInternalBucketGeoWithinWith2dsphereIndexVersion) {
auto opts = SerializationOptions{LiteralSerializationPolicy::kToRepresentativeParseableValue};
boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
// Use BSON() because JSON does not allow field names starting with a digit
// (2dsphereIndexVersion).
BSONObj inputExpr =
BSON("$_internalBucketGeoWithin"
<< BSON("withinRegion"
<< BSON("$geometry"
<< BSON("type" << "Polygon"
<< "coordinates"
<< BSON_ARRAY(BSON_ARRAY(
BSON_ARRAY(0 << 0)
<< BSON_ARRAY(0 << 5) << BSON_ARRAY(5 << 5)
<< BSON_ARRAY(5 << 0) << BSON_ARRAY(0 << 0)))))
<< "field"
<< "loc"
<< "2dsphereIndexVersion" << 4));
auto result = MatchExpressionParser::parse(inputExpr, expCtx);
ASSERT_OK(result.getStatus());
BSONObjBuilder bob;
result.getValue()->serialize(&bob, opts);
auto serialized = bob.obj();
ASSERT_TRUE(serialized.hasField("$_internalBucketGeoWithin"));
ASSERT_TRUE(serialized["$_internalBucketGeoWithin"].Obj().hasField("2dsphereIndexVersion"));
ASSERT_EQUALS(serialized["$_internalBucketGeoWithin"].Obj()["2dsphereIndexVersion"].Int(), 4);
// Round-trip: parse the serialized expression and verify index version is preserved.
auto roundTripped = MatchExpressionParser::parse(serialized, expCtx);
ASSERT_OK(roundTripped.getStatus());
auto* ibgw =
static_cast<InternalBucketGeoWithinMatchExpression*>(roundTripped.getValue().get());
ASSERT_TRUE(ibgw->getIndexVersion());
ASSERT_EQUALS(static_cast<int>(*ibgw->getIndexVersion()), 4);
}
} // namespace mongo

View File

@ -16,8 +16,10 @@ mongo_cc_unit_test(
],
tags = ["mongo_unittest_third_group"],
deps = [
"//src/mongo/db/index:expression_params",
"//src/mongo/db/pipeline",
"//src/mongo/db/pipeline:expression_context_for_test",
"//src/mongo/db/shard_role/shard_catalog:catalog_test_fixture",
"//src/mongo/db/timeseries:timeseries_test_fixture",
],
)

View File

@ -133,7 +133,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField(
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets) {
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion) {
tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr);
@ -184,7 +185,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField(
includeMetaField,
assumeNoMixedSchemaData,
policy,
fixedBuckets);
fixedBuckets,
get2dsphereIndexVersion);
if (child.loosePredicate) {
looseAndExpression->add(std::move(child.loosePredicate));
}
@ -242,7 +244,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField(
includeMetaField,
assumeNoMixedSchemaData,
policy,
fixedBuckets);
fixedBuckets,
get2dsphereIndexVersion);
if (looseOrExpression && child.loosePredicate) {
looseOrExpression->add(std::move(child.loosePredicate));
} else {
@ -308,8 +311,13 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField(
auto& geoExpr = static_cast<const GeoMatchExpression*>(matchExpr)->getGeoExpression();
if (geoExpr.getPred() == GeoExpression::WITHIN ||
geoExpr.getPred() == GeoExpression::INTERSECT) {
boost::optional<S2IndexVersion> indexVersion = get2dsphereIndexVersion
? get2dsphereIndexVersion(pExpCtx->getOperationContext(),
pExpCtx->getNamespaceString(),
geoExpr.getField())
: boost::none;
return {std::make_unique<InternalBucketGeoWithinMatchExpression>(
geoExpr.getGeometryPtr(), geoExpr.getField()),
geoExpr.getGeometryPtr(), geoExpr.getField(), nullptr, indexVersion),
nullptr};
}
} else if (matchExpr->matchType() == MatchExpression::EXISTS) {
@ -394,7 +402,8 @@ std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets) {
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion) {
auto [metaOnlyPred, bucketMetricPred, residualPred] =
getPushdownPredicates(expCtx,
tsOptions,
@ -403,7 +412,8 @@ std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
includeMetaField,
assumeNoMixedSchemaData,
policy,
fixedBuckets);
fixedBuckets,
get2dsphereIndexVersion);
BSONObjBuilder result;
if (metaOnlyPred)
metaOnlyPred->serialize(&result, {});
@ -437,7 +447,8 @@ BucketSpec::SplitPredicates BucketSpec::getPushdownPredicates(
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets) {
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion) {
auto allowedFeatures = MatchExpressionParser::kDefaultSpecialFeatures;
auto matchExpr = uassertStatusOK(
@ -465,7 +476,8 @@ BucketSpec::SplitPredicates BucketSpec::getPushdownPredicates(
includeMetaField,
assumeNoMixedSchemaData,
policy,
fixedBuckets);
fixedBuckets,
get2dsphereIndexVersion);
bucketMetricPred = std::move(bucketPredicate.loosePredicate);
if (!expCtx->getRequiresTimeseriesExtendedRangeSupport()) {
// It may be possible to generate _id predicates or even '$alwaysTrue' or '$alwaysFalse'

View File

@ -32,6 +32,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/exec/document_value/document_internal.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/timeseries/timeseries_gen.h"
@ -49,6 +50,11 @@
namespace mongo::timeseries {
// Optional callback to look up 2dsphere index version for a field.
// When nullptr or not called, the most up to date version (currently v4) is used.
using Get2dsphereIndexVersionFn = std::function<boost::optional<S2IndexVersion>(
OperationContext*, const NamespaceString&, StringData)>;
/**
* Carries parameters for unpacking a bucket. The order of operations applied to determine which
* fields are in the final document are:
@ -216,7 +222,8 @@ public:
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets);
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr);
/**
* Takes predicates that were generated by createPredicatesOnBucketLevelField() and possibly
@ -303,7 +310,8 @@ public:
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets);
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr);
/**
* Splits out a predicate on the meta field from a predicate on the bucket metric field.
@ -335,7 +343,8 @@ public:
bool includeMetaField,
bool assumeNoMixedSchemaData,
IneligiblePredicatePolicy policy,
bool fixedBuckets);
bool fixedBuckets,
Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr);
bool includeMinTimeAsMetadata = false;
bool includeMaxTimeAsMetadata = false;

View File

@ -34,6 +34,7 @@
#include "mongo/db/pipeline/document_source_internal_convert_bucket_index_stats.h"
#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/shard_role/shard_catalog/raw_data_operation.h"
#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h"
#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
#include "mongo/db/timeseries/timeseries_options.h"
@ -268,6 +269,30 @@ void translateIndexHintIfRequiredImpl(const boost::intrusive_ptr<ExpressionConte
request.setHint(rewrittenHintWithStatus.getValue());
}
}
// Walks 'pipeline' looking for $_internalUnpackBucket stages that were deserialized without
// collection-derived parameters (e.g. a pipeline received from a router that lacked index info),
// and fills them in from the already-acquired 'collPtr'. This is the mechanism by which
// shard-side executions obtain parameters that can only be derived from the actual collection
// (currently: the 2dsphere index version map).
void populateUnpackBucketStagesFromCollection(Pipeline& pipeline, const CollectionPtr& collPtr) {
if (!collPtr) {
return;
}
for (auto& source : pipeline.getSources()) {
auto* unpack = dynamic_cast<DocumentSourceInternalUnpackBucket*>(source.get());
if (!unpack) {
continue;
}
if (unpack->has2dsphereIndexVersions()) {
continue;
}
boost::optional<StringMap<int>> versions =
timeseries::build2dsphereIndexVersionMap(*collPtr.get());
unpack->set2dsphereIndexVersions(versions);
break;
}
}
} // namespace
bool requiresViewlessTimeseriesTranslation(OperationContext* const opCtx,
@ -286,6 +311,12 @@ void translateStagesIfRequired(const boost::intrusive_ptr<ExpressionContext>& ex
Pipeline& pipeline,
const CollectionOrViewAcquisition& collOrView) {
translateStagesIfRequiredImpl<CollectionOrViewAcquisition>(expCtx, pipeline, collOrView);
// For pipelines that arrived pre-translated from a router (where CollectionRoutingInfo was used
// and no index info was available), modify the unpack stage with any collection-related
// metadata.
if (collOrView.isCollection()) {
populateUnpackBucketStagesFromCollection(pipeline, collOrView.getCollectionPtr());
}
}
void translateStagesIfRequired(const boost::intrusive_ptr<ExpressionContext>& expCtx,

View File

@ -30,12 +30,18 @@
#include "mongo/db/query/timeseries/timeseries_translation.h"
#include "mongo/bson/json.h"
#include "mongo/db/index/s2_common.h"
#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/pipeline/document_source_match.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/pipeline/pipeline_factory.h"
#include "mongo/db/query/util/make_data_structure.h"
#include "mongo/db/shard_role/shard_catalog/catalog_test_fixture.h"
#include "mongo/db/shard_role/shard_catalog/create_collection.h"
#include "mongo/db/shard_role/shard_role.h"
#include "mongo/db/timeseries/timeseries_gen.h"
#include "mongo/db/timeseries/timeseries_test_fixture.h"
#include "mongo/idl/server_parameter_test_controller.h"
#include "mongo/unittest/unittest.h"
@ -374,5 +380,79 @@ TEST_F(TimeseriesRewritesTest, BucketsFixedTest) {
}
}
/**
* Fixture that sets up a timeseries buckets collection with a 2dsphere_bucket index to verify
* the bucket spec uses the index's 2dsphereIndexVersion when creating
* InternalBucketGeoWithinMatchExpression.
*/
class GeoIndexVersionWithCatalogTest : public CatalogTestFixture {
public:
void setUp() override {
CatalogTestFixture::setUp();
auto opCtx = operationContext();
CreateCommand createCmd(_measurementsNss);
createCmd.getCreateCollectionRequest().setTimeseries(TimeseriesOptions("time"));
ASSERT_OK(createCollection(opCtx, createCmd));
// Add a 2dsphere_bucket index on data.loc with 2dsphereIndexVersion 3.
const BSONObj indexSpec =
BSON("v" << 2 << "key" << BSON("data.loc" << "2dsphere_bucket") << "name"
<< "loc_2dsphere"
<< "2dsphereIndexVersion" << 3);
ASSERT_OK(storageInterface()->createIndexesOnEmptyCollection(
opCtx, _measurementsNss.makeTimeseriesBucketsNamespace(), {indexSpec}));
}
boost::intrusive_ptr<ExpressionContextForTest> getExpCtx() {
return make_intrusive<ExpressionContextForTest>(operationContext(), _measurementsNss);
}
CollectionOrViewAcquisition getCollAcquisition() {
return acquireCollectionOrView(operationContext(),
CollectionOrViewAcquisitionRequest(
_measurementsNss.makeTimeseriesBucketsNamespace(),
PlacementConcern{boost::none, ShardVersion::UNTRACKED()},
repl::ReadConcernArgs::get(operationContext()),
AcquisitionPrerequisites::kRead),
MODE_IS);
}
const NamespaceString _measurementsNss =
NamespaceString::createNamespaceString_forTest("test", "pipeline_test");
};
TEST_F(GeoIndexVersionWithCatalogTest, GeoWithinUses2dsphereBucketIndexVersionFromCatalog) {
// Simulate a pipeline arriving from the router: it has a pre-translated unpack stage but no
// 2dsphere index version info (the router lacks index catalog access).
auto expCtx = getExpCtx();
auto pipeline = pipeline_factory::makePipeline(
makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: "
"'time', bucketMaxSpanSeconds: 3600}}"),
fromjson("{$match: {loc: {$geoWithin: {$geometry: {type: \"Polygon\", "
"coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ]}}}}}")),
expCtx,
pipeline_factory::kOptionsMinimal);
auto collAcquisition = getCollAcquisition();
timeseries::translateStagesIfRequired(expCtx, *pipeline, collAcquisition);
auto& container = pipeline->getSources();
ASSERT_EQ(container.size(), 2U);
auto original = dynamic_cast<DocumentSourceMatch*>(container.back().get());
auto predicate = dynamic_cast<DocumentSourceInternalUnpackBucket*>(container.front().get())
->createPredicatesOnBucketLevelField(original->getMatchExpression());
ASSERT_TRUE(predicate.loosePredicate);
auto* ibgw =
dynamic_cast<InternalBucketGeoWithinMatchExpression*>(predicate.loosePredicate.get());
ASSERT_NE(ibgw, nullptr)
<< "Expected InternalBucketGeoWithinMatchExpression when $geoWithin is pushed down";
ASSERT_TRUE(ibgw->getIndexVersion())
<< "Expected index version from catalog when 2dsphere_bucket index exists";
ASSERT_EQ(*ibgw->getIndexVersion(), S2_INDEX_VERSION_3)
<< "Expected 2dsphereIndexVersion 3 from the index we created";
}
} // namespace
} // namespace mongo

View File

@ -46,6 +46,26 @@ mongo_cc_library(
],
)
mongo_cc_library(
name = "timeseries_constants",
hdrs = ["timeseries_constants.h"],
deps = [
"//src/mongo:base",
"//src/mongo/db/commands:create_command",
],
)
mongo_cc_library(
name = "timeseries_2dsphere_index_version_lookup",
srcs = ["timeseries_2dsphere_index_version_lookup.cpp"],
deps = [
":timeseries_constants",
"//src/mongo/db:server_base",
"//src/mongo/db/shard_role/shard_catalog:collection_catalog",
"//src/mongo/db/shard_role/shard_catalog:index_catalog",
],
)
mongo_cc_library(
name = "timeseries_metadata",
srcs = [
@ -258,6 +278,7 @@ mongo_cc_unit_test(
"bucket_compression_test.cpp",
"catalog_helper_test.cpp",
"collection_pre_conditions_util_test.cpp",
"timeseries_2dsphere_index_version_lookup_test.cpp",
"timeseries_collmod_test.cpp",
"timeseries_dotted_path_support_test.cpp",
"timeseries_extended_range_test.cpp",
@ -275,6 +296,7 @@ mongo_cc_unit_test(
deps = [
":bucket_compression",
":collection_pre_conditions_util",
":timeseries_2dsphere_index_version_lookup",
":timeseries_conversion_util",
":timeseries_extended_range",
":timeseries_options",

View File

@ -3,6 +3,9 @@ filters:
- "*":
approvers:
- 10gen/server-collection-write-path
- "timeseries_2dsphere_index_version_lookup*":
approvers:
- 10gen/query-integration-timeseries
- "*catalog_helper*":
approvers:
- 10gen/server-catalog-and-routing-shard-catalog

View File

@ -0,0 +1,71 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h"
#include "mongo/db/index_names.h"
#include "mongo/db/shard_role/shard_catalog/collection.h"
#include "mongo/db/shard_role/shard_catalog/index_catalog.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/timeseries/timeseries_constants.h"
namespace mongo::timeseries {
StringMap<int> build2dsphereIndexVersionMap(const Collection& coll) {
StringMap<int> result;
const std::string dataPrefix = str::stream() << kBucketDataFieldName << ".";
auto it = coll.getIndexCatalog()->getIndexIterator(IndexCatalog::InclusionPolicy::kReady);
while (it->more()) {
const auto* entry = it->next();
const auto* desc = entry->descriptor();
BSONElement versionElt =
desc->infoObj().getField(IndexDescriptor::k2dsphereVersionFieldName);
if (!versionElt.isNumber()) {
continue;
}
long long version = versionElt.numberLong();
if (version < 1 || version > 4) {
continue;
}
for (auto&& keyElt : desc->keyPattern()) {
if (keyElt.valueStringDataSafe() != IndexNames::GEO_2DSPHERE_BUCKET) {
continue;
}
StringData keyPath = keyElt.fieldNameStringData();
if (keyPath.size() <= dataPrefix.size() || !keyPath.starts_with(dataPrefix)) {
continue;
}
result.emplace(std::string(keyPath.substr(dataPrefix.size())),
static_cast<int>(version));
}
}
return result;
}
} // namespace mongo::timeseries

View File

@ -0,0 +1,48 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/shard_role/shard_role.h"
#include "mongo/util/modules.h"
#include "mongo/util/string_map.h"
namespace mongo {
namespace timeseries {
/**
* Scans all ready indexes on 'coll' and returns a map from user-facing field name to
* 2dsphereIndexVersion for every 2dsphere_bucket index found. When multiple indexes cover the
* same field, the version from the first index encountered is kept.
*/
MONGO_MOD_PUBLIC StringMap<int> build2dsphereIndexVersionMap(const Collection& coll);
} // namespace timeseries
} // namespace mongo

View File

@ -0,0 +1,232 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/index_names.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/server_options.h"
#include "mongo/db/shard_role/lock_manager/lock_manager_defs.h"
#include "mongo/db/shard_role/shard_catalog/catalog_raii.h"
#include "mongo/db/shard_role/shard_catalog/catalog_test_fixture.h"
#include "mongo/db/shard_role/shard_catalog/collection.h"
#include "mongo/db/shard_role/shard_catalog/index_catalog.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/idl/server_parameter_test_controller.h"
#include "mongo/unittest/unittest.h"
#include <optional>
#include <boost/optional/optional.hpp>
namespace mongo {
namespace {
/** Builds a minimal ready index spec for a 2dsphere_bucket index on 'bucketKeyPath' (e.g.
* data.loc). */
BSONObj make2dsphereBucketIndexSpec(StringData indexName,
StringData bucketKeyPath,
BSONObj extraFields = {}) {
BSONObjBuilder bob;
bob.append("v", 2);
bob.append("key", BSON(bucketKeyPath << IndexNames::GEO_2DSPHERE_BUCKET));
bob.append("name", indexName);
bob.appendElements(extraFields);
return bob.obj();
}
class Build2dsphereIndexVersionMapTest : public CatalogTestFixture {
public:
OperationContext* opCtxV() {
return operationContext();
}
const CollectionPtr& collPtr() const {
return *_coll.get();
}
const Collection& collection() const {
return *collPtr().get();
}
void createIndexAssertOk(const BSONObj& spec) {
WriteUnitOfWork wuow(opCtxV());
CollectionWriter writer{opCtxV(), _coll.get()};
auto* writable = writer.getWritableCollection(opCtxV());
auto sw =
writable->getIndexCatalog()->createIndexOnEmptyCollection(opCtxV(), writable, spec);
ASSERT_OK(sw);
wuow.commit();
}
protected:
void setUp() override {
CatalogTestFixture::setUp();
ASSERT_OK(storageInterface()->createCollection(opCtxV(), _nss, {}));
_coll.emplace(opCtxV(), _nss, MODE_X);
}
void tearDown() override {
_coll.reset();
CatalogTestFixture::tearDown();
}
NamespaceString _nss = NamespaceString::createNamespaceString_forTest("test", "ts_buckets");
boost::optional<AutoGetCollection> _coll;
};
/**
* Index specs with 2dsphereIndexVersion 4 are only accepted when the v4 feature flag is enabled
* and FCV is new enough.
*/
class Build2dsphereIndexVersionMapV4Test : public Build2dsphereIndexVersionMapTest {
protected:
void setUp() override {
auto fcvSnap = serverGlobalParams.mutableFCV.acquireFCVSnapshot();
if (fcvSnap.isVersionInitialized()) {
_fcvBefore = fcvSnap.getVersion();
}
_enableV4.emplace("featureFlag2dsphereIndexVersion4", true);
// (Generic FCV reference): kLatest so getDefaultS2IndexVersion() is 4 and v4 bucket indexes
// can be created in this test fixture.
serverGlobalParams.mutableFCV.setVersion(multiversion::GenericFCV::kLatest);
Build2dsphereIndexVersionMapTest::setUp();
}
void tearDown() override {
Build2dsphereIndexVersionMapTest::tearDown();
_enableV4.reset();
if (_fcvBefore) {
serverGlobalParams.mutableFCV.setVersion(*_fcvBefore);
} else {
serverGlobalParams.mutableFCV.reset();
}
}
private:
std::optional<RAIIServerParameterControllerForTest> _enableV4;
std::optional<multiversion::FeatureCompatibilityVersion> _fcvBefore;
};
TEST_F(Build2dsphereIndexVersionMapTest, EmptyCatalogReturnsEmptyMap) {
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT(m.empty());
}
TEST_F(Build2dsphereIndexVersionMapTest, MapsDataFieldToVersion) {
createIndexAssertOk(make2dsphereBucketIndexSpec(
"loc_2dsphere", "data.loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3)));
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 1U);
auto it = m.find("loc");
ASSERT(it != m.end());
ASSERT_EQ(it->second, 3);
}
TEST_F(Build2dsphereIndexVersionMapTest, StripsDataPrefixForNestedPath) {
createIndexAssertOk(make2dsphereBucketIndexSpec(
"geo_2dsphere", "data.geo.sub"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3)));
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 1U);
auto it = m.find("geo.sub");
ASSERT(it != m.end());
ASSERT_EQ(it->second, 3);
}
TEST_F(Build2dsphereIndexVersionMapTest, CompoundIndexMapsOnly2dsphereBucketField) {
BSONObj spec =
BSON("v" << 2 << "key"
<< BSON("data.ct" << 1 << "data.loc" << IndexNames::GEO_2DSPHERE_BUCKET) << "name"
<< "cmp_ct_loc" << IndexDescriptor::k2dsphereVersionFieldName << 3);
createIndexAssertOk(spec);
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 1U);
ASSERT_EQ(m["loc"], 3);
}
TEST_F(Build2dsphereIndexVersionMapTest, MultipleIndexesAccumulateDistinctFields) {
createIndexAssertOk(make2dsphereBucketIndexSpec(
"a_geo", "data.a"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3)));
createIndexAssertOk(make2dsphereBucketIndexSpec(
"b_geo", "data.b"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3)));
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 2U);
ASSERT_EQ(m["a"], 3);
ASSERT_EQ(m["b"], 3);
}
TEST_F(Build2dsphereIndexVersionMapTest, Non2dsphereBucketKeySkipped) {
BSONObj spec = BSON("v" << 2 << "key" << BSON("data.loc" << 1) << "name"
<< "loc_1" << IndexDescriptor::k2dsphereVersionFieldName << 3);
createIndexAssertOk(spec);
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT(m.empty());
}
TEST_F(Build2dsphereIndexVersionMapTest, KeyNotUnderDataPrefixSkipped) {
// Path must be "data.<userField>" for the map; a root-level geo field is ignored here.
createIndexAssertOk(make2dsphereBucketIndexSpec(
"root_geo", "loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3)));
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT(m.empty());
}
TEST_F(Build2dsphereIndexVersionMapV4Test, MapsDataFieldToVersion4) {
createIndexAssertOk(make2dsphereBucketIndexSpec(
"loc_2dsphere_v4", "data.loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 4)));
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 1U);
ASSERT_EQ(m["loc"], 4);
}
TEST_F(Build2dsphereIndexVersionMapV4Test, CompoundIndexMaps2dsphereBucketFieldWithVersion4) {
BSONObj spec =
BSON("v" << 2 << "key"
<< BSON("data.ct" << 1 << "data.loc" << IndexNames::GEO_2DSPHERE_BUCKET) << "name"
<< "cmp_ct_loc_v4" << IndexDescriptor::k2dsphereVersionFieldName << 4);
createIndexAssertOk(spec);
auto m = timeseries::build2dsphereIndexVersionMap(collection());
ASSERT_EQ(m.size(), 1U);
ASSERT_EQ(m["loc"], 4);
}
} // namespace
} // namespace mongo