diff --git a/buildscripts/resmokeconfig/suites/query_cbr_histogram_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/query_cbr_histogram_jscore_passthrough.yml index e8fa5d6df7e..ae78f653940 100644 --- a/buildscripts/resmokeconfig/suites/query_cbr_histogram_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/query_cbr_histogram_jscore_passthrough.yml @@ -37,6 +37,9 @@ selector: - jstests/core/query/release_memory/group.js # Running analyze on a query from this test creates a BSON document that is larger than 16 MB - jstests/core/index/geo/geo_circle_spilling.js + # TODO SERVER-121749: CBR hits MONGO_UNREACHABLE_TASSERT(5837103) when estimating a plan whose + # FetchNode filter contains $_internalBucketGeoWithin. + - jstests/core/timeseries/geo/timeseries_2dsphere_index_version_matcher_consistency.js # TODO SERVER-100451: $elemMatch estimation using histograms - jstests/core/query/boolean_expression_simplification.js diff --git a/jstests/core/timeseries/geo/timeseries_2dsphere_index_version_matcher_consistency.js b/jstests/core/timeseries/geo/timeseries_2dsphere_index_version_matcher_consistency.js new file mode 100644 index 00000000000..61c453dac2b --- /dev/null +++ b/jstests/core/timeseries/geo/timeseries_2dsphere_index_version_matcher_consistency.js @@ -0,0 +1,250 @@ +/** + * Tests that the geo matchers use the same 2dsphere index version semantics as the query's index + * when filtering buckets. This prevents index-returned buckets from being incorrectly discarded + * when control min/max are parsed with a different geometry type order. + * + * Background: For object-type geo elements that can be parsed as both legacy point and GeoJSON, + * V3 tries legacy first; V4 tries GeoJSON first. If the matcher used the wrong version, a bucket + * correctly returned by the index could be filtered out (or vice versa). This test uses an + * "ambiguous" document {x: 0, y: 0, type: "Point", coordinates: [10, 10]} which parses as (0,0) + * under V3 and (10,10) under V4. + * + * Also confirms the geo matcher works without an index (collection scan path uses default V4 + * semantics when no index version is available). + * + * @tags: [ + * does_not_support_stepdowns, + * requires_pipeline_optimization, + * requires_timeseries, + * featureFlag2dsphereIndexVersion4, + * cannot_run_during_upgrade_downgrade, + * multiversion_incompatible, + * ] + */ + +import {getAggPlanStage} from "jstests/libs/query/analyze_plan.js"; +import {describe, it, before} from "jstests/libs/mochalite.js"; + +const timeFieldName = "time"; +const metaFieldName = "meta"; + +// Polygon that contains (0,0) but not (10,10). Used with V3 index + ambiguous doc. +const polygonAroundZeroZero = { + type: "Polygon", + coordinates: [ + [ + [0, 0], + [0.5, 0], + [0.5, 0.5], + [0, 0.5], + [0, 0], + ], + ], +}; + +// Polygon that contains (10,10) but not (0,0). Used with V4 index or no index + ambiguous doc. +const polygonAroundTenTen = { + type: "Polygon", + coordinates: [ + [ + [9.5, 9.5], + [10.5, 9.5], + [10.5, 10.5], + [9.5, 10.5], + [9.5, 9.5], + ], + ], +}; + +// Ambiguous geo: V3 parses as legacy point (0,0), V4 parses as GeoJSON Point (10,10). +// BSON key order preserves "x" first, so firstElement().isNumber() is true for V3. +const ambiguousLoc = {x: 0, y: 0, type: "Point", coordinates: [10, 10]}; + +const now = new Date(); + +describe("2dsphere index version / matcher consistency", function () { + before(function () { + this.testDb = db.getSiblingDB(jsTestName()); + this.coll = this.testDb.getCollection(jsTestName()); + this.resetCollection = function (indexOptions = {}) { + this.coll.drop(); + assert.commandWorked( + this.testDb.createCollection(this.coll.getName(), { + timeseries: {timeField: timeFieldName, metaField: metaFieldName}, + }), + ); + if (Object.keys(indexOptions).length > 0) { + assert.commandWorked(this.coll.createIndex({loc: "2dsphere"}, indexOptions)); + } + }; + }); + + // V3 index + polygon around (0,0). The bucket-level matcher must use V3 semantics so the + // ambiguous doc is parsed as (0,0), lies inside the polygon, and the bucket is kept. + // Expected: one result. If the matcher used V4, it would parse as (10,10) and wrongly discard. + it("V3 2dsphere index: matcher uses V3 so bucket with ambiguous loc is kept", function () { + this.resetCollection({"2dsphereIndexVersion": 3}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 1, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}}]) + .toArray(); + assert.eq(results.length, 1, "With V3 index, matcher must parse control as (0,0)"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // V4 index + polygon around (10,10). The matcher must use V4 semantics so the ambiguous doc + // is parsed as (10,10), lies inside the polygon, and the bucket is kept. + // Expected: one result. If the matcher used V3, it would parse as (0,0) and wrongly discard. + it("V4 2dsphere index: matcher uses V4 so bucket with ambiguous loc is kept", function () { + this.resetCollection({"2dsphereIndexVersion": 4}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 2, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}]) + .toArray(); + assert.eq(results.length, 1, "With V4 index, matcher must parse control as (10,10)"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // No 2dsphere index: the server cannot infer an index version, so the matcher uses default + // V4 semantics. Query uses polygon around (10,10); ambiguous doc must parse as (10,10). + // Expected: one result. Ensures collection-scan path behaves consistently (V4). + it("No index (collection scan): matcher uses default V4 semantics", function () { + this.resetCollection(); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 3, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}]) + .toArray(); + assert.eq(results.length, 1, "Without index, matcher defaults to V4"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // V4 index + polygon around (0,0). With V4, ambiguous doc is (10,10), which is outside the + // polygon. The matcher must use V4 so the bucket is correctly excluded. + // Expected: zero results. If the matcher wrongly used V3, it would keep the bucket (false positive). + it("V4 index: polygon around (0,0) must not match ambiguous doc", function () { + this.resetCollection({"2dsphereIndexVersion": 4}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 4, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}}]) + .toArray(); + assert.eq(results.length, 0, "V4 parses as (10,10); polygon does not contain (10,10)"); + }); + + // V3 index + polygon around (10,10). With V3, ambiguous doc is (0,0), which is outside the + // polygon. The matcher must use V3 so the bucket is correctly excluded. + // Expected: zero results. If the matcher wrongly used V4, it would keep the bucket (false positive). + it("V3 index: polygon around (10,10) must not match ambiguous doc", function () { + this.resetCollection({"2dsphereIndexVersion": 3}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 5, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([{$match: {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}}]) + .toArray(); + assert.eq(results.length, 0, "V3 parses as (0,0); polygon does not contain (0,0)"); + }); + + // $and of $geoWithin (polygon around 0,0) and a meta filter. With a V3 index, the event-level + // filter (after unpacking) must use V3 to parse the ambiguous doc as (0,0), so it matches. + // Expected: one result. Confirms the version from the index is applied to the event filter in + // $and. + it("V3 index with $and: geo + meta filter uses V3 for event filter", function () { + this.resetCollection({"2dsphereIndexVersion": 3}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 6, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([ + { + $match: { + $and: [{loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}, {[metaFieldName]: {s: 1}}], + }, + }, + ]) + .toArray(); + assert.eq(results.length, 1, "With V3 index, $and with geo must parse event loc as (0,0)"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // $and of $geoWithin (polygon around 10,10) and a meta filter. With a V4 index, the + // event-level filter must use V4 to parse the ambiguous doc as (10,10), so it matches. + // Expected: one result. Confirms the version from the index is applied to the event filter in + // $and. + it("V4 index with $and: geo + meta filter uses V4 for event filter", function () { + this.resetCollection({"2dsphereIndexVersion": 4}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 7, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([ + { + $match: { + $and: [{loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}, {[metaFieldName]: {s: 1}}], + }, + }, + ]) + .toArray(); + assert.eq(results.length, 1, "With V4 index, $and with geo must parse event loc as (10,10)"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // $or of two $geoWithin (polygon 0,0 and polygon 10,10). With V3, the ambiguous doc is (0,0), + // so only the first branch matches. The event filter for each branch must use V3. + // Expected: one result. Confirms $or uses the same index version for all branches. + it("V3 index with $or: ambiguous doc matches first branch (polygon around 0,0)", function () { + this.resetCollection({"2dsphereIndexVersion": 3}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 8, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([ + { + $match: { + $or: [ + {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}, + {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}, + ], + }, + }, + ]) + .toArray(); + assert.eq(results.length, 1, "With V3 index, $or must use V3 for event filter so first branch (0,0) matches"); + assert.docEq(ambiguousLoc, results[0].loc); + }); + + // $or of two $geoWithin (polygon 0,0 and polygon 10,10). With V4, the ambiguous doc is (10,10), + // so only the second branch matches. The event filter for each branch must use V4. + // Expected: one result. Confirms $or uses the same index version for all branches. + it("V4 index with $or: ambiguous doc matches second branch (polygon around 10,10)", function () { + this.resetCollection({"2dsphereIndexVersion": 4}); + assert.commandWorked( + this.coll.insert([{[timeFieldName]: now, [metaFieldName]: {s: 1}, _id: 9, loc: ambiguousLoc}]), + ); + const results = this.coll + .aggregate([ + { + $match: { + $or: [ + {loc: {$geoWithin: {$geometry: polygonAroundZeroZero}}}, + {loc: {$geoWithin: {$geometry: polygonAroundTenTen}}}, + ], + }, + }, + ]) + .toArray(); + assert.eq( + results.length, + 1, + "With V4 index, $or must use V4 for event filter so second branch (10,10) matches", + ); + assert.docEq(ambiguousLoc, results[0].loc); + }); +}); diff --git a/src/mongo/db/exec/matcher/matcher_geo.cpp b/src/mongo/db/exec/matcher/matcher_geo.cpp index 6aabdb9a22b..88f3788e2a2 100644 --- a/src/mongo/db/exec/matcher/matcher_geo.cpp +++ b/src/mongo/db/exec/matcher/matcher_geo.cpp @@ -57,13 +57,14 @@ bool geoContains(const GeometryContainer& queryGeom, bool geoContains(const GeometryContainer& queryGeom, const GeoExpression::Predicate& queryPredicate, bool skipValidation, - const BSONElement& e) { + const BSONElement& e, + boost::optional indexVersion) { if (!e.isABSONObj()) { return false; } GeometryContainer geometry; - if (!geometry.parseFromStorage(e, skipValidation).isOK()) { + if (!geometry.parseFromStorage(e, skipValidation, indexVersion).isOK()) { return false; } @@ -110,8 +111,11 @@ bool matchesGeoContainer(const GeoMatchExpression* expr, const GeometryContainer void MatchesSingleElementEvaluator::visit(const GeoMatchExpression* expr) { const auto& query = expr->getGeoExpression(); - _result = - geoContains(query.getGeometry(), query.getPred(), expr->getCanSkipValidation(), _elem); + _result = geoContains(query.getGeometry(), + query.getPred(), + expr->getCanSkipValidation(), + _elem, + expr->get2dsphereIndexVersion()); } void MatchesSingleElementEvaluator::visit(const TwoDPtInAnnulusExpression* expr) { @@ -213,26 +217,29 @@ bool matchesBSONObj(const InternalBucketGeoWithinMatchExpression* expr, const BS } // Returns true if the bucket should be unpacked and all the data within the bucket should be // checked later. - auto parseMinMaxPoint = [](const BSONElement& elem, PointWithCRS* out) -> bool { - auto geoObj = elem.embeddedObject(); - if (BSONType::array == elem.type() || geoObj.firstElement().isNumber()) { - // Legacy Point. - if (!GeoParser::parseLegacyPoint(elem, out, true).isOK()) { - return true; - } - } else { - // If the bucket contains GeoJSON objects of types other than 'Points' we cannot be sure - // whether this bucket contains data is within the provided region. - if (!geoObj.hasField(GEOJSON_TYPE) || - geoObj[GEOJSON_TYPE].String() != GEOJSON_TYPE_POINT) { - return true; - } - // GeoJSON Point. - if (!GeoParser::parseGeoJSONPoint(geoObj, out).isOK()) { - return true; - } + auto parseMinMaxPoint = [indexVersion = expr->getIndexVersion()](const BSONElement& elem, + PointWithCRS* out) -> bool { + if (!indexVersion && elem.type() == BSONType::object && + elem.Obj().firstElement().isNumber()) { + // Without an index version (e.g., on a mongos without catalog access), we cannot + // resolve the V3 vs V4+ parsing ambiguity for objects whose first field is numeric: + // V3 tries legacy-point first; V4+ tries GeoJSON first. Conservatively unpack the + // bucket to avoid discarding data that the index correctly returned. + return true; } - return false; + auto geoObj = elem.embeddedObject(); + GeometryContainer geometry; + Status status = geometry.parseFromStorage(elem, false, indexVersion); + if (!status.isOK()) { + return true; // Parsing from storage failed, we should unpack the bucket to be safe. + } + if (!geometry.isPoint()) { + // The stored value didn't parse as a point, so we can't do a + // bounding-box comparison on the control min/max; unpack the bucket. + return true; + } + *out = geometry.getPoint(); + return false; // No need to unpack. }; PointWithCRS minPoint; diff --git a/src/mongo/db/exec/matcher/matcher_geo.h b/src/mongo/db/exec/matcher/matcher_geo.h index acbdb9ab797..dabe5e6aab3 100644 --- a/src/mongo/db/exec/matcher/matcher_geo.h +++ b/src/mongo/db/exec/matcher/matcher_geo.h @@ -31,6 +31,7 @@ #include "mongo/db/exec/matcher/match_details.h" #include "mongo/db/geo/geometry_container.h" +#include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/util/modules.h" @@ -41,7 +42,8 @@ namespace exec::matcher { bool geoContains(const GeometryContainer& queryGeom, const GeoExpression::Predicate& queryPredicate, bool skipValidation, - const BSONElement& e); + const BSONElement& e, + boost::optional indexVersion = boost::none); bool geoContains(const GeometryContainer& queryGeom, const GeoExpression::Predicate& queryPredicate, diff --git a/src/mongo/db/exec/matcher/matcher_geo_test.cpp b/src/mongo/db/exec/matcher/matcher_geo_test.cpp index ab99f30740e..7f70a4cae35 100644 --- a/src/mongo/db/exec/matcher/matcher_geo_test.cpp +++ b/src/mongo/db/exec/matcher/matcher_geo_test.cpp @@ -31,6 +31,7 @@ #include "mongo/db/exec/matcher/matcher.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/expression_internal_bucket_geo_within.h" +#include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/pipeline/expression_context_for_test.h" #include "mongo/db/query/compiler/parsers/matcher/expression_geo_parser.h" #include "mongo/db/query/compiler/parsers/matcher/expression_parser.h" @@ -79,7 +80,7 @@ public: } std::unique_ptr getDummyBucketGeoExpr() { - auto bucketGeoExpr = fromjson(R"( + _ownedGeoNoVersion = fromjson(R"( {$_internalBucketGeoWithin: { withinRegion: { $geometry: { @@ -89,14 +90,14 @@ public: }, field: "loc" }})"); - auto expr = MatchExpressionParser::parse(bucketGeoExpr, _expCtx); + auto expr = MatchExpressionParser::parse(_ownedGeoNoVersion, _expCtx); ASSERT_OK(expr.getStatus()); return std::move(expr.getValue()); } std::unique_ptr getDummyBucketGeoExprLegacy() { - auto bucketGeoExpr = fromjson(R"( + _ownedGeoLegacy = fromjson(R"( {$_internalBucketGeoWithin: { withinRegion: { $box: [ @@ -106,12 +107,41 @@ public: }, field: "loc" }})"); - auto expr = MatchExpressionParser::parse(bucketGeoExpr, _expCtx); + auto expr = MatchExpressionParser::parse(_ownedGeoLegacy, _expCtx); ASSERT_OK(expr.getStatus()); return std::move(expr.getValue()); } + std::unique_ptr getDummyBucketGeoExprWithIndexVersion(int indexVersion) { + _ownedGeoWithVersion = + BSON("$_internalBucketGeoWithin" + << BSON("withinRegion" + << BSON("$geometry" << BSON( + "type" << "Polygon" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY( + BSON_ARRAY(0 << 0) + << BSON_ARRAY(0 << 5) << BSON_ARRAY(5 << 5) + << BSON_ARRAY(5 << 0) << BSON_ARRAY(0 << 0))))) + << "field" + << "loc" + << "2dsphereIndexVersion" << indexVersion)); + + auto expr = MatchExpressionParser::parse(_ownedGeoWithVersion, + _expCtx, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures); + ASSERT_OK(expr.getStatus()); + + return std::move(expr.getValue()); + } + +protected: + BSONObj _ownedGeoWithVersion; + BSONObj _ownedGeoNoVersion; + BSONObj _ownedGeoLegacy; + private: boost::intrusive_ptr _expCtx = new ExpressionContextForTest(); }; @@ -259,6 +289,48 @@ TEST_F(InternalBucketGeoWithinExpression, BucketContainsNonPointTypeLegacy) { ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj)); } +TEST_F(InternalBucketGeoWithinExpression, With2dsphereIndexVersion4GeoJSONPoints) { + auto expr = getDummyBucketGeoExprWithIndexVersion(4); + + auto obj = createBucketObj(BSON("loc" << BSON("type" << "Point" + << "coordinates" << BSON_ARRAY(1 << 1))) + .firstElement(), + BSON("loc" << BSON("type" << "Point" + << "coordinates" << BSON_ARRAY(3 << 3))) + .firstElement()); + + ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj)); +} + +TEST_F(InternalBucketGeoWithinExpression, With2dsphereIndexVersion3LegacyPoints) { + auto expr = getDummyBucketGeoExprWithIndexVersion(3); + + auto obj = createBucketObj(BSON("loc" << BSON_ARRAY(1 << 1)).firstElement(), + BSON("loc" << BSON_ARRAY(3 << 3)).firstElement()); + + ASSERT_TRUE(exec::matcher::matchesBSON(expr.get(), obj)); +} + +TEST_F(InternalBucketGeoWithinExpression, EquivalentAndClonePreserveIndexVersion) { + auto exprWithVersion = getDummyBucketGeoExprWithIndexVersion(4); + auto exprWithoutVersion = getDummyBucketGeoExpr(); + + // Expressions with and without index version should not be equivalent. + ASSERT_FALSE(exprWithVersion->equivalent(exprWithoutVersion.get())); + ASSERT_FALSE(exprWithoutVersion->equivalent(exprWithVersion.get())); + + // Clone should preserve index version and be equivalent to original. + auto clone = exprWithVersion->clone(); + ASSERT_TRUE(exprWithVersion->equivalent(clone.get())); + + auto* ibgwWithVersion = + static_cast(exprWithVersion.get()); + auto* ibgwClone = static_cast(clone.get()); + ASSERT_TRUE(ibgwWithVersion->getIndexVersion()); + ASSERT_TRUE(ibgwClone->getIndexVersion()); + ASSERT_EQUALS(*ibgwWithVersion->getIndexVersion(), *ibgwClone->getIndexVersion()); +} + TEST(ExpressionGeoTest, Geo1) { BSONObj query = fromjson("{loc:{$within:{$box:[{x: 4, y:4},[6,6]]}}}"); diff --git a/src/mongo/db/matcher/expression_geo.cpp b/src/mongo/db/matcher/expression_geo.cpp index 736a4c2e3f7..02aee17cf19 100644 --- a/src/mongo/db/matcher/expression_geo.cpp +++ b/src/mongo/db/matcher/expression_geo.cpp @@ -99,7 +99,8 @@ GeoMatchExpression::GeoMatchExpression(boost::optional path, : LeafMatchExpression(GEO, path, std::move(annotation)), _rawObj(rawObj), _query(query), - _canSkipValidation(false) {} + _canSkipValidation(false), + _2dsphereIndexVersion(boost::none) {} /** * Takes shared ownership of the passed-in GeoExpression. @@ -111,7 +112,8 @@ GeoMatchExpression::GeoMatchExpression(boost::optional path, : LeafMatchExpression(GEO, path, std::move(annotation)), _rawObj(rawObj), _query(query), - _canSkipValidation(false) {} + _canSkipValidation(false), + _2dsphereIndexVersion(boost::none) {} void GeoMatchExpression::debugString(StringBuilder& debug, int indentationLevel) const { _debugAddSpace(debug, indentationLevel); @@ -141,13 +143,15 @@ bool GeoMatchExpression::equivalent(const MatchExpression* other) const { if (path() != realOther->path()) return false; - return SimpleBSONObjComparator::kInstance.evaluate(_rawObj == realOther->_rawObj); + return SimpleBSONObjComparator::kInstance.evaluate(_rawObj == realOther->_rawObj) && + _2dsphereIndexVersion == realOther->_2dsphereIndexVersion; } std::unique_ptr GeoMatchExpression::clone() const { std::unique_ptr next = std::make_unique(path(), _query, _rawObj, _errorAnnotation); next->_canSkipValidation = _canSkipValidation; + next->_2dsphereIndexVersion = _2dsphereIndexVersion; if (getTag()) { next->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/expression_geo.h b/src/mongo/db/matcher/expression_geo.h index 61922b5c6f7..5f782be2ca2 100644 --- a/src/mongo/db/matcher/expression_geo.h +++ b/src/mongo/db/matcher/expression_geo.h @@ -40,6 +40,7 @@ #include "mongo/bson/util/builder_fwd.h" #include "mongo/db/geo/geometry_container.h" #include "mongo/db/geo/shapes.h" +#include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_visitor.h" @@ -136,6 +137,13 @@ public: return *_query; } + boost::optional get2dsphereIndexVersion() const { + return _2dsphereIndexVersion; + } + void set2dsphereIndexVersion(boost::optional v) { + _2dsphereIndexVersion = v; + } + void acceptVisitor(MatchExpressionMutableVisitor* visitor) final { visitor->visit(this); } @@ -155,10 +163,11 @@ private: // Share ownership of our query with all of our clones std::shared_ptr _query; bool _canSkipValidation; + boost::optional _2dsphereIndexVersion; }; -// TODO: Make a struct, turn parse stuff into something like +// TODO SERVER-122401: Make a struct, turn parse stuff into something like // static Status parseNearQuery(const BSONObj& obj, NearQuery** out); class GeoNearExpression { GeoNearExpression(const GeoNearExpression&) = delete; @@ -243,7 +252,7 @@ private: /** * Expression which checks whether a legacy 2D index point is contained within our near * search annulus. See nextInterval() below for more discussion. - * TODO: Make this a standard type of GEO match expression + * TODO SERVER-122399: Make this a standard type of GEO match expression */ class TwoDPtInAnnulusExpression : public LeafMatchExpression { public: diff --git a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp index 02ac56469d5..b4543f05e39 100644 --- a/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp +++ b/src/mongo/db/matcher/expression_internal_bucket_geo_within.cpp @@ -54,7 +54,7 @@ bool InternalBucketGeoWithinMatchExpression::equivalent(const MatchExpression* e return SimpleBSONObjComparator::kInstance.evaluate( _geoContainer->getGeoElement().Obj() == other->getGeoContainer().getGeoElement().Obj()) && - _field == other->getField(); + _field == other->getField() && _indexVersion == other->_indexVersion; } void InternalBucketGeoWithinMatchExpression::serialize(BSONObjBuilder* builder, @@ -73,12 +73,16 @@ void InternalBucketGeoWithinMatchExpression::serialize(BSONObjBuilder* builder, // Serialize the field which is being searched over. bob.append(InternalBucketGeoWithinMatchExpression::kField, opts.serializeFieldPathFromString(_field)); + if (_indexVersion) { + bob.append("2dsphereIndexVersion", static_cast(*_indexVersion)); + } bob.doneFast(); } std::unique_ptr InternalBucketGeoWithinMatchExpression::clone() const { std::unique_ptr next = - std::make_unique(_geoContainer, _field); + std::make_unique( + _geoContainer, _field, nullptr, _indexVersion); if (getTag()) { next->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/expression_internal_bucket_geo_within.h b/src/mongo/db/matcher/expression_internal_bucket_geo_within.h index 38df3556382..4773bcc513b 100644 --- a/src/mongo/db/matcher/expression_internal_bucket_geo_within.h +++ b/src/mongo/db/matcher/expression_internal_bucket_geo_within.h @@ -37,6 +37,7 @@ #include "mongo/bson/util/builder_fwd.h" #include "mongo/db/field_ref.h" #include "mongo/db/geo/geometry_container.h" +#include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_visitor.h" #include "mongo/db/query/query_shape/serialization_options.h" @@ -80,14 +81,17 @@ public: static constexpr StringData kWithinRegion = "withinRegion"_sd; static constexpr StringData kField = "field"_sd; - InternalBucketGeoWithinMatchExpression(std::shared_ptr container, - std::string field, - clonable_ptr annotation = nullptr) + InternalBucketGeoWithinMatchExpression( + std::shared_ptr container, + std::string field, + clonable_ptr annotation = nullptr, + boost::optional indexVersion = boost::none) : MatchExpression(MatchExpression::INTERNAL_BUCKET_GEO_WITHIN, std::move(annotation)), _geoContainer(container), _indexField("data." + field), _fieldRef(_indexField), - _field(std::move(field)) {} + _field(std::move(field)), + _indexVersion(indexVersion) {} void debugString(StringBuilder& debug, int indentationLevel) const final; @@ -127,6 +131,10 @@ public: return *_geoContainer; } + boost::optional getIndexVersion() const { + return _indexVersion; + } + StringData path() const final { return _indexField; } @@ -148,6 +156,7 @@ private: std::string _indexField; FieldRef _fieldRef; std::string _field; + boost::optional _indexVersion; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/BUILD.bazel b/src/mongo/db/pipeline/BUILD.bazel index 064b418a2a8..549049b1878 100644 --- a/src/mongo/db/pipeline/BUILD.bazel +++ b/src/mongo/db/pipeline/BUILD.bazel @@ -587,6 +587,7 @@ mongo_cc_library( "//src/mongo/db/storage:spill_util", "//src/mongo/db/storage:storage_options", "//src/mongo/db/timeseries:catalog_helper", + "//src/mongo/db/timeseries:timeseries_2dsphere_index_version_lookup", "//src/mongo/db/timeseries:timeseries_conversion_util", "//src/mongo/db/timeseries:timeseries_options", "//src/mongo/db/update:update_document_diff", diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index f5a91a3ec23..6d8ddd9b1b2 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -45,7 +45,9 @@ #include "mongo/db/exec/document_value/document.h" #include "mongo/db/exec/document_value/document_metadata_fields.h" #include "mongo/db/exec/plan_stats.h" +#include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression.h" +#include "mongo/db/matcher/expression_geo.h" #include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/pipeline/accumulation_statement.h" #include "mongo/db/pipeline/accumulator.h" @@ -635,6 +637,30 @@ boost::intrusive_ptr rewriteGroupByField( return ExpressionObject::create(pExpCtx.get(), std::move(fieldsAndExprs)); } +// Walks the match expression tree and sets the 2dsphere index version on any +// GeoMatchExpression nodes whose field appears in versionMap. This is needed +// because setEventFilter re-parses the filter from BSON, producing a fresh +// expression tree with no index version; the version must be re-applied from +// the catalog snapshot stored in _geo2dsphereIndexVersions. +void set2dsphereIndexVersionOnGeoPredicates(MatchExpression* expr, + const boost::optional>& versionMap) { + if (!versionMap) { + return; + } + if (expr->matchType() == MatchExpression::GEO) { + auto* geoExpr = static_cast(expr); + auto field = std::string(geoExpr->getGeoExpression().getField()); + auto it = versionMap->find(field); + if (it != versionMap->end()) { + geoExpr->set2dsphereIndexVersion(static_cast(it->second)); + } + return; + } + for (size_t i = 0; i < expr->numChildren(); ++i) { + set2dsphereIndexVersionOnGeoPredicates(expr->getChild(i), versionMap); + } +} + } // namespace DocumentSourceInternalUnpackBucket::DocumentSourceInternalUnpackBucket( @@ -1117,6 +1143,25 @@ void DocumentSourceInternalUnpackBucket::setEventFilter(BSONObj eventFilterBson, _eventFilterDeps = DepsTracker(); dependency_analysis::addDependencies(_sharedState->_eventFilter.get(), &_eventFilterDeps); + // The event filter was just re-parsed from raw BSON, which does not carry + // 2dsphere index version information. _geo2dsphereIndexVersions was populated + // from the index catalog at translation time (on mongod) and stored separately + // in the stage spec. Stamp the version onto any geo predicates now so that + // bucket-level geo optimization can use the correct V3 vs V4+ parsing semantics. + set2dsphereIndexVersionOnGeoPredicates(_sharedState->_eventFilter.get(), + _geo2dsphereIndexVersions); +} + +void DocumentSourceInternalUnpackBucket::set2dsphereIndexVersions( + boost::optional> versions) { + _geo2dsphereIndexVersions = std::move(versions); + // If an event filter was already parsed before the version map became available + // (e.g. from BSON deserialization on a shard receiving a router-translated pipeline + // where the router lacked index info), stamp the versions onto its geo predicates now. + if (_sharedState->_eventFilter && _geo2dsphereIndexVersions) { + set2dsphereIndexVersionOnGeoPredicates(_sharedState->_eventFilter.get(), + _geo2dsphereIndexVersions); + } } void DocumentSourceInternalUnpackBucket::internalizeProject(const BSONObj& project, @@ -1181,6 +1226,16 @@ std::pair DocumentSourceInternalUnpackBucket::extractOrBuildProje BucketSpec::BucketPredicate DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( const MatchExpression* matchExpr) const { + timeseries::Get2dsphereIndexVersionFn get2dsphereIndexVersion; + if (_geo2dsphereIndexVersions) { + get2dsphereIndexVersion = [&versionMap = *_geo2dsphereIndexVersions]( + OperationContext*, const NamespaceString&, StringData field) { + auto it = versionMap.find(std::string(field)); + return it != versionMap.end() + ? boost::make_optional(static_cast(it->second)) + : boost::optional(); + }; + } return BucketSpec::createPredicatesOnBucketLevelField( matchExpr, _sharedState->_bucketUnpacker.bucketSpec(), @@ -1190,7 +1245,8 @@ BucketSpec::BucketPredicate DocumentSourceInternalUnpackBucket::createPredicates _sharedState->_bucketUnpacker.includeMetaField(), _assumeNoMixedSchemaData, BucketSpec::IneligiblePredicatePolicy::kIgnore, - _fixedBuckets); + _fixedBuckets, + get2dsphereIndexVersion); } bool DocumentSourceInternalUnpackBucket::generateBucketLevelIdPredicates( diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h index 76eee7a49a0..10b6ea6b4d0 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h @@ -48,6 +48,7 @@ #include "mongo/db/timeseries/mixed_schema_buckets_state.h" #include "mongo/util/assert_util.h" #include "mongo/util/modules.h" +#include "mongo/util/string_map.h" #include #include @@ -262,6 +263,15 @@ public: _sharedState->_bucketUnpacker.setIncludeMinTimeAsMetadata(); } + // Sets the 2dsphere index version map and, if an event filter was already parsed before this + // map became available (e.g. deserialized from BSON on a shard receiving a router-translated + // pipeline), stamps the versions onto its geo predicates immediately. + void set2dsphereIndexVersions(boost::optional> versions); + + bool has2dsphereIndexVersions() const { + return _geo2dsphereIndexVersions.has_value(); + } + void setIncludeMaxTimeAsMetadata() { _sharedState->_bucketUnpacker.setIncludeMaxTimeAsMetadata(); } @@ -391,6 +401,12 @@ private: // predicates in order to ensure correctness. bool _assumeNoMixedSchemaData = false; + // Map from user-facing field name to 2dsphereIndexVersion, built at translation time from the + // collection's index catalog. boost::none means the map was not computed (e.g. for sharded + // queries translated at the router), in which case setEventFilter falls back to a catalog + // lookup. + boost::optional> _geo2dsphereIndexVersions = boost::none; + // This is true if 'bucketRoundingSeconds' and 'bucketMaxSpanSeconds' are set, equal, and // unchanged. Then we can push down certain $match and $group queries. bool _fixedBuckets = false; diff --git a/src/mongo/db/query/BUILD.bazel b/src/mongo/db/query/BUILD.bazel index aa0149322fd..5c2ddb8d84f 100644 --- a/src/mongo/db/query/BUILD.bazel +++ b/src/mongo/db/query/BUILD.bazel @@ -382,6 +382,7 @@ mongo_cc_library( ], deps = [ "//src/mongo/db:query_expressions", + "//src/mongo/db/index:expression_params", "//src/mongo/db/matcher:expression_algo", "//src/mongo/db/query/compiler/parsers/matcher:matcher_parser", "//src/third_party/s2", diff --git a/src/mongo/db/query/compiler/parsers/matcher/BUILD.bazel b/src/mongo/db/query/compiler/parsers/matcher/BUILD.bazel index 9f9a79cd450..b9c3037ae86 100644 --- a/src/mongo/db/query/compiler/parsers/matcher/BUILD.bazel +++ b/src/mongo/db/query/compiler/parsers/matcher/BUILD.bazel @@ -23,6 +23,7 @@ mongo_cc_library( "//src/mongo/db/matcher/doc_validation", "//src/mongo/db/query/compiler/parsers/matcher/schema:json_schema_parser", "//src/mongo/db/query/compiler/rewrites/matcher:matcher_rewrites", + "//src/mongo/db/shard_role/shard_catalog:index_catalog", ], ) diff --git a/src/mongo/db/query/compiler/parsers/matcher/expression_parser.cpp b/src/mongo/db/query/compiler/parsers/matcher/expression_parser.cpp index c740867b88f..42a178f14c2 100644 --- a/src/mongo/db/query/compiler/parsers/matcher/expression_parser.cpp +++ b/src/mongo/db/query/compiler/parsers/matcher/expression_parser.cpp @@ -81,6 +81,7 @@ #include "mongo/db/query/query_execution_knobs_gen.h" #include "mongo/db/query/query_integration_knobs_gen.h" #include "mongo/db/query/query_optimization_knobs_gen.h" +#include "mongo/db/shard_role/shard_catalog/index_descriptor.h" #include "mongo/db/stats/counters.h" #include "mongo/platform/atomic_word.h" #include "mongo/util/assert_util.h" @@ -1203,8 +1204,27 @@ StatusWithMatchExpression parseInternalBucketGeoWithinMatchExpression( // Parse the field. std::string field = subobj["field"].String(); + boost::optional indexVersion = boost::none; + if (subobj.hasField(IndexDescriptor::k2dsphereVersionFieldName)) { + BSONElement versionElem = subobj[IndexDescriptor::k2dsphereVersionFieldName]; + if (!versionElem.isNumber()) { + return {ErrorCodes::TypeMismatch, + str::stream() << InternalBucketGeoWithinMatchExpression::kName + << "'s '2dsphereIndexVersion' field must be a number"}; + } + long long versionVal = versionElem.safeNumberLong(); + if (versionVal != 1 && versionVal != 2 && versionVal != 3 && versionVal != 4) { + return {ErrorCodes::BadValue, + str::stream() << InternalBucketGeoWithinMatchExpression::kName + << "'s '2dsphereIndexVersion' must be 1, 2, 3, or 4, got: " + << versionVal}; + } + indexVersion = static_cast(versionVal); + } + expCtx->setSbeCompatibility(SbeCompatibility::notCompatible); - return {std::make_unique(geoContainer, field)}; + return {std::make_unique( + geoContainer, field, nullptr, indexVersion)}; } StatusWithMatchExpression parseInternalSchemaAllowedProperties( @@ -1859,7 +1879,7 @@ StatusWithMatchExpression parseSubField(const BSONObj& context, return parseMOD(name, e, expCtx); case PathAcceptingKeyword::OPTIONS: { - // TODO: try to optimize this + // TODO SERVER-122402 try to optimize this // we have to do this since $options can be before or after a $regex // but we validate here for (auto temp : context) { diff --git a/src/mongo/db/query/compiler/parsers/matcher/expression_parser_geo_test.cpp b/src/mongo/db/query/compiler/parsers/matcher/expression_parser_geo_test.cpp index 0be0a2c61e4..3bc55ac2285 100644 --- a/src/mongo/db/query/compiler/parsers/matcher/expression_parser_geo_test.cpp +++ b/src/mongo/db/query/compiler/parsers/matcher/expression_parser_geo_test.cpp @@ -32,6 +32,7 @@ #include "mongo/bson/json.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_geo.h" +#include "mongo/db/matcher/expression_internal_bucket_geo_within.h" #include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/expression_context_for_test.h" @@ -1099,4 +1100,128 @@ TEST(ExpressionGeoTest, RoundTripSerializeInternalBucketGeoWithin) { }})")); } +TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionValid) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + + for (int version : {1, 2, 3, 4}) { + BSONObj query = + BSON("$_internalBucketGeoWithin" + << BSON("withinRegion" + << BSON("$geometry" << BSON( + "type" << "Polygon" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY( + BSON_ARRAY(0 << 0) + << BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5) + << BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0))))) + << "field" + << "loc" + << "2dsphereIndexVersion" << version)); + + auto result = MatchExpressionParser::parse(query, + expCtx, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures); + ASSERT_OK(result.getStatus()) << "Failed for version " << version; + + auto* ibgw = static_cast(result.getValue().get()); + ASSERT_TRUE(ibgw->getIndexVersion()) << "Expected index version for version " << version; + ASSERT_EQUALS(static_cast(*ibgw->getIndexVersion()), version) + << "Wrong index version for version " << version; + } +} + +TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionInvalid) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + + for (long long version : {0, 5}) { + BSONObj query = + BSON("$_internalBucketGeoWithin" + << BSON("withinRegion" + << BSON("$geometry" << BSON( + "type" << "Polygon" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY( + BSON_ARRAY(0 << 0) + << BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5) + << BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0))))) + << "field" + << "loc" + << "2dsphereIndexVersion" << version)); + + auto result = MatchExpressionParser::parse(query, + expCtx, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures); + ASSERT_NOT_OK(result.getStatus()) << "Should fail for version " << version; + ASSERT_EQUALS(result.getStatus().code(), ErrorCodes::BadValue); + } +} + +TEST(ExpressionGeoTest, ParseInternalBucketGeoWithin2dsphereIndexVersionTypeMismatch) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + + // Use BSON() because JSON does not allow field names starting with a digit + // (2dsphereIndexVersion). + BSONObj query = + BSON("$_internalBucketGeoWithin" + << BSON("withinRegion" + << BSON("$geometry" + << BSON("type" << "Polygon" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY( + BSON_ARRAY(0 << 0) + << BSON_ARRAY(5 << 0) << BSON_ARRAY(5 << 5) + << BSON_ARRAY(0 << 5) << BSON_ARRAY(0 << 0))))) + << "field" + << "loc" + << "2dsphereIndexVersion" + << "not_a_number")); + + auto result = MatchExpressionParser::parse( + query, expCtx, ExtensionsCallbackNoop(), MatchExpressionParser::kAllowAllSpecialFeatures); + ASSERT_NOT_OK(result.getStatus()); + ASSERT_EQUALS(result.getStatus().code(), ErrorCodes::TypeMismatch); +} + +TEST(ExpressionGeoTest, RoundTripSerializeInternalBucketGeoWithinWith2dsphereIndexVersion) { + auto opts = SerializationOptions{LiteralSerializationPolicy::kToRepresentativeParseableValue}; + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + + // Use BSON() because JSON does not allow field names starting with a digit + // (2dsphereIndexVersion). + BSONObj inputExpr = + BSON("$_internalBucketGeoWithin" + << BSON("withinRegion" + << BSON("$geometry" + << BSON("type" << "Polygon" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY( + BSON_ARRAY(0 << 0) + << BSON_ARRAY(0 << 5) << BSON_ARRAY(5 << 5) + << BSON_ARRAY(5 << 0) << BSON_ARRAY(0 << 0))))) + << "field" + << "loc" + << "2dsphereIndexVersion" << 4)); + + auto result = MatchExpressionParser::parse(inputExpr, expCtx); + ASSERT_OK(result.getStatus()); + + BSONObjBuilder bob; + result.getValue()->serialize(&bob, opts); + auto serialized = bob.obj(); + ASSERT_TRUE(serialized.hasField("$_internalBucketGeoWithin")); + ASSERT_TRUE(serialized["$_internalBucketGeoWithin"].Obj().hasField("2dsphereIndexVersion")); + ASSERT_EQUALS(serialized["$_internalBucketGeoWithin"].Obj()["2dsphereIndexVersion"].Int(), 4); + + // Round-trip: parse the serialized expression and verify index version is preserved. + auto roundTripped = MatchExpressionParser::parse(serialized, expCtx); + ASSERT_OK(roundTripped.getStatus()); + + auto* ibgw = + static_cast(roundTripped.getValue().get()); + ASSERT_TRUE(ibgw->getIndexVersion()); + ASSERT_EQUALS(static_cast(*ibgw->getIndexVersion()), 4); +} + } // namespace mongo diff --git a/src/mongo/db/query/timeseries/BUILD.bazel b/src/mongo/db/query/timeseries/BUILD.bazel index 07f3096a26b..a1f850b32d6 100644 --- a/src/mongo/db/query/timeseries/BUILD.bazel +++ b/src/mongo/db/query/timeseries/BUILD.bazel @@ -16,8 +16,10 @@ mongo_cc_unit_test( ], tags = ["mongo_unittest_third_group"], deps = [ + "//src/mongo/db/index:expression_params", "//src/mongo/db/pipeline", "//src/mongo/db/pipeline:expression_context_for_test", + "//src/mongo/db/shard_role/shard_catalog:catalog_test_fixture", "//src/mongo/db/timeseries:timeseries_test_fixture", ], ) diff --git a/src/mongo/db/query/timeseries/bucket_spec.cpp b/src/mongo/db/query/timeseries/bucket_spec.cpp index 6a8452c8002..46b2c19cb03 100644 --- a/src/mongo/db/query/timeseries/bucket_spec.cpp +++ b/src/mongo/db/query/timeseries/bucket_spec.cpp @@ -133,7 +133,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField( bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets) { + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion) { tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr); @@ -184,7 +185,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField( includeMetaField, assumeNoMixedSchemaData, policy, - fixedBuckets); + fixedBuckets, + get2dsphereIndexVersion); if (child.loosePredicate) { looseAndExpression->add(std::move(child.loosePredicate)); } @@ -242,7 +244,8 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField( includeMetaField, assumeNoMixedSchemaData, policy, - fixedBuckets); + fixedBuckets, + get2dsphereIndexVersion); if (looseOrExpression && child.loosePredicate) { looseOrExpression->add(std::move(child.loosePredicate)); } else { @@ -308,8 +311,13 @@ BucketSpec::BucketPredicate BucketSpec::createPredicatesOnBucketLevelField( auto& geoExpr = static_cast(matchExpr)->getGeoExpression(); if (geoExpr.getPred() == GeoExpression::WITHIN || geoExpr.getPred() == GeoExpression::INTERSECT) { + boost::optional indexVersion = get2dsphereIndexVersion + ? get2dsphereIndexVersion(pExpCtx->getOperationContext(), + pExpCtx->getNamespaceString(), + geoExpr.getField()) + : boost::none; return {std::make_unique( - geoExpr.getGeometryPtr(), geoExpr.getField()), + geoExpr.getGeometryPtr(), geoExpr.getField(), nullptr, indexVersion), nullptr}; } } else if (matchExpr->matchType() == MatchExpression::EXISTS) { @@ -394,7 +402,8 @@ std::pair BucketSpec::pushdownPredicate( bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets) { + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion) { auto [metaOnlyPred, bucketMetricPred, residualPred] = getPushdownPredicates(expCtx, tsOptions, @@ -403,7 +412,8 @@ std::pair BucketSpec::pushdownPredicate( includeMetaField, assumeNoMixedSchemaData, policy, - fixedBuckets); + fixedBuckets, + get2dsphereIndexVersion); BSONObjBuilder result; if (metaOnlyPred) metaOnlyPred->serialize(&result, {}); @@ -437,7 +447,8 @@ BucketSpec::SplitPredicates BucketSpec::getPushdownPredicates( bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets) { + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion) { auto allowedFeatures = MatchExpressionParser::kDefaultSpecialFeatures; auto matchExpr = uassertStatusOK( @@ -465,7 +476,8 @@ BucketSpec::SplitPredicates BucketSpec::getPushdownPredicates( includeMetaField, assumeNoMixedSchemaData, policy, - fixedBuckets); + fixedBuckets, + get2dsphereIndexVersion); bucketMetricPred = std::move(bucketPredicate.loosePredicate); if (!expCtx->getRequiresTimeseriesExtendedRangeSupport()) { // It may be possible to generate _id predicates or even '$alwaysTrue' or '$alwaysFalse' diff --git a/src/mongo/db/query/timeseries/bucket_spec.h b/src/mongo/db/query/timeseries/bucket_spec.h index a4157dfeb81..ee7bc24ff08 100644 --- a/src/mongo/db/query/timeseries/bucket_spec.h +++ b/src/mongo/db/query/timeseries/bucket_spec.h @@ -32,6 +32,7 @@ #include "mongo/base/string_data.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/exec/document_value/document_internal.h" +#include "mongo/db/index/s2_common.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/timeseries/timeseries_gen.h" @@ -49,6 +50,11 @@ namespace mongo::timeseries { +// Optional callback to look up 2dsphere index version for a field. +// When nullptr or not called, the most up to date version (currently v4) is used. +using Get2dsphereIndexVersionFn = std::function( + OperationContext*, const NamespaceString&, StringData)>; + /** * Carries parameters for unpacking a bucket. The order of operations applied to determine which * fields are in the final document are: @@ -216,7 +222,8 @@ public: bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets); + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr); /** * Takes predicates that were generated by createPredicatesOnBucketLevelField() and possibly @@ -303,7 +310,8 @@ public: bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets); + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr); /** * Splits out a predicate on the meta field from a predicate on the bucket metric field. @@ -335,7 +343,8 @@ public: bool includeMetaField, bool assumeNoMixedSchemaData, IneligiblePredicatePolicy policy, - bool fixedBuckets); + bool fixedBuckets, + Get2dsphereIndexVersionFn get2dsphereIndexVersion = nullptr); bool includeMinTimeAsMetadata = false; bool includeMaxTimeAsMetadata = false; diff --git a/src/mongo/db/query/timeseries/timeseries_translation.cpp b/src/mongo/db/query/timeseries/timeseries_translation.cpp index 9cb1ecda7ba..d86988bf9b3 100644 --- a/src/mongo/db/query/timeseries/timeseries_translation.cpp +++ b/src/mongo/db/query/timeseries/timeseries_translation.cpp @@ -34,6 +34,7 @@ #include "mongo/db/pipeline/document_source_internal_convert_bucket_index_stats.h" #include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" #include "mongo/db/shard_role/shard_catalog/raw_data_operation.h" +#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h" #include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h" #include "mongo/db/timeseries/timeseries_options.h" @@ -268,6 +269,30 @@ void translateIndexHintIfRequiredImpl(const boost::intrusive_ptr(source.get()); + if (!unpack) { + continue; + } + if (unpack->has2dsphereIndexVersions()) { + continue; + } + boost::optional> versions = + timeseries::build2dsphereIndexVersionMap(*collPtr.get()); + unpack->set2dsphereIndexVersions(versions); + break; + } +} + } // namespace bool requiresViewlessTimeseriesTranslation(OperationContext* const opCtx, @@ -286,6 +311,12 @@ void translateStagesIfRequired(const boost::intrusive_ptr& ex Pipeline& pipeline, const CollectionOrViewAcquisition& collOrView) { translateStagesIfRequiredImpl(expCtx, pipeline, collOrView); + // For pipelines that arrived pre-translated from a router (where CollectionRoutingInfo was used + // and no index info was available), modify the unpack stage with any collection-related + // metadata. + if (collOrView.isCollection()) { + populateUnpackBucketStagesFromCollection(pipeline, collOrView.getCollectionPtr()); + } } void translateStagesIfRequired(const boost::intrusive_ptr& expCtx, diff --git a/src/mongo/db/query/timeseries/timeseries_translation_test.cpp b/src/mongo/db/query/timeseries/timeseries_translation_test.cpp index 6730eb9939a..758f16a4720 100644 --- a/src/mongo/db/query/timeseries/timeseries_translation_test.cpp +++ b/src/mongo/db/query/timeseries/timeseries_translation_test.cpp @@ -30,12 +30,18 @@ #include "mongo/db/query/timeseries/timeseries_translation.h" #include "mongo/bson/json.h" +#include "mongo/db/index/s2_common.h" +#include "mongo/db/matcher/expression_internal_bucket_geo_within.h" #include "mongo/db/namespace_string.h" #include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/db/pipeline/document_source_match.h" #include "mongo/db/pipeline/expression_context_for_test.h" #include "mongo/db/pipeline/pipeline_factory.h" +#include "mongo/db/query/util/make_data_structure.h" +#include "mongo/db/shard_role/shard_catalog/catalog_test_fixture.h" #include "mongo/db/shard_role/shard_catalog/create_collection.h" #include "mongo/db/shard_role/shard_role.h" +#include "mongo/db/timeseries/timeseries_gen.h" #include "mongo/db/timeseries/timeseries_test_fixture.h" #include "mongo/idl/server_parameter_test_controller.h" #include "mongo/unittest/unittest.h" @@ -374,5 +380,79 @@ TEST_F(TimeseriesRewritesTest, BucketsFixedTest) { } } +/** + * Fixture that sets up a timeseries buckets collection with a 2dsphere_bucket index to verify + * the bucket spec uses the index's 2dsphereIndexVersion when creating + * InternalBucketGeoWithinMatchExpression. + */ +class GeoIndexVersionWithCatalogTest : public CatalogTestFixture { +public: + void setUp() override { + CatalogTestFixture::setUp(); + auto opCtx = operationContext(); + + CreateCommand createCmd(_measurementsNss); + createCmd.getCreateCollectionRequest().setTimeseries(TimeseriesOptions("time")); + ASSERT_OK(createCollection(opCtx, createCmd)); + + // Add a 2dsphere_bucket index on data.loc with 2dsphereIndexVersion 3. + const BSONObj indexSpec = + BSON("v" << 2 << "key" << BSON("data.loc" << "2dsphere_bucket") << "name" + << "loc_2dsphere" + << "2dsphereIndexVersion" << 3); + ASSERT_OK(storageInterface()->createIndexesOnEmptyCollection( + opCtx, _measurementsNss.makeTimeseriesBucketsNamespace(), {indexSpec})); + } + + boost::intrusive_ptr getExpCtx() { + return make_intrusive(operationContext(), _measurementsNss); + } + + CollectionOrViewAcquisition getCollAcquisition() { + return acquireCollectionOrView(operationContext(), + CollectionOrViewAcquisitionRequest( + _measurementsNss.makeTimeseriesBucketsNamespace(), + PlacementConcern{boost::none, ShardVersion::UNTRACKED()}, + repl::ReadConcernArgs::get(operationContext()), + AcquisitionPrerequisites::kRead), + MODE_IS); + } + + const NamespaceString _measurementsNss = + NamespaceString::createNamespaceString_forTest("test", "pipeline_test"); +}; + +TEST_F(GeoIndexVersionWithCatalogTest, GeoWithinUses2dsphereBucketIndexVersionFromCatalog) { + // Simulate a pipeline arriving from the router: it has a pre-translated unpack stage but no + // 2dsphere index version info (the router lacks index catalog access). + auto expCtx = getExpCtx(); + auto pipeline = pipeline_factory::makePipeline( + makeVector(fromjson("{$_internalUnpackBucket: {exclude: [], timeField: " + "'time', bucketMaxSpanSeconds: 3600}}"), + fromjson("{$match: {loc: {$geoWithin: {$geometry: {type: \"Polygon\", " + "coordinates: [ [ [ 0, 0 ], [ 3, 6 ], [ 6, 1 ], [ 0, 0 ] ] ]}}}}}")), + expCtx, + pipeline_factory::kOptionsMinimal); + + auto collAcquisition = getCollAcquisition(); + timeseries::translateStagesIfRequired(expCtx, *pipeline, collAcquisition); + + auto& container = pipeline->getSources(); + ASSERT_EQ(container.size(), 2U); + + auto original = dynamic_cast(container.back().get()); + auto predicate = dynamic_cast(container.front().get()) + ->createPredicatesOnBucketLevelField(original->getMatchExpression()); + + ASSERT_TRUE(predicate.loosePredicate); + auto* ibgw = + dynamic_cast(predicate.loosePredicate.get()); + ASSERT_NE(ibgw, nullptr) + << "Expected InternalBucketGeoWithinMatchExpression when $geoWithin is pushed down"; + ASSERT_TRUE(ibgw->getIndexVersion()) + << "Expected index version from catalog when 2dsphere_bucket index exists"; + ASSERT_EQ(*ibgw->getIndexVersion(), S2_INDEX_VERSION_3) + << "Expected 2dsphereIndexVersion 3 from the index we created"; +} } // namespace } // namespace mongo diff --git a/src/mongo/db/timeseries/BUILD.bazel b/src/mongo/db/timeseries/BUILD.bazel index dee44dfe941..c5ccec68a4f 100644 --- a/src/mongo/db/timeseries/BUILD.bazel +++ b/src/mongo/db/timeseries/BUILD.bazel @@ -46,6 +46,26 @@ mongo_cc_library( ], ) +mongo_cc_library( + name = "timeseries_constants", + hdrs = ["timeseries_constants.h"], + deps = [ + "//src/mongo:base", + "//src/mongo/db/commands:create_command", + ], +) + +mongo_cc_library( + name = "timeseries_2dsphere_index_version_lookup", + srcs = ["timeseries_2dsphere_index_version_lookup.cpp"], + deps = [ + ":timeseries_constants", + "//src/mongo/db:server_base", + "//src/mongo/db/shard_role/shard_catalog:collection_catalog", + "//src/mongo/db/shard_role/shard_catalog:index_catalog", + ], +) + mongo_cc_library( name = "timeseries_metadata", srcs = [ @@ -258,6 +278,7 @@ mongo_cc_unit_test( "bucket_compression_test.cpp", "catalog_helper_test.cpp", "collection_pre_conditions_util_test.cpp", + "timeseries_2dsphere_index_version_lookup_test.cpp", "timeseries_collmod_test.cpp", "timeseries_dotted_path_support_test.cpp", "timeseries_extended_range_test.cpp", @@ -275,6 +296,7 @@ mongo_cc_unit_test( deps = [ ":bucket_compression", ":collection_pre_conditions_util", + ":timeseries_2dsphere_index_version_lookup", ":timeseries_conversion_util", ":timeseries_extended_range", ":timeseries_options", diff --git a/src/mongo/db/timeseries/OWNERS.yml b/src/mongo/db/timeseries/OWNERS.yml index af9dbe74a3e..e2aef1207f8 100644 --- a/src/mongo/db/timeseries/OWNERS.yml +++ b/src/mongo/db/timeseries/OWNERS.yml @@ -3,6 +3,9 @@ filters: - "*": approvers: - 10gen/server-collection-write-path + - "timeseries_2dsphere_index_version_lookup*": + approvers: + - 10gen/query-integration-timeseries - "*catalog_helper*": approvers: - 10gen/server-catalog-and-routing-shard-catalog diff --git a/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.cpp b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.cpp new file mode 100644 index 00000000000..1ae69ef45c5 --- /dev/null +++ b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.cpp @@ -0,0 +1,71 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h" + +#include "mongo/db/index_names.h" +#include "mongo/db/shard_role/shard_catalog/collection.h" +#include "mongo/db/shard_role/shard_catalog/index_catalog.h" +#include "mongo/db/shard_role/shard_catalog/index_descriptor.h" +#include "mongo/db/timeseries/timeseries_constants.h" + +namespace mongo::timeseries { + +StringMap build2dsphereIndexVersionMap(const Collection& coll) { + StringMap result; + const std::string dataPrefix = str::stream() << kBucketDataFieldName << "."; + auto it = coll.getIndexCatalog()->getIndexIterator(IndexCatalog::InclusionPolicy::kReady); + while (it->more()) { + const auto* entry = it->next(); + const auto* desc = entry->descriptor(); + BSONElement versionElt = + desc->infoObj().getField(IndexDescriptor::k2dsphereVersionFieldName); + if (!versionElt.isNumber()) { + continue; + } + long long version = versionElt.numberLong(); + if (version < 1 || version > 4) { + continue; + } + for (auto&& keyElt : desc->keyPattern()) { + if (keyElt.valueStringDataSafe() != IndexNames::GEO_2DSPHERE_BUCKET) { + continue; + } + StringData keyPath = keyElt.fieldNameStringData(); + if (keyPath.size() <= dataPrefix.size() || !keyPath.starts_with(dataPrefix)) { + continue; + } + result.emplace(std::string(keyPath.substr(dataPrefix.size())), + static_cast(version)); + } + } + return result; +} + +} // namespace mongo::timeseries diff --git a/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h new file mode 100644 index 00000000000..cd90ad7ea8e --- /dev/null +++ b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/shard_role/shard_role.h" +#include "mongo/util/modules.h" +#include "mongo/util/string_map.h" + +namespace mongo { + +namespace timeseries { + +/** + * Scans all ready indexes on 'coll' and returns a map from user-facing field name to + * 2dsphereIndexVersion for every 2dsphere_bucket index found. When multiple indexes cover the + * same field, the version from the first index encountered is kept. + */ +MONGO_MOD_PUBLIC StringMap build2dsphereIndexVersionMap(const Collection& coll); + +} // namespace timeseries +} // namespace mongo diff --git a/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup_test.cpp b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup_test.cpp new file mode 100644 index 00000000000..6beb0cb1965 --- /dev/null +++ b/src/mongo/db/timeseries/timeseries_2dsphere_index_version_lookup_test.cpp @@ -0,0 +1,232 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/timeseries/timeseries_2dsphere_index_version_lookup.h" + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/index_names.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/server_options.h" +#include "mongo/db/shard_role/lock_manager/lock_manager_defs.h" +#include "mongo/db/shard_role/shard_catalog/catalog_raii.h" +#include "mongo/db/shard_role/shard_catalog/catalog_test_fixture.h" +#include "mongo/db/shard_role/shard_catalog/collection.h" +#include "mongo/db/shard_role/shard_catalog/index_catalog.h" +#include "mongo/db/shard_role/shard_catalog/index_descriptor.h" +#include "mongo/db/storage/write_unit_of_work.h" +#include "mongo/idl/server_parameter_test_controller.h" +#include "mongo/unittest/unittest.h" + +#include + +#include + +namespace mongo { +namespace { + +/** Builds a minimal ready index spec for a 2dsphere_bucket index on 'bucketKeyPath' (e.g. + * data.loc). */ +BSONObj make2dsphereBucketIndexSpec(StringData indexName, + StringData bucketKeyPath, + BSONObj extraFields = {}) { + BSONObjBuilder bob; + bob.append("v", 2); + bob.append("key", BSON(bucketKeyPath << IndexNames::GEO_2DSPHERE_BUCKET)); + bob.append("name", indexName); + bob.appendElements(extraFields); + return bob.obj(); +} + +class Build2dsphereIndexVersionMapTest : public CatalogTestFixture { +public: + OperationContext* opCtxV() { + return operationContext(); + } + + const CollectionPtr& collPtr() const { + return *_coll.get(); + } + + const Collection& collection() const { + return *collPtr().get(); + } + + void createIndexAssertOk(const BSONObj& spec) { + WriteUnitOfWork wuow(opCtxV()); + CollectionWriter writer{opCtxV(), _coll.get()}; + auto* writable = writer.getWritableCollection(opCtxV()); + auto sw = + writable->getIndexCatalog()->createIndexOnEmptyCollection(opCtxV(), writable, spec); + ASSERT_OK(sw); + wuow.commit(); + } + +protected: + void setUp() override { + CatalogTestFixture::setUp(); + ASSERT_OK(storageInterface()->createCollection(opCtxV(), _nss, {})); + _coll.emplace(opCtxV(), _nss, MODE_X); + } + + void tearDown() override { + _coll.reset(); + CatalogTestFixture::tearDown(); + } + + NamespaceString _nss = NamespaceString::createNamespaceString_forTest("test", "ts_buckets"); + boost::optional _coll; +}; + +/** + * Index specs with 2dsphereIndexVersion 4 are only accepted when the v4 feature flag is enabled + * and FCV is new enough. + */ +class Build2dsphereIndexVersionMapV4Test : public Build2dsphereIndexVersionMapTest { +protected: + void setUp() override { + auto fcvSnap = serverGlobalParams.mutableFCV.acquireFCVSnapshot(); + if (fcvSnap.isVersionInitialized()) { + _fcvBefore = fcvSnap.getVersion(); + } + _enableV4.emplace("featureFlag2dsphereIndexVersion4", true); + // (Generic FCV reference): kLatest so getDefaultS2IndexVersion() is 4 and v4 bucket indexes + // can be created in this test fixture. + serverGlobalParams.mutableFCV.setVersion(multiversion::GenericFCV::kLatest); + Build2dsphereIndexVersionMapTest::setUp(); + } + + void tearDown() override { + Build2dsphereIndexVersionMapTest::tearDown(); + _enableV4.reset(); + if (_fcvBefore) { + serverGlobalParams.mutableFCV.setVersion(*_fcvBefore); + } else { + serverGlobalParams.mutableFCV.reset(); + } + } + +private: + std::optional _enableV4; + std::optional _fcvBefore; +}; + +TEST_F(Build2dsphereIndexVersionMapTest, EmptyCatalogReturnsEmptyMap) { + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT(m.empty()); +} + +TEST_F(Build2dsphereIndexVersionMapTest, MapsDataFieldToVersion) { + createIndexAssertOk(make2dsphereBucketIndexSpec( + "loc_2dsphere", "data.loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3))); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 1U); + auto it = m.find("loc"); + ASSERT(it != m.end()); + ASSERT_EQ(it->second, 3); +} + +TEST_F(Build2dsphereIndexVersionMapTest, StripsDataPrefixForNestedPath) { + createIndexAssertOk(make2dsphereBucketIndexSpec( + "geo_2dsphere", "data.geo.sub"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3))); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 1U); + auto it = m.find("geo.sub"); + ASSERT(it != m.end()); + ASSERT_EQ(it->second, 3); +} + +TEST_F(Build2dsphereIndexVersionMapTest, CompoundIndexMapsOnly2dsphereBucketField) { + BSONObj spec = + BSON("v" << 2 << "key" + << BSON("data.ct" << 1 << "data.loc" << IndexNames::GEO_2DSPHERE_BUCKET) << "name" + << "cmp_ct_loc" << IndexDescriptor::k2dsphereVersionFieldName << 3); + createIndexAssertOk(spec); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 1U); + ASSERT_EQ(m["loc"], 3); +} + +TEST_F(Build2dsphereIndexVersionMapTest, MultipleIndexesAccumulateDistinctFields) { + createIndexAssertOk(make2dsphereBucketIndexSpec( + "a_geo", "data.a"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3))); + createIndexAssertOk(make2dsphereBucketIndexSpec( + "b_geo", "data.b"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3))); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 2U); + ASSERT_EQ(m["a"], 3); + ASSERT_EQ(m["b"], 3); +} + +TEST_F(Build2dsphereIndexVersionMapTest, Non2dsphereBucketKeySkipped) { + BSONObj spec = BSON("v" << 2 << "key" << BSON("data.loc" << 1) << "name" + << "loc_1" << IndexDescriptor::k2dsphereVersionFieldName << 3); + createIndexAssertOk(spec); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT(m.empty()); +} + +TEST_F(Build2dsphereIndexVersionMapTest, KeyNotUnderDataPrefixSkipped) { + // Path must be "data." for the map; a root-level geo field is ignored here. + createIndexAssertOk(make2dsphereBucketIndexSpec( + "root_geo", "loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 3))); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT(m.empty()); +} + +TEST_F(Build2dsphereIndexVersionMapV4Test, MapsDataFieldToVersion4) { + createIndexAssertOk(make2dsphereBucketIndexSpec( + "loc_2dsphere_v4", "data.loc"_sd, BSON(IndexDescriptor::k2dsphereVersionFieldName << 4))); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 1U); + ASSERT_EQ(m["loc"], 4); +} + +TEST_F(Build2dsphereIndexVersionMapV4Test, CompoundIndexMaps2dsphereBucketFieldWithVersion4) { + BSONObj spec = + BSON("v" << 2 << "key" + << BSON("data.ct" << 1 << "data.loc" << IndexNames::GEO_2DSPHERE_BUCKET) << "name" + << "cmp_ct_loc_v4" << IndexDescriptor::k2dsphereVersionFieldName << 4); + createIndexAssertOk(spec); + + auto m = timeseries::build2dsphereIndexVersionMap(collection()); + ASSERT_EQ(m.size(), 1U); + ASSERT_EQ(m["loc"], 4); +} + +} // namespace +} // namespace mongo