SERVER-123276 Update Geodata PBT to stress index maintenance (#50985)

GitOrigin-RevId: 04079db64e5f215384a28fee6baac0ee4e79deb8
This commit is contained in:
Benjamin Pearce 2026-04-09 14:28:19 -04:00 committed by MongoDB Bot
parent fcecda1336
commit a53bb22fb3
9 changed files with 397 additions and 217 deletions

View File

@ -0,0 +1,12 @@
# Build rules for the JavaScript test files in this package.
load("//bazel:mongo_js_rules.bzl", "all_subpackage_javascript_files", "mongo_js_library")
# Every target declared in this package is visible to all other packages.
package(default_visibility = ["//visibility:public"])
# Collects every *.js file located directly in this package into a single library target.
mongo_js_library(
name = "all_javascript_files",
srcs = glob([
"*.js",
]),
)
# NOTE(review): presumably aggregates the JavaScript targets of subpackages; confirm against
# //bazel:mongo_js_rules.bzl.
all_subpackage_javascript_files()

View File

@ -0,0 +1,87 @@
/**
* Leverage mochalite to generate a describe block with necessary hooks for geodata PBTs.
* This is needed to split up long-running test cases into separate files to minimize evergreen timeout.
*/
import {describe, it} from "jstests/libs/mochalite.js";
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
import {makeEmptyModel} from "jstests/write_path/timeseries/pbt/lib/command_grammar.js";
import {assertCollectionsMatch} from "jstests/write_path/timeseries/pbt/lib/assertions.js";
import {getFcAssertArgs} from "jstests/write_path/timeseries/pbt/lib/fast_check_params.js";
import {getTimeseriesCollForRawOps} from "jstests/libs/raw_operation_utils.js";
// Run arguments obtained once at module load; Fixture.run passes them as the second argument to fc.assert.
const fcAssertArgs = getFcAssertArgs();
/**
 * Shared Mochalite / fast-check harness for the geodata PBTs.
 *
 * A Fixture owns one control collection and one timeseries collection. Test files construct a
 * Fixture and then call `run` with a program arbitrary (and, optionally, an arbitrary producing
 * aggregation pipelines to compare across both collections).
 */
export class Fixture {
    /**
     * Stores the configuration and immediately creates the collections via beforeHook().
     *
     * @param {Object} db database handle used to drop/create/look up the collections
     * @param {string} ctrlCollName name of the control collection
     * @param {string} tsCollName name of the timeseries collection
     * @param {string} metaField timeseries meta field name (also part of the index key)
     * @param {string} timeField timeseries time field name
     * @param {string} geoField field that receives the 2dsphere index
     */
    constructor(db, ctrlCollName, tsCollName, metaField, timeField, geoField) {
        Object.assign(this, {db, ctrlCollName, tsCollName, metaField, timeField, geoField});
        this.beforeHook();
    }

    /**
     * Drops and recreates both collections, refreshes the collection handles kept on the
     * fixture, and builds the compound {geoField: "2dsphere", metaField: 1} index on each.
     */
    beforeHook() {
        const {db, ctrlCollName, tsCollName, metaField, timeField, geoField} = this;
        // A fresh spec object per createIndex call, as in two separate literals.
        const makeIndexSpec = () => ({[geoField]: "2dsphere", [metaField]: 1});

        db[ctrlCollName].drop();
        db[tsCollName].drop();
        db.createCollection(ctrlCollName);
        db.createCollection(tsCollName, {timeseries: {timeField, metaField}});

        this.ctrlColl = db.getCollection(ctrlCollName);
        this.tsColl = db.getCollection(tsCollName);
        this.bucketColl = getTimeseriesCollForRawOps(this.tsColl.getDB(), this.tsColl);

        this.ctrlColl.createIndex(makeIndexSpec());
        this.tsColl.createIndex(makeIndexSpec());
    }

    /**
     * Registers a single Mochalite test that runs the fast-check property.
     *
     * For every generated program the property: runs the commands against the model, compares
     * both collections under each generated pipeline, and finally looks up every point stored
     * in the control collection in the timeseries collection with $geoIntersects.
     *
     * @param {fc.Arbitrary} programArb arbitrary producing the command sequence
     * @param {string} descriptor title of the generated test case
     * @param {fc.Arbitrary<Array>} [aggregationsArb] arbitrary producing the pipelines to compare;
     *     defaults to a single empty pipeline
     */
    run(programArb, descriptor, aggregationsArb = fc.constant([[]])) {
        // Flattens the geo values of all control documents into one result document per value.
        const collectPointsPipeline = () => [
            {$group: {_id: null, locs: {$push: `$${this.geoField}`}}},
            {$unwind: "$locs"},
            {$replaceRoot: {newRoot: "$locs"}},
        ];

        const checkProgram = (cmds, pipelines) => {
            const model = makeEmptyModel(this.ctrlColl, this.bucketColl);
            const setup = () => ({model, real: {tsColl: this.tsColl, ctrlColl: this.ctrlColl}});
            fc.modelRun(setup, cmds);

            for (const pipeline of pipelines) {
                assertCollectionsMatch(this.tsColl, this.ctrlColl, pipeline);
            }

            // Every point known to the control collection must be found in the timeseries collection.
            const storedPoints = this.ctrlColl.aggregate(collectPointsPipeline()).toArray();
            storedPoints.forEach((geoPoint) => {
                const matches = this.tsColl
                    .aggregate([{$match: {[this.geoField]: {$geoIntersects: {$geometry: geoPoint}}}}])
                    .toArray();
                assert.gt(matches.length, 0, {error: "Point not found in aggregation", geoPoint});
            });
        };

        describe("Geospatial Comparative PBT for timeseries collections", () => {
            it(descriptor, () => {
                const property = fc
                    .property(programArb, aggregationsArb, checkProgram)
                    .beforeEach(() => this.beforeHook());
                fc.assert(property, fcAssertArgs);
            });
        });
    }
}

View File

@ -0,0 +1,103 @@
/*
* Arbitraries for generating GeoJSON metrics.
*/
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
// Default coordinate bounds; makeLongLatArb falls back to these when the caller supplies no range.
const defaultLatitudeMin = -90;
const defaultLatitudeMax = 90;
const defaultLongitudeMin = -180;
const defaultLongitudeMax = 180;
/**
 * Build an arbitrary producing real numbers in [min, max] that lie on a fixed grid.
 *
 * An integer is drawn from fc.integer over the scaled range and divided by `resolution`, so
 * generated values are multiples of 1 / resolution. fc.double is avoided because it has a
 * tendency to cluster values at extremes.
 *
 * fc.integer only accepts integer bounds, so the scaled bounds are rounded to the nearest
 * integer before being handed over (a no-op whenever min * resolution and max * resolution are
 * already integers). Rounding can step just outside the requested range; the final clamp pulls
 * such values back to min / max.
 *
 * @param {number} min minimum value (inclusive)
 * @param {number} max maximum value (inclusive)
 * @param {number} [resolution] number of grid steps per unit
 * @returns {fc.Arbitrary<number>} an arbitrary generating numbers within [min, max]
 */
const normalDistRealArb = (min, max, resolution = 1000.0) => {
    const scaledMin = Math.round(min * resolution);
    const scaledMax = Math.round(max * resolution);
    return fc
        .integer({min: scaledMin, max: scaledMax})
        .map((x) => Math.min(Math.max(x / resolution, min), max));
};
/**
 * Make an arbitrary for a [longitude, latitude] coordinate pair.
 *
 * Each missing range falls back to the module defaults (longitude -180..180, latitude -90..90).
 *
 * @param {Object} [ranges] ranges for longitude and latitude
 * @param {Range} [ranges.longitudeRange] {min, max} bounds for the first tuple element
 * @param {Range} [ranges.latitudeRange] {min, max} bounds for the second tuple element
 *
 * @returns {fc.Arbitrary<Array<number>>} an arbitrary generating [longitude, latitude] tuples
 */
export function makeLongLatArb(ranges = {}) {
    const {min: longitudeMin, max: longitudeMax} = ranges?.longitudeRange ?? {
        min: defaultLongitudeMin,
        max: defaultLongitudeMax,
    };
    const {min: latitudeMin, max: latitudeMax} = ranges?.latitudeRange ?? {
        min: defaultLatitudeMin,
        max: defaultLatitudeMax,
    };
    const longitudeArb = normalDistRealArb(longitudeMin, longitudeMax);
    const latitudeArb = normalDistRealArb(latitudeMin, latitudeMax);
    return fc.tuple(longitudeArb, latitudeArb);
}
/**
 * Make a GeoJSON Point arbitrary.
 *
 * Coordinates come from makeLongLatArb(ranges) and are wrapped as
 * {type: "Point", coordinates: [longitude, latitude]}.
 *
 * @param {Object} [ranges] ranges for longitude and latitude, forwarded to makeLongLatArb
 * @param {Range} [ranges.latitudeRange]
 * @param {Range} [ranges.longitudeRange]
 *
 * @returns {fc.Arbitrary<Object>} an arbitrary to generate a GeoJSON point
 */
export function makeGeoPointArb(ranges = {}) {
    const toGeoJsonPoint = ([long, lat]) => ({type: "Point", coordinates: [long, lat]});
    return makeLongLatArb(ranges).map(toGeoJsonPoint);
}
/**
 * Bind an arbitrary builder to a fixed set of options.
 *
 * @param {function} [prototype] builder to invoke, e.g. makeGeoPointArb
 * @param {Object} [options] options to forward to the prototype
 *
 * @returns {function} a zero-argument function that returns prototype(options) on each call
 */
export function makeGeoArbFactory(prototype, options = {}) {
    return () => prototype(options);
}
/**
 * Make an arbitrary that generates a single-stage $geoNear aggregation pipeline with
 * `spherical: true`.
 *
 * Each pipeline searches around a generated GeoJSON point and limits results with a generated
 * `maxDistance`, writing the computed distance to the "dist" field.
 *
 * @param {string} geoField geospatial data field (passed as the `key` of the $geoNear stage)
 * @param {number} [maxDistanceMeters] upper bound for the generated maxDistance
 * @param {number} [minDistanceMeters] lower bound for the generated maxDistance; the stage
 *     itself does not set a minDistance option
 * @param {Object} [ranges] forwarded to makeLongLatArb for the search center
 * @param {Range} [ranges.latitudeRange]
 * @param {Range} [ranges.longitudeRange]
 * @returns {fc.Arbitrary<Array>} an arbitrary that generates a $geoNear aggregation pipeline
 */
export function makeGeospatialAggregationPipelineArb(
    geoField,
    maxDistanceMeters = 45_000_000,
    minDistanceMeters = 0.0,
    ranges = {},
) {
    const buildPipeline = ([radiusMeters, [longitude, latitude]]) => {
        const geoNearStage = {
            near: {type: "Point", coordinates: [longitude, latitude]},
            distanceField: "dist",
            maxDistance: radiusMeters,
            key: geoField,
            spherical: true,
        };
        return [{$geoNear: geoNearStage}];
    };

    const radiusArb = normalDistRealArb(minDistanceMeters, maxDistanceMeters);
    const centerArb = makeLongLatArb(ranges);
    return fc.tuple(radiusArb, centerArb).map(buildPipeline);
}

View File

@ -0,0 +1,85 @@
/**
* A property-based test that exercises geonear queries against timeseries collections.
*
* @tags: [
* query_intensive_pbt,
* requires_timeseries,
* # Runs queries that may return many results, requiring getmores.
* requires_getmore,
* # This test runs commands that are not allowed with security token: setParameter.
* not_allowed_with_signed_security_token,
* ]
*/
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
import {
makeBatchInsertCommandArb,
makeDeleteByFilterCommandArb,
makeDeleteByRandomIdCommandArb,
makeInsertCommandArb,
} from "jstests/write_path/timeseries/pbt/lib/command_arbitraries.js";
import {
makeGeoArbFactory,
makeGeoPointArb,
makeGeospatialAggregationPipelineArb,
} from "jstests/write_path/timeseries/pbt/geodata/geodata_arbitraries.js";
import {getFcParams} from "jstests/write_path/timeseries/pbt/lib/fast_check_params.js";
import {Fixture} from "jstests/write_path/timeseries/pbt/geodata/generate_pbt_fixture.js";
const fcParams = getFcParams();

const geoField = "loc";
const timeField = "ts";
const metaField = "meta";
const metaValue = "geospatial";
const ctrlCollName = jsTestName() + "_control";
const tsCollName = jsTestName() + "_timeseries";

// Constructing the fixture creates both collections and their 2dsphere indexes.
const fixture = new Fixture(db, ctrlCollName, tsCollName, metaField, timeField, geoField);

// Generated documents are confined to a one-hour window. The upper bound is derived with
// epoch-millisecond arithmetic so it does not depend on the local timezone or DST rules.
const oneHourMillis = 60 * 60 * 1000;
const minDate = new Date("2026-03-25T00:00:00Z");
const maxDate = new Date(minDate.getTime() + oneHourMillis);

// Points are restricted to a small box around the origin; the $geoNear centers generated below
// use the default (whole-globe) ranges.
const insertOptions = {
    explicitArbitraries: {
        [geoField]: makeGeoArbFactory(makeGeoPointArb, {
            longitudeRange: {min: -5.0, max: 5.0},
            latitudeRange: {min: -5.0, max: 5.0},
        }),
    },
    dateRange: {min: minDate, max: maxDate},
};

const [minFields, maxFields, minDocs, maxDocs] = [1, 3, 0, 100];
const insertArb = makeInsertCommandArb(timeField, metaField, metaValue, minFields, maxFields, insertOptions);
const batchInsertArb = makeBatchInsertCommandArb(
    timeField,
    metaField,
    metaValue,
    minFields,
    maxFields,
    minDocs,
    maxDocs,
    insertOptions,
);
const deleteArb = makeDeleteByRandomIdCommandArb();

// Command programs mix single inserts, batch inserts and deletes.
const programArb = fc.commands(
    [insertArb, batchInsertArb, deleteArb],
    fcParams.maxCommands || 200,
    fcParams.replayPath,
);

// Each run compares between 5 and 60 $geoNear pipelines across both collections.
const maxDistanceMeters = 10_000;
const aggregationCountRange = {minLength: 5, maxLength: 60};
const aggregationsArb = fc.array(
    makeGeospatialAggregationPipelineArb(geoField, maxDistanceMeters),
    aggregationCountRange,
);

fixture.run(
    programArb,
    "produces equal $geoNear aggregations across Insert, BatchInsert, and Delete Commands",
    aggregationsArb,
);

View File

@ -0,0 +1,48 @@
/**
* A property-based test that exercises geodata sync in timeseries collections.
*
* @tags: [
* query_intensive_pbt,
* requires_timeseries,
* # Runs queries that may return many results, requiring getmores.
* requires_getmore,
* # This test runs commands that are not allowed with security token: setParameter.
* not_allowed_with_signed_security_token,
* ]
*/
import {makeTimeseriesCommandSequenceArb} from "jstests/write_path/timeseries/pbt/lib/command_arbitraries.js";
import {makeGeoArbFactory, makeGeoPointArb} from "jstests/write_path/timeseries/pbt/geodata/geodata_arbitraries.js";
import {getFcParams} from "jstests/write_path/timeseries/pbt/lib/fast_check_params.js";
import {Fixture} from "jstests/write_path/timeseries/pbt/geodata/generate_pbt_fixture.js";
const fcParams = getFcParams();

// Field names and the constant meta value shared by the fixture and the generated commands.
const [geoField, timeField, metaField, metaValue] = ["loc", "ts", "meta", "geospatial"];
const ctrlCollName = `${jsTestName()}_control`;
const tsCollName = `${jsTestName()}_timeseries`;

// Constructing the fixture creates both collections and their 2dsphere indexes.
const fixture = new Fixture(db, ctrlCollName, tsCollName, metaField, timeField, geoField);

// Bounds for the generated programs; fcParams may override the command counts.
const commandLimits = {min: fcParams.minCommands || 1, max: fcParams.maxCommands || 30};
const fieldLimits = {min: 1, max: 1};
const docLimits = {min: 0, max: 100};

// Every generated document carries a GeoJSON point (default ranges) in `geoField`.
const arbOptions = {
    explicitArbitraries: {[geoField]: makeGeoArbFactory(makeGeoPointArb)},
};
// undefined selects the default short-string field names.
const fieldNameArb = undefined;

const programArb = makeTimeseriesCommandSequenceArb(
    commandLimits.min,
    commandLimits.max,
    timeField,
    metaField,
    metaValue,
    fieldLimits.min,
    fieldLimits.max,
    docLimits.min,
    docLimits.max,
    arbOptions,
    fieldNameArb,
    fcParams.replayPath,
);

fixture.run(programArb, "keeps tsColl and ctrlColl in sync under insert/batch-insert/delete of GeoPoint data");

View File

@ -2,6 +2,7 @@
* Assertions PBT
*/
import {getWinningPlanFromExplain} from "jstests/libs/query/analyze_plan.js";
import {getTimeseriesCollForRawOps} from "jstests/libs/raw_operation_utils.js";
/**
@ -16,18 +17,33 @@ export function assertCollectionValid(tsColl) {
}
/**
* Compare the results of a query against a timeseries collection and standard collection which should be identical.
* Compare the results of an aggregation pipeline against a timeseries collection and standard
* collection which should be identical. An empty pipeline selects all documents, equivalent to
* find({}). Pipelines beginning with $geoNear are also supported.
*
* @param {DBCollection} tsColl timeseries collection representing "actual" state
* @param {DBCollection} ctrlColl standard collection used as control representing "expected" state
* @param {DBCollection} bucketColl raw timeseries bucket collection
* @param {Object} [query] query specification
* @param {Array} [pipeline] aggregation pipeline to run against both collections
* @param {boolean} [verbose] enable verbose output of the query results and query spec for both collections, useful for debugging
*/
export function assertCollectionsMatch(tsColl, ctrlColl, query = {}) {
export function assertCollectionsMatch(tsColl, ctrlColl, pipeline = [], verbose = false) {
const timeField = tsColl.getMetadata().options.timeseries.timeField;
const metaField = tsColl.getMetadata().options.timeseries.metaField;
const tsDocs = tsColl.find(query).sort({_id: 1}).toArray();
const ctrlDocs = ctrlColl.find(query).sort({_id: 1}).toArray();
const sorted = [...pipeline, {$sort: {_id: 1}}];
const tsDocs = tsColl.aggregate(sorted).toArray();
const ctrlDocs = ctrlColl.aggregate(sorted).toArray();
if (verbose) {
const tsExplain = tsColl.explain().aggregate(pipeline);
const ctrlExplain = ctrlColl.explain().aggregate(pipeline);
jsTest.log.info("Collection comparison aggregation results", {
tsDocs,
ctrlDocs,
pipeline,
tsWinningPlan: getWinningPlanFromExplain(tsExplain),
ctrlWinningPlan: getWinningPlanFromExplain(ctrlExplain),
});
}
if (tsDocs.length != ctrlDocs.length) {
jsTest.log.warning("The tsColl and ctrlColl size differs", {

View File

@ -1,85 +0,0 @@
/*
* Arbitraries for generating GeoJSON metrics.
*/
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
// Default coordinate bounds; makeLongLatArb falls back to these when the caller supplies no range.
const defaultLatitudeMin = -90.0;
const defaultLatitudeMax = 90.0;
const defaultLongitudeMin = -180.0;
const defaultLongitudeMax = 180.0;
// Options spread into every fc.double call made by makeLongLatArb: both bounds are inclusive
// and NaN is never generated.
const doubleArbOpts = {
minExcluded: false,
maxExcluded: false,
noNaN: true,
// NOTE(review): false keeps fast-check's default infinite bounds available; makeLongLatArb
// always passes explicit min/max alongside these options - confirm this setting is intended.
noDefaultInfinity: false,
};
/**
 * Make an arbitrary for a [longitude, latitude] coordinate pair built from fc.double.
 *
 * Each missing range falls back to the module defaults; every double is generated with the
 * shared doubleArbOpts.
 *
 * @param {Object} [ranges] ranges for longitude and latitude
 * @param {Range} [ranges.latitudeRange] {min, max} bounds for the second tuple element
 * @param {Range} [ranges.longitudeRange] {min, max} bounds for the first tuple element
 *
 * @returns {fc.Arbitrary<Array<number>>} an arbitrary generating [longitude, latitude] tuples
 */
export function makeLongLatArb(ranges = {}) {
    const boundedDouble = ({min, max}) => fc.double({min, max, ...doubleArbOpts});
    const latitudeRange = ranges?.latitudeRange ?? {min: defaultLatitudeMin, max: defaultLatitudeMax};
    const longitudeRange = ranges?.longitudeRange ?? {min: defaultLongitudeMin, max: defaultLongitudeMax};
    const longitudeArb = boundedDouble(longitudeRange);
    const latitudeArb = boundedDouble(latitudeRange);
    return fc.tuple(longitudeArb, latitudeArb);
}
/**
 * Make a GeoJSON Point arbitrary.
 *
 * The record has a constant `type` of "Point" and `coordinates` drawn from
 * makeLongLatArb(ranges).
 *
 * @param {Object} [ranges] ranges for longitude and latitude, forwarded to makeLongLatArb
 * @param {Range} [ranges.latitudeRange]
 * @param {Range} [ranges.longitudeRange]
 *
 * @returns {fc.Arbitrary<Object>} an arbitrary to generate a GeoJSON point
 */
export function makeGeoPointArb(ranges = {}) {
    const pointShape = {
        type: fc.constant("Point"),
        coordinates: makeLongLatArb(ranges),
    };
    return fc.record(pointShape);
}
/**
 * Bind an arbitrary builder to a fixed set of options.
 *
 * @param {function} [prototype] builder to invoke, e.g. makeGeoPointArb
 * @param {Object} [options] options to forward to the prototype
 *
 * @returns {function} a zero-argument function that returns prototype(options) on each call
 */
export function makeGeoArbFactory(prototype, options = {}) {
    const buildArb = () => prototype(options);
    return buildArb;
}
/**
 * Make an arbitrary that generates a {[geoField]: {$geoWithin: {$centerSphere: [...]}}} query.
 *
 * $centerSphere expects its radius in radians, so the meter bounds are converted by dividing
 * by the Earth's equatorial radius before being handed to fc.double.
 *
 * @param {string} geoField geospatial data field
 * @param {number | undefined} [maxDistanceMeters] upper bound of the generated radius, in
 *     meters; when omitted the radius is capped at pi radians (half the globe) so that no
 *     unbounded or infinite radius is generated
 * @param {number} [minDistanceMeters] lower bound of the generated radius, in meters
 * @param {Object} [ranges] forwarded to makeLongLatArb for the circle center
 * @param {Range} [ranges.latitudeRange]
 * @param {Range} [ranges.longitudeRange]
 * @returns {fc.Arbitrary<Object>} an arbitrary to generate a geoWithin query
 */
export function makeGeospatialQueryArb(geoField, maxDistanceMeters = undefined, minDistanceMeters = 0.0, ranges = {}) {
    // Equatorial radius of the Earth in meters; distance / radius gives the angle in radians.
    const earthRadiusMeters = 6378100;
    const minRadians = minDistanceMeters / earthRadiusMeters;
    const maxRadians = maxDistanceMeters === undefined ? Math.PI : maxDistanceMeters / earthRadiusMeters;
    return fc.record({
        [geoField]: fc.record({
            "$geoWithin": fc.record({
                "$centerSphere": fc.tuple(
                    makeLongLatArb(ranges),
                    fc.double({min: minRadians, max: maxRadians, noNaN: true}),
                ),
            }),
        }),
    });
}

View File

@ -0,0 +1,40 @@
/**
* This is a sampling test of geodata arbitraries.
* It can be run with the no_passthrough suite to test GeoJSON point and query generation.
*/
import {describe, it} from "jstests/libs/mochalite.js";
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
// geodata_arbitraries.js exports makeGeospatialAggregationPipelineArb; it has no
// makeGeospatialQueryArb export, and importing a missing named export fails at module link time.
import {
    makeGeoPointArb,
    makeGeospatialAggregationPipelineArb,
    makeLongLatArb,
} from "jstests/write_path/timeseries/pbt/geodata/geodata_arbitraries.js";

// Each case draws 20 samples from one arbitrary and logs them for manual inspection; nothing
// is asserted beyond sampling completing without an error.
describe("Geodata arbitraries", () => {
    it("makeLongLatArb creates longitude/latitude coordinate pairs", () => {
        const samples = fc.sample(makeLongLatArb(), 20);
        jsTest.log.info({samples});
    });

    it("makeGeoPointArb creates GeoJSON Point objects", () => {
        const samples = fc.sample(makeGeoPointArb(), 20);
        jsTest.log.info({samples});
    });

    it("makeGeoPointArb creates GeoJSON Points with restricted ranges", () => {
        const samples = fc.sample(
            makeGeoPointArb({
                longitudeRange: {min: -5.0, max: 5.0},
                latitudeRange: {min: -5.0, max: 5.0},
            }),
            20,
        );
        jsTest.log.info({samples});
    });

    it("makeGeospatialAggregationPipelineArb creates $geoNear aggregation pipelines", () => {
        const samples = fc.sample(makeGeospatialAggregationPipelineArb("loc", 10_000), 20);
        jsTest.log.info({samples});
    });
});

View File

@ -1,126 +0,0 @@
/**
* A property-based test that exercises geodata in timeseries collection.
*
* @tags: [
* query_intensive_pbt,
* requires_timeseries,
* # Runs queries that may return many results, requiring getmores.
* requires_getmore,
* # This test runs commands that are not allowed with security token: setParameter.
* not_allowed_with_signed_security_token,
* ]
*/
import {describe, it} from "jstests/libs/mochalite.js";
import {fc} from "jstests/third_party/fast_check/fc-4.6.0.js";
import {makeEmptyModel} from "jstests/write_path/timeseries/pbt/lib/command_grammar.js";
import {makeTimeseriesCommandSequenceArb} from "jstests/write_path/timeseries/pbt/lib/command_arbitraries.js";
import {
makeGeoArbFactory,
makeGeoPointArb,
makeGeospatialQueryArb,
} from "jstests/write_path/timeseries/pbt/lib/geodata_arbitraries.js";
import {assertCollectionValid, assertCollectionsMatch} from "jstests/write_path/timeseries/pbt/lib/assertions.js";
import {getFcParams, getFcAssertArgs} from "jstests/write_path/timeseries/pbt/lib/fast_check_params.js";
import {getTimeseriesCollForRawOps} from "jstests/libs/raw_operation_utils.js";
const fcParams = getFcParams();
const fcAssertArgs = getFcAssertArgs();

const ctrlCollName = jsTestName() + "_control";
const tsCollName = jsTestName() + "_timeseries";

const geoField = "loc";
const timeField = "ts";
const metaField = "meta";
const metaValue = "geospatial";

/**
 * Build the command-sequence arbitrary used by both test cases: insert / batch-insert / delete
 * programs whose documents carry a GeoJSON point in `geoField`.
 */
const makeProgramArb = () =>
    makeTimeseriesCommandSequenceArb(
        /* minCommands */ fcParams.minCommands || 1,
        /* maxCommands */ fcParams.maxCommands || 30,
        /* timeField */ timeField,
        /* metaField */ metaField,
        /* metaValue */ metaValue,
        /* minFields */ 1,
        /* maxFields */ 1,
        /* minDocs */ 0,
        /* maxDocs */ 10,
        /* options */ {
            explicitArbitraries: {[geoField]: makeGeoArbFactory(makeGeoPointArb)},
        },
        /* fieldNameArb */ undefined, // use default short-string field names
        /* replayPath */ fcParams.replayPath,
    );

describe("Geospatial Query Comparative Test for Timeseries", () => {
    let tsColl;
    let ctrlColl;
    let bucketColl;

    // Recreate both collections before every property run so that runs do not see each other's data.
    const beforeHook = () => {
        db[ctrlCollName].drop();
        db[tsCollName].drop();

        db.createCollection(ctrlCollName);
        // Use the shared field-name constants so the collection options cannot drift from the
        // names the command arbitraries generate.
        db.createCollection(tsCollName, {timeseries: {timeField, metaField}});

        ctrlColl = db.getCollection(ctrlCollName);
        tsColl = db.getCollection(tsCollName);
        bucketColl = getTimeseriesCollForRawOps(tsColl.getDB(), tsColl);

        // This test needs to create 2dsphere indexes to properly exercise the timeseries write path.
        ctrlColl.createIndex({[geoField]: "2dsphere"});
        tsColl.createIndex({[geoField]: "2dsphere"});
    };

    it("keeps tsColl and ctrlColl in sync under insert/batch-insert/delete of GeoPoint data", () => {
        const programArb = makeProgramArb();

        fc.assert(
            fc
                .property(programArb, (cmds) => {
                    const model = makeEmptyModel(ctrlColl, bucketColl);
                    fc.modelRun(() => ({model: model, real: {tsColl, ctrlColl}}), cmds);
                    assertCollectionsMatch(tsColl, ctrlColl);
                    assertCollectionValid(tsColl);
                })
                .beforeEach(beforeHook),
            fcAssertArgs,
        );
    });

    it("produces equal geonear queries", () => {
        const programArb = makeProgramArb();

        fc.assert(
            fc
                .property(
                    programArb,
                    fc.array(makeGeospatialQueryArb(geoField, 10000), {minLength: 1, maxLength: 40}),
                    (cmds, queries) => {
                        const model = makeEmptyModel(ctrlColl, bucketColl);
                        fc.modelRun(() => ({model: model, real: {tsColl, ctrlColl}}), cmds);
                        // Every generated query must return the same documents from both collections.
                        for (const query of queries) {
                            assertCollectionsMatch(tsColl, ctrlColl, query);
                        }
                        assertCollectionValid(tsColl);
                    },
                )
                .beforeEach(beforeHook),
            fcAssertArgs,
        );
    });
});