SERVER-124097 Report that $function might use RNG (#53943)

GitOrigin-RevId: f2ab725f6045ca3cbb5da91bc23f54f03e3057f2
This commit is contained in:
Vesko Karaganev 2026-05-20 09:52:55 +01:00 committed by MongoDB Bot
parent 16889ae975
commit 04de6c9fb5
4 changed files with 88 additions and 47 deletions

View File

@ -680,47 +680,3 @@ runTest({
expected: [{b: 0, x: [{fromLookup: 1}]}],
policy: "always",
});
// TODO(SERVER-124097): Fix this incorrect rewrite.
// it("$function side effects: outer $set with $function not hoisted before $lookup", function () {
// const coll = db.hoist_computation;
// coll.drop();
// coll.insertOne({c: 1});
//
// const readDistinctTime = function () {
// var now = Date.now();
// while (Date.now() === now) {}
// return new Date(now);
// };
//
// const timeFunction = {$function: {body: readDistinctTime, args: [], lang: "js"}};
//
// // The first $set is nested inside the $lookup sub-pipeline and captures time1.
// // It spins until the clock ticks so time1 is at a strictly earlier millisecond.
// // The outer $set captures time2 after the lookup completes.
// // If the optimizer hoisted the outer $set before the $lookup, time2 < time1, which is wrong.
// const pipeline = [
// {
// $lookup: {
// from: "hoist_computation_secondary",
// pipeline: [{$set: {time1: timeFunction}}],
// as: "result",
// },
// },
// {$set: {time2: timeFunction}},
// ];
//
// // Verify the outer $set is not hoisted before the $lookup.
// const explain = coll.explain().aggregate(pipeline);
// const stageTypes = extractUserStages(explain).map((s) => Object.keys(s)[0]);
// assert.eq(stageTypes, ["$lookup", "$set"], tojson({stageTypes}));
//
// // Verify execution order: time1 (set inside the lookup) must precede time2 (set after).
// const [
// {
// result: [{time1}],
// time2,
// },
// ] = coll.aggregate(pipeline).toArray();
// assert.lt(time1.getTime(), time2.getTime(), tojson({time1, time2}));
// });

View File

@ -0,0 +1,74 @@
/**
* Tests that $set stages containing $function expressions are not hoisted before $lookup, because
* $function may have side-effects or depend on execution order.
*
* @tags: [
* requires_scripting,
* requires_pipeline_optimization,
* # Uses a knob (internalQueryTransformHoistPolicy) that does not exist on older binaries.
* multiversion_incompatible,
* assumes_unsharded_collection,
* assumes_stable_shard_list,
* # Wrapping in $facet changes the explain structure so the stage-order check below cannot
* # inspect the original pipeline stages.
* do_not_wrap_aggregations_in_facets,
* # featureFlagImprovedDepsAnalysis behavior can differ across FCV boundaries.
* cannot_run_during_upgrade_downgrade,
* ]
*/
import {it} from "jstests/libs/mochalite.js";
db.hoist_computation_function_secondary.drop();
assert.commandWorked(db.hoist_computation_function_secondary.insert({}));
it("$function side effects: outer $set with $function not hoisted before $lookup", function () {
const coll = db.hoist_computation_function;
coll.drop();
coll.insertOne({c: 1});
const readDistinctTime = function () {
var now = Date.now();
while (Date.now() === now) {}
return new Date(now);
};
const timeFunction = {$function: {body: readDistinctTime, args: [], lang: "js"}};
// The first $set is nested inside the $lookup sub-pipeline and captures time1.
// It spins until the clock ticks so time1 is at a strictly earlier millisecond.
// The outer $set captures time2 after the lookup completes.
// If the optimizer hoisted the outer $set before the $lookup, time2 < time1, which is wrong.
const pipeline = [
{
$lookup: {
from: "hoist_computation_function_secondary",
pipeline: [{$set: {time1: timeFunction}}],
as: "result",
},
},
{$set: {time2: timeFunction}},
];
// Verify the outer $set is not hoisted before the $lookup.
// In a mongos passthrough the stages live under explain.shards[...].stages, not at the top
// level, so explain.stages is absent; skip the stage-order check and rely on the timing
// check below for correctness.
const explain = coll.explain().aggregate(pipeline);
if (explain.stages) {
const stages = explain.stages.map((s) => Object.keys(s)[0]);
assert.eq(
stages.filter((s) => s === "$lookup" || s === "$set"),
["$lookup", "$set"],
{stages},
);
}
// Verify execution order: time1 (set inside the lookup) must precede time2 (set after).
const [
{
result: [{time1}],
time2,
},
] = coll.aggregate(pipeline).toArray();
assert.lt(time1.getTime(), time2.getTime(), tojson({time1, time2}));
});

View File

@ -260,9 +260,10 @@ struct MONGO_MOD_NEEDS_REPLACEMENT DepsTracker {
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
// The output of some operators (such as $sample and $rand) depends on a source of fresh random
// numbers. During execution this dependency is implicit, but during optimize() we need to know
// about this dependency to decide whether it's ok to cache or reevaluate an operator.
// The output of some operators depends on a source of fresh random numbers or other
// non-deterministic state (e.g. $sample, $rand, $function, $_internalJsEmit). During
// execution this dependency is implicit, but during optimize() we need to know about it
// to decide whether it is safe to cache, reevaluate, or reorder an operator.
bool needRandomGenerator = false;
private:

View File

@ -255,6 +255,16 @@ public:
_deps->needRandomGenerator = true;
}
void visit(const ExpressionFunction*) final {
// JavaScript functions may call Math.random(), Date.now(), or other non-deterministic
// mechanisms that make reordering or caching unsafe.
_deps->needRandomGenerator = true;
}
void visit(const ExpressionInternalJsEmit*) final {
_deps->needRandomGenerator = true;
}
void visit(const ExpressionFieldPath* expr) final {
if (!expr->isVariableReference()) { // includes CURRENT when it is equivalent to ROOT.
if (expr->getFieldPath().getPathLength() == 1) {