SERVER-124097 Report that $function might use RNG (#53943)
GitOrigin-RevId: f2ab725f6045ca3cbb5da91bc23f54f03e3057f2
This commit is contained in:
parent
16889ae975
commit
04de6c9fb5
@ -680,47 +680,3 @@ runTest({
|
||||
expected: [{b: 0, x: [{fromLookup: 1}]}],
|
||||
policy: "always",
|
||||
});
|
||||
|
||||
// TODO(SERVER-124097): Fix this incorrect rewrite.
|
||||
// it("$function side effects: outer $set with $function not hoisted before $lookup", function () {
|
||||
// const coll = db.hoist_computation;
|
||||
// coll.drop();
|
||||
// coll.insertOne({c: 1});
|
||||
//
|
||||
// const readDistinctTime = function () {
|
||||
// var now = Date.now();
|
||||
// while (Date.now() === now) {}
|
||||
// return new Date(now);
|
||||
// };
|
||||
//
|
||||
// const timeFunction = {$function: {body: readDistinctTime, args: [], lang: "js"}};
|
||||
//
|
||||
// // The first $set is nested inside the $lookup sub-pipeline and captures time1.
|
||||
// // It spins until the clock ticks so time1 is at a strictly earlier millisecond.
|
||||
// // The outer $set captures time2 after the lookup completes.
|
||||
// // If the optimizer hoisted the outer $set before the $lookup, time2 < time1, which is wrong.
|
||||
// const pipeline = [
|
||||
// {
|
||||
// $lookup: {
|
||||
// from: "hoist_computation_secondary",
|
||||
// pipeline: [{$set: {time1: timeFunction}}],
|
||||
// as: "result",
|
||||
// },
|
||||
// },
|
||||
// {$set: {time2: timeFunction}},
|
||||
// ];
|
||||
//
|
||||
// // Verify the outer $set is not hoisted before the $lookup.
|
||||
// const explain = coll.explain().aggregate(pipeline);
|
||||
// const stageTypes = extractUserStages(explain).map((s) => Object.keys(s)[0]);
|
||||
// assert.eq(stageTypes, ["$lookup", "$set"], tojson({stageTypes}));
|
||||
//
|
||||
// // Verify execution order: time1 (set inside the lookup) must precede time2 (set after).
|
||||
// const [
|
||||
// {
|
||||
// result: [{time1}],
|
||||
// time2,
|
||||
// },
|
||||
// ] = coll.aggregate(pipeline).toArray();
|
||||
// assert.lt(time1.getTime(), time2.getTime(), tojson({time1, time2}));
|
||||
// });
|
||||
|
||||
@ -0,0 +1,74 @@
|
||||
/**
|
||||
* Tests that $set stages containing $function expressions are not hoisted before $lookup, because
|
||||
* $function may have side-effects or depend on execution order.
|
||||
*
|
||||
* @tags: [
|
||||
* requires_scripting,
|
||||
* requires_pipeline_optimization,
|
||||
* # Uses a knob (internalQueryTransformHoistPolicy) that does not exist on older binaries.
|
||||
* multiversion_incompatible,
|
||||
* assumes_unsharded_collection,
|
||||
* assumes_stable_shard_list,
|
||||
* # Wrapping in $facet changes the explain structure so the stage-order check below cannot
|
||||
* # inspect the original pipeline stages.
|
||||
* do_not_wrap_aggregations_in_facets,
|
||||
* # featureFlagImprovedDepsAnalysis behavior can differ across FCV boundaries.
|
||||
* cannot_run_during_upgrade_downgrade,
|
||||
* ]
|
||||
*/
|
||||
import {it} from "jstests/libs/mochalite.js";
|
||||
|
||||
db.hoist_computation_function_secondary.drop();
|
||||
assert.commandWorked(db.hoist_computation_function_secondary.insert({}));
|
||||
|
||||
it("$function side effects: outer $set with $function not hoisted before $lookup", function () {
|
||||
const coll = db.hoist_computation_function;
|
||||
coll.drop();
|
||||
coll.insertOne({c: 1});
|
||||
|
||||
const readDistinctTime = function () {
|
||||
var now = Date.now();
|
||||
while (Date.now() === now) {}
|
||||
return new Date(now);
|
||||
};
|
||||
|
||||
const timeFunction = {$function: {body: readDistinctTime, args: [], lang: "js"}};
|
||||
|
||||
// The first $set is nested inside the $lookup sub-pipeline and captures time1.
|
||||
// It spins until the clock ticks so time1 is at a strictly earlier millisecond.
|
||||
// The outer $set captures time2 after the lookup completes.
|
||||
// If the optimizer hoisted the outer $set before the $lookup, time2 < time1, which is wrong.
|
||||
const pipeline = [
|
||||
{
|
||||
$lookup: {
|
||||
from: "hoist_computation_function_secondary",
|
||||
pipeline: [{$set: {time1: timeFunction}}],
|
||||
as: "result",
|
||||
},
|
||||
},
|
||||
{$set: {time2: timeFunction}},
|
||||
];
|
||||
|
||||
// Verify the outer $set is not hoisted before the $lookup.
|
||||
// In a mongos passthrough the stages live under explain.shards[...].stages, not at the top
|
||||
// level, so explain.stages is absent; skip the stage-order check and rely on the timing
|
||||
// check below for correctness.
|
||||
const explain = coll.explain().aggregate(pipeline);
|
||||
if (explain.stages) {
|
||||
const stages = explain.stages.map((s) => Object.keys(s)[0]);
|
||||
assert.eq(
|
||||
stages.filter((s) => s === "$lookup" || s === "$set"),
|
||||
["$lookup", "$set"],
|
||||
{stages},
|
||||
);
|
||||
}
|
||||
|
||||
// Verify execution order: time1 (set inside the lookup) must precede time2 (set after).
|
||||
const [
|
||||
{
|
||||
result: [{time1}],
|
||||
time2,
|
||||
},
|
||||
] = coll.aggregate(pipeline).toArray();
|
||||
assert.lt(time1.getTime(), time2.getTime(), tojson({time1, time2}));
|
||||
});
|
||||
@ -260,9 +260,10 @@ struct MONGO_MOD_NEEDS_REPLACEMENT DepsTracker {
|
||||
|
||||
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
|
||||
|
||||
// The output of some operators (such as $sample and $rand) depends on a source of fresh random
|
||||
// numbers. During execution this dependency is implicit, but during optimize() we need to know
|
||||
// about this dependency to decide whether it's ok to cache or reevaluate an operator.
|
||||
// The output of some operators depends on a source of fresh random numbers or other
|
||||
// non-deterministic state (e.g. $sample, $rand, $function, $_internalJsEmit). During
|
||||
// execution this dependency is implicit, but during optimize() we need to know about it
|
||||
// to decide whether it is safe to cache, reevaluate, or reorder an operator.
|
||||
bool needRandomGenerator = false;
|
||||
|
||||
private:
|
||||
|
||||
@ -255,6 +255,16 @@ public:
|
||||
_deps->needRandomGenerator = true;
|
||||
}
|
||||
|
||||
void visit(const ExpressionFunction*) final {
|
||||
// JavaScript functions may call Math.random(), Date.now(), or other non-deterministic
|
||||
// mechanisms that make reordering or caching unsafe.
|
||||
_deps->needRandomGenerator = true;
|
||||
}
|
||||
|
||||
void visit(const ExpressionInternalJsEmit*) final {
|
||||
_deps->needRandomGenerator = true;
|
||||
}
|
||||
|
||||
void visit(const ExpressionFieldPath* expr) final {
|
||||
if (!expr->isVariableReference()) { // includes CURRENT when it is equivalent to ROOT.
|
||||
if (expr->getFieldPath().getPathLength() == 1) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user