SERVER-123708 DeadlineMonitor in WAsm Scope, assorted fixes related to Scope.kill() (#51535)

GitOrigin-RevId: 0c3bc53737646faf77d42f721037669821de7a90
This commit is contained in:
Samuel Mercier 2026-04-28 13:08:02 -04:00 committed by MongoDB Bot
parent ac3877e112
commit 937795e2db
10 changed files with 181 additions and 39 deletions

View File

@ -11,8 +11,6 @@
* requires_getmore,
* # TODO: SERVER-119660 Ensure server_status_with_time_out_cursors.js does not leak cursors.
* can_leak_idle_cursors,
* # TODO SERVER-116054: Add support for $where.
* mozjs_wasm_unsupported,
* ]
*/
export const $config = (function () {

View File

@ -5,8 +5,6 @@
* @tags: [
* requires_sharding,
* requires_scripting,
* # TODO SERVER-116054: Add support for $where.
* mozjs_wasm_unsupported,
* ]
*/
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
@ -206,8 +204,8 @@ function executeTest(db, isMongos) {
//
// Many-batch negative test for getmore:
// - Issue a many-batch find() with a 20-second time limit where the results take 10 seconds to
// generate; the find() should not hit the time limit.
// - Issue a many-batch find() with a 35-second time limit where the results take 10 seconds
// (+ overhead) to generate; the find() should not hit the time limit.
//
(function manyBatchNegativeGetMore() {
const t = db.many_batch_negative_get_more;
@ -225,7 +223,7 @@ function executeTest(db, isMongos) {
})
.sort({_id: 1});
cursor.batchSize(3);
cursor.maxTimeMS(20 * 1000);
cursor.maxTimeMS(35 * 1000);
assert.doesNotThrow(
function () {
// SERVER-40305: Add some additional logging here in case this fails to help us track

View File

@ -2,9 +2,6 @@
* Tests where/function can be interrupted through maxTimeMS and query knob.
* @tags: [
* requires_scripting,
* # TODO SERVER-116052: Add support for $function.
* # TODO SERVER-116054: Add support for $where.
* mozjs_wasm_unsupported,
* ]
*/
const mongodOptions = {};

View File

@ -161,6 +161,11 @@ bool MozJSWasmBridge::_callFuncNoArgs(wc::Func& func, wc::Val* results, size_t n
wt::Result<std::monostate> callResult = func.call(getContext(), empty, resultsSpan);
if (!callResult) {
_state.store(State::Trapped);
// If we are pending a kill we either timed out or got killed.
// Throw interrupted to indicate this to the caller.
if (isKillPending()) {
uasserted(ErrorCodes::Interrupted, callResult.err().message());
}
uasserted(kWasmtimeTrapErrorCode, callResult.err().message());
}
auto postResult = func.post_return(getContext());

View File

@ -151,6 +151,11 @@ private:
wt::Result<std::monostate> callResult = func.call(getContext(), argsSpan, resultsSpan);
if (!callResult) {
_state.store(State::Trapped);
// If we are pending a kill we either timed out or got killed.
// Throw interrupted to indicate this to the caller.
if (isKillPending()) {
uasserted(ErrorCodes::Interrupted, callResult.err().message());
}
uasserted(kWasmtimeTrapErrorCode, callResult.err().message());
}
auto postResult = func.post_return(getContext());

View File

@ -33,6 +33,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsontypes_util.h"
#include "mongo/scripting/config_engine_gen.h"
#include "mongo/scripting/deadline_monitor.h"
#include "mongo/scripting/mozjs/wasm/wasmtime_engine.h"
#include "mongo/util/str.h"
@ -51,13 +52,33 @@ WasmtimeImplScope::WasmtimeImplScope(std::shared_ptr<wasm::WasmEngineContext> wa
init(nullptr);
}
WasmtimeImplScope::~WasmtimeImplScope() {
// Must unregister before _bridge is destroyed, otherwise a concurrent interrupt() reading the
// OperationContext decoration could dereference a freed _bridge. This is because
// unregisterOperation() takes the Client lock, serialising with respect to interrupt().
unregisterOperation();
}
void WasmtimeImplScope::reset() {
// Clear the decoration under the Client lock before tearing down _bridge, so a racing
// interrupt() cannot call kill() on a dangling bridge.
unregisterOperation();
if (_bridge && _bridge->isInitialized() && !_bridge->hasTrapped() && !_bridge->hasOomError() &&
!_bridge->isKillPending()) {
_bridge->shutdown();
}
_bridge = nullptr;
// Drop the old engine context and build a fresh one. kill() calls Engine::increment_epoch(),
// which is engine-wide state — reusing the same engine after a kill poisons any future Store
// created from it (new instantiations can fail outright, or be born past their epoch
// deadline). A fresh Engine+Component per reset() cycle keeps contamination bounded to the
// bridge that was actually killed.
_wasmEngineCtx.reset();
if (auto* engine = getGlobalScriptEngine()) {
_wasmEngineCtx = static_cast<WasmtimeScriptEngine*>(engine)->createWasmEngineContext();
}
_cachedFunctions.clear();
init(nullptr);
@ -105,9 +126,6 @@ int WasmtimeImplScope::invoke(ScriptingFunction func,
bool ignoreReturn,
bool readOnlyArgs,
bool readOnlyRecv) {
// TODO(SERVER-122128): enforce timeoutMs by running the function in a separate thread and
// killing it if it runs too long.
// TODO(SERVER-122738): readOnlyArgs and readOnlyRecv are silently ignored here. In the MozJS
// implementation these flags cause SpiderMonkey to freeze the corresponding JS objects so that
// JavaScript code cannot mutate them during execution.
@ -116,21 +134,27 @@ int WasmtimeImplScope::invoke(ScriptingFunction func,
uassert(ErrorCodes::BadValue,
"emit() cannot be used in a function that returns a value",
ignoreReturn);
_deadlineMonitor.startDeadline(this, timeoutMs);
_bridge->invokeMap(func, *recv);
_deadlineMonitor.stopDeadline(this);
_drainEmitToCallback();
return 0;
}
_deadlineMonitor.startDeadline(this, timeoutMs);
bool predicateResult = _bridge->invokePredicate(func, *recv);
_deadlineMonitor.stopDeadline(this);
if (!ignoreReturn) {
_lastReturnValue = BSON(kReturnValueField << predicateResult);
}
return 0;
}
_deadlineMonitor.startDeadline(this, timeoutMs);
// Consider having invokeFunction return the value directly.
// This would eliminate the extra round trip to the engine.
auto result = _bridge->invokeFunction(func, args ? *args : BSONObj(), ignoreReturn);
_deadlineMonitor.stopDeadline(this);
uassertStatusOK(result.getStatus());
if (!ignoreReturn) {
// invokeFunction's direct return goes through getGlobal which flattens JS arrays
@ -298,7 +322,6 @@ std::string WasmtimeImplScope::getError() {
return "";
}
// TODO (SERVER-122128): Implement interrupt support
void WasmtimeImplScope::registerOperation(OperationContext* opCtx) {
_opCtx = opCtx;
if (auto* engine = getGlobalScriptEngine()) {
@ -314,7 +337,8 @@ void WasmtimeImplScope::unregisterOperation() {
}
}
void WasmtimeImplScope::kill() {
_bridge->kill();
if (_bridge)
_bridge->kill();
}
bool WasmtimeImplScope::isKillPending() const {
return _bridge->isKillPending();

View File

@ -29,6 +29,7 @@
#pragma once
#include "mongo/scripting/deadline_monitor.h"
#include "mongo/scripting/engine.h"
#include "mongo/scripting/mozjs/wasm/bridge/bridge.h"
#include "mongo/util/modules.h"
@ -43,6 +44,7 @@ class WasmtimeImplScope : public Scope {
public:
WasmtimeImplScope(std::shared_ptr<wasm::WasmEngineContext> wasmEngineCtx,
boost::optional<int> jsHeapLimitMB = boost::none);
~WasmtimeImplScope() override;
int invoke(ScriptingFunction func,
const BSONObj* args,
@ -112,10 +114,13 @@ protected:
ScriptingFunction _createFunction(const char* code) override;
private:
const std::shared_ptr<wasm::WasmEngineContext> _wasmEngineCtx;
// Rebuilt on reset() after a kill so engine-wide state (e.g. interrupt epoch) doesn't persist
// across the pool handoff. Owned solely by this scope.
std::shared_ptr<wasm::WasmEngineContext> _wasmEngineCtx;
const boost::optional<int> _jsHeapLimitMB;
std::unique_ptr<wasm::MozJSWasmBridge> _bridge;
DeadlineMonitor<WasmtimeImplScope> _deadlineMonitor;
void _drainEmitToCallback();
void _installHelpers();
BSONObj _resolveGlobal(const char* field) const;

View File

@ -39,6 +39,24 @@
using namespace mongo;
using namespace mongo::mozjs;
namespace {
// Tests that exercise WasmtimeImplScope::reset() need the global script engine to be set,
// because reset() rebuilds its WasmEngineContext via getGlobalScriptEngine().
// This guard does the same for the lifetime of a single test and clears it on destruction so
// tests stay isolated.
struct GlobalEngineGuard {
GlobalEngineGuard() {
setGlobalScriptEngine(new WasmtimeScriptEngine());
}
~GlobalEngineGuard() {
setGlobalScriptEngine(nullptr);
}
WasmtimeScriptEngine& engine() {
return *static_cast<WasmtimeScriptEngine*>(getGlobalScriptEngine());
}
};
} // namespace
TEST(WasmtimeScope, CreateAndInvoke_SimpleReturn_ProxyAndThreadLocal) {
WasmtimeScriptEngine engine;
@ -315,8 +333,8 @@ TEST(WasmtimeScope, Invoke_IgnoreReturn_FunctionIsStillInvoked) {
// kill() flag is cleared by reset().
TEST(WasmtimeScope, Lifecycle_Reset_ClearsKillFlag) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
scope->kill();
ASSERT_TRUE(scope->isKillPending());
@ -326,8 +344,8 @@ TEST(WasmtimeScope, Lifecycle_Reset_ClearsKillFlag) {
// Emit state is cleared by reset(); the new bridge can accept a fresh injectNative("emit").
TEST(WasmtimeScope, Lifecycle_Reset_ClearsEmitState) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
std::vector<BSONObj> emitted;
scope->injectNative(
@ -359,8 +377,8 @@ TEST(WasmtimeScope, Lifecycle_Reset_ClearsEmitState) {
// Function handles created before reset() are invalidated; using one throws.
TEST(WasmtimeScope, Lifecycle_Reset_InvalidatesHandles) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
ScriptingFunction staleHandle = scope->createFunction("return 1;");
scope->reset();
@ -372,8 +390,8 @@ TEST(WasmtimeScope, Lifecycle_Reset_InvalidatesHandles) {
// reset() re-initializes the bridge; globals from before reset are cleared.
TEST(WasmtimeScope, Lifecycle_Reset_ClearsGlobals) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
scope->setNumber("x", 99.0);
scope->reset();
@ -585,8 +603,8 @@ TEST(WasmtimeScope, MemoryLimit_RuntimeChangeAffectsNewScope) {
// Note: the JS heap limit is cached per-scope at construction time, so only the store
// limit changes on reset.
TEST(WasmtimeScope, MemoryLimit_ResetPicksUpNewValue) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
ASSERT(scope);
// Change the store limit after the scope was created, then reset.
@ -700,8 +718,8 @@ TEST(WasmtimeScope, MemoryLimit_LargeHeapUsesPercentOverhead) {
// reset() re-reads the server params; if the new combination is invalid it must fail.
TEST(WasmtimeScope, MemoryLimit_ResetWithInvalidParamsFails) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
ASSERT(scope);
auto savedHeap = gJSHeapLimitMB.load();
@ -750,8 +768,8 @@ TEST(WasmtimeScope, OOM_SetOnJsHeapOom) {
// reset() clears the OOM flag.
TEST(WasmtimeScope, OOM_ResetClearsFlag) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
GlobalEngineGuard engineGuard;
std::unique_ptr<Scope> scope(engineGuard.engine().createScopeForCurrentThread(boost::none));
ScriptingFunction fn = scope->createFunction(
"function() {"
@ -770,3 +788,85 @@ TEST(WasmtimeScope, OOM_ResetClearsFlag) {
scope->reset();
ASSERT_FALSE(scope->hasOutOfMemoryException());
}
// Test killing a process.
TEST(WasmtimeScope, KillProcess) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
// Allocate until the JS heap runs out of memory.
ScriptingFunction fn = scope->createFunction(
"function() {"
" var arr = [];"
" var s = new Array(1024 * 1024 + 1).join('a');"
" for (var i = 0; i < 10; i++) {"
" arr.push(s);"
" s = s + s;"
" }"
" return arr.length;"
"}");
ASSERT_THROWS(scope->invoke(fn, nullptr, nullptr, 0), DBException);
ASSERT_TRUE(scope->hasOutOfMemoryException());
}
// Test timeouts with a regular function.
TEST(WasmtimeScope, TimeoutFunction) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
ScriptingFunction fn = scope->createFunction("while (true) {}");
ASSERT_THROWS_WITH_CHECK(scope->invoke(fn, nullptr, nullptr, 1),
AssertionException,
[](auto&& ex) { ASSERT_STRING_CONTAINS(ex.reason(), "interrupt"); });
}
// Test timeouts with predicate.
TEST(WasmtimeScope, TimeoutPredicate) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
ScriptingFunction fn =
scope->createFunction("function() { while (true) {}; return this.age >= 18; }");
ASSERT(fn != 0);
BSONObj doc1 = BSON("age" << 25);
ASSERT_THROWS_WITH_CHECK(scope->invoke(fn, nullptr, &doc1, 1),
AssertionException,
[](auto&& ex) { ASSERT_STRING_CONTAINS(ex.reason(), "interrupt"); });
}
// Test timeouts with map.
TEST(WasmtimeScope, TimeoutMap) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
std::vector<BSONObj> emitted2;
scope->injectNative(
"emit",
[](const BSONObj& args, void* data) -> BSONObj {
static_cast<std::vector<BSONObj>*>(data)->push_back(args.getOwned());
return BSONObj();
},
&emitted2);
ScriptingFunction mapFn =
scope->createFunction("function() { while(true) {}; emit(this.k, this.v); }");
BSONObj doc = BSON("k" << "x" << "v" << 1);
ASSERT_THROWS_WITH_CHECK(scope->invoke(mapFn, nullptr, &doc, 1, true),
AssertionException,
[](auto&& ex) { ASSERT_STRING_CONTAINS(ex.reason(), "interrupt"); });
}
// Test timeouts with sleep.
TEST(WasmtimeScope, TimeoutSleep) {
WasmtimeScriptEngine engine;
std::unique_ptr<Scope> scope(engine.createScopeForCurrentThread(boost::none));
ScriptingFunction fn = scope->createFunction("sleep(10000)");
ASSERT_THROWS_WITH_CHECK(scope->invoke(fn, nullptr, nullptr, 1),
AssertionException,
[](auto&& ex) { ASSERT_STRING_CONTAINS(ex.reason(), "interrupt"); });
}

View File

@ -83,15 +83,16 @@ auto operationWasmtimeScopeDecoration =
namespace mozjs {
WasmtimeScriptEngine::WasmtimeScriptEngine() {
size_t size =
static_cast<size_t>(_binary_mozjs_wasm_api_cwasm_end - _binary_mozjs_wasm_api_cwasm_start);
_wasmEngineCtx =
wasm::WasmEngineContext::createFromPrecompiled(_binary_mozjs_wasm_api_cwasm_start, size);
}
WasmtimeScriptEngine::WasmtimeScriptEngine() {}
WasmtimeScriptEngine::~WasmtimeScriptEngine() {}
std::shared_ptr<wasm::WasmEngineContext> WasmtimeScriptEngine::createWasmEngineContext() const {
size_t size =
static_cast<size_t>(_binary_mozjs_wasm_api_cwasm_end - _binary_mozjs_wasm_api_cwasm_start);
return wasm::WasmEngineContext::createFromPrecompiled(_binary_mozjs_wasm_api_cwasm_start, size);
}
mongo::Scope* WasmtimeScriptEngine::createScope() {
return createScopeForCurrentThread(boost::none);
}
@ -101,7 +102,7 @@ mongo::Scope* WasmtimeScriptEngine::createScopeForCurrentThread(
// Resolve the heap limit: use passed value if provided, otherwise use global config.
// If a limit is passed, cap it at the global limit (like MozJS does).
const auto resolvedLimit = jsHeapLimitMB ? *jsHeapLimitMB : getJSHeapLimitMB();
return new WasmtimeImplScope(_wasmEngineCtx, resolvedLimit);
return new WasmtimeImplScope(createWasmEngineContext(), resolvedLimit);
}
// TODO (SERVER-122128): Implement interrupt support
@ -123,9 +124,15 @@ void WasmtimeScriptEngine::interruptAll(ServiceContextLock& svcCtxLock) {
}
}
void WasmtimeScriptEngine::registerOperation(OperationContext* opCtx, WasmtimeImplScope* scope) {
std::lock_guard lk(*opCtx->getClient());
(*opCtx)[operationWasmtimeScopeDecoration] = scope;
if (auto status = opCtx->checkForInterruptNoAssert(); !status.isOK()) {
scope->kill();
}
}
void WasmtimeScriptEngine::unregisterOperation(OperationContext* opCtx) {
std::lock_guard lk(*opCtx->getClient());
(*opCtx)[operationWasmtimeScopeDecoration] = nullptr;
}

View File

@ -83,8 +83,11 @@ public:
void registerOperation(OperationContext* ctx, WasmtimeImplScope* scope);
void unregisterOperation(OperationContext* opCtx);
private:
std::shared_ptr<wasm::WasmEngineContext> _wasmEngineCtx;
// Builds a fresh Wasmtime Engine + deserialized Component from the embedded precompiled
// component bytes. Each scope (and each post-kill rebuild inside a scope) gets its own
// context so that engine-wide state (notably the interrupt-epoch counter bumped by kill())
// cannot leak across scopes or survive a reset().
std::shared_ptr<wasm::WasmEngineContext> createWasmEngineContext() const;
};
} // namespace mozjs