SERVER-122576 Make kernel 6.19 crash more graceful than status quo (#50435)

Co-authored-by: Blake Oler <blake.oler@mongodb.com>
Co-authored-by: Mathias Stearn <mathias@mongodb.com>
Co-authored-by: Mathias Stearn <redbeard0531@gmail.com>
GitOrigin-RevId: df132e3c84a853eb778c0b6952c201c7a64bb054
This commit is contained in:
Guillaume Racicot 2026-03-30 16:14:08 -04:00 committed by MongoDB Bot
parent df97db41ff
commit 32cbb3e856
21 changed files with 424 additions and 0 deletions

View File

@ -36,6 +36,7 @@ selector:
exclude_files:
- jstests/noPassthrough/libs/**/*.js
exclude_with_any_tags:
- requires_kernel_619
- incompatible_with_execution_control_with_prioritization
roots:
- jstests/noPassthrough/**/*.js

View File

@ -40,6 +40,7 @@ executor:
matrix_suite: true
selector:
exclude_with_any_tags:
- requires_kernel_619
- primary_driven_index_builds_incompatible
- rolling_index_builds
- requires_commit_quorum

View File

@ -216,6 +216,9 @@ filters:
- "*rate_limited.yml":
approvers:
- 10gen/server-workload-resilience
- "*rseq_kernel_check*":
approvers:
- 10gen/server-workload-resilience
- "replica_sets_with_priority_ports.yml":
approvers:
- 10gen/server-catalog-and-routing

View File

@ -19,6 +19,7 @@ selector:
exclude_files:
- jstests/noPassthrough/libs/**/*.js
exclude_with_any_tags:
- requires_kernel_619
# noPassthrough tests start their own mongod's.
executor:

View File

@ -32,6 +32,7 @@ selector:
- jstests/noPassthrough/query/plan_cache/sbe_plan_cache_key_reporting.js
exclude_with_any_tags:
- requires_kernel_619
# noPassthrough tests start their own mongod's.
executor:

View File

@ -50,6 +50,8 @@ selector:
- jstests/aggregation/sources/lookup/lookup_unwind_equijoin.js
# TODO SERVER-118416 Fix join reordering tripwire assertions for lookup+unwind pipelines and re-enable lookup_unwind_pbt PBT
- jstests/aggregation/sources/lookup/lookup_unwind_pbt.js
exclude_with_any_tags:
- requires_kernel_619
executor:
config:
shell_options:

View File

@ -0,0 +1,20 @@
test_kind: js_test
description: |
Tests for rseq/tcmalloc kernel compatibility on Linux >= 6.19.
Only runs on the ubuntu2404-kernel619-rseq-check variant.
selector:
roots:
- jstests/noPassthrough/rseq_linux_compatibility/**/*.js
include_with_any_tags:
- requires_kernel_619
executor:
config:
shell_options:
nodb: ""
process_kwargs:
env_vars:
# We need to set the environment variable at the suite level because
# mongo shell would also crash due to the tcmalloc issue otherwise.
GLIBC_TUNABLES: "glibc.pthread.rseq=1"

View File

@ -1453,6 +1453,23 @@ tasks:
args:
- "./src/evergreen/packages_crypt_publish.sh"
- name: rseq_kernel_compatibility_check_jstest
tags: ["assigned_to_jira_team_server_workload_scheduling", "auxiliary"]
depends_on:
- name: version_expansions_gen
variant: generate-tasks-for-version
commands:
- func: "bazel compile"
vars:
targets: install-devcore
bazel_args: >-
--config=evg
- func: "f_expansions_write"
- func: "run tests"
vars:
suite: rseq_kernel_check
install_dir: bazel-bin/install/bin
################################################
# Task Groups #
################################################
@ -1472,6 +1489,11 @@ task_groups:
tasks:
- run_bazel_program
- <<: *compile_bazel_task_group_template
name: rseq_kernel_compatibility_check_TG
tasks:
- rseq_kernel_compatibility_check_jstest
# We want to run these sequentially to put less load on the cluster
- <<: *compile_task_group_template
name: hydrate_bazel_profile_TG

View File

@ -34,6 +34,19 @@ buildvariants:
- name: hydrate_bazel_profile_TG
- name: .clang_tidy
- name: &ubuntu2404-kernel619-rseq-check ubuntu2404-kernel619-rseq-check
display_name: "~ Ubuntu 24.04 Kernel 6.19 rseq Check"
tags: []
cron: "0 4 * * 0" # Weekly on Sunday at 04:00 UTC
run_on:
- DEVPROD-30151-ubuntu2404-kernel619-m6i-4xlarge
stepback: false
expansions:
compile_variant: *ubuntu2404-kernel619-rseq-check
evergreen_remote_exec: on
tasks:
- name: rseq_kernel_compatibility_check_TG
- name: &upload_source_graph_index upload_source_graph_index
display_name: "~ Upload Source Graph Index"
tags: ["source_graph_indexing"]

View File

@ -0,0 +1,12 @@
load("//bazel:mongo_js_rules.bzl", "all_subpackage_javascript_files", "mongo_js_library")
package(default_visibility = ["//visibility:public"])
mongo_js_library(
name = "all_javascript_files",
srcs = glob([
"*.js",
]),
)
all_subpackage_javascript_files()

View File

@ -0,0 +1,5 @@
version: 2.0.0
filters:
- "*":
approvers:
- 10gen/server-workload-resilience

View File

@ -0,0 +1,92 @@
/**
* This test checks that mongod correctly crashes on startup on Linux 6.19 with
* tcmalloc per-CPU cache.
*
* @tags: [requires_kernel_619]
*/
import {ReplSetTest} from "jstests/libs/replsettest.js";
// Log ID the checks below look for in the captured server output.
const gracefulExitLogID = 12257600;
// Matches the `"id":<gracefulExitLogID>` field of a log line in the captured output.
const findGracefulExitLogLine = new RegExp(`"id":${gracefulExitLogID}`);
/**
 * Waits for the server process behind `conn` to terminate, then asserts that it exited
 * with code 1 and that a log line carrying `gracefulExitLogID` is present in the captured
 * program output.
 *
 * @param {object} conn - Handle returned by MongoRunner; only `conn.pid` is read.
 */
function checkGracefulExitOnIncompatibleEnv(conn) {
    const status = waitProgram(conn.pid);
    assert.eq(status, 1, `Expected server to exit with code 1, got ${status}`);

    // The regex has no global/sticky flag, so test() is stateless here.
    const sawFatalLog = findGracefulExitLogLine.test(rawMongoProgramOutput(".*"));
    assert(sawFatalLog, `Expected fatal log message with ID ${gracefulExitLogID} in server output`);
}
/**
 * Waits for the server process behind `conn` to terminate, then asserts that it exited
 * with code 0 and that no log line carrying `gracefulExitLogID` appears in the captured
 * program output.
 *
 * @param {object} conn - Handle returned by MongoRunner; only `conn.pid` is read.
 */
function checkGracefulExitOnCompatibleEnv(conn) {
    const status = waitProgram(conn.pid);
    assert.eq(status, 0, `Expected server to exit with code 0, got ${status}`);

    // The regex has no global/sticky flag, so test() is stateless here.
    const sawFatalLog = findGracefulExitLogLine.test(rawMongoProgramOutput(".*"));
    assert(!sawFatalLog, `Unexpected fatal log message with ID ${gracefulExitLogID} in server output`);
}
/**
 * Launches a mongod with GLIBC_TUNABLES="glibc.pthread.rseq=0" and the shutdownAtStartup
 * failpoint enabled, and expects it to exit with code 1 after emitting the fatal log line.
 *
 * NOTE(review): presumably rseq=0 stops glibc from registering rseq itself, which lets
 * tcmalloc's per-CPU cache become active — confirm.
 */
function testMongodPerCPUCacheEnabled() {
    // Start from an empty capture buffer so only this mongod's output is inspected.
    clearRawMongoProgramOutput();

    const mongodOptions = {
        env: {GLIBC_TUNABLES: "glibc.pthread.rseq=0"},
        // The process is expected to exit on its own, so do not wait for a connection.
        waitForConnect: false,
        setParameter: {"failpoint.shutdownAtStartup": '{mode:"alwaysOn"}'},
    };
    checkGracefulExitOnIncompatibleEnv(MongoRunner.runMongod(mongodOptions));
}
/**
 * Launches a mongod with GLIBC_TUNABLES="glibc.pthread.rseq=1" and the shutdownAtStartup
 * failpoint enabled, and expects it to exit with code 0 without emitting the fatal log line.
 *
 * NOTE(review): presumably rseq=1 keeps glibc's own rseq registration, which leaves
 * tcmalloc's per-CPU cache inactive — confirm.
 */
function testMongodPerCPUCacheDisabled() {
    // Start from an empty capture buffer so only this mongod's output is inspected.
    clearRawMongoProgramOutput();

    const mongodOptions = {
        env: {GLIBC_TUNABLES: "glibc.pthread.rseq=1"},
        // The process is expected to exit on its own, so do not wait for a connection.
        waitForConnect: false,
        setParameter: {"failpoint.shutdownAtStartup": '{mode:"alwaysOn"}'},
    };
    checkGracefulExitOnCompatibleEnv(MongoRunner.runMongod(mongodOptions));
}
/**
 * Starts a single-node config server replica set (with rseq=1), then launches a mongos
 * against it with GLIBC_TUNABLES="glibc.pthread.rseq=0" and the shutdownAtStartup
 * failpoint enabled. Expects the mongos to exit with code 1 after emitting the fatal log
 * line, and finally stops the config server set.
 */
function testMongosPerCPUCacheEnabled() {
    const configServerOptions = {configsvr: "", env: {GLIBC_TUNABLES: "glibc.pthread.rseq=1"}};
    const configRS = new ReplSetTest({nodes: 1});
    configRS.startSet(configServerOptions);
    configRS.initiate();

    // Drop the config server's startup output so only the mongos output is inspected.
    clearRawMongoProgramOutput();

    const mongosOptions = {
        configdb: configRS.getURL(),
        env: {GLIBC_TUNABLES: "glibc.pthread.rseq=0"},
        // The process is expected to exit on its own, so do not wait for a connection.
        waitForConnect: false,
        setParameter: {"failpoint.shutdownAtStartup": '{mode:"alwaysOn"}'},
    };
    const mongos = MongoRunner.runMongos(mongosOptions);
    checkGracefulExitOnIncompatibleEnv(mongos);

    configRS.stopSet();
}
/**
 * Starts a single-node config server replica set (with rseq=1), then launches a mongos
 * against it with GLIBC_TUNABLES="glibc.pthread.rseq=1" and the shutdownAtStartup
 * failpoint enabled. Expects the mongos to exit with code 0 without emitting the fatal
 * log line, and finally stops the config server set.
 */
function testMongosPerCPUCacheDisabled() {
    const configServerOptions = {configsvr: "", env: {GLIBC_TUNABLES: "glibc.pthread.rseq=1"}};
    const configRS = new ReplSetTest({nodes: 1});
    configRS.startSet(configServerOptions);
    configRS.initiate();

    // Drop the config server's startup output so only the mongos output is inspected.
    clearRawMongoProgramOutput();

    const mongosOptions = {
        configdb: configRS.getURL(),
        env: {GLIBC_TUNABLES: "glibc.pthread.rseq=1"},
        // The process is expected to exit on its own, so do not wait for a connection.
        waitForConnect: false,
        setParameter: {"failpoint.shutdownAtStartup": '{mode:"alwaysOn"}'},
    };
    const mongos = MongoRunner.runMongos(mongosOptions);
    checkGracefulExitOnCompatibleEnv(mongos);

    configRS.stopSet();
}
// Run every scenario in sequence: mongod first, then mongos, each with rseq=0 and rseq=1.
testMongodPerCPUCacheEnabled();
testMongodPerCPUCacheDisabled();
testMongosPerCPUCacheEnabled();
testMongosPerCPUCacheDisabled();

View File

@ -117,6 +117,7 @@ core.servers:
- src/mongo/s/mongos.cpp
- src/mongo/s/mongos_main.*
- src/mongo/s/mongos_options*
- src/mongo/db/startup_check_rseq*
core.commands:
meta:

View File

@ -49,6 +49,29 @@ mongo_cc_library(
],
)
mongo_cc_library(
name = "startup_check_rseq",
srcs = ["startup_check_rseq.cpp"],
hdrs = ["startup_check_rseq.h"],
deps = [
"//src/mongo:base",
] + select({
"//bazel/config:tcmalloc_google_enabled": [
"//src/third_party/tcmalloc:tcmalloc_internal_percpu",
],
"//conditions:default": [],
}),
)
mongo_cc_unit_test(
name = "startup_check_rseq_test",
srcs = ["startup_check_rseq_test.cpp"],
tags = ["mongo_unittest_seventh_group"],
deps = [
"startup_check_rseq",
],
)
render_template(
name = "feature_flag_test_tpl",
srcs = [
@ -2656,6 +2679,7 @@ mongo_cc_library(
# satisfy symbol dependencies from the files listed above in `sources`. If you need to add a
# library to inject a static or mongo initializer to mongod, please add that library as a
# private libdep of mongod_initializers.
"startup_check_rseq",
"//src/mongo/client:clientdriver_minimal",
"//src/mongo/db/auth:user_cache_invalidator",
"//src/mongo/db/query/search:search_task_executors",

View File

@ -355,6 +355,9 @@ filters:
- "action_duration_metrics*":
approvers:
- 10gen/server-storage-engine-integration
- "startup_check_rseq*":
approvers:
- 10gen/server-workload-resilience
- "feature_flag.*":
approvers:
- 10gen/server-programmability

View File

@ -188,6 +188,7 @@
#include "mongo/db/sharding_environment/sharding_feature_flags_gen.h"
#include "mongo/db/sharding_environment/sharding_initialization_mongod.h"
#include "mongo/db/sharding_environment/sharding_ready.h"
#include "mongo/db/startup_check_rseq.h"
#include "mongo/db/startup_recovery.h"
#include "mongo/db/startup_warnings_mongod.h"
#include "mongo/db/stats/system_buckets_metrics.h"
@ -2043,6 +2044,8 @@ int mongod_main(int argc, char* argv[]) {
setupSignalHandlers();
validateRseqKernelCompat();
srand(static_cast<unsigned>(curTimeMicros64())); // NOLINT
Status status = mongo::runGlobalInitializers(std::vector<std::string>(argv, argv + argc));

View File

@ -0,0 +1,109 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/startup_check_rseq.h"
#include "mongo/base/parse_number.h"
#include "mongo/base/string_data.h"
#include "mongo/config.h"
#include "mongo/logv2/log.h"
#include "mongo/util/exit_code.h"
#include "mongo/util/quick_exit.h"
#include <boost/optional.hpp>
#ifdef __linux__
#include <sys/utsname.h>
#endif
#ifdef MONGO_CONFIG_TCMALLOC_GOOGLE
#include <tcmalloc/internal/percpu.h>
#endif
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
namespace mongo {
/**
 * Decides from a kernel release string (e.g. "6.18.0-generic") whether the kernel is older
 * than 6.19.
 *
 * Only the leading "<major>.<minor>" part is parsed; anything after the minor number is
 * ignored. Returns true when the version is below 6.19, and also when the string cannot be
 * parsed (a warning is logged and the kernel is assumed compatible). Returns false for
 * 6.19 and newer.
 */
bool isKernelVersionSafeForTCMallocPerCPUCache(StringData release) {
    const auto parseLeadingInt = NumberParser::strToAny(10);

    // StringData is a (pointer, length) view and is not guaranteed to be NUL-terminated, so
    // every access below is bounded by the end of the view rather than relying on a '\0'.
    const char* const releaseEnd = release.data() + release.size();

    int major = 0;
    int minor = 0;
    char* end = nullptr;

    // The major number must be followed by a '.' that lies inside the view.
    bool parsed = parseLeadingInt(release, &major, &end).isOK() && end != releaseEnd && *end == '.';
    if (parsed) {
        const char* const minorBegin = end + 1;
        const StringData minorPart{minorBegin, static_cast<size_t>(releaseEnd - minorBegin)};
        parsed = parseLeadingInt(minorPart, &minor).isOK();
    }

    if (!parsed) {
        // If the version cannot be parsed, assume the kernel is compatible
        LOGV2_WARNING(12257601,
                      "Unable to parse kernel version, cannot check for kernel "
                      "version compatibility",
                      "kernel-version"_attr = release);
        return true;
    }

    return major < 6 || (major == 6 && minor < 19);
}
namespace {
/**
 * Checks the running kernel's release string, as reported by uname(), against the 6.19
 * cutoff. Returns true on non-Linux builds and when uname() fails (a warning is logged in
 * that case).
 */
bool isKernelSafeForTCMallocPerCPUCache() {
#ifdef __linux__
    struct utsname kernelInfo;
    if (uname(&kernelInfo) == 0) {
        return isKernelVersionSafeForTCMallocPerCPUCache(StringData{kernelInfo.release});
    }

    // Without a release string there is nothing to compare; assume the kernel is compatible.
    LOGV2_WARNING(12257602,
                  "Unable to determine kernel version via uname, cannot check for kernel "
                  "version compatibility");
    return true;
#else
    return true;
#endif
}
/**
 * Returns the result of tcmalloc's percpu::IsFast() when built with
 * MONGO_CONFIG_TCMALLOC_GOOGLE, and false otherwise.
 */
bool isTCMallocPerCPUCacheActive() {
#ifndef MONGO_CONFIG_TCMALLOC_GOOGLE
    return false;
#else
    namespace percpu = tcmalloc::tcmalloc_internal::subtle::percpu;
    return percpu::IsFast();
#endif
}
} // namespace
/**
 * Startup check: if tcmalloc's per-CPU cache is active and the running kernel is 6.19 or
 * newer, logs fatal message 12257600 and terminates the process via
 * quickExit(ExitCode::fail). Otherwise returns without doing anything.
 */
void validateRseqKernelCompat() {
    // The kernel is only inspected when the per-CPU cache is actually active.
    if (!isTCMallocPerCPUCacheActive() || isKernelSafeForTCMallocPerCPUCache()) {
        return;
    }

    // FatalMode::kContinue lets control reach quickExit() below, which picks the exit code.
    LOGV2_FATAL_OPTIONS(
        12257600,
        logv2::LogOptions(logv2::LogComponent::kControl, logv2::FatalMode::kContinue),
        "MongoDB cannot start: Linux kernel versions 6.19 and newer has a known "
        "incompatibility with this version of MongoDB. See "
        "https://jira.mongodb.org/browse/SERVER-121912 for more information.");
    quickExit(ExitCode::fail);
}
} // namespace mongo

View File

@ -0,0 +1,40 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/base/string_data.h"
#include "mongo/util/modules.h"
namespace mongo {
/**
 * Startup check called from mongod_main() and mongos_main(). Logs a fatal message and exits
 * the process when tcmalloc's per-CPU cache is active on a Linux kernel 6.19 or newer;
 * otherwise returns normally.
 */
void validateRseqKernelCompat();
/**
 * Returns true if the "<major>.<minor>" prefix of the kernel release string `release` is
 * below 6.19, or if the string cannot be parsed. Returns false for 6.19 and newer.
 * Declared here so the unit test can call it.
 */
MONGO_MOD_FILE_PRIVATE bool isKernelVersionSafeForTCMallocPerCPUCache(StringData release);
} // namespace mongo

View File

@ -0,0 +1,67 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/startup_check_rseq.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
namespace {
// Release strings older than 6.19, with and without patch numbers or suffixes, must be
// reported as safe.
TEST(StartupCheckRseq, SafeKernels) {
    for (const char* release : {"5.15.0-generic",
                                "6.18.99",
                                "6.0.0",
                                "4.19.0-aws",
                                "5.4.0",
                                "6.18.0",
                                "6.18"}) {
        ASSERT_TRUE(isKernelVersionSafeForTCMallocPerCPUCache(release));
    }
}
// Release strings at or above 6.19, including higher major versions, must be reported as
// unsafe.
TEST(StartupCheckRseq, UnsafeKernels) {
    for (const char* release :
         {"6.19", "6.19.0", "6.19.3-generic", "6.20.0", "7.0.0", "10.0.0"}) {
        ASSERT_FALSE(isKernelVersionSafeForTCMallocPerCPUCache(release));
    }
}
// Strings without a parseable "<major>.<minor>" prefix are treated as compatible.
TEST(StartupCheckRseq, UnparseableReturnsTrue) {
    for (const char* release : {"", "invalid", "abc.def", "6", "6.", ".19", ".6.19.0"}) {
        ASSERT_TRUE(isKernelVersionSafeForTCMallocPerCPUCache(release));
    }
}
} // namespace
} // namespace mongo

View File

@ -585,6 +585,7 @@ mongo_cc_library(
# library to inject a static or mongo initializer to mongos,
# please add that library as a private libdep of
# mongos_initializers.
"//src/mongo/db:startup_check_rseq",
"//src/mongo/client:remote_command_targeter",
"//src/mongo/db:audit",
"//src/mongo/db/auth:authserver",

View File

@ -92,6 +92,7 @@
#include "mongo/db/sharding_environment/shard_id.h"
#include "mongo/db/sharding_environment/sharding_initialization.h"
#include "mongo/db/sharding_environment/version_mongos.h"
#include "mongo/db/startup_check_rseq.h"
#include "mongo/db/startup_warnings_common.h"
#include "mongo/db/stats/system_buckets_metrics.h"
#include "mongo/db/topology/cluster_parameters/cluster_server_parameter_refresher.h"
@ -1053,6 +1054,8 @@ ExitCode mongos_main(int argc, char* argv[]) {
setupSignalHandlers();
validateRseqKernelCompat();
Status status = runGlobalInitializers(std::vector<std::string>(argv, argv + argc));
if (!status.isOK()) {
LOGV2_FATAL_OPTIONS(