SERVER-114673 Add support for mounting specific test composer directories (#45330)

GitOrigin-RevId: 47cae99a491023bfb1211aa0500ae45b979e5335
This commit is contained in:
Trevor Guidry 2025-12-17 11:14:08 -06:00 committed by MongoDB Bot
parent 5de2fbd9c5
commit 1c339b6a9c
16 changed files with 436 additions and 8 deletions

1
.gitignore vendored
View File

@ -327,6 +327,7 @@ buildscripts/antithesis/base_images/mongo_binaries/bin
buildscripts/antithesis/base_images/mongo_binaries/lib
buildscripts/antithesis/base_images/mongo_binaries/libvoidstar.so
buildscripts/antithesis/base_images/mongo_binaries/tsan.suppressions
buildscripts/antithesis/base_images/mongo_binaries/src
buildscripts/antithesis/base_images/workload/bin
buildscripts/antithesis/base_images/workload/lib

View File

@ -0,0 +1,146 @@
// Connection string of the cluster under test. `connection_string` is not defined
// in this file; it is expected to already exist as a global when this file is
// loaded (e.g. defined through the shell's `--eval` before calling `load`).
const MONGOD_URI = connection_string;
// Maximum number of retries performed by retryOnFailure. Each retry sleeps for
// 1 second first, so this bounds the time spent sleeping to about 3 minutes.
const MAX_RETRIES = 180; // 3 minutes
// Executes the given function, retrying if it fails
// due to the network being unreachable (MongoNetworkError)
// or cluster unavailable (MongoServerSelectionError)
// or a retryable write error (error label "RetryableWriteError").
//
// Sleeps 1 second between attempts and gives up after MAX_RETRIES retries.
// Returns whatever `func` returns. Re-throws the original error when the
// retries are exhausted or when the error is not considered retryable.
function retryOnFailure(func) {
    const retryableNames = [
        "MongoServerSelectionError",
        "MongoNetworkError",
        "MongoNetworkTimeoutError",
        "MongoPoolClearedError",
        "PoolClearedOnNetworkError",
    ];
    const retryableMessages = ["read ECONNRESET", "read ETIMEDOUT", "Shutting down"];

    // `name` and `message` of an Error are not enumerable, so JSON.stringify(err)
    // alone prints "{}" for plain errors. Log them explicitly, and never let a
    // serialization failure hide the error that is being reported.
    const describe = (err) => {
        let details;
        try {
            details = JSON.stringify(err);
        } catch (stringifyErr) {
            details = "<unserializable>";
        }
        return `${err?.name}: ${err?.message} ${details}`;
    };

    let retries = 0;
    while (true) {
        try {
            return func();
        } catch (err) {
            if (retries++ >= MAX_RETRIES) {
                print(`failed (exhausted all retries): ${describe(err)}`);
                throw err;
            }
            // Optional chaining keeps a thrown null/undefined from raising a
            // TypeError here; such values fall through to the non-retryable path.
            if (retryableNames.includes(err?.name) || retryableMessages.includes(err?.message)) {
                print(`Attempt ${retries} failed due to ${err.name}, retrying in 1 second...`);
                sleep(1000);
                continue;
            }
            const labels = err?.errorResponse?.errorLabels;
            if (Array.isArray(labels) && labels.includes("RetryableWriteError")) {
                print(`Attempt ${retries} failed due to ${err.codeName}, retrying in 1 second...`);
                sleep(1000);
                continue;
            }
            // Some other non-retryable issue, re-throw
            print(`failed due to unretryable error: ${describe(err)}`);
            throw err;
        }
    }
}
// Returns a pseudo-random integer in the half-open range [0, max),
// i.e. `max` itself is never returned.
function randomInt(max) {
    const scaled = Math.random() * max;
    return Math.floor(scaled);
}
// Establishes a connection to MONGOD_URI and returns a handle to its "test"
// database. The connection attempt goes through retryOnFailure.
function getDB() {
    const connect = () => {
        const conn = new Mongo(MONGOD_URI);
        return conn.getDB("test");
    };
    return retryOnFailure(connect);
}
// Runs 100 findOne queries against the `test` collection of the database
// returned by getDB(). Each query filters on `x` greater than a fresh random
// threshold between 1 and 10000, and each goes through retryOnFailure.
function find() {
    const db = getDB();
    print("executing find");
    let remaining = 100;
    while (remaining-- > 0) {
        const threshold = 1 + Math.floor(Math.random() * 10000);
        retryOnFailure(() => db.test.findOne({x: {$gt: threshold}}));
    }
    print("done executing find");
}
// Sends an `fsync` admin command (through retryOnFailure), prints the reply
// as JSON, and asserts that the reply's `ok` field is truthy.
function fsync() {
    const db = getDB();
    print("executing fsync");
    const runFsync = () => db.adminCommand({fsync: 1});
    const result = retryOnFailure(runFsync);
    const serialized = JSON.stringify(result);
    print("done executing fsync. result:", serialized);
    assert(result.ok);
}
// Inserts 100 documents of the form {x: <random integer between 1 and 10000>}
// into the `test` collection of the database returned by getDB(). Every
// insert goes through retryOnFailure.
function insert() {
    const db = getDB();
    const insertRandomDoc = () => {
        const val = 1 + Math.floor(Math.random() * 10000);
        retryOnFailure(() => db.test.insertOne({x: val}));
    };
    for (let left = 100; left > 0; left--) {
        insertRandomDoc();
    }
}
// Executes a write, then a point-in-time read
// to fetch the older state of the doc prior to the write.
//
// Returns nothing. Exits early, without asserting, when the snapshot read fails
// with SnapshotTooOld. Throws when the snapshot read returns anything other
// than the value=1 version of the document, or on any other error.
function pitRead() {
    // Set up initial state, a doc with field "value":0
    const db = getDB();
    const _id = ObjectId();
    retryOnFailure(() => {
        db.createCollection("test_snapshots");
        db.test_snapshots.updateOne({_id}, {$set: {value: 0}}, {upsert: true, writeConcern: {w: "majority"}});
    });

    // Execute an update (set value=1) and capture its timestamp as t1,
    // then move the document on to value=2 so that a read at t1 is a read of
    // an older version.
    const t1 = retryOnFailure(() => {
        const ses1 = db.getMongo().startSession();
        try {
            const ses1Coll = ses1.getDatabase(db.getName()).test_snapshots;
            ses1Coll.updateOne({_id}, {$set: {value: 1}}, {writeConcern: {w: "majority"}});
            const opTime = ses1.getOperationTime();
            db.test_snapshots.updateOne({_id}, {$set: {value: 2}}, {writeConcern: {w: "majority"}});
            return opTime;
        } finally {
            // Every retry of this closure starts a new session; end it on both
            // success and failure so that attempts do not leak sessions.
            ses1.endSession();
        }
    });

    // Sleep for a random whole number of seconds between 0 and 29.
    // The snapshot history window is configured to 10 seconds,
    // so sometimes this will attempt a read outside the window.
    sleep(randomInt(30) * 1000);

    let snapshotReadResult;
    try {
        snapshotReadResult = retryOnFailure(() => {
            // Do a snapshot read to get the document's state as of t1
            return db.runCommand({
                find: "test_snapshots",
                filter: {_id},
                readConcern: {level: "snapshot", atClusterTime: t1},
            });
        });
    } catch (e) {
        if (e.codeName === "SnapshotTooOld") {
            // This case is expected sometimes, so exit cleanly.
            return;
        }
        throw e;
    }

    // Assert that the snapshot read actually returned the version of the
    // doc at the older value (1), not the newer one (2).
    const actual = EJSON.stringify(snapshotReadResult.cursor.firstBatch);
    const expected = EJSON.stringify([{_id, value: 1}]);
    assert(actual === expected, `snapshot read returned ${actual}, expected ${expected}`);
}
// Runs `validate` on every collection of every database reported by
// `listDatabases`, asserting that each validation result is `valid`.
// Every server round-trip goes through retryOnFailure.
function validateCollections() {
    const db = getDB();
    const dbNames = retryOnFailure(() => {
        const reply = db.adminCommand("listDatabases");
        return reply.databases.map((entry) => entry.name);
    });
    for (const dbName of dbNames) {
        const collNames = retryOnFailure(() => db.getSiblingDB(dbName).getCollectionNames());
        for (const coll of collNames) {
            print(`validating ${dbName}.${coll}...`);
            const validateResult = retryOnFailure(() => db.getSiblingDB(dbName).getCollection(coll).validate());
            assert(validateResult.valid, "collection is not valid");
        }
    }
}

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Loads js/commands.js (resolved relative to this script's real location) into
# the mongo shell and runs find() against the cluster under test.

# Quote the inner substitution so a script path containing whitespace is not
# word-split before being handed to dirname.
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# The connection string is printed by a helper script under /scripts.
CONNECTION_URL=$(bash /scripts/print_connection_string.sh)
/bin/mongo --nodb --eval "var connection_string=${CONNECTION_URL};load('$SCRIPT_DIR/js/commands.js'); find();"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Loads js/commands.js (resolved relative to this script's real location) into
# the mongo shell and runs fsync() against the cluster under test.

# Quote the inner substitution so a script path containing whitespace is not
# word-split before being handed to dirname.
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# The connection string is printed by a helper script under /scripts.
CONNECTION_URL=$(bash /scripts/print_connection_string.sh)
/bin/mongo --nodb --eval "var connection_string=${CONNECTION_URL};load('$SCRIPT_DIR/js/commands.js'); fsync();"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Loads js/commands.js (resolved relative to this script's real location) into
# the mongo shell and runs insert() against the cluster under test.

# Quote the inner substitution so a script path containing whitespace is not
# word-split before being handed to dirname.
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# The connection string is printed by a helper script under /scripts.
CONNECTION_URL=$(bash /scripts/print_connection_string.sh)
/bin/mongo --nodb --eval "var connection_string=${CONNECTION_URL};load('$SCRIPT_DIR/js/commands.js'); insert();"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Loads js/commands.js (resolved relative to this script's real location) into
# the mongo shell and runs pitRead() against the cluster under test.

# Quote the inner substitution so a script path containing whitespace is not
# word-split before being handed to dirname.
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# The connection string is printed by a helper script under /scripts.
CONNECTION_URL=$(bash /scripts/print_connection_string.sh)
/bin/mongo --nodb --eval "var connection_string=${CONNECTION_URL};load('$SCRIPT_DIR/js/commands.js'); pitRead();"

View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Loads js/commands.js (resolved relative to this script's real location) into
# the mongo shell and runs validateCollections() against the cluster under test.

# Quote the inner substitution so a script path containing whitespace is not
# word-split before being handed to dirname.
SCRIPT_DIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")
# The connection string is printed by a helper script under /scripts.
CONNECTION_URL=$(bash /scripts/print_connection_string.sh)
/bin/mongo --nodb --eval "var connection_string=${CONNECTION_URL};load('$SCRIPT_DIR/js/commands.js'); validateCollections();"

View File

@ -0,0 +1,4 @@
#!/bin/bash
# Runs /scripts/run_resmoke.sh from /mongo with a fresh random seed (4 bytes
# read from /dev/urandom, printed as an unsigned decimal) and --shuffle.

# Abort instead of launching resmoke from the wrong directory if /mongo is missing.
cd /mongo || exit 1
# The od substitution is intentionally left unquoted: od pads its output with
# leading whitespace, and word splitting strips that padding from the seed.
/scripts/run_resmoke.sh --seed $(od -vAn -N4 -tu4 </dev/urandom) --shuffle --sanityCheck

View File

@ -0,0 +1,4 @@
#!/bin/bash
# Runs /scripts/run_resmoke.sh from /mongo with a fresh random seed (4 bytes
# read from /dev/urandom, printed as an unsigned decimal) and --shuffle.

# Abort instead of launching resmoke from the wrong directory if /mongo is missing.
cd /mongo || exit 1
# The od substitution is intentionally left unquoted: od pads its output with
# leading whitespace, and word splitting strips that padding from the seed.
/scripts/run_resmoke.sh --seed $(od -vAn -N4 -tu4 </dev/urandom) --shuffle --sanityCheck

View File

@ -201,6 +201,8 @@ DEFAULTS = {
"otel_collector_dir": None,
# The images to build for an External System Under Test
"docker_compose_build_images": None,
# The list of test composer directories to copy into the config container
"docker_compose_test_composer_dirs": None,
# Where the `--dockerComposeBuildImages` is happening.
"docker_compose_build_env": "local",
# Tag to use for images built & used for an External System Under Test
@ -815,6 +817,9 @@ SANITY_CHECK = False
# The images to build for an External System Under Test
DOCKER_COMPOSE_BUILD_IMAGES = None
# The list of test composer directories to copy into the config container
DOCKER_COMPOSE_TEST_COMPOSER_DIRS = None
# Where the `--dockerComposeBuildImages` is happening.
DOCKER_COMPOSE_BUILD_ENV = "local"

View File

@ -537,6 +537,11 @@ flags in common: {common_set}
_config.DOCKER_COMPOSE_BUILD_IMAGES = config.pop("docker_compose_build_images")
if _config.DOCKER_COMPOSE_BUILD_IMAGES is not None:
_config.DOCKER_COMPOSE_BUILD_IMAGES = _config.DOCKER_COMPOSE_BUILD_IMAGES.split(",")
_config.DOCKER_COMPOSE_TEST_COMPOSER_DIRS = config.pop("docker_compose_test_composer_dirs")
if _config.DOCKER_COMPOSE_TEST_COMPOSER_DIRS is not None:
_config.DOCKER_COMPOSE_TEST_COMPOSER_DIRS = _config.DOCKER_COMPOSE_TEST_COMPOSER_DIRS.split(
","
)
_config.DOCKER_COMPOSE_BUILD_ENV = config.pop("docker_compose_build_env")
_config.DOCKER_COMPOSE_TAG = config.pop("docker_compose_tag")
_config.EXTERNAL_SUT = config.pop("external_sut")

View File

@ -1453,6 +1453,18 @@ class RunPlugin(PluginInterface):
help=("The `tag` name to use for images built during a `--dockerComposeBuildImages`."),
)
parser.add_argument(
"--dockerComposeTestComposerDirs",
dest="docker_compose_test_composer_dirs",
metavar="DIR1,DIR2",
help=(
"Comma separated list of test composer directories to include in the config image"
" built with `--dockerComposeBuildImages`. These directories should be relative paths"
" within `buildscripts/antithesis/test_composer/`."
" Example: `random_resmoke,basic_js_commands`."
),
)
parser.add_argument(
"--externalSUT",
dest="external_sut",

View File

@ -48,6 +48,7 @@ class DockerComposeImageBuilder:
self.suite_fixture = suite_fixture
self.tag = config.DOCKER_COMPOSE_TAG
self.in_evergreen = config.DOCKER_COMPOSE_BUILD_ENV == "evergreen"
self.test_composer_dirs = config.DOCKER_COMPOSE_TEST_COMPOSER_DIRS
# Build context constants
self.DOCKER_COMPOSE_BUILD_CONTEXT = f"docker_compose/{self.suite_name}"
@ -128,15 +129,21 @@ class DockerComposeImageBuilder:
else:
ip_suffix = self.next_available_fault_disabled_ip
self.next_available_fault_disabled_ip += 1
volumes = [
f"./logs/{name}:/var/log/mongodb/",
"./scripts:/scripts/",
f"./data/{name}:/data/db",
]
if name == "workload" and self.test_composer_dirs:
volumes.append("./test_composer:/opt/antithesis/test/v1/")
return {
"container_name": name,
"hostname": name,
"image": f'{"workload" if name == "workload" else "mongo-binaries"}:{self.tag}',
"volumes": [
f"./logs/{name}:/var/log/mongodb/",
"./scripts:/scripts/",
f"./data/{name}:/data/db",
],
"volumes": volumes,
"command": f"/bin/bash /scripts/{name}.sh",
"networks": {"antithesis-net": {"ipv4_address": f"10.20.20.{ip_suffix}"}},
"depends_on": depends_on,
@ -169,6 +176,27 @@ class DockerComposeImageBuilder:
with open(os.path.join(build_context, "scripts", "run_resmoke.sh"), "w") as run_resmoke:
run_resmoke.write(f'{self.get_resmoke_run_command()} "$@"\n')
print("Writing print_connection_string.sh for convenience...")
with open(
os.path.join(build_context, "scripts", "print_connection_string.sh"), "w"
) as print_connection_string:
print_connection_string.write(
f'echo "{self.suite_fixture.get_shell_connection_string()}"\n'
)
if self.test_composer_dirs:
print("Copying test composer directories into build context...")
image_test_composer_dir = os.path.join(build_context, "test_composer")
base_test_composer_dir = os.path.join("buildscripts", "antithesis", "test_composer")
for test_composer_dir in self.test_composer_dirs:
origin_test_composer_dir = os.path.join(base_test_composer_dir, test_composer_dir)
target_test_composer_dir = os.path.join(image_test_composer_dir, test_composer_dir)
if not os.path.exists(origin_test_composer_dir):
raise ValueError(
f"Test composer directory does not exist: {origin_test_composer_dir}"
)
shutil.copytree(origin_test_composer_dir, target_test_composer_dir)
print("Writing mongo{d,s} init scripts...")
for process in self.suite_fixture.all_processes():
# Add the `Process` as a service in the docker-compose.yml
@ -189,10 +217,12 @@ class DockerComposeImageBuilder:
with open(os.path.join(build_context, "Dockerfile"), "w") as dockerfile:
dockerfile.write("FROM scratch\n")
dockerfile.write("COPY docker-compose.yml /\n")
dockerfile.write("ADD scripts /scripts\n")
dockerfile.write("ADD --chmod=0755 scripts /scripts\n")
dockerfile.write("ADD logs /logs\n")
dockerfile.write("ADD data /data\n")
dockerfile.write("ADD debug /debug\n")
if self.test_composer_dirs:
dockerfile.write("ADD --chmod=0755 test_composer /test_composer\n")
def _initialize_docker_compose_build_context(self, build_context) -> None:
"""

View File

@ -199,6 +199,147 @@ Important Note: This will happen for every antithesis task you schedule in your
`evergreen patch --param schedule_antithesis_tests=true`
## Types of testing in antithesis
### Normal resmoke testing
Antithesis constantly runs your resmoke suite with one random test from the suite at a time.
We support this out-of-the-box with most resmoke suites that use python fixtures.
This is very similar to how tests run in evergreen.
Your antithesis tasks in evergreen will default to this if the `antithesis_test_composer_dir` var is not specified on the task.
### Test Composer
Antithesis offers a resource called [Test Composer](https://antithesis.com/docs/test_templates/) to run "test templates" against our clusters. Test Composer enables autonomous testing by letting you define templates that guide Antithesis in generating thousands of test cases across multiple system states. Your evergreen tasks will automatically use Test Composer if the `antithesis_test_composer_dir` var is specified in the task, as shown in the example below.
#### What is Test Composer?
Test Composer uses an opinionated framework based on naming conventions to detect and run tests. Unlike traditional example-based testing, Test Composer templates tell Antithesis how to handle parallelism, test length, command order, and fault injection to explore your system's behavior comprehensively.
#### Test Composer Structure in MongoDB
MongoDB's Test Composer implementations are located in `buildscripts/antithesis/test_composer/`. The setup still uses a resmoke suite to determine cluster configuration, but test execution is controlled by Test Composer commands rather than running jstests directly.
#### Test Command Types
Test commands must be executable and placed directly under `/opt/antithesis/test/v1/<test_dir>/`. Our evergreen tasks handle building the images and putting the tests in the correct place for you. They follow the naming convention `<prefix>_<command>` where the prefix determines the command's behavior.
##### Driver Commands
Run during fault injection periods. At least one driver or anytime command is required.
- **`parallel_driver_<command>`**: Can run concurrently with other parallel drivers (including itself)
- Example: [parallel_driver_mongod_find.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_find.sh) - Executes random find queries
- Example: [parallel_driver_mongod_insert.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_insert.sh) - Inserts random documents
- Use for: Concurrent client operations, continuous availability checks, parallel workloads
- **`singleton_driver_<command>`**: Runs as the only driver command in a history branch
- Example: [singleton_driver_resmoke.sh](../../buildscripts/antithesis/test_composer/random_resmoke/singleton_driver_resmoke.sh) - Runs a single random resmoke test
- Use for: Porting existing integration tests, running complete workloads without interference
- **`serial_driver_<command>`**: Runs when no other driver commands are active
- Example: [serial_driver_resmoke.sh](../../buildscripts/antithesis/test_composer/random_resmoke/serial_driver_resmoke.sh) - Runs resmoke tests sequentially
- Use for: Full failover operations, validation steps that require quiescence
##### Quiescent Commands
Run in the absence of faults.
- **`first_<command>`**: Optional setup command that runs once before any driver commands
- Use for: Data initialization, schema setup, bootstrapping
- **`eventually_<command>`**: Runs after driver commands start. Kills all drivers and stops faults, creating a new branch
- Use for: Testing eventual consistency, availability after recovery, final state validation
- Note: Include retry loops for service availability
- **`finally_<command>`**: Like eventually, but only runs after all driver commands complete naturally
- Use for: Testing subtle invariants, final consistency checks
##### Advanced Commands
- **`anytime_<command>`**: Can run at any time after first command, even during singleton/serial drivers
- Use for: Continuous invariant checks, monitoring, low-consistency availability checks
#### MongoDB Test Composer Examples
##### Example 1: basic_js_commands Template
This template runs parallel JavaScript operations against MongoDB with built-in retry logic for network failures.
**Commands:**
- [parallel_driver_mongod_find.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_find.sh) - Random find queries
- [parallel_driver_mongod_insert.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_insert.sh) - Random inserts
- [parallel_driver_mongod_fsync.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_fsync.sh) - fsync operations
- [parallel_driver_mongod_pitread.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_pitread.sh) - Point-in-time reads with snapshot testing
- [parallel_driver_mongod_validate_collections.sh](../../buildscripts/antithesis/test_composer/basic_js_commands/parallel_driver_mongod_validate_collections.sh) - Collection validation
**Shared Logic:** [commands.js](../../buildscripts/antithesis/test_composer/basic_js_commands/js/commands.js) provides retry mechanisms for network errors and connection helpers.
**Key Features:**
- Automatic retry on `MongoNetworkError`, `MongoServerSelectionError`, `RetryableWriteError`
- Random test data generation
- Connection string discovery via `/scripts/print_connection_string.sh`
##### Example 2: random_resmoke Template
This template runs resmoke tests with randomization, adapting existing test infrastructure for Test Composer.
**Commands:**
- [singleton_driver_resmoke.sh](../../buildscripts/antithesis/test_composer/random_resmoke/singleton_driver_resmoke.sh) - Single random resmoke test
- [serial_driver_resmoke.sh](../../buildscripts/antithesis/test_composer/random_resmoke/serial_driver_resmoke.sh) - Sequential random resmoke tests
Both use random seeds and shuffling: `--seed $(od -vAn -N4 -tu4 < /dev/urandom) --shuffle --sanityCheck`
#### Creating a New Test Template
1. **Create a test directory:** `buildscripts/antithesis/test_composer/<your_template_name>/`
2. **Write test commands:** Create executable scripts with appropriate prefixes:
```bash
#!/usr/bin/env bash
# buildscripts/antithesis/test_composer/<template>/parallel_driver_mytest.sh
# Your test logic here
# This can run in parallel with other parallel_driver commands
```
3. **Make scripts executable:** `chmod +x buildscripts/antithesis/test_composer/<template>/*.sh`
4. **Helper files:** Use `helper_` prefix or subdirectories for shared code - these are ignored by Test Composer
#### Best Practices
- **Retry logic**: Always include retry mechanisms for network and transient errors (see [commands.js](../../buildscripts/antithesis/test_composer/basic_js_commands/js/commands.js) for examples)
- **Add randomization**: The more randomization you add to your tests, the more room Antithesis has to explore. Antithesis can control and reproduce the randomization, so if it finds an interesting path, it can explore that path further.
- **Start simple**: Begin with a `singleton_driver` to adapt existing tests, then evolve to parallel/serial commands
- **Idempotency**: Design tests to handle being killed and restarted at any time
#### Configuring Test Composer in Evergreen
To use Test Composer instead of normal resmoke testing, set the `antithesis_test_composer_dir` variable in your Evergreen task:
```yaml
- <<: *antithesis_task_template
name: antithesis_resmoke_suite_with_test_template
tags: ...
commands:
...
- func: "antithesis image build and push"
vars:
suite: concurrency_sharded_replication_with_balancer_and_config_transitions_and_add_remove_shard # Still used for cluster topology
resmoke_args: >- # any args that change the cluster topology can still be used
--runAllFeatureFlagTests
antithesis_test_composer_dir: basic_js_commands # Directory name under buildscripts/antithesis/test_composer/
```
## Additional Resources
If you are interested in leveraging Antithesis feel free to reach out to #server-testing on Slack.
If you are interested in leveraging Antithesis feel free to reach out to #ask-devprod-correctness or #server-testing on Slack.

View File

@ -352,6 +352,50 @@ tasks:
resmoke_args: >-
--runAllFeatureFlagTests
- <<: *antithesis_task_template
name: antithesis_replica_sets_basic_js_commands
tags:
[
"assigned_to_jira_team_devprod_correctness",
"experimental",
"antithesis",
]
commands:
- func: "do setup"
- func: "do setup for antithesis"
- func: "set up remote credentials"
vars:
aws_key_remote: ${aws_key}
aws_secret_remote: ${aws_secret}
- func: "antithesis image build and push"
vars:
suite: replica_sets_jscore_passthrough
resmoke_args: >-
--runAllFeatureFlagTests
antithesis_test_composer_dir: basic_js_commands
- <<: *antithesis_task_template
name: antithesis_sharding_basic_js_commands
tags:
[
"assigned_to_jira_team_devprod_correctness",
"experimental",
"antithesis",
]
commands:
- func: "do setup"
- func: "do setup for antithesis"
- func: "set up remote credentials"
vars:
aws_key_remote: ${aws_key}
aws_secret_remote: ${aws_secret}
- func: "antithesis image build and push"
vars:
suite: sharding_jscore_passthrough
resmoke_args: >-
--runAllFeatureFlagTests
antithesis_test_composer_dir: basic_js_commands
- <<: *antithesis_task_template
name: antithesis_replica_sets_lag_oplog_application_jscore_passthrough
tags:

View File

@ -47,11 +47,17 @@ echo "${antithesis_repo_key}" >mongodb.key.json
cat mongodb.key.json | sudo docker login -u _json_key https://us-central1-docker.pkg.dev --password-stdin
rm mongodb.key.json
extra_args=""
if [ -n "${antithesis_test_composer_dir:-}" ]; then
extra_args="--dockerComposeTestComposerDirs ${antithesis_test_composer_dir}"
fi
# Build Image
cd src
activate_venv
setup_db_contrib_tool
$python buildscripts/resmoke.py run --suite ${suite} ${resmoke_args} --dockerComposeTag $tag --dockerComposeBuildImages workload,config,mongo-binaries --dockerComposeBuildEnv evergreen
$python buildscripts/resmoke.py run --suite ${suite} ${resmoke_args} --dockerComposeTag $tag --dockerComposeBuildImages workload,config,mongo-binaries --dockerComposeBuildEnv evergreen ${extra_args}
# Test Image
docker-compose -f docker_compose/${suite}/docker-compose.yml up -d