PYTHON-5253 Automated Spec Test Sync (#2409)

Co-authored-by: Noah Stapp <noah@noahstapp.com>
This commit is contained in:
Iris 2025-07-15 08:34:47 -07:00 committed by GitHub
parent 84db915d91
commit ca3cbc3f31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 493 additions and 6 deletions

View File

@ -42,3 +42,23 @@ post:
- func: "upload mo artifacts"
- func: "upload test results"
- func: "cleanup"
tasks:
- name: resync_specs
commands:
- command: subprocess.exec
params:
binary: bash
include_expansions_in_env: [AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN]
args:
- .evergreen/scripts/resync-all-specs.sh
working_dir: src
buildvariants:
- name: resync_specs
display_name: "Resync Specs"
run_on: rhel80-small
cron: '0 16 * * MON'
patchable: true
tasks:
- name: resync_specs

View File

@ -0,0 +1,45 @@
#!/bin/bash
PYMONGO=$(dirname "$(cd "$(dirname "$0")" || exit; pwd)")
rm $PYMONGO/test/transactions/legacy/errors-client.json # PYTHON-1894
rm $PYMONGO/test/connection_monitoring/wait-queue-fairness.json # PYTHON-1873
rm $PYMONGO/test/client-side-encryption/spec/unified/fle2v2-BypassQueryAnalysis.json # PYTHON-5143
rm $PYMONGO/test/client-side-encryption/spec/unified/fle2v2-EncryptedFields-vs-EncryptedFieldsMap.json # PYTHON-5143
rm $PYMONGO/test/client-side-encryption/spec/unified/localSchema.json # PYTHON-5143
rm $PYMONGO/test/client-side-encryption/spec/unified/maxWireVersion.json # PYTHON-5143
rm $PYMONGO/test/unified-test-format/valid-pass/poc-queryable-encryption.json # PYTHON-5143
rm $PYMONGO/test/gridfs/rename.json # PYTHON-4931
rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-application-error.json # PYTHON-4918
rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-checkout-error.json # PYTHON-4918
rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-min-pool-size-error.json # PYTHON-4918
# Python doesn't implement DRIVERS-3064
rm $PYMONGO/test/collection_management/listCollections-rawdata.json
rm $PYMONGO/test/crud/unified/aggregate-rawdata.json
rm $PYMONGO/test/crud/unified/bulkWrite-deleteMany-rawdata.json
rm $PYMONGO/test/crud/unified/bulkWrite-deleteOne-rawdata.json
rm $PYMONGO/test/crud/unified/bulkWrite-replaceOne-rawdata.json
rm $PYMONGO/test/crud/unified/bulkWrite-updateMany-rawdata.json
rm $PYMONGO/test/crud/unified/bulkWrite-updateOne-rawdata.json
rm $PYMONGO/test/crud/unified/client-bulkWrite-delete-rawdata.json
rm $PYMONGO/test/crud/unified/client-bulkWrite-replaceOne-rawdata.json
rm $PYMONGO/test/crud/unified/client-bulkWrite-update-rawdata.json
rm $PYMONGO/test/crud/unified/count-rawdata.json
rm $PYMONGO/test/crud/unified/countDocuments-rawdata.json
rm $PYMONGO/test/crud/unified/db-aggregate-rawdata.json
rm $PYMONGO/test/crud/unified/deleteMany-rawdata.json
rm $PYMONGO/test/crud/unified/deleteOne-rawdata.json
rm $PYMONGO/test/crud/unified/distinct-rawdata.json
rm $PYMONGO/test/crud/unified/estimatedDocumentCount-rawdata.json
rm $PYMONGO/test/crud/unified/find-rawdata.json
rm $PYMONGO/test/crud/unified/findOneAndDelete-rawdata.json
rm $PYMONGO/test/crud/unified/findOneAndReplace-rawdata.json
rm $PYMONGO/test/crud/unified/findOneAndUpdate-rawdata.json
rm $PYMONGO/test/crud/unified/insertMany-rawdata.json
rm $PYMONGO/test/crud/unified/insertOne-rawdata.json
rm $PYMONGO/test/crud/unified/replaceOne-rawdata.json
rm $PYMONGO/test/crud/unified/updateMany-rawdata.json
rm $PYMONGO/test/crud/unified/updateOne-rawdata.json
rm $PYMONGO/test/index_management/index-rawdata.json
echo "Done removing unimplemented tests\n"

View File

@ -45,9 +45,12 @@ then
fi
# Ensure the JSON files are up to date.
cd $SPECS/source
make
cd -
if ! [ -n "${CI:-}" ]
then
cd $SPECS/source
make
cd -
fi
# cpjson unified-test-format/tests/invalid unified-test-format/invalid
# * param1: Path to spec tests dir in specifications repo
# * param2: Path to where the corresponding tests live in Python.
@ -110,7 +113,6 @@ do
cmap|CMAP|connection-monitoring-and-pooling)
cpjson connection-monitoring-and-pooling/tests/logging connection_logging
cpjson connection-monitoring-and-pooling/tests/cmap-format connection_monitoring
rm $PYMONGO/test/connection_monitoring/wait-queue-fairness.json # PYTHON-1873
;;
apm|APM|command-monitoring|command_monitoring)
cpjson command-logging-and-monitoring/tests/monitoring command_monitoring
@ -174,7 +176,7 @@ do
;;
server-selection|server_selection)
cpjson server-selection/tests/ server_selection
rm -rf $PYMONGO/test/server_selection/logging
rm -rf $PYMONGO/test/server_selection/logging # these tests live in server_selection_logging
cpjson server-selection/tests/logging server_selection_logging
;;
server-selection-logging|server_selection_logging)
@ -186,7 +188,6 @@ do
transactions|transactions-convenient-api)
cpjson transactions/tests/ transactions
cpjson transactions-convenient-api/tests/ transactions-convenient-api
rm $PYMONGO/test/transactions/legacy/errors-client.json # PYTHON-1894
;;
unified|unified-test-format)
cpjson unified-test-format/tests/ unified-test-format/

View File

@ -0,0 +1,50 @@
#!/usr/bin/env bash
tools="$(realpath -s "../drivers-tools")"
pushd $tools/.evergreen/github_app || exit
owner="mongodb"
repo="mongo-python-driver"
# Bootstrap the app.
echo "bootstrapping"
source utils.sh
bootstrap drivers/comment-bot
# Run the app.
source ./secrets-export.sh
# Get a github access token for the git checkout.
echo "Getting github token..."
token=$(bash ./get-access-token.sh $repo $owner)
if [ -z "${token}" ]; then
echo "Failed to get github access token!"
popd || exit
exit 1
fi
echo "Getting github token... done."
popd || exit
# Make the git checkout and create a new branch.
echo "Creating the git checkout..."
branch="spec-resync-"$(date '+%m-%d-%Y')
git remote set-url origin https://x-access-token:${token}@github.com/$owner/$repo.git
git checkout -b $branch "origin/master"
git add ./test
git commit -am "resyncing specs $(date '+%m-%d-%Y')"
echo "Creating the git checkout... done."
git push origin $branch
resp=$(curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $token" \
-H "X-GitHub-Api-Version: 2022-11-28" \
-d "{\"title\":\"[Spec Resync] $(date '+%m-%d-%Y')\",\"body\":\"$(cat "$1")\",\"head\":\"${branch}\",\"base\":\"master\"}" \
--url https://api.github.com/repos/$owner/$repo/pulls)
echo $resp | jq '.html_url'
echo "Creating the PR... done."
rm -rf $tools

View File

@ -0,0 +1,119 @@
from __future__ import annotations
import argparse
import os
import pathlib
import subprocess
from argparse import Namespace
from subprocess import CalledProcessError
from typing import Optional
def resync_specs(directory: pathlib.Path, errored: dict[str, str]) -> None:
"""Actually sync the specs"""
print("Beginning to sync specs") # noqa: T201
for spec in os.scandir(directory):
if not spec.is_dir():
continue
if spec.name in ["asynchronous"]:
continue
try:
subprocess.run(
["bash", "./.evergreen/resync-specs.sh", spec.name], # noqa: S603, S607
capture_output=True,
text=True,
check=True,
)
except CalledProcessError as exc:
errored[spec.name] = exc.stderr
print("Done syncing specs") # noqa: T201
def apply_patches():
print("Beginning to apply patches") # noqa: T201
subprocess.run(["bash", "./.evergreen/remove-unimplemented-tests.sh"], check=True) # noqa: S603, S607
subprocess.run(["git apply -R --allow-empty ./.evergreen/spec-patch/*"], shell=True, check=True) # noqa: S602, S607
def check_new_spec_directories(directory: pathlib.Path) -> list[str]:
"""Check to see if there are any directories in the spec repo that don't exist in pymongo/test"""
spec_dir = pathlib.Path(os.environ["MDB_SPECS"]) / "source"
spec_set = {
entry.name.replace("-", "_")
for entry in os.scandir(spec_dir)
if entry.is_dir()
and (pathlib.Path(entry.path) / "tests").is_dir()
and len(list(os.scandir(pathlib.Path(entry.path) / "tests"))) > 1
}
test_set = {entry.name.replace("-", "_") for entry in os.scandir(directory) if entry.is_dir()}
known_mappings = {
"ocsp_support": "ocsp",
"client_side_operations_timeout": "csot",
"mongodb_handshake": "handshake",
"load_balancers": "load_balancer",
"atlas_data_lake_testing": "atlas",
"connection_monitoring_and_pooling": "connection_monitoring",
"command_logging_and_monitoring": "command_logging",
"initial_dns_seedlist_discovery": "srv_seedlist",
"server_discovery_and_monitoring": "sdam_monitoring",
}
for k, v in known_mappings.items():
if k in spec_set:
spec_set.remove(k)
spec_set.add(v)
return list(spec_set - test_set)
def write_summary(errored: dict[str, str], new: list[str], filename: Optional[str]) -> None:
"""Generate the PR description"""
pr_body = ""
process = subprocess.run(
["git diff --name-only | awk -F'/' '{print $2}' | sort | uniq"], # noqa: S607
shell=True, # noqa: S602
capture_output=True,
text=True,
check=True,
)
succeeded = process.stdout.strip().split()
if len(succeeded) > 0:
pr_body += "The following specs were changed:\n -"
pr_body += "\n -".join(succeeded)
pr_body += "\n"
if len(errored) > 0:
pr_body += "\n\nThe following spec syncs encountered errors:\n -"
for k, v in errored.items():
pr_body += f"\n -{k}\n```{v}\n```"
pr_body += "\n"
if len(new) > 0:
pr_body += "\n\nThe following directories are in the specification repository and not in our test directory:\n -"
pr_body += "\n -".join(new)
pr_body += "\n"
if pr_body != "":
if filename is None:
print(f"\n{pr_body}") # noqa: T201
else:
with open(filename, "w") as f:
# replacements made for proper json
f.write(pr_body.replace("\n", "\\n").replace("\t", "\\t"))
def main(args: Namespace):
directory = pathlib.Path("./test")
errored: dict[str, str] = {}
resync_specs(directory, errored)
apply_patches()
new = check_new_spec_directories(directory)
write_summary(errored, new, args.filename)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Python Script to resync all specs and generate summary for PR."
)
parser.add_argument(
"--filename", help="Name of file for the summary to be written into.", default=None
)
args = parser.parse_args()
main(args)

View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Run spec syncing script and create PR
# SETUP
SRC_URL="https://github.com/mongodb/specifications.git"
# needs to be set for resync-specs.sh
SPEC_SRC="$(realpath "../specifications")"
SCRIPT="$(realpath "./.evergreen/resync-specs.sh")"
# Clone the spec repo if the directory does not exist
if [[ ! -d $SPEC_SRC ]]; then
git clone $SRC_URL $SPEC_SRC
if [[ $? -ne 0 ]]; then
echo "Error: Failed to clone repository."
exit 1
fi
fi
# Set environment variable to the cloned spec repo for resync-specs.sh
export MDB_SPECS="$SPEC_SRC"
# Check that resync-specs.sh exists and is executable
if [[ ! -x $SCRIPT ]]; then
echo "Error: $SCRIPT not found or is not executable."
exit 1
fi
PR_DESC="spec_sync.txt"
# run python script that actually does all the resyncing
if ! [ -n "${CI:-}" ]
then
# we're running locally
python3 ./.evergreen/scripts/resync-all-specs.py
else
/opt/devtools/bin/python3.11 ./.evergreen/scripts/resync-all-specs.py "$PR_DESC"
if [[ -f $PR_DESC ]]; then
# changes were made -> call scrypt to create PR for us
.evergreen/scripts/create-spec-pr.sh "$PR_DESC"
rm "$PR_DESC"
fi
fi

View File

@ -0,0 +1,12 @@
diff --git a/test/bson_corpus/datetime.json b/test/bson_corpus/datetime.json
index f857afdc..1554341d 100644
--- a/test/bson_corpus/datetime.json
+++ b/test/bson_corpus/datetime.json
@@ -24,6 +24,7 @@
{
"description" : "Y10K",
"canonical_bson" : "1000000009610000DC1FD277E6000000",
+ "relaxed_extjson" : "{\"a\":{\"$date\":{\"$numberLong\":\"253402300800000\"}}}",
"canonical_extjson" : "{\"a\":{\"$date\":{\"$numberLong\":\"253402300800000\"}}}"
},
{

View File

@ -0,0 +1,24 @@
diff --git a/test/connection_monitoring/pool-create-min-size-error.json b/test/connection_monitoring/pool-create-min-size-error.json
index 1c744b85..509b2a23 100644
--- a/test/connection_monitoring/pool-create-min-size-error.json
+++ b/test/connection_monitoring/pool-create-min-size-error.json
@@ -49,15 +49,15 @@
"type": "ConnectionCreated",
"address": 42
},
+ {
+ "type": "ConnectionPoolCleared",
+ "address": 42
+ },
{
"type": "ConnectionClosed",
"address": 42,
"connectionId": 42,
"reason": "error"
- },
- {
- "type": "ConnectionPoolCleared",
- "address": 42
}
],
"ignore": [

View File

@ -0,0 +1,93 @@
diff --git a/test/gridfs/delete.json b/test/gridfs/delete.json
index 277b9ed7..9a9b22fc 100644
--- a/test/gridfs/delete.json
+++ b/test/gridfs/delete.json
@@ -497,7 +497,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
],
@@ -650,7 +650,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
],
diff --git a/test/gridfs/download.json b/test/gridfs/download.json
index f0cb8517..67658ac5 100644
--- a/test/gridfs/download.json
+++ b/test/gridfs/download.json
@@ -338,7 +338,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
@@ -370,7 +370,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
@@ -402,7 +402,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
@@ -471,7 +471,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
@@ -514,7 +514,7 @@
}
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
diff --git a/test/gridfs/downloadByName.json b/test/gridfs/downloadByName.json
index 7b20933c..45abaf7b 100644
--- a/test/gridfs/downloadByName.json
+++ b/test/gridfs/downloadByName.json
@@ -290,7 +290,7 @@
"filename": "xyz"
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]
@@ -306,7 +306,7 @@
"revision": 999
},
"expectError": {
- "isError": true
+ "isClientError": true
}
}
]

View File

@ -0,0 +1,48 @@
diff --git a/test/sessions/driver-sessions-dirty-session-errors.json b/test/sessions/driver-sessions-dirty-session-errors.json
index 6aa1da1d..d7a1c6ab 100644
--- a/test/sessions/driver-sessions-dirty-session-errors.json
+++ b/test/sessions/driver-sessions-dirty-session-errors.json
@@ -347,7 +347,9 @@
"x": 1
}
},
- "new": false,
+ "new": {
+ "$$unsetOrMatches": false
+ },
"lsid": {
"$$sessionLsid": "session0"
},
@@ -375,7 +377,9 @@
"x": 1
}
},
- "new": false,
+ "new": {
+ "$$unsetOrMatches": false
+ },
"lsid": {
"$$sessionLsid": "session0"
},
@@ -627,7 +631,9 @@
"x": 1
}
},
- "new": false,
+ "new": {
+ "$$unsetOrMatches": false
+ },
"lsid": {
"$$type": "object"
},
@@ -655,7 +661,9 @@
"x": 1
}
},
- "new": false,
+ "new": {
+ "$$unsetOrMatches": false
+ },
"lsid": {
"$$type": "object"
},

View File

@ -441,6 +441,39 @@ update in PyMongo. This is primarily helpful if you are implementing a
new feature in PyMongo that has spec tests already implemented, or if
you are attempting to validate new spec tests in PyMongo.
### Automated Specification Test Resyncing
The (`/.evergreen/scripts/resync-all-specs.sh`) script
automatically runs once a week to resync all the specs with the [specifications repo](https://github.com/mongodb/specifications).
A PR will be generated by mongodb-drivers-pr-bot containing any changes picked up by this resync.
The PR description will display the name(s) of the updated specs along
with any errors that occurred.
Spec test changes associated with a behavioral change or bugfix that has yet to be implemented in PyMongo
must be added to a patch file in `/.evergreen/spec-patch`. Each patch
file must be named after the associated PYTHON ticket and contain the
test differences between PyMongo's current tests and the specification.
All changes listed in these patch files will be *undone* by the script and won't
be applied to PyMongo's tests.
When a new test file or folder is added to the spec repo before the associated code changes are implemented, that test's path must be added to `.evergreen/remove-unimplemented-tests.sh` along with a comment indicating the associated PYTHON ticket for those changes.
Any PR that implements a PYTHON ticket documented in a patch file or within `.evergreen/remove-unimplemented-tests.sh` must also remove the associated patch file or entry in `remove-unimplemented-tests.sh`.
#### Adding to a patch file
To add to or create a patch file, run `git diff` to show the desired changes to undo and copy the
results into the patch file.
For example: the imaginary, unimplemented PYTHON-1234 ticket has associated spec test changes. To add those changes to `PYTHON-1234.patch`), do the following:
```bash
git diff HEAD~1 path/to/file >> .evergreen/spec-patch/PYTHON-1234.patch
#### Running Locally
Both `resync-all-specs.sh` and `resync-all-specs.py` can be run locally (and won't generate a PR).
```bash
./.evergreen/scripts/resync-all-specs.sh
python3 ./.evergreen/scripts/resync-all-specs.py
```
## Making a Release
Follow the [Python Driver Release Process Wiki](https://wiki.corp.mongodb.com/display/DRIVERS/Python+Driver+Release+Process).