From ca3cbc3f31d46236317a82e29229d55a02a9c307 Mon Sep 17 00:00:00 2001 From: Iris <58442094+sleepyStick@users.noreply.github.com> Date: Tue, 15 Jul 2025 08:34:47 -0700 Subject: [PATCH 1/5] PYTHON-5253 Automated Spec Test Sync (#2409) Co-authored-by: Noah Stapp --- .evergreen/config.yml | 20 ++++ .evergreen/remove-unimplemented-tests.sh | 45 +++++++++ .evergreen/resync-specs.sh | 13 +-- .evergreen/scripts/create-spec-pr.sh | 50 ++++++++++ .evergreen/scripts/resync-all-specs.py | 119 +++++++++++++++++++++++ .evergreen/scripts/resync-all-specs.sh | 42 ++++++++ .evergreen/spec-patch/PYTHON-4884.patch | 12 +++ .evergreen/spec-patch/PYTHON-4918.patch | 24 +++++ .evergreen/spec-patch/PYTHON-4931.patch | 93 ++++++++++++++++++ .evergreen/spec-patch/PYTHON-5237.patch | 48 +++++++++ CONTRIBUTING.md | 33 +++++++ 11 files changed, 493 insertions(+), 6 deletions(-) create mode 100755 .evergreen/remove-unimplemented-tests.sh create mode 100755 .evergreen/scripts/create-spec-pr.sh create mode 100644 .evergreen/scripts/resync-all-specs.py create mode 100755 .evergreen/scripts/resync-all-specs.sh create mode 100644 .evergreen/spec-patch/PYTHON-4884.patch create mode 100644 .evergreen/spec-patch/PYTHON-4918.patch create mode 100644 .evergreen/spec-patch/PYTHON-4931.patch create mode 100644 .evergreen/spec-patch/PYTHON-5237.patch diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 46c86103a..91fa44277 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -42,3 +42,23 @@ post: - func: "upload mo artifacts" - func: "upload test results" - func: "cleanup" + +tasks: + - name: resync_specs + commands: + - command: subprocess.exec + params: + binary: bash + include_expansions_in_env: [AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN] + args: + - .evergreen/scripts/resync-all-specs.sh + working_dir: src + +buildvariants: + - name: resync_specs + display_name: "Resync Specs" + run_on: rhel80-small + cron: '0 16 * * MON' + patchable: true + tasks: + - name: resync_specs diff --git a/.evergreen/remove-unimplemented-tests.sh b/.evergreen/remove-unimplemented-tests.sh new file mode 100755 index 000000000..e31011f61 --- /dev/null +++ b/.evergreen/remove-unimplemented-tests.sh @@ -0,0 +1,45 @@ +#!/bin/bash +PYMONGO=$(dirname "$(cd "$(dirname "$0")" || exit; pwd)") + +rm $PYMONGO/test/transactions/legacy/errors-client.json # PYTHON-1894 +rm $PYMONGO/test/connection_monitoring/wait-queue-fairness.json # PYTHON-1873 +rm $PYMONGO/test/client-side-encryption/spec/unified/fle2v2-BypassQueryAnalysis.json # PYTHON-5143 +rm $PYMONGO/test/client-side-encryption/spec/unified/fle2v2-EncryptedFields-vs-EncryptedFieldsMap.json # PYTHON-5143 +rm $PYMONGO/test/client-side-encryption/spec/unified/localSchema.json # PYTHON-5143 +rm $PYMONGO/test/client-side-encryption/spec/unified/maxWireVersion.json # PYTHON-5143 +rm $PYMONGO/test/unified-test-format/valid-pass/poc-queryable-encryption.json # PYTHON-5143 +rm $PYMONGO/test/gridfs/rename.json # PYTHON-4931 +rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-application-error.json # PYTHON-4918 +rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-checkout-error.json # PYTHON-4918 +rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-min-pool-size-error.json # PYTHON-4918 + +# Python doesn't implement DRIVERS-3064 +rm $PYMONGO/test/collection_management/listCollections-rawdata.json +rm $PYMONGO/test/crud/unified/aggregate-rawdata.json +rm $PYMONGO/test/crud/unified/bulkWrite-deleteMany-rawdata.json +rm $PYMONGO/test/crud/unified/bulkWrite-deleteOne-rawdata.json +rm $PYMONGO/test/crud/unified/bulkWrite-replaceOne-rawdata.json +rm $PYMONGO/test/crud/unified/bulkWrite-updateMany-rawdata.json +rm $PYMONGO/test/crud/unified/bulkWrite-updateOne-rawdata.json +rm $PYMONGO/test/crud/unified/client-bulkWrite-delete-rawdata.json +rm $PYMONGO/test/crud/unified/client-bulkWrite-replaceOne-rawdata.json +rm $PYMONGO/test/crud/unified/client-bulkWrite-update-rawdata.json +rm $PYMONGO/test/crud/unified/count-rawdata.json +rm $PYMONGO/test/crud/unified/countDocuments-rawdata.json +rm $PYMONGO/test/crud/unified/db-aggregate-rawdata.json +rm $PYMONGO/test/crud/unified/deleteMany-rawdata.json +rm $PYMONGO/test/crud/unified/deleteOne-rawdata.json +rm $PYMONGO/test/crud/unified/distinct-rawdata.json +rm $PYMONGO/test/crud/unified/estimatedDocumentCount-rawdata.json +rm $PYMONGO/test/crud/unified/find-rawdata.json +rm $PYMONGO/test/crud/unified/findOneAndDelete-rawdata.json +rm $PYMONGO/test/crud/unified/findOneAndReplace-rawdata.json +rm $PYMONGO/test/crud/unified/findOneAndUpdate-rawdata.json +rm $PYMONGO/test/crud/unified/insertMany-rawdata.json +rm $PYMONGO/test/crud/unified/insertOne-rawdata.json +rm $PYMONGO/test/crud/unified/replaceOne-rawdata.json +rm $PYMONGO/test/crud/unified/updateMany-rawdata.json +rm $PYMONGO/test/crud/unified/updateOne-rawdata.json +rm $PYMONGO/test/index_management/index-rawdata.json + +echo "Done removing unimplemented tests\n" diff --git a/.evergreen/resync-specs.sh b/.evergreen/resync-specs.sh index d7dfafbba..765af2a56 100755 --- a/.evergreen/resync-specs.sh +++ b/.evergreen/resync-specs.sh @@ -45,9 +45,12 @@ then fi # Ensure the JSON files are up to date. -cd $SPECS/source -make -cd - +if ! [ -n "${CI:-}" ] +then + cd $SPECS/source + make + cd - +fi # cpjson unified-test-format/tests/invalid unified-test-format/invalid # * param1: Path to spec tests dir in specifications repo # * param2: Path to where the corresponding tests live in Python. @@ -110,7 +113,6 @@ do cmap|CMAP|connection-monitoring-and-pooling) cpjson connection-monitoring-and-pooling/tests/logging connection_logging cpjson connection-monitoring-and-pooling/tests/cmap-format connection_monitoring - rm $PYMONGO/test/connection_monitoring/wait-queue-fairness.json # PYTHON-1873 ;; apm|APM|command-monitoring|command_monitoring) cpjson command-logging-and-monitoring/tests/monitoring command_monitoring @@ -174,7 +176,7 @@ do ;; server-selection|server_selection) cpjson server-selection/tests/ server_selection - rm -rf $PYMONGO/test/server_selection/logging + rm -rf $PYMONGO/test/server_selection/logging # these tests live in server_selection_logging cpjson server-selection/tests/logging server_selection_logging ;; server-selection-logging|server_selection_logging) @@ -186,7 +188,6 @@ do transactions|transactions-convenient-api) cpjson transactions/tests/ transactions cpjson transactions-convenient-api/tests/ transactions-convenient-api - rm $PYMONGO/test/transactions/legacy/errors-client.json # PYTHON-1894 ;; unified|unified-test-format) cpjson unified-test-format/tests/ unified-test-format/ diff --git a/.evergreen/scripts/create-spec-pr.sh b/.evergreen/scripts/create-spec-pr.sh new file mode 100755 index 000000000..a5e49bb21 --- /dev/null +++ b/.evergreen/scripts/create-spec-pr.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +tools="$(realpath -s "../drivers-tools")" +pushd $tools/.evergreen/github_app || exit + +owner="mongodb" +repo="mongo-python-driver" + +# Bootstrap the app. +echo "bootstrapping" +source utils.sh +bootstrap drivers/comment-bot + +# Run the app. +source ./secrets-export.sh + +# Get a github access token for the git checkout. +echo "Getting github token..." + +token=$(bash ./get-access-token.sh $repo $owner) +if [ -z "${token}" ]; then + echo "Failed to get github access token!" + popd || exit + exit 1 +fi +echo "Getting github token... done." +popd || exit + +# Make the git checkout and create a new branch. +echo "Creating the git checkout..." +branch="spec-resync-"$(date '+%m-%d-%Y') + +git remote set-url origin https://x-access-token:${token}@github.com/$owner/$repo.git +git checkout -b $branch "origin/master" +git add ./test +git commit -am "resyncing specs $(date '+%m-%d-%Y')" +echo "Creating the git checkout... done." + +git push origin $branch +resp=$(curl -L \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $token" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + -d "{\"title\":\"[Spec Resync] $(date '+%m-%d-%Y')\",\"body\":\"$(cat "$1")\",\"head\":\"${branch}\",\"base\":\"master\"}" \ + --url https://api.github.com/repos/$owner/$repo/pulls) +echo $resp | jq '.html_url' +echo "Creating the PR... done." + +rm -rf $tools diff --git a/.evergreen/scripts/resync-all-specs.py b/.evergreen/scripts/resync-all-specs.py new file mode 100644 index 000000000..d824211d4 --- /dev/null +++ b/.evergreen/scripts/resync-all-specs.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import argparse +import os +import pathlib +import subprocess +from argparse import Namespace +from subprocess import CalledProcessError +from typing import Optional + + +def resync_specs(directory: pathlib.Path, errored: dict[str, str]) -> None: + """Actually sync the specs""" + print("Beginning to sync specs") # noqa: T201 + for spec in os.scandir(directory): + if not spec.is_dir(): + continue + + if spec.name in ["asynchronous"]: + continue + try: + subprocess.run( + ["bash", "./.evergreen/resync-specs.sh", spec.name], # noqa: S603, S607 + capture_output=True, + text=True, + check=True, + ) + except CalledProcessError as exc: + errored[spec.name] = exc.stderr + print("Done syncing specs") # noqa: T201 + + +def apply_patches(): + print("Beginning to apply patches") # noqa: T201 + subprocess.run(["bash", "./.evergreen/remove-unimplemented-tests.sh"], check=True) # noqa: S603, S607 + subprocess.run(["git apply -R --allow-empty ./.evergreen/spec-patch/*"], shell=True, check=True) # noqa: S602, S607 + + +def check_new_spec_directories(directory: pathlib.Path) -> list[str]: + """Check to see if there are any directories in the spec repo that don't exist in pymongo/test""" + spec_dir = pathlib.Path(os.environ["MDB_SPECS"]) / "source" + spec_set = { + entry.name.replace("-", "_") + for entry in os.scandir(spec_dir) + if entry.is_dir() + and (pathlib.Path(entry.path) / "tests").is_dir() + and len(list(os.scandir(pathlib.Path(entry.path) / "tests"))) > 1 + } + test_set = {entry.name.replace("-", "_") for entry in os.scandir(directory) if entry.is_dir()} + known_mappings = { + "ocsp_support": "ocsp", + "client_side_operations_timeout": "csot", + "mongodb_handshake": "handshake", + "load_balancers": "load_balancer", + "atlas_data_lake_testing": "atlas", + "connection_monitoring_and_pooling": "connection_monitoring", + "command_logging_and_monitoring": "command_logging", + "initial_dns_seedlist_discovery": "srv_seedlist", + "server_discovery_and_monitoring": "sdam_monitoring", + } + + for k, v in known_mappings.items(): + if k in spec_set: + spec_set.remove(k) + spec_set.add(v) + return list(spec_set - test_set) + + +def write_summary(errored: dict[str, str], new: list[str], filename: Optional[str]) -> None: + """Generate the PR description""" + pr_body = "" + process = subprocess.run( + ["git diff --name-only | awk -F'/' '{print $2}' | sort | uniq"], # noqa: S607 + shell=True, # noqa: S602 + capture_output=True, + text=True, + check=True, + ) + succeeded = process.stdout.strip().split() + if len(succeeded) > 0: + pr_body += "The following specs were changed:\n -" + pr_body += "\n -".join(succeeded) + pr_body += "\n" + if len(errored) > 0: + pr_body += "\n\nThe following spec syncs encountered errors:\n -" + for k, v in errored.items(): + pr_body += f"\n -{k}\n```{v}\n```" + pr_body += "\n" + if len(new) > 0: + pr_body += "\n\nThe following directories are in the specification repository and not in our test directory:\n -" + pr_body += "\n -".join(new) + pr_body += "\n" + if pr_body != "": + if filename is None: + print(f"\n{pr_body}") # noqa: T201 + else: + with open(filename, "w") as f: + # replacements made for proper json + f.write(pr_body.replace("\n", "\\n").replace("\t", "\\t")) + + +def main(args: Namespace): + directory = pathlib.Path("./test") + errored: dict[str, str] = {} + resync_specs(directory, errored) + apply_patches() + new = check_new_spec_directories(directory) + write_summary(errored, new, args.filename) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Python Script to resync all specs and generate summary for PR." + ) + parser.add_argument( + "--filename", help="Name of file for the summary to be written into.", default=None + ) + args = parser.parse_args() + main(args) diff --git a/.evergreen/scripts/resync-all-specs.sh b/.evergreen/scripts/resync-all-specs.sh new file mode 100755 index 000000000..0f7ae2ccd --- /dev/null +++ b/.evergreen/scripts/resync-all-specs.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Run spec syncing script and create PR + +# SETUP +SRC_URL="https://github.com/mongodb/specifications.git" +# needs to be set for resync-specs.sh +SPEC_SRC="$(realpath "../specifications")" +SCRIPT="$(realpath "./.evergreen/resync-specs.sh")" + +# Clone the spec repo if the directory does not exist +if [[ ! -d $SPEC_SRC ]]; then + git clone $SRC_URL $SPEC_SRC + if [[ $? -ne 0 ]]; then + echo "Error: Failed to clone repository." + exit 1 + fi +fi + +# Set environment variable to the cloned spec repo for resync-specs.sh +export MDB_SPECS="$SPEC_SRC" + +# Check that resync-specs.sh exists and is executable +if [[ ! -x $SCRIPT ]]; then + echo "Error: $SCRIPT not found or is not executable." + exit 1 +fi + +PR_DESC="spec_sync.txt" + +# run python script that actually does all the resyncing +if ! [ -n "${CI:-}" ] +then + # we're running locally + python3 ./.evergreen/scripts/resync-all-specs.py +else + /opt/devtools/bin/python3.11 ./.evergreen/scripts/resync-all-specs.py "$PR_DESC" + if [[ -f $PR_DESC ]]; then + # changes were made -> call scrypt to create PR for us + .evergreen/scripts/create-spec-pr.sh "$PR_DESC" + rm "$PR_DESC" + fi +fi diff --git a/.evergreen/spec-patch/PYTHON-4884.patch b/.evergreen/spec-patch/PYTHON-4884.patch new file mode 100644 index 000000000..0ef66e072 --- /dev/null +++ b/.evergreen/spec-patch/PYTHON-4884.patch @@ -0,0 +1,12 @@ +diff --git a/test/bson_corpus/datetime.json b/test/bson_corpus/datetime.json +index f857afdc..1554341d 100644 +--- a/test/bson_corpus/datetime.json ++++ b/test/bson_corpus/datetime.json +@@ -24,6 +24,7 @@ + { + "description" : "Y10K", + "canonical_bson" : "1000000009610000DC1FD277E6000000", ++ "relaxed_extjson" : "{\"a\":{\"$date\":{\"$numberLong\":\"253402300800000\"}}}", + "canonical_extjson" : "{\"a\":{\"$date\":{\"$numberLong\":\"253402300800000\"}}}" + }, + { diff --git a/.evergreen/spec-patch/PYTHON-4918.patch b/.evergreen/spec-patch/PYTHON-4918.patch new file mode 100644 index 000000000..5f409c587 --- /dev/null +++ b/.evergreen/spec-patch/PYTHON-4918.patch @@ -0,0 +1,24 @@ +diff --git a/test/connection_monitoring/pool-create-min-size-error.json b/test/connection_monitoring/pool-create-min-size-error.json +index 1c744b85..509b2a23 100644 +--- a/test/connection_monitoring/pool-create-min-size-error.json ++++ b/test/connection_monitoring/pool-create-min-size-error.json +@@ -49,15 +49,15 @@ + "type": "ConnectionCreated", + "address": 42 + }, ++ { ++ "type": "ConnectionPoolCleared", ++ "address": 42 ++ }, + { + "type": "ConnectionClosed", + "address": 42, + "connectionId": 42, + "reason": "error" +- }, +- { +- "type": "ConnectionPoolCleared", +- "address": 42 + } + ], + "ignore": [ diff --git a/.evergreen/spec-patch/PYTHON-4931.patch b/.evergreen/spec-patch/PYTHON-4931.patch new file mode 100644 index 000000000..ad7086b37 --- /dev/null +++ b/.evergreen/spec-patch/PYTHON-4931.patch @@ -0,0 +1,93 @@ +diff --git a/test/gridfs/delete.json b/test/gridfs/delete.json +index 277b9ed7..9a9b22fc 100644 +--- a/test/gridfs/delete.json ++++ b/test/gridfs/delete.json +@@ -497,7 +497,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ], +@@ -650,7 +650,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ], +diff --git a/test/gridfs/download.json b/test/gridfs/download.json +index f0cb8517..67658ac5 100644 +--- a/test/gridfs/download.json ++++ b/test/gridfs/download.json +@@ -338,7 +338,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +@@ -370,7 +370,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +@@ -402,7 +402,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +@@ -471,7 +471,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +@@ -514,7 +514,7 @@ + } + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +diff --git a/test/gridfs/downloadByName.json b/test/gridfs/downloadByName.json +index 7b20933c..45abaf7b 100644 +--- a/test/gridfs/downloadByName.json ++++ b/test/gridfs/downloadByName.json +@@ -290,7 +290,7 @@ + "filename": "xyz" + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] +@@ -306,7 +306,7 @@ + "revision": 999 + }, + "expectError": { +- "isError": true ++ "isClientError": true + } + } + ] diff --git a/.evergreen/spec-patch/PYTHON-5237.patch b/.evergreen/spec-patch/PYTHON-5237.patch new file mode 100644 index 000000000..01de56b6c --- /dev/null +++ b/.evergreen/spec-patch/PYTHON-5237.patch @@ -0,0 +1,48 @@ +diff --git a/test/sessions/driver-sessions-dirty-session-errors.json b/test/sessions/driver-sessions-dirty-session-errors.json +index 6aa1da1d..d7a1c6ab 100644 +--- a/test/sessions/driver-sessions-dirty-session-errors.json ++++ b/test/sessions/driver-sessions-dirty-session-errors.json +@@ -347,7 +347,9 @@ + "x": 1 + } + }, +- "new": false, ++ "new": { ++ "$$unsetOrMatches": false ++ }, + "lsid": { + "$$sessionLsid": "session0" + }, +@@ -375,7 +377,9 @@ + "x": 1 + } + }, +- "new": false, ++ "new": { ++ "$$unsetOrMatches": false ++ }, + "lsid": { + "$$sessionLsid": "session0" + }, +@@ -627,7 +631,9 @@ + "x": 1 + } + }, +- "new": false, ++ "new": { ++ "$$unsetOrMatches": false ++ }, + "lsid": { + "$$type": "object" + }, +@@ -655,7 +661,9 @@ + "x": 1 + } + }, +- "new": false, ++ "new": { ++ "$$unsetOrMatches": false ++ }, + "lsid": { + "$$type": "object" + }, diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5b5ddef9a..dc5ee4fe8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -441,6 +441,39 @@ update in PyMongo. This is primarily helpful if you are implementing a new feature in PyMongo that has spec tests already implemented, or if you are attempting to validate new spec tests in PyMongo. +### Automated Specification Test Resyncing +The (`/.evergreen/scripts/resync-all-specs.sh`) script +automatically runs once a week to resync all the specs with the [specifications repo](https://github.com/mongodb/specifications). +A PR will be generated by mongodb-drivers-pr-bot containing any changes picked up by this resync. +The PR description will display the name(s) of the updated specs along +with any errors that occurred. + +Spec test changes associated with a behavioral change or bugfix that has yet to be implemented in PyMongo +must be added to a patch file in `/.evergreen/spec-patch`. Each patch +file must be named after the associated PYTHON ticket and contain the +test differences between PyMongo's current tests and the specification. +All changes listed in these patch files will be *undone* by the script and won't +be applied to PyMongo's tests. + +When a new test file or folder is added to the spec repo before the associated code changes are implemented, that test's path must be added to `.evergreen/remove-unimplemented-tests.sh` along with a comment indicating the associated PYTHON ticket for those changes. + +Any PR that implements a PYTHON ticket documented in a patch file or within `.evergreen/remove-unimplemented-tests.sh` must also remove the associated patch file or entry in `remove-unimplemented-tests.sh`. + +#### Adding to a patch file +To add to or create a patch file, run `git diff` to show the desired changes to undo and copy the +results into the patch file. + +For example: the imaginary, unimplemented PYTHON-1234 ticket has associated spec test changes. To add those changes to `PYTHON-1234.patch`), do the following: +```bash +git diff HEAD~1 path/to/file >> .evergreen/spec-patch/PYTHON-1234.patch + +#### Running Locally +Both `resync-all-specs.sh` and `resync-all-specs.py` can be run locally (and won't generate a PR). +```bash +./.evergreen/scripts/resync-all-specs.sh +python3 ./.evergreen/scripts/resync-all-specs.py +``` + ## Making a Release Follow the [Python Driver Release Process Wiki](https://wiki.corp.mongodb.com/display/DRIVERS/Python+Driver+Release+Process). From 1e67c5c02c4f60eb92ec9740340f9c69abe097c3 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 15 Jul 2025 10:17:30 -0700 Subject: [PATCH 2/5] PYTHON-5289 Validate ignored bits are 0 on write for bson.BinaryVector (#2397) --- bson/binary.py | 17 +++++++++++++++++ doc/changelog.rst | 4 ++++ test/test_bson.py | 24 ++++++++++++++---------- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/bson/binary.py b/bson/binary.py index a1f63adf2..a43f81bf2 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -14,6 +14,7 @@ from __future__ import annotations import struct +import warnings from enum import Enum from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload from uuid import UUID @@ -255,6 +256,9 @@ class BinaryVector: self.dtype == other.dtype and self.padding == other.padding and self.data == other.data ) + def __len__(self) -> int: + return len(self.data) + class Binary(bytes): """Representation of BSON binary data. @@ -439,6 +443,9 @@ class Binary(bytes): :param padding: For fractional bytes, number of bits to ignore at end of vector. :return: Binary packed data identified by dtype and padding. + .. versionchanged:: 4.14 + When padding is non-zero, ignored bits should be zero. Raise exception on encoding, warn on decoding. + .. versionadded:: 4.10 """ if isinstance(vector, BinaryVector): @@ -471,6 +478,10 @@ class Binary(bytes): metadata = struct.pack(" BinaryVector: @@ -522,6 +533,12 @@ class Binary(bytes): dtype_format = "B" format_string = f"<{n_values}{dtype_format}" unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) + if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0: + warnings.warn( + "Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.", + DeprecationWarning, + stacklevel=2, + ) return BinaryVector(unpacked_uint8s, dtype, padding) else: diff --git a/doc/changelog.rst b/doc/changelog.rst index 933e2922d..e4da11209 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -58,6 +58,10 @@ PyMongo 4.13 brings a number of changes including: or the `migration guide `_ for more information. - Fixed a bug where :class:`pymongo.write_concern.WriteConcern` repr was not eval-able when using ``w="majority"``. +- When padding is set, ignored bits in a BSON BinaryVector of PACKED_BIT dtype should be set to zero. + When encoding, this is enforced and is a breaking change. + It is not yet enforced when decoding, so reading from the database will not fail, however a warning will be triggered. + From PyMongo 5.0, this rule will be enforced for both encoding and decoding. Issues Resolved ............... diff --git a/test/test_bson.py b/test/test_bson.py index e9a1dd1ca..e4cf85c46 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -739,7 +739,7 @@ class TestBSON(unittest.TestCase): """Tests of subtype 9""" # We start with valid cases, across the 3 dtypes implemented. # Work with a simple vector that can be interpreted as int8, float32, or ubyte - list_vector = [127, 7] + list_vector = [127, 8] # As INT8, vector has length 2 binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8) vector = binary_vector.as_vector() @@ -764,18 +764,18 @@ class TestBSON(unittest.TestCase): uncompressed = "" for val in list_vector: uncompressed += format(val, "08b") - assert uncompressed[:-padding] == "0111111100000" + assert uncompressed[:-padding] == "0111111100001" # It is worthwhile explicitly showing the values encoded to BSON padded_doc = {"padded_vec": padded_vec} assert ( encode(padded_doc) - == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00" + == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00" ) # and dumped to json assert ( json_util.dumps(padded_doc) - == '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}' + == '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}' ) # FLOAT32 is also implemented @@ -784,15 +784,19 @@ class TestBSON(unittest.TestCase): # Now some invalid cases for x in [-1, 257]: - try: + with self.assertRaises(struct.error): Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT) - except Exception as exc: - self.assertIsInstance(exc, struct.error) - else: - self.fail("Failed to raise an exception.") + + # Test one must pass zeros for all ignored bits + with self.assertRaises(ValueError): + Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7) + + with self.assertWarns(DeprecationWarning): + meta = struct.pack(" Date: Tue, 15 Jul 2025 11:27:38 -0700 Subject: [PATCH 3/5] PYTHON-5289 Fixes indentation in docstring of Binary.from_vector (#2432) --- bson/binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bson/binary.py b/bson/binary.py index a43f81bf2..693b838b8 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -444,7 +444,7 @@ class Binary(bytes): :return: Binary packed data identified by dtype and padding. .. versionchanged:: 4.14 - When padding is non-zero, ignored bits should be zero. Raise exception on encoding, warn on decoding. + When padding is non-zero, ignored bits should be zero. Raise exception on encoding, warn on decoding. .. versionadded:: 4.10 """ From 83fcf7cd084f6095c0be394b4f464ffa817fe321 Mon Sep 17 00:00:00 2001 From: Iris <58442094+sleepyStick@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:15:05 -0700 Subject: [PATCH 4/5] PYTHON-4931 Add spec tests for GridFS rename (#2431) --- .evergreen/remove-unimplemented-tests.sh | 1 - .evergreen/spec-patch/PYTHON-4931.patch | 93 ------------ test/asynchronous/unified_format.py | 3 + test/gridfs/delete.json | 4 +- test/gridfs/download.json | 10 +- test/gridfs/downloadByName.json | 4 +- test/gridfs/rename.json | 179 +++++++++++++++++++++++ test/unified_format.py | 3 + 8 files changed, 194 insertions(+), 103 deletions(-) delete mode 100644 .evergreen/spec-patch/PYTHON-4931.patch create mode 100644 test/gridfs/rename.json diff --git a/.evergreen/remove-unimplemented-tests.sh b/.evergreen/remove-unimplemented-tests.sh index e31011f61..d4eaff473 100755 --- a/.evergreen/remove-unimplemented-tests.sh +++ b/.evergreen/remove-unimplemented-tests.sh @@ -8,7 +8,6 @@ rm $PYMONGO/test/client-side-encryption/spec/unified/fle2v2-EncryptedFields-vs-E rm $PYMONGO/test/client-side-encryption/spec/unified/localSchema.json # PYTHON-5143 rm $PYMONGO/test/client-side-encryption/spec/unified/maxWireVersion.json # PYTHON-5143 rm $PYMONGO/test/unified-test-format/valid-pass/poc-queryable-encryption.json # PYTHON-5143 -rm $PYMONGO/test/gridfs/rename.json # PYTHON-4931 rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-application-error.json # PYTHON-4918 rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-checkout-error.json # PYTHON-4918 rm $PYMONGO/test/discovery_and_monitoring/unified/pool-clear-min-pool-size-error.json # PYTHON-4918 diff --git a/.evergreen/spec-patch/PYTHON-4931.patch b/.evergreen/spec-patch/PYTHON-4931.patch deleted file mode 100644 index ad7086b37..000000000 --- a/.evergreen/spec-patch/PYTHON-4931.patch +++ /dev/null @@ -1,93 +0,0 @@ -diff --git a/test/gridfs/delete.json b/test/gridfs/delete.json -index 277b9ed7..9a9b22fc 100644 ---- a/test/gridfs/delete.json -+++ b/test/gridfs/delete.json -@@ -497,7 +497,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ], -@@ -650,7 +650,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ], -diff --git a/test/gridfs/download.json b/test/gridfs/download.json -index f0cb8517..67658ac5 100644 ---- a/test/gridfs/download.json -+++ b/test/gridfs/download.json -@@ -338,7 +338,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -@@ -370,7 +370,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -@@ -402,7 +402,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -@@ -471,7 +471,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -@@ -514,7 +514,7 @@ - } - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -diff --git a/test/gridfs/downloadByName.json b/test/gridfs/downloadByName.json -index 7b20933c..45abaf7b 100644 ---- a/test/gridfs/downloadByName.json -+++ b/test/gridfs/downloadByName.json -@@ -290,7 +290,7 @@ - "filename": "xyz" - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] -@@ -306,7 +306,7 @@ - "revision": 999 - }, - "expectError": { -- "isError": true -+ "isClientError": true - } - } - ] diff --git a/test/asynchronous/unified_format.py b/test/asynchronous/unified_format.py index 5c221a6df..5b2e3563f 100644 --- a/test/asynchronous/unified_format.py +++ b/test/asynchronous/unified_format.py @@ -66,6 +66,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId from gridfs import AsyncGridFSBucket, GridOut, NoFile +from gridfs.errors import CorruptGridFile from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot from pymongo.asynchronous.change_stream import AsyncChangeStream from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState @@ -613,6 +614,8 @@ class UnifiedSpecTestMixinV1(AsyncIntegrationTest): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) + elif isinstance(error, CorruptGridFile): + pass elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: diff --git a/test/gridfs/delete.json b/test/gridfs/delete.json index 277b9ed7e..9a9b22fc1 100644 --- a/test/gridfs/delete.json +++ b/test/gridfs/delete.json @@ -497,7 +497,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ], @@ -650,7 +650,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ], diff --git a/test/gridfs/download.json b/test/gridfs/download.json index f0cb85170..67658ac51 100644 --- a/test/gridfs/download.json +++ b/test/gridfs/download.json @@ -338,7 +338,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ] @@ -370,7 +370,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ] @@ -402,7 +402,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ] @@ -471,7 +471,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ] @@ -514,7 +514,7 @@ } }, "expectError": { - "isError": true + "isClientError": true } } ] diff --git a/test/gridfs/downloadByName.json b/test/gridfs/downloadByName.json index 7b20933c1..45abaf7b4 100644 --- a/test/gridfs/downloadByName.json +++ b/test/gridfs/downloadByName.json @@ -290,7 +290,7 @@ "filename": "xyz" }, "expectError": { - "isError": true + "isClientError": true } } ] @@ -306,7 +306,7 @@ "revision": 999 }, "expectError": { - "isError": true + "isClientError": true } } ] diff --git a/test/gridfs/rename.json b/test/gridfs/rename.json new file mode 100644 index 000000000..08064d4a5 --- /dev/null +++ b/test/gridfs/rename.json @@ -0,0 +1,179 @@ +{ + "description": "gridfs-rename", + "schemaVersion": "1.0", + "createEntities": [ + { + "client": { + "id": "client0" + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "gridfs-tests" + } + }, + { + "bucket": { + "id": "bucket0", + "database": "database0" + } + }, + { + "collection": { + "id": "bucket0_files_collection", + "database": "database0", + "collectionName": "fs.files" + } + }, + { + "collection": { + "id": "bucket0_chunks_collection", + "database": "database0", + "collectionName": "fs.chunks" + } + } + ], + "initialData": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "files_id": { + "$oid": "000000000000000000000002" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ], + "tests": [ + { + "description": "rename by id", + "operations": [ + { + "name": "rename", + "object": "bucket0", + "arguments": { + "id": { + "$oid": "000000000000000000000001" + }, + "newFilename": "newfilename" + } + } + ], + "outcome": [ + { + "collectionName": "fs.files", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "newfilename", + "metadata": {} + }, + { + "_id": { + "$oid": "000000000000000000000002" + }, + "length": 0, + "chunkSize": 4, + "uploadDate": { + "$date": "1970-01-01T00:00:00.000Z" + }, + "filename": "filename", + "metadata": {} + } + ] + }, + { + "collectionName": "fs.chunks", + "databaseName": "gridfs-tests", + "documents": [ + { + "_id": { + "$oid": "000000000000000000000001" + }, + "files_id": { + "$oid": "000000000000000000000002" + }, + "n": 0, + "data": { + "$binary": { + "base64": "", + "subType": "00" + } + } + } + ] + } + ] + }, + { + "description": "rename when file id does not exist", + "operations": [ + { + "name": "rename", + "object": "bucket0", + "arguments": { + "id": { + "$oid": "000000000000000000000003" + }, + "newFilename": "newfilename" + }, + "expectError": { + "isClientError": true + } + } + ] + } + ] +} diff --git a/test/unified_format.py b/test/unified_format.py index f1e55d87b..859cb2997 100644 --- a/test/unified_format.py +++ b/test/unified_format.py @@ -65,6 +65,7 @@ from bson import SON, json_util from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.objectid import ObjectId from gridfs import GridFSBucket, GridOut, NoFile +from gridfs.errors import CorruptGridFile from pymongo import ASCENDING, CursorType, MongoClient, _csot from pymongo.driver_info import DriverInfo from pymongo.encryption_options import _HAVE_PYMONGOCRYPT @@ -612,6 +613,8 @@ class UnifiedSpecTestMixinV1(IntegrationTest): # Connection errors are considered client errors. if isinstance(error, ConnectionFailure): self.assertNotIsInstance(error, NotPrimaryError) + elif isinstance(error, CorruptGridFile): + pass elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)): pass else: From 3be7f76763464fe55f710ee427369197a21c0843 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 15 Jul 2025 15:38:15 -0500 Subject: [PATCH 5/5] PYTHON-4203 Update prose tests for mongos deprioritization during retryable ops (#2430) --- test/asynchronous/test_retryable_reads.py | 52 +++++++++++++++++++---- test/test_retryable_reads.py | 50 ++++++++++++++++++---- 2 files changed, 84 insertions(+), 18 deletions(-) diff --git a/test/asynchronous/test_retryable_reads.py b/test/asynchronous/test_retryable_reads.py index 4ac694b58..26454b382 100644 --- a/test/asynchronous/test_retryable_reads.py +++ b/test/asynchronous/test_retryable_reads.py @@ -21,7 +21,7 @@ import sys import threading from test.asynchronous.utils import async_set_fail_point -from pymongo.errors import AutoReconnect +from pymongo.errors import OperationFailure sys.path[0:0] = [""] @@ -147,15 +147,11 @@ class TestPoolPausedError(AsyncIntegrationTest): class TestRetryableReads(AsyncIntegrationTest): @async_client_context.require_multiple_mongoses @async_client_context.require_failCommand_fail_point - async def test_retryable_reads_in_sharded_cluster_multiple_available(self): + async def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self): fail_command = { "configureFailPoint": "failCommand", "mode": {"times": 1}, - "data": { - "failCommands": ["find"], - "closeConnection": True, - "appName": "retryableReadTest", - }, + "data": {"failCommands": ["find"], "errorCode": 6}, } mongos_clients = [] @@ -168,12 +164,11 @@ class TestRetryableReads(AsyncIntegrationTest): listener = OvertCommandListener() client = await self.async_rs_or_single_client( async_client_context.mongos_seeds(), - appName="retryableReadTest", event_listeners=[listener], retryReads=True, ) - with self.assertRaises(AutoReconnect): + with self.assertRaises(OperationFailure): await client.t.t.find_one({}) # Disable failpoints on each mongos @@ -184,6 +179,45 @@ class TestRetryableReads(AsyncIntegrationTest): self.assertEqual(len(listener.failed_events), 2) self.assertEqual(len(listener.succeeded_events), 0) + # Assert that both events occurred on different mongos. + assert listener.failed_events[0].connection_id != listener.failed_events[1].connection_id + + @async_client_context.require_multiple_mongoses + @async_client_context.require_failCommand_fail_point + async def test_retryable_reads_are_retried_on_the_same_mongos_when_no_others_are_available( + self + ): + fail_command = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": {"failCommands": ["find"], "errorCode": 6}, + } + + host = async_client_context.mongos_seeds().split(",")[0] + mongos_client = await self.async_rs_or_single_client(host) + await async_set_fail_point(mongos_client, fail_command) + + listener = OvertCommandListener() + client = await self.async_rs_or_single_client( + host, + directConnection=False, + event_listeners=[listener], + retryReads=True, + ) + + await client.t.t.find_one({}) + + # Disable failpoint. + fail_command["mode"] = "off" + await async_set_fail_point(mongos_client, fail_command) + + # Assert that exactly one failed command event and one succeeded command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred on the same mongos. + assert listener.succeeded_events[0].connection_id == listener.failed_events[0].connection_id + if __name__ == "__main__": unittest.main() diff --git a/test/test_retryable_reads.py b/test/test_retryable_reads.py index cfd85b1ac..fb8a374da 100644 --- a/test/test_retryable_reads.py +++ b/test/test_retryable_reads.py @@ -21,7 +21,7 @@ import sys import threading from test.utils import set_fail_point -from pymongo.errors import AutoReconnect +from pymongo.errors import OperationFailure sys.path[0:0] = [""] @@ -147,15 +147,11 @@ class TestPoolPausedError(IntegrationTest): class TestRetryableReads(IntegrationTest): @client_context.require_multiple_mongoses @client_context.require_failCommand_fail_point - def test_retryable_reads_in_sharded_cluster_multiple_available(self): + def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self): fail_command = { "configureFailPoint": "failCommand", "mode": {"times": 1}, - "data": { - "failCommands": ["find"], - "closeConnection": True, - "appName": "retryableReadTest", - }, + "data": {"failCommands": ["find"], "errorCode": 6}, } mongos_clients = [] @@ -168,12 +164,11 @@ class TestRetryableReads(IntegrationTest): listener = OvertCommandListener() client = self.rs_or_single_client( client_context.mongos_seeds(), - appName="retryableReadTest", event_listeners=[listener], retryReads=True, ) - with self.assertRaises(AutoReconnect): + with self.assertRaises(OperationFailure): client.t.t.find_one({}) # Disable failpoints on each mongos @@ -184,6 +179,43 @@ class TestRetryableReads(IntegrationTest): self.assertEqual(len(listener.failed_events), 2) self.assertEqual(len(listener.succeeded_events), 0) + # Assert that both events occurred on different mongos. + assert listener.failed_events[0].connection_id != listener.failed_events[1].connection_id + + @client_context.require_multiple_mongoses + @client_context.require_failCommand_fail_point + def test_retryable_reads_are_retried_on_the_same_mongos_when_no_others_are_available(self): + fail_command = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": {"failCommands": ["find"], "errorCode": 6}, + } + + host = client_context.mongos_seeds().split(",")[0] + mongos_client = self.rs_or_single_client(host) + set_fail_point(mongos_client, fail_command) + + listener = OvertCommandListener() + client = self.rs_or_single_client( + host, + directConnection=False, + event_listeners=[listener], + retryReads=True, + ) + + client.t.t.find_one({}) + + # Disable failpoint. + fail_command["mode"] = "off" + set_fail_point(mongos_client, fail_command) + + # Assert that exactly one failed command event and one succeeded command event occurred. + self.assertEqual(len(listener.failed_events), 1) + self.assertEqual(len(listener.succeeded_events), 1) + + # Assert that both events occurred on the same mongos. + assert listener.succeeded_events[0].connection_id == listener.failed_events[0].connection_id + if __name__ == "__main__": unittest.main()