This commit is contained in:
Jeffrey 'Alex' Clark 2026-05-07 01:58:39 +00:00 committed by GitHub
commit fb32d26142
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
156 changed files with 21323 additions and 2059 deletions

4
.codecov.yml Normal file
View File

@ -0,0 +1,4 @@
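# Hold back the Codecov pull-request comment until at least 100 coverage
# reports have been uploaded from the CI pipeline.
# Only the comment is delayed here; status notifications are configured
# separately through codecov.notify.after_n_builds.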
comment:
after_n_builds: 100

View File

@ -111,6 +111,8 @@ functions:
- LOAD_BALANCER
- LOCAL_ATLAS
- NO_EXT
- PYMONGO_BUILD_RUST
- PYMONGO_USE_RUST
type: test
- command: expansions.update
params:
@ -152,6 +154,8 @@ functions:
- IS_WIN32
- REQUIRE_FIPS
- TEST_MIN_DEPS
- PYMONGO_BUILD_RUST
- PYMONGO_USE_RUST
type: test
- command: subprocess.exec
params:
@ -250,6 +254,7 @@ functions:
working_dir: src
include_expansions_in_env:
- TOOLCHAIN_VERSION
- COVERAGE
type: test
# Upload coverage codecov
@ -268,7 +273,7 @@ functions:
- github_pr_number
- github_pr_head_branch
- github_author
- is_patch
- requester
- branch_name
type: test

View File

@ -75,7 +75,7 @@ tasks:
SUB_TEST_NAME: session-creds
TOOLCHAIN_VERSION: 3.14t
tags: [auth-aws, auth-aws-session-creds, free-threaded]
- name: test-auth-aws-rapid-web-identity-python3.14
- name: test-auth-aws-rapid-web-identity-python3.14-cov
commands:
- func: run server
vars:
@ -87,7 +87,8 @@ tasks:
TEST_NAME: auth_aws
SUB_TEST_NAME: web-identity
TOOLCHAIN_VERSION: "3.14"
tags: [auth-aws, auth-aws-web-identity]
COVERAGE: "1"
tags: [auth-aws, auth-aws-web-identity, pr]
- name: test-auth-aws-rapid-web-identity-session-name-python3.14
commands:
- func: run server
@ -904,7 +905,7 @@ tasks:
- ocsp-ecdsa
- rapid
- ocsp-staple
- name: test-ocsp-ecdsa-valid-cert-server-staples-latest-python3.14
- name: test-ocsp-ecdsa-valid-cert-server-staples-latest-python3.14-cov
commands:
- func: run tests
vars:
@ -913,11 +914,13 @@ tasks:
TEST_NAME: ocsp
TOOLCHAIN_VERSION: "3.14"
VERSION: latest
COVERAGE: "1"
tags:
- ocsp
- ocsp-ecdsa
- latest
- ocsp-staple
- pr
- name: test-ocsp-ecdsa-invalid-cert-server-staples-v4.4-python3.10-min-deps
commands:
- func: run tests
@ -1928,7 +1931,7 @@ tasks:
- ocsp-rsa
- rapid
- ocsp-staple
- name: test-ocsp-rsa-valid-cert-server-staples-latest-python3.14
- name: test-ocsp-rsa-valid-cert-server-staples-latest-python3.14-cov
commands:
- func: run tests
vars:
@ -1937,11 +1940,13 @@ tasks:
TEST_NAME: ocsp
TOOLCHAIN_VERSION: "3.14"
VERSION: latest
COVERAGE: "1"
tags:
- ocsp
- ocsp-rsa
- latest
- ocsp-staple
- pr
- name: test-ocsp-rsa-invalid-cert-server-staples-v4.4-python3.10-min-deps
commands:
- func: run tests
@ -2554,6 +2559,21 @@ tasks:
- func: attach benchmark test results
- func: send dashboard data
tags: [perf]
- name: perf-8.0-standalone-ssl-rust
commands:
- func: run server
vars:
VERSION: v8.0-perf
SSL: ssl
- func: run tests
vars:
TEST_NAME: perf
SUB_TEST_NAME: rust
PYMONGO_BUILD_RUST: "1"
PYMONGO_USE_RUST: "1"
- func: attach benchmark test results
- func: send dashboard data
tags: [perf]
- name: perf-8.0-standalone
commands:
- func: run server
@ -2580,6 +2600,21 @@ tasks:
- func: attach benchmark test results
- func: send dashboard data
tags: [perf]
- name: perf-8.0-standalone-rust
commands:
- func: run server
vars:
VERSION: v8.0-perf
SSL: nossl
- func: run tests
vars:
TEST_NAME: perf
SUB_TEST_NAME: rust
PYMONGO_BUILD_RUST: "1"
PYMONGO_USE_RUST: "1"
- func: attach benchmark test results
- func: send dashboard data
tags: [perf]
# Search index tests
- name: test-search-index-helpers
@ -2615,20 +2650,18 @@ tasks:
- replica_set-auth-nossl
- async
- free-threaded
- name: test-server-version-python3.13-sync-auth-nossl-replica-set-cov
- name: test-server-version-python3.13-sync-auth-nossl-replica-set
commands:
- func: run server
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: replica_set
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: replica_set
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
TEST_NAME: default_sync
tags:
@ -2636,20 +2669,18 @@ tasks:
- python-3.13
- replica_set-auth-nossl
- sync
- name: test-server-version-python3.12-async-auth-ssl-replica-set-cov
- name: test-server-version-python3.12-async-auth-ssl-replica-set
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_async
tags:
@ -2657,20 +2688,18 @@ tasks:
- python-3.12
- replica_set-auth-ssl
- async
- name: test-server-version-python3.11-sync-auth-ssl-replica-set-cov
- name: test-server-version-python3.11-sync-auth-ssl-replica-set
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_sync
tags:
@ -2743,20 +2772,18 @@ tasks:
- python-pypy3.11
- replica_set-noauth-ssl
- async
- name: test-server-version-python3.14-sync-noauth-ssl-replica-set-cov
- name: test-server-version-python3.14-sync-noauth-ssl-replica-set
commands:
- func: run server
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: replica_set
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_sync
tags:
@ -2764,20 +2791,18 @@ tasks:
- python-3.14
- replica_set-noauth-ssl
- sync
- name: test-server-version-python3.14-async-auth-nossl-sharded-cluster-cov
- name: test-server-version-python3.14-async-auth-nossl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_async
tags:
@ -2829,20 +2854,18 @@ tasks:
- sharded_cluster-auth-ssl
- async
- pr
- name: test-server-version-python3.11-async-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.11-async-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_async
tags:
@ -2850,20 +2873,18 @@ tasks:
- python-3.11
- sharded_cluster-auth-ssl
- async
- name: test-server-version-python3.12-async-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.12-async-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_async
tags:
@ -2871,20 +2892,18 @@ tasks:
- python-3.12
- sharded_cluster-auth-ssl
- async
- name: test-server-version-python3.13-async-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.13-async-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
TEST_NAME: default_async
tags:
@ -2892,20 +2911,18 @@ tasks:
- python-3.13
- sharded_cluster-auth-ssl
- async
- name: test-server-version-python3.14-async-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.14-async-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_async
tags:
@ -2976,20 +2993,18 @@ tasks:
- sharded_cluster-auth-ssl
- sync
- pr
- name: test-server-version-python3.11-sync-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.11-sync-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_sync
tags:
@ -2997,20 +3012,18 @@ tasks:
- python-3.11
- sharded_cluster-auth-ssl
- sync
- name: test-server-version-python3.12-sync-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.12-sync-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_sync
tags:
@ -3018,20 +3031,18 @@ tasks:
- python-3.12
- sharded_cluster-auth-ssl
- sync
- name: test-server-version-python3.13-sync-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.13-sync-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
TEST_NAME: default_sync
tags:
@ -3039,20 +3050,18 @@ tasks:
- python-3.13
- sharded_cluster-auth-ssl
- sync
- name: test-server-version-python3.14-sync-auth-ssl-sharded-cluster-cov
- name: test-server-version-python3.14-sync-auth-ssl-sharded-cluster
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_sync
tags:
@ -3099,20 +3108,18 @@ tasks:
- python-pypy3.11
- sharded_cluster-auth-ssl
- sync
- name: test-server-version-python3.12-async-noauth-nossl-sharded-cluster-cov
- name: test-server-version-python3.12-async-noauth-nossl-sharded-cluster
commands:
- func: run server
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_async
tags:
@ -3120,20 +3127,18 @@ tasks:
- python-3.12
- sharded_cluster-noauth-nossl
- async
- name: test-server-version-python3.11-sync-noauth-nossl-sharded-cluster-cov
- name: test-server-version-python3.11-sync-noauth-nossl-sharded-cluster
commands:
- func: run server
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: sharded_cluster
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_sync
tags:
@ -3141,7 +3146,7 @@ tasks:
- python-3.11
- sharded_cluster-noauth-nossl
- sync
- name: test-server-version-python3.10-async-noauth-ssl-sharded-cluster-min-deps-cov
- name: test-server-version-python3.10-async-noauth-ssl-sharded-cluster-min-deps
commands:
- func: run server
vars:
@ -3149,14 +3154,12 @@ tasks:
SSL: ssl
TOPOLOGY: sharded_cluster
TEST_MIN_DEPS: "1"
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: sharded_cluster
TEST_MIN_DEPS: "1"
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.10"
TEST_NAME: default_async
tags:
@ -3183,20 +3186,18 @@ tasks:
- python-pypy3.11
- sharded_cluster-noauth-ssl
- sync
- name: test-server-version-python3.13-async-auth-nossl-standalone-cov
- name: test-server-version-python3.13-async-auth-nossl-standalone
commands:
- func: run server
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
TEST_NAME: default_async
tags:
@ -3204,20 +3205,18 @@ tasks:
- python-3.13
- standalone-auth-nossl
- async
- name: test-server-version-python3.12-sync-auth-nossl-standalone-cov
- name: test-server-version-python3.12-sync-auth-nossl-standalone
commands:
- func: run server
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_sync
tags:
@ -3225,20 +3224,18 @@ tasks:
- python-3.12
- standalone-auth-nossl
- sync
- name: test-server-version-python3.11-async-auth-ssl-standalone-cov
- name: test-server-version-python3.11-async-auth-ssl-standalone
commands:
- func: run server
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: standalone
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: standalone
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_async
tags:
@ -3246,7 +3243,7 @@ tasks:
- python-3.11
- standalone-auth-ssl
- async
- name: test-server-version-python3.10-sync-auth-ssl-standalone-min-deps-cov
- name: test-server-version-python3.10-sync-auth-ssl-standalone-min-deps
commands:
- func: run server
vars:
@ -3254,14 +3251,12 @@ tasks:
SSL: ssl
TOPOLOGY: standalone
TEST_MIN_DEPS: "1"
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: standalone
TEST_MIN_DEPS: "1"
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.10"
TEST_NAME: default_sync
tags:
@ -3293,18 +3288,20 @@ tasks:
- standalone-noauth-nossl
- async
- pr
- name: test-server-version-pypy3.11-sync-noauth-nossl-standalone
- name: test-server-version-pypy3.11-sync-noauth-nossl-standalone-cov
commands:
- func: run server
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: standalone
COVERAGE: "1"
TOOLCHAIN_VERSION: pypy3.11
TEST_NAME: default_sync
tags:
@ -3313,20 +3310,18 @@ tasks:
- standalone-noauth-nossl
- sync
- pr
- name: test-server-version-python3.14-async-noauth-ssl-standalone-cov
- name: test-server-version-python3.14-async-noauth-ssl-standalone
commands:
- func: run server
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: standalone
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: standalone
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_async
tags:
@ -4082,7 +4077,7 @@ tasks:
- standalone-noauth-nossl
- async
- pypy
- name: test-standard-latest-python3.12-async-noauth-ssl-replica-set
- name: test-standard-latest-python3.12-async-noauth-ssl-replica-set-cov
commands:
- func: run server
vars:
@ -4090,12 +4085,14 @@ tasks:
SSL: ssl
TOPOLOGY: replica_set
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: replica_set
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.12"
TEST_NAME: default_async
tags:
@ -4128,7 +4125,7 @@ tasks:
- replica_set-noauth-ssl
- async
- pypy
- name: test-standard-latest-python3.13-async-auth-ssl-sharded-cluster
- name: test-standard-latest-python3.13-async-auth-ssl-sharded-cluster-cov
commands:
- func: run server
vars:
@ -4136,12 +4133,14 @@ tasks:
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
TEST_NAME: default_async
tags:
@ -4151,7 +4150,7 @@ tasks:
- sharded_cluster-auth-ssl
- async
- pr
- name: test-standard-latest-python3.11-async-noauth-nossl-standalone
- name: test-standard-latest-python3.11-async-noauth-nossl-standalone-cov
commands:
- func: run server
vars:
@ -4159,12 +4158,14 @@ tasks:
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
TEST_NAME: default_async
tags:
@ -4174,7 +4175,7 @@ tasks:
- standalone-noauth-nossl
- async
- pr
- name: test-standard-latest-python3.14-async-noauth-nossl-standalone
- name: test-standard-latest-python3.14-async-noauth-nossl-standalone-cov
commands:
- func: run server
vars:
@ -4182,12 +4183,14 @@ tasks:
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
TEST_NAME: default_async
tags:
@ -4829,7 +4832,7 @@ tasks:
- python-3.13
- standalone-noauth-nossl
- noauth
- name: test-non-standard-latest-python3.14t-noauth-ssl-replica-set
- name: test-non-standard-latest-python3.14t-noauth-ssl-replica-set-cov
commands:
- func: run server
vars:
@ -4837,12 +4840,14 @@ tasks:
SSL: ssl
TOPOLOGY: replica_set
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: ssl
TOPOLOGY: replica_set
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: 3.14t
tags:
- test-non-standard
@ -4874,7 +4879,7 @@ tasks:
- replica_set-noauth-ssl
- noauth
- pypy
- name: test-non-standard-latest-python3.14-auth-ssl-sharded-cluster
- name: test-non-standard-latest-python3.14-auth-ssl-sharded-cluster-cov
commands:
- func: run server
vars:
@ -4882,12 +4887,14 @@ tasks:
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.14"
tags:
- test-non-standard
@ -4896,7 +4903,7 @@ tasks:
- sharded_cluster-auth-ssl
- auth
- pr
- name: test-non-standard-latest-python3.13-noauth-nossl-standalone
- name: test-non-standard-latest-python3.13-noauth-nossl-standalone-cov
commands:
- func: run server
vars:
@ -4904,12 +4911,14 @@ tasks:
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: noauth
SSL: nossl
TOPOLOGY: standalone
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.13"
tags:
- test-non-standard
@ -5007,7 +5016,7 @@ tasks:
- pypy
# Test numpy tests
- name: test-numpy-python3.10
- name: test-numpy-python3.10-python3.10
commands:
- func: test numpy
vars:
@ -5017,16 +5026,18 @@ tasks:
- vector
- python-3.10
- test-numpy
- name: test-numpy-python3.14
- name: test-numpy-python3.14-python3.14-cov
commands:
- func: test numpy
vars:
TOOLCHAIN_VERSION: "3.14"
COVERAGE: "1"
tags:
- binary
- vector
- python-3.14
- test-numpy
- pr
# Test standard auth tests
- name: test-standard-auth-v4.2-python3.10-auth-ssl-sharded-cluster-min-deps
@ -5290,7 +5301,7 @@ tasks:
- sharded_cluster-auth-ssl
- auth
- pypy
- name: test-standard-auth-latest-python3.11-auth-ssl-sharded-cluster
- name: test-standard-auth-latest-python3.11-auth-ssl-sharded-cluster-cov
commands:
- func: run server
vars:
@ -5298,12 +5309,14 @@ tasks:
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
- func: run tests
vars:
AUTH: auth
SSL: ssl
TOPOLOGY: sharded_cluster
VERSION: latest
COVERAGE: "1"
TOOLCHAIN_VERSION: "3.11"
tags:
- test-standard-auth

View File

@ -368,7 +368,6 @@ buildvariants:
run_on:
- rhel87-small
expansions:
COVERAGE: "1"
NO_EXT: "1"
# No server tests
@ -420,6 +419,8 @@ buildvariants:
run_on:
- ubuntu2204-small
batchtime: 1440
expansions:
COVERAGE: "1"
tags: [pr]
- name: auth-oidc-macos
tasks:
@ -477,6 +478,40 @@ buildvariants:
expansions:
SUB_TEST_NAME: pyopenssl
# Rust tests
- name: test-with-rust-extension
tasks:
- name: .test-standard .server-latest .pr
display_name: Test with Rust Extension
run_on:
- rhel87-small
expansions:
PYMONGO_BUILD_RUST: "1"
PYMONGO_USE_RUST: "1"
tags: [rust, pr]
- name: test-with-rust-extension---macos-arm64
tasks:
- name: .test-standard .server-latest !.pr
display_name: Test with Rust Extension - macOS ARM64
run_on:
- macos-14-arm64
batchtime: 10080
expansions:
PYMONGO_BUILD_RUST: "1"
PYMONGO_USE_RUST: "1"
tags: [rust]
- name: test-with-rust-extension---windows
tasks:
- name: .test-standard .server-latest !.pr
display_name: Test with Rust Extension - Windows
run_on:
- windows-64-vsMulti-small
batchtime: 10080
expansions:
PYMONGO_BUILD_RUST: "1"
PYMONGO_USE_RUST: "1"
tags: [rust]
# Search index tests
- name: search-index-helpers-rhel8
tasks:
@ -614,6 +649,7 @@ buildvariants:
- name: test-win64
tasks:
- name: .test-standard !.pypy
- name: .test-no-orchestration !.pypy
display_name: "* Test Win64"
run_on:
- windows-2022-latest-small

View File

@ -94,6 +94,9 @@ do
change-streams|change_streams)
cpjson change-streams/tests/ change_streams/
;;
client-backpressure|client_backpressure)
cpjson client-backpressure/tests client-backpressure
;;
client-side-encryption|csfle|fle)
cpjson client-side-encryption/tests/ client-side-encryption/spec
cpjson client-side-encryption/corpus/ client-side-encryption/corpus

View File

@ -38,6 +38,7 @@ trap "cleanup_tests" SIGINT ERR
# Start the test runner.
echo "Running tests with UV_PYTHON=${UV_PYTHON:-}..."
echo "UV_ARGS=${UV_ARGS}"
uv run ${UV_ARGS} --reinstall-package pymongo .evergreen/scripts/run_tests.py "$@"
echo "Running tests with UV_PYTHON=${UV_PYTHON:-}... done."

View File

@ -14,6 +14,7 @@ fi
PROJECT_DIRECTORY="$(pwd)"
DRIVERS_TOOLS="$(dirname $PROJECT_DIRECTORY)/drivers-tools"
CARGO_HOME=${CARGO_HOME:-${DRIVERS_TOOLS}/.cargo}
RUSTUP_HOME=${RUSTUP_HOME:-${CARGO_HOME}}
UV_TOOL_DIR=$PROJECT_DIRECTORY/.local/uv/tools
UV_CACHE_DIR=$PROJECT_DIRECTORY/.local/uv/cache
DRIVERS_TOOLS_BINARIES="$DRIVERS_TOOLS/.bin"
@ -27,13 +28,14 @@ else
PYMONGO_BIN_DIR=$HOME/cli_bin
fi
PATH_EXT="$MONGODB_BINARIES:$DRIVERS_TOOLS_BINARIES:$PYMONGO_BIN_DIR:\$PATH"
PATH_EXT="$MONGODB_BINARIES:$DRIVERS_TOOLS_BINARIES:$PYMONGO_BIN_DIR:$CARGO_HOME/bin:\$PATH"
# Python has cygwin path problems on Windows. Detect prospective mongo-orchestration home directory
if [ "Windows_NT" = "${OS:-}" ]; then # Magic variable in cygwin
DRIVERS_TOOLS=$(cygpath -m $DRIVERS_TOOLS)
PROJECT_DIRECTORY=$(cygpath -m $PROJECT_DIRECTORY)
CARGO_HOME=$(cygpath -m $CARGO_HOME)
RUSTUP_HOME=$(cygpath -m $RUSTUP_HOME)
UV_TOOL_DIR=$(cygpath -m "$UV_TOOL_DIR")
UV_CACHE_DIR=$(cygpath -m "$UV_CACHE_DIR")
DRIVERS_TOOLS_BINARIES=$(cygpath -m "$DRIVERS_TOOLS_BINARIES")
@ -62,6 +64,7 @@ export DRIVERS_TOOLS_BINARIES="$DRIVERS_TOOLS_BINARIES"
export PROJECT_DIRECTORY="$PROJECT_DIRECTORY"
export CARGO_HOME="$CARGO_HOME"
export RUSTUP_HOME="$RUSTUP_HOME"
export UV_TOOL_DIR="$UV_TOOL_DIR"
export UV_CACHE_DIR="$UV_CACHE_DIR"
export UV_TOOL_BIN_DIR="$DRIVERS_TOOLS_BINARIES"

View File

@ -97,6 +97,8 @@ def create_standard_nonlinux_variants() -> list[BuildVariant]:
tasks = [
f".test-standard !.pypy .server-{version}" for version in get_versions_from("6.0")
]
if host_name == "win64":
tasks.append(".test-no-orchestration !.pypy")
host = HOSTS[host_name]
tags = ["standard-non-linux"]
expansions = dict()
@ -318,7 +320,7 @@ def create_green_framework_variants():
def create_no_c_ext_variants():
host = DEFAULT_HOST
tasks = [".test-standard"]
expansions = dict(COVERAGE="1")
expansions = dict()
handle_c_ext(C_EXTS[0], expansions)
display_name = get_variant_name("No C Ext", host)
return [create_variant(tasks, display_name, host=host, expansions=expansions)]
def create_test_numpy_tasks():
    """Create one "test numpy" task per interpreter listed in MIN_MAX_PYTHON.

    The task for the last entry of MIN_MAX_PYTHON is additionally tagged "pr"
    and runs with COVERAGE="1".

    Returns:
        A list of EvgTask objects, one per interpreter.
    """
    tasks = []
    for python in MIN_MAX_PYTHON:
        tags = ["binary", "vector", f"python-{python}", "test-numpy"]
        test_vars = dict(TOOLCHAIN_VERSION=python)
        if python == MIN_MAX_PYTHON[-1]:
            tags.append("pr")
            test_vars["COVERAGE"] = "1"
        # Pass the interpreter only once. Supplying python=python in addition to
        # TOOLCHAIN_VERSION produced doubled names such as
        # "test-numpy-python3.10-python3.10" in the generated config.
        # NOTE(review): presumably get_task_name derives the python suffix from
        # TOOLCHAIN_VERSION, as the create_aws_tasks names suggest - confirm, then
        # regenerate the Evergreen config so the task names match.
        task_name = get_task_name("test-numpy", **test_vars)
        test_func = FunctionCall(func="test numpy", vars=test_vars)
        tasks.append(EvgTask(name=task_name, tags=tags, commands=[test_func]))
    return tasks
@ -397,6 +403,7 @@ def create_oidc_auth_variants():
tags=["pr"],
host=host,
batchtime=BATCHTIME_DAY,
expansions=dict(COVERAGE="1"),
)
)
return variants
@ -596,7 +603,7 @@ def create_server_version_tasks():
expansions["TEST_MIN_DEPS"] = "1"
if "t" in python:
tags.append("free-threaded")
if python not in PYPYS and "t" not in python:
if "pr" in tags:
expansions["COVERAGE"] = "1"
name = get_task_name(
"test-server-version",
@ -661,6 +668,8 @@ def create_test_non_standard_tasks():
expansions = dict(AUTH=auth, SSL=ssl, TOPOLOGY=topology, VERSION=version)
if python == ALL_PYTHONS[0]:
expansions["TEST_MIN_DEPS"] = "1"
elif pr:
expansions["COVERAGE"] = "1"
name = get_task_name("test-non-standard", python=python, **expansions)
server_func = FunctionCall(func="run server", vars=expansions)
test_vars = expansions.copy()
@ -703,6 +712,8 @@ def create_test_standard_auth_tasks():
expansions = dict(AUTH=auth, SSL=ssl, TOPOLOGY=topology, VERSION=version)
if python == ALL_PYTHONS[0]:
expansions["TEST_MIN_DEPS"] = "1"
elif pr:
expansions["COVERAGE"] = "1"
name = get_task_name("test-standard-auth", python=python, **expansions)
server_func = FunctionCall(func="run server", vars=expansions)
test_vars = expansions.copy()
@ -741,6 +752,8 @@ def create_standard_tasks():
expansions = dict(AUTH=auth, SSL=ssl, TOPOLOGY=topology, VERSION=version)
if python == ALL_PYTHONS[0]:
expansions["TEST_MIN_DEPS"] = "1"
elif pr:
expansions["COVERAGE"] = "1"
name = get_task_name("test-standard", python=python, sync=sync, **expansions)
server_func = FunctionCall(func="run server", vars=expansions)
test_vars = expansions.copy()
@ -810,8 +823,11 @@ def create_aws_tasks():
if "t" in python:
tags.append("free-threaded")
test_vars = dict(TEST_NAME="auth_aws", SUB_TEST_NAME=test_type, TOOLCHAIN_VERSION=python)
if python == ALL_PYTHONS[0]:
if python == MIN_MAX_PYTHON[0]:
test_vars["TEST_MIN_DEPS"] = "1"
elif python == MIN_MAX_PYTHON[-1]:
tags.append("pr")
test_vars["COVERAGE"] = "1"
name = get_task_name(f"{base_name}-{test_type}", **test_vars)
test_func = FunctionCall(func="run tests", vars=test_vars)
funcs = [server_func, assume_func, test_func]
@ -849,11 +865,11 @@ def create_oidc_tasks():
tasks = []
for sub_test in ["default", "azure", "gcp", "eks", "aks", "gke"]:
vars = dict(TEST_NAME="auth_oidc", SUB_TEST_NAME=sub_test)
test_func = FunctionCall(func="run tests", vars=vars)
task_name = f"test-auth-oidc-{sub_test}"
tags = ["auth_oidc"]
if sub_test != "default":
tags.append("auth_oidc_remote")
test_func = FunctionCall(func="run tests", vars=vars)
task_name = get_task_name(f"test-auth-oidc-{sub_test}", **vars)
tasks.append(EvgTask(name=task_name, tags=tags, commands=[test_func]))
return tasks
@ -903,14 +919,14 @@ def _create_ocsp_tasks(algo, variant, server_type, base_task_name):
)
if python == ALL_PYTHONS[0]:
vars["TEST_MIN_DEPS"] = "1"
test_func = FunctionCall(func="run tests", vars=vars)
tags = ["ocsp", f"ocsp-{algo}", version]
if "disableStapling" not in variant:
tags.append("ocsp-staple")
if algo == "valid-cert-server-staples" and version == "latest":
if base_task_name == "valid-cert-server-staples" and version == "latest":
tags.append("pr")
if "TEST_MIN_DEPS" not in vars:
vars["COVERAGE"] = "1"
test_func = FunctionCall(func="run tests", vars=vars)
task_name = get_task_name(f"test-ocsp-{algo}-{base_task_name}", **vars)
tasks.append(EvgTask(name=task_name, tags=tags, commands=[test_func]))
@ -958,11 +974,15 @@ def create_search_index_tasks():
def create_perf_tasks():
tasks = []
for version, ssl, sync in product(["8.0"], ["ssl", "nossl"], ["sync", "async"]):
for version, ssl, sync in product(["8.0"], ["ssl", "nossl"], ["sync", "async", "rust"]):
vars = dict(VERSION=f"v{version}-perf", SSL=ssl)
server_func = FunctionCall(func="run server", vars=vars)
vars = dict(TEST_NAME="perf", SUB_TEST_NAME=sync)
test_func = FunctionCall(func="run tests", vars=vars)
test_vars = dict(TEST_NAME="perf", SUB_TEST_NAME=sync)
# Enable Rust for rust perf tests
if sync == "rust":
test_vars["PYMONGO_BUILD_RUST"] = "1"
test_vars["PYMONGO_USE_RUST"] = "1"
test_func = FunctionCall(func="run tests", vars=test_vars)
attach_func = FunctionCall(func="attach benchmark test results")
send_func = FunctionCall(func="send dashboard data")
task_name = f"perf-{version}-standalone"
@ -970,6 +990,8 @@ def create_perf_tasks():
task_name += "-ssl"
if sync == "async":
task_name += "-async"
elif sync == "rust":
task_name += "-rust"
tags = ["perf"]
commands = [server_func, test_func, attach_func, send_func]
tasks.append(EvgTask(name=task_name, tags=tags, commands=commands))
@ -1087,7 +1109,7 @@ def create_upload_coverage_codecov_func():
"github_pr_number",
"github_pr_head_branch",
"github_author",
"is_patch",
"requester",
"branch_name",
]
args = [
@ -1189,6 +1211,8 @@ def create_run_server_func():
"LOAD_BALANCER",
"LOCAL_ATLAS",
"NO_EXT",
"PYMONGO_BUILD_RUST",
"PYMONGO_USE_RUST",
]
args = [".evergreen/just.sh", "run-server", "${TEST_NAME}"]
sub_cmd = get_subprocess_exec(include_expansions_in_env=includes, args=args)
@ -1222,6 +1246,8 @@ def create_run_tests_func():
"IS_WIN32",
"REQUIRE_FIPS",
"TEST_MIN_DEPS",
"PYMONGO_BUILD_RUST",
"PYMONGO_USE_RUST",
]
args = [".evergreen/just.sh", "setup-tests", "${TEST_NAME}", "${SUB_TEST_NAME}"]
setup_cmd = get_subprocess_exec(include_expansions_in_env=includes, args=args)
@ -1230,7 +1256,7 @@ def create_run_tests_func():
def create_test_numpy_func():
includes = ["TOOLCHAIN_VERSION"]
includes = ["TOOLCHAIN_VERSION", "COVERAGE"]
test_cmd = get_subprocess_exec(
include_expansions_in_env=includes, args=[".evergreen/just.sh", "test-numpy"]
)
@ -1283,6 +1309,55 @@ def create_send_dashboard_data_func():
return "send dashboard data", cmds
def create_rust_variants():
    """Return the build variants that run the standard tests with the Rust extension enabled.

    Three variants are produced, in this order:
      1. The default host, tagged "rust" and "pr", selecting the PR-tagged
         latest-server standard tasks.
      2. macOS ARM64, tagged "rust", selecting the non-PR latest-server
         standard tasks with a BATCHTIME_WEEK batchtime.
      3. Windows, with the same selector, tag and batchtime as macOS.

    Every variant sets PYMONGO_BUILD_RUST and PYMONGO_USE_RUST to "1".
    """

    def rust_expansions():
        # Build a fresh dict per variant so no two variants share one object.
        return dict(
            PYMONGO_BUILD_RUST="1",
            PYMONGO_USE_RUST="1",
        )

    # Primary platform: runs on PRs, covering both sync and async standard tests.
    pr_variant = create_variant(
        [".test-standard .server-latest .pr"],
        "Test with Rust Extension",
        host=DEFAULT_HOST,
        tags=["rust", "pr"],
        expansions=rust_expansions(),
    )

    # Secondary platforms: same task selector (minus PR tasks), batched weekly.
    secondary_platforms = (
        ("Test with Rust Extension - macOS ARM64", "macos-arm64"),
        ("Test with Rust Extension - Windows", "win64"),
    )
    batched_variants = [
        create_variant(
            [".test-standard .server-latest !.pr"],
            display_name,
            host=HOSTS[host_key],
            tags=["rust"],
            batchtime=BATCHTIME_WEEK,
            expansions=rust_expansions(),
        )
        for display_name, host_key in secondary_platforms
    ]

    return [pr_variant, *batched_variants]
mod = sys.modules[__name__]
write_variants_to_file(mod)
write_tasks_to_file(mod)

View File

@ -30,7 +30,7 @@ fi
# Ensure just is installed.
if ! command -v just &>/dev/null; then
uv tool install rust-just
uv tool install rust-just || uv tool install --force rust-just
fi
popd > /dev/null

View File

@ -0,0 +1,71 @@
#!/bin/bash
# Install Rust toolchain for building the Rust BSON extension.
#
# Installs the stable toolchain through rustup when cargo is not on PATH
# (updating it instead when rustup is already present), then makes sure
# maturin is available. CARGO_HOME and RUSTUP_HOME are honored when set.
#
# pipefail: a failed download in "curl ... | sh" must abort the script instead
# of feeding an empty installer to sh.
set -euo pipefail

echo "Installing Rust toolchain..."

# Check if Rust is already installed
if command -v cargo &> /dev/null; then
    echo "Rust is already installed:"
    rustc --version
    cargo --version
    # cargo may come from a system package without rustup; only update a
    # rustup-managed toolchain rather than aborting under `set -e`.
    if command -v rustup &> /dev/null; then
        echo "Updating Rust toolchain..."
        rustup update stable
    else
        echo "rustup not found; skipping toolchain update."
    fi
else
    echo "Rust not found. Installing Rust..."

    # Install Rust using rustup
    if [ "Windows_NT" = "${OS:-}" ]; then
        # Windows installation
        curl --proto '=https' --tlsv1.2 -sSf https://win.rustup.rs/x86_64 -o rustup-init.exe
        # Remove the downloaded installer whether or not it succeeds.
        if ! ./rustup-init.exe -y --default-toolchain stable; then
            rm -f rustup-init.exe
            exit 1
        fi
        rm rustup-init.exe

        # Add to PATH for current session. rustup installs into CARGO_HOME when
        # it is set, so do not assume $HOME/.cargo.
        cargo_bin="${CARGO_HOME:-$HOME/.cargo}/bin"
        # CARGO_HOME may be a mixed-style (C:/...) path; PATH entries need the
        # POSIX form because ':' is the PATH separator.
        if command -v cygpath &> /dev/null; then
            cargo_bin="$(cygpath -u "${cargo_bin}")"
        fi
        export PATH="${cargo_bin}:$PATH"
    else
        # Unix-like installation (Linux, macOS)
        # Ensure CARGO_HOME is exported so rustup uses it
        export CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
        export RUSTUP_HOME="${RUSTUP_HOME:-${CARGO_HOME}}"

        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

        # Source cargo env from the installation location
        # On CI, CARGO_HOME is set to ${DRIVERS_TOOLS}/.cargo by configure-env.sh
        CARGO_ENV_PATH="${CARGO_HOME}/env"
        if [ -f "${CARGO_ENV_PATH}" ]; then
            source "${CARGO_ENV_PATH}"
        else
            echo "Error: Cargo env file not found at ${CARGO_ENV_PATH}"
            echo "CARGO_HOME=${CARGO_HOME}"
            echo "RUSTUP_HOME=${RUSTUP_HOME}"
            echo "HOME=${HOME}"
            exit 1
        fi
    fi

    echo "Rust installation complete:"
    rustc --version
    cargo --version
fi

# Ensure default toolchain is set (needed for rustup to work properly).
# Skipped when the toolchain is not managed by rustup.
if command -v rustup &> /dev/null; then
    echo "Setting default toolchain to stable..."
    rustup default stable
fi

# Install maturin if not already installed
if ! command -v maturin &> /dev/null; then
    echo "Installing maturin..."
    # Use pip instead of cargo to avoid yanked dependency issues
    # (e.g., maturin 1.12.2 depends on cargo-xwin which has yanked xwin versions)
    pip install maturin
    echo "maturin installation complete:"
    maturin --version
else
    echo "maturin is already installed:"
    maturin --version
fi

echo "Rust toolchain setup complete."

View File

@ -7,6 +7,8 @@ import subprocess
from argparse import Namespace
from subprocess import CalledProcessError
JIRA_FILTER = "https://jira.mongodb.org/issues/?jql=labels%20%3D%20automated-sync%20AND%20status%20!%3D%20Closed"
def resync_specs(directory: pathlib.Path, errored: dict[str, str]) -> None:
"""Actually sync the specs"""
@ -117,6 +119,7 @@ def write_summary(errored: dict[str, str], new: list[str], filename: str | None)
pr_body += "\n -".join(new)
pr_body += "\n"
if pr_body != "":
pr_body = f"Jira tickets: {JIRA_FILTER}\n\n" + pr_body
if filename is None:
print(f"\n{pr_body}")
else:

View File

@ -12,7 +12,7 @@ def set_env(name: str, value: Any = "1") -> None:
def start_server():
opts, extra_opts = get_test_options(
"Run a MongoDB server. All given flags will be passed to run-orchestration.sh in DRIVERS_TOOLS.",
"Run a MongoDB server. All given flags will be passed to run-mongodb.sh in DRIVERS_TOOLS.",
require_sub_test_name=False,
allow_extra_opts=True,
)
@ -51,7 +51,7 @@ def start_server():
elif opts.quiet:
extra_opts.append("-q")
cmd = ["bash", f"{DRIVERS_TOOLS}/.evergreen/run-orchestration.sh", *extra_opts]
cmd = ["bash", f"{DRIVERS_TOOLS}/.evergreen/run-mongodb.sh", "start", *extra_opts]
run_command(cmd, cwd=DRIVERS_TOOLS)

View File

@ -4,7 +4,9 @@ import json
import logging
import os
import platform
import shlex
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path
@ -151,6 +153,30 @@ def run() -> None:
if os.environ.get("PYMONGOCRYPT_LIB"):
handle_pymongocrypt()
# Check if Rust extension is being used
LOGGER.info(f"PYMONGO_USE_RUST={os.environ.get('PYMONGO_USE_RUST', 'not set')}")
LOGGER.info(f"PYMONGO_BUILD_RUST={os.environ.get('PYMONGO_BUILD_RUST', 'not set')}")
if os.environ.get("PYMONGO_USE_RUST") or os.environ.get("PYMONGO_BUILD_RUST"):
try:
import bson
impl = bson.get_bson_implementation()
has_rust = bson.has_rust()
has_c = bson.has_c()
LOGGER.info(f"BSON implementation in use: {impl}")
LOGGER.info(f"Has Rust: {has_rust}, Has C: {has_c}")
if impl == "rust":
LOGGER.info("✓ Rust extension is ACTIVE")
elif impl == "c":
LOGGER.info("✓ C extension is ACTIVE")
else:
LOGGER.info("✓ Pure Python implementation is ACTIVE")
except Exception as e:
LOGGER.warning(f"Could not check BSON implementation: {e}")
LOGGER.info(f"Test setup:\n{AUTH=}\n{SSL=}\n{UV_ARGS=}\n{TEST_ARGS=}")
# Record the start time for a perf test.
@ -202,6 +228,16 @@ def run() -> None:
if os.environ.get("DEBUG_LOG"):
TEST_ARGS.extend(f"-o log_cli_level={logging.DEBUG}".split())
if os.environ.get("COVERAGE"):
binary = sys.executable.replace(os.sep, "/")
cmd = f"{binary} -m coverage run -m pytest {' '.join(TEST_ARGS)} {' '.join(sys.argv[1:])}"
result = subprocess.run(shlex.split(cmd), check=False) # noqa: S603
cmd = f"{binary} -m coverage report"
subprocess.run(shlex.split(cmd), check=False) # noqa: S603
if result.returncode != 0:
print(result.stderr)
sys.exit(result.returncode)
# Run local tests.
ret = pytest.main(TEST_ARGS + sys.argv[1:])
if ret != 0:

View File

@ -22,6 +22,11 @@ bash $HERE/install-dependencies.sh
# Handle the value for UV_PYTHON.
. $HERE/setup-uv-python.sh
# Show Rust toolchain status for debugging
echo "Rust toolchain: $(rustc --version 2>/dev/null || echo 'not found')"
echo "Cargo: $(cargo --version 2>/dev/null || echo 'not found')"
echo "Maturin: $(maturin --version 2>/dev/null || echo 'not found')"
# Only run the next part if not running on CI.
if [ -z "${CI:-}" ]; then
# Add the default install path to the path if needed.

View File

@ -13,6 +13,8 @@ set -eu
# MONGODB_API_VERSION The mongodb api version to use in tests.
# MONGODB_URI If non-empty, use as the MONGODB_URI in tests.
# USE_ACTIVE_VENV If non-empty, use the active virtual environment.
# PYMONGO_BUILD_RUST If non-empty, build and test with Rust extension.
# PYMONGO_USE_RUST If non-empty, use the Rust extension for tests.
SCRIPT_DIR=$(dirname ${BASH_SOURCE:-$0})
@ -21,6 +23,12 @@ if [ -f $SCRIPT_DIR/env.sh ]; then
source $SCRIPT_DIR/env.sh
fi
# Install Rust toolchain if building Rust extension
if [ -n "${PYMONGO_BUILD_RUST:-}" ]; then
echo "PYMONGO_BUILD_RUST is set, installing Rust toolchain..."
bash $SCRIPT_DIR/install-rust.sh
fi
echo "Setting up tests with args \"$*\"..."
uv run ${USE_ACTIVE_VENV:+--active} "$SCRIPT_DIR/setup_tests.py" "$@"
echo "Setting up tests with args \"$*\"... done."

View File

@ -32,6 +32,8 @@ PASS_THROUGH_ENV = [
"UV_PYTHON",
"REQUIRE_FIPS",
"IS_WIN32",
"PYMONGO_USE_RUST",
"PYMONGO_BUILD_RUST",
]
# Map the test name to test extra.
@ -153,6 +155,10 @@ def handle_test_env() -> None:
# Start compiling the args we'll pass to uv.
UV_ARGS = ["--extra test --no-group dev"]
# If USE_ACTIVE_VENV is set, add --active to UV_ARGS so run-tests.sh uses the active venv.
if is_set("USE_ACTIVE_VENV"):
UV_ARGS.append("--active")
test_title = test_name
if sub_test_name:
test_title += f" {sub_test_name}"
@ -324,7 +330,8 @@ def handle_test_env() -> None:
version = os.environ.get("VERSION", "latest")
cmd = [
"bash",
f"{DRIVERS_TOOLS}/.evergreen/run-orchestration.sh",
f"{DRIVERS_TOOLS}/.evergreen/run-mongodb.sh",
"start",
"--ssl",
"--version",
version,
@ -431,6 +438,9 @@ def handle_test_env() -> None:
# We do not want the default client_context to be initialized.
write_env("DISABLE_CONTEXT")
if test_name == "numpy":
UV_ARGS.append("--with numpy")
if test_name == "perf":
data_dir = ROOT / "specifications/source/benchmarking/data"
if not data_dir.exists():
@ -447,7 +457,7 @@ def handle_test_env() -> None:
# PYTHON-4769 Run perf_test.py directly otherwise pytest's test collection negatively
# affects the benchmark results.
if sub_test_name == "sync":
if sub_test_name == "sync" or sub_test_name == "rust":
TEST_ARGS = f"test/performance/perf_test.py {TEST_ARGS}"
else:
TEST_ARGS = f"test/performance/async_perf_test.py {TEST_ARGS}"
@ -458,12 +468,14 @@ def handle_test_env() -> None:
# Keep in sync with combine-coverage.sh.
# coverage >=5 is needed for relative_files=true.
UV_ARGS.append("--group coverage")
TEST_ARGS = f"{TEST_ARGS} --cov"
write_env("COVERAGE")
if opts.green_framework:
framework = opts.green_framework or os.environ["GREEN_FRAMEWORK"]
UV_ARGS.append(f"--group {framework}")
if framework == "gevent" and opts.test_min_deps:
# PYTHON-5729. This can be removed when the min supported gevent is moved to 25.9.1.
UV_ARGS.append('--with "setuptools==81.0"')
else:
TEST_ARGS = f"-v --durations=5 {TEST_ARGS}"
@ -471,6 +483,10 @@ def handle_test_env() -> None:
if TEST_SUITE:
TEST_ARGS = f"-m {TEST_SUITE} {TEST_ARGS}"
# For test_bson, run the specific test file
if test_name == "test_bson":
TEST_ARGS = f"test/test_bson.py {TEST_ARGS}"
write_env("TEST_ARGS", TEST_ARGS)
write_env("UV_ARGS", " ".join(UV_ARGS))

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Stop a server that was started using run-orchestration.sh in DRIVERS_TOOLS.
# Stop a server that was started using run-mongodb.sh in DRIVERS_TOOLS.
set -eu
HERE=$(dirname ${BASH_SOURCE:-$0})
@ -11,4 +11,4 @@ if [ -f $HERE/env.sh ]; then
source $HERE/env.sh
fi
bash ${DRIVERS_TOOLS}/.evergreen/stop-orchestration.sh
bash ${DRIVERS_TOOLS}/.evergreen/run-mongodb.sh stop

View File

@ -8,18 +8,20 @@ ROOT=$(dirname "$(dirname $HERE)")
pushd $ROOT > /dev/null
export FNAME=coverage.xml
if [ -n "${is_patch:-}" ]; then
echo "This is a patch build, not running codecov"
exit 0
fi
REQUESTER=${requester:-}
if [ ! -f ".coverage" ]; then
echo "There are no coverage results, not running codecov"
exit 0
fi
echo "Uploading..."
if [[ "${REQUESTER}" == "github_pr" || "${REQUESTER}" == "commit" ]]; then
echo "Uploading codecov for $REQUESTER..."
else
echo "Error: requester must be 'github_pr' or 'commit', got '${REQUESTER}'" >&2
exit 1
fi
printf 'sha: %s\n' "$github_commit"
printf 'flag: %s-%s\n' "$build_variant" "$task_name"
printf 'file: %s\n' "$FNAME"
@ -40,18 +42,16 @@ codecov_args=(
if [ -n "${github_pr_number:-}" ]; then
printf 'branch: %s:%s\n' "$github_author" "$github_pr_head_branch"
printf 'pr: %s\n' "$github_pr_number"
uv tool run --from codecov-cli codecovcli \
"${codecov_args[@]}" \
--pr "${github_pr_number}" \
--branch "${github_author}:${github_pr_head_branch}"
else
printf 'branch: %s\n' "$branch_name"
uv tool run --from codecov-cli codecovcli \
"${codecov_args[@]}" \
--branch "${branch_name}"
fi
echo "Uploading...done."
echo "Uploading codecov for $REQUESTER... done."
popd > /dev/null

View File

@ -44,6 +44,8 @@ TEST_SUITE_MAP = {
"mockupdb": "mockupdb",
"ocsp": "ocsp",
"perf": "perf",
"numpy": "",
"test_bson": "",
}
# Tests that require a sub test suite.
@ -51,7 +53,7 @@ SUB_TEST_REQUIRED = ["auth_aws", "auth_oidc", "kms", "mod_wsgi", "perf"]
EXTRA_TESTS = ["mod_wsgi", "aws_lambda", "doctest"]
# Tests that do not use run-orchestration directly.
# Tests that do not use run-mongodb directly.
NO_RUN_ORCHESTRATION = [
"auth_oidc",
"atlas_connect",

View File

@ -1,64 +0,0 @@
diff --git a/test/load_balancer/cursors.json b/test/load_balancer/cursors.json
index 43e4fbb4f..4e2a55fd4 100644
--- a/test/load_balancer/cursors.json
+++ b/test/load_balancer/cursors.json
@@ -376,7 +376,7 @@
]
},
{
+ "description": "pinned connections are not returned after an network error during getMore",
- "description": "pinned connections are returned after an network error during getMore",
"operations": [
{
"name": "failPoint",
@@ -440,7 +440,7 @@
"object": "testRunner",
"arguments": {
"client": "client0",
+ "connections": 1
- "connections": 0
}
},
{
@@ -659,7 +659,7 @@
]
},
{
+ "description": "pinned connections are not returned to the pool after a non-network error on getMore",
- "description": "pinned connections are returned to the pool after a non-network error on getMore",
"operations": [
{
"name": "failPoint",
@@ -715,7 +715,7 @@
"object": "testRunner",
"arguments": {
"client": "client0",
+ "connections": 1
- "connections": 0
}
},
{
diff --git a/test/load_balancer/sdam-error-handling.json b/test/load_balancer/sdam-error-handling.json
index 63aabc04d..462fa0aac 100644
--- a/test/load_balancer/sdam-error-handling.json
+++ b/test/load_balancer/sdam-error-handling.json
@@ -366,6 +366,9 @@
{
"connectionCreatedEvent": {}
},
+ {
+ "poolClearedEvent": {}
+ },
{
"connectionClosedEvent": {
"reason": "error"
@@ -378,9 +375,6 @@
"connectionCheckOutFailedEvent": {
"reason": "connectionError"
}
- },
- {
- "poolClearedEvent": {}
}
]
}

View File

@ -1,14 +0,0 @@
diff --git a/test/discovery_and_monitoring/unified/serverMonitoringMode.json b/test/discovery_and_monitoring/unified/serverMonitoringMode.json
index e44fad1b..4b492f7d 100644
--- a/test/discovery_and_monitoring/unified/serverMonitoringMode.json
+++ b/test/discovery_and_monitoring/unified/serverMonitoringMode.json
@@ -5,7 +5,8 @@
{
"topologies": [
"single",
- "sharded"
+ "sharded",
+ "sharded-replicaset"
],
"serverless": "forbid"
}

View File

@ -1,61 +0,0 @@
diff --git a/test/server_selection_logging/replica-set.json b/test/server_selection_logging/replica-set.json
index 830b1ea51..5eba784bf 100644
--- a/test/server_selection_logging/replica-set.json
+++ b/test/server_selection_logging/replica-set.json
@@ -184,7 +184,7 @@
}
},
{
- "level": "debug",
+ "level": "info",
"component": "serverSelection",
"data": {
"message": "Waiting for suitable server to become available",
diff --git a/test/server_selection_logging/standalone.json b/test/server_selection_logging/standalone.json
index 830b1ea51..5eba784bf 100644
--- a/test/server_selection_logging/standalone.json
+++ b/test/server_selection_logging/standalone.json
@@ -191,7 +191,7 @@
}
},
{
- "level": "debug",
+ "level": "info",
"component": "serverSelection",
"data": {
"message": "Waiting for suitable server to become available",
diff --git a/test/server_selection_logging/sharded.json b/test/server_selection_logging/sharded.json
index 830b1ea51..5eba784bf 100644
--- a/test/server_selection_logging/sharded.json
+++ b/test/server_selection_logging/sharded.json
@@ -193,7 +193,7 @@
}
},
{
- "level": "debug",
+ "level": "info",
"component": "serverSelection",
"data": {
"message": "Waiting for suitable server to become available",
diff --git a/test/server_selection_logging/sharded.json b/test/server_selection_logging/operation-id.json
index 830b1ea51..5eba784bf 100644
--- a/test/server_selection_logging/operation-id.json
+++ b/test/server_selection_logging/operation-id.json
@@ -197,7 +197,7 @@
}
},
{
- "level": "debug",
+ "level": "info",
"component": "serverSelection",
"data": {
"message": "Waiting for suitable server to become available",
@@ -383,7 +383,7 @@
}
},
{
- "level": "debug",
+ "level": "info",
"component": "serverSelection",
"data": {
"message": "Waiting for suitable server to become available",

View File

@ -1,31 +0,0 @@
diff --git a/test/discovery_and_monitoring/errors/error_handling_handshake.json b/test/discovery_and_monitoring/errors/error_handling_handshake.json
index 56ca7d113..bf83f46f6 100644
--- a/test/discovery_and_monitoring/errors/error_handling_handshake.json
+++ b/test/discovery_and_monitoring/errors/error_handling_handshake.json
@@ -97,14 +97,22 @@
"outcome": {
"servers": {
"a:27017": {
- "type": "Unknown",
- "topologyVersion": null,
+ "type": "RSPrimary",
+ "setName": "rs",
+ "topologyVersion": {
+ "processId": {
+ "$oid": "000000000000000000000001"
+ },
+ "counter": {
+ "$numberLong": "1"
+ }
+ },
"pool": {
- "generation": 1
+ "generation": 0
}
}
},
- "topologyType": "ReplicaSetNoPrimary",
+ "topologyType": "ReplicaSetWithPrimary",
"logicalSessionTimeoutMinutes": null,
"setName": "rs"
}

View File

@ -0,0 +1,460 @@
diff --git a/test/client-side-encryption/spec/unified/accessToken-azure.json b/test/client-side-encryption/spec/unified/accessToken-azure.json
new file mode 100644
index 00000000..510d8795
--- /dev/null
+++ b/test/client-side-encryption/spec/unified/accessToken-azure.json
@@ -0,0 +1,186 @@
+{
+ "description": "accessToken-azure",
+ "schemaVersion": "1.28",
+ "runOnRequirements": [
+ {
+ "minServerVersion": "4.1.10",
+ "csfle": {
+ "minLibmongocryptVersion": "1.6.0"
+ }
+ }
+ ],
+ "createEntities": [
+ {
+ "client": {
+ "id": "client",
+ "autoEncryptOpts": {
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "azure": {
+ "accessToken": {
+ "$$placeholder": 1
+ }
+ }
+ }
+ }
+ }
+ },
+ {
+ "database": {
+ "id": "db",
+ "client": "client",
+ "databaseName": "db"
+ }
+ },
+ {
+ "collection": {
+ "id": "coll",
+ "database": "db",
+ "collectionName": "coll"
+ }
+ },
+ {
+ "clientEncryption": {
+ "id": "clientEncryption",
+ "clientEncryptionOpts": {
+ "keyVaultClient": "client",
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "azure": {
+ "accessToken": {
+ "$$placeholder": 1
+ }
+ }
+ }
+ }
+ }
+ }
+ ],
+ "initialData": [
+ {
+ "databaseName": "db",
+ "collectionName": "coll",
+ "documents": [],
+ "createOptions": {
+ "validator": {
+ "$jsonSchema": {
+ "properties": {
+ "secret": {
+ "encrypt": {
+ "keyId": [
+ {
+ "$binary": {
+ "base64": "AZURE+AAAAAAAAAAAAAAAA==",
+ "subType": "04"
+ }
+ }
+ ],
+ "bsonType": "string",
+ "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
+ }
+ }
+ },
+ "bsonType": "object"
+ }
+ }
+ }
+ },
+ {
+ "databaseName": "keyvault",
+ "collectionName": "datakeys",
+ "documents": [
+ {
+ "_id": {
+ "$binary": {
+ "base64": "AZURE+AAAAAAAAAAAAAAAA==",
+ "subType": "04"
+ }
+ },
+ "keyAltNames": [
+ "my-key"
+ ],
+ "keyMaterial": {
+ "$binary": {
+ "base64": "n+HWZ0ZSVOYA3cvQgP7inN4JSXfOH85IngmeQxRpQHjCCcqT3IFqEWNlrsVHiz3AELimHhX4HKqOLWMUeSIT6emUDDoQX9BAv8DR1+E1w4nGs/NyEneac78EYFkK3JysrFDOgl2ypCCTKAypkn9CkAx1if4cfgQE93LW4kczcyHdGiH36CIxrCDGv1UzAvERN5Qa47DVwsM6a+hWsF2AAAJVnF0wYLLJU07TuRHdMrrphPWXZsFgyV+lRqJ7DDpReKNO8nMPLV/mHqHBHGPGQiRdb9NoJo8CvokGz4+KE8oLwzKf6V24dtwZmRkrsDV4iOhvROAzz+Euo1ypSkL3mw==",
+ "subType": "00"
+ }
+ },
+ "creationDate": {
+ "$date": {
+ "$numberLong": "1552949630483"
+ }
+ },
+ "updateDate": {
+ "$date": {
+ "$numberLong": "1552949630483"
+ }
+ },
+ "status": {
+ "$numberInt": "0"
+ },
+ "masterKey": {
+ "provider": "azure",
+ "keyVaultEndpoint": "key-vault-csfle.vault.azure.net",
+ "keyName": "key-name-csfle"
+ }
+ }
+ ]
+ }
+ ],
+ "tests": [
+ {
+ "description": "Auto encrypt using access token Azure credentials",
+ "operations": [
+ {
+ "name": "insertOne",
+ "arguments": {
+ "document": {
+ "_id": 1,
+ "secret": "string0"
+ }
+ },
+ "object": "coll"
+ }
+ ],
+ "outcome": [
+ {
+ "documents": [
+ {
+ "_id": 1,
+ "secret": {
+ "$binary": {
+ "base64": "AQGVERPgAAAAAAAAAAAAAAAC5DbBSwPwfSlBrDtRuglvNvCXD1KzDuCKY2P+4bRFtHDjpTOE2XuytPAUaAbXf1orsPq59PVZmsbTZbt2CB8qaQ==",
+ "subType": "06"
+ }
+ }
+ }
+ ],
+ "collectionName": "coll",
+ "databaseName": "db"
+ }
+ ]
+ },
+ {
+ "description": "Explicit encrypt using access token Azure credentials",
+ "operations": [
+ {
+ "name": "encrypt",
+ "object": "clientEncryption",
+ "arguments": {
+ "value": "string0",
+ "opts": {
+ "keyAltName": "my-key",
+ "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
+ }
+ },
+ "expectResult": {
+ "$binary": {
+ "base64": "AQGVERPgAAAAAAAAAAAAAAAC5DbBSwPwfSlBrDtRuglvNvCXD1KzDuCKY2P+4bRFtHDjpTOE2XuytPAUaAbXf1orsPq59PVZmsbTZbt2CB8qaQ==",
+ "subType": "06"
+ }
+ }
+ }
+ ]
+ }
+ ]
+}
diff --git a/test/client-side-encryption/spec/unified/accessToken-gcp.json b/test/client-side-encryption/spec/unified/accessToken-gcp.json
new file mode 100644
index 00000000..f5cf8914
--- /dev/null
+++ b/test/client-side-encryption/spec/unified/accessToken-gcp.json
@@ -0,0 +1,188 @@
+{
+ "description": "accessToken-gcp",
+ "schemaVersion": "1.28",
+ "runOnRequirements": [
+ {
+ "minServerVersion": "4.1.10",
+ "csfle": {
+ "minLibmongocryptVersion": "1.6.0"
+ }
+ }
+ ],
+ "createEntities": [
+ {
+ "client": {
+ "id": "client",
+ "autoEncryptOpts": {
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "gcp": {
+ "accessToken": {
+ "$$placeholder": 1
+ }
+ }
+ }
+ }
+ }
+ },
+ {
+ "database": {
+ "id": "db",
+ "client": "client",
+ "databaseName": "db"
+ }
+ },
+ {
+ "collection": {
+ "id": "coll",
+ "database": "db",
+ "collectionName": "coll"
+ }
+ },
+ {
+ "clientEncryption": {
+ "id": "clientEncryption",
+ "clientEncryptionOpts": {
+ "keyVaultClient": "client",
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "gcp": {
+ "accessToken": {
+ "$$placeholder": 1
+ }
+ }
+ }
+ }
+ }
+ }
+ ],
+ "initialData": [
+ {
+ "databaseName": "db",
+ "collectionName": "coll",
+ "documents": [],
+ "createOptions": {
+ "validator": {
+ "$jsonSchema": {
+ "properties": {
+ "secret": {
+ "encrypt": {
+ "keyId": [
+ {
+ "$binary": {
+ "base64": "GCP+AAAAAAAAAAAAAAAAAA==",
+ "subType": "04"
+ }
+ }
+ ],
+ "bsonType": "string",
+ "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
+ }
+ }
+ },
+ "bsonType": "object"
+ }
+ }
+ }
+ },
+ {
+ "databaseName": "keyvault",
+ "collectionName": "datakeys",
+ "documents": [
+ {
+ "_id": {
+ "$binary": {
+ "base64": "GCP+AAAAAAAAAAAAAAAAAA==",
+ "subType": "04"
+ }
+ },
+ "keyAltNames": [
+ "my-key"
+ ],
+ "keyMaterial": {
+ "$binary": {
+ "base64": "CiQAIgLj0WyktnB4dfYHo5SLZ41K4ASQrjJUaSzl5vvVH0G12G0SiQEAjlV8XPlbnHDEDFbdTO4QIe8ER2/172U1ouLazG0ysDtFFIlSvWX5ZnZUrRMmp/R2aJkzLXEt/zf8Mn4Lfm+itnjgo5R9K4pmPNvvPKNZX5C16lrPT+aA+rd+zXFSmlMg3i5jnxvTdLHhg3G7Q/Uv1ZIJskKt95bzLoe0tUVzRWMYXLIEcohnQg==",
+ "subType": "00"
+ }
+ },
+ "creationDate": {
+ "$date": {
+ "$numberLong": "1552949630483"
+ }
+ },
+ "updateDate": {
+ "$date": {
+ "$numberLong": "1552949630483"
+ }
+ },
+ "status": {
+ "$numberInt": "0"
+ },
+ "masterKey": {
+ "provider": "gcp",
+ "projectId": "devprod-drivers",
+ "location": "global",
+ "keyRing": "key-ring-csfle",
+ "keyName": "key-name-csfle"
+ }
+ }
+ ]
+ }
+ ],
+ "tests": [
+ {
+ "description": "Auto encrypt using access token GCP credentials",
+ "operations": [
+ {
+ "name": "insertOne",
+ "arguments": {
+ "document": {
+ "_id": 1,
+ "secret": "string0"
+ }
+ },
+ "object": "coll"
+ }
+ ],
+ "outcome": [
+ {
+ "documents": [
+ {
+ "_id": 1,
+ "secret": {
+ "$binary": {
+ "base64": "ARgj/gAAAAAAAAAAAAAAAAACwFd+Y5Ojw45GUXNvbcIpN9YkRdoHDHkR4kssdn0tIMKlDQOLFkWFY9X07IRlXsxPD8DcTiKnl6XINK28vhcGlg==",
+ "subType": "06"
+ }
+ }
+ }
+ ],
+ "collectionName": "coll",
+ "databaseName": "db"
+ }
+ ]
+ },
+ {
+ "description": "Explicit encrypt using access token GCP credentials",
+ "operations": [
+ {
+ "name": "encrypt",
+ "object": "clientEncryption",
+ "arguments": {
+ "value": "string0",
+ "opts": {
+ "keyAltName": "my-key",
+ "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
+ }
+ },
+ "expectResult": {
+ "$binary": {
+ "base64": "ARgj/gAAAAAAAAAAAAAAAAACwFd+Y5Ojw45GUXNvbcIpN9YkRdoHDHkR4kssdn0tIMKlDQOLFkWFY9X07IRlXsxPD8DcTiKnl6XINK28vhcGlg==",
+ "subType": "06"
+ }
+ }
+ }
+ ]
+ }
+ ]
+}
diff --git a/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-azure-accessToken-type.json b/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-azure-accessToken-type.json
new file mode 100644
index 00000000..8fe5c150
--- /dev/null
+++ b/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-azure-accessToken-type.json
@@ -0,0 +1,31 @@
+{
+ "description": "clientEncryptionOpts-kmsProviders-azure-accessToken-type",
+ "schemaVersion": "1.28",
+ "createEntities": [
+ {
+ "client": {
+ "id": "client0"
+ }
+ },
+ {
+ "clientEncryption": {
+ "id": "clientEncryption0",
+ "clientEncryptionOpts": {
+ "keyVaultClient": "client0",
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "azure": {
+ "accessToken": 0
+ }
+ }
+ }
+ }
+ }
+ ],
+ "tests": [
+ {
+ "description": "",
+ "operations": []
+ }
+ ]
+}
diff --git a/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-gcp-accessToken-type.json b/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-gcp-accessToken-type.json
new file mode 100644
index 00000000..2284e26c
--- /dev/null
+++ b/test/unified-test-format/invalid/clientEncryptionOpts-kmsProviders-gcp-accessToken-type.json
@@ -0,0 +1,31 @@
+{
+ "description": "clientEncryptionOpts-kmsProviders-gcp-accessToken-type",
+ "schemaVersion": "1.28",
+ "createEntities": [
+ {
+ "client": {
+ "id": "client0"
+ }
+ },
+ {
+ "clientEncryption": {
+ "id": "clientEncryption0",
+ "clientEncryptionOpts": {
+ "keyVaultClient": "client0",
+ "keyVaultNamespace": "keyvault.datakeys",
+ "kmsProviders": {
+ "gcp": {
+ "accessToken": 0
+ }
+ }
+ }
+ }
+ }
+ ],
+ "tests": [
+ {
+ "description": "",
+ "operations": []
+ }
+ ]
+}

44
.github/copilot-instructions.md vendored Normal file
View File

@ -0,0 +1,44 @@
When reviewing code, focus on:
## Security Critical Issues
- Check for hardcoded secrets, API keys, or credentials.
- Check for instances of potential method call injection, dynamic code execution, symbol injection or other code injection vulnerabilities.
## Performance Red Flags
- Spot inefficient loops and algorithmic issues.
- Check for memory leaks and resource cleanup.
## Code Quality Essentials
- Methods should be focused and appropriately sized. If a method is doing too much, suggest refactorings to split it up.
- Use clear, descriptive naming conventions.
- Avoid encapsulation violations and ensure proper separation of concerns.
- All public classes, modules, and methods should have clear documentation in Sphinx format.
## PyMongo-specific Concerns
- Do not review files within `pymongo/synchronous` or files in `test/` that also have a file of the same name in `test/asynchronous` unless the reviewed changes include a `_IS_SYNC` statement. PyMongo generates these files from `pymongo/asynchronous` and `test/asynchronous` using `tools/synchro.py`.
- All asynchronous functions must not call any blocking I/O.
## Review Style
- Be specific and actionable in feedback.
- Explain the "why" behind recommendations.
- Acknowledge good patterns when you see them.
- Ask clarifying questions when code intent is unclear.
Always prioritize security vulnerabilities and performance issues that could impact users.
Always suggest changes to improve readability and testability. For example, this suggestion seeks to make the code more readable, reusable, and testable:
```python
# Instead of:
if user.email and "@" in user.email and len(user.email) > 5:
submit_button.enabled = True
else:
submit_button.enabled = False
# Consider:
def valid_email(email):
return email and "@" in email and len(email) > 5
submit_button.enabled = valid_email(user.email)
```

View File

@ -6,8 +6,8 @@ If you are an external contributor and there is no JIRA ticket associated with y
for the PR title. A MongoDB employee will create a JIRA ticket and edit the name and links as appropriate.
Note on AI Contributions:
We do not accept pull requests that are primarily or substantially generated by AI tools (ChatGPT, Copilot, etc.).
All contributions must be written and understood by human contributors.
We only accept pull requests that are authored and submitted by human contributors who fully understand the changes they are proposing.
All contributions must be written and understood by human contributors. Please read about our policy in our contributing guide.
-->
[JIRA TICKET]

View File

@ -61,7 +61,7 @@ jobs:
- name: Set up QEMU
if: runner.os == 'Linux'
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
with:
# setup-qemu-action by default uses `tonistiigi/binfmt:latest` image,
# which is out of date. This causes seg faults during build.
@ -92,7 +92,7 @@ jobs:
# Free-threading builds:
ls wheelhouse/*cp314t*.whl
- uses: actions/upload-artifact@v6
- uses: actions/upload-artifact@v7
with:
name: wheel-${{ matrix.buildplat[1] }}
path: ./wheelhouse/*.whl
@ -125,7 +125,7 @@ jobs:
cd ..
python -c "from pymongo import has_c; assert has_c()"
- uses: actions/upload-artifact@v6
- uses: actions/upload-artifact@v7
with:
name: "sdist"
path: ./dist/*.tar.gz
@ -136,13 +136,13 @@ jobs:
name: Download Wheels
steps:
- name: Download all workflow run artifacts
uses: actions/download-artifact@v7
uses: actions/download-artifact@v8
- name: Flatten directory
working-directory: .
run: |
find . -mindepth 2 -type f -exec mv {} . \;
find . -type d -empty -delete
- uses: actions/upload-artifact@v6
- uses: actions/upload-artifact@v7
with:
name: all-dist-${{ github.run_id }}
path: "./*"

View File

@ -75,7 +75,7 @@ jobs:
id-token: write
steps:
- name: Download all the dists
uses: actions/download-artifact@v7
uses: actions/download-artifact@v8
with:
name: all-dist-${{ github.run_id }}
path: dist/

View File

@ -67,7 +67,7 @@ jobs:
run: rm -rf .venv .venv-sbom sbom-requirements.txt
- name: Upload SBOM artifact
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v7
with:
name: sbom
path: sbom.json

View File

@ -26,7 +26,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -61,23 +61,40 @@ jobs:
os: [ubuntu-latest]
python-version: ["3.10", "pypy-3.11", "3.13t"]
mongodb-version: ["8.0"]
extension: ["c", "rust"]
exclude:
# Don't test Rust with pypy
- python-version: "pypy-3.11"
extension: "rust"
# Don't test Rust with free-threaded Python (not yet supported)
- python-version: "3.13t"
extension: "rust"
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}
name: CPython ${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.extension }}
continue-on-error: ${{ matrix.extension == 'rust' }}
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: ${{ matrix.python-version }}
- name: Install Rust toolchain
if: matrix.extension == 'rust'
uses: dtolnay/rust-toolchain@efa25f7f19611383d5b0ccf2d1c8914531636bf9 # stable
with:
toolchain: stable
- id: setup-mongodb
uses: mongodb-labs/drivers-evergreen-tools@master
with:
version: "${{ matrix.mongodb-version }}"
- name: Run tests
run: uv run --extra test pytest -v
env:
PYMONGO_BUILD_RUST: ${{ matrix.extension == 'rust' && '1' || '' }}
PYMONGO_USE_RUST: ${{ matrix.extension == 'rust' && '1' || '' }}
coverage:
# This enables a coverage report for a given PR, which will be augmented by
@ -90,7 +107,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -118,7 +135,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -143,7 +160,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -162,7 +179,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -184,7 +201,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "${{matrix.python}}"
@ -205,7 +222,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
enable-cache: true
python-version: "3.10"
@ -245,7 +262,7 @@ jobs:
run: |
pip install build
python -m build --sdist
- uses: actions/upload-artifact@v6
- uses: actions/upload-artifact@v7
with:
name: "sdist"
path: dist/*.tar.gz
@ -257,7 +274,7 @@ jobs:
timeout-minutes: 20
steps:
- name: Download sdist
uses: actions/download-artifact@v7
uses: actions/download-artifact@v8
with:
path: sdist/
- name: Unpack SDist
@ -295,7 +312,7 @@ jobs:
with:
persist-credentials: false
- name: Install uv
uses: astral-sh/setup-uv@803947b9bd8e9f986429fa0c5a41c367cd732b41 # v7
uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7
with:
python-version: "3.9"
- id: setup-mongodb

View File

@ -18,4 +18,4 @@ jobs:
with:
persist-credentials: false
- name: Run zizmor 🌈
uses: zizmorcore/zizmor-action@135698455da5c3b3e55f73f4419e481ab68cdd95 # v0.4.1
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2

5
.gitignore vendored
View File

@ -43,3 +43,8 @@ test/lambda/*.json
xunit-results/
coverage.xml
server.log
.coverage
# Rust build artifacts
target/
Cargo.lock

View File

@ -103,7 +103,8 @@ repos:
# - test/test_bson.py:267: isnt ==> isn't
# - test/versioned-api/crud-api-version-1-strict.json:514: nin ==> inn, min, bin, nine
# - test/test_client.py:188: te ==> the, be, we, to
args: ["-L", "fle,fo,infinit,isnt,nin,te,aks"]
# - README.md:534: crate ==> create (Rust terminology - a crate is a Rust package)
args: ["-L", "fle,fo,infinit,isnt,nin,te,aks,crate"]
- repo: local
hooks:

View File

@ -85,49 +85,53 @@ likelihood for getting review sooner shoots up.
- `versionadded:: 3.11`
- `versionchanged:: 3.5`
**Pull Request Template Breakdown**
### AI-Generated Contributions Policy
- **Github PR Title**
#### Our Stance
- The PR Title format should always be
`[JIRA-ID] : Jira Title or Blurb Summary`.
We only accept pull requests that are authored and submitted by human contributors who fully understand the changes they are proposing. Pull requests that are not clearly owned and understood by a human contributor may be closed. **All contributions must be submitted, reviewed, and understood by human contributors.**
- **JIRA LINK**
##### Why This Policy Exists
- Convenient link to the associated JIRA ticket.
At MongoDB, we understand the power and prevalence of AI tools in software development. With that being said, many MongoDB libraries are foundational tools used in production systems worldwide. The nature of these libraries requires:
- **Summary**
- **Deep domain expertise**: MongoDB's wire protocol, BSON specification, connection pooling, authentication mechanisms, and concurrency patterns require an understanding that AI alone cannot substantiate.
- Small blurb on why this is needed. The JIRA task should have
the more in-depth description, but this should still, at a
high level, give anyone looking an understanding of why the
PR has been checked in.
- **Long-term maintainability**: Contributors need to be able to explain *why* code is written a certain way, explain design decisions, and be available to iterate on their contributions.
- **Changes in this PR**
- **Security responsibility**: Authentication, credential handling, and TLS implementation cannot be left to probabilistic code generation.
- The explicit code changes that this PR is introducing. This
should be more specific than just the task name. (Unless the
task name is very clear).
##### What This Means for Contributors
- **Test Plan**
**Required:**
- Everything needs a test description. Describe what you did
to validate your changes actually worked; if you did
nothing, then document you did not test it. Aim to make
these steps reproducible by other engineers, specifically
with your primary reviewer in mind.
- Full understanding of every line of code you submit
- Ability to explain and defend your implementation choices
- Willingness to iterate and maintain your contributions
- **Screenshots**
**Encouraged:**
- Any images that provide more context to the PR. Usually,
these just coincide with the test plan.
- Using AI assistants as learning tools to understand concepts
- IDE autocomplete features that suggest standard patterns
- AI help for brainstorming approaches (but write the code yourself)
- Writing code using AI tools, reviewing each line and revising code as necessary.
- **Callouts or follow-up items**
**Not allowed:**
- This is a good place for identifying "to-dos" that you've
placed in the code (Must have an accompanying JIRA Ticket).
- Potential bugs that you are unsure how to test in the code.
- Opinions you want to receive about your code.
- Submitting PRs generated solely by AI tools
- Copy-pasting AI-generated code without full understanding
##### Disclosure
If you used AI assistance in any way during your contribution, please disclose what the AI assistant was used for in your PR description. We would love to know which tools developers have found useful in their day-to-day work.
##### Questions?
If you're unsure whether your contribution complies with this policy, please ask for guidance within the scope of the PR and clarify any uncertainty. We're happy to guide contributors toward successful contributions.
---
*This policy helps us maintain the reliability, security, and trustworthiness that production applications depend on. Thank you for understanding and for contributing thoughtfully to PyMongo.*
## Running Linters
@ -197,7 +201,7 @@ the pages will re-render and the browser will automatically refresh.
version of Python, set `UV_PYTHON` before running `just install`.
- Ensure you have started the appropriate Mongo Server(s). You can run `just run-server` with optional args
to set up the server. All given options will be passed to
[`run-orchestration.sh`](https://github.com/mongodb-labs/drivers-evergreen-tools/blob/master/.evergreen/run-orchestration.sh). Run `$DRIVERS_TOOLS/.evergreen/run-orchestration.sh -h`
[`run-mongodb.sh`](https://github.com/mongodb-labs/drivers-evergreen-tools/blob/master/.evergreen/run-mongodb.sh). Run `$DRIVERS_TOOLS/.evergreen/run-mongodb.sh start -h`
for a full list of options.
- Run `just test` or `pytest` to run all of the tests.
- Append `test/<mod_name>.py::<class_name>::<test_name>` to run
@ -205,6 +209,7 @@ the pages will re-render and the browser will automatically refresh.
and the `<class_name>` to test a full module. For example:
`just test test/test_change_stream.py::TestUnifiedChangeStreamsErrors::test_change_stream_errors_on_ElectionInProgress`.
- Use the `-k` argument to select tests by pattern.
- Run `just test-coverage` to run tests with coverage and display a report. After running tests with coverage, use `just coverage-html` to generate an HTML report in `htmlcov/index.html`.
## Running tests that require secrets, services, or other configuration
@ -396,7 +401,7 @@ To run any of the test suites with minimum supported dependencies, pass `--test-
- If adding new tests files that should only be run for that test suite, add a pytest marker to the file and add
to the list of pytest markers in `pyproject.toml`. Then add the test suite to the `TEST_SUITE_MAP` in `.evergreen/scripts/utils.py`. If for some reason it is not a pytest-runnable test, add it to the list of `EXTRA_TESTS` instead.
- If the test uses Atlas or otherwise doesn't use `run-orchestration.sh`, add it to the `NO_RUN_ORCHESTRATION` list in
- If the test uses Atlas or otherwise doesn't use `run-mongodb.sh`, add it to the `NO_RUN_ORCHESTRATION` list in
`.evergreen/scripts/utils.py`.
- If there is something special required to run the local server or there is an extra flag that should always be set
like `AUTH`, add that logic to `.evergreen/scripts/run_server.py`.
@ -500,13 +505,20 @@ python3 ./.evergreen/scripts/resync-all-specs.py
Follow the [Python Driver Release Process Wiki](https://wiki.corp.mongodb.com/display/DRIVERS/Python+Driver+Release+Process).
## Asyncio considerations
## Project Structure and Asyncio Considerations
PyMongo adds asyncio capability by modifying the source files in `*/asynchronous` to `*/synchronous` using
[unasync](https://github.com/python-trio/unasync/) and some custom transforms.
This section describes the layout of the `pymongo/` package.
Where possible, edit the code in `*/asynchronous/*.py` and not the synchronous files.
You can run `pre-commit run --all-files synchro` before running tests if you are testing synchronous code.
Within `pymongo/`, the code is further divided into the `pymongo/asynchronous` and `pymongo/synchronous` subdirectories.
Files in `pymongo/synchronous` are generated from `pymongo/asynchronous` using the `synchro` pre-commit hook, which uses [unasync](https://github.com/python-trio/unasync/) and some custom transforms.
As a result, **all modifications** within `pymongo` must be made either in the `pymongo/asynchronous` directory or, when the code has to behave differently between sync and async contexts, in the top-level `pymongo` directory. Never edit `pymongo/synchronous` directly.
Any changes made directly to files in the `pymongo/synchronous` directory will be overwritten by the `synchro` hook when it is run, which happens automatically on commit.
Some top-level files (e.g. `pymongo/collection.py`) are re-export files for existing import compatibility and should not be modified directly.
The other top-level files (e.g. `pymongo/network_layer.py`, `pymongo/pool_shared.py`) contain either shared code used in both the asynchronous and synchronous APIs, or code that is very different between the two APIs and therefore cannot be generated from the async version using `synchro`.
Run `pre-commit run --all-files synchro` before running tests to generate the latest version of the synchronous code.
To prevent the `synchro` hook from accidentally overwriting code, it first checks whether the sync version
of a file has changed without a corresponding change to its async counterpart, and fails if so.

View File

@ -4,6 +4,7 @@
[![Python Versions](https://img.shields.io/pypi/pyversions/pymongo)](https://pypi.org/project/pymongo)
[![Monthly Downloads](https://static.pepy.tech/badge/pymongo/month)](https://pepy.tech/project/pymongo)
[![API Documentation Status](https://readthedocs.org/projects/pymongo/badge/?version=stable)](http://pymongo.readthedocs.io/en/stable/api?badge=stable)
[![codecov](https://codecov.io/gh/mongodb/mongo-python-driver/graph/badge.svg?branch=master)](https://codecov.io/gh/mongodb/mongo-python-driver)
## About
@ -215,4 +216,4 @@ pip install -e ".[test]"
pytest
```
For more advanced testing scenarios, see the [contributing guide](./CONTRIBUTING.md#running-tests-locally).
For more advanced testing scenarios, see the [contributing guide](https://github.com/mongodb/mongo-python-driver/blob/master/CONTRIBUTING.md#running-tests-locally).

View File

@ -72,6 +72,7 @@ bytes [#bytes]_ binary both
from __future__ import annotations
import datetime
import importlib.util
import itertools
import os
import re
@ -143,12 +144,79 @@ if TYPE_CHECKING:
from bson.raw_bson import RawBSONDocument
from bson.typings import _DocumentType, _ReadableBuffer
try:
from bson import _cbson # type: ignore[attr-defined]
# Try to import C and Rust extensions
_cbson = None
_rbson = None
_HAS_C = False
_HAS_RUST = False
_USE_C = True
except ImportError:
_USE_C = False
# Use importlib to avoid circular import issues
_spec = None
try:
# Check if already loaded (e.g., when reloading bson module)
if "bson._cbson" in sys.modules:
_cbson = sys.modules["bson._cbson"]
if hasattr(_cbson, "_bson_to_dict"):
_HAS_C = True
else:
_spec = importlib.util.find_spec("bson._cbson")
if _spec and _spec.loader:
_cbson = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_cbson)
if hasattr(_cbson, "_bson_to_dict"):
_HAS_C = True
else:
_cbson = None
except (ImportError, AttributeError):
pass
try:
# Check if already loaded (e.g., when reloading bson module)
if "bson._rbson" in sys.modules:
_rbson = sys.modules["bson._rbson"]
if hasattr(_rbson, "_bson_to_dict"):
_HAS_RUST = True
else:
_spec = importlib.util.find_spec("bson._rbson")
if _spec and _spec.loader:
_rbson = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_rbson)
if hasattr(_rbson, "_bson_to_dict"):
_HAS_RUST = True
else:
_rbson = None
except (ImportError, AttributeError):
pass
# Clean up the spec variable to avoid polluting the module namespace
del _spec
# Determine which extension to use at runtime.
# Priority: PYMONGO_USE_RUST env var > C extension (default) > pure Python.
# The variable is opt-in: only "1", "true" or "yes" (case-insensitive) enable it.
_USE_RUST_RUNTIME = os.environ.get("PYMONGO_USE_RUST", "").lower() in ("1", "true", "yes")

# Decide which extension to actually use. At most one of the two flags ends up
# True; when both stay False the pure Python implementation is used.
_USE_C = False
_USE_RUST = False
if _USE_RUST_RUNTIME and _HAS_RUST:
    # User requested Rust and it's available - use Rust, not C.
    _USE_RUST = True
else:
    if _USE_RUST_RUNTIME:
        # User requested Rust but it's not available. Always warn, naming the
        # implementation actually selected, so the fallback is never silent
        # (including when the C extension is missing as well).
        import warnings

        warnings.warn(
            "PYMONGO_USE_RUST is set but Rust extension is not available. "
            + ("Falling back to C extension." if _HAS_C else "Falling back to pure Python."),
            stacklevel=2,
        )
    # Without Rust, use the C extension whenever it is available.
    _USE_C = _HAS_C
__all__ = [
"ALL_UUID_SUBTYPES",
@ -209,6 +277,8 @@ __all__ = [
"is_valid",
"BSON",
"has_c",
"has_rust",
"get_bson_implementation",
"DatetimeConversion",
"DatetimeMS",
]
@ -543,7 +613,7 @@ if _USE_C:
) -> Tuple[str, Any, int]:
return cast(
"Tuple[str, Any, int]",
_cbson._element_to_dict(data, position, obj_end, opts, raw_array),
_cbson._element_to_dict(data, position, obj_end, opts, raw_array), # type: ignore[union-attr]
)
else:
@ -634,8 +704,13 @@ def _bson_to_dict(data: Any, opts: CodecOptions[_DocumentType]) -> _DocumentType
raise InvalidBSON(str(exc_value)).with_traceback(exc_tb) from None
if _USE_C:
_bson_to_dict = _cbson._bson_to_dict
# Save reference to Python implementation before overriding
_bson_to_dict_python = _bson_to_dict
if _USE_RUST:
_bson_to_dict = _rbson._bson_to_dict # type: ignore[union-attr]
elif _USE_C:
_bson_to_dict = _cbson._bson_to_dict # type: ignore[union-attr]
_PACK_FLOAT = struct.Struct("<d").pack
@ -1017,8 +1092,10 @@ def _dict_to_bson(
return _PACK_INT(len(encoded) + 5) + encoded + b"\x00"
if _USE_C:
_dict_to_bson = _cbson._dict_to_bson
if _USE_RUST:
_dict_to_bson = _rbson._dict_to_bson # type: ignore[union-attr]
elif _USE_C:
_dict_to_bson = _cbson._dict_to_bson # type: ignore[union-attr]
_CODEC_OPTIONS_TYPE_ERROR = TypeError("codec_options must be an instance of CodecOptions")
@ -1130,7 +1207,7 @@ def _decode_all(data: _ReadableBuffer, opts: CodecOptions[_DocumentType]) -> lis
if _USE_C:
_decode_all = _cbson._decode_all
_decode_all = _cbson._decode_all # type: ignore[union-attr]
@overload
@ -1223,7 +1300,7 @@ def _array_of_documents_to_buffer(data: Union[memoryview, bytes]) -> bytes:
if _USE_C:
_array_of_documents_to_buffer = _cbson._array_of_documents_to_buffer
_array_of_documents_to_buffer = _cbson._array_of_documents_to_buffer # type: ignore[union-attr]
def _convert_raw_document_lists_to_streams(document: Any) -> None:
@ -1470,7 +1547,30 @@ class BSON(bytes):
def has_c() -> bool:
"""Is the C extension installed?"""
return _USE_C
return _HAS_C
def has_rust() -> bool:
    """Report whether the Rust BSON extension is installed.

    This only reflects availability of the extension; call
    :func:`get_bson_implementation` to find out which implementation is
    actually in use.

    .. versionadded:: 5.0
    """
    return _HAS_RUST
def get_bson_implementation() -> str:
    """Return the name of the BSON implementation currently in use.

    The result is ``'rust'`` when the Rust extension is active, ``'c'`` when
    the C extension is active, and ``'python'`` otherwise.

    .. versionadded:: 5.0
    """
    # Rust takes precedence: it is only enabled on explicit request.
    if _USE_RUST:
        return "rust"
    return "c" if _USE_C else "python"
def _after_fork() -> None:

View File

@ -356,7 +356,8 @@ static PyObject* datetime_ms_from_millis(PyObject* self, long long millis){
if (!(ll_millis = PyLong_FromLongLong(millis))){
return NULL;
}
dt = PyObject_CallFunctionObjArgs(state->DatetimeMS, ll_millis, NULL);
PyObject* args[1] = {ll_millis};
dt = PyObject_Vectorcall(state->DatetimeMS, args, 1, NULL);
Py_DECREF(ll_millis);
return dt;
}
@ -401,7 +402,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
int64_t min_millis_offset = 0;
int64_t max_millis_offset = 0;
if (options->tz_aware && options->tzinfo && options->tzinfo != Py_None) {
PyObject* utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->min_datetime, NULL);
PyObject* utcoffset_args[2] = {options->tzinfo, state->min_datetime};
PyObject* utcoffset = PyObject_VectorcallMethod(
state->_utcoffset_str, utcoffset_args, 2, NULL);
if (utcoffset == NULL) {
return 0;
}
@ -420,7 +423,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
(PyDateTime_DELTA_GET_MICROSECONDS(utcoffset) / 1000);
}
Py_DECREF(utcoffset);
utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->max_datetime, NULL);
utcoffset_args[1] = state->max_datetime;
utcoffset = PyObject_VectorcallMethod(
state->_utcoffset_str, utcoffset_args, 2, NULL);
if (utcoffset == NULL) {
return 0;
}
@ -481,7 +486,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
/* convert to local time */
if (options->tzinfo != Py_None) {
PyObject* temp = PyObject_CallMethodObjArgs(value, state->_astimezone_str, options->tzinfo, NULL);
PyObject* astimezone_args[2] = {value, options->tzinfo};
PyObject* temp = PyObject_VectorcallMethod(
state->_astimezone_str, astimezone_args, 2, NULL);
Py_DECREF(value);
value = temp;
}
@ -688,7 +695,8 @@ static int _load_python_objects(PyObject* module) {
return 1;
}
compiled = PyObject_CallFunction(re_compile, "O", empty_string);
PyObject* compile_args[1] = {empty_string};
compiled = PyObject_Vectorcall(re_compile, compile_args, 1, NULL);
Py_DECREF(re_compile);
if (compiled == NULL) {
state->REType = NULL;
@ -711,13 +719,19 @@ static long _type_marker(PyObject* object, PyObject* _type_marker_str) {
PyObject* type_marker = NULL;
long type = 0;
if (PyObject_HasAttr(object, _type_marker_str)) {
type_marker = PyObject_GetAttr(object, _type_marker_str);
if (type_marker == NULL) {
#if PY_VERSION_HEX >= 0x030D0000
// 3.13
if (PyObject_GetOptionalAttr(object, _type_marker_str, &type_marker) == -1) {
return -1;
}
}
# else
if (PyObject_HasAttr(object, _type_marker_str)) {
type_marker = PyObject_GetAttr(object, _type_marker_str);
if (type_marker == NULL) {
return -1;
}
}
#endif
/*
* Python objects with broken __getattr__ implementations could return
* arbitrary types for a call to PyObject_GetAttrString. For example
@ -814,6 +828,7 @@ int convert_codec_options(PyObject* self, PyObject* options_obj, codec_options_t
}
options->is_raw_bson = (101 == type_marker);
options->is_dict_class = (options->document_class == (PyObject*)&PyDict_Type);
options->options_obj = options_obj;
Py_INCREF(options->options_obj);
@ -1013,10 +1028,20 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
}
/*
* Use _type_marker attribute instead of PyObject_IsInstance for better perf.
*
* Skip _type_marker lookup for common built-in types
* that we know don't have a _type_marker attribute. This avoids the overhead
* of PyObject_HasAttr/PyObject_GetAttr calls for the most common cases.
*/
type = _type_marker(value, state->_type_marker_str);
if (type < 0) {
return 0;
if (PyUnicode_CheckExact(value) || PyLong_CheckExact(value) || PyFloat_CheckExact(value) ||
PyBool_Check(value) || PyDict_CheckExact(value) || PyList_CheckExact(value) ||
PyTuple_CheckExact(value) || PyBytes_CheckExact(value) || value == Py_None) {
type = 0;
} else {
type = _type_marker(value, state->_type_marker_str);
if (type < 0) {
return 0;
}
}
switch (type) {
@ -1227,7 +1252,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
case 100:
{
/* DBRef */
PyObject* as_doc = PyObject_CallMethodObjArgs(value, state->_as_doc_str, NULL);
PyObject* as_doc_args[1] = {value};
PyObject* as_doc = PyObject_VectorcallMethod(
state->_as_doc_str, as_doc_args, 1, NULL);
if (!as_doc) {
return 0;
}
@ -1383,7 +1410,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
return write_unicode(buffer, value);
} else if (PyDateTime_Check(value)) {
long long millis;
PyObject* utcoffset = PyObject_CallMethodObjArgs(value, state->_utcoffset_str , NULL);
PyObject* utcoffset_args[1] = {value};
PyObject* utcoffset = PyObject_VectorcallMethod(
state->_utcoffset_str, utcoffset_args, 1, NULL);
if (utcoffset == NULL)
return 0;
if (utcoffset != Py_None) {
@ -1422,7 +1451,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
if (!(uuid_rep_obj = PyLong_FromLong(options->uuid_rep))) {
return 0;
}
binary_value = PyObject_CallMethodObjArgs(state->Binary, state->_from_uuid_str, value, uuid_rep_obj, NULL);
PyObject* from_uuid_args[3] = {state->Binary, value, uuid_rep_obj};
binary_value = PyObject_VectorcallMethod(
state->_from_uuid_str, from_uuid_args, 3, NULL);
Py_DECREF(uuid_rep_obj);
if (binary_value == NULL) {
@ -1452,7 +1483,8 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
if (converter != NULL) {
/* Transform types that have a registered converter.
* A new reference is created upon transformation. */
new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
PyObject* converter_args[1] = {value};
new_value = PyObject_Vectorcall(converter, converter_args, 1, NULL);
if (new_value == NULL) {
return 0;
}
@ -1466,8 +1498,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
/* Try the fallback encoder if one is provided and we have not already
* attempted to use the fallback encoder. */
if (!in_fallback_call && options->type_registry.has_fallback_encoder) {
new_value = PyObject_CallFunctionObjArgs(
options->type_registry.fallback_encoder, value, NULL);
PyObject* fallback_args[1] = {value};
new_value = PyObject_Vectorcall(
options->type_registry.fallback_encoder, fallback_args, 1, NULL);
if (new_value == NULL) {
// propagate any exception raised by the callback
return 0;
@ -1668,7 +1701,8 @@ void handle_invalid_doc_error(PyObject* dict) {
goto cleanup;
}
// Add doc to the error instance as a property.
new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL);
PyObject* exc_args[2] = {new_msg, dict};
new_evalue = PyObject_Vectorcall(InvalidDocument, exc_args, 2, NULL);
Py_DECREF(evalue);
Py_DECREF(etype);
etype = InvalidDocument;
@ -1944,7 +1978,8 @@ static PyObject *_dbref_hook(PyObject* self, PyObject* value) {
PyMapping_DelItem(value, state->_dollar_db_str);
}
ret = PyObject_CallFunctionObjArgs(state->DBRef, ref, id, database, value, NULL);
PyObject* dbref_args[4] = {ref, id, database, value};
ret = PyObject_Vectorcall(state->DBRef, dbref_args, 4, NULL);
Py_DECREF(value);
} else {
ret = value;
@ -2160,7 +2195,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
goto uuiderror;
}
binary_value = PyObject_CallFunction(state->Binary, "(Oi)", data, subtype);
PyObject* subtype_obj = PyLong_FromLong(subtype);
if (!subtype_obj) {
goto uuiderror;
}
PyObject* binary_args[2] = {data, subtype_obj};
binary_value = PyObject_Vectorcall(state->Binary, binary_args, 2, NULL);
Py_DECREF(subtype_obj);
if (binary_value == NULL) {
goto uuiderror;
}
@ -2175,7 +2216,9 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
if (!uuid_rep_obj) {
goto uuiderror;
}
value = PyObject_CallMethodObjArgs(binary_value, state->_as_uuid_str, uuid_rep_obj, NULL);
PyObject* as_uuid_args[2] = {binary_value, uuid_rep_obj};
value = PyObject_VectorcallMethod(
state->_as_uuid_str, as_uuid_args, 2, NULL);
Py_DECREF(uuid_rep_obj);
}
@ -2194,7 +2237,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
Py_DECREF(data);
goto invalid;
}
value = PyObject_CallFunctionObjArgs(state->Binary, data, st, NULL);
PyObject* binary_args[2] = {data, st};
value = PyObject_Vectorcall(state->Binary, binary_args, 2, NULL);
Py_DECREF(st);
Py_DECREF(data);
if (!value) {
@ -2215,7 +2259,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
if (max < 12) {
goto invalid;
}
value = PyObject_CallFunction(state->ObjectId, "y#", buffer + *position, (Py_ssize_t)12);
PyObject* oid_bytes = PyBytes_FromStringAndSize(buffer + *position, 12);
if (!oid_bytes) {
goto invalid;
}
PyObject* oid_args[1] = {oid_bytes};
value = PyObject_Vectorcall(state->ObjectId, oid_args, 1, NULL);
Py_DECREF(oid_bytes);
*position += 12;
break;
}
@ -2294,7 +2344,14 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
*position += (unsigned)flags_length + 1;
value = PyObject_CallFunction(state->Regex, "Oi", pattern, flags);
PyObject* flags_obj = PyLong_FromLong(flags);
if (!flags_obj) {
Py_DECREF(pattern);
goto invalid;
}
PyObject* regex_args[2] = {pattern, flags_obj};
value = PyObject_Vectorcall(state->Regex, regex_args, 2, NULL);
Py_DECREF(flags_obj);
Py_DECREF(pattern);
break;
}
@ -2327,13 +2384,21 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
*position += coll_length;
id = PyObject_CallFunction(state->ObjectId, "y#", buffer + *position, (Py_ssize_t)12);
PyObject* oid_bytes = PyBytes_FromStringAndSize(buffer + *position, 12);
if (!oid_bytes) {
Py_DECREF(collection);
goto invalid;
}
PyObject* oid_args[1] = {oid_bytes};
id = PyObject_Vectorcall(state->ObjectId, oid_args, 1, NULL);
Py_DECREF(oid_bytes);
if (!id) {
Py_DECREF(collection);
goto invalid;
}
*position += 12;
value = PyObject_CallFunctionObjArgs(state->DBRef, collection, id, NULL);
PyObject* dbref_args[2] = {collection, id};
value = PyObject_Vectorcall(state->DBRef, dbref_args, 2, NULL);
Py_DECREF(collection);
Py_DECREF(id);
break;
@ -2363,7 +2428,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
goto invalid;
}
*position += value_length;
value = PyObject_CallFunctionObjArgs(state->Code, code, NULL, NULL);
PyObject* code_args[1] = {code};
value = PyObject_Vectorcall(state->Code, code_args, 1, NULL);
Py_DECREF(code);
break;
}
@ -2429,7 +2495,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
*position += scope_size;
value = PyObject_CallFunctionObjArgs(state->Code, code, scope, NULL);
PyObject* code_scope_args[2] = {code, scope};
value = PyObject_Vectorcall(state->Code, code_scope_args, 2, NULL);
Py_DECREF(code);
Py_DECREF(scope);
break;
@ -2459,7 +2526,19 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
memcpy(&time, buffer + *position + 4, 4);
inc = BSON_UINT32_FROM_LE(inc);
time = BSON_UINT32_FROM_LE(time);
value = PyObject_CallFunction(state->Timestamp, "II", time, inc);
PyObject* time_obj = PyLong_FromUnsignedLong(time);
if (!time_obj) {
goto invalid;
}
PyObject* inc_obj = PyLong_FromUnsignedLong(inc);
if (!inc_obj) {
Py_DECREF(time_obj);
goto invalid;
}
PyObject* ts_args[2] = {time_obj, inc_obj};
value = PyObject_Vectorcall(state->Timestamp, ts_args, 2, NULL);
Py_DECREF(time_obj);
Py_DECREF(inc_obj);
*position += 8;
break;
}
@ -2471,7 +2550,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
memcpy(&ll, buffer + *position, 8);
ll = (int64_t)BSON_UINT64_FROM_LE(ll);
value = PyObject_CallFunction(state->BSONInt64, "L", ll);
PyObject* ll_obj = PyLong_FromLongLong(ll);
if (!ll_obj) {
goto invalid;
}
PyObject* int64_args[1] = {ll_obj};
value = PyObject_Vectorcall(state->BSONInt64, int64_args, 1, NULL);
Py_DECREF(ll_obj);
*position += 8;
break;
}
@ -2484,19 +2569,21 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
if (!_bytes_obj) {
goto invalid;
}
value = PyObject_CallMethodObjArgs(state->Decimal128, state->_from_bid_str, _bytes_obj, NULL);
PyObject* dec128_args[2] = {state->Decimal128, _bytes_obj};
value = PyObject_VectorcallMethod(
state->_from_bid_str, dec128_args, 2, NULL);
Py_DECREF(_bytes_obj);
*position += 16;
break;
}
case 255:
{
value = PyObject_CallFunctionObjArgs(state->MinKey, NULL);
value = PyObject_Vectorcall(state->MinKey, NULL, 0, NULL);
break;
}
case 127:
{
value = PyObject_CallFunctionObjArgs(state->MaxKey, NULL);
value = PyObject_Vectorcall(state->MaxKey, NULL, 0, NULL);
break;
}
default:
@ -2548,7 +2635,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
converter = PyDict_GetItem(options->type_registry.decoder_map, value_type);
if (converter != NULL) {
PyObject* new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
PyObject* converter_args[1] = {value};
PyObject* new_value = PyObject_Vectorcall(converter, converter_args, 1, NULL);
Py_DECREF(value_type);
Py_DECREF(value);
return new_value;
@ -2716,11 +2804,20 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
unsigned max,
const codec_options_t* options) {
unsigned position = 0;
PyObject* dict = PyObject_CallObject(options->document_class, NULL);
PyObject* dict;
int raw_array = 0;
/* Use PyDict_New() directly when document_class is dict.
* This avoids the overhead of PyObject_CallObject() for the common case. */
if (options->is_dict_class) {
dict = PyDict_New();
} else {
dict = PyObject_CallObject(options->document_class, NULL);
}
if (!dict) {
return NULL;
}
int raw_array = 0;
while (position < max) {
PyObject* name = NULL;
PyObject* value = NULL;
@ -2735,7 +2832,24 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
position = (unsigned)new_position;
}
PyObject_SetItem(dict, name, value);
/* Use PyDict_SetItem() when document_class is dict.
* PyDict_SetItem() is faster than PyObject_SetItem() because it
* avoids method lookup overhead. */
if (options->is_dict_class) {
if (PyDict_SetItem(dict, name, value) < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(dict);
return NULL;
}
} else {
if (PyObject_SetItem(dict, name, value) < 0) {
Py_DECREF(name);
Py_DECREF(value);
Py_DECREF(dict);
return NULL;
}
}
Py_DECREF(name);
Py_DECREF(value);
}
@ -2747,9 +2861,14 @@ static PyObject* elements_to_dict(PyObject* self, const char* string,
const codec_options_t* options) {
PyObject* result;
if (options->is_raw_bson) {
return PyObject_CallFunction(
options->document_class, "y#O",
string, max, options->options_obj);
PyObject* bson_bytes = PyBytes_FromStringAndSize(string, max);
if (!bson_bytes) {
return NULL;
}
PyObject* raw_args[2] = {bson_bytes, options->options_obj};
result = PyObject_Vectorcall(options->document_class, raw_args, 2, NULL);
Py_DECREF(bson_bytes);
return result;
}
if (Py_EnterRecursiveCall(" while decoding a BSON document"))
return NULL;

View File

@ -72,6 +72,7 @@ typedef struct codec_options_t {
unsigned char datetime_conversion;
PyObject* options_obj;
unsigned char is_raw_bson;
unsigned char is_dict_class;
} codec_options_t;
/* C API functions */

20
bson/_rbson/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "bson-rbson"
version = "0.1.0"
edition = "2021"
[lib]
name = "_rbson"
crate-type = ["cdylib"]
[dependencies]
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py39"] }
bson = "2.13"
serde = "1.0"
once_cell = "1.20"
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
strip = true

441
bson/_rbson/README.md Normal file
View File

@ -0,0 +1,441 @@
# Rust BSON Extension Module
⚠️ **NOT PRODUCTION READY** - This is an experimental implementation with incomplete feature support and performance limitations. See [Test Status](#test-status) and [Performance Analysis](#performance-analysis) sections below.
This directory contains a Rust-based implementation of BSON encoding/decoding for PyMongo, developed as part of [PYTHON-5683](https://jira.mongodb.org/browse/PYTHON-5683).
## Overview
The Rust extension (`_rbson`) provides a **partial implementation** of the C extension (`_cbson`) interface. It is built with:
- **PyO3**: Python bindings for Rust
- **bson crate**: MongoDB's official Rust BSON library
- **Maturin**: Build tool for Rust Python extensions
## Test Status
### ✅ Core BSON Tests: 86 passed, 2 skipped
The basic BSON encoding/decoding functionality works correctly (`test/test_bson.py`).
### ⏭️ Skipped Tests: ~85 tests across multiple test files
The following features are **not implemented** and tests are skipped when using the Rust extension:
#### Custom Type Encoders (test/test_custom_types.py)
- **`TypeEncoder` and `TypeRegistry`** - Custom type encoding/decoding
- **`FallbackEncoder`** - Fallback encoding for unknown types
- **Tests skipped**: All tests in `TestBSONFallbackEncoder`, `TestCustomPythonBSONTypeToBSONMonolithicCodec`, `TestCustomPythonBSONTypeToBSONMultiplexedCodec`
- **Reason**: Rust extension doesn't support custom type encoders or fallback encoders
#### RawBSONDocument (test/test_raw_bson.py)
- **`RawBSONDocument` codec options** - Raw BSON document handling
- **Tests skipped**: All tests in `TestRawBSONDocument`
- **Reason**: Rust extension doesn't implement RawBSONDocument codec options
#### DBRef Edge Cases (test/test_dbref.py)
- **DBRef validation and edge cases**
- **Tests skipped**: Some DBRef tests
- **Reason**: Incomplete DBRef handling in Rust extension
#### Type Checking (test/test_typing.py)
- **Type hints and mypy validation**
- **Tests skipped**: Some typing tests
- **Reason**: Type checking issues with Rust extension
### Skip Mechanism
Tests are skipped using the `@skip_if_rust_bson` pytest marker defined in `test/__init__.py`:
```python
skip_if_rust_bson = pytest.mark.skipif(
_use_rust_bson(), reason="Rust BSON extension does not support this feature"
)
```
This marker is applied to test classes and methods that use unimplemented features.
## Implementation History
This implementation was developed through [PR #2695](https://github.com/mongodb/mongo-python-driver/pull/2695) to investigate using Rust as an alternative to C for Python extension modules.
### Key Milestones
1. **Initial Implementation** - Basic BSON type support with core functionality
2. **Performance Optimizations** - Type caching, fast paths for common types, direct byte operations
3. **Modular Refactoring** - Split monolithic lib.rs into 6 well-organized modules
4. **Test Integration** - Added skip markers for unimplemented features (~85 tests skipped)
## Features
### Supported BSON Types
The Rust extension supports basic BSON types:
- **Primitives**: Double, String, Int32, Int64, Boolean, Null
- **Complex Types**: Document, Array, Binary, ObjectId, DateTime
- **Special Types**: Regex, Code, Timestamp, Decimal128, MinKey, MaxKey
- **Deprecated Types**: DBPointer (decodes to DBRef)
### CodecOptions Support
**Partial** support for PyMongo's `CodecOptions`:
- ✅ `document_class` - Custom document classes (basic support)
- ✅ `tz_aware` - Timezone-aware datetime handling
- ✅ `tzinfo` - Timezone conversion
- ✅ `uuid_representation` - UUID encoding/decoding modes
- ✅ `datetime_conversion` - DateTime handling modes (AUTO, CLAMP, MS)
- ✅ `unicode_decode_error_handler` - UTF-8 error handling
- ❌ `type_registry` - Custom type encoders/decoders (NOT IMPLEMENTED)
- ❌ RawBSONDocument support (NOT IMPLEMENTED)
### Runtime Selection
The Rust extension can be enabled via environment variable:
```bash
export PYMONGO_USE_RUST=1
python your_script.py
```
Without this variable, PyMongo uses the C extension by default.
## Performance Analysis
### Current Performance: ~0.21x (5x slower than C)
**Benchmark Results** (from PR #2695):
```
Simple documents: C: 100% | Rust: 21%
Mixed types: C: 100% | Rust: 20%
Nested documents: C: 100% | Rust: 18%
Lists: C: 100% | Rust: 22%
```
### Root Cause: Architectural Difference
The performance gap is due to a fundamental architectural difference:
**C Extension Architecture:**
```
Python objects → BSON bytes (direct)
```
- Writes BSON bytes directly from Python objects
- No intermediate data structures
- Minimal memory allocations
**Rust Extension Architecture:**
```
Python objects → Rust Bson enum → BSON bytes
```
- Converts Python objects to Rust `Bson` enum
- Then serializes `Bson` to bytes
- Extra conversion layer adds overhead
### Optimization Attempts
Multiple optimization strategies were attempted in PR #2695:
1. **Type Caching** - Cache frequently used Python types (UUID, datetime, etc.)
2. **Fast Paths** - Special handling for common types (int, str, bool, None)
3. **Direct Byte Writing** - Write BSON bytes directly without intermediate `Document`
4. **PyDict Fast Path** - Use `PyDict_Next` for efficient dict iteration
**Result**: These optimizations improved performance from ~0.15x to ~0.21x, but the fundamental architectural difference remains.
## Comparison with Copilot POC (PR #2689)
The current implementation evolved significantly from the initial Copilot-generated proof-of-concept in PR #2689:
### Copilot POC (PR #2689) - Initial Spike
**Status**: 53/88 tests passing (60%)
**Build System**: `cargo build --release` (manual copy of .so file)
- Used raw `cargo` commands
- Manual file copying to project root
- No wheel generation
- Located in `rust/` directory
**What it had:**
- ✅ Basic BSON type support (int, float, string, bool, bytes, dict, list, null)
- ✅ ObjectId, DateTime, Regex encoding/decoding
- ✅ Binary, Code, Timestamp, Decimal128, MinKey, MaxKey support
- ✅ DBRef and DBPointer decoding
- ✅ Int64 type marker support
- ✅ Basic CodecOptions (tz_aware, uuid_representation)
- ✅ Buffer protocol support (memoryview, array)
- ✅ _id field ordering at top level
- ✅ Benchmark scripts and performance analysis
- ✅ Comprehensive documentation (RUST_SPIKE_RESULTS.md)
- ✅ **Same Rust architecture**: PyO3 0.27 + bson 2.13 crate (Python → Bson enum → bytes)
**What it lacked:**
- ❌ Only 60% test pass rate (53/88 tests)
- ❌ Incomplete datetime handling (no DATETIME_CLAMP, DATETIME_AUTO, DATETIME_MS modes)
- ❌ Missing unicode_decode_error_handler support
- ❌ No document_class support from CodecOptions
- ❌ No tzinfo conversion support
- ❌ Missing BSON validation (size checks, null terminator)
- ❌ No performance optimizations (type caching, fast paths)
- ❌ Located in `rust/` directory instead of `bson/_rbson/`
**Performance Claims**: 2.89x average speedup over C (from benchmarks in POC)
**Why the POC appeared faster:**
The Copilot POC's claimed 2.89x speedup was likely due to:
1. **Limited test scope** - Benchmarks only tested simple documents that passed (53/88 tests)
2. **Missing validation** - No BSON size checks, null terminator validation, or extra bytes detection
3. **Incomplete CodecOptions** - Skipped expensive operations like:
- Timezone conversions (`tzinfo` with `astimezone()`)
- DateTime mode handling (CLAMP, AUTO, MS)
- Unicode error handler fallbacks to Python
- Custom document_class instantiation
4. **Optimistic measurements** - May have measured only the fast path without edge cases
5. **Different test methodology** - POC used custom benchmarks vs production testing with full PyMongo test suite
When these missing features were added to bring the implementation closer to full compatibility, the true performance cost of the Rust `Bson` enum architecture became apparent.
### Current Implementation (PR #2695) - Experimental
**Status**: 86/88 core BSON tests passing, ~85 feature tests skipped
**Build System**: `maturin build --release` (proper wheel generation)
- Uses Maturin for proper Python packaging
- Generates wheels with correct metadata
- Extracts .so file to `bson/` directory
- Located in `bson/_rbson/` directory (proper module structure)
**Improvements over Copilot POC:**
- ✅ **Core BSON functionality** (86/88 tests passing in test_bson.py)
- ✅ **Basic CodecOptions support**:
- `document_class` - Custom document classes (basic support)
- `tzinfo` - Timezone conversion with astimezone()
- `datetime_conversion` - All modes (AUTO, CLAMP, MS)
- `unicode_decode_error_handler` - Fallback to Python for non-strict handlers
- ✅ **BSON validation** (size checks, null terminator, extra bytes detection)
- ✅ **Performance optimizations**:
- Type caching (UUID, datetime, Pattern, etc.)
- Fast paths for common types (int, str, bool, None)
- Direct byte operations where possible
- PyDict fast path with pre-allocation
- ✅ **Modular code structure** (6 well-organized Rust modules)
- ✅ **Proper module structure** (`bson/_rbson/` with build.sh and maturin)
- ✅ **Runtime selection** via PYMONGO_USE_RUST environment variable
- ✅ **Test skip markers** for unimplemented features
- ✅ **Same Rust architecture**: PyO3 0.23 + bson 2.13 crate (Python → Bson enum → bytes)
**Missing Features** (see [Test Status](#test-status)):
- ❌ **Custom type encoders** (`TypeEncoder`, `TypeRegistry`, `FallbackEncoder`)
- ❌ **RawBSONDocument** codec options
- ❌ **Some DBRef edge cases**
- ❌ **Complete type checking support**
**Performance Reality**: ~0.21x (5x slower than C) - see Performance Analysis section
**Key Insights**:
1. **Same Architecture, Different Results**: Both implementations use the same Rust architecture (PyO3 + bson crate with intermediate `Bson` enum), so the build system (cargo vs maturin) is not the cause of the performance difference.
2. **Incomplete Implementation**: The current implementation has ~85 tests skipped due to unimplemented features (custom type encoders, RawBSONDocument, etc.). This is an experimental implementation, not production-ready.
3. **The Fundamental Issue**: The Rust architecture (Python → Bson enum → bytes) has inherent performance limitations compared to the C extension's direct byte-writing approach.
## Direct Byte-Writing Performance Results
### Implementation: `_dict_to_bson_direct()`
A new implementation has been added that writes BSON bytes directly from Python objects without converting to `Bson` enum types first. This eliminates the intermediate conversion layer.
**Architecture Comparison:**
```
Regular: Python objects → Rust Bson enum → BSON bytes
Direct: Python objects → BSON bytes (no intermediate types)
```
### Benchmark Results
Comprehensive benchmarks on realistic document types show **consistent 2x speedup**:
| Document Type | Regular (ops/sec) | Direct (ops/sec) | Speedup |
|--------------|-------------------|------------------|---------|
| User Profile | 99,970 | 208,658 | **2.09x** |
| E-commerce Order | 93,578 | 165,636 | **1.77x** |
| IoT Sensor Data | 136,824 | 312,058 | **2.28x** |
| Blog Post | 65,782 | 134,154 | **2.04x** |
**Average Speedup: 2.04x** (range: 1.77x - 2.28x)
### Performance by Document Composition
| Document Type | Regular (ops/sec) | Direct (ops/sec) | Speedup |
|--------------|-------------------|------------------|---------|
| Simple types (int, str, float, bool, None) | 177,588 | 800,670 | **4.51x** |
| Mixed types | 223,856 | 342,305 | **1.53x** |
| Nested documents | 130,884 | 287,758 | **2.20x** |
| BSON-specific types only | 342,059 | 304,844 | 0.89x |
### Key Findings
1. **Massive speedup for simple types**: 4.51x faster for documents with Python native types
2. **Consistent 2x improvement for real-world documents**: All realistic mixed-type documents show 1.77x - 2.28x speedup
3. **Slight slowdown for pure BSON types**: Documents with only BSON-specific types (ObjectId, Binary, etc.) are about 11% slower (0.89x) due to extra Python attribute lookups
4. **100% correctness**: All outputs verified to be byte-identical to the regular implementation
### Why Direct Byte-Writing is Faster
1. **Eliminates heap allocations**: No need to create intermediate `Bson` enum values
2. **Reduces function call overhead**: Writes bytes immediately instead of going through `python_to_bson()` → `write_bson_value()`
3. **Better for common types**: Python's native types (int, str, float, bool) can be written directly without any conversion
### Implementation Details
The direct approach is implemented in these functions:
- `_dict_to_bson_direct()` - Public API function
- `write_document_bytes_direct()` - Writes document structure directly
- `write_element_direct()` - Writes individual elements without Bson conversion
- `write_bson_type_direct()` - Handles BSON-specific types directly
### Usage
```python
from bson import _rbson
from bson.codec_options import DEFAULT_CODEC_OPTIONS
# Use direct byte-writing approach
doc = {"name": "John", "age": 30, "score": 95.5}
bson_bytes = _rbson._dict_to_bson_direct(doc, False, DEFAULT_CODEC_OPTIONS)
```
## Steps to Achieve Performance Parity with C Extensions
Based on the analysis in PR #2695 and the direct byte-writing results, here are the steps needed to match C extension performance:
### 1. ✅ Eliminate Intermediate Bson Enum (High Impact) - COMPLETED
**Current**: Python → Bson → bytes
**Target**: Python → bytes (direct)
**Status**: ✅ **Implemented as `_dict_to_bson_direct()`**
**Actual Impact**: **2.04x average speedup** on realistic documents (range: 1.77x - 2.28x)
This brings the Rust extension from ~0.21x (5x slower than C) to **~0.43x (2.3x slower than C)** - a significant improvement!
### 2. Optimize Python API Calls (Medium Impact)
- Reduce `getattr()` calls by caching attribute lookups
- Use `PyDict_GetItem` instead of `dict.get_item()`
- Minimize Python exception handling overhead
- Use `PyTuple_GET_ITEM` for tuple access
**Estimated Impact**: 1.2-1.5x performance improvement
### 3. Memory Allocation Optimization (Low-Medium Impact)
- Pre-allocate buffers based on estimated document size
- Reuse buffers across multiple encode operations
- Use arena allocation for temporary objects
**Estimated Impact**: 1.1-1.3x performance improvement
### 4. SIMD Optimizations (Low Impact)
- Use SIMD for byte copying operations
- Vectorize validation checks
- Optimize string encoding/decoding
**Estimated Impact**: 1.05-1.1x performance improvement
### Combined Potential (Updated with Direct Byte-Writing Results)
With direct byte-writing implemented:
- **Before**: 0.21x (5x slower than C)
- **After direct byte-writing**: 0.43x (2.3x slower than C) ✅
- **With all optimizations**: 0.43x × 1.3 × 1.2 × 1.05 = **~0.70x** (1.4x slower than C)
- **Optimistic target**: Could potentially reach **~0.9x - 1.0x** (parity with C)
The direct byte-writing approach has already delivered the largest performance gain (2x). Additional optimizations could close the remaining gap to C extension performance.
## Building
```bash
cd bson/_rbson
./build.sh
```
Or using maturin directly:
```bash
maturin develop --release
```
## Testing
Run the core BSON test suite with the Rust extension:
```bash
PYMONGO_USE_RUST=1 python -m pytest test/test_bson.py -v
# Expected: 86 passed, 2 skipped
```
Run all tests (including skipped tests):
```bash
PYMONGO_USE_RUST=1 python -m pytest test/ -v
# Expected: Many tests passed, ~85 tests skipped due to unimplemented features
```
Run performance benchmarks:
```bash
# Quick benchmark run
FASTBENCH=1 python test/performance/perf_test.py -v
# With Rust extension enabled
PYMONGO_USE_RUST=1 FASTBENCH=1 python test/performance/perf_test.py -v
# Full benchmark setup (see test/performance/perf_test.py for details)
python -m pip install simplejson
git clone --depth 1 https://github.com/mongodb/specifications.git
cd specifications/source/benchmarking/data
tar xf extended_bson.tgz
tar xf parallel.tgz
tar xf single_and_multi_document.tgz
cd -
export TEST_PATH="specifications/source/benchmarking/data"
export OUTPUT_FILE="results.json"
python test/performance/perf_test.py -v
```
## Module Structure
The Rust codebase is organized into 6 well-structured modules (refactored from a single 3,117-line file):
- **`lib.rs`** (76 lines) - Module exports and public API
- **`types.rs`** (266 lines) - Type cache and BSON type markers
- **`errors.rs`** (56 lines) - Error handling utilities
- **`utils.rs`** (154 lines) - Utility functions (datetime, regex, validation)
- **`encode.rs`** (1,545 lines) - BSON encoding functions
- **`decode.rs`** (1,141 lines) - BSON decoding functions
This modular structure improves:
- Code organization and maintainability
- Compilation times (parallel module compilation)
- Code navigation and testing
- Clear separation of concerns
## Conclusion
The Rust extension demonstrates that:
1. ✅ **Rust can provide basic BSON encoding/decoding functionality**
2. ❌ **Complete feature parity with C extension is not achieved** (~85 tests skipped)
3. ❌ **Performance parity with C requires bypassing the `bson` crate**
4. ❌ **The engineering effort may not justify the benefits**
### Recommendation
⚠️ **NOT PRODUCTION READY** - The Rust extension is **experimental** and has significant limitations:
**Missing Features:**
- Custom type encoders (`TypeEncoder`, `TypeRegistry`, `FallbackEncoder`)
- RawBSONDocument codec options
- Some DBRef edge cases
- Complete type checking support
**Performance Issues:**
- ~5x slower than C extension (0.21x performance)
- Even with direct byte-writing optimizations, still ~2.3x slower (0.43x performance)
**Use Cases for Rust Extension:**
- **Experimental/research purposes only**
- Testing Rust-Python interop with PyO3
- Platforms where C compilation is difficult (with caveats about missing features)
- Future exploration if `bson` crate performance improves
**For production use, the C extension (`_cbson`) is strongly recommended.**
For more details, see:
- [PYTHON-5683 JIRA ticket](https://jira.mongodb.org/browse/PYTHON-5683)
- [PR #2695](https://github.com/mongodb/mongo-python-driver/pull/2695)

84
bson/_rbson/build.sh Executable file
View File

@ -0,0 +1,84 @@
#!/bin/bash
# Build script for Rust BSON extension POC
#
# This script builds the Rust extension and makes it available for testing
# alongside the existing C extension.
#
# Steps:
#   1. Verify that cargo and maturin are available (maturin is installed on demand).
#   2. Build a release wheel into a temporary directory.
#   3. Copy the compiled _rbson module out of the wheel into the bson/ directory.

set -eu

# Resolve the directory containing this script. The expansion is quoted so the
# script still works when the checkout path contains spaces.
HERE=$(dirname "${BASH_SOURCE:-$0}")
HERE="$( cd -- "$HERE" > /dev/null 2>&1 && pwd )"
BSON_DIR=$(dirname "$HERE")

echo "=== Building Rust BSON Extension POC ==="
echo ""

# Check if Rust is installed
if ! command -v cargo &>/dev/null; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    {
        echo "Error: Rust is not installed"
        echo ""
        echo "Install Rust with:"
        echo "  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
        echo ""
    } >&2
    exit 1
fi

echo "Rust toolchain found: $(rustc --version)"

# Check if maturin is installed
if ! command -v maturin &>/dev/null; then
    echo "maturin not found, installing..."
    # Use "python -m pip" so maturin is installed for the same interpreter
    # that is used below to unpack the wheel.
    python -m pip install maturin
fi

echo "maturin found: $(maturin --version)"
echo ""

# Build the extension
echo "Building Rust extension..."
cd "$HERE"

# Build wheel to a temporary directory that is removed on exit
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT

maturin build --release --out "$TEMP_DIR"

# Extract the .so file from the wheel
echo "Extracting extension from wheel..."

# Use a glob instead of parsing "ls" output. When nothing matches, the pattern
# is left unexpanded and the -f test below fails cleanly.
WHEELS=("$TEMP_DIR"/*.whl)
WHEEL_FILE="${WHEELS[0]}"

if [ ! -f "$WHEEL_FILE" ]; then
    echo "Error: No wheel file found" >&2
    exit 1
fi

# Wheels are zip files - extract the .so file
python -c "
import zipfile
import sys
from pathlib import Path

wheel_path = Path(sys.argv[1])
bson_dir = Path(sys.argv[2])

with zipfile.ZipFile(wheel_path, 'r') as whl:
    for name in whl.namelist():
        if name.endswith(('.so', '.pyd')) and '_rbson' in name:
            # Extract to bson/ directory
            so_data = whl.read(name)
            so_name = Path(name).name
            target = bson_dir / so_name
            target.write_bytes(so_data)
            print(f'Installed to {target}')
            sys.exit(0)

print('Error: Could not find .so file in wheel', file=sys.stderr)
sys.exit(1)
" "$WHEEL_FILE" "$BSON_DIR"

echo ""
echo "Build complete!"
echo ""
echo "Test the extension with:"
echo "  python -c 'from bson import _rbson; print(_rbson._test_rust_extension())'"
echo ""

1140
bson/_rbson/src/decode.rs Normal file

File diff suppressed because it is too large Load Diff

1543
bson/_rbson/src/encode.rs Normal file

File diff suppressed because it is too large Load Diff

55
bson/_rbson/src/errors.rs Normal file
View File

@ -0,0 +1,55 @@
// Copyright 2025-present MongoDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Error handling utilities for BSON operations
use pyo3::prelude::*;
use pyo3::types::{PyAny, PyTuple};
use crate::types::TYPE_CACHE;
/// Helper to create InvalidDocument exception
pub(crate) fn invalid_document_error(py: Python, msg: String) -> PyErr {
let invalid_document = TYPE_CACHE.get_invalid_document_class(py)
.expect("Failed to get InvalidDocument class");
PyErr::from_value(
invalid_document.bind(py)
.call1((msg,))
.expect("Failed to create InvalidDocument")
)
}
/// Helper to create InvalidDocument exception with document property
pub(crate) fn invalid_document_error_with_doc(py: Python, msg: String, doc: &Bound<'_, PyAny>) -> PyErr {
let invalid_document = TYPE_CACHE.get_invalid_document_class(py)
.expect("Failed to get InvalidDocument class");
// Call with positional arguments: InvalidDocument(message, document)
let args = PyTuple::new_bound(py, &[msg.into_py(py), doc.clone().into_py(py)]);
PyErr::from_value(
invalid_document.bind(py)
.call1(args)
.expect("Failed to create InvalidDocument")
)
}
/// Helper to create InvalidBSON exception
pub(crate) fn invalid_bson_error(py: Python, msg: String) -> PyErr {
let invalid_bson = TYPE_CACHE.get_invalid_bson_class(py)
.expect("Failed to get InvalidBSON class");
PyErr::from_value(
invalid_bson.bind(py)
.call1((msg,))
.expect("Failed to create InvalidBSON")
)
}

85
bson/_rbson/src/lib.rs Normal file
View File

@ -0,0 +1,85 @@
// Copyright 2025-present MongoDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Rust implementation of BSON encoding/decoding functions
//!
//! ⚠️ **NOT PRODUCTION READY** - Experimental implementation with incomplete features.
//!
//! This module provides a **partial implementation** of the C extension (bson._cbson)
//! interface, implemented in Rust using PyO3 and the bson library.
//!
//! # Implementation Status
//!
//! - ✅ Core BSON encoding/decoding: 86/88 tests passing
//! - ❌ Custom type encoders: NOT IMPLEMENTED (~85 tests skipped)
//! - ❌ RawBSONDocument: NOT IMPLEMENTED
//! - ❌ Performance: ~5x slower than C extension
//!
//! # Implementation History
//!
//! This implementation was developed as part of PYTHON-5683 to investigate
//! using Rust as an alternative to C for Python extension modules.
//!
//! See PR #2695 for the complete implementation history, including:
//! - Initial implementation with core BSON functionality
//! - Performance optimizations (type caching, fast paths, direct conversions)
//! - Modular refactoring (split into 6 modules)
//! - Test skip markers for unimplemented features
//!
//! # Performance
//!
//! Current performance: ~0.21x (5x slower than C extension)
//! Root cause: Architectural difference (Python ↔ Bson ↔ bytes vs Python ↔ bytes)
//! See README.md for detailed performance analysis and optimization opportunities.
//!
//! # Module Structure
//!
//! The codebase is organized into the following modules:
//! - `types`: Type cache and BSON type markers
//! - `errors`: Error handling utilities
//! - `utils`: Utility functions (datetime, regex, validation, string writing)
//! - `encode`: BSON encoding functions
//! - `decode`: BSON decoding functions
#![allow(clippy::useless_conversion)]
mod types;
mod errors;
mod utils;
mod encode;
mod decode;
use pyo3::prelude::*;
use pyo3::types::PyDict;
/// Smoke-test entry point: lets Python code confirm the Rust extension loaded.
///
/// Returns a dict with the string keys `implementation`, `version`, `status`
/// and `pyo3_version`.
#[pyfunction]
fn _test_rust_extension(py: Python) -> PyResult<PyObject> {
    // NOTE(review): `CARGO_PKG_VERSION` is the version of the package being
    // compiled, so the "pyo3_version" entry may not actually be the PyO3
    // version - confirm what this key is meant to report.
    let entries = [
        ("implementation", "rust"),
        ("version", "0.1.0"),
        ("status", "experimental"),
        ("pyo3_version", env!("CARGO_PKG_VERSION")),
    ];

    let info = PyDict::new(py);
    for &(key, value) in &entries {
        info.set_item(key, value)?;
    }
    Ok(info.into())
}
/// Python module definition for `_rbson`.
///
/// Registers the two encoders, the decoder and the self-test function as
/// module-level Python callables.
#[pymodule]
fn _rbson(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Each function is wrapped as a Python callable, then attached to the module.
    let dict_to_bson = wrap_pyfunction!(encode::_dict_to_bson, m)?;
    m.add_function(dict_to_bson)?;

    let dict_to_bson_direct = wrap_pyfunction!(encode::_dict_to_bson_direct, m)?;
    m.add_function(dict_to_bson_direct)?;

    let bson_to_dict = wrap_pyfunction!(decode::_bson_to_dict, m)?;
    m.add_function(bson_to_dict)?;

    let self_test = wrap_pyfunction!(_test_rust_extension, m)?;
    m.add_function(self_test)?;

    Ok(())
}

265
bson/_rbson/src/types.rs Normal file
View File

@ -0,0 +1,265 @@
// Copyright 2025-present MongoDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Type cache for Python type objects
//!
//! This module provides a cache for Python type objects to avoid repeated imports.
//! This matches the C extension's approach of caching all BSON types at module initialization.
use once_cell::sync::OnceCell;
use pyo3::prelude::*;
use pyo3::types::PyAny;
/// Cache for Python type objects to avoid repeated imports.
///
/// Every field is a `OnceCell` holding one Python object. The cells start
/// empty and are filled on first use by the matching `get_*` accessor, so
/// nothing is imported until an accessor is actually called.
pub(crate) struct TypeCache {
    // Standard library types (`uuid.UUID`, `datetime.datetime`, `re.Pattern`)
    pub(crate) uuid_class: OnceCell<PyObject>,
    pub(crate) datetime_class: OnceCell<PyObject>,
    pub(crate) pattern_class: OnceCell<PyObject>,

    // BSON types (classes imported from the `bson.*` submodules)
    pub(crate) binary_class: OnceCell<PyObject>,
    pub(crate) code_class: OnceCell<PyObject>,
    pub(crate) objectid_class: OnceCell<PyObject>,
    pub(crate) dbref_class: OnceCell<PyObject>,
    pub(crate) regex_class: OnceCell<PyObject>,
    pub(crate) timestamp_class: OnceCell<PyObject>,
    pub(crate) int64_class: OnceCell<PyObject>,
    pub(crate) decimal128_class: OnceCell<PyObject>,
    pub(crate) minkey_class: OnceCell<PyObject>,
    pub(crate) maxkey_class: OnceCell<PyObject>,
    pub(crate) datetime_ms_class: OnceCell<PyObject>,

    // Utility objects (`bson.tz_util.utc` and the `calendar.timegm` function)
    pub(crate) utc: OnceCell<PyObject>,
    pub(crate) calendar_timegm: OnceCell<PyObject>,

    // Error classes (from `bson.errors`)
    pub(crate) invalid_document_class: OnceCell<PyObject>,
    pub(crate) invalid_bson_class: OnceCell<PyObject>,

    // Fallback decoder (`bson._bson_to_dict_python`)
    pub(crate) bson_to_dict_python: OnceCell<PyObject>,
}
/// Process-wide cache instance shared by the rest of the crate.
///
/// Every cell is created empty here; the `TypeCache::get_*` accessors fill
/// them in on first use.
pub(crate) static TYPE_CACHE: TypeCache = TypeCache {
    uuid_class: OnceCell::new(),
    datetime_class: OnceCell::new(),
    pattern_class: OnceCell::new(),
    binary_class: OnceCell::new(),
    code_class: OnceCell::new(),
    objectid_class: OnceCell::new(),
    dbref_class: OnceCell::new(),
    regex_class: OnceCell::new(),
    timestamp_class: OnceCell::new(),
    int64_class: OnceCell::new(),
    decimal128_class: OnceCell::new(),
    minkey_class: OnceCell::new(),
    maxkey_class: OnceCell::new(),
    datetime_ms_class: OnceCell::new(),
    utc: OnceCell::new(),
    calendar_timegm: OnceCell::new(),
    invalid_document_class: OnceCell::new(),
    invalid_bson_class: OnceCell::new(),
    bson_to_dict_python: OnceCell::new(),
};
impl TypeCache {
/// Get or initialize the UUID class
pub(crate) fn get_uuid_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.uuid_class.get_or_try_init(|| {
py.import_bound("uuid")?
.getattr("UUID")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the datetime class
pub(crate) fn get_datetime_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.datetime_class.get_or_try_init(|| {
py.import_bound("datetime")?
.getattr("datetime")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the regex Pattern class
pub(crate) fn get_pattern_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.pattern_class.get_or_try_init(|| {
py.import_bound("re")?
.getattr("Pattern")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Binary class
pub(crate) fn get_binary_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.binary_class.get_or_try_init(|| {
py.import_bound("bson.binary")?
.getattr("Binary")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Code class
pub(crate) fn get_code_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.code_class.get_or_try_init(|| {
py.import_bound("bson.code")?
.getattr("Code")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the ObjectId class
pub(crate) fn get_objectid_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.objectid_class.get_or_try_init(|| {
py.import_bound("bson.objectid")?
.getattr("ObjectId")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the DBRef class
pub(crate) fn get_dbref_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.dbref_class.get_or_try_init(|| {
py.import_bound("bson.dbref")?
.getattr("DBRef")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Regex class
pub(crate) fn get_regex_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.regex_class.get_or_try_init(|| {
py.import_bound("bson.regex")?
.getattr("Regex")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Timestamp class
pub(crate) fn get_timestamp_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.timestamp_class.get_or_try_init(|| {
py.import_bound("bson.timestamp")?
.getattr("Timestamp")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Int64 class
pub(crate) fn get_int64_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.int64_class.get_or_try_init(|| {
py.import_bound("bson.int64")?
.getattr("Int64")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Decimal128 class
pub(crate) fn get_decimal128_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.decimal128_class.get_or_try_init(|| {
py.import_bound("bson.decimal128")?
.getattr("Decimal128")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the MinKey class
pub(crate) fn get_minkey_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.minkey_class.get_or_try_init(|| {
py.import_bound("bson.min_key")?
.getattr("MinKey")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the MaxKey class
pub(crate) fn get_maxkey_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.maxkey_class.get_or_try_init(|| {
py.import_bound("bson.max_key")?
.getattr("MaxKey")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the DatetimeMS class
pub(crate) fn get_datetime_ms_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.datetime_ms_class.get_or_try_init(|| {
py.import_bound("bson.datetime_ms")?
.getattr("DatetimeMS")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the UTC timezone object
pub(crate) fn get_utc(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.utc.get_or_try_init(|| {
py.import_bound("bson.tz_util")?
.getattr("utc")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize calendar.timegm function
pub(crate) fn get_calendar_timegm(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.calendar_timegm.get_or_try_init(|| {
py.import_bound("calendar")?
.getattr("timegm")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize InvalidDocument exception class
pub(crate) fn get_invalid_document_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.invalid_document_class.get_or_try_init(|| {
py.import_bound("bson.errors")?
.getattr("InvalidDocument")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize InvalidBSON exception class
pub(crate) fn get_invalid_bson_class(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.invalid_bson_class.get_or_try_init(|| {
py.import_bound("bson.errors")?
.getattr("InvalidBSON")
.map(|c| c.unbind())
})?.clone_ref(py))
}
/// Get or initialize the Python fallback decoder
pub(crate) fn get_bson_to_dict_python(&self, py: Python) -> PyResult<Py<PyAny>> {
Ok(self.bson_to_dict_python.get_or_try_init(|| {
py.import_bound("bson")?
.getattr("_bson_to_dict_python")
.map(|c| c.unbind())
})?.clone_ref(py))
}
}
// Type markers for BSON objects
//
// Integer tags used to tell BSON wrapper objects apart.
// NOTE(review): presumably these mirror the `_type_marker` attribute of the
// corresponding Python `bson` classes - confirm against the Python sources.
pub(crate) const BINARY_TYPE_MARKER: i32 = 5;
pub(crate) const OBJECTID_TYPE_MARKER: i32 = 7;
pub(crate) const DATETIME_TYPE_MARKER: i32 = 9;
pub(crate) const REGEX_TYPE_MARKER: i32 = 11;
pub(crate) const CODE_TYPE_MARKER: i32 = 13;
pub(crate) const SYMBOL_TYPE_MARKER: i32 = 14;
pub(crate) const DBPOINTER_TYPE_MARKER: i32 = 15;
pub(crate) const TIMESTAMP_TYPE_MARKER: i32 = 17;
pub(crate) const INT64_TYPE_MARKER: i32 = 18;
pub(crate) const DECIMAL128_TYPE_MARKER: i32 = 19;
pub(crate) const DBREF_TYPE_MARKER: i32 = 100;
pub(crate) const MAXKEY_TYPE_MARKER: i32 = 127;
pub(crate) const MINKEY_TYPE_MARKER: i32 = 255;

153
bson/_rbson/src/utils.rs Normal file
View File

@ -0,0 +1,153 @@
// Copyright 2025-present MongoDB, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Utility functions for BSON operations
use pyo3::prelude::*;
use pyo3::types::PyAny;
use crate::types::TYPE_CACHE;
/// Convert Python datetime to milliseconds since epoch UTC
///
/// Intended to mirror Python's `bson.datetime_ms._datetime_to_millis()`
/// (NOTE(review): the Python helper is not visible from here - confirm the
/// two stay in sync).
///
/// The whole-second part comes from `calendar.timegm(dtm.timetuple())`. If
/// `dtm.utcoffset()` is not `None`, its `total_seconds()` value, truncated to
/// whole seconds, is subtracted to obtain UTC. The sub-second part is
/// `dtm.microsecond` truncated to milliseconds.
///
/// # Errors
/// Propagates any Python exception raised by the attribute lookups, method
/// calls or integer/float extraction.
pub(crate) fn datetime_to_millis(py: Python, dtm: &Bound<'_, PyAny>) -> PyResult<i64> {
    // Only the microsecond field is read directly; the remaining calendar
    // fields reach `timegm` through `timetuple()` below.
    let microsecond: i32 = dtm.getattr("microsecond")?.extract()?;

    // Check if datetime has timezone offset
    let utcoffset = dtm.call_method0("utcoffset")?;
    let offset_seconds: i64 = if utcoffset.is_none() {
        0
    } else {
        // Get total_seconds() from timedelta; any fractional part is dropped.
        let total_seconds: f64 = utcoffset.call_method0("total_seconds")?.extract()?;
        total_seconds as i64
    };

    // Use Python's calendar.timegm on the datetime's time tuple for accuracy
    let timegm = TYPE_CACHE.get_calendar_timegm(py)?;
    let timetuple = dtm.call_method0("timetuple")?;
    let seconds_since_epoch: i64 = timegm.bind(py).call1((timetuple,))?.extract()?;

    // Adjust for timezone offset (subtract to get UTC)
    let utc_seconds = seconds_since_epoch - offset_seconds;

    // Convert to milliseconds and add the millisecond part of the microseconds
    Ok(utc_seconds * 1000 + (microsecond / 1000) as i64)
}
/// Translate a Python `re` flag bitmask into a BSON regex option string.
///
/// Each set bit contributes one letter, emitted in ascending bit order:
/// 2 -> 'i' (re.IGNORECASE), 4 -> 'l' (re.LOCALE), 8 -> 'm' (re.MULTILINE),
/// 16 -> 's' (re.DOTALL), 32 -> 'u' (re.UNICODE), 64 -> 'x' (re.VERBOSE).
/// 'l' and 'u' are Python-specific and are kept only so that flags survive
/// a round trip. Bits outside this table are ignored.
pub(crate) fn int_flags_to_str(flags: i32) -> String {
    // (bit, option letter) pairs, ordered by bit value.
    const FLAG_LETTERS: [(i32, char); 6] = [
        (2, 'i'),
        (4, 'l'),
        (8, 'm'),
        (16, 's'),
        (32, 'u'),
        (64, 'x'),
    ];

    FLAG_LETTERS
        .iter()
        .filter(|&&(bit, _)| flags & bit != 0)
        .map(|&(_, letter)| letter)
        .collect()
}
/// Translate a BSON regex option string into a Python `re` flag bitmask.
///
/// Recognised letters: 'i' -> 2 (re.IGNORECASE), 'l' -> 4 (re.LOCALE),
/// 'm' -> 8 (re.MULTILINE), 's' -> 16 (re.DOTALL), 'u' -> 32 (re.UNICODE),
/// 'x' -> 64 (re.VERBOSE). Unknown letters contribute nothing and repeated
/// letters have no extra effect.
pub(crate) fn str_flags_to_int(options: &str) -> i32 {
    options.chars().fold(0, |mask, letter| {
        let bit = match letter {
            'i' => 2,
            'l' => 4,
            'm' => 8,
            's' => 16,
            'u' => 32,
            'x' => 64,
            // Unknown option letters are silently ignored.
            _ => 0,
        };
        mask | bit
    })
}
/// Check that `key` is acceptable as a BSON document key.
///
/// * A key containing a NUL byte is always rejected.
/// * When `check_keys` is true, any key other than `"_id"` is also rejected
///   if it starts with `'$'` or contains `'.'`.
///
/// Returns `Ok(())` for an acceptable key, otherwise a Python `ValueError`.
pub(crate) fn validate_key(key: &str, check_keys: bool) -> PyResult<()> {
    use pyo3::exceptions::PyValueError;

    // NUL would terminate the cstring key early, so it is never allowed.
    if key.contains('\0') {
        return Err(PyValueError::new_err(
            "Key names must not contain the NULL byte",
        ));
    }

    // The stricter checks are opt-in and never apply to "_id".
    if !check_keys || key == "_id" {
        return Ok(());
    }

    if key.starts_with('$') {
        return Err(PyValueError::new_err(format!(
            "key '{}' must not start with '$'",
            key
        )));
    }

    if key.contains('.') {
        return Err(PyValueError::new_err(format!(
            "key '{}' must not contain '.'",
            key
        )));
    }

    Ok(())
}
/// Append `s` to `buf` as a C string: the UTF-8 bytes followed by one 0x00.
///
/// No check is made for NUL bytes inside `s`.
pub(crate) fn write_cstring(buf: &mut Vec<u8>, s: &str) {
    buf.extend(s.bytes().chain(std::iter::once(0u8)));
}
/// Append `s` to `buf` as a BSON string.
///
/// Layout: little-endian int32 length, the UTF-8 bytes of `s`, then a single
/// 0x00 terminator. The length counts the bytes plus the terminator.
pub(crate) fn write_string(buf: &mut Vec<u8>, s: &str) {
    let payload = s.as_bytes();
    // +1 accounts for the trailing NUL that is part of the encoded length.
    let length_prefix = ((payload.len() + 1) as i32).to_le_bytes();
    buf.extend_from_slice(&length_prefix);
    buf.extend_from_slice(payload);
    buf.push(0);
}

View File

@ -22,6 +22,7 @@ from __future__ import annotations
import copy
import re
import warnings
from collections.abc import Mapping as _Mapping
from typing import (
Any,
@ -99,13 +100,28 @@ class SON(Dict[_Key, _Value]):
yield from self.__keys
def has_key(self, key: _Key) -> bool:
    """Deprecated: report whether ``key`` is one of this SON's keys.

    Emits a :class:`DeprecationWarning`; use the ``in`` operator instead.
    """
    message = "SON.has_key() is deprecated, use the in operator instead"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    present = key in self.__keys
    return present
def iterkeys(self) -> Iterator[_Key]:
    """Deprecated: return an iterator over this SON's keys.

    Emits a :class:`DeprecationWarning`; use the ``keys()`` method instead.
    """
    message = "SON.iterkeys() is deprecated, use the keys() method instead"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    key_iterator = self.__iter__()
    return key_iterator
# fourth level uses definitions from lower levels
def itervalues(self) -> Iterator[_Value]:
    """Deprecated: return an iterator over this SON's values.

    Emits a :class:`DeprecationWarning` as soon as the method is called; use
    the ``values()`` method instead.

    :return: a lazy iterator yielding each value from ``self.items()``.
    """
    # Warn here rather than inside the generator: a generator body does not
    # run until the first next(), which would delay the warning (or drop it
    # entirely if the result is never iterated).
    warnings.warn(
        "SON.itervalues() is deprecated, use the values() method instead",
        DeprecationWarning,
        stacklevel=2,
    )

    def _values() -> Iterator[_Value]:
        for _, v in self.items():
            yield v

    return _values()

View File

@ -1,6 +1,22 @@
Changelog
=========
Changes in Version 4.17.0 (2026/04/20)
--------------------------------------
PyMongo 4.17 brings a number of changes including:
- ``has_key``, ``iterkeys`` and ``itervalues`` in :class:`bson.son.SON` have
been deprecated and will be removed in PyMongo 5.0. Use the standard
dictionary containment operator ``in`` and the ``keys()`` and ``values()``
methods, respectively, instead.
- Added the :meth:`~pymongo.asynchronous.client_session.AsyncClientSession.bind` and :meth:`~pymongo.client_session.ClientSession.bind` methods
that allow users to bind a session to all database operations within the scope of a context manager instead of having to explicitly pass the session to each individual operation.
See the `Transactions docs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/transactions/#methods>`_ for examples and more information.
- Added support for MongoDB's Intelligent Workload Management (IWM) and ingress connection rate limiting features.
The driver now gracefully handles write-blocking scenarios and optimizes connection establishment during high-load conditions to maintain application availability.
See the `IWM <https://www.mongodb.com/docs/atlas/intelligent-workload-management>`_ or `Overload Errors <https://www.mongodb.com/docs/atlas/overload-errors/?interface=driver&language=python>`_ docs for more information.
Changes in Version 4.16.0 (2026/01/07)
--------------------------------------

View File

@ -2,8 +2,12 @@
from __future__ import annotations
import os
import shutil
import subprocess
import sys
import tempfile
import warnings
import zipfile
from pathlib import Path
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
@ -12,6 +16,116 @@ from hatchling.builders.hooks.plugin.interface import BuildHookInterface
class CustomHook(BuildHookInterface):
"""The pymongo build hook."""
def _build_rust_extension(self, here: Path, *, required: bool = False) -> bool:
"""Build the Rust BSON extension if Rust toolchain is available.
Args:
here: The root directory of the project.
required: If True, raise an error if the build fails. If False, issue a warning.
Returns True if built successfully, False otherwise.
"""
# Check if Rust is available
if not shutil.which("cargo"):
msg = (
"Rust toolchain not found. "
"Install Rust from https://rustup.rs/ to enable the Rust extension."
)
if required:
raise RuntimeError(msg)
warnings.warn(
f"{msg} Skipping Rust extension build.",
stacklevel=2,
)
return False
# Check if maturin is available
if not shutil.which("maturin"):
try:
# Try uv pip first, fall back to pip
if shutil.which("uv"):
subprocess.run(
["uv", "pip", "install", "maturin"],
check=True,
capture_output=True,
)
else:
subprocess.run(
[sys.executable, "-m", "pip", "install", "maturin"],
check=True,
capture_output=True,
)
except subprocess.CalledProcessError as e:
msg = f"Failed to install maturin: {e}"
if required:
raise RuntimeError(msg) from e
warnings.warn(
f"{msg}. Skipping Rust extension build.",
stacklevel=2,
)
return False
# Build the Rust extension
rust_dir = here / "bson" / "_rbson"
if not rust_dir.exists():
msg = f"Rust extension directory not found: {rust_dir}"
if required:
raise RuntimeError(msg)
return False
try:
# Build the wheel to a temporary directory
with tempfile.TemporaryDirectory() as tmpdir:
subprocess.run(
[
"maturin",
"build",
"--release",
"--out",
tmpdir,
"--manifest-path",
str(rust_dir / "Cargo.toml"),
],
check=True,
cwd=str(rust_dir),
)
# Extract the .so file from the wheel
# Find the wheel file
wheel_files = list(Path(tmpdir).glob("*.whl"))
if not wheel_files:
msg = "No wheel file generated by maturin"
if required:
raise RuntimeError(msg)
return False
# Extract the .so file from the wheel
# The wheel contains _rbson/_rbson.abi3.so, we want bson/_rbson.abi3.so
with zipfile.ZipFile(wheel_files[0], "r") as whl:
for name in whl.namelist():
if name.endswith((".so", ".pyd")) and "_rbson" in name:
# Extract to bson/ directory
so_data = whl.read(name)
so_name = Path(name).name # Just the filename, e.g., _rbson.abi3.so
dest = here / "bson" / so_name
dest.write_bytes(so_data)
return True
msg = "No Rust extension binary found in wheel"
if required:
raise RuntimeError(msg)
return False
except (subprocess.CalledProcessError, Exception) as e:
msg = f"Failed to build Rust extension: {e}"
if required:
raise RuntimeError(msg) from e
warnings.warn(
f"{msg}. The C extension will be used instead.",
stacklevel=2,
)
return False
def initialize(self, version, build_data):
"""Initialize the hook."""
if self.target_name == "sdist":
@ -19,7 +133,32 @@ class CustomHook(BuildHookInterface):
here = Path(__file__).parent.resolve()
sys.path.insert(0, str(here))
subprocess.run([sys.executable, "_setup.py", "build_ext", "-i"], check=True)
# Build C extensions
try:
subprocess.run([sys.executable, "_setup.py", "build_ext", "-i"], check=True)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
warnings.warn(
f"Failed to build C extension: {e}. "
"The package will be installed without compiled extensions.",
stacklevel=2,
)
# Build Rust extension (optional)
# Only build if PYMONGO_BUILD_RUST is set or Rust is available
# Skip for free-threaded Python (not yet supported)
is_free_threaded = hasattr(sys, "_is_gil_enabled") and not sys._is_gil_enabled()
build_rust = os.environ.get("PYMONGO_BUILD_RUST", "").lower() in ("1", "true", "yes")
if build_rust and is_free_threaded:
warnings.warn(
"Rust extension is not yet supported on free-threaded Python. Skipping build.",
stacklevel=2,
)
elif build_rust:
# If PYMONGO_BUILD_RUST is explicitly set, the build must succeed
self._build_rust_extension(here, required=True)
elif shutil.which("cargo") and not is_free_threaded:
# If Rust is available but not explicitly requested, build is optional
self._build_rust_extension(here, required=False)
# Ensure wheel is marked as binary and contains the binary files.
build_data["infer_tag"] = True

View File

@ -16,61 +16,78 @@ default:
resync:
@uv sync --quiet
# Set up the development environment
install:
bash .evergreen/scripts/setup-dev-env.sh
# Build the HTML documentation
[group('docs')]
docs: && resync
{{docs_run}} sphinx-build -W -b html doc {{doc_build}}/html
# Serve the docs locally with live-reload
[group('docs')]
docs-serve: && resync
{{docs_run}} sphinx-autobuild -W -b html doc --watch ./pymongo --watch ./bson --watch ./gridfs {{doc_build}}/serve
# Check documentation hyperlinks for broken URLs
[group('docs')]
docs-linkcheck: && resync
{{docs_run}} sphinx-build -E -b linkcheck doc {{doc_build}}/linkcheck
# Run mypy and pyright
[group('typing')]
typing: && resync
just typing-mypy
just typing-pyright
# Run mypy against the library source and test suite
[group('typing')]
typing-mypy: && resync
{{typing_run}} python -m mypy {{mypy_args}} bson gridfs tools pymongo
{{typing_run}} python -m mypy {{mypy_args}} --config-file mypy_test.ini test
{{typing_run}} python -m mypy {{mypy_args}} test/test_typing.py test/test_typing_strict.py
# Run pyright against the typing test files
[group('typing')]
typing-pyright: && resync
{{typing_run}} python -m pyright test/test_typing.py test/test_typing_strict.py
{{typing_run}} python -m pyright -p strict_pyrightconfig.json test/test_typing_strict.py
# Run all pre-commit hooks across the repository
[group('lint')]
lint *args="": && resync
uvx pre-commit run --all-files {{args}}
# Run shellcheck, doc8, and slotscheck
[group('lint')]
lint-manual *args="": && resync
uvx pre-commit run --all-files --hook-stage manual {{args}}
# Run pytest (e.g. just test test/test_uri_parser.py)
[group('test')]
test *args="-v --durations=5 --maxfail=10": && resync
uv run --extra test python -m pytest {{args}}
#!/usr/bin/env bash
set -euo pipefail
uv run ${USE_ACTIVE_VENV:+--active} --extra test python -m pytest {{args}}
# Run the BSON test suite with numpy
[group('test')]
test-numpy: && resync
uv run --extra test --with numpy python -m pytest test/test_bson.py
test-numpy *args="": && resync
just setup-tests numpy {{args}}
just run-tests test/test_bson.py
# Run tests via the Evergreen test runner script
[group('test')]
run-tests *args: && resync
bash ./.evergreen/run-tests.sh {{args}}
# Set up the test environment (auth, TLS, etc.)
[group('test')]
setup-tests *args="":
bash .evergreen/scripts/setup-tests.sh {{args}}
# Tear down resources created by setup-tests
[group('test')]
teardown-tests:
bash .evergreen/scripts/teardown-tests.sh
@ -79,6 +96,30 @@ teardown-tests:
integration-tests:
bash integration_tests/run.sh
# Run the full test suite with coverage
[group('test')]
test-coverage *args="":
just setup-tests --cov
just run-tests {{args}}
# Print the coverage summary to the terminal
[group('coverage')]
coverage-report:
uv tool run --with "coverage[toml]" coverage report
# Generate an HTML coverage report in htmlcov/
[group('coverage')]
coverage-html:
uv tool run --with "coverage[toml]" coverage html
@echo "Coverage report generated in htmlcov/index.html"
# Generate an XML coverage report at coverage.xml
[group('coverage')]
coverage-xml:
uv tool run --with "coverage[toml]" coverage xml
@echo "Coverage report generated in coverage.xml"
# Start a MongoDB server via drivers-evergreen-tools
[group('server')]
run-server *args="":
bash .evergreen/scripts/run-server.sh {{args}}
@ -86,3 +127,31 @@ run-server *args="":
[group('server')]
stop-server:
bash .evergreen/scripts/stop-server.sh
# Build the Rust extension by running bson/_rbson/build.sh
[group('rust')]
rust-build:
cd bson/_rbson && ./build.sh
# Remove built Rust extension binaries from bson/ and run cargo clean
[group('rust')]
rust-clean:
rm -f bson/_rbson*.so bson/_rbson*.pyd
cd bson/_rbson && cargo clean
# Run rust-clean followed by rust-build
[group('rust')]
rust-rebuild: rust-clean rust-build
# Reinstall this package with PYMONGO_BUILD_RUST=1, without dependencies
[group('rust')]
rust-install:
PYMONGO_BUILD_RUST=1 pip install --force-reinstall --no-deps .
# Reinstall this package and its dependencies with PYMONGO_BUILD_RUST=1
[group('rust')]
rust-install-full:
PYMONGO_BUILD_RUST=1 pip install --force-reinstall .
# Run test/test_bson.py with PYMONGO_USE_RUST=1
[group('rust')]
rust-test:
PYMONGO_USE_RUST=1 uv run --extra test python -m pytest test/test_bson.py -v
# Print the BSON implementation reported by bson with PYMONGO_USE_RUST=1
[group('rust')]
rust-check:
@python -c 'import os; os.environ["PYMONGO_USE_RUST"] = "1"; import bson; print("Rust extension:", bson.get_bson_implementation())'

View File

@ -18,7 +18,7 @@ from __future__ import annotations
import re
from typing import List, Tuple, Union
__version__ = "4.17.0.dev0"
__version__ = "4.18.0.dev0"
def get_version_tuple(version: str) -> Tuple[Union[int, str], ...]:

View File

@ -59,6 +59,7 @@ from pymongo.errors import (
InvalidOperation,
NotPrimaryError,
OperationFailure,
PyMongoError,
WaitQueueTimeoutError,
)
from pymongo.helpers_shared import _RETRYABLE_ERROR_CODES
@ -563,9 +564,17 @@ class _AsyncClientBulk:
error, ConnectionFailure
) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError))
retryable_label_error = isinstance(
error, PyMongoError
) and error.has_error_label("RetryableError")
# Synthesize the full bulk result without modifying the
# current one because this write operation may be retried.
if retryable and (retryable_top_level_error or retryable_network_error):
if retryable and (
retryable_top_level_error
or retryable_network_error
or retryable_label_error
):
full = copy.deepcopy(full_result)
_merge_command(self.ops, self.idx_offset, full, result)
_throw_client_bulk_write_exception(full, self.verbose_results)

View File

@ -135,10 +135,13 @@ Classes
from __future__ import annotations
import asyncio
import collections
import random
import time
import uuid
from collections.abc import Mapping as _Mapping
from contextvars import ContextVar, Token
from typing import (
TYPE_CHECKING,
Any,
@ -161,7 +164,9 @@ from pymongo.asynchronous.cursor_base import _ConnectionManager
from pymongo.errors import (
ConfigurationError,
ConnectionFailure,
ExecutionTimeout,
InvalidOperation,
NetworkTimeout,
OperationFailure,
PyMongoError,
WTimeoutError,
@ -181,6 +186,28 @@ if TYPE_CHECKING:
_IS_SYNC = False
_SESSION: ContextVar[Optional[AsyncClientSession]] = ContextVar("SESSION", default=None)
class _AsyncBoundSessionContext:
    """Async context manager returned by ``AsyncClientSession.bind()``.

    On entry the session is stored in the ``_SESSION`` context variable; on
    exit the variable is reset to its previous value and, if requested, the
    session is ended.
    """

    def __init__(self, session: AsyncClientSession, end_session: bool) -> None:
        self._end_session = end_session
        self._session = session
        # Token from _SESSION.set(); None while the context is not entered.
        self._session_token: Optional[Token[AsyncClientSession]] = None

    async def __aenter__(self) -> AsyncClientSession:
        bound = self._session
        self._session_token = _SESSION.set(bound)  # type: ignore[assignment]
        return bound

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        token = self._session_token
        if token:
            # Restore whatever _SESSION held before __aenter__ ran.
            _SESSION.reset(token)  # type: ignore[arg-type]
            self._session_token = None
        if not self._end_session:
            return
        await self._session.end_session()
class SessionOptions:
"""Options for a new :class:`AsyncClientSession`.
@ -404,6 +431,7 @@ class _Transaction:
self.recovery_token = None
self.attempt = 0
self.client = client
self.has_completed_command = False
def active(self) -> bool:
return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS)
@ -411,6 +439,9 @@ class _Transaction:
def starting(self) -> bool:
return self.state == _TxnState.STARTING
def set_starting(self) -> None:
self.state = _TxnState.STARTING
@property
def pinned_conn(self) -> Optional[AsyncConnection]:
if self.active() and self.conn_mgr:
@ -436,6 +467,7 @@ class _Transaction:
self.sharded = False
self.recovery_token = None
self.attempt = 0
self.has_completed_command = False
def __del__(self) -> None:
if self.conn_mgr:
@ -470,11 +502,29 @@ _UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset( #
# This limit is non-configurable and was chosen to be twice the 60 second
# default value of MongoDB's `transactionLifetimeLimitSeconds` parameter.
_WITH_TRANSACTION_RETRY_TIME_LIMIT = 120
_BACKOFF_MAX = 0.500 # 500ms max backoff
_BACKOFF_INITIAL = 0.005 # 5ms initial backoff
def _within_time_limit(start_time: float) -> bool:
def _within_time_limit(start_time: float, backoff: float = 0) -> bool:
"""Are we within the with_transaction retry limit?"""
return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT
remaining = _csot.remaining()
if remaining is not None and remaining <= 0:
return False
return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT
def _make_timeout_error(error: BaseException) -> PyMongoError:
    """Wrap ``error`` in a timeout exception.

    Returns an ``ExecutionTimeout`` (code 50) when ``_csot.remaining()`` is
    not None, otherwise a ``NetworkTimeout``. The message is ``str(error)``.
    If ``error`` is a ``PyMongoError`` its error labels are copied onto the
    new exception.
    """
    message = str(error)
    timeout_error: PyMongoError
    if _csot.remaining() is None:
        timeout_error = NetworkTimeout(message)
    else:
        details = {"ok": 0, "errmsg": message, "code": 50}
        timeout_error = ExecutionTimeout(message, 50, details)
    if isinstance(error, PyMongoError):
        # Copy so later label changes on one error do not affect the other.
        timeout_error._error_labels = error._error_labels.copy()
    return timeout_error
_T = TypeVar("_T")
@ -547,6 +597,24 @@ class AsyncClientSession:
if self._server_session is None:
raise InvalidOperation("Cannot use ended session")
def bind(self, end_session: bool = True) -> _AsyncBoundSessionContext:
"""Bind this session so it is implicitly passed to all database operations within the returned context.
.. code-block:: python
async with client.start_session() as s:
async with s.bind():
# session=s is passed implicitly
await client.db.collection.insert_one({"x": 1})
:param end_session: Whether to end the session on exiting the returned context. Defaults to True.
If set to False, :meth:`~pymongo.asynchronous.client_session.AsyncClientSession.end_session()` must be called
once the session is no longer used.
.. versionadded:: 4.17
"""
return _AsyncBoundSessionContext(self, end_session)
async def __aenter__(self) -> AsyncClientSession:
return self
@ -703,7 +771,17 @@ class AsyncClientSession:
https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback
"""
start_time = time.monotonic()
retry = 0
last_error: Optional[BaseException] = None
while True:
if retry: # Implement exponential backoff on retry.
jitter = random.random() # noqa: S311
backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX)
if not _within_time_limit(start_time, backoff):
assert last_error is not None
raise _make_timeout_error(last_error) from last_error
await asyncio.sleep(backoff)
retry += 1
await self.start_transaction(
read_concern, write_concern, read_preference, max_commit_time_ms
)
@ -711,15 +789,16 @@ class AsyncClientSession:
ret = await callback(self)
# Catch KeyboardInterrupt, CancelledError, etc. and cleanup.
except BaseException as exc:
last_error = exc
if self.in_transaction:
await self.abort_transaction()
if (
isinstance(exc, PyMongoError)
and exc.has_error_label("TransientTransactionError")
and _within_time_limit(start_time)
if isinstance(exc, PyMongoError) and exc.has_error_label(
"TransientTransactionError"
):
# Retry the entire transaction.
continue
if _within_time_limit(start_time):
# Retry the entire transaction.
continue
raise _make_timeout_error(last_error) from exc
raise
if not self.in_transaction:
@ -730,17 +809,18 @@ class AsyncClientSession:
try:
await self.commit_transaction()
except PyMongoError as exc:
if (
exc.has_error_label("UnknownTransactionCommitResult")
and _within_time_limit(start_time)
and not _max_time_expired_error(exc)
):
last_error = exc
if exc.has_error_label(
"UnknownTransactionCommitResult"
) and not _max_time_expired_error(exc):
if not _within_time_limit(start_time):
raise _make_timeout_error(last_error) from exc
# Retry the commit.
continue
if exc.has_error_label("TransientTransactionError") and _within_time_limit(
start_time
):
if exc.has_error_label("TransientTransactionError"):
if not _within_time_limit(start_time):
raise _make_timeout_error(last_error) from exc
# Retry the entire transaction.
break
raise
@ -1021,7 +1101,11 @@ class AsyncClientSession:
read_preference: _ServerMode,
conn: AsyncConnection,
) -> None:
if not conn.supports_sessions:
# getMores must be sent with a session if the cursor was opened with one
operation = next(iter(command))
if not conn.supports_sessions and (
isinstance(self._server_session, _EmptyServerSession) or operation != "getMore"
):
if not self._implicit:
raise ConfigurationError("Sessions are not supported by this MongoDB deployment")
return

View File

@ -20,7 +20,6 @@ from collections import abc
from typing import (
TYPE_CHECKING,
Any,
AsyncContextManager,
Callable,
Coroutine,
Generic,
@ -571,11 +570,6 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
await change_stream._initialize_cursor()
return change_stream
async def _conn_for_writes(
self, session: Optional[AsyncClientSession], operation: str
) -> AsyncContextManager[AsyncConnection]:
return await self._database.client._conn_for_writes(session, operation)
async def _command(
self,
conn: AsyncConnection,
@ -652,7 +646,10 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
if "size" in options:
options["size"] = float(options["size"])
cmd.update(options)
async with await self._conn_for_writes(session, operation=_Op.CREATE) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> None:
if qev2_required and conn.max_wire_version < 21:
raise ConfigurationError(
"Driver support of Queryable Encryption is incompatible with server. "
@ -669,6 +666,8 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
session=session,
)
await self.database.client._retryable_write(False, inner, session, _Op.CREATE)
async def _create(
self,
options: MutableMapping[str, Any],
@ -2240,7 +2239,10 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
command (like maxTimeMS) can be passed as keyword arguments.
"""
names = []
async with await self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> list[str]:
supports_quorum = conn.max_wire_version >= 9
def gen_indexes() -> Iterator[Mapping[str, Any]]:
@ -2269,7 +2271,11 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
write_concern=self._write_concern_for(session),
session=session,
)
return names
return names
return await self.database.client._retryable_write(
False, inner, session, _Op.CREATE_INDEXES
)
async def create_index(
self,
@ -2422,7 +2428,6 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
kwargs["comment"] = comment
await self._drop_index("*", session=session, **kwargs)
@_csot.apply
async def drop_index(
self,
index_or_name: _IndexKeyHint,
@ -2490,7 +2495,10 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
async with await self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> None:
await self._command(
conn,
cmd,
@ -2500,6 +2508,8 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
session=session,
)
await self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES)
async def list_indexes(
self,
session: Optional[AsyncClientSession] = None,
@ -2763,17 +2773,22 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())}
cmd.update(kwargs)
async with await self._conn_for_writes(
session, operation=_Op.CREATE_SEARCH_INDEXES
) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> list[str]:
resp = await self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
return [index["name"] for index in resp["indexesCreated"]]
return await self.database.client._retryable_write(
False, inner, session, _Op.CREATE_SEARCH_INDEXES
)
async def drop_search_index(
self,
name: str,
@ -2799,15 +2814,21 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
async with await self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> None:
await self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
allowable_errors=["ns not found", 26],
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
await self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES)
async def update_search_index(
self,
name: str,
@ -2835,15 +2856,21 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
async with await self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> None:
await self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
allowable_errors=["ns not found", 26],
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
await self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX)
async def options(
self,
session: Optional[AsyncClientSession] = None,
@ -2918,6 +2945,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
session,
retryable=not cmd._performs_write,
operation=_Op.AGGREGATE,
is_aggregate_write=cmd._performs_write,
)
async def aggregate(
@ -3123,17 +3151,21 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
if comment is not None:
cmd["comment"] = comment
write_concern = self._write_concern_for_cmd(cmd, session)
client = self._database.client
async with await self._conn_for_writes(session, operation=_Op.RENAME) as conn:
async with self._database.client._tmp_session(session) as s:
return await conn.command(
"admin",
cmd,
write_concern=write_concern,
parse_write_concern_error=True,
session=s,
client=self._database.client,
)
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> MutableMapping[str, Any]:
return await conn.command(
"admin",
cmd,
write_concern=write_concern,
parse_write_concern_error=True,
session=session,
client=client,
)
return await client._retryable_write(False, inner, session, _Op.RENAME)
async def distinct(
self,

View File

@ -931,14 +931,15 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
if read_preference is None:
read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY
async with await self._client._conn_for_reads(
read_preference, session, operation=command_name
) as (
connection,
read_preference,
):
async def inner(
session: Optional[AsyncClientSession],
_server: Server,
conn: AsyncConnection,
read_preference: _ServerMode,
) -> Union[dict[str, Any], _CodecDocumentType]:
return await self._command(
connection,
conn,
command,
value,
check,
@ -949,6 +950,10 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
**kwargs,
)
return await self._client._retryable_read(
inner, read_preference, session, command_name, None, False, is_run_command=True
)
@_csot.apply
async def cursor_command(
self,
@ -1016,17 +1021,17 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
async with self._client._tmp_session(session) as tmp_session:
opts = codec_options or DEFAULT_CODEC_OPTIONS
if read_preference is None:
read_preference = (
tmp_session and tmp_session._txn_read_preference()
) or ReadPreference.PRIMARY
async with await self._client._conn_for_reads(
read_preference, tmp_session, command_name
) as (
conn,
read_preference,
):
async def inner(
session: Optional[AsyncClientSession],
_server: Server,
conn: AsyncConnection,
read_preference: _ServerMode,
) -> AsyncCommandCursor[_DocumentType]:
response = await self._command(
conn,
command,
@ -1035,7 +1040,7 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
None,
read_preference,
opts,
session=tmp_session,
session=session,
**kwargs,
)
coll = self.get_collection("$cmd", read_preference=read_preference)
@ -1045,7 +1050,7 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
response["cursor"],
conn.address,
max_await_time_ms=max_await_time_ms,
session=tmp_session,
session=session,
comment=comment,
)
await cmd_cursor._maybe_pin_connection(conn)
@ -1053,6 +1058,10 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
else:
raise InvalidOperation("Command does not return a cursor.")
return await self.client._retryable_read(
inner, read_preference, tmp_session, command_name, None, False
)
async def _retryable_read_command(
self,
command: Union[str, MutableMapping[str, Any]],
@ -1254,9 +1263,11 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
if comment is not None:
command["comment"] = comment
async with await self._client._conn_for_writes(session, operation=_Op.DROP) as connection:
async def inner(
session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool
) -> dict[str, Any]:
return await self._command(
connection,
conn,
command,
allowable_errors=["ns not found", 26],
write_concern=self._write_concern_for(session),
@ -1264,6 +1275,8 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
session=session,
)
return await self.client._retryable_write(False, inner, session, _Op.DROP)
@_csot.apply
async def drop_collection(
self,

View File

@ -17,8 +17,11 @@ from __future__ import annotations
import asyncio
import builtins
import functools
import random
import socket
import sys
import time as time # noqa: PLC0414 # needed in sync version
from typing import (
Any,
Callable,
@ -26,6 +29,8 @@ from typing import (
cast,
)
from pymongo import _csot
from pymongo.common import MAX_ADAPTIVE_RETRIES
from pymongo.errors import (
OperationFailure,
)
@ -38,6 +43,7 @@ F = TypeVar("F", bound=Callable[..., Any])
def _handle_reauth(func: F) -> F:
@functools.wraps(func)
async def inner(*args: Any, **kwargs: Any) -> Any:
no_reauth = kwargs.pop("no_reauth", False)
from pymongo.asynchronous.pool import AsyncConnection
@ -70,6 +76,46 @@ def _handle_reauth(func: F) -> F:
return cast(F, inner)
_BACKOFF_INITIAL = 0.1
_BACKOFF_MAX = 10
def _backoff(
attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX
) -> float:
jitter = random.random() # noqa: S311
return jitter * min(initial_delay * (2**attempt), max_delay)
class _RetryPolicy:
    """A retry limiter that performs exponential backoff with jitter."""

    def __init__(
        self,
        attempts: int = MAX_ADAPTIVE_RETRIES,
        backoff_initial: float = _BACKOFF_INITIAL,
        backoff_max: float = _BACKOFF_MAX,
    ):
        # Maximum attempt number for which should_retry() still answers True.
        self.attempts = attempts
        # Bounds handed to _backoff() when computing a delay.
        self.backoff_initial = backoff_initial
        self.backoff_max = backoff_max

    def backoff(self, attempt: int) -> float:
        """Return the backoff duration for the given attempt."""
        # Attempts are shifted down by one (never below zero) for the exponent.
        exponent = max(0, attempt - 1)
        return _backoff(exponent, self.backoff_initial, self.backoff_max)

    async def should_retry(self, attempt: int, delay: float) -> bool:
        """Return if we have retry attempts remaining and the next backoff would not exceed a timeout."""
        if attempt > self.attempts:
            return False
        # With a timeout set, refuse to retry if sleeping for `delay` would
        # take us past the deadline.
        if _csot.get_timeout() and time.monotonic() + delay > _csot.get_deadline():
            return False
        return True
async def _getaddrinfo(
host: Any, port: Any, **kwargs: Any
) -> list[

View File

@ -35,6 +35,7 @@ from __future__ import annotations
import asyncio
import contextlib
import os
import time as time # noqa: PLC0414 # needed in sync version
import warnings
import weakref
from collections import defaultdict
@ -65,8 +66,11 @@ from pymongo import _csot, common, helpers_shared, periodic_executor
from pymongo.asynchronous import client_session, database, uri_parser
from pymongo.asynchronous.change_stream import AsyncChangeStream, AsyncClusterChangeStream
from pymongo.asynchronous.client_bulk import _AsyncClientBulk
from pymongo.asynchronous.client_session import _EmptyServerSession
from pymongo.asynchronous.client_session import _SESSION, _EmptyServerSession
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
from pymongo.asynchronous.helpers import (
_RetryPolicy,
)
from pymongo.asynchronous.settings import TopologySettings
from pymongo.asynchronous.topology import Topology, _ErrorContext
from pymongo.client_options import ClientOptions
@ -610,8 +614,18 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
client to use Stable API. See `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_ for
details.
| **Overload retry options:**
- `max_adaptive_retries`: (int) How many retries to allow for overload errors. Defaults to ``2``.
- `enable_overload_retargeting`: (boolean) Whether overload retargeting is enabled for this client.
If enabled, server overload errors will cause retry attempts to select a server that has not yet returned an overload error, if possible.
Defaults to ``False``.
.. seealso:: The MongoDB documentation on `connections <https://dochub.mongodb.org/core/connections>`_.
.. versionchanged:: 4.17
Added the ``max_adaptive_retries`` and ``enable_overload_retargeting`` URI and keyword arguments.
.. versionchanged:: 4.5
Added the ``serverMonitoringMode`` keyword argument.
@ -879,11 +893,14 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
self._options.read_concern,
)
self._retry_policy = _RetryPolicy(attempts=self._options.max_adaptive_retries)
self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name)
self._opened = False
self._closed = False
self._loop: Optional[asyncio.AbstractEventLoop] = None
if not is_srv:
self._init_background()
@ -1408,7 +1425,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
def _ensure_session(
self, session: Optional[AsyncClientSession] = None
) -> Optional[AsyncClientSession]:
"""If provided session is None, lend a temporary session."""
"""If provided session and bound session are None, lend a temporary session."""
session = session or self._get_bound_session()
if session:
return session
@ -1990,6 +2008,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
read_pref: Optional[_ServerMode] = None,
retryable: bool = False,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
) -> T:
"""Internal retryable helper for all client transactions.
@ -2001,6 +2021,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
:param address: Server Address, defaults to None
:param read_pref: Topology of read operation, defaults to None
:param retryable: If the operation should be retried once, defaults to False
:param is_run_command: If this is a runCommand operation, defaults to False
:param is_aggregate_write: If this is an aggregate operation with a write, defaults to False.
:return: Output of the calling func()
"""
@ -2015,6 +2037,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
address=address,
retryable=retryable,
operation_id=operation_id,
is_run_command=is_run_command,
is_aggregate_write=is_aggregate_write,
).run()
async def _retryable_read(
@ -2026,6 +2050,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
address: Optional[_Address] = None,
retryable: bool = True,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
) -> T:
"""Execute an operation with consecutive retries if possible
@ -2041,6 +2067,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
:param address: Optional address when sending a message, defaults to None
:param retryable: if we should attempt retries
(may not always be supported even if supplied), defaults to True
:param is_run_command: If this is a runCommand operation, defaults to False.
:param is_aggregate_write: If this is an aggregate operation with a write, defaults to False.
"""
# Ensure that the client supports retrying on reads and there is no session in
@ -2059,6 +2087,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
read_pref=read_pref,
retryable=retryable,
operation_id=operation_id,
is_run_command=is_run_command,
is_aggregate_write=is_aggregate_write,
)
async def _retryable_write(
@ -2267,11 +2297,14 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
self, session: Optional[client_session.AsyncClientSession]
) -> AsyncGenerator[Optional[client_session.AsyncClientSession], None]:
"""If provided session is None, lend a temporary session."""
if session is not None:
if not isinstance(session, client_session.AsyncClientSession):
raise ValueError(
f"'session' argument must be an AsyncClientSession or None, not {type(session)}"
)
if session is not None and not isinstance(session, client_session.AsyncClientSession):
raise ValueError(
f"'session' argument must be an AsyncClientSession or None, not {type(session)}"
)
# Check for a bound session. If one exists, treat it as an explicitly passed session.
session = session or self._get_bound_session()
if session:
# Don't call end_session.
yield session
return
@ -2301,6 +2334,18 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
if session is not None:
session._process_response(reply)
def _get_bound_session(self) -> Optional[AsyncClientSession]:
    """Return the session currently bound via the ``_SESSION`` context variable.

    :return: The bound session when one is set and it belongs to this client,
        otherwise ``None``.
    :raises InvalidOperation: If a session is bound but it was created by a
        different client.
    """
    bound_session = _SESSION.get()
    if not bound_session:
        return None
    if bound_session.client is not self:
        # A bound session must never leak into another client's operations.
        raise InvalidOperation(
            "Only the client that created the bound session can perform operations within its context block."
        )
    return bound_session
async def server_info(
self, session: Optional[client_session.AsyncClientSession] = None
) -> dict[str, Any]:
@ -2438,15 +2483,13 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
f"name_or_database must be an instance of str or a AsyncDatabase, not {type(name)}"
)
async with await self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn:
await self[name]._command(
conn,
{"dropDatabase": 1, "comment": comment},
read_preference=ReadPreference.PRIMARY,
write_concern=self._write_concern_for(session),
parse_write_concern_error=True,
session=session,
)
await self[name].command(
{"dropDatabase": 1, "comment": comment},
read_preference=ReadPreference.PRIMARY,
write_concern=self._write_concern_for(session),
parse_write_concern_error=True,
session=session,
)
@_csot.apply
async def bulk_write(
@ -2730,12 +2773,15 @@ class _ClientConnectionRetryable(Generic[T]):
address: Optional[_Address] = None,
retryable: bool = False,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
):
self._last_error: Optional[Exception] = None
self._retrying = False
self._multiple_retries = _csot.get_timeout() is not None
self._always_retryable = False
self._max_retries = float("inf") if _csot.get_timeout() is not None else 1
self._client = mongo_client
self._retry_policy = mongo_client._retry_policy
self._func = func
self._bulk = bulk
self._session = session
@ -2751,6 +2797,8 @@ class _ClientConnectionRetryable(Generic[T]):
self._operation = operation
self._operation_id = operation_id
self._attempt_number = 0
self._is_run_command = is_run_command
self._is_aggregate_write = is_aggregate_write
async def run(self) -> T:
"""Runs the supplied func() and attempts a retry
@ -2770,7 +2818,13 @@ class _ClientConnectionRetryable(Generic[T]):
while True:
self._check_last_error(check_csot=True)
try:
return await self._read() if self._is_read else await self._write()
res = await self._read() if self._is_read else await self._write()
# Track whether the transaction has completed a command.
# If we need to apply backpressure to the first command,
# we will need to revert back to starting state.
if self._session is not None and self._session.in_transaction:
self._session._transaction.has_completed_command = True
return res
except ServerSelectionTimeoutError:
# The application may think the write was never attempted
# if we raise ServerSelectionTimeoutError on the retry
@ -2781,37 +2835,80 @@ class _ClientConnectionRetryable(Generic[T]):
# most likely be a waste of time.
raise
except PyMongoError as exc:
always_retryable = False
overloaded = False
exc_to_check = exc
if self._is_run_command and not (
self._client.options.retry_reads and self._client.options.retry_writes
):
raise
if self._is_aggregate_write and not self._client.options.retry_writes:
raise
# Execute specialized catch on read
if self._is_read:
if isinstance(exc, (ConnectionFailure, OperationFailure)):
# ConnectionFailures do not supply a code property
exc_code = getattr(exc, "code", None)
if self._is_not_eligible_for_retry() or (
isinstance(exc, OperationFailure)
and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES
overloaded = exc.has_error_label("SystemOverloadedError")
if overloaded:
self._max_retries = self._client.options.max_adaptive_retries
always_retryable = exc.has_error_label("RetryableError") and overloaded
if not self._client.options.retry_reads or (
not always_retryable
and (
self._is_not_eligible_for_retry()
or (
isinstance(exc, OperationFailure)
and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES
)
)
):
raise
self._retrying = True
self._last_error = exc
self._attempt_number += 1
# Revert back to starting state if we're in a transaction but haven't completed the first
# command.
if (
overloaded
and self._session is not None
and self._session.in_transaction
):
transaction = self._session._transaction
if not transaction.has_completed_command:
transaction.set_starting()
transaction.attempt = 0
else:
raise
# Specialized catch on write operation
if not self._is_read:
if not self._retryable:
if isinstance(exc, ClientBulkWriteException) and isinstance(
exc.error, PyMongoError
):
exc_to_check = exc.error
retryable_write_label = exc_to_check.has_error_label("RetryableWriteError")
overloaded = exc_to_check.has_error_label("SystemOverloadedError")
if overloaded:
self._max_retries = self._client.options.max_adaptive_retries
always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded
# Always retry abortTransaction and commitTransaction up to once
if self._operation not in ["abortTransaction", "commitTransaction"] and (
not self._client.options.retry_writes
or not (self._retryable or always_retryable)
):
raise
if isinstance(exc, ClientBulkWriteException) and exc.error:
retryable_write_error_exc = isinstance(
exc.error, PyMongoError
) and exc.error.has_error_label("RetryableWriteError")
else:
retryable_write_error_exc = exc.has_error_label("RetryableWriteError")
if retryable_write_error_exc:
if retryable_write_label or always_retryable:
assert self._session
await self._session._unpin()
if not retryable_write_error_exc or self._is_not_eligible_for_retry():
if exc.has_error_label("NoWritesPerformed") and self._last_error:
if not always_retryable and (
not retryable_write_label or self._is_not_eligible_for_retry()
):
if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error:
raise self._last_error from exc
else:
raise
@ -2820,17 +2917,39 @@ class _ClientConnectionRetryable(Generic[T]):
self._bulk.retrying = True
else:
self._retrying = True
if not exc.has_error_label("NoWritesPerformed"):
if not exc_to_check.has_error_label("NoWritesPerformed"):
self._last_error = exc
if self._last_error is None:
self._last_error = exc
# Revert back to starting state if we're in a transaction but haven't completed the first
# command.
if overloaded and self._session is not None and self._session.in_transaction:
transaction = self._session._transaction
if not transaction.has_completed_command:
transaction.set_starting()
transaction.attempt = 0
if self._server is not None:
if self._server is not None and (
self._client.topology_description.topology_type_name == "Sharded"
or (overloaded and self._client.options.enable_overload_retargeting)
):
self._deprioritized_servers.append(self._server)
self._always_retryable = always_retryable
if overloaded:
delay = self._retry_policy.backoff(self._attempt_number)
if not await self._retry_policy.should_retry(self._attempt_number, delay):
if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error:
raise self._last_error from exc
else:
raise
await asyncio.sleep(delay)
def _is_not_eligible_for_retry(self) -> bool:
"""Checks if the exchange is not eligible for retry"""
return not self._retryable or (self._is_retrying() and not self._multiple_retries)
return not self._retryable or (
self._is_retrying() and self._attempt_number >= self._max_retries
)
def _is_retrying(self) -> bool:
"""Checks if the exchange is currently undergoing a retry"""
@ -2889,7 +3008,7 @@ class _ClientConnectionRetryable(Generic[T]):
and conn.supports_sessions
)
is_mongos = conn.is_mongos
if not sessions_supported:
if not self._always_retryable and not sessions_supported:
# A retry is not possible because this server does
# not support sessions raise the last error.
self._check_last_error()
@ -2921,7 +3040,7 @@ class _ClientConnectionRetryable(Generic[T]):
conn,
read_pref,
):
if self._retrying and not self._retryable:
if self._retrying and not self._retryable and not self._always_retryable:
self._check_last_error()
if self._retrying:
_debug_log(

View File

@ -19,6 +19,8 @@ import collections
import contextlib
import logging
import os
import socket
import ssl
import sys
import time
import weakref
@ -52,10 +54,12 @@ from pymongo.errors import ( # type:ignore[attr-defined]
DocumentTooLarge,
ExecutionTimeout,
InvalidOperation,
NetworkTimeout,
NotPrimaryError,
OperationFailure,
PyMongoError,
WaitQueueTimeoutError,
_CertificateError,
)
from pymongo.hello import Hello, HelloCompat
from pymongo.helpers_shared import _get_timeout_details, format_timeout_details
@ -250,6 +254,7 @@ class AsyncConnection:
cmd = self.hello_cmd()
performing_handshake = not self.performed_handshake
awaitable = False
cmd["backpressure"] = True
if performing_handshake:
self.performed_handshake = True
cmd["client"] = self.opts.metadata
@ -752,8 +757,8 @@ class Pool:
# Enforces: maxConnecting
# Also used for: clearing the wait queue
self._max_connecting_cond = _async_create_condition(self.lock)
self._max_connecting = self.opts.max_connecting
self._pending = 0
self._max_connecting = self.opts.max_connecting
self._client_id = client_id
if self.enabled_for_cmap:
assert self.opts._event_listeners is not None
@ -986,6 +991,21 @@ class Pool:
self.requests -= 1
self.size_cond.notify()
def _handle_connection_error(self, error: BaseException) -> None:
    """Mark connection failures on non-SDAM pools as server-overload errors.

    Only errors whose exact type is ``AutoReconnect`` or ``NetworkTimeout`` are
    considered. Failures caused by DNS, certificate, or SSL problems are left
    unlabeled, except SSL end-of-file errors, because the server may have
    disconnected during the handshake. Everything else receives the
    ``SystemOverloadedError`` and ``RetryableError`` labels.

    :param error: The exception raised while establishing the connection.
    """
    if self.is_sdam:
        return
    if type(error) not in (AutoReconnect, NetworkTimeout):
        return
    assert isinstance(error, AutoReconnect)  # Appease type checker.

    cause = error.__cause__
    caused_by_dns_or_tls = isinstance(cause, (_CertificateError, SSLErrors, socket.gaierror))
    caused_by_tls_eof = isinstance(cause, (ssl.SSLEOFError, ssl.SSLZeroReturnError))
    if caused_by_dns_or_tls and not caused_by_tls_eof:
        return

    for label in ("SystemOverloadedError", "RetryableError"):
        error._add_error_label(label)
async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> AsyncConnection:
"""Connect to Mongo and return a new AsyncConnection.
@ -1037,10 +1057,10 @@ class Pool:
reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR),
error=ConnectionClosedReason.ERROR,
)
self._handle_connection_error(error)
if isinstance(error, (IOError, OSError, *SSLErrors)):
details = _get_timeout_details(self.opts)
_raise_connection_failure(self.address, error, timeout_details=details)
raise
conn = AsyncConnection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type]
@ -1049,18 +1069,22 @@ class Pool:
self.active_contexts.discard(tmp_context)
if tmp_context.cancelled:
conn.cancel_context.cancel()
completed_hello = False
try:
if not self.is_sdam:
await conn.hello()
completed_hello = True
self.is_writable = conn.is_writable
if handler:
handler.contribute_socket(conn, completed_handshake=False)
await conn.authenticate()
# Catch KeyboardInterrupt, CancelledError, etc. and cleanup.
except BaseException:
except BaseException as e:
async with self.lock:
self.active_contexts.discard(conn.cancel_context)
if not completed_hello:
self._handle_connection_error(e)
await conn.close_conn(ConnectionClosedReason.ERROR)
raise
@ -1389,8 +1413,8 @@ class Pool:
:class:`~pymongo.errors.AutoReconnect` exceptions on server
hiccups, etc. We only check if the socket was closed by an external
error if it has been > 1 second since the socket was checked into the
pool, to keep performance reasonable - we can't avoid AutoReconnects
completely anyway.
pool to keep performance reasonable -
we can't avoid AutoReconnects completely anyway.
"""
idle_time_seconds = conn.idle_time_seconds()
# If socket is idle, open a new one.
@ -1401,8 +1425,9 @@ class Pool:
await conn.close_conn(ConnectionClosedReason.IDLE)
return True
if self._check_interval_seconds is not None and (
self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds
check_interval_seconds = self._check_interval_seconds
if check_interval_seconds is not None and (
check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds
):
if conn.conn_closed():
await conn.close_conn(ConnectionClosedReason.ERROR)

View File

@ -913,7 +913,9 @@ class Topology:
# Clear the pool.
await server.reset(service_id)
elif isinstance(error, ConnectionFailure):
if isinstance(error, WaitQueueTimeoutError):
if isinstance(error, WaitQueueTimeoutError) or (
error.has_error_label("SystemOverloadedError")
):
return
# "Client MUST replace the server's description with type Unknown
# ... MUST NOT request an immediate check of the server."

View File

@ -235,6 +235,16 @@ class ClientOptions:
self.__server_monitoring_mode = options.get(
"servermonitoringmode", common.SERVER_MONITORING_MODE
)
self.__max_adaptive_retries = (
options.get("max_adaptive_retries", common.MAX_ADAPTIVE_RETRIES)
if "max_adaptive_retries" in options
else options.get("maxadaptiveretries", common.MAX_ADAPTIVE_RETRIES)
)
self.__enable_overload_retargeting = (
options.get("enable_overload_retargeting", common.ENABLE_OVERLOAD_RETARGETING)
if "enable_overload_retargeting" in options
else options.get("enableoverloadretargeting", common.ENABLE_OVERLOAD_RETARGETING)
)
@property
def _options(self) -> Mapping[str, Any]:
@ -346,3 +356,19 @@ class ClientOptions:
.. versionadded:: 4.5
"""
return self.__server_monitoring_mode
@property
def max_adaptive_retries(self) -> int:
    """The number of retries to allow for overload errors.

    Configured through the ``maxAdaptiveRetries`` URI option or the
    ``max_adaptive_retries`` keyword argument.

    .. versionadded:: 4.17
    """
    return self.__max_adaptive_retries
@property
def enable_overload_retargeting(self) -> bool:
    """Whether overload retargeting is enabled for this client.

    Configured through the ``enableOverloadRetargeting`` URI option or the
    ``enable_overload_retargeting`` keyword argument.

    .. versionadded:: 4.17
    """
    return self.__enable_overload_retargeting

View File

@ -140,6 +140,12 @@ SRV_SERVICE_NAME = "mongodb"
# Default value for serverMonitoringMode
SERVER_MONITORING_MODE = "auto" # poll/stream/auto
# Default value for max adaptive retries
MAX_ADAPTIVE_RETRIES = 2
# Default value for enableOverloadRetargeting
ENABLE_OVERLOAD_RETARGETING = False
# Auth mechanism properties that must raise an error instead of warning if they invalidate.
_MECH_PROP_MUST_RAISE = ["CANONICALIZE_HOST_NAME"]
@ -233,13 +239,6 @@ def validate_readable(option: str, value: Any) -> Optional[str]:
return value
def validate_positive_integer_or_none(option: str, value: Any) -> Optional[int]:
"""Validate that 'value' is a positive integer or None."""
if value is None:
return value
return validate_positive_integer(option, value)
def validate_non_negative_integer_or_none(option: str, value: Any) -> Optional[int]:
"""Validate that 'value' is a positive integer or 0 or None."""
if value is None:
@ -261,20 +260,6 @@ def validate_string_or_none(option: str, value: Any) -> Optional[str]:
return validate_string(option, value)
def validate_int_or_basestring(option: str, value: Any) -> Union[int, str]:
"""Validates that 'value' is an integer or string."""
if isinstance(value, int):
return value
elif isinstance(value, str):
try:
return int(value)
except ValueError:
return value
raise TypeError(
f"Wrong type for {option}, value must be an integer or a string, not {type(value)}"
)
def validate_non_negative_int_or_basestring(option: Any, value: Any) -> Union[int, str]:
"""Validates that 'value' is an integer or string."""
if isinstance(value, int):
@ -738,6 +723,8 @@ URI_OPTIONS_VALIDATOR_MAP: dict[str, Callable[[Any, Any], Any]] = {
"srvmaxhosts": validate_non_negative_integer,
"timeoutms": validate_timeoutms,
"servermonitoringmode": validate_server_monitoring_mode,
"maxadaptiveretries": validate_non_negative_integer,
"enableoverloadretargeting": validate_boolean_or_string,
}
# Dictionary where keys are the names of URI options specific to pymongo,
@ -771,6 +758,8 @@ KW_VALIDATORS: dict[str, Callable[[Any, Any], Any]] = {
"server_selector": validate_is_callable_or_none,
"auto_encryption_opts": validate_auto_encryption_opts_or_none,
"authoidcallowedhosts": validate_list,
"max_adaptive_retries": validate_non_negative_integer,
"enable_overload_retargeting": validate_boolean_or_string,
}
# Dictionary where keys are any URI option name, and values are the
@ -817,16 +806,6 @@ TIMEOUT_OPTIONS: list[str] = [
"waitqueuetimeoutms",
]
_AUTH_OPTIONS = frozenset(["authmechanismproperties"])
def validate_auth_option(option: str, value: Any) -> tuple[str, Any]:
"""Validate optional authentication parameters."""
lower, value = validate(option, value)
if lower not in _AUTH_OPTIONS:
raise ConfigurationError(f"Unknown option: {option}. Must be in {_AUTH_OPTIONS}")
return option, value
def _get_validator(
key: str, validators: dict[str, Callable[[Any, Any], Any]], normed_key: Optional[str] = None

View File

@ -59,6 +59,7 @@ from pymongo.errors import (
InvalidOperation,
NotPrimaryError,
OperationFailure,
PyMongoError,
WaitQueueTimeoutError,
)
from pymongo.helpers_shared import _RETRYABLE_ERROR_CODES
@ -561,9 +562,17 @@ class _ClientBulk:
error, ConnectionFailure
) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError))
retryable_label_error = isinstance(
error, PyMongoError
) and error.has_error_label("RetryableError")
# Synthesize the full bulk result without modifying the
# current one because this write operation may be retried.
if retryable and (retryable_top_level_error or retryable_network_error):
if retryable and (
retryable_top_level_error
or retryable_network_error
or retryable_label_error
):
full = copy.deepcopy(full_result)
_merge_command(self.ops, self.idx_offset, full, result)
_throw_client_bulk_write_exception(full, self.verbose_results)

View File

@ -136,9 +136,11 @@ Classes
from __future__ import annotations
import collections
import random
import time
import uuid
from collections.abc import Mapping as _Mapping
from contextvars import ContextVar, Token
from typing import (
TYPE_CHECKING,
Any,
@ -159,7 +161,9 @@ from pymongo import _csot
from pymongo.errors import (
ConfigurationError,
ConnectionFailure,
ExecutionTimeout,
InvalidOperation,
NetworkTimeout,
OperationFailure,
PyMongoError,
WTimeoutError,
@ -180,6 +184,28 @@ if TYPE_CHECKING:
_IS_SYNC = True
_SESSION: ContextVar[Optional[ClientSession]] = ContextVar("SESSION", default=None)
class _BoundSessionContext:
"""Context manager returned by ClientSession.bind() that manages bound state.
Entering the context stores the session in the ``_SESSION`` context variable;
exiting restores the previous value and, when ``end_session`` was requested,
calls ``end_session()`` on the session.
"""
def __init__(self, session: ClientSession, end_session: bool) -> None:
self._session = session
# Token returned by _SESSION.set(); stays None until __enter__ runs.
self._session_token: Optional[Token[ClientSession]] = None
self._end_session = end_session
def __enter__(self) -> ClientSession:
# Keep the token so __exit__ can restore the previously bound value.
self._session_token = _SESSION.set(self._session) # type: ignore[assignment]
return self._session
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
if self._session_token:
# Put back whatever _SESSION held before __enter__ replaced it.
_SESSION.reset(self._session_token) # type: ignore[arg-type]
self._session_token = None
if self._end_session:
self._session.end_session()
class SessionOptions:
"""Options for a new :class:`ClientSession`.
@ -403,6 +429,7 @@ class _Transaction:
self.recovery_token = None
self.attempt = 0
self.client = client
self.has_completed_command = False
def active(self) -> bool:
return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS)
@ -410,6 +437,9 @@ class _Transaction:
def starting(self) -> bool:
return self.state == _TxnState.STARTING
def set_starting(self) -> None:
    """Put this transaction into the ``STARTING`` state."""
    self.state = _TxnState.STARTING
@property
def pinned_conn(self) -> Optional[Connection]:
if self.active() and self.conn_mgr:
@ -435,6 +465,7 @@ class _Transaction:
self.sharded = False
self.recovery_token = None
self.attempt = 0
self.has_completed_command = False
def __del__(self) -> None:
if self.conn_mgr:
@ -469,11 +500,29 @@ _UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset( #
# This limit is non-configurable and was chosen to be twice the 60 second
# default value of MongoDB's `transactionLifetimeLimitSeconds` parameter.
_WITH_TRANSACTION_RETRY_TIME_LIMIT = 120
_BACKOFF_MAX = 0.500 # 500ms max backoff
_BACKOFF_INITIAL = 0.005 # 5ms initial backoff
def _within_time_limit(start_time: float) -> bool:
def _within_time_limit(start_time: float, backoff: float = 0) -> bool:
"""Are we within the with_transaction retry limit?"""
return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT
remaining = _csot.remaining()
if remaining is not None and remaining <= 0:
return False
return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT
def _make_timeout_error(error: BaseException) -> PyMongoError:
    """Build the timeout exception that stands in for ``error``.

    An ``ExecutionTimeout`` with code 50 is produced when ``_csot.remaining()``
    reports a value, otherwise a ``NetworkTimeout``. The message is
    ``str(error)`` in both cases, and any error labels on a ``PyMongoError``
    input are copied onto the result.

    :param error: The exception that was last raised before time ran out.
    :return: The new timeout exception; the caller is responsible for raising it.
    """
    message = str(error)
    timeout_error: PyMongoError
    if _csot.remaining() is None:
        timeout_error = NetworkTimeout(message)
    else:
        timeout_error = ExecutionTimeout(message, 50, {"ok": 0, "errmsg": message, "code": 50})
    if isinstance(error, PyMongoError):
        # Copy so later label changes on either exception do not affect the other.
        timeout_error._error_labels = error._error_labels.copy()
    return timeout_error
_T = TypeVar("_T")
@ -546,6 +595,24 @@ class ClientSession:
if self._server_session is None:
raise InvalidOperation("Cannot use ended session")
def bind(self, end_session: bool = True) -> _BoundSessionContext:
    """Return a context that implicitly passes this session to all database
    operations performed inside it.

    .. code-block:: python

        with client.start_session() as s:
            with s.bind():
                # session=s is passed implicitly
                client.db.collection.insert_one({"x": 1})

    :param end_session: Whether the session is ended when the returned
        context exits. Defaults to True. When False,
        :meth:`~pymongo.client_session.ClientSession.end_session()` must be
        called once the session is no longer used.

    .. versionadded:: 4.17
    """
    return _BoundSessionContext(session=self, end_session=end_session)
def __enter__(self) -> ClientSession:
return self
@ -702,21 +769,32 @@ class ClientSession:
https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback
"""
start_time = time.monotonic()
retry = 0
last_error: Optional[BaseException] = None
while True:
if retry: # Implement exponential backoff on retry.
jitter = random.random() # noqa: S311
backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX)
if not _within_time_limit(start_time, backoff):
assert last_error is not None
raise _make_timeout_error(last_error) from last_error
time.sleep(backoff)
retry += 1
self.start_transaction(read_concern, write_concern, read_preference, max_commit_time_ms)
try:
ret = callback(self)
# Catch KeyboardInterrupt, CancelledError, etc. and cleanup.
except BaseException as exc:
last_error = exc
if self.in_transaction:
self.abort_transaction()
if (
isinstance(exc, PyMongoError)
and exc.has_error_label("TransientTransactionError")
and _within_time_limit(start_time)
if isinstance(exc, PyMongoError) and exc.has_error_label(
"TransientTransactionError"
):
# Retry the entire transaction.
continue
if _within_time_limit(start_time):
# Retry the entire transaction.
continue
raise _make_timeout_error(last_error) from exc
raise
if not self.in_transaction:
@ -727,17 +805,18 @@ class ClientSession:
try:
self.commit_transaction()
except PyMongoError as exc:
if (
exc.has_error_label("UnknownTransactionCommitResult")
and _within_time_limit(start_time)
and not _max_time_expired_error(exc)
):
last_error = exc
if exc.has_error_label(
"UnknownTransactionCommitResult"
) and not _max_time_expired_error(exc):
if not _within_time_limit(start_time):
raise _make_timeout_error(last_error) from exc
# Retry the commit.
continue
if exc.has_error_label("TransientTransactionError") and _within_time_limit(
start_time
):
if exc.has_error_label("TransientTransactionError"):
if not _within_time_limit(start_time):
raise _make_timeout_error(last_error) from exc
# Retry the entire transaction.
break
raise
@ -1018,7 +1097,11 @@ class ClientSession:
read_preference: _ServerMode,
conn: Connection,
) -> None:
if not conn.supports_sessions:
# getMores must be sent with a session if the cursor was opened with one
operation = next(iter(command))
if not conn.supports_sessions and (
isinstance(self._server_session, _EmptyServerSession) or operation != "getMore"
):
if not self._implicit:
raise ConfigurationError("Sessions are not supported by this MongoDB deployment")
return

View File

@ -21,7 +21,6 @@ from typing import (
TYPE_CHECKING,
Any,
Callable,
ContextManager,
Generic,
Iterable,
Iterator,
@ -572,11 +571,6 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
change_stream._initialize_cursor()
return change_stream
def _conn_for_writes(
self, session: Optional[ClientSession], operation: str
) -> ContextManager[Connection]:
return self._database.client._conn_for_writes(session, operation)
def _command(
self,
conn: Connection,
@ -653,7 +647,10 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
if "size" in options:
options["size"] = float(options["size"])
cmd.update(options)
with self._conn_for_writes(session, operation=_Op.CREATE) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> None:
if qev2_required and conn.max_wire_version < 21:
raise ConfigurationError(
"Driver support of Queryable Encryption is incompatible with server. "
@ -670,6 +667,8 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
session=session,
)
self.database.client._retryable_write(False, inner, session, _Op.CREATE)
def _create(
self,
options: MutableMapping[str, Any],
@ -2237,7 +2236,10 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
command (like maxTimeMS) can be passed as keyword arguments.
"""
names = []
with self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> list[str]:
supports_quorum = conn.max_wire_version >= 9
def gen_indexes() -> Iterator[Mapping[str, Any]]:
@ -2266,7 +2268,9 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
write_concern=self._write_concern_for(session),
session=session,
)
return names
return names
return self.database.client._retryable_write(False, inner, session, _Op.CREATE_INDEXES)
def create_index(
self,
@ -2419,7 +2423,6 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
kwargs["comment"] = comment
self._drop_index("*", session=session, **kwargs)
@_csot.apply
def drop_index(
self,
index_or_name: _IndexKeyHint,
@ -2487,7 +2490,10 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
with self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> None:
self._command(
conn,
cmd,
@ -2497,6 +2503,8 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
session=session,
)
self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES)
def list_indexes(
self,
session: Optional[ClientSession] = None,
@ -2760,15 +2768,22 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())}
cmd.update(kwargs)
with self._conn_for_writes(session, operation=_Op.CREATE_SEARCH_INDEXES) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> list[str]:
resp = self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
return [index["name"] for index in resp["indexesCreated"]]
return self.database.client._retryable_write(
False, inner, session, _Op.CREATE_SEARCH_INDEXES
)
def drop_search_index(
self,
name: str,
@ -2794,15 +2809,21 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
with self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> None:
self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
allowable_errors=["ns not found", 26],
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES)
def update_search_index(
self,
name: str,
@ -2830,15 +2851,21 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
cmd.update(kwargs)
if comment is not None:
cmd["comment"] = comment
with self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> None:
self._command(
conn,
cmd,
read_preference=ReadPreference.PRIMARY,
allowable_errors=["ns not found", 26],
codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
session=session,
)
self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX)
def options(
self,
session: Optional[ClientSession] = None,
@ -2911,6 +2938,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
session,
retryable=not cmd._performs_write,
operation=_Op.AGGREGATE,
is_aggregate_write=cmd._performs_write,
)
def aggregate(
@ -3116,17 +3144,21 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
if comment is not None:
cmd["comment"] = comment
write_concern = self._write_concern_for_cmd(cmd, session)
client = self._database.client
with self._conn_for_writes(session, operation=_Op.RENAME) as conn:
with self._database.client._tmp_session(session) as s:
return conn.command(
"admin",
cmd,
write_concern=write_concern,
parse_write_concern_error=True,
session=s,
client=self._database.client,
)
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> MutableMapping[str, Any]:
return conn.command(
"admin",
cmd,
write_concern=write_concern,
parse_write_concern_error=True,
session=session,
client=client,
)
return client._retryable_write(False, inner, session, _Op.RENAME)
def distinct(
self,

View File

@ -931,12 +931,15 @@ class Database(common.BaseObject, Generic[_DocumentType]):
if read_preference is None:
read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY
with self._client._conn_for_reads(read_preference, session, operation=command_name) as (
connection,
read_preference,
):
def inner(
session: Optional[ClientSession],
_server: Server,
conn: Connection,
read_preference: _ServerMode,
) -> Union[dict[str, Any], _CodecDocumentType]:
return self._command(
connection,
conn,
command,
value,
check,
@ -947,6 +950,10 @@ class Database(common.BaseObject, Generic[_DocumentType]):
**kwargs,
)
return self._client._retryable_read(
inner, read_preference, session, command_name, None, False, is_run_command=True
)
@_csot.apply
def cursor_command(
self,
@ -1014,15 +1021,17 @@ class Database(common.BaseObject, Generic[_DocumentType]):
with self._client._tmp_session(session) as tmp_session:
opts = codec_options or DEFAULT_CODEC_OPTIONS
if read_preference is None:
read_preference = (
tmp_session and tmp_session._txn_read_preference()
) or ReadPreference.PRIMARY
with self._client._conn_for_reads(read_preference, tmp_session, command_name) as (
conn,
read_preference,
):
def inner(
session: Optional[ClientSession],
_server: Server,
conn: Connection,
read_preference: _ServerMode,
) -> CommandCursor[_DocumentType]:
response = self._command(
conn,
command,
@ -1031,7 +1040,7 @@ class Database(common.BaseObject, Generic[_DocumentType]):
None,
read_preference,
opts,
session=tmp_session,
session=session,
**kwargs,
)
coll = self.get_collection("$cmd", read_preference=read_preference)
@ -1041,7 +1050,7 @@ class Database(common.BaseObject, Generic[_DocumentType]):
response["cursor"],
conn.address,
max_await_time_ms=max_await_time_ms,
session=tmp_session,
session=session,
comment=comment,
)
cmd_cursor._maybe_pin_connection(conn)
@ -1049,6 +1058,10 @@ class Database(common.BaseObject, Generic[_DocumentType]):
else:
raise InvalidOperation("Command does not return a cursor.")
return self.client._retryable_read(
inner, read_preference, tmp_session, command_name, None, False
)
def _retryable_read_command(
self,
command: Union[str, MutableMapping[str, Any]],
@ -1247,9 +1260,11 @@ class Database(common.BaseObject, Generic[_DocumentType]):
if comment is not None:
command["comment"] = comment
with self._client._conn_for_writes(session, operation=_Op.DROP) as connection:
def inner(
session: Optional[ClientSession], conn: Connection, _retryable_write: bool
) -> dict[str, Any]:
return self._command(
connection,
conn,
command,
allowable_errors=["ns not found", 26],
write_concern=self._write_concern_for(session),
@ -1257,6 +1272,8 @@ class Database(common.BaseObject, Generic[_DocumentType]):
session=session,
)
return self.client._retryable_write(False, inner, session, _Op.DROP)
@_csot.apply
def drop_collection(
self,

View File

@ -17,8 +17,11 @@ from __future__ import annotations
import asyncio
import builtins
import functools
import random
import socket
import sys
import time as time # noqa: PLC0414 # needed in sync version
from typing import (
Any,
Callable,
@ -26,6 +29,8 @@ from typing import (
cast,
)
from pymongo import _csot
from pymongo.common import MAX_ADAPTIVE_RETRIES
from pymongo.errors import (
OperationFailure,
)
@ -38,6 +43,7 @@ F = TypeVar("F", bound=Callable[..., Any])
def _handle_reauth(func: F) -> F:
@functools.wraps(func)
def inner(*args: Any, **kwargs: Any) -> Any:
no_reauth = kwargs.pop("no_reauth", False)
from pymongo.message import _BulkWriteContext
@ -70,6 +76,46 @@ def _handle_reauth(func: F) -> F:
return cast(F, inner)
_BACKOFF_INITIAL = 0.1
_BACKOFF_MAX = 10
def _backoff(
attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX
) -> float:
jitter = random.random() # noqa: S311
return jitter * min(initial_delay * (2**attempt), max_delay)
class _RetryPolicy:
    """Retry budget combined with jittered exponential backoff.

    ``attempts`` is the highest attempt number that may still be retried;
    ``backoff_initial`` and ``backoff_max`` are forwarded to ``_backoff``.
    """

    def __init__(
        self,
        attempts: int = MAX_ADAPTIVE_RETRIES,
        backoff_initial: float = _BACKOFF_INITIAL,
        backoff_max: float = _BACKOFF_MAX,
    ):
        self.backoff_max = backoff_max
        self.backoff_initial = backoff_initial
        self.attempts = attempts

    def backoff(self, attempt: int) -> float:
        """Return the backoff duration for the given attempt.

        Attempt 1 (and anything lower) maps to exponent 0.
        """
        exponent = max(0, attempt - 1)
        return _backoff(exponent, self.backoff_initial, self.backoff_max)

    def should_retry(self, attempt: int, delay: float) -> bool:
        """Return whether another retry is allowed.

        False once ``attempt`` exceeds the configured budget, or when
        ``_csot.get_timeout()`` is truthy and waiting ``delay`` would go past
        ``_csot.get_deadline()``. True otherwise.
        """
        if attempt > self.attempts:
            return False
        if not _csot.get_timeout():
            return True
        return not (time.monotonic() + delay > _csot.get_deadline())
def _getaddrinfo(
host: Any, port: Any, **kwargs: Any
) -> list[

View File

@ -35,6 +35,7 @@ from __future__ import annotations
import asyncio
import contextlib
import os
import time as time # noqa: PLC0414 # needed in sync version
import warnings
import weakref
from collections import defaultdict
@ -108,8 +109,11 @@ from pymongo.server_type import SERVER_TYPE
from pymongo.synchronous import client_session, database, uri_parser
from pymongo.synchronous.change_stream import ChangeStream, ClusterChangeStream
from pymongo.synchronous.client_bulk import _ClientBulk
from pymongo.synchronous.client_session import _EmptyServerSession
from pymongo.synchronous.client_session import _SESSION, _EmptyServerSession
from pymongo.synchronous.command_cursor import CommandCursor
from pymongo.synchronous.helpers import (
_RetryPolicy,
)
from pymongo.synchronous.settings import TopologySettings
from pymongo.synchronous.topology import Topology, _ErrorContext
from pymongo.topology_description import TOPOLOGY_TYPE, TopologyDescription
@ -610,8 +614,18 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
client to use Stable API. See `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_ for
details.
| **Overload retry options:**
- `max_adaptive_retries`: (int) How many retries to allow for overload errors. Defaults to ``2``.
- `enable_overload_retargeting`: (boolean) Whether overload retargeting is enabled for this client.
If enabled, server overload errors will cause retry attempts to select a server that has not yet returned an overload error, if possible.
Defaults to ``False``.
.. seealso:: The MongoDB documentation on `connections <https://dochub.mongodb.org/core/connections>`_.
.. versionchanged:: 4.17
Added the ``max_adaptive_retries`` and ``enable_overload_retargeting`` URI and keyword arguments.
.. versionchanged:: 4.5
Added the ``serverMonitoringMode`` keyword argument.
@ -879,11 +893,14 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
self._options.read_concern,
)
self._retry_policy = _RetryPolicy(attempts=self._options.max_adaptive_retries)
self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name)
self._opened = False
self._closed = False
self._loop: Optional[asyncio.AbstractEventLoop] = None
if not is_srv:
self._init_background()
@ -1406,7 +1423,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
)
def _ensure_session(self, session: Optional[ClientSession] = None) -> Optional[ClientSession]:
"""If provided session is None, lend a temporary session."""
"""If provided session and bound session are None, lend a temporary session."""
session = session or self._get_bound_session()
if session:
return session
@ -1986,6 +2004,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
read_pref: Optional[_ServerMode] = None,
retryable: bool = False,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
) -> T:
"""Internal retryable helper for all client transactions.
@ -1997,6 +2017,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
:param address: Server Address, defaults to None
:param read_pref: Topology of read operation, defaults to None
:param retryable: If the operation should be retried once, defaults to False
:param is_run_command: If this is a runCommand operation, defaults to False
:param is_aggregate_write: If this is an aggregate operation with a write, defaults to False.
:return: Output of the calling func()
"""
@ -2011,6 +2033,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
address=address,
retryable=retryable,
operation_id=operation_id,
is_run_command=is_run_command,
is_aggregate_write=is_aggregate_write,
).run()
def _retryable_read(
@ -2022,6 +2046,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
address: Optional[_Address] = None,
retryable: bool = True,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
) -> T:
"""Execute an operation with consecutive retries if possible
@ -2037,6 +2063,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
:param address: Optional address when sending a message, defaults to None
:param retryable: if we should attempt retries
(may not always be supported even if supplied), defaults to True
:param is_run_command: If this is a runCommand operation, defaults to False.
:param is_aggregate_write: If this is an aggregate operation with a write, defaults to False.
"""
# Ensure that the client supports retrying on reads and there is no session in
@ -2055,6 +2083,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
read_pref=read_pref,
retryable=retryable,
operation_id=operation_id,
is_run_command=is_run_command,
is_aggregate_write=is_aggregate_write,
)
def _retryable_write(
@ -2263,11 +2293,14 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
self, session: Optional[client_session.ClientSession]
) -> Generator[Optional[client_session.ClientSession], None]:
"""If provided session is None, lend a temporary session."""
if session is not None:
if not isinstance(session, client_session.ClientSession):
raise ValueError(
f"'session' argument must be a ClientSession or None, not {type(session)}"
)
if session is not None and not isinstance(session, client_session.ClientSession):
raise ValueError(
f"'session' argument must be a ClientSession or None, not {type(session)}"
)
# Check for a bound session. If one exists, treat it as an explicitly passed session.
session = session or self._get_bound_session()
if session:
# Don't call end_session.
yield session
return
@ -2295,6 +2328,18 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
if session is not None:
session._process_response(reply)
def _get_bound_session(self) -> Optional[ClientSession]:
    """Return the session currently bound via ``_SESSION``, if any.

    Reads ``_SESSION.get()``; a falsy value means nothing is bound.

    :return: the bound session when its ``client`` is this client, otherwise
        ``None`` when no session is bound.
    :raises InvalidOperation: if a session is bound but it was created by a
        different client.
    """
    bound_session = _SESSION.get()
    if not bound_session:
        return None
    if bound_session.client is not self:
        # The previous message ended with an unfilled "See <PLACEHOLDER> for
        # more information." reference; it is dropped rather than shown to users.
        raise InvalidOperation(
            "Only the client that created the bound session can perform operations within its context block."
        )
    return bound_session
def server_info(self, session: Optional[client_session.ClientSession] = None) -> dict[str, Any]:
"""Get information about the MongoDB server we're connected to.
@ -2428,15 +2473,13 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
f"name_or_database must be an instance of str or a Database, not {type(name)}"
)
with self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn:
self[name]._command(
conn,
{"dropDatabase": 1, "comment": comment},
read_preference=ReadPreference.PRIMARY,
write_concern=self._write_concern_for(session),
parse_write_concern_error=True,
session=session,
)
self[name].command(
{"dropDatabase": 1, "comment": comment},
read_preference=ReadPreference.PRIMARY,
write_concern=self._write_concern_for(session),
parse_write_concern_error=True,
session=session,
)
@_csot.apply
def bulk_write(
@ -2720,12 +2763,15 @@ class _ClientConnectionRetryable(Generic[T]):
address: Optional[_Address] = None,
retryable: bool = False,
operation_id: Optional[int] = None,
is_run_command: bool = False,
is_aggregate_write: bool = False,
):
self._last_error: Optional[Exception] = None
self._retrying = False
self._multiple_retries = _csot.get_timeout() is not None
self._always_retryable = False
self._max_retries = float("inf") if _csot.get_timeout() is not None else 1
self._client = mongo_client
self._retry_policy = mongo_client._retry_policy
self._func = func
self._bulk = bulk
self._session = session
@ -2741,6 +2787,8 @@ class _ClientConnectionRetryable(Generic[T]):
self._operation = operation
self._operation_id = operation_id
self._attempt_number = 0
self._is_run_command = is_run_command
self._is_aggregate_write = is_aggregate_write
def run(self) -> T:
"""Runs the supplied func() and attempts a retry
@ -2760,7 +2808,13 @@ class _ClientConnectionRetryable(Generic[T]):
while True:
self._check_last_error(check_csot=True)
try:
return self._read() if self._is_read else self._write()
res = self._read() if self._is_read else self._write()
# Track whether the transaction has completed a command.
# If we need to apply backpressure to the first command,
# we will need to revert back to starting state.
if self._session is not None and self._session.in_transaction:
self._session._transaction.has_completed_command = True
return res
except ServerSelectionTimeoutError:
# The application may think the write was never attempted
# if we raise ServerSelectionTimeoutError on the retry
@ -2771,37 +2825,80 @@ class _ClientConnectionRetryable(Generic[T]):
# most likely be a waste of time.
raise
except PyMongoError as exc:
always_retryable = False
overloaded = False
exc_to_check = exc
if self._is_run_command and not (
self._client.options.retry_reads and self._client.options.retry_writes
):
raise
if self._is_aggregate_write and not self._client.options.retry_writes:
raise
# Execute specialized catch on read
if self._is_read:
if isinstance(exc, (ConnectionFailure, OperationFailure)):
# ConnectionFailures do not supply a code property
exc_code = getattr(exc, "code", None)
if self._is_not_eligible_for_retry() or (
isinstance(exc, OperationFailure)
and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES
overloaded = exc.has_error_label("SystemOverloadedError")
if overloaded:
self._max_retries = self._client.options.max_adaptive_retries
always_retryable = exc.has_error_label("RetryableError") and overloaded
if not self._client.options.retry_reads or (
not always_retryable
and (
self._is_not_eligible_for_retry()
or (
isinstance(exc, OperationFailure)
and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES
)
)
):
raise
self._retrying = True
self._last_error = exc
self._attempt_number += 1
# Revert back to starting state if we're in a transaction but haven't completed the first
# command.
if (
overloaded
and self._session is not None
and self._session.in_transaction
):
transaction = self._session._transaction
if not transaction.has_completed_command:
transaction.set_starting()
transaction.attempt = 0
else:
raise
# Specialized catch on write operation
if not self._is_read:
if not self._retryable:
if isinstance(exc, ClientBulkWriteException) and isinstance(
exc.error, PyMongoError
):
exc_to_check = exc.error
retryable_write_label = exc_to_check.has_error_label("RetryableWriteError")
overloaded = exc_to_check.has_error_label("SystemOverloadedError")
if overloaded:
self._max_retries = self._client.options.max_adaptive_retries
always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded
# Always retry abortTransaction and commitTransaction up to once
if self._operation not in ["abortTransaction", "commitTransaction"] and (
not self._client.options.retry_writes
or not (self._retryable or always_retryable)
):
raise
if isinstance(exc, ClientBulkWriteException) and exc.error:
retryable_write_error_exc = isinstance(
exc.error, PyMongoError
) and exc.error.has_error_label("RetryableWriteError")
else:
retryable_write_error_exc = exc.has_error_label("RetryableWriteError")
if retryable_write_error_exc:
if retryable_write_label or always_retryable:
assert self._session
self._session._unpin()
if not retryable_write_error_exc or self._is_not_eligible_for_retry():
if exc.has_error_label("NoWritesPerformed") and self._last_error:
if not always_retryable and (
not retryable_write_label or self._is_not_eligible_for_retry()
):
if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error:
raise self._last_error from exc
else:
raise
@ -2810,17 +2907,39 @@ class _ClientConnectionRetryable(Generic[T]):
self._bulk.retrying = True
else:
self._retrying = True
if not exc.has_error_label("NoWritesPerformed"):
if not exc_to_check.has_error_label("NoWritesPerformed"):
self._last_error = exc
if self._last_error is None:
self._last_error = exc
# Revert back to starting state if we're in a transaction but haven't completed the first
# command.
if overloaded and self._session is not None and self._session.in_transaction:
transaction = self._session._transaction
if not transaction.has_completed_command:
transaction.set_starting()
transaction.attempt = 0
if self._server is not None:
if self._server is not None and (
self._client.topology_description.topology_type_name == "Sharded"
or (overloaded and self._client.options.enable_overload_retargeting)
):
self._deprioritized_servers.append(self._server)
self._always_retryable = always_retryable
if overloaded:
delay = self._retry_policy.backoff(self._attempt_number)
if not self._retry_policy.should_retry(self._attempt_number, delay):
if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error:
raise self._last_error from exc
else:
raise
time.sleep(delay)
def _is_not_eligible_for_retry(self) -> bool:
"""Checks if the exchange is not eligible for retry"""
return not self._retryable or (self._is_retrying() and not self._multiple_retries)
return not self._retryable or (
self._is_retrying() and self._attempt_number >= self._max_retries
)
def _is_retrying(self) -> bool:
"""Checks if the exchange is currently undergoing a retry"""
@ -2879,7 +2998,7 @@ class _ClientConnectionRetryable(Generic[T]):
and conn.supports_sessions
)
is_mongos = conn.is_mongos
if not sessions_supported:
if not self._always_retryable and not sessions_supported:
# A retry is not possible because this server does
# not support sessions raise the last error.
self._check_last_error()
@ -2911,7 +3030,7 @@ class _ClientConnectionRetryable(Generic[T]):
conn,
read_pref,
):
if self._retrying and not self._retryable:
if self._retrying and not self._retryable and not self._always_retryable:
self._check_last_error()
if self._retrying:
_debug_log(

View File

@ -19,6 +19,8 @@ import collections
import contextlib
import logging
import os
import socket
import ssl
import sys
import time
import weakref
@ -49,10 +51,12 @@ from pymongo.errors import ( # type:ignore[attr-defined]
DocumentTooLarge,
ExecutionTimeout,
InvalidOperation,
NetworkTimeout,
NotPrimaryError,
OperationFailure,
PyMongoError,
WaitQueueTimeoutError,
_CertificateError,
)
from pymongo.hello import Hello, HelloCompat
from pymongo.helpers_shared import _get_timeout_details, format_timeout_details
@ -250,6 +254,7 @@ class Connection:
cmd = self.hello_cmd()
performing_handshake = not self.performed_handshake
awaitable = False
cmd["backpressure"] = True
if performing_handshake:
self.performed_handshake = True
cmd["client"] = self.opts.metadata
@ -750,8 +755,8 @@ class Pool:
# Enforces: maxConnecting
# Also used for: clearing the wait queue
self._max_connecting_cond = _create_condition(self.lock)
self._max_connecting = self.opts.max_connecting
self._pending = 0
self._max_connecting = self.opts.max_connecting
self._client_id = client_id
if self.enabled_for_cmap:
assert self.opts._event_listeners is not None
@ -982,6 +987,21 @@ class Pool:
self.requests -= 1
self.size_cond.notify()
def _handle_connection_error(self, error: BaseException) -> None:
    """Label connection-establishment failures as system overload errors.

    Only applies to non-SDAM pools and to errors whose exact type is
    ``AutoReconnect`` or ``NetworkTimeout``. Such an error gets the
    "SystemOverloadedError" and "RetryableError" labels unless its cause is a
    DNS, certificate, or SSL error that is not an SSL end-of-file condition.
    """
    if self.is_sdam:
        return
    if type(error) not in (AutoReconnect, NetworkTimeout):
        return
    assert isinstance(error, AutoReconnect)  # Appease type checker.

    cause = error.__cause__
    # DNS, certificate, and SSL failures are not treated as overload...
    ignorable_cause = isinstance(cause, (_CertificateError, SSLErrors, socket.gaierror))
    # ...except end-of-file errors, because the server may have disconnected
    # during the handshake.
    handshake_eof = isinstance(cause, (ssl.SSLEOFError, ssl.SSLZeroReturnError))
    if ignorable_cause and not handshake_eof:
        return

    for label in ("SystemOverloadedError", "RetryableError"):
        error._add_error_label(label)
def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connection:
"""Connect to Mongo and return a new Connection.
@ -1033,10 +1053,10 @@ class Pool:
reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR),
error=ConnectionClosedReason.ERROR,
)
self._handle_connection_error(error)
if isinstance(error, (IOError, OSError, *SSLErrors)):
details = _get_timeout_details(self.opts)
_raise_connection_failure(self.address, error, timeout_details=details)
raise
conn = Connection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type]
@ -1045,18 +1065,22 @@ class Pool:
self.active_contexts.discard(tmp_context)
if tmp_context.cancelled:
conn.cancel_context.cancel()
completed_hello = False
try:
if not self.is_sdam:
conn.hello()
completed_hello = True
self.is_writable = conn.is_writable
if handler:
handler.contribute_socket(conn, completed_handshake=False)
conn.authenticate()
# Catch KeyboardInterrupt, CancelledError, etc. and cleanup.
except BaseException:
except BaseException as e:
with self.lock:
self.active_contexts.discard(conn.cancel_context)
if not completed_hello:
self._handle_connection_error(e)
conn.close_conn(ConnectionClosedReason.ERROR)
raise
@ -1385,8 +1409,8 @@ class Pool:
:class:`~pymongo.errors.AutoReconnect` exceptions on server
hiccups, etc. We only check if the socket was closed by an external
error if it has been > 1 second since the socket was checked into the
pool, to keep performance reasonable - we can't avoid AutoReconnects
completely anyway.
pool to keep performance reasonable -
we can't avoid AutoReconnects completely anyway.
"""
idle_time_seconds = conn.idle_time_seconds()
# If socket is idle, open a new one.
@ -1397,8 +1421,9 @@ class Pool:
conn.close_conn(ConnectionClosedReason.IDLE)
return True
if self._check_interval_seconds is not None and (
self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds
check_interval_seconds = self._check_interval_seconds
if check_interval_seconds is not None and (
check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds
):
if conn.conn_closed():
conn.close_conn(ConnectionClosedReason.ERROR)

View File

@ -911,7 +911,9 @@ class Topology:
# Clear the pool.
server.reset(service_id)
elif isinstance(error, ConnectionFailure):
if isinstance(error, WaitQueueTimeoutError):
if isinstance(error, WaitQueueTimeoutError) or (
error.has_error_label("SystemOverloadedError")
):
return
# "Client MUST replace the server's description with type Unknown
# ... MUST NOT request an immediate check of the server."

View File

@ -51,7 +51,6 @@ dev = []
pip = ["pip>=20.2"]
gevent = ["gevent>=21.12"]
coverage = [
"pytest-cov>=4.0.0",
"coverage[toml]>=5,<=7.10.7"
]
mockupdb = [
@ -133,6 +132,7 @@ markers = [
"mockupdb: tests that rely on mockupdb",
"default: default test suite",
"default_async: default async test suite",
"test_bson: bson module tests",
]
[tool.mypy]

View File

@ -84,6 +84,22 @@ from test.version import Version
_IS_SYNC = True
# Decorator that skips a test when the Rust BSON extension is in use, for
# features that extension does not implement yet.
# pytest is treated as optional here (it is imported at module import time, not
# lazily): if the import raises ImportError, a pass-through decorator is defined
# instead so that integration tests do not require pytest.
try:
import pytest
import bson
# The skip condition is evaluated once, when this module is imported.
skip_if_rust_bson = pytest.mark.skipif(
bson.get_bson_implementation() == "rust",
reason="Feature not yet implemented in Rust BSON extension",
)
except ImportError:
# pytest not available, define a no-op decorator
def skip_if_rust_bson(func):
return func
def _connection_string(h):
if h.startswith(("mongodb://", "mongodb+srv://")):

View File

@ -84,6 +84,22 @@ from test.version import Version
_IS_SYNC = False
# Decorator that skips a test when the Rust BSON extension is in use, for
# features that extension does not implement yet.
# pytest is treated as optional here (it is imported at module import time, not
# lazily): if the import raises ImportError, a pass-through decorator is defined
# instead so that integration tests do not require pytest.
try:
import pytest
import bson
# The skip condition is evaluated once, when this module is imported.
skip_if_rust_bson = pytest.mark.skipif(
bson.get_bson_implementation() == "rust",
reason="Feature not yet implemented in Rust BSON extension",
)
except ImportError:
# pytest not available, define a no-op decorator
def skip_if_rust_bson(func):
return func
def _connection_string(h):
if h.startswith(("mongodb://", "mongodb+srv://")):

View File

@ -652,6 +652,38 @@ class AsyncClientUnitTest(AsyncUnitTest):
with self.assertWarns(UserWarning):
self.simple_client(multi_host)
async def test_max_adaptive_retries(self):
# Assert that max adaptive retries defaults to 2.
c = self.simple_client(connect=False)
self.assertEqual(c.options.max_adaptive_retries, 2)
# Assert that max adaptive retries can be configured through connection or client options.
c = self.simple_client(connect=False, max_adaptive_retries=10)
self.assertEqual(c.options.max_adaptive_retries, 10)
c = self.simple_client(connect=False, maxAdaptiveRetries=10)
self.assertEqual(c.options.max_adaptive_retries, 10)
c = self.simple_client(host="mongodb://localhost/?maxAdaptiveRetries=10", connect=False)
self.assertEqual(c.options.max_adaptive_retries, 10)
async def test_enable_overload_retargeting(self):
# Assert that overload retargeting defaults to false.
c = self.simple_client(connect=False)
self.assertFalse(c.options.enable_overload_retargeting)
# Assert that overload retargeting can be enabled through connection or client options.
c = self.simple_client(connect=False, enable_overload_retargeting=True)
self.assertTrue(c.options.enable_overload_retargeting)
c = self.simple_client(connect=False, enableOverloadRetargeting=True)
self.assertTrue(c.options.enable_overload_retargeting)
c = self.simple_client(
host="mongodb://localhost/?enableOverloadRetargeting=true", connect=False
)
self.assertTrue(c.options.enable_overload_retargeting)
class TestClient(AsyncIntegrationTest):
def test_multiple_uris(self):
@ -1034,7 +1066,7 @@ class TestClient(AsyncIntegrationTest):
db_names = await self.client.list_database_names()
self.assertIn("pymongo_test", db_names)
self.assertIn("pymongo_test_mike", db_names)
self.assertEqual(db_names, cmd_names)
self.assertCountEqual(db_names, cmd_names)
async def test_drop_database(self):
with self.assertRaises(TypeError):

View File

@ -0,0 +1,312 @@
# Copyright 2025-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test Client Backpressure spec."""
from __future__ import annotations
import os
import pathlib
import sys
from time import perf_counter
from unittest.mock import patch
from pymongo.common import MAX_ADAPTIVE_RETRIES
sys.path[0:0] = [""]
from test.asynchronous import (
AsyncIntegrationTest,
async_client_context,
unittest,
)
from test.asynchronous.unified_format import generate_test_classes
from test.utils_shared import EventListener, OvertCommandListener
from pymongo.errors import OperationFailure, PyMongoError
_IS_SYNC = False
# Fail point definition that mocks a system overload error. It fails the next
# find/insert/update command once with both overload-related error labels.
mock_overload_error = {
    "configureFailPoint": "failCommand",
    "mode": {"times": 1},
    "data": {
        "failCommands": ["find", "insert", "update"],
        "errorCode": 462,  # IngressRequestRateLimitExceeded
        "errorLabels": ["RetryableError", "SystemOverloadedError"],
    },
}


def get_mock_overload_error(times: int):
    """Return a shallow copy of ``mock_overload_error`` that fires ``times`` times.

    Only the "mode" entry is replaced; the "data" mapping is shared with the
    module-level template.
    """
    return {**mock_overload_error, "mode": {"times": times}}
class TestBackpressure(AsyncIntegrationTest):
# Integration tests that mock overload errors with the `failCommand` fail point.
# Every test follows the same two phases:
#   1. fail the command MAX_ADAPTIVE_RETRIES times and expect the operation to succeed;
#   2. fail it MAX_ADAPTIVE_RETRIES + 1 times and expect a PyMongoError whose string
#      form mentions both the RetryableError and SystemOverloadedError labels.
RUN_ON_LOAD_BALANCER = True
@async_client_context.require_failCommand_appName
async def test_retry_overload_error_command(self):
# Covers the generic `db.command` helper, issuing a raw "find" command.
await self.db.t.insert_one({"x": 1})
# Ensure command is retried on overload error.
fail_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES)
async with self.fail_point(fail_many):
await self.db.command("find", "t")
# Ensure command stops retrying after MAX_ADAPTIVE_RETRIES.
fail_too_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES + 1)
async with self.fail_point(fail_too_many):
with self.assertRaises(PyMongoError) as error:
await self.db.command("find", "t")
# The surfaced error is expected to name both error labels.
self.assertIn("RetryableError", str(error.exception))
self.assertIn("SystemOverloadedError", str(error.exception))
@async_client_context.require_failCommand_appName
async def test_retry_overload_error_find(self):
# Covers the collection-level read helper `find_one`.
await self.db.t.insert_one({"x": 1})
# Ensure command is retried on overload error.
fail_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES)
async with self.fail_point(fail_many):
await self.db.t.find_one()
# Ensure command stops retrying after MAX_ADAPTIVE_RETRIES.
fail_too_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES + 1)
async with self.fail_point(fail_too_many):
with self.assertRaises(PyMongoError) as error:
await self.db.t.find_one()
# The surfaced error is expected to name both error labels.
self.assertIn("RetryableError", str(error.exception))
self.assertIn("SystemOverloadedError", str(error.exception))
@async_client_context.require_failCommand_appName
async def test_retry_overload_error_insert_one(self):
# Covers a single-document write (`insert_one`).
# Ensure command is retried on overload error.
fail_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES)
async with self.fail_point(fail_many):
await self.db.t.insert_one({"x": 1})
# Ensure command stops retrying after MAX_ADAPTIVE_RETRIES.
fail_too_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES + 1)
async with self.fail_point(fail_too_many):
with self.assertRaises(PyMongoError) as error:
await self.db.t.insert_one({"x": 1})
# The surfaced error is expected to name both error labels.
self.assertIn("RetryableError", str(error.exception))
self.assertIn("SystemOverloadedError", str(error.exception))
@async_client_context.require_failCommand_appName
async def test_retry_overload_error_update_many(self):
# Even though update_many is not a retryable write operation, it will
# still be retried via the "RetryableError" error label.
await self.db.t.insert_one({"x": 1})
# Ensure command is retried on overload error.
fail_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES)
async with self.fail_point(fail_many):
await self.db.t.update_many({}, {"$set": {"x": 2}})
# Ensure command stops retrying after MAX_ADAPTIVE_RETRIES.
fail_too_many = get_mock_overload_error(MAX_ADAPTIVE_RETRIES + 1)
async with self.fail_point(fail_too_many):
with self.assertRaises(PyMongoError) as error:
await self.db.t.update_many({}, {"$set": {"x": 2}})
# The surfaced error is expected to name both error labels.
self.assertIn("RetryableError", str(error.exception))
self.assertIn("SystemOverloadedError", str(error.exception))
@async_client_context.require_failCommand_appName
async def test_retry_overload_error_getMore(self):
# Covers `getMore`: the fail point below targets only that command, so the
# initial `find` that opens each cursor is not failed.
coll = self.db.t
await coll.insert_many([{"x": 1} for _ in range(10)])
# Ensure command is retried on overload error.
fail_many = {
"configureFailPoint": "failCommand",
"mode": {"times": MAX_ADAPTIVE_RETRIES},
"data": {
"failCommands": ["getMore"],
"errorCode": 462, # IngressRequestRateLimitExceeded
"errorLabels": ["RetryableError", "SystemOverloadedError"],
},
}
# Open the cursor and consume one document before enabling the fail point;
# draining the rest with to_list() then has to go through getMore
# (at least 10 documents exist and the batch size is 2).
cursor = coll.find(batch_size=2)
await cursor.next()
async with self.fail_point(fail_many):
await cursor.to_list()
# Ensure command stops retrying after MAX_ADAPTIVE_RETRIES.
# A shallow copy is enough here: only the top-level "mode" key is replaced.
fail_too_many = fail_many.copy()
fail_too_many["mode"] = {"times": MAX_ADAPTIVE_RETRIES + 1}
cursor = coll.find(batch_size=2)
await cursor.next()
async with self.fail_point(fail_too_many):
with self.assertRaises(PyMongoError) as error:
await cursor.to_list()
# The surfaced error is expected to name both error labels.
self.assertIn("RetryableError", str(error.exception))
self.assertIn("SystemOverloadedError", str(error.exception))
# Prose tests.
class AsyncTestClientBackpressure(AsyncIntegrationTest):
    # Prose tests for client backpressure. Each test gets a fresh client that
    # reports command events to a class-wide listener, which is reset per test.

    listener: EventListener

    @classmethod
    def setUpClass(cls) -> None:
        # unittest does not chain setUpClass automatically; call the base
        # implementation so any class-level setup it performs still runs.
        super().setUpClass()
        cls.listener = OvertCommandListener()

    @async_client_context.require_connection
    async def asyncSetUp(self) -> None:
        await super().asyncSetUp()
        self.listener.reset()
        # The lower-cased class name doubles as the appName, which test_01 uses
        # to scope its fail point to this client.
        self.app_name = self.__class__.__name__.lower()
        self.client = await self.async_rs_or_single_client(
            event_listeners=[self.listener], appName=self.app_name
        )

    @patch("random.random")
    @async_client_context.require_failCommand_appName
    async def test_01_operation_retry_uses_exponential_backoff(self, random_func):
        # Drivers should test that retries do not occur immediately when a SystemOverloadedError is encountered.

        # 1. let `client` be a `MongoClient`
        client = self.client

        # 2. let `collection` be a collection
        collection = client.test.test

        # 3. Now, run transactions without backoff:

        # a. Configure the random number generator used for jitter to always return `0` -- this effectively disables backoff.
        random_func.return_value = 0

        # b. Configure the following failPoint:
        fail_point = dict(
            mode="alwaysOn",
            data=dict(
                failCommands=["insert"],
                errorCode=2,
                errorLabels=["SystemOverloadedError", "RetryableError"],
                appName=self.app_name,
            ),
        )
        async with self.fail_point(fail_point):
            # c. Execute the following command. Expect that the command errors. Measure the duration of the command execution.
            start0 = perf_counter()
            with self.assertRaises(OperationFailure):
                await collection.insert_one({"a": 1})
            end0 = perf_counter()

            # d. Configure the random number generator used for jitter to always return `1`.
            random_func.return_value = 1

            # e. Execute step c again.
            start1 = perf_counter()
            with self.assertRaises(OperationFailure):
                await collection.insert_one({"a": 1})
            end1 = perf_counter()

            # f. Compare the times between the two runs.
            # The sum of 2 backoffs is 0.3 seconds. There is a 0.3-second window to account for potential variance between the two
            # runs.
            # assertLess (rather than assertTrue on a comparison) reports the
            # measured deviation when the check fails.
            self.assertLess(abs((end1 - start1) - (end0 - start0 + 0.3)), 0.3)

    @async_client_context.require_failCommand_appName
    async def test_03_overload_retries_limited(self):
        # Drivers should test that overload errors are retried a maximum of two times.

        # 1. Let `client` be a `MongoClient`.
        client = self.client

        # 2. Let `coll` be a collection.
        coll = client.pymongo_test.coll

        # 3. Configure the following failpoint:
        failpoint = {
            "configureFailPoint": "failCommand",
            "mode": "alwaysOn",
            "data": {
                "failCommands": ["find"],
                "errorCode": 462,  # IngressRequestRateLimitExceeded
                "errorLabels": ["RetryableError", "SystemOverloadedError"],
            },
        }

        # 4. Perform a find operation with `coll` that fails.
        async with self.fail_point(failpoint):
            with self.assertRaises(PyMongoError) as error:
                await coll.find_one({})

        # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverloadedError` error labels.
        self.assertIn("RetryableError", str(error.exception))
        self.assertIn("SystemOverloadedError", str(error.exception))

        # 6. Assert that the total number of started commands is MAX_ADAPTIVE_RETRIES + 1.
        self.assertEqual(len(self.listener.started_events), MAX_ADAPTIVE_RETRIES + 1)

    @async_client_context.require_failCommand_appName
    async def test_04_overload_retries_limited_configured(self):
        # Drivers should test that overload errors are retried a maximum of maxAdaptiveRetries times.
        max_retries = 1

        # 1. Let `client` be a `MongoClient` with `maxAdaptiveRetries=1` and command event monitoring enabled.
        client = await self.async_single_client(
            maxAdaptiveRetries=max_retries, event_listeners=[self.listener]
        )

        # 2. Let `coll` be a collection.
        coll = client.pymongo_test.coll

        # 3. Configure the following failpoint:
        failpoint = {
            "configureFailPoint": "failCommand",
            "mode": "alwaysOn",
            "data": {
                "failCommands": ["find"],
                "errorCode": 462,  # IngressRequestRateLimitExceeded
                "errorLabels": ["RetryableError", "SystemOverloadedError"],
            },
        }

        # 4. Perform a find operation with `coll` that fails.
        async with self.fail_point(failpoint):
            with self.assertRaises(PyMongoError) as error:
                await coll.find_one({})

        # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverloadedError` error labels.
        self.assertIn("RetryableError", str(error.exception))
        self.assertIn("SystemOverloadedError", str(error.exception))

        # 6. Assert that the total number of started commands is max_retries + 1.
        self.assertEqual(len(self.listener.started_events), max_retries + 1)
# Location of JSON test specifications: next to this module for the sync
# variant, one directory further up for the async variant.
# (`parents[0]` is `.parent`, `parents[1]` is `.parent.parent`.)
_TEST_PATH = os.path.join(
    pathlib.Path(__file__).resolve().parents[0 if _IS_SYNC else 1],
    "client-backpressure",
)

# Publish the generated spec-test classes as module attributes.
globals().update(generate_test_classes(_TEST_PATH, module=__name__))

if __name__ == "__main__":
    unittest.main()

View File

@ -219,6 +219,19 @@ class TestClientMetadataProse(AsyncIntegrationTest):
# add same metadata again
await self.check_metadata_added(client, "Framework", None, None)
async def test_handshake_documents_include_backpressure(self):
# Prose test: the handshake sent by the client must contain `backpressure: true`.
# NOTE(review): `self.server` and `self.handshake_req` are populated outside this
# method, presumably by the MockupDB-based fixture of the enclosing class; confirm.
# Create a `MongoClient` that is configured to record all handshake documents sent to the server as a part of
# connection establishment.
client = await self.async_rs_or_single_client("mongodb://" + self.server.address_string)
# Send a `ping` command to the server and verify that the command succeeds. This ensures that a connection is
# established on all topologies. Note: MockupDB only supports standalone servers.
await client.admin.command("ping")
# Assert that for every handshake document intercepted:
# the document has a field `backpressure` whose value is `true`.
self.assertEqual(self.handshake_req["backpressure"], True)
if __name__ == "__main__":
unittest.main()

View File

@ -257,7 +257,6 @@ class TestCollation(AsyncIntegrationTest):
self.assertEqual(
ja_collation.document["locale"], indexes["japanese_version"]["collation"]["locale"]
)
self.assertNotIn("collation", indexes["simple"])
await self.db.test.drop_index("fieldname_1")
indexes = await self.db.test.index_information()
self.assertIn("japanese_version", indexes)

View File

@ -30,7 +30,12 @@ import pymongo
sys.path[0:0] = [""]
from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous import (
AsyncIntegrationTest,
async_client_context,
skip_if_rust_bson,
unittest,
)
from test.asynchronous.utils import flaky
from test.utils_shared import (
AllowListEventListener,
@ -1507,6 +1512,7 @@ class TestCursor(AsyncIntegrationTest):
self.assertTrue(ctx.exception.timeout)
@skip_if_rust_bson
class TestRawBatchCursor(AsyncIntegrationTest):
async def test_find_raw(self):
c = self.db.test
@ -1682,6 +1688,7 @@ class TestRawBatchCursor(AsyncIntegrationTest):
await cursor.close()
@skip_if_rust_bson
class TestRawBatchCommandCursor(AsyncIntegrationTest):
async def test_aggregate_raw(self):
c = self.db.test

View File

@ -28,7 +28,12 @@ from gridfs.asynchronous.grid_file import AsyncGridIn, AsyncGridOut
sys.path[0:0] = [""]
from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous import (
AsyncIntegrationTest,
async_client_context,
skip_if_rust_bson,
unittest,
)
from bson import (
_BUILT_IN_TYPES,
@ -196,12 +201,14 @@ class CustomBSONTypeTests:
fileobj.close()
@skip_if_rust_bson
class TestCustomPythonBSONTypeToBSONMonolithicCodec(CustomBSONTypeTests, unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.codecopts = DECIMAL_CODECOPTS
@skip_if_rust_bson
class TestCustomPythonBSONTypeToBSONMultiplexedCodec(CustomBSONTypeTests, unittest.TestCase):
@classmethod
def setUpClass(cls):
@ -211,6 +218,7 @@ class TestCustomPythonBSONTypeToBSONMultiplexedCodec(CustomBSONTypeTests, unitte
cls.codecopts = codec_options
@skip_if_rust_bson
class TestBSONFallbackEncoder(unittest.TestCase):
def _get_codec_options(self, fallback_encoder):
type_registry = TypeRegistry(fallback_encoder=fallback_encoder)
@ -273,6 +281,7 @@ class TestBSONFallbackEncoder(unittest.TestCase):
self.assertEqual(called_with, [2 << 65])
@skip_if_rust_bson
class TestBSONTypeEnDeCodecs(unittest.TestCase):
def test_instantiation(self):
msg = "Can't instantiate abstract class"
@ -336,6 +345,7 @@ class TestBSONTypeEnDeCodecs(unittest.TestCase):
self.assertFalse(issubclass(TypeEncoder, TypeDecoder))
@skip_if_rust_bson
class TestBSONCustomTypeEncoderAndFallbackEncoderTandem(unittest.TestCase):
TypeA: Any
TypeB: Any
@ -432,6 +442,7 @@ class TestBSONCustomTypeEncoderAndFallbackEncoderTandem(unittest.TestCase):
encode({"x": self.TypeA(100)}, codec_options=codecopts)
@skip_if_rust_bson
class TestTypeRegistry(unittest.TestCase):
types: Tuple[object, object]
codecs: Tuple[Type[TypeCodec], Type[TypeCodec]]
@ -622,6 +633,7 @@ class TestTypeRegistry(unittest.TestCase):
run_test(TypeCodec, {"bson_type": Decimal128, "transform_bson": lambda x: x})
@skip_if_rust_bson
class TestCollectionWCustomType(AsyncIntegrationTest):
async def asyncSetUp(self):
await super().asyncSetUp()
@ -744,6 +756,7 @@ class TestCollectionWCustomType(AsyncIntegrationTest):
self.assertIsNone(await c.find_one())
@skip_if_rust_bson
class TestGridFileCustomType(AsyncIntegrationTest):
async def asyncSetUp(self):
await super().asyncSetUp()
@ -910,6 +923,7 @@ class ChangeStreamsWCustomTypesTestMixin:
await run_test(doc_cls)
@skip_if_rust_bson
class TestCollectionChangeStreamsWCustomTypes(
AsyncIntegrationTest, ChangeStreamsWCustomTypesTestMixin
):
@ -929,6 +943,7 @@ class TestCollectionChangeStreamsWCustomTypes(
await self.input_target.delete_many({})
@skip_if_rust_bson
class TestDatabaseChangeStreamsWCustomTypes(
AsyncIntegrationTest, ChangeStreamsWCustomTypesTestMixin
):
@ -949,6 +964,7 @@ class TestDatabaseChangeStreamsWCustomTypes(
await self.input_target.insert_one({"data": "dummy"})
@skip_if_rust_bson
class TestClusterChangeStreamsWCustomTypes(
AsyncIntegrationTest, ChangeStreamsWCustomTypesTestMixin
):

View File

@ -25,8 +25,10 @@ from asyncio import StreamReader, StreamWriter
from pathlib import Path
from test.asynchronous.helpers import ConcurrentRunner
from test.asynchronous.utils import flaky
from test.utils_shared import delay
from pymongo.asynchronous.pool import AsyncConnection
from pymongo.errors import ConnectionFailure
from pymongo.operations import _Op
from pymongo.server_selectors import writable_server_selector
@ -70,7 +72,12 @@ from pymongo.errors import (
)
from pymongo.hello import Hello, HelloCompat
from pymongo.helpers_shared import _check_command_response, _check_write_command_response
from pymongo.monitoring import ServerHeartbeatFailedEvent, ServerHeartbeatStartedEvent
from pymongo.monitoring import (
ConnectionCheckOutFailedEvent,
PoolClearedEvent,
ServerHeartbeatFailedEvent,
ServerHeartbeatStartedEvent,
)
from pymongo.server_description import SERVER_TYPE, ServerDescription
from pymongo.topology_description import TOPOLOGY_TYPE
@ -131,6 +138,9 @@ async def got_app_error(topology, app_error):
raise AssertionError
except (AutoReconnect, NotPrimaryError, OperationFailure) as e:
if when == "beforeHandshakeCompletes":
# The pool would have added the SystemOverloadedError in this case.
if isinstance(e, AutoReconnect):
e._add_error_label("SystemOverloadedError")
completed_handshake = False
elif when == "afterHandshakeCompletes":
completed_handshake = True
@ -439,6 +449,59 @@ class TestPoolManagement(AsyncIntegrationTest):
AsyncConnection.close_conn = original_close
class TestPoolBackpressure(AsyncIntegrationTest):
    # Checks that connection-establishment overload (server-side ingress rate
    # limiting) makes checkouts fail without clearing the connection pool.

    @async_client_context.require_version_min(7, 0, 0)
    async def test_connection_pool_is_not_cleared(self):
        listener = CMAPListener()

        # Create a client that listens to CMAP events, with maxConnecting=100.
        client = await self.async_rs_or_single_client(maxConnecting=100, event_listeners=[listener])

        # Disable the ingress rate limiter on teardown.
        # Sleep for 1 second before disabling to avoid the rate limiter.
        async def teardown():
            await asyncio.sleep(1)
            await client.admin.command(
                "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=False
            )

        # Register the cleanup *before* touching any server parameter: if one
        # of the setParameter calls below fails part-way, the limiter must not
        # stay enabled for the tests that run afterwards.
        self.addAsyncCleanup(teardown)

        # Enable the ingress rate limiter.
        await client.admin.command(
            "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=True
        )
        await client.admin.command("setParameter", 1, ingressConnectionEstablishmentRatePerSec=20)
        await client.admin.command(
            "setParameter", 1, ingressConnectionEstablishmentBurstCapacitySecs=1
        )
        await client.admin.command("setParameter", 1, ingressConnectionEstablishmentMaxQueueDepth=1)

        # Make sure the collection has at least one document.
        await client.test.test.delete_many({})
        await client.test.test.insert_one({})

        # Run a slow operation to tie up the connection.
        async def target():
            try:
                await client.test.test.find_one({"$where": delay(0.1)})
            except ConnectionFailure:
                # Failed checkouts are the expected outcome under rate limiting.
                pass

        # Run 100 parallel operations that contend for connections.
        tasks = [ConcurrentRunner(target=target) for _ in range(100)]
        for t in tasks:
            await t.start()
        for t in tasks:
            await t.join()

        # Verify there were at least 10 connection checkout failed events but no pool cleared events.
        # assertGreaterEqual so that exactly 10 failures satisfies "at least 10".
        self.assertGreaterEqual(len(listener.events_by_type(ConnectionCheckOutFailedEvent)), 10)
        self.assertEqual(len(listener.events_by_type(PoolClearedEvent)), 0)
class TestServerMonitoringMode(AsyncIntegrationTest):
@async_client_context.require_no_load_balancer
async def asyncSetUp(self):

View File

@ -876,6 +876,8 @@ class TestViews(AsyncEncryptionIntegrationTest):
class TestCorpus(AsyncEncryptionIntegrationTest):
# PYTHON-5708: Encryption tests sending large payloads fail on some mongocryptd versions.
@async_client_context.require_version_max(6, 99)
@unittest.skipUnless(any(AWS_CREDS.values()), "AWS environment credentials are not set")
async def asyncSetUp(self):
await super().asyncSetUp()
@ -1052,6 +1054,8 @@ class TestBsonSizeBatches(AsyncEncryptionIntegrationTest):
client_encrypted: AsyncMongoClient
listener: OvertCommandListener
# PYTHON-5708: Encryption tests sending large payloads fail on some mongocryptd versions.
@async_client_context.require_version_max(6, 99)
async def asyncSetUp(self):
await super().asyncSetUp()
db = async_client_context.client.db
@ -3322,6 +3326,7 @@ class TestAutomaticDecryptionKeys(AsyncEncryptionIntegrationTest):
class TestExplicitTextEncryptionProse(AsyncEncryptionIntegrationTest):
@async_client_context.require_no_standalone
@async_client_context.require_version_min(8, 2, -1)
@async_client_context.require_version_max(8, 99, 99)
@async_client_context.require_libmongocrypt_min(1, 15, 1)
@async_client_context.require_pymongocrypt_min(1, 16, 0)
async def asyncSetUp(self):

View File

@ -513,6 +513,39 @@ class TestPooling(_TestPoolingBase):
str(error.exception),
)
@async_client_context.require_failCommand_appName
async def test_pool_backpressure_preserves_existing_connections(self):
    # A connection that is already checked out must stay open when another
    # operation on the same pool hits a connection failure.
    client = await self.async_rs_or_single_client()
    coll = client.pymongo_test.t
    pool = await async_get_pool(client)
    await coll.insert_many([{"x": 1} for _ in range(10)])

    # Check a connection out of the pool and hold on to it.
    t = SocketGetter(self.c, pool)
    await t.start()
    while t.state != "connection":
        await asyncio.sleep(0.1)

    # From here on the getter owns a connection; release it even if an
    # assertion or the find below raises, so nothing leaks into later tests.
    try:
        # unittest assertions (rather than bare `assert`) survive `python -O`
        # and report through the test runner.
        self.assertFalse(t.sock.conn_closed())

        # Mock a session establishment overload.
        mock_connection_fail = {
            "configureFailPoint": "failCommand",
            "mode": {"times": 1},
            "data": {
                "closeConnection": True,
            },
        }

        async with self.fail_point(mock_connection_fail):
            await coll.find_one({})

        # Make sure the existing socket was not affected.
        self.assertFalse(t.sock.conn_closed())
    finally:
        # Cleanup
        await t.release_conn()
        await t.join()
        await pool.close()
class TestPoolMaxSize(_TestPoolingBase):
async def test_max_pool_size(self):

View File

@ -19,7 +19,12 @@ import uuid
sys.path[0:0] = [""]
from test.asynchronous import AsyncIntegrationTest, async_client_context, unittest
from test.asynchronous import (
AsyncIntegrationTest,
async_client_context,
skip_if_rust_bson,
unittest,
)
from bson import Code, DBRef, decode, encode
from bson.binary import JAVA_LEGACY, Binary, UuidRepresentation
@ -31,6 +36,7 @@ from bson.son import SON
_IS_SYNC = False
@skip_if_rust_bson
class TestRawBSONDocument(AsyncIntegrationTest):
# {'_id': ObjectId('556df68b6e32ab21a95e0785'),
# 'name': 'Sherlock',

View File

@ -19,9 +19,12 @@ import os
import pprint
import sys
import threading
from test.asynchronous.utils import async_set_fail_point
from test.asynchronous.utils import async_ensure_all_connected, async_set_fail_point
from unittest import mock
from pymongo.errors import OperationFailure
from pymongo import MongoClient
from pymongo.common import MAX_ADAPTIVE_RETRIES
from pymongo.errors import OperationFailure, PyMongoError
sys.path[0:0] = [""]
@ -38,6 +41,7 @@ from test.utils_shared import (
)
from pymongo.monitoring import (
CommandFailedEvent,
ConnectionCheckedOutEvent,
ConnectionCheckOutFailedEvent,
ConnectionCheckOutFailedReason,
@ -145,6 +149,19 @@ class TestPoolPausedError(AsyncIntegrationTest):
class TestRetryableReads(AsyncIntegrationTest):
async def asyncSetUp(self) -> None:
    # Besides the base setup, keep a synchronous client around for fail point
    # configuration from inside (non-async) event listener callbacks.
    await super().asyncSetUp()
    sync_client = MongoClient(**async_client_context.client_options)
    self.setup_client = sync_client
    self.addCleanup(sync_client.close)
# TODO: After PYTHON-4595 we can use async event handlers and remove this workaround.
def configure_fail_point_sync(self, command_args, off=False) -> None:
    # Build the failCommand document with "configureFailPoint" as its first key,
    # overlay the caller's arguments, and send it through the synchronous client.
    fail_cmd = dict(configureFailPoint="failCommand")
    fail_cmd.update(command_args)
    if off:
        # Turning the fail point off: drop any "data" and force mode "off".
        fail_cmd.pop("data", None)
        fail_cmd["mode"] = "off"
    self.setup_client.admin.command(fail_cmd)
@async_client_context.require_multiple_mongoses
@async_client_context.require_failCommand_fail_point
async def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self):
@ -261,6 +278,248 @@ class TestRetryableReads(AsyncIntegrationTest):
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
self.assertIsNot(command_docs[0], command_docs[1])
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available_and_overload_retargeting_is_enabled(
    self
):
    listener = OvertCommandListener()

    # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, `enableOverloadRetargeting=True`, and command event monitoring enabled.
    client = await self.async_rs_or_single_client(
        event_listeners=[listener],
        retryReads=True,
        readPreference="primaryPreferred",
        enableOverloadRetargeting=True,
    )

    # Ensure the client has discovered all nodes.
    await async_ensure_all_connected(client)

    # 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
    command_args = {
        "configureFailPoint": "failCommand",
        "mode": {"times": 1},
        "data": {
            "failCommands": ["find"],
            "errorLabels": ["RetryableError", "SystemOverloadedError"],
            "errorCode": 6,
        },
    }
    await async_set_fail_point(client, command_args)

    # 3. Reset the command event monitor to clear the fail point command from its stored events.
    listener.reset()

    # 4. Execute a `find` command with `client`.
    await client.t.t.find_one({})

    # 5. Assert that one failed command event and one successful command event occurred.
    self.assertEqual(len(listener.failed_events), 1)
    self.assertEqual(len(listener.succeeded_events), 1)

    # 6. Assert that both events occurred on different servers.
    # A unittest assertion is used instead of a bare `assert` so the check is
    # not stripped under `python -O` and both connection ids are reported on failure.
    self.assertNotEqual(
        listener.failed_events[0].connection_id, listener.succeeded_events[0].connection_id
    )
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
    self
):
    listener = OvertCommandListener()

    # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
    client = await self.async_rs_or_single_client(
        event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
    )

    # Ensure the client has discovered all nodes.
    await async_ensure_all_connected(client)

    # 2. Configure a fail point with the RetryableError error label.
    command_args = {
        "configureFailPoint": "failCommand",
        "mode": {"times": 1},
        "data": {
            "failCommands": ["find"],
            "errorLabels": ["RetryableError"],
            "errorCode": 6,
        },
    }
    await async_set_fail_point(client, command_args)

    # 3. Reset the command event monitor to clear the fail point command from its stored events.
    listener.reset()

    # 4. Execute a `find` command with `client`.
    await client.t.t.find_one({})

    # 5. Assert that one failed command event and one successful command event occurred.
    self.assertEqual(len(listener.failed_events), 1)
    self.assertEqual(len(listener.succeeded_events), 1)

    # 6. Assert that both events occurred on the same server.
    # A unittest assertion is used instead of a bare `assert` so the check is
    # not stripped under `python -O` and both connection ids are reported on failure.
    self.assertEqual(
        listener.failed_events[0].connection_id, listener.succeeded_events[0].connection_id
    )
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_the_same_replicaset_server_when_one_is_available_and_overload_retargeting_is_disabled(
    self
):
    listener = OvertCommandListener()

    # 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
    # `enableOverloadRetargeting` is deliberately not passed, so the client's default applies
    # (the test name implies retargeting is disabled by default; confirm against the client options).
    client = await self.async_rs_or_single_client(
        event_listeners=[listener],
        retryReads=True,
        readPreference="primaryPreferred",
    )

    # Ensure the client has discovered all nodes.
    await async_ensure_all_connected(client)

    # 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
    command_args = {
        "configureFailPoint": "failCommand",
        "mode": {"times": 1},
        "data": {
            "failCommands": ["find"],
            "errorLabels": ["RetryableError", "SystemOverloadedError"],
            "errorCode": 6,
        },
    }
    await async_set_fail_point(client, command_args)

    # 3. Reset the command event monitor to clear the fail point command from its stored events.
    listener.reset()

    # 4. Execute a `find` command with `client`.
    await client.t.t.find_one({})

    # 5. Assert that one failed command event and one successful command event occurred.
    self.assertEqual(len(listener.failed_events), 1)
    self.assertEqual(len(listener.succeeded_events), 1)

    # 6. Assert that both events occurred on the same server.
    # A unittest assertion is used instead of a bare `assert` so the check is
    # not stripped under `python -O` and both connection ids are reported on failure.
    self.assertEqual(
        listener.failed_events[0].connection_id, listener.succeeded_events[0].connection_id
    )
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator]
async def test_overload_then_nonoverload_retries_increased_reads(self) -> None:
# Scenario: the first `find` attempt fails with an overload error, and every
# later attempt fails with a plain retryable error. The operation is expected
# to raise after MAX_ADAPTIVE_RETRIES + 1 `find` attempts in total.
# Create a client.
listener = OvertCommandListener()
# Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error
# code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["find"],
"errorCode": 91,
"errorLabels": ["RetryableError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the fail point command only if the failed event is for the 91 error configured in step 2.
# Only the first failure swaps the fail point; once an event has been
# recorded, later failures return early.
if listener.failed_events:
return
assert event.failure["code"] == 91
# The callback is a plain function, so the synchronous helper is used here.
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# This handler replaces the listener's `failed` attribute (see below), so the
# event is recorded by hand.
listener.failed_events.append(event)
# Install the handler before the client is created with this listener.
listener.failed = failed
client = await self.async_rs_client(event_listeners=[listener])
await client.test.test.insert_one({})
# Arm the one-shot overload fail point and make sure it is switched off afterwards.
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
with self.assertRaises(PyMongoError):
await client.test.test.find_one()
# Count only `find` commands; the earlier insert is also in started_events.
started_finds = [e for e in listener.started_events if e.command_name == "find"]
self.assertEqual(len(started_finds), MAX_ADAPTIVE_RETRIES + 1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator]
async def test_backoff_is_not_applied_for_non_overload_errors(self):
# Scenario: the first `find` attempt fails with an overload error, and every
# later attempt fails with a plain retryable error. The patched
# `_RetryPolicy.backoff` is expected to be called exactly once.
# Patch the backoff of whichever flavour (sync or async) this module is.
if _IS_SYNC:
mock_target = "pymongo.synchronous.helpers._RetryPolicy.backoff"
else:
mock_target = "pymongo.asynchronous.helpers._RetryPolicy.backoff"
# Create a client.
listener = OvertCommandListener()
# Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error
# code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["find"],
"errorCode": 91,
"errorLabels": ["RetryableError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the fail point command only if the failed event is for the 91 error configured in step 2.
# Only the first failure swaps the fail point; once an event has been
# recorded, later failures return early.
if listener.failed_events:
return
assert event.failure["code"] == 91
# The callback is a plain function, so the synchronous helper is used here.
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# This handler replaces the listener's `failed` attribute (see below), so the
# event is recorded by hand.
listener.failed_events.append(event)
# Install the handler before the client is created with this listener.
listener.failed = failed
client = await self.async_rs_client(event_listeners=[listener])
await client.test.test.insert_one({})
# Arm the one-shot overload fail point and make sure it is switched off afterwards.
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Perform a findOne operation with coll. Expect the operation to fail.
# The mocked backoff returns 0, presumably a zero delay so the test does not wait; confirm.
with mock.patch(mock_target, return_value=0) as mock_backoff:
with self.assertRaises(PyMongoError):
await client.test.test.find_one()
# Assert that backoff was applied only once for the initial overload error and not for the subsequent non-overload retryable errors.
self.assertEqual(mock_backoff.call_count, 1)
if __name__ == "__main__":
unittest.main()

View File

@ -21,6 +21,9 @@ import pprint
import sys
import threading
from test.asynchronous.utils import async_set_fail_point, flaky
from unittest import mock
from pymongo.common import MAX_ADAPTIVE_RETRIES
sys.path[0:0] = [""]
@ -43,14 +46,17 @@ from bson.codec_options import DEFAULT_CODEC_OPTIONS
from bson.int64 import Int64
from bson.raw_bson import RawBSONDocument
from bson.son import SON
from pymongo import MongoClient
from pymongo.errors import (
AutoReconnect,
ConnectionFailure,
OperationFailure,
NotPrimaryError,
PyMongoError,
ServerSelectionTimeoutError,
WriteConcernError,
)
from pymongo.monitoring import (
CommandFailedEvent,
CommandSucceededEvent,
ConnectionCheckedOutEvent,
ConnectionCheckOutFailedEvent,
@ -601,5 +607,291 @@ class TestRetryableWritesTxnNumber(IgnoreDeprecationsTest):
self.assertEqual(sent_txn_id, final_txn_id, msg)
class TestErrorPropagationAfterEncounteringMultipleErrors(AsyncIntegrationTest):
# Only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers.
@async_client_context.require_replica_set
# Run against server versions 6.0 and above.
@async_client_context.require_version_min(6, 0)  # type: ignore[untyped-decorator]
async def asyncSetUp(self) -> None:
    # Besides the base setup, keep a synchronous client around for fail point
    # configuration from inside (non-async) event listener callbacks.
    await super().asyncSetUp()
    sync_client = MongoClient(**async_client_context.default_client_options)
    self.setup_client = sync_client
    self.addCleanup(sync_client.close)
# TODO: After PYTHON-4595 we can use async event handlers and remove this workaround.
def configure_fail_point_sync(self, command_args, off=False) -> None:
    # Merge the caller's arguments over a failCommand skeleton (keeping
    # "configureFailPoint" as the first key) and send it synchronously.
    fail_cmd = {"configureFailPoint": "failCommand", **command_args}
    if off:
        # Turning the fail point off: drop any "data" and force mode "off".
        fail_cmd.pop("data", None)
        fail_cmd["mode"] = "off"
    self.setup_client.admin.command(fail_cmd)
async def test_01_drivers_return_the_correct_error_when_receiving_only_errors_without_NoWritesPerformed(
self
) -> None:
# Scenario: the first insert fails with error 91 and later attempts fail with 10107, none of them
# labelled NoWritesPerformed; the insert is expected to surface the 10107 error.
# Create a client with retryWrites=true.
listener = OvertCommandListener()
# Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["insert"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Via the command monitoring CommandFailedEvent, configure a fail point with error code 10107 (NotWritablePrimary).
command_args_inner = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["insert"],
"errorCode": 10107,
"errorLabels": ["RetryableError", "SystemOverloadedError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the 10107 fail point command only if the failed event is for the 91 error configured in step 2.
# Once an event has been recorded, later failed events are ignored.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(command_args_inner)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
# Install the callback above as the listener's CommandFailedEvent handler.
listener.failed = failed
client = await self.async_rs_client(retryWrites=True, event_listeners=[listener])
self.configure_fail_point_sync(command_args)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Attempt an insertOne operation on any record for any database and collection.
# Expect the insertOne to fail with a server error.
with self.assertRaises(NotPrimaryError) as exc:
await client.test.test.insert_one({})
# Assert that the error code of the server error is 10107.
assert exc.exception.errors["code"] == 10107 # type:ignore[call-overload]
async def test_02_drivers_return_the_correct_error_when_receiving_only_errors_with_NoWritesPerformed(
self
) -> None:
# Scenario: the first insert fails with error 91 and later attempts fail with 10107, all of them
# labelled NoWritesPerformed; the insert is expected to surface the original 91 error.
# Create a client with retryWrites=true.
listener = OvertCommandListener()
# Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError, SystemOverloadedError
# and NoWritesPerformed error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["insert"],
"errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"],
"errorCode": 91,
},
}
# Via the command monitoring CommandFailedEvent, configure a fail point with error code `10107` (NotWritablePrimary)
# and a NoWritesPerformed label.
command_args_inner = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["insert"],
"errorCode": 10107,
"errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Once an event has been recorded, later failed events are ignored.
if listener.failed_events:
return
# Configure the 10107 fail point command only if the failed event is for the 91 error configured in step 2.
assert event.failure["code"] == 91
self.configure_fail_point_sync(command_args_inner)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
# Install the callback above as the listener's CommandFailedEvent handler.
listener.failed = failed
client = await self.async_rs_client(retryWrites=True, event_listeners=[listener])
self.configure_fail_point_sync(command_args)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Attempt an insertOne operation on any record for any database and collection.
# Expect the insertOne to fail with a server error.
with self.assertRaises(NotPrimaryError) as exc:
await client.test.test.insert_one({})
# Assert that the error code of the server error is 91.
assert exc.exception.errors["code"] == 91 # type:ignore[call-overload]
async def test_03_drivers_return_the_correct_error_when_receiving_some_errors_with_NoWritesPerformed_and_some_without_NoWritesPerformed(
self
) -> None:
# Scenario: the first insert fails with error 91 without NoWritesPerformed and later attempts fail with
# error 91 with NoWritesPerformed; the surfaced error is expected to be the one without the label.
# Create a client with retryWrites=true.
listener = OvertCommandListener()
# Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error
# code `91` (ShutdownInProgress) and the `NoWritesPerformed`, `RetryableError` and `SystemOverloadedError` labels.
command_args_inner = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["insert"],
"errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"],
"errorCode": 91,
},
}
# Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and
# `SystemOverloadedError` error labels but without the `NoWritesPerformed` error label.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["insert"],
"errorCode": 91,
"errorLabels": ["RetryableError", "SystemOverloadedError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the fail point command only if the failed event is for the 91 error configured in step 2.
# Once an event has been recorded, later failed events are ignored.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(command_args_inner)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
# Install the callback above as the listener's CommandFailedEvent handler.
listener.failed = failed
client = await self.async_rs_client(retryWrites=True, event_listeners=[listener])
self.configure_fail_point_sync(command_args)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Attempt an insertOne operation on any record for any database and collection.
# Expect the insertOne to fail with a server error.
with self.assertRaises(PyMongoError) as exc:
await client.test.test.insert_one({})
# Assert that the error code of the server error is 91.
assert exc.exception.errors["code"] == 91
# Assert that the error does not contain the error label `NoWritesPerformed`.
assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"]
async def test_overload_then_nonoverload_retries_increased_writes(self) -> None:
# Scenario: one overload error (SystemOverloadedError label) followed by non-overload retryable errors;
# the total number of insert attempts is checked against MAX_ADAPTIVE_RETRIES + 1.
# Create a client with retryWrites=true.
listener = OvertCommandListener()
# Fail point applied directly before the insert (mode {"times": 1}): error code `91` (ShutdownInProgress)
# with the `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["insert"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Fail point applied from the CommandFailedEvent listener (mode "alwaysOn"): error code `91` (ShutdownInProgress)
# with the `RetryableError` and `RetryableWriteError` error labels, i.e. without `SystemOverloadedError`.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["insert"],
"errorCode": 91,
"errorLabels": ["RetryableError", "RetryableWriteError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Switch to the non-overload fail point only on the first failed event (the initial 91 overload error);
# once an event has been recorded, later failed events are ignored.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
# Install the callback above as the listener's CommandFailedEvent handler.
listener.failed = failed
client = await self.async_rs_client(retryWrites=True, event_listeners=[listener])
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# The insert is expected to fail once retries are exhausted.
with self.assertRaises(PyMongoError):
await client.test.test.insert_one({"x": 1})
# Count the insert commands that were started and compare with MAX_ADAPTIVE_RETRIES + 1.
started_inserts = [e for e in listener.started_events if e.command_name == "insert"]
self.assertEqual(len(started_inserts), MAX_ADAPTIVE_RETRIES + 1)
async def test_backoff_is_not_applied_for_non_overload_errors(self):
# Scenario: one overload error followed by non-overload retryable errors on an insert; the patched
# _RetryPolicy.backoff is expected to be called exactly once.
# Pick the backoff implementation to patch depending on whether this is the sync or async test module.
if _IS_SYNC:
mock_target = "pymongo.synchronous.helpers._RetryPolicy.backoff"
else:
mock_target = "pymongo.asynchronous.helpers._RetryPolicy.backoff"
# Create a client.
listener = OvertCommandListener()
# Fail point applied directly before the insert (mode {"times": 1}): error code `91` (ShutdownInProgress)
# with the `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["insert"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Fail point applied from the CommandFailedEvent listener (mode "alwaysOn"): error code `91` (ShutdownInProgress)
# with the `RetryableError` and `RetryableWriteError` error labels, i.e. without `SystemOverloadedError`.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["insert"],
"errorCode": 91,
"errorLabels": ["RetryableError", "RetryableWriteError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Switch to the non-overload fail point only on the first failed event (the initial 91 overload error);
# once an event has been recorded, later failed events are ignored.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
# Install the callback above as the listener's CommandFailedEvent handler.
listener.failed = failed
client = await self.async_rs_client(event_listeners=[listener])
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Perform an insertOne operation with coll. Expect the operation to fail.
with mock.patch(mock_target, return_value=0) as mock_backoff:
with self.assertRaises(PyMongoError):
await client.test.test.insert_one({})
# Assert that backoff was applied only once for the initial overload error and not for the subsequent non-overload retryable errors.
self.assertEqual(mock_backoff.call_count, 1)
if __name__ == "__main__":
unittest.main()

View File

@ -15,7 +15,6 @@
"""Test the client_session module."""
from __future__ import annotations
import asyncio
import copy
import sys
import time
@ -24,8 +23,6 @@ from io import BytesIO
from test.asynchronous.helpers import ExceptionCatchingTask
from typing import Any, Callable, List, Set, Tuple
from pymongo.synchronous.mongo_client import MongoClient
sys.path[0:0] = [""]
from test.asynchronous import (
@ -45,7 +42,7 @@ from test.utils_shared import (
from bson import DBRef
from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket
from pymongo import ASCENDING, AsyncMongoClient, _csot, monitoring
from pymongo import ASCENDING, AsyncMongoClient, monitoring
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
from pymongo.asynchronous.cursor import AsyncCursor
from pymongo.asynchronous.helpers import anext
@ -189,6 +186,52 @@ class TestSession(AsyncIntegrationTest):
f"{f.__name__} did not return implicit session to pool",
)
# Explicit bound session
for f, args, kw in ops:
async with client.start_session() as s:
async with s.bind():
listener.reset()
s._materialize()
last_use = s._server_session.last_use
start = time.monotonic()
self.assertLessEqual(last_use, start)
# In case "f" modifies its inputs.
args = copy.copy(args)
kw = copy.copy(kw)
await f(*args, **kw)
self.assertGreaterEqual(len(listener.started_events), 1)
for event in listener.started_events:
self.assertIn(
"lsid",
event.command,
f"{f.__name__} sent no lsid with {event.command_name}",
)
self.assertEqual(
s.session_id,
event.command["lsid"],
f"{f.__name__} sent wrong lsid with {event.command_name}",
)
self.assertFalse(s.has_ended)
self.assertTrue(s.has_ended)
with self.assertRaisesRegex(InvalidOperation, "ended session"):
async with s.bind():
await f(*args, **kw)
# Test a session cannot be used on another client.
async with self.client2.start_session() as s:
async with s.bind():
# In case "f" modifies its inputs.
args = copy.copy(args)
kw = copy.copy(kw)
with self.assertRaisesRegex(
InvalidOperation,
"Only the client that created the bound session can perform operations within its context block",
):
await f(*args, **kw)
async def test_implicit_sessions_checkout(self):
# "To confirm that implicit sessions only allocate their server session after a
# successful connection checkout" test from Driver Sessions Spec.
@ -825,6 +868,106 @@ class TestSession(AsyncIntegrationTest):
async with client.start_session() as s:
self.assertRaises(TypeError, lambda: copy.copy(s))
async def test_nested_session_binding(self):
# Checks which lsid is sent with find commands as session1 and session2 bindings are entered and exited:
# unbound operations must use neither explicit session's id, bound operations must use the bound session's id.
coll = self.client.pymongo_test.test
await coll.insert_one({"x": 1})
session1 = self.client.start_session()
session2 = self.client.start_session()
# Materialize both sessions up front; their session_id values are compared below.
session1._materialize()
session2._materialize()
try:
self.listener.reset()
# Uses implicit session
await coll.find_one()
implicit_lsid = self.listener.started_events[0].command.get("lsid")
self.assertIsNotNone(implicit_lsid)
self.assertNotEqual(implicit_lsid, session1.session_id)
self.assertNotEqual(implicit_lsid, session2.session_id)
# end_session=False is used because both sessions are ended explicitly in the finally clause.
async with session1.bind(end_session=False):
self.listener.reset()
# Uses bound session1
await coll.find_one()
session1_lsid = self.listener.started_events[0].command.get("lsid")
self.assertEqual(session1_lsid, session1.session_id)
async with session2.bind(end_session=False):
self.listener.reset()
# Uses bound session2
await coll.find_one()
session2_lsid = self.listener.started_events[0].command.get("lsid")
self.assertEqual(session2_lsid, session2.session_id)
self.assertNotEqual(session2_lsid, session1.session_id)
self.listener.reset()
# Use bound session1 again
await coll.find_one()
session1_lsid = self.listener.started_events[0].command.get("lsid")
self.assertEqual(session1_lsid, session1.session_id)
self.assertNotEqual(session1_lsid, session2.session_id)
self.listener.reset()
# Uses implicit session
await coll.find_one()
implicit_lsid = self.listener.started_events[0].command.get("lsid")
self.assertIsNotNone(implicit_lsid)
self.assertNotEqual(implicit_lsid, session1.session_id)
self.assertNotEqual(implicit_lsid, session2.session_id)
finally:
await session1.end_session()
await session2.end_session()
async def test_session_binding_end_session(self):
# Checks the end_session option of bind(): by default the session is expected to have ended,
# with end_session=False it is expected to still be open.
coll = self.client.pymongo_test.test
await coll.insert_one({"x": 1})
# Default bind(): the session is expected to report has_ended.
async with self.client.start_session().bind() as s1:
await coll.find_one()
self.assertTrue(s1.has_ended)
# bind(end_session=False): the session is expected to remain open, so it is ended explicitly afterwards.
async with self.client.start_session().bind(end_session=False) as s2:
await coll.find_one()
self.assertFalse(s2.has_ended)
await s2.end_session()
async def test_getmore_preserves_lsid_after_session_support_lost(self):
# Checks that getMore commands keep sending the lsid from the initial find even after the pooled
# connections have been flagged as not supporting sessions.
listener = OvertCommandListener()
client = await self.async_rs_or_single_client(event_listeners=[listener], maxPoolSize=1)
coll = client.pymongo_test.test
await coll.drop()
# Ten documents with batch_size=2 below, so iterating the cursor needs further batches.
await coll.insert_many([{"x": i} for i in range(10)])
self.addAsyncCleanup(coll.drop)
async with client.start_session() as s:
cursor = coll.find({}, batch_size=2, session=s)
# Fetch the first document so the find command is sent and its lsid can be recorded.
await anext(cursor)
find_event = next(e for e in listener.started_events if e.command_name == "find")
lsid = find_event.command["lsid"]
# Simulate a node stepping down: mark idle connections as not supporting sessions.
for server in client._topology._servers.values():
for conn in server.pool.conns:
conn.supports_sessions = False
listener.reset()
# Drain the cursor; only commands started after the reset are inspected.
await cursor.to_list()
getmore_events = [e for e in listener.started_events if e.command_name == "getMore"]
self.assertGreater(len(getmore_events), 0, "expected at least one getMore command")
for event in getmore_events:
self.assertIn(
"lsid", event.command, "getMore must include lsid when session is materialized"
)
self.assertEqual(
lsid, event.command["lsid"], "getMore lsid must match the session lsid from find"
)
class TestCausalConsistency(AsyncUnitTest):
listener: SessionTestListener

View File

@ -48,19 +48,11 @@ from pymongo.write_concern import WriteConcern
_HAVE_PYOPENSSL = False
try:
# All of these must be available to use PyOpenSSL
import OpenSSL
import requests
import service_identity
# Ensure service_identity>=18.1 is installed
from service_identity.pyopenssl import verify_ip_address
from pymongo.ocsp_support import _load_trusted_ca_certs
from pymongo import pyopenssl_context
_HAVE_PYOPENSSL = True
except ImportError:
_load_trusted_ca_certs = None # type: ignore
pass
if HAVE_SSL:
@ -136,11 +128,6 @@ class TestClientSSL(AsyncPyMongoTestCase):
def test_use_pyopenssl_when_available(self):
self.assertTrue(HAVE_PYSSL)
@unittest.skipUnless(_HAVE_PYOPENSSL, "Cannot test without PyOpenSSL")
def test_load_trusted_ca_certs(self):
trusted_ca_certs = _load_trusted_ca_certs(CA_BUNDLE_PEM)
self.assertEqual(2, len(trusted_ca_certs))
class TestSSL(AsyncIntegrationTest):
saved_port: int

View File

@ -16,9 +16,13 @@
from __future__ import annotations
import asyncio
import random
import sys
import time
from io import BytesIO
from unittest.mock import patch
import pymongo
from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket
from pymongo.asynchronous.pool import PoolState
from pymongo.server_selectors import writable_server_selector
@ -45,7 +49,9 @@ from pymongo.errors import (
CollectionInvalid,
ConfigurationError,
ConnectionFailure,
ExecutionTimeout,
InvalidOperation,
NetworkTimeout,
OperationFailure,
)
from pymongo.operations import IndexModel, InsertOne
@ -434,7 +440,7 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
await self.configure_fail_point(client, command_args)
@async_client_context.require_transactions
async def test_callback_raises_custom_error(self):
async def test_1_callback_raises_custom_error(self):
class _MyException(Exception):
pass
@ -446,7 +452,7 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
await s.with_transaction(raise_error)
@async_client_context.require_transactions
async def test_callback_returns_value(self):
async def test_2_callback_returns_value(self):
async def callback(_):
return "Foo"
@ -474,7 +480,7 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
self.assertEqual(await s.with_transaction(callback), "Foo")
@async_client_context.require_transactions
async def test_callback_not_retried_after_timeout(self):
async def test_3_1_callback_not_retried_after_timeout(self):
listener = OvertCommandListener()
client = await self.async_rs_client(event_listeners=[listener])
coll = client[self.db.name].test
@ -495,14 +501,16 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
listener.reset()
async with client.start_session() as s:
with PatchSessionTimeout(0):
with self.assertRaises(OperationFailure):
with self.assertRaises(NetworkTimeout) as context:
await s.with_transaction(callback)
self.assertEqual(listener.started_command_names(), ["insert", "abortTransaction"])
# Assert that the timeout error has the same labels as the error it wraps.
self.assertTrue(context.exception.has_error_label("TransientTransactionError"))
@async_client_context.require_test_commands
@async_client_context.require_transactions
async def test_callback_not_retried_after_commit_timeout(self):
async def test_3_2_callback_not_retried_after_commit_timeout(self):
listener = OvertCommandListener()
client = await self.async_rs_client(event_listeners=[listener])
coll = client[self.db.name].test
@ -529,14 +537,16 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
async with client.start_session() as s:
with PatchSessionTimeout(0):
with self.assertRaises(OperationFailure):
with self.assertRaises(NetworkTimeout) as context:
await s.with_transaction(callback)
self.assertEqual(listener.started_command_names(), ["insert", "commitTransaction"])
# Assert that the timeout error has the same labels as the error it wraps.
self.assertTrue(context.exception.has_error_label("TransientTransactionError"))
@async_client_context.require_test_commands
@async_client_context.require_transactions
async def test_commit_not_retried_after_timeout(self):
async def test_3_3_commit_not_retried_after_timeout(self):
listener = OvertCommandListener()
client = await self.async_rs_client(event_listeners=[listener])
coll = client[self.db.name].test
@ -560,7 +570,7 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
async with client.start_session() as s:
with PatchSessionTimeout(0):
with self.assertRaises(ConnectionFailure):
with self.assertRaises(NetworkTimeout) as context:
await s.with_transaction(callback)
# One insert for the callback and two commits (includes the automatic
@ -568,6 +578,40 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
self.assertEqual(
listener.started_command_names(), ["insert", "commitTransaction", "commitTransaction"]
)
# Assert that the timeout error has the same labels as the error it wraps.
self.assertTrue(context.exception.has_error_label("UnknownTransactionCommitResult"))
@async_client_context.require_transactions
async def test_callback_not_retried_after_csot_timeout(self):
# Runs with_transaction under pymongo.timeout(1.0) with a callback that always raises an OperationFailure
# labelled TransientTransactionError, and expects ExecutionTimeout to be raised.
# NOTE(review): despite "not_retried" in the name, the assertions below require at least two insert and
# abortTransaction commands, i.e. the callback is retried before the timeout is reported.
listener = OvertCommandListener()
client = await self.async_rs_client(event_listeners=[listener])
coll = client[self.db.name].test
async def callback(session):
await coll.insert_one({}, session=session)
# Hand-built server error document (code 251, NoSuchTransaction) carrying the TransientTransactionError label.
err: dict = {
"ok": 0,
"errmsg": "Transaction 7819 has been aborted.",
"code": 251,
"codeName": "NoSuchTransaction",
"errorLabels": ["TransientTransactionError"],
}
raise OperationFailure(err["errmsg"], err["code"], err)
# Create the collection.
await coll.insert_one({})
listener.reset()
async with client.start_session() as s:
with pymongo.timeout(1.0):
with self.assertRaises(ExecutionTimeout):
await s.with_transaction(callback)
# At least two attempts: the original and one or more retries.
inserts = len([x for x in listener.started_command_names() if x == "insert"])
aborts = len([x for x in listener.started_command_names() if x == "abortTransaction"])
self.assertGreaterEqual(inserts, 2)
self.assertGreaterEqual(aborts, 2)
# Tested here because this supports Motor's convenient transactions API.
@async_client_context.require_transactions
@ -606,6 +650,63 @@ class TestTransactionsConvenientAPI(AsyncTransactionsBase):
await s.with_transaction(callback)
self.assertFalse(s.in_transaction)
@async_client_context.require_test_commands
@async_client_context.require_transactions
async def test_4_retry_backoff_is_enforced(self):
# Times with_transaction twice against a fail point that fails commitTransaction 13 times with error 251:
# once with random.random patched to 0 and once patched to 1, then compares the two durations.
# NOTE(review): coll comes from async_client_context.client while the sessions are started on self.client;
# presumably both refer to the same client object — confirm.
client = async_client_context.client
coll = client[self.db.name].test
end = start = no_backoff_time = 0
# Make random.random always return 0 (no backoff)
with patch.object(random, "random", return_value=0):
# set fail point to trigger transaction failure and trigger backoff
await self.set_fail_point(
{
"configureFailPoint": "failCommand",
"mode": {"times": 13},
"data": {
"failCommands": ["commitTransaction"],
"errorCode": 251,
},
}
)
self.addAsyncCleanup(
self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}
)
async def callback(session):
await coll.insert_one({}, session=session)
start = time.monotonic()
async with self.client.start_session() as s:
await s.with_transaction(callback)
end = time.monotonic()
# Baseline duration of the run with random.random patched to 0.
no_backoff_time = end - start
# Make random.random always return 1 (max backoff)
with patch.object(random, "random", return_value=1):
# set fail point to trigger transaction failure and trigger backoff
await self.set_fail_point(
{
"configureFailPoint": "failCommand",
"mode": {
"times": 13
}, # high enough that the time effect of backoff is noticeable
"data": {
"failCommands": ["commitTransaction"],
"errorCode": 251,
},
}
)
self.addAsyncCleanup(
self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}
)
start = time.monotonic()
async with self.client.start_session() as s:
await s.with_transaction(callback)
end = time.monotonic()
# The second run is expected to take the baseline plus 2.2 seconds, within a 1 second tolerance.
self.assertLess(abs(end - start - (no_backoff_time + 2.2)), 1) # sum of 13 backoffs is 2.2
class TestOptionsInsideTransactionProse(AsyncTransactionsBase):
@async_client_context.require_transactions

View File

@ -1464,11 +1464,6 @@ class UnifiedSpecTestMixinV1(AsyncIntegrationTest):
self.assertListEqual(sorted_expected_documents, actual_documents)
async def run_scenario(self, spec, uri=None):
# Kill all sessions before and after each test to prevent an open
# transaction (from a test failure) from blocking collection/database
# operations during test set up and tear down.
await self.kill_all_sessions()
# Handle flaky tests.
flaky_tests = [
("PYTHON-5170", ".*test_discovery_and_monitoring.*"),
@ -1504,6 +1499,15 @@ class UnifiedSpecTestMixinV1(AsyncIntegrationTest):
if skip_reason is not None:
raise unittest.SkipTest(f"{skip_reason}")
# Kill all sessions after each test with transactions to prevent an open
# transaction (from a test failure) from blocking collection/database
# operations during test set up and tear down.
for op in spec["operations"]:
name = op["name"]
if name == "startTransaction" or name == "withTransaction":
self.addAsyncCleanup(self.kill_all_sessions)
break
# process createEntities
self._uri = uri
self.entity_map = EntityMapUtil(self)

View File

@ -16,43 +16,13 @@
from __future__ import annotations
import asyncio
import functools
import os
import time
import unittest
from collections import abc
from inspect import iscoroutinefunction
from test.asynchronous import AsyncIntegrationTest, async_client_context, client_knobs
from test.asynchronous import async_client_context
from test.asynchronous.helpers import ConcurrentRunner
from test.utils_shared import (
CMAPListener,
CompareType,
EventListener,
OvertCommandListener,
ScenarioDict,
ServerAndTopologyEventListener,
camel_to_snake,
camel_to_snake_args,
parse_spec_options,
prepare_spec_arguments,
)
from typing import List
from test.utils_shared import ScenarioDict
from bson import ObjectId, decode, encode, json_util
from bson.binary import Binary
from bson.int64 import Int64
from bson.son import SON
from gridfs import GridFSBucket
from gridfs.asynchronous.grid_file import AsyncGridFSBucket
from pymongo.asynchronous import client_session
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
from pymongo.asynchronous.cursor import AsyncCursor
from pymongo.errors import AutoReconnect, BulkWriteError, OperationFailure, PyMongoError
from bson import json_util
from pymongo.lock import _async_cond_wait, _async_create_condition, _async_create_lock
from pymongo.read_concern import ReadConcern
from pymongo.read_preferences import ReadPreference
from pymongo.results import BulkWriteResult, _WriteResult
from pymongo.write_concern import WriteConcern
_IS_SYNC = False
@ -219,597 +189,3 @@ class AsyncSpecTestCreator:
self._create_tests()
else:
asyncio.run(self._create_tests())
class AsyncSpecRunner(AsyncIntegrationTest):
mongos_clients: List
knobs: client_knobs
listener: EventListener
async def asyncSetUp(self) -> None:
await super().asyncSetUp()
self.mongos_clients = []
# Speed up the tests by decreasing the heartbeat frequency.
self.knobs = client_knobs(heartbeat_frequency=0.1, min_heartbeat_interval=0.1)
self.knobs.enable()
self.targets = {}
self.listener = None # type: ignore
self.pool_listener = None
self.server_listener = None
self.maxDiff = None
async def asyncTearDown(self) -> None:
self.knobs.disable()
async def set_fail_point(self, command_args):
clients = self.mongos_clients if self.mongos_clients else [self.client]
for client in clients:
await self.configure_fail_point(client, command_args)
async def targeted_fail_point(self, session, fail_point):
"""Run the targetedFailPoint test operation.
Enable the fail point on the session's pinned mongos.
"""
clients = {c.address: c for c in self.mongos_clients}
client = clients[session._pinned_address]
await self.configure_fail_point(client, fail_point)
self.addAsyncCleanup(self.set_fail_point, {"mode": "off"})
def assert_session_pinned(self, session):
"""Run the assertSessionPinned test operation.
Assert that the given session is pinned.
"""
self.assertIsNotNone(session._transaction.pinned_address)
def assert_session_unpinned(self, session):
"""Run the assertSessionUnpinned test operation.
Assert that the given session is not pinned.
"""
self.assertIsNone(session._pinned_address)
self.assertIsNone(session._transaction.pinned_address)
async def assert_collection_exists(self, database, collection):
"""Run the assertCollectionExists test operation."""
db = self.client[database]
self.assertIn(collection, await db.list_collection_names())
async def assert_collection_not_exists(self, database, collection):
"""Run the assertCollectionNotExists test operation."""
db = self.client[database]
self.assertNotIn(collection, await db.list_collection_names())
async def assert_index_exists(self, database, collection, index):
"""Run the assertIndexExists test operation."""
coll = self.client[database][collection]
self.assertIn(index, [doc["name"] async for doc in await coll.list_indexes()])
async def assert_index_not_exists(self, database, collection, index):
"""Run the assertIndexNotExists test operation."""
coll = self.client[database][collection]
self.assertNotIn(index, [doc["name"] async for doc in await coll.list_indexes()])
async def wait(self, ms):
"""Run the "wait" test operation."""
await asyncio.sleep(ms / 1000.0)
def assertErrorLabelsContain(self, exc, expected_labels):
labels = [l for l in expected_labels if exc.has_error_label(l)]
self.assertEqual(labels, expected_labels)
def assertErrorLabelsOmit(self, exc, omit_labels):
for label in omit_labels:
self.assertFalse(
exc.has_error_label(label), msg=f"error labels should not contain {label}"
)
async def kill_all_sessions(self):
clients = self.mongos_clients if self.mongos_clients else [self.client]
for client in clients:
try:
await client.admin.command("killAllSessions", [])
except (OperationFailure, AutoReconnect):
# "operation was interrupted" by killing the command's
# own session.
# On 8.0+ killAllSessions sometimes returns a network error.
pass
def check_command_result(self, expected_result, result):
# Only compare the keys in the expected result.
filtered_result = {}
for key in expected_result:
try:
filtered_result[key] = result[key]
except KeyError:
pass
self.assertEqual(filtered_result, expected_result)
# TODO: factor the following function with test_crud.py.
def check_result(self, expected_result, result):
if isinstance(result, _WriteResult):
for res in expected_result:
prop = camel_to_snake(res)
# SPEC-869: Only BulkWriteResult has upserted_count.
if prop == "upserted_count" and not isinstance(result, BulkWriteResult):
if result.upserted_id is not None:
upserted_count = 1
else:
upserted_count = 0
self.assertEqual(upserted_count, expected_result[res], prop)
elif prop == "inserted_ids":
# BulkWriteResult does not have inserted_ids.
if isinstance(result, BulkWriteResult):
self.assertEqual(len(expected_result[res]), result.inserted_count)
else:
# InsertManyResult may be compared to [id1] from the
# crud spec or {"0": id1} from the retryable write spec.
ids = expected_result[res]
if isinstance(ids, dict):
ids = [ids[str(i)] for i in range(len(ids))]
self.assertEqual(ids, result.inserted_ids, prop)
elif prop == "upserted_ids":
# Convert indexes from strings to integers.
ids = expected_result[res]
expected_ids = {}
for str_index in ids:
expected_ids[int(str_index)] = ids[str_index]
self.assertEqual(expected_ids, result.upserted_ids, prop)
else:
self.assertEqual(getattr(result, prop), expected_result[res], prop)
return True
else:
def _helper(expected_result, result):
if isinstance(expected_result, abc.Mapping):
for i in expected_result.keys():
self.assertEqual(expected_result[i], result[i])
elif isinstance(expected_result, list):
for i, k in zip(expected_result, result):
_helper(i, k)
else:
self.assertEqual(expected_result, result)
_helper(expected_result, result)
return None
def get_object_name(self, op):
"""Allow subclasses to override handling of 'object'
Transaction spec says 'object' is required.
"""
return op["object"]
@staticmethod
def parse_options(opts):
return parse_spec_options(opts)
async def run_operation(self, sessions, collection, operation):
    """Run a single spec operation and return its materialized result.

    Args:
        sessions: mapping of session name to session object; these names
            are added to the set of objects an operation may target.
        collection: the collection the scenario runs against.
        operation: the operation document from the spec test. Its
            "arguments" mapping is modified in place when "options" are
            flattened into it.

    Returns:
        The value produced by the driver call. Cursor results are drained
        into a list, download results are wrapped in ``Binary``, and an
        ``aggregate`` whose last stage is ``$out`` returns the documents
        of the output collection as a list.
    """
    original_collection = collection
    name = camel_to_snake(operation["name"])
    if name == "run_command":
        name = "command"
    elif name == "download_by_name":
        name = "open_download_stream_by_name"
    elif name == "download":
        name = "open_download_stream"
    elif name == "map_reduce":
        self.skipTest("PyMongo does not support mapReduce")
    elif name == "count":
        self.skipTest("PyMongo does not support count")

    database = collection.database
    collection = database.get_collection(collection.name)
    if "collectionOptions" in operation:
        collection = collection.with_options(
            **self.parse_options(operation["collectionOptions"])
        )

    object_name = self.get_object_name(operation)
    if object_name == "gridfsbucket":
        # Only create the GridFSBucket when we need it (for the gridfs
        # retryable reads tests).
        obj = AsyncGridFSBucket(database, bucket_name=collection.name)
    else:
        objects = {
            "client": database.client,
            "database": database,
            "collection": collection,
            "testRunner": self,
        }
        objects.update(sessions)
        obj = objects[object_name]

    # Combine arguments with options and handle special cases.
    arguments = operation.get("arguments", {})
    arguments.update(arguments.pop("options", {}))
    # The return value is discarded here.
    # NOTE(review): presumably parse_options converts ``arguments`` in place - confirm.
    self.parse_options(arguments)

    cmd = getattr(obj, name)

    # Callback used for with_transaction: it runs nested operations against
    # the collection that was originally passed in.
    with_txn_callback = functools.partial(
        self.run_operations, sessions, original_collection, in_with_transaction=True
    )
    prepare_spec_arguments(operation, arguments, name, sessions, with_txn_callback)

    if name == "run_on_thread":
        args = {"sessions": sessions, "collection": collection}
        args.update(arguments)
        arguments = args

    if not _IS_SYNC and iscoroutinefunction(cmd):
        result = await cmd(**dict(arguments))
    else:
        result = cmd(**dict(arguments))

    # Cleanup open change stream cursors.
    if name == "watch":
        self.addAsyncCleanup(result.close)

    if name == "aggregate":
        if arguments["pipeline"] and "$out" in arguments["pipeline"][-1]:
            # Read from the primary to ensure causal consistency.
            out = collection.database.get_collection(
                arguments["pipeline"][-1]["$out"], read_preference=ReadPreference.PRIMARY
            )
            # Drain the cursor like every other cursor result below, so
            # callers always receive a list they can iterate and compare.
            return await out.find().to_list()

    if "download" in name:
        # NOTE(review): if ``result.read`` is a coroutine function for the
        # async GridFS stream, this call needs to be awaited - confirm.
        result = Binary(result.read())

    if isinstance(result, (AsyncCursor, AsyncCommandCursor)):
        return await result.to_list()

    return result
def allowable_errors(self, op):
    """Return the exception classes an erroring operation may raise.

    The encryption spec overrides this to widen the set; the default is a
    one-element tuple holding ``PyMongoError``. ``op`` is not used here.
    """
    accepted = (PyMongoError,)
    return accepted
async def _run_op(self, sessions, collection, op, in_with_transaction):
    """Run one operation and verify its outcome against the spec.

    When the operation is not expected to fail, it is run and, if the spec
    supplies a "result", that result is checked. When it is expected to
    fail, the raised exception is checked against each error expectation
    present in the expected result, and is re-raised when running inside a
    with_transaction callback.
    """
    expected_result = op.get("result")

    if not expect_error(op):
        outcome = await self.run_operation(sessions, collection, op.copy())
        if "result" in op:
            if op["name"] == "runCommand":
                self.check_command_result(expected_result, outcome)
            else:
                self.check_result(expected_result, outcome)
        return

    with self.assertRaises(self.allowable_errors(op), msg=op["name"]) as raised:
        await self.run_operation(sessions, collection, op.copy())
    exc = raised.exception

    if expect_error_message(expected_result):
        # Bulk write failures are matched against their details document
        # rather than the exception's own string form.
        message_source = exc.details if isinstance(exc, BulkWriteError) else exc
        errmsg = str(message_source).lower()
        self.assertIn(expected_result["errorContains"].lower(), errmsg)

    if expect_error_code(expected_result):
        self.assertEqual(expected_result["errorCodeName"], exc.details.get("codeName"))

    if expect_error_labels_contain(expected_result):
        self.assertErrorLabelsContain(exc, expected_result["errorLabelsContain"])

    if expect_error_labels_omit(expected_result):
        self.assertErrorLabelsOmit(exc, expected_result["errorLabelsOmit"])

    if expect_timeout_error(expected_result):
        self.assertIsInstance(exc, PyMongoError)
        if not exc.timeout:
            # Re-raise the exception for better diagnostics.
            raise exc

    # The with_transaction callback must see the failure, so propagate it.
    if in_with_transaction:
        raise exc
async def run_operations(self, sessions, collection, ops, in_with_transaction=False):
    """Run each operation in ``ops`` in order, awaiting one before the next.

    ``in_with_transaction`` is forwarded unchanged to ``_run_op``.
    """
    pending = iter(ops)
    for operation in pending:
        await self._run_op(sessions, collection, operation, in_with_transaction)
# TODO: factor with test_command_monitoring.py
def check_events(self, test, listener, session_ids):
    """Compare the listener's started events with the test's expectations.

    Does nothing when the test lists no expectations. Otherwise both the
    observed commands and the expectations are normalized in place
    (cursor ids, upsert/multi defaults, afterClusterTime, recoveryToken,
    lsid) before each expected attribute is compared.
    """
    events = listener.started_events
    expectations = test["expectations"]
    if not len(expectations):
        return

    # Give a nicer message when there are missing or extra events
    cmds = decode_raw([started.command for started in events])
    self.assertEqual(len(events), len(expectations), cmds)

    for event, expectation in zip(events, expectations):
        event_type = next(iter(expectation))
        expected_event = expectation[event_type]
        command_name = event.command_name

        # The tests substitute 42 for any number other than 0.
        if command_name == "getMore" and event.command["getMore"]:
            event.command["getMore"] = Int64(42)
        elif command_name == "killCursors":
            event.command["cursors"] = [Int64(42)]
        elif command_name == "update":
            # TODO: remove this once PYTHON-1744 is done.
            # Add upsert and multi fields back into expectations.
            for update_spec in expected_event["command"]["updates"]:
                update_spec.setdefault("upsert", False)
                update_spec.setdefault("multi", False)

        # Replace afterClusterTime: 42 with actual afterClusterTime.
        expected_cmd = expected_event["command"]
        expected_read_concern = expected_cmd.get("readConcern")
        if expected_read_concern is not None:
            if expected_read_concern.get("afterClusterTime") == 42:
                observed_time = event.command.get("readConcern", {}).get("afterClusterTime")
                if observed_time is not None:
                    expected_read_concern["afterClusterTime"] = observed_time

        # A recoveryToken of 42 stands for "any dict".
        if expected_cmd.get("recoveryToken") == 42:
            expected_cmd["recoveryToken"] = CompareType(dict)

        # Replace lsid with a name like "session0" to match test.
        if "lsid" in event.command:
            for session_name, lsid in session_ids.items():
                if event.command["lsid"] == lsid:
                    event.command["lsid"] = session_name
                    break

        for attr, expected in expected_event.items():
            actual = getattr(event, attr)
            expected = wrap_types(expected)
            if not isinstance(expected, dict):
                self.assertEqual(actual, expected)
                continue
            for key, val in expected.items():
                if val is None:
                    # An expected value of None means the key must be absent.
                    if key in actual:
                        self.fail(f"Unexpected key [{key}] in {actual!r}")
                elif key in actual:
                    self.assertEqual(
                        val, decode_raw(actual[key]), f"Key [{key}] in {actual}"
                    )
                else:
                    self.fail(f"Expected key [{key}] in {actual!r}")
def maybe_skip_scenario(self, test):
    """Skip the current test when the spec test carries a truthy "skipReason"."""
    reason = test.get("skipReason")
    if not reason:
        return
    self.skipTest(reason)
def get_scenario_db_name(self, scenario_def):
    """Return the scenario's "database_name"; subclasses may override this."""
    db_name = scenario_def["database_name"]
    return db_name
def get_scenario_coll_name(self, scenario_def):
    """Return the scenario's "collection_name"; subclasses may override this."""
    coll_name = scenario_def["collection_name"]
    return coll_name
def get_outcome_coll_name(self, outcome, collection):
    """Return the name of the collection whose final state is checked.

    Subclasses may override this; the default ignores ``outcome`` and uses
    the name of ``collection``.
    """
    outcome_name = collection.name
    return outcome_name
async def run_test_ops(self, sessions, collection, test):
    """Run the test's "operations" list through ``run_operations``.

    Exists as a separate hook so the retryable writes spec can override
    how a test's operation is run.
    """
    operations = test["operations"]
    await self.run_operations(sessions, collection, operations)
def parse_client_options(self, opts):
    """Return ``opts`` unchanged.

    Hook that lets the encryption spec override how clientOptions are parsed.
    """
    client_options = opts
    return client_options
async def setup_scenario(self, scenario_def):
    """Prepare the scenario's collection; specs may override this.

    Empties the collection if it already exists, then either inserts the
    scenario's "data" documents or, when there are none and the collection
    did not exist, creates the collection. Only that final write uses a
    majority write concern.
    """
    db_name = self.get_scenario_db_name(scenario_def)
    coll_name = self.get_scenario_coll_name(scenario_def)
    documents = scenario_def["data"]

    # Setup the collection with as few majority writes as possible.
    db = async_client_context.client.get_database(db_name)
    coll_exists = bool(await db.list_collection_names(filter={"name": coll_name}))
    if coll_exists:
        await db[coll_name].delete_many({})

    # Use majority write concern only on the final write.
    wc = WriteConcern(w="majority")
    if documents:
        # insert_many must be awaited like the other writes here; without
        # the await the insert never runs and the test starts with no data.
        await db.get_collection(coll_name, write_concern=wc).insert_many(documents)
    elif not coll_exists:
        # Ensure collection exists.
        await db.create_collection(coll_name, write_concern=wc)
async def run_scenario(self, scenario_def, test):
    """Run one spec test from set-up through the final outcome check.

    In order: skip if requested, kill sessions, set up the collection,
    configure the optional fail point, build a fresh client with command,
    pool and server listeners, start session0/session1, run the test's
    operations, end the sessions, check command events, turn the fail
    point off, and compare the collection contents with the outcome.
    """
    self.maybe_skip_scenario(test)

    # Kill all sessions before and after each test to prevent an open
    # transaction (from a test failure) from blocking collection/database
    # operations during test set up and tear down.
    await self.kill_all_sessions()
    self.addAsyncCleanup(self.kill_all_sessions)
    await self.setup_scenario(scenario_def)
    database_name = self.get_scenario_db_name(scenario_def)
    collection_name = self.get_scenario_coll_name(scenario_def)

    # SPEC-1245 workaround StaleDbVersion on distinct
    for mongos_client in self.mongos_clients:
        await mongos_client[database_name][collection_name].distinct("x")

    # Configure the fail point before creating the client, and register a
    # cleanup that switches it off again.
    if "failPoint" in test:
        fail_point = test["failPoint"]
        await self.set_fail_point(fail_point)
        self.addAsyncCleanup(
            self.set_fail_point,
            {"configureFailPoint": fail_point["configureFailPoint"], "mode": "off"},
        )

    listener = OvertCommandListener()
    pool_listener = CMAPListener()
    server_listener = ServerAndTopologyEventListener()

    # Create a new client, to avoid interference from pooled sessions.
    client_options = self.parse_client_options(test["clientOptions"])
    host = None
    if test["useMultipleMongoses"]:
        if async_client_context.load_balancer:
            host = async_client_context.MULTI_MONGOS_LB_URI
        elif async_client_context.is_mongos:
            host = async_client_context.mongos_seeds()
    client = await self.async_rs_client(
        h=host, event_listeners=[listener, pool_listener, server_listener], **client_options
    )
    self.scenario_client = client
    self.listener = listener
    self.pool_listener = pool_listener
    self.server_listener = server_listener

    # Create session0 and session1.
    sessions = {}
    session_ids = {}
    for index in range(2):
        # Don't attempt to create sessions if they are not supported by
        # the running server version.
        if not async_client_context.sessions_enabled:
            break
        session_name = "session%d" % index
        session_opts = camel_to_snake_args(test["sessionOptions"][session_name])
        if "default_transaction_options" in session_opts:
            parsed_txn_opts = self.parse_options(session_opts["default_transaction_options"])
            session_opts["default_transaction_options"] = client_session.TransactionOptions(
                **parsed_txn_opts
            )

        session = client.start_session(**dict(session_opts))
        sessions[session_name] = session
        # Store lsid so we can access it after end_session, in check_events.
        session_ids[session_name] = session.session_id

    self.addAsyncCleanup(end_sessions, sessions)

    collection = client[database_name][collection_name]
    await self.run_test_ops(sessions, collection, test)

    await end_sessions(sessions)

    self.check_events(test, listener, session_ids)

    # Disable fail points.
    if "failPoint" in test:
        fail_point = test["failPoint"]
        await self.set_fail_point(
            {"configureFailPoint": fail_point["configureFailPoint"], "mode": "off"}
        )

    # Assert final state is expected.
    outcome = test["outcome"]
    expected_c = outcome.get("collection")
    if expected_c is None:
        return

    outcome_coll_name = self.get_outcome_coll_name(outcome, collection)

    # Read from the primary with local read concern to ensure causal
    # consistency.
    outcome_db = async_client_context.client[collection.database.name]
    outcome_coll = outcome_db.get_collection(
        outcome_coll_name,
        read_preference=ReadPreference.PRIMARY,
        read_concern=ReadConcern("local"),
    )
    actual_data = await outcome_coll.find(sort=[("_id", 1)]).to_list()

    # The expected data needs to be the left hand side here otherwise
    # CompareType(Binary) doesn't work.
    self.assertEqual(wrap_types(expected_c["data"]), actual_data)
def expect_any_error(op):
    """Return the operation's "error" value, or False when ``op`` is not a dict.

    A dict without an "error" key yields ``None``.
    """
    if not isinstance(op, dict):
        return False
    return op.get("error")
def expect_error_message(expected_result):
    """Return True when the expected result carries a string "errorContains".

    Returns False when ``expected_result`` is not a dict or has no such
    key. The key is read with ``.get`` so a plain dict that lacks it does
    not raise ``KeyError``.
    """
    if not isinstance(expected_result, dict):
        return False
    return isinstance(expected_result.get("errorContains"), str)
def expect_error_code(expected_result):
    """Return the expected "errorCodeName", or False when none is given.

    False is returned both when ``expected_result`` is not a dict and when
    the key is absent; a plain dict lacking the key does not raise
    ``KeyError``.
    """
    if not isinstance(expected_result, dict):
        return False
    return expected_result.get("errorCodeName", False)
def expect_error_labels_contain(expected_result):
    """Return the expected "errorLabelsContain" value, or False if absent.

    False is returned both when ``expected_result`` is not a dict and when
    the key is absent; a plain dict lacking the key does not raise
    ``KeyError``.
    """
    if not isinstance(expected_result, dict):
        return False
    return expected_result.get("errorLabelsContain", False)
def expect_error_labels_omit(expected_result):
    """Return the expected "errorLabelsOmit" value, or False if absent.

    False is returned both when ``expected_result`` is not a dict and when
    the key is absent; a plain dict lacking the key does not raise
    ``KeyError``.
    """
    if not isinstance(expected_result, dict):
        return False
    return expected_result.get("errorLabelsOmit", False)
def expect_timeout_error(expected_result):
    """Return the expected "isTimeoutError" value, or False if absent.

    False is returned both when ``expected_result`` is not a dict and when
    the key is absent; a plain dict lacking the key does not raise
    ``KeyError``.
    """
    if not isinstance(expected_result, dict):
        return False
    return expected_result.get("isTimeoutError", False)
def expect_error(op):
    """Report whether the spec expects ``op`` to fail.

    Checks the operation's "error" flag first, then each error expectation
    on its "result" in turn. Returns the first truthy answer, or the last
    (falsy) answer when none of them is truthy.
    """
    expected_result = op.get("result")
    result_checks = (
        expect_error_message,
        expect_error_code,
        expect_error_labels_contain,
        expect_error_labels_omit,
        expect_timeout_error,
    )
    verdict = expect_any_error(op)
    for check in result_checks:
        # Stop at the first truthy verdict so later checks are not run.
        if verdict:
            break
        verdict = check(expected_result)
    return verdict
async def end_sessions(sessions):
    """End every session in the ``sessions`` mapping, one at a time."""
    for session_name in sessions:
        # Aborts the transaction if it's open.
        await sessions[session_name].end_session()
def decode_raw(val):
    """Decode RawBSONDocuments in the given container.

    Lists and mappings are round-tripped through ``encode``/``decode``
    under a wrapper key; any other value is returned as is.
    """
    if not isinstance(val, (list, abc.Mapping)):
        return val
    wrapped = {"v": val}
    return decode(encode(wrapped))["v"]
# Type aliases accepted by the "$$type" assertion, mapped to the Python
# classes that wrap_types() passes to CompareType.
TYPES = dict(
    [
        ("binData", Binary),
        ("long", Int64),
        ("int", int),
        ("string", str),
        ("objectId", ObjectId),
        ("object", dict),
        ("array", list),
    ]
)
def wrap_types(val):
    """Support $$type assertion in command results.

    Walks lists and mappings recursively. A mapping with a truthy "$$type"
    entry is replaced by a ``CompareType`` built from the matching
    ``TYPES`` class (or a tuple of classes when several aliases are
    given). Other values are returned unchanged.
    """
    if isinstance(val, list):
        return [wrap_types(item) for item in val]
    if not isinstance(val, abc.Mapping):
        return val

    type_spec = val.get("$$type")
    if type_spec:
        if isinstance(type_spec, str):
            accepted = TYPES[type_spec]
        else:
            accepted = tuple(TYPES[alias] for alias in type_spec)
        return CompareType(accepted)

    return {key: wrap_types(val[key]) for key in val}

View File

@ -42,6 +42,91 @@
}
],
"tests": [
{
"description": "disambiguatedPaths is not present when showExpandedEvents is false/unset",
"runOnRequirements": [
{
"minServerVersion": "6.1.0",
"maxServerVersion": "8.1.99",
"topologies": [
"replicaset",
"load-balanced",
"sharded"
],
"serverless": "forbid"
},
{
"minServerVersion": "8.2.1",
"topologies": [
"replicaset",
"load-balanced",
"sharded"
],
"serverless": "forbid"
}
],
"operations": [
{
"name": "insertOne",
"object": "collection0",
"arguments": {
"document": {
"_id": 1,
"a": {
"1": 1
}
}
}
},
{
"name": "createChangeStream",
"object": "collection0",
"arguments": {
"pipeline": []
},
"saveResultAsEntity": "changeStream0"
},
{
"name": "updateOne",
"object": "collection0",
"arguments": {
"filter": {
"_id": 1
},
"update": {
"$set": {
"a.1": 2
}
}
}
},
{
"name": "iterateUntilDocumentOrError",
"object": "changeStream0",
"expectResult": {
"operationType": "update",
"ns": {
"db": "database0",
"coll": "collection0"
},
"updateDescription": {
"updatedFields": {
"$$exists": true
},
"removedFields": {
"$$exists": true
},
"truncatedArrays": {
"$$exists": true
},
"disambiguatedPaths": {
"$$exists": false
}
}
}
}
]
},
{
"description": "disambiguatedPaths is present on updateDescription when an ambiguous path is present",
"operations": [

View File

@ -63,47 +63,6 @@
}
]
},
{
"description": "nsType is present when creating timeseries",
"operations": [
{
"name": "dropCollection",
"object": "database0",
"arguments": {
"collection": "foo"
}
},
{
"name": "createChangeStream",
"object": "database0",
"arguments": {
"pipeline": [],
"showExpandedEvents": true
},
"saveResultAsEntity": "changeStream0"
},
{
"name": "createCollection",
"object": "database0",
"arguments": {
"collection": "foo",
"timeseries": {
"timeField": "time",
"metaField": "meta",
"granularity": "minutes"
}
}
},
{
"name": "iterateUntilDocumentOrError",
"object": "changeStream0",
"expectResult": {
"operationType": "create",
"nsType": "timeseries"
}
}
]
},
{
"description": "nsType is present when creating views",
"operations": [

View File

@ -0,0 +1,111 @@
{
"description": "tests that connections are returned to the pool on retry attempts for overload errors",
"schemaVersion": "1.3",
"runOnRequirements": [
{
"minServerVersion": "4.4",
"topologies": [
"replicaset",
"sharded",
"load-balanced"
]
}
],
"createEntities": [
{
"client": {
"id": "client",
"useMultipleMongoses": false,
"observeEvents": [
"connectionCheckedOutEvent",
"connectionCheckedInEvent"
]
}
},
{
"client": {
"id": "fail_point_client",
"useMultipleMongoses": false
}
},
{
"database": {
"id": "database",
"client": "client",
"databaseName": "backpressure-connection-checkin"
}
},
{
"collection": {
"id": "collection",
"database": "database",
"collectionName": "coll"
}
}
],
"tests": [
{
"description": "overload error retry attempts return connections to the pool",
"operations": [
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "fail_point_client",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": [
"find"
],
"errorLabels": [
"RetryableError",
"SystemOverloadedError"
],
"errorCode": 2
}
}
}
},
{
"name": "find",
"object": "collection",
"arguments": {
"filter": {}
},
"expectError": {
"isError": true,
"isClientError": false
}
}
],
"expectEvents": [
{
"client": "client",
"eventType": "cmap",
"events": [
{
"connectionCheckedOutEvent": {}
},
{
"connectionCheckedInEvent": {}
},
{
"connectionCheckedOutEvent": {}
},
{
"connectionCheckedInEvent": {}
},
{
"connectionCheckedOutEvent": {}
},
{
"connectionCheckedInEvent": {}
}
]
}
]
}
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,253 @@
{
"description": "getMore-retried-backpressure",
"schemaVersion": "1.3",
"runOnRequirements": [
{
"minServerVersion": "4.4"
}
],
"createEntities": [
{
"client": {
"id": "client0",
"useMultipleMongoses": false,
"observeEvents": [
"commandStartedEvent",
"commandFailedEvent",
"commandSucceededEvent"
]
}
},
{
"client": {
"id": "failPointClient",
"useMultipleMongoses": false
}
},
{
"database": {
"id": "db",
"client": "client0",
"databaseName": "default"
}
},
{
"collection": {
"id": "coll",
"database": "db",
"collectionName": "default"
}
}
],
"initialData": [
{
"databaseName": "default",
"collectionName": "default",
"documents": [
{
"a": 1
},
{
"a": 2
},
{
"a": 3
}
]
}
],
"tests": [
{
"description": "getMores are retried",
"operations": [
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "failPointClient",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": {
"times": 2
},
"data": {
"failCommands": [
"getMore"
],
"errorLabels": [
"RetryableError",
"SystemOverloadedError"
],
"errorCode": 2
}
}
}
},
{
"name": "find",
"object": "coll",
"arguments": {
"batchSize": 2,
"filter": {},
"sort": {
"a": 1
}
},
"expectResult": [
{
"a": 1
},
{
"a": 2
},
{
"a": 3
}
]
}
],
"expectEvents": [
{
"client": "client0",
"events": [
{
"commandStartedEvent": {
"commandName": "find"
}
},
{
"commandSucceededEvent": {
"commandName": "find"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandFailedEvent": {
"commandName": "getMore"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandFailedEvent": {
"commandName": "getMore"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandSucceededEvent": {
"commandName": "getMore"
}
}
]
}
]
},
{
"description": "getMores are retried maxAttempts=2 times",
"operations": [
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "failPointClient",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": [
"getMore"
],
"errorLabels": [
"RetryableError",
"SystemOverloadedError"
],
"errorCode": 2
}
}
}
},
{
"name": "find",
"arguments": {
"batchSize": 2,
"filter": {}
},
"object": "coll",
"expectError": {
"isError": true,
"isClientError": false
}
}
],
"expectEvents": [
{
"client": "client0",
"events": [
{
"commandStartedEvent": {
"commandName": "find"
}
},
{
"commandSucceededEvent": {
"commandName": "find"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandFailedEvent": {
"commandName": "getMore"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandFailedEvent": {
"commandName": "getMore"
}
},
{
"commandStartedEvent": {
"commandName": "getMore"
}
},
{
"commandFailedEvent": {
"commandName": "getMore"
}
},
{
"commandStartedEvent": {
"commandName": "killCursors"
}
},
{
"commandSucceededEvent": {
"commandName": "killCursors"
}
}
]
}
]
}
]
}

View File

@ -4,6 +4,7 @@
"runOnRequirements": [
{
"minServerVersion": "8.2.0",
"maxServerVersion": "8.99.99",
"topologies": [
"replicaset",
"sharded",

View File

@ -4,6 +4,7 @@
"runOnRequirements": [
{
"minServerVersion": "8.2.0",
"maxServerVersion": "8.99.99",
"topologies": [
"replicaset",
"sharded",

View File

@ -4,6 +4,7 @@
"runOnRequirements": [
{
"minServerVersion": "8.2.0",
"maxServerVersion": "8.99.99",
"topologies": [
"replicaset",
"sharded",

View File

@ -126,7 +126,7 @@
],
"tests": [
{
"description": "Insert QE suffixPreview",
"description": "Insert QE substringPreview",
"operations": [
{
"name": "insertOne",

Some files were not shown because too many files have changed in this diff Show More