From 6e4f194b8e8fada69ff843c024c7a9693df90cd3 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Thu, 8 Feb 2024 12:22:57 -0800 Subject: [PATCH] Final resync (#1516) --- .evergreen/run-tests.sh | 4 +- .github/workflows/release-python.yml | 4 +- bson/json_util.py | 126 ++++++++++++++------------- doc/changelog.rst | 3 + doc/contributors.rst | 1 + pymongo/encryption.py | 4 +- pyproject.toml | 2 + test/auth_oidc/test_auth_oidc.py | 2 +- test/performance/perf_test.py | 13 ++- test/test_encryption.py | 22 +++-- test/test_json_util.py | 17 ++-- test/test_monitor.py | 4 +- test/unified_format.py | 4 + 13 files changed, 120 insertions(+), 86 deletions(-) diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index d660cd1e0..0dd3daf77 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -246,7 +246,9 @@ fi PIP_QUIET=0 python -m pip list if [ -z "$GREEN_FRAMEWORK" ]; then - python -m pytest -v --durations=5 --maxfail=10 $TEST_ARGS + # Use --capture=tee-sys so pytest prints test output inline: + # https://docs.pytest.org/en/stable/how-to/capture-stdout-stderr.html + python -m pytest -v --capture=tee-sys --durations=5 --maxfail=10 $TEST_ARGS else python green_framework_test.py $GREEN_FRAMEWORK -v $TEST_ARGS fi diff --git a/.github/workflows/release-python.yml b/.github/workflows/release-python.yml index b21ca6415..1bf2160df 100644 --- a/.github/workflows/release-python.yml +++ b/.github/workflows/release-python.yml @@ -38,7 +38,7 @@ jobs: - [windows-2019, win32] python: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"] include: - - buildplat: [macos-11, macosx_x86_64] + - buildplat: [macos-11, macosx_*] python: "cp37" - buildplat: [macos-11, macosx_*] python: "cp38" @@ -207,7 +207,7 @@ jobs: - name: Download all the dists uses: actions/download-artifact@v4 with: - name: all-dist-${{ github.head_ref || github.ref_name }} + name: all-dist-${{ github.job }} path: dist/ - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/bson/json_util.py b/bson/json_util.py index 4e80df463..6c5197c75 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -499,7 +499,11 @@ def loads(s: Union[str, bytes, bytearray], *args: Any, **kwargs: Any) -> Any: Accepts optional parameter `json_options`. See :class:`JSONOptions`. """ json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS) - kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(pairs, json_options) + # Execution time optimization if json_options.document_class is dict + if json_options.document_class is dict: + kwargs["object_hook"] = lambda obj: object_hook(obj, json_options) + else: + kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(pairs, json_options) return json.loads(s, *args, **kwargs) @@ -524,54 +528,17 @@ def object_pairs_hook( def object_hook(dct: Mapping[str, Any], json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any: - if "$oid" in dct: - return _parse_canonical_oid(dct) - if ( - isinstance(dct.get("$ref"), str) - and "$id" in dct - and isinstance(dct.get("$db"), (str, type(None))) - ): - return _parse_canonical_dbref(dct) - if "$date" in dct: - return _parse_canonical_datetime(dct, json_options) - if "$regex" in dct: - return _parse_legacy_regex(dct) - if "$minKey" in dct: - return _parse_canonical_minkey(dct) - if "$maxKey" in dct: - return _parse_canonical_maxkey(dct) - if "$binary" in dct: - if "$type" in dct: - return _parse_legacy_binary(dct, json_options) - else: - return _parse_canonical_binary(dct, json_options) - if "$code" in dct: - return _parse_canonical_code(dct) - if "$uuid" in dct: - return _parse_legacy_uuid(dct, json_options) - if "$undefined" in dct: - return None - if "$numberLong" in dct: - return _parse_canonical_int64(dct) - if "$timestamp" in dct: - tsp = dct["$timestamp"] - return Timestamp(tsp["t"], tsp["i"]) - if "$numberDecimal" in dct: - return _parse_canonical_decimal128(dct) - if "$dbPointer" in dct: - return _parse_canonical_dbpointer(dct) - if "$regularExpression" in dct: - return _parse_canonical_regex(dct) - if "$symbol" in dct: - return _parse_canonical_symbol(dct) - if "$numberInt" in dct: - return _parse_canonical_int32(dct) - if "$numberDouble" in dct: - return _parse_canonical_double(dct) + match = None + for k in dct: + if k in _PARSERS_SET: + match = k + break + if match: + return _PARSERS[match](dct, json_options) return dct -def _parse_legacy_regex(doc: Any) -> Any: +def _parse_legacy_regex(doc: Any, dummy0: Any) -> Any: pattern = doc["$regex"] # Check if this is the $regex query operator. if not isinstance(pattern, (str, bytes)): @@ -707,14 +674,14 @@ def _parse_canonical_datetime( return _millis_to_datetime(int(dtm), cast("CodecOptions[Any]", json_options)) -def _parse_canonical_oid(doc: Any) -> ObjectId: +def _parse_canonical_oid(doc: Any, dummy0: Any) -> ObjectId: """Decode a JSON ObjectId to bson.objectid.ObjectId.""" if len(doc) != 1: raise TypeError(f"Bad $oid, extra field(s): {doc}") return ObjectId(doc["$oid"]) -def _parse_canonical_symbol(doc: Any) -> str: +def _parse_canonical_symbol(doc: Any, dummy0: Any) -> str: """Decode a JSON symbol to Python string.""" symbol = doc["$symbol"] if len(doc) != 1: @@ -722,7 +689,7 @@ def _parse_canonical_symbol(doc: Any) -> str: return str(symbol) -def _parse_canonical_code(doc: Any) -> Code: +def _parse_canonical_code(doc: Any, dummy0: Any) -> Code: """Decode a JSON code to bson.code.Code.""" for key in doc: if key not in ("$code", "$scope"): @@ -730,7 +697,7 @@ def _parse_canonical_code(doc: Any) -> Code: return Code(doc["$code"], scope=doc.get("$scope")) -def _parse_canonical_regex(doc: Any) -> Regex[str]: +def _parse_canonical_regex(doc: Any, dummy0: Any) -> Regex[str]: """Decode a JSON regex to bson.regex.Regex.""" regex = doc["$regularExpression"] if len(doc) != 1: @@ -747,12 +714,18 @@ def _parse_canonical_regex(doc: Any) -> Regex[str]: return Regex(regex["pattern"], opts) -def _parse_canonical_dbref(doc: Any) -> DBRef: +def _parse_canonical_dbref(doc: Any, dummy0: Any) -> Any: """Decode a JSON DBRef to bson.dbref.DBRef.""" - return DBRef(doc.pop("$ref"), doc.pop("$id"), database=doc.pop("$db", None), **doc) + if ( + isinstance(doc.get("$ref"), str) + and "$id" in doc + and isinstance(doc.get("$db"), (str, type(None))) + ): + return DBRef(doc.pop("$ref"), doc.pop("$id"), database=doc.pop("$db", None), **doc) + return doc -def _parse_canonical_dbpointer(doc: Any) -> Any: +def _parse_canonical_dbpointer(doc: Any, dummy0: Any) -> Any: """Decode a JSON (deprecated) DBPointer to bson.dbref.DBRef.""" dbref = doc["$dbPointer"] if len(doc) != 1: @@ -771,7 +744,7 @@ def _parse_canonical_dbpointer(doc: Any) -> Any: raise TypeError(f"Bad $dbPointer, expected a DBRef: {doc}") -def _parse_canonical_int32(doc: Any) -> int: +def _parse_canonical_int32(doc: Any, dummy0: Any) -> int: """Decode a JSON int32 to python int.""" i_str = doc["$numberInt"] if len(doc) != 1: @@ -781,7 +754,7 @@ def _parse_canonical_int32(doc: Any) -> int: return int(i_str) -def _parse_canonical_int64(doc: Any) -> Int64: +def _parse_canonical_int64(doc: Any, dummy0: Any) -> Int64: """Decode a JSON int64 to bson.int64.Int64.""" l_str = doc["$numberLong"] if len(doc) != 1: @@ -789,7 +762,7 @@ def _parse_canonical_int64(doc: Any) -> Int64: return Int64(l_str) -def _parse_canonical_double(doc: Any) -> float: +def _parse_canonical_double(doc: Any, dummy0: Any) -> float: """Decode a JSON double to python float.""" d_str = doc["$numberDouble"] if len(doc) != 1: @@ -799,7 +772,7 @@ def _parse_canonical_double(doc: Any) -> float: return float(d_str) -def _parse_canonical_decimal128(doc: Any) -> Decimal128: +def _parse_canonical_decimal128(doc: Any, dummy0: Any) -> Decimal128: """Decode a JSON decimal128 to bson.decimal128.Decimal128.""" d_str = doc["$numberDecimal"] if len(doc) != 1: @@ -809,7 +782,7 @@ def _parse_canonical_decimal128(doc: Any) -> Decimal128: return Decimal128(d_str) -def _parse_canonical_minkey(doc: Any) -> MinKey: +def _parse_canonical_minkey(doc: Any, dummy0: Any) -> MinKey: """Decode a JSON MinKey to bson.min_key.MinKey.""" if type(doc["$minKey"]) is not int or doc["$minKey"] != 1: # noqa: E721 raise TypeError(f"$minKey value must be 1: {doc}") @@ -818,7 +791,7 @@ def _parse_canonical_minkey(doc: Any) -> MinKey: return MinKey() -def _parse_canonical_maxkey(doc: Any) -> MaxKey: +def _parse_canonical_maxkey(doc: Any, dummy0: Any) -> MaxKey: """Decode a JSON MaxKey to bson.max_key.MaxKey.""" if type(doc["$maxKey"]) is not int or doc["$maxKey"] != 1: # noqa: E721 raise TypeError("$maxKey value must be 1: %s", (doc,)) @@ -827,6 +800,41 @@ def _parse_canonical_maxkey(doc: Any) -> MaxKey: return MaxKey() +def _parse_binary(doc: Any, json_options: JSONOptions) -> Union[Binary, uuid.UUID]: + if "$type" in doc: + return _parse_legacy_binary(doc, json_options) + else: + return _parse_canonical_binary(doc, json_options) + + +def _parse_timestamp(doc: Any, dummy0: Any) -> Timestamp: + tsp = doc["$timestamp"] + return Timestamp(tsp["t"], tsp["i"]) + + +_PARSERS: dict[str, Callable[[Any, JSONOptions], Any]] = { + "$oid": _parse_canonical_oid, + "$ref": _parse_canonical_dbref, + "$date": _parse_canonical_datetime, + "$regex": _parse_legacy_regex, + "$minKey": _parse_canonical_minkey, + "$maxKey": _parse_canonical_maxkey, + "$binary": _parse_binary, + "$code": _parse_canonical_code, + "$uuid": _parse_legacy_uuid, + "$undefined": lambda _, _1: None, + "$numberLong": _parse_canonical_int64, + "$timestamp": _parse_timestamp, + "$numberDecimal": _parse_canonical_decimal128, + "$dbPointer": _parse_canonical_dbpointer, + "$regularExpression": _parse_canonical_regex, + "$symbol": _parse_canonical_symbol, + "$numberInt": _parse_canonical_int32, + "$numberDouble": _parse_canonical_double, +} +_PARSERS_SET = set(_PARSERS) + + def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any: if json_options.json_mode == JSONMode.LEGACY: return {"$binary": base64.b64encode(data).decode(), "$type": "%02x" % subtype} diff --git a/doc/changelog.rst b/doc/changelog.rst index 5cc96089d..01226e124 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -18,6 +18,9 @@ PyMongo 4.7 brings a number of improvements including: Named KMS providers enables more than one of each KMS provider type to be configured. See the docstring for :class:`~pymongo.encryption_options.AutoEncryptionOpts`. Note that named KMS providers requires pymongocrypt >=1.9 and libmongocrypt >=1.9. +- :meth:`~pymongo.encryption.ClientEncryption.encrypt` and + :meth:`~pymongo.encryption.ClientEncryption.encrypt_expression` now allow ``key_id`` + to be passed in as a :class:`uuid.UUID`. - Fixed a bug where :class:`~bson.int64.Int64` instances could not always be encoded by `orjson`_. The following now works:: diff --git a/doc/contributors.rst b/doc/contributors.rst index b6e143440..d7f103032 100644 --- a/doc/contributors.rst +++ b/doc/contributors.rst @@ -99,3 +99,4 @@ The following is a list of people who have contributed to - Iris Ho (sleepyStick) - Stephan Hof (stephan-hof) - Casey Clements (caseyclements) +- Ivan Lukyanchikov (ilukyanchikov) diff --git a/pymongo/encryption.py b/pymongo/encryption.py index 384d4f5e7..c7f02766c 100644 --- a/pymongo/encryption.py +++ b/pymongo/encryption.py @@ -831,7 +831,7 @@ class ClientEncryption(Generic[_DocumentType]): :return: The encrypted value, a :class:`~bson.binary.Binary` with subtype 6. .. versionchanged:: 4.7 - ``key_id`` can now be passed in as a :class:`uuid.UUID`. + ``key_id`` can now be passed in as a :class:`uuid.UUID`. .. versionchanged:: 4.2 Added the `query_type` and `contention_factor` parameters. @@ -883,7 +883,7 @@ class ClientEncryption(Generic[_DocumentType]): :return: The encrypted expression, a :class:`~bson.RawBSONDocument`. .. versionchanged:: 4.7 - ``key_id`` can now be passed in as a :class:`uuid.UUID`. + ``key_id`` can now be passed in as a :class:`uuid.UUID`. .. versionadded:: 4.4 """ diff --git a/pyproject.toml b/pyproject.toml index d47751ef4..a0595f6b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -236,3 +236,5 @@ test-command = "python {project}/tools/fail_if_no_c.py" [tool.cibuildwheel.linux] archs = "x86_64 aarch64 ppc64le s390x i686" +[tool.cibuildwheel.macos] +archs = "x86_64 arm64" diff --git a/test/auth_oidc/test_auth_oidc.py b/test/auth_oidc/test_auth_oidc.py index 5374f8ab3..774876149 100644 --- a/test/auth_oidc/test_auth_oidc.py +++ b/test/auth_oidc/test_auth_oidc.py @@ -32,11 +32,11 @@ from test.utils import EventListener from bson import SON from pymongo import MongoClient +from pymongo._azure_helpers import _get_azure_response from pymongo.auth_oidc import ( OIDCCallback, OIDCCallbackResult, ) -from pymongo.azure_helpers import _get_azure_response from pymongo.cursor import CursorType from pymongo.errors import AutoReconnect, ConfigurationError, OperationFailure from pymongo.hello import HelloCompat diff --git a/test/performance/perf_test.py b/test/performance/perf_test.py index 95b43bf57..add3332b8 100644 --- a/test/performance/perf_test.py +++ b/test/performance/perf_test.py @@ -78,15 +78,17 @@ class PerformanceTest: client_context.init() def setUp(self): - pass + self.setup_time = time.monotonic() def tearDown(self): + duration = time.monotonic() - self.setup_time # Remove "Test" so that TestFlatEncoding is reported as "FlatEncoding". name = self.__class__.__name__[4:] median = self.percentile(50) megabytes_per_sec = self.data_size / median / 1000000 print( - f"Running {self.__class__.__name__}. MB/s={megabytes_per_sec}, MEDIAN={self.percentile(50)}" + f"Completed {self.__class__.__name__} {megabytes_per_sec:.3f} MB/s, MEDIAN={self.percentile(50):.3f}s, " + f"total time={duration:.3f}s" ) result_data.append( { @@ -149,6 +151,7 @@ class PerformanceTest: class MicroTest(PerformanceTest): def setUp(self): + super().setUp() # Location of test data. with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data: self.file_data = data.read() @@ -256,6 +259,7 @@ class TestRunCommand(PerformanceTest, unittest.TestCase): data_size = len(encode({"hello": True})) * NUM_DOCS def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") @@ -267,6 +271,7 @@ class TestRunCommand(PerformanceTest, unittest.TestCase): class TestDocument(PerformanceTest): def setUp(self): + super().setUp() # Location of test data. with open( os.path.join(TEST_PATH, os.path.join("single_and_multi_document", self.dataset)) @@ -458,6 +463,7 @@ def read_gridfs_file(filename): class TestJsonMultiImport(PerformanceTest, unittest.TestCase): def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") ldjson_path = os.path.join(TEST_PATH, os.path.join("parallel", "ldjson_multi")) @@ -481,6 +487,7 @@ class TestJsonMultiImport(PerformanceTest, unittest.TestCase): class TestJsonMultiExport(PerformanceTest, unittest.TestCase): def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") self.client.perfest.corpus.create_index("file") @@ -501,6 +508,7 @@ class TestJsonMultiExport(PerformanceTest, unittest.TestCase): class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase): def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") gridfs_path = os.path.join(TEST_PATH, os.path.join("parallel", "gridfs_multi")) @@ -525,6 +533,7 @@ class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase): class TestGridFsMultiFileDownload(PerformanceTest, unittest.TestCase): def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") diff --git a/test/test_encryption.py b/test/test_encryption.py index c1ebb60cd..77a1c9a52 100644 --- a/test/test_encryption.py +++ b/test/test_encryption.py @@ -2623,10 +2623,12 @@ class TestRangeQueryProse(EncryptionIntegrationTest): self.db = self.encrypted_client.db self.addCleanup(self.encrypted_client.close) - def run_expression_find(self, name, expression, expected_elems, range_opts, use_expr=False): + def run_expression_find( + self, name, expression, expected_elems, range_opts, use_expr=False, key_id=None + ): find_payload = self.client_encryption.encrypt_expression( expression=expression, - key_id=self.key1_id, + key_id=key_id or self.key1_id, algorithm=Algorithm.RANGEPREVIEW, query_type=QueryType.RANGEPREVIEW, contention_factor=0, @@ -2668,16 +2670,20 @@ class TestRangeQueryProse(EncryptionIntegrationTest): self.assertEqual(self.client_encryption.decrypt(insert_payload), cast_func(6)) # Case 2. + expression = { + "$and": [ + {f"encrypted{name}": {"$gte": cast_func(6)}}, + {f"encrypted{name}": {"$lte": cast_func(200)}}, + ] + } + self.run_expression_find(name, expression, [cast_func(i) for i in [6, 30, 200]], range_opts) + # Case 2, with UUID key_id self.run_expression_find( name, - { - "$and": [ - {f"encrypted{name}": {"$gte": cast_func(6)}}, - {f"encrypted{name}": {"$lte": cast_func(200)}}, - ] - }, + expression, [cast_func(i) for i in [6, 30, 200]], range_opts, + key_id=self.key1_id.as_uuid(), ) # Case 3. diff --git a/test/test_json_util.py b/test/test_json_util.py index 74cf12f33..0f73a8efd 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -20,6 +20,7 @@ import json import re import sys import uuid +from collections import OrderedDict from typing import Any, List, MutableMapping, Tuple, Type from bson.codec_options import CodecOptions, DatetimeConversion @@ -557,15 +558,13 @@ class TestJsonUtil(unittest.TestCase): ) def test_loads_document_class(self): - # document_class dict should always work - self.assertEqual( - {"foo": "bar"}, - json_util.loads('{"foo": "bar"}', json_options=JSONOptions(document_class=dict)), - ) - self.assertEqual( - SON([("foo", "bar"), ("b", 1)]), - json_util.loads('{"foo": "bar", "b": 1}', json_options=JSONOptions(document_class=SON)), - ) + json_doc = '{"foo": "bar", "b": 1, "d": {"a": 1}}' + expected_doc = {"foo": "bar", "b": 1, "d": {"a": 1}} + for cls in (dict, SON, OrderedDict): + doc = json_util.loads(json_doc, json_options=JSONOptions(document_class=cls)) + self.assertEqual(doc, expected_doc) + self.assertIsInstance(doc, cls) + self.assertIsInstance(doc["d"], cls) def test_encode_subclass(self): cases: list[Tuple[Type, Any]] = [ diff --git a/test/test_monitor.py b/test/test_monitor.py index 8ccec7fd0..92bcdc49a 100644 --- a/test/test_monitor.py +++ b/test/test_monitor.py @@ -83,9 +83,9 @@ class TestMonitor(IntegrationTest): def test_no_thread_start_runtime_err_on_shutdown(self): """Test we silence noisy runtime errors fired when the MongoClient spawns a new thread on process shutdown.""" - command = [sys.executable, "-c", "'from pymongo import MongoClient; c = MongoClient()'"] + command = [sys.executable, "-c", "from pymongo import MongoClient; c = MongoClient()"] completed_process: subprocess.CompletedProcess = subprocess.run( - " ".join(command), shell=True, capture_output=True + command, capture_output=True ) self.assertFalse(completed_process.stderr) diff --git a/test/unified_format.py b/test/unified_format.py index 532ffdd92..c1c36fa99 100644 --- a/test/unified_format.py +++ b/test/unified_format.py @@ -1045,6 +1045,10 @@ class UnifiedSpecTestMixinV1(IntegrationTest): if "timeoutMS applied to entire download" in spec["description"]: self.skipTest("PyMongo's open_download_stream does not cap the stream's lifetime") + if "unpin after TransientTransactionError error on abort" in spec["description"]: + if client_context.version[0] == 8: + self.skipTest("Skipping TransientTransactionError pending PYTHON-4182") + class_name = self.__class__.__name__.lower() description = spec["description"].lower() if "csot" in class_name: