From 1e67c5c02c4f60eb92ec9740340f9c69abe097c3 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 15 Jul 2025 10:17:30 -0700 Subject: [PATCH] PYTHON-5289 Validate ignored bits are 0 on write for bson.BinaryVector (#2397) --- bson/binary.py | 17 +++++++++++++++++ doc/changelog.rst | 4 ++++ test/test_bson.py | 24 ++++++++++++++---------- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/bson/binary.py b/bson/binary.py index a1f63adf2..a43f81bf2 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -14,6 +14,7 @@ from __future__ import annotations import struct +import warnings from enum import Enum from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload from uuid import UUID @@ -255,6 +256,9 @@ class BinaryVector: self.dtype == other.dtype and self.padding == other.padding and self.data == other.data ) + def __len__(self) -> int: + return len(self.data) + class Binary(bytes): """Representation of BSON binary data. @@ -439,6 +443,9 @@ class Binary(bytes): :param padding: For fractional bytes, number of bits to ignore at end of vector. :return: Binary packed data identified by dtype and padding. + .. versionchanged:: 4.14 + When padding is non-zero, ignored bits should be zero. Raise exception on encoding, warn on decoding. + .. versionadded:: 4.10 """ if isinstance(vector, BinaryVector): @@ -471,6 +478,10 @@ class Binary(bytes): metadata = struct.pack(" BinaryVector: @@ -522,6 +533,12 @@ class Binary(bytes): dtype_format = "B" format_string = f"<{n_values}{dtype_format}" unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) + if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0: + warnings.warn( + "Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.", + DeprecationWarning, + stacklevel=2, + ) return BinaryVector(unpacked_uint8s, dtype, padding) else: diff --git a/doc/changelog.rst b/doc/changelog.rst index 933e2922d..e4da11209 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -58,6 +58,10 @@ PyMongo 4.13 brings a number of changes including: or the `migration guide `_ for more information. - Fixed a bug where :class:`pymongo.write_concern.WriteConcern` repr was not eval-able when using ``w="majority"``. +- When padding is set, ignored bits in a BSON BinaryVector of PACKED_BIT dtype should be set to zero. + When encoding, this is enforced and is a breaking change. + It is not yet enforced when decoding, so reading from the database will not fail, however a warning will be triggered. + From PyMongo 5.0, this rule will be enforced for both encoding and decoding. Issues Resolved ............... diff --git a/test/test_bson.py b/test/test_bson.py index e9a1dd1ca..e4cf85c46 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -739,7 +739,7 @@ class TestBSON(unittest.TestCase): """Tests of subtype 9""" # We start with valid cases, across the 3 dtypes implemented. # Work with a simple vector that can be interpreted as int8, float32, or ubyte - list_vector = [127, 7] + list_vector = [127, 8] # As INT8, vector has length 2 binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8) vector = binary_vector.as_vector() @@ -764,18 +764,18 @@ class TestBSON(unittest.TestCase): uncompressed = "" for val in list_vector: uncompressed += format(val, "08b") - assert uncompressed[:-padding] == "0111111100000" + assert uncompressed[:-padding] == "0111111100001" # It is worthwhile explicitly showing the values encoded to BSON padded_doc = {"padded_vec": padded_vec} assert ( encode(padded_doc) - == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00" + == b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00" ) # and dumped to json assert ( json_util.dumps(padded_doc) - == '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}' + == '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}' ) # FLOAT32 is also implemented @@ -784,15 +784,19 @@ class TestBSON(unittest.TestCase): # Now some invalid cases for x in [-1, 257]: - try: + with self.assertRaises(struct.error): Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT) - except Exception as exc: - self.assertIsInstance(exc, struct.error) - else: - self.fail("Failed to raise an exception.") + + # Test one must pass zeros for all ignored bits + with self.assertRaises(ValueError): + Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7) + + with self.assertWarns(DeprecationWarning): + meta = struct.pack("