PYTHON-5126 - Implemented new test cases for Binary Vector (#2393)

This commit is contained in:
Casey Clements 2025-06-18 13:35:23 -04:00 committed by GitHub
parent 8a94de1c1b
commit 336163aaa0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 39 additions and 18 deletions

View File

@ -490,6 +490,11 @@ class Binary(bytes):
dtype = BinaryVectorDtype(dtype)
n_values = len(self) - position
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
raise ValueError(
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
)
if dtype == BinaryVectorDtype.INT8:
dtype_format = "b"
format_string = f"<{n_values}{dtype_format}"
@ -510,6 +515,10 @@ class Binary(bytes):
elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8
if padding and not n_values:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
dtype_format = "B"
format_string = f"<{n_values}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))

View File

@ -20,6 +20,15 @@
"padding": 0,
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
},
{
"description": "PACKED_BIT with padding",
"valid": true,
"vector": [127, 8],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
},
{
"description": "Empty Vector PACKED_BIT",
"valid": true,
@ -29,15 +38,6 @@
"padding": 0,
"canonical_bson": "1400000005766563746F72000200000009100000"
},
{
"description": "PACKED_BIT with padding",
"valid": true,
"vector": [127, 7],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
},
{
"description": "Overflow Vector PACKED_BIT",
"valid": false,

View File

@ -48,7 +48,7 @@ def create_test(case_spec):
def run_test(self):
for test_case in case_spec.get("tests", []):
description = test_case["description"]
vector_exp = test_case.get("vector", [])
vector_exp = test_case.get("vector")
dtype_hex_exp = test_case["dtype_hex"]
dtype_alias_exp = test_case.get("dtype_alias")
padding_exp = test_case.get("padding", 0)
@ -85,14 +85,26 @@ def create_test(case_spec):
self.assertEqual(cB_obs, canonical_bson_exp, description)
else:
with self.assertRaises((struct.error, ValueError), msg=description):
# Tests Binary.from_vector
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
# Tests Binary.as_vector
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()
"""
#### To prove correct in an invalid case (`valid:false`), one MUST
- (encoding case) if the vector field is present, raise an exception
when attempting to encode a document from the numeric values, dtype, and padding.
- (decoding case) if the canonical_bson field is present, raise an exception
when attempting to deserialize it into the corresponding
numeric values, as the field contains corrupted data.
"""
# Tests Binary.from_vector()
if vector_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
# Tests Binary.as_vector()
if canonical_bson_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()
return run_test