PYTHON-5126 - Implemented new test cases for Binary Vector (#2393)

This commit is contained in:
Casey Clements 2025-06-18 13:35:23 -04:00 committed by GitHub
parent 8a94de1c1b
commit 336163aaa0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 39 additions and 18 deletions

View File

@ -490,6 +490,11 @@ class Binary(bytes):
dtype = BinaryVectorDtype(dtype) dtype = BinaryVectorDtype(dtype)
n_values = len(self) - position n_values = len(self) - position
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
raise ValueError(
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
)
if dtype == BinaryVectorDtype.INT8: if dtype == BinaryVectorDtype.INT8:
dtype_format = "b" dtype_format = "b"
format_string = f"<{n_values}{dtype_format}" format_string = f"<{n_values}{dtype_format}"
@ -510,6 +515,10 @@ class Binary(bytes):
elif dtype == BinaryVectorDtype.PACKED_BIT: elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8 # data packed as uint8
if padding and not n_values:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
dtype_format = "B" dtype_format = "B"
format_string = f"<{n_values}{dtype_format}" format_string = f"<{n_values}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, position)) unpacked_uint8s = list(struct.unpack_from(format_string, self, position))

View File

@ -20,6 +20,15 @@
"padding": 0, "padding": 0,
"canonical_bson": "1600000005766563746F7200040000000910007F0700" "canonical_bson": "1600000005766563746F7200040000000910007F0700"
}, },
{
"description": "PACKED_BIT with padding",
"valid": true,
"vector": [127, 8],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
},
{ {
"description": "Empty Vector PACKED_BIT", "description": "Empty Vector PACKED_BIT",
"valid": true, "valid": true,
@ -29,15 +38,6 @@
"padding": 0, "padding": 0,
"canonical_bson": "1400000005766563746F72000200000009100000" "canonical_bson": "1400000005766563746F72000200000009100000"
}, },
{
"description": "PACKED_BIT with padding",
"valid": true,
"vector": [127, 7],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
},
{ {
"description": "Overflow Vector PACKED_BIT", "description": "Overflow Vector PACKED_BIT",
"valid": false, "valid": false,

View File

@ -48,7 +48,7 @@ def create_test(case_spec):
def run_test(self): def run_test(self):
for test_case in case_spec.get("tests", []): for test_case in case_spec.get("tests", []):
description = test_case["description"] description = test_case["description"]
vector_exp = test_case.get("vector", []) vector_exp = test_case.get("vector")
dtype_hex_exp = test_case["dtype_hex"] dtype_hex_exp = test_case["dtype_hex"]
dtype_alias_exp = test_case.get("dtype_alias") dtype_alias_exp = test_case.get("dtype_alias")
padding_exp = test_case.get("padding", 0) padding_exp = test_case.get("padding", 0)
@ -85,14 +85,26 @@ def create_test(case_spec):
self.assertEqual(cB_obs, canonical_bson_exp, description) self.assertEqual(cB_obs, canonical_bson_exp, description)
else: else:
with self.assertRaises((struct.error, ValueError), msg=description): """
# Tests Binary.from_vector #### To prove correct in an invalid case (`valid:false`), one MUST
Binary.from_vector(vector_exp, dtype_exp, padding_exp) - (encoding case) if the vector field is present, raise an exception
# Tests Binary.as_vector when attempting to encode a document from the numeric values, dtype, and padding.
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8")) - (decoding case) if the canonical_bson field is present, raise an exception
decoded_doc = decode(cB_exp) when attempting to deserialize it into the corresponding
binary_obs = decoded_doc[test_key] numeric values, as the field contains corrupted data.
binary_obs.as_vector() """
# Tests Binary.from_vector()
if vector_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
# Tests Binary.as_vector()
if canonical_bson_exp is not None:
with self.assertRaises((struct.error, ValueError), msg=description):
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()
return run_test return run_test