PYTHON-5126 Resync bson vector spec tests following additions (#2161)

This commit is contained in:
Casey Clements 2025-03-03 12:51:32 -05:00 committed by GitHub
parent 38f97a3b49
commit 6da1fdbed9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 79 additions and 9 deletions

View File

@ -450,6 +450,10 @@ class Binary(bytes):
raise ValueError(f"padding does not apply to {dtype=}")
elif dtype == BinaryVectorDtype.PACKED_BIT: # pack ints in [0, 255] as unsigned uint8
format_str = "B"
if 0 <= padding > 7:
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
if padding and not vector:
raise ValueError("Empty vector with non-zero padding.")
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
format_str = "f"
if padding:

View File

@ -11,6 +11,15 @@
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
},
{
"description": "Vector with decimals and negative value FLOAT32",
"valid": true,
"vector": [127.7, -7.7],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 0,
"canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
},
{
"description": "Empty Vector FLOAT32",
"valid": true,
@ -35,8 +44,22 @@
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 3
"padding": 3,
"canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000"
},
{
"description": "Insufficient vector data with 3 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1700000005766563746F7200050000000927002A2A2A00"
},
{
"description": "Insufficient vector data with 5 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00"
}
]
}

View File

@ -42,7 +42,8 @@
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 3
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000903037F0700"
},
{
"description": "INT8 with float inputs",
@ -54,4 +55,3 @@
}
]
}

View File

@ -2,6 +2,15 @@
"description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT",
"test_key": "vector",
"tests": [
{
"description": "Padding specified with no vector data PACKED_BIT",
"valid": false,
"vector": [],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 1,
"canonical_bson": "1400000005766563746F72000200000009100100"
},
{
"description": "Simple Vector PACKED_BIT",
"valid": true,
@ -44,7 +53,31 @@
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 0
},
{
"description": "Vector with float values PACKED_BIT",
"valid": false,
"vector": [127.5],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 0
},
{
"description": "Exceeding maximum padding PACKED_BIT",
"valid": false,
"vector": [1],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 8,
"canonical_bson": "1500000005766563746F7200030000000910080100"
},
{
"description": "Negative padding PACKED_BIT",
"valid": false,
"vector": [1],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": -1
}
]
}

View File

@ -49,7 +49,7 @@ def create_test(case_spec):
def run_test(self):
for test_case in case_spec.get("tests", []):
description = test_case["description"]
vector_exp = test_case["vector"]
vector_exp = test_case.get("vector", [])
dtype_hex_exp = test_case["dtype_hex"]
dtype_alias_exp = test_case.get("dtype_alias")
padding_exp = test_case.get("padding", 0)
@ -76,9 +76,13 @@ def create_test(case_spec):
self.assertEqual(
vector_obs.dtype, BinaryVectorDtype[dtype_alias_exp], description
)
self.assertEqual(vector_obs.data, vector_exp, description)
self.assertEqual(vector_obs.padding, padding_exp, description)
if dtype_exp in [BinaryVectorDtype.FLOAT32]:
[
self.assertAlmostEqual(vector_obs.data[i], vector_exp[i], delta=1e-5)
for i in range(len(vector_exp))
]
else:
self.assertEqual(vector_obs.data, vector_exp, description)
# Test Binary Vector to BSON
vector_exp = Binary.from_vector(vector_exp, dtype_exp, padding_exp)
cB_obs = binascii.hexlify(encode({test_key: vector_exp})).decode().upper()
@ -86,7 +90,13 @@ def create_test(case_spec):
else:
with self.assertRaises((struct.error, ValueError), msg=description):
# Tests Binary.from_vector
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
# Tests Binary.as_vector
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
decoded_doc = decode(cB_exp)
binary_obs = decoded_doc[test_key]
binary_obs.as_vector()
return run_test