Revert "PYTHON-5126 & PYTHON-5280 Addresses issues raised in DRIVERS-3097 and DRIVERS-3123 " (#2337)
This commit is contained in:
parent
aa41e70523
commit
75f6a3718e
@ -462,10 +462,6 @@ class Binary(bytes):
|
||||
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
|
||||
if padding and not vector:
|
||||
raise ValueError("Empty vector with non-zero padding.")
|
||||
if padding and not (vector[-1] & ((1 << padding) - 1)) == 0: # type: ignore
|
||||
raise ValueError(
|
||||
"If padding p is provided, all bits in the final byte lower than p must be 0."
|
||||
)
|
||||
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
|
||||
format_str = "f"
|
||||
if padding:
|
||||
@ -494,11 +490,6 @@ class Binary(bytes):
|
||||
dtype = BinaryVectorDtype(dtype)
|
||||
n_values = len(self) - position
|
||||
|
||||
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
|
||||
raise ValueError(
|
||||
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
|
||||
)
|
||||
|
||||
if dtype == BinaryVectorDtype.INT8:
|
||||
dtype_format = "b"
|
||||
format_string = f"<{n_values}{dtype_format}"
|
||||
@ -522,12 +513,6 @@ class Binary(bytes):
|
||||
dtype_format = "B"
|
||||
format_string = f"<{n_values}{dtype_format}"
|
||||
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
|
||||
if padding and not n_values:
|
||||
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
|
||||
if padding and n_values and not (unpacked_uint8s[-1] & ((1 << padding) - 1)) == 0:
|
||||
raise ValueError(
|
||||
"Corrupt data. Vector has a padding P, but bits in the final byte lower than P are non-zero."
|
||||
)
|
||||
return BinaryVector(unpacked_uint8s, dtype, padding)
|
||||
|
||||
else:
|
||||
|
||||
@ -20,24 +20,6 @@
|
||||
"padding": 0,
|
||||
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
|
||||
},
|
||||
{
|
||||
"description": "PACKED_BIT with padding",
|
||||
"valid": true,
|
||||
"vector": [127, 8],
|
||||
"dtype_hex": "0x10",
|
||||
"dtype_alias": "PACKED_BIT",
|
||||
"padding": 3,
|
||||
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
|
||||
},
|
||||
{
|
||||
"description": "PACKED_BIT with inconsistent padding",
|
||||
"valid": false,
|
||||
"vector": [127, 7],
|
||||
"dtype_hex": "0x10",
|
||||
"dtype_alias": "PACKED_BIT",
|
||||
"padding": 3,
|
||||
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
|
||||
},
|
||||
{
|
||||
"description": "Empty Vector PACKED_BIT",
|
||||
"valid": true,
|
||||
@ -47,6 +29,15 @@
|
||||
"padding": 0,
|
||||
"canonical_bson": "1400000005766563746F72000200000009100000"
|
||||
},
|
||||
{
|
||||
"description": "PACKED_BIT with padding",
|
||||
"valid": true,
|
||||
"vector": [127, 7],
|
||||
"dtype_hex": "0x10",
|
||||
"dtype_alias": "PACKED_BIT",
|
||||
"padding": 3,
|
||||
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
|
||||
},
|
||||
{
|
||||
"description": "Overflow Vector PACKED_BIT",
|
||||
"valid": false,
|
||||
|
||||
@ -739,7 +739,7 @@ class TestBSON(unittest.TestCase):
|
||||
"""Tests of subtype 9"""
|
||||
# We start with valid cases, across the 3 dtypes implemented.
|
||||
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
|
||||
list_vector = [127, 8]
|
||||
list_vector = [127, 7]
|
||||
# As INT8, vector has length 2
|
||||
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
|
||||
vector = binary_vector.as_vector()
|
||||
@ -764,18 +764,18 @@ class TestBSON(unittest.TestCase):
|
||||
uncompressed = ""
|
||||
for val in list_vector:
|
||||
uncompressed += format(val, "08b")
|
||||
assert uncompressed[:-padding] == "0111111100001"
|
||||
assert uncompressed[:-padding] == "0111111100000"
|
||||
|
||||
# It is worthwhile explicitly showing the values encoded to BSON
|
||||
padded_doc = {"padded_vec": padded_vec}
|
||||
assert (
|
||||
encode(padded_doc)
|
||||
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
|
||||
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
|
||||
)
|
||||
# and dumped to json
|
||||
assert (
|
||||
json_util.dumps(padded_doc)
|
||||
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
|
||||
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
|
||||
)
|
||||
|
||||
# FLOAT32 is also implemented
|
||||
|
||||
@ -48,11 +48,11 @@ def create_test(case_spec):
|
||||
def run_test(self):
|
||||
for test_case in case_spec.get("tests", []):
|
||||
description = test_case["description"]
|
||||
vector_exp = test_case.get("vector", None)
|
||||
vector_exp = test_case.get("vector", [])
|
||||
dtype_hex_exp = test_case["dtype_hex"]
|
||||
dtype_alias_exp = test_case.get("dtype_alias")
|
||||
padding_exp = test_case.get("padding", 0)
|
||||
canonical_bson_exp = test_case.get("canonical_bson", None)
|
||||
canonical_bson_exp = test_case.get("canonical_bson")
|
||||
# Convert dtype hex string into bytes
|
||||
dtype_exp = BinaryVectorDtype(int(dtype_hex_exp, 16).to_bytes(1, byteorder="little"))
|
||||
|
||||
@ -85,25 +85,14 @@ def create_test(case_spec):
|
||||
self.assertEqual(cB_obs, canonical_bson_exp, description)
|
||||
|
||||
else:
|
||||
"""
|
||||
#### To prove correct in an invalid case (`valid:false`), one MUST
|
||||
- if the vector field is present, raise an exception when attempting to encode a document from the numeric values,
|
||||
dtype, and padding.
|
||||
- if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding
|
||||
numeric values, as the field contains corrupted data.
|
||||
"""
|
||||
# Tests Binary.from_vector()
|
||||
if vector_exp is not None:
|
||||
with self.assertRaises((struct.error, ValueError), msg=description):
|
||||
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
|
||||
|
||||
# Tests Binary.as_vector()
|
||||
if canonical_bson_exp is not None:
|
||||
with self.assertRaises((struct.error, ValueError), msg=description):
|
||||
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
|
||||
decoded_doc = decode(cB_exp)
|
||||
binary_obs = decoded_doc[test_key]
|
||||
binary_obs.as_vector()
|
||||
with self.assertRaises((struct.error, ValueError), msg=description):
|
||||
# Tests Binary.from_vector
|
||||
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
|
||||
# Tests Binary.as_vector
|
||||
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
|
||||
decoded_doc = decode(cB_exp)
|
||||
binary_obs = decoded_doc[test_key]
|
||||
binary_obs.as_vector()
|
||||
|
||||
return run_test
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user