Compare commits

...

1 Commits

Author SHA1 Message Date
Cloud User
9da2c8db39 resyncing specs 12-03-2025 2025-12-03 19:49:33 +00:00
15 changed files with 718 additions and 74 deletions

View File

@ -239,6 +239,17 @@ functions:
working_dir: src
type: test
# Test numpy
test numpy:
- command: subprocess.exec
params:
binary: bash
args:
- .evergreen/just.sh
- test-numpy
working_dir: src
type: test
# Upload coverage
upload coverage:
- command: ec2.assume_role

View File

@ -4768,6 +4768,12 @@ tasks:
- noauth
- pypy
# Test numpy tests
- name: test-numpy
commands:
- func: test numpy
tags: [binary, vector]
# Test standard auth tests
- name: test-standard-auth-v4.2-python3.10-auth-ssl-sharded-cluster
commands:

View File

@ -621,3 +621,42 @@ buildvariants:
- rhel87-small
expansions:
STORAGE_ENGINE: inmemory
# Test numpy tests
- name: test-numpy-rhel8
tasks:
- name: test-numpy
display_name: Test Numpy RHEL8
run_on:
- rhel87-small
tags: [binary-vector]
- name: test-numpy-macos
tasks:
- name: test-numpy
display_name: Test Numpy macOS
run_on:
- macos-14
tags: [binary-vector]
- name: test-numpy-macos-arm64
tasks:
- name: test-numpy
display_name: Test Numpy macOS Arm64
run_on:
- macos-14-arm64
tags: [binary-vector]
- name: test-numpy-win64
tasks:
- name: test-numpy
display_name: Test Numpy Win64
run_on:
- windows-64-vsMulti-small
tags: [binary-vector]
- name: test-numpy-win32
tasks:
- name: test-numpy
display_name: Test Numpy Win32
run_on:
- windows-64-vsMulti-small
expansions:
IS_WIN32: "1"
tags: [binary-vector]

View File

@ -339,6 +339,32 @@ def create_disable_test_commands_variants():
return [create_variant(tasks, display_name, host=host, expansions=expansions)]
def create_test_numpy_tasks():
    """Return the single Evergreen task that runs the numpy tests.

    The task is named ``test-numpy``, carries the ``binary`` and ``vector``
    tags, and its only command is a call to the ``test numpy`` function.
    """
    numpy_call = FunctionCall(func="test numpy")
    return [EvgTask(name="test-numpy", tags=["binary", "vector"], commands=[numpy_call])]
def create_test_numpy_variants() -> list[BuildVariant]:
    """Build one ``Test Numpy`` variant for each supported host.

    Every variant runs only the ``test-numpy`` task and is tagged
    ``binary-vector``. The ``win32`` variant additionally receives the
    ``IS_WIN32`` expansion set to ``"1"``; all other hosts get an empty
    expansions mapping.

    :return: The variants, in the order rhel8, macos, macos-arm64, win64, win32.
    """
    platform_names = ("rhel8", "macos", "macos-arm64", "win64", "win32")
    built = []
    for platform in platform_names:
        platform_host = HOSTS[platform]
        # A fresh mapping per variant so no two variants share state.
        extra = {"IS_WIN32": "1"} if platform == "win32" else {}
        built.append(
            create_variant(
                ["test-numpy"],
                get_variant_name("Test Numpy", platform_host),
                host=platform_host,
                tags=["binary-vector"],
                expansions=extra,
            )
        )
    return built
def create_oidc_auth_variants():
variants = []
for host_name in ["ubuntu22", "macos", "win64"]:
@ -1140,6 +1166,11 @@ def create_run_tests_func():
return "run tests", [setup_cmd, test_cmd]
def create_test_numpy_func():
    """Return the ``test numpy`` function name paired with its command list.

    The single command is produced by ``get_subprocess_exec`` with the
    arguments ``.evergreen/just.sh test-numpy``.
    """
    just_args = [".evergreen/just.sh", "test-numpy"]
    return "test numpy", [get_subprocess_exec(args=just_args)]
def create_cleanup_func():
    """Return the ``cleanup`` function name paired with its command list.

    The single command is produced by ``get_subprocess_exec`` with the
    argument ``.evergreen/scripts/cleanup.sh``.
    """
    cleanup_args = [".evergreen/scripts/cleanup.sh"]
    return "cleanup", [get_subprocess_exec(args=cleanup_args)]

View File

@ -214,3 +214,7 @@ def run() -> None:
if __name__ == "__main__":
run()
# TODO - Make changes here to create a pytest marker that runs as desired
# Use encryption as example

View File

@ -65,6 +65,9 @@ if TYPE_CHECKING:
from array import array as _array
from mmap import mmap as _mmap
import numpy as np
import numpy.typing as npt
class UuidRepresentation:
UNSPECIFIED = 0
@ -234,13 +237,20 @@ class BinaryVector:
__slots__ = ("data", "dtype", "padding")
def __init__(self, data: Sequence[float | int], dtype: BinaryVectorDtype, padding: int = 0):
def __init__(
self,
data: Union[Sequence[float | int], npt.NDArray[np.number]],
dtype: BinaryVectorDtype,
padding: int = 0,
):
"""
:param data: Sequence of numbers representing the mathematical vector.
:param dtype: The data type stored in binary
:param padding: The number of bits in the final byte that are to be ignored
when a vector element's size is less than a byte
and the length of the vector is not a multiple of 8.
(Padding is equivalent to a negative value of `count` in
`numpy.unpackbits <https://numpy.org/doc/stable/reference/generated/numpy.unpackbits.html>`_)
"""
self.data = data
self.dtype = dtype
@ -425,9 +435,19 @@ class Binary(bytes):
...
@classmethod
@overload
def from_vector(
cls: Type[Binary],
vector: Union[BinaryVector, list[int], list[float]],
vector: npt.NDArray[np.number],
dtype: BinaryVectorDtype,
padding: int = 0,
) -> Binary:
...
@classmethod
def from_vector(
cls: Type[Binary],
vector: Union[BinaryVector, list[int], list[float], npt.NDArray[np.number]],
dtype: Optional[BinaryVectorDtype] = None,
padding: Optional[int] = None,
) -> Binary:
@ -459,34 +479,72 @@ class Binary(bytes):
vector = vector.data # type: ignore
padding = 0 if padding is None else padding
if dtype == BinaryVectorDtype.INT8: # pack ints in [-128, 127] as signed int8
format_str = "b"
if padding:
raise ValueError(f"padding does not apply to {dtype=}")
elif dtype == BinaryVectorDtype.PACKED_BIT: # pack ints in [0, 255] as unsigned uint8
format_str = "B"
if 0 <= padding > 7:
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
if padding and not vector:
raise ValueError("Empty vector with non-zero padding.")
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
format_str = "f"
if padding:
raise ValueError(f"padding does not apply to {dtype=}")
else:
raise NotImplementedError("%s not yet supported" % dtype)
if not isinstance(dtype, BinaryVectorDtype):
raise TypeError(
"dtype must be a bson.BinaryVectorDtype of BinaryVectorDType.INT8, PACKED_BIT, FLOAT32"
)
metadata = struct.pack("<sB", dtype.value, padding)
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
if isinstance(vector, list):
if dtype == BinaryVectorDtype.INT8: # pack ints in [-128, 127] as signed int8
format_str = "b"
if padding:
raise ValueError(f"padding does not apply to {dtype=}")
elif dtype == BinaryVectorDtype.PACKED_BIT: # pack ints in [0, 255] as unsigned uint8
format_str = "B"
if 0 <= padding > 7:
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
if padding and not vector:
raise ValueError("Empty vector with non-zero padding.")
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
format_str = "f"
if padding:
raise ValueError(f"padding does not apply to {dtype=}")
else:
raise NotImplementedError("%s not yet supported" % dtype)
data = struct.pack(f"<{len(vector)}{format_str}", *vector)
else: # vector is numpy array or incorrect type.
try:
import numpy as np
except ImportError as exc:
raise ImportError(
"Failed to create binary from vector. Check type. If numpy array, numpy must be installed."
) from exc
if not isinstance(vector, np.ndarray):
raise TypeError(
"Could not create Binary. Vector must be a BinaryVector, list[int], list[float] or numpy ndarray."
)
if vector.ndim != 1:
raise ValueError(
"from_numpy_vector only supports 1D arrays as it creates a single vector."
)
if dtype == BinaryVectorDtype.FLOAT32:
vector = vector.astype(np.dtype("float32"), copy=False)
elif dtype == BinaryVectorDtype.INT8:
if vector.min() >= -128 and vector.max() <= 127:
vector = vector.astype(np.dtype("int8"), copy=False)
else:
raise ValueError("Values found outside INT8 range.")
elif dtype == BinaryVectorDtype.PACKED_BIT:
if vector.min() >= 0 and vector.max() <= 127:
vector = vector.astype(np.dtype("uint8"), copy=False)
else:
raise ValueError("Values found outside UINT8 range.")
else:
raise NotImplementedError("%s not yet supported" % dtype)
data = vector.tobytes()
if padding and len(vector) and not (data[-1] & ((1 << padding) - 1)) == 0:
raise ValueError(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. They must be zero."
)
return cls(metadata + data, subtype=VECTOR_SUBTYPE)
def as_vector(self) -> BinaryVector:
"""From the Binary, create a list of numbers, along with dtype and padding.
def as_vector(self, return_numpy: bool = False) -> BinaryVector:
"""From the Binary, create a list or 1-d numpy array of numbers, along with dtype and padding.
:param return_numpy: If True, BinaryVector.data will be a one-dimensional numpy array. By default, it is a list.
:return: BinaryVector
.. versionadded:: 4.10
@ -495,54 +553,84 @@ class Binary(bytes):
if self.subtype != VECTOR_SUBTYPE:
raise ValueError(f"Cannot decode subtype {self.subtype} as a vector")
position = 0
dtype, padding = struct.unpack_from("<sB", self, position)
position += 2
dtype, padding = struct.unpack_from("<sB", self)
dtype = BinaryVectorDtype(dtype)
n_values = len(self) - position
offset = 2
n_bytes = len(self) - offset
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
raise ValueError(
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
)
if dtype == BinaryVectorDtype.INT8:
dtype_format = "b"
format_string = f"<{n_values}{dtype_format}"
vector = list(struct.unpack_from(format_string, self, position))
return BinaryVector(vector, dtype, padding)
if not return_numpy:
if dtype == BinaryVectorDtype.INT8:
dtype_format = "b"
format_string = f"<{n_bytes}{dtype_format}"
vector = list(struct.unpack_from(format_string, self, offset))
return BinaryVector(vector, dtype, padding)
elif dtype == BinaryVectorDtype.FLOAT32:
n_bytes = len(self) - position
n_values = n_bytes // 4
if n_bytes % 4:
raise ValueError(
"Corrupt data. N bytes for a float32 vector must be a multiple of 4."
)
dtype_format = "f"
format_string = f"<{n_values}{dtype_format}"
vector = list(struct.unpack_from(format_string, self, position))
return BinaryVector(vector, dtype, padding)
elif dtype == BinaryVectorDtype.FLOAT32:
n_values = n_bytes // 4
if n_bytes % 4:
raise ValueError(
"Corrupt data. N bytes for a float32 vector must be a multiple of 4."
)
dtype_format = "f"
format_string = f"<{n_values}{dtype_format}"
vector = list(struct.unpack_from(format_string, self, offset))
return BinaryVector(vector, dtype, padding)
elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8
if padding and not n_values:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
dtype_format = "B"
format_string = f"<{n_values}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
warnings.warn(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
DeprecationWarning,
stacklevel=2,
)
return BinaryVector(unpacked_uint8s, dtype, padding)
elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8
if padding and not n_bytes:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
dtype_format = "B"
format_string = f"<{n_bytes}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, offset))
if padding and n_bytes and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
warnings.warn(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
DeprecationWarning,
stacklevel=2,
)
return BinaryVector(unpacked_uint8s, dtype, padding)
else:
raise NotImplementedError("Binary Vector dtype %s not yet supported" % dtype.name)
else:
raise NotImplementedError("Binary Vector dtype %s not yet supported" % dtype.name)
else: # create a numpy array
try:
import numpy as np
except ImportError as exc:
raise ImportError(
"Converting binary to numpy.ndarray requires numpy to be installed."
) from exc
if dtype == BinaryVectorDtype.INT8:
data = np.frombuffer(self[offset:], dtype="int8")
elif dtype == BinaryVectorDtype.FLOAT32:
if n_bytes % 4:
raise ValueError(
"Corrupt data. N bytes for a float32 vector must be a multiple of 4."
)
data = np.frombuffer(self[offset:], dtype="float32")
elif dtype == BinaryVectorDtype.PACKED_BIT:
# data packed as uint8
if padding and not n_bytes:
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
if padding > 7 or padding < 0:
raise ValueError(f"Corrupt data. Padding ({padding}) must be between 0 and 7.")
data = np.frombuffer(self[offset:], dtype="uint8")
if padding and np.unpackbits(data[-1])[-padding:].sum() > 0:
warnings.warn(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. For pymongo>=5.0, they must be zero.",
DeprecationWarning,
stacklevel=2,
)
else:
raise NotImplementedError("Binary Vector dtype %s not yet supported" % dtype.name)
return BinaryVector(data, dtype, padding)
@property
def subtype(self) -> int:

View File

@ -19,6 +19,7 @@ PyMongo 4.16 brings a number of changes including:
- Use Zstandard support from the standard library for Python 3.14+, and use ``backports.zstd`` for older versions.
- Fixed return type annotation for ``find_one_and_*`` methods on :class:`~pymongo.asynchronous.collection.AsyncCollection`
and :class:`~pymongo.synchronous.collection.Collection` to include ``None``.
- Added support for NumPy 1D-arrays in :class:`bson.binary.BinaryVector`.
Changes in Version 4.15.5 (2025/XX/XX)
--------------------------------------
@ -43,6 +44,7 @@ in this release.
.. _PyMongo 4.15.4 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=47237
Changes in Version 4.15.3 (2025/10/07)
--------------------------------------

View File

@ -2,7 +2,7 @@
set shell := ["bash", "-c"]
# Commonly used command segments.
typing_run := "uv run --group typing --extra aws --extra encryption --extra ocsp --extra snappy --extra test --extra zstd"
typing_run := "uv run --group typing --extra aws --extra encryption --with numpy --extra ocsp --extra snappy --extra test --extra zstd"
docs_run := "uv run --extra docs"
doc_build := "./doc/_build"
mypy_args := "--install-types --non-interactive"
@ -38,14 +38,14 @@ typing: && resync
[group('typing')]
typing-mypy: && resync
{{typing_run}} mypy {{mypy_args}} bson gridfs tools pymongo
{{typing_run}} mypy {{mypy_args}} --config-file mypy_test.ini test
{{typing_run}} mypy {{mypy_args}} test/test_typing.py test/test_typing_strict.py
{{typing_run}} python -m mypy {{mypy_args}} bson gridfs tools pymongo
{{typing_run}} python -m mypy {{mypy_args}} --config-file mypy_test.ini test
{{typing_run}} python -m mypy {{mypy_args}} test/test_typing.py test/test_typing_strict.py
[group('typing')]
typing-pyright: && resync
{{typing_run}} pyright test/test_typing.py test/test_typing_strict.py
{{typing_run}} pyright -p strict_pyrightconfig.json test/test_typing_strict.py
{{typing_run}} python -m pyright test/test_typing.py test/test_typing_strict.py
{{typing_run}} python -m pyright -p strict_pyrightconfig.json test/test_typing_strict.py
[group('lint')]
lint *args="": && resync
@ -57,7 +57,11 @@ lint-manual *args="": && resync
[group('test')]
test *args="-v --durations=5 --maxfail=10": && resync
uv run --extra test pytest {{args}}
uv run --extra test python -m pytest {{args}}
# Run the BSON tests with numpy installed; extra pytest arguments are forwarded.
[group('test')]
test-numpy *args="-v --durations=5 --maxfail=10": && resync
    uv run --extra test --with numpy python -m pytest test/test_bson.py {{args}}
[group('test')]
run-tests *args: && resync

View File

@ -17,7 +17,7 @@
"isMaster",
"hello"
],
"closeConnection": true,
"errorCode": 91,
"appName": "poolCreateMinSizeErrorTest"
}
},

View File

@ -97,14 +97,22 @@
"outcome": {
"servers": {
"a:27017": {
"type": "Unknown",
"topologyVersion": null,
"type": "RSPrimary",
"setName": "rs",
"topologyVersion": {
"processId": {
"$oid": "000000000000000000000001"
},
"counter": {
"$numberLong": "1"
}
},
"pool": {
"generation": 1
"generation": 0
}
}
},
"topologyType": "ReplicaSetNoPrimary",
"topologyType": "ReplicaSetWithPrimary",
"logicalSessionTimeoutMinutes": null,
"setName": "rs"
}

View File

@ -0,0 +1,140 @@
{
"description": "backpressure-network-error-fail",
"schemaVersion": "1.17",
"runOnRequirements": [
{
"minServerVersion": "4.4",
"serverless": "forbid",
"topologies": [
"single",
"replicaset",
"sharded"
]
}
],
"createEntities": [
{
"client": {
"id": "setupClient",
"useMultipleMongoses": false
}
}
],
"initialData": [
{
"collectionName": "backpressure-network-error-fail",
"databaseName": "sdam-tests",
"documents": [
{
"_id": 1
},
{
"_id": 2
}
]
}
],
"tests": [
{
"description": "apply backpressure on network connection errors during connection establishment",
"operations": [
{
"name": "createEntities",
"object": "testRunner",
"arguments": {
"entities": [
{
"client": {
"id": "client",
"useMultipleMongoses": false,
"observeEvents": [
"serverHeartbeatSucceededEvent",
"poolClearedEvent"
],
"uriOptions": {
"retryWrites": false,
"heartbeatFrequencyMS": 1000000,
"serverMonitoringMode": "poll",
"appname": "backpressureNetworkErrorFailTest"
}
}
},
{
"database": {
"id": "database",
"client": "client",
"databaseName": "sdam-tests"
}
},
{
"collection": {
"id": "collection",
"database": "database",
"collectionName": "backpressure-network-error-fail"
}
}
]
}
},
{
"name": "waitForEvent",
"object": "testRunner",
"arguments": {
"client": "client",
"event": {
"serverHeartbeatSucceededEvent": {}
},
"count": 1
}
},
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "setupClient",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": [
"isMaster",
"hello"
],
"appName": "backpressureNetworkErrorFailTest",
"closeConnection": true
}
}
}
},
{
"name": "insertMany",
"object": "collection",
"arguments": {
"documents": [
{
"_id": 3
},
{
"_id": 4
}
]
},
"expectError": {
"isError": true,
"errorLabelsContain": [
"SystemOverloadedError",
"RetryableError"
]
}
}
],
"expectEvents": [
{
"client": "client",
"eventType": "cmap",
"events": []
}
]
}
]
}

View File

@ -0,0 +1,143 @@
{
"description": "backpressure-network-timeout-error",
"schemaVersion": "1.17",
"runOnRequirements": [
{
"minServerVersion": "4.4",
"serverless": "forbid",
"topologies": [
"single",
"replicaset",
"sharded"
]
}
],
"createEntities": [
{
"client": {
"id": "setupClient",
"useMultipleMongoses": false
}
}
],
"initialData": [
{
"collectionName": "backpressure-network-timeout-error",
"databaseName": "sdam-tests",
"documents": [
{
"_id": 1
},
{
"_id": 2
}
]
}
],
"tests": [
{
"description": "apply backpressure on network timeout error during connection establishment",
"operations": [
{
"name": "createEntities",
"object": "testRunner",
"arguments": {
"entities": [
{
"client": {
"id": "client",
"useMultipleMongoses": false,
"observeEvents": [
"serverDescriptionChangedEvent",
"poolClearedEvent"
],
"uriOptions": {
"retryWrites": false,
"heartbeatFrequencyMS": 1000000,
"appname": "backpressureNetworkTimeoutErrorTest",
"serverMonitoringMode": "poll",
"connectTimeoutMS": 250,
"socketTimeoutMS": 250
}
}
},
{
"database": {
"id": "database",
"client": "client",
"databaseName": "sdam-tests"
}
},
{
"collection": {
"id": "collection",
"database": "database",
"collectionName": "backpressure-network-timeout-error"
}
}
]
}
},
{
"name": "waitForEvent",
"object": "testRunner",
"arguments": {
"client": "client",
"event": {
"serverDescriptionChangedEvent": {}
},
"count": 1
}
},
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "setupClient",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": [
"isMaster",
"hello"
],
"blockConnection": true,
"blockTimeMS": 500,
"appName": "backpressureNetworkTimeoutErrorTest"
}
}
}
},
{
"name": "insertMany",
"object": "collection",
"arguments": {
"documents": [
{
"_id": 3
},
{
"_id": 4
}
]
},
"expectError": {
"isError": true,
"errorLabelsContain": [
"SystemOverloadedError",
"RetryableError"
]
}
}
],
"expectEvents": [
{
"client": "client",
"eventType": "cmap",
"events": []
}
]
}
]
}

View File

@ -0,0 +1,106 @@
{
"description": "backpressure-server-description-unchanged-on-min-pool-size-population-error",
"schemaVersion": "1.17",
"runOnRequirements": [
{
"minServerVersion": "4.4",
"serverless": "forbid",
"topologies": [
"single"
]
}
],
"createEntities": [
{
"client": {
"id": "setupClient",
"useMultipleMongoses": false
}
}
],
"tests": [
{
"description": "the server description is not changed on handshake error during minPoolSize population",
"operations": [
{
"name": "createEntities",
"object": "testRunner",
"arguments": {
"entities": [
{
"client": {
"id": "client",
"observeEvents": [
"serverDescriptionChangedEvent",
"connectionClosedEvent"
],
"uriOptions": {
"appname": "authErrorTest",
"minPoolSize": 5,
"maxConnecting": 1,
"serverMonitoringMode": "poll",
"heartbeatFrequencyMS": 1000000
}
}
}
]
}
},
{
"name": "failPoint",
"object": "testRunner",
"arguments": {
"client": "setupClient",
"failPoint": {
"configureFailPoint": "failCommand",
"mode": {
"skip": 1
},
"data": {
"failCommands": [
"hello",
"isMaster"
],
"appName": "authErrorTest",
"closeConnection": true
}
}
}
},
{
"name": "waitForEvent",
"object": "testRunner",
"arguments": {
"client": "client",
"event": {
"serverDescriptionChangedEvent": {}
},
"count": 1
}
},
{
"name": "waitForEvent",
"object": "testRunner",
"arguments": {
"client": "client",
"event": {
"connectionClosedEvent": {}
},
"count": 1
}
}
],
"expectEvents": [
{
"client": "client",
"eventType": "sdam",
"events": [
{
"serverDescriptionChangedEvent": {}
}
]
}
]
}
]
}

View File

@ -282,7 +282,7 @@
"isMaster",
"hello"
],
"closeConnection": true,
"errorCode": 11600,
"appName": "lbSDAMErrorTestClient"
}
}
@ -297,7 +297,7 @@
}
},
"expectError": {
"isClientError": true
"isError": true
}
}
],

View File

@ -19,6 +19,7 @@ from __future__ import annotations
import array
import collections
import datetime
import importlib.util
import mmap
import os
import pickle
@ -71,6 +72,8 @@ from bson.son import SON
from bson.timestamp import Timestamp
from bson.tz_util import FixedOffset, utc
_NUMPY_AVAILABLE = importlib.util.find_spec("numpy") is not None
class NotADict(abc.MutableMapping):
"""Non-dict type that implements the mapping protocol."""
@ -871,6 +874,65 @@ class TestBSON(unittest.TestCase):
BinaryVector([1], BinaryVectorDtype.INT8), BinaryVector([2], BinaryVectorDtype.INT8)
)
@unittest.skipIf(not _NUMPY_AVAILABLE, "numpy optional-dependency not installed.")
def test_vector_from_numpy(self):
    """Round-trip 1-D numpy arrays through Binary.from_vector and as_vector.

    Follows test_vector except that the input is a numpy.ndarray. For every
    dtype the binary is decoded twice: as a list (the default) and as a
    numpy array (``return_numpy=True``).
    """
    import numpy as np

    # Simple data values could be treated as any of our BinaryVectorDtypes
    arr = np.array([2, 3])
    for dtype in (
        BinaryVectorDtype.INT8,
        BinaryVectorDtype.PACKED_BIT,
        BinaryVectorDtype.FLOAT32,
    ):
        with self.subTest(dtype=dtype):
            binary = Binary.from_vector(arr, dtype)
            # as_vector() -> BinaryVector holding a list
            vector = binary.as_vector()
            assert isinstance(vector, BinaryVector)
            assert vector.data == arr.tolist()
            # as_vector(return_numpy=True) -> BinaryVector holding an ndarray
            vector_np = binary.as_vector(return_numpy=True)
            assert isinstance(vector_np, BinaryVector)
            assert isinstance(vector_np.data, np.ndarray)
            # Compare the numpy result (not the list result) with the input.
            assert np.all(vector_np.data == arr)

    # Invalid cases
    with self.assertRaises(ValueError):
        Binary.from_vector(np.array([-1]), BinaryVectorDtype.PACKED_BIT)
    # NOTE(review): 128 fits in a uint8; this expectation mirrors the
    # `vector.max() <= 127` bound currently used by Binary.from_vector for
    # PACKED_BIT numpy input - confirm that bound is intended.
    with self.assertRaises(ValueError):
        Binary.from_vector(np.array([128]), BinaryVectorDtype.PACKED_BIT)
    with self.assertRaises(ValueError):
        Binary.from_vector(np.array([-198]), BinaryVectorDtype.INT8)

    # Unexpected cases
    # Creating a vector of INT8 from a list of doubles will be caught by struct.pack
    # Numpy's default behavior is to cast to the type requested.
    list_floats = [-1.1, 1.1]
    cast_bin = Binary.from_vector(np.array(list_floats), BinaryVectorDtype.INT8)
    vector = cast_bin.as_vector()
    vector_np = cast_bin.as_vector(return_numpy=True)
    assert vector.data != list_floats
    assert vector.data == vector_np.data.tolist() == [-1, 1]
def test_unicode_regex(self):
"""Tests we do not get a segfault for C extension on unicode RegExs.
This had been happening.