PYTHON-3280 Support for Range Indexes (#1140)

This commit is contained in:
Julius Park 2023-01-24 15:33:56 -08:00 committed by GitHub
parent ec074010d8
commit d3117ce75d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 759 additions and 23 deletions

View File

@ -41,7 +41,7 @@ from bson.son import SON
from pymongo import _csot
from pymongo.cursor import Cursor
from pymongo.daemon import _spawn_daemon
from pymongo.encryption_options import AutoEncryptionOpts
from pymongo.encryption_options import AutoEncryptionOpts, RangeOpts
from pymongo.errors import (
ConfigurationError,
EncryptionError,
@ -416,6 +416,14 @@ class Algorithm(str, enum.Enum):
.. versionadded:: 4.2
"""
RANGEPREVIEW = "RangePreview"
"""RangePreview.
.. note:: Support for Range queries is in beta.
Backwards-breaking changes may be made before the final release.
.. versionadded:: 4.4
"""
class QueryType(str, enum.Enum):
@ -430,6 +438,9 @@ class QueryType(str, enum.Enum):
EQUALITY = "equality"
"""Used to encrypt a value for an equality query."""
RANGEPREVIEW = "rangePreview"
"""Used to encrypt a value for a range query."""
class ClientEncryption(Generic[_DocumentType]):
"""Explicit client-side field level encryption."""
@ -627,6 +638,45 @@ class ClientEncryption(Generic[_DocumentType]):
key_material=key_material,
)
def _encrypt_helper(
    self,
    value,
    algorithm,
    key_id=None,
    key_alt_name=None,
    query_type=None,
    contention_factor=None,
    range_opts=None,
    is_expression=False,
):
    """Shared implementation used by the public explicit-encryption methods.

    The value is wrapped as ``{"v": value}``, BSON-encoded with this
    object's codec options and passed to the underlying encryption
    object. The ``"v"`` field of the decoded response is returned.

    :Parameters:
      - `value`: The BSON value (or expression) to encrypt.
      - `algorithm`: The encryption algorithm, forwarded unchanged.
      - `key_id`: Optional data key ``_id``; when given it must be a
        :class:`~bson.binary.Binary` with subtype 4.
      - `key_alt_name`: Optional key alternate name, forwarded unchanged.
      - `query_type`: Optional query type, forwarded unchanged.
      - `contention_factor`: Optional contention factor, forwarded
        unchanged.
      - `range_opts`: Optional options object; when truthy its
        ``document`` is BSON-encoded before being forwarded.
      - `is_expression`: Forwarded unchanged to the encryption call.

    :Raises:
      :class:`TypeError` if `key_id` is provided but is not a
      :class:`~bson.binary.Binary` with subtype 4.
    """
    self._check_closed()

    # Reject anything other than a UUID (subtype 4) binary up front.
    if key_id is not None:
        is_uuid_binary = isinstance(key_id, Binary) and key_id.subtype == UUID_SUBTYPE
        if not is_uuid_binary:
            raise TypeError("key_id must be a bson.binary.Binary with subtype 4")

    payload = encode({"v": value}, codec_options=self._codec_options)

    # A falsy ``range_opts`` is forwarded untouched; otherwise only the
    # encoded bytes of its document are handed on.
    encoded_range_opts = range_opts
    if range_opts:
        encoded_range_opts = encode(range_opts.document, codec_options=self._codec_options)

    with _wrap_encryption_errors():
        encrypted_doc = self._encryption.encrypt(
            value=payload,
            algorithm=algorithm,
            key_id=key_id,
            key_alt_name=key_alt_name,
            query_type=query_type,
            contention_factor=contention_factor,
            range_opts=encoded_range_opts,
            is_expression=is_expression,
        )
        return decode(encrypted_doc)["v"]  # type: ignore[index]
def encrypt(
self,
value: Any,
@ -635,6 +685,7 @@ class ClientEncryption(Generic[_DocumentType]):
key_alt_name: Optional[str] = None,
query_type: Optional[str] = None,
contention_factor: Optional[int] = None,
range_opts: Optional[RangeOpts] = None,
) -> Binary:
"""Encrypt a BSON value with a given key and algorithm.
@ -655,10 +706,10 @@ class ClientEncryption(Generic[_DocumentType]):
when the algorithm is :attr:`Algorithm.INDEXED`. An integer value
*must* be given when the :attr:`Algorithm.INDEXED` algorithm is
used.
- `range_opts`: **(BETA)** An instance of RangeOpts.
.. note:: `query_type` and `contention_factor` are part of the
Queryable Encryption beta. Backwards-breaking changes may be made before the
final release.
.. note:: `query_type`, `contention_factor` and `range_opts` are part of the Queryable Encryption beta.
Backwards-breaking changes may be made before the final release.
:Returns:
The encrypted value, a :class:`~bson.binary.Binary` with subtype 6.
@ -667,23 +718,66 @@ class ClientEncryption(Generic[_DocumentType]):
Added the `query_type` and `contention_factor` parameters.
"""
self._check_closed()
if key_id is not None and not (
isinstance(key_id, Binary) and key_id.subtype == UUID_SUBTYPE
):
raise TypeError("key_id must be a bson.binary.Binary with subtype 4")
return self._encrypt_helper(
value=value,
algorithm=algorithm,
key_id=key_id,
key_alt_name=key_alt_name,
query_type=query_type,
contention_factor=contention_factor,
range_opts=range_opts,
is_expression=False,
)
doc = encode({"v": value}, codec_options=self._codec_options)
with _wrap_encryption_errors():
encrypted_doc = self._encryption.encrypt(
doc,
algorithm,
key_id=key_id,
key_alt_name=key_alt_name,
query_type=query_type,
contention_factor=contention_factor,
)
return decode(encrypted_doc)["v"] # type: ignore[index]
def encrypt_expression(
    self,
    expression: Mapping[str, Any],
    algorithm: str,
    key_id: Optional[Binary] = None,
    key_alt_name: Optional[str] = None,
    query_type: Optional[str] = None,
    contention_factor: Optional[int] = None,
    range_opts: Optional[RangeOpts] = None,
) -> RawBSONDocument:
    """Encrypt a BSON expression with a given key and algorithm.

    Exactly one of ``key_id`` or ``key_alt_name`` must be supplied.

    :Parameters:
      - `expression`: **(BETA)** The BSON aggregate or match expression
        to encrypt.
      - `algorithm` (string): The encryption algorithm to use. See
        :class:`Algorithm` for some valid options.
      - `key_id`: Identifies a data key by ``_id`` which must be a
        :class:`~bson.binary.Binary` with subtype 4 (
        :attr:`~bson.binary.UUID_SUBTYPE`).
      - `key_alt_name`: Identifies a key vault document by 'keyAltName'.
      - `query_type` (str): **(BETA)** The query type to execute. See
        :class:`QueryType` for valid options.
      - `contention_factor` (int): **(BETA)** The contention factor to use
        when the algorithm is :attr:`Algorithm.INDEXED`. An integer value
        *must* be given when the :attr:`Algorithm.INDEXED` algorithm is
        used.
      - `range_opts`: **(BETA)** An instance of RangeOpts.

    .. note:: Support for range queries is in beta.
       Backwards-breaking changes may be made before the final release.

    :Returns:
      The encrypted expression, a :class:`~bson.RawBSONDocument`.

    .. versionadded:: 4.4
    """
    # Same path as ``encrypt``; only the expression flag differs.
    encrypted_expression = self._encrypt_helper(
        is_expression=True,
        value=expression,
        algorithm=algorithm,
        key_id=key_id,
        key_alt_name=key_alt_name,
        query_type=query_type,
        contention_factor=contention_factor,
        range_opts=range_opts,
    )
    return encrypted_expression
def decrypt(self, value: Binary) -> Any:
"""Decrypt an encrypted value.

View File

@ -22,7 +22,7 @@ try:
_HAVE_PYMONGOCRYPT = True
except ImportError:
_HAVE_PYMONGOCRYPT = False
from bson import int64
from pymongo.common import validate_is_mapping
from pymongo.errors import ConfigurationError
from pymongo.uri_parser import _parse_kms_tls_options
@ -219,3 +219,45 @@ class AutoEncryptionOpts(object):
# Maps KMS provider name to a SSLContext.
self._kms_ssl_contexts = _parse_kms_tls_options(kms_tls_options)
self._bypass_query_analysis = bypass_query_analysis
class RangeOpts:
    """Options to configure encrypted queries using the rangePreview algorithm."""

    def __init__(
        self,
        sparsity: int,
        min: Optional[Any] = None,
        max: Optional[Any] = None,
        precision: Optional[int] = None,
    ) -> None:
        """Options to configure encrypted queries using the rangePreview algorithm.

        .. note:: Support for Range queries is in beta.
           Backwards-breaking changes may be made before the final release.

        :Parameters:
          - `sparsity`: An integer.
          - `min`: A BSON scalar value corresponding to the type being queried.
          - `max`: A BSON scalar value corresponding to the type being queried.
          - `precision`: An integer, may only be set for double or decimal128 types.

        .. versionadded:: 4.4
        """
        self.sparsity = sparsity
        self.precision = precision
        self.min = min
        self.max = max

    @property
    def document(self) -> Mapping[str, Any]:
        """Return the options as a mapping, leaving out any that are ``None``.

        ``sparsity`` is wrapped in :class:`~bson.int64.Int64`; the other
        values are passed through as given.
        """
        # Insertion order fixes the key order of the resulting document.
        candidates = {
            "sparsity": int64.Int64(self.sparsity),
            "precision": self.precision,
            "min": self.min,
            "max": self.max,
        }
        return {key: val for key, val in candidates.items() if val is not None}

View File

@ -0,0 +1,36 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDate",
"bsonType": "date",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
},
"min": {
"$date": {
"$numberLong": "0"
}
},
"max": {
"$date": {
"$numberLong": "200"
}
}
}
}
]
}

View File

@ -0,0 +1,26 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDecimal",
"bsonType": "decimal",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
}
}
}
]
}

View File

@ -0,0 +1,35 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDecimalPrecision",
"bsonType": "decimal",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberDecimal": "0.0"
},
"max": {
"$numberDecimal": "200.0"
},
"precision": {
"$numberInt": "2"
}
}
}
]
}

View File

@ -0,0 +1,26 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDouble",
"bsonType": "double",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
}
}
}
]
}

View File

@ -0,0 +1,35 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDoublePrecision",
"bsonType": "double",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberDouble": "0.0"
},
"max": {
"$numberDouble": "200.0"
},
"precision": {
"$numberInt": "2"
}
}
}
]
}

View File

@ -0,0 +1,32 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedInt",
"bsonType": "int",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberInt": "0"
},
"max": {
"$numberInt": "200"
}
}
}
]
}

View File

@ -0,0 +1,32 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedLong",
"bsonType": "long",
"queries": {
"queryType": "rangePreview",
"contention": {
"$numberLong": "0"
},
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberLong": "0"
},
"max": {
"$numberLong": "200"
}
}
}
]
}

View File

@ -0,0 +1,30 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDate",
"bsonType": "date",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberLong": "1"
},
"min": {
"$date": {
"$numberLong": "0"
}
},
"max": {
"$date": {
"$numberLong": "200"
}
}
}
}
]
}

View File

@ -0,0 +1,21 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDecimalNoPrecision",
"bsonType": "decimal",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberInt": "1"
}
}
}
]
}

View File

@ -0,0 +1,29 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDecimalPrecision",
"bsonType": "decimal",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberInt": "1"
},
"min": {
"$numberDecimal": "0.0"
},
"max": {
"$numberDecimal": "200.0"
},
"precision": {
"$numberInt": "2"
}
}
}
]
}

View File

@ -0,0 +1,21 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDoubleNoPrecision",
"bsonType": "double",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberLong": "1"
}
}
}
]
}

View File

@ -0,0 +1,30 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedDoublePrecision",
"bsonType": "double",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberDouble": "0.0"
},
"max": {
"$numberDouble": "200.0"
},
"precision": {
"$numberInt": "2"
}
}
}
]
}

View File

@ -0,0 +1,27 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedInt",
"bsonType": "int",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberInt": "0"
},
"max": {
"$numberInt": "200"
}
}
}
]
}

View File

@ -0,0 +1,27 @@
{
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedLong",
"bsonType": "long",
"queries": {
"queryType": "rangePreview",
"sparsity": {
"$numberLong": "1"
},
"min": {
"$numberLong": "0"
},
"max": {
"$numberLong": "200"
}
}
}
]
}

View File

@ -59,7 +59,7 @@ from test.utils import (
)
from test.utils_spec_runner import SpecRunner
from bson import encode, json_util
from bson import DatetimeMS, Decimal128, encode, json_util
from bson.binary import UUID_SUBTYPE, Binary, UuidRepresentation
from bson.codec_options import CodecOptions
from bson.errors import BSONError
@ -68,7 +68,7 @@ from bson.son import SON
from pymongo import encryption
from pymongo.cursor import CursorType
from pymongo.encryption import Algorithm, ClientEncryption, QueryType
from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts
from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts, RangeOpts
from pymongo.errors import (
AutoReconnect,
BulkWriteError,
@ -2494,5 +2494,198 @@ class TestQueryableEncryptionDocsExample(EncryptionIntegrationTest):
client_encryption.close()
# https://github.com/mongodb/specifications/blob/master/source/client-side-encryption/tests/README.rst#range-explicit-encryption
class TestRangeQueryProse(EncryptionIntegrationTest):
    """Prose tests for explicit encryption with the rangePreview algorithm.

    Each ``test_*`` method runs the same numbered cases (see
    :meth:`run_test_cases`) against one BSON type.
    """

    @client_context.require_no_standalone
    @client_context.require_version_min(6, 2, -1)
    def setUp(self):
        """Create the key vault, a ClientEncryption and an encrypted client."""
        super().setUp()
        self.key1_document = json_data("etc", "data", "keys", "key1-document.json")
        self.key1_id = self.key1_document["_id"]
        self.client.drop_database(self.db)
        key_vault = create_key_vault(self.client.keyvault.datakeys, self.key1_document)
        self.addCleanup(key_vault.drop)
        self.key_vault_client = self.client
        self.client_encryption = ClientEncryption(
            {"local": {"key": LOCAL_MASTER_KEY}}, key_vault.full_name, self.key_vault_client, OPTS
        )
        self.addCleanup(self.client_encryption.close)
        opts = AutoEncryptionOpts(
            {"local": {"key": LOCAL_MASTER_KEY}},
            key_vault.full_name,
            bypass_query_analysis=True,
        )
        self.encrypted_client = rs_or_single_client(auto_encryption_opts=opts)
        self.db = self.encrypted_client.db
        self.addCleanup(self.encrypted_client.close)

    def run_expression_find(self, name, expression, expected_elems, range_opts, use_expr=False):
        """Encrypt ``expression``, run it as a find filter and check the results.

        :Parameters:
          - `name`: Suffix of the encrypted field, i.e. ``encrypted<name>``.
          - `expression`: The plaintext match or aggregate expression.
          - `expected_elems`: Expected field values, ordered by ``_id``.
          - `range_opts`: The RangeOpts used to encrypt the expression.
          - `use_expr`: If true, wrap the encrypted payload in ``$expr``.
        """
        find_payload = self.client_encryption.encrypt_expression(
            expression=expression,
            key_id=self.key1_id,
            algorithm=Algorithm.RANGEPREVIEW,
            query_type=QueryType.RANGEPREVIEW,
            contention_factor=0,
            range_opts=range_opts,
        )
        if use_expr:
            find_payload = {"$expr": find_payload}
        sorted_find = sorted(
            self.encrypted_client.db.explicit_encryption.find(find_payload), key=lambda x: x["_id"]
        )
        # zip() stops at the shorter input, so without this check a query
        # returning too few (or too many) documents would pass unnoticed.
        self.assertEqual(len(sorted_find), len(expected_elems))
        for elem, expected in zip(sorted_find, expected_elems):
            self.assertEqual(elem[f"encrypted{name}"], expected)

    def run_test_cases(self, name, range_opts, cast_func):
        """Run the numbered range cases for one BSON type.

        :Parameters:
          - `name`: Type name used for the fixture file and field name.
          - `range_opts`: The RangeOpts matching the collection's field.
          - `cast_func`: Converts a Python int to the type under test.
        """
        field = f"encrypted{name}"
        encrypted_fields = json_data("etc", "data", f"range-encryptedFields-{name}.json")
        self.db.drop_collection("explicit_encryption", encrypted_fields=encrypted_fields)
        self.db.create_collection("explicit_encryption", encryptedFields=encrypted_fields)

        def encrypt_value(value, opts=range_opts):
            # Every explicit encrypt call below differs only in value/options.
            return self.client_encryption.encrypt(
                value,
                key_id=self.key1_id,
                algorithm=Algorithm.RANGEPREVIEW,
                contention_factor=0,
                range_opts=opts,
            )

        for i in (0, 6, 30, 200):
            self.encrypted_client.db.explicit_encryption.insert_one(
                {field: encrypt_value(cast_func(i))}
            )

        # Case 1.
        insert_payload = encrypt_value(cast_func(6))
        self.assertEqual(self.client_encryption.decrypt(insert_payload), cast_func(6))

        # Case 2.
        self.run_expression_find(
            name,
            {
                "$and": [
                    {field: {"$gte": cast_func(6)}},
                    {field: {"$lte": cast_func(200)}},
                ]
            },
            [cast_func(i) for i in [6, 30, 200]],
            range_opts,
        )

        # Case 3.
        self.run_expression_find(
            name,
            {
                "$and": [
                    {field: {"$gte": cast_func(0)}},
                    {field: {"$lte": cast_func(6)}},
                ]
            },
            [cast_func(i) for i in [0, 6]],
            range_opts,
        )

        # Case 4.
        self.run_expression_find(
            name,
            {
                "$and": [
                    {field: {"$gt": cast_func(30)}},
                ]
            },
            [cast_func(i) for i in [200]],
            range_opts,
        )

        # Case 5.
        self.run_expression_find(
            name,
            {"$and": [{"$lt": [f"${field}", cast_func(30)]}]},
            [cast_func(i) for i in [0, 6]],
            range_opts,
            use_expr=True,
        )

        # The spec says to skip the following tests for no precision decimal or double types.
        if name not in ("DoubleNoPrecision", "DecimalNoPrecision"):
            # Case 6.
            with self.assertRaisesRegex(
                EncryptionError,
                "greater than or equal to the minimum value and less than or equal to the maximum value",
            ):
                encrypt_value(cast_func(201))

            # Case 7.
            # Use a value whose type differs from the one under test.
            with self.assertRaisesRegex(
                EncryptionError, "expected matching 'min' and value type. Got range option"
            ):
                encrypt_value(6.0 if cast_func is int else 6)

            # Case 8.
            # The spec says we must additionally not run this case with any precision type, not just the ones above.
            if "Precision" not in name:
                with self.assertRaisesRegex(
                    EncryptionError,
                    "expected 'precision' to be set with double or decimal128 index, but got:",
                ):
                    encrypt_value(
                        cast_func(6),
                        RangeOpts(min=cast_func(0), max=cast_func(200), sparsity=1, precision=2),
                    )

    def test_double_no_precision(self):
        self.run_test_cases("DoubleNoPrecision", RangeOpts(sparsity=1), float)

    def test_double_precision(self):
        self.run_test_cases(
            "DoublePrecision",
            RangeOpts(min=0.0, max=200.0, sparsity=1, precision=2),
            float,
        )

    def test_decimal_no_precision(self):
        self.run_test_cases(
            "DecimalNoPrecision", RangeOpts(sparsity=1), lambda x: Decimal128(str(x))
        )

    def test_decimal_precision(self):
        self.run_test_cases(
            "DecimalPrecision",
            RangeOpts(min=Decimal128("0.0"), max=Decimal128("200.0"), sparsity=1, precision=2),
            lambda x: Decimal128(str(x)),
        )

    def test_datetime(self):
        self.run_test_cases(
            "Date",
            RangeOpts(min=DatetimeMS(0), max=DatetimeMS(200), sparsity=1),
            lambda x: DatetimeMS(x).as_datetime(),
        )

    def test_int(self):
        self.run_test_cases("Int", RangeOpts(min=0, max=200, sparsity=1), int)
if __name__ == "__main__":
unittest.main()