PYTHON-3245 Support explicit queryable encryption (#959)

This commit is contained in:
Shane Harvey 2022-06-03 13:11:28 -07:00 committed by GitHub
parent 09385be549
commit 154d8787c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 298 additions and 18 deletions

View File

@ -93,6 +93,7 @@ do
cpjson client-side-encryption/corpus/ client-side-encryption/corpus
cpjson client-side-encryption/external/ client-side-encryption/external
cpjson client-side-encryption/limits/ client-side-encryption/limits
cpjson client-side-encryption/etc/data client-side-encryption/etc/data
;;
cmap|CMAP|connection-monitoring-and-pooling)
cpjson connection-monitoring-and-pooling/tests cmap

View File

@ -139,7 +139,7 @@ if [ -n "$TEST_ENCRYPTION" ]; then
export PYMONGOCRYPT_LIB
# TODO: Test with 'pip install pymongocrypt'
git clone --branch master https://github.com/mongodb/libmongocrypt.git libmongocrypt_git
git clone https://github.com/mongodb/libmongocrypt.git libmongocrypt_git
python -m pip install --prefer-binary -r .evergreen/test-encryption-requirements.txt
python -m pip install ./libmongocrypt_git/bindings/python
python -c "import pymongocrypt; print('pymongocrypt version: '+pymongocrypt.__version__)"

View File

@ -15,6 +15,7 @@
"""Support for explicit client-side field level encryption."""
import contextlib
import enum
import uuid
import weakref
from typing import Any, Mapping, Optional, Sequence
@ -303,6 +304,7 @@ class _Encrypter(object):
crypt_shared_lib_path=opts._crypt_shared_lib_path,
crypt_shared_lib_required=opts._crypt_shared_lib_required,
bypass_encryption=opts._bypass_auto_encryption,
bypass_query_analysis=opts._bypass_query_analysis,
),
)
self._closed = False
@ -352,11 +354,33 @@ class _Encrypter(object):
self._internal_client = None
class Algorithm(object):
class Algorithm(str, enum.Enum):
    """An enum that defines the supported encryption algorithms.

    Members also subclass :class:`str`, so each one compares equal to the
    plain algorithm name it wraps (for example
    ``Algorithm.INDEXED == "Indexed"``).
    """

    AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic = "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic"
    """AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic."""
    AEAD_AES_256_CBC_HMAC_SHA_512_Random = "AEAD_AES_256_CBC_HMAC_SHA_512-Random"
    """AEAD_AES_256_CBC_HMAC_SHA_512_Random."""
    INDEXED = "Indexed"
    """Indexed.

    .. versionadded:: 4.2
    """
    UNINDEXED = "Unindexed"
    """Unindexed.

    .. versionadded:: 4.2
    """
class QueryType(enum.IntEnum):
    """An enum that defines the supported values for explicit encryption query_type.

    Members are integers (:class:`enum.IntEnum`), so they can be passed
    anywhere a plain ``int`` query type is accepted.

    .. versionadded:: 4.2
    """

    EQUALITY = 1
    """Used to encrypt a value for an equality query."""
class ClientEncryption(object):
@ -550,6 +574,9 @@ class ClientEncryption(object):
algorithm: str,
key_id: Optional[Binary] = None,
key_alt_name: Optional[str] = None,
index_key_id: Optional[Binary] = None,
query_type: Optional[int] = None,
contention_factor: Optional[int] = None,
) -> Binary:
"""Encrypt a BSON value with a given key and algorithm.
@ -564,20 +591,38 @@ class ClientEncryption(object):
:class:`~bson.binary.Binary` with subtype 4 (
:attr:`~bson.binary.UUID_SUBTYPE`).
- `key_alt_name`: Identifies a key vault document by 'keyAltName'.
- `index_key_id`: The index key id to use for Queryable Encryption.
Must be a :class:`~bson.binary.Binary` with subtype 4 (:attr:`~bson.binary.UUID_SUBTYPE`).
- `query_type` (int): The query type to execute. See
:class:`QueryType` for valid options.
- `contention_factor` (int): The contention factor to use
when the algorithm is "Indexed".
:Returns:
The encrypted value, a :class:`~bson.binary.Binary` with subtype 6.
.. versionchanged:: 4.2
Added the `index_key_id`, `query_type`, and `contention_factor` parameters.
"""
self._check_closed()
if key_id is not None and not (
isinstance(key_id, Binary) and key_id.subtype == UUID_SUBTYPE
):
raise TypeError("key_id must be a bson.binary.Binary with subtype 4")
if index_key_id is not None and not (
isinstance(index_key_id, Binary) and index_key_id.subtype == UUID_SUBTYPE
):
raise TypeError("index_key_id must be a bson.binary.Binary with subtype 4")
doc = encode({"v": value}, codec_options=self._codec_options)
with _wrap_encryption_errors():
encrypted_doc = self._encryption.encrypt(
doc, algorithm, key_id=key_id, key_alt_name=key_alt_name
doc,
algorithm,
key_id=key_id,
key_alt_name=key_alt_name,
index_key_id=index_key_id,
query_type=query_type,
contention_factor=contention_factor,
)
return decode(encrypted_doc)["v"] # type: ignore[index]

View File

@ -47,6 +47,7 @@ class AutoEncryptionOpts(object):
kms_tls_options: Optional[Mapping[str, Any]] = None,
crypt_shared_lib_path: Optional[str] = None,
crypt_shared_lib_required: bool = False,
bypass_query_analysis: bool = False,
) -> None:
"""Options to configure automatic client-side field level encryption.
@ -145,9 +146,14 @@ class AutoEncryptionOpts(object):
- `crypt_shared_lib_path` (optional): Override the path to load the crypt_shared library.
- `crypt_shared_lib_required` (optional): If True, raise an error if libmongocrypt is
unable to load the crypt_shared library.
- `bypass_query_analysis` (optional): If ``True``, disable automatic analysis of
outgoing commands. Set `bypass_query_analysis` to use explicit
encryption on indexed fields without the MongoDB Enterprise Advanced
licensed crypt_shared library.
.. versionchanged:: 4.2
Added `crypt_shared_lib_path` and `crypt_shared_lib_required` parameters
Added `crypt_shared_lib_path`, `crypt_shared_lib_required`, and `bypass_query_analysis`
parameters.
.. versionchanged:: 4.0
Added the `kms_tls_options` parameter and the "kmip" KMS provider.
@ -179,3 +185,4 @@ class AutoEncryptionOpts(object):
self._mongocryptd_spawn_args.append("--idleShutdownTimeoutSecs=60")
# Maps KMS provider name to a SSLContext.
self._kms_ssl_contexts = _parse_kms_tls_options(kms_tls_options)
self._bypass_query_analysis = bypass_query_analysis

View File

@ -0,0 +1,33 @@
{
"escCollection": "enxcol_.default.esc",
"eccCollection": "enxcol_.default.ecc",
"ecocCollection": "enxcol_.default.ecoc",
"fields": [
{
"keyId": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedIndexed",
"bsonType": "string",
"queries": {
"queryType": "equality",
"contention": {
"$numberLong": "0"
}
}
},
{
"keyId": {
"$binary": {
"base64": "q83vqxI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"path": "encryptedUnindexed",
"bsonType": "string"
}
]
}

View File

@ -0,0 +1,30 @@
{
"_id": {
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"keyMaterial": {
"$binary": {
"base64": "sHe0kz57YW7v8g9VP9sf/+K1ex4JqKc5rf/URX3n3p8XdZ6+15uXPaSayC6adWbNxkFskuMCOifDoTT+rkqMtFkDclOy884RuGGtUysq3X7zkAWYTKi8QAfKkajvVbZl2y23UqgVasdQu3OVBQCrH/xY00nNAs/52e958nVjBuzQkSb1T8pKJAyjZsHJ60+FtnfafDZSTAIBJYn7UWBCwQ==",
"subType": "00"
}
},
"creationDate": {
"$date": {
"$numberLong": "1648914851981"
}
},
"updateDate": {
"$date": {
"$numberLong": "1648914851981"
}
},
"status": {
"$numberInt": "0"
},
"masterKey": {
"provider": "local"
}
}

View File

@ -0,0 +1,6 @@
{
"$binary": {
"base64": "EjRWeBI0mHYSNBI0VniQEg==",
"subType": "04"
}
}

View File

@ -0,0 +1,30 @@
{
"_id": {
"$binary": {
"base64": "q83vqxI0mHYSNBI0VniQEg==",
"subType": "04"
}
},
"keyMaterial": {
"$binary": {
"base64": "HBk9BWihXExNDvTp1lUxOuxuZK2Pe2ZdVdlsxPEBkiO1bS4mG5NNDsQ7zVxJAH8BtdOYp72Ku4Y3nwc0BUpIKsvAKX4eYXtlhv5zUQxWdeNFhg9qK7qb8nqhnnLeT0f25jFSqzWJoT379hfwDeu0bebJHr35QrJ8myZdPMTEDYF08QYQ48ShRBli0S+QzBHHAQiM2iJNr4svg2WR8JSeWQ==",
"subType": "00"
}
},
"creationDate": {
"$date": {
"$numberLong": "1648914851981"
}
},
"updateDate": {
"$date": {
"$numberLong": "1648914851981"
}
},
"status": {
"$numberInt": "0"
},
"masterKey": {
"provider": "local"
}
}

View File

@ -0,0 +1,6 @@
{
"$binary": {
"base64": "q83vqxI0mHYSNBI0VniQEg==",
"subType": "04"
}
}

View File

@ -51,14 +51,14 @@ from test.utils import (
from test.utils_spec_runner import SpecRunner
from bson import encode, json_util
from bson.binary import JAVA_LEGACY, STANDARD, UUID_SUBTYPE, Binary, UuidRepresentation
from bson.binary import UUID_SUBTYPE, Binary, UuidRepresentation
from bson.codec_options import CodecOptions
from bson.errors import BSONError
from bson.json_util import JSONOptions
from bson.son import SON
from pymongo import encryption
from pymongo.cursor import CursorType
from pymongo.encryption import Algorithm, ClientEncryption
from pymongo.encryption import Algorithm, ClientEncryption, QueryType
from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts
from pymongo.errors import (
BulkWriteError,
@ -212,11 +212,11 @@ class EncryptionIntegrationTest(IntegrationTest):
BASE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "client-side-encryption")
SPEC_PATH = os.path.join(BASE, "spec")
OPTS = CodecOptions(uuid_representation=STANDARD)
OPTS = CodecOptions()
# Use SON to preserve the order of fields while parsing json. Use tz_aware
# =False to match how CodecOptions decodes dates.
JSON_OPTS = JSONOptions(document_class=SON, uuid_representation=STANDARD, tz_aware=False)
JSON_OPTS = JSONOptions(document_class=SON, tz_aware=False)
def read(*paths):
@ -324,7 +324,7 @@ class TestClientSimple(EncryptionIntegrationTest):
class TestEncryptedBulkWrite(BulkTestBase, EncryptionIntegrationTest):
def test_upsert_uuid_standard_encrypte(self):
def test_upsert_uuid_standard_encrypt(self):
opts = AutoEncryptionOpts(KMS_PROVIDERS, "keyvault.datakeys")
client = rs_or_single_client(auto_encryption_opts=opts)
self.addCleanup(client.close)
@ -449,11 +449,19 @@ class TestExplicitSimple(EncryptionIntegrationTest):
msg = "key_id must be a bson.binary.Binary with subtype 4"
algo = Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic
uid = uuid.uuid4()
with self.assertRaisesRegex(TypeError, msg):
client_encryption.encrypt("str", algo, key_id=uuid.uuid4()) # type: ignore[arg-type]
client_encryption.encrypt("str", algo, key_id=uid) # type: ignore[arg-type]
with self.assertRaisesRegex(TypeError, msg):
client_encryption.encrypt("str", algo, key_id=Binary(b"123"))
msg = "index_key_id must be a bson.binary.Binary with subtype 4"
algo = Algorithm.INDEXED
with self.assertRaisesRegex(TypeError, msg):
client_encryption.encrypt("str", algo, index_key_id=uid) # type: ignore[arg-type]
with self.assertRaisesRegex(TypeError, msg):
client_encryption.encrypt("str", algo, index_key_id=Binary(b"123"))
def test_bson_errors(self):
client_encryption = ClientEncryption(
KMS_PROVIDERS, "keyvault.datakeys", client_context.client, OPTS
@ -466,7 +474,7 @@ class TestExplicitSimple(EncryptionIntegrationTest):
client_encryption.encrypt(
unencodable_value,
Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
key_id=Binary(uuid.uuid4().bytes, UUID_SUBTYPE),
key_id=Binary.from_uuid(uuid.uuid4()),
)
def test_codec_options(self):
@ -475,7 +483,7 @@ class TestExplicitSimple(EncryptionIntegrationTest):
KMS_PROVIDERS, "keyvault.datakeys", client_context.client, None # type: ignore[arg-type]
)
opts = CodecOptions(uuid_representation=JAVA_LEGACY)
opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
client_encryption_legacy = ClientEncryption(
KMS_PROVIDERS, "keyvault.datakeys", client_context.client, opts
)
@ -493,8 +501,9 @@ class TestExplicitSimple(EncryptionIntegrationTest):
self.assertEqual(decrypted_value_legacy, value)
# Encrypt the same UUID with STANDARD codec options.
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
client_encryption = ClientEncryption(
KMS_PROVIDERS, "keyvault.datakeys", client_context.client, OPTS
KMS_PROVIDERS, "keyvault.datakeys", client_context.client, opts
)
self.addCleanup(client_encryption.close)
encrypted_standard = client_encryption.encrypt(
@ -986,9 +995,7 @@ class TestCorpus(EncryptionIntegrationTest):
)
self.addCleanup(vault.drop)
client_encrypted = rs_or_single_client(
auto_encryption_opts=opts, uuidRepresentation="standard"
)
client_encrypted = rs_or_single_client(auto_encryption_opts=opts)
self.addCleanup(client_encrypted.close)
client_encryption = ClientEncryption(
@ -1436,7 +1443,7 @@ class AzureGCPEncryptionTestMixin(object):
ciphertext = client_encryption.encrypt(
"string0",
algorithm=Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
key_id=Binary.from_uuid(self.DEK["_id"], STANDARD),
key_id=self.DEK["_id"],
)
self.assertEqual(bytes(ciphertext), base64.b64decode(expectation))
@ -1972,9 +1979,124 @@ class TestKmsTLSOptions(EncryptionIntegrationTest):
with self.assertRaisesRegex(EncryptionError, "expired|certificate verify failed"):
self.client_encryption_expired.create_data_key("kmip")
# Invalid cert hostname error.
with self.assertRaisesRegex(EncryptionError, "IP address mismatch|wronghost"):
with self.assertRaisesRegex(
EncryptionError, "IP address mismatch|wronghost|IPAddressMismatch"
):
self.client_encryption_invalid_hostname.create_data_key("kmip")
# https://github.com/mongodb/specifications/blob/d4c9432/source/client-side-encryption/tests/README.rst#explicit-encryption
class TestExplicitQueryableEncryption(EncryptionIntegrationTest):
    """Prose tests for explicit (non-automatic) Queryable Encryption.

    Each test encrypts values with :class:`ClientEncryption` using the
    ``Indexed`` / ``Unindexed`` algorithms and, where applicable, stores and
    queries them through a client configured with ``bypass_query_analysis``.
    """

    @client_context.require_no_standalone
    @client_context.require_version_min(6, 0, -1)
    def setUp(self):
        """Create the encrypted collection, the key vault, and both clients."""
        super().setUp()
        self.encrypted_fields = json_data("etc", "data", "encryptedFields.json")
        self.key1_document = json_data("etc", "data", "keys", "key1-document.json")
        self.key1_id = self.key1_document["_id"]

        # Start from an empty database, then create the state collections
        # named in encryptedFields before the encrypted collection itself.
        self.db = self.client.test_queryable_encryption
        self.client.drop_database(self.db)
        for state_collection in ("escCollection", "eccCollection", "ecocCollection"):
            self.db.command("create", self.encrypted_fields[state_collection])
        self.db.command("create", "explicit_encryption", encryptedFields=self.encrypted_fields)

        key_vault = create_key_vault(self.client.keyvault.datakeys, self.key1_document)
        self.addCleanup(key_vault.drop)

        # Explicit encryption helper backed by the same key vault.
        self.key_vault_client = self.client
        self.client_encryption = ClientEncryption(
            {"local": {"key": LOCAL_MASTER_KEY}}, key_vault.full_name, self.key_vault_client, OPTS
        )
        self.addCleanup(self.client_encryption.close)

        # Client used to write and read the explicitly encrypted payloads.
        opts = AutoEncryptionOpts(
            {"local": {"key": LOCAL_MASTER_KEY}},
            key_vault.full_name,
            bypass_query_analysis=True,
        )
        self.encrypted_client = rs_or_single_client(auto_encryption_opts=opts)
        self.addCleanup(self.encrypted_client.close)

    def test_01_insert_encrypted_indexed_and_find(self):
        """Insert an Indexed payload and find it with an equality payload."""
        val = "encrypted indexed value"
        coll = self.encrypted_client[self.db.name].explicit_encryption

        insert_payload = self.client_encryption.encrypt(val, Algorithm.INDEXED, self.key1_id)
        coll.insert_one({"encryptedIndexed": insert_payload})

        find_payload = self.client_encryption.encrypt(
            val, Algorithm.INDEXED, self.key1_id, query_type=QueryType.EQUALITY
        )
        docs = list(coll.find({"encryptedIndexed": find_payload}))
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]["encryptedIndexed"], val)

    def test_02_insert_encrypted_indexed_and_find_contention(self):
        """Insert with a contention factor and query with and without it."""
        val = "encrypted indexed value"
        contention = 10
        coll = self.encrypted_client[self.db.name].explicit_encryption

        for _ in range(contention):
            insert_payload = self.client_encryption.encrypt(
                val, Algorithm.INDEXED, self.key1_id, contention_factor=contention
            )
            coll.insert_one({"encryptedIndexed": insert_payload})

        # Find without contention_factor is non-deterministic: it may return
        # only some (or none) of the inserted documents, never more than all.
        find_payload = self.client_encryption.encrypt(
            val, Algorithm.INDEXED, self.key1_id, query_type=QueryType.EQUALITY
        )
        docs = list(coll.find({"encryptedIndexed": find_payload}))
        self.assertLessEqual(len(docs), contention)
        for doc in docs:
            self.assertEqual(doc["encryptedIndexed"], val)

        # Find with contention_factor will return every inserted document.
        find_payload = self.client_encryption.encrypt(
            val,
            Algorithm.INDEXED,
            self.key1_id,
            query_type=QueryType.EQUALITY,
            contention_factor=contention,
        )
        docs = list(coll.find({"encryptedIndexed": find_payload}))
        self.assertEqual(len(docs), contention)
        for doc in docs:
            self.assertEqual(doc["encryptedIndexed"], val)

    def test_03_insert_encrypted_unindexed(self):
        """Insert an Unindexed payload and read it back by ``_id``."""
        val = "encrypted unindexed value"
        coll = self.encrypted_client[self.db.name].explicit_encryption

        insert_payload = self.client_encryption.encrypt(val, Algorithm.UNINDEXED, self.key1_id)
        coll.insert_one({"_id": 1, "encryptedUnindexed": insert_payload})

        docs = list(coll.find({"_id": 1}))
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]["encryptedUnindexed"], val)

    def test_04_roundtrip_encrypted_indexed(self):
        """Encrypt with Indexed and decrypt back to the original value."""
        val = "encrypted indexed value"
        payload = self.client_encryption.encrypt(val, Algorithm.INDEXED, self.key1_id)
        decrypted = self.client_encryption.decrypt(payload)
        self.assertEqual(decrypted, val)

    def test_05_roundtrip_encrypted_unindexed(self):
        """Encrypt with Unindexed and decrypt back to the original value."""
        # Use the unindexed plaintext here (as test_03 does); the previous
        # value was a copy of the indexed test's string.
        val = "encrypted unindexed value"
        payload = self.client_encryption.encrypt(val, Algorithm.UNINDEXED, self.key1_id)
        decrypted = self.client_encryption.decrypt(payload)
        self.assertEqual(decrypted, val)
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()