From c673d8b3cea48f65615cf632fb287e1b9e57be72 Mon Sep 17 00:00:00 2001 From: Prashant Mital <5883388+prashantmital@users.noreply.github.com> Date: Thu, 17 Dec 2020 13:58:03 -0800 Subject: [PATCH] PYTHON-2318 Atlas Data Lake testing (#500) --- .evergreen/config.yml | 37 ++++++++++++ .evergreen/run-tests.sh | 23 ++++++-- test/__init__.py | 18 +++++- test/crud_v2_format.py | 48 ++++++++++++++++ test/data_lake/aggregate.json | 53 ++++++++++++++++++ test/data_lake/estimatedDocumentCount.json | 25 +++++++++ test/data_lake/find.json | 65 ++++++++++++++++++++++ test/data_lake/getMore.json | 57 +++++++++++++++++++ test/data_lake/listCollections.json | 25 +++++++++ test/data_lake/listDatabases.json | 24 ++++++++ test/data_lake/runCommand.json | 31 +++++++++++ test/test_crud_v2.py | 35 ++---------- test/test_data_lake.py | 61 ++++++++++++++++++++ test/utils_spec_runner.py | 2 +- 14 files changed, 466 insertions(+), 38 deletions(-) create mode 100644 test/crud_v2_format.py create mode 100644 test/data_lake/aggregate.json create mode 100644 test/data_lake/estimatedDocumentCount.json create mode 100644 test/data_lake/find.json create mode 100644 test/data_lake/getMore.json create mode 100644 test/data_lake/listCollections.json create mode 100644 test/data_lake/listDatabases.json create mode 100644 test/data_lake/runCommand.json create mode 100644 test/test_data_lake.py diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 8000bea13..c37309df7 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -301,6 +301,25 @@ functions: - key: MONGODB_STARTED value: "1" + "bootstrap data lake": + - command: shell.exec + type: setup + params: + script: | + set -o xtrace + ${PREPARE_SHELL} + cd ${DRIVERS_TOOLS}/.evergreen/atlas_data_lake + DRIVERS_TOOLS="${DRIVERS_TOOLS}" sh build-mongohouse-local.sh + - command: shell.exec + type: setup + params: + background: true + script: | + set -o xtrace + ${PREPARE_SHELL} + cd ${DRIVERS_TOOLS}/.evergreen/atlas_data_lake + DRIVERS_TOOLS="${DRIVERS_TOOLS}" sh run-mongohouse-local.sh + "stop mongo-orchestration": - command: shell.exec params: @@ -405,6 +424,7 @@ functions: COMPRESSORS=${COMPRESSORS} \ AUTH=${AUTH} \ SSL=${SSL} \ + DATA_LAKE=${DATA_LAKE} \ sh ${PROJECT_DIRECTORY}/.evergreen/run-tests.sh "run enterprise auth tests": @@ -1157,6 +1177,13 @@ tasks: commands: - func: "run atlas tests" + - name: atlas-data-lake-tests + commands: + - func: "bootstrap data lake" + - func: "run tests" + vars: + DATA_LAKE: "true" + - name: test-ocsp-rsa-valid-cert-server-staples tags: ["ocsp", "ocsp-rsa", "ocsp-staple"] commands: @@ -2547,6 +2574,16 @@ buildvariants: tasks: - name: "atlas-connect" +- matrix_name: "data-lake-spec-tests" + matrix_spec: + platform: ubuntu-16.04 + python-version: ["2.7", "3.4", "3.8"] + auth: "auth" + c-extensions: "*" + display_name: "Atlas Data Lake ${python-version} ${c-extensions}" + tasks: + - name: atlas-data-lake-tests + - matrix_name: "ocsp-test" matrix_spec: platform: ubuntu-16.04 diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index 66b126a60..50357d49d 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -19,7 +19,6 @@ else set +x fi - AUTH=${AUTH:-noauth} SSL=${SSL:-nossl} PYTHON_BINARY=${PYTHON_BINARY:-} @@ -30,6 +29,7 @@ COMPRESSORS=${COMPRESSORS:-} TEST_ENCRYPTION=${TEST_ENCRYPTION:-} LIBMONGOCRYPT_URL=${LIBMONGOCRYPT_URL:-} SETDEFAULTENCODING=${SETDEFAULTENCODING:-} +DATA_LAKE=${DATA_LAKE:-} if [ -n "$COMPRESSORS" ]; then export COMPRESSORS=$COMPRESSORS @@ -38,8 +38,13 @@ fi export JAVA_HOME=/opt/java/jdk8 if [ "$AUTH" != "noauth" ]; then - export DB_USER="bob" - export DB_PASSWORD="pwd123" + if [ -z "$DATA_LAKE" ]; then + export DB_USER="bob" + export DB_PASSWORD="pwd123" + else + export DB_USER="mhuser" + export DB_PASSWORD="pencil" + fi fi if [ "$SSL" != "nossl" ]; then @@ -149,9 +154,15 @@ fi PYTHON_IMPL=$($PYTHON -c "import platform, sys; sys.stdout.write(platform.python_implementation())") if [ $PYTHON_IMPL = "Jython" ]; then - EXTRA_ARGS="-J-XX:-UseGCOverheadLimit -J-Xmx4096m" + PYTHON_ARGS="-J-XX:-UseGCOverheadLimit -J-Xmx4096m" else - EXTRA_ARGS="" + PYTHON_ARGS="" +fi + +if [ -z "$DATA_LAKE" ]; then + TEST_ARGS="" +else + TEST_ARGS="-s test.test_data_lake" fi # Don't download unittest-xml-reporting from pypi, which often fails. @@ -200,7 +211,7 @@ if [ -z "$GREEN_FRAMEWORK" ]; then # causing this script to exit. $PYTHON -c "from bson import _cbson; from pymongo import _cmessage" fi - $COVERAGE_OR_PYTHON $EXTRA_ARGS $COVERAGE_ARGS setup.py $C_EXTENSIONS test $OUTPUT + $COVERAGE_OR_PYTHON $PYTHON_ARGS $COVERAGE_ARGS setup.py $C_EXTENSIONS test $TEST_ARGS $OUTPUT else # --no_ext has to come before "test" so there is no way to toggle extensions here. $PYTHON green_framework_test.py $GREEN_FRAMEWORK $OUTPUT diff --git a/test/__init__.py b/test/__init__.py index a517e1a27..18e88dc66 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -191,6 +191,7 @@ class ClientContext(object): self.sessions_enabled = False self.client = None self.conn_lock = threading.Lock() + self.is_data_lake = False if COMPRESSORS: self.default_client_options["compressors"] = COMPRESSORS @@ -231,6 +232,19 @@ class ClientContext(object): def _init_client(self): self.client = self._connect(host, port) + + if self.client is not None: + # Return early when connected to dataLake as mongohoused does not + # support the getCmdLineOpts command and is tested without TLS. + build_info = self.client.admin.command('buildInfo') + if 'dataLake' in build_info: + self.is_data_lake = True + self.auth_enabled = True + self.client = self._connect( + host, port, username=db_user, password=db_pwd) + self.connected = True + return + if HAVE_SSL and not self.client: # Is MongoDB configured for SSL? self.client = self._connect(host, port, **TLS_OPTIONS) @@ -845,14 +859,14 @@ def teardown(): if garbage: assert False, '\n'.join(garbage) c = client_context.client - if c: + if c and not client_context.is_data_lake: c.drop_database("pymongo-pooling-tests") c.drop_database("pymongo_test") c.drop_database("pymongo_test1") c.drop_database("pymongo_test2") c.drop_database("pymongo_test_mike") c.drop_database("pymongo_test_bernie") - c.close() + c.close() # Jython does not support gc.get_objects. if not sys.platform.startswith('java'): diff --git a/test/crud_v2_format.py b/test/crud_v2_format.py new file mode 100644 index 000000000..55dcaae5f --- /dev/null +++ b/test/crud_v2_format.py @@ -0,0 +1,48 @@ +# Copyright 2020-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""v2 format CRUD test runner. + +https://github.com/mongodb/specifications/blob/master/source/crud/tests/README.rst +""" + +from test.utils_spec_runner import SpecRunner + + +class TestCrudV2(SpecRunner): + # Default test database and collection names. + TEST_DB = None + TEST_COLLECTION = None + + def get_scenario_db_name(self, scenario_def): + """Crud spec says database_name is optional.""" + return scenario_def.get('database_name', self.TEST_DB) + + def get_scenario_coll_name(self, scenario_def): + """Crud spec says collection_name is optional.""" + return scenario_def.get('collection_name', self.TEST_COLLECTION) + + def get_object_name(self, op): + """Crud spec says object is optional and defaults to 'collection'.""" + return op.get('object', 'collection') + + def get_outcome_coll_name(self, outcome, collection): + """Crud spec says outcome has an optional 'collection.name'.""" + return outcome['collection'].get('name', collection.name) + + def setup_scenario(self, scenario_def): + """Allow specs to override a test's setup.""" + # PYTHON-1935 Only create the collection if there is data to insert. + if scenario_def['data']: + super(TestCrudV2, self).setup_scenario(scenario_def) diff --git a/test/data_lake/aggregate.json b/test/data_lake/aggregate.json new file mode 100644 index 000000000..99995bca4 --- /dev/null +++ b/test/data_lake/aggregate.json @@ -0,0 +1,53 @@ +{ + "collection_name": "driverdata", + "database_name": "test", + "tests": [ + { + "description": "Aggregate with pipeline (project, sort, limit)", + "operations": [ + { + "object": "collection", + "name": "aggregate", + "arguments": { + "pipeline": [ + { + "$project": { + "_id": 0 + } + }, + { + "$sort": { + "a": 1 + } + }, + { + "$limit": 2 + } + ] + }, + "result": [ + { + "a": 1, + "b": 2, + "c": 3 + }, + { + "a": 2, + "b": 3, + "c": 4 + } + ] + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "aggregate": "driverdata" + } + } + } + ] + } + ] +} diff --git a/test/data_lake/estimatedDocumentCount.json b/test/data_lake/estimatedDocumentCount.json new file mode 100644 index 000000000..d039a51f0 --- /dev/null +++ b/test/data_lake/estimatedDocumentCount.json @@ -0,0 +1,25 @@ +{ + "collection_name": "driverdata", + "database_name": "test", + "tests": [ + { + "description": "estimatedDocumentCount succeeds", + "operations": [ + { + "object": "collection", + "name": "estimatedDocumentCount", + "result": 15 + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "count": "driverdata" + } + } + } + ] + } + ] +} diff --git a/test/data_lake/find.json b/test/data_lake/find.json new file mode 100644 index 000000000..8a3468a13 --- /dev/null +++ b/test/data_lake/find.json @@ -0,0 +1,65 @@ +{ + "collection_name": "driverdata", + "database_name": "test", + "tests": [ + { + "description": "Find with projection and sort", + "operations": [ + { + "object": "collection", + "name": "find", + "arguments": { + "filter": { + "b": { + "$gt": 5 + } + }, + "projection": { + "_id": 0 + }, + "sort": { + "a": 1 + }, + "limit": 5 + }, + "result": [ + { + "a": 5, + "b": 6, + "c": 7 + }, + { + "a": 6, + "b": 7, + "c": 8 + }, + { + "a": 7, + "b": 8, + "c": 9 + }, + { + "a": 8, + "b": 9, + "c": 10 + }, + { + "a": 9, + "b": 10, + "c": 11 + } + ] + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "find": "driverdata" + } + } + } + ] + } + ] +} diff --git a/test/data_lake/getMore.json b/test/data_lake/getMore.json new file mode 100644 index 000000000..fa1deab4f --- /dev/null +++ b/test/data_lake/getMore.json @@ -0,0 +1,57 @@ +{ + "collection_name": "driverdata", + "database_name": "test", + "tests": [ + { + "description": "A successful find event with getMore", + "operations": [ + { + "object": "collection", + "name": "find", + "arguments": { + "filter": { + "a": { + "$gte": 2 + } + }, + "sort": { + "a": 1 + }, + "batchSize": 3, + "limit": 4 + } + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "find": "driverdata", + "filter": { + "a": { + "$gte": 2 + } + }, + "sort": { + "a": 1 + }, + "batchSize": 3, + "limit": 4 + }, + "command_name": "find", + "database_name": "test" + } + }, + { + "command_started_event": { + "command": { + "batchSize": 1 + }, + "command_name": "getMore", + "database_name": "cursors" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/test/data_lake/listCollections.json b/test/data_lake/listCollections.json new file mode 100644 index 000000000..8d8a8f6c1 --- /dev/null +++ b/test/data_lake/listCollections.json @@ -0,0 +1,25 @@ +{ + "database_name": "test", + "tests": [ + { + "description": "ListCollections succeeds", + "operations": [ + { + "name": "listCollections", + "object": "database" + } + ], + "expectations": [ + { + "command_started_event": { + "command_name": "listCollections", + "database_name": "test", + "command": { + "listCollections": 1 + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/test/data_lake/listDatabases.json b/test/data_lake/listDatabases.json new file mode 100644 index 000000000..f8ec9a0bf --- /dev/null +++ b/test/data_lake/listDatabases.json @@ -0,0 +1,24 @@ +{ + "tests": [ + { + "description": "ListDatabases succeeds", + "operations": [ + { + "name": "listDatabases", + "object": "client" + } + ], + "expectations": [ + { + "command_started_event": { + "command_name": "listDatabases", + "database_name": "admin", + "command": { + "listDatabases": 1 + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/test/data_lake/runCommand.json b/test/data_lake/runCommand.json new file mode 100644 index 000000000..f72e863ba --- /dev/null +++ b/test/data_lake/runCommand.json @@ -0,0 +1,31 @@ +{ + "database_name": "test", + "tests": [ + { + "description": "ping succeeds using runCommand", + "operations": [ + { + "name": "runCommand", + "object": "database", + "command_name": "ping", + "arguments": { + "command": { + "ping": 1 + } + } + } + ], + "expectations": [ + { + "command_started_event": { + "command_name": "ping", + "database_name": "test", + "command": { + "ping": 1 + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/test/test_crud_v2.py b/test/test_crud_v2.py index 562e119aa..6d9514f91 100644 --- a/test/test_crud_v2.py +++ b/test/test_crud_v2.py @@ -20,41 +20,19 @@ import sys sys.path[0:0] = [""] from test import unittest +from test.crud_v2_format import TestCrudV2 from test.utils import TestCreator -from test.utils_spec_runner import SpecRunner # Location of JSON test specifications. _TEST_PATH = os.path.join( os.path.dirname(os.path.realpath(__file__)), 'crud', 'v2') -# Default test database and collection names. -TEST_DB = 'testdb' -TEST_COLLECTION = 'testcollection' - -class TestSpec(SpecRunner): - def get_scenario_db_name(self, scenario_def): - """Crud spec says database_name is optional.""" - return scenario_def.get('database_name', TEST_DB) - - def get_scenario_coll_name(self, scenario_def): - """Crud spec says collection_name is optional.""" - return scenario_def.get('collection_name', TEST_COLLECTION) - - def get_object_name(self, op): - """Crud spec says object is optional and defaults to 'collection'.""" - return op.get('object', 'collection') - - def get_outcome_coll_name(self, outcome, collection): - """Crud spec says outcome has an optional 'collection.name'.""" - return outcome['collection'].get('name', collection.name) - - def setup_scenario(self, scenario_def): - """Allow specs to override a test's setup.""" - # PYTHON-1935 Only create the collection if there is data to insert. - if scenario_def['data']: - super(TestSpec, self).setup_scenario(scenario_def) +class TestSpec(TestCrudV2): + # Default test database and collection names. + TEST_DB = 'testdb' + TEST_COLLECTION = 'testcollection' def create_test(scenario_def, test, name): @@ -64,8 +42,7 @@ def create_test(scenario_def, test, name): return run_scenario -test_creator = TestCreator(create_test, TestSpec, _TEST_PATH) -test_creator.create_tests() +TestCreator(create_test, TestSpec, _TEST_PATH).create_tests() if __name__ == "__main__": diff --git a/test/test_data_lake.py b/test/test_data_lake.py new file mode 100644 index 000000000..4ce2cd508 --- /dev/null +++ b/test/test_data_lake.py @@ -0,0 +1,61 @@ +# Copyright 2020-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test Atlas Data Lake.""" + +import os +import sys + +sys.path[0:0] = [""] + +from test import client_context, unittest +from test.crud_v2_format import TestCrudV2 +from test.utils import TestCreator + + +# Location of JSON test specifications. +_TEST_PATH = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "data_lake") + + +class DataLakeTestSpec(TestCrudV2): + # Default test database and collection names. + TEST_DB = 'test' + TEST_COLLECTION = 'driverdata' + + @classmethod + def setUpClass(cls): + super(DataLakeTestSpec, cls).setUpClass() + # Skip these tests unless connected to data lake. + if not client_context.is_data_lake: + raise unittest.SkipTest('Not connected to Atlas Data Lake') + + def setup_scenario(self, scenario_def): + # Spec tests MUST NOT insert data/drop collection for + # data lake testing. + pass + + +def create_test(scenario_def, test, name): + def run_scenario(self): + self.run_scenario(scenario_def, test) + + return run_scenario + + +TestCreator(create_test, DataLakeTestSpec, _TEST_PATH).create_tests() + + +if __name__ == "__main__": + unittest.main() diff --git a/test/utils_spec_runner.py b/test/utils_spec_runner.py index a15537c71..09798fb80 100644 --- a/test/utils_spec_runner.py +++ b/test/utils_spec_runner.py @@ -525,7 +525,7 @@ class SpecRunner(IntegrationTest): def maybe_skip_scenario(self, test): if test.get('skipReason'): - raise unittest.SkipTest(test.get('skipReason')) + self.skipTest(test.get('skipReason')) def get_scenario_db_name(self, scenario_def): """Allow subclasses to override a test's database name."""