From c673d8b3cea48f65615cf632fb287e1b9e57be72 Mon Sep 17 00:00:00 2001
From: Prashant Mital <5883388+prashantmital@users.noreply.github.com>
Date: Thu, 17 Dec 2020 13:58:03 -0800
Subject: [PATCH] PYTHON-2318 Atlas Data Lake testing (#500)

---
 .evergreen/config.yml                      | 37 ++++++++++++
 .evergreen/run-tests.sh                    | 23 ++++++--
 test/__init__.py                           | 18 +++++-
 test/crud_v2_format.py                     | 48 ++++++++++++++++
 test/data_lake/aggregate.json              | 53 ++++++++++++++++++
 test/data_lake/estimatedDocumentCount.json | 25 +++++++++
 test/data_lake/find.json                   | 65 ++++++++++++++++++++++
 test/data_lake/getMore.json                | 57 +++++++++++++++++++
 test/data_lake/listCollections.json        | 25 +++++++++
 test/data_lake/listDatabases.json          | 24 ++++++++
 test/data_lake/runCommand.json             | 31 +++++++++++
 test/test_crud_v2.py                       | 35 ++----------
 test/test_data_lake.py                     | 61 ++++++++++++++++++++
 test/utils_spec_runner.py                  |  2 +-
 14 files changed, 466 insertions(+), 38 deletions(-)
 create mode 100644 test/crud_v2_format.py
 create mode 100644 test/data_lake/aggregate.json
 create mode 100644 test/data_lake/estimatedDocumentCount.json
 create mode 100644 test/data_lake/find.json
 create mode 100644 test/data_lake/getMore.json
 create mode 100644 test/data_lake/listCollections.json
 create mode 100644 test/data_lake/listDatabases.json
 create mode 100644 test/data_lake/runCommand.json
 create mode 100644 test/test_data_lake.py

diff --git a/.evergreen/config.yml b/.evergreen/config.yml
index 8000bea13..c37309df7 100644
--- a/.evergreen/config.yml
+++ b/.evergreen/config.yml
@@ -301,6 +301,25 @@ functions:
           - key: MONGODB_STARTED
             value: "1"
 
+  "bootstrap data lake":
+    - command: shell.exec
+      type: setup
+      params:
+        script: |
+          set -o xtrace
+          ${PREPARE_SHELL}
+          cd ${DRIVERS_TOOLS}/.evergreen/atlas_data_lake
+          DRIVERS_TOOLS="${DRIVERS_TOOLS}" sh build-mongohouse-local.sh
+    - command: shell.exec
+      type: setup
+      params:
+        background: true
+        script: |
+          set -o xtrace
+          ${PREPARE_SHELL}
+          cd ${DRIVERS_TOOLS}/.evergreen/atlas_data_lake
+          DRIVERS_TOOLS="${DRIVERS_TOOLS}" sh run-mongohouse-local.sh
+
   "stop mongo-orchestration":
     - command: shell.exec
       params:
@@ -405,6 +424,7 @@ functions:
             COMPRESSORS=${COMPRESSORS} \
             AUTH=${AUTH} \
             SSL=${SSL} \
+            DATA_LAKE=${DATA_LAKE} \
             sh ${PROJECT_DIRECTORY}/.evergreen/run-tests.sh
 
   "run enterprise auth tests":
@@ -1157,6 +1177,13 @@ tasks:
       commands:
         - func: "run atlas tests"
 
+    - name: atlas-data-lake-tests
+      commands:
+        - func: "bootstrap data lake"
+        - func: "run tests"
+          vars:
+            DATA_LAKE: "true"
+
     - name: test-ocsp-rsa-valid-cert-server-staples
       tags: ["ocsp", "ocsp-rsa", "ocsp-staple"]
       commands:
@@ -2547,6 +2574,16 @@ buildvariants:
   tasks:
     - name: "atlas-connect"
 
+- matrix_name: "data-lake-spec-tests"
+  matrix_spec:
+    platform: ubuntu-16.04
+    python-version: ["2.7", "3.4", "3.8"]
+    auth: "auth"
+    c-extensions: "*"
+  display_name: "Atlas Data Lake ${python-version} ${c-extensions}"
+  tasks:
+    - name: atlas-data-lake-tests
+
 - matrix_name: "ocsp-test"
   matrix_spec:
     platform: ubuntu-16.04
diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh
index 66b126a60..50357d49d 100755
--- a/.evergreen/run-tests.sh
+++ b/.evergreen/run-tests.sh
@@ -19,7 +19,6 @@ else
     set +x
 fi
 
-
 AUTH=${AUTH:-noauth}
 SSL=${SSL:-nossl}
 PYTHON_BINARY=${PYTHON_BINARY:-}
@@ -30,6 +29,7 @@ COMPRESSORS=${COMPRESSORS:-}
 TEST_ENCRYPTION=${TEST_ENCRYPTION:-}
 LIBMONGOCRYPT_URL=${LIBMONGOCRYPT_URL:-}
 SETDEFAULTENCODING=${SETDEFAULTENCODING:-}
+DATA_LAKE=${DATA_LAKE:-}
 
 if [ -n "$COMPRESSORS" ]; then
     export COMPRESSORS=$COMPRESSORS
@@ -38,8 +38,13 @@ fi
 export JAVA_HOME=/opt/java/jdk8
 
 if [ "$AUTH" != "noauth" ]; then
-    export DB_USER="bob"
-    export DB_PASSWORD="pwd123"
+    if [ -z "$DATA_LAKE" ]; then
+        export DB_USER="bob"
+        export DB_PASSWORD="pwd123"
+    else
+        export DB_USER="mhuser"
+        export DB_PASSWORD="pencil"
+    fi
 fi
 
 if [ "$SSL" != "nossl" ]; then
@@ -149,9 +154,15 @@ fi
 
 PYTHON_IMPL=$($PYTHON -c "import platform, sys; sys.stdout.write(platform.python_implementation())")
 if [ $PYTHON_IMPL = "Jython" ]; then
-    EXTRA_ARGS="-J-XX:-UseGCOverheadLimit -J-Xmx4096m"
+    PYTHON_ARGS="-J-XX:-UseGCOverheadLimit -J-Xmx4096m"
 else
-    EXTRA_ARGS=""
+    PYTHON_ARGS=""
+fi
+
+if [ -z "$DATA_LAKE" ]; then
+    TEST_ARGS=""
+else
+    TEST_ARGS="-s test.test_data_lake"
 fi
 
 # Don't download unittest-xml-reporting from pypi, which often fails.
@@ -200,7 +211,7 @@ if [ -z "$GREEN_FRAMEWORK" ]; then
         # causing this script to exit.
         $PYTHON -c "from bson import _cbson; from pymongo import _cmessage"
     fi
-    $COVERAGE_OR_PYTHON $EXTRA_ARGS $COVERAGE_ARGS setup.py $C_EXTENSIONS test $OUTPUT
+    $COVERAGE_OR_PYTHON $PYTHON_ARGS $COVERAGE_ARGS setup.py $C_EXTENSIONS test $TEST_ARGS $OUTPUT
 else
     # --no_ext has to come before "test" so there is no way to toggle extensions here.
     $PYTHON green_framework_test.py $GREEN_FRAMEWORK $OUTPUT
diff --git a/test/__init__.py b/test/__init__.py
index a517e1a27..18e88dc66 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -191,6 +191,7 @@ class ClientContext(object):
         self.sessions_enabled = False
         self.client = None
         self.conn_lock = threading.Lock()
+        self.is_data_lake = False
 
         if COMPRESSORS:
             self.default_client_options["compressors"] = COMPRESSORS
@@ -231,6 +232,19 @@ class ClientContext(object):
 
     def _init_client(self):
         self.client = self._connect(host, port)
+
+        if self.client is not None:
+            # Return early when connected to dataLake as mongohoused does not
+            # support the getCmdLineOpts command and is tested without TLS.
+            build_info = self.client.admin.command('buildInfo')
+            if 'dataLake' in build_info:
+                self.is_data_lake = True
+                self.auth_enabled = True
+                self.client = self._connect(
+                    host, port, username=db_user, password=db_pwd)
+                self.connected = True
+                return
+
         if HAVE_SSL and not self.client:
             # Is MongoDB configured for SSL?
             self.client = self._connect(host, port, **TLS_OPTIONS)
@@ -845,14 +859,17 @@ def teardown():
     if garbage:
         assert False, '\n'.join(garbage)
     c = client_context.client
-    if c:
+    if c and not client_context.is_data_lake:
         c.drop_database("pymongo-pooling-tests")
         c.drop_database("pymongo_test")
         c.drop_database("pymongo_test1")
         c.drop_database("pymongo_test2")
         c.drop_database("pymongo_test_mike")
         c.drop_database("pymongo_test_bernie")
-        c.close()
+    # Data lake runs skip the drops but still close the client; the client
+    # stays None when no connection was made, so guard the close.
+    if c:
+        c.close()
 
     # Jython does not support gc.get_objects.
     if not sys.platform.startswith('java'):
diff --git a/test/crud_v2_format.py b/test/crud_v2_format.py
new file mode 100644
index 000000000..55dcaae5f
--- /dev/null
+++ b/test/crud_v2_format.py
@@ -0,0 +1,48 @@
+# Copyright 2020-present MongoDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""v2 format CRUD test runner.
+
+https://github.com/mongodb/specifications/blob/master/source/crud/tests/README.rst
+"""
+
+from test.utils_spec_runner import SpecRunner
+
+
+class TestCrudV2(SpecRunner):
+    # Fallback database/collection names; None here, set by subclasses.
+    TEST_DB = None
+    TEST_COLLECTION = None
+
+    def get_scenario_db_name(self, scenario_def):
+        """Crud spec says database_name is optional (see TEST_DB)."""
+        return scenario_def.get('database_name', self.TEST_DB)
+
+    def get_scenario_coll_name(self, scenario_def):
+        """Crud spec says collection_name is optional (see TEST_COLLECTION)."""
+        return scenario_def.get('collection_name', self.TEST_COLLECTION)
+
+    def get_object_name(self, op):
+        """Crud spec says object is optional and defaults to 'collection'."""
+        return op.get('object', 'collection')
+
+    def get_outcome_coll_name(self, outcome, collection):
+        """Crud spec says outcome has an optional 'collection.name'."""
+        return outcome['collection'].get('name', collection.name)
+
+    def setup_scenario(self, scenario_def):
+        """Run the inherited setup only when the scenario has data."""
+        # PYTHON-1935 Only create the collection if there is data to insert.
+        if scenario_def['data']:
+            super(TestCrudV2, self).setup_scenario(scenario_def)
diff --git a/test/data_lake/aggregate.json b/test/data_lake/aggregate.json
new file mode 100644
index 000000000..99995bca4
--- /dev/null
+++ b/test/data_lake/aggregate.json
@@ -0,0 +1,53 @@
+{
+  "collection_name": "driverdata",
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "Aggregate with pipeline (project, sort, limit)",
+      "operations": [
+        {
+          "object": "collection",
+          "name": "aggregate",
+          "arguments": {
+            "pipeline": [
+              {
+                "$project": {
+                  "_id": 0
+                }
+              },
+              {
+                "$sort": {
+                  "a": 1
+                }
+              },
+              {
+                "$limit": 2
+              }
+            ]
+          },
+          "result": [
+            {
+              "a": 1,
+              "b": 2,
+              "c": 3
+            },
+            {
+              "a": 2,
+              "b": 3,
+              "c": 4
+            }
+          ]
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command": {
+              "aggregate": "driverdata"
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/test/data_lake/estimatedDocumentCount.json b/test/data_lake/estimatedDocumentCount.json
new file mode 100644
index 000000000..d039a51f0
--- /dev/null
+++ b/test/data_lake/estimatedDocumentCount.json
@@ -0,0 +1,25 @@
+{
+  "collection_name": "driverdata",
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "estimatedDocumentCount succeeds",
+      "operations": [
+        {
+          "object": "collection",
+          "name": "estimatedDocumentCount",
+          "result": 15
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command": {
+              "count": "driverdata"
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/test/data_lake/find.json b/test/data_lake/find.json
new file mode 100644
index 000000000..8a3468a13
--- /dev/null
+++ b/test/data_lake/find.json
@@ -0,0 +1,65 @@
+{
+  "collection_name": "driverdata",
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "Find with projection and sort",
+      "operations": [
+        {
+          "object": "collection",
+          "name": "find",
+          "arguments": {
+            "filter": {
+              "b": {
+                "$gt": 5
+              }
+            },
+            "projection": {
+              "_id": 0
+            },
+            "sort": {
+              "a": 1
+            },
+            "limit": 5
+          },
+          "result": [
+            {
+              "a": 5,
+              "b": 6,
+              "c": 7
+            },
+            {
+              "a": 6,
+              "b": 7,
+              "c": 8
+            },
+            {
+              "a": 7,
+              "b": 8,
+              "c": 9
+            },
+            {
+              "a": 8,
+              "b": 9,
+              "c": 10
+            },
+            {
+              "a": 9,
+              "b": 10,
+              "c": 11
+            }
+          ]
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command": {
+              "find": "driverdata"
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
diff --git a/test/data_lake/getMore.json b/test/data_lake/getMore.json
new file mode 100644
index 000000000..fa1deab4f
--- /dev/null
+++ b/test/data_lake/getMore.json
@@ -0,0 +1,57 @@
+{
+  "collection_name": "driverdata",
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "A successful find event with getMore",
+      "operations": [
+        {
+          "object": "collection",
+          "name": "find",
+          "arguments": {
+            "filter": {
+              "a": {
+                "$gte": 2
+              }
+            },
+            "sort": {
+              "a": 1
+            },
+            "batchSize": 3,
+            "limit": 4
+          }
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command": {
+              "find": "driverdata",
+              "filter": {
+                "a": {
+                  "$gte": 2
+                }
+              },
+              "sort": {
+                "a": 1
+              },
+              "batchSize": 3,
+              "limit": 4
+            },
+            "command_name": "find",
+            "database_name": "test"
+          }
+        },
+        {
+          "command_started_event": {
+            "command": {
+              "batchSize": 1
+            },
+            "command_name": "getMore",
+            "database_name": "cursors"
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/data_lake/listCollections.json b/test/data_lake/listCollections.json
new file mode 100644
index 000000000..8d8a8f6c1
--- /dev/null
+++ b/test/data_lake/listCollections.json
@@ -0,0 +1,25 @@
+{
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "ListCollections succeeds",
+      "operations": [
+        {
+          "name": "listCollections",
+          "object": "database"
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command_name": "listCollections",
+            "database_name": "test",
+            "command": {
+              "listCollections": 1
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/data_lake/listDatabases.json b/test/data_lake/listDatabases.json
new file mode 100644
index 000000000..f8ec9a0bf
--- /dev/null
+++ b/test/data_lake/listDatabases.json
@@ -0,0 +1,24 @@
+{
+  "tests": [
+    {
+      "description": "ListDatabases succeeds",
+      "operations": [
+        {
+          "name": "listDatabases",
+          "object": "client"
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command_name": "listDatabases",
+            "database_name": "admin",
+            "command": {
+              "listDatabases": 1
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/data_lake/runCommand.json b/test/data_lake/runCommand.json
new file mode 100644
index 000000000..f72e863ba
--- /dev/null
+++ b/test/data_lake/runCommand.json
@@ -0,0 +1,31 @@
+{
+  "database_name": "test",
+  "tests": [
+    {
+      "description": "ping succeeds using runCommand",
+      "operations": [
+        {
+          "name": "runCommand",
+          "object": "database",
+          "command_name": "ping",
+          "arguments": {
+            "command": {
+              "ping": 1
+            }
+          }
+        }
+      ],
+      "expectations": [
+        {
+          "command_started_event": {
+            "command_name": "ping",
+            "database_name": "test",
+            "command": {
+              "ping": 1
+            }
+          }
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/test/test_crud_v2.py b/test/test_crud_v2.py
index 562e119aa..6d9514f91 100644
--- a/test/test_crud_v2.py
+++ b/test/test_crud_v2.py
@@ -20,41 +20,19 @@ import sys
 sys.path[0:0] = [""]
 
 from test import unittest
+from test.crud_v2_format import TestCrudV2
 from test.utils import TestCreator
-from test.utils_spec_runner import SpecRunner
 
 
 # Location of JSON test specifications.
 _TEST_PATH = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), 'crud', 'v2')
 
-# Default test database and collection names.
-TEST_DB = 'testdb'
-TEST_COLLECTION = 'testcollection'
 
-
-class TestSpec(SpecRunner):
-    def get_scenario_db_name(self, scenario_def):
-        """Crud spec says database_name is optional."""
-        return scenario_def.get('database_name', TEST_DB)
-
-    def get_scenario_coll_name(self, scenario_def):
-        """Crud spec says collection_name is optional."""
-        return scenario_def.get('collection_name', TEST_COLLECTION)
-
-    def get_object_name(self, op):
-        """Crud spec says object is optional and defaults to 'collection'."""
-        return op.get('object', 'collection')
-
-    def get_outcome_coll_name(self, outcome, collection):
-        """Crud spec says outcome has an optional 'collection.name'."""
-        return outcome['collection'].get('name', collection.name)
-
-    def setup_scenario(self, scenario_def):
-        """Allow specs to override a test's setup."""
-        # PYTHON-1935 Only create the collection if there is data to insert.
-        if scenario_def['data']:
-            super(TestSpec, self).setup_scenario(scenario_def)
+class TestSpec(TestCrudV2):
+    # Default test database and collection names.
+    TEST_DB = 'testdb'
+    TEST_COLLECTION = 'testcollection'
 
 
 def create_test(scenario_def, test, name):
@@ -64,8 +42,7 @@ def create_test(scenario_def, test, name):
     return run_scenario
 
 
-test_creator = TestCreator(create_test, TestSpec, _TEST_PATH)
-test_creator.create_tests()
+TestCreator(create_test, TestSpec, _TEST_PATH).create_tests()
 
 
 if __name__ == "__main__":
diff --git a/test/test_data_lake.py b/test/test_data_lake.py
new file mode 100644
index 000000000..4ce2cd508
--- /dev/null
+++ b/test/test_data_lake.py
@@ -0,0 +1,61 @@
+# Copyright 2020-present MongoDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test Atlas Data Lake."""
+
+import os
+import sys
+
+sys.path[0:0] = [""]
+
+from test import client_context, unittest
+from test.crud_v2_format import TestCrudV2
+from test.utils import TestCreator
+
+
+# Location of JSON test specifications.
+_TEST_PATH = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), "data_lake")
+
+
+class DataLakeTestSpec(TestCrudV2):
+    # Defaults for scenarios that omit database_name/collection_name.
+    TEST_DB = 'test'
+    TEST_COLLECTION = 'driverdata'
+
+    @classmethod
+    def setUpClass(cls):
+        super(DataLakeTestSpec, cls).setUpClass()
+        # Skip the whole class unless connected to Atlas Data Lake.
+        if not client_context.is_data_lake:
+            raise unittest.SkipTest('Not connected to Atlas Data Lake')
+
+    def setup_scenario(self, scenario_def):
+        # Override the inherited setup as a no-op: spec tests MUST NOT
+        # insert data or drop collections for data lake testing.
+        pass
+
+
+def create_test(scenario_def, test, name):
+    def run_scenario(self):
+        self.run_scenario(scenario_def, test)
+
+    return run_scenario
+
+
+TestCreator(create_test, DataLakeTestSpec, _TEST_PATH).create_tests()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/utils_spec_runner.py b/test/utils_spec_runner.py
index a15537c71..09798fb80 100644
--- a/test/utils_spec_runner.py
+++ b/test/utils_spec_runner.py
@@ -525,7 +525,7 @@ class SpecRunner(IntegrationTest):
 
     def maybe_skip_scenario(self, test):
         if test.get('skipReason'):
-            raise unittest.SkipTest(test.get('skipReason'))
+            self.skipTest(test.get('skipReason'))
 
     def get_scenario_db_name(self, scenario_def):
         """Allow subclasses to override a test's database name."""