diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 2e0a19d6f..19b4ab370 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -15,7 +15,7 @@ command_type: system # Protect ourself against rogue test case, or curl gone wild, that runs forever # Good rule of thumb: the averageish length a task takes, times 5 # That roughly accounts for variable system performance for various buildvariants -exec_timeout_secs: 1800 # 6 minutes is the longest we'll ever run +exec_timeout_secs: 1800 # 30 minutes is the longest we'll ever run # What to do when evergreen hits the timeout (`post:` tasks are run automatically) timeout: diff --git a/.evergreen/perf.yml b/.evergreen/perf.yml new file mode 100644 index 000000000..5ec500170 --- /dev/null +++ b/.evergreen/perf.yml @@ -0,0 +1,234 @@ +######################################## +# Evergreen Template for MongoDB Drivers +######################################## + +# When a task that used to pass starts to fail +# Go through all versions that may have been skipped to detect +# when the task started failing +stepback: true + +# Mark a failure as a system/bootstrap failure (purple box) rather than a task +# failure by default. +# Actual testing tasks are marked with `type: test` +command_type: system + +# Protect ourself against rogue test case, or curl gone wild, that runs forever +# Good rule of thumb: the averageish length a task takes, times 5 +# That roughly accounts for variable system performance for various buildvariants +exec_timeout_secs: 3600 # 60 minutes is the longest we'll ever run + +# What to do when evergreen hits the timeout (`post:` tasks are run automatically) +timeout: + - command: shell.exec + params: + script: | + ls -la + +functions: + "fetch source": + # Executes git clone and applies the submitted patch, if any + - command: git.get_project + params: + directory: "src" + # Applies the submitted patch, if any + # Deprecated. Should be removed. 
But still needed for certain agents (ZAP) + - command: git.apply_patch + # Make an evergreen expansion file with dynamic values + - command: shell.exec + params: + working_dir: "src" + script: | + # Get the current unique version of this checkout + if [ "${is_patch}" = "true" ]; then + CURRENT_VERSION=$(git describe)-patch-${version_id} + else + CURRENT_VERSION=latest + fi + + export DRIVERS_TOOLS="$(pwd)/../drivers-tools" + export PROJECT_DIRECTORY="$(pwd)" + + # Python has cygwin path problems on Windows. Detect prospective mongo-orchestration home directory + if [ "Windows_NT" = "$OS" ]; then # Magic variable in cygwin + export DRIVERS_TOOLS=$(cygpath -m $DRIVERS_TOOLS) + export PROJECT_DIRECTORY=$(cygpath -m $PROJECT_DIRECTORY) + fi + + export MONGO_ORCHESTRATION_HOME="$DRIVERS_TOOLS/.evergreen/orchestration" + export MONGODB_BINARIES="$DRIVERS_TOOLS/mongodb/bin" + export UPLOAD_BUCKET="${project}" + + cat <<EOT > expansion.yml + CURRENT_VERSION: "$CURRENT_VERSION" + DRIVERS_TOOLS: "$DRIVERS_TOOLS" + MONGO_ORCHESTRATION_HOME: "$MONGO_ORCHESTRATION_HOME" + MONGODB_BINARIES: "$MONGODB_BINARIES" + UPLOAD_BUCKET: "$UPLOAD_BUCKET" + PROJECT_DIRECTORY: "$PROJECT_DIRECTORY" + PREPARE_SHELL: | + set -o errexit + set -o xtrace + export DRIVERS_TOOLS="$DRIVERS_TOOLS" + export MONGO_ORCHESTRATION_HOME="$MONGO_ORCHESTRATION_HOME" + export MONGODB_BINARIES="$MONGODB_BINARIES" + export UPLOAD_BUCKET="$UPLOAD_BUCKET" + export PROJECT_DIRECTORY="$PROJECT_DIRECTORY" + + export TMPDIR="$MONGO_ORCHESTRATION_HOME/db" + export PATH="$MONGODB_BINARIES:$PATH" + export PROJECT="${project}" + EOT + # See what we've done + cat expansion.yml + + # Load the expansion file to make an evergreen variable with the current unique version + - command: expansions.update + params: + file: src/expansion.yml + + "prepare resources": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + rm -rf $DRIVERS_TOOLS + if [ "${project}" = "drivers-tools" ]; then + # If this was a patch build, doing 
a fresh clone would not actually test the patch + cp -R ${PROJECT_DIRECTORY}/ $DRIVERS_TOOLS + else + git clone git://github.com/mongodb-labs/drivers-evergreen-tools.git $DRIVERS_TOOLS + fi + echo "{ \"releases\": { \"default\": \"$MONGODB_BINARIES\" }}" > $MONGO_ORCHESTRATION_HOME/orchestration.config + + "bootstrap mongo-orchestration": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + MONGODB_VERSION=${VERSION} TOPOLOGY=${TOPOLOGY} AUTH=${AUTH} SSL=${SSL} STORAGE_ENGINE=${STORAGE_ENGINE} sh ${DRIVERS_TOOLS}/.evergreen/run-orchestration.sh + # run-orchestration generates expansion file with the MONGODB_URI for the cluster + - command: expansions.update + params: + file: mo-expansion.yml + + "stop mongo-orchestration": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + sh ${DRIVERS_TOOLS}/.evergreen/stop-orchestration.sh + + "run perf tests": + - command: shell.exec + type: test + params: + working_dir: "src" + script: | + ${PREPARE_SHELL} + PROJECT_DIRECTORY=${PROJECT_DIRECTORY} sh ${PROJECT_DIRECTORY}/.evergreen/run-perf-tests.sh + + "attach benchmark test results": + - command: attach.results + params: + file_location: src/report.json + + "send dashboard data": + - command: json.send + params: + name: perf + file: src/results.json + + "cleanup": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + rm -rf $DRIVERS_TOOLS || true + + "fix absolute paths": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + for filename in $(find ${DRIVERS_TOOLS} -name \*.json); do + perl -p -i -e "s|ABSOLUTE_PATH_REPLACEMENT_TOKEN|${DRIVERS_TOOLS}|g" $filename + done + + "windows fix": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do + cat $i | tr -d '\r' > $i.new + mv $i.new $i + done + # Copy client certificate because symlinks do not work on Windows. 
+ cp ${DRIVERS_TOOLS}/.evergreen/x509gen/client.pem ${MONGO_ORCHESTRATION_HOME}/lib/client.pem + + "make files executable": + - command: shell.exec + params: + script: | + ${PREPARE_SHELL} + for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do + chmod +x $i + done + + "install dependencies": + - command: shell.exec + params: + working_dir: "src" + script: | + ${PREPARE_SHELL} + file="${PROJECT_DIRECTORY}/.evergreen/install-dependencies.sh" + # Don't use ${file} syntax here because evergreen treats it as an empty expansion. + [ -f "$file" ] && sh $file || echo "$file not available, skipping" + +pre: + - func: "fetch source" + - func: "prepare resources" + # We don't run perf on Windows (yet) + #- func: "windows fix" + - func: "fix absolute paths" + - func: "make files executable" + # We're not testing with TLS (yet) + #- func: "install dependencies" + +post: + - func: "stop mongo-orchestration" + - func: "cleanup" + +tasks: + - name: "perf-3.0-standalone" + tags: ["perf"] + commands: + - func: "bootstrap mongo-orchestration" + vars: + VERSION: "3.0" + TOPOLOGY: "server" + - func: "run perf tests" + - func: "attach benchmark test results" + - func: "send dashboard data" + + - name: "perf-3.6-standalone" + tags: ["perf"] + commands: + - func: "bootstrap mongo-orchestration" + vars: + VERSION: "3.6" + TOPOLOGY: "server" + - func: "run perf tests" + - func: "attach benchmark test results" + - func: "send dashboard data" + +buildvariants: + +- name: "perf-tests" + display_name: "Performance Benchmark Tests" + batchtime: 10080 # 7 days + run_on: centos6-perf + tasks: + - name: "perf-3.0-standalone" + - name: "perf-3.6-standalone" + diff --git a/.evergreen/run-perf-tests.sh b/.evergreen/run-perf-tests.sh new file mode 100644 index 000000000..bbebf34c9 --- /dev/null +++ b/.evergreen/run-perf-tests.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -o xtrace +set -o errexit + +git clone 
https://github.com/mongodb-labs/driver-performance-test-data.git +cd driver-performance-test-data +tar xf extended_bson.tgz +tar xf parallel.tgz +tar xf single_and_multi_document.tgz +cd .. + +export TEST_PATH="${PROJECT_DIRECTORY}/driver-performance-test-data" +export OUTPUT_FILE="${PROJECT_DIRECTORY}/results.json" + +MTCBIN=/opt/mongodbtoolchain/v2/bin +VIRTUALENV="$MTCBIN/virtualenv -p $MTCBIN/python2.7" + +$VIRTUALENV pyperftest +. pyperftest/bin/activate +pip install simplejson + +python setup.py build_ext -i +start_time=$(date +%s) +python test/performance/perf_test.py +end_time=$(date +%s) +elapsed_secs=$((end_time-start_time)) + +cat results.json + +echo "{\"failures\": 0, \"results\": [{\"status\": \"pass\", \"exit_code\": 0, \"test_file\": \"BenchMarkTests\", \"start\": $start_time, \"end\": $end_time, \"elapsed\": $elapsed_secs}]}" > report.json + +cat report.json diff --git a/test/performance/perf_test.py b/test/performance/perf_test.py index d86185b78..cf98364e1 100644 --- a/test/performance/perf_test.py +++ b/test/performance/perf_test.py @@ -14,32 +14,47 @@ """Tests for the MongoDB Driver Performance Benchmarking Spec.""" -import json import multiprocessing as mp import os -import tempfile -import shutil import sys +import tempfile import warnings +try: + import simplejson as json +except ImportError: + import json + sys.path[0:0] = [""] -from bson import BSON, CodecOptions +from bson import BSON from bson.json_util import loads -from bson.objectid import ObjectId from gridfs import GridFSBucket from pymongo import MongoClient from pymongo.monotonic import time -from pymongo.operations import InsertOne from test import client_context, host, port, unittest NUM_ITERATIONS = 100 MAX_ITERATION_TIME = 300 NUM_DOCS = 10000 -TEST_PATH = os.path.join( +TEST_PATH = os.environ.get('TEST_PATH', os.path.join( os.path.dirname(os.path.realpath(__file__)), - os.path.join('performance_testdata')) + os.path.join('data'))) + +OUTPUT_FILE = 
os.environ.get('OUTPUT_FILE') + +result_data = [] + +def tearDownModule(): + output = json.dumps({ + 'results': result_data + }, indent=4) + if OUTPUT_FILE: + with open(OUTPUT_FILE, 'w') as opf: + opf.write(output) + else: + print(output) class Timer(object): @@ -57,8 +72,19 @@ class PerformanceTest(object): pass def tearDown(self): + name = self.__class__.__name__ + median = self.percentile(50) + result = self.data_size / median print('Running %s. MEDIAN=%s' % (self.__class__.__name__, self.percentile(50))) + result_data.append({ + 'name': name, + 'results': { + '1': { + 'ops_per_sec': result + } + } + }) def before(self): pass @@ -114,56 +140,63 @@ class BsonDecodingTest(PerformanceTest): self.document = BSON.encode(json.loads(data.read())) def do_task(self): - codec_options = CodecOptions(tz_aware=True) for _ in range(NUM_DOCS): - self.document.decode(codec_options=codec_options) + self.document.decode() class TestFlatEncoding(BsonEncodingTest, unittest.TestCase): dataset = 'flat_bson.json' + data_size = 75310000 class TestFlatDecoding(BsonDecodingTest, unittest.TestCase): dataset = 'flat_bson.json' + data_size = 75310000 class TestDeepEncoding(BsonEncodingTest, unittest.TestCase): dataset = 'deep_bson.json' + data_size = 19640000 class TestDeepDecoding(BsonDecodingTest, unittest.TestCase): dataset = 'deep_bson.json' + data_size = 19640000 class TestFullEncoding(BsonEncodingTest, unittest.TestCase): dataset = 'full_bson.json' + data_size = 57340000 class TestFullDecoding(BsonDecodingTest, unittest.TestCase): dataset = 'full_bson.json' + data_size = 57340000 # SINGLE-DOC BENCHMARKS class TestRunCommand(PerformanceTest, unittest.TestCase): + data_size = 160000 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') def do_task(self): - isMaster = {'isMaster': True} + command = self.client.perftest.command for _ in range(NUM_DOCS): - self.client.perftest.command(isMaster) + 
command("ismaster") class TestDocument(PerformanceTest): def setUp(self): # Location of test data. - with open(os.path.join( + with open( + os.path.join( TEST_PATH, os.path.join( - 'single_document', self.dataset)), 'r') as data: + 'single_and_multi_document', self.dataset)), 'r') as data: self.document = json.loads(data.read()) - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') def tearDown(self): @@ -171,16 +204,16 @@ class TestDocument(PerformanceTest): self.client.drop_database('perftest') def before(self): - self.client.perftest.command({'create': 'corpus'}) - self.corpus = self.client.perftest.corpus + self.corpus = self.client.perftest.create_collection('corpus') def after(self): self.client.perftest.drop_collection('corpus') class TestFindOneByID(TestDocument, unittest.TestCase): + data_size = 16220000 def setUp(self): - self.dataset = 'TWEET.json' + self.dataset = 'tweet.json' super(TestFindOneByID, self).setUp() documents = [self.document.copy() for _ in range(NUM_DOCS)] @@ -189,8 +222,9 @@ class TestFindOneByID(TestDocument, unittest.TestCase): self.inserted_ids = result.inserted_ids def do_task(self): - for i in self.inserted_ids: - self.corpus.find_one({'_id': i}) + find_one = self.corpus.find_one + for _id in self.inserted_ids: + find_one({'_id': _id}) def before(self): pass @@ -200,33 +234,38 @@ class TestFindOneByID(TestDocument, unittest.TestCase): class TestSmallDocInsertOne(TestDocument, unittest.TestCase): + data_size = 2750000 def setUp(self): - self.dataset = 'SMALL_DOC.json' + self.dataset = 'small_doc.json' super(TestSmallDocInsertOne, self).setUp() self.documents = [self.document.copy() for _ in range(NUM_DOCS)] def do_task(self): + insert_one = self.corpus.insert_one for doc in self.documents: - self.corpus.insert_one(doc) + insert_one(doc) class TestLargeDocInsertOne(TestDocument, unittest.TestCase): + data_size = 27310890 def setUp(self): - self.dataset = 
'LARGE_DOC.json' + self.dataset = 'large_doc.json' super(TestLargeDocInsertOne, self).setUp() self.documents = [self.document.copy() for _ in range(10)] def do_task(self): + insert_one = self.corpus.insert_one for doc in self.documents: - self.corpus.insert_one(doc) + insert_one(doc) # MULTI-DOC BENCHMARKS class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase): + data_size = 16220000 def setUp(self): - self.dataset = 'TWEET.json' + self.dataset = 'tweet.json' super(TestFindManyAndEmptyCursor, self).setUp() for _ in range(10): @@ -236,8 +275,7 @@ class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase): self.corpus = self.client.perftest.corpus def do_task(self): - for _ in self.corpus.find(): - pass + list(self.corpus.find()) def before(self): pass @@ -247,40 +285,42 @@ class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase): class TestSmallDocBulkInsert(TestDocument, unittest.TestCase): + data_size = 2750000 def setUp(self): - self.dataset = 'SMALL_DOC.json' + self.dataset = 'small_doc.json' super(TestSmallDocBulkInsert, self).setUp() self.documents = [self.document.copy() for _ in range(NUM_DOCS)] def before(self): - self.client.perftest.command({'create': 'corpus'}) - self.corpus = self.client.perftest.corpus + self.corpus = self.client.perftest.create_collection('corpus') def do_task(self): self.corpus.insert_many(self.documents, ordered=True) class TestLargeDocBulkInsert(TestDocument, unittest.TestCase): + data_size = 27310890 def setUp(self): - self.dataset = 'LARGE_DOC.json' + self.dataset = 'large_doc.json' super(TestLargeDocBulkInsert, self).setUp() self.documents = [self.document.copy() for _ in range(10)] def before(self): - self.client.perftest.command({'create': 'corpus'}) - self.corpus = self.client.perftest.corpus + self.corpus = self.client.perftest.create_collection('corpus') def do_task(self): self.corpus.insert_many(self.documents, ordered=True) class TestGridFsUpload(PerformanceTest, unittest.TestCase): + data_size 
= 52428800 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') gridfs_path = os.path.join( - TEST_PATH, os.path.join('single_document', 'GRIDFS_LARGE')) + TEST_PATH, + os.path.join('single_and_multi_document', 'gridfs_large.bin')) with open(gridfs_path, 'rb') as data: self.document = data.read() @@ -291,8 +331,6 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase): self.client.drop_database('perftest') def before(self): - self.client.perftest.drop_collection('fs.files') - self.client.perftest.drop_collection('fs.chunks') self.bucket.upload_from_stream('init', b'x') def do_task(self): @@ -300,12 +338,14 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase): class TestGridFsDownload(PerformanceTest, unittest.TestCase): + data_size = 52428800 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') gridfs_path = os.path.join( - TEST_PATH, os.path.join('single_document', 'GRIDFS_LARGE')) + TEST_PATH, + os.path.join('single_and_multi_document', 'gridfs_large.bin')) self.bucket = GridFSBucket(self.client.perftest) with open(gridfs_path, 'rb') as gfile: @@ -320,59 +360,58 @@ class TestGridFsDownload(PerformanceTest, unittest.TestCase): self.bucket.open_download_stream(self.uploaded_id).read() +proc_client = None + + +def proc_init(*dummy): + global proc_client + proc_client = MongoClient(host, port) + + # PARALLEL BENCHMARKS -def mp_map(map_func, length): # TODO: create each threads MongoClient in setup - pool = mp.Pool() - pool.map(map_func, length) +def mp_map(map_func, files): + pool = mp.Pool(initializer=proc_init) + pool.map(map_func, files) pool.close() - pool.join() def insert_json_file(filename): - client = MongoClient(host, port) - - documents = [] with open(filename, 'r') as data: - for line in data: - documents.append(json.loads(line)) - - 
client.perftest.corpus.insert_many(documents) + coll = proc_client.perftest.corpus + coll.insert_many([json.loads(line) for line in data]) def insert_json_file_with_file_id(filename): - client = MongoClient(host, port) - documents = [] with open(filename, 'r') as data: for line in data: doc = json.loads(line) doc['file'] = filename documents.append(doc) - - client.perftest.corpus.insert_many(documents) + coll = proc_client.perftest.corpus + coll.insert_many(documents) def read_json_file(filename): - client = MongoClient(host, port) + coll = proc_client.perftest.corpus temp = tempfile.TemporaryFile() try: - for doc in client.perftest.corpus.find({'file': filename}): - temp.write(str(doc) + '\n') + temp.writelines( + [json.dumps(doc) + '\n' for + doc in coll.find({'file': filename}, {'_id': False})]) finally: temp.close() def insert_gridfs_file(filename): - client = MongoClient(host, port) - bucket = GridFSBucket(client.perftest) + bucket = GridFSBucket(proc_client.perftest) with open(filename, 'rb') as gfile: bucket.upload_from_stream(filename, gfile) def read_gridfs_file(filename): - client = MongoClient(host, port) - bucket = GridFSBucket(client.perftest) + bucket = GridFSBucket(proc_client.perftest) temp = tempfile.TemporaryFile() try: @@ -382,8 +421,9 @@ def read_gridfs_file(filename): class TestJsonMultiImport(PerformanceTest, unittest.TestCase): + data_size = 565000000 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') def before(self): @@ -391,7 +431,7 @@ class TestJsonMultiImport(PerformanceTest, unittest.TestCase): self.corpus = self.client.perftest.corpus ldjson_path = os.path.join( - TEST_PATH, os.path.join('parallel', 'LDJSON_MULTI')) + TEST_PATH, os.path.join('parallel', 'ldjson_multi')) self.files = [os.path.join( ldjson_path, s) for s in os.listdir(ldjson_path)] @@ -407,13 +447,14 @@ class TestJsonMultiImport(PerformanceTest, unittest.TestCase): class 
TestJsonMultiExport(PerformanceTest, unittest.TestCase): + data_size = 565000000 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') self.client.perfest.corpus.create_index('file') ldjson_path = os.path.join( - TEST_PATH, os.path.join('parallel', 'LDJSON_MULTI')) + TEST_PATH, os.path.join('parallel', 'ldjson_multi')) self.files = [os.path.join( ldjson_path, s) for s in os.listdir(ldjson_path)] @@ -428,8 +469,9 @@ class TestJsonMultiExport(PerformanceTest, unittest.TestCase): class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase): + data_size = 262144000 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') def before(self): @@ -438,7 +480,7 @@ class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase): self.bucket = GridFSBucket(self.client.perftest) gridfs_path = os.path.join( - TEST_PATH, os.path.join('parallel', 'GRIDFS_MULTI')) + TEST_PATH, os.path.join('parallel', 'gridfs_multi')) self.files = [os.path.join( gridfs_path, s) for s in os.listdir(gridfs_path)] @@ -451,14 +493,15 @@ class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase): class TestGridFsMultiFileDownload(PerformanceTest, unittest.TestCase): + data_size = 262144000 def setUp(self): - self.client = client_context.rs_or_standalone_client + self.client = client_context.client self.client.drop_database('perftest') bucket = GridFSBucket(self.client.perftest) gridfs_path = os.path.join( - TEST_PATH, os.path.join('parallel', 'GRIDFS_MULTI')) + TEST_PATH, os.path.join('parallel', 'gridfs_multi')) self.files = [os.path.join( gridfs_path, s) for s in os.listdir(gridfs_path)] @@ -473,5 +516,6 @@ class TestGridFsMultiFileDownload(PerformanceTest, unittest.TestCase): super(TestGridFsMultiFileDownload, self).tearDown() self.client.drop_database('perftest') + if __name__ == "__main__": 
unittest.main()