PYTHON-1277 - Port benchmark tests to Evergreen

This commit is contained in:
Bernie Hackett 2018-01-30 15:20:29 -08:00 committed by Bernie Hackett
parent 6656b7c985
commit ad348a2e0b
4 changed files with 382 additions and 71 deletions

View File

@ -15,7 +15,7 @@ command_type: system
# Protect ourselves against a rogue test case, or curl gone wild, that runs forever
# Good rule of thumb: the averageish length a task takes, times 5
# That roughly accounts for variable system performance for various buildvariants
exec_timeout_secs: 1800 # 6 minutes is the longest we'll ever run
exec_timeout_secs: 1800 # 30 minutes is the longest we'll ever run
# What to do when evergreen hits the timeout (`post:` tasks are run automatically)
timeout:

234
.evergreen/perf.yml Normal file
View File

@ -0,0 +1,234 @@
########################################
# Evergreen Template for MongoDB Drivers
########################################
# When a task that used to pass starts to fail
# Go through all versions that may have been skipped to detect
# when the task started failing
stepback: true
# Mark a failure as a system/bootstrap failure (purple box) rather than a task
# failure by default.
# Actual testing tasks are marked with `type: test`
command_type: system
# Protect ourselves against a rogue test case, or curl gone wild, that runs forever
# Good rule of thumb: the averageish length a task takes, times 5
# That roughly accounts for variable system performance for various buildvariants
exec_timeout_secs: 3600 # 60 minutes is the longest we'll ever run
# What to do when evergreen hits the timeout (`post:` tasks are run automatically)
# The only timeout action defined here lists the current directory with `ls -la`.
timeout:
- command: shell.exec
params:
script: |
ls -la
functions:
# Checks the project out into `src`, then (from inside `src`) writes
# `expansion.yml` with the computed paths plus a PREPARE_SHELL snippet, and
# finally loads that file as Evergreen expansions so later functions can use
# `${PREPARE_SHELL}`, `${DRIVERS_TOOLS}`, `${PROJECT_DIRECTORY}`, etc.
"fetch source":
# Executes git clone and applies the submitted patch, if any
- command: git.get_project
params:
directory: "src"
# Applies the submitted patch, if any
# Deprecated. Should be removed. But still needed for certain agents (ZAP)
- command: git.apply_patch
# Make an evergreen expansion file with dynamic values
- command: shell.exec
params:
working_dir: "src"
script: |
# Get the current unique version of this checkout
if [ "${is_patch}" = "true" ]; then
CURRENT_VERSION=$(git describe)-patch-${version_id}
else
CURRENT_VERSION=latest
fi
export DRIVERS_TOOLS="$(pwd)/../drivers-tools"
export PROJECT_DIRECTORY="$(pwd)"
# Python has cygwin path problems on Windows. Detect prospective mongo-orchestration home directory
if [ "Windows_NT" = "$OS" ]; then # Magic variable in cygwin
export DRIVERS_TOOLS=$(cygpath -m $DRIVERS_TOOLS)
export PROJECT_DIRECTORY=$(cygpath -m $PROJECT_DIRECTORY)
fi
export MONGO_ORCHESTRATION_HOME="$DRIVERS_TOOLS/.evergreen/orchestration"
export MONGODB_BINARIES="$DRIVERS_TOOLS/mongodb/bin"
export UPLOAD_BUCKET="${project}"
cat <<EOT > expansion.yml
CURRENT_VERSION: "$CURRENT_VERSION"
DRIVERS_TOOLS: "$DRIVERS_TOOLS"
MONGO_ORCHESTRATION_HOME: "$MONGO_ORCHESTRATION_HOME"
MONGODB_BINARIES: "$MONGODB_BINARIES"
UPLOAD_BUCKET: "$UPLOAD_BUCKET"
PROJECT_DIRECTORY: "$PROJECT_DIRECTORY"
PREPARE_SHELL: |
set -o errexit
set -o xtrace
export DRIVERS_TOOLS="$DRIVERS_TOOLS"
export MONGO_ORCHESTRATION_HOME="$MONGO_ORCHESTRATION_HOME"
export MONGODB_BINARIES="$MONGODB_BINARIES"
export UPLOAD_BUCKET="$UPLOAD_BUCKET"
export PROJECT_DIRECTORY="$PROJECT_DIRECTORY"
export TMPDIR="$MONGO_ORCHESTRATION_HOME/db"
export PATH="$MONGODB_BINARIES:$PATH"
export PROJECT="${project}"
EOT
# See what we've done
cat expansion.yml
# Load the expansion file to make an evergreen variable with the current unique version
- command: expansions.update
params:
file: src/expansion.yml
# Replaces any existing $DRIVERS_TOOLS directory with a fresh copy of
# drivers-evergreen-tools (or with the project checkout itself when the
# project under test *is* drivers-tools), then writes the mongo-orchestration
# config that points its "default" release at $MONGODB_BINARIES.
"prepare resources":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
rm -rf $DRIVERS_TOOLS
if [ "${project}" = "drivers-tools" ]; then
# If this was a patch build, doing a fresh clone would not actually test the patch
cp -R ${PROJECT_DIRECTORY}/ $DRIVERS_TOOLS
else
# Clone over HTTPS: GitHub no longer serves the unauthenticated git:// protocol,
# and HTTPS matches how the benchmark data repository is cloned.
git clone https://github.com/mongodb-labs/drivers-evergreen-tools.git $DRIVERS_TOOLS
fi
echo "{ \"releases\": { \"default\": \"$MONGODB_BINARIES\" }}" > $MONGO_ORCHESTRATION_HOME/orchestration.config
# Runs drivers-tools' run-orchestration.sh, passing the caller's VERSION,
# TOPOLOGY, AUTH, SSL and STORAGE_ENGINE values through as environment
# variables, then loads the expansion file that script produces.
"bootstrap mongo-orchestration":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
MONGODB_VERSION=${VERSION} TOPOLOGY=${TOPOLOGY} AUTH=${AUTH} SSL=${SSL} STORAGE_ENGINE=${STORAGE_ENGINE} sh ${DRIVERS_TOOLS}/.evergreen/run-orchestration.sh
# run-orchestration generates expansion file with the MONGODB_URI for the cluster
- command: expansions.update
params:
file: mo-expansion.yml
# Runs drivers-tools' stop-orchestration.sh. Referenced from the `post` list.
"stop mongo-orchestration":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
sh ${DRIVERS_TOOLS}/.evergreen/stop-orchestration.sh
# Runs the project's .evergreen/run-perf-tests.sh from the `src` checkout.
# Marked `type: test`, overriding the file-wide `command_type: system`, so a
# failure here is reported as a test failure rather than a system failure.
"run perf tests":
- command: shell.exec
type: test
params:
working_dir: "src"
script: |
${PREPARE_SHELL}
PROJECT_DIRECTORY=${PROJECT_DIRECTORY} sh ${PROJECT_DIRECTORY}/.evergreen/run-perf-tests.sh
# Attaches src/report.json (the pass/fail summary) to the task as its test results.
"attach benchmark test results":
- command: attach.results
params:
file_location: src/report.json
# Sends src/results.json (the benchmark measurements) via json.send under the name "perf".
"send dashboard data":
- command: json.send
params:
name: perf
file: src/results.json
# Removes the drivers-tools directory. `|| true` keeps a failed removal from
# failing the step (PREPARE_SHELL enables errexit).
"cleanup":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
rm -rf $DRIVERS_TOOLS || true
# Rewrites every *.json file under $DRIVERS_TOOLS in place, substituting the
# literal ABSOLUTE_PATH_REPLACEMENT_TOKEN with the actual $DRIVERS_TOOLS path.
"fix absolute paths":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
for filename in $(find ${DRIVERS_TOOLS} -name \*.json); do
perl -p -i -e "s|ABSOLUTE_PATH_REPLACEMENT_TOKEN|${DRIVERS_TOOLS}|g" $filename
done
# Strips carriage returns from every *.sh file under the two .evergreen
# directories and copies the client certificate into the orchestration lib
# directory. Currently not invoked: its entry in `pre` is commented out.
"windows fix":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do
cat $i | tr -d '\r' > $i.new
mv $i.new $i
done
# Copy client certificate because symlinks do not work on Windows.
cp ${DRIVERS_TOOLS}/.evergreen/x509gen/client.pem ${MONGO_ORCHESTRATION_HOME}/lib/client.pem
# Adds the executable bit to every *.sh file under the drivers-tools and
# project .evergreen directories.
"make files executable":
- command: shell.exec
params:
script: |
${PREPARE_SHELL}
for i in $(find ${DRIVERS_TOOLS}/.evergreen ${PROJECT_DIRECTORY}/.evergreen -name \*.sh); do
chmod +x $i
done
# Runs the project's optional .evergreen/install-dependencies.sh from the
# `src` checkout. A missing script is skipped with a message; a script that
# exists but fails now fails the step instead of being reported as
# "not available".
"install dependencies":
- command: shell.exec
params:
working_dir: "src"
script: |
${PREPARE_SHELL}
file="${PROJECT_DIRECTORY}/.evergreen/install-dependencies.sh"
# Don't use ${file} syntax here because evergreen treats it as an empty expansion.
# Explicit if/else rather than `test && run || echo`: with the one-liner a
# failing install script fell through to the echo and the error was hidden.
if [ -f "$file" ]; then
sh "$file"
else
echo "$file not available, skipping"
fi
# Setup functions run ahead of the task commands, in this order. The Windows
# line-ending fix and the dependency installer are defined above but disabled.
pre:
- func: "fetch source"
- func: "prepare resources"
# We don't run perf on Windows (yet)
#- func: "windows fix"
- func: "fix absolute paths"
- func: "make files executable"
# We're not testing with TLS (yet)
#- func: "install dependencies"
# Teardown: stop mongo-orchestration first, then delete the drivers-tools directory.
post:
- func: "stop mongo-orchestration"
- func: "cleanup"
# Two benchmark tasks that differ only in the server VERSION they bootstrap.
# Each one starts a server through mongo-orchestration (TOPOLOGY "server"),
# runs the benchmarks, attaches the pass/fail report and sends the measurements.
tasks:
- name: "perf-3.0-standalone"
tags: ["perf"]
commands:
- func: "bootstrap mongo-orchestration"
vars:
VERSION: "3.0"
TOPOLOGY: "server"
- func: "run perf tests"
- func: "attach benchmark test results"
- func: "send dashboard data"
- name: "perf-3.6-standalone"
tags: ["perf"]
commands:
- func: "bootstrap mongo-orchestration"
vars:
VERSION: "3.6"
TOPOLOGY: "server"
- func: "run perf tests"
- func: "attach benchmark test results"
- func: "send dashboard data"
# A single variant that runs both perf tasks on `centos6-perf`.
buildvariants:
- name: "perf-tests"
display_name: "Performance Benchmark Tests"
batchtime: 10080 # 7 days
run_on: centos6-perf
tasks:
- name: "perf-3.0-standalone"
- name: "perf-3.6-standalone"

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Benchmark runner: downloads the shared driver benchmark data, builds the
# driver inside a Python 2.7 virtualenv, runs test/performance/perf_test.py,
# and writes a pass/fail summary to report.json. Expects PROJECT_DIRECTORY to
# be set by the caller and to be the current working directory's checkout.
set -o xtrace
set -o errexit

# Fetch the benchmark corpus and unpack each archive it ships.
git clone https://github.com/mongodb-labs/driver-performance-test-data.git
cd driver-performance-test-data
for archive in extended_bson.tgz parallel.tgz single_and_multi_document.tgz; do
    tar xf "$archive"
done
cd ..

# perf_test.py reads both of these from the environment.
export TEST_PATH="${PROJECT_DIRECTORY}/driver-performance-test-data"
export OUTPUT_FILE="${PROJECT_DIRECTORY}/results.json"

# Create and activate an isolated interpreter from the MongoDB toolchain.
toolchain_bin=/opt/mongodbtoolchain/v2/bin
"$toolchain_bin/virtualenv" -p "$toolchain_bin/python2.7" pyperftest
. pyperftest/bin/activate
pip install simplejson
python setup.py build_ext -i

# Wall-clock the whole benchmark run in epoch seconds.
start_time=$(date +%s)
python test/performance/perf_test.py
end_time=$(date +%s)
elapsed_secs=$(( end_time - start_time ))

cat results.json

# errexit is on, so this line is only reached when the benchmark run exited
# successfully; the summary therefore always records a single passing entry.
cat > report.json <<EOF
{"failures": 0, "results": [{"status": "pass", "exit_code": 0, "test_file": "BenchMarkTests", "start": $start_time, "end": $end_time, "elapsed": $elapsed_secs}]}
EOF
cat report.json

View File

@ -14,32 +14,47 @@
"""Tests for the MongoDB Driver Performance Benchmarking Spec."""
import json
import multiprocessing as mp
import os
import tempfile
import shutil
import sys
import tempfile
import warnings
try:
import simplejson as json
except ImportError:
import json
sys.path[0:0] = [""]
from bson import BSON, CodecOptions
from bson import BSON
from bson.json_util import loads
from bson.objectid import ObjectId
from gridfs import GridFSBucket
from pymongo import MongoClient
from pymongo.monotonic import time
from pymongo.operations import InsertOne
from test import client_context, host, port, unittest
NUM_ITERATIONS = 100
MAX_ITERATION_TIME = 300
NUM_DOCS = 10000
TEST_PATH = os.path.join(
TEST_PATH = os.environ.get('TEST_PATH', os.path.join(
os.path.dirname(os.path.realpath(__file__)),
os.path.join('performance_testdata'))
os.path.join('data')))
OUTPUT_FILE = os.environ.get('OUTPUT_FILE')
result_data = []
def tearDownModule():
output = json.dumps({
'results': result_data
}, indent=4)
if OUTPUT_FILE:
with open(OUTPUT_FILE, 'w') as opf:
opf.write(output)
else:
print(output)
class Timer(object):
@ -57,8 +72,19 @@ class PerformanceTest(object):
pass
def tearDown(self):
name = self.__class__.__name__
median = self.percentile(50)
result = self.data_size / median
print('Running %s. MEDIAN=%s' % (self.__class__.__name__,
self.percentile(50)))
result_data.append({
'name': name,
'results': {
'1': {
'ops_per_sec': result
}
}
})
def before(self):
pass
@ -114,56 +140,63 @@ class BsonDecodingTest(PerformanceTest):
self.document = BSON.encode(json.loads(data.read()))
def do_task(self):
codec_options = CodecOptions(tz_aware=True)
for _ in range(NUM_DOCS):
self.document.decode(codec_options=codec_options)
self.document.decode()
class TestFlatEncoding(BsonEncodingTest, unittest.TestCase):
dataset = 'flat_bson.json'
data_size = 75310000
class TestFlatDecoding(BsonDecodingTest, unittest.TestCase):
dataset = 'flat_bson.json'
data_size = 75310000
class TestDeepEncoding(BsonEncodingTest, unittest.TestCase):
dataset = 'deep_bson.json'
data_size = 19640000
class TestDeepDecoding(BsonDecodingTest, unittest.TestCase):
dataset = 'deep_bson.json'
data_size = 19640000
class TestFullEncoding(BsonEncodingTest, unittest.TestCase):
dataset = 'full_bson.json'
data_size = 57340000
class TestFullDecoding(BsonDecodingTest, unittest.TestCase):
dataset = 'full_bson.json'
data_size = 57340000
# SINGLE-DOC BENCHMARKS
class TestRunCommand(PerformanceTest, unittest.TestCase):
data_size = 160000
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
def do_task(self):
isMaster = {'isMaster': True}
command = self.client.perftest.command
for _ in range(NUM_DOCS):
self.client.perftest.command(isMaster)
command("ismaster")
class TestDocument(PerformanceTest):
def setUp(self):
# Location of test data.
with open(os.path.join(
with open(
os.path.join(
TEST_PATH, os.path.join(
'single_document', self.dataset)), 'r') as data:
'single_and_multi_document', self.dataset)), 'r') as data:
self.document = json.loads(data.read())
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
def tearDown(self):
@ -171,16 +204,16 @@ class TestDocument(PerformanceTest):
self.client.drop_database('perftest')
def before(self):
self.client.perftest.command({'create': 'corpus'})
self.corpus = self.client.perftest.corpus
self.corpus = self.client.perftest.create_collection('corpus')
def after(self):
self.client.perftest.drop_collection('corpus')
class TestFindOneByID(TestDocument, unittest.TestCase):
data_size = 16220000
def setUp(self):
self.dataset = 'TWEET.json'
self.dataset = 'tweet.json'
super(TestFindOneByID, self).setUp()
documents = [self.document.copy() for _ in range(NUM_DOCS)]
@ -189,8 +222,9 @@ class TestFindOneByID(TestDocument, unittest.TestCase):
self.inserted_ids = result.inserted_ids
def do_task(self):
for i in self.inserted_ids:
self.corpus.find_one({'_id': i})
find_one = self.corpus.find_one
for _id in self.inserted_ids:
find_one({'_id': _id})
def before(self):
pass
@ -200,33 +234,38 @@ class TestFindOneByID(TestDocument, unittest.TestCase):
class TestSmallDocInsertOne(TestDocument, unittest.TestCase):
data_size = 2750000
def setUp(self):
self.dataset = 'SMALL_DOC.json'
self.dataset = 'small_doc.json'
super(TestSmallDocInsertOne, self).setUp()
self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
def do_task(self):
insert_one = self.corpus.insert_one
for doc in self.documents:
self.corpus.insert_one(doc)
insert_one(doc)
class TestLargeDocInsertOne(TestDocument, unittest.TestCase):
data_size = 27310890
def setUp(self):
self.dataset = 'LARGE_DOC.json'
self.dataset = 'large_doc.json'
super(TestLargeDocInsertOne, self).setUp()
self.documents = [self.document.copy() for _ in range(10)]
def do_task(self):
insert_one = self.corpus.insert_one
for doc in self.documents:
self.corpus.insert_one(doc)
insert_one(doc)
# MULTI-DOC BENCHMARKS
class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase):
data_size = 16220000
def setUp(self):
self.dataset = 'TWEET.json'
self.dataset = 'tweet.json'
super(TestFindManyAndEmptyCursor, self).setUp()
for _ in range(10):
@ -236,8 +275,7 @@ class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase):
self.corpus = self.client.perftest.corpus
def do_task(self):
for _ in self.corpus.find():
pass
list(self.corpus.find())
def before(self):
pass
@ -247,40 +285,42 @@ class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase):
class TestSmallDocBulkInsert(TestDocument, unittest.TestCase):
data_size = 2750000
def setUp(self):
self.dataset = 'SMALL_DOC.json'
self.dataset = 'small_doc.json'
super(TestSmallDocBulkInsert, self).setUp()
self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
def before(self):
self.client.perftest.command({'create': 'corpus'})
self.corpus = self.client.perftest.corpus
self.corpus = self.client.perftest.create_collection('corpus')
def do_task(self):
self.corpus.insert_many(self.documents, ordered=True)
class TestLargeDocBulkInsert(TestDocument, unittest.TestCase):
data_size = 27310890
def setUp(self):
self.dataset = 'LARGE_DOC.json'
self.dataset = 'large_doc.json'
super(TestLargeDocBulkInsert, self).setUp()
self.documents = [self.document.copy() for _ in range(10)]
def before(self):
self.client.perftest.command({'create': 'corpus'})
self.corpus = self.client.perftest.corpus
self.corpus = self.client.perftest.create_collection('corpus')
def do_task(self):
self.corpus.insert_many(self.documents, ordered=True)
class TestGridFsUpload(PerformanceTest, unittest.TestCase):
data_size = 52428800
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
gridfs_path = os.path.join(
TEST_PATH, os.path.join('single_document', 'GRIDFS_LARGE'))
TEST_PATH,
os.path.join('single_and_multi_document', 'gridfs_large.bin'))
with open(gridfs_path, 'rb') as data:
self.document = data.read()
@ -291,8 +331,6 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase):
self.client.drop_database('perftest')
def before(self):
self.client.perftest.drop_collection('fs.files')
self.client.perftest.drop_collection('fs.chunks')
self.bucket.upload_from_stream('init', b'x')
def do_task(self):
@ -300,12 +338,14 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase):
class TestGridFsDownload(PerformanceTest, unittest.TestCase):
data_size = 52428800
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
gridfs_path = os.path.join(
TEST_PATH, os.path.join('single_document', 'GRIDFS_LARGE'))
TEST_PATH,
os.path.join('single_and_multi_document', 'gridfs_large.bin'))
self.bucket = GridFSBucket(self.client.perftest)
with open(gridfs_path, 'rb') as gfile:
@ -320,59 +360,58 @@ class TestGridFsDownload(PerformanceTest, unittest.TestCase):
self.bucket.open_download_stream(self.uploaded_id).read()
proc_client = None
def proc_init(*dummy):
global proc_client
proc_client = MongoClient(host, port)
# PARALLEL BENCHMARKS
def mp_map(map_func, length): # TODO: create each threads MongoClient in setup
pool = mp.Pool()
pool.map(map_func, length)
def mp_map(map_func, files):
pool = mp.Pool(initializer=proc_init)
pool.map(map_func, files)
pool.close()
pool.join()
def insert_json_file(filename):
client = MongoClient(host, port)
documents = []
with open(filename, 'r') as data:
for line in data:
documents.append(json.loads(line))
client.perftest.corpus.insert_many(documents)
coll = proc_client.perftest.corpus
coll.insert_many([json.loads(line) for line in data])
def insert_json_file_with_file_id(filename):
client = MongoClient(host, port)
documents = []
with open(filename, 'r') as data:
for line in data:
doc = json.loads(line)
doc['file'] = filename
documents.append(doc)
client.perftest.corpus.insert_many(documents)
coll = proc_client.perftest.corpus
coll.insert_many(documents)
def read_json_file(filename):
client = MongoClient(host, port)
coll = proc_client.perftest.corpus
temp = tempfile.TemporaryFile()
try:
for doc in client.perftest.corpus.find({'file': filename}):
temp.write(str(doc) + '\n')
temp.writelines(
[json.dumps(doc) + '\n' for
doc in coll.find({'file': filename}, {'_id': False})])
finally:
temp.close()
def insert_gridfs_file(filename):
client = MongoClient(host, port)
bucket = GridFSBucket(client.perftest)
bucket = GridFSBucket(proc_client.perftest)
with open(filename, 'rb') as gfile:
bucket.upload_from_stream(filename, gfile)
def read_gridfs_file(filename):
client = MongoClient(host, port)
bucket = GridFSBucket(client.perftest)
bucket = GridFSBucket(proc_client.perftest)
temp = tempfile.TemporaryFile()
try:
@ -382,8 +421,9 @@ def read_gridfs_file(filename):
class TestJsonMultiImport(PerformanceTest, unittest.TestCase):
data_size = 565000000
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
def before(self):
@ -391,7 +431,7 @@ class TestJsonMultiImport(PerformanceTest, unittest.TestCase):
self.corpus = self.client.perftest.corpus
ldjson_path = os.path.join(
TEST_PATH, os.path.join('parallel', 'LDJSON_MULTI'))
TEST_PATH, os.path.join('parallel', 'ldjson_multi'))
self.files = [os.path.join(
ldjson_path, s) for s in os.listdir(ldjson_path)]
@ -407,13 +447,14 @@ class TestJsonMultiImport(PerformanceTest, unittest.TestCase):
class TestJsonMultiExport(PerformanceTest, unittest.TestCase):
data_size = 565000000
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
# Index the 'file' field on the benchmark database itself ('perftest'); the
# previous spelling 'perfest' built the index on an unrelated database.
self.client.perftest.corpus.create_index('file')
ldjson_path = os.path.join(
TEST_PATH, os.path.join('parallel', 'LDJSON_MULTI'))
TEST_PATH, os.path.join('parallel', 'ldjson_multi'))
self.files = [os.path.join(
ldjson_path, s) for s in os.listdir(ldjson_path)]
@ -428,8 +469,9 @@ class TestJsonMultiExport(PerformanceTest, unittest.TestCase):
class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase):
data_size = 262144000
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
def before(self):
@ -438,7 +480,7 @@ class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase):
self.bucket = GridFSBucket(self.client.perftest)
gridfs_path = os.path.join(
TEST_PATH, os.path.join('parallel', 'GRIDFS_MULTI'))
TEST_PATH, os.path.join('parallel', 'gridfs_multi'))
self.files = [os.path.join(
gridfs_path, s) for s in os.listdir(gridfs_path)]
@ -451,14 +493,15 @@ class TestGridFsMultiFileUpload(PerformanceTest, unittest.TestCase):
class TestGridFsMultiFileDownload(PerformanceTest, unittest.TestCase):
data_size = 262144000
def setUp(self):
self.client = client_context.rs_or_standalone_client
self.client = client_context.client
self.client.drop_database('perftest')
bucket = GridFSBucket(self.client.perftest)
gridfs_path = os.path.join(
TEST_PATH, os.path.join('parallel', 'GRIDFS_MULTI'))
TEST_PATH, os.path.join('parallel', 'gridfs_multi'))
self.files = [os.path.join(
gridfs_path, s) for s in os.listdir(gridfs_path)]
@ -473,5 +516,6 @@ class TestGridFsMultiFileDownload(PerformanceTest, unittest.TestCase):
super(TestGridFsMultiFileDownload, self).tearDown()
self.client.drop_database('perftest')
if __name__ == "__main__":
unittest.main()