From df221cf2feecc72be2842b01a0cabcc58b45453c Mon Sep 17 00:00:00 2001 From: Mike Dirolf Date: Wed, 25 Feb 2009 10:07:16 -0500 Subject: [PATCH] replace old benchmark suite with new one --- tools/benchmark.py | 141 +++++++++++++++++++++++++++++++++----------- tools/benchmark1.py | 140 ------------------------------------------- 2 files changed, 108 insertions(+), 173 deletions(-) delete mode 100644 tools/benchmark1.py diff --git a/tools/benchmark.py b/tools/benchmark.py index 25b061001..aca3518f3 100644 --- a/tools/benchmark.py +++ b/tools/benchmark.py @@ -12,54 +12,129 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Jim's benchmarking suite -""" +"""MongoDB benchmarking suite.""" -import datetime +import time import sys sys.path[0:0] = [""] -from pymongo.connection import Connection +import datetime +import cProfile + +from pymongo import connection from pymongo import ASCENDING -N = 30000 +trials = 2 +per_trial = 5000 +batch_size = 100 +small = {} +medium = {"integer": 5, + "number": 5.05, + "boolean": False, + "array": ["test", "benchmark"] + } +# this is similar to the benchmark data posted to the user list +large = {"base_url": "http://www.example.com/test-me", + "total_word_count": 6743, + "access_time": datetime.datetime.now(), + "meta_tags": {"description": "i am a long description string", + "author": "Holly Man", + "dynamically_created_meta_tag": "who know\n what" + }, + "page_structure": {"counted_tags": 3450, + "no_of_js_attached": 10, + "no_of_images": 6 + }, + "harvested_words": ["10gen","web","open","source","application","paas", + "platform-as-a-service","technology","helps", + "developers","focus","building","mongodb","mongo"] * 20 + } -def timed(function, db): - before = datetime.datetime.now() - function(db) - print "%s%s" % (function.__name__.ljust(15), datetime.datetime.now() - before) +def setup_insert(db, collection, object): + db.drop_collection(collection) -def insert(db): - for i in range(N): - db.test.insert({"i": i}) +def insert(db, collection, object): + for i in range(per_trial): + to_insert = object.copy() + to_insert["x"] = i + db[collection].insert(to_insert) -def find_one(db): - for _ in range(N): - db.test.find_one() +def insert_batch(db, collection, object): + for i in range(per_trial / batch_size): + db[collection].insert([object] * batch_size) -def find(db): - for _ in range(N): - for _ in db.test.find({"i": 3}): - pass - for _ in db.test.find({"i": 234}): - pass - for _ in db.test.find({"i": 9876}): +def find_one(db, collection, x): + for _ in range(per_trial): + db[collection].find_one({"x": x}) + +def find(db, collection, x): + for _ in range(per_trial): + for _ in db[collection].find({"x": x}): pass -def find_range(db): - for _ in range(N): - for _ in db.test.find({"i": {"$gt": 200, "$lt": 200}}): - pass +def timed(name, function, args=[], setup=None): + times = [] + for _ in range(trials): + if setup: + setup(*args) + start = time.time() + function(*args) + times.append(time.time() - start) + best_time = min(times) + print "%s%d" % (name + (60 - len(name)) * ".", per_trial / best_time) + return best_time def main(): - db = Connection().benchmark - db.drop_collection("test") - db.test.create_index("i", ASCENDING) + connection._TIMEOUT=60 # jack up the timeout + c = connection.Connection() + c.drop_database("benchmark") + db = c.benchmark - timed(insert, db) - timed(find_one, db) - timed(find, db) - timed(find_range, db) + timed("insert (small, no index)", insert, [db, 'small_none', small], setup_insert) + timed("insert (medium, no index)", insert, [db, 'medium_none', medium], setup_insert) + timed("insert (large, no index)", insert, [db, 'large_none', large], setup_insert) + + db.small_index.create_index("x", ASCENDING) + timed("insert (small, indexed)", insert, [db, 'small_index', small]) + db.medium_index.create_index("x", ASCENDING) + timed("insert (medium, indexed)", insert, [db, 'medium_index', medium]) + db.large_index.create_index("x", ASCENDING) + timed("insert (large, indexed)", insert, [db, 'large_index', large]) + + timed("batch insert (small, no index)", insert_batch, [db, 'small_bulk', small], setup_insert) + timed("batch insert (medium, no index)", insert_batch, [db, 'medium_bulk', medium], setup_insert) + timed("batch insert (large, no index)", insert_batch, [db, 'large_bulk', large], setup_insert) + + timed("find_one (small, no index)", find_one, [db, 'small_none', per_trial / 2]) + timed("find_one (medium, no index)", find_one, [db, 'medium_none', per_trial / 2]) + timed("find_one (large, no index)", find_one, [db, 'large_none', per_trial / 2]) + + timed("find_one (small, indexed)", find_one, [db, 'small_index', per_trial / 2]) + timed("find_one (medium, indexed)", find_one, [db, 'medium_index', per_trial / 2]) + timed("find_one (large, indexed)", find_one, [db, 'large_index', per_trial / 2]) + + timed("find (small, no index)", find, [db, 'small_none', per_trial / 2]) + timed("find (medium, no index)", find, [db, 'medium_none', per_trial / 2]) + timed("find (large, no index)", find, [db, 'large_none', per_trial / 2]) + + timed("find (small, indexed)", find, [db, 'small_index', per_trial / 2]) + timed("find (medium, indexed)", find, [db, 'medium_index', per_trial / 2]) + timed("find (large, indexed)", find, [db, 'large_index', per_trial / 2]) + +# timed("find range (small, no index)", find, +# [db, 'small_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) +# timed("find range (medium, no index)", find, +# [db, 'medium_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) +# timed("find range (large, no index)", find, +# [db, 'large_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) + + timed("find range (small, indexed)", find, + [db, 'small_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) + timed("find range (medium, indexed)", find, + [db, 'medium_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) + timed("find range (large, indexed)", find, + [db, 'large_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) if __name__ == "__main__": +# cProfile.run("main()") main() diff --git a/tools/benchmark1.py b/tools/benchmark1.py deleted file mode 100644 index aca3518f3..000000000 --- a/tools/benchmark1.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2009 10gen, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""MongoDB benchmarking suite.""" - -import time -import sys -sys.path[0:0] = [""] - -import datetime -import cProfile - -from pymongo import connection -from pymongo import ASCENDING - -trials = 2 -per_trial = 5000 -batch_size = 100 -small = {} -medium = {"integer": 5, - "number": 5.05, - "boolean": False, - "array": ["test", "benchmark"] - } -# this is similar to the benchmark data posted to the user list -large = {"base_url": "http://www.example.com/test-me", - "total_word_count": 6743, - "access_time": datetime.datetime.now(), - "meta_tags": {"description": "i am a long description string", - "author": "Holly Man", - "dynamically_created_meta_tag": "who know\n what" - }, - "page_structure": {"counted_tags": 3450, - "no_of_js_attached": 10, - "no_of_images": 6 - }, - "harvested_words": ["10gen","web","open","source","application","paas", - "platform-as-a-service","technology","helps", - "developers","focus","building","mongodb","mongo"] * 20 - } - -def setup_insert(db, collection, object): - db.drop_collection(collection) - -def insert(db, collection, object): - for i in range(per_trial): - to_insert = object.copy() - to_insert["x"] = i - db[collection].insert(to_insert) - -def insert_batch(db, collection, object): - for i in range(per_trial / batch_size): - db[collection].insert([object] * batch_size) - -def find_one(db, collection, x): - for _ in range(per_trial): - db[collection].find_one({"x": x}) - -def find(db, collection, x): - for _ in range(per_trial): - for _ in db[collection].find({"x": x}): - pass - -def timed(name, function, args=[], setup=None): - times = [] - for _ in range(trials): - if setup: - setup(*args) - start = time.time() - function(*args) - times.append(time.time() - start) - best_time = min(times) - print "%s%d" % (name + (60 - len(name)) * ".", per_trial / best_time) - return best_time - -def main(): - connection._TIMEOUT=60 # jack up the timeout - c = connection.Connection() - c.drop_database("benchmark") - db = c.benchmark - - timed("insert (small, no index)", insert, [db, 'small_none', small], setup_insert) - timed("insert (medium, no index)", insert, [db, 'medium_none', medium], setup_insert) - timed("insert (large, no index)", insert, [db, 'large_none', large], setup_insert) - - db.small_index.create_index("x", ASCENDING) - timed("insert (small, indexed)", insert, [db, 'small_index', small]) - db.medium_index.create_index("x", ASCENDING) - timed("insert (medium, indexed)", insert, [db, 'medium_index', medium]) - db.large_index.create_index("x", ASCENDING) - timed("insert (large, indexed)", insert, [db, 'large_index', large]) - - timed("batch insert (small, no index)", insert_batch, [db, 'small_bulk', small], setup_insert) - timed("batch insert (medium, no index)", insert_batch, [db, 'medium_bulk', medium], setup_insert) - timed("batch insert (large, no index)", insert_batch, [db, 'large_bulk', large], setup_insert) - - timed("find_one (small, no index)", find_one, [db, 'small_none', per_trial / 2]) - timed("find_one (medium, no index)", find_one, [db, 'medium_none', per_trial / 2]) - timed("find_one (large, no index)", find_one, [db, 'large_none', per_trial / 2]) - - timed("find_one (small, indexed)", find_one, [db, 'small_index', per_trial / 2]) - timed("find_one (medium, indexed)", find_one, [db, 'medium_index', per_trial / 2]) - timed("find_one (large, indexed)", find_one, [db, 'large_index', per_trial / 2]) - - timed("find (small, no index)", find, [db, 'small_none', per_trial / 2]) - timed("find (medium, no index)", find, [db, 'medium_none', per_trial / 2]) - timed("find (large, no index)", find, [db, 'large_none', per_trial / 2]) - - timed("find (small, indexed)", find, [db, 'small_index', per_trial / 2]) - timed("find (medium, indexed)", find, [db, 'medium_index', per_trial / 2]) - timed("find (large, indexed)", find, [db, 'large_index', per_trial / 2]) - -# timed("find range (small, no index)", find, -# [db, 'small_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) -# timed("find range (medium, no index)", find, -# [db, 'medium_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) -# timed("find range (large, no index)", find, -# [db, 'large_none', {"$gt": per_trial / 4, "$lt": 3 * per_trial / 4}]) - - timed("find range (small, indexed)", find, - [db, 'small_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) - timed("find range (medium, indexed)", find, - [db, 'medium_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) - timed("find range (large, indexed)", find, - [db, 'large_index', {"$gt": per_trial / 2, "$lt": per_trial / 2 + batch_size}]) - -if __name__ == "__main__": -# cProfile.run("main()") - main()