PYTHON-3823 Audit benchmark data_size and calculate dynamically it where possible (#1439)
This commit is contained in:
parent
fc220532df
commit
ec35f7f76e
@ -21,7 +21,7 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
import warnings
|
||||
from typing import Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
try:
|
||||
import simplejson as json
|
||||
@ -70,9 +70,8 @@ class Timer:
|
||||
|
||||
|
||||
class PerformanceTest:
|
||||
dataset: Any
|
||||
data_size: Any
|
||||
do_task: Any
|
||||
dataset: str
|
||||
data_size: int
|
||||
fail: Any
|
||||
|
||||
@classmethod
|
||||
@ -87,7 +86,9 @@ class PerformanceTest:
|
||||
name = self.__class__.__name__[4:]
|
||||
median = self.percentile(50)
|
||||
megabytes_per_sec = self.data_size / median / 1000000
|
||||
print(f"Running {self.__class__.__name__}. MEDIAN={self.percentile(50)}")
|
||||
print(
|
||||
f"Running {self.__class__.__name__}. MB/s={megabytes_per_sec}, MEDIAN={self.percentile(50)}"
|
||||
)
|
||||
result_data.append(
|
||||
{
|
||||
"info": {
|
||||
@ -105,6 +106,9 @@ class PerformanceTest:
|
||||
def before(self):
|
||||
pass
|
||||
|
||||
def do_task(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def after(self):
|
||||
pass
|
||||
|
||||
@ -120,12 +124,13 @@ class PerformanceTest:
|
||||
def runTest(self):
|
||||
results = []
|
||||
start = time.monotonic()
|
||||
self.max_iterations = NUM_ITERATIONS
|
||||
for i in range(NUM_ITERATIONS):
|
||||
if time.monotonic() - start > MAX_ITERATION_TIME:
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("default")
|
||||
warnings.warn("Test timed out, completed %s iterations." % i)
|
||||
warnings.warn(
|
||||
f"Test timed out after {MAX_ITERATION_TIME}s, completed {i}/{NUM_ITERATIONS} iterations."
|
||||
)
|
||||
break
|
||||
self.before()
|
||||
with Timer() as timer:
|
||||
@ -142,6 +147,7 @@ class BsonEncodingTest(PerformanceTest):
|
||||
# Location of test data.
|
||||
with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data:
|
||||
self.document = loads(data.read())
|
||||
self.data_size = len(encode(self.document)) * NUM_DOCS
|
||||
|
||||
def do_task(self):
|
||||
for _ in range(NUM_DOCS):
|
||||
@ -154,6 +160,8 @@ class BsonDecodingTest(PerformanceTest):
|
||||
with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data:
|
||||
self.document = encode(json.loads(data.read()))
|
||||
|
||||
self.data_size = len(self.document) * NUM_DOCS
|
||||
|
||||
def do_task(self):
|
||||
for _ in range(NUM_DOCS):
|
||||
decode(self.document)
|
||||
@ -161,37 +169,31 @@ class BsonDecodingTest(PerformanceTest):
|
||||
|
||||
class TestFlatEncoding(BsonEncodingTest, unittest.TestCase):
|
||||
dataset = "flat_bson.json"
|
||||
data_size = 75310000
|
||||
|
||||
|
||||
class TestFlatDecoding(BsonDecodingTest, unittest.TestCase):
|
||||
dataset = "flat_bson.json"
|
||||
data_size = 75310000
|
||||
|
||||
|
||||
class TestDeepEncoding(BsonEncodingTest, unittest.TestCase):
|
||||
dataset = "deep_bson.json"
|
||||
data_size = 19640000
|
||||
|
||||
|
||||
class TestDeepDecoding(BsonDecodingTest, unittest.TestCase):
|
||||
dataset = "deep_bson.json"
|
||||
data_size = 19640000
|
||||
|
||||
|
||||
class TestFullEncoding(BsonEncodingTest, unittest.TestCase):
|
||||
dataset = "full_bson.json"
|
||||
data_size = 57340000
|
||||
|
||||
|
||||
class TestFullDecoding(BsonDecodingTest, unittest.TestCase):
|
||||
dataset = "full_bson.json"
|
||||
data_size = 57340000
|
||||
|
||||
|
||||
# SINGLE-DOC BENCHMARKS
|
||||
class TestRunCommand(PerformanceTest, unittest.TestCase):
|
||||
data_size = 160000
|
||||
data_size = len(encode({"hello": True})) * NUM_DOCS
|
||||
|
||||
def setUp(self):
|
||||
self.client = client_context.client
|
||||
@ -200,7 +202,7 @@ class TestRunCommand(PerformanceTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
command = self.client.perftest.command
|
||||
for _ in range(NUM_DOCS):
|
||||
command("ping")
|
||||
command("hello", True)
|
||||
|
||||
|
||||
class TestDocument(PerformanceTest):
|
||||
@ -225,23 +227,17 @@ class TestDocument(PerformanceTest):
|
||||
self.client.perftest.drop_collection("corpus")
|
||||
|
||||
|
||||
class TestFindOneByID(TestDocument, unittest.TestCase):
|
||||
data_size = 16220000
|
||||
class FindTest(TestDocument):
|
||||
dataset = "tweet.json"
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "tweet.json"
|
||||
super().setUp()
|
||||
|
||||
self.data_size = len(encode(self.document)) * NUM_DOCS
|
||||
documents = [self.document.copy() for _ in range(NUM_DOCS)]
|
||||
self.corpus = self.client.perftest.corpus
|
||||
result = self.corpus.insert_many(documents)
|
||||
self.inserted_ids = result.inserted_ids
|
||||
|
||||
def do_task(self):
|
||||
find_one = self.corpus.find_one
|
||||
for _id in self.inserted_ids:
|
||||
find_one({"_id": _id})
|
||||
|
||||
def before(self):
|
||||
pass
|
||||
|
||||
@ -249,30 +245,40 @@ class TestFindOneByID(TestDocument, unittest.TestCase):
|
||||
pass
|
||||
|
||||
|
||||
class TestSmallDocInsertOne(TestDocument, unittest.TestCase):
|
||||
data_size = 2750000
|
||||
class TestFindOneByID(FindTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
find_one = self.corpus.find_one
|
||||
for _id in self.inserted_ids:
|
||||
find_one({"_id": _id})
|
||||
|
||||
|
||||
class SmallDocInsertTest(TestDocument):
|
||||
dataset = "small_doc.json"
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "small_doc.json"
|
||||
super().setUp()
|
||||
|
||||
self.data_size = len(encode(self.document)) * NUM_DOCS
|
||||
self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
|
||||
|
||||
|
||||
class TestSmallDocInsertOne(SmallDocInsertTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
insert_one = self.corpus.insert_one
|
||||
for doc in self.documents:
|
||||
insert_one(doc)
|
||||
|
||||
|
||||
class TestLargeDocInsertOne(TestDocument, unittest.TestCase):
|
||||
data_size = 27310890
|
||||
class LargeDocInsertTest(TestDocument):
|
||||
dataset = "large_doc.json"
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "large_doc.json"
|
||||
super().setUp()
|
||||
n_docs = 10
|
||||
self.data_size = len(encode(self.document)) * n_docs
|
||||
self.documents = [self.document.copy() for _ in range(n_docs)]
|
||||
|
||||
self.documents = [self.document.copy() for _ in range(10)]
|
||||
|
||||
class TestLargeDocInsertOne(LargeDocInsertTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
insert_one = self.corpus.insert_one
|
||||
for doc in self.documents:
|
||||
@ -280,61 +286,24 @@ class TestLargeDocInsertOne(TestDocument, unittest.TestCase):
|
||||
|
||||
|
||||
# MULTI-DOC BENCHMARKS
|
||||
class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase):
|
||||
data_size = 16220000
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "tweet.json"
|
||||
super().setUp()
|
||||
|
||||
for _ in range(10):
|
||||
self.client.perftest.command("insert", "corpus", documents=[self.document] * 1000)
|
||||
self.corpus = self.client.perftest.corpus
|
||||
|
||||
class TestFindManyAndEmptyCursor(FindTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
list(self.corpus.find())
|
||||
|
||||
def before(self):
|
||||
pass
|
||||
|
||||
def after(self):
|
||||
pass
|
||||
|
||||
|
||||
class TestSmallDocBulkInsert(TestDocument, unittest.TestCase):
|
||||
data_size = 2750000
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "small_doc.json"
|
||||
super().setUp()
|
||||
self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
|
||||
|
||||
def before(self):
|
||||
self.corpus = self.client.perftest.create_collection("corpus")
|
||||
|
||||
class TestSmallDocBulkInsert(SmallDocInsertTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
self.corpus.insert_many(self.documents, ordered=True)
|
||||
|
||||
|
||||
class TestLargeDocBulkInsert(TestDocument, unittest.TestCase):
|
||||
data_size = 27310890
|
||||
|
||||
def setUp(self):
|
||||
self.dataset = "large_doc.json"
|
||||
super().setUp()
|
||||
self.documents = [self.document.copy() for _ in range(10)]
|
||||
|
||||
def before(self):
|
||||
self.corpus = self.client.perftest.create_collection("corpus")
|
||||
|
||||
class TestLargeDocBulkInsert(LargeDocInsertTest, unittest.TestCase):
|
||||
def do_task(self):
|
||||
self.corpus.insert_many(self.documents, ordered=True)
|
||||
|
||||
|
||||
class TestGridFsUpload(PerformanceTest, unittest.TestCase):
|
||||
data_size = 52428800
|
||||
|
||||
class GridFsTest(PerformanceTest):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.client = client_context.client
|
||||
self.client.drop_database("perftest")
|
||||
|
||||
@ -343,44 +312,33 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase):
|
||||
)
|
||||
with open(gridfs_path, "rb") as data:
|
||||
self.document = data.read()
|
||||
|
||||
self.data_size = len(self.document)
|
||||
self.bucket = GridFSBucket(self.client.perftest)
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
self.client.drop_database("perftest")
|
||||
|
||||
|
||||
class TestGridFsUpload(GridFsTest, unittest.TestCase):
|
||||
def before(self):
|
||||
# Create the bucket.
|
||||
self.bucket.upload_from_stream("init", b"x")
|
||||
|
||||
def do_task(self):
|
||||
self.bucket.upload_from_stream("gridfstest", self.document)
|
||||
|
||||
|
||||
class TestGridFsDownload(PerformanceTest, unittest.TestCase):
|
||||
data_size = 52428800
|
||||
|
||||
class TestGridFsDownload(GridFsTest, unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.client = client_context.client
|
||||
self.client.drop_database("perftest")
|
||||
|
||||
gridfs_path = os.path.join(
|
||||
TEST_PATH, os.path.join("single_and_multi_document", "gridfs_large.bin")
|
||||
)
|
||||
|
||||
self.bucket = GridFSBucket(self.client.perftest)
|
||||
with open(gridfs_path, "rb") as gfile:
|
||||
self.uploaded_id = self.bucket.upload_from_stream("gridfstest", gfile)
|
||||
|
||||
def tearDown(self):
|
||||
super().tearDown()
|
||||
self.client.drop_database("perftest")
|
||||
super().setUp()
|
||||
self.uploaded_id = self.bucket.upload_from_stream("gridfstest", self.document)
|
||||
|
||||
def do_task(self):
|
||||
self.bucket.open_download_stream(self.uploaded_id).read()
|
||||
|
||||
|
||||
proc_client = None
|
||||
proc_client: Optional[MongoClient] = None
|
||||
|
||||
|
||||
def proc_init(*dummy):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user