From ec35f7f76e5b7a7f4039de37e0f51bdbcc8c2fdc Mon Sep 17 00:00:00 2001 From: Shane Harvey Date: Fri, 17 Nov 2023 12:07:33 -0800 Subject: [PATCH] PYTHON-3823 Audit benchmark data_size and calculate dynamically it where possible (#1439) --- test/performance/perf_test.py | 144 ++++++++++++---------------------- 1 file changed, 51 insertions(+), 93 deletions(-) diff --git a/test/performance/perf_test.py b/test/performance/perf_test.py index 6aabb595e..ec3fb0bd4 100644 --- a/test/performance/perf_test.py +++ b/test/performance/perf_test.py @@ -21,7 +21,7 @@ import sys import tempfile import time import warnings -from typing import Any, List +from typing import Any, List, Optional try: import simplejson as json @@ -70,9 +70,8 @@ class Timer: class PerformanceTest: - dataset: Any - data_size: Any - do_task: Any + dataset: str + data_size: int fail: Any @classmethod @@ -87,7 +86,9 @@ class PerformanceTest: name = self.__class__.__name__[4:] median = self.percentile(50) megabytes_per_sec = self.data_size / median / 1000000 - print(f"Running {self.__class__.__name__}. MEDIAN={self.percentile(50)}") + print( + f"Running {self.__class__.__name__}. MB/s={megabytes_per_sec}, MEDIAN={self.percentile(50)}" + ) result_data.append( { "info": { @@ -105,6 +106,9 @@ class PerformanceTest: def before(self): pass + def do_task(self): + raise NotImplementedError + def after(self): pass @@ -120,12 +124,13 @@ class PerformanceTest: def runTest(self): results = [] start = time.monotonic() - self.max_iterations = NUM_ITERATIONS for i in range(NUM_ITERATIONS): if time.monotonic() - start > MAX_ITERATION_TIME: with warnings.catch_warnings(): warnings.simplefilter("default") - warnings.warn("Test timed out, completed %s iterations." % i) + warnings.warn( + f"Test timed out after {MAX_ITERATION_TIME}s, completed {i}/{NUM_ITERATIONS} iterations." + ) break self.before() with Timer() as timer: @@ -142,6 +147,7 @@ class BsonEncodingTest(PerformanceTest): # Location of test data. with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data: self.document = loads(data.read()) + self.data_size = len(encode(self.document)) * NUM_DOCS def do_task(self): for _ in range(NUM_DOCS): @@ -154,6 +160,8 @@ class BsonDecodingTest(PerformanceTest): with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data: self.document = encode(json.loads(data.read())) + self.data_size = len(self.document) * NUM_DOCS + def do_task(self): for _ in range(NUM_DOCS): decode(self.document) @@ -161,37 +169,31 @@ class BsonDecodingTest(PerformanceTest): class TestFlatEncoding(BsonEncodingTest, unittest.TestCase): dataset = "flat_bson.json" - data_size = 75310000 class TestFlatDecoding(BsonDecodingTest, unittest.TestCase): dataset = "flat_bson.json" - data_size = 75310000 class TestDeepEncoding(BsonEncodingTest, unittest.TestCase): dataset = "deep_bson.json" - data_size = 19640000 class TestDeepDecoding(BsonDecodingTest, unittest.TestCase): dataset = "deep_bson.json" - data_size = 19640000 class TestFullEncoding(BsonEncodingTest, unittest.TestCase): dataset = "full_bson.json" - data_size = 57340000 class TestFullDecoding(BsonDecodingTest, unittest.TestCase): dataset = "full_bson.json" - data_size = 57340000 # SINGLE-DOC BENCHMARKS class TestRunCommand(PerformanceTest, unittest.TestCase): - data_size = 160000 + data_size = len(encode({"hello": True})) * NUM_DOCS def setUp(self): self.client = client_context.client @@ -200,7 +202,7 @@ class TestRunCommand(PerformanceTest, unittest.TestCase): def do_task(self): command = self.client.perftest.command for _ in range(NUM_DOCS): - command("ping") + command("hello", True) class TestDocument(PerformanceTest): @@ -225,23 +227,17 @@ class TestDocument(PerformanceTest): self.client.perftest.drop_collection("corpus") -class TestFindOneByID(TestDocument, unittest.TestCase): - data_size = 16220000 +class FindTest(TestDocument): + dataset = "tweet.json" def setUp(self): - self.dataset = "tweet.json" super().setUp() - + self.data_size = len(encode(self.document)) * NUM_DOCS documents = [self.document.copy() for _ in range(NUM_DOCS)] self.corpus = self.client.perftest.corpus result = self.corpus.insert_many(documents) self.inserted_ids = result.inserted_ids - def do_task(self): - find_one = self.corpus.find_one - for _id in self.inserted_ids: - find_one({"_id": _id}) - def before(self): pass @@ -249,30 +245,40 @@ class TestFindOneByID(TestDocument, unittest.TestCase): pass -class TestSmallDocInsertOne(TestDocument, unittest.TestCase): - data_size = 2750000 +class TestFindOneByID(FindTest, unittest.TestCase): + def do_task(self): + find_one = self.corpus.find_one + for _id in self.inserted_ids: + find_one({"_id": _id}) + + +class SmallDocInsertTest(TestDocument): + dataset = "small_doc.json" def setUp(self): - self.dataset = "small_doc.json" super().setUp() - + self.data_size = len(encode(self.document)) * NUM_DOCS self.documents = [self.document.copy() for _ in range(NUM_DOCS)] + +class TestSmallDocInsertOne(SmallDocInsertTest, unittest.TestCase): def do_task(self): insert_one = self.corpus.insert_one for doc in self.documents: insert_one(doc) -class TestLargeDocInsertOne(TestDocument, unittest.TestCase): - data_size = 27310890 +class LargeDocInsertTest(TestDocument): + dataset = "large_doc.json" def setUp(self): - self.dataset = "large_doc.json" super().setUp() + n_docs = 10 + self.data_size = len(encode(self.document)) * n_docs + self.documents = [self.document.copy() for _ in range(n_docs)] - self.documents = [self.document.copy() for _ in range(10)] +class TestLargeDocInsertOne(LargeDocInsertTest, unittest.TestCase): def do_task(self): insert_one = self.corpus.insert_one for doc in self.documents: @@ -280,61 +286,24 @@ class TestLargeDocInsertOne(TestDocument, unittest.TestCase): # MULTI-DOC BENCHMARKS -class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase): - data_size = 16220000 - - def setUp(self): - self.dataset = "tweet.json" - super().setUp() - - for _ in range(10): - self.client.perftest.command("insert", "corpus", documents=[self.document] * 1000) - self.corpus = self.client.perftest.corpus - +class TestFindManyAndEmptyCursor(FindTest, unittest.TestCase): def do_task(self): list(self.corpus.find()) - def before(self): - pass - - def after(self): - pass - - -class TestSmallDocBulkInsert(TestDocument, unittest.TestCase): - data_size = 2750000 - - def setUp(self): - self.dataset = "small_doc.json" - super().setUp() - self.documents = [self.document.copy() for _ in range(NUM_DOCS)] - - def before(self): - self.corpus = self.client.perftest.create_collection("corpus") +class TestSmallDocBulkInsert(SmallDocInsertTest, unittest.TestCase): def do_task(self): self.corpus.insert_many(self.documents, ordered=True) -class TestLargeDocBulkInsert(TestDocument, unittest.TestCase): - data_size = 27310890 - - def setUp(self): - self.dataset = "large_doc.json" - super().setUp() - self.documents = [self.document.copy() for _ in range(10)] - - def before(self): - self.corpus = self.client.perftest.create_collection("corpus") - +class TestLargeDocBulkInsert(LargeDocInsertTest, unittest.TestCase): def do_task(self): self.corpus.insert_many(self.documents, ordered=True) -class TestGridFsUpload(PerformanceTest, unittest.TestCase): - data_size = 52428800 - +class GridFsTest(PerformanceTest): def setUp(self): + super().setUp() self.client = client_context.client self.client.drop_database("perftest") @@ -343,44 +312,33 @@ class TestGridFsUpload(PerformanceTest, unittest.TestCase): ) with open(gridfs_path, "rb") as data: self.document = data.read() - + self.data_size = len(self.document) self.bucket = GridFSBucket(self.client.perftest) def tearDown(self): super().tearDown() self.client.drop_database("perftest") + +class TestGridFsUpload(GridFsTest, unittest.TestCase): def before(self): + # Create the bucket. self.bucket.upload_from_stream("init", b"x") def do_task(self): self.bucket.upload_from_stream("gridfstest", self.document) -class TestGridFsDownload(PerformanceTest, unittest.TestCase): - data_size = 52428800 - +class TestGridFsDownload(GridFsTest, unittest.TestCase): def setUp(self): - self.client = client_context.client - self.client.drop_database("perftest") - - gridfs_path = os.path.join( - TEST_PATH, os.path.join("single_and_multi_document", "gridfs_large.bin") - ) - - self.bucket = GridFSBucket(self.client.perftest) - with open(gridfs_path, "rb") as gfile: - self.uploaded_id = self.bucket.upload_from_stream("gridfstest", gfile) - - def tearDown(self): - super().tearDown() - self.client.drop_database("perftest") + super().setUp() + self.uploaded_id = self.bucket.upload_from_stream("gridfstest", self.document) def do_task(self): self.bucket.open_download_stream(self.uploaded_id).read() -proc_client = None +proc_client: Optional[MongoClient] = None def proc_init(*dummy):