diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 953579169..7119d13e9 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -1026,6 +1026,10 @@ axes: display_name: zlib compression variables: COMPRESSORS: "zlib" + - id: zstd + display_name: zstd compression + variables: + COMPRESSORS: "zstd" - id: python-version display_name: "Python" values: @@ -1283,15 +1287,22 @@ buildvariants: compression: "*" - python-version: ["jython2.7"] c-extensions: "*" - compression: "snappy" + compression: ["snappy", "zstd"] batchtime: 10080 # 7 days display_name: "${compression} ${c-extensions} ${python-version} (x86_64)" # Ubuntu 16.04 images have libsnappy-dev installed run_on: ubuntu1604-test tasks: - - "test-latest-standalone" - - "test-4.0-standalone" - - "test-3.6-standalone" + - "test-latest-standalone" + rules: + - if: + python-version: "*" + c-extensions: "*" + compression: ["snappy", "zlib"] + then: + add_tasks: + - "test-4.0-standalone" + - "test-3.6-standalone" - matrix_name: "tests-python-version-py37-plus-compression" matrix_spec: {"python-version-requires-openssl-102-plus": "*", "c-extensions": "*", "compression": "*"} @@ -1301,9 +1312,16 @@ buildvariants: # Ubuntu 16.04 images have libsnappy-dev installed, and provides OpenSSL 1.0.2 run_on: ubuntu1604-test tasks: - - "test-latest-standalone" - - "test-4.0-standalone" - - "test-3.6-standalone" + - "test-latest-standalone" + rules: + - if: + python-version: "*" + c-extensions: "*" + compression: ["snappy", "zlib"] + then: + add_tasks: + - "test-4.0-standalone" + - "test-3.6-standalone" - matrix_name: "tests-python-version-green-framework-rhel62" matrix_spec: {"python-version": "*", "green-framework": "*", auth-ssl: "*"} diff --git a/.evergreen/run-tests.sh b/.evergreen/run-tests.sh index eb2933a7b..c5233f687 100755 --- a/.evergreen/run-tests.sh +++ b/.evergreen/run-tests.sh @@ -56,6 +56,12 @@ elif [ $COMPRESSORS = "snappy" ]; then # 0.5.2 has issues in pypy3(.5) pip install python-snappy==0.5.1 
PYTHON=python +elif [ $COMPRESSORS = "zstd" ]; then + $PYTHON_BINARY -m virtualenv --system-site-packages --never-download zstdtest + . zstdtest/bin/activate + trap "deactivate; rm -rf zstdtest" EXIT HUP + pip install zstandard + PYTHON=python else PYTHON="$PYTHON_BINARY" fi diff --git a/README.rst b/README.rst index 25d2f20f7..8361467c9 100644 --- a/README.rst +++ b/README.rst @@ -118,10 +118,15 @@ Wire protocol compression with snappy requires `python-snappy $ python -m pip install pymongo[snappy] +Wire protocol compression with zstandard requires `zstandard +<https://pypi.org/project/zstandard>`_:: + + $ python -m pip install pymongo[zstd] + You can install all dependencies automatically with the following command:: - $ python -m pip install pymongo[snappy,gssapi,srv,tls] + $ python -m pip install pymongo[snappy,gssapi,srv,tls,zstd] Other optional packages: diff --git a/doc/changelog.rst b/doc/changelog.rst index 12a863113..3ec906915 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -36,6 +36,7 @@ Version 3.9 adds support for MongoDB 4.2. Highlights include: - Support for retryable reads and the ``retryReads`` URI option which is enabled by default. See the :class:`~pymongo.mongo_client.MongoClient` documentation for details. +- Support zstandard for wire protocol compression. 
Now that supported operations are retried automatically and transparently, users should consider adjusting any custom retry logic to prevent diff --git a/doc/installation.rst b/doc/installation.rst index 989b5a8b6..c659f6c4a 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -75,10 +75,15 @@ Wire protocol compression with snappy requires `python-snappy $ python -m pip install pymongo[snappy] +Wire protocol compression with zstandard requires `zstandard +<https://pypi.org/project/zstandard>`_:: + + $ python -m pip install pymongo[zstd] + You can install all dependencies automatically with the following command:: - $ python -m pip install pymongo[snappy,gssapi,srv,tls] + $ python -m pip install pymongo[snappy,gssapi,srv,tls,zstd] Other optional packages: diff --git a/pymongo/compression_support.py b/pymongo/compression_support.py index f52d5e0d9..b6662f22f 100644 --- a/pymongo/compression_support.py +++ b/pymongo/compression_support.py @@ -28,9 +28,15 @@ except ImportError: # Python built without zlib support. _HAVE_ZLIB = False +try: + from zstandard import ZstdCompressor, ZstdDecompressor + _HAVE_ZSTD = True +except ImportError: + _HAVE_ZSTD = False + from pymongo.monitoring import _SENSITIVE_COMMANDS -_SUPPORTED_COMPRESSORS = set(["snappy", "zlib"]) +_SUPPORTED_COMPRESSORS = set(["snappy", "zlib", "zstd"]) _NO_COMPRESSION = set(['ismaster']) _NO_COMPRESSION.update(_SENSITIVE_COMMANDS) @@ -57,6 +63,11 @@ def validate_compressors(dummy, value): warnings.warn( "Wire protocol compression with zlib is not available. " "The zlib module is not available.") + elif compressor == "zstd" and not _HAVE_ZSTD: + compressors.remove(compressor) + warnings.warn( + "Wire protocol compression with zstandard is not available. 
" + "You must install the zstandard module for zstandard support.") return compressors @@ -83,6 +94,8 @@ class CompressionSettings(object): return SnappyContext() elif chosen == "zlib": return ZlibContext(self.zlib_compression_level) + elif chosen == "zstd": + return ZstdContext() def _zlib_no_compress(data): @@ -113,6 +126,16 @@ class ZlibContext(object): self.compress = lambda data: zlib.compress(data, level) +class ZstdContext(object): + compressor_id = 3 + + @staticmethod + def compress(data): + # ZstdCompressor is not thread safe. + # TODO: Use a pool? + return ZstdCompressor().compress(data) + + def decompress(data, compressor_id): if compressor_id == SnappyContext.compressor_id: # python-snappy doesn't support the buffer interface. @@ -126,5 +149,9 @@ def decompress(data, compressor_id): return snappy.uncompress(bytes(data)) elif compressor_id == ZlibContext.compressor_id: return zlib.decompress(data) + elif compressor_id == ZstdContext.compressor_id: + # ZstdDecompressor is not thread safe. + # TODO: Use a pool? + return ZstdDecompressor().decompress(data) else: raise ValueError("Unknown compressorId %d" % (compressor_id,)) diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index 6d94817cb..106a800a7 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -312,13 +312,15 @@ class MongoClient(common.BaseObject): https://docs.mongodb.com/manual/faq/diagnostics/#does-tcp-keepalive-time-affect-mongodb-deployments", - `compressors`: Comma separated list of compressors for wire protocol compression. The list is used to negotiate a compressor - with the server. Currently supported options are "snappy" and - "zlib". Support for snappy requires the + with the server. Currently supported options are "snappy", "zlib" + and "zstd". Support for snappy requires the `python-snappy `_ package. - zlib support requires the Python standard library zlib module. - By default no compression is used. 
Compression support must also be - enabled on the server. MongoDB 3.4+ supports snappy compression. - MongoDB 3.6+ supports snappy and zlib. + zlib support requires the Python standard library zlib module. zstd + requires the `zstandard <https://pypi.org/project/zstandard/>`_ + package. By default no compression is used. Compression support + must also be enabled on the server. MongoDB 3.4+ supports snappy + compression. MongoDB 3.6 adds support for zlib. MongoDB 4.2 adds + support for zstd. - `zlibCompressionLevel`: (int) The zlib compression level to use when zlib is used as the wire protocol compressor. Supported values are -1 through 9. -1 tells the zlib library to use its default diff --git a/setup.py b/setup.py index 3dcb07763..6601b2aa4 100755 --- a/setup.py +++ b/setup.py @@ -317,7 +317,7 @@ ext_modules = [Extension('bson._cbson', sources=['pymongo/_cmessagemodule.c', 'bson/buffer.c'])] -extras_require = {'snappy': ["python-snappy"]} +extras_require = {'snappy': ["python-snappy"], 'zstd': ["zstandard"]} vi = sys.version_info if vi[0] == 2: extras_require.update( diff --git a/test/test_client.py b/test/test_client.py index aeea75942..e8c41583a 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -36,7 +36,7 @@ from bson.tz_util import utc from pymongo import auth, message from pymongo.common import _UUID_REPRESENTATIONS from pymongo.command_cursor import CommandCursor -from pymongo.compression_support import _HAVE_SNAPPY +from pymongo.compression_support import _HAVE_SNAPPY, _HAVE_ZSTD from pymongo.cursor import Cursor, CursorType from pymongo.database import Database from pymongo.errors import (AutoReconnect, @@ -1343,6 +1343,21 @@ class TestClient(IntegrationTest): opts = compression_settings(client) self.assertEqual(opts.compressors, ['snappy', 'zlib']) + if not _HAVE_ZSTD: + uri = "mongodb://localhost:27017/?compressors=zstd" + client = MongoClient(uri, connect=False) + opts = compression_settings(client) + self.assertEqual(opts.compressors, []) + else: + uri = 
"mongodb://localhost:27017/?compressors=zstd" + client = MongoClient(uri, connect=False) + opts = compression_settings(client) + self.assertEqual(opts.compressors, ['zstd']) + uri = "mongodb://localhost:27017/?compressors=zstd,zlib" + client = MongoClient(uri, connect=False) + opts = compression_settings(client) + self.assertEqual(opts.compressors, ['zstd', 'zlib']) + options = client_context.default_client_options if "compressors" in options and "zlib" in options["compressors"]: for level in range(-1, 10):