From 651aa6aa98641c49c4dbbff5ab83f28904ee3baf Mon Sep 17 00:00:00 2001 From: Prashant Mital Date: Mon, 24 Feb 2020 13:35:28 -0800 Subject: [PATCH] PYTHON-2035: support for allowDiskUse in find() commands --- doc/changelog.rst | 2 + pymongo/collection.py | 64 +++++++++++++---------- pymongo/cursor.py | 30 ++++++++++- pymongo/message.py | 20 ++++++-- test/crud/v2/find-allowdiskuse.json | 78 +++++++++++++++++++++++++++++ test/test_cursor.py | 12 +++++ 6 files changed, 173 insertions(+), 33 deletions(-) create mode 100644 test/crud/v2/find-allowdiskuse.json diff --git a/doc/changelog.rst b/doc/changelog.rst index e9d57304b..9aedb72fc 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -6,6 +6,8 @@ Changes in Version 3.11.0 Version 3.11 adds support for MongoDB 4.4. Highlights include: +- Added the ``allow_disk_use`` parameter to + :meth:`pymongo.collection.Collection.find`. - Support for :ref:`OCSP` (Online Certificate Status Protocol) - Support for `PyOpenSSL <https://pypi.org/project/pyOpenSSL/>`_ as an alternative TLS implementation. PyOpenSSL is required for :ref:`OCSP` diff --git a/pymongo/collection.py b/pymongo/collection.py index 502c9533d..60889fd9a 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -1412,6 +1412,11 @@ class Collection(common.BaseObject): - `modifiers` (optional): **DEPRECATED** - A dict specifying additional MongoDB query modifiers. Use the keyword arguments listed above instead. + - `allow_disk_use` (optional): if True, MongoDB may use temporary + disk files to store data exceeding the system memory limit while + processing a blocking sort operation. The option has no effect if + MongoDB can satisfy the specified sort using an index, or if the + blocking sort requires less memory than the 100 MiB limit. .. note:: There are a number of caveats to using :attr:`~pymongo.cursor.CursorType.EXHAUST` as cursor_type: @@ -1429,48 +1434,55 @@ class Collection(common.BaseObject): connection will be closed and discarded without being returned to the connection pool. 
- .. versionchanged:: 3.7 - Deprecated the `snapshot` option, which is deprecated in MongoDB - 3.6 and removed in MongoDB 4.0. - Deprecated the `max_scan` option. Support for this option is - deprecated in MongoDB 4.0. Use `max_time_ms` instead to limit server - side execution time. + .. versionchanged:: 3.11 + Added the ``allow_disk_use`` option. + .. versionchanged:: 3.7 + Deprecated the ``snapshot`` option, which is deprecated in MongoDB + 3.6 and removed in MongoDB 4.0. + Deprecated the ``max_scan`` option. Support for this option is + deprecated in MongoDB 4.0. Use ``max_time_ms`` instead to limit + server-side execution time. .. versionchanged:: 3.6 Added ``session`` parameter. .. versionchanged:: 3.5 - Added the options `return_key`, `show_record_id`, `snapshot`, - `hint`, `max_time_ms`, `max_scan`, `min`, `max`, and `comment`. - Deprecated the option `modifiers`. + Added the options ``return_key``, ``show_record_id``, ``snapshot``, + ``hint``, ``max_time_ms``, ``max_scan``, ``min``, ``max``, and + ``comment``. + Deprecated the ``modifiers`` option. .. versionchanged:: 3.4 - Support the `collation` option. + Added support for the ``collation`` option. .. versionchanged:: 3.0 - Changed the parameter names `spec`, `fields`, `timeout`, and - `partial` to `filter`, `projection`, `no_cursor_timeout`, and - `allow_partial_results` respectively. - Added the `cursor_type`, `oplog_replay`, and `modifiers` options. - Removed the `network_timeout`, `read_preference`, `tag_sets`, - `secondary_acceptable_latency_ms`, `max_scan`, `snapshot`, - `tailable`, `await_data`, `exhaust`, `as_class`, and slave_okay - parameters. Removed `compile_re` option: PyMongo now always + Changed the parameter names ``spec``, ``fields``, ``timeout``, and + ``partial`` to ``filter``, ``projection``, ``no_cursor_timeout``, + and ``allow_partial_results`` respectively. + Added the ``cursor_type``, ``oplog_replay``, and ``modifiers`` + options. 
+ Removed the ``network_timeout``, ``read_preference``, ``tag_sets``, + ``secondary_acceptable_latency_ms``, ``max_scan``, ``snapshot``, + ``tailable``, ``await_data``, ``exhaust``, ``as_class``, and + ``slave_okay`` parameters. + Removed ``compile_re`` option: PyMongo now always represents BSON regular expressions as :class:`~bson.regex.Regex` objects. Use :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular - expression object. Soft deprecated the `manipulate` option. + expression object. + Soft deprecated the ``manipulate`` option. .. versionchanged:: 2.7 - Added `compile_re` option. If set to False, PyMongo represented BSON - regular expressions as :class:`~bson.regex.Regex` objects instead of - attempting to compile BSON regular expressions as Python native - regular expressions, thus preventing errors for some incompatible - patterns, see `PYTHON-500`_. + Added ``compile_re`` option. If set to False, PyMongo represented + BSON regular expressions as :class:`~bson.regex.Regex` objects + instead of attempting to compile BSON regular expressions as Python + native regular expressions, thus preventing errors for some + incompatible patterns, see `PYTHON-500`_. - .. versionadded:: 2.3 - The `tag_sets` and `secondary_acceptable_latency_ms` parameters. + .. versionchanged:: 2.3 + Added the ``tag_sets`` and ``secondary_acceptable_latency_ms`` + parameters. .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 diff --git a/pymongo/cursor.py b/pymongo/cursor.py index 5de40518c..52ed1c0c5 100644 --- a/pymongo/cursor.py +++ b/pymongo/cursor.py @@ -114,7 +114,8 @@ class Cursor(object): modifiers=None, batch_size=0, manipulate=True, collation=None, hint=None, max_scan=None, max_time_ms=None, max=None, min=None, return_key=False, show_record_id=False, - snapshot=False, comment=None, session=None): + snapshot=False, comment=None, session=None, + allow_disk_use=None): """Create a new cursor. 
Should not be called directly by application developers - see @@ -159,6 +160,9 @@ class Cursor(object): raise TypeError("batch_size must be an integer") if batch_size < 0: raise ValueError("batch_size must be >= 0") + # Only set if allow_disk_use is provided by the user, else None. + if allow_disk_use is not None: + allow_disk_use = validate_boolean("allow_disk_use", allow_disk_use) if projection is not None: if not projection: @@ -184,6 +188,7 @@ class Cursor(object): self.__collation = validate_collation_or_none(collation) self.__return_key = return_key self.__show_record_id = show_record_id + self.__allow_disk_use = allow_disk_use self.__snapshot = snapshot self.__set_hint(hint) @@ -426,6 +431,26 @@ class Cursor(object): self.__query_flags &= ~mask return self + def allow_disk_use(self, allow_disk_use): + """Specifies whether MongoDB can use temporary disk files while + processing a blocking sort operation. + + Raises :exc:`TypeError` if `allow_disk_use` is not a boolean. + + :Parameters: + - `allow_disk_use`: if True, MongoDB may use temporary + disk files to store data exceeding the system memory limit while + processing a blocking sort operation. + + .. versionadded:: 3.11 + """ + if not isinstance(allow_disk_use, bool): + raise TypeError('allow_disk_use must be a bool') + self.__check_okay_to_chain() + + self.__allow_disk_use = allow_disk_use + return self + def limit(self, limit): """Limits the number of results to be returned by this cursor. 
@@ -1069,7 +1094,8 @@ class Cursor(object): self.__read_concern, self.__collation, self.__session, - self.__collection.database.client) + self.__collection.database.client, + self.__allow_disk_use) self.__send_message(q) elif self.__id: # Get More if self.__limit: diff --git a/pymongo/message.py b/pymongo/message.py index 1f34efa95..9efb835c1 100644 --- a/pymongo/message.py +++ b/pymongo/message.py @@ -182,7 +182,8 @@ _MODIFIERS = SON([ def _gen_find_command(coll, spec, projection, skip, limit, batch_size, options, - read_concern, collation=None, session=None): + read_concern, collation=None, session=None, + allow_disk_use=None): """Generate a find command document.""" cmd = SON([('find', coll)]) if '$query' in spec: @@ -209,10 +210,13 @@ def _gen_find_command(coll, spec, projection, skip, limit, batch_size, options, cmd['readConcern'] = read_concern.document if collation: cmd['collation'] = collation + if allow_disk_use is not None: + cmd['allowDiskUse'] = allow_disk_use if options: cmd.update([(opt, True) for opt, val in _OPTIONS.items() if options & val]) + return cmd @@ -233,7 +237,7 @@ class _Query(object): __slots__ = ('flags', 'db', 'coll', 'ntoskip', 'spec', 'fields', 'codec_options', 'read_preference', 'limit', 'batch_size', 'name', 'read_concern', 'collation', - 'session', 'client', '_as_command') + 'session', 'client', 'allow_disk_use', '_as_command') # For compatibility with the _GetMore class. 
exhaust_mgr = None @@ -241,7 +245,8 @@ class _Query(object): def __init__(self, flags, db, coll, ntoskip, spec, fields, codec_options, read_preference, limit, - batch_size, read_concern, collation, session, client): + batch_size, read_concern, collation, session, client, + allow_disk_use): self.flags = flags self.db = db self.coll = coll @@ -256,6 +261,7 @@ class _Query(object): self.collation = collation self.session = session self.client = client + self.allow_disk_use = allow_disk_use self.name = 'find' self._as_command = None @@ -279,6 +285,10 @@ class _Query(object): 'Specifying a collation is unsupported with a max wire ' 'version of %d.' % (sock_info.max_wire_version,)) + if sock_info.max_wire_version < 4 and self.allow_disk_use is not None: + # Ignore allowDiskUse for MongoDB < 3.2. + self.allow_disk_use = None + sock_info.validate_session(self.client, self.session) return use_find_cmd @@ -294,7 +304,7 @@ class _Query(object): cmd = _gen_find_command( self.coll, self.spec, self.fields, self.ntoskip, self.limit, self.batch_size, self.flags, self.read_concern, - self.collation, self.session) + self.collation, self.session, self.allow_disk_use) if explain: self.name = 'explain' cmd = SON([('explain', cmd)]) @@ -1629,7 +1639,7 @@ def _first_batch(sock_info, db, coll, query, ntoreturn, query = _Query( 0, db, coll, 0, query, None, codec_options, read_preference, ntoreturn, 0, DEFAULT_READ_CONCERN, None, None, - None) + None, None) name = next(iter(cmd)) publish = listeners.enabled_for_commands diff --git a/test/crud/v2/find-allowdiskuse.json b/test/crud/v2/find-allowdiskuse.json new file mode 100644 index 000000000..2df4dbc98 --- /dev/null +++ b/test/crud/v2/find-allowdiskuse.json @@ -0,0 +1,78 @@ +{ + "runOn": [ + { + "minServerVersion": "4.3.1" + } + ], + "collection_name": "test_find_allowdiskuse", + "tests": [ + { + "description": "Find does not send allowDiskuse when value is not specified", + "operations": [ + { + "object": "collection", + "name": "find", + 
"arguments": { + "filter": {} + } + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "find": "test_find_allowdiskuse", + "allowDiskUse": null + } + } + } + ] + }, + { + "description": "Find sends allowDiskuse false when false is specified", + "operations": [ + { + "object": "collection", + "name": "find", + "arguments": { + "filter": {}, + "allowDiskUse": false + } + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "find": "test_find_allowdiskuse", + "allowDiskUse": false + } + } + } + ] + }, + { + "description": "Find sends allowDiskUse true when true is specified", + "operations": [ + { + "object": "collection", + "name": "find", + "arguments": { + "filter": {}, + "allowDiskUse": true + } + } + ], + "expectations": [ + { + "command_started_event": { + "command": { + "find": "test_find_allowdiskuse", + "allowDiskUse": true + } + } + } + ] + } + ] +} \ No newline at end of file diff --git a/test/test_cursor.py b/test/test_cursor.py index 6c5f34afa..c04c58def 100644 --- a/test/test_cursor.py +++ b/test/test_cursor.py @@ -146,6 +146,18 @@ class TestCursor(IntegrationTest): self.assertEqual(0, cursor._Cursor__query_flags) self.assertFalse(cursor._Cursor__exhaust) + def test_allow_disk_use(self): + db = self.db + db.pymongo_test.drop() + coll = db.pymongo_test + + self.assertRaises(TypeError, coll.find().allow_disk_use, 'baz') + + cursor = coll.find().allow_disk_use(True) + self.assertEqual(True, cursor._Cursor__allow_disk_use) + cursor = coll.find().allow_disk_use(False) + self.assertEqual(False, cursor._Cursor__allow_disk_use) + def test_max_time_ms(self): db = self.db db.pymongo_test.drop()