PYTHON-1742 add postBatchResumeToken support

PYTHON-1815 add tests for postBatchResumeToken support
PYTHON-1845 clarify resume token used in resuming and getResumeToken
This commit is contained in:
Prashant Mital 2019-06-21 17:31:45 -04:00
parent 010e8d429b
commit ddac30d2ff
No known key found for this signature in database
GPG Key ID: 3D2DAA9E483ABE51
4 changed files with 814 additions and 428 deletions

View File

@ -62,6 +62,8 @@ Version 3.9 adds support for MongoDB 4.2. Highlights include:
:meth:`~pymongo.operations.UpdateMany`.
- :class:`~bson.binary.Binary` now supports any bytes-like type that implements
the buffer protocol.
- Resume tokens can now be accessed from a ``ChangeStream`` cursor using the
:attr:`~pymongo.change_stream.ChangeStream.resume_token` attribute.
.. _URI options specification: https://github.com/mongodb/specifications/blob/master/source/uri-options/uri-options.rst

View File

@ -77,13 +77,16 @@ class ChangeStream(object):
self._pipeline = copy.deepcopy(pipeline)
self._full_document = full_document
self._resume_token = copy.deepcopy(resume_after)
self._uses_start_after = start_after is not None
self._uses_resume_after = resume_after is not None
self._resume_token = copy.deepcopy(start_after or resume_after)
self._max_await_time_ms = max_await_time_ms
self._batch_size = batch_size
self._collation = collation
self._start_at_operation_time = start_at_operation_time
self._session = session
self._start_after = copy.deepcopy(start_after)
# Initialize cursor.
self._cursor = self._create_cursor()
@property
@ -102,10 +105,14 @@ class ChangeStream(object):
options = {}
if self._full_document is not None:
options['fullDocument'] = self._full_document
if self._resume_token is not None:
options['resumeAfter'] = self._resume_token
if self._start_after is not None:
options['startAfter'] = self._start_after
resume_token = self.resume_token
if resume_token is not None:
if self._uses_start_after:
options['startAfter'] = resume_token
if self._uses_resume_after:
options['resumeAfter'] = resume_token
if self._start_at_operation_time is not None:
options['startAtOperationTime'] = self._start_at_operation_time
return options
@ -127,12 +134,18 @@ class ChangeStream(object):
return full_pipeline
def _process_result(self, result, session, server, sock_info, slave_ok):
"""Callback that records a change stream cursor's operationTime."""
if (self._start_at_operation_time is None and
self._resume_token is None and
self._start_after is None and
sock_info.max_wire_version >= 7):
self._start_at_operation_time = result["operationTime"]
"""Callback that caches the startAtOperationTime from a changeStream
aggregate command response containing an empty batch of change
documents.
This is implemented as a callback because we need access to the wire
version in order to determine whether to cache this value.
"""
if not result['cursor']['firstBatch']:
if (self._start_at_operation_time is None and
self.resume_token is None and
sock_info.max_wire_version >= 7):
self._start_at_operation_time = result["operationTime"]
def _run_aggregation_cmd(self, session, explicit_session):
"""Run the full aggregation pipeline for this ChangeStream and return
@ -168,6 +181,15 @@ class ChangeStream(object):
def __iter__(self):
return self
@property
def resume_token(self):
"""The cached resume token that will be used to resume after the most
recently returned change.

The value is a deep copy of the token cached on this stream, so mutating
the returned document does not change the token used when resuming. It is
``None`` when no token has been cached.

.. versionadded:: 3.9
"""
return copy.deepcopy(self._resume_token)
def next(self):
"""Advance the cursor.
@ -249,10 +271,18 @@ class ChangeStream(object):
self._resume()
change = self._cursor._try_next(False)
# No changes are available.
# If no changes are available.
if change is None:
return None
# We have either iterated over all documents in the cursor,
# OR the most-recently returned batch is empty. In either case,
# update the cached resume token with the postBatchResumeToken if
# one was returned. We also clear the startAtOperationTime.
if self._cursor._post_batch_resume_token is not None:
self._resume_token = self._cursor._post_batch_resume_token
self._start_at_operation_time = None
return change
# Else, changes are available.
try:
resume_token = change['_id']
except KeyError:
@ -260,9 +290,20 @@ class ChangeStream(object):
raise InvalidOperation(
"Cannot provide resume functionality when the resume "
"token is missing.")
self._resume_token = copy.copy(resume_token)
# If this is the last change document from the current batch, cache the
# postBatchResumeToken.
if (not self._cursor._has_next() and
self._cursor._post_batch_resume_token):
resume_token = self._cursor._post_batch_resume_token
# Hereafter, don't use startAfter; instead use resumeAfter.
self._uses_start_after = False
self._uses_resume_after = True
# Cache the resume token and clear startAtOperationTime.
self._resume_token = resume_token
self._start_at_operation_time = None
self._start_after = None
if self._decode_custom:
return _bson_to_dict(change.raw, self._orig_codec_options)

View File

@ -14,18 +14,14 @@
"""CommandCursor class to iterate over command results."""
import datetime
from collections import deque
from bson.py3compat import integer_types
from pymongo import helpers
from pymongo.errors import (ConnectionFailure,
InvalidOperation,
NotMasterError,
OperationFailure)
from pymongo.message import (_convert_exception,
_CursorAddress,
from pymongo.message import (_CursorAddress,
_GetMore,
_RawBatchGetMore)
@ -43,8 +39,9 @@ class CommandCursor(object):
"""
self.__collection = collection
self.__id = cursor_info['id']
self.__address = address
self.__data = deque(cursor_info['firstBatch'])
self.__postbatchresumetoken = cursor_info.get('postBatchResumeToken')
self.__address = address
self.__batch_size = batch_size
self.__max_await_time_ms = max_await_time_ms
self.__session = session
@ -119,6 +116,17 @@ class CommandCursor(object):
self.__batch_size = batch_size == 1 and 2 or batch_size
return self
def _has_next(self):
    """Return ``True`` if documents from the current batch remain unread.

    Only the locally buffered batch is inspected; nothing is requested
    from the server.
    """
    # An empty deque is falsy, so truthiness replaces the len() comparison.
    return bool(self.__data)
@property
def _post_batch_resume_token(self):
"""Retrieve the postBatchResumeToken from the response to a
changeStream aggregate or getMore.

The value is read with ``.get('postBatchResumeToken')`` from the cursor
document of the initial command and of each subsequent getMore command
response, so it is ``None`` whenever the most recent response did not
include that field.
"""
return self.__postbatchresumetoken
def __send_message(self, operation):
"""Send a getmore message and handle the response.
"""
@ -157,6 +165,7 @@ class CommandCursor(object):
if from_command:
cursor = docs[0]['cursor']
documents = cursor['nextBatch']
self.__postbatchresumetoken = cursor.get('postBatchResumeToken')
self.__id = cursor['id']
else:
documents = docs

File diff suppressed because it is too large Load Diff