PYTHON-1742 add postBatchResumeToken support
PYTHON-1815 add tests for postBatchResumeToken support
PYTHON-1845 clarify resume token used in resuming and getResumeToken
This commit is contained in:
parent
010e8d429b
commit
ddac30d2ff
@ -62,6 +62,8 @@ Version 3.9 adds support for MongoDB 4.2. Highlights include:
|
||||
:meth:`~pymongo.operations.UpdateMany`.
|
||||
- :class:`~bson.binary.Binary` now supports any bytes-like type that implements
|
||||
the buffer protocol.
|
||||
- Resume tokens can now be accessed from a ``ChangeStream`` cursor using the
|
||||
:attr:`~pymongo.change_stream.ChangeStream.resume_token` attribute.
|
||||
|
||||
.. _URI options specification: https://github.com/mongodb/specifications/blob/master/source/uri-options/uri-options.rst
|
||||
|
||||
|
||||
@ -77,13 +77,16 @@ class ChangeStream(object):
|
||||
|
||||
self._pipeline = copy.deepcopy(pipeline)
|
||||
self._full_document = full_document
|
||||
self._resume_token = copy.deepcopy(resume_after)
|
||||
self._uses_start_after = start_after is not None
|
||||
self._uses_resume_after = resume_after is not None
|
||||
self._resume_token = copy.deepcopy(start_after or resume_after)
|
||||
self._max_await_time_ms = max_await_time_ms
|
||||
self._batch_size = batch_size
|
||||
self._collation = collation
|
||||
self._start_at_operation_time = start_at_operation_time
|
||||
self._session = session
|
||||
self._start_after = copy.deepcopy(start_after)
|
||||
|
||||
# Initialize cursor.
|
||||
self._cursor = self._create_cursor()
|
||||
|
||||
@property
|
||||
@ -102,10 +105,14 @@ class ChangeStream(object):
|
||||
options = {}
|
||||
if self._full_document is not None:
|
||||
options['fullDocument'] = self._full_document
|
||||
if self._resume_token is not None:
|
||||
options['resumeAfter'] = self._resume_token
|
||||
if self._start_after is not None:
|
||||
options['startAfter'] = self._start_after
|
||||
|
||||
resume_token = self.resume_token
|
||||
if resume_token is not None:
|
||||
if self._uses_start_after:
|
||||
options['startAfter'] = resume_token
|
||||
if self._uses_resume_after:
|
||||
options['resumeAfter'] = resume_token
|
||||
|
||||
if self._start_at_operation_time is not None:
|
||||
options['startAtOperationTime'] = self._start_at_operation_time
|
||||
return options
|
||||
@ -127,12 +134,18 @@ class ChangeStream(object):
|
||||
return full_pipeline
|
||||
|
||||
def _process_result(self, result, session, server, sock_info, slave_ok):
|
||||
"""Callback that records a change stream cursor's operationTime."""
|
||||
if (self._start_at_operation_time is None and
|
||||
self._resume_token is None and
|
||||
self._start_after is None and
|
||||
sock_info.max_wire_version >= 7):
|
||||
self._start_at_operation_time = result["operationTime"]
|
||||
"""Callback that caches the startAtOperationTime from a changeStream
|
||||
aggregate command response containing an empty batch of change
|
||||
documents.
|
||||
|
||||
This is implemented as a callback because we need access to the wire
|
||||
version in order to determine whether to cache this value.
|
||||
"""
|
||||
if not result['cursor']['firstBatch']:
|
||||
if (self._start_at_operation_time is None and
|
||||
self.resume_token is None and
|
||||
sock_info.max_wire_version >= 7):
|
||||
self._start_at_operation_time = result["operationTime"]
|
||||
|
||||
def _run_aggregation_cmd(self, session, explicit_session):
|
||||
"""Run the full aggregation pipeline for this ChangeStream and return
|
||||
@ -168,6 +181,15 @@ class ChangeStream(object):
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
@property
|
||||
def resume_token(self):
|
||||
"""The cached resume token that will be used to resume after the most
|
||||
recently returned change.
|
||||
|
||||
.. versionadded:: 3.9
|
||||
"""
|
||||
return copy.deepcopy(self._resume_token)
|
||||
|
||||
def next(self):
|
||||
"""Advance the cursor.
|
||||
|
||||
@ -249,10 +271,18 @@ class ChangeStream(object):
|
||||
self._resume()
|
||||
change = self._cursor._try_next(False)
|
||||
|
||||
# No changes are available.
|
||||
# If no changes are available.
|
||||
if change is None:
|
||||
return None
|
||||
# We have either iterated over all documents in the cursor,
|
||||
# OR the most-recently returned batch is empty. In either case,
|
||||
# update the cached resume token with the postBatchResumeToken if
|
||||
# one was returned. We also clear the startAtOperationTime.
|
||||
if self._cursor._post_batch_resume_token is not None:
|
||||
self._resume_token = self._cursor._post_batch_resume_token
|
||||
self._start_at_operation_time = None
|
||||
return change
|
||||
|
||||
# Else, changes are available.
|
||||
try:
|
||||
resume_token = change['_id']
|
||||
except KeyError:
|
||||
@ -260,9 +290,20 @@ class ChangeStream(object):
|
||||
raise InvalidOperation(
|
||||
"Cannot provide resume functionality when the resume "
|
||||
"token is missing.")
|
||||
self._resume_token = copy.copy(resume_token)
|
||||
|
||||
# If this is the last change document from the current batch, cache the
|
||||
# postBatchResumeToken.
|
||||
if (not self._cursor._has_next() and
|
||||
self._cursor._post_batch_resume_token):
|
||||
resume_token = self._cursor._post_batch_resume_token
|
||||
|
||||
# Hereafter, don't use startAfter; instead use resumeAfter.
|
||||
self._uses_start_after = False
|
||||
self._uses_resume_after = True
|
||||
|
||||
# Cache the resume token and clear startAtOperationTime.
|
||||
self._resume_token = resume_token
|
||||
self._start_at_operation_time = None
|
||||
self._start_after = None
|
||||
|
||||
if self._decode_custom:
|
||||
return _bson_to_dict(change.raw, self._orig_codec_options)
|
||||
|
||||
@ -14,18 +14,14 @@
|
||||
|
||||
"""CommandCursor class to iterate over command results."""
|
||||
|
||||
import datetime
|
||||
|
||||
from collections import deque
|
||||
|
||||
from bson.py3compat import integer_types
|
||||
from pymongo import helpers
|
||||
from pymongo.errors import (ConnectionFailure,
|
||||
InvalidOperation,
|
||||
NotMasterError,
|
||||
OperationFailure)
|
||||
from pymongo.message import (_convert_exception,
|
||||
_CursorAddress,
|
||||
from pymongo.message import (_CursorAddress,
|
||||
_GetMore,
|
||||
_RawBatchGetMore)
|
||||
|
||||
@ -43,8 +39,9 @@ class CommandCursor(object):
|
||||
"""
|
||||
self.__collection = collection
|
||||
self.__id = cursor_info['id']
|
||||
self.__address = address
|
||||
self.__data = deque(cursor_info['firstBatch'])
|
||||
self.__postbatchresumetoken = cursor_info.get('postBatchResumeToken')
|
||||
self.__address = address
|
||||
self.__batch_size = batch_size
|
||||
self.__max_await_time_ms = max_await_time_ms
|
||||
self.__session = session
|
||||
@ -119,6 +116,17 @@ class CommandCursor(object):
|
||||
self.__batch_size = batch_size == 1 and 2 or batch_size
|
||||
return self
|
||||
|
||||
def _has_next(self):
|
||||
"""Returns `True` if the cursor has documents remaining from the
|
||||
previous batch."""
|
||||
return len(self.__data) > 0
|
||||
|
||||
@property
|
||||
def _post_batch_resume_token(self):
|
||||
"""Retrieve the postBatchResumeToken from the response to a
|
||||
changeStream aggregate or getMore."""
|
||||
return self.__postbatchresumetoken
|
||||
|
||||
def __send_message(self, operation):
|
||||
"""Send a getmore message and handle the response.
|
||||
"""
|
||||
@ -157,6 +165,7 @@ class CommandCursor(object):
|
||||
if from_command:
|
||||
cursor = docs[0]['cursor']
|
||||
documents = cursor['nextBatch']
|
||||
self.__postbatchresumetoken = cursor.get('postBatchResumeToken')
|
||||
self.__id = cursor['id']
|
||||
else:
|
||||
documents = docs
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user