PYTHON-1337 Add __slots__ to commonly used bson classes (#739)

This commit is contained in:
Julius Park 2021-09-24 13:54:40 -07:00 committed by GitHub
parent 4b447365d1
commit c38085269b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 158 additions and 8 deletions

40
bson/_helpers.py Normal file
View File

@ -0,0 +1,40 @@
# Copyright 2021-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setstate and getstate functions for objects with __slots__, allowing
compatibility with the default pickling protocol.
"""
def _setstate_slots(self, state):
for slot, value in state.items():
setattr(self, slot, value)
def _mangle_name(name, prefix):
if name.startswith("__"):
prefix = "_"+prefix
else:
prefix = ""
return prefix + name
def _getstate_slots(self):
    """Build a picklable state dict for an object whose class uses ``__slots__``.

    Walks the class hierarchy of ``self`` and, for each slot declared by a
    class, stores the slot's value under the name mangled with *that*
    class's name. Mangling with the declaring class (rather than the
    runtime class) keeps inherited private slots reachable on subclass
    instances. Slots that are currently unset are omitted.

    If the instance also has a ``__dict__`` (for example a subclass that
    does not declare ``__slots__``), its entries are included as well.

    :Returns:
      A dict mapping attribute names to their current values.
    """
    ret = {}
    for klass in type(self).__mro__:
        # Read the slots this class itself declares, not inherited ones.
        slots = klass.__dict__.get("__slots__", ())
        if isinstance(slots, str):
            # A bare string declares a single slot.
            slots = (slots,)
        for name in slots:
            if name in ("__dict__", "__weakref__"):
                # Bookkeeping slots, not attribute state.
                continue
            mangled_name = _mangle_name(name, klass.__name__)
            if hasattr(self, mangled_name):
                ret[mangled_name] = getattr(self, mangled_name)
    instance_dict = getattr(self, "__dict__", None)
    if instance_dict:
        ret.update(instance_dict)
    return ret

View File

@ -17,12 +17,14 @@
from copy import deepcopy
from bson.son import SON
from bson._helpers import _getstate_slots, _setstate_slots
class DBRef(object):
"""A reference to a document stored in MongoDB.
"""
__slots__ = "__collection", "__id", "__database", "__kwargs"
__getstate__ = _getstate_slots
__setstate__ = _setstate_slots
# DBRef isn't actually a BSON "type" so this number was arbitrarily chosen.
_type_marker = 100
@ -81,12 +83,6 @@ class DBRef(object):
except KeyError:
raise AttributeError(key)
# Have to provide __setstate__ to avoid
# infinite recursion since we override
# __getattr__.
def __setstate__(self, state):
self.__dict__.update(state)
def as_doc(self):
"""Get the SON document representation of this DBRef.

View File

@ -24,5 +24,12 @@ class Int64(int):
:Parameters:
- `value`: the numeric value to represent
"""
__slots__ = ()
_type_marker = 18
def __getstate__(self):
return {}
def __setstate__(self, state):
pass

View File

@ -18,9 +18,16 @@
class MaxKey(object):
"""MongoDB internal MaxKey type."""
__slots__ = ()
_type_marker = 127
def __getstate__(self):
return {}
def __setstate__(self, state):
pass
def __eq__(self, other):
    """Return True when ``other`` is an instance of MaxKey."""
    other_is_max_key = isinstance(other, MaxKey)
    return other_is_max_key

View File

@ -18,9 +18,16 @@
class MinKey(object):
"""MongoDB internal MinKey type."""
__slots__ = ()
_type_marker = 255
def __getstate__(self):
return {}
def __setstate__(self, state):
pass
def __eq__(self, other):
    """Return True when ``other`` is an instance of MinKey."""
    other_is_min_key = isinstance(other, MinKey)
    return other_is_min_key

View File

@ -18,6 +18,7 @@
import re
from bson.son import RE_TYPE
from bson._helpers import _getstate_slots, _setstate_slots
def str_flags_to_int(str_flags):
@ -40,6 +41,11 @@ def str_flags_to_int(str_flags):
class Regex(object):
"""BSON regular expression data."""
__slots__ = ("pattern", "flags")
__getstate__ = _getstate_slots
__setstate__ = _setstate_slots
_type_marker = 11
@classmethod

View File

@ -19,6 +19,7 @@ import calendar
import datetime
from bson.tz_util import utc
from bson._helpers import _getstate_slots, _setstate_slots
UPPERBOUND = 4294967296
@ -26,6 +27,10 @@ UPPERBOUND = 4294967296
class Timestamp(object):
"""MongoDB internal timestamps used in the opLog.
"""
__slots__ = ("__time", "__inc")
__getstate__ = _getstate_slots
__setstate__ = _setstate_slots
_type_marker = 17

View File

@ -143,6 +143,11 @@ Breaking Changes in 4.0
opposed to
the previous syntax which was simply ``if collection:`` or ``if database:``.
You must now explicitly compare with None.
- Classes :class:`~bson.int64.Int64`, :class:`~bson.min_key.MinKey`,
:class:`~bson.max_key.MaxKey`, :class:`~bson.timestamp.Timestamp`,
:class:`~bson.regex.Regex`, and :class:`~bson.dbref.DBRef` all implement
``__slots__`` now. This means that their attributes are fixed, and new
attributes cannot be added to them at runtime.
- Empty projections (e.g. {} or []) for
:meth:`~pymongo.collection.Collection.find`, and
:meth:`~pymongo.collection.Collection.find_one`

View File

@ -840,3 +840,12 @@ The default uuid_representation for :class:`~bson.codec_options.CodecOptions`,
:data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a
:class:`uuid.UUID` instance to BSON or JSON now produces an error by default.
See :ref:`handling-uuid-data-example` for details.
Additional BSON classes implement ``__slots__``
...............................................
:class:`~bson.int64.Int64`, :class:`~bson.min_key.MinKey`,
:class:`~bson.max_key.MaxKey`, :class:`~bson.timestamp.Timestamp`,
:class:`~bson.regex.Regex`, and :class:`~bson.dbref.DBRef` now implement
``__slots__`` to reduce memory usage. This means that their attributes are fixed, and new
attributes cannot be added to the object at runtime.

View File

@ -25,6 +25,7 @@ import re
import sys
import tempfile
import uuid
import pickle
from collections import abc, OrderedDict
from io import BytesIO
@ -1053,6 +1054,73 @@ class TestCodecOptions(unittest.TestCase):
self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions(
unicode_decode_error_handler="junk"))
def round_trip_pickle(self, obj, pickled_with_older):
    """Check that ``obj`` survives pickling under every pickle protocol.

    ``pickled_with_older`` is a stored pickle payload. Each round-tripped
    copy of ``obj`` must equal both ``obj`` itself and the object loaded
    from that payload.
    """
    expected_from_older = pickle.loads(pickled_with_older)
    for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
        restored = pickle.loads(pickle.dumps(obj, protocol=proto))
        self.assertEqual(obj, restored)
        self.assertEqual(expected_from_older, restored)
def test_regex_pickling(self):
    """Round-trip a Regex through pickle and compare with a stored payload."""
    regex = Regex(".?")
    stored_payload = (b'\x80\x04\x959\x00\x00\x00\x00\x00\x00\x00\x8c\n'
                      b'bson.regex\x94\x8c\x05Regex\x94\x93\x94)\x81\x94}'
                      b'\x94(\x8c\x07pattern\x94\x8c\x02.?\x94\x8c\x05flag'
                      b's\x94K\x00ub.')
    self.round_trip_pickle(regex, stored_payload)
def test_timestamp_pickling(self):
    """Round-trip a Timestamp through pickle and compare with a stored payload."""
    timestamp = Timestamp(0, 1)
    stored_payload = (b'\x80\x04\x95Q\x00\x00\x00\x00\x00\x00\x00\x8c'
                      b'\x0ebson.timestamp\x94\x8c\tTimestamp\x94\x93\x94)'
                      b'\x81\x94}\x94('
                      b'\x8c\x10_Timestamp__time\x94K\x00\x8c'
                      b'\x0f_Timestamp__inc\x94K\x01ub.')
    self.round_trip_pickle(timestamp, stored_payload)
def test_dbref_pickling(self):
    """Round-trip two DBRefs through pickle against stored payloads.

    The first DBRef has only a collection and an id; the second also sets
    a database and one extra keyword argument.
    """
    plain_ref = DBRef("foo", 5)
    plain_payload = (b'\x80\x04\x95q\x00\x00\x00\x00\x00\x00\x00\x8c\n'
                     b'bson.dbref\x94\x8c\x05DBRef\x94\x93\x94)\x81\x94}'
                     b'\x94(\x8c\x12_DBRef__collection\x94\x8c\x03foo\x94'
                     b'\x8c\n_DBRef__id\x94K\x05\x8c\x10_DBRef__database'
                     b'\x94N\x8c\x0e_DBRef__kwargs\x94}\x94ub.')
    self.round_trip_pickle(plain_ref, plain_payload)

    full_ref = DBRef("foo", 5, database='db', kwargs1=None)
    full_payload = (b'\x80\x04\x95\x81\x00\x00\x00\x00\x00\x00\x00\x8c'
                    b'\nbson.dbref\x94\x8c\x05DBRef\x94\x93\x94)\x81\x94}'
                    b'\x94(\x8c\x12_DBRef__collection\x94\x8c\x03foo\x94'
                    b'\x8c\n_DBRef__id\x94K\x05\x8c\x10_DBRef__database'
                    b'\x94\x8c\x02db\x94\x8c\x0e_DBRef__kwargs\x94}\x94'
                    b'\x8c\x07kwargs1\x94Nsub.')
    self.round_trip_pickle(full_ref, full_payload)
def test_minkey_pickling(self):
    """Round-trip a MinKey through pickle and compare with a stored payload."""
    min_key = MinKey()
    stored_payload = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c'
                      b'\x0cbson.min_key\x94\x8c\x06MinKey\x94\x93\x94)'
                      b'\x81\x94.')
    self.round_trip_pickle(min_key, stored_payload)
def test_maxkey_pickling(self):
    """Round-trip a MaxKey through pickle and compare with a stored payload."""
    max_key = MaxKey()
    stored_payload = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c'
                      b'\x0cbson.max_key\x94\x8c\x06MaxKey\x94\x93\x94)'
                      b'\x81\x94.')
    self.round_trip_pickle(max_key, stored_payload)
def test_int64_pickling(self):
    """Round-trip an Int64 through pickle and compare with a stored payload."""
    int64_value = Int64(9)
    stored_payload = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c\n'
                      b'bson.int64\x94\x8c\x05Int64\x94\x93\x94K\t\x85\x94'
                      b'\x81\x94.')
    self.round_trip_pickle(int64_value, stored_payload)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()