From c38085269bf615a53594db755dec3876bdfe0f6a Mon Sep 17 00:00:00 2001 From: Julius Park Date: Fri, 24 Sep 2021 13:54:40 -0700 Subject: [PATCH] PYTHON-1337 Add __slots__ to commonly used bson classes (#739) --- bson/_helpers.py | 40 ++++++++++++++++++++++ bson/dbref.py | 12 +++---- bson/int64.py | 7 ++++ bson/max_key.py | 7 ++++ bson/min_key.py | 7 ++++ bson/regex.py | 6 ++++ bson/timestamp.py | 5 +++ doc/changelog.rst | 5 +++ doc/migrate-to-pymongo4.rst | 9 +++++ test/test_bson.py | 68 +++++++++++++++++++++++++++++++++++++ 10 files changed, 158 insertions(+), 8 deletions(-) create mode 100644 bson/_helpers.py diff --git a/bson/_helpers.py b/bson/_helpers.py new file mode 100644 index 000000000..6449705eb --- /dev/null +++ b/bson/_helpers.py @@ -0,0 +1,40 @@ +# Copyright 2021-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Setstate and getstate functions for objects with __slots__, allowing + compatibility with default pickling protocol +""" + + +def _setstate_slots(self, state): + for slot, value in state.items(): + setattr(self, slot, value) + + +def _mangle_name(name, prefix): + if name.startswith("__"): + prefix = "_"+prefix + else: + prefix = "" + return prefix + name + + +def _getstate_slots(self): + prefix = self.__class__.__name__ + ret = dict() + for name in self.__slots__: + mangled_name = _mangle_name(name, prefix) + if hasattr(self, mangled_name): + ret[mangled_name] = getattr(self, mangled_name) + return ret diff --git a/bson/dbref.py b/bson/dbref.py index 2edaf6902..24e97a669 100644 --- a/bson/dbref.py +++ b/bson/dbref.py @@ -17,12 +17,14 @@ from copy import deepcopy from bson.son import SON - +from bson._helpers import _getstate_slots, _setstate_slots class DBRef(object): """A reference to a document stored in MongoDB. """ - + __slots__ = "__collection", "__id", "__database", "__kwargs" + __getstate__ = _getstate_slots + __setstate__ = _setstate_slots # DBRef isn't actually a BSON "type" so this number was arbitrarily chosen. _type_marker = 100 @@ -81,12 +83,6 @@ class DBRef(object): except KeyError: raise AttributeError(key) - # Have to provide __setstate__ to avoid - # infinite recursion since we override - # __getattr__. - def __setstate__(self, state): - self.__dict__.update(state) - def as_doc(self): """Get the SON document representation of this DBRef. diff --git a/bson/int64.py b/bson/int64.py index 4fce5ad7b..fb9bfe914 100644 --- a/bson/int64.py +++ b/bson/int64.py @@ -24,5 +24,12 @@ class Int64(int): :Parameters: - `value`: the numeric value to represent """ + __slots__ = () _type_marker = 18 + + def __getstate__(self): + return {} + + def __setstate__(self, state): + pass diff --git a/bson/max_key.py b/bson/max_key.py index efdf5c78b..afd7fcb1b 100644 --- a/bson/max_key.py +++ b/bson/max_key.py @@ -18,9 +18,16 @@ class MaxKey(object): """MongoDB internal MaxKey type.""" + __slots__ = () _type_marker = 127 + def __getstate__(self): + return {} + + def __setstate__(self, state): + pass + def __eq__(self, other): return isinstance(other, MaxKey) diff --git a/bson/min_key.py b/bson/min_key.py index 7d2b3a6dd..bcb7f9e60 100644 --- a/bson/min_key.py +++ b/bson/min_key.py @@ -18,9 +18,16 @@ class MinKey(object): """MongoDB internal MinKey type.""" + __slots__ = () _type_marker = 255 + def __getstate__(self): + return {} + + def __setstate__(self, state): + pass + def __eq__(self, other): return isinstance(other, MinKey) diff --git a/bson/regex.py b/bson/regex.py index 3a9042500..5cf097f08 100644 --- a/bson/regex.py +++ b/bson/regex.py @@ -18,6 +18,7 @@ import re from bson.son import RE_TYPE +from bson._helpers import _getstate_slots, _setstate_slots def str_flags_to_int(str_flags): @@ -40,6 +41,11 @@ def str_flags_to_int(str_flags): class Regex(object): """BSON regular expression data.""" + __slots__ = ("pattern", "flags") + + __getstate__ = _getstate_slots + __setstate__ = _setstate_slots + _type_marker = 11 @classmethod diff --git a/bson/timestamp.py b/bson/timestamp.py index 5e497f4c8..69c061d2a 100644 --- a/bson/timestamp.py +++ b/bson/timestamp.py @@ -19,6 +19,7 @@ import calendar import datetime from bson.tz_util import utc +from bson._helpers import _getstate_slots, _setstate_slots UPPERBOUND = 4294967296 @@ -26,6 +27,10 @@ UPPERBOUND = 4294967296 class Timestamp(object): """MongoDB internal timestamps used in the opLog. """ + __slots__ = ("__time", "__inc") + + __getstate__ = _getstate_slots + __setstate__ = _setstate_slots _type_marker = 17 diff --git a/doc/changelog.rst b/doc/changelog.rst index b50563bc4..d570d8377 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -143,6 +143,11 @@ Breaking Changes in 4.0 opposed to the previous syntax which was simply ``if collection:`` or ``if database:``. You must now explicitly compare with None. +- Classes :class:`~bson.int64.Int64`, :class:`~bson.min_key.MinKey`, + :class:`~bson.max_key.MaxKey`, :class:`~bson.timestamp.Timestamp`, + :class:`~bson.regex.Regex`, and :class:`~bson.dbref.DBRef` all implement + ``__slots__`` now. This means that their attributes are fixed, and new + attributes cannot be added to them at runtime. - Empty projections (eg {} or []) for :meth:`~pymongo.collection.Collection.find`, and :meth:`~pymongo.collection.Collection.find_one` diff --git a/doc/migrate-to-pymongo4.rst b/doc/migrate-to-pymongo4.rst index 0ac6d393d..0630f5816 100644 --- a/doc/migrate-to-pymongo4.rst +++ b/doc/migrate-to-pymongo4.rst @@ -840,3 +840,12 @@ The default uuid_representation for :class:`~bson.codec_options.CodecOptions`, :data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a :class:`uuid.UUID` instance to BSON or JSON now produces an error by default. See :ref:`handling-uuid-data-example` for details. + +Additional BSON classes implement ``__slots__`` +............................................... + +:class:`~bson.int64.Int64`, :class:`~bson.min_key.MinKey`, +:class:`~bson.max_key.MaxKey`, :class:`~bson.timestamp.Timestamp`, +:class:`~bson.regex.Regex`, and :class:`~bson.dbref.DBRef` now implement +``__slots__`` to reduce memory usage. This means that their attributes are fixed, and new +attributes cannot be added to the object at runtime. \ No newline at end of file diff --git a/test/test_bson.py b/test/test_bson.py index 5c0f163bb..b91bc7f5f 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -25,6 +25,7 @@ import re import sys import tempfile import uuid +import pickle from collections import abc, OrderedDict from io import BytesIO @@ -1053,6 +1054,73 @@ class TestCodecOptions(unittest.TestCase): self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions( unicode_decode_error_handler="junk")) + def round_trip_pickle(self, obj, pickled_with_older): + pickled_with_older_obj = pickle.loads(pickled_with_older) + for protocol in range(pickle.HIGHEST_PROTOCOL + 1): + pkl = pickle.dumps(obj, protocol=protocol) + obj2 = pickle.loads(pkl) + self.assertEqual(obj, obj2) + self.assertEqual(pickled_with_older_obj, obj2) + + def test_regex_pickling(self): + reg = Regex(".?") + pickled_with_3 = (b'\x80\x04\x959\x00\x00\x00\x00\x00\x00\x00\x8c\n' + b'bson.regex\x94\x8c\x05Regex\x94\x93\x94)\x81\x94}' + b'\x94(\x8c\x07pattern\x94\x8c\x02.?\x94\x8c\x05flag' + b's\x94K\x00ub.') + self.round_trip_pickle(reg, pickled_with_3) + + def test_timestamp_pickling(self): + ts = Timestamp(0, 1) + pickled_with_3 = (b'\x80\x04\x95Q\x00\x00\x00\x00\x00\x00\x00\x8c' + b'\x0ebson.timestamp\x94\x8c\tTimestamp\x94\x93\x94)' + b'\x81\x94}\x94(' + b'\x8c\x10_Timestamp__time\x94K\x00\x8c' + b'\x0f_Timestamp__inc\x94K\x01ub.') + self.round_trip_pickle(ts, pickled_with_3) + + def test_dbref_pickling(self): + dbr = DBRef("foo", 5) + pickled_with_3 = (b'\x80\x04\x95q\x00\x00\x00\x00\x00\x00\x00\x8c\n' + b'bson.dbref\x94\x8c\x05DBRef\x94\x93\x94)\x81\x94}' + b'\x94(\x8c\x12_DBRef__collection\x94\x8c\x03foo\x94' + b'\x8c\n_DBRef__id\x94K\x05\x8c\x10_DBRef__database' + b'\x94N\x8c\x0e_DBRef__kwargs\x94}\x94ub.') + self.round_trip_pickle(dbr, pickled_with_3) + + dbr = DBRef("foo", 5, database='db', kwargs1=None) + pickled_with_3 = (b'\x80\x04\x95\x81\x00\x00\x00\x00\x00\x00\x00\x8c' + b'\nbson.dbref\x94\x8c\x05DBRef\x94\x93\x94)\x81\x94}' + b'\x94(\x8c\x12_DBRef__collection\x94\x8c\x03foo\x94' + b'\x8c\n_DBRef__id\x94K\x05\x8c\x10_DBRef__database' + b'\x94\x8c\x02db\x94\x8c\x0e_DBRef__kwargs\x94}\x94' + b'\x8c\x07kwargs1\x94Nsub.') + + self.round_trip_pickle(dbr, pickled_with_3) + + def test_minkey_pickling(self): + mink = MinKey() + pickled_with_3 = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c' + b'\x0cbson.min_key\x94\x8c\x06MinKey\x94\x93\x94)' + b'\x81\x94.') + + self.round_trip_pickle(mink, pickled_with_3) + + def test_maxkey_pickling(self): + maxk = MaxKey() + pickled_with_3 = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c' + b'\x0cbson.max_key\x94\x8c\x06MaxKey\x94\x93\x94)' + b'\x81\x94.') + + self.round_trip_pickle(maxk, pickled_with_3) + + def test_int64_pickling(self): + i64 = Int64(9) + pickled_with_3 = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c\n' + b'bson.int64\x94\x8c\x05Int64\x94\x93\x94K\t\x85\x94' + b'\x81\x94.') + self.round_trip_pickle(i64, pickled_with_3) + if __name__ == "__main__": unittest.main()