PYTHON-1374 Optimize json_util performance (#1460)
This commit is contained in:
parent
7adda818a5
commit
f67e9ae207
@ -137,7 +137,7 @@ from bson.max_key import MaxKey
|
||||
from bson.min_key import MinKey
|
||||
from bson.objectid import ObjectId
|
||||
from bson.regex import Regex
|
||||
from bson.son import RE_TYPE, SON
|
||||
from bson.son import RE_TYPE
|
||||
from bson.timestamp import Timestamp
|
||||
from bson.tz_util import utc
|
||||
|
||||
@ -505,7 +505,7 @@ def _json_convert(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) ->
|
||||
converted into json.
|
||||
"""
|
||||
if hasattr(obj, "items"):
|
||||
return SON(((k, _json_convert(v, json_options)) for k, v in obj.items()))
|
||||
return {k: _json_convert(v, json_options) for k, v in obj.items()}
|
||||
elif hasattr(obj, "__iter__") and not isinstance(obj, (str, bytes)):
|
||||
return [_json_convert(v, json_options) for v in obj]
|
||||
try:
|
||||
@ -826,20 +826,83 @@ def _parse_canonical_maxkey(doc: Any) -> MaxKey:
|
||||
|
||||
def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
    """Render binary data as an extended-JSON ``$binary`` document.

    :param data: the raw bytes to encode; they are emitted base64-encoded.
    :param subtype: the binary subtype, rendered as two lowercase hex digits.
    :param json_options: options whose ``json_mode`` selects the output layout.
    :return: ``{"$binary": <base64>, "$type": <hex>}`` when ``json_mode`` is
        ``JSONMode.LEGACY``, otherwise
        ``{"$binary": {"base64": <base64>, "subType": <hex>}}``.
    """
    payload = base64.b64encode(data).decode()
    subtype_hex = "%02x" % subtype
    # Plain dicts keep insertion order, so the key order of the rendered
    # document is the order written here.
    if json_options.json_mode == JSONMode.LEGACY:
        return {"$binary": payload, "$type": subtype_hex}
    return {"$binary": {"base64": payload, "subType": subtype_hex}}
|
||||
|
||||
|
||||
def _encode_datetimems(obj: Any, json_options: JSONOptions) -> dict:
    """Render a millisecond-timestamp object as an extended-JSON ``$date``.

    :param obj: a value supporting ``int(obj)`` and ``obj.as_datetime()``.
    :param json_options: options whose ``datetime_representation`` selects
        the output form.
    :return: for ``DatetimeRepresentation.ISO8601`` with
        ``0 <= int(obj) <= _max_datetime_ms()``, whatever :func:`default`
        returns for ``obj.as_datetime()``; for
        ``DatetimeRepresentation.LEGACY``, ``{"$date": "<millis>"}``;
        otherwise ``{"$date": {"$numberLong": "<millis>"}}``.
    """
    representation = json_options.datetime_representation
    if (
        representation == DatetimeRepresentation.ISO8601
        and 0 <= int(obj) <= _max_datetime_ms()
    ):
        # In-range values are delegated to the datetime branch of default().
        return default(obj.as_datetime(), json_options)
    if representation == DatetimeRepresentation.LEGACY:
        return {"$date": str(int(obj))}
    # Also reached for ISO8601 when the value is outside the range above.
    return {"$date": {"$numberLong": str(int(obj))}}
|
||||
|
||||
|
||||
def _encode_code(obj: Code, json_options: JSONOptions) -> dict:
|
||||
if obj.scope is None:
|
||||
return {"$code": str(obj)}
|
||||
else:
|
||||
return {"$code": str(obj), "$scope": _json_convert(obj.scope, json_options)}
|
||||
|
||||
|
||||
def _encode_int64(obj: Int64, json_options: JSONOptions) -> Any:
|
||||
if json_options.strict_number_long:
|
||||
return {"$numberLong": str(obj)}
|
||||
else:
|
||||
return obj
|
||||
|
||||
|
||||
# Encoders for BSON types, keyed by integer type marker.
# Every entry is called as ``encoder(obj, json_options)`` and returns the
# JSON-ready representation of ``obj``; encoders that ignore ``json_options``
# still accept it so that all entries share one call signature.
_encoders = {
    5: lambda obj, json_options: _encode_binary(obj, obj.subtype, json_options),  # Binary
    7: lambda obj, json_options: {"$oid": str(obj)},  # noqa: ARG005 ObjectId
    9: _encode_datetimems,  # DatetimeMS
    13: _encode_code,  # Code
    17: lambda obj, json_options: {"$timestamp": {"t": obj.time, "i": obj.inc}},  # noqa: ARG005 Timestamp
    18: _encode_int64,  # Int64
    19: lambda obj, json_options: {"$numberDecimal": str(obj)},  # noqa: ARG005 Decimal128
    100: lambda obj, json_options: _json_convert(obj.as_doc(), json_options=json_options),  # DBRef
    127: lambda obj, json_options: {"$maxKey": 1},  # noqa: ARG005 MaxKey
    255: lambda obj, json_options: {"$minKey": 1},  # noqa: ARG005 MinKey
}
|
||||
|
||||
|
||||
def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
|
||||
# We preserve key order when rendering SON, DBRef, etc. as JSON by
|
||||
# returning a SON for those types instead of a dict.
|
||||
if isinstance(obj, ObjectId):
|
||||
return {"$oid": str(obj)}
|
||||
if isinstance(obj, DBRef):
|
||||
return _json_convert(obj.as_doc(), json_options=json_options)
|
||||
if isinstance(obj, datetime.datetime):
|
||||
if isinstance(obj, bool):
|
||||
return obj
|
||||
elif isinstance(obj, (RE_TYPE, Regex)):
|
||||
flags = ""
|
||||
if obj.flags & re.IGNORECASE:
|
||||
flags += "i"
|
||||
if obj.flags & re.LOCALE:
|
||||
flags += "l"
|
||||
if obj.flags & re.MULTILINE:
|
||||
flags += "m"
|
||||
if obj.flags & re.DOTALL:
|
||||
flags += "s"
|
||||
if obj.flags & re.UNICODE:
|
||||
flags += "u"
|
||||
if obj.flags & re.VERBOSE:
|
||||
flags += "x"
|
||||
if isinstance(obj.pattern, str):
|
||||
pattern = obj.pattern
|
||||
else:
|
||||
pattern = obj.pattern.decode("utf-8")
|
||||
if json_options.json_mode == JSONMode.LEGACY:
|
||||
return {"$regex": pattern, "$options": flags}
|
||||
return {"$regularExpression": {"pattern": pattern, "options": flags}}
|
||||
elif hasattr(obj, "_type_marker"):
|
||||
type_marker = obj._type_marker
|
||||
try:
|
||||
return _encoders[type_marker](obj, json_options) # type: ignore[no-untyped-call]
|
||||
except KeyError:
|
||||
raise TypeError("%r is not JSON serializable" % obj) from None
|
||||
elif isinstance(obj, datetime.datetime):
|
||||
if json_options.datetime_representation == DatetimeRepresentation.ISO8601:
|
||||
if not obj.tzinfo:
|
||||
obj = obj.replace(tzinfo=utc)
|
||||
@ -860,67 +923,19 @@ def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
|
||||
if json_options.datetime_representation == DatetimeRepresentation.LEGACY:
|
||||
return {"$date": millis}
|
||||
return {"$date": {"$numberLong": str(millis)}}
|
||||
if isinstance(obj, DatetimeMS):
|
||||
if (
|
||||
json_options.datetime_representation == DatetimeRepresentation.ISO8601
|
||||
and 0 <= int(obj) <= _max_datetime_ms()
|
||||
):
|
||||
return default(obj.as_datetime(), json_options)
|
||||
elif json_options.datetime_representation == DatetimeRepresentation.LEGACY:
|
||||
return {"$date": str(int(obj))}
|
||||
return {"$date": {"$numberLong": str(int(obj))}}
|
||||
if json_options.strict_number_long and isinstance(obj, Int64):
|
||||
return {"$numberLong": str(obj)}
|
||||
if isinstance(obj, (RE_TYPE, Regex)):
|
||||
flags = ""
|
||||
if obj.flags & re.IGNORECASE:
|
||||
flags += "i"
|
||||
if obj.flags & re.LOCALE:
|
||||
flags += "l"
|
||||
if obj.flags & re.MULTILINE:
|
||||
flags += "m"
|
||||
if obj.flags & re.DOTALL:
|
||||
flags += "s"
|
||||
if obj.flags & re.UNICODE:
|
||||
flags += "u"
|
||||
if obj.flags & re.VERBOSE:
|
||||
flags += "x"
|
||||
if isinstance(obj.pattern, str):
|
||||
pattern = obj.pattern
|
||||
else:
|
||||
pattern = obj.pattern.decode("utf-8")
|
||||
if json_options.json_mode == JSONMode.LEGACY:
|
||||
return SON([("$regex", pattern), ("$options", flags)])
|
||||
return {"$regularExpression": SON([("pattern", pattern), ("options", flags)])}
|
||||
if isinstance(obj, MinKey):
|
||||
return {"$minKey": 1}
|
||||
if isinstance(obj, MaxKey):
|
||||
return {"$maxKey": 1}
|
||||
if isinstance(obj, Timestamp):
|
||||
return {"$timestamp": SON([("t", obj.time), ("i", obj.inc)])}
|
||||
if isinstance(obj, Code):
|
||||
if obj.scope is None:
|
||||
return {"$code": str(obj)}
|
||||
return SON([("$code", str(obj)), ("$scope", _json_convert(obj.scope, json_options))])
|
||||
if isinstance(obj, Binary):
|
||||
return _encode_binary(obj, obj.subtype, json_options)
|
||||
if isinstance(obj, bytes):
|
||||
elif isinstance(obj, bytes):
|
||||
return _encode_binary(obj, 0, json_options)
|
||||
if isinstance(obj, uuid.UUID):
|
||||
elif isinstance(obj, uuid.UUID):
|
||||
if json_options.strict_uuid:
|
||||
binval = Binary.from_uuid(obj, uuid_representation=json_options.uuid_representation)
|
||||
return _encode_binary(binval, binval.subtype, json_options)
|
||||
else:
|
||||
return {"$uuid": obj.hex}
|
||||
if isinstance(obj, Decimal128):
|
||||
return {"$numberDecimal": str(obj)}
|
||||
if isinstance(obj, bool):
|
||||
return obj
|
||||
if json_options.json_mode == JSONMode.CANONICAL and isinstance(obj, int):
|
||||
elif json_options.json_mode == JSONMode.CANONICAL and isinstance(obj, int):
|
||||
if -(2**31) <= obj < 2**31:
|
||||
return {"$numberInt": str(obj)}
|
||||
return {"$numberLong": str(obj)}
|
||||
if json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
|
||||
elif json_options.json_mode != JSONMode.LEGACY and isinstance(obj, float):
|
||||
if math.isnan(obj):
|
||||
return {"$numberDouble": "NaN"}
|
||||
elif math.isinf(obj):
|
||||
|
||||
@ -13,6 +13,7 @@ PyMongo 4.7 brings a number of improvements including:
|
||||
- Fixed a bug where inflating a :class:`~bson.raw_bson.RawBSONDocument` containing a :class:`~bson.code.Code` would cause an error.
|
||||
- Replaced usage of :class:`bson.son.SON` with ``dict`` in all internal classes and commands;
|
||||
:attr:`options.pool_options.metadata` is now of type ``dict`` as opposed to :class:`bson.son.SON`.
|
||||
- Significantly improved the performance of encoding BSON documents to JSON.
|
||||
|
||||
Changes in Version 4.6.1
|
||||
------------------------
|
||||
|
||||
Loading…
Reference in New Issue
Block a user