PYTHON-5737 - BSON encoding/decoding performance improvements (#2715)
This commit is contained in:
parent
84814b2a72
commit
469a32a9dd
@ -356,7 +356,8 @@ static PyObject* datetime_ms_from_millis(PyObject* self, long long millis){
|
||||
if (!(ll_millis = PyLong_FromLongLong(millis))){
|
||||
return NULL;
|
||||
}
|
||||
dt = PyObject_CallFunctionObjArgs(state->DatetimeMS, ll_millis, NULL);
|
||||
PyObject* args[1] = {ll_millis};
|
||||
dt = PyObject_Vectorcall(state->DatetimeMS, args, 1, NULL);
|
||||
Py_DECREF(ll_millis);
|
||||
return dt;
|
||||
}
|
||||
@ -401,7 +402,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
|
||||
int64_t min_millis_offset = 0;
|
||||
int64_t max_millis_offset = 0;
|
||||
if (options->tz_aware && options->tzinfo && options->tzinfo != Py_None) {
|
||||
PyObject* utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->min_datetime, NULL);
|
||||
PyObject* utcoffset_args[2] = {options->tzinfo, state->min_datetime};
|
||||
PyObject* utcoffset = PyObject_VectorcallMethod(
|
||||
state->_utcoffset_str, utcoffset_args, 2, NULL);
|
||||
if (utcoffset == NULL) {
|
||||
return 0;
|
||||
}
|
||||
@ -420,7 +423,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
|
||||
(PyDateTime_DELTA_GET_MICROSECONDS(utcoffset) / 1000);
|
||||
}
|
||||
Py_DECREF(utcoffset);
|
||||
utcoffset = PyObject_CallMethodObjArgs(options->tzinfo, state->_utcoffset_str, state->max_datetime, NULL);
|
||||
utcoffset_args[1] = state->max_datetime;
|
||||
utcoffset = PyObject_VectorcallMethod(
|
||||
state->_utcoffset_str, utcoffset_args, 2, NULL);
|
||||
if (utcoffset == NULL) {
|
||||
return 0;
|
||||
}
|
||||
@ -481,7 +486,9 @@ static PyObject* decode_datetime(PyObject* self, long long millis, const codec_o
|
||||
|
||||
/* convert to local time */
|
||||
if (options->tzinfo != Py_None) {
|
||||
PyObject* temp = PyObject_CallMethodObjArgs(value, state->_astimezone_str, options->tzinfo, NULL);
|
||||
PyObject* astimezone_args[2] = {value, options->tzinfo};
|
||||
PyObject* temp = PyObject_VectorcallMethod(
|
||||
state->_astimezone_str, astimezone_args, 2, NULL);
|
||||
Py_DECREF(value);
|
||||
value = temp;
|
||||
}
|
||||
@ -688,7 +695,8 @@ static int _load_python_objects(PyObject* module) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
compiled = PyObject_CallFunction(re_compile, "O", empty_string);
|
||||
PyObject* compile_args[1] = {empty_string};
|
||||
compiled = PyObject_Vectorcall(re_compile, compile_args, 1, NULL);
|
||||
Py_DECREF(re_compile);
|
||||
if (compiled == NULL) {
|
||||
state->REType = NULL;
|
||||
@ -711,13 +719,19 @@ static long _type_marker(PyObject* object, PyObject* _type_marker_str) {
|
||||
PyObject* type_marker = NULL;
|
||||
long type = 0;
|
||||
|
||||
if (PyObject_HasAttr(object, _type_marker_str)) {
|
||||
type_marker = PyObject_GetAttr(object, _type_marker_str);
|
||||
if (type_marker == NULL) {
|
||||
#if PY_VERSION_HEX >= 0x030D0000
|
||||
// 3.13
|
||||
if (PyObject_GetOptionalAttr(object, _type_marker_str, &type_marker) == -1) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
# else
|
||||
if (PyObject_HasAttr(object, _type_marker_str)) {
|
||||
type_marker = PyObject_GetAttr(object, _type_marker_str);
|
||||
if (type_marker == NULL) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Python objects with broken __getattr__ implementations could return
|
||||
* arbitrary types for a call to PyObject_GetAttrString. For example
|
||||
@ -814,6 +828,7 @@ int convert_codec_options(PyObject* self, PyObject* options_obj, codec_options_t
|
||||
}
|
||||
|
||||
options->is_raw_bson = (101 == type_marker);
|
||||
options->is_dict_class = (options->document_class == (PyObject*)&PyDict_Type);
|
||||
options->options_obj = options_obj;
|
||||
|
||||
Py_INCREF(options->options_obj);
|
||||
@ -1013,10 +1028,20 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
}
|
||||
/*
|
||||
* Use _type_marker attribute instead of PyObject_IsInstance for better perf.
|
||||
*
|
||||
* Skip _type_marker lookup for common built-in types
|
||||
* that we know don't have a _type_marker attribute. This avoids the overhead
|
||||
* of PyObject_HasAttr/PyObject_GetAttr calls for the most common cases.
|
||||
*/
|
||||
type = _type_marker(value, state->_type_marker_str);
|
||||
if (type < 0) {
|
||||
return 0;
|
||||
if (PyUnicode_CheckExact(value) || PyLong_CheckExact(value) || PyFloat_CheckExact(value) ||
|
||||
PyBool_Check(value) || PyDict_CheckExact(value) || PyList_CheckExact(value) ||
|
||||
PyTuple_CheckExact(value) || PyBytes_CheckExact(value) || value == Py_None) {
|
||||
type = 0;
|
||||
} else {
|
||||
type = _type_marker(value, state->_type_marker_str);
|
||||
if (type < 0) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
@ -1227,7 +1252,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
case 100:
|
||||
{
|
||||
/* DBRef */
|
||||
PyObject* as_doc = PyObject_CallMethodObjArgs(value, state->_as_doc_str, NULL);
|
||||
PyObject* as_doc_args[1] = {value};
|
||||
PyObject* as_doc = PyObject_VectorcallMethod(
|
||||
state->_as_doc_str, as_doc_args, 1, NULL);
|
||||
if (!as_doc) {
|
||||
return 0;
|
||||
}
|
||||
@ -1383,7 +1410,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
return write_unicode(buffer, value);
|
||||
} else if (PyDateTime_Check(value)) {
|
||||
long long millis;
|
||||
PyObject* utcoffset = PyObject_CallMethodObjArgs(value, state->_utcoffset_str , NULL);
|
||||
PyObject* utcoffset_args[1] = {value};
|
||||
PyObject* utcoffset = PyObject_VectorcallMethod(
|
||||
state->_utcoffset_str, utcoffset_args, 1, NULL);
|
||||
if (utcoffset == NULL)
|
||||
return 0;
|
||||
if (utcoffset != Py_None) {
|
||||
@ -1422,7 +1451,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
if (!(uuid_rep_obj = PyLong_FromLong(options->uuid_rep))) {
|
||||
return 0;
|
||||
}
|
||||
binary_value = PyObject_CallMethodObjArgs(state->Binary, state->_from_uuid_str, value, uuid_rep_obj, NULL);
|
||||
PyObject* from_uuid_args[3] = {state->Binary, value, uuid_rep_obj};
|
||||
binary_value = PyObject_VectorcallMethod(
|
||||
state->_from_uuid_str, from_uuid_args, 3, NULL);
|
||||
Py_DECREF(uuid_rep_obj);
|
||||
|
||||
if (binary_value == NULL) {
|
||||
@ -1452,7 +1483,8 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
if (converter != NULL) {
|
||||
/* Transform types that have a registered converter.
|
||||
* A new reference is created upon transformation. */
|
||||
new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
|
||||
PyObject* converter_args[1] = {value};
|
||||
new_value = PyObject_Vectorcall(converter, converter_args, 1, NULL);
|
||||
if (new_value == NULL) {
|
||||
return 0;
|
||||
}
|
||||
@ -1466,8 +1498,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
/* Try the fallback encoder if one is provided and we have not already
|
||||
* attempted to use the fallback encoder. */
|
||||
if (!in_fallback_call && options->type_registry.has_fallback_encoder) {
|
||||
new_value = PyObject_CallFunctionObjArgs(
|
||||
options->type_registry.fallback_encoder, value, NULL);
|
||||
PyObject* fallback_args[1] = {value};
|
||||
new_value = PyObject_Vectorcall(
|
||||
options->type_registry.fallback_encoder, fallback_args, 1, NULL);
|
||||
if (new_value == NULL) {
|
||||
// propagate any exception raised by the callback
|
||||
return 0;
|
||||
@ -1668,7 +1701,8 @@ void handle_invalid_doc_error(PyObject* dict) {
|
||||
goto cleanup;
|
||||
}
|
||||
// Add doc to the error instance as a property.
|
||||
new_evalue = PyObject_CallFunctionObjArgs(InvalidDocument, new_msg, dict, NULL);
|
||||
PyObject* exc_args[2] = {new_msg, dict};
|
||||
new_evalue = PyObject_Vectorcall(InvalidDocument, exc_args, 2, NULL);
|
||||
Py_DECREF(evalue);
|
||||
Py_DECREF(etype);
|
||||
etype = InvalidDocument;
|
||||
@ -1944,7 +1978,8 @@ static PyObject *_dbref_hook(PyObject* self, PyObject* value) {
|
||||
PyMapping_DelItem(value, state->_dollar_db_str);
|
||||
}
|
||||
|
||||
ret = PyObject_CallFunctionObjArgs(state->DBRef, ref, id, database, value, NULL);
|
||||
PyObject* dbref_args[4] = {ref, id, database, value};
|
||||
ret = PyObject_Vectorcall(state->DBRef, dbref_args, 4, NULL);
|
||||
Py_DECREF(value);
|
||||
} else {
|
||||
ret = value;
|
||||
@ -2160,7 +2195,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
goto uuiderror;
|
||||
}
|
||||
|
||||
binary_value = PyObject_CallFunction(state->Binary, "(Oi)", data, subtype);
|
||||
PyObject* subtype_obj = PyLong_FromLong(subtype);
|
||||
if (!subtype_obj) {
|
||||
goto uuiderror;
|
||||
}
|
||||
PyObject* binary_args[2] = {data, subtype_obj};
|
||||
binary_value = PyObject_Vectorcall(state->Binary, binary_args, 2, NULL);
|
||||
Py_DECREF(subtype_obj);
|
||||
if (binary_value == NULL) {
|
||||
goto uuiderror;
|
||||
}
|
||||
@ -2175,7 +2216,9 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
if (!uuid_rep_obj) {
|
||||
goto uuiderror;
|
||||
}
|
||||
value = PyObject_CallMethodObjArgs(binary_value, state->_as_uuid_str, uuid_rep_obj, NULL);
|
||||
PyObject* as_uuid_args[2] = {binary_value, uuid_rep_obj};
|
||||
value = PyObject_VectorcallMethod(
|
||||
state->_as_uuid_str, as_uuid_args, 2, NULL);
|
||||
Py_DECREF(uuid_rep_obj);
|
||||
}
|
||||
|
||||
@ -2194,7 +2237,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
Py_DECREF(data);
|
||||
goto invalid;
|
||||
}
|
||||
value = PyObject_CallFunctionObjArgs(state->Binary, data, st, NULL);
|
||||
PyObject* binary_args[2] = {data, st};
|
||||
value = PyObject_Vectorcall(state->Binary, binary_args, 2, NULL);
|
||||
Py_DECREF(st);
|
||||
Py_DECREF(data);
|
||||
if (!value) {
|
||||
@ -2215,7 +2259,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
if (max < 12) {
|
||||
goto invalid;
|
||||
}
|
||||
value = PyObject_CallFunction(state->ObjectId, "y#", buffer + *position, (Py_ssize_t)12);
|
||||
PyObject* oid_bytes = PyBytes_FromStringAndSize(buffer + *position, 12);
|
||||
if (!oid_bytes) {
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* oid_args[1] = {oid_bytes};
|
||||
value = PyObject_Vectorcall(state->ObjectId, oid_args, 1, NULL);
|
||||
Py_DECREF(oid_bytes);
|
||||
*position += 12;
|
||||
break;
|
||||
}
|
||||
@ -2294,7 +2344,14 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
}
|
||||
*position += (unsigned)flags_length + 1;
|
||||
|
||||
value = PyObject_CallFunction(state->Regex, "Oi", pattern, flags);
|
||||
PyObject* flags_obj = PyLong_FromLong(flags);
|
||||
if (!flags_obj) {
|
||||
Py_DECREF(pattern);
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* regex_args[2] = {pattern, flags_obj};
|
||||
value = PyObject_Vectorcall(state->Regex, regex_args, 2, NULL);
|
||||
Py_DECREF(flags_obj);
|
||||
Py_DECREF(pattern);
|
||||
break;
|
||||
}
|
||||
@ -2327,13 +2384,21 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
}
|
||||
*position += coll_length;
|
||||
|
||||
id = PyObject_CallFunction(state->ObjectId, "y#", buffer + *position, (Py_ssize_t)12);
|
||||
PyObject* oid_bytes = PyBytes_FromStringAndSize(buffer + *position, 12);
|
||||
if (!oid_bytes) {
|
||||
Py_DECREF(collection);
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* oid_args[1] = {oid_bytes};
|
||||
id = PyObject_Vectorcall(state->ObjectId, oid_args, 1, NULL);
|
||||
Py_DECREF(oid_bytes);
|
||||
if (!id) {
|
||||
Py_DECREF(collection);
|
||||
goto invalid;
|
||||
}
|
||||
*position += 12;
|
||||
value = PyObject_CallFunctionObjArgs(state->DBRef, collection, id, NULL);
|
||||
PyObject* dbref_args[2] = {collection, id};
|
||||
value = PyObject_Vectorcall(state->DBRef, dbref_args, 2, NULL);
|
||||
Py_DECREF(collection);
|
||||
Py_DECREF(id);
|
||||
break;
|
||||
@ -2363,7 +2428,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
goto invalid;
|
||||
}
|
||||
*position += value_length;
|
||||
value = PyObject_CallFunctionObjArgs(state->Code, code, NULL, NULL);
|
||||
PyObject* code_args[1] = {code};
|
||||
value = PyObject_Vectorcall(state->Code, code_args, 1, NULL);
|
||||
Py_DECREF(code);
|
||||
break;
|
||||
}
|
||||
@ -2429,7 +2495,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
}
|
||||
*position += scope_size;
|
||||
|
||||
value = PyObject_CallFunctionObjArgs(state->Code, code, scope, NULL);
|
||||
PyObject* code_scope_args[2] = {code, scope};
|
||||
value = PyObject_Vectorcall(state->Code, code_scope_args, 2, NULL);
|
||||
Py_DECREF(code);
|
||||
Py_DECREF(scope);
|
||||
break;
|
||||
@ -2459,7 +2526,19 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
memcpy(&time, buffer + *position + 4, 4);
|
||||
inc = BSON_UINT32_FROM_LE(inc);
|
||||
time = BSON_UINT32_FROM_LE(time);
|
||||
value = PyObject_CallFunction(state->Timestamp, "II", time, inc);
|
||||
PyObject* time_obj = PyLong_FromUnsignedLong(time);
|
||||
if (!time_obj) {
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* inc_obj = PyLong_FromUnsignedLong(inc);
|
||||
if (!inc_obj) {
|
||||
Py_DECREF(time_obj);
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* ts_args[2] = {time_obj, inc_obj};
|
||||
value = PyObject_Vectorcall(state->Timestamp, ts_args, 2, NULL);
|
||||
Py_DECREF(time_obj);
|
||||
Py_DECREF(inc_obj);
|
||||
*position += 8;
|
||||
break;
|
||||
}
|
||||
@ -2471,7 +2550,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
}
|
||||
memcpy(&ll, buffer + *position, 8);
|
||||
ll = (int64_t)BSON_UINT64_FROM_LE(ll);
|
||||
value = PyObject_CallFunction(state->BSONInt64, "L", ll);
|
||||
PyObject* ll_obj = PyLong_FromLongLong(ll);
|
||||
if (!ll_obj) {
|
||||
goto invalid;
|
||||
}
|
||||
PyObject* int64_args[1] = {ll_obj};
|
||||
value = PyObject_Vectorcall(state->BSONInt64, int64_args, 1, NULL);
|
||||
Py_DECREF(ll_obj);
|
||||
*position += 8;
|
||||
break;
|
||||
}
|
||||
@ -2484,19 +2569,21 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
if (!_bytes_obj) {
|
||||
goto invalid;
|
||||
}
|
||||
value = PyObject_CallMethodObjArgs(state->Decimal128, state->_from_bid_str, _bytes_obj, NULL);
|
||||
PyObject* dec128_args[2] = {state->Decimal128, _bytes_obj};
|
||||
value = PyObject_VectorcallMethod(
|
||||
state->_from_bid_str, dec128_args, 2, NULL);
|
||||
Py_DECREF(_bytes_obj);
|
||||
*position += 16;
|
||||
break;
|
||||
}
|
||||
case 255:
|
||||
{
|
||||
value = PyObject_CallFunctionObjArgs(state->MinKey, NULL);
|
||||
value = PyObject_Vectorcall(state->MinKey, NULL, 0, NULL);
|
||||
break;
|
||||
}
|
||||
case 127:
|
||||
{
|
||||
value = PyObject_CallFunctionObjArgs(state->MaxKey, NULL);
|
||||
value = PyObject_Vectorcall(state->MaxKey, NULL, 0, NULL);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -2548,7 +2635,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
|
||||
}
|
||||
converter = PyDict_GetItem(options->type_registry.decoder_map, value_type);
|
||||
if (converter != NULL) {
|
||||
PyObject* new_value = PyObject_CallFunctionObjArgs(converter, value, NULL);
|
||||
PyObject* converter_args[1] = {value};
|
||||
PyObject* new_value = PyObject_Vectorcall(converter, converter_args, 1, NULL);
|
||||
Py_DECREF(value_type);
|
||||
Py_DECREF(value);
|
||||
return new_value;
|
||||
@ -2716,11 +2804,20 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
|
||||
unsigned max,
|
||||
const codec_options_t* options) {
|
||||
unsigned position = 0;
|
||||
PyObject* dict = PyObject_CallObject(options->document_class, NULL);
|
||||
PyObject* dict;
|
||||
int raw_array = 0;
|
||||
|
||||
/* Use PyDict_New() directly when document_class is dict.
|
||||
* This avoids the overhead of PyObject_CallObject() for the common case. */
|
||||
if (options->is_dict_class) {
|
||||
dict = PyDict_New();
|
||||
} else {
|
||||
dict = PyObject_CallObject(options->document_class, NULL);
|
||||
}
|
||||
if (!dict) {
|
||||
return NULL;
|
||||
}
|
||||
int raw_array = 0;
|
||||
|
||||
while (position < max) {
|
||||
PyObject* name = NULL;
|
||||
PyObject* value = NULL;
|
||||
@ -2735,7 +2832,24 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
|
||||
position = (unsigned)new_position;
|
||||
}
|
||||
|
||||
PyObject_SetItem(dict, name, value);
|
||||
/* Use PyDict_SetItem() when document_class is dict.
|
||||
* PyDict_SetItem() is faster than PyObject_SetItem() because it
|
||||
* avoids method lookup overhead. */
|
||||
if (options->is_dict_class) {
|
||||
if (PyDict_SetItem(dict, name, value) < 0) {
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(value);
|
||||
Py_DECREF(dict);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
if (PyObject_SetItem(dict, name, value) < 0) {
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(value);
|
||||
Py_DECREF(dict);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(value);
|
||||
}
|
||||
@ -2747,9 +2861,14 @@ static PyObject* elements_to_dict(PyObject* self, const char* string,
|
||||
const codec_options_t* options) {
|
||||
PyObject* result;
|
||||
if (options->is_raw_bson) {
|
||||
return PyObject_CallFunction(
|
||||
options->document_class, "y#O",
|
||||
string, max, options->options_obj);
|
||||
PyObject* bson_bytes = PyBytes_FromStringAndSize(string, max);
|
||||
if (!bson_bytes) {
|
||||
return NULL;
|
||||
}
|
||||
PyObject* raw_args[2] = {bson_bytes, options->options_obj};
|
||||
result = PyObject_Vectorcall(options->document_class, raw_args, 2, NULL);
|
||||
Py_DECREF(bson_bytes);
|
||||
return result;
|
||||
}
|
||||
if (Py_EnterRecursiveCall(" while decoding a BSON document"))
|
||||
return NULL;
|
||||
|
||||
@ -72,6 +72,7 @@ typedef struct codec_options_t {
|
||||
unsigned char datetime_conversion;
|
||||
PyObject* options_obj;
|
||||
unsigned char is_raw_bson;
|
||||
unsigned char is_dict_class;
|
||||
} codec_options_t;
|
||||
|
||||
/* C API functions */
|
||||
|
||||
Loading…
Reference in New Issue
Block a user