diff --git a/.travis.yml b/.travis.yml index 6e9bd3944..5dd72f6da 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - 3.5 - 3.6 - 3.7 + - 3.8 - pypy - pypy3.5 diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index e82e1bb34..1fbb48cc9 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -20,6 +20,7 @@ * should be used to speed up BSON encoding and decoding. */ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "datetime.h" @@ -1818,13 +1819,8 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { } /* objectify buffer */ -#if PY_MAJOR_VERSION >= 3 - result = Py_BuildValue("y#", buffer_get_buffer(buffer), - buffer_get_position(buffer)); -#else - result = Py_BuildValue("s#", buffer_get_buffer(buffer), - buffer_get_position(buffer)); -#endif + result = Py_BuildValue(BYTES_FORMAT_STRING, buffer_get_buffer(buffer), + (Py_ssize_t)buffer_get_position(buffer)); destroy_codec_options(&options); buffer_free(buffer); return result; @@ -1896,7 +1892,7 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, if (options->is_raw_bson) { value = PyObject_CallFunction( options->document_class, BYTES_FORMAT_STRING "O", - buffer + *position, size, options->options_obj); + buffer + *position, (Py_ssize_t)size, options->options_obj); if (!value) { goto invalid; } @@ -2175,11 +2171,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, goto invalid; } if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { -#if PY_MAJOR_VERSION >= 3 - value = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); -#else - value = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); -#endif + value = PyObject_CallFunction(objectid_type, BYTES_FORMAT_STRING, + buffer + *position, (Py_ssize_t)12); Py_DECREF(objectid_type); } *position += 12; @@ -2365,11 +2358,8 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, 
*position += coll_length; if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { -#if PY_MAJOR_VERSION >= 3 - id = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); -#else - id = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); -#endif + id = PyObject_CallFunction(objectid_type, BYTES_FORMAT_STRING, + buffer + *position, (Py_ssize_t)12); Py_DECREF(objectid_type); } if (!id) { @@ -2556,13 +2546,9 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, "Decimal128"))) { value = PyObject_CallMethod(dec128, "from_bid", -#if PY_MAJOR_VERSION >= 3 - "y#", -#else - "s#", -#endif + BYTES_FORMAT_STRING, buffer + *position, - 16); + (Py_ssize_t)16); Py_DECREF(dec128); } *position += 16; @@ -2939,7 +2925,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { /* No need to decode fields if using RawBSONDocument */ if (options.is_raw_bson) { result = PyObject_CallFunction( - options.document_class, BYTES_FORMAT_STRING "O", string, size, + options.document_class, BYTES_FORMAT_STRING "O", string, (Py_ssize_t)size, options_obj); } else { @@ -3031,7 +3017,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { /* No need to decode fields if using RawBSONDocument. 
*/ if (options.is_raw_bson) { dict = PyObject_CallFunction( - options.document_class, BYTES_FORMAT_STRING "O", string, size, + options.document_class, BYTES_FORMAT_STRING "O", string, (Py_ssize_t)size, options_obj); } else { dict = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); @@ -3143,6 +3129,7 @@ init_cbson(void) _cbson_API[_cbson_buffer_write_int64_INDEX] = (void *) buffer_write_int64; _cbson_API[_cbson_buffer_write_int32_at_position_INDEX] = (void *) buffer_write_int32_at_position; + _cbson_API[_cbson_downcast_and_check_INDEX] = (void *) _downcast_and_check; #if PY_VERSION_HEX >= 0x03010000 /* PyCapsule is new in python 3.1 */ diff --git a/bson/_cbsonmodule.h b/bson/_cbsonmodule.h index 237eea037..69590d564 100644 --- a/bson/_cbsonmodule.h +++ b/bson/_cbsonmodule.h @@ -113,8 +113,12 @@ typedef struct codec_options_t { #define _cbson_buffer_write_int32_at_position_RETURN void #define _cbson_buffer_write_int32_at_position_PROTO (buffer_t buffer, int position, int32_t data) +#define _cbson_downcast_and_check_INDEX 10 +#define _cbson_downcast_and_check_RETURN int +#define _cbson_downcast_and_check_PROTO (Py_ssize_t size, uint8_t extra) + /* Total number of C API pointers */ -#define _cbson_API_POINTER_COUNT 10 +#define _cbson_API_POINTER_COUNT 11 #ifdef _CBSON_MODULE /* This section is used when compiling _cbsonmodule */ @@ -139,6 +143,8 @@ static _cbson_buffer_write_int64_RETURN buffer_write_int64 _cbson_buffer_write_i static _cbson_buffer_write_int32_at_position_RETURN buffer_write_int32_at_position _cbson_buffer_write_int32_at_position_PROTO; +static _cbson_downcast_and_check_RETURN _downcast_and_check _cbson_downcast_and_check_PROTO; + #else /* This section is used in modules that use _cbsonmodule's API */ @@ -164,6 +170,8 @@ static void **_cbson_API; #define buffer_write_int32_at_position (*(_cbson_buffer_write_int32_at_position_RETURN (*)_cbson_buffer_write_int32_at_position_PROTO) _cbson_API[_cbson_buffer_write_int32_at_position_INDEX]) 
+#define _downcast_and_check (*(_cbson_downcast_and_check_RETURN (*)_cbson_downcast_and_check_PROTO) _cbson_API[_cbson_downcast_and_check_INDEX]) + #define _cbson_IMPORT _cbson_API = (void **)PyCapsule_Import("_cbson._C_API", 0) #endif diff --git a/bson/time64.c b/bson/time64.c index d9173e7f7..bad6b51dc 100644 --- a/bson/time64.c +++ b/bson/time64.c @@ -44,6 +44,7 @@ gmtime64_r() is a 64-bit equivalent of gmtime_r(). #endif /* Including Python.h fixes issues with interpreters built with -std=c99. */ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include diff --git a/pymongo/_cmessagemodule.c b/pymongo/_cmessagemodule.c index 2ed23dd2c..b3a82d631 100644 --- a/pymongo/_cmessagemodule.c +++ b/pymongo/_cmessagemodule.c @@ -20,6 +20,7 @@ * should be used to speed up message creation. */ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "_cbsonmodule.h" @@ -37,12 +38,6 @@ struct module_state { static struct module_state _state; #endif -#if PY_MAJOR_VERSION >= 3 -#define BYTES_FORMAT_STRING "y#" -#else -#define BYTES_FORMAT_STRING "s#" -#endif - #define DOC_TOO_LARGE_FMT "BSON document too large (%d bytes)" \ " - the connected server supports" \ " BSON document sizes up to %ld bytes." @@ -61,10 +56,21 @@ static PyObject* _error(char* name) { return error; } +/* The same as buffer_write_bytes except that it first narrows "size" + * to an int with _downcast_and_check, so a length that does not fit is rejected. + * Returns 0 on failure (downcast rejected, or buffer_write_bytes failed) */ +static int buffer_write_bytes_ssize_t(buffer_t buffer, const char* data, Py_ssize_t size) { + int downsize = _downcast_and_check(size, 0); + if (downsize == -1) { /* test the downcast result, not the raw input size */ + return 0; + } + return buffer_write_bytes(buffer, data, downsize); +} + /* add a lastError message on the end of the buffer. 
* returns 0 on failure */ static int add_last_error(PyObject* self, buffer_t buffer, - int request_id, char* ns, int nslen, + int request_id, char* ns, Py_ssize_t nslen, codec_options_t* options, PyObject* args) { struct module_state *state = GETSTATE(self); @@ -91,8 +97,7 @@ static int add_last_error(PyObject* self, buffer_t buffer, "\xd4\x07\x00\x00" /* opcode */ "\x00\x00\x00\x00", /* options */ 12) || - !buffer_write_bytes(buffer, - ns, nslen) || /* database */ + !buffer_write_bytes_ssize_t(buffer, ns, nslen) || /* database */ !buffer_write_bytes(buffer, ".$cmd\x00" /* collection name */ "\x00\x00\x00\x00" /* skip */ @@ -142,7 +147,7 @@ static int add_last_error(PyObject* self, buffer_t buffer, } static int init_insert_buffer(buffer_t buffer, int request_id, int options, - const char* coll_name, int coll_name_len, + const char* coll_name, Py_ssize_t coll_name_len, int compress) { int length_location = 0; if (!compress) { @@ -161,9 +166,9 @@ static int init_insert_buffer(buffer_t buffer, int request_id, int options, } } if (!buffer_write_int32(buffer, (int32_t)options) || - !buffer_write_bytes(buffer, - coll_name, - coll_name_len + 1)) { + !buffer_write_bytes_ssize_t(buffer, + coll_name, + coll_name_len + 1)) { return -1; } return length_location; @@ -177,7 +182,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { /* NOTE just using a random number as the request_id */ int request_id = rand(); char* collection_name = NULL; - int collection_name_length; + Py_ssize_t collection_name_length; PyObject* docs; PyObject* doc; PyObject* iterator; @@ -293,7 +298,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { /* objectify buffer */ result = Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), max_size); destroy_codec_options(&options); buffer_free(buffer); @@ -306,7 +311,7 @@ static PyObject* 
_cbson_update_message(PyObject* self, PyObject* args) { int request_id = rand(); char* collection_name = NULL; - int collection_name_length; + Py_ssize_t collection_name_length; int before, cur_size, max_size = 0; PyObject* doc; PyObject* spec; @@ -360,9 +365,9 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { "\xd1\x07\x00\x00" "\x00\x00\x00\x00", 12) || - !buffer_write_bytes(buffer, - collection_name, - collection_name_length + 1) || + !buffer_write_bytes_ssize_t(buffer, + collection_name, + collection_name_length + 1) || !buffer_write_int32(buffer, (int32_t)flags)) { destroy_codec_options(&options); buffer_free(buffer); @@ -409,7 +414,7 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { /* objectify buffer */ result = Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), max_size); destroy_codec_options(&options); buffer_free(buffer); @@ -424,7 +429,7 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { PyObject* cluster_time = NULL; unsigned int flags; char* collection_name = NULL; - int collection_name_length; + Py_ssize_t collection_name_length; int begin, cur_size, max_size = 0; int num_to_skip; int num_to_return; @@ -490,8 +495,8 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { if (!buffer_write_int32(buffer, (int32_t)request_id) || !buffer_write_bytes(buffer, "\x00\x00\x00\x00\xd4\x07\x00\x00", 8) || !buffer_write_int32(buffer, (int32_t)flags) || - !buffer_write_bytes(buffer, collection_name, - collection_name_length + 1) || + !buffer_write_bytes_ssize_t(buffer, collection_name, + collection_name_length + 1) || !buffer_write_int32(buffer, (int32_t)num_to_skip) || !buffer_write_int32(buffer, (int32_t)num_to_return)) { goto fail; @@ -548,7 +553,7 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { /* objectify buffer */ result = 
Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), max_size); fail: @@ -563,7 +568,7 @@ static PyObject* _cbson_get_more_message(PyObject* self, PyObject* args) { /* NOTE just using a random number as the request_id */ int request_id = rand(); char* collection_name = NULL; - int collection_name_length; + Py_ssize_t collection_name_length; int num_to_return; long long cursor_id; buffer_t buffer; @@ -597,9 +602,9 @@ static PyObject* _cbson_get_more_message(PyObject* self, PyObject* args) { "\x00\x00\x00\x00" "\xd5\x07\x00\x00" "\x00\x00\x00\x00", 12) || - !buffer_write_bytes(buffer, - collection_name, - collection_name_length + 1) || + !buffer_write_bytes_ssize_t(buffer, + collection_name, + collection_name_length + 1) || !buffer_write_int32(buffer, (int32_t)num_to_return) || !buffer_write_int64(buffer, (int64_t)cursor_id)) { buffer_free(buffer); @@ -616,7 +621,7 @@ static PyObject* _cbson_get_more_message(PyObject* self, PyObject* args) { /* objectify buffer */ result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer)); + (Py_ssize_t)buffer_get_position(buffer)); buffer_free(buffer); return result; } @@ -634,7 +639,7 @@ static PyObject* _cbson_op_msg(PyObject* self, PyObject* args) { unsigned int flags; PyObject* command; char* identifier = NULL; - int identifier_length = 0; + Py_ssize_t identifier_length = 0; PyObject* docs; PyObject* doc; unsigned char check_keys = 0; @@ -696,7 +701,7 @@ static PyObject* _cbson_op_msg(PyObject* self, PyObject* args) { /* save space for payload 0 length */ payload_one_length_location = buffer_save_space(buffer, 4); /* C string identifier */ - if (!buffer_write_bytes(buffer, identifier, identifier_length + 1)) { + if (!buffer_write_bytes_ssize_t(buffer, identifier, identifier_length + 1)) { goto encodefail; } iterator = PyObject_GetIter(docs); @@ -730,7 +735,7 @@ static 
PyObject* _cbson_op_msg(PyObject* self, PyObject* args) { /* objectify buffer */ result = Py_BuildValue("i" BYTES_FORMAT_STRING "ii", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), total_size, max_doc_size); encodefail: @@ -763,7 +768,7 @@ _set_document_too_large(int size, long max) { static PyObject* _send_insert(PyObject* self, PyObject* ctx, PyObject* gle_args, buffer_t buffer, - char* coll_name, int coll_len, int request_id, int safe, + char* coll_name, Py_ssize_t coll_len, int request_id, int safe, codec_options_t* options, PyObject* to_publish, int compress) { if (safe) { @@ -779,7 +784,7 @@ _send_insert(PyObject* self, PyObject* ctx, "i" BYTES_FORMAT_STRING "iNOi", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), 0, PyBool_FromLong((long)safe), to_publish, compress); @@ -792,7 +797,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { int request_id = rand(); int send_safe, flags = 0; int length_location, message_length; - int collection_name_length; + Py_ssize_t collection_name_length; int compress; char* collection_name = NULL; PyObject* docs; @@ -1344,7 +1349,7 @@ _cbson_encode_batched_op_msg(PyObject* self, PyObject* args) { result = Py_BuildValue(BYTES_FORMAT_STRING "O", buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), to_publish); fail: destroy_codec_options(&options); @@ -1415,7 +1420,7 @@ _cbson_batched_op_msg(PyObject* self, PyObject* args) { buffer_write_int32_at_position(buffer, 4, (int32_t)request_id); result = Py_BuildValue("i" BYTES_FORMAT_STRING "O", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), to_publish); fail: destroy_codec_options(&options); @@ -1428,7 +1433,7 @@ fail: static int _batched_write_command( - char* ns, int ns_len, unsigned char op, int check_keys, + char* 
ns, Py_ssize_t ns_len, unsigned char op, int check_keys, PyObject* command, PyObject* docs, PyObject* ctx, PyObject* to_publish, codec_options_t options, buffer_t buffer, struct module_state *state) { @@ -1476,8 +1481,7 @@ _batched_write_command( if (!buffer_write_bytes(buffer, "\x00\x00\x00\x00", /* flags */ 4) || - !buffer_write_bytes(buffer, - ns, ns_len + 1) || /* namespace */ + !buffer_write_bytes_ssize_t(buffer, ns, ns_len + 1) || /* namespace */ !buffer_write_bytes(buffer, "\x00\x00\x00\x00" /* skip */ "\xFF\xFF\xFF\xFF", /* limit (-1) */ @@ -1634,7 +1638,7 @@ _cbson_encode_batched_write_command(PyObject* self, PyObject* args) { char *ns = NULL; unsigned char op; unsigned char check_keys; - int ns_len; + Py_ssize_t ns_len; PyObject* command; PyObject* docs; PyObject* ctx = NULL; @@ -1677,7 +1681,7 @@ _cbson_encode_batched_write_command(PyObject* self, PyObject* args) { result = Py_BuildValue(BYTES_FORMAT_STRING "O", buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), to_publish); fail: PyMem_Free(ns); @@ -1692,7 +1696,7 @@ _cbson_batched_write_command(PyObject* self, PyObject* args) { char *ns = NULL; unsigned char op; unsigned char check_keys; - int ns_len; + Py_ssize_t ns_len; int request_id; int position; PyObject* command; @@ -1752,7 +1756,7 @@ _cbson_batched_write_command(PyObject* self, PyObject* args) { buffer_write_int32_at_position(buffer, 4, (int32_t)request_id); result = Py_BuildValue("i" BYTES_FORMAT_STRING "O", request_id, buffer_get_buffer(buffer), - buffer_get_position(buffer), + (Py_ssize_t)buffer_get_position(buffer), to_publish); fail: PyMem_Free(ns); diff --git a/pymongo/database.py b/pymongo/database.py index c6900fed1..701e55221 100644 --- a/pymongo/database.py +++ b/pymongo/database.py @@ -823,7 +823,7 @@ class Database(common.BaseObject): For example, to list all non-system collections:: - filter = {"name": {"$regex": r"^(?!system\.)"}} + filter = {"name": {"$regex": r"^(?!system\\.)"}} 
db.list_collection_names(filter=filter) :Parameters: diff --git a/test/test_custom_types.py b/test/test_custom_types.py index b0a190d0f..3d937082f 100644 --- a/test/test_custom_types.py +++ b/test/test_custom_types.py @@ -512,8 +512,8 @@ class TestTypeRegistry(unittest.TestCase): def test_builtin_types_override_fails(self): def run_test(base, attrs): - msg = ("TypeEncoders cannot change how built-in types " - "are encoded \(encoder .* transforms type .*\)") + msg = (r"TypeEncoders cannot change how built-in types " + r"are encoded \(encoder .* transforms type .*\)") for pytype in _BUILT_IN_TYPES: attrs.update({'python_type': pytype, 'transform_python': lambda x: x}) diff --git a/test/test_database.py b/test/test_database.py index 983bcecec..76a549c95 100644 --- a/test/test_database.py +++ b/test/test_database.py @@ -1062,7 +1062,7 @@ class TestDatabaseAggregation(IntegrationTest): @client_context.require_version_max(3, 6, 0, -1) def test_database_aggregation_unsupported(self): - err_msg = "Database.aggregate\(\) is only supported on MongoDB 3.6\+." + err_msg = r"Database.aggregate\(\) is only supported on MongoDB 3.6\+." with self.assertRaisesRegex(ConfigurationError, err_msg): with self.admin.aggregate(self.pipeline) as _: pass diff --git a/test/test_encryption.py b/test/test_encryption.py index 58a392d69..fbd6c7010 100644 --- a/test/test_encryption.py +++ b/test/test_encryption.py @@ -684,8 +684,8 @@ class TestDataKeyDoubleEncryption(EncryptionIntegrationTest): self.assertEqual(aws_encrypted_altname, aws_encrypted) # Explicitly encrypting an auto encrypted field. 
- msg = ('Cannot encrypt element of type binData because schema ' - 'requires that type is one of: \[ string \]') + msg = (r'Cannot encrypt element of type binData because schema ' + r'requires that type is one of: \[ string \]') with self.assertRaisesRegex(EncryptionError, msg): client_encrypted.db.coll.insert_one( {'encrypted_placeholder': local_encrypted}) diff --git a/test/test_pooling.py b/test/test_pooling.py index f5945a43d..922deecdc 100644 --- a/test/test_pooling.py +++ b/test/test_pooling.py @@ -53,7 +53,7 @@ def gc_collect_until_done(threads, timeout=60): assert (time.time() - start) < timeout, "Threads timed out" for t in running: t.join(0.1) - if not t.isAlive(): + if not t.is_alive(): running.remove(t) gc.collect() diff --git a/test/utils.py b/test/utils.py index e308a53c6..d26b96138 100644 --- a/test/utils.py +++ b/test/utils.py @@ -579,7 +579,7 @@ def joinall(threads): """Join threads with a 5-minute timeout, assert joins succeeded""" for t in threads: t.join(300) - assert not t.isAlive(), "Thread %s hung" % t + assert not t.is_alive(), "Thread %s hung" % t def connected(client): @@ -708,7 +708,7 @@ def run_threads(collection, target): for t in threads: t.join(60) - assert not t.isAlive() + assert not t.is_alive() @contextlib.contextmanager