Better error message when the C extension raises InvalidStringData (PYTHON-533)
This commit is contained in:
parent
43cda4470f
commit
9103755d06
@ -655,8 +655,24 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_by
|
||||
if (status == NOT_UTF_8) {
|
||||
PyObject* InvalidStringData = _error("InvalidStringData");
|
||||
if (InvalidStringData) {
|
||||
PyErr_SetString(InvalidStringData,
|
||||
"strings in documents must be valid UTF-8");
|
||||
PyObject* repr = PyObject_Repr(value);
|
||||
char* repr_as_cstr = repr ? PyString_AsString(repr) : NULL;
|
||||
if (repr_as_cstr) {
|
||||
PyObject *message = PyString_FromFormat(
|
||||
"strings in documents must be valid UTF-8: %s",
|
||||
repr_as_cstr);
|
||||
|
||||
if (message) {
|
||||
PyErr_SetObject(InvalidStringData, message);
|
||||
Py_DECREF(message);
|
||||
}
|
||||
} else {
|
||||
/* repr(value) failed, use a generic message. */
|
||||
PyErr_SetString(
|
||||
InvalidStringData,
|
||||
"strings in documents must be valid UTF-8");
|
||||
}
|
||||
Py_XDECREF(repr);
|
||||
Py_DECREF(InvalidStringData);
|
||||
}
|
||||
return 0;
|
||||
|
||||
@ -343,18 +343,26 @@ class TestBSON(unittest.TestCase):
|
||||
w = {u"aéあ": u"aéあ"}
|
||||
self.assertEqual(w, BSON.encode(w).decode())
|
||||
|
||||
if not PY3:
|
||||
# In python3 this would be stored as BSON binary
|
||||
# subtype 0 and wouldn't raise an exception.
|
||||
y = {"hello": u"aé".encode("iso-8859-1")}
|
||||
self.assertRaises(InvalidStringData, BSON.encode, y)
|
||||
iso8859_bytes = u"aé".encode("iso-8859-1")
|
||||
y = {"hello": iso8859_bytes}
|
||||
if PY3:
|
||||
# Stored as BSON binary subtype 0.
|
||||
out = BSON.encode(y).decode()
|
||||
self.assertTrue(isinstance(out['hello'], bytes))
|
||||
self.assertEqual(out['hello'], iso8859_bytes)
|
||||
else:
|
||||
# Python 2.
|
||||
try:
|
||||
BSON.encode(y)
|
||||
except InvalidStringData, e:
|
||||
self.assertTrue(repr(iso8859_bytes) in str(e))
|
||||
|
||||
# The next two tests only make sense in python 2.x since
|
||||
# you can't use `bytes` type as document keys in python 3.x.
|
||||
x = {u"aéあ".encode("utf-8"): u"aéあ".encode("utf-8")}
|
||||
self.assertEqual(w, BSON.encode(x).decode())
|
||||
|
||||
z = {u"aé".encode("iso-8859-1"): "hello"}
|
||||
z = {iso8859_bytes: "hello"}
|
||||
self.assertRaises(InvalidStringData, BSON.encode, z)
|
||||
|
||||
def test_null_character(self):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user