Better error message when C extension raises InvalidStringData, PYTHON-533

This commit is contained in:
A. Jesse Jiryu Davis 2013-06-04 12:13:24 -04:00
parent 43cda4470f
commit 9103755d06
2 changed files with 32 additions and 8 deletions

View File

@@ -655,8 +655,24 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_by
if (status == NOT_UTF_8) {
PyObject* InvalidStringData = _error("InvalidStringData");
if (InvalidStringData) {
PyErr_SetString(InvalidStringData,
"strings in documents must be valid UTF-8");
PyObject* repr = PyObject_Repr(value);
char* repr_as_cstr = repr ? PyString_AsString(repr) : NULL;
if (repr_as_cstr) {
PyObject *message = PyString_FromFormat(
"strings in documents must be valid UTF-8: %s",
repr_as_cstr);
if (message) {
PyErr_SetObject(InvalidStringData, message);
Py_DECREF(message);
}
} else {
/* repr(value) failed, use a generic message. */
PyErr_SetString(
InvalidStringData,
"strings in documents must be valid UTF-8");
}
Py_XDECREF(repr);
Py_DECREF(InvalidStringData);
}
return 0;

View File

@@ -343,18 +343,26 @@ class TestBSON(unittest.TestCase):
w = {u"aéあ": u"aéあ"}
self.assertEqual(w, BSON.encode(w).decode())
if not PY3:
# In python3 this would be stored as BSON binary
# subtype 0 and wouldn't raise an exception.
y = {"hello": u"".encode("iso-8859-1")}
self.assertRaises(InvalidStringData, BSON.encode, y)
iso8859_bytes = u"".encode("iso-8859-1")
y = {"hello": iso8859_bytes}
if PY3:
# Stored as BSON binary subtype 0.
out = BSON.encode(y).decode()
self.assertTrue(isinstance(out['hello'], bytes))
self.assertEqual(out['hello'], iso8859_bytes)
else:
# Python 2.
try:
BSON.encode(y)
except InvalidStringData, e:
self.assertTrue(repr(iso8859_bytes) in str(e))
# The next two tests only make sense in python 2.x since
# you can't use `bytes` type as document keys in python 3.x.
x = {u"aéあ".encode("utf-8"): u"aéあ".encode("utf-8")}
self.assertEqual(w, BSON.encode(x).decode())
z = {u"".encode("iso-8859-1"): "hello"}
z = {iso8859_bytes: "hello"}
self.assertRaises(InvalidStringData, BSON.encode, z)
def test_null_character(self):