From 9103755d06d7ef57c2b6c62b3bc394670ade43ee Mon Sep 17 00:00:00 2001 From: "A. Jesse Jiryu Davis" Date: Tue, 4 Jun 2013 12:13:24 -0400 Subject: [PATCH] Better error message when C extension raises InvalidStringData, PYTHON-533 --- bson/_cbsonmodule.c | 20 ++++++++++++++++++-- test/test_bson.py | 20 ++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 70af918f5..3c52d4dd3 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -655,8 +655,24 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_by if (status == NOT_UTF_8) { PyObject* InvalidStringData = _error("InvalidStringData"); if (InvalidStringData) { - PyErr_SetString(InvalidStringData, - "strings in documents must be valid UTF-8"); + PyObject* repr = PyObject_Repr(value); + char* repr_as_cstr = repr ? PyString_AsString(repr) : NULL; + if (repr_as_cstr) { + PyObject *message = PyString_FromFormat( + "strings in documents must be valid UTF-8: %s", + repr_as_cstr); + + if (message) { + PyErr_SetObject(InvalidStringData, message); + Py_DECREF(message); + } + } else { + /* repr(value) failed, use a generic message. */ + PyErr_SetString( + InvalidStringData, + "strings in documents must be valid UTF-8"); + } + Py_XDECREF(repr); Py_DECREF(InvalidStringData); } return 0; diff --git a/test/test_bson.py b/test/test_bson.py index 334d6ae3f..a8448aadc 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -343,18 +343,26 @@ class TestBSON(unittest.TestCase): w = {u"aéあ": u"aéあ"} self.assertEqual(w, BSON.encode(w).decode()) - if not PY3: - # In python3 this would be stored as BSON binary - # subtype 0 and wouldn't raise an exception. - y = {"hello": u"aé".encode("iso-8859-1")} - self.assertRaises(InvalidStringData, BSON.encode, y) + iso8859_bytes = u"aé".encode("iso-8859-1") + y = {"hello": iso8859_bytes} + if PY3: + # Stored as BSON binary subtype 0. + out = BSON.encode(y).decode() + self.assertTrue(isinstance(out['hello'], bytes)) + self.assertEqual(out['hello'], iso8859_bytes) + else: + # Python 2. + try: + BSON.encode(y) + except InvalidStringData, e: + self.assertTrue(repr(iso8859_bytes) in str(e)) # The next two tests only make sense in python 2.x since # you can't use `bytes` type as document keys in python 3.x. x = {u"aéあ".encode("utf-8"): u"aéあ".encode("utf-8")} self.assertEqual(w, BSON.encode(x).decode()) - z = {u"aé".encode("iso-8859-1"): "hello"} + z = {iso8859_bytes: "hello"} self.assertRaises(InvalidStringData, BSON.encode, z) def test_null_character(self):