make sure we fail fast when we get a string that's not utf-8

This commit is contained in:
Mike Dirolf 2009-02-17 10:53:03 -05:00
parent 413851ddc0
commit fc08abcbfa
3 changed files with 17 additions and 8 deletions

View File

@ -104,7 +104,14 @@ static PyObject* _cbson_element_to_bson(PyObject* self, PyObject* args) {
* here we check for type equivalence, not isinstance in some
* places. */
if (PyString_CheckExact(value)) {
return build_string(0x02, value, name);
// we have to do the encoding so we can fail fast if they give us non utf-8
PyObject* encoded = PyString_AsEncodedObject(value, "utf-8", "strict");
if (!encoded) {
return NULL;
}
PyObject* result = build_string(0x02, encoded, name);
Py_DECREF(encoded);
return result;
} else if (PyUnicode_CheckExact(value)) {
PyObject* encoded = PyUnicode_AsUTF8String(value);
if (!encoded) {

View File

@ -52,10 +52,8 @@ def _get_c_string(data):
return (unicode(data[:end], "utf-8"), data[end + 1:])
def _make_c_string(string, encode=True):
if encode and isinstance(string, unicode):
return string.encode("utf-8") + "\x00"
return string + "\x00"
def _make_c_string(string):
return string.encode("utf-8") + "\x00"
def _validate_number(data):
assert len(data) >= 8
@ -298,11 +296,11 @@ def _element_to_bson(key, value):
value = struct.pack("<i", len(value)) + value
return "\x05" + name + struct.pack("<i", len(value)) + chr(subtype) + value
if isinstance(value, Code):
cstring = _make_c_string(value, False)
cstring = _make_c_string(value)
length = struct.pack("<i", len(cstring))
return "\x0D" + name + length + cstring
if isinstance(value, str):
cstring = _make_c_string(value, False)
cstring = _make_c_string(value)
length = struct.pack("<i", len(cstring))
return "\x02" + name + length + cstring
if isinstance(value, unicode):
@ -342,7 +340,7 @@ def _element_to_bson(key, value):
flags += "u"
if value.flags & re.VERBOSE:
flags += "x"
return "\x0B" + name + _make_c_string(pattern) + _make_c_string(flags, False)
return "\x0B" + name + _make_c_string(pattern) + _make_c_string(flags)
if isinstance(value, DBRef):
ns = _make_c_string(value.collection())
return "\x0C" + name + struct.pack("<i", len(ns)) + ns + _shuffle_oid(str(value.id()))

View File

@ -156,5 +156,9 @@ class TestBSON(unittest.TestCase):
f.write(bson)
f.close()
def test_bad_encode(self):
self.assertRaises(UnicodeDecodeError, BSON.from_dict,
{"lalala": '\xf4\xe0\xf0\xe1\xc0 Color Touch'})
if __name__ == "__main__":
unittest.main()