PYTHON-2061 bson: check for negative entry size in decode_file_iter (#429)

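Raise InvalidBSON instead of ValueError when decode_file_iter reads an invalid
BSON object size, i.e. a declared size smaller than the 4-byte size prefix
itself, which previously resulted in a negative read length.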
This commit is contained in:
paul fisher 2019-11-22 17:57:00 -05:00 committed by Shane Harvey
parent b8ce14dfd2
commit 6c4e1c9371
3 changed files with 39 additions and 36 deletions

View File

@ -1166,7 +1166,7 @@ def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS):
elif len(size_data) != 4:
raise InvalidBSON("cut off in middle of objsize")
obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4
elements = size_data + file_obj.read(obj_size)
elements = size_data + file_obj.read(max(0, obj_size))
yield _bson_to_dict(elements, codec_options)

View File

@ -86,3 +86,4 @@ The following is a list of people who have contributed to
- Shrey Batra(shreybatra)
- Felipe Rodrigues(fbidu)
- Terence Honles (terencehonles)
- Paul Fisher (thetorpedodog)

View File

@ -18,8 +18,10 @@
import collections
import datetime
import os
import re
import sys
import tempfile
import uuid
sys.path[0:0] = [""]
@ -335,41 +337,41 @@ class TestBSON(unittest.TestCase):
self.assertRaises(InvalidBSON, list,
decode_file_iter(StringIO(b"\x1B")))
# An object size that's too small to even include the object size,
# but is correctly encoded, along with a correct EOO (and no data).
data = b"\x01\x00\x00\x00\x00"
self.assertRaises(InvalidBSON, decode_all, data)
self.assertRaises(InvalidBSON, list, decode_iter(data))
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
# One object, but with object size listed smaller than it is in the
# data.
data = (b"\x1A\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00\x00")
self.assertRaises(InvalidBSON, decode_all, data)
self.assertRaises(InvalidBSON, list, decode_iter(data))
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
# One object, missing the EOO at the end.
data = (b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00")
self.assertRaises(InvalidBSON, decode_all, data)
self.assertRaises(InvalidBSON, list, decode_iter(data))
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
# One object, sized correctly, with a spot for an EOO, but the EOO
# isn't 0x00.
data = (b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00\xFF")
self.assertRaises(InvalidBSON, decode_all, data)
self.assertRaises(InvalidBSON, list, decode_iter(data))
self.assertRaises(InvalidBSON, list, decode_file_iter(StringIO(data)))
bad_bsons = [
# An object size that's too small to even include the object size,
# but is correctly encoded, along with a correct EOO (and no data).
b"\x01\x00\x00\x00\x00",
# One object, but with object size listed smaller than it is in the
# data.
(b"\x1A\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00\x00"),
# One object, missing the EOO at the end.
(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00"),
# One object, sized correctly, with a spot for an EOO, but the EOO
# isn't 0x00.
(b"\x1B\x00\x00\x00\x0E\x74\x65\x73\x74"
b"\x00\x0C\x00\x00\x00\x68\x65\x6C\x6C"
b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00"
b"\x05\x00\x00\x00\xFF"),
]
for i, data in enumerate(bad_bsons):
msg = "bad_bson[{}]".format(i)
with self.assertRaises(InvalidBSON, msg=msg):
decode_all(data)
with self.assertRaises(InvalidBSON, msg=msg):
list(decode_iter(data))
with self.assertRaises(InvalidBSON, msg=msg):
list(decode_file_iter(StringIO(data)))
with tempfile.TemporaryFile() as scratch:
scratch.write(data)
scratch.seek(0, os.SEEK_SET)
with self.assertRaises(InvalidBSON, msg=msg):
list(decode_file_iter(scratch))
def test_data_timestamp(self):
self.assertEqual({"test": Timestamp(4, 20)},