diff --git a/doc/api/gridfs/grid_file.rst b/doc/api/gridfs/grid_file.rst
index b264e0135..1fb1181e2 100644
--- a/doc/api/gridfs/grid_file.rst
+++ b/doc/api/gridfs/grid_file.rst
@@ -13,6 +13,7 @@
       :members:
 
       .. autoattribute:: _id
+      .. automethod:: __iter__
 
    .. autoclass:: GridFile
       :members:
diff --git a/gridfs/grid_file.py b/gridfs/grid_file.py
index 45c1d77b5..2936c3f6b 100644
--- a/gridfs/grid_file.py
+++ b/gridfs/grid_file.py
@@ -15,6 +15,7 @@
 """Tools for representing files stored in GridFS."""
 
 import datetime
+import math
 import os
 try:
     from cStringIO import StringIO
@@ -351,7 +352,7 @@ class GridOut(object):
             chunk = self.__chunks.find_one({"files_id": self._id,
                                             "n": chunk_number})
             if not chunk:
-                raise CorruptGridFile("no chunk for n = " + chunk_number)
+                raise CorruptGridFile("no chunk #%d" % chunk_number)
 
             if not data:
                 data += chunk["data"][self.__position % self.chunk_size:]
@@ -397,6 +398,37 @@ class GridOut(object):
         self.__position = new_pos
         self.__buffer = ""
 
+    def __iter__(self):
+        """Return an iterator over all of this file's data.
+
+        The iterator will return chunk-sized instances of
+        :class:`str`. This can be useful when serving files using a
+        webserver that handles such an iterator efficiently.
+        """
+        return GridOutIterator(self, self.__chunks)
+
+
+class GridOutIterator(object):
+    def __init__(self, grid_out, chunks):
+        self.__id = grid_out._id
+        self.__chunks = chunks
+        self.__current_chunk = 0
+        self.__max_chunk = math.ceil(float(grid_out.length) /
+                                     grid_out.chunk_size)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if self.__current_chunk >= self.__max_chunk:
+            raise StopIteration
+        chunk = self.__chunks.find_one({"files_id": self.__id,
+                                        "n": self.__current_chunk})
+        if not chunk:
+            raise CorruptGridFile("no chunk #%d" % self.__current_chunk)
+        self.__current_chunk += 1
+        return str(chunk["data"])
+
 
 class GridFile(object):
     """No longer supported.
diff --git a/test/test_grid_file.py b/test/test_grid_file.py
index 46ebf1d27..5c80bf3ca 100644
--- a/test/test_grid_file.py
+++ b/test/test_grid_file.py
@@ -350,6 +350,27 @@ class TestGridFile(unittest.TestCase):
         self.assertEqual("d", g.read(2))
         self.assertEqual("", g.read(2))
 
+    def test_iterator(self):
+        f = GridIn(self.db.fs)
+        f.close()
+        g = GridOut(self.db.fs, f._id)
+        self.assertEqual([], list(g))
+
+        f = GridIn(self.db.fs)
+        f.write("hello world")
+        f.close()
+        g = GridOut(self.db.fs, f._id)
+        self.assertEqual(["hello world"], list(g))
+        self.assertEqual("hello", g.read(5))
+        self.assertEqual(["hello world"], list(g))
+        self.assertEqual(" worl", g.read(5))
+
+        f = GridIn(self.db.fs, chunk_size=2)
+        f.write("hello world")
+        f.close()
+        g = GridOut(self.db.fs, f._id)
+        self.assertEqual(["he", "ll", "o ", "wo", "rl", "d"], list(g))
+
     def test_read_chunks_unaligned_buffer_size(self):
         in_data = "This is a text that doesn't quite fit in a single 16-byte chunk."
         f = GridIn(self.db.fs, chunkSize=16)