Make GridOut iterable, yielding chunk-sized strings

This commit is contained in:
Mike Dirolf 2010-04-02 10:37:37 -04:00
parent 946bedc8e7
commit bec638f6f5
3 changed files with 55 additions and 1 deletion

View File

@ -13,6 +13,7 @@
:members:
.. autoattribute:: _id
.. automethod:: __iter__
.. autoclass:: GridFile
:members:

View File

@ -15,6 +15,7 @@
"""Tools for representing files stored in GridFS."""
import datetime
import math
import os
try:
from cStringIO import StringIO
@ -351,7 +352,7 @@ class GridOut(object):
chunk = self.__chunks.find_one({"files_id": self._id,
"n": chunk_number})
if not chunk:
raise CorruptGridFile("no chunk for n = " + chunk_number)
raise CorruptGridFile("no chunk #%d" % chunk_number)
if not data:
data += chunk["data"][self.__position % self.chunk_size:]
@ -397,6 +398,37 @@ class GridOut(object):
self.__position = new_pos
self.__buffer = ""
def __iter__(self):
    """Iterate over the complete contents of this file.

    Every call builds a new :class:`GridOutIterator` for this file.
    The items it produces are chunk-sized instances of :class:`str`,
    which is convenient when serving files through a webserver that
    can consume such an iterator efficiently.
    """
    chunk_collection = self.__chunks
    return GridOutIterator(self, chunk_collection)
class GridOutIterator(object):
    """Iterator yielding the data of a stored file one chunk at a time.

    Chunks are fetched lazily, one ``find_one`` query per call to
    :meth:`next`, always starting from chunk number 0.
    """

    def __init__(self, grid_out, chunks):
        """Prepare to iterate over the chunks belonging to `grid_out`.

        :Parameters:
          - `grid_out`: object exposing ``_id``, ``length`` and
            ``chunk_size`` attributes describing the file to read
          - `chunks`: object with a ``find_one(spec)`` method used to
            look up each chunk document by ``files_id`` and ``n``
        """
        self.__id = grid_out._id
        self.__chunks = chunks
        self.__current_chunk = 0
        # Ceiling division done with floor division on the negated length,
        # so the chunk count never passes through a lossy float conversion.
        self.__max_chunk = -(-grid_out.length // grid_out.chunk_size)

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def next(self):
        """Return the data of the next chunk as a :class:`str`.

        Raises :class:`StopIteration` once every expected chunk has been
        returned, and :class:`CorruptGridFile` when an expected chunk
        document cannot be found.
        """
        if self.__current_chunk >= self.__max_chunk:
            raise StopIteration
        chunk = self.__chunks.find_one({"files_id": self.__id,
                                        "n": self.__current_chunk})
        if not chunk:
            raise CorruptGridFile("no chunk #%d" % self.__current_chunk)
        self.__current_chunk += 1
        return str(chunk["data"])

    # Python 3 looks up ``__next__``; keep ``next`` for Python 2 callers.
    __next__ = next
class GridFile(object):
"""No longer supported.

View File

@ -350,6 +350,27 @@ class TestGridFile(unittest.TestCase):
self.assertEqual("d", g.read(2))
self.assertEqual("", g.read(2))
def test_iterator(self):
    """Iterating a GridOut yields the file's data chunk by chunk."""

    def stored(data=None, **kwargs):
        # Create a file (optionally writing `data` to it), close it, and
        # reopen it for reading.
        writer = GridIn(self.db.fs, **kwargs)
        if data is not None:
            writer.write(data)
        writer.close()
        return GridOut(self.db.fs, writer._id)

    # A file that was never written to produces no items.
    self.assertEqual([], list(stored()))

    # With no chunk_size given, the whole string comes back as one item.
    # Iterating neither depends on nor disturbs the read() position.
    single = stored("hello world")
    self.assertEqual(["hello world"], list(single))
    self.assertEqual("hello", single.read(5))
    self.assertEqual(["hello world"], list(single))
    self.assertEqual(" worl", single.read(5))

    # A small chunk_size splits the data; the last item may be shorter.
    split = stored("hello world", chunk_size=2)
    self.assertEqual(["he", "ll", "o ", "wo", "rl", "d"], list(split))
def test_read_chunks_unaligned_buffer_size(self):
in_data = "This is a text that doesn't quite fit in a single 16-byte chunk."
f = GridIn(self.db.fs, chunkSize=16)