minor: clean up and docs for gridfs

This commit is contained in:
Mike Dirolf 2010-01-26 16:40:02 -05:00
parent d80de9cb13
commit 580c8f256a
3 changed files with 151 additions and 101 deletions

View File

@ -14,21 +14,21 @@
"""GridFS is a specification for storing large objects in Mongo.
The `gridfs` package is an implementation of GridFS on top of `pymongo`,
exposing a file-like interface.
The :mod:`gridfs` package is an implementation of GridFS on top of
:mod:`pymongo`, exposing a file-like interface.
"""
from grid_file import GridFile
from pymongo.database import Database
class GridFS(object):
"""An instance of GridFS on top of a single `pymongo.database.Database`.
"""An instance of GridFS on top of a single Database.
"""
def __init__(self, database):
"""Create a new instance of GridFS.
"""Create a new instance of :class:`GridFS`.
Raises TypeError if database is not an instance of
`pymongo.database.Database`.
Raises :class:`TypeError` if `database` is not an instance of
:class:`~pymongo.database.Database`.
:Parameters:
- `database`: database to use
@ -39,31 +39,41 @@ class GridFS(object):
self.__database = database
def open(self, filename, mode="r", collection="fs"):
"""Open a GridFile for reading or writing.
"""Open a :class:`~gridfs.grid_file.GridFile` for reading or
writing.
Shorthand method for creating / opening a GridFile from a filename. mode
must be a mode supported by `gridfs.grid_file.GridFile`.
Shorthand method for creating / opening a
:class:`~gridfs.grid_file.GridFile` with name
`filename`. `mode` must be a mode supported by
:class:`~gridfs.grid_file.GridFile`.
Only a single opened GridFile instance may exist for a file in gridfs
at any time. Care must be taken to close GridFile instances when done
using them. GridFiles support the context manager protocol (the "with"
statement).
Only a single opened :class:`~gridfs.grid_file.GridFile`
instance may exist for a file in gridfs at any time. Care must
be taken to close :class:`~gridfs.grid_file.GridFile`
instances when done using
them. :class:`~gridfs.grid_file.GridFile` instances support
the context manager protocol (the "with" statement).
:Parameters:
- `filename`: name of the GridFile to open
- `filename`: name of the :class:`~gridfs.grid_file.GridFile`
to open
- `mode` (optional): mode to open the file in
- `collection` (optional): root collection to use for this file
"""
return GridFile({"filename": filename}, self.__database, mode, collection)
def remove(self, filename_or_spec, collection="fs"):
"""Remove one or more GridFile(s).
"""Remove one or more :class:`~gridfs.grid_file.GridFile`
instances.
Can remove by filename, or by an entire file spec (see
`gridfs.grid_file.GridFile` for documentation on valid fields. Delete
all GridFiles that match filename_or_spec. Raises TypeError if
filename_or_spec is not an instance of (str, unicode, dict, SON) or
collection is not an instance of (str, unicode).
:class:`~gridfs.grid_file.GridFile` for documentation on valid
fields). Delete all :class:`~gridfs.grid_file.GridFile`
instances that match `filename_or_spec`. Raises
:class:`TypeError` if `filename_or_spec` is not an instance of
(:class:`basestring`, :class:`dict`,
:class:`~pymongo.son.SON`) or `collection` is not an instance of
:class:`basestring`.
:Parameters:
- `filename_or_spec`: identifier of file(s) to remove
@ -74,9 +84,9 @@ class GridFS(object):
spec = {"filename": filename_or_spec}
if not isinstance(spec, dict):
raise TypeError("filename_or_spec must be an "
"instance of (str, dict, SON)")
"instance of (basestring, dict, SON)")
if not isinstance(collection, basestring):
raise TypeError("collection must be an instance of (str, unicode)")
raise TypeError("collection must be an instance of basestring")
# convert to _id's so we can uniquely create GridFile instances
ids = []
@ -91,15 +101,17 @@ class GridFS(object):
self.__database[collection].files.remove(spec)
def list(self, collection="fs"):
"""List the names of all GridFiles stored in this instance of GridFS.
"""List the names of all :class:`~gridfs.grid_file.GridFile`
instances stored in this instance of :class:`GridFS`.
Raises TypeError if collection is not an instance of (str, unicode).
Raises :class:`TypeError` if `collection` is not an instance of
:class:`basestring`.
:Parameters:
- `collection` (optional): root collection to list files from
"""
if not isinstance(collection, basestring):
raise TypeError("collection must be an instance of (str, unicode)")
raise TypeError("collection must be an instance of basestring")
names = []
for grid_file in self.__database[collection].files.find():
names.append(grid_file["filename"])

View File

@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exceptions raised by the `gridfs` package"""
"""Exceptions raised by the :mod:`gridfs` package"""
class CorruptGridFile(Exception):
"""Raised when a GridFS "file" is malformed.
"""Raised when a :class:`~gridfs.grid_file.GridFile` instance is
malformed.
"""

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""File-like object used for reading from and writing to GridFS"""
"""Tools for representing files stored in GridFS."""
import datetime
import math
@ -28,7 +28,7 @@ from pymongo.database import Database
from pymongo.objectid import ObjectId
from pymongo.dbref import DBRef
from pymongo.binary import Binary
from errors import CorruptGridFile
from gridfs.errors import CorruptGridFile
from pymongo import ASCENDING
try:
@ -56,52 +56,65 @@ class GridFile(object):
def __init__(self, file_spec, database, mode="r", collection="fs"):
"""Open a "file" in GridFS.
Application developers should generally not need to instantiate this
class directly - instead see the `gridfs.GridFS.open` method.
Application developers should generally not need to
instantiate this class directly - instead see the
:meth:`~gridfs.GridFS.open` method.
Only a single opened GridFile instance may exist for a file in gridfs
at any time. Care must be taken to close GridFile instances when done
using them. GridFiles support the context manager protocol (the "with"
statement).
Only a single opened :class:`GridFile` instance may exist for
a file in gridfs at any time. Care must be taken to close
:class:`GridFile` instances when done using
them. :class:`GridFile` instances support the context manager
protocol (the "with" statement).
Raises TypeError if file_spec is not an instance of dict, database is
not an instance of `pymongo.database.Database`, or collection is not an
instance of (str, unicode).
Raises :class:`TypeError` if `file_spec` is not an instance of
:class:`dict`, `database` is not an instance of
:class:`~pymongo.database.Database`, or `collection` is not an
instance of :class:`basestring`.
The file_spec argument must be a SON query specifier for the file to
open. The *first* file matching the specifier will be opened. If no such
files exist, a new file is created using the metadata in file_spec.
The valid fields in a file_spec are as follows:
The `file_spec` argument must be a query specifying the file
to open. The *first* file matching the query will be
opened. If no such files exist, a new file is created using
the metadata in `file_spec`. The valid fields in a
`file_spec` are as follows:
- "_id": unique ID for this file
* default: `pymongo.objectid.ObjectId()`
- "filename": human name for the file
- "contentType": valid mime-type for the file
- "length": size of the file, in bytes
* only used for querying, automatically set for inserts
- "chunkSize": size of each of the chunks, in bytes
* default: 256 kb
- "uploadDate": date when the object was first stored
* only used for querying, automatically set for inserts
- "aliases": array of alias strings
- "metadata": a SON document containing arbitrary data
- ``"_id"``: unique ID for this file (default:
:class:`~pymongo.objectid.ObjectId`)
- ``"filename"``: human name for the file
- ``"contentType"``: valid mime-type for the file
- ``"length"``: size of the file, in bytes
.. note:: only used for querying, automatically set for inserts
- ``"chunkSize"``: size of each of the chunks, in bytes
(default: 256 kb)
- ``"uploadDate"``: date when the object was first stored
.. note:: only used for querying, automatically set for inserts
- ``"aliases"``: array of alias strings
- ``"metadata"``: document containing arbitrary metadata
:Parameters:
- `file_spec`: query specifier as described above
- `database`: the database to store/retrieve this file in
- `mode` (optional): the mode to open this file with, one of
("r", "w")
- `collection` (optional): the collection in which to store/retrieve
this file
(``"r"``, ``"w"``)
- `collection` (optional): the collection in which to
store/retrieve this file
"""
if not isinstance(file_spec, dict):
raise TypeError("file_spec must be an instance of (dict, SON)")
if not isinstance(database, Database):
raise TypeError("database must be an instance of database")
if not isinstance(collection, basestring):
raise TypeError("collection must be an instance of (str, unicode)")
raise TypeError("collection must be an instance of basestring")
if not isinstance(mode, basestring):
raise TypeError("mode must be an instance of (str, unicode)")
raise TypeError("mode must be an instance of basestring")
if mode not in ("r", "w"):
raise ValueError("mode must be one of ('r', 'w')")
@ -152,41 +165,56 @@ class GridFile(object):
self.__collection.chunks.remove({"files_id": self.__id})
def closed(self):
"""Is this :class:`GridFile` closed?
"""
return self.__closed
closed = property(closed)
def mode(self):
"""Mode this :class:`GridFile` was opened with.
"""
return self.__mode
mode = property(mode)
def __create_property(field_name, read_only=False):
def __create_property(field_name, docstring, read_only=False):
def getter(self):
return self.__collection.files.find_one({"_id": self.__id}).get(field_name, None)
def setter(self, value):
grid_file = self.__collection.files.find_one({"_id": self.__id})
grid_file[field_name] = value
self.__collection.files.save(grid_file)
if not read_only:
return property(getter, setter)
return property(getter)
name = __create_property("filename", True)
content_type = __create_property("contentType")
length = __create_property("length", True)
chunk_size = __create_property("chunkSize", True)
upload_date = __create_property("uploadDate", True)
aliases = __create_property("aliases")
metadata = __create_property("metadata")
md5 = __create_property("md5", True)
if not read_only:
return property(getter, setter, doc=docstring)
return property(getter, doc=docstring)
name = __create_property("filename", "Name of this :class:`GridFile`.",
True)
content_type = __create_property("contentType", "Mime-type for "
"this :class:`GridFile`.")
length = __create_property("length", "Length (in bytes) of this "
":class:`GridFile`.", True)
chunk_size = __create_property("chunkSize", "Chunk size for this "
":class:`GridFile`.", True)
upload_date = __create_property("uploadDate", "Date that this "
":class:`GridFile` was first uploaded.",
True)
aliases = __create_property("aliases", "List of aliases for this "
":class:`GridFile`.")
metadata = __create_property("metadata", "Metadata attached to this "
":class:`GridFile`.")
md5 = __create_property("md5", "MD5 of the contents of this "
":class:`GridFile` (generated on the server).",
True)
def rename(self, filename):
"""Rename this GridFile.
"""Rename this :class:`GridFile`.
Due to buffering, the rename might not actually occur until `flush()` or
`close()` is called.
Due to buffering, the rename might not actually occur until
:meth:`flush` or :meth:`close` is called.
:Parameters:
- `filename`: the new name for this GridFile
- `filename`: the new name for this :class:`GridFile`
"""
grid_file = self.__collection.files.find_one({"_id": self.__id})
grid_file["filename"] = filename
@ -218,7 +246,7 @@ class GridFile(object):
self.__write_buffer = StringIO()
def flush(self):
"""Flush the GridFile to the database.
"""Flush the :class:`GridFile` to the database.
"""
self.__assert_open()
if self.mode != "w":
@ -235,10 +263,10 @@ class GridFile(object):
self.__collection.files.save(grid_file)
def close(self):
"""Close the GridFile.
"""Close the :class:`GridFile`.
A closed GridFile cannot be read or written any more. Calling `close()`
more than once is allowed.
A closed :class:`GridFile` cannot be read or written any
more. Calling :meth:`close` more than once is allowed.
"""
if not self.__closed:
self.flush()
@ -257,11 +285,13 @@ class GridFile(object):
raise ValueError("operation cannot be performed on a closed GridFile")
def read(self, size=-1):
"""Read at most size bytes from the file (less if there isn't enough
data).
"""Read at most `size` bytes from the file (less if there
isn't enough data).
The bytes are returned as a string object. If size is negative or omitted
all data is read. Raises ValueError if this GridFile is already closed.
The bytes are returned as an instance of :class:`str`. If
`size` is negative or omitted all data is read. Raises
:class:`ValueError` if this :class:`GridFile` is already
closed.
:Parameters:
- `size` (optional): the number of bytes to read
@ -297,47 +327,54 @@ class GridFile(object):
# TODO should support writing unicode to a file. this means that files will
# need to have an encoding attribute.
def write(self, str):
"""Write a string to the GridFile. There is no return value.
def write(self, data):
"""Write a string to the :class:`GridFile`. There is no return
value.
Due to buffering, the string may not actually show up in the database
until the `flush()` or `close()` method is called. Raises ValueError if
this GridFile is already closed. Raises TypeErrer if str is not an
instance of str.
Due to buffering, the string may not actually show up in the
database until the :meth:`flush` or :meth:`close` method is
called. Raises :class:`ValueError` if this :class:`GridFile`
is already closed. Raises :class:`TypeError` if `data` is not
an instance of :class:`str`.
:Parameters:
- `str`: string of bytes to be written to the file
- `data`: string of bytes to be written to the file
"""
self.__assert_open("w")
if not isinstance(str, basestring):
if not isinstance(data, str):
raise TypeError("can only write strings")
while str:
while data:
space = self.__chunk_size - self.__write_buffer.tell()
if len(str) <= space:
self.__write_buffer.write(str)
if len(data) <= space:
self.__write_buffer.write(data)
break
else:
self.__write_buffer.write(str[:space])
self.__write_buffer.write(data[:space])
self.__flush_write_buffer()
str = str[space:]
data = data[space:]
def tell(self):
"""Return the GridFile's current position (read-mode files only).
"""Return the current position of this :class:`GridFile`
(read-mode files only).
"""
self.__assert_open("r")
return self.__position
def seek(self, pos, whence=_SEEK_SET):
"""Set the current position of the GridFile (read-mode files only).
"""Set the current position of this :class:`GridFile`
(read-mode files only).
:Parameters:
- `pos`: the position (or offset if using relative positioning) to seek to
- `whence` (optional): where to seek from. os.SEEK_SET (0) for absolute
file positioning, os.SEEK_CUR (1) to seek relative to the current
position, os.SEEK_END (2) to seek relative to the file's end.
- `pos`: the position (or offset if using relative
positioning) to seek to
- `whence` (optional): where to seek
from. :attr:`os.SEEK_SET` (``0``) for absolute file
positioning, :attr:`os.SEEK_CUR` (``1``) to seek relative
to the current position, :attr:`os.SEEK_END` (``2``) to
seek relative to the file's end.
"""
self.__assert_open("r")
if whence == _SEEK_SET: