toward a gridfs module
This commit is contained in:
parent
fe9e805c79
commit
cd86a107ea
19
gridfs/__init__.py
Normal file
19
gridfs/__init__.py
Normal file
@ -0,0 +1,19 @@
|
||||
# Copyright 2009 10gen, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""GridFS is a specification for storing large objects in Mongo.
|
||||
|
||||
The `gridfs` package is an implementation of GridFS on top of `pymongo`,
|
||||
exposing a file-like interface.
|
||||
"""
|
||||
19
gridfs/errors.py
Normal file
19
gridfs/errors.py
Normal file
@ -0,0 +1,19 @@
|
||||
# Copyright 2009 10gen, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Exceptions raised by the `gridfs` package"""
|
||||
|
||||
class CorruptGridFile(Exception):
|
||||
"""Raised when a GridFS "file" is malformed.
|
||||
"""
|
||||
275
gridfs/grid_file.py
Normal file
275
gridfs/grid_file.py
Normal file
@ -0,0 +1,275 @@
|
||||
# Copyright 2009 10gen, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""File-like object used for reading from and writing to GridFS"""
|
||||
|
||||
import types
|
||||
import datetime
|
||||
|
||||
from pymongo.son import SON
|
||||
from pymongo.database import Database
|
||||
from pymongo.objectid import ObjectId
|
||||
from pymongo.dbref import DBRef
|
||||
from errors import CorruptGridFile
|
||||
|
||||
class GridFile(object):
|
||||
"""A "file" stored in GridFS.
|
||||
"""
|
||||
# TODO should be able to create a GridFile given a Collection object instead
|
||||
# of a database and collection name?
|
||||
def __init__(self, file_spec, database, mode="r", collection="_files"):
|
||||
"""Open a "file" in GridFS.
|
||||
|
||||
Application developers should not need to instantiate this class
|
||||
directly.
|
||||
|
||||
Raises TypeError if file_spec is not an instance of (dict,
|
||||
`pymongo.son.SON`), database is not an instance of
|
||||
`pymongo.database.Database`, or collection is not an instance of (str,
|
||||
unicode).
|
||||
|
||||
The file_spec argument must be a SON query specifier for the file to
|
||||
open. The first file matching the specifier will be opened. If no such
|
||||
files exist, a new file is created using the metadata in file_spec.
|
||||
The valid fields in a file_spec are as follows:
|
||||
|
||||
- "_id": unique ID for this file
|
||||
* default: `pymongo.objectid.ObjectId()`
|
||||
- "filename": human name for the file
|
||||
- "contentType": valid mime-type for the file
|
||||
- "length": size of the file, in bytes
|
||||
* only used for querying, automatically set for inserts
|
||||
- "chunkSize": size of each of the chunks, in bytes
|
||||
* default: 256 kb
|
||||
- "uploadDate": date when the object was first stored
|
||||
* only used for querying, automatically set for inserts
|
||||
- "aliases": array of alias strings
|
||||
|
||||
Arguments:
|
||||
- `file_spec`: query specifier as described above
|
||||
- `database`: the database to store/retrieve this file in
|
||||
- `mode` (optional): the mode to open this file with, one of ("r", "w")
|
||||
- `collection` (optional): the collection to store/retrieve this file in
|
||||
"""
|
||||
if not isinstance(file_spec, (types.DictType, SON)):
|
||||
raise TypeError("file_spec must be an instance of (dict, SON)")
|
||||
if not isinstance(database, Database):
|
||||
raise TypeError("database must be an instance of database")
|
||||
if not isinstance(collection, types.StringTypes):
|
||||
raise TypeError("collection must be an instance of (str, unicode)")
|
||||
if mode not in ("r", "w"):
|
||||
raise ValueError("mode must be one of ('r', 'w')")
|
||||
|
||||
self.__collection = database[collection]
|
||||
|
||||
file = self.__collection.find_one(file_spec)
|
||||
if file:
|
||||
self.__id = file["_id"]
|
||||
else:
|
||||
file_spec["length"] = 0
|
||||
file_spec["uploadDate"] = datetime.datetime.now()
|
||||
file_spec.setdefault("chunkSize", 256000)
|
||||
self.__id = self.__collection.insert(file_spec)["_id"]
|
||||
|
||||
self.__mode = mode
|
||||
if mode == "w":
|
||||
self.__erase()
|
||||
self.__current_chunk = None
|
||||
self.__closed = False
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def __erase(self):
|
||||
"""Erase all of the data stored in this GridFile.
|
||||
"""
|
||||
file = self.__collection.find_one(self.__id)
|
||||
|
||||
next = file.get("next", None)
|
||||
chunk_number = 0
|
||||
|
||||
while next:
|
||||
chunk = self.__collection.database().dereference(next)
|
||||
if not chunk:
|
||||
raise CorruptGridFile("could not dereference: %r" % next)
|
||||
if chunk["cn"] != chunk_number:
|
||||
raise CorruptGridFile("incorrect chunk number: %r, should be: %r" %
|
||||
(chunk["cn"], chunk_number))
|
||||
|
||||
self.__collection.database()[next.collection()].remove({"_id": next.id()})
|
||||
|
||||
chunk_number += 1
|
||||
next = chunk["next"]
|
||||
|
||||
file["next"] = None
|
||||
file["length"] = 0
|
||||
self.__collection.save(file)
|
||||
|
||||
@property
|
||||
def closed(self):
|
||||
return self.__closed
|
||||
|
||||
@property
|
||||
def mode(self):
|
||||
return self.__mode
|
||||
|
||||
def __create_property(field_name, read_only=False):
|
||||
def get(self):
|
||||
return self.__collection.find_one(self.__id).get(field_name, None)
|
||||
def set(self, value):
|
||||
file = self.__collection.find_one(self.__id)
|
||||
file[field_name] = value
|
||||
self.__collection.save(file)
|
||||
if not read_only:
|
||||
return property(get, set)
|
||||
return property(get)
|
||||
|
||||
name = __create_property("filename")
|
||||
content_type = __create_property("contentType")
|
||||
length = __create_property("length", True)
|
||||
chunk_size = __create_property("filename", True)
|
||||
upload_date = __create_property("uploadDate", True)
|
||||
aliases = __create_property("aliases")
|
||||
next = __create_property("next", True)
|
||||
|
||||
__chunks_collection = "_chunks"
|
||||
def __write_current_chunk(self):
|
||||
# TODO _chunks collection should be configurable?
|
||||
self.__collection.database()[self.__chunks_collection].save(self.__current_chunk)
|
||||
|
||||
def flush(self):
|
||||
"""Flush the GridFile to the database.
|
||||
"""
|
||||
self.__assert_open()
|
||||
if self.mode != "w":
|
||||
return
|
||||
|
||||
file = self.__collection.find_one(self.__id)
|
||||
|
||||
length = file["chunkSize"] * self.__current_chunk["cn"] + len(self.__current_chunk["data"])
|
||||
file["length"] = length
|
||||
|
||||
self.__write_current_chunk()
|
||||
self.__collection.save(file)
|
||||
|
||||
def close(self):
|
||||
"""Close the GridFile.
|
||||
|
||||
A closed GridFile cannot be read or written any more. Calling `close()`
|
||||
more than once is allowed.
|
||||
"""
|
||||
if not self.__closed:
|
||||
self.flush()
|
||||
self.__closed = True
|
||||
|
||||
def __assert_open(self, mode=None):
|
||||
if mode and self.mode != mode:
|
||||
raise ValueError("file must be open in mode %r" % mode)
|
||||
if self.closed:
|
||||
raise ValueError("operation cannot be performed on a closed GridFile")
|
||||
|
||||
def read(self, size=-1):
|
||||
"""Read at most size bytes from the file (less if there isn't enough
|
||||
data).
|
||||
|
||||
The bytes are returned as a string object. If size is negative or omitted
|
||||
all data is read. Raises ValueError if this GridFile is already closed.
|
||||
|
||||
Arguments:
|
||||
- `size` (optional): the number of bytes to read
|
||||
"""
|
||||
self.__assert_open("r")
|
||||
|
||||
if size < 0 or size > self.length:
|
||||
size = self.length
|
||||
|
||||
bytes = ""
|
||||
next = self.next
|
||||
chunk_number = 0
|
||||
while len(bytes) < size:
|
||||
if not next:
|
||||
raise CorruptGridFile("incorrect length for file: %r" % self)
|
||||
chunk = self.__collection.database().dereference(next)
|
||||
if not chunk:
|
||||
raise CorruptGridFile("could not dereference: %r" % next)
|
||||
if chunk["cn"] != chunk_number:
|
||||
raise CorruptGridFile("incorrect chunk number: %r, should be: %r" %
|
||||
(chunk["cn"], chunk_number))
|
||||
|
||||
bytes += chunk["data"]
|
||||
|
||||
chunk_number += 1
|
||||
next = chunk["next"]
|
||||
|
||||
bytes = bytes[:size]
|
||||
return bytes
|
||||
|
||||
# TODO should support writing unicode to a file. this means that files will
|
||||
# need to have an encoding attribute.
|
||||
def write(self, str):
|
||||
"""Write a string to the GridFile. There is no return value.
|
||||
|
||||
Due to buffering, the string may not actually show up in the database
|
||||
until the `flush()` or `close()` method is called. Raises ValueError if
|
||||
this GridFile is already closed. Raises TypeErrer if str is not an
|
||||
instance of str.
|
||||
|
||||
Arguments:
|
||||
- `str`: string of bytes to be written to the file
|
||||
"""
|
||||
self.__assert_open("w")
|
||||
|
||||
if not isinstance(str, types.StringType):
|
||||
raise TypeError("can only write strings")
|
||||
|
||||
if not len(str):
|
||||
return
|
||||
|
||||
def initialize_chunk(number):
|
||||
id = ObjectId()
|
||||
new_chunk = SON([("_id", id),
|
||||
("cn", number),
|
||||
("data", ""),
|
||||
("next", None)])
|
||||
return new_chunk
|
||||
|
||||
if not self.__current_chunk:
|
||||
self.__current_chunk = initialize_chunk(0)
|
||||
ref = DBRef(self.__chunks_collection, self.__current_chunk["_id"])
|
||||
file = self.__collection.find_one(self.__id)
|
||||
file["next"] = ref
|
||||
self.__collection.save(file)
|
||||
|
||||
data = self.__current_chunk["data"]
|
||||
data += str
|
||||
|
||||
while len(data) > self.chunk_size:
|
||||
self.__current_chunk["data"] = data[:self.chunk_size]
|
||||
data = data[self.chunk_size:]
|
||||
|
||||
new_chunk = initialize_chunk(self.__current_chunk["cn"] + 1)
|
||||
|
||||
self.__current_chunk["next"] = DBRef(self.__chunks_collection, new_chunk["_id"])
|
||||
self.__write_current_chunk()
|
||||
self.__current_chunk = new_chunk
|
||||
|
||||
self.__current_chunk["data"] = data
|
||||
|
||||
def writelines(self, sequence):
|
||||
"""Write a sequence of strings to the file.
|
||||
|
||||
Does not add seperators.
|
||||
"""
|
||||
for line in sequence:
|
||||
self.write(line)
|
||||
40
test/test_grid_file.py
Normal file
40
test/test_grid_file.py
Normal file
@ -0,0 +1,40 @@
|
||||
# Copyright 2009 10gen, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for the grid_file module.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
|
||||
import qcheck
|
||||
from test_connection import get_connection
|
||||
from gridfs.grid_file import GridFile
|
||||
|
||||
class TestGridFile(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.db = get_connection().test
|
||||
|
||||
def test_write_then_read(self):
|
||||
self.db._files.remove({})
|
||||
|
||||
file = GridFile({"filename": "test"}, self.db, "w")
|
||||
file.write("hello world")
|
||||
file.close()
|
||||
|
||||
file = GridFile({"filename": "test"}, self.db)
|
||||
self.assertEqual(file.read(), "hello world")
|
||||
file.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Loading…
Reference in New Issue
Block a user