PYTHON-960 - GridFS spec compliance

This commit is contained in:
aherlihy 2015-08-06 12:41:40 -04:00
parent 8b986a4f2c
commit b9baa8ae8a
10 changed files with 2721 additions and 107 deletions

View File

@ -25,10 +25,11 @@ from collections import Mapping
from gridfs.errors import NoFile
from gridfs.grid_file import (GridIn,
GridOut,
GridOutCursor)
GridOutCursor,
DEFAULT_CHUNK_SIZE)
from pymongo import (ASCENDING,
DESCENDING)
from pymongo.common import UNAUTHORIZED_CODES
from pymongo.common import UNAUTHORIZED_CODES, validate_string
from pymongo.database import Database
from pymongo.errors import ConfigurationError, OperationFailure
@ -46,6 +47,9 @@ class GridFS(object):
- `database`: database to use
- `collection` (optional): root collection to use
.. versionchanged:: 3.1
Indexes are only ensured on the first write to the DB.
.. versionchanged:: 3.0
`database` must use an acknowledged
:attr:`~pymongo.database.Database.write_concern`
@ -64,29 +68,6 @@ class GridFS(object):
self.__files = self.__collection.files
self.__chunks = self.__collection.chunks
def __is_secondary(self):
return not self.__database.client._is_writable()
def __create_index(self, coll, key, **kwargs):
try:
if not self.__is_secondary():
coll.create_index(key, **kwargs)
except OperationFailure as exc:
if not (exc.code in UNAUTHORIZED_CODES
or "authorized" in str(exc)):
raise exc
def __ensure_index_files_id(self):
self.__create_index(self.__chunks,
[("files_id", ASCENDING), ("n", ASCENDING)],
unique=True)
def __ensure_index_filename(self):
self.__create_index(self.__files,
[("filename", ASCENDING),
("uploadDate", DESCENDING)])
def new_file(self, **kwargs):
"""Create a new file in GridFS.
@ -180,17 +161,15 @@ class GridFS(object):
Raises :class:`~gridfs.errors.NoFile` if no such version of
that file exists.
An index on ``{filename: 1, uploadDate: -1}`` will
automatically be created when this method is called the first
time.
:Parameters:
- `filename`: ``"filename"`` of the file to get, or `None`
- `version` (optional): version of the file to get (defaults
to -1, the most recent version uploaded)
- `**kwargs` (optional): find files by custom metadata.
.. versionchanged:: 3.1
``get_version`` no longer ensures indexes.
"""
self.__ensure_index_filename()
query = kwargs
if filename is not None:
query["filename"] = filename
@ -237,8 +216,10 @@ class GridFS(object):
:Parameters:
- `file_id`: ``"_id"`` of the file to delete
.. versionchanged:: 3.1
``delete`` no longer ensures indexes.
"""
self.__ensure_index_files_id()
self.__files.delete_one({"_id": file_id})
self.__chunks.delete_many({"files_id": file_id})
@ -246,15 +227,9 @@ class GridFS(object):
"""List the names of all files stored in this instance of
:class:`GridFS`.
An index on ``{filename: 1, uploadDate: -1}`` will
automatically be created when this method is called the first
time.
.. versionchanged:: 2.7
``list`` ensures an index, the same as ``get_version``.
.. versionchanged:: 3.1
``list`` no longer ensures indexes.
"""
self.__ensure_index_filename()
# With an index, distinct includes documents with no filename
# as None.
return [
@ -381,3 +356,364 @@ class GridFS(object):
if kwargs:
return self.__files.find_one(kwargs, ["_id"]) is not None
return self.__files.find_one(document_or_id, ["_id"]) is not None
class GridFSBucket(object):
"""An instance of GridFS on top of a single Database."""
def __init__(self, db, bucket_name="fs",
             chunk_size_bytes=DEFAULT_CHUNK_SIZE, write_concern=None,
             read_preference=None):
    """Create a new instance of :class:`GridFSBucket`.

    Raises :exc:`TypeError` if `db` is not an instance of
    :class:`~pymongo.database.Database`.

    Raises :exc:`~pymongo.errors.ConfigurationError` if the effective
    write concern (`write_concern`, or ``db.write_concern`` when
    `write_concern` is ``None``) is not acknowledged.

    :Parameters:
      - `db`: database to use.
      - `bucket_name` (optional): The name of the bucket. Defaults to 'fs'.
      - `chunk_size_bytes` (optional): The chunk size in bytes. Defaults
        to 255KB.
      - `write_concern` (optional): The
        :class:`~pymongo.write_concern.WriteConcern` to use. If ``None``
        (the default) db.write_concern is used.
      - `read_preference` (optional): The read preference to use. If
        ``None`` (the default) db.read_preference is used.

    .. versionadded:: 3.1

    .. mongodoc:: gridfs
    """
    if not isinstance(db, Database):
        raise TypeError("database must be an instance of Database")

    # Only fall back to the database's write concern when none was given.
    effective_wc = db.write_concern if write_concern is None else write_concern
    if not effective_wc.acknowledged:
        raise ConfigurationError('write concern must be acknowledged')

    # Both sub-collections receive the options exactly as passed in.
    options = {"write_concern": write_concern,
               "read_preference": read_preference}

    self._db = db
    self._bucket_name = bucket_name
    self._collection = db[bucket_name]
    self._chunks = self._collection.chunks.with_options(**options)
    self._files = self._collection.files.with_options(**options)
    self._chunk_size_bytes = chunk_size_bytes
def open_upload_stream(self, filename, chunk_size_bytes=None,
                       metadata=None):
    """Open a stream that the application can write the contents of a
    new file to.

    The caller must supply the filename and may optionally override the
    chunk size or attach extra information in the ``metadata`` field of
    the files collection document.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      grid_in, file_id = fs.open_upload_stream(
            "test_file", chunk_size_bytes=4,
            metadata={"contentType": "text/plain"})
      grid_in.write("data I want to store!")
      grid_in.close()  # uploaded on close

    Returns a tuple of an instance of :class:`~gridfs.grid_file.GridIn`
    and the _id of the file to upload.

    `filename` is checked with ``validate_string`` before anything else
    happens.

    :Parameters:
      - `filename`: The name of the file to upload.
      - `chunk_size_bytes` (optional): The number of bytes per chunk of
        this file. Defaults to the chunk_size_bytes in
        :class:`GridFSBucket`.
      - `metadata` (optional): User data for the 'metadata' field of the
        files collection document. If not provided the metadata field will
        be omitted from the files collection document.
    """
    validate_string("filename", filename)

    # Fall back to the bucket-wide chunk size when none is given.
    if chunk_size_bytes is None:
        chunk_size_bytes = self._chunk_size_bytes

    file_options = {"filename": filename, "chunk_size": chunk_size_bytes}
    if metadata is not None:
        file_options["metadata"] = metadata

    grid_in = GridIn(self._collection, **file_options)
    return grid_in, grid_in._id
def upload_from_stream(self, filename, source, chunk_size_bytes=None,
                       metadata=None):
    """Upload a user file to a GridFS bucket.

    Opens an upload stream for `filename`, writes `source` to it and
    closes the stream. The stream is closed even if the write raises.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      file_id = fs.upload_from_stream(
          "test_file",
          "data I want to store!",
          chunk_size_bytes=4,
          metadata={"contentType": "text/plain"})

    Returns the _id of the uploaded file.

    :Parameters:
      - `filename`: The name of the file to upload.
      - `source`: The source stream of the content to be uploaded. Must be
        a file-like object that implements :meth:`read` or a string.
      - `chunk_size_bytes` (optional): The number of bytes per chunk of
        this file. Defaults to the chunk_size_bytes of
        :class:`GridFSBucket`.
      - `metadata` (optional): User data for the 'metadata' field of the
        files collection document. If not provided the metadata field will
        be omitted from the files collection document.
    """
    grid_in, uploaded_id = self.open_upload_stream(
        filename, chunk_size_bytes, metadata)
    try:
        grid_in.write(source)
    finally:
        # Always close the stream, whether or not the write succeeded.
        grid_in.close()
    return uploaded_id
def open_download_stream(self, file_id):
    """Open a stream from which the application can read the contents of
    the stored file identified by `file_id`.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      # get _id of file to read.
      file_id = fs.upload_from_stream("test_file", "data I want to store!")
      grid_out = fs.open_download_stream(file_id)
      contents = grid_out.read()

    Returns an instance of :class:`~gridfs.grid_file.GridOut`.

    Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists.

    :Parameters:
      - `file_id`: The _id of the file to be downloaded.
    """
    grid_out = GridOut(self._collection, file_id)

    # Look the file up eagerly so a missing file is reported here rather
    # than on the first attribute access.
    grid_out._ensure_file()
    return grid_out
def download_to_stream(self, file_id, destination):
    """Download the contents of the stored file specified by `file_id`
    and write them to `destination`.

    The file is copied in pieces of at most one GridFS chunk, so the
    whole file is never held in memory at once.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      # Get _id of file to read
      file_id = fs.upload_from_stream("test_file", "data I want to store!")
      # Get file to write to
      file = open('myfile', 'wb+')
      fs.download_to_stream(file_id, file)
      file.seek(0)
      contents = file.read()

    Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists.

    :Parameters:
      - `file_id`: The _id of the file to be downloaded.
      - `destination`: a file-like object implementing :meth:`write`.
    """
    gout = self.open_download_stream(file_id)

    # Write the file's bytes, not the GridOut object itself: file-like
    # objects only accept string/bytes data in write().
    while True:
        piece = gout.read(gout.chunk_size)
        if not piece:
            break
        destination.write(piece)
def delete(self, file_id):
    """Delete the stored file identified by `file_id`: its files
    collection document and all associated chunks in this bucket.

    The chunks are removed even when no files collection document
    matched, so orphaned chunks are cleaned up before
    :exc:`~gridfs.errors.NoFile` is raised.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      # Get _id of file to delete
      file_id = fs.upload_from_stream("test_file", "data I want to store!")
      fs.delete(file_id)

    Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists.

    :Parameters:
      - `file_id`: The _id of the file to be deleted.
    """
    outcome = self._files.delete_one({"_id": file_id})
    self._chunks.delete_many({"files_id": file_id})
    if outcome.deleted_count:
        return
    raise NoFile(
        "no file could be deleted because none matched %s" % file_id)
def find(self, *args, **kwargs):
    """Find and return the files collection documents that match
    ``filter``.

    All positional and keyword arguments are forwarded unchanged to
    :class:`~gridfs.grid_file.GridOutCursor`, which is created over this
    bucket's root collection. The returned cursor can be combined with
    other modifiers for additional control.

    For example::

      for grid_data in fs.find({"filename": "lisa.txt"},
                              no_cursor_timeout=True):
          data = grid_data.read()

    would iterate through all versions of "lisa.txt" stored in GridFS.
    Note that setting no_cursor_timeout to True may be important to
    prevent the cursor from timing out during long multi-file processing
    work.

    As another example, the call::

      most_recent_three = fs.find().sort("uploadDate", -1).limit(3)

    would return a cursor to the three most recently uploaded files
    in GridFS.

    Follows a similar interface to
    :meth:`~pymongo.collection.Collection.find`
    in :class:`~pymongo.collection.Collection`.

    :Parameters:
      - `filter`: Search query.
      - `batch_size` (optional): The number of documents to return per
        batch.
      - `limit` (optional): The maximum number of documents to return.
      - `no_cursor_timeout` (optional): The server normally times out idle
        cursors after an inactivity period (10 minutes) to prevent excess
        memory use. Set this option to True to prevent that.
      - `skip` (optional): The number of documents to skip before
        returning.
      - `sort` (optional): The order by which to sort results. Defaults to
        None.
    """
    root_collection = self._collection
    return GridOutCursor(root_collection, *args, **kwargs)
def open_download_stream_by_name(self, filename, revision=-1):
    """Open a stream from which the application can read the contents of
    `filename`, optionally at a specific `revision`.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      grid_out = fs.open_download_stream_by_name("test_file")
      contents = grid_out.read()

    Returns an instance of :class:`~gridfs.grid_file.GridOut`.

    Raises :exc:`~gridfs.errors.NoFile` if no such version of
    that file exists.

    `filename` is checked with ``validate_string`` before the query runs.

    :Parameters:
      - `filename`: The name of the file to read from.
      - `revision` (optional): Which revision (documents with the same
        filename and different uploadDate) of the file to retrieve.
        Defaults to -1 (the most recent revision).

    :Note: Revision numbers are defined as follows:

      0 = the original stored file
      1 = the first revision
      2 = the second revision
      etc...
      -2 = the second most recent revision
      -1 = the most recent revision
    """
    validate_string("filename", filename)
    cursor = self._files.find({"filename": filename})

    # Negative revisions count back from the newest upload, non-negative
    # revisions count forward from the oldest.
    if revision < 0:
        to_skip, direction = abs(revision) - 1, DESCENDING
    else:
        to_skip, direction = revision, ASCENDING
    cursor.limit(-1).skip(to_skip).sort("uploadDate", direction)

    try:
        file_doc = next(cursor)
        return GridOut(self._collection, file_document=file_doc)
    except StopIteration:
        raise NoFile(
            "no version %d for filename %r" % (revision, filename))
def download_to_stream_by_name(self, filename, destination, revision=-1):
    """Write the contents of `filename` (with optional `revision`) to
    `destination`.

    The file is copied in pieces of at most one GridFS chunk, so the
    whole file is never held in memory at once.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      # Get file to write to
      file = open('myfile', 'wb')
      fs.download_to_stream_by_name("test_file", file)

    Raises :exc:`~gridfs.errors.NoFile` if no such version of
    that file exists.

    :Parameters:
      - `filename`: The name of the file to read from.
      - `destination`: A file-like object that implements :meth:`write`.
      - `revision` (optional): Which revision (documents with the same
        filename and different uploadDate) of the file to retrieve.
        Defaults to -1 (the most recent revision).

    :Note: Revision numbers are defined as follows:

      0 = the original stored file
      1 = the first revision
      2 = the second revision
      etc...
      -2 = the second most recent revision
      -1 = the most recent revision
    """
    gout = self.open_download_stream_by_name(filename, revision)

    # Write the file's bytes, not the GridOut object itself: file-like
    # objects only accept string/bytes data in write().
    while True:
        piece = gout.read(gout.chunk_size)
        if not piece:
            break
        destination.write(piece)
def rename(self, file_id, new_filename):
    """Rename the stored file with the specified `file_id`.

    Sets the ``filename`` field of the matching files collection
    document to `new_filename`.

    For example::

      my_db = MongoClient().test
      fs = GridFSBucket(my_db)
      # Get _id of file to rename
      file_id = fs.upload_from_stream("test_file", "data I want to store!")
      fs.rename(file_id, "new_test_name")

    Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists.

    :Parameters:
      - `file_id`: The _id of the file to be renamed.
      - `new_filename`: The new name of the file.
    """
    result = self._files.update_one({"_id": file_id},
                                    {"$set": {"filename": new_filename}})
    if not result.matched_count:
        # Format file_id with %r: ids are not necessarily integers, and
        # %i would raise TypeError here instead of the intended NoFile.
        raise NoFile("no files could be renamed %r because none "
                     "matched file_id %r" % (new_filename, file_id))

View File

@ -13,25 +13,22 @@
# limitations under the License.
"""Tools for representing files stored in GridFS."""
import datetime
import math
import os
from hashlib import md5
from bson.binary import Binary
from bson.objectid import ObjectId
from bson.py3compat import text_type, StringIO
from gridfs.errors import (CorruptGridFile,
FileExists,
NoFile)
from gridfs.errors import CorruptGridFile, FileExists, NoFile
from pymongo import ASCENDING
from pymongo.collection import Collection
from pymongo.common import UNAUTHORIZED_CODES
from pymongo.cursor import Cursor
from pymongo.errors import (ConfigurationError,
DuplicateKeyError,
OperationFailure)
from pymongo.read_preferences import ReadPreference
try:
_SEEK_SET = os.SEEK_SET
@ -50,6 +47,9 @@ NEWLN = b"\n"
# Slightly under a power of 2, to work well with server's record allocations.
DEFAULT_CHUNK_SIZE = 255 * 1024
_C_INDEX = [("files_id", ASCENDING), ("n", ASCENDING)]
_F_INDEX = [("filename", ASCENDING), ("uploadDate", ASCENDING)]
def _grid_in_property(field_name, docstring, read_only=False,
closed_only=False):
@ -155,6 +155,7 @@ class GridIn(object):
if "chunk_size" in kwargs:
kwargs["chunkSize"] = kwargs.pop("chunk_size")
kwargs['md5'] = md5()
# Defaults
kwargs["_id"] = kwargs.get("_id", ObjectId())
kwargs["chunkSize"] = kwargs.get("chunkSize", DEFAULT_CHUNK_SIZE)
@ -167,18 +168,30 @@ class GridIn(object):
object.__setattr__(self, "_closed", False)
object.__setattr__(self, "_ensured_index", False)
def _ensure_index(self):
if not object.__getattribute__(self, "_ensured_index"):
def __create_index(self, collection, index, unique):
doc = collection.find_one(projection={"_id": 1})
if doc is None:
try:
self._coll.chunks.create_index(
[("files_id", ASCENDING), ("n", ASCENDING)],
unique=True)
except OperationFailure as exc:
if not (exc.code in UNAUTHORIZED_CODES
or "authorized" in str(exc)):
raise exc
indexes = list(collection.list_indexes())
except OperationFailure:
indexes = []
if index not in indexes:
collection.create_index(index, unique=unique)
def __ensure_indexes(self):
    """Ensure the files and chunks indexes, at most once per instance.

    Does nothing when the ``_ensured_index`` flag is already set;
    otherwise asks ``__create_index`` for the (non-unique) files index
    and the (unique) chunks index, then sets the flag.
    """
    if object.__getattribute__(self, "_ensured_index"):
        return
    self.__create_index(self._coll.files, _F_INDEX, False)
    self.__create_index(self._coll.chunks, _C_INDEX, True)
    object.__setattr__(self, "_ensured_index", True)
def abort(self):
    """Remove all chunks/files that may have been uploaded and close.

    Deletes every chunk whose ``files_id`` is this file's ``_id``, then
    the files collection document itself, and finally marks this
    instance as closed.
    """
    file_id = self._file['_id']
    self._coll.chunks.delete_many({"files_id": file_id})
    self._coll.files.delete_one({"_id": file_id})
    object.__setattr__(self, "_closed", True)
@property
def closed(self):
"""Is this file closed?
@ -225,7 +238,8 @@ class GridIn(object):
"""
# Ensure the index, even if there's nothing to write, so
# the filemd5 command always succeeds.
self._ensure_index()
self.__ensure_indexes()
self._file['md5'].update(data)
if not data:
return
@ -255,12 +269,7 @@ class GridIn(object):
try:
self.__flush_buffer()
db = self._coll.database
md5 = db.command(
"filemd5", self._id, root=self._coll.name,
read_preference=ReadPreference.PRIMARY)["md5"]
self._file["md5"] = md5
self._file['md5'] = self._file["md5"].hexdigest()
self._file["length"] = self._position
self._file["uploadDate"] = datetime.datetime.utcnow()
@ -326,10 +335,14 @@ class GridIn(object):
# Make sure to flush only when _buffer is complete
space = self.chunk_size - self._buffer.tell()
if space:
to_write = read(space)
try:
to_write = read(space)
except:
self.abort()
raise
self._buffer.write(to_write)
if len(to_write) < space:
return # EOF or incomplete
return # EOF or incomplete
self.__flush_buffer()
to_write = read(self.chunk_size)
while to_write and len(to_write) == self.chunk_size:
@ -475,6 +488,16 @@ class GridOut(object):
received += len(chunk_data)
data.write(chunk_data)
# Detect extra chunks.
max_chunk_n = math.ceil(self.length / float(self.chunk_size))
chunk = self.__chunks.find_one({"files_id": self._id,
"n": {"$gte": max_chunk_n}})
# According to spec, ignore extra chunks if they are empty.
if chunk is not None and len(chunk['data']):
raise CorruptGridFile(
"Extra chunk found: expected %i chunks but found "
"chunk with n=%i" % (max_chunk_n, chunk['n']))
self.__position -= received - size
# Return 'size' bytes and store the rest.
@ -605,7 +628,7 @@ class GridOutCursor(Cursor):
of an arbitrary query against the GridFS files collection.
"""
def __init__(self, collection, filter=None, skip=0, limit=0,
no_cursor_timeout=False, sort=None):
no_cursor_timeout=False, sort=None, batch_size=0):
"""Create a new cursor, similar to the normal
:class:`~pymongo.cursor.Cursor`.
@ -621,7 +644,8 @@ class GridOutCursor(Cursor):
super(GridOutCursor, self).__init__(
collection.files, filter, skip=skip, limit=limit,
no_cursor_timeout=no_cursor_timeout, sort=sort)
no_cursor_timeout=no_cursor_timeout, sort=sort,
batch_size=batch_size)
def next(self):
"""Get next GridOut object from cursor.

412
test/gridfs/delete.json Normal file
View File

@ -0,0 +1,412 @@
{
"data": {
"files": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "d41d8cd98f00b204e9800998ecf8427e",
"filename": "length-0",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "d41d8cd98f00b204e9800998ecf8427e",
"filename": "length-0-with-empty-chunk",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"length": 2,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "c700ed4fdb1d27055aa3faa2c2432283",
"filename": "length-2",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"length": 8,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "dd254cdc958e53abaa67da9f797125f5",
"filename": "length-8",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"length": 8,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "dd254cdc958e53abaa67da9f797125f5",
"filename": "length-8-with-empty-chunk",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
}
],
"chunks": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"files_id": {
"$oid": "000000000000000000000002"
},
"n": 0,
"data": {
"$hex": ""
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"files_id": {
"$oid": "000000000000000000000003"
},
"n": 0,
"data": {
"$hex": "1122"
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 1,
"data": {
"$hex": "55667788"
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": {
"$oid": "000000000000000000000006"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 1,
"data": {
"$hex": "55667788"
}
},
{
"_id": {
"$oid": "000000000000000000000007"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 2,
"data": {
"$hex": ""
}
}
]
},
"tests": [
{
"description": "Delete when length is 0",
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000001"
}
}
},
"assert": {
"result": "void",
"data": [
{
"delete": "expected.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000001"
}
},
"limit": 1
}
]
}
]
}
},
{
"description": "Delete when length is 0 and there is one extra empty chunk",
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000002"
}
}
},
"assert": {
"result": "void",
"data": [
{
"delete": "expected.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000002"
}
},
"limit": 1
}
]
},
{
"delete": "expected.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000002"
}
},
"limit": 0
}
]
}
]
}
},
{
"description": "Delete when length is 8",
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000004"
}
}
},
"assert": {
"result": "void",
"data": [
{
"delete": "expected.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000004"
}
},
"limit": 1
}
]
},
{
"delete": "expected.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000004"
}
},
"limit": 0
}
]
}
]
}
},
{
"description": "Delete when length is 8 and there is one extra empty chunk",
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"result": "void",
"data": [
{
"delete": "expected.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000005"
}
},
"limit": 1
}
]
},
{
"delete": "expected.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
}
},
"limit": 0
}
]
}
]
}
},
{
"description": "Delete when files entry does not exist",
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000000"
}
}
},
"assert": {
"error": "FileNotFound"
}
},
{
"description": "Delete when files entry does not exist and there are orphaned chunks",
"arrange": {
"data": [
{
"delete": "fs.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000005"
}
},
"limit": 1
}
]
}
]
},
"act": {
"operation": "delete",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"error": "FileNotFound",
"data": [
{
"delete": "expected.files",
"deletes": [
{
"q": {
"_id": {
"$oid": "000000000000000000000005"
}
},
"limit": 1
}
]
},
{
"delete": "expected.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
}
},
"limit": 0
}
]
}
]
}
}
]
}

564
test/gridfs/download.json Normal file
View File

@ -0,0 +1,564 @@
{
"data": {
"files": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "d41d8cd98f00b204e9800998ecf8427e",
"filename": "length-0",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "d41d8cd98f00b204e9800998ecf8427e",
"filename": "length-0-with-empty-chunk",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"length": 2,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "c700ed4fdb1d27055aa3faa2c2432283",
"filename": "length-2",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"length": 8,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "dd254cdc958e53abaa67da9f797125f5",
"filename": "length-8",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"length": 10,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "57d83cd477bfb1ccd975ab33d827a92b",
"filename": "length-10",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000006"
},
"length": 12,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "6289ac1db331d1c7677a4b7e123178f9",
"filename": "length-12-with-empty-chunk",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
}
],
"chunks": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"files_id": {
"$oid": "000000000000000000000002"
},
"n": 0,
"data": {
"$hex": ""
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"files_id": {
"$oid": "000000000000000000000003"
},
"n": 0,
"data": {
"$hex": "1122"
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 1,
"data": {
"$hex": "55667788"
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": {
"$oid": "000000000000000000000006"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 1,
"data": {
"$hex": "55667788"
}
},
{
"_id": {
"$oid": "000000000000000000000007"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 2,
"data": {
"$hex": "99aa"
}
},
{
"_id": {
"$oid": "000000000000000000000008"
},
"files_id": {
"$oid": "000000000000000000000006"
},
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": {
"$oid": "000000000000000000000009"
},
"files_id": {
"$oid": "000000000000000000000006"
},
"n": 1,
"data": {
"$hex": "55667788"
}
},
{
"_id": {
"$oid": "000000000000000000000010"
},
"files_id": {
"$oid": "000000000000000000000006"
},
"n": 2,
"data": {
"$hex": "99aabbcc"
}
},
{
"_id": {
"$oid": "000000000000000000000011"
},
"files_id": {
"$oid": "000000000000000000000006"
},
"n": 3,
"data": {
"$hex": ""
}
}
]
},
"tests": [
{
"description": "Download when length is zero",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000001"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": ""
}
}
},
{
"description": "Download when length is zero and there is one empty chunk",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000002"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": ""
}
}
},
{
"description": "Download when there is one chunk",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000003"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": "1122"
}
}
},
{
"description": "Download when there are two chunks",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000004"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": "1122334455667788"
}
}
},
{
"description": "Download when there are three chunks",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": "112233445566778899aa"
}
}
},
{
"description": "Download when there are three chunks and one extra empty chunk at the end",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000006"
},
"options": {
}
}
},
"assert": {
"result": {
"$hex": "112233445566778899aabbcc"
}
}
},
{
"description": "Download when files entry does not exist",
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000000"
},
"options": {
}
}
},
"assert": {
"error": "FileNotFound"
}
},
{
"description": "Download when an intermediate chunk is missing",
"arrange": {
"data": [
{
"delete": "fs.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 1
},
"limit": 1
}
]
}
]
},
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"error": "ChunkIsMissing"
}
},
{
"description": "Download when final chunk is missing",
"arrange": {
"data": [
{
"delete": "fs.chunks",
"deletes": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 1
},
"limit": 1
}
]
}
]
},
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"error": "ChunkIsMissing"
}
},
{
"description": "Download when there is an extra chunk",
"arrange": {
"data": [
{
"insert": "fs.chunks",
"documents": [
{
"_id": {
"$oid": "000000000000000000000012"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 2,
"data": {
"$hex": "99"
}
}
]
}
]
},
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000004"
}
}
},
"assert": {
"error": "ExtraChunk"
}
},
{
"description": "Download when an intermediate chunk is the wrong size",
"arrange": {
"data": [
{
"update": "fs.chunks",
"updates": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 1
},
"u": {
"$set": {
"data": {
"$hex": "556677"
}
}
}
},
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 2
},
"u": {
"$set": {
"data": {
"$hex": "8899aa"
}
}
}
}
]
}
]
},
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"error": "ChunkIsWrongSize"
}
},
{
"description": "Download when final chunk is the wrong size",
"arrange": {
"data": [
{
"update": "fs.chunks",
"updates": [
{
"q": {
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 2
},
"u": {
"$set": {
"data": {
"$hex": "99"
}
}
}
}
]
}
]
},
"act": {
"operation": "download",
"arguments": {
"id": {
"$oid": "000000000000000000000005"
}
}
},
"assert": {
"error": "ChunkIsWrongSize"
}
}
]
}

View File

@ -0,0 +1,255 @@
{
"data": {
"files": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"length": 1,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"md5": "47ed733b8d10be225eceba344d533586",
"filename": "abc",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"length": 1,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-02T00:00:00.000Z"
},
"md5": "b15835f133ff2e27c7cb28117bfae8f4",
"filename": "abc",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"length": 1,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-03T00:00:00.000Z"
},
"md5": "eccbc87e4b5ce2fe28308fd9f2a7baf3",
"filename": "abc",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"length": 1,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-04T00:00:00.000Z"
},
"md5": "f623e75af30e62bbd73d6df5b50bb7b5",
"filename": "abc",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"length": 1,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-05T00:00:00.000Z"
},
"md5": "4c614360da93c0a041b22e537de151eb",
"filename": "abc",
"contentType": "application/octet-stream",
"aliases": [
],
"metadata": {
}
}
],
"chunks": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"files_id": {
"$oid": "000000000000000000000001"
},
"n": 0,
"data": {
"$hex": "11"
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"files_id": {
"$oid": "000000000000000000000002"
},
"n": 0,
"data": {
"$hex": "22"
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"files_id": {
"$oid": "000000000000000000000003"
},
"n": 0,
"data": {
"$hex": "33"
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 0,
"data": {
"$hex": "44"
}
},
{
"_id": {
"$oid": "000000000000000000000005"
},
"files_id": {
"$oid": "000000000000000000000005"
},
"n": 0,
"data": {
"$hex": "55"
}
}
]
},
"tests": [
{
"description": "Download_by_name when revision is 0",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "abc",
"options": {
"revision": 0
}
}
},
"assert": {
"result": {
"$hex": "11"
}
}
},
{
"description": "Download_by_name when revision is 1",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "abc",
"options": {
"revision": 1
}
}
},
"assert": {
"result": {
"$hex": "22"
}
}
},
{
"description": "Download_by_name when revision is -2",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "abc",
"options": {
"revision": -2
}
}
},
"assert": {
"result": {
"$hex": "44"
}
}
},
{
"description": "Download_by_name when revision is -1",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "abc",
"options": {
"revision": -1
}
}
},
"assert": {
"result": {
"$hex": "55"
}
}
},
{
"description": "Download_by_name when files entry does not exist",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "xyz"
}
},
"assert": {
"error": "FileNotFound"
}
},
{
"description": "Download_by_name when revision does not exist",
"act": {
"operation": "download_by_name",
"arguments": {
"filename": "abc",
"options": {
"revision": 999
}
}
},
"assert": {
"error": "RevisionNotFound"
}
}
]
}

391
test/gridfs/upload.json Normal file
View File

@ -0,0 +1,391 @@
{
"data": {
"files": [
],
"chunks": [
]
},
"tests": [
{
"description": "Upload when length is 0",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": ""
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 0,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "d41d8cd98f00b204e9800998ecf8427e",
"filename": "filename"
}
]
}
]
}
},
{
"description": "Upload when length is 1",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "11"
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 1,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "47ed733b8d10be225eceba344d533586",
"filename": "filename"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11"
}
}
]
}
]
}
},
{
"description": "Upload when length is 3",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "112233"
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 3,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "bafae3a174ab91fc70db7a6aa50f4f52",
"filename": "filename"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "112233"
}
}
]
}
]
}
},
{
"description": "Upload when length is 4",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "11223344"
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 4,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "7e7c77cff5705d1f7574a25ef6662117",
"filename": "filename"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11223344"
}
}
]
}
]
}
},
{
"description": "Upload when length is 5",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "1122334455"
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 5,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "283d4fea5dded59cf837d3047328f5af",
"filename": "filename"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": "*actual",
"files_id": "*result",
"n": 1,
"data": {
"$hex": "55"
}
}
]
}
]
}
},
{
"description": "Upload when length is 8",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "1122334455667788"
},
"options": {
"chunkSizeBytes": 4
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 8,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "dd254cdc958e53abaa67da9f797125f5",
"filename": "filename"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11223344"
}
},
{
"_id": "*actual",
"files_id": "*result",
"n": 1,
"data": {
"$hex": "55667788"
}
}
]
}
]
}
},
{
"description": "Upload when contentType is provided",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "11"
},
"options": {
"chunkSizeBytes": 4,
"contentType": "image/jpeg"
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 1,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "47ed733b8d10be225eceba344d533586",
"filename": "filename",
"contentType": "image/jpeg"
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11"
}
}
]
}
]
}
},
{
"description": "Upload when metadata is provided",
"act": {
"operation": "upload",
"arguments": {
"filename": "filename",
"source": {
"$hex": "11"
},
"options": {
"chunkSizeBytes": 4,
"metadata": {
"x": 1
}
}
}
},
"assert": {
"result": "&result",
"data": [
{
"insert": "expected.files",
"documents": [
{
"_id": "*result",
"length": 1,
"chunkSize": 4,
"uploadDate": "*actual",
"md5": "47ed733b8d10be225eceba344d533586",
"filename": "filename",
"metadata": {
"x": 1
}
}
]
},
{
"insert": "expected.chunks",
"documents": [
{
"_id": "*actual",
"files_id": "*result",
"n": 0,
"data": {
"$hex": "11"
}
}
]
}
]
}
}
]
}

View File

@ -32,7 +32,7 @@ from gridfs.grid_file import (DEFAULT_CHUNK_SIZE,
GridOutCursor)
from gridfs.errors import NoFile
from pymongo import MongoClient
from pymongo.errors import ConfigurationError, ConnectionFailure
from pymongo.errors import ConfigurationError, ServerSelectionTimeoutError
from test import (IntegrationTest,
host,
port,
@ -173,7 +173,6 @@ class TestGridFile(IntegrationTest):
a.metadata = {"foo": 1}
self.assertEqual({"foo": 1}, a.metadata)
self.assertRaises(AttributeError, getattr, a, "md5")
self.assertRaises(AttributeError, setattr, a, "md5", 5)
a.close()
@ -241,8 +240,8 @@ class TestGridFile(IntegrationTest):
def test_grid_out_custom_opts(self):
one = GridIn(self.db.fs, _id=5, filename="my_file",
contentType="text/html", chunkSize=1000, aliases=["foo"],
metadata={"foo": 1, "bar": 2}, bar=3, baz="hello")
contentType="text/html", chunkSize=1000, aliases=["foo"],
metadata={"foo": 1, "bar": 2}, bar=3, baz="hello")
one.write(b"hello world")
one.close()
@ -272,7 +271,8 @@ class TestGridFile(IntegrationTest):
two = GridOut(self.db.fs, file_document=self.db.fs.files.find_one())
self.assertEqual(b"foo bar", two.read())
three = GridOut(self.db.fs, 5, file_document=self.db.fs.files.find_one())
three = GridOut(self.db.fs, 5,
file_document=self.db.fs.files.find_one())
self.assertEqual(b"foo bar", three.read())
four = GridOut(self.db.fs, file_document={})
@ -604,8 +604,9 @@ Bye"""))
serverSelectionTimeoutMS=10)
fs = client.db.fs
infile = GridIn(fs, file_id=-1, chunk_size=1)
self.assertRaises(ConnectionFailure, infile.write, b'data')
self.assertRaises(ConnectionFailure, infile.close)
self.assertRaises(ServerSelectionTimeoutError, infile.write, b'data')
self.assertRaises(ServerSelectionTimeoutError, infile.close)
def test_unacknowledged(self):
# w=0 is prohibited.

View File

@ -27,7 +27,9 @@ import gridfs
from bson.binary import Binary
from bson.py3compat import u, StringIO, string_type
from pymongo.mongo_client import MongoClient
from pymongo.errors import ConfigurationError, ConnectionFailure
from pymongo.errors import (ConfigurationError,
ConnectionFailure,
ServerSelectionTimeoutError)
from pymongo.read_preferences import ReadPreference
from gridfs.errors import CorruptGridFile, FileExists, NoFile
from test.test_replica_set_client import TestReplicaSetClientBase
@ -40,9 +42,7 @@ from test.utils import (joinall,
single_client,
one,
rs_client,
rs_or_single_client,
rs_or_single_client_noauth,
remove_all_users)
rs_or_single_client)
class JustWrite(threading.Thread):
@ -174,24 +174,21 @@ class TestGridfs(IntegrationTest):
finally:
self.fs.delete(files_id)
def test_delete_ensures_index(self):
def test_put_ensures_index(self):
# setUp has dropped collections.
names = self.db.collection_names()
self.assertFalse([name for name in names if name.startswith('fs')])
chunks = self.db.fs.chunks
files = self.db.fs.files
self.fs.put(b"junk")
self.fs.delete(file_id=1)
# delete() has ensured an index on (files_id, n).
# index_information() is like:
# {
# '_id_': {'key': [('_id', 1)]},
# 'files_id_1_n_1': {'key': [('files_id', 1), ('n', 1)]}
# }
self.assertTrue(any(
info.get('key') == [('files_id', 1), ('n', 1)]
for info in chunks.index_information().values()))
self.assertTrue(any(
info.get('key') == [('filename', 1), ('uploadDate', 1)]
for info in files.index_information().values()))
def test_alt_collection(self):
oid = self.alt.put(b"hello world")
@ -246,7 +243,7 @@ class TestGridfs(IntegrationTest):
# Should have created 100 versions of 'test' file
self.assertEqual(
100,
self.db.fs.files.find({'filename':'test'}).count()
self.db.fs.files.find({'filename': 'test'}).count()
)
def test_get_last_version(self):
@ -404,11 +401,11 @@ class TestGridfs(IntegrationTest):
serverSelectionTimeoutMS=10)
db = client.db
gfs = gridfs.GridFS(db)
self.assertRaises(ConnectionFailure, gfs.list)
self.assertRaises(ServerSelectionTimeoutError, gfs.list)
fs = gridfs.GridFS(db)
f = fs.new_file() # Still no connection.
self.assertRaises(ConnectionFailure, f.close)
f = fs.new_file()
self.assertRaises(ServerSelectionTimeoutError, f.close)
def test_gridfs_find(self):
self.fs.put(b"test2", filename="two")
@ -418,7 +415,7 @@ class TestGridfs(IntegrationTest):
self.fs.put(b"test1", filename="one")
time.sleep(0.01)
self.fs.put(b"test2++", filename="two")
self.assertEqual(3, self.fs.find({"filename":"two"}).count())
self.assertEqual(3, self.fs.find({"filename": "two"}).count())
self.assertEqual(4, self.fs.find().count())
cursor = self.fs.find(
no_cursor_timeout=False).sort("uploadDate", -1).skip(1).limit(2)
@ -516,23 +513,6 @@ class TestGridfsReplicaSet(TestReplicaSetClientBase):
rsc.pymongo_test.drop_collection('fs.files')
rsc.pymongo_test.drop_collection('fs.chunks')
class TestGridfsAuth(IntegrationTest):
@client_context.require_auth
def test_gridfs_readonly(self):
# "self.client" is logged in as root. Make a read-only user.
auth_db = self.client.test_gridfs_readonly
auth_db.add_user('readonly', 'pw', readOnly=True)
self.addCleanup(remove_all_users, auth_db)
db = rs_or_single_client_noauth().test_gridfs_readonly
db.authenticate('readonly', 'pw')
fs = gridfs.GridFS(db)
file = fs.new_file()
file._ensure_index()
fs.list()
if __name__ == "__main__":
unittest.main()

430
test/test_gridfs_bucket.py Normal file
View File

@ -0,0 +1,430 @@
# -*- coding: utf-8 -*-
#
# Copyright 2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for the gridfs package.
"""
import datetime
import threading
import time

import gridfs

from bson.binary import Binary
from bson.py3compat import StringIO, string_type
from gridfs.errors import NoFile, CorruptGridFile
from pymongo.errors import (ConfigurationError,
                            ConnectionFailure,
                            ServerSelectionTimeoutError,
                            OperationFailure)
from pymongo.mongo_client import MongoClient
from pymongo.read_preferences import ReadPreference
from test import (client_context,
                  unittest,
                  IntegrationTest)
from test.test_replica_set_client import TestReplicaSetClientBase
from test.utils import (joinall,
                        single_client,
                        one,
                        rs_client,
                        rs_or_single_client,
                        rs_or_single_client_noauth,
                        remove_all_users)
class JustWrite(threading.Thread):
    """Daemon thread that uploads ``b"hello"`` as a file named "test".

    The upload is repeated ``num`` times through ``gfs.open_upload_stream``.
    """

    def __init__(self, gfs, num):
        super(JustWrite, self).__init__()
        self.daemon = True
        self.gfs = gfs
        self.num = num

    def run(self):
        remaining = self.num
        while remaining > 0:
            # open_upload_stream returns a (stream, id) pair; only the
            # stream is needed here.
            stream = self.gfs.open_upload_stream("test")[0]
            stream.write(b"hello")
            stream.close()
            remaining -= 1
class JustRead(threading.Thread):
    """Daemon thread that downloads the file named "test" ``num`` times.

    Every payload read is appended to the shared ``results`` list and is
    then asserted to equal ``b"hello"``.
    """

    def __init__(self, gfs, num, results):
        super(JustRead, self).__init__()
        self.daemon = True
        self.gfs = gfs
        self.num = num
        self.results = results

    def run(self):
        remaining = self.num
        while remaining > 0:
            payload = self.gfs.open_download_stream_by_name("test").read()
            # Record first, so the caller sees the payload even when the
            # check below fails.
            self.results.append(payload)
            assert payload == b"hello"
            remaining -= 1
class TestGridfs(IntegrationTest):
    """Integration tests for :class:`gridfs.GridFSBucket`.

    Two buckets are shared by all tests: ``fs`` (created without a bucket
    name) and ``alt`` (``bucket_name="alt"``).  The collections backing
    both are dropped before every test.
    """

    @classmethod
    def setUpClass(cls):
        super(TestGridfs, cls).setUpClass()
        database = cls.db
        cls.fs = gridfs.GridFSBucket(database)
        cls.alt = gridfs.GridFSBucket(database, bucket_name="alt")

    def setUp(self):
        for name in ("fs.files", "fs.chunks", "alt.files", "alt.chunks"):
            self.db.drop_collection(name)

    def test_basic(self):
        """Round-trip one small file, then delete it."""
        payload = b"hello world"
        files = self.db.fs.files
        chunks = self.db.fs.chunks

        oid = self.fs.upload_from_stream("test_filename", payload)
        self.assertEqual(payload, self.fs.open_download_stream(oid).read())
        self.assertEqual(1, files.count())
        self.assertEqual(1, chunks.count())

        self.fs.delete(oid)
        self.assertRaises(NoFile, self.fs.open_download_stream, oid)
        self.assertEqual(0, files.count())
        self.assertEqual(0, chunks.count())

    def test_multi_chunk_delete(self):
        """Deleting a file removes every one of its chunks."""
        self.db.fs.drop()
        files = self.db.fs.files
        chunks = self.db.fs.chunks
        self.assertEqual(0, files.count())
        self.assertEqual(0, chunks.count())

        bucket = gridfs.GridFSBucket(self.db)
        # One byte per chunk, so five bytes become five chunk documents.
        oid = bucket.upload_from_stream(
            "test_filename", b"hello", chunk_size_bytes=1)
        self.assertEqual(1, files.count())
        self.assertEqual(5, chunks.count())

        bucket.delete(oid)
        self.assertEqual(0, files.count())
        self.assertEqual(0, chunks.count())

    def test_empty_file(self):
        """An empty upload stores a files document but no chunks."""
        oid = self.fs.upload_from_stream("test_filename", b"")
        self.assertEqual(b"", self.fs.open_download_stream(oid).read())
        self.assertEqual(1, self.db.fs.files.count())
        self.assertEqual(0, self.db.fs.chunks.count())

        stored = self.db.fs.files.find_one()
        self.assertEqual(0, stored["length"])
        self.assertEqual(oid, stored["_id"])
        self.assertTrue(isinstance(stored["uploadDate"], datetime.datetime))
        self.assertEqual(255 * 1024, stored["chunkSize"])
        self.assertTrue(isinstance(stored["md5"], string_type))

    def test_corrupt_chunk(self):
        """Reading a file whose chunk was truncated raises CorruptGridFile."""
        files_id = self.fs.upload_from_stream("test_filename", b'foobar')
        self.db.fs.chunks.update_one(
            {'files_id': files_id},
            {'$set': {'data': Binary(b'foo', 0)}})
        try:
            # A fresh stream is opened for each reader method.
            for reader_name in ("read", "readline"):
                out = self.fs.open_download_stream(files_id)
                self.assertRaises(CorruptGridFile, getattr(out, reader_name))
        finally:
            self.fs.delete(files_id)

    def test_upload_ensures_index(self):
        """The first upload creates the chunks and files indexes."""
        # setUp has dropped collections.
        existing = [name for name in self.db.collection_names()
                    if name.startswith('fs')]
        self.assertFalse(existing)

        chunks = self.db.fs.chunks
        files = self.db.fs.files
        self.fs.upload_from_stream("filename", b"junk")

        def has_index(collection, key):
            for info in collection.index_information().values():
                if info.get('key') == key:
                    return True
            return False

        self.assertTrue(has_index(chunks, [('files_id', 1), ('n', 1)]))
        self.assertTrue(has_index(files,
                                  [('filename', 1), ('uploadDate', 1)]))

    def test_alt_collection(self):
        """The ``alt`` bucket stores its data in ``alt.files``/``alt.chunks``."""
        payload = b"hello world"
        files = self.db.alt.files
        chunks = self.db.alt.chunks

        oid = self.alt.upload_from_stream("test_filename", payload)
        self.assertEqual(payload, self.alt.open_download_stream(oid).read())
        self.assertEqual(1, files.count())
        self.assertEqual(1, chunks.count())

        self.alt.delete(oid)
        self.assertRaises(NoFile, self.alt.open_download_stream, oid)
        self.assertEqual(0, files.count())
        self.assertEqual(0, chunks.count())

        self.assertRaises(NoFile, self.alt.open_download_stream, "foo")
        self.alt.upload_from_stream("foo", payload)
        self.assertEqual(
            payload,
            self.alt.open_download_stream_by_name("foo").read())

        for name, body in (("mike", b""),
                           ("test", b"foo"),
                           ("hello world", b"")):
            self.alt.upload_from_stream(name, body)

        stored_names = set(doc["filename"] for doc in list(files.find()))
        self.assertEqual(set(["mike", "test", "hello world", "foo"]),
                         stored_names)

    def test_threaded_reads(self):
        """Ten reader threads each download the same file ten times."""
        self.fs.upload_from_stream("test", b"hello")

        collected = []
        readers = []
        for _ in range(10):
            reader = JustRead(self.fs, 10, collected)
            readers.append(reader)
            reader.start()

        joinall(readers)

        self.assertEqual(100 * [b'hello'], collected)

    def test_threaded_writes(self):
        """Ten writer threads each upload ten revisions of one filename."""
        writers = []
        for _ in range(10):
            writer = JustWrite(self.fs, 10)
            writers.append(writer)
            writer.start()

        joinall(writers)

        stream = self.fs.open_download_stream_by_name("test")
        self.assertEqual(stream.read(), b"hello")

        # Should have created 100 versions of 'test' file
        version_count = self.db.fs.files.find({'filename': 'test'}).count()
        self.assertEqual(100, version_count)

    def test_get_last_version(self):
        """Download-by-name without a revision returns the newest upload."""
        first_id = self.fs.upload_from_stream("test", b"foo")
        time.sleep(0.01)

        upload, _ = self.fs.open_upload_stream("test")
        upload.write(b"bar")
        upload.close()
        time.sleep(0.01)
        second_id = upload._id

        third_id = self.fs.upload_from_stream("test", b"baz")

        # Peel revisions off newest-first; each delete exposes the
        # previous upload as the latest one.
        for expected, file_id in ((b"baz", third_id),
                                  (b"bar", second_id),
                                  (b"foo", first_id)):
            self.assertEqual(
                expected,
                self.fs.open_download_stream_by_name("test").read())
            self.fs.delete(file_id)

        self.assertRaises(NoFile,
                          self.fs.open_download_stream_by_name, "test")

    def test_get_version(self):
        """Positive and negative revision numbers select the right upload."""
        for body in (b"foo", b"bar", b"baz"):
            self.fs.upload_from_stream("test", body)
            time.sleep(0.01)

        expectations = ((0, b"foo"), (1, b"bar"), (2, b"baz"),
                        (-1, b"baz"), (-2, b"bar"), (-3, b"foo"))
        for revision, expected in expectations:
            stream = self.fs.open_download_stream_by_name(
                "test", revision=revision)
            self.assertEqual(expected, stream.read())

        for missing in (3, -4):
            self.assertRaises(NoFile, self.fs.open_download_stream_by_name,
                              "test", revision=missing)

    def test_upload_from_stream_filelike(self):
        """A file-like source is chunked just like a bytes source."""
        source = StringIO(b"hello world")
        oid = self.fs.upload_from_stream("test_file", source,
                                         chunk_size_bytes=1)
        self.assertEqual(11, self.db.fs.chunks.count())
        self.assertEqual(b"hello world",
                         self.fs.open_download_stream(oid).read())

    def test_missing_length_iter(self):
        """Iterating a file whose document lacks ``length`` terminates."""
        # Test fix that guards against PHP-237
        self.fs.upload_from_stream("empty", b"")
        stored = self.db.fs.files.find_one({"filename": "empty"})
        stored.pop("length")
        self.db.fs.files.replace_one({"_id": stored["_id"]}, stored)
        stream = self.fs.open_download_stream_by_name("empty")

        def drain(grid_file):
            for _ in grid_file:
                pass
            return True

        self.assertTrue(drain(stream))

    def test_gridfs_lazy_connect(self):
        """Bucket operations against an unreachable host time out."""
        client = MongoClient('badhost', connect=False,
                             serverSelectionTimeoutMS=0)
        unreachable_db = client.db

        bucket = gridfs.GridFSBucket(unreachable_db)
        self.assertRaises(ServerSelectionTimeoutError, bucket.delete, 0)

        bucket = gridfs.GridFSBucket(unreachable_db)
        # Still no connection.
        self.assertRaises(ServerSelectionTimeoutError,
                          bucket.upload_from_stream, "test", b"")

    def test_gridfs_find(self):
        """``find`` supports filters, sort, skip, limit and rewind."""
        for name, body in (("two", b"test2"),
                           ("two", b"test2+"),
                           ("one", b"test1")):
            self.fs.upload_from_stream(name, body)
            time.sleep(0.01)
        self.fs.upload_from_stream("two", b"test2++")

        self.assertEqual(3, self.fs.find({"filename": "two"}).count())
        self.assertEqual(4, self.fs.find({}).count())

        cursor = self.fs.find({},
                              no_cursor_timeout=False,
                              sort=[("uploadDate", -1)],
                              skip=1,
                              limit=2)
        self.assertEqual(b"test1", next(cursor).read())

        cursor.rewind()
        self.assertEqual(b"test1", next(cursor).read())
        self.assertEqual(b"test2+", next(cursor).read())
        self.assertRaises(StopIteration, cursor.__next__)
        cursor.close()

        self.assertRaises(TypeError, self.fs.find, {}, {"_id": True})

    def test_grid_in_non_int_chunksize(self):
        """A float ``chunkSize`` in the files document is still readable."""
        # Lua, and perhaps other buggy GridFS clients, store size as a float.
        payload = b'data'
        self.fs.upload_from_stream('f', payload)
        self.db.fs.files.update_one({'filename': 'f'},
                                    {'$set': {'chunkSize': 100.0}})

        stream = self.fs.open_download_stream_by_name('f')
        self.assertEqual(payload, stream.read())

    def test_unacknowledged(self):
        """A bucket cannot be created on a ``w=0`` database."""
        # w=0 is prohibited.
        with self.assertRaises(ConfigurationError):
            gridfs.GridFSBucket(rs_or_single_client(w=0).pymongo_test)

    def test_rename(self):
        """After a rename only the new filename resolves."""
        file_id = self.fs.upload_from_stream("first_name", b'testing')
        original = self.fs.open_download_stream_by_name("first_name")
        self.assertEqual(b'testing', original.read())

        self.fs.rename(file_id, "second_name")
        self.assertRaises(NoFile, self.fs.open_download_stream_by_name,
                          "first_name")
        renamed = self.fs.open_download_stream_by_name("second_name")
        self.assertEqual(b"testing", renamed.read())

    def test_abort(self):
        """Aborting an upload closes it and removes its written chunks."""
        gin, file_id = self.fs.open_upload_stream("test_filename",
                                                  chunk_size_bytes=5)
        chunk_filter = {"files_id": file_id}

        # Each write is exactly one chunk long.
        for piece in (b"test1", b"test2", b"test3"):
            gin.write(piece)
        self.assertEqual(3, self.db.fs.chunks.count(chunk_filter))

        gin.abort()
        self.assertTrue(gin.closed)
        self.assertRaises(ValueError, gin.write, b"test4")
        self.assertEqual(0, self.db.fs.chunks.count(chunk_filter))
class TestGridfsBucketReplicaSet(TestReplicaSetClientBase):
    """GridFSBucket behaviour against replica-set members."""

    def test_gridfs_replica_set(self):
        """Upload and download through a SECONDARY-preferring client."""
        client = rs_client(w=self.w,
                           wtimeout=5000,
                           read_preference=ReadPreference.SECONDARY)
        bucket = gridfs.GridFSBucket(client.pymongo_test)

        oid = bucket.upload_from_stream("test_filename", b'foo')
        self.assertEqual(b'foo', bucket.open_download_stream(oid).read())

    def test_gridfs_secondary(self):
        """Uploading through a direct secondary connection fails."""
        primary_host, primary_port = self.primary
        primary_client = single_client(primary_host, primary_port)

        secondary_host, secondary_port = one(self.secondaries)
        secondary_client = single_client(
            secondary_host, secondary_port,
            read_preference=ReadPreference.SECONDARY)

        # Start from a clean slate on the primary.
        primary_db = primary_client.pymongo_test
        primary_db.drop_collection("fs.files")
        primary_db.drop_collection("fs.chunks")

        # Creating the bucket on the secondary is expected to succeed;
        # the upload is expected to raise.
        bucket = gridfs.GridFSBucket(secondary_client.pymongo_test)
        self.assertRaises(ConnectionFailure, bucket.upload_from_stream,
                          "test_filename", b'foo')

    def test_gridfs_secondary_lazy(self):
        """A lazily connected secondary client can read but not upload."""
        secondary_host, secondary_port = one(self.secondaries)
        client = single_client(secondary_host,
                               secondary_port,
                               read_preference=ReadPreference.SECONDARY,
                               connect=False)

        # Still no connection.
        bucket = gridfs.GridFSBucket(client.test_gridfs_secondary_lazy)

        # The lookup reports NoFile for the unknown name; the upload is
        # then expected to raise ConnectionFailure.
        self.assertRaises(NoFile, bucket.open_download_stream_by_name,
                          "test_filename")
        self.assertRaises(ConnectionFailure, bucket.upload_from_stream,
                          "test_filename", b'data')

    def tearDown(self):
        database = client_context.rs_client.pymongo_test
        for name in ('fs.files', 'fs.chunks'):
            database.drop_collection(name)
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

221
test/test_gridfs_spec.py Normal file
View File

@ -0,0 +1,221 @@
# Copyright 2015 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test GridFSBucket class."""
import copy
import datetime
import os
import sys
import re
import gridfs
sys.path[0:0] = [""]
from bson import Binary
from bson.json_util import loads
from bson.py3compat import bytes_from_hex
from gridfs.errors import NoFile, CorruptGridFile
from test import (unittest,
IntegrationTest)
# Commands.
_COMMANDS = {"delete": lambda coll, doc: [coll.delete_many(d["q"])
for d in doc['deletes']],
"insert": lambda coll, doc: coll.insert_many(doc['documents']),
"update": lambda coll, doc: [coll.update_many(u["q"], u["u"])
for u in doc['updates']]
}
# Location of JSON test specifications.
_TEST_PATH = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'gridfs')
def camel_to_snake(camel):
    """Convert a camelCase spec argument name to snake_case.

    The bare name ``"id"`` is special-cased and becomes ``"file_id"``.
    """
    if camel == "id":
        return "file_id"
    snake = camel
    # First split before a capitalised word, then between a lowercase
    # letter or digit and any remaining capital.
    for pattern in ('(.)([A-Z][a-z]+)', '([a-z0-9])([A-Z])'):
        snake = re.sub(pattern, r'\1_\2', snake)
    return snake.lower()
class TestAllScenarios(IntegrationTest):
    """Host class for the test methods generated from the JSON scenarios.

    The ``fs.*`` collections hold the data the bucket operates on; the
    ``expected.*`` collections hold the outcome each scenario describes,
    so the two can be compared afterwards.
    """

    @classmethod
    def setUpClass(cls):
        super(TestAllScenarios, cls).setUpClass()
        bucket = gridfs.GridFSBucket(cls.db)
        cls.fs = bucket
        # Spec operation name -> bucket method that implements it.
        cls.str_to_cmd = {
            "upload": bucket.upload_from_stream,
            "download": bucket.open_download_stream,
            "delete": bucket.delete,
            "download_by_name": bucket.open_download_stream_by_name}

    def init_db(self, data, test):
        """Reset all four collections, load ``data``, apply "arrange"."""
        for name in ("fs.files", "fs.chunks",
                     "expected.files", "expected.chunks"):
            self.db.drop_collection(name)

        # Read in data.
        for suffix in ("files", "chunks"):
            documents = data[suffix]
            if documents:
                self.db["fs." + suffix].insert_many(documents)
                self.db["expected." + suffix].insert_many(documents)

        # Make initial modifications.
        if "arrange" not in test:
            return
        for cmd in test['arrange'].get('data', []):
            for key in cmd.keys():
                if key not in _COMMANDS:
                    continue
                _COMMANDS[key](self.db.get_collection(cmd[key]), cmd)

    def init_expected_db(self, test, result):
        """Apply the scenario's expected-outcome commands.

        Wildcards inside inserted documents are resolved first: a value
        whose string form contains "result" is replaced by ``result``, a
        value containing "actual" is dropped, and ``contentType`` is moved
        under ``metadata``.
        """
        expectation = test['assert']

        # Modify outcome DB.
        for cmd in expectation.get('data', []):
            for key in cmd.keys():
                if key not in _COMMANDS:
                    continue
                # Replace wildcards in inserts.
                for doc in cmd.get('documents', []):
                    # Snapshot the keys: the document is mutated below.
                    for field in list(doc.keys()):
                        value = doc[field]
                        if "result" in str(value):
                            value = doc[field] = result
                        if "actual" in str(value):  # Avoid duplicate
                            doc.pop(field)
                        # Move contentType to metadata.
                        if field == "contentType":
                            doc["metadata"] = {field: doc.pop(field)}
                _COMMANDS[key](self.db.get_collection(cmd[key]), cmd)

        if expectation.get('result') == "&result":
            expectation['result'] = result

    def sorted_list(self, coll, ignore_id):
        """Render each document in ``coll`` as a string with sorted keys.

        ``uploadDate`` is only type-checked and left out of the rendering;
        ``_id`` is removed first when ``ignore_id`` is true.
        """
        rendered = []
        for doc in coll.find():
            if ignore_id:  # Cannot compare _id in chunks collection.
                doc.pop("_id")
            pieces = ["{"]
            for field in sorted(doc):
                value = doc[field]
                if field == "uploadDate":  # Can't compare datetime.
                    self.assertTrue(isinstance(value, datetime.datetime))
                    continue
                pieces.append("%s:%s " % (field, repr(value)))
            pieces.append("}")
            rendered.append("".join(pieces))
        return rendered
def create_test(scenario_def):
    """Build a test method that runs every test in ``scenario_def``.

    For each test the returned function resets the database, runs the
    requested bucket operation, applies the expected-outcome commands and
    then compares the error, the result and the collection contents with
    what the scenario asserts.
    """
    # Spec error name -> exception type raised by this driver.
    expected_errors = {"FileNotFound": NoFile,
                       "ChunkIsMissing": CorruptGridFile,
                       "ExtraChunk": CorruptGridFile,
                       "ChunkIsWrongSize": CorruptGridFile,
                       "RevisionNotFound": NoFile}

    def run_scenario(self):
        # Run tests.
        self.assertTrue(scenario_def['tests'], "tests cannot be empty")
        for test in scenario_def['tests']:
            self.init_db(scenario_def['data'], test)

            # Run GridFs Operation.
            operation = self.str_to_cmd[test['act']['operation']]
            args = test['act']['arguments']
            extra_opts = args.pop("options", {})
            if "contentType" in extra_opts:
                extra_opts["metadata"] = {
                    "contentType": extra_opts.pop("contentType")}

            args.update(extra_opts)

            converted_args = dict((camel_to_snake(c), v)
                                  for c, v in args.items())

            # Both outcomes are reset for every test.  Without this, an
            # operation that raises would leave ``result`` unbound on the
            # first test, or holding the previous test's value later on.
            error = None
            result = None
            try:
                result = operation(**converted_args)

                if 'download' in test['act']['operation']:
                    result = Binary(result.read())
            except Exception as exc:
                error = exc

            self.init_expected_db(test, result)

            # Asserts.
            if test['assert'].get("error", False):
                self.assertIsNotNone(error)
                self.assertTrue(
                    isinstance(error,
                               expected_errors[test['assert']['error']]))
            else:
                self.assertIsNone(error)

                if 'result' in test['assert']:
                    if test['assert']['result'] == 'void':
                        test['assert']['result'] = None
                    self.assertEqual(result, test['assert'].get('result'))

            if 'data' in test['assert']:
                # Create alphabetized list
                self.assertEqual(
                    set(self.sorted_list(self.db.fs.chunks, True)),
                    set(self.sorted_list(self.db.expected.chunks, True)))

                self.assertEqual(
                    set(self.sorted_list(self.db.fs.files, False)),
                    set(self.sorted_list(self.db.expected.files, False)))

    return run_scenario
def _str2hex(jsn):
    """Recursively replace ``{"$hex": ...}`` values with ``Binary`` in place.

    Only values stored under the keys "data", "source" and "result" are
    converted.  This is done after parsing because object_hook is already
    defined by bson.json_util, and everything is named 'data'.
    """
    # Iterate over a snapshot because values are replaced during the walk.
    for key, val in list(jsn.items()):
        if (key in ("data", "source", "result")
                and isinstance(val, dict) and "$hex" in val):
            jsn[key] = Binary(bytes_from_hex(val['$hex']))
        elif isinstance(val, dict):
            _str2hex(val)
        elif isinstance(val, list):
            for item in val:
                # Scalars inside lists carry nothing to convert.
                if isinstance(item, dict):
                    _str2hex(item)


def create_tests():
    """Attach one generated test per JSON scenario to TestAllScenarios.

    Every ``*.json`` file found under ``_TEST_PATH`` becomes a method named
    ``test_<file name without extension>``.
    """
    for dirpath, _, filenames in os.walk(_TEST_PATH):
        for filename in filenames:
            scenario_name, extension = os.path.splitext(filename)
            # Ignore stray non-scenario files (editor backups and such).
            if extension != ".json":
                continue

            with open(os.path.join(dirpath, filename)) as scenario_stream:
                scenario_def = loads(scenario_stream.read())

            _str2hex(scenario_def)

            # Construct test from scenario.
            new_test = create_test(scenario_def)
            new_test.__name__ = 'test_%s' % (scenario_name,)
            setattr(TestAllScenarios, new_test.__name__, new_test)
# Generate the scenario test methods at import time so that they already
# exist on TestAllScenarios when the test runner collects it.
create_tests()

# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()