PYTHON-4947 - GridFS spec: Add performant 'delete revisions by filena… (#2218)
This commit is contained in:
parent
58a41ae7f5
commit
8675a163df
@ -9,6 +9,8 @@ PyMongo 4.12 brings a number of changes including:
|
||||
- Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to
|
||||
:class:`~pymongo.encryption_options.AutoEncryptionOpts`.
|
||||
- Support for $lookup in CSFLE and QE on MongoDB 8.1+.
|
||||
- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name`
|
||||
for more performant deletion of a file with multiple revisions.
|
||||
- AsyncMongoClient no longer performs DNS resolution for "mongodb+srv://" connection strings on creation.
|
||||
To avoid blocking the asyncio loop, the resolution is now deferred until the client is first connected.
|
||||
- Added index hinting support to the
|
||||
|
||||
@ -834,6 +834,35 @@ class AsyncGridFSBucket:
|
||||
if not res.deleted_count:
|
||||
raise NoFile("no file could be deleted because none matched %s" % file_id)
|
||||
|
||||
@_csot.apply
async def delete_by_name(
    self, filename: str, session: Optional[AsyncClientSession] = None
) -> None:
    """Given a filename, delete every revision of the stored file: its files
    collection document(s) and all associated chunks from this GridFS bucket.

    For example::

        my_db = AsyncMongoClient().test
        fs = AsyncGridFSBucket(my_db)
        await fs.upload_from_stream("test_file", b"data I want to store!")
        await fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a :class:`~pymongo.asynchronous.client_session.AsyncClientSession`

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    # Only the _id of each revision is needed to drive the two bulk deletes.
    files = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [file["_id"] async for file in files]
    if not file_ids:
        # Nothing matched: skip two delete_many round trips that could not
        # remove anything ({"$in": []} matches no document).
        raise NoFile(f"no file could be deleted because none matched filename {filename!r}")
    res = await self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    # Chunks are removed even when the files delete matched nothing, so a
    # concurrent removal of the files documents cannot leave orphaned chunks.
    await self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The revisions disappeared between the find and the delete.
        raise NoFile(f"no file could be deleted because none matched filename {filename!r}")
|
||||
|
||||
def find(self, *args: Any, **kwargs: Any) -> AsyncGridOutCursor:
|
||||
"""Find and return the files collection documents that match ``filter``
|
||||
|
||||
|
||||
@ -830,6 +830,33 @@ class GridFSBucket:
|
||||
if not res.deleted_count:
|
||||
raise NoFile("no file could be deleted because none matched %s" % file_id)
|
||||
|
||||
@_csot.apply
def delete_by_name(self, filename: str, session: Optional[ClientSession] = None) -> None:
    """Given a filename, delete every revision of the stored file: its files
    collection document(s) and all associated chunks from this GridFS bucket.

    For example::

        my_db = MongoClient().test
        fs = GridFSBucket(my_db)
        fs.upload_from_stream("test_file", b"data I want to store!")
        fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a :class:`~pymongo.client_session.ClientSession`

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    # Only the _id of each revision is needed to drive the two bulk deletes.
    files = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [file["_id"] for file in files]
    if not file_ids:
        # Nothing matched: skip two delete_many round trips that could not
        # remove anything ({"$in": []} matches no document).
        raise NoFile(f"no file could be deleted because none matched filename {filename!r}")
    res = self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    # Chunks are removed even when the files delete matched nothing, so a
    # concurrent removal of the files documents cannot leave orphaned chunks.
    self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The revisions disappeared between the find and the delete.
        raise NoFile(f"no file could be deleted because none matched filename {filename!r}")
|
||||
|
||||
def find(self, *args: Any, **kwargs: Any) -> GridOutCursor:
|
||||
"""Find and return the files collection documents that match ``filter``
|
||||
|
||||
|
||||
@ -115,6 +115,17 @@ class TestGridfs(AsyncIntegrationTest):
|
||||
self.assertEqual(0, await self.db.fs.files.count_documents({}))
|
||||
self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
|
||||
|
||||
async def test_delete_by_name(self):
    """delete_by_name removes every revision of one filename and nothing else."""
    # The exact counts below rely on the bucket starting out empty.
    self.assertEqual(0, await self.db.fs.files.count_documents({}))
    self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
    gfs = gridfs.AsyncGridFSBucket(self.db)
    # Two revisions of the same filename (5 one-byte chunks each) plus an
    # unrelated file that must survive the delete.
    await gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    await gfs.upload_from_stream("test_filename", b"world", chunk_size_bytes=1)
    await gfs.upload_from_stream("other_filename", b"x", chunk_size_bytes=1)
    self.assertEqual(3, await self.db.fs.files.count_documents({}))
    self.assertEqual(11, await self.db.fs.chunks.count_documents({}))
    await gfs.delete_by_name("test_filename")
    # Both revisions and their chunks are gone; the other file is untouched.
    self.assertEqual(0, await self.db.fs.files.count_documents({"filename": "test_filename"}))
    self.assertEqual(1, await self.db.fs.files.count_documents({"filename": "other_filename"}))
    self.assertEqual(1, await self.db.fs.chunks.count_documents({}))
    # Deleting a filename with no remaining revisions is an error.
    with self.assertRaises(gridfs.NoFile):
        await gfs.delete_by_name("test_filename")
    # Leave the bucket empty, as the single-revision version of this test did.
    await gfs.delete_by_name("other_filename")
    self.assertEqual(0, await self.db.fs.files.count_documents({}))
    self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
|
||||
|
||||
async def test_empty_file(self):
|
||||
oid = await self.fs.upload_from_stream("test_filename", b"")
|
||||
self.assertEqual(b"", await (await self.fs.open_download_stream(oid)).read())
|
||||
|
||||
@ -45,7 +45,7 @@ from test.utils_shared import (
|
||||
|
||||
from bson import DBRef
|
||||
from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket
|
||||
from pymongo import ASCENDING, AsyncMongoClient, monitoring
|
||||
from pymongo import ASCENDING, AsyncMongoClient, _csot, monitoring
|
||||
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
|
||||
from pymongo.asynchronous.cursor import AsyncCursor
|
||||
from pymongo.asynchronous.helpers import anext
|
||||
@ -543,7 +543,7 @@ class TestSession(AsyncIntegrationTest):
|
||||
(bucket.rename, [1, "f2"], {}),
|
||||
# Delete both files so _test_ops can run these operations twice.
|
||||
(bucket.delete, [1], {}),
|
||||
(bucket.delete, [2], {}),
|
||||
(bucket.delete_by_name, ["f"], {}),
|
||||
)
|
||||
|
||||
async def test_gridfsbucket_cursor(self):
|
||||
|
||||
@ -32,7 +32,7 @@ from typing import List
|
||||
|
||||
from bson import encode
|
||||
from bson.raw_bson import RawBSONDocument
|
||||
from pymongo import WriteConcern
|
||||
from pymongo import WriteConcern, _csot
|
||||
from pymongo.asynchronous import client_session
|
||||
from pymongo.asynchronous.client_session import TransactionOptions
|
||||
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
|
||||
@ -295,6 +295,7 @@ class TestTransactions(AsyncTransactionsBase):
|
||||
"new-name",
|
||||
),
|
||||
),
|
||||
(bucket.delete_by_name, ("new-name",)),
|
||||
]
|
||||
|
||||
async with client.start_session() as s, await s.start_transaction():
|
||||
|
||||
@ -66,7 +66,7 @@ import pymongo
|
||||
from bson import SON, json_util
|
||||
from bson.codec_options import DEFAULT_CODEC_OPTIONS
|
||||
from bson.objectid import ObjectId
|
||||
from gridfs import AsyncGridFSBucket, GridOut
|
||||
from gridfs import AsyncGridFSBucket, GridOut, NoFile
|
||||
from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot
|
||||
from pymongo.asynchronous.change_stream import AsyncChangeStream
|
||||
from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState
|
||||
@ -632,7 +632,7 @@ class UnifiedSpecTestMixinV1(AsyncIntegrationTest):
|
||||
# Connection errors are considered client errors.
|
||||
if isinstance(error, ConnectionFailure):
|
||||
self.assertNotIsInstance(error, NotPrimaryError)
|
||||
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)):
|
||||
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)):
|
||||
pass
|
||||
else:
|
||||
self.assertNotIsInstance(error, PyMongoError)
|
||||
|
||||
230
test/gridfs/deleteByName.json
Normal file
230
test/gridfs/deleteByName.json
Normal file
@ -0,0 +1,230 @@
|
||||
{
|
||||
"description": "gridfs-deleteByName",
|
||||
"schemaVersion": "1.0",
|
||||
"createEntities": [
|
||||
{
|
||||
"client": {
|
||||
"id": "client0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"database": {
|
||||
"id": "database0",
|
||||
"client": "client0",
|
||||
"databaseName": "gridfs-tests"
|
||||
}
|
||||
},
|
||||
{
|
||||
"bucket": {
|
||||
"id": "bucket0",
|
||||
"database": "database0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"collection": {
|
||||
"id": "bucket0_files_collection",
|
||||
"database": "database0",
|
||||
"collectionName": "fs.files"
|
||||
}
|
||||
},
|
||||
{
|
||||
"collection": {
|
||||
"id": "bucket0_chunks_collection",
|
||||
"database": "database0",
|
||||
"collectionName": "fs.chunks"
|
||||
}
|
||||
}
|
||||
],
|
||||
"initialData": [
|
||||
{
|
||||
"collectionName": "fs.files",
|
||||
"databaseName": "gridfs-tests",
|
||||
"documents": [
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000001"
|
||||
},
|
||||
"length": 0,
|
||||
"chunkSize": 4,
|
||||
"uploadDate": {
|
||||
"$date": "1970-01-01T00:00:00.000Z"
|
||||
},
|
||||
"filename": "filename",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000002"
|
||||
},
|
||||
"length": 0,
|
||||
"chunkSize": 4,
|
||||
"uploadDate": {
|
||||
"$date": "1970-01-01T00:00:00.000Z"
|
||||
},
|
||||
"filename": "filename",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000003"
|
||||
},
|
||||
"length": 2,
|
||||
"chunkSize": 4,
|
||||
"uploadDate": {
|
||||
"$date": "1970-01-01T00:00:00.000Z"
|
||||
},
|
||||
"filename": "filename",
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"length": 8,
|
||||
"chunkSize": 4,
|
||||
"uploadDate": {
|
||||
"$date": "1970-01-01T00:00:00.000Z"
|
||||
},
|
||||
"filename": "otherfilename",
|
||||
"metadata": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"collectionName": "fs.chunks",
|
||||
"databaseName": "gridfs-tests",
|
||||
"documents": [
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000001"
|
||||
},
|
||||
"files_id": {
|
||||
"$oid": "000000000000000000000002"
|
||||
},
|
||||
"n": 0,
|
||||
"data": {
|
||||
"$binary": {
|
||||
"base64": "",
|
||||
"subType": "00"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000002"
|
||||
},
|
||||
"files_id": {
|
||||
"$oid": "000000000000000000000003"
|
||||
},
|
||||
"n": 0,
|
||||
"data": {
|
||||
"$binary": {
|
||||
"base64": "",
|
||||
"subType": "00"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000003"
|
||||
},
|
||||
"files_id": {
|
||||
"$oid": "000000000000000000000003"
|
||||
},
|
||||
"n": 0,
|
||||
"data": {
|
||||
"$binary": {
|
||||
"base64": "",
|
||||
"subType": "00"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"files_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"n": 0,
|
||||
"data": {
|
||||
"$binary": {
|
||||
"base64": "",
|
||||
"subType": "00"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"tests": [
|
||||
{
|
||||
"description": "delete when multiple revisions of the file exist",
|
||||
"operations": [
|
||||
{
|
||||
"name": "deleteByName",
|
||||
"object": "bucket0",
|
||||
"arguments": {
|
||||
"filename": "filename"
|
||||
}
|
||||
}
|
||||
],
|
||||
"outcome": [
|
||||
{
|
||||
"collectionName": "fs.files",
|
||||
"databaseName": "gridfs-tests",
|
||||
"documents": [
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"length": 8,
|
||||
"chunkSize": 4,
|
||||
"uploadDate": {
|
||||
"$date": "1970-01-01T00:00:00.000Z"
|
||||
},
|
||||
"filename": "otherfilename",
|
||||
"metadata": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"collectionName": "fs.chunks",
|
||||
"databaseName": "gridfs-tests",
|
||||
"documents": [
|
||||
{
|
||||
"_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"files_id": {
|
||||
"$oid": "000000000000000000000004"
|
||||
},
|
||||
"n": 0,
|
||||
"data": {
|
||||
"$binary": {
|
||||
"base64": "",
|
||||
"subType": "00"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"description": "delete when file name does not exist",
|
||||
"operations": [
|
||||
{
|
||||
"name": "deleteByName",
|
||||
"object": "bucket0",
|
||||
"arguments": {
|
||||
"filename": "missing-file"
|
||||
},
|
||||
"expectError": {
|
||||
"isClientError": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -115,6 +115,17 @@ class TestGridfs(IntegrationTest):
|
||||
self.assertEqual(0, self.db.fs.files.count_documents({}))
|
||||
self.assertEqual(0, self.db.fs.chunks.count_documents({}))
|
||||
|
||||
def test_delete_by_name(self):
    """delete_by_name removes every revision of one filename and nothing else."""
    # The exact counts below rely on the bucket starting out empty.
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
    gfs = gridfs.GridFSBucket(self.db)
    # Two revisions of the same filename (5 one-byte chunks each) plus an
    # unrelated file that must survive the delete.
    gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    gfs.upload_from_stream("test_filename", b"world", chunk_size_bytes=1)
    gfs.upload_from_stream("other_filename", b"x", chunk_size_bytes=1)
    self.assertEqual(3, self.db.fs.files.count_documents({}))
    self.assertEqual(11, self.db.fs.chunks.count_documents({}))
    gfs.delete_by_name("test_filename")
    # Both revisions and their chunks are gone; the other file is untouched.
    self.assertEqual(0, self.db.fs.files.count_documents({"filename": "test_filename"}))
    self.assertEqual(1, self.db.fs.files.count_documents({"filename": "other_filename"}))
    self.assertEqual(1, self.db.fs.chunks.count_documents({}))
    # Deleting a filename with no remaining revisions is an error.
    with self.assertRaises(gridfs.NoFile):
        gfs.delete_by_name("test_filename")
    # Leave the bucket empty, as the single-revision version of this test did.
    gfs.delete_by_name("other_filename")
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
|
||||
|
||||
def test_empty_file(self):
|
||||
oid = self.fs.upload_from_stream("test_filename", b"")
|
||||
self.assertEqual(b"", (self.fs.open_download_stream(oid)).read())
|
||||
|
||||
@ -45,7 +45,7 @@ from test.utils_shared import (
|
||||
|
||||
from bson import DBRef
|
||||
from gridfs.synchronous.grid_file import GridFS, GridFSBucket
|
||||
from pymongo import ASCENDING, MongoClient, monitoring
|
||||
from pymongo import ASCENDING, MongoClient, _csot, monitoring
|
||||
from pymongo.common import _MAX_END_SESSIONS
|
||||
from pymongo.errors import ConfigurationError, InvalidOperation, OperationFailure
|
||||
from pymongo.operations import IndexModel, InsertOne, UpdateOne
|
||||
@ -543,7 +543,7 @@ class TestSession(IntegrationTest):
|
||||
(bucket.rename, [1, "f2"], {}),
|
||||
# Delete both files so _test_ops can run these operations twice.
|
||||
(bucket.delete, [1], {}),
|
||||
(bucket.delete, [2], {}),
|
||||
(bucket.delete_by_name, ["f"], {}),
|
||||
)
|
||||
|
||||
def test_gridfsbucket_cursor(self):
|
||||
|
||||
@ -32,7 +32,7 @@ from typing import List
|
||||
|
||||
from bson import encode
|
||||
from bson.raw_bson import RawBSONDocument
|
||||
from pymongo import WriteConcern
|
||||
from pymongo import WriteConcern, _csot
|
||||
from pymongo.errors import (
|
||||
CollectionInvalid,
|
||||
ConfigurationError,
|
||||
@ -287,6 +287,7 @@ class TestTransactions(TransactionsBase):
|
||||
"new-name",
|
||||
),
|
||||
),
|
||||
(bucket.delete_by_name, ("new-name",)),
|
||||
]
|
||||
|
||||
with client.start_session() as s, s.start_transaction():
|
||||
|
||||
@ -65,7 +65,7 @@ import pymongo
|
||||
from bson import SON, json_util
|
||||
from bson.codec_options import DEFAULT_CODEC_OPTIONS
|
||||
from bson.objectid import ObjectId
|
||||
from gridfs import GridFSBucket, GridOut
|
||||
from gridfs import GridFSBucket, GridOut, NoFile
|
||||
from pymongo import ASCENDING, CursorType, MongoClient, _csot
|
||||
from pymongo.encryption_options import _HAVE_PYMONGOCRYPT
|
||||
from pymongo.errors import (
|
||||
@ -631,7 +631,7 @@ class UnifiedSpecTestMixinV1(IntegrationTest):
|
||||
# Connection errors are considered client errors.
|
||||
if isinstance(error, ConnectionFailure):
|
||||
self.assertNotIsInstance(error, NotPrimaryError)
|
||||
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)):
|
||||
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)):
|
||||
pass
|
||||
else:
|
||||
self.assertNotIsInstance(error, PyMongoError)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user