PYTHON-4947 - GridFS spec: Add performant 'delete revisions by filena… (#2218)

This commit is contained in:
Noah Stapp 2025-03-31 09:07:53 -04:00 committed by GitHub
parent 58a41ae7f5
commit 8675a163df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 322 additions and 10 deletions

View File

@ -9,6 +9,8 @@ PyMongo 4.12 brings a number of changes including:
- Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to
:class:`~pymongo.encryption_options.AutoEncryptionOpts`.
- Support for $lookup in CSFLE and QE supported on MongoDB 8.1+.
- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name`
for more performant deletion of a file with multiple revisions.
- AsyncMongoClient no longer performs DNS resolution for "mongodb+srv://" connection strings on creation.
To avoid blocking the asyncio loop, the resolution is now deferred until the client is first connected.
- Added index hinting support to the

View File

@ -834,6 +834,35 @@ class AsyncGridFSBucket:
if not res.deleted_count:
raise NoFile("no file could be deleted because none matched %s" % file_id)
@_csot.apply
async def delete_by_name(
    self, filename: str, session: Optional[AsyncClientSession] = None
) -> None:
    """Given a filename, delete this stored file's files collection document(s)
    and associated chunks from a GridFS bucket.

    Every revision stored under ``filename`` is removed.

    For example::

        my_db = AsyncMongoClient().test
        fs = AsyncGridFSBucket(my_db)
        await fs.upload_from_stream("test_file", b"data I want to store!")
        await fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a
        :class:`~pymongo.asynchronous.client_session.AsyncClientSession` (optional)

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    not_found = f"no file could be deleted because none matched filename {filename!r}"
    # Only the _id of each revision is needed to target the two deletes below.
    cursor = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [doc["_id"] async for doc in cursor]
    if not file_ids:
        # Nothing matched: fail now instead of sending two deletes with an
        # empty $in list that cannot remove anything.
        raise NoFile(not_found)
    # Same statement order as before: files documents first, then their chunks.
    res = await self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    await self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The ids were found but no files document was deleted.
        raise NoFile(not_found)
def find(self, *args: Any, **kwargs: Any) -> AsyncGridOutCursor:
"""Find and return the files collection documents that match ``filter``

View File

@ -830,6 +830,33 @@ class GridFSBucket:
if not res.deleted_count:
raise NoFile("no file could be deleted because none matched %s" % file_id)
@_csot.apply
def delete_by_name(self, filename: str, session: Optional[ClientSession] = None) -> None:
    """Given a filename, delete this stored file's files collection document(s)
    and associated chunks from a GridFS bucket.

    Every revision stored under ``filename`` is removed.

    For example::

        my_db = MongoClient().test
        fs = GridFSBucket(my_db)
        fs.upload_from_stream("test_file", b"data I want to store!")
        fs.delete_by_name("test_file")

    Raises :exc:`~gridfs.errors.NoFile` if no file with the given filename exists.

    :param filename: The name of the file to be deleted.
    :param session: a :class:`~pymongo.client_session.ClientSession` (optional)

    .. versionadded:: 4.12
    """
    _disallow_transactions(session)
    not_found = f"no file could be deleted because none matched filename {filename!r}"
    # Only the _id of each revision is needed to target the two deletes below.
    cursor = self._files.find({"filename": filename}, {"_id": 1}, session=session)
    file_ids = [doc["_id"] for doc in cursor]
    if not file_ids:
        # Nothing matched: fail now instead of sending two deletes with an
        # empty $in list that cannot remove anything.
        raise NoFile(not_found)
    # Same statement order as before: files documents first, then their chunks.
    res = self._files.delete_many({"_id": {"$in": file_ids}}, session=session)
    self._chunks.delete_many({"files_id": {"$in": file_ids}}, session=session)
    if not res.deleted_count:
        # The ids were found but no files document was deleted.
        raise NoFile(not_found)
def find(self, *args: Any, **kwargs: Any) -> GridOutCursor:
"""Find and return the files collection documents that match ``filter``

View File

@ -115,6 +115,17 @@ class TestGridfs(AsyncIntegrationTest):
self.assertEqual(0, await self.db.fs.files.count_documents({}))
self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
async def test_delete_by_name(self):
    """delete_by_name removes every revision of a filename and nothing else."""
    self.assertEqual(0, await self.db.fs.files.count_documents({}))
    self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
    gfs = gridfs.AsyncGridFSBucket(self.db)
    # Two revisions share one filename; a differently named file must survive.
    # With chunk_size_bytes=1 each uploaded byte becomes its own chunk.
    await gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    await gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    await gfs.upload_from_stream("other_filename", b"x", chunk_size_bytes=1)
    self.assertEqual(3, await self.db.fs.files.count_documents({}))
    self.assertEqual(11, await self.db.fs.chunks.count_documents({}))
    await gfs.delete_by_name("test_filename")
    self.assertEqual(0, await self.db.fs.files.count_documents({"filename": "test_filename"}))
    self.assertEqual(1, await self.db.fs.files.count_documents({}))
    self.assertEqual(1, await self.db.fs.chunks.count_documents({}))
    # A name with no remaining revisions must raise and leave other data alone.
    with self.assertRaises(gridfs.NoFile):
        await gfs.delete_by_name("test_filename")
    self.assertEqual(1, await self.db.fs.files.count_documents({}))
    self.assertEqual(1, await self.db.fs.chunks.count_documents({}))
    await gfs.delete_by_name("other_filename")
    self.assertEqual(0, await self.db.fs.files.count_documents({}))
    self.assertEqual(0, await self.db.fs.chunks.count_documents({}))
async def test_empty_file(self):
oid = await self.fs.upload_from_stream("test_filename", b"")
self.assertEqual(b"", await (await self.fs.open_download_stream(oid)).read())

View File

@ -45,7 +45,7 @@ from test.utils_shared import (
from bson import DBRef
from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket
from pymongo import ASCENDING, AsyncMongoClient, monitoring
from pymongo import ASCENDING, AsyncMongoClient, _csot, monitoring
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
from pymongo.asynchronous.cursor import AsyncCursor
from pymongo.asynchronous.helpers import anext
@ -543,7 +543,7 @@ class TestSession(AsyncIntegrationTest):
(bucket.rename, [1, "f2"], {}),
# Delete both files so _test_ops can run these operations twice.
(bucket.delete, [1], {}),
(bucket.delete, [2], {}),
(bucket.delete_by_name, ["f"], {}),
)
async def test_gridfsbucket_cursor(self):

View File

@ -32,7 +32,7 @@ from typing import List
from bson import encode
from bson.raw_bson import RawBSONDocument
from pymongo import WriteConcern
from pymongo import WriteConcern, _csot
from pymongo.asynchronous import client_session
from pymongo.asynchronous.client_session import TransactionOptions
from pymongo.asynchronous.command_cursor import AsyncCommandCursor
@ -295,6 +295,7 @@ class TestTransactions(AsyncTransactionsBase):
"new-name",
),
),
(bucket.delete_by_name, ("new-name",)),
]
async with client.start_session() as s, await s.start_transaction():

View File

@ -66,7 +66,7 @@ import pymongo
from bson import SON, json_util
from bson.codec_options import DEFAULT_CODEC_OPTIONS
from bson.objectid import ObjectId
from gridfs import AsyncGridFSBucket, GridOut
from gridfs import AsyncGridFSBucket, GridOut, NoFile
from pymongo import ASCENDING, AsyncMongoClient, CursorType, _csot
from pymongo.asynchronous.change_stream import AsyncChangeStream
from pymongo.asynchronous.client_session import AsyncClientSession, TransactionOptions, _TxnState
@ -632,7 +632,7 @@ class UnifiedSpecTestMixinV1(AsyncIntegrationTest):
# Connection errors are considered client errors.
if isinstance(error, ConnectionFailure):
self.assertNotIsInstance(error, NotPrimaryError)
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)):
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)):
pass
else:
self.assertNotIsInstance(error, PyMongoError)

View File

@ -0,0 +1,230 @@
{
"description": "gridfs-deleteByName",
"schemaVersion": "1.0",
"createEntities": [
{
"client": {
"id": "client0"
}
},
{
"database": {
"id": "database0",
"client": "client0",
"databaseName": "gridfs-tests"
}
},
{
"bucket": {
"id": "bucket0",
"database": "database0"
}
},
{
"collection": {
"id": "bucket0_files_collection",
"database": "database0",
"collectionName": "fs.files"
}
},
{
"collection": {
"id": "bucket0_chunks_collection",
"database": "database0",
"collectionName": "fs.chunks"
}
}
],
"initialData": [
{
"collectionName": "fs.files",
"databaseName": "gridfs-tests",
"documents": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"filename": "filename",
"metadata": {}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"length": 0,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"filename": "filename",
"metadata": {}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"length": 2,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"filename": "filename",
"metadata": {}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"length": 8,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"filename": "otherfilename",
"metadata": {}
}
]
},
{
"collectionName": "fs.chunks",
"databaseName": "gridfs-tests",
"documents": [
{
"_id": {
"$oid": "000000000000000000000001"
},
"files_id": {
"$oid": "000000000000000000000002"
},
"n": 0,
"data": {
"$binary": {
"base64": "",
"subType": "00"
}
}
},
{
"_id": {
"$oid": "000000000000000000000002"
},
"files_id": {
"$oid": "000000000000000000000003"
},
"n": 0,
"data": {
"$binary": {
"base64": "",
"subType": "00"
}
}
},
{
"_id": {
"$oid": "000000000000000000000003"
},
"files_id": {
"$oid": "000000000000000000000003"
},
"n": 0,
"data": {
"$binary": {
"base64": "",
"subType": "00"
}
}
},
{
"_id": {
"$oid": "000000000000000000000004"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 0,
"data": {
"$binary": {
"base64": "",
"subType": "00"
}
}
}
]
}
],
"tests": [
{
"description": "delete when multiple revisions of the file exist",
"operations": [
{
"name": "deleteByName",
"object": "bucket0",
"arguments": {
"filename": "filename"
}
}
],
"outcome": [
{
"collectionName": "fs.files",
"databaseName": "gridfs-tests",
"documents": [
{
"_id": {
"$oid": "000000000000000000000004"
},
"length": 8,
"chunkSize": 4,
"uploadDate": {
"$date": "1970-01-01T00:00:00.000Z"
},
"filename": "otherfilename",
"metadata": {}
}
]
},
{
"collectionName": "fs.chunks",
"databaseName": "gridfs-tests",
"documents": [
{
"_id": {
"$oid": "000000000000000000000004"
},
"files_id": {
"$oid": "000000000000000000000004"
},
"n": 0,
"data": {
"$binary": {
"base64": "",
"subType": "00"
}
}
}
]
}
]
},
{
"description": "delete when file name does not exist",
"operations": [
{
"name": "deleteByName",
"object": "bucket0",
"arguments": {
"filename": "missing-file"
},
"expectError": {
"isClientError": true
}
}
]
}
]
}

View File

@ -115,6 +115,17 @@ class TestGridfs(IntegrationTest):
self.assertEqual(0, self.db.fs.files.count_documents({}))
self.assertEqual(0, self.db.fs.chunks.count_documents({}))
def test_delete_by_name(self):
    """delete_by_name removes every revision of a filename and nothing else."""
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
    gfs = gridfs.GridFSBucket(self.db)
    # Two revisions share one filename; a differently named file must survive.
    # With chunk_size_bytes=1 each uploaded byte becomes its own chunk.
    gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    gfs.upload_from_stream("test_filename", b"hello", chunk_size_bytes=1)
    gfs.upload_from_stream("other_filename", b"x", chunk_size_bytes=1)
    self.assertEqual(3, self.db.fs.files.count_documents({}))
    self.assertEqual(11, self.db.fs.chunks.count_documents({}))
    gfs.delete_by_name("test_filename")
    self.assertEqual(0, self.db.fs.files.count_documents({"filename": "test_filename"}))
    self.assertEqual(1, self.db.fs.files.count_documents({}))
    self.assertEqual(1, self.db.fs.chunks.count_documents({}))
    # A name with no remaining revisions must raise and leave other data alone.
    with self.assertRaises(gridfs.NoFile):
        gfs.delete_by_name("test_filename")
    self.assertEqual(1, self.db.fs.files.count_documents({}))
    self.assertEqual(1, self.db.fs.chunks.count_documents({}))
    gfs.delete_by_name("other_filename")
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
def test_empty_file(self):
oid = self.fs.upload_from_stream("test_filename", b"")
self.assertEqual(b"", (self.fs.open_download_stream(oid)).read())

View File

@ -45,7 +45,7 @@ from test.utils_shared import (
from bson import DBRef
from gridfs.synchronous.grid_file import GridFS, GridFSBucket
from pymongo import ASCENDING, MongoClient, monitoring
from pymongo import ASCENDING, MongoClient, _csot, monitoring
from pymongo.common import _MAX_END_SESSIONS
from pymongo.errors import ConfigurationError, InvalidOperation, OperationFailure
from pymongo.operations import IndexModel, InsertOne, UpdateOne
@ -543,7 +543,7 @@ class TestSession(IntegrationTest):
(bucket.rename, [1, "f2"], {}),
# Delete both files so _test_ops can run these operations twice.
(bucket.delete, [1], {}),
(bucket.delete, [2], {}),
(bucket.delete_by_name, ["f"], {}),
)
def test_gridfsbucket_cursor(self):

View File

@ -32,7 +32,7 @@ from typing import List
from bson import encode
from bson.raw_bson import RawBSONDocument
from pymongo import WriteConcern
from pymongo import WriteConcern, _csot
from pymongo.errors import (
CollectionInvalid,
ConfigurationError,
@ -287,6 +287,7 @@ class TestTransactions(TransactionsBase):
"new-name",
),
),
(bucket.delete_by_name, ("new-name",)),
]
with client.start_session() as s, s.start_transaction():

View File

@ -65,7 +65,7 @@ import pymongo
from bson import SON, json_util
from bson.codec_options import DEFAULT_CODEC_OPTIONS
from bson.objectid import ObjectId
from gridfs import GridFSBucket, GridOut
from gridfs import GridFSBucket, GridOut, NoFile
from pymongo import ASCENDING, CursorType, MongoClient, _csot
from pymongo.encryption_options import _HAVE_PYMONGOCRYPT
from pymongo.errors import (
@ -631,7 +631,7 @@ class UnifiedSpecTestMixinV1(IntegrationTest):
# Connection errors are considered client errors.
if isinstance(error, ConnectionFailure):
self.assertNotIsInstance(error, NotPrimaryError)
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError)):
elif isinstance(error, (InvalidOperation, ConfigurationError, EncryptionError, NoFile)):
pass
else:
self.assertNotIsInstance(error, PyMongoError)