Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
2e78aa8
Implement synapse issue #16751: Treat local_media_directory as option…
drallgood Nov 19, 2025
7cc0aeb
Implement synapse issue #16751: Treat local_media_directory as option…
drallgood Nov 19, 2025
8eed314
Revert lint.sh formatting changes
drallgood Nov 19, 2025
b6ffef6
Fix mypy error for local_media_directory type annotation
drallgood Nov 20, 2025
ccc047d
Merge branch 'develop' into implement-issue-16751-local-media-provider
drallgood Nov 27, 2025
5d272af
Refactor MediaStorage to accept local_provider as optional argument
drallgood Dec 3, 2025
646a051
Convert ensure_media_is_in_local_cache to async context manager
drallgood Dec 3, 2025
94c4358
Document enable_local_media_storage config option
drallgood Dec 3, 2025
f63c91a
Merge branch 'develop' into implement-issue-16751-local-media-provider
drallgood Dec 3, 2025
99eb251
Fix lint formatting in media_repository
drallgood Dec 4, 2025
af958d3
Merge branch 'develop' into implement-issue-16751-local-media-provider
drallgood Dec 7, 2025
316e514
Fix race condition in concurrent remote media downloads
drallgood Jan 2, 2026
c957ae9
Merge branch 'develop' into implement-issue-16751-local-media-provider
drallgood Jan 2, 2026
635c4cc
Update tests/replication/test_multi_media_repo.py
drallgood Jan 5, 2026
8992d11
Apply suggestions from code review
drallgood Jan 5, 2026
84bb64f
Add documentation warning for enable_local_media_storage configuration
drallgood Jan 5, 2026
fedda30
Merge branch 'develop' into implement-issue-16751-local-media-provider
drallgood Jan 5, 2026
96b6703
Add warning log when no media storage backends are configured
drallgood Jan 5, 2026
9459809
Regenerate config documentation
anoadragon453 Jan 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19204.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Made the local media directory optional by treating it as a storage provider. This allows off-site media storage without a local cache, where media is stored directly to remote providers only, with temporary files used for thumbnail generation when needed. Contributed by Patrice Brend'amour @dr.allgood.
18 changes: 16 additions & 2 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@
SHA256TransparentIOReader,
SHA256TransparentIOWriter,
)
from synapse.media.storage_provider import StorageProviderWrapper
from synapse.media.storage_provider import (
FileStorageProviderBackend,
StorageProviderWrapper,
)
from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
from synapse.media.url_previewer import UrlPreviewer
from synapse.storage.databases.main.media_repository import LocalMedia, RemoteMedia
Expand Down Expand Up @@ -141,8 +144,19 @@ def __init__(self, hs: "HomeServer"):
)
storage_providers.append(provider)

# If we have a local media directory, add it as a storage provider
if self.primary_base_path:
backend = FileStorageProviderBackend(hs, self.primary_base_path)
local_wrapper = StorageProviderWrapper(
backend,
store_local=True,
store_remote=False,
store_synchronous=True,
)
storage_providers.insert(0, local_wrapper)

self.media_storage: MediaStorage = MediaStorage(
self.hs, self.primary_base_path, self.filepaths, storage_providers
self.hs, self.filepaths, storage_providers
)

self.clock.looping_call(
Expand Down
219 changes: 140 additions & 79 deletions synapse/media/media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import logging
import os
import shutil
import tempfile
from contextlib import closing
from io import BytesIO
from types import TracebackType
Expand All @@ -49,16 +50,16 @@
from synapse.api.errors import NotFoundError
from synapse.logging.context import defer_to_thread, run_in_background
from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
from synapse.media._base import ThreadedFileSender
from synapse.media.storage_provider import FileStorageProviderBackend
from synapse.util.clock import Clock
from synapse.util.file_consumer import BackgroundFileConsumer

from ..types import JsonDict
from ._base import FileInfo, Responder
from ._base import FileInfo, Responder, ThreadedFileSender
from .filepath import MediaFilePaths

if TYPE_CHECKING:
from synapse.media.storage_provider import StorageProvider
from synapse.media.storage_provider import StorageProviderWrapper
from synapse.server import HomeServer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -149,27 +150,31 @@ def __getattr__(self, attr_name: str) -> Any:


class MediaStorage:
"""Responsible for storing/fetching files from local sources.
"""Responsible for storing/fetching files from storage providers.

Args:
hs
local_media_directory: Base path where we store media on disk
filepaths
storage_providers: List of StorageProvider that are used to fetch and store files.
"""

def __init__(
self,
hs: "HomeServer",
local_media_directory: str,
filepaths: MediaFilePaths,
storage_providers: Sequence["StorageProvider"],
storage_providers: Sequence["StorageProviderWrapper"],
):
self.hs = hs
self.reactor = hs.get_reactor()
self.local_media_directory = local_media_directory
self.filepaths = filepaths
self.storage_providers = storage_providers
self.storage_providers = list(storage_providers)
self.local_provider = None
self.local_media_directory = None
for provider in self.storage_providers:
if isinstance(provider.backend, FileStorageProviderBackend):
self.local_provider = provider
self.local_media_directory = provider.backend.base_directory
break
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
self.clock = hs.get_clock()

Expand Down Expand Up @@ -221,53 +226,94 @@ async def store_into_file(
"""

path = self._file_info_to_path(file_info)
fname = os.path.join(self.local_media_directory, path)

dirname = os.path.dirname(fname)
os.makedirs(dirname, exist_ok=True)

try:
with start_active_span("writing to main media repo"):
with open(fname, "wb") as f:
yield f, fname
if self.local_provider:
fname = os.path.join(self.local_media_directory, path) # type: ignore[arg-type]
dirname = os.path.dirname(fname)
os.makedirs(dirname, exist_ok=True)

with start_active_span("writing to other storage providers"):
spam_check = (
await self._spam_checker_module_callbacks.check_media_file_for_spam(
try:
with start_active_span("writing to main media repo"):
with open(fname, "wb") as f:
yield f, fname

with start_active_span(
"spam checking and writing to other storage providers"
):
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
)
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.info("Blocking media due to spam checker")
# Note that we'll delete the stored media, due to the
# try/except below. The media also won't be stored in
# the DB.
# We currently ignore any additional field returned by
# the spam-check API.
raise SpamMediaException(errcode=spam_check[0])

for provider in self.storage_providers:
with start_active_span(str(provider)):
await provider.store_file(path, file_info)

except Exception as e:
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.info("Blocking media due to spam checker")
# Note that we'll delete the stored media, due to the
# try/except below. The media also won't be stored in
# the DB.
# We currently ignore any additional field returned by
# the spam-check API.
raise SpamMediaException(errcode=spam_check[0])

for provider in self.storage_providers:
if provider is not self.local_provider:
with start_active_span(str(provider)):
await provider.store_file(path, file_info)

except Exception as e:
try:
os.remove(fname)
except Exception:
pass

raise e from None
else:
# No local provider, write to temp file
with tempfile.NamedTemporaryFile(delete=False) as f:
fname = f.name
yield cast(BinaryIO, f), fname

try:
os.remove(fname)
except Exception:
pass
with start_active_span(
"spam checking and writing to storage providers"
):
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.info("Blocking media due to spam checker")
raise SpamMediaException(errcode=spam_check[0])

for provider in self.storage_providers:
with start_active_span(str(provider)):
await provider.store_file(path, file_info)

raise e from None
except Exception as e:
try:
os.remove(fname)
except Exception:
pass

raise e from None

async def fetch_media(self, file_info: FileInfo) -> Responder | None:
"""Attempts to fetch media described by file_info from the local cache
and configured storage providers.
"""Attempts to fetch media described by file_info from the configured storage providers.

Args:
file_info: Metadata about the media file

Returns:
Returns a Responder if the file was found, otherwise None.
"""
# URL cache files are stored locally and should not go through storage providers
if file_info.url_cache:
path = self._file_info_to_path(file_info)
if self.local_provider:
local_path = os.path.join(self.local_media_directory, path) # type: ignore[arg-type]
if os.path.isfile(local_path):
# Import here to avoid circular import
from .media_storage import FileResponder

return FileResponder(self.hs, open(local_path, "rb"))
return None

paths = [self._file_info_to_path(file_info)]

# fallback for remote thumbnails with no method in the filename
Expand All @@ -282,13 +328,6 @@ async def fetch_media(self, file_info: FileInfo) -> Responder | None:
)
)

for path in paths:
local_path = os.path.join(self.local_media_directory, path)
if os.path.exists(local_path):
logger.debug("responding with local file %s", local_path)
return FileResponder(self.hs, open(local_path, "rb"))
logger.debug("local file %s did not exist", local_path)

for provider in self.storage_providers:
for path in paths:
res: Any = await provider.fetch(path, file_info)
Expand All @@ -311,39 +350,61 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
Full path to local file
"""
path = self._file_info_to_path(file_info)
local_path = os.path.join(self.local_media_directory, path)
if os.path.exists(local_path):
return local_path

# Fallback for paths without method names
# Should be removed in the future
if file_info.thumbnail and file_info.server_name:
legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
server_name=file_info.server_name,
file_id=file_info.file_id,
width=file_info.thumbnail.width,
height=file_info.thumbnail.height,
content_type=file_info.thumbnail.type,
)
legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
if os.path.exists(legacy_local_path):
return legacy_local_path

dirname = os.path.dirname(local_path)
os.makedirs(dirname, exist_ok=True)

for provider in self.storage_providers:
res: Any = await provider.fetch(path, file_info)
if res:
with res:
consumer = BackgroundFileConsumer(
open(local_path, "wb"), self.reactor
)
await res.write_to_consumer(consumer)
await consumer.wait()
if self.local_provider:
local_path = os.path.join(self.local_media_directory, path) # type: ignore[arg-type]
if os.path.exists(local_path):
return local_path

raise NotFoundError()
# Fallback for paths without method names
# Should be removed in the future
if file_info.thumbnail and file_info.server_name:
legacy_path = self.filepaths.remote_media_thumbnail_rel_legacy(
server_name=file_info.server_name,
file_id=file_info.file_id,
width=file_info.thumbnail.width,
height=file_info.thumbnail.height,
content_type=file_info.thumbnail.type,
)
legacy_local_path = os.path.join(
self.local_media_directory, # type: ignore[arg-type]
legacy_path,
)
if os.path.exists(legacy_local_path):
return legacy_local_path

dirname = os.path.dirname(local_path)
os.makedirs(dirname, exist_ok=True)

for provider in self.storage_providers:
if provider is self.local_provider:
continue
remote_res: Any = await provider.fetch(path, file_info)
if remote_res:
with remote_res:
consumer = BackgroundFileConsumer(
open(local_path, "wb"), self.reactor
)
await remote_res.write_to_consumer(consumer)
await consumer.wait()
return local_path

raise NotFoundError()
else:
# No local provider, download to temp
for provider in self.storage_providers:
res: Any = await provider.fetch(path, file_info)
if res:
temp_dir = tempfile.gettempdir()
temp_path = os.path.join(temp_dir, os.path.basename(path))
with res:
consumer = BackgroundFileConsumer(
open(temp_path, "wb"), self.reactor
)
await res.write_to_consumer(consumer)
await consumer.wait()
return temp_path

raise NotFoundError()

@trace
def _file_info_to_path(self, file_info: FileInfo) -> str:
Expand Down
4 changes: 3 additions & 1 deletion synapse/media/storage_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from synapse.util.async_helpers import maybe_awaitable

from ._base import FileInfo, Responder
from .media_storage import FileResponder

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -178,6 +177,9 @@ async def fetch(self, path: str, file_info: FileInfo) -> Responder | None:

backup_fname = os.path.join(self.base_directory, path)
if os.path.isfile(backup_fname):
# Import here to avoid circular import
from .media_storage import FileResponder

return FileResponder(self.hs, open(backup_fname, "rb"))

return None
Expand Down
Loading
Loading