Skip to content

Commit c8f656c

Browse files
authored
Greatly improve Reddit subdomain handling (#422)
2 parents ddc6078 + 3699569 commit c8f656c

File tree

1 file changed

+35
-14
lines changed

1 file changed

+35
-14
lines changed

app/components/fixup_embeds.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,42 @@
2020

2121
from app.bot import GhosttyBot
2222

23-
type SiteTransformation = tuple[re.Pattern[str], Callable[[re.Match[str]], str]]
23+
type SiteTransformation = tuple[re.Pattern[str], Callable[[re.Match[str]], str | None]]
24+
25+
26+
def _reddit_transformer(match: re.Match[str]) -> str | None:
27+
# Reddit supports `foo.reddit.com` as an alias for `reddit.com/r/foo`, but Rxddit
28+
# does not. However, Reddit also has a *bunch* of random subdomains. Rxddit handles
29+
# the skins (old.reddit.com and new.reddit.com) properly, so those are appended to
30+
# the URL. Apparently there's also a subdomain for every two-letter sequence, with
31+
# some being language codes and others being unused, which Rxddit doesn't handle, so
32+
# they're simply dropped by the regex below.
33+
34+
# Post links have either a subdomain (representing the subreddit) or a subreddit, so
35+
# ignore everything else.
36+
if bool(match["subdomain"]) == bool(match["subreddit"]):
37+
return None
38+
39+
skin = f"{s}." if (s := match["skin"]) else ""
40+
if subreddit := match["subreddit"]:
41+
# https://reddit.com/r///foo/comments/bar works apparently, but Rxddit doesn't
42+
# support it. Honestly don't blame them.
43+
subreddit = "r/" + subreddit.removeprefix("r").strip("/")
44+
else:
45+
# Append the subdomain as a subreddit if we don't already have one.
46+
subreddit = f"r/{match['subdomain']}"
47+
return f"https://{skin}rxddit.com/{subreddit}/{match['post']}"
48+
2449

2550
VALID_URI_CHARS = r"[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]"
2651
EMBED_SITES: tuple[SiteTransformation, ...] = (
52+
(
53+
re.compile(
54+
r"https://(?:(?:www|(?P<skin>old|new)|\w\w|(?P<subdomain>[A-Za-z0-9_]+))\.)?reddit\.com/+"
55+
rf"(?P<subreddit>r/+[A-Za-z0-9_]+/+)?(?P<post>{VALID_URI_CHARS}+)"
56+
),
57+
_reddit_transformer,
58+
),
2759
(
2860
re.compile(
2961
r"https://(?:www\.)?(?P<site>x|twitter)\.com/"
@@ -39,17 +71,6 @@
3971
),
4072
lambda match: f"https://phixiv.net/{match[1]}",
4173
),
42-
(
43-
re.compile(
44-
r"https://(?:(?:www|(?P<subreddit>\w+))\.)?reddit\.com/"
45-
rf"(?P<post>{VALID_URI_CHARS}+)"
46-
),
47-
# Reddit supports `foo.reddit.com` as an alias for `reddit.com/r/foo`, but
48-
# Rxddit does not.
49-
lambda match: "https://rxddit.com/"
50-
+ (f"r/{subreddit}/" if (subreddit := match["subreddit"]) else "")
51-
+ match["post"],
52-
),
5374
)
5475
IGNORED_LINK = re.compile(rf"\<https://{VALID_URI_CHARS}+\>")
5576

@@ -68,12 +89,12 @@ def __init__(self, bot: GhosttyBot) -> None:
6889
FixUpActions.linker = self.linker
6990

7091
async def process(self, message: dc.Message) -> ProcessedMessage:
71-
matches: list[str] = []
92+
matches: list[str | None] = []
7293
message_content = IGNORED_LINK.sub("", message.content)
7394
for pattern, transformer in EMBED_SITES:
7495
matches.extend(map(transformer, pattern.finditer(message_content)))
7596

76-
links = list(dict.fromkeys(matches))
97+
links = list(filter(None, dict.fromkeys(matches)))
7798
omitted = False
7899
if len(links) > 5:
79100
omitted = True

0 commit comments

Comments
 (0)