[redbust] add support (#6759 #6918 #7043)

SpiffyChatterbox · mikf · web-flow · commit 48ac41605d5c · 2025-06-16T12:10:42.000+02:00
* init - Redbust.com Support
* Added Test
    Could use a second set of eyes on this
* update 'gallery' extractor
    - extract more metadata
    - simplify image extraction
    - support legacy galleries
* add tests
* update 'image' extractor
* add 'tag' extractor
* add 'archive' extractor
* restrict 'image' extractor pattern
* update docs/supportedsites
* replace quotes inside f-string

---------

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
@@ -787,6 +787,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Favorites, Pools, Posts, Tag Searches</td>
     <td></td>
 </tr>
+<tr>
+    <td>RedBust</td>
+    <td>https://redbust.com/</td>
+    <td>Archives, Galleries, Categories, individual Images, Tag Searches</td>
+    <td></td>
+</tr>
 <tr>
     <td>Reddit</td>
     <td>https://www.reddit.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -145,6 +145,7 @@
     "reactor",
     "readcomiconline",
     "realbooru",
+    "redbust",
     "reddit",
     "redgifs",
     "rule34us",
diff --git a/gallery_dl/extractor/redbust.py b/gallery_dl/extractor/redbust.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://redbust.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+
+# URL prefix shared by every RedBust pattern below; the scheme is optional.
+BASE_PATTERN = r"(?:https?://)?redbust\.com"
+
+
+class RedbustExtractor(Extractor):
+    """Base class for RedBust extractors"""
+    category = "redbust"
+    root = "https://redbust.com"
+    filename_fmt = "{filename}.{extension}"
+
+    def items(self):
+        data = {"_extractor": RedbustGalleryExtractor}
+        for url in self.galleries():
+            yield Message.Queue, url, data
+
+    def _pagination(self, path, page=None):
+        if page is None:
+            url = f"{self.root}{path}/"
+            base = url + "page/"
+            page = self.request(url).text
+        else:
+            base = f"{self.root}{path}/page/"
+
+        pnum = 1
+        while True:
+            for post in text.extract_iter(
+                    page, '<h2 class="post-title">', "rel="):
+                yield text.extr(post, 'href="', '"')
+
+            pnum += 1
+            url = f"{base}{pnum}/"
+            if url not in page:
+                return
+            page = self.request(url).text
+
+
+class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
+    """Extractor for RedBust galleries"""
+    pattern = BASE_PATTERN + r"/([\w-]+)/?$"
+    example = "https://redbust.com/TITLE/"
+
+    def items(self):
+        url = f"{self.root}/{self.groups[0]}/"
+        self.page = page = self.request(url).text
+
+        self.gallery_id = gid = text.extr(
+            page, "<link rel='shortlink' href='https://redbust.com/?p=", "'")
+
+        if gid:
+            self.gallery_url = False
+            return GalleryExtractor.items(self)
+        else:
+            self.subcategory = "category"
+            return self._items_category(page)
+
+    def _items_category(self, _):
+        page = self.page
+        data = {"_extractor": RedbustGalleryExtractor}
+        base = f"{self.root}/{self.groups[0]}/page/"
+        pnum = 1
+
+        while True:
+            for post in text.extract_iter(
+                    page, '<h2 class="post-title">', "rel="):
+                url = text.extr(post, 'href="', '"')
+                yield Message.Queue, url, data
+
+            pnum += 1
+            url = f"{base}{pnum}/"
+            if url not in page:
+                return
+            page = self.request(url).text
+
+    def metadata(self, _):
+        extr = text.extract_from(self.page)
+
+        return {
+            "gallery_id"  : self.gallery_id,
+            "gallery_slug": self.groups[0],
+            "categories"  : text.split_html(extr(
+                '<li class="category">', "</li>"))[::2],
+            "title"       : text.unescape(extr('class="post-title">', "<")),
+            "date"        : text.parse_datetime(
+                extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
+            "views"       : text.parse_int(extr("</b>", "v").replace(",", "")),
+            "tags"        : text.split_html(extr(
+                'class="post-tags">', "</p"))[1:],
+        }
+
+    def images(self, _):
+        results = []
+
+        for img in text.extract_iter(self.page, "'><img ", ">"):
+            if src := text.extr(img, 'src="', '"'):
+                path, _, end = src.rpartition("-")
+                if "x" in end:
+                    url = f"{path}.{end.rpartition('.')[2]}"
+                    data = None if src == url else {"_fallback": (src,)}
+                else:
+                    url = src
+                    data = None
+                results.append((url, data))
+
+        if not results:
+            # fallback for older galleries
+            for path in text.extract_iter(
+                    self.page, '<img src="/wp-content/uploads/', '"'):
+                results.append(
+                    (f"{self.root}/wp-content/uploads/{path}", None))
+
+        return results
+
+
+class RedbustTagExtractor(RedbustExtractor):
+    """Extractor for RedBust tag searches"""
+    subcategory = "tag"
+    pattern = BASE_PATTERN + r"/tag/([\w-]+)"
+    example = "https://redbust.com/tag/TAG/"
+
+    def galleries(self):
+        return self._pagination("/tag/" + self.groups[0])
+
+
+class RedbustArchiveExtractor(RedbustExtractor):
+    """Extractor for RedBust monthly archive collections"""
+    subcategory = "archive"
+    pattern = BASE_PATTERN + r"(/\d{4}/\d{2})"
+    example = "https://redbust.com/2010/01/"
+
+    def galleries(self):
+        return self._pagination(self.groups[0])
+
+
+class RedbustImageExtractor(RedbustExtractor):
+    """Extractor for RedBust images"""
+    subcategory = "image"
+    directory_fmt = ("{category}", "{title}")
+    pattern = BASE_PATTERN + r"/(?!tag/|\d{4}/)([\w-]+)/([\w-]+)/?$"
+    example = "https://redbust.com/TITLE/SLUG/"
+
+    def items(self):
+        gallery_slug, image_slug = self.groups
+        url = f"{self.root}/{gallery_slug}/{image_slug}/"
+        page = self.request(url).text
+
+        img_url = None
+
+        # Look for the largest image in srcset first
+        if srcset := text.extr(page, 'srcset="', '"'):
+            # Extract the largest image from srcset (typically last one)
+            urls = srcset.split(", ")
+            img_url = urls[-1].partition(" ")[0] if urls else None
+
+        # Fallback to original extraction method
+        if not img_url:
+            if entry := text.extr(page, "entry-inner ", "alt="):
+                img_url = text.extr(entry, "img src=", " ").strip("\"'")
+
+        if not img_url:
+            return
+
+        end = img_url.rpartition("-")[2]
+        data = text.nameext_from_url(img_url, {
+            "title"       : text.unescape(text.extr(
+                page, 'title="Return to ', '"')),
+            "image_id"    : text.extr(
+                page, "rel='shortlink' href='https://redbust.com/?p=", "'"),
+            "gallery_slug": gallery_slug,
+            "image_slug"  : image_slug,
+            "num"         : text.parse_int(end.partition(".")[0]),
+            "count"       : 1,
+            "url"         : img_url,
+        })
+
+        yield Message.Directory, data
+        yield Message.Url, img_url, data
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
@@ -122,6 +122,7 @@
     "pornpics"       : "PornPics.com",
     "pornreactor"    : "PornReactor",
     "readcomiconline": "Read Comic Online",
+    "redbust"        : "RedBust",
     "rbt"            : "RebeccaBlackTech",
     "redgifs"        : "RedGIFs",
     "rozenarcana"    : "Rozen Arcana",
@@ -334,6 +335,9 @@
         "post"           : "Individual Posts",
         "shorturl"       : "",
     },
+    "redbust": {
+        "gallery": ("Galleries", "Categories"),
+    },
     "redgifs": {
         "collections": "",
     },
diff --git a/test/results/redbust.py b/test/results/redbust.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import redbust
+
+
+__tests__ = (
+# gallery whose files are served from /stuff/<gallery-slug>/
+# NOTE(review): range(1, 35) excludes 35 under plain Python semantics
+# while "count" is 35 - confirm the test harness treats the stop value
+# as inclusive
+{
+    "#url"     : "https://redbust.com/sadie-gray-wearing-black-nighty/",
+    "#class"   : redbust.RedbustGalleryExtractor,
+    "#pattern" : r"https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-\d+\.jpg",
+    "#count"   : 35,
+
+    "count"       : 35,
+    "num"         : range(1, 35),
+    "date"        : "dt:2024-03-19 00:00:00",
+    "extension"   : "jpg",
+    "filename"    : str,
+    "gallery_id"  : "373920",
+    "gallery_slug": "sadie-gray-wearing-black-nighty",
+    "title"       : "Sadie Gray wearing black nighty",
+    "views"       : range(38_000, 50_000),
+    "categories"  : [
+        "Super Chicks",
+    ],
+    "tags"        : [
+        "bathroom",
+        "nighty",
+        "playboy",
+        "redhead",
+        "sadie gray",
+    ],
+},
+
+# legacy gallery whose files are served from /wp-content/uploads/
+{
+    "#url"     : "https://redbust.com/girls-in-carwash/",
+    "#comment" : "legacy",
+    "#class"   : redbust.RedbustGalleryExtractor,
+    "#pattern" : r"https://redbust.com/wp-content/uploads/girls_in_carwash/girls_in_carwash_\d+\.jpg",
+    "#count"   : 27,
+
+    "count"       : 27,
+    "num"         : range(1, 27),
+    "date"        : "dt:2010-09-05 00:00:00",
+    "extension"   : "jpg",
+    "filename"    : str,
+    "gallery_id"  : "6517",
+    "gallery_slug": "girls-in-carwash",
+    "title"       : "Girls in carwash",
+    "views"       : range(9_000, 20_000),
+    "categories"  : [
+        "Automotive",
+    ],
+    "tags"        : [
+        "carwash",
+    ],
+},
+
+# category listing matched by the gallery extractor; results are gallery URLs
+{
+    "#url"     : "https://redbust.com/other/",
+    "#comment" : "'category' listing",
+    "#class"   : redbust.RedbustGalleryExtractor,
+    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
+    "#count"   : 28,
+},
+
+# tag listing; results are gallery URLs
+{
+    "#url"     : "https://redbust.com/tag/tan-lines/",
+    "#class"   : redbust.RedbustTagExtractor,
+    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
+    "#count"   : range(70, 100),
+},
+
+# monthly archive listing; results are gallery URLs
+{
+    "#url"     : "https://redbust.com/2021/04/",
+    "#class"   : redbust.RedbustArchiveExtractor,
+    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
+    "#count"   : 25,
+},
+
+# single image page below a gallery
+{
+    "#url"     : "https://redbust.com/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04/",
+    "#class"   : redbust.RedbustImageExtractor,
+    "#results" : "https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04.jpg",
+
+    "count"       : 1,
+    "num"         : 4,
+    "extension"   : "jpg",
+    "filename"    : "sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04",
+    "gallery_slug": "sadie-gray-wearing-black-nighty",
+    "image_id"    : "373925",
+    "image_slug"  : "sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04",
+    "title"       : "Sadie Gray wearing black nighty",
+    "url"         : "https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04.jpg",
+},
+
+)