Skip to content

Commit 48ac416

Browse files
[redbust] add support (#6759 #6918 #7043)
* init - Redbust.com Support * Added Test Could use a second set of eyes on this * update 'gallery' extractor - extract more metadata - simplify image extraction - support legacy galleries * add tests * update 'image' extractor * add 'tag' extractor * add 'archive' extractor * restrict 'image' extractor pattern * update docs/supportedsites * replace quotes inside f-string --------- Co-authored-by: Mike Fährmann <[email protected]>
1 parent 6227b64 commit 48ac416

5 files changed

Lines changed: 297 additions & 0 deletions

File tree

docs/supportedsites.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,12 @@ Consider all listed sites to potentially be NSFW.
787787
<td>Favorites, Pools, Posts, Tag Searches</td>
788788
<td></td>
789789
</tr>
790+
<tr>
791+
<td>RedBust</td>
792+
<td>https://redbust.com/</td>
793+
<td>Archives, Galleries, Categories, individual Images, Tag Searches</td>
794+
<td></td>
795+
</tr>
790796
<tr>
791797
<td>Reddit</td>
792798
<td>https://www.reddit.com/</td>

gallery_dl/extractor/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@
145145
"reactor",
146146
"readcomiconline",
147147
"realbooru",
148+
"redbust",
148149
"reddit",
149150
"redgifs",
150151
"rule34us",

gallery_dl/extractor/redbust.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
"""Extractors for https://redbust.com/"""
8+
9+
from .common import GalleryExtractor, Extractor, Message
10+
from .. import text
11+
12+
# Regex prefix shared by every extractor pattern in this module:
# the redbust.com host with an optional http:// or https:// scheme.
BASE_PATTERN = r"(?:https?://)?redbust\.com"
13+
14+
15+
class RedbustExtractor(Extractor):
    """Common functionality shared by the RedBust extractors"""
    category = "redbust"
    root = "https://redbust.com"
    filename_fmt = "{filename}.{extension}"

    def items(self):
        """Queue every URL produced by galleries()

        Each URL is emitted as a Message.Queue entry that names
        RedbustGalleryExtractor as the extractor to handle it.
        Subclasses are expected to provide galleries().
        """
        queue_data = {"_extractor": RedbustGalleryExtractor}
        for gallery_url in self.galleries():
            yield Message.Queue, gallery_url, queue_data

    def _pagination(self, path, page=None):
        """Yield post URLs from the listing at 'path' and its later pages

        'path' is appended to the site root; listing pages after the
        first live under '<root><path>/page/<number>/'.
        'page' may hold the already downloaded HTML of the first listing
        page; when omitted, that page is requested here.
        """
        base = f"{self.root}{path}/page/"
        if page is None:
            page = self.request(f"{self.root}{path}/").text

        next_num = 2
        while True:
            # one link per '<h2 class="post-title">' heading
            for heading in text.extract_iter(
                    page, '<h2 class="post-title">', "rel="):
                yield text.extr(heading, 'href="', '"')

            # continue only while the current page references its successor
            next_url = f"{base}{next_num}/"
            if next_url not in page:
                break
            page = self.request(next_url).text
            next_num += 1
45+
46+
47+
class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
    """Extractor for RedBust galleries"""
    pattern = BASE_PATTERN + r"/([\w-]+)/?$"
    example = "https://redbust.com/TITLE/"

    def items(self):
        """Handle the matched URL as a gallery or as a category listing

        A single path segment can name either kind of page. The page is
        downloaded once and stored in 'self.page'; it is treated as a
        gallery when it contains a '?p=<id>' shortlink and as a category
        listing otherwise.
        """
        url = f"{self.root}/{self.groups[0]}/"
        self.page = page = self.request(url).text

        self.gallery_id = gid = text.extr(
            page, "<link rel='shortlink' href='https://redbust.com/?p=", "'")

        if gid:
            # NOTE(review): presumably stops GalleryExtractor from
            # requesting the page a second time - confirm in base class
            self.gallery_url = False
            return GalleryExtractor.items(self)

        self.subcategory = "category"
        return self._items_category(page)

    def _items_category(self, _):
        """Queue all galleries linked from a category listing

        The argument is ignored; the HTML stored in 'self.page' by
        items() is used as the first listing page. Paging is delegated
        to the shared '_pagination()' helper.
        """
        data = {"_extractor": RedbustGalleryExtractor}
        for url in self._pagination(f"/{self.groups[0]}", self.page):
            yield Message.Queue, url, data

    def metadata(self, _):
        """Return gallery metadata extracted from 'self.page'

        The argument is ignored. Fields are read with a single
        sequential extractor, so the lookups below must stay in the
        order in which their markers appear in the page.
        """
        extr = text.extract_from(self.page)

        return {
            "gallery_id"  : self.gallery_id,
            "gallery_slug": self.groups[0],
            # NOTE(review): every second element is kept - presumably
            # the others are separators between category names
            "categories"  : text.split_html(extr(
                '<li class="category">', "</li>"))[::2],
            "title"       : text.unescape(extr('class="post-title">', "<")),
            "date"        : text.parse_datetime(
                extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
            # thousands separators are removed before parsing
            "views"       : text.parse_int(extr("</b>", "v").replace(",", "")),
            # NOTE(review): the first element is skipped - presumably
            # a label preceding the tag names
            "tags"        : text.split_html(extr(
                'class="post-tags">', "</p"))[1:],
        }

    def images(self, _):
        """Return a list of (url, data) tuples for the gallery's images

        The argument is ignored; image tags are read from 'self.page'.
        'data' is None or a dict holding a '_fallback' URL tuple.
        """
        results = []

        for img in text.extract_iter(self.page, "'><img ", ">"):
            if src := text.extr(img, 'src="', '"'):
                path, _, end = src.rpartition("-")
                if "x" in end:
                    # presumably a resized '-<width>x<height>' variant:
                    # drop that suffix but keep the file extension, and
                    # remember the listed URL as fallback
                    url = f"{path}.{end.rpartition('.')[2]}"
                    data = None if src == url else {"_fallback": (src,)}
                else:
                    url = src
                    data = None
                results.append((url, data))

        if not results:
            # fallback for older galleries
            results.extend(
                (f"{self.root}/wp-content/uploads/{path}", None)
                for path in text.extract_iter(
                    self.page, '<img src="/wp-content/uploads/', '"'))

        return results
122+
123+
124+
class RedbustTagExtractor(RedbustExtractor):
    """Extractor for the galleries listed under a RedBust tag"""
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/tag/([\w-]+)"
    example = "https://redbust.com/tag/TAG/"

    def galleries(self):
        """Return the gallery URLs found on the matched tag's listing"""
        tag = self.groups[0]
        return self._pagination(f"/tag/{tag}")
132+
133+
134+
class RedbustArchiveExtractor(RedbustExtractor):
    """Extractor for the galleries of a RedBust monthly archive"""
    subcategory = "archive"
    pattern = BASE_PATTERN + r"(/\d{4}/\d{2})"
    example = "https://redbust.com/2010/01/"

    def galleries(self):
        """Return the gallery URLs found on the matched month's listing"""
        # the pattern captures '/YYYY/MM' including its leading slash
        archive_path = self.groups[0]
        return self._pagination(archive_path)
142+
143+
144+
class RedbustImageExtractor(RedbustExtractor):
    """Extractor for RedBust images"""
    subcategory = "image"
    directory_fmt = ("{category}", "{title}")
    pattern = BASE_PATTERN + r"/(?!tag/|\d{4}/)([\w-]+)/([\w-]+)/?$"
    example = "https://redbust.com/TITLE/SLUG/"

    def items(self):
        """Yield the directory and URL messages for a single image page

        The image URL is taken from the last entry of the first 'srcset'
        attribute on the page, or, failing that, from the 'img src' inside
        the 'entry-inner' section. Nothing is yielded when neither lookup
        produces a URL.
        """
        gallery_slug, image_slug = self.groups
        url = f"{self.root}/{gallery_slug}/{image_slug}/"
        page = self.request(url).text

        img_url = None

        # Look for the largest image in srcset first
        if srcset := text.extr(page, 'srcset="', '"'):
            # Take the last candidate (typically the largest one) and
            # strip its width descriptor. rpartition() returns the whole
            # string when there is no separator, so no emptiness check
            # is needed.
            img_url = srcset.rpartition(", ")[2].partition(" ")[0]

        # Fallback to original extraction method
        if not img_url:
            if entry := text.extr(page, "entry-inner ", "alt="):
                img_url = text.extr(entry, "img src=", " ").strip("\"'")

        if not img_url:
            return

        # the text after the last '-' up to the first '.' is parsed
        # as the image's number
        end = img_url.rpartition("-")[2]
        data = text.nameext_from_url(img_url, {
            "title"       : text.unescape(text.extr(
                page, 'title="Return to ', '"')),
            "image_id"    : text.extr(
                page, "rel='shortlink' href='https://redbust.com/?p=", "'"),
            "gallery_slug": gallery_slug,
            "image_slug"  : image_slug,
            "num"         : text.parse_int(end.partition(".")[0]),
            "count"       : 1,
            "url"         : img_url,
        })

        yield Message.Directory, data
        yield Message.Url, img_url, data

scripts/supportedsites.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
"pornpics" : "PornPics.com",
123123
"pornreactor" : "PornReactor",
124124
"readcomiconline": "Read Comic Online",
125+
"redbust" : "RedBust",
125126
"rbt" : "RebeccaBlackTech",
126127
"redgifs" : "RedGIFs",
127128
"rozenarcana" : "Rozen Arcana",
@@ -334,6 +335,9 @@
334335
"post" : "Individual Posts",
335336
"shorturl" : "",
336337
},
338+
"redbust": {
339+
"gallery": ("Galleries", "Categories"),
340+
},
337341
"redgifs": {
338342
"collections": "",
339343
},

test/results/redbust.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
from gallery_dl.extractor import redbust
8+
9+
10+
__tests__ = (
# gallery in the current page layout
{
    "#url"     : "https://redbust.com/sadie-gray-wearing-black-nighty/",
    "#class"   : redbust.RedbustGalleryExtractor,
    "#pattern" : r"https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-\d+\.jpg",
    "#count"   : 35,

    "count"       : 35,
    "num"         : range(1, 35),
    "date"        : "dt:2024-03-19 00:00:00",
    "extension"   : "jpg",
    "filename"    : str,
    "gallery_id"  : "373920",
    "gallery_slug": "sadie-gray-wearing-black-nighty",
    "title"       : "Sadie Gray wearing black nighty",
    "views"       : range(38_000, 50_000),
    "categories"  : [
        "Super Chicks",
    ],
    "tags"        : [
        "bathroom",
        "nighty",
        "playboy",
        "redhead",
        "sadie gray",
    ],
},

# gallery whose images are served from /wp-content/uploads/
{
    "#url"     : "https://redbust.com/girls-in-carwash/",
    "#comment" : "legacy",
    "#class"   : redbust.RedbustGalleryExtractor,
    "#pattern" : r"https://redbust.com/wp-content/uploads/girls_in_carwash/girls_in_carwash_\d+\.jpg",
    "#count"   : 27,

    "count"       : 27,
    "num"         : range(1, 27),
    "date"        : "dt:2010-09-05 00:00:00",
    "extension"   : "jpg",
    "filename"    : str,
    "gallery_id"  : "6517",
    "gallery_slug": "girls-in-carwash",
    "title"       : "Girls in carwash",
    "views"       : range(9_000, 20_000),
    "categories"  : [
        "Automotive",
    ],
    "tags"        : [
        "carwash",
    ],
},

# single-segment URL that is a category listing rather than a gallery
{
    "#url"     : "https://redbust.com/other/",
    "#comment" : "'category' listing",
    "#class"   : redbust.RedbustGalleryExtractor,
    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
    "#count"   : 28,
},

# tag search
{
    "#url"     : "https://redbust.com/tag/tan-lines/",
    "#class"   : redbust.RedbustTagExtractor,
    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
    "#count"   : range(70, 100),
},

# monthly archive
{
    "#url"     : "https://redbust.com/2021/04/",
    "#class"   : redbust.RedbustArchiveExtractor,
    "#pattern" : redbust.RedbustGalleryExtractor.pattern,
    "#count"   : 25,
},

# individual image page
{
    "#url"     : "https://redbust.com/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04/",
    "#class"   : redbust.RedbustImageExtractor,
    "#results" : "https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04.jpg",

    "count"       : 1,
    "num"         : 4,
    "extension"   : "jpg",
    "filename"    : "sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04",
    "gallery_slug": "sadie-gray-wearing-black-nighty",
    "image_id"    : "373925",
    "image_slug"  : "sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04",
    "title"       : "Sadie Gray wearing black nighty",
    "url"         : "https://redbust.com/stuff/sadie-gray-wearing-black-nighty/sadie-gray-morning-ritual-bathroom-naked-boobs-nighty-playboy-04.jpg",
},

)

0 commit comments

Comments
 (0)