Skip to content

Commit aa682d7

Browse files
authored
fix: medium bug (#129)
* fix: improved medium parser for images and multi-section content
* fix: duplicate video
1 parent 4e049de commit aa682d7

3 files changed

Lines changed: 36 additions & 2 deletions

File tree

fixtures/medium.com/1485902752952.html

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/extractors/custom/medium.com/index.js

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export const MediumExtractor = {
1919

2020
content: {
2121
selectors: [
22+
['.section-content'],
2223
'.section-content',
2324
'article > div > section',
2425
],
@@ -36,10 +37,20 @@ export const MediumExtractor = {
3637
const [_, youtubeId] = thumb.match(ytRe) // eslint-disable-line
3738
$node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);
3839
const $parent = $node.parents('figure');
39-
$parent.prepend($node.clone());
40-
$node.remove();
40+
const $caption = $parent.find('figcaption');
41+
$parent.empty().append([$node, $caption]);
4142
}
4243
},
44+
45+
// rewrite figures to pull out image and caption, remove rest
// Mutates $node in place and returns nothing.
// NOTE(review): presumably called once per matched <figure> by the transforms runner — confirm against the caller.
46+
figure: ($node) => {
47+
// ignore if figure has an iframe
// NOTE(review): presumably those figures are already rebuilt around the embed by the handler earlier in this map — confirm.
48+
if ($node.find('iframe').length > 0) return;
49+
50+
// .slice(-1) keeps only the last <img> inside the figure; [0] unwraps it to the
// underlying element, which is undefined when the figure contains no image.
const $img = $node.find('img').slice(-1)[0];
51+
// Looked up before the figure is emptied below.
const $caption = $node.find('figcaption');
52+
// Drop every existing child, then re-attach only the chosen image and the caption match(es).
$node.empty().append([$img, $caption]);
53+
},
4354
},
4455

4556
// Is there anything that is in the result that shouldn't be?

src/extractors/custom/medium.com/index.test.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,4 +77,26 @@ describe('MediumExtractor', () => {
7777
assert.equal(first13, 'Video of WTF? My talk at the White House Frontiers ConferenceLast Thursday, I');
7878
});
7979
});
80+
81+
// Parses a second Medium article from a saved HTML fixture and checks the
// opening words of the extracted content.
describe('works with another url', () => {
82+
let result;
83+
let url;
84+
// Kick off the parse once; `result` is stored un-awaited here and is awaited inside the test below.
beforeAll(() => {
85+
url =
86+
'https://medium.com/@JakobUlbrich/flag-attributes-in-android-how-to-use-them-ac4ec8aee7d1#.h949wjmyw';
87+
// Read from disk so the test does not depend on the live page.
const html = fs.readFileSync('./fixtures/medium.com/1485902752952.html');
88+
// NOTE(review): `fallback: false` presumably limits extraction to the custom extractor — confirm against Mercury.parse options.
result =
89+
Mercury.parse(url, html, { fallback: false });
90+
});
91+
92+
it('returns the content', async () => {
93+
const { content } = await result;
94+
95+
// `|| ''` guarantees cheerio.load receives a string even when no content was extracted.
const $ = cheerio.load(content || '');
96+
97+
// NOTE(review): presumably the first 13 words of the text; the expected string below has 13 words.
const first13 = excerptContent($.text(), 13);
98+
99+
assert.equal(first13, 'I’m sure you have seen something like the following line very often while');
100+
});
101+
});
80102
});

0 commit comments

Comments
 (0)