File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ import {
2+ MediumExtractor ,
3+ BloggerExtractor ,
4+ } from './custom/' ;
5+
// Maps a CSS selector to the custom extractor to use when the document
// contains at least one element matching that selector.
const Detectors = {
  'meta[name="al:ios:app_name"][value="Medium"]': MediumExtractor,
  'meta[name="generator"][value="blogger"]': BloggerExtractor,
};

/**
 * Picks a custom extractor by probing the parsed document for known markers.
 *
 * @param {Function} $ - Query function for the loaded document, e.g. the
 *   value returned by `cheerio.load(html)`.
 * @returns {Object|undefined} The extractor registered for the first selector
 *   in `Detectors` that matches at least one element; `undefined` when no
 *   selector matches or when `$` is not callable.
 */
export default function detectByHtml($) {
  // A caller may have no document to offer, or may hold a non-callable value
  // in place of one (NOTE(review): presumably a failed resource object --
  // confirm against the caller). Report "no match" rather than throwing.
  if (typeof $ !== 'function') {
    return undefined;
  }

  const selector = Object.keys(Detectors).find(s => $(s).length > 0);

  // `selector` is undefined when nothing matched, which yields undefined here.
  return Detectors[selector];
}
Original file line number Diff line number Diff line change 1+ import assert from 'assert' ;
2+ import cheerio from 'cheerio' ;
3+
4+ import detectByHtml from './detect-by-html' ;
5+
// Specs for detectByHtml: one positive match and one no-match document.
describe('detectByHtml', () => {
  it('detects a medium post from the html', () => {
    // The Medium marker is a meta tag, so a bare <head> is enough.
    const $ = cheerio.load(
      '<head><meta name="al:ios:app_name" value="Medium" /></head>'
    );
    const extractor = detectByHtml($);

    assert.equal(extractor.domain, 'medium.com');
  });

  it('returns nothing if no match is found', () => {
    const $ = cheerio.load('<div></div>');

    // Loose equality on purpose: an undefined result also equals null.
    assert.equal(detectByHtml($), null);
  });
});
Original file line number Diff line number Diff line change @@ -2,11 +2,13 @@ import URL from 'url';
22
33import Extractors from './all' ;
44import GenericExtractor from './generic' ;
5+ import detectByHtml from './detect-by-html' ;
56
/**
 * Chooses the extractor to use for a page.
 *
 * Lookup order: exact hostname, then the last two labels of the hostname,
 * then markers found in the document itself, then the generic extractor.
 *
 * @param {string} url - Page URL; parsed only when `parsedUrl` is falsy.
 * @param {Object} [parsedUrl] - Pre-parsed URL exposing `hostname`.
 * @param {Function} [$] - Query function for the loaded document. Optional:
 *   when it is missing or not callable the document probe is skipped, so
 *   existing two-argument callers keep working.
 * @returns {Object} The selected extractor.
 */
export default function getExtractor(url, parsedUrl, $) {
  const { hostname } = parsedUrl || URL.parse(url);
  const baseDomain = hostname.split('.').slice(-2).join('.');

  return (
    Extractors[hostname] ||
    Extractors[baseDomain] ||
    // Only probe the document when there is a callable `$` to probe with.
    (typeof $ === 'function' ? detectByHtml($) : undefined) ||
    GenericExtractor
  );
}
Original file line number Diff line number Diff line change 11import assert from 'assert' ;
2+ import cheerio from 'cheerio' ;
23
34import getExtractor from './get-extractor' ;
45
56describe ( 'getExtractor(url)' , ( ) => {
67 it ( 'returns GenericExtractor if no custom extractor is found' , ( ) => {
7- const extractor = getExtractor ( 'http://example.com' ) ;
8+ const extractor = getExtractor ( 'http://example.com' , null , cheerio . load ( '<div />' ) ) ;
89
910 assert . equal ( extractor . domain , '*' ) ;
1011 } ) ;
@@ -26,4 +27,14 @@ describe('getExtractor(url)', () => {
2627
2728 assert . equal ( extractor . domain , 'wikipedia.org' ) ;
2829 } ) ;
30+
31+ it ( 'returns a custom extractor based on detectors' , ( ) => {
32+ const html =
33+ '<head><meta name="al:ios:app_name" value="Medium" /></head>' ;
34+
35+ const $ = cheerio . load ( html ) ;
36+ const extractor = getExtractor ( 'http://foo.com' , null , $ ) ;
37+
38+ assert . equal ( extractor . domain , 'medium.com' ) ;
39+ } ) ;
2940} ) ;
Original file line number Diff line number Diff line change @@ -31,11 +31,11 @@ const Mercury = {
3131 return Errors . badUrl ;
3232 }
3333
34- const Extractor = getExtractor ( url , parsedUrl ) ;
35- // console.log(`Using extractor for ${Extractor.domain}`);
36-
3734 const $ = await Resource . create ( url , html , parsedUrl ) ;
3835
36+ const Extractor = getExtractor ( url , parsedUrl , $ ) ;
37+ // console.log(`Using extractor for ${Extractor.domain}`);
38+
3939 // If we found an error creating the resource, return that error
4040 if ( $ . failed ) {
4141 return $ ;
You can’t perform that action at this time.
0 commit comments