@@ -8,6 +8,7 @@ var _objectWithoutProperties = _interopDefault(require('@babel/runtime-corejs2/h
88var _asyncToGenerator = _interopDefault ( require ( '@babel/runtime-corejs2/helpers/asyncToGenerator' ) ) ;
99var URL = _interopDefault ( require ( 'url' ) ) ;
1010var cheerio = _interopDefault ( require ( 'cheerio' ) ) ;
11+ var TurndownService = _interopDefault ( require ( 'turndown' ) ) ;
1112var iconv = _interopDefault ( require ( 'iconv-lite' ) ) ;
1213var _parseInt = _interopDefault ( require ( '@babel/runtime-corejs2/core-js/parse-int' ) ) ;
1314var _slicedToArray = _interopDefault ( require ( '@babel/runtime-corejs2/helpers/slicedToArray' ) ) ;
@@ -21,7 +22,6 @@ var _Set = _interopDefault(require('@babel/runtime-corejs2/core-js/set'));
2122var _typeof = _interopDefault ( require ( '@babel/runtime-corejs2/helpers/typeof' ) ) ;
2223var _getIterator = _interopDefault ( require ( '@babel/runtime-corejs2/core-js/get-iterator' ) ) ;
2324var _Object$keys = _interopDefault ( require ( '@babel/runtime-corejs2/core-js/object/keys' ) ) ;
24- var TurndownService = _interopDefault ( require ( 'turndown' ) ) ;
2525var stringDirection = _interopDefault ( require ( 'string-direction' ) ) ;
2626var validUrl = _interopDefault ( require ( 'valid-url' ) ) ;
2727var moment = _interopDefault ( require ( 'moment-timezone' ) ) ;
@@ -6018,9 +6018,7 @@ var GenericExtractor = {
60186018 } ,
60196019 extract : function extract ( options ) {
60206020 var html = options . html ,
6021- $ = options . $ ,
6022- _options$contentType = options . contentType ,
6023- contentType = _options$contentType === void 0 ? 'html' : _options$contentType ;
6021+ $ = options . $ ;
60246022
60256023 if ( html && ! $ ) {
60266024 var loaded = cheerio . load ( html ) ;
@@ -6054,24 +6052,13 @@ var GenericExtractor = {
60546052 url = _this$url_and_domain . url ,
60556053 domain = _this$url_and_domain . domain ;
60566054
6057- var convertedContent ;
6058-
6059- if ( contentType === 'html' ) {
6060- convertedContent = content ;
6061- } else if ( contentType === 'text' ) {
6062- convertedContent = $ . text ( cheerio . load ( content ) ) ;
6063- } else if ( contentType === 'markdown' ) {
6064- var turndownService = new TurndownService ( ) ;
6065- convertedContent = turndownService . turndown ( content ) ;
6066- }
6067-
60686055 return {
60696056 title : title ,
60706057 author : author ,
60716058 date_published : date_published || null ,
60726059 dek : dek ,
60736060 lead_image_url : lead_image_url ,
6074- content : convertedContent ,
6061+ content : content ,
60756062 next_page_url : next_page_url ,
60766063 url : url ,
60776064 domain : domain ,
@@ -6161,9 +6148,7 @@ function select(opts) {
61616148 type = opts . type ,
61626149 extractionOpts = opts . extractionOpts ,
61636150 _opts$extractHtml = opts . extractHtml ,
6164- extractHtml = _opts$extractHtml === void 0 ? false : _opts$extractHtml ,
6165- _opts$contentType = opts . contentType ,
6166- contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType ; // Skip if there's not extraction for this type
6151+ extractHtml = _opts$extractHtml === void 0 ? false : _opts$extractHtml ; // Skip if there's not extraction for this type
61676152
61686153 if ( ! extractionOpts ) return null ; // If a string is hardcoded for a type (e.g., Wikipedia
61696154 // contributors), return the string
@@ -6205,19 +6190,7 @@ function select(opts) {
62056190 $content = Cleaners [ type ] ( $content , _objectSpread ( { } , opts , {
62066191 defaultCleaner : defaultCleaner
62076192 } ) ) ;
6208-
6209- if ( contentType === 'html' ) {
6210- return $ . html ( $content ) ;
6211- }
6212-
6213- if ( contentType === 'text' ) {
6214- return $ . text ( $content ) ;
6215- }
6216-
6217- if ( contentType === 'markdown' ) {
6218- var turndownService = new TurndownService ( ) ;
6219- return turndownService . turndown ( $ . html ( $content ) ) ;
6220- }
6193+ return $ . html ( $content ) ;
62216194 }
62226195
62236196 var result ; // if selector is an array (e.g., ['img', 'src']),
@@ -6270,9 +6243,7 @@ var RootExtractor = {
62706243 var opts = arguments . length > 1 ? arguments [ 1 ] : undefined ;
62716244 var _opts = opts ,
62726245 contentOnly = _opts . contentOnly ,
6273- extractedTitle = _opts . extractedTitle ,
6274- _opts$contentType2 = _opts . contentType ,
6275- contentType = _opts$contentType2 === void 0 ? 'html' : _opts$contentType2 ; // This is the generic extractor. Run its extract method
6246+ extractedTitle = _opts . extractedTitle ; // This is the generic extractor. Run its extract method
62766247
62776248 if ( extractor . domain === '*' ) return extractor . extract ( opts ) ;
62786249 opts = _objectSpread ( { } , opts , {
@@ -6283,8 +6254,7 @@ var RootExtractor = {
62836254 var _content = extractResult ( _objectSpread ( { } , opts , {
62846255 type : 'content' ,
62856256 extractHtml : true ,
6286- title : extractedTitle ,
6287- contentType : contentType
6257+ title : extractedTitle
62886258 } ) ) ;
62896259
62906260 return {
@@ -6451,6 +6421,7 @@ var Mercury = {
64516421 _result ,
64526422 title ,
64536423 next_page_url ,
6424+ turndownService ,
64546425 _args = arguments ;
64556426
64566427 return _regeneratorRuntime . wrap ( function _callee$ ( _context ) {
@@ -6545,9 +6516,16 @@ var Mercury = {
65456516 } ) ;
65466517
65476518 case 23 :
6519+ if ( contentType === 'markdown' ) {
6520+ turndownService = new TurndownService ( ) ;
6521+ result . content = turndownService . turndown ( result . content ) ;
6522+ } else if ( contentType === 'text' ) {
6523+ result . content = $ . text ( $ ( result . content ) ) ;
6524+ }
6525+
65486526 return _context . abrupt ( "return" , result ) ;
65496527
6550- case 24 :
6528+ case 25 :
65516529 case "end" :
65526530 return _context . stop ( ) ;
65536531 }
0 commit comments