@@ -3,112 +3,275 @@ const got = require(`got`)
33const crypto = require ( `crypto` )
44const path = require ( `path` )
55const { isWebUri } = require ( `valid-url` )
6+ const Queue = require ( `better-queue` )
67
78const { createFileNode } = require ( `./create-file-node` )
/**
 * Build the cache key under which data for a remote url is stored.
 *
 * @param  {String} url
 * @return {String}
 */
const cacheId = url => {
  const prefix = `create-remote-file-node-`
  return prefix.concat(url)
}
910
11+ /********************
12+ * Type Definitions *
13+ ********************/
14+
1015/**
11- * Index of promises resolving to File node from remote url
16+ * @typedef {Redux }
17+ * @see [Redux Docs]{@link https://redux.js.org/api-reference}
1218 */
13- const processingCache = { }
1419
15- module . exports = ( { url, store, cache, createNode, auth = { } } ) => {
16- // Check if we already requested node for this remote file
17- // and return stored promise if we did.
18- if ( processingCache [ url ] ) {
19- return processingCache [ url ]
20+ /**
21+ * @typedef {GatsbyCache }
22+ * @see gatsby/packages/gatsby/utils/cache.js
23+ */
24+
25+ /**
26+ * @typedef {Auth }
27+ * @type {Object }
28+ * @property {String } htaccess_pass
29+ * @property {String } htaccess_user
30+ */
31+
/**
 * @typedef {CreateRemoteFileNodePayload}
 * @type {Object}
 * @description Create Remote File Node Payload
 *
 * @property {String} url
 * @property {Redux} store
 * @property {GatsbyCache} cache
 * @property {Function} createNode
 * @property {Auth} [auth]
 */
43+
44+ /*********
45+ * utils *
46+ *********/
47+
/**
 * createHash
 * --
 *
 * Compute the hex-encoded md5 digest of the given string.
 *
 * @param  {String} str
 * @return {String} lowercase hexadecimal digest
 */
const createHash = str => {
  const hasher = crypto.createHash(`md5`)
  hasher.update(str)
  return hasher.digest(`hex`)
}
60+
// Directory names, relative to the program directory, under which
// downloaded files are stored.
const CACHE_DIR = `.cache`
const FS_PLUGIN_DIR = `gatsby-source-filesystem`

/**
 * createFilePath
 * --
 *
 * Build the path of a file stored under
 * `<directory>/.cache/gatsby-source-filesystem`.
 *
 * @param  {String} directory
 * @param  {String} filename  base name of the file, without extension
 * @param  {String} ext  appended verbatim to filename
 * @return {String}
 */
const createFilePath = (directory, filename, ext) => {
  const basename = `${filename}${ext}`
  return path.join(directory, CACHE_DIR, FS_PLUGIN_DIR, basename)
}
79+
80+ /********************
81+ * Queue Management *
82+ ********************/
83+
/**
 * Queue
 * Use the task's url as the id
 * When pushing a task with a similar id, prefer the original task
 * as it's already in the processing cache
 */
const queue = new Queue(pushToQueue, {
  id: `url`,
  // The merge callback is error-first: cb(error, mergedTask). Passing the
  // old task as the first argument would report it as a merge error.
  merge: (old, _, cb) => cb(null, old),
  concurrent: 200,
})
95+
96+ /**
97+ * @callback {Queue~queueCallback }
98+ * @param {* } error
99+ * @param {* } result
100+ */
101+
/**
 * pushToQueue
 * --
 * Handle tasks that are pushed in to the Queue
 *
 * Failures are reported through the callback's error argument so the queue
 * marks the task as failed instead of finishing it with an Error as result.
 *
 * @param {CreateRemoteFileNodePayload} task
 * @param {Queue~queueCallback} cb
 * @return {Promise<null>}
 */
async function pushToQueue(task, cb) {
  let node
  try {
    node = await processRemoteNode(task)
  } catch (e) {
    return cb(e)
  }
  // Called outside the try block so an exception thrown by the callback
  // itself cannot trigger a second callback invocation.
  return cb(null, node)
}
21120
22- return ( processingCache [ url ] = new Promise ( async ( resolve , reject ) => {
23- if ( ! url || isWebUri ( url ) === undefined ) {
24- resolve ( )
25- return
26- }
121+ /******************
122+ * Core Functions *
123+ ******************/
124+
/**
 * requestRemoteNode
 * --
 * Download the requested file into tmpFilename
 *
 * The promise settles only after the body has been completely written, so
 * the caller can safely move or remove the temp file afterwards.
 *
 * @param  {String} url
 * @param  {Headers} headers  spread into the options passed to `got.stream`
 * @param  {String} tmpFilename  path the response body is streamed to
 * @param  {String} filename  currently unused
 * @return {Promise<Object>} Resolves with the [http Result Object]{@link https://nodejs.org/api/http.html#http_class_http_serverresponse}; rejects with `{ error, body, response }` on request errors and `{ error }` on write errors
 */
const requestRemoteNode = (url, headers, tmpFilename, filename) =>
  new Promise((resolve, reject) => {
    const responseStream = got.stream(url, { ...headers, timeout: 30000 })
    const fsWriteStream = fs.createWriteStream(tmpFilename)
    let response

    responseStream.pipe(fsWriteStream)
    responseStream.on(`downloadProgress`, pro => console.log(pro))

    // If there's a 400/500 response or other error.
    responseStream.on(`error`, (error, body, response) => {
      fs.removeSync(tmpFilename)
      reject({ error, body, response })
    })

    // Surface disk errors instead of leaving the promise pending forever.
    fsWriteStream.on(`error`, error => {
      reject({ error })
    })

    // Headers arrive before the body; remember them and wait for the
    // write stream to flush before handing control back to the caller.
    responseStream.on(`response`, res => {
      response = res
    })

    fsWriteStream.on(`finish`, () => {
      resolve(response)
    })
  })
151+
/**
 * processRemoteNode
 * --
 * Request the remote file and return the fileNode
 *
 * @param {CreateRemoteFileNodePayload} options
 * @return {Promise<Object|null>} Resolves with the fileNode, or with null
 *   when the download or any later step fails
 */
async function processRemoteNode({ url, store, cache, createNode, auth = {} }) {
  // Ensure our cache directory exists.
  const programDir = store.getState().program.directory
  await fs.ensureDir(path.join(programDir, CACHE_DIR, FS_PLUGIN_DIR))

  // See if there's response headers for this url
  // from a previous request.
  const cachedHeaders = await cache.get(cacheId(url))
  const headers = {}

  // Add htaccess authentication if passed in. This isn't particularly
  // extensible. We should define a proper API that we validate.
  if (auth && auth.htaccess_pass && auth.htaccess_user) {
    headers.auth = `${auth.htaccess_user}:${auth.htaccess_pass}`
  }

  // NOTE(review): `headers` is handed to requestRemoteNode, which spreads it
  // into the got options object rather than under a `headers` key — confirm
  // that If-None-Match actually reaches the server.
  if (cachedHeaders && cachedHeaders.etag) {
    headers[`If-None-Match`] = cachedHeaders.etag
  }

  // Create the temp and permanent file names for the url.
  const digest = createHash(url)
  const ext = path.parse(url).ext

  const tmpFilename = createFilePath(programDir, `tmp-${digest}`, ext)
  const filename = createFilePath(programDir, digest, ext)

  // Fetch the file.
  try {
    const response = await requestRemoteNode(url, headers, tmpFilename, filename)

    // Save the response headers for future requests. Awaited so that a
    // failing cache write is handled by the catch below instead of
    // surfacing as an unhandled rejection.
    await cache.set(cacheId(url), response.headers)

    if (response.statusCode === 200) {
      // If the status code is 200, move the piped temp file to the real name.
      await fs.move(tmpFilename, filename, { overwrite: true })
    } else {
      // Else if 304, remove the empty response.
      await fs.remove(tmpFilename)
    }

    // Create the file node.
    const fileNode = await createFileNode(filename, {})

    // Override the default plugin as gatsby-source-filesystem needs to
    // be the owner of File nodes or there'll be conflicts if any other
    // File nodes are created through normal usages of
    // gatsby-source-filesystem.
    createNode(fileNode, { name: `gatsby-source-filesystem` })

    return fileNode
  } catch (err) {
    // Best effort: a failed download must not abort the caller, so the
    // error is dropped here and reported as a null node below.
  }
  return null
}
222+
/**
 * Index of promises resolving to File node from remote url
 *
 * Created without a prototype so that a url which happens to match an
 * inherited Object property name (e.g. `constructor`) is never mistaken
 * for a cached entry.
 */
const processingCache = Object.create(null)
/**
 * pushTask
 * --
 * Enqueue a task and expose its outcome as a promise.
 *
 * The returned promise never rejects: it resolves with the value delivered
 * by the ticket's `finish` event, or with undefined when the ticket emits
 * `failed`.
 *
 * @param {CreateRemoteFileNodePayload} task
 * @return {Promise<Object>}
 */
const pushTask = task =>
  new Promise(resolve => {
    const ticket = queue.push(task)
    ticket.on(`finish`, result => resolve(result))
    ticket.on(`failed`, () => resolve())
  })
246+
247+ /***************
248+ * Entry Point *
249+ ***************/
250+
251+ /**
252+ * createRemoteFileNode
253+ * --
254+ *
255+ * Download a remote file
256+ * First checks cache to ensure duplicate requests aren't processed
257+ * Then pushes to a queue
258+ *
259+ * @param {CreateRemoteFileNodePayload } options
260+ * @return {Promise<Object> } Returns the created node
261+ */
262+ module . exports = ( { url, store, cache, createNode, auth = { } } ) => {
263+ // Check if we already requested node for this remote file
264+ // and return stored promise if we did.
265+ if ( processingCache [ url ] ) {
266+ return processingCache [ url ]
267+ }
268+
269+
270+ if ( ! url || isWebUri ( url ) === undefined ) {
271+ // should we resolve here, or reject?
272+ // Technically, it's invalid input
273+ return Promise . resolve ( )
274+ }
275+
276+ return ( processingCache [ url ] = pushTask ( { url, store, cache, createNode, auth } ) )
114277}
0 commit comments