-
Notifications
You must be signed in to change notification settings - Fork 57
Expand file tree
/
Copy pathlinkchecker.js
More file actions
131 lines (117 loc) · 4.03 KB
/
linkchecker.js
File metadata and controls
131 lines (117 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Check that every resource (images, stylesheets, scripts) is in the same folder as the spec document and is reachable.
import puppeteer from 'puppeteer';
// Rule descriptor passed to sr.warning() by check() for the 'display' message.
const self = {
  name: 'links.linkchecker',
  section: 'document-body',
  rule: 'brokenLink',
};

// Rule descriptor passed to sr.error() by check() for resources that are
// outside the document folder or that answer with an HTTP error status.
const compound = {
  name: 'links.linkchecker',
  section: 'compound',
  rule: 'compoundFilesLocation',
};
// URLs exempt from the "same folder as the document" rule. RegExp entries are
// matched with .test(), string entries must equal the URL exactly (see
// includedByReg, which uses this list by default).
// Dots in host names are escaped so that "." only matches a literal dot;
// otherwise look-alike hosts such as "wwwxw3.org" would be exempted too.
const allowList = [
  /^https:\/\/www\.w3\.org\/StyleSheets\//,
  /^https:\/\/www\.w3\.org\/scripts\//,
  'https://www.w3.org/TR/tr-outdated-spec',
  /^https:\/\/www\.w3\.org\/analytics\/piwik\//,
  /^https:\/\/test\.csswg\.org\/harness\//,
  /^https:\/\/www\.w3\.org\/assets\//,
  /^https:\/\/static\.cloudflareinsights\.com\/beacon\.min\.js/,
  /^https:\/\/www\.w3\.org\/Icons\/member_subm\.png/,
  /^https:\/\/www\.w3\.org\/favicon\.ico/,
  /^data:/,
];
// Simplified URLs whose HTTP error status (>= 400) is not reported by check().
const noRespondAllowList = [
  'https://www.w3.org/TR/tr-outdated-spec',
  'https://www.w3.org/analytics/piwik/matomo.js',
];
// Re-export the rule name from the `self` descriptor as a named export.
export const name = self.name;
/**
 * Reduce a URL to "origin + path", dropping the query string and fragment.
 * When the parsed URL has no usable origin (its `origin` is the string
 * 'null', e.g. for `data:` URLs) the protocol is used as the prefix instead.
 *
 * @param {string} url - absolute URL; anything `new URL()` rejects throws
 * @returns {string} the simplified URL
 */
function simplifyURL(url) {
  const { origin, protocol, pathname } = new URL(url);
  const prefix = origin === 'null' ? protocol : origin;
  return `${prefix}${pathname}`;
}
/**
 * Like Array.prototype.includes(), but list entries may be RegExps as well
 * as plain strings: a RegExp entry matches when it tests true against the
 * URL, a string entry matches only on strict equality.
 *
 * @param {string|null|undefined} url - URL to look up; a missing value never matches
 * @param {Array<RegExp|string>} regArray - entries to match against, defaults to allowList
 * @returns {boolean} true if at least one entry matches the URL
 */
function includedByReg(url, regArray = allowList) {
  // RegExp.prototype.test() coerces its argument to a string, so a missing
  // value (e.g. a request without a Referer header) would be tested as the
  // text "undefined"/"null". Treat it as "not on the list" instead.
  if (url == null) {
    return false;
  }
  return regArray.some(item =>
    item instanceof RegExp ? item.test(url) : item === url
  );
}
/**
 * Load the document at `sr.url` in a headless browser (puppeteer) and inspect
 * every response the page triggers. Two problems are reported via
 * `sr.error(compound, ...)`:
 *  - 'not-same-folder': the resource is neither under the document's folder,
 *    nor allow-listed (by its own URL or its referer), nor the document itself;
 *  - 'response-error' / 'response-error-with-redirect': the resource answered
 *    with a status >= 400 and is not in noRespondAllowList.
 * Responses for `<origin>/favicon.ico` are ignored entirely.
 *
 * @param {object} sr - reporter object; this function reads `sr.url` and calls
 *   `sr.warning()` and `sr.error()`
 * @param {Function} done - called with no arguments once the check completed
 * @returns {Promise<*>} resolves after `done` was called; rejects (without
 *   calling `done`) if the URL is invalid or the browser/page fails
 */
export async function check(sr, done) {
  // send out warning for /nu W3C link checker.
  sr.warning(self, 'display', { link: sr.url });
  if (!sr.url) {
    return done();
  }

  // sr.url is used as base url. Every other resource should be in the same folder as base. e.g.
  // - spec doc: https://www.w3.org/TR/2021/WD-pubrules-20210401/
  // - image (pass): https://www.w3.org/TR/2021/WD-pubrules-20210401/images/sample.png
  // - image (pass): https://www.w3.org/TR/2021/WD-pubrules-20210401/sample.png
  // - image (error): https://w3c.github.io/pubrules/sample.png
  // Both values are derived before the browser is launched so that an
  // invalid sr.url throws without leaving a browser process behind.
  const docPath = sr.url.replace(/\/[^/]+$/, '/').replace(/^https?:/, '');
  const { origin } = new URL(sr.url);

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--disable-gpu'],
  });

  try {
    const page = await browser.newPage();

    page.on('response', response => {
      const url = simplifyURL(response.url());
      const { referer } = response.request().headers();

      if (url === `${origin}/favicon.ico`) {
        return;
      }

      // check if resource is in same folder as base document
      if (
        !url.replace(/^https?:/, '').startsWith(docPath) &&
        !(includedByReg(url) || includedByReg(referer)) &&
        url !== sr.url
      ) {
        sr.error(compound, 'not-same-folder', { base: docPath, url });
      }

      // check if every resource's status code is ok, ignore 3xx
      const status = response.status();
      if (status >= 400 && !noRespondAllowList.includes(url)) {
        const chain = response.request().redirectChain();
        // If an url is redirected from another, chain shall exist
        if (chain.length) {
          sr.error(compound, 'response-error-with-redirect', {
            url,
            originUrl: chain[0].url(),
            status,
            text: response.statusText(),
            referer,
          });
        } else {
          sr.error(compound, 'response-error', {
            url,
            status,
            text: response.statusText(),
            referer,
          });
        }
      }
    });

    await page.goto(sr.url, { waitUntil: 'load', timeout: 60000 });
  } finally {
    // Always shut the browser down, even when navigation fails or times out;
    // otherwise the browser process would be leaked.
    await browser.close();
  }

  done();
}