This repository has been archived by the owner on Oct 6, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 825
/
Copy pathmiddleware.ts
196 lines (184 loc) · 4.88 KB
/
middleware.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/*
* Copyright 2017 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import express from 'express';
import request from 'request';
/**
 * Built-in list of user agent substrings identifying bots and crawlers that
 * tend to handle JavaScript-dependent pages poorly. When no custom
 * `userAgentPattern` option is supplied, these entries are joined with `|`
 * into a single case-insensitive regular expression.
 */
export const botUserAgents: string[] = [
  'Baiduspider', 'bingbot', 'Embedly', 'facebookexternalhit',
  'LinkedInBot', 'outbrain', 'pinterest', 'quora link preview',
  'rogerbot', 'showyoubot', 'Slackbot', 'TelegramBot',
  'Twitterbot', 'vkShare', 'W3C_Validator', 'WhatsApp',
];
/**
 * A default set of file extensions for static assets that do not need to be
 * proxied.
 *
 * The entries are joined into the default `excludeUrlPattern`, which matches
 * (case-insensitively) request paths ending in `.<extension>`. Each extension
 * is listed exactly once so the generated alternation has no redundant
 * branches.
 */
const staticFileExtensions = [
  'ai',
  'avi',
  'css',
  'dat',
  'dmg',
  'doc',
  'exe',
  'flv',
  'gif',
  'ico',
  'iso',
  'jpeg',
  'jpg',
  'js',
  'less',
  'm4a',
  'm4v',
  'mov',
  'mp3',
  'mp4',
  'mpeg',
  'mpg',
  'pdf',
  'png',
  'ppt',
  'psd',
  'rar',
  'rss',
  'svg',
  'swf',
  'tif',
  'torrent',
  'ttf',
  'txt',
  'wav',
  'wmv',
  'woff',
  'xls',
  'xml',
  'zip',
];
/**
 * Options for makeMiddleware.
 *
 * Only `proxyUrl` is required; every other field falls back to the default
 * described on that field.
 */
export interface Options {
  /**
   * Base URL of the Rendertron proxy service. Required. A trailing `/` is
   * appended if it is missing.
   */
  proxyUrl: string;
  /**
   * Regular expression to match user agent to proxy. Defaults to a set of bots
   * that do not perform well with pages that require JavaScript.
   */
  userAgentPattern?: RegExp;
  /**
   * Regular expression used to exclude request URL paths. Defaults to a set of
   * typical static asset file extensions.
   */
  excludeUrlPattern?: RegExp;
  /**
   * Force web components polyfills to be loaded and enabled. Defaults to false.
   * When enabled, `?wc-inject-shadydom=true` is appended to the render URL.
   */
  injectShadyDom?: boolean;
  /**
   * Millisecond timeout for proxy requests. Defaults to 11000 milliseconds.
   */
  timeout?: number;
  /**
   * If a forwarded host header is found and matches one of the hosts in this
   * array, then that host will be used for the request to the rendertron server
   * instead of the actual host of the request.
   * This is useful if this middleware is running on a different host
   * which is proxied behind the actual site, and the rendertron server should
   * request the main site.
   */
  allowedForwardedHosts?: string[];
  /**
   * Header used to determine the forwarded host that should be used when
   * building the URL to be rendered. Only applicable if `allowedForwardedHosts`
   * is not empty.
   * Defaults to `"X-Forwarded-Host"`.
   */
  forwardedHostHeader?: string;
}
/**
 * Create a new Express middleware function that proxies requests to a
 * Rendertron bot rendering service.
 *
 * A request is proxied only when it carries a `User-Agent` header that matches
 * the user agent pattern and its path does not match the exclude pattern;
 * every other request is handed to `next()` untouched.
 *
 * @param options Middleware configuration; `proxyUrl` is mandatory.
 * @returns An Express request handler.
 * @throws Error if `options` or `options.proxyUrl` is missing.
 */
export function makeMiddleware(options: Options): express.Handler {
  if (!options || !options.proxyUrl) {
    throw new Error('Must set options.proxyUrl.');
  }
  let proxyUrl = options.proxyUrl;
  if (!proxyUrl.endsWith('/')) {
    proxyUrl += '/';
  }
  const userAgentPattern =
    options.userAgentPattern || new RegExp(botUserAgents.join('|'), 'i');
  const excludeUrlPattern =
    options.excludeUrlPattern ||
    new RegExp(`\\.(${staticFileExtensions.join('|')})$`, 'i');
  const injectShadyDom = !!options.injectShadyDom;
  // The Rendertron service itself has a hard limit of 10 seconds to render, so
  // let's give a little more time than that by default.
  const timeout = options.timeout || 11000; // Milliseconds.
  const allowedForwardedHosts = options.allowedForwardedHosts || [];
  // The forwarded-host header is only consulted when at least one forwarded
  // host has been allowed.
  const forwardedHostHeader = allowedForwardedHosts.length
    ? options.forwardedHostHeader || 'X-Forwarded-Host'
    : null;

  return function rendertronMiddleware(req, res, next) {
    const ua = req.headers['user-agent'];
    if (
      ua === undefined ||
      !testFromStart(userAgentPattern, ua) ||
      testFromStart(excludeUrlPattern, req.path)
    ) {
      next();
      return;
    }

    const forwardedHost = forwardedHostHeader && req.get(forwardedHostHeader);
    const host =
      forwardedHost && allowedForwardedHosts.includes(forwardedHost)
        ? forwardedHost
        : req.get('host');
    const incomingUrl = req.protocol + '://' + host + req.originalUrl;
    // The incoming URL is fully encoded, so the query string appended below
    // belongs to the Rendertron request rather than to the page being rendered.
    let renderUrl = proxyUrl + encodeURIComponent(incomingUrl);
    if (injectShadyDom) {
      renderUrl += '?wc-inject-shadydom=true';
    }

    request({ url: renderUrl, timeout }, (e) => {
      if (!e) {
        return;
      }
      console.error(
        `[rendertron middleware] ${e.code} error fetching ${renderUrl}`
      );
      if (res.headersSent) {
        // Part of the proxied response has already been streamed to the
        // client, so no later handler can produce a valid response. Close the
        // connection instead of falling through.
        res.destroy();
        return;
      }
      next();
    }).pipe(res);
  };
}

/**
 * Test `value` against `pattern`, always starting at the beginning of the
 * string. Global and sticky regular expressions remember `lastIndex` between
 * calls to `test()`, which would otherwise make results depend on earlier
 * requests.
 */
function testFromStart(pattern: RegExp, value: string): boolean {
  pattern.lastIndex = 0;
  return pattern.test(value);
}