entry.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. const { Request, Response } = require('minipass-fetch')
  2. const { Minipass } = require('minipass')
  3. const MinipassFlush = require('minipass-flush')
  4. const cacache = require('cacache')
  5. const url = require('url')
  6. const CachingMinipassPipeline = require('../pipeline.js')
  7. const CachePolicy = require('./policy.js')
  8. const cacheKey = require('./key.js')
  9. const remote = require('../remote.js')
  // report whether `prop` is an own (non-inherited) property of `obj`. the
  // check is borrowed from Object.prototype so it also works on objects that
  // shadow hasOwnProperty or were created without a prototype
  const hasOwnProperty = (obj, prop) =>
    Reflect.apply(Object.prototype.hasOwnProperty, obj, [prop])
  // allow list for request headers that will be written to the cache index
  // note: we will also store any request headers
  // that are named in a response's vary header
  // the list is frozen so that nothing can alter the allow list at runtime
  const KEEP_REQUEST_HEADERS = Object.freeze([
    'accept-charset',
    'accept-encoding',
    'accept-language',
    'accept',
    'cache-control',
  ])
  // allow list for response headers that will be written to the cache index
  // note: we must not store the real response's age header, or when we load
  // a cache policy based on the metadata it will think the cached response
  // is always stale
  // the list is frozen so that nothing can alter the allow list at runtime
  const KEEP_RESPONSE_HEADERS = Object.freeze([
    'cache-control',
    'content-encoding',
    'content-language',
    'content-type',
    'date',
    'etag',
    'expires',
    'last-modified',
    'link',
    'location',
    'pragma',
    'vary',
  ])
  // return an object containing all metadata to be written to the index
  //
  // request: the request that produced the response; its `url`, `headers`
  //   and `compress` properties are read
  // response: the response being cached; its `status` and `headers` are read
  // options: `compress` overrides the request's value when it is not
  //   null/undefined, and `cacheAdditionalHeaders` (optional) lists extra
  //   response headers to keep in addition to KEEP_RESPONSE_HEADERS
  //
  // returns { time, url, reqHeaders, resHeaders, options }, plus `status`
  // when the response status is neither 200 nor 304
  const getMetadata = (request, response, options) => {
    // a header name must be a non-empty HTTP token. names taken from a vary
    // header are checked against this before they are handed to the headers
    // object, since a malformed vary value (e.g. a trailing comma) would
    // otherwise make the lookup throw and abort the cache write
    const validHeaderName = /^[!#$%&'*+\-.^_`|~0-9a-z]+$/

    const metadata = {
      time: Date.now(),
      url: request.url,
      reqHeaders: {},
      resHeaders: {},
      // options on which we must match the request and vary the response
      options: {
        compress: options.compress != null ? options.compress : request.compress,
      },
    }

    // only save the status if it's not a 200 or 304
    if (response.status !== 200 && response.status !== 304) {
      metadata.status = response.status
    }

    for (const name of KEEP_REQUEST_HEADERS) {
      if (request.headers.has(name)) {
        metadata.reqHeaders[name] = request.headers.get(name)
      }
    }

    // if the request's host header differs from the host in the url
    // we need to keep it, otherwise it's just noise and we ignore it
    const host = request.headers.get('host')
    const parsedUrl = new url.URL(request.url)
    if (host && parsedUrl.host !== host) {
      metadata.reqHeaders.host = host
    }

    // if the response has a vary header, make sure
    // we store the relevant request headers too
    if (response.headers.has('vary')) {
      const vary = response.headers.get('vary').trim()
      // a vary of "*" means every header causes a different response.
      // in that scenario, we do not include any additional headers
      // as the freshness check will always fail anyway and we don't
      // want to bloat the cache indexes
      if (vary !== '*') {
        // copy any other request headers that will vary the response,
        // skipping list members that are not usable header names
        const varyHeaders = vary
          .toLowerCase()
          .split(/\s*,\s*/)
          .filter((name) => validHeaderName.test(name))
        for (const name of varyHeaders) {
          if (request.headers.has(name)) {
            metadata.reqHeaders[name] = request.headers.get(name)
          }
        }
      }
    }

    for (const name of KEEP_RESPONSE_HEADERS) {
      if (response.headers.has(name)) {
        metadata.resHeaders[name] = response.headers.get(name)
      }
    }

    // the additional header list is optional, treat a missing one as empty
    for (const name of options.cacheAdditionalHeaders || []) {
      if (response.headers.has(name)) {
        metadata.resHeaders[name] = response.headers.get(name)
      }
    }

    return metadata
  }
  // private symbol keys under which a CacheEntry keeps its request, response
  // and policy objects, which are built lazily by the matching getters
  const [_request, _response, _policy] = ['request', 'response', 'policy']
    .map((label) => Symbol(label))
  // a single entry of the local http cache together with the operations that
  // look entries up, serve them, revalidate them and write new ones
  //
  // an instance is created either from an existing cacache index entry
  // (`entry`) or from a `request`/`response` pair that may be stored. the
  // `request`, `response` and `policy` getters build their objects from the
  // entry metadata on first access when none were passed in
  class CacheEntry {
    constructor ({ entry, request, response, options }) {
      if (entry) {
        this.key = entry.key
        this.entry = entry
        // previous versions of this module didn't write an explicit timestamp in
        // the metadata, so fall back to the entry's timestamp. we can't use the
        // entry timestamp to determine staleness because cacache will update it
        // when it verifies its data
        this.entry.metadata.time = this.entry.metadata.time || this.entry.time
      } else {
        this.key = cacheKey(request)
      }

      this.options = options

      // these properties are behind getters that lazily evaluate
      this[_request] = request
      this[_response] = response
      this[_policy] = null
    }

    // returns a CacheEntry instance that satisfies the given request
    // or undefined if no existing entry satisfies
    static async find (request, options) {
      let matches
      try {
        // compacts the index and returns an array of unique entries
        matches = await cacache.index.compact(options.cachePath, cacheKey(request), (A, B) => {
          const entryA = new CacheEntry({ entry: A, options })
          const entryB = new CacheEntry({ entry: B, options })
          return entryA.policy.satisfies(entryB.request)
        }, {
          validateEntry: (entry) => {
            // clean out entries with a buggy content-encoding value
            if (entry.metadata &&
                entry.metadata.resHeaders &&
                entry.metadata.resHeaders['content-encoding'] === null) {
              return false
            }

            // if an integrity is null, it needs to have a status specified
            if (entry.integrity === null) {
              return !!(entry.metadata && entry.metadata.status)
            }

            return true
          },
        })
      } catch (err) {
        // if the compact request fails, ignore the error and return
        return
      }

      // a cache mode of 'reload' means to behave as though we have no cache
      // on the way to the network. return undefined to allow cacheFetch to
      // create a brand new request no matter what.
      if (options.cache === 'reload') {
        return
      }

      // find the specific entry that satisfies the request
      for (const entry of matches) {
        const candidate = new CacheEntry({ entry, options })
        if (candidate.policy.satisfies(request)) {
          return candidate
        }
      }
    }

    // if the user made a PUT/POST/PATCH then we invalidate our
    // cache for the same url by deleting the index entirely
    static async invalidate (request, options) {
      const key = cacheKey(request)
      try {
        await cacache.rm.entry(options.cachePath, key, { removeFully: true })
      } catch (err) {
        // ignore errors
      }
    }

    // the request for this entry. when none was passed to the constructor a
    // GET request is rebuilt from the url, request headers and options that
    // were saved in the entry metadata
    get request () {
      if (!this[_request]) {
        this[_request] = new Request(this.entry.metadata.url, {
          method: 'GET',
          headers: this.entry.metadata.reqHeaders,
          ...this.entry.metadata.options,
        })
      }

      return this[_request]
    }

    // the response for this entry. when none was passed to the constructor a
    // body-less response is rebuilt from the entry metadata, with the status
    // defaulting to 200 and content-length taken from the entry size
    get response () {
      if (!this[_response]) {
        this[_response] = new Response(null, {
          url: this.entry.metadata.url,
          counter: this.options.counter,
          status: this.entry.metadata.status || 200,
          headers: {
            ...this.entry.metadata.resHeaders,
            'content-length': this.entry.size,
          },
        })
      }

      return this[_response]
    }

    // the CachePolicy for this entry, created on first access from the
    // entry, request, response and options
    get policy () {
      if (!this[_policy]) {
        this[_policy] = new CachePolicy({
          entry: this.entry,
          request: this.request,
          response: this.response,
          options: this.options,
        })
      }

      return this[_policy]
    }

    // wraps the response in a pipeline that stores the data
    // in the cache while the user consumes it
    //
    // status: the value to set for the x-local-cache-status header
    // returns the original response marked as 'skip' when it is not stored,
    // otherwise a new Response carrying the x-local-cache-* headers
    async store (status) {
      // if we got a status other than 200, 301, or 308,
      // or the CachePolicy forbid storage, append the
      // cache status header and return it untouched
      if (
        this.request.method !== 'GET' ||
        ![200, 301, 308].includes(this.response.status) ||
        !this.policy.storable()
      ) {
        this.response.headers.set('x-local-cache-status', 'skip')
        return this.response
      }

      const size = this.response.headers.get('content-length')
      const cacheOpts = {
        algorithms: this.options.algorithms,
        metadata: getMetadata(this.request, this.response, this.options),
        size,
        integrity: this.options.integrity,
        integrityEmitter: this.response.body.hasIntegrityEmitter && this.response.body,
      }

      let body = null
      // we only set a body if the status is a 200, redirects are
      // stored as metadata only
      if (this.response.status === 200) {
        // the flush step of the pipeline waits on this promise, which is
        // settled from the outcome of the cache write once the body is read
        let cacheWriteResolve, cacheWriteReject
        const cacheWritePromise = new Promise((resolve, reject) => {
          cacheWriteResolve = resolve
          cacheWriteReject = reject
        }).catch((err) => {
          body.emit('error', err)
        })

        body = new CachingMinipassPipeline({ events: ['integrity', 'size'] }, new MinipassFlush({
          flush () {
            return cacheWritePromise
          },
        }))
        // stick a flag on here so downstream users will know if they can expect integrity events
        // this is always true since if we aren't reusing the one from the remote fetch, we
        // are using the one from cacache
        body.hasIntegrityEmitter = true

        // the cache write is only set up once the consumer starts reading
        const onResume = () => {
          const tee = new Minipass()
          const cacheStream = cacache.put.stream(this.options.cachePath, this.key, cacheOpts)
          // re-emit the integrity and size events on our new response body so they can be reused
          cacheStream.on('integrity', i => body.emit('integrity', i))
          cacheStream.on('size', s => body.emit('size', s))
          tee.pipe(cacheStream)
          // TODO if the cache write fails, log a warning but return the response anyway
          // eslint-disable-next-line promise/catch-or-return
          cacheStream.promise().then(cacheWriteResolve, cacheWriteReject)
          body.unshift(tee)
          body.unshift(this.response.body)
        }

        body.once('resume', onResume)
        body.once('end', () => body.removeListener('resume', onResume))
      } else {
        await cacache.index.insert(this.options.cachePath, this.key, null, cacheOpts)
      }

      // note: we do not set the x-local-cache-hash header because we do not know
      // the hash value until after the write to the cache completes, which doesn't
      // happen until after the response has been sent and it's too late to write
      // the header anyway
      this.response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
      this.response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
      this.response.headers.set('x-local-cache-mode', 'stream')
      this.response.headers.set('x-local-cache-status', status)
      this.response.headers.set('x-local-cache-time', new Date().toISOString())
      const newResponse = new Response(body, {
        url: this.response.url,
        status: this.response.status,
        headers: this.response.headers,
        counter: this.options.counter,
      })
      return newResponse
    }

    // use the cached data to create a response and return it
    //
    // method: the method of the request being answered
    // options: the options of the request being answered, `counter` is used
    // status: the value to set for the x-local-cache-status header
    async respond (method, options, status) {
      let response
      if (method === 'HEAD' || [301, 308].includes(this.response.status)) {
        // if the request is a HEAD, or the response is a redirect,
        // then the metadata in the entry already includes everything
        // we need to build a response
        response = this.response
      } else {
        // we're responding with a full cached response, so create a body
        // that reads from cacache and attach it to a new Response
        const body = new Minipass()
        const headers = { ...this.policy.responseHeaders() }

        // the content is only opened once the consumer starts reading
        const onResume = () => {
          const cacheStream = cacache.get.stream.byDigest(
            this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize }
          )
          cacheStream.on('error', async (err) => {
            cacheStream.pause()
            try {
              if (err.code === 'EINTEGRITY') {
                try {
                  await cacache.rm.content(
                    this.options.cachePath, this.entry.integrity, { memoize: this.options.memoize }
                  )
                } catch (rmErr) {
                  // removing the bad content is best effort, we still
                  // want to drop the index entry that points at it
                }
              }
              if (err.code === 'ENOENT' || err.code === 'EINTEGRITY') {
                await CacheEntry.invalidate(this.request, this.options)
              }
            } catch (cleanupErr) {
              // a failed cleanup must not hide the read error from the consumer
            } finally {
              // always surface the original error and let the stream go on,
              // otherwise a failed cleanup would leave the body hanging
              body.emit('error', err)
              cacheStream.resume()
            }
          })
          // emit the integrity and size events based on our metadata so we're consistent
          body.emit('integrity', this.entry.integrity)
          body.emit('size', Number(headers['content-length']))
          cacheStream.pipe(body)
        }

        body.once('resume', onResume)
        body.once('end', () => body.removeListener('resume', onResume))
        response = new Response(body, {
          url: this.entry.metadata.url,
          counter: options.counter,
          status: 200,
          headers,
        })
      }

      response.headers.set('x-local-cache', encodeURIComponent(this.options.cachePath))
      response.headers.set('x-local-cache-hash', encodeURIComponent(this.entry.integrity))
      response.headers.set('x-local-cache-key', encodeURIComponent(this.key))
      response.headers.set('x-local-cache-mode', 'stream')
      response.headers.set('x-local-cache-status', status)
      response.headers.set('x-local-cache-time', new Date(this.entry.metadata.time).toUTCString())
      return response
    }

    // use the provided request along with this cache entry to
    // revalidate the stored response. returns a response, either
    // from the cache or from the update
    async revalidate (request, options) {
      const revalidateRequest = new Request(request, {
        headers: this.policy.revalidationHeaders(request),
      })

      let response
      try {
        // NOTE: be sure to remove the headers property from the
        // user supplied options, since we have already defined
        // them on the new request object. if they're still in the
        // options then those will overwrite the ones from the policy
        response = await remote(revalidateRequest, {
          ...options,
          headers: undefined,
        })
      } catch (err) {
        // if the network fetch fails, return the stale
        // cached response unless it has a cache-control
        // of 'must-revalidate'
        if (!this.policy.mustRevalidate) {
          return this.respond(request.method, options, 'stale')
        }

        throw err
      }

      if (this.policy.revalidated(revalidateRequest, response)) {
        // we got a 304, write a new index to the cache and respond from cache
        const metadata = getMetadata(request, response, options)
        // 304 responses do not include headers that are specific to the response data
        // since they do not include a body, so we copy values for headers that were
        // in the old cache entry to the new one, if the new metadata does not already
        // include that header
        for (const name of KEEP_RESPONSE_HEADERS) {
          if (
            !hasOwnProperty(metadata.resHeaders, name) &&
            hasOwnProperty(this.entry.metadata.resHeaders, name)
          ) {
            metadata.resHeaders[name] = this.entry.metadata.resHeaders[name]
          }
        }

        // the additional header list is optional, treat a missing one as empty
        for (const name of options.cacheAdditionalHeaders || []) {
          const inMeta = hasOwnProperty(metadata.resHeaders, name)
          const inEntry = hasOwnProperty(this.entry.metadata.resHeaders, name)
          const inPolicy = hasOwnProperty(this.policy.response.headers, name)

          // if the header is in the existing entry, but it is not in the metadata
          // then we need to write it to the metadata as this will refresh the on-disk cache
          if (!inMeta && inEntry) {
            metadata.resHeaders[name] = this.entry.metadata.resHeaders[name]
          }
          // if the header is in the metadata, but not in the policy, then we need to set
          // it in the policy so that it's included in the immediate response. future
          // responses will load a new cache entry, so we don't need to change that
          if (!inPolicy && inMeta) {
            this.policy.response.headers[name] = metadata.resHeaders[name]
          }
        }

        try {
          await cacache.index.insert(options.cachePath, this.key, this.entry.integrity, {
            size: this.entry.size,
            metadata,
          })
        } catch (err) {
          // if updating the cache index fails, we ignore it and
          // respond anyway
        }
        return this.respond(request.method, options, 'revalidated')
      }

      // if we got a modified response, create a new entry based on it
      const newEntry = new CacheEntry({
        request,
        response,
        options,
      })

      // respond with the new entry while writing it to the cache
      return newEntry.store('updated')
    }
  }
  419. module.exports = CacheEntry