// entry-index.js

'use strict'

const crypto = require('crypto')
const {
  appendFile,
  mkdir,
  readFile,
  readdir,
  rm,
  writeFile,
} = require('fs/promises')
const { Minipass } = require('minipass')
const path = require('path')
const ssri = require('ssri')
const uniqueFilename = require('unique-filename')

const contentPath = require('./content/path')
const hashToSegments = require('./util/hash-to-segments')
const indexV = require('../package.json')['cache-version'].index
const { moveFile } = require('@npmcli/fs')

const lsStreamConcurrency = 5

module.exports.NotFoundError = class NotFoundError extends Error {
  constructor (cache, key) {
    super(`No cache entry for ${key} found in ${cache}`)
    this.code = 'ENOENT'
    this.cache = cache
    this.key = key
  }
}

module.exports.compact = compact

async function compact (cache, key, matchFn, opts = {}) {
  const bucket = bucketPath(cache, key)
  const entries = await bucketEntries(bucket)
  const newEntries = []
  // we loop backwards because the bottom-most result is the newest
  // since we add new entries with appendFile
  for (let i = entries.length - 1; i >= 0; --i) {
    const entry = entries[i]
    // a null integrity could mean either a delete was appended
    // or the user has simply stored an index that does not map
    // to any content. we determine if the user wants to keep the
    // null integrity based on the validateEntry function passed in options.
    // if the integrity is null and no validateEntry is provided, we break
    // as we consider the null integrity to be a deletion of everything
    // that came before it.
    if (entry.integrity === null && !opts.validateEntry) {
      break
    }

    // if this entry is valid, and it is either the first entry or
    // the newEntries array doesn't already include an entry that
    // matches this one based on the provided matchFn, then we add
    // it to the beginning of our list
    if ((!opts.validateEntry || opts.validateEntry(entry) === true) &&
      (newEntries.length === 0 ||
        !newEntries.find((oldEntry) => matchFn(oldEntry, entry)))) {
      newEntries.unshift(entry)
    }
  }

  const newIndex = '\n' + newEntries.map((entry) => {
    const stringified = JSON.stringify(entry)
    const hash = hashEntry(stringified)
    return `${hash}\t${stringified}`
  }).join('\n')

  const setup = async () => {
    const target = uniqueFilename(path.join(cache, 'tmp'), opts.tmpPrefix)
    await mkdir(path.dirname(target), { recursive: true })
    return {
      target,
      moved: false,
    }
  }

  const teardown = async (tmp) => {
    if (!tmp.moved) {
      return rm(tmp.target, { recursive: true, force: true })
    }
  }

  const write = async (tmp) => {
    await writeFile(tmp.target, newIndex, { flag: 'wx' })
    await mkdir(path.dirname(bucket), { recursive: true })
    // we use moveFile from @npmcli/fs directly here because we
    // want to overwrite the existing file
    await moveFile(tmp.target, bucket)
    tmp.moved = true
  }

  // write the file atomically
  const tmp = await setup()
  try {
    await write(tmp)
  } finally {
    await teardown(tmp)
  }

  // reverse the list we generated so that the newest entries come first,
  // which makes iterating over them easier. the true passed to formatEntry
  // tells it to keep null integrity values; if those made it this far,
  // validateEntry returned true, so we should return them
  return newEntries.reverse().map((entry) => formatEntry(cache, entry, true))
}
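
// A hedged sketch of compact() in use; the cache path, key, and matchFn
// below are illustrative, not part of this module:
//
//   // treat entries with the same metadata.version as duplicates
//   const matchFn = (a, b) => a.metadata.version === b.metadata.version
//   const kept = await compact('/path/to/cache', 'my-key', matchFn)
//   // kept holds the surviving entries, newest first, formatted like find()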

module.exports.insert = insert

async function insert (cache, key, integrity, opts = {}) {
  const { metadata, size, time } = opts
  const bucket = bucketPath(cache, key)
  const entry = {
    key,
    integrity: integrity && ssri.stringify(integrity),
    time: time || Date.now(),
    size,
    metadata,
  }
  try {
    await mkdir(path.dirname(bucket), { recursive: true })
    const stringified = JSON.stringify(entry)
    // NOTE - Cleverness ahoy!
    //
    // This works because it's tremendously unlikely for a concurrent append
    // to corrupt an entry in a way that still matches its hash. So, we
    // prefix each line with a hash of its JSON and verify it on read.
    //
    // Thanks to @isaacs for the whiteboarding session that ended up with
    // this.
    await appendFile(bucket, `\n${hashEntry(stringified)}\t${stringified}`)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return undefined
    }

    throw err
  }
  return formatEntry(cache, entry)
}
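
// On disk, each bucket is an append-only log with one `<sha1>\t<json>` line
// per entry (see hashEntry and _bucketEntries below). A usage sketch, with
// an illustrative cache path and integrity value:
//
//   const entry = await insert('/path/to/cache', 'my-key',
//     'sha512-deadbeef...', { size: 1024, metadata: { foo: 'bar' } })
//   // entry.path points at the content address for that integrity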

module.exports.find = find

async function find (cache, key) {
  const bucket = bucketPath(cache, key)
  try {
    const entries = await bucketEntries(bucket)
    return entries.reduce((latest, next) => {
      if (next && next.key === key) {
        return formatEntry(cache, next)
      } else {
        return latest
      }
    }, null)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return null
    } else {
      throw err
    }
  }
}
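
// note that because formatEntry() maps a null-integrity tombstone to null,
// find() resolves to null when the newest entry for a key is a deletion.
// A sketch with an illustrative cache path:
//
//   const entry = await find('/path/to/cache', 'my-key')
//   // entry === null: never inserted, or deleted after its last insert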

module.exports.delete = del

function del (cache, key, opts = {}) {
  if (!opts.removeFully) {
    return insert(cache, key, null, opts)
  }

  const bucket = bucketPath(cache, key)
  return rm(bucket, { recursive: true, force: true })
}
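
// By default del() is itself an append: it writes a null-integrity
// tombstone that find() and formatEntry() treat as a deletion. With
// opts.removeFully it unlinks the whole bucket file instead:
//
//   await del('/path/to/cache', 'my-key') // appends a tombstone
//   await del('/path/to/cache', 'my-key', { removeFully: true }) // unlinks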

module.exports.lsStream = lsStream

function lsStream (cache) {
  const indexDir = bucketDir(cache)
  const stream = new Minipass({ objectMode: true })

  // Set all this up to run on the stream and then just return the stream
  Promise.resolve().then(async () => {
    const { default: pMap } = await import('p-map')
    const buckets = await readdirOrEmpty(indexDir)
    await pMap(buckets, async (bucket) => {
      const bucketPath = path.join(indexDir, bucket)
      const subbuckets = await readdirOrEmpty(bucketPath)
      await pMap(subbuckets, async (subbucket) => {
        const subbucketPath = path.join(bucketPath, subbucket)

        // "/cachename/<bucket 0xFF>/<bucket 0xFF>/*"
        const subbucketEntries = await readdirOrEmpty(subbucketPath)
        await pMap(subbucketEntries, async (entry) => {
          const entryPath = path.join(subbucketPath, entry)
          try {
            const entries = await bucketEntries(entryPath)
            // using a Map dedupes keys: later (newer) entries for a key
            // overwrite earlier ones, so each key is emitted only once
            const reduced = entries.reduce((acc, entry) => {
              acc.set(entry.key, entry)
              return acc
            }, new Map())
            // reduced is a map of key => entry
            for (const entry of reduced.values()) {
              const formatted = formatEntry(cache, entry)
              if (formatted) {
                stream.write(formatted)
              }
            }
          } catch (err) {
            if (err.code === 'ENOENT') {
              return undefined
            }
            throw err
          }
        },
        { concurrency: lsStreamConcurrency })
      },
      { concurrency: lsStreamConcurrency })
    },
    { concurrency: lsStreamConcurrency })
    stream.end()
  }).catch(err => stream.emit('error', err))

  return stream
}
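
// lsStream() returns an object-mode Minipass stream, so callers can consume
// entries with async iteration (cache path illustrative):
//
//   for await (const entry of lsStream('/path/to/cache')) {
//     console.log(entry.key, entry.path)
//   }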

module.exports.ls = ls

async function ls (cache) {
  const entries = await lsStream(cache).collect()
  return entries.reduce((acc, xs) => {
    acc[xs.key] = xs
    return acc
  }, {})
}

module.exports.bucketEntries = bucketEntries

// note: the filter argument is passed through but not currently applied
// by _bucketEntries
async function bucketEntries (bucket, filter) {
  const data = await readFile(bucket, 'utf8')
  return _bucketEntries(data, filter)
}

function _bucketEntries (data) {
  const entries = []
  data.split('\n').forEach((entry) => {
    if (!entry) {
      return
    }

    const pieces = entry.split('\t')
    if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) {
      // Hash is no good! Corruption or malice? Doesn't matter!
      // EJECT EJECT
      return
    }
    let obj
    try {
      obj = JSON.parse(pieces[1])
    } catch (_) {
      // entries that fail to parse as JSON are silently skipped
    }
    // coverage disabled here, no need to test with an entry that parses to something falsey
    // istanbul ignore else
    if (obj) {
      entries.push(obj)
    }
  })
  return entries
}
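
// A sketch of the line format _bucketEntries parses (hash and JSON
// shortened for illustration); any line whose sha1 prefix doesn't match
// its JSON payload is dropped, which is how torn appends are survived:
//
//   da39a3...\t{"key":"my-key","integrity":"sha512-...","time":1690000000000}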

module.exports.bucketDir = bucketDir

function bucketDir (cache) {
  return path.join(cache, `index-v${indexV}`)
}

module.exports.bucketPath = bucketPath

function bucketPath (cache, key) {
  const hashed = hashKey(key)
  return path.join(bucketDir(cache), ...hashToSegments(hashed))
}
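
// Assuming the usual hash-to-segments helper (which splits the hex digest
// into slices [0,2), [2,4), and [4..)), a bucket path ends up looking like:
//
//   <cache>/index-v<indexV>/<2 hex chars>/<2 hex chars>/<remaining 60 chars>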

module.exports.hashKey = hashKey

function hashKey (key) {
  return hash(key, 'sha256')
}

module.exports.hashEntry = hashEntry

function hashEntry (str) {
  return hash(str, 'sha1')
}

function hash (str, digest) {
  return crypto
    .createHash(digest)
    .update(str)
    .digest('hex')
}

function formatEntry (cache, entry, keepAll) {
  // Treat null digests as deletions. They'll shadow any previous entries.
  if (!entry.integrity && !keepAll) {
    return null
  }

  return {
    key: entry.key,
    integrity: entry.integrity,
    path: entry.integrity ? contentPath(cache, entry.integrity) : undefined,
    size: entry.size,
    time: entry.time,
    metadata: entry.metadata,
  }
}

function readdirOrEmpty (dir) {
  return readdir(dir).catch((err) => {
    if (err.code === 'ENOENT' || err.code === 'ENOTDIR') {
      return []
    }

    throw err
  })
}