'use strict'

const {
  mkdir,
  readFile,
  rm,
  stat,
  truncate,
  writeFile,
} = require('fs/promises')
const contentPath = require('./content/path')
const fsm = require('fs-minipass')
const glob = require('./util/glob.js')
const index = require('./entry-index')
const path = require('path')
const ssri = require('ssri')

const hasOwnProperty = (obj, key) =>
  Object.prototype.hasOwnProperty.call(obj, key)

const verifyOpts = (opts) => ({
  concurrency: 20,
  log: { silly () {} },
  ...opts,
})

module.exports = verify
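
// Runs every verification step in order and returns the merged stats.
// A minimal usage sketch (the require path, cache path, and filter are
// illustrative, not part of this module):
//
//   const verify = require('./verify.js')
//   const stats = await verify('/path/to/cache', {
//     filter: (entry) => entry.key.startsWith('my-app:'),
//   })
//   console.log(stats.verifiedContent, 'entries verified,',
//     stats.reclaimedSize, 'bytes reclaimed')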
async function verify (cache, opts) {
  opts = verifyOpts(opts)
  opts.log.silly('verify', 'verifying cache at', cache)

  const steps = [
    markStartTime,
    fixPerms,
    garbageCollect,
    rebuildIndex,
    cleanTmp,
    writeVerifile,
    markEndTime,
  ]

  const stats = {}
  for (const step of steps) {
    const label = step.name
    const start = new Date()
    const s = await step(cache, opts)
    if (s) {
      Object.keys(s).forEach((k) => {
        stats[k] = s[k]
      })
    }
    const end = new Date()
    if (!stats.runTime) {
      stats.runTime = {}
    }
    stats.runTime[label] = end - start
  }
  stats.runTime.total = stats.endTime - stats.startTime
  opts.log.silly(
    'verify',
    'verification finished for',
    cache,
    'in',
    `${stats.runTime.total}ms`
  )
  return stats
}
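
// The start/end markers run as ordinary steps so their timestamps are
// merged into `stats`, letting `verify` compute `runTime.total` from them.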
async function markStartTime () {
  return { startTime: new Date() }
}

async function markEndTime () {
  return { endTime: new Date() }
}
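
// Note: despite its name, this step only ensures the cache root exists
// (`mkdir` with `recursive: true` is a no-op on an existing directory);
// it does not change ownership or modes.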
async function fixPerms (cache, opts) {
  opts.log.silly('verify', 'fixing cache permissions')
  await mkdir(cache, { recursive: true })
  return null
}

// Implements a naive mark-and-sweep tracing garbage collector.
//
// The algorithm is basically as follows:
// 1. Read (and filter) all index entries ("pointers")
// 2. Mark each integrity value as "live"
// 3. Read entire filesystem tree in `content-vX/` dir
// 4. If content is live, verify its checksum and delete it if it fails
// 5. If content is not marked as live, rm it.
//
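// For reference, a content file path has the shape (digest illustrative):
//
//   <cache>/content-vX/sha512/ab/cd/ef0123...
//
// so the sweep below recovers the full hex digest by rejoining the last
// three path segments, and the algorithm from the segment above them.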
async function garbageCollect (cache, opts) {
  opts.log.silly('verify', 'garbage collecting content')
  const { default: pMap } = await import('p-map')
  const indexStream = index.lsStream(cache)
  const liveContent = new Set()
  indexStream.on('data', (entry) => {
    if (opts.filter && !opts.filter(entry)) {
      return
    }

    // integrity is stringified, re-parse it so we can get each hash
    const integrity = ssri.parse(entry.integrity)
    for (const algo in integrity) {
      liveContent.add(integrity[algo].toString())
    }
  })
  await new Promise((resolve, reject) => {
    indexStream.on('end', resolve).on('error', reject)
  })
  const contentDir = contentPath.contentDir(cache)
  const files = await glob(path.join(contentDir, '**'), {
    follow: false,
    nodir: true,
    nosort: true,
  })
  const stats = {
    verifiedContent: 0,
    reclaimedCount: 0,
    reclaimedSize: 0,
    badContentCount: 0,
    keptSize: 0,
  }
  await pMap(
    files,
    async (f) => {
      // the digest is spread over the last three path segments and the
      // algorithm is the directory just above them
      const split = f.split(/[/\\]/)
      const digest = split.slice(split.length - 3).join('')
      const algo = split[split.length - 4]
      const integrity = ssri.fromHex(digest, algo)
      if (liveContent.has(integrity.toString())) {
        const info = await verifyContent(f, integrity)
        if (!info.valid) {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        } else {
          stats.verifiedContent++
          stats.keptSize += info.size
        }
      } else {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        const s = await stat(f)
        await rm(f, { recursive: true, force: true })
        stats.reclaimedSize += s.size
      }
      return stats
    },
    { concurrency: opts.concurrency }
  )
  return stats
}
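
// Checks a single content file against its expected integrity. A missing
// file reports { size: 0, valid: false }; content failing the checksum
// (EINTEGRITY) is removed and marked invalid; any other error is rethrown.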
async function verifyContent (filepath, sri) {
  const contentInfo = {}
  try {
    const { size } = await stat(filepath)
    contentInfo.size = size
    contentInfo.valid = true
    await ssri.checkStream(new fsm.ReadStream(filepath), sri)
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { size: 0, valid: false }
    }
    if (err.code !== 'EINTEGRITY') {
      throw err
    }

    await rm(filepath, { recursive: true, force: true })
    contentInfo.valid = false
  }
  return contentInfo
}
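
// Groups surviving index entries by hashed key and rewrites each bucket
// file. Sketch of the intermediate shape (hash and entries illustrative):
//
//   buckets = {
//     'deadbeef': [entry1, entry2],  // array also carries a _path property
//   }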
async function rebuildIndex (cache, opts) {
  opts.log.silly('verify', 'rebuilding index')
  const { default: pMap } = await import('p-map')
  const entries = await index.ls(cache)
  const stats = {
    missingContent: 0,
    rejectedEntries: 0,
    totalEntries: 0,
  }
  const buckets = {}
  for (const k in entries) {
    /* istanbul ignore else */
    if (hasOwnProperty(entries, k)) {
      const hashed = index.hashKey(k)
      const entry = entries[k]
      const excluded = opts.filter && !opts.filter(entry)
      excluded && stats.rejectedEntries++
      if (buckets[hashed] && !excluded) {
        buckets[hashed].push(entry)
      } else if (buckets[hashed] && excluded) {
        // skip
      } else if (excluded) {
        // an empty bucket is still created so its file gets truncated,
        // removing the excluded entry from disk
        buckets[hashed] = []
        buckets[hashed]._path = index.bucketPath(cache, k)
      } else {
        buckets[hashed] = [entry]
        buckets[hashed]._path = index.bucketPath(cache, k)
      }
    }
  }
  await pMap(
    Object.keys(buckets),
    (key) => {
      return rebuildBucket(cache, buckets[key], stats, opts)
    },
    { concurrency: opts.concurrency }
  )
  return stats
}
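
// Empties a bucket file, then re-inserts only the entries whose content
// still exists on disk; missing content counts as rejected/missing.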
async function rebuildBucket (cache, bucket, stats) {
  await truncate(bucket._path)
  // This needs to be serialized because cacache explicitly
  // lets very racy bucket conflicts clobber each other.
  for (const entry of bucket) {
    const content = contentPath(cache, entry.integrity)
    try {
      await stat(content)
      await index.insert(cache, entry.key, entry.integrity, {
        metadata: entry.metadata,
        size: entry.size,
        time: entry.time,
      })
      stats.totalEntries++
    } catch (err) {
      if (err.code === 'ENOENT') {
        stats.rejectedEntries++
        stats.missingContent++
      } else {
        throw err
      }
    }
  }
}
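
// The tmp directory holds scratch files from in-progress writes, so it is
// assumed safe to remove wholesale here.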
function cleanTmp (cache, opts) {
  opts.log.silly('verify', 'cleaning tmp directory')
  return rm(path.join(cache, 'tmp'), { recursive: true, force: true })
}
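
// Records the completion time as epoch milliseconds in `_lastverified`;
// `lastRun` below reads it back as a Date.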
async function writeVerifile (cache, opts) {
  const verifile = path.join(cache, '_lastverified')
  opts.log.silly('verify', 'writing verifile to ' + verifile)
  return writeFile(verifile, `${Date.now()}`)
}

module.exports.lastRun = lastRun
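
// Usage sketch (require path and cache path illustrative):
//
//   const when = await require('./verify.js').lastRun('/path/to/cache')
//   console.log('cache last verified at', when.toISOString())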
async function lastRun (cache) {
  const data = await readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' })
  return new Date(+data)
}