fetcher.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. // This is the base class that the other fetcher types in lib
  2. // all descend from.
  3. // It handles the unpacking and retry logic that is shared among
  4. // all of the other Fetcher types.
  5. const { basename, dirname } = require('node:path')
  6. const { rm, mkdir } = require('node:fs/promises')
  7. const PackageJson = require('@npmcli/package-json')
  8. const cacache = require('cacache')
  9. const fsm = require('fs-minipass')
  10. const getContents = require('@npmcli/installed-package-contents')
  11. const npa = require('npm-package-arg')
  12. const retry = require('promise-retry')
  13. const ssri = require('ssri')
  14. const tar = require('tar')
  15. const { Minipass } = require('minipass')
  16. const { log } = require('proc-log')
  17. const _ = require('./util/protected.js')
  18. const cacheDir = require('./util/cache-dir.js')
  19. const isPackageBin = require('./util/is-package-bin.js')
  20. const removeTrailingSlashes = require('./util/trailing-slashes.js')
  // Pacote only cares about the parsed package.json contents, so this
  // unwraps the `content` property of whatever PackageJson.prepare()
  // resolves to.
  const packageJsonPrepare = (p) => {
    const loading = PackageJson.prepare(p)
    return loading.then(({ content }) => content)
  }
  // Same unwrapping as for PackageJson.prepare(), but going through
  // PackageJson.normalize(): resolves to the `content` of its result.
  const packageJsonNormalize = (p) => {
    const loading = PackageJson.normalize(p)
    return loading.then(({ content }) => content)
  }
  /**
   * Shared base for the fetcher types in lib.
   *
   * Holds the normalized spec and options, and implements the tarball
   * streaming, caching, retrying and unpacking that do not depend on where
   * the tarball actually comes from. Child classes supply resolve(),
   * packument(), manifest() and the [_.tarballFromResolved]() stream.
   */
  class FetcherBase {
    /**
     * @param {string|object} spec - package specifier, parsed with
     *   npm-package-arg relative to opts.where
     * @param {object} opts - required options object; a shallow copy is
     *   kept on this.opts
     * @throws {TypeError} if opts is not an object, or if the parsed spec
     *   type is not listed in this.types
     */
    constructor (spec, opts) {
      if (opts === null || typeof opts !== 'object') {
        throw new TypeError('options object is required')
      }

      const parsed = npa(spec, opts.where)
      this.spec = parsed
      this.allowGitIgnore = Boolean(opts.allowGitIgnore)

      // Keeping the requested spec on the fetcher is a little redundant
      // (the caller presumably knows it), but it saves bookkeeping when
      // thousands of these are dispatched at once, and the normalization
      // is useful. saveSpec is used when available since it turns things
      // like x/y#committish into github:x/y#committish. Registry deps use
      // name@rawSpec, so that xyz and xyz@ both become xyz@
      this.from = parsed.registry
        ? `${parsed.name}@${parsed.rawSpec}`
        : parsed.saveSpec

      this.#assertType()

      // Work on a copy of the options: the integrity value gets added or
      // modified on it, and the caller's object must not be affected.
      this.opts = { ...opts }

      this.cache = opts.cache || cacheDir().cacache
      this.tufCache = opts.tufCache || cacheDir().tufcache

      this.resolved = opts.resolved || null

      // sha512 is what we usually have, so cache/verify with it by default.
      // This default has to change (or be overridden) once sha512 is no
      // longer strong enough.
      this.defaultIntegrityAlgorithm = opts.defaultIntegrityAlgorithm || 'sha512'

      if (typeof opts.integrity === 'string') {
        this.opts.integrity = ssri.parse(opts.integrity)
      }

      this.package = null
      this.type = this.constructor.name
      this.fmode = opts.fmode || 0o666
      this.dmode = opts.dmode || 0o777
      // No default umask is needed: files coming out of package tarballs
      // are not chmod'ed. They are forced to a valid mode no matter what
      // the tarball entry says, and the process umask then does its job.
      // A configured umask is respected, though.
      this.umask = opts.umask || 0

      this.preferOnline = Boolean(opts.preferOnline)
      this.preferOffline = Boolean(opts.preferOffline)
      this.offline = Boolean(opts.offline)

      this.before = opts.before
      // a `before` setting always implies full metadata
      this.fullMetadata = Boolean(this.before) || Boolean(opts.fullMetadata)
      this.fullReadJson = Boolean(opts.fullReadJson)
      if (this.fullReadJson) {
        this[_.readPackageJson] = packageJsonPrepare
      } else {
        this[_.readPackageJson] = packageJsonNormalize
      }

      // rrh is a registry hostname, or 'never', or 'always'.
      // Unset and 'npmjs' both mean registry.npmjs.org
      const rrh = opts.replaceRegistryHost
      this.replaceRegistryHost = (!rrh || rrh === 'npmjs')
        ? 'registry.npmjs.org'
        : rrh

      this.defaultTag = opts.defaultTag || 'latest'
      this.registry = removeTrailingSlashes(opts.registry || 'https://registry.npmjs.org')

      // Command used to run 'prepare' scripts on directories and git dirs.
      // To use pacote with yarn, for example, set npmBin to 'yarn' and
      // npmCliConfig to yarn's equivalents.
      this.npmBin = opts.npmBin || 'npm'

      // command to install deps for preparing
      this.npmInstallCmd = opts.npmInstallCmd || ['install', '--force']

      this.npmCliConfig = opts.npmCliConfig || this.#defaultNpmCliConfig()
    }

    // Build the CLI arguments used when the caller gave no npmCliConfig.
    // XXX fill more of this in based on what we know from this.opts
    // --tag is explicitly NOT filled in: packing often happens in the
    // context of a publish, which may set a dist-tag, but certainly wants
    // to keep defaulting to latest.
    #defaultNpmCliConfig () {
      const args = [
        `--cache=${dirname(this.cache)}`,
        `--prefer-offline=${this.preferOffline}`,
        `--prefer-online=${this.preferOnline}`,
        `--offline=${this.offline}`,
      ]
      if (this.before) {
        args.push(`--before=${this.before.toISOString()}`)
      }
      args.push(
        '--no-progress',
        '--no-save',
        '--no-audit',
        // override any omit settings from the environment
        '--include=dev',
        '--include=peer',
        '--include=optional',
        // we need the actual things, not just the lockfile
        '--no-package-lock-only',
        '--no-dry-run'
      )
      return args
    }

    // The integrity stored on the options copy, or null when there is none.
    get integrity () {
      return this.opts.integrity || null
    }

    // Falsy values are ignored. An existing hash value is never replaced;
    // algorithms and hashes we do not have yet are merged in.
    set integrity (i) {
      if (!i) {
        return
      }
      const incoming = ssri.parse(i)
      const existing = this.opts.integrity
      if (!existing) {
        this.opts.integrity = incoming
        return
      }
      existing.merge(incoming)
    }

    // A fresh Error naming this fetcher type, for unimplemented methods.
    get notImplementedError () {
      const message = 'not implemented in this fetcher type: ' + this.type
      return new Error(message)
    }

    // Override in child classes.
    // Returns a Promise that resolves to the this.resolved string value;
    // the base version rejects when this.resolved is not set.
    resolve () {
      if (this.resolved) {
        return Promise.resolve(this.resolved)
      }
      return Promise.reject(this.notImplementedError)
    }

    // Override in child classes; the base version always rejects.
    packument () {
      const er = this.notImplementedError
      return Promise.reject(er)
    }

    // Override in child classes; the base version always rejects.
    // Child classes return a manifest containing:
    // - name
    // - version
    // - _resolved
    // - _integrity
    // - plus whatever else was in there (corgi, full metadata, or pj file)
    manifest () {
      const er = this.notImplementedError
      return Promise.reject(er)
    }

    // Private, should be overridden.
    // Implementations should *not* calculate or check integrity or touch
    // the cache; they *just* return the raw tarball data stream.
    [_.tarballFromResolved] () {
      throw this.notImplementedError
    }

    // Public, should not be overridden.
    // Resolves to the concatenated tarball data, decorated with
    // integrity, resolved and from properties.
    tarball () {
      const decorate = (data) => {
        const sri = this.integrity
        return Object.assign(data, {
          integrity: sri && String(sri),
          resolved: this.resolved,
          from: this.from,
        })
      }
      return this.tarballStream((stream) => stream.concat().then(decorate))
    }

    // Private.
    // Note: cacache will raise a EINTEGRITY error if the integrity doesn't match
    #tarballFromCache () {
      const began = Date.now()
      const cached = cacache.get.stream.byDigest(this.cache, this.integrity, this.opts)
      const elapsedTime = Date.now() - began
      // No fetch was logged for this, so log the cache hit explicitly.
      log.http('cache', `${this.spec} ${elapsedTime}ms (cache hit)`)
      return cached
    }

    // Whether fetched tarballs get written to the cache; true in the base.
    get [_.cacheFetches] () {
      return true
    }

    // Wrap a raw tarball stream so that this.integrity gets recorded, and
    // so that the data is also written to the cache when caching applies.
    #istream (stream) {
      const recordIntegrity = (sri) => {
        this.integrity = sri
      }
      const forwardErrorTo = (target) => (err) => target.emit('error', err)

      const caching = this.opts.cache && this[_.cacheFetches]
      if (!caching) {
        // Not caching. If the stream already emits 'integrity', piggyback
        // on its events rather than creating a new integrity stream.
        if (stream.hasIntegrityEmitter) {
          stream.on('integrity', recordIntegrity)
          return stream
        }

        const istream = ssri.integrityStream(this.opts)
        istream.on('integrity', recordIntegrity)
        stream.on('error', forwardErrorTo(istream))
        return stream.pipe(istream)
      }

      // The returned stream has to get ALL the data and proxy errors,
      // while the original tarball stream is piped into the cache too.
      // The cacache put stream is not a passthrough, so to avoid losing
      // data the pipe into the cache is set up AFTER the pipe into
      // middleStream. The cache stream flushes to disk asynchronously,
      // so middleStream is only ended once the cache stream is done.
      const middleStream = new Minipass()
      stream.on('error', forwardErrorTo(middleStream))
      stream.pipe(middleStream, { end: false })

      const cacheKey = `pacote:tarball:${this.from}`
      const cstream = cacache.put.stream(this.opts.cache, cacheKey, this.opts)
      cstream.on('integrity', recordIntegrity)
      cstream.on('error', forwardErrorTo(stream))
      stream.pipe(cstream)

      // eslint-disable-next-line promise/catch-or-return
      cstream.promise().catch(() => {}).then(() => middleStream.end())

      return middleStream
    }

    // The algorithm picked from the known integrity, or else the default.
    pickIntegrityAlgorithm () {
      const sri = this.integrity
      if (!sri) {
        return this.defaultIntegrityAlgorithm
      }
      return sri.pickAlgorithm(this.opts)
    }

    // TODO: check error class, once those are rolled out to our deps
    isDataCorruptionError (er) {
      const { code } = er
      return code === 'EINTEGRITY' || code === 'Z_DATA_ERROR'
    }

    // Child classes override this getter with the spec types they accept.
    get types () {
      return false
    }

    // Throw if this.types is set and does not include the parsed spec type.
    #assertType () {
      const supported = this.types
      if (!supported || supported.includes(this.spec.type)) {
        return
      }
      const got = this.spec.type
      const fetcher = this.constructor.name
      throw new TypeError(
        `Wrong spec type (${got}) for ${fetcher}. Supported types: ${supported.join(', ')}`
      )
    }

    // We allow ENOENTs from cacache, but not anywhere else.
    // An ENOENT trying to read a tgz file, for example, is Right Out.
    isRetriableError (er) {
      // TODO: check error class, once those are rolled out to our deps
      return this.isDataCorruptionError(er) ||
        ['ENOENT', 'EISDIR'].includes(er.code)
    }

    // Mostly internal, but has some uses.
    // Takes a function which receives a tarball stream and returns a
    // promise. That function is called 1 or more times, with streams that
    // may fail.
    //
    // Retries:
    // The function MUST handle stream errors by rejecting its promise, so
    // that the retry logic here can either try again or fail whatever
    // promise was being made (ie, failing extraction, etc.)
    //
    // Returns a Promise that resolves the same as whatever streamHandler
    // resolves to.
    //
    // This should never be overridden by child classes, but it is public.
    tarballStream (streamHandler) {
      const retryOnce = { retries: 1, minTimeout: 0, maxTimeout: 0 }

      // One attempt at streaming from the resolved location.
      const attempt = (tryAgain) =>
        streamHandler(this.#istream(this[_.tarballFromResolved]()))
          .catch((streamErr) => {
            // Most likely data integrity. A cache ENOENT is unlikely here,
            // as we are definitely not reading from the cache, but the
            // fetch subsystem may have accessed the cache and had the
            // entry blown away or something. Try one more time to be sure.
            if (!this.isRetriableError(streamErr)) {
              throw streamErr
            }
            const sri = this.integrity
            log.warn(
              'tarball',
              `tarball data for ${this.spec} (${sri}) seems to be corrupted. Trying again.`
            )
            return this.cleanupCached().then(() => tryAgain(streamErr))
          })

      // Called bare when the cache is skipped, or with the cache failure.
      const fromResolved = (er) => {
        if (er) {
          if (!this.isRetriableError(er)) {
            throw er
          }
          log.silly('tarball', `no local data for ${this.spec}. Extracting by manifest.`)
        }
        return this.resolve().then(() => retry(attempt, retryOnce))
      }

      // Only short-circuit via the cache if we have everything else we
      // will need, and the user did not ask to prefer checking online.
      const cacheUsable = !this.preferOnline && this.integrity && this.resolved
      if (!cacheUsable) {
        return fromResolved()
      }

      // Drop corrupted cache content, then let the original error through.
      const purgeIfCorrupt = (er) => {
        if (!this.isDataCorruptionError(er)) {
          throw er
        }
        const sri = this.integrity
        log.warn(
          'tarball',
          `cached data for ${this.spec} (${sri}) seems to be corrupted. Refreshing cache.`
        )
        return this.cleanupCached().then(() => {
          throw er
        })
      }

      return streamHandler(this.#tarballFromCache())
        .catch(purgeIfCorrupt)
        .catch(fromResolved)
    }

    // Remove the cached content stored under the current integrity.
    cleanupCached () {
      const { cache, integrity, opts } = this
      return cacache.rm.content(cache, integrity, opts)
    }

    // Remove every entry that getContents reports for path at depth 1.
    async #empty (path) {
      const contents = await getContents({ path, depth: 1 })
      const removals = contents.map(
        (entry) => rm(entry, { recursive: true, force: true })
      )
      return Promise.all(removals)
    }

    // Clear out dest, then make sure the directory exists.
    async #mkdir (dest) {
      await this.#empty(dest)
      const made = await mkdir(dest, { recursive: true })
      return made
    }

    // Extraction is always the same; only the origin of the tarball
    // differs. Resolves to { resolved, integrity, from }.
    async extract (dest) {
      await this.#mkdir(dest)
      const unpack = (tarball) => this.#extract(dest, tarball)
      return this.tarballStream(unpack)
    }

    // Stream the tarball into the file at dest; resolves with
    // { integrity, resolved, from } once the writer has closed.
    #toFile (dest) {
      const writeTo = (source) => new Promise((resolve, reject) => {
        const sink = new fsm.WriteStream(dest)
        source.on('error', (er) => sink.emit('error', er))
        sink.on('error', (er) => reject(er))
        sink.on('close', () => {
          const sri = this.integrity
          resolve({
            integrity: sri && String(sri),
            resolved: this.resolved,
            from: this.from,
          })
        })
        source.pipe(sink)
      })
      return this.tarballStream(writeTo)
    }

    // this.#mkdir is not used here because we don't want to rimraf anything
    async tarballFile (dest) {
      await mkdir(dirname(dest), { recursive: true })
      return this.#toFile(dest)
    }

    // Pipe the tarball stream through tar.x into dest; resolves with
    // { resolved, integrity, from } on 'end', rejects on either stream's
    // 'error'.
    #extract (dest, tarball) {
      const extractor = tar.x(this.#tarxOptions({ cwd: dest }))
      const done = new Promise((resolve, reject) => {
        extractor.on('end', () => {
          const sri = this.integrity
          resolve({
            resolved: this.resolved,
            integrity: sri && String(sri),
            from: this.from,
          })
        })
        extractor.on('error', (er) => {
          log.warn('tar', er.message)
          log.silly('tar', er)
          reject(er)
        })
        tarball.on('error', (er) => reject(er))
      })
      tarball.pipe(extractor)
      return done
    }

    // OR the entry mode with the configured dmode/fmode and mask the result
    // with the umask; the bin-executable bits and owner read/write are
    // OR'ed in afterwards.
    #entryMode (path, mode, type) {
      const isDir = /Directory|GNUDumpDir/.test(type)
      const floor = isDir ? this.dmode
        : /File$/.test(type) ? this.fmode
        : /* istanbul ignore next - should never happen in a pkg */ 0

      // make sure package bins are executable
      const exe = isPackageBin(this.package, path) ? 0o111 : 0
      const masked = (mode | floor) & ~this.umask
      // always ensure that files are read/writable by the owner
      return masked | exe | 0o600
    }

    // Options handed to tar.x for unpacking into cwd.
    #tarxOptions ({ cwd }) {
      const sawIgnores = new Set()

      // Links are skipped. File entries are kept, except for a .gitignore
      // that would clobber a .npmignore already seen. For any other entry
      // type this returns undefined.
      const filter = (name, entry) => {
        if (/Link$/.test(entry.type)) {
          return false
        }

        entry.mode = this.#entryMode(entry.path, entry.mode, entry.type)

        if (!/File$/.test(entry.type)) {
          return
        }

        // This replicates the npm pack behavior where .gitignore files
        // are treated like .npmignore files, but only if a .npmignore
        // file is not present.
        const base = basename(entry.path)
        if (base === '.npmignore') {
          sawIgnores.add(entry.path)
          return true
        }
        if (base === '.gitignore' && !this.allowGitIgnore) {
          // rename, but only if there's not already a .npmignore
          const ni = entry.path.replace(/\.gitignore$/, '.npmignore')
          if (sawIgnores.has(ni)) {
            return false
          }
          entry.path = ni
        }
        return true
      }

      const onwarn = /* istanbul ignore next - we can trust that tar logs */
        (code, msg, data) => {
          log.warn('tar', code, msg)
          log.silly('tar', code, msg, data)
        }

      return {
        cwd,
        noChmod: true,
        noMtime: true,
        filter,
        strip: 1,
        onwarn,
        umask: this.umask,
        // always ignore ownership info from tarball metadata
        preserveOwner: false,
      }
    }
  }
  407. module.exports = FetcherBase
  408. // Child classes
  409. const GitFetcher = require('./git.js')
  410. const RegistryFetcher = require('./registry.js')
  411. const FileFetcher = require('./file.js')
  412. const DirFetcher = require('./dir.js')
  413. const RemoteFetcher = require('./remote.js')
  // Pick the fetcher class matching a spec, and instantiate it.
  // rawSpec is parsed with npm-package-arg relative to opts.where.
  // 'version', 'range', 'tag' and 'alias' specs all go to the registry
  // fetcher, which is handed spec.subSpec when there is one.
  // Throws a TypeError for any other spec type.
  FetcherBase.get = (rawSpec, opts = {}) => {
    const spec = npa(rawSpec, opts.where)
    const { type } = spec

    if (type === 'git') {
      return new GitFetcher(spec, opts)
    }
    if (type === 'remote') {
      return new RemoteFetcher(spec, opts)
    }
    if (type === 'version' || type === 'range' || type === 'tag' || type === 'alias') {
      return new RegistryFetcher(spec.subSpec || spec, opts)
    }
    if (type === 'file') {
      return new FileFetcher(spec, opts)
    }
    if (type === 'directory') {
      return new DirFetcher(spec, opts)
    }

    throw new TypeError('Unknown spec type: ' + spec.type)
  }