normalize.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. const valid = require('semver/functions/valid')
  2. const clean = require('semver/functions/clean')
  3. const fs = require('node:fs/promises')
  4. const path = require('node:path')
  5. const { log } = require('proc-log')
  6. /**
  7. * @type {import('hosted-git-info')}
  8. */
  9. let _hostedGitInfo
  10. function lazyHostedGitInfo () {
  11. if (!_hostedGitInfo) {
  12. _hostedGitInfo = require('hosted-git-info')
  13. }
  14. return _hostedGitInfo
  15. }
  16. /**
  17. * @type {import('glob').glob}
  18. */
  19. let _glob
  20. function lazyLoadGlob () {
  21. if (!_glob) {
  22. _glob = require('glob').glob
  23. }
  24. return _glob
  25. }
  26. // used to be npm-normalize-package-bin
  27. function normalizePackageBin (pkg, changes) {
  28. if (pkg.bin) {
  29. if (typeof pkg.bin === 'string' && pkg.name) {
  30. changes?.push('"bin" was converted to an object')
  31. pkg.bin = { [pkg.name]: pkg.bin }
  32. } else if (Array.isArray(pkg.bin)) {
  33. changes?.push('"bin" was converted to an object')
  34. pkg.bin = pkg.bin.reduce((acc, k) => {
  35. acc[path.basename(k)] = k
  36. return acc
  37. }, {})
  38. }
  39. if (typeof pkg.bin === 'object') {
  40. for (const binKey in pkg.bin) {
  41. if (typeof pkg.bin[binKey] !== 'string') {
  42. delete pkg.bin[binKey]
  43. changes?.push(`removed invalid "bin[${binKey}]"`)
  44. continue
  45. }
  46. const base = path.basename(secureAndUnixifyPath(binKey))
  47. if (!base) {
  48. delete pkg.bin[binKey]
  49. changes?.push(`removed invalid "bin[${binKey}]"`)
  50. continue
  51. }
  52. const binTarget = secureAndUnixifyPath(pkg.bin[binKey])
  53. if (!binTarget) {
  54. delete pkg.bin[binKey]
  55. changes?.push(`removed invalid "bin[${binKey}]"`)
  56. continue
  57. }
  58. if (base !== binKey) {
  59. delete pkg.bin[binKey]
  60. changes?.push(`"bin[${binKey}]" was renamed to "bin[${base}]"`)
  61. }
  62. if (binTarget !== pkg.bin[binKey]) {
  63. changes?.push(`"bin[${base}]" script name was cleaned`)
  64. }
  65. pkg.bin[base] = binTarget
  66. }
  67. if (Object.keys(pkg.bin).length === 0) {
  68. changes?.push('empty "bin" was removed')
  69. delete pkg.bin
  70. }
  71. return pkg
  72. }
  73. }
  74. delete pkg.bin
  75. }
  76. function normalizePackageMan (pkg, changes) {
  77. if (pkg.man) {
  78. const mans = []
  79. for (const man of (Array.isArray(pkg.man) ? pkg.man : [pkg.man])) {
  80. if (typeof man !== 'string') {
  81. changes?.push(`removed invalid "man [${man}]"`)
  82. } else {
  83. mans.push(secureAndUnixifyPath(man))
  84. }
  85. }
  86. if (!mans.length) {
  87. changes?.push('empty "man" was removed')
  88. } else {
  89. pkg.man = mans
  90. return pkg
  91. }
  92. }
  93. delete pkg.man
  94. }
  95. function isCorrectlyEncodedName (spec) {
  96. return !spec.match(/[/@\s+%:]/) &&
  97. spec === encodeURIComponent(spec)
  98. }
  99. function isValidScopedPackageName (spec) {
  100. if (spec.charAt(0) !== '@') {
  101. return false
  102. }
  103. const rest = spec.slice(1).split('/')
  104. if (rest.length !== 2) {
  105. return false
  106. }
  107. return rest[0] && rest[1] &&
  108. rest[0] === encodeURIComponent(rest[0]) &&
  109. rest[1] === encodeURIComponent(rest[1])
  110. }
  111. function unixifyPath (ref) {
  112. return ref.replace(/\\|:/g, '/')
  113. }
  114. function secureAndUnixifyPath (ref) {
  115. const secured = unixifyPath(path.join('.', path.join('/', unixifyPath(ref))))
  116. return secured.startsWith('./') ? '' : secured
  117. }
  118. // We don't want the `changes` array in here by default because this is a hot
  119. // path for parsing packuments during install. So the calling method passes it
  120. // in if it wants to track changes.
  121. const normalize = async (pkg, { strict, steps, root, changes, allowLegacyCase }) => {
  122. if (!pkg.content) {
  123. throw new Error('Can not normalize without content')
  124. }
  125. const data = pkg.content
  126. const scripts = data.scripts || {}
  127. const pkgId = `${data.name ?? ''}@${data.version ?? ''}`
  128. // name and version are load bearing so we have to clean them up first
  129. if (steps.includes('fixNameField') || steps.includes('normalizeData')) {
  130. if (!data.name && !strict) {
  131. changes?.push('Missing "name" field was set to an empty string')
  132. data.name = ''
  133. } else {
  134. if (typeof data.name !== 'string') {
  135. throw new Error('name field must be a string.')
  136. }
  137. if (!strict) {
  138. const name = data.name.trim()
  139. if (data.name !== name) {
  140. changes?.push(`Whitespace was trimmed from "name"`)
  141. data.name = name
  142. }
  143. }
  144. if (data.name.startsWith('.') ||
  145. !(isValidScopedPackageName(data.name) || isCorrectlyEncodedName(data.name)) ||
  146. (strict && (!allowLegacyCase) && data.name !== data.name.toLowerCase()) ||
  147. data.name.toLowerCase() === 'node_modules' ||
  148. data.name.toLowerCase() === 'favicon.ico') {
  149. throw new Error('Invalid name: ' + JSON.stringify(data.name))
  150. }
  151. }
  152. }
  153. if (steps.includes('fixVersionField') || steps.includes('normalizeData')) {
  154. // allow "loose" semver 1.0 versions in non-strict mode
  155. // enforce strict semver 2.0 compliance in strict mode
  156. const loose = !strict
  157. if (!data.version) {
  158. data.version = ''
  159. } else {
  160. if (!valid(data.version, loose)) {
  161. throw new Error(`Invalid version: "${data.version}"`)
  162. }
  163. const version = clean(data.version, loose)
  164. if (version !== data.version) {
  165. changes?.push(`"version" was cleaned and set to "${version}"`)
  166. data.version = version
  167. }
  168. }
  169. }
  170. // remove attributes that start with "_"
  171. if (steps.includes('_attributes')) {
  172. for (const key in data) {
  173. if (key.startsWith('_')) {
  174. changes?.push(`"${key}" was removed`)
  175. delete pkg.content[key]
  176. }
  177. }
  178. }
  179. // build the "_id" attribute
  180. if (steps.includes('_id')) {
  181. if (data.name && data.version) {
  182. changes?.push(`"_id" was set to ${pkgId}`)
  183. data._id = pkgId
  184. }
  185. }
  186. // fix bundledDependencies typo
  187. // normalize bundleDependencies
  188. if (steps.includes('bundledDependencies')) {
  189. if (data.bundleDependencies === undefined && data.bundledDependencies !== undefined) {
  190. data.bundleDependencies = data.bundledDependencies
  191. }
  192. changes?.push(`Deleted incorrect "bundledDependencies"`)
  193. delete data.bundledDependencies
  194. }
  195. // expand "bundleDependencies: true or translate from object"
  196. if (steps.includes('bundleDependencies')) {
  197. const bd = data.bundleDependencies
  198. if (bd === false && !steps.includes('bundleDependenciesDeleteFalse')) {
  199. changes?.push(`"bundleDependencies" was changed from "false" to "[]"`)
  200. data.bundleDependencies = []
  201. } else if (bd === true) {
  202. changes?.push(`"bundleDependencies" was auto-populated from "dependencies"`)
  203. data.bundleDependencies = Object.keys(data.dependencies || {})
  204. } else if (bd && typeof bd === 'object') {
  205. if (!Array.isArray(bd)) {
  206. changes?.push(`"bundleDependencies" was changed from an object to an array`)
  207. data.bundleDependencies = Object.keys(bd)
  208. }
  209. } else if ('bundleDependencies' in data) {
  210. changes?.push(`"bundleDependencies" was removed`)
  211. delete data.bundleDependencies
  212. }
  213. }
  214. // it was once common practice to list deps both in optionalDependencies and
  215. // in dependencies, to support npm versions that did not know about
  216. // optionalDependencies. This is no longer a relevant need, so duplicating
  217. // the deps in two places is unnecessary and excessive.
  218. if (steps.includes('optionalDedupe')) {
  219. if (data.dependencies &&
  220. data.optionalDependencies && typeof data.optionalDependencies === 'object') {
  221. for (const name in data.optionalDependencies) {
  222. changes?.push(`optionalDependencies."${name}" was removed`)
  223. delete data.dependencies[name]
  224. }
  225. if (!Object.keys(data.dependencies).length) {
  226. changes?.push(`Empty "optionalDependencies" was removed`)
  227. delete data.dependencies
  228. }
  229. }
  230. }
  231. // add "install" attribute if any "*.gyp" files exist
  232. if (steps.includes('gypfile')) {
  233. if (!scripts.install && !scripts.preinstall && data.gypfile !== false) {
  234. const files = await lazyLoadGlob()('*.gyp', { cwd: pkg.path })
  235. if (files.length) {
  236. scripts.install = 'node-gyp rebuild'
  237. data.scripts = scripts
  238. data.gypfile = true
  239. changes?.push(`"scripts.install" was set to "node-gyp rebuild"`)
  240. changes?.push(`"gypfile" was set to "true"`)
  241. }
  242. }
  243. }
  244. // add "start" attribute if "server.js" exists
  245. if (steps.includes('serverjs') && !scripts.start) {
  246. try {
  247. await fs.access(path.join(pkg.path, 'server.js'))
  248. scripts.start = 'node server.js'
  249. data.scripts = scripts
  250. changes?.push('"scripts.start" was set to "node server.js"')
  251. } catch {
  252. // do nothing
  253. }
  254. }
  255. // strip "node_modules/.bin" from scripts entries
  256. // remove invalid scripts entries (non-strings)
  257. if ((steps.includes('scripts') || steps.includes('scriptpath')) && data.scripts !== undefined) {
  258. const spre = /^(\.[/\\])?node_modules[/\\].bin[\\/]/
  259. if (typeof data.scripts === 'object') {
  260. for (const name in data.scripts) {
  261. if (typeof data.scripts[name] !== 'string') {
  262. delete data.scripts[name]
  263. changes?.push(`Invalid scripts."${name}" was removed`)
  264. } else if (steps.includes('scriptpath') && spre.test(data.scripts[name])) {
  265. data.scripts[name] = data.scripts[name].replace(spre, '')
  266. changes?.push(`scripts entry "${name}" was fixed to remove node_modules/.bin reference`)
  267. }
  268. }
  269. } else {
  270. changes?.push(`Removed invalid "scripts"`)
  271. delete data.scripts
  272. }
  273. }
  274. if (steps.includes('funding')) {
  275. if (data.funding && typeof data.funding === 'string') {
  276. data.funding = { url: data.funding }
  277. changes?.push(`"funding" was changed to an object with a url attribute`)
  278. }
  279. }
  280. // populate "authors" attribute
  281. if (steps.includes('authors') && !data.contributors) {
  282. try {
  283. const authorData = await fs.readFile(path.join(pkg.path, 'AUTHORS'), 'utf8')
  284. const authors = authorData.split(/\r?\n/g)
  285. .map(line => line.replace(/^\s*#.*$/, '').trim())
  286. .filter(line => line)
  287. data.contributors = authors
  288. changes?.push('"contributors" was auto-populated with the contents of the "AUTHORS" file')
  289. } catch {
  290. // do nothing
  291. }
  292. }
  293. // populate "readme" attribute
  294. if (steps.includes('readme') && !data.readme) {
  295. const mdre = /\.m?a?r?k?d?o?w?n?$/i
  296. const files = await lazyLoadGlob()('{README,README.*}', {
  297. cwd: pkg.path,
  298. nocase: true,
  299. mark: true,
  300. })
  301. let readmeFile
  302. for (const file of files) {
  303. // don't accept directories.
  304. if (!file.endsWith(path.sep)) {
  305. if (file.match(mdre)) {
  306. readmeFile = file
  307. break
  308. }
  309. if (file.endsWith('README')) {
  310. readmeFile = file
  311. }
  312. }
  313. }
  314. if (readmeFile) {
  315. const readmeData = await fs.readFile(path.join(pkg.path, readmeFile), 'utf8')
  316. data.readme = readmeData
  317. data.readmeFilename = readmeFile
  318. changes?.push(`"readme" was set to the contents of ${readmeFile}`)
  319. changes?.push(`"readmeFilename" was set to ${readmeFile}`)
  320. }
  321. if (!data.readme) {
  322. data.readme = 'ERROR: No README data found!'
  323. }
  324. }
  325. // expand directories.man
  326. if (steps.includes('mans')) {
  327. if (data.directories?.man && !data.man) {
  328. const manDir = secureAndUnixifyPath(data.directories.man)
  329. const cwd = path.resolve(pkg.path, manDir)
  330. const files = await lazyLoadGlob()('**/*.[0-9]', { cwd })
  331. data.man = files.map(man =>
  332. path.relative(pkg.path, path.join(cwd, man)).split(path.sep).join('/')
  333. )
  334. }
  335. normalizePackageMan(data, changes)
  336. }
  337. if (steps.includes('bin') || steps.includes('binDir') || steps.includes('binRefs')) {
  338. normalizePackageBin(data, changes)
  339. }
  340. // expand "directories.bin"
  341. if (steps.includes('binDir') && data.directories?.bin && !data.bin) {
  342. const binsDir = path.resolve(pkg.path, secureAndUnixifyPath(data.directories.bin))
  343. const bins = await lazyLoadGlob()('**', { cwd: binsDir })
  344. data.bin = bins.reduce((acc, binFile) => {
  345. if (binFile && !binFile.startsWith('.')) {
  346. const binName = path.basename(binFile)
  347. acc[binName] = path.join(data.directories.bin, binFile)
  348. }
  349. return acc
  350. }, {})
  351. // *sigh*
  352. normalizePackageBin(data, changes)
  353. }
  354. // populate "gitHead" attribute
  355. if (steps.includes('gitHead') && !data.gitHead) {
  356. const git = require('@npmcli/git')
  357. const gitRoot = await git.find({ cwd: pkg.path, root })
  358. let head
  359. if (gitRoot) {
  360. try {
  361. head = await fs.readFile(path.resolve(gitRoot, '.git/HEAD'), 'utf8')
  362. } catch (err) {
  363. // do nothing
  364. }
  365. }
  366. let headData
  367. if (head) {
  368. if (head.startsWith('ref: ')) {
  369. const headRef = head.replace(/^ref: /, '').trim()
  370. const headFile = path.resolve(gitRoot, '.git', headRef)
  371. try {
  372. headData = await fs.readFile(headFile, 'utf8')
  373. headData = headData.replace(/^ref: /, '').trim()
  374. } catch (err) {
  375. // do nothing
  376. }
  377. if (!headData) {
  378. const packFile = path.resolve(gitRoot, '.git/packed-refs')
  379. try {
  380. let refs = await fs.readFile(packFile, 'utf8')
  381. if (refs) {
  382. refs = refs.split('\n')
  383. for (let i = 0; i < refs.length; i++) {
  384. const match = refs[i].match(/^([0-9a-f]{40}) (.+)$/)
  385. if (match && match[2].trim() === headRef) {
  386. headData = match[1]
  387. break
  388. }
  389. }
  390. }
  391. } catch {
  392. // do nothing
  393. }
  394. }
  395. } else {
  396. headData = head.trim()
  397. }
  398. }
  399. if (headData) {
  400. data.gitHead = headData
  401. }
  402. }
  403. // populate "types" attribute
  404. if (steps.includes('fillTypes')) {
  405. const index = data.main || 'index.js'
  406. if (typeof index !== 'string') {
  407. throw new TypeError('The "main" attribute must be of type string.')
  408. }
  409. // TODO exports is much more complicated than this in verbose format
  410. // We need to support for instance
  411. // "exports": {
  412. // ".": [
  413. // {
  414. // "default": "./lib/npm.js"
  415. // },
  416. // "./lib/npm.js"
  417. // ],
  418. // "./package.json": "./package.json"
  419. // },
  420. // as well as conditional exports
  421. // if (data.exports && typeof data.exports === 'string') {
  422. // index = data.exports
  423. // }
  424. // if (data.exports && data.exports['.']) {
  425. // index = data.exports['.']
  426. // if (typeof index !== 'string') {
  427. // }
  428. // }
  429. const extless = path.join(path.dirname(index), path.basename(index, path.extname(index)))
  430. const dts = `./${extless}.d.ts`
  431. const hasDTSFields = 'types' in data || 'typings' in data
  432. if (!hasDTSFields) {
  433. try {
  434. await fs.access(path.join(pkg.path, dts))
  435. data.types = dts.split(path.sep).join('/')
  436. } catch {
  437. // do nothing
  438. }
  439. }
  440. }
  441. // "normalizeData" from "read-package-json", which was just a call through to
  442. // "normalize-package-data". We only call the "fixer" functions because
  443. // outside of that it was also clobbering _id (which we already conditionally
  444. // do) and also adding the gypfile script (which we also already
  445. // conditionally do)
  446. // Some steps are isolated so we can do a limited subset of these in `fix`
  447. if (steps.includes('fixRepositoryField') || steps.includes('normalizeData')) {
  448. if (data.repositories) {
  449. changes?.push(`"repository" was set to the first entry in "repositories" (${data.repository})`)
  450. data.repository = data.repositories[0]
  451. }
  452. if (data.repository) {
  453. if (typeof data.repository === 'string') {
  454. changes?.push('"repository" was changed from a string to an object')
  455. data.repository = {
  456. type: 'git',
  457. url: data.repository,
  458. }
  459. }
  460. if (data.repository.url) {
  461. const hosted = lazyHostedGitInfo().fromUrl(data.repository.url)
  462. let r
  463. if (hosted) {
  464. if (hosted.getDefaultRepresentation() === 'shortcut') {
  465. r = hosted.https()
  466. } else {
  467. r = hosted.toString()
  468. }
  469. if (r !== data.repository.url) {
  470. changes?.push(`"repository.url" was normalized to "${r}"`)
  471. data.repository.url = r
  472. }
  473. }
  474. }
  475. }
  476. }
  477. if (steps.includes('fixDependencies') || steps.includes('normalizeData')) {
  478. // peerDependencies?
  479. // devDependencies is meaningless here, it's ignored on an installed package
  480. for (const type of ['dependencies', 'devDependencies', 'optionalDependencies']) {
  481. if (data[type]) {
  482. let secondWarning = true
  483. if (typeof data[type] === 'string') {
  484. changes?.push(`"${type}" was converted from a string into an object`)
  485. data[type] = data[type].trim().split(/[\n\r\s\t ,]+/)
  486. secondWarning = false
  487. }
  488. if (Array.isArray(data[type])) {
  489. if (secondWarning) {
  490. changes?.push(`"${type}" was converted from an array into an object`)
  491. }
  492. const o = {}
  493. for (const d of data[type]) {
  494. if (typeof d === 'string') {
  495. const dep = d.trim().split(/(:?[@\s><=])/)
  496. const dn = dep.shift()
  497. const dv = dep.join('').replace(/^@/, '').trim()
  498. o[dn] = dv
  499. }
  500. }
  501. data[type] = o
  502. }
  503. }
  504. }
  505. // normalize-package-data used to put optional dependencies BACK into
  506. // dependencies here, we no longer do this
  507. for (const deps of ['dependencies', 'devDependencies']) {
  508. if (deps in data) {
  509. if (!data[deps] || typeof data[deps] !== 'object') {
  510. changes?.push(`Removed invalid "${deps}"`)
  511. delete data[deps]
  512. } else {
  513. for (const d in data[deps]) {
  514. const r = data[deps][d]
  515. if (typeof r !== 'string') {
  516. changes?.push(`Removed invalid "${deps}.${d}"`)
  517. delete data[deps][d]
  518. }
  519. const hosted = lazyHostedGitInfo().fromUrl(data[deps][d])?.toString()
  520. if (hosted && hosted !== data[deps][d]) {
  521. changes?.push(`Normalized git reference to "${deps}.${d}"`)
  522. data[deps][d] = hosted.toString()
  523. }
  524. }
  525. }
  526. }
  527. }
  528. }
  529. // TODO some of this is duplicated in other steps here, a future breaking change may be able to remove the duplicates involved in this step
  530. if (steps.includes('normalizeData')) {
  531. const { normalizeData } = require('./normalize-data.js')
  532. normalizeData(data, changes)
  533. }
  534. // Warn if the bin references don't point to anything. This might be better
  535. // in normalize-package-data if it had access to the file path.
  536. if (steps.includes('binRefs') && data.bin instanceof Object) {
  537. for (const key in data.bin) {
  538. try {
  539. await fs.access(path.resolve(pkg.path, data.bin[key]))
  540. } catch {
  541. log.warn('package-json', pkgId, `No bin file found at ${data.bin[key]}`)
  542. // XXX: should a future breaking change delete bin entries that cannot be accessed?
  543. }
  544. }
  545. }
  546. }
  547. module.exports = normalize