normalize.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. const valid = require('semver/functions/valid')
  2. const clean = require('semver/functions/clean')
  3. const fs = require('node:fs/promises')
  4. const path = require('node:path')
  5. const { log } = require('proc-log')
  6. const moduleBuiltin = require('node:module')
  7. /**
  8. * @type {import('hosted-git-info')}
  9. */
  10. let _hostedGitInfo
  11. function lazyHostedGitInfo () {
  12. if (!_hostedGitInfo) {
  13. _hostedGitInfo = require('hosted-git-info')
  14. }
  15. return _hostedGitInfo
  16. }
  17. /**
  18. * @type {import('glob').glob}
  19. */
  20. let _glob
  21. function lazyLoadGlob () {
  22. if (!_glob) {
  23. _glob = require('glob').glob
  24. }
  25. return _glob
  26. }
  27. // used to be npm-normalize-package-bin
  28. function normalizePackageBin (pkg, changes) {
  29. if (pkg.bin) {
  30. if (typeof pkg.bin === 'string' && pkg.name) {
  31. changes?.push('"bin" was converted to an object')
  32. pkg.bin = { [pkg.name]: pkg.bin }
  33. } else if (Array.isArray(pkg.bin)) {
  34. changes?.push('"bin" was converted to an object')
  35. pkg.bin = pkg.bin.reduce((acc, k) => {
  36. acc[path.basename(k)] = k
  37. return acc
  38. }, {})
  39. }
  40. if (typeof pkg.bin === 'object') {
  41. for (const binKey in pkg.bin) {
  42. if (typeof pkg.bin[binKey] !== 'string') {
  43. delete pkg.bin[binKey]
  44. changes?.push(`removed invalid "bin[${binKey}]"`)
  45. continue
  46. }
  47. const base = path.basename(secureAndUnixifyPath(binKey))
  48. if (!base) {
  49. delete pkg.bin[binKey]
  50. changes?.push(`removed invalid "bin[${binKey}]"`)
  51. continue
  52. }
  53. const binTarget = secureAndUnixifyPath(pkg.bin[binKey])
  54. if (!binTarget) {
  55. delete pkg.bin[binKey]
  56. changes?.push(`removed invalid "bin[${binKey}]"`)
  57. continue
  58. }
  59. if (base !== binKey) {
  60. delete pkg.bin[binKey]
  61. changes?.push(`"bin[${binKey}]" was renamed to "bin[${base}]"`)
  62. }
  63. if (binTarget !== pkg.bin[binKey]) {
  64. changes?.push(`"bin[${base}]" script name was cleaned`)
  65. }
  66. pkg.bin[base] = binTarget
  67. }
  68. if (Object.keys(pkg.bin).length === 0) {
  69. changes?.push('empty "bin" was removed')
  70. delete pkg.bin
  71. }
  72. return pkg
  73. }
  74. }
  75. delete pkg.bin
  76. }
  77. function normalizePackageMan (pkg, changes) {
  78. if (pkg.man) {
  79. const mans = []
  80. for (const man of (Array.isArray(pkg.man) ? pkg.man : [pkg.man])) {
  81. if (typeof man !== 'string') {
  82. changes?.push(`removed invalid "man [${man}]"`)
  83. } else {
  84. mans.push(secureAndUnixifyPath(man))
  85. }
  86. }
  87. if (!mans.length) {
  88. changes?.push('empty "man" was removed')
  89. } else {
  90. pkg.man = mans
  91. return pkg
  92. }
  93. }
  94. delete pkg.man
  95. }
  96. function isCorrectlyEncodedName (spec) {
  97. return !spec.match(/[/@\s+%:]/) &&
  98. spec === encodeURIComponent(spec)
  99. }
  100. function isValidScopedPackageName (spec) {
  101. if (spec.charAt(0) !== '@') {
  102. return false
  103. }
  104. const rest = spec.slice(1).split('/')
  105. if (rest.length !== 2) {
  106. return false
  107. }
  108. return rest[0] && rest[1] &&
  109. rest[0] === encodeURIComponent(rest[0]) &&
  110. rest[1] === encodeURIComponent(rest[1])
  111. }
  112. function unixifyPath (ref) {
  113. return ref.replace(/\\|:/g, '/')
  114. }
  115. function secureAndUnixifyPath (ref) {
  116. const secured = unixifyPath(path.join('.', path.join('/', unixifyPath(ref))))
  117. return secured.startsWith('./') ? '' : secured
  118. }
  119. // We don't want the `changes` array in here by default because this is a hot
  120. // path for parsing packuments during install. So the calling method passes it
  121. // in if it wants to track changes.
  122. const normalize = async (pkg, { strict, steps, root, changes, allowLegacyCase }) => {
  123. if (!pkg.content) {
  124. throw new Error('Can not normalize without content')
  125. }
  126. const data = pkg.content
  127. const scripts = data.scripts || {}
  128. const pkgId = `${data.name ?? ''}@${data.version ?? ''}`
  129. // name and version are load bearing so we have to clean them up first
  130. if (steps.includes('fixName') || steps.includes('fixNameField') || steps.includes('normalizeData')) {
  131. if (!data.name && !strict) {
  132. changes?.push('Missing "name" field was set to an empty string')
  133. data.name = ''
  134. } else {
  135. if (typeof data.name !== 'string') {
  136. throw new Error('name field must be a string.')
  137. }
  138. if (!strict) {
  139. const name = data.name.trim()
  140. if (data.name !== name) {
  141. changes?.push(`Whitespace was trimmed from "name"`)
  142. data.name = name
  143. }
  144. }
  145. if (data.name.startsWith('.') ||
  146. !(isValidScopedPackageName(data.name) || isCorrectlyEncodedName(data.name)) ||
  147. (strict && (!allowLegacyCase) && data.name !== data.name.toLowerCase()) ||
  148. data.name.toLowerCase() === 'node_modules' ||
  149. data.name.toLowerCase() === 'favicon.ico') {
  150. throw new Error('Invalid name: ' + JSON.stringify(data.name))
  151. }
  152. }
  153. }
  154. if (steps.includes('fixName')) {
  155. // Check for conflicts with builtin modules
  156. if (moduleBuiltin.builtinModules.includes(data.name)) {
  157. log.warn('package-json', pkgId, `Package name "${data.name}" conflicts with a Node.js built-in module name`)
  158. }
  159. }
  160. if (steps.includes('fixVersionField') || steps.includes('normalizeData')) {
  161. // allow "loose" semver 1.0 versions in non-strict mode
  162. // enforce strict semver 2.0 compliance in strict mode
  163. const loose = !strict
  164. if (!data.version) {
  165. data.version = ''
  166. } else {
  167. if (!valid(data.version, loose)) {
  168. throw new Error(`Invalid version: "${data.version}"`)
  169. }
  170. const version = clean(data.version, loose)
  171. if (version !== data.version) {
  172. changes?.push(`"version" was cleaned and set to "${version}"`)
  173. data.version = version
  174. }
  175. }
  176. }
  177. // remove attributes that start with "_"
  178. if (steps.includes('_attributes')) {
  179. for (const key in data) {
  180. if (key.startsWith('_')) {
  181. changes?.push(`"${key}" was removed`)
  182. delete pkg.content[key]
  183. }
  184. }
  185. }
  186. // build the "_id" attribute
  187. if (steps.includes('_id')) {
  188. if (data.name && data.version) {
  189. changes?.push(`"_id" was set to ${pkgId}`)
  190. data._id = pkgId
  191. }
  192. }
  193. // fix bundledDependencies typo
  194. // normalize bundleDependencies
  195. if (steps.includes('bundledDependencies')) {
  196. if (data.bundleDependencies === undefined && data.bundledDependencies !== undefined) {
  197. data.bundleDependencies = data.bundledDependencies
  198. }
  199. changes?.push(`Deleted incorrect "bundledDependencies"`)
  200. delete data.bundledDependencies
  201. }
  202. // expand "bundleDependencies: true or translate from object"
  203. if (steps.includes('bundleDependencies')) {
  204. const bd = data.bundleDependencies
  205. if (bd === false && !steps.includes('bundleDependenciesDeleteFalse')) {
  206. changes?.push(`"bundleDependencies" was changed from "false" to "[]"`)
  207. data.bundleDependencies = []
  208. } else if (bd === true) {
  209. changes?.push(`"bundleDependencies" was auto-populated from "dependencies"`)
  210. data.bundleDependencies = Object.keys(data.dependencies || {})
  211. } else if (bd && typeof bd === 'object') {
  212. if (!Array.isArray(bd)) {
  213. changes?.push(`"bundleDependencies" was changed from an object to an array`)
  214. data.bundleDependencies = Object.keys(bd)
  215. }
  216. } else if ('bundleDependencies' in data) {
  217. changes?.push(`"bundleDependencies" was removed`)
  218. delete data.bundleDependencies
  219. }
  220. }
  221. // it was once common practice to list deps both in optionalDependencies and
  222. // in dependencies, to support npm versions that did not know about
  223. // optionalDependencies. This is no longer a relevant need, so duplicating
  224. // the deps in two places is unnecessary and excessive.
  225. if (steps.includes('optionalDedupe')) {
  226. if (data.dependencies &&
  227. data.optionalDependencies && typeof data.optionalDependencies === 'object') {
  228. for (const name in data.optionalDependencies) {
  229. changes?.push(`optionalDependencies."${name}" was removed`)
  230. delete data.dependencies[name]
  231. }
  232. if (!Object.keys(data.dependencies).length) {
  233. changes?.push(`Empty "optionalDependencies" was removed`)
  234. delete data.dependencies
  235. }
  236. }
  237. }
  238. // add "install" attribute if any "*.gyp" files exist
  239. if (steps.includes('gypfile')) {
  240. if (!scripts.install && !scripts.preinstall && data.gypfile !== false) {
  241. const files = await lazyLoadGlob()('*.gyp', { cwd: pkg.path })
  242. if (files.length) {
  243. scripts.install = 'node-gyp rebuild'
  244. data.scripts = scripts
  245. data.gypfile = true
  246. changes?.push(`"scripts.install" was set to "node-gyp rebuild"`)
  247. changes?.push(`"gypfile" was set to "true"`)
  248. }
  249. }
  250. }
  251. // add "start" attribute if "server.js" exists
  252. if (steps.includes('serverjs') && !scripts.start) {
  253. try {
  254. await fs.access(path.join(pkg.path, 'server.js'))
  255. scripts.start = 'node server.js'
  256. data.scripts = scripts
  257. changes?.push('"scripts.start" was set to "node server.js"')
  258. } catch {
  259. // do nothing
  260. }
  261. }
  262. // strip "node_modules/.bin" from scripts entries
  263. // remove invalid scripts entries (non-strings)
  264. if ((steps.includes('scripts') || steps.includes('scriptpath')) && data.scripts !== undefined) {
  265. const spre = /^(\.[/\\])?node_modules[/\\].bin[\\/]/
  266. if (typeof data.scripts === 'object') {
  267. for (const name in data.scripts) {
  268. if (typeof data.scripts[name] !== 'string') {
  269. delete data.scripts[name]
  270. changes?.push(`Invalid scripts."${name}" was removed`)
  271. } else if (steps.includes('scriptpath') && spre.test(data.scripts[name])) {
  272. data.scripts[name] = data.scripts[name].replace(spre, '')
  273. changes?.push(`scripts entry "${name}" was fixed to remove node_modules/.bin reference`)
  274. }
  275. }
  276. } else {
  277. changes?.push(`Removed invalid "scripts"`)
  278. delete data.scripts
  279. }
  280. }
  281. if (steps.includes('funding')) {
  282. if (data.funding && typeof data.funding === 'string') {
  283. data.funding = { url: data.funding }
  284. changes?.push(`"funding" was changed to an object with a url attribute`)
  285. }
  286. }
  287. // populate "authors" attribute
  288. if (steps.includes('authors') && !data.contributors) {
  289. try {
  290. const authorData = await fs.readFile(path.join(pkg.path, 'AUTHORS'), 'utf8')
  291. const authors = authorData.split(/\r?\n/g)
  292. .map(line => line.replace(/^\s*#.*$/, '').trim())
  293. .filter(line => line)
  294. data.contributors = authors
  295. changes?.push('"contributors" was auto-populated with the contents of the "AUTHORS" file')
  296. } catch {
  297. // do nothing
  298. }
  299. }
  300. // populate "readme" attribute
  301. if (steps.includes('readme') && !data.readme) {
  302. const mdre = /\.m?a?r?k?d?o?w?n?$/i
  303. const files = await lazyLoadGlob()('{README,README.*}', {
  304. cwd: pkg.path,
  305. nocase: true,
  306. mark: true,
  307. })
  308. let readmeFile
  309. for (const file of files) {
  310. // don't accept directories.
  311. if (!file.endsWith(path.sep)) {
  312. if (file.match(mdre)) {
  313. readmeFile = file
  314. break
  315. }
  316. if (file.endsWith('README')) {
  317. readmeFile = file
  318. }
  319. }
  320. }
  321. if (readmeFile) {
  322. const readmeData = await fs.readFile(path.join(pkg.path, readmeFile), 'utf8')
  323. data.readme = readmeData
  324. data.readmeFilename = readmeFile
  325. changes?.push(`"readme" was set to the contents of ${readmeFile}`)
  326. changes?.push(`"readmeFilename" was set to ${readmeFile}`)
  327. }
  328. if (!data.readme) {
  329. data.readme = 'ERROR: No README data found!'
  330. }
  331. }
  332. // expand directories.man
  333. if (steps.includes('mans')) {
  334. if (data.directories?.man && !data.man) {
  335. const manDir = secureAndUnixifyPath(data.directories.man)
  336. const cwd = path.resolve(pkg.path, manDir)
  337. const files = await lazyLoadGlob()('**/*.[0-9]', { cwd })
  338. data.man = files.map(man =>
  339. path.relative(pkg.path, path.join(cwd, man)).split(path.sep).join('/')
  340. )
  341. }
  342. normalizePackageMan(data, changes)
  343. }
  344. if (steps.includes('bin') || steps.includes('binDir') || steps.includes('binRefs')) {
  345. normalizePackageBin(data, changes)
  346. }
  347. // expand "directories.bin"
  348. if (steps.includes('binDir') && data.directories?.bin && !data.bin) {
  349. const binsDir = path.resolve(pkg.path, secureAndUnixifyPath(data.directories.bin))
  350. const bins = await lazyLoadGlob()('**', { cwd: binsDir })
  351. data.bin = bins.reduce((acc, binFile) => {
  352. if (binFile && !binFile.startsWith('.')) {
  353. const binName = path.basename(binFile)
  354. acc[binName] = path.join(data.directories.bin, binFile)
  355. }
  356. return acc
  357. }, {})
  358. // *sigh*
  359. normalizePackageBin(data, changes)
  360. }
  361. // populate "gitHead" attribute
  362. if (steps.includes('gitHead') && !data.gitHead) {
  363. const git = require('@npmcli/git')
  364. const gitRoot = await git.find({ cwd: pkg.path, root })
  365. let head
  366. if (gitRoot) {
  367. try {
  368. head = await fs.readFile(path.resolve(gitRoot, '.git/HEAD'), 'utf8')
  369. } catch (err) {
  370. // do nothing
  371. }
  372. }
  373. let headData
  374. if (head) {
  375. if (head.startsWith('ref: ')) {
  376. const headRef = head.replace(/^ref: /, '').trim()
  377. const headFile = path.resolve(gitRoot, '.git', headRef)
  378. try {
  379. headData = await fs.readFile(headFile, 'utf8')
  380. headData = headData.replace(/^ref: /, '').trim()
  381. } catch (err) {
  382. // do nothing
  383. }
  384. if (!headData) {
  385. const packFile = path.resolve(gitRoot, '.git/packed-refs')
  386. try {
  387. let refs = await fs.readFile(packFile, 'utf8')
  388. if (refs) {
  389. refs = refs.split('\n')
  390. for (let i = 0; i < refs.length; i++) {
  391. const match = refs[i].match(/^([0-9a-f]{40}) (.+)$/)
  392. if (match && match[2].trim() === headRef) {
  393. headData = match[1]
  394. break
  395. }
  396. }
  397. }
  398. } catch {
  399. // do nothing
  400. }
  401. }
  402. } else {
  403. headData = head.trim()
  404. }
  405. }
  406. if (headData) {
  407. data.gitHead = headData
  408. }
  409. }
  410. // populate "types" attribute
  411. if (steps.includes('fillTypes')) {
  412. const index = data.main || 'index.js'
  413. if (typeof index !== 'string') {
  414. throw new TypeError('The "main" attribute must be of type string.')
  415. }
  416. // TODO exports is much more complicated than this in verbose format
  417. // We need to support for instance
  418. // "exports": {
  419. // ".": [
  420. // {
  421. // "default": "./lib/npm.js"
  422. // },
  423. // "./lib/npm.js"
  424. // ],
  425. // "./package.json": "./package.json"
  426. // },
  427. // as well as conditional exports
  428. // if (data.exports && typeof data.exports === 'string') {
  429. // index = data.exports
  430. // }
  431. // if (data.exports && data.exports['.']) {
  432. // index = data.exports['.']
  433. // if (typeof index !== 'string') {
  434. // }
  435. // }
  436. const extless = path.join(path.dirname(index), path.basename(index, path.extname(index)))
  437. const dts = `./${extless}.d.ts`
  438. const hasDTSFields = 'types' in data || 'typings' in data
  439. if (!hasDTSFields) {
  440. try {
  441. await fs.access(path.join(pkg.path, dts))
  442. data.types = dts.split(path.sep).join('/')
  443. } catch {
  444. // do nothing
  445. }
  446. }
  447. }
  448. // "normalizeData" from "read-package-json", which was just a call through to
  449. // "normalize-package-data". We only call the "fixer" functions because
  450. // outside of that it was also clobbering _id (which we already conditionally
  451. // do) and also adding the gypfile script (which we also already
  452. // conditionally do)
  453. // Some steps are isolated so we can do a limited subset of these in `fix`
  454. if (steps.includes('fixRepositoryField') || steps.includes('normalizeData')) {
  455. if (data.repositories) {
  456. changes?.push(`"repository" was set to the first entry in "repositories" (${data.repository})`)
  457. data.repository = data.repositories[0]
  458. }
  459. if (data.repository) {
  460. if (typeof data.repository === 'string') {
  461. changes?.push('"repository" was changed from a string to an object')
  462. data.repository = {
  463. type: 'git',
  464. url: data.repository,
  465. }
  466. }
  467. if (data.repository.url) {
  468. const hosted = lazyHostedGitInfo().fromUrl(data.repository.url)
  469. let r
  470. if (hosted) {
  471. if (hosted.getDefaultRepresentation() === 'shortcut') {
  472. r = hosted.https()
  473. } else {
  474. r = hosted.toString()
  475. }
  476. if (r !== data.repository.url) {
  477. changes?.push(`"repository.url" was normalized to "${r}"`)
  478. data.repository.url = r
  479. }
  480. }
  481. }
  482. }
  483. }
  484. if (steps.includes('fixDependencies') || steps.includes('normalizeData')) {
  485. // peerDependencies?
  486. // devDependencies is meaningless here, it's ignored on an installed package
  487. for (const type of ['dependencies', 'devDependencies', 'optionalDependencies']) {
  488. if (data[type]) {
  489. let secondWarning = true
  490. if (typeof data[type] === 'string') {
  491. changes?.push(`"${type}" was converted from a string into an object`)
  492. data[type] = data[type].trim().split(/[\n\r\s\t ,]+/)
  493. secondWarning = false
  494. }
  495. if (Array.isArray(data[type])) {
  496. if (secondWarning) {
  497. changes?.push(`"${type}" was converted from an array into an object`)
  498. }
  499. const o = {}
  500. for (const d of data[type]) {
  501. if (typeof d === 'string') {
  502. const dep = d.trim().split(/(:?[@\s><=])/)
  503. const dn = dep.shift()
  504. const dv = dep.join('').replace(/^@/, '').trim()
  505. o[dn] = dv
  506. }
  507. }
  508. data[type] = o
  509. }
  510. }
  511. }
  512. // normalize-package-data used to put optional dependencies BACK into
  513. // dependencies here, we no longer do this
  514. for (const deps of ['dependencies', 'devDependencies']) {
  515. if (deps in data) {
  516. if (!data[deps] || typeof data[deps] !== 'object') {
  517. changes?.push(`Removed invalid "${deps}"`)
  518. delete data[deps]
  519. } else {
  520. for (const d in data[deps]) {
  521. const r = data[deps][d]
  522. if (typeof r !== 'string') {
  523. changes?.push(`Removed invalid "${deps}.${d}"`)
  524. delete data[deps][d]
  525. }
  526. const hosted = lazyHostedGitInfo().fromUrl(data[deps][d])?.toString()
  527. if (hosted && hosted !== data[deps][d]) {
  528. changes?.push(`Normalized git reference to "${deps}.${d}"`)
  529. data[deps][d] = hosted.toString()
  530. }
  531. }
  532. }
  533. }
  534. }
  535. }
  536. // TODO some of this is duplicated in other steps here, a future breaking change may be able to remove the duplicates involved in this step
  537. if (steps.includes('normalizeData')) {
  538. const { normalizeData } = require('./normalize-data.js')
  539. normalizeData(data, changes)
  540. }
  541. // Warn if the bin references don't point to anything. This might be better
  542. // in normalize-package-data if it had access to the file path.
  543. if (steps.includes('binRefs') && data.bin instanceof Object) {
  544. for (const key in data.bin) {
  545. try {
  546. await fs.access(path.resolve(pkg.path, data.bin[key]))
  547. } catch {
  548. log.warn('package-json', pkgId, `No bin file found at ${data.bin[key]}`)
  549. // XXX: should a future breaking change delete bin entries that cannot be accessed?
  550. }
  551. }
  552. }
  553. }
  554. module.exports = normalize