index.mjs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. // Main parser class
  2. import * as utils from './common/utils.mjs'
  3. import * as helpers from './helpers/index.mjs'
  4. import Renderer from './renderer.mjs'
  5. import ParserCore from './parser_core.mjs'
  6. import ParserBlock from './parser_block.mjs'
  7. import ParserInline from './parser_inline.mjs'
  8. import LinkifyIt from 'linkify-it'
  9. import * as mdurl from 'mdurl'
  10. import punycode from 'punycode.js'
  11. import cfg_default from './presets/default.mjs'
  12. import cfg_zero from './presets/zero.mjs'
  13. import cfg_commonmark from './presets/commonmark.mjs'
  14. const config = {
  15. default: cfg_default,
  16. zero: cfg_zero,
  17. commonmark: cfg_commonmark
  18. }
  19. //
  20. // This validator can prohibit more than really needed to prevent XSS. It's a
  21. // tradeoff to keep code simple and to be secure by default.
  22. //
  23. // If you need different setup - override validator method as you wish. Or
  24. // replace it with dummy function and use external sanitizer.
  25. //
  26. const BAD_PROTO_RE = /^(vbscript|javascript|file|data):/
  27. const GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/
  28. function validateLink (url) {
  29. // url should be normalized at this point, and existing entities are decoded
  30. const str = url.trim().toLowerCase()
  31. return BAD_PROTO_RE.test(str) ? GOOD_DATA_RE.test(str) : true
  32. }
  33. const RECODE_HOSTNAME_FOR = ['http:', 'https:', 'mailto:']
  34. function normalizeLink (url) {
  35. const parsed = mdurl.parse(url, true)
  36. if (parsed.hostname) {
  37. // Encode hostnames in urls like:
  38. // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  39. //
  40. // We don't encode unknown schemas, because it's likely that we encode
  41. // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  42. //
  43. if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
  44. try {
  45. parsed.hostname = punycode.toASCII(parsed.hostname)
  46. } catch (er) { /**/ }
  47. }
  48. }
  49. return mdurl.encode(mdurl.format(parsed))
  50. }
  51. function normalizeLinkText (url) {
  52. const parsed = mdurl.parse(url, true)
  53. if (parsed.hostname) {
  54. // Encode hostnames in urls like:
  55. // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
  56. //
  57. // We don't encode unknown schemas, because it's likely that we encode
  58. // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
  59. //
  60. if (!parsed.protocol || RECODE_HOSTNAME_FOR.indexOf(parsed.protocol) >= 0) {
  61. try {
  62. parsed.hostname = punycode.toUnicode(parsed.hostname)
  63. } catch (er) { /**/ }
  64. }
  65. }
  66. // add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
  67. return mdurl.decode(mdurl.format(parsed), mdurl.decode.defaultChars + '%')
  68. }
  69. /**
  70. * class MarkdownIt
  71. *
  72. * Main parser/renderer class.
  73. *
  74. * ##### Usage
  75. *
  76. * ```javascript
  77. * // node.js, "classic" way:
  78. * var MarkdownIt = require('markdown-it'),
  79. * md = new MarkdownIt();
  80. * var result = md.render('# markdown-it rulezz!');
  81. *
  82. * // node.js, the same, but with sugar:
  83. * var md = require('markdown-it')();
  84. * var result = md.render('# markdown-it rulezz!');
  85. *
  86. * // browser without AMD, added to "window" on script load
  87. * // Note, there is no dash.
  88. * var md = window.markdownit();
  89. * var result = md.render('# markdown-it rulezz!');
  90. * ```
  91. *
  92. * Single line rendering, without paragraph wrap:
  93. *
  94. * ```javascript
  95. * var md = require('markdown-it')();
  96. * var result = md.renderInline('__markdown-it__ rulezz!');
  97. * ```
  98. **/
  99. /**
  100. * new MarkdownIt([presetName, options])
  101. * - presetName (String): optional, `commonmark` / `zero`
  102. * - options (Object)
  103. *
  104. * Creates parser instance with given config. Can be called without `new`.
  105. *
  106. * ##### presetName
  107. *
  108. * MarkdownIt provides named presets as a convenience to quickly
  109. * enable/disable active syntax rules and options for common use cases.
  110. *
  111. * - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.mjs) -
  112. * configures parser to strict [CommonMark](http://commonmark.org/) mode.
  113. * - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.mjs) -
  114. * similar to GFM, used when no preset name given. Enables all available rules,
  115. * but still without html, typographer & autolinker.
  116. * - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.mjs) -
  117. * all rules disabled. Useful to quickly setup your config via `.enable()`.
  118. * For example, when you need only `bold` and `italic` markup and nothing else.
  119. *
  120. * ##### options:
  121. *
  122. * - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
  123. * That's not safe! You may need external sanitizer to protect output from XSS.
  124. * It's better to extend features via plugins, instead of enabling HTML.
  125. * - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
  126. * (`<br />`). This is needed only for full CommonMark compatibility. In real
  127. * world you will need HTML output.
  128. * - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
  129. * - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
  130. * Can be useful for external highlighters.
  131. * - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
  132. * - __typographer__ - `false`. Set `true` to enable [some language-neutral
  133. * replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.mjs) +
  134. * quotes beautification (smartquotes).
  135. * - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
  136. * pairs, when typographer enabled and smartquotes on. For example, you can
  137. * use `'«»„“'` for Russian, `'„“‚‘'` for German, and
  138. * `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
  139. * - __highlight__ - `null`. Highlighter function for fenced code blocks.
  140. * Highlighter `function (str, lang)` should return escaped HTML. It can also
  141. * return empty string if the source was not changed and should be escaped
  142. * externally. If result starts with <pre... internal wrapper is skipped.
  143. *
  144. * ##### Example
  145. *
  146. * ```javascript
  147. * // commonmark mode
  148. * var md = require('markdown-it')('commonmark');
  149. *
  150. * // default mode
  151. * var md = require('markdown-it')();
  152. *
  153. * // enable everything
  154. * var md = require('markdown-it')({
  155. * html: true,
  156. * linkify: true,
  157. * typographer: true
  158. * });
  159. * ```
  160. *
  161. * ##### Syntax highlighting
  162. *
  163. * ```js
  164. * var hljs = require('highlight.js') // https://highlightjs.org/
  165. *
  166. * var md = require('markdown-it')({
  167. * highlight: function (str, lang) {
  168. * if (lang && hljs.getLanguage(lang)) {
  169. * try {
  170. * return hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
  171. * } catch (__) {}
  172. * }
  173. *
  174. * return ''; // use external default escaping
  175. * }
  176. * });
  177. * ```
  178. *
  179. * Or with full wrapper override (if you need assign class to `<pre>` or `<code>`):
  180. *
  181. * ```javascript
  182. * var hljs = require('highlight.js') // https://highlightjs.org/
  183. *
  184. * // Actual default values
  185. * var md = require('markdown-it')({
  186. * highlight: function (str, lang) {
  187. * if (lang && hljs.getLanguage(lang)) {
  188. * try {
  189. * return '<pre><code class="hljs">' +
  190. * hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +
  191. * '</code></pre>';
  192. * } catch (__) {}
  193. * }
  194. *
  195. * return '<pre><code class="hljs">' + md.utils.escapeHtml(str) + '</code></pre>';
  196. * }
  197. * });
  198. * ```
  199. *
  200. **/
  201. function MarkdownIt (presetName, options) {
  202. if (!(this instanceof MarkdownIt)) {
  203. return new MarkdownIt(presetName, options)
  204. }
  205. if (!options) {
  206. if (!utils.isString(presetName)) {
  207. options = presetName || {}
  208. presetName = 'default'
  209. }
  210. }
  211. /**
  212. * MarkdownIt#inline -> ParserInline
  213. *
  214. * Instance of [[ParserInline]]. You may need it to add new rules when
  215. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  216. * [[MarkdownIt.enable]].
  217. **/
  218. this.inline = new ParserInline()
  219. /**
  220. * MarkdownIt#block -> ParserBlock
  221. *
  222. * Instance of [[ParserBlock]]. You may need it to add new rules when
  223. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  224. * [[MarkdownIt.enable]].
  225. **/
  226. this.block = new ParserBlock()
  227. /**
  228. * MarkdownIt#core -> Core
  229. *
  230. * Instance of [[Core]] chain executor. You may need it to add new rules when
  231. * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
  232. * [[MarkdownIt.enable]].
  233. **/
  234. this.core = new ParserCore()
  235. /**
  236. * MarkdownIt#renderer -> Renderer
  237. *
  238. * Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
  239. * rules for new token types, generated by plugins.
  240. *
  241. * ##### Example
  242. *
  243. * ```javascript
  244. * var md = require('markdown-it')();
  245. *
  246. * function myToken(tokens, idx, options, env, self) {
  247. * //...
  248. * return result;
  249. * };
  250. *
  251. * md.renderer.rules['my_token'] = myToken
  252. * ```
  253. *
  254. * See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.mjs).
  255. **/
  256. this.renderer = new Renderer()
  257. /**
  258. * MarkdownIt#linkify -> LinkifyIt
  259. *
  260. * [linkify-it](https://github.com/markdown-it/linkify-it) instance.
  261. * Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.mjs)
  262. * rule.
  263. **/
  264. this.linkify = new LinkifyIt()
  265. /**
  266. * MarkdownIt#validateLink(url) -> Boolean
  267. *
  268. * Link validation function. CommonMark allows too much in links. By default
  269. * we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
  270. * except some embedded image types.
  271. *
  272. * You can change this behaviour:
  273. *
  274. * ```javascript
  275. * var md = require('markdown-it')();
  276. * // enable everything
  277. * md.validateLink = function () { return true; }
  278. * ```
  279. **/
  280. this.validateLink = validateLink
  281. /**
  282. * MarkdownIt#normalizeLink(url) -> String
  283. *
  284. * Function used to encode link url to a machine-readable format,
  285. * which includes url-encoding, punycode, etc.
  286. **/
  287. this.normalizeLink = normalizeLink
  288. /**
  289. * MarkdownIt#normalizeLinkText(url) -> String
  290. *
  291. * Function used to decode link url to a human-readable format`
  292. **/
  293. this.normalizeLinkText = normalizeLinkText
  294. // Expose utils & helpers for easy acces from plugins
  295. /**
  296. * MarkdownIt#utils -> utils
  297. *
  298. * Assorted utility functions, useful to write plugins. See details
  299. * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.mjs).
  300. **/
  301. this.utils = utils
  302. /**
  303. * MarkdownIt#helpers -> helpers
  304. *
  305. * Link components parser functions, useful to write plugins. See details
  306. * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
  307. **/
  308. this.helpers = utils.assign({}, helpers)
  309. this.options = {}
  310. this.configure(presetName)
  311. if (options) { this.set(options) }
  312. }
  313. /** chainable
  314. * MarkdownIt.set(options)
  315. *
  316. * Set parser options (in the same format as in constructor). Probably, you
  317. * will never need it, but you can change options after constructor call.
  318. *
  319. * ##### Example
  320. *
  321. * ```javascript
  322. * var md = require('markdown-it')()
  323. * .set({ html: true, breaks: true })
  324. * .set({ typographer, true });
  325. * ```
  326. *
  327. * __Note:__ To achieve the best possible performance, don't modify a
  328. * `markdown-it` instance options on the fly. If you need multiple configurations
  329. * it's best to create multiple instances and initialize each with separate
  330. * config.
  331. **/
  332. MarkdownIt.prototype.set = function (options) {
  333. utils.assign(this.options, options)
  334. return this
  335. }
  336. /** chainable, internal
  337. * MarkdownIt.configure(presets)
  338. *
  339. * Batch load of all options and compenent settings. This is internal method,
  340. * and you probably will not need it. But if you will - see available presets
  341. * and data structure [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
  342. *
  343. * We strongly recommend to use presets instead of direct config loads. That
  344. * will give better compatibility with next versions.
  345. **/
  346. MarkdownIt.prototype.configure = function (presets) {
  347. const self = this
  348. if (utils.isString(presets)) {
  349. const presetName = presets
  350. presets = config[presetName]
  351. if (!presets) { throw new Error('Wrong `markdown-it` preset "' + presetName + '", check name') }
  352. }
  353. if (!presets) { throw new Error('Wrong `markdown-it` preset, can\'t be empty') }
  354. if (presets.options) { self.set(presets.options) }
  355. if (presets.components) {
  356. Object.keys(presets.components).forEach(function (name) {
  357. if (presets.components[name].rules) {
  358. self[name].ruler.enableOnly(presets.components[name].rules)
  359. }
  360. if (presets.components[name].rules2) {
  361. self[name].ruler2.enableOnly(presets.components[name].rules2)
  362. }
  363. })
  364. }
  365. return this
  366. }
  367. /** chainable
  368. * MarkdownIt.enable(list, ignoreInvalid)
  369. * - list (String|Array): rule name or list of rule names to enable
  370. * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
  371. *
  372. * Enable list or rules. It will automatically find appropriate components,
  373. * containing rules with given names. If rule not found, and `ignoreInvalid`
  374. * not set - throws exception.
  375. *
  376. * ##### Example
  377. *
  378. * ```javascript
  379. * var md = require('markdown-it')()
  380. * .enable(['sub', 'sup'])
  381. * .disable('smartquotes');
  382. * ```
  383. **/
  384. MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
  385. let result = []
  386. if (!Array.isArray(list)) { list = [list] }
  387. ['core', 'block', 'inline'].forEach(function (chain) {
  388. result = result.concat(this[chain].ruler.enable(list, true))
  389. }, this)
  390. result = result.concat(this.inline.ruler2.enable(list, true))
  391. const missed = list.filter(function (name) { return result.indexOf(name) < 0 })
  392. if (missed.length && !ignoreInvalid) {
  393. throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + missed)
  394. }
  395. return this
  396. }
  397. /** chainable
  398. * MarkdownIt.disable(list, ignoreInvalid)
  399. * - list (String|Array): rule name or list of rule names to disable.
  400. * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
  401. *
  402. * The same as [[MarkdownIt.enable]], but turn specified rules off.
  403. **/
  404. MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
  405. let result = []
  406. if (!Array.isArray(list)) { list = [list] }
  407. ['core', 'block', 'inline'].forEach(function (chain) {
  408. result = result.concat(this[chain].ruler.disable(list, true))
  409. }, this)
  410. result = result.concat(this.inline.ruler2.disable(list, true))
  411. const missed = list.filter(function (name) { return result.indexOf(name) < 0 })
  412. if (missed.length && !ignoreInvalid) {
  413. throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + missed)
  414. }
  415. return this
  416. }
  417. /** chainable
  418. * MarkdownIt.use(plugin, params)
  419. *
  420. * Load specified plugin with given params into current parser instance.
  421. * It's just a sugar to call `plugin(md, params)` with curring.
  422. *
  423. * ##### Example
  424. *
  425. * ```javascript
  426. * var iterator = require('markdown-it-for-inline');
  427. * var md = require('markdown-it')()
  428. * .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
  429. * tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
  430. * });
  431. * ```
  432. **/
  433. MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
  434. const args = [this].concat(Array.prototype.slice.call(arguments, 1))
  435. plugin.apply(plugin, args)
  436. return this
  437. }
  438. /** internal
  439. * MarkdownIt.parse(src, env) -> Array
  440. * - src (String): source string
  441. * - env (Object): environment sandbox
  442. *
  443. * Parse input string and return list of block tokens (special token type
  444. * "inline" will contain list of inline tokens). You should not call this
  445. * method directly, until you write custom renderer (for example, to produce
  446. * AST).
  447. *
  448. * `env` is used to pass data between "distributed" rules and return additional
  449. * metadata like reference info, needed for the renderer. It also can be used to
  450. * inject data in specific cases. Usually, you will be ok to pass `{}`,
  451. * and then pass updated object to renderer.
  452. **/
  453. MarkdownIt.prototype.parse = function (src, env) {
  454. if (typeof src !== 'string') {
  455. throw new Error('Input data should be a String')
  456. }
  457. const state = new this.core.State(src, this, env)
  458. this.core.process(state)
  459. return state.tokens
  460. }
  461. /**
  462. * MarkdownIt.render(src [, env]) -> String
  463. * - src (String): source string
  464. * - env (Object): environment sandbox
  465. *
  466. * Render markdown string into html. It does all magic for you :).
  467. *
  468. * `env` can be used to inject additional metadata (`{}` by default).
  469. * But you will not need it with high probability. See also comment
  470. * in [[MarkdownIt.parse]].
  471. **/
  472. MarkdownIt.prototype.render = function (src, env) {
  473. env = env || {}
  474. return this.renderer.render(this.parse(src, env), this.options, env)
  475. }
  476. /** internal
  477. * MarkdownIt.parseInline(src, env) -> Array
  478. * - src (String): source string
  479. * - env (Object): environment sandbox
  480. *
  481. * The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
  482. * block tokens list with the single `inline` element, containing parsed inline
  483. * tokens in `children` property. Also updates `env` object.
  484. **/
  485. MarkdownIt.prototype.parseInline = function (src, env) {
  486. const state = new this.core.State(src, this, env)
  487. state.inlineMode = true
  488. this.core.process(state)
  489. return state.tokens
  490. }
  491. /**
  492. * MarkdownIt.renderInline(src [, env]) -> String
  493. * - src (String): source string
  494. * - env (Object): environment sandbox
  495. *
  496. * Similar to [[MarkdownIt.render]] but for single paragraph content. Result
  497. * will NOT be wrapped into `<p>` tags.
  498. **/
  499. MarkdownIt.prototype.renderInline = function (src, env) {
  500. env = env || {}
  501. return this.renderer.render(this.parseInline(src, env), this.options, env)
  502. }
  503. export default MarkdownIt