index.js 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. // Enclose abbreviations in <abbr> tags
  2. //
  3. 'use strict';
  4. module.exports = function sub_plugin(md) {
  // Helpers borrowed from the markdown-it instance this plugin is attached to.
  var utils = md.utils;
  var escapeRE = utils.escapeRE;
  var arrayReplaceAt = utils.arrayReplaceAt;

  // Extra ASCII characters (from the Cc, Sc, Sm and Sk Unicode categories)
  // that also count as a word boundary around an abbreviation. Character
  // classes can be looked up in:
  // http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
  var OTHER_CHARS = ' \r\n$+<=>^`|~';

  // Regex sources for Unicode punctuation (P) and separators (Z); they are
  // spliced into the boundary groups of the abbreviation-matching regex.
  var ucmicro = utils.lib.ucmicro;
  var UNICODE_PUNCT_RE = ucmicro.P.source;
  var UNICODE_SPACE_RE = ucmicro.Z.source;
  /**
   * Block rule: recognise an abbreviation definition line of the form
   * `*[label]: title` and record it in `state.env.abbreviations`.
   *
   * The label may contain backslash-escaped characters (e.g. `\]`); escapes
   * are removed before the label is stored. Entries are stored under the key
   * `':' + label`, and the first definition of a label wins.
   *
   * @param {object} state - block parser state; reads `src`, `bMarks`,
   *   `tShift`, `eMarks`, and writes `env.abbreviations` and `line`.
   * @param {number} startLine - index of the line to examine.
   * @param {number} endLine - unused by this rule.
   * @param {boolean} silent - when true, only validate; do not modify state.
   * @returns {boolean} true if the line is a valid abbreviation definition.
   */
  function abbr_def(state, startLine, endLine, silent) {
    var label, title, ch, labelStart,
        // Must start negative: if no closing ']' is found, the check below
        // has to reject the line instead of reading charCodeAt(NaN), which
        // silently inspects index 0 of the whole source.
        labelEnd = -1,
        pos = state.bMarks[startLine] + state.tShift[startLine],
        max = state.eMarks[startLine];

    // Need at least "*[" plus one more character on the line.
    if (pos + 2 >= max) { return false; }

    if (state.src.charCodeAt(pos++) !== 0x2A/* * */) { return false; }
    if (state.src.charCodeAt(pos++) !== 0x5B/* [ */) { return false; }

    labelStart = pos;

    for (; pos < max; pos++) {
      ch = state.src.charCodeAt(pos);
      if (ch === 0x5B /* [ */) {
        // Nested opening bracket is not allowed inside a label.
        return false;
      } else if (ch === 0x5D /* ] */) {
        labelEnd = pos;
        break;
      } else if (ch === 0x5C /* \ */) {
        // Skip the escaped character so that "\]" does not close the label.
        pos++;
      }
    }

    if (labelEnd < 0 || state.src.charCodeAt(labelEnd + 1) !== 0x3A/* : */) {
      return false;
    }

    label = state.src.slice(labelStart, labelEnd).replace(/\\(.)/g, '$1');
    title = state.src.slice(labelEnd + 2, max).trim();

    // Validate before honouring `silent`, so that validation mode and normal
    // mode always agree on whether this line is a definition.
    if (label.length === 0) { return false; }
    if (title.length === 0) { return false; }

    if (silent) { return true; }

    if (!state.env.abbreviations) { state.env.abbreviations = {}; }

    // prepend ':' to avoid conflict with Object.prototype members
    if (typeof state.env.abbreviations[':' + label] === 'undefined') {
      state.env.abbreviations[':' + label] = title;
    }

    state.line = startLine + 1;
    return true;
  }
  /**
   * Core rule: wrap every occurrence of a known abbreviation inside inline
   * text tokens with `abbr_open` / `abbr_close` tokens carrying the
   * definition as a `title` attribute.
   *
   * An abbreviation is only matched when it is delimited on both sides by
   * the start/end of the text, Unicode punctuation, a Unicode separator, or
   * one of the characters in OTHER_CHARS.
   *
   * @param {object} state - core state; reads `env.abbreviations`, `tokens`
   *   and `Token`, and replaces `children` of inline tokens in place.
   * @returns {undefined}
   */
  function abbr_replace(state) {
    var i, j, l, tokens, token, text, nodes, pos, reg, m, regSimple,
        currentToken, labels, alternation, boundary,
        blockTokens = state.tokens;

    if (!state.env.abbreviations) { return; }

    // Keys are stored as ':' + label; strip the prefix. Empty labels are
    // dropped: an empty alternative lets the global regex match without
    // advancing, which would make the exec loop below spin forever.
    labels = Object.keys(state.env.abbreviations).map(function (x) {
      return x.slice(1);
    }).filter(function (x) {
      return x.length > 0;
    });

    if (labels.length === 0) { return; }

    // Longest labels first so that e.g. "HTML" is preferred over "HT".
    alternation = labels.sort(function (a, b) {
      return b.length - a.length;
    }).map(escapeRE).join('|');

    regSimple = new RegExp('(?:' + alternation + ')');

    boundary = UNICODE_PUNCT_RE + '|' + UNICODE_SPACE_RE +
      '|[' + OTHER_CHARS.split('').map(escapeRE).join('') + ']';

    reg = new RegExp(
      '(^|' + boundary + ')(' + alternation + ')($|' + boundary + ')',
      'g'
    );

    for (j = 0, l = blockTokens.length; j < l; j++) {
      if (blockTokens[j].type !== 'inline') { continue; }
      tokens = blockTokens[j].children;

      // We scan from the end, to keep position when new tags added.
      for (i = tokens.length - 1; i >= 0; i--) {
        currentToken = tokens[i];
        if (currentToken.type !== 'text') { continue; }

        pos = 0;
        text = currentToken.content;
        reg.lastIndex = 0;
        nodes = [];

        // fast regexp run to determine whether there are any abbreviated
        // words in the current token
        if (!regSimple.test(text)) { continue; }

        while ((m = reg.exec(text))) {
          // Text between the previous match and this abbreviation,
          // including the leading boundary character.
          if (m.index > 0 || m[1].length > 0) {
            token = new state.Token('text', '', 0);
            token.content = text.slice(pos, m.index + m[1].length);
            nodes.push(token);
          }

          token = new state.Token('abbr_open', 'abbr', 1);
          token.attrs = [ [ 'title', state.env.abbreviations[':' + m[2]] ] ];
          nodes.push(token);

          token = new state.Token('text', '', 0);
          token.content = m[2];
          nodes.push(token);

          token = new state.Token('abbr_close', 'abbr', -1);
          nodes.push(token);

          // Give the trailing boundary back so it can serve as the leading
          // boundary of the next abbreviation.
          reg.lastIndex -= m[3].length;
          pos = reg.lastIndex;
        }

        if (!nodes.length) { continue; }

        if (pos < text.length) {
          token = new state.Token('text', '', 0);
          token.content = text.slice(pos);
          nodes.push(token);
        }

        // replace current node
        blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes);
      }
    }
  }
  // Hook the definition parser into the block chain ahead of 'reference';
  // the `alt` list names the rules it is registered as an alternative for.
  var abbrDefOptions = { alt: [ 'paragraph', 'reference' ] };
  md.block.ruler.before('reference', 'abbr_def', abbr_def, abbrDefOptions);

  // Run the replacement pass in the core chain right after 'linkify'.
  md.core.ruler.after('linkify', 'abbr_replace', abbr_replace);
  114. };