// smartquotes.mjs
  // Convert straight quotation marks to typographic ones
  //
  3. import { isWhiteSpace, isPunctChar, isMdAsciiPunct } from '../common/utils.mjs'
  // Non-global pattern: a stateless check for "does this string contain a
  // straight quote at all?" (a /g regex would carry `lastIndex` between calls).
  const QUOTE_TEST_RE = /['"]/
  // Global pattern: scanned with an explicitly assigned `lastIndex` so the
  // search can resume right after the previously handled quote.
  const QUOTE_RE = /['"]/g
  // Substituted for a single straight quote that cannot be paired.
  const APOSTROPHE = '\u2019' /* ’ */
  /**
   * Build a new string in which the single UTF-16 code unit at `index` is
   * replaced by `ch`. `ch` may be longer (or shorter) than one code unit, so
   * the result's length can differ from the input's.
   *
   * @param {string} str - source string (not modified)
   * @param {number} index - position of the code unit to replace
   * @param {string} ch - replacement text
   * @returns {string} the string with the replacement applied
   */
  function replaceAt (str, index, ch) {
    const head = str.slice(0, index)
    const tail = str.slice(index + 1)
    return head + ch + tail
  }
  /**
   * Replace straight quotes in the `text` tokens of one inline token list with
   * typographic quotes, editing `token.content` in place.
   *
   * Opening quotes are remembered on a stack and paired with a later closing
   * quote of the same kind (single/double) at the same nesting `level`.
   * A single quote that cannot act as an opener is replaced by APOSTROPHE.
   *
   * @param {Array} tokens - inline child tokens; each is read for `type`,
   *   `level` and `content`
   * @param {object} state - only `state.md.options.quotes` is read here; it is
   *   indexed as [0] double-open, [1] double-close, [2] single-open,
   *   [3] single-close
   */
  function process_inlines (tokens, state) {
    let j
    // Unmatched opening quotes seen so far: { token, pos, single, level }
    const stack = []

    for (let i = 0; i < tokens.length; i++) {
      const token = tokens[i]
      const thisLevel = token.level

      // Drop pending openers that belong to a deeper nesting level than the
      // current token; they can no longer be closed.
      for (j = stack.length - 1; j >= 0; j--) {
        if (stack[j].level <= thisLevel) { break }
      }
      stack.length = j + 1

      if (token.type !== 'text') { continue }

      let text = token.content
      let pos = 0
      let max = text.length

      /* eslint no-labels:0,block-scoped-var:0 */
      OUTER:
      while (pos < max) {
        QUOTE_RE.lastIndex = pos
        const t = QUOTE_RE.exec(text)
        if (!t) { break }

        let canOpen = true
        let canClose = true
        pos = t.index + 1
        const isSingle = (t[0] === "'")

        // Find previous character,
        // default to space if it's the beginning of the line
        //
        let lastChar = 0x20

        if (t.index - 1 >= 0) {
          lastChar = text.charCodeAt(t.index - 1)
        } else {
          for (j = i - 1; j >= 0; j--) {
            if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break // lastChar defaults to 0x20
            if (!tokens[j].content) continue // should skip all tokens except 'text', 'html_inline' or 'code_inline'

            lastChar = tokens[j].content.charCodeAt(tokens[j].content.length - 1)
            break
          }
        }

        // Find next character,
        // default to space if it's the end of the line
        //
        let nextChar = 0x20

        if (pos < max) {
          nextChar = text.charCodeAt(pos)
        } else {
          for (j = i + 1; j < tokens.length; j++) {
            if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') break // nextChar defaults to 0x20
            if (!tokens[j].content) continue // should skip all tokens except 'text', 'html_inline' or 'code_inline'

            nextChar = tokens[j].content.charCodeAt(0)
            break
          }
        }

        const isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar))
        const isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar))

        const isLastWhiteSpace = isWhiteSpace(lastChar)
        const isNextWhiteSpace = isWhiteSpace(nextChar)

        // A quote cannot open when followed by whitespace, or when followed by
        // punctuation unless it is also preceded by whitespace/punctuation.
        if (isNextWhiteSpace) {
          canOpen = false
        } else if (isNextPunctChar) {
          if (!(isLastWhiteSpace || isLastPunctChar)) {
            canOpen = false
          }
        }

        // Mirror rule for closing, with the roles of previous/next swapped.
        if (isLastWhiteSpace) {
          canClose = false
        } else if (isLastPunctChar) {
          if (!(isNextWhiteSpace || isNextPunctChar)) {
            canClose = false
          }
        }

        if (nextChar === 0x22 /* " */ && t[0] === '"') {
          if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {
            // special case: 1"" - count first quote as an inch
            canClose = canOpen = false
          }
        }

        if (canOpen && canClose) {
          // Replace quotes in the middle of punctuation sequence, but not
          // in the middle of the words, i.e.:
          //
          // 1. foo " bar " baz - not replaced
          // 2. foo-"-bar-"-baz - replaced
          // 3. foo"bar"baz     - not replaced
          //
          canOpen = isLastPunctChar
          canClose = isNextPunctChar
        }

        if (!canOpen && !canClose) {
          // middle of word
          if (isSingle) {
            token.content = replaceAt(token.content, t.index, APOSTROPHE)
          }
          continue
        }

        if (canClose) {
          // this could be a closing quote, rewind the stack to get a match
          for (j = stack.length - 1; j >= 0; j--) {
            const item = stack[j]
            if (item.level < thisLevel) { break }
            if (item.single === isSingle && item.level === thisLevel) {
              const quotes = state.md.options.quotes
              let openQuote
              let closeQuote
              if (isSingle) {
                openQuote = quotes[2]
                closeQuote = quotes[3]
              } else {
                openQuote = quotes[0]
                closeQuote = quotes[1]
              }

              // replace token.content *before* tokens[item.token].content,
              // because, if they are pointing at the same token, replaceAt
              // could mess up indices when quote length != 1
              token.content = replaceAt(token.content, t.index, closeQuote)
              tokens[item.token].content = replaceAt(
                tokens[item.token].content, item.pos, openQuote)

              // Shift the scan position by however much the replacements
              // lengthened the current token's text.
              pos += closeQuote.length - 1
              if (item.token === i) { pos += openQuote.length - 1 }

              text = token.content
              max = text.length

              // Discard the matched opener and everything pushed after it.
              stack.length = j
              continue OUTER
            }
          }
        }

        if (canOpen) {
          stack.push({
            token: i,
            pos: t.index,
            single: isSingle,
            level: thisLevel
          })
        } else if (canClose && isSingle) {
          token.content = replaceAt(token.content, t.index, APOSTROPHE)
        }
      }
    }
  }
  /**
   * Core rule entry point: when `state.md.options.typographer` is truthy, run
   * the quote replacement over the children of every `inline` token in
   * `state.tokens` whose raw `content` contains a straight quote.
   *
   * Tokens are visited from last to first. Does nothing when the typographer
   * option is off.
   *
   * @param {object} state - read for `md.options.typographer` and `tokens`;
   *   passed through unchanged to `process_inlines`
   */
  export default function smartquotes (state) {
    /* eslint max-depth:0 */
    if (!state.md.options.typographer) { return }

    for (let blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
      const blockToken = state.tokens[blkIdx]

      // Cheap pre-filter: skip tokens that are not inline containers or that
      // contain no straight quote at all.
      if (blockToken.type !== 'inline' ||
          !QUOTE_TEST_RE.test(blockToken.content)) {
        continue
      }

      process_inlines(blockToken.children, state)
    }
  }
  158. }