// Convert straight quotation marks to typographic ones
//
- 'use strict';
- var isWhiteSpace = require('../common/utils').isWhiteSpace;
- var isPunctChar = require('../common/utils').isPunctChar;
- var isMdAsciiPunct = require('../common/utils').isMdAsciiPunct;
// Non-global probe used by the exported rule to cheaply decide whether an
// inline token contains any straight quote at all. Because it has no `g`
// flag, `.test()` keeps no `lastIndex` state between calls.
var QUOTE_TEST_RE = /['"]/;

// Global scanner used by process_inlines, which assigns `lastIndex` before
// every `exec()` call to resume the search right after the previous quote.
var QUOTE_RE = /['"]/g;

// Character substituted for a straight single quote that cannot be paired
// as an opening or closing quotation mark.
var APOSTROPHE = '\u2019'; /* ’ */
/**
 * Returns a copy of `str` in which the single UTF-16 code unit at `index`
 * is replaced by `ch`.
 *
 * `ch` may be any length (including empty), so the result can be longer or
 * shorter than `str`; callers that keep positions into the string must
 * adjust them themselves.
 *
 * @param {string} str   - source string (not modified)
 * @param {number} index - zero-based, non-negative position of the code unit to replace
 * @param {string} ch    - replacement text
 * @returns {string} the new string
 */
function replaceAt(str, index, ch) {
  // `slice` instead of the legacy `substr`; identical for non-negative indices.
  return str.slice(0, index) + ch + str.slice(index + 1);
}
/**
 * Finds the character code neighbouring a quote that sits at the very edge
 * of `tokens[from]`, by walking sibling tokens in the direction of `step`.
 *
 * Tokens with empty/missing `content` are skipped. A `softbreak` or
 * `hardbreak` token, or running out of tokens, yields a space (0x20), so the
 * quote is treated as standing at the start/end of a line.
 *
 * @param {Array}  tokens - inline child tokens
 * @param {number} from   - index of the token that contains the quote
 * @param {number} step   - -1 to look backwards, +1 to look forwards
 * @returns {number} UTF-16 code unit of the adjacent character, or 0x20
 */
function adjacentCharCode(tokens, from, step) {
  var j, content;

  for (j = from + step; j >= 0 && j < tokens.length; j += step) {
    if (tokens[j].type === 'softbreak' || tokens[j].type === 'hardbreak') { break; }

    content = tokens[j].content;
    // should skip all tokens except 'text', 'html_inline' or 'code_inline'
    if (!content) { continue; }

    return content.charCodeAt(step < 0 ? content.length - 1 : 0);
  }

  return 0x20;
}

/**
 * Replaces straight quotes in the `text` tokens of one inline token list
 * with typographic quotes, mutating `token.content` in place.
 *
 * Each `'` / `"` is classified as able to open and/or close a quotation
 * from the characters on either side of it (whitespace, punctuation, other).
 * Openers are pushed on a stack; a closer is paired with the nearest stacked
 * opener of the same kind and the same nesting `level`. Single quotes that
 * cannot be paired that way become an apostrophe.
 *
 * Replacement strings are read from `state.md.options.quotes`:
 * indices 0/1 are the double open/close quotes, 2/3 the single open/close.
 *
 * @param {Array}  tokens - inline child tokens (each has `type`, `level`, `content`)
 * @param {Object} state  - parser state; only `state.md.options.quotes` is read
 * @returns {void}
 */
function process_inlines(tokens, state) {
  var i, token, text, t, pos, max, thisLevel, item, lastChar, nextChar,
      isLastPunctChar, isNextPunctChar, isLastWhiteSpace, isNextWhiteSpace,
      canOpen, canClose, j, isSingle, stack, openQuote, closeQuote;

  // Pending opening quotes: { token, pos, single, level }.
  stack = [];

  for (i = 0; i < tokens.length; i++) {
    token = tokens[i];
    thisLevel = token.level;

    // Drop openers that belong to a deeper nesting level than this token.
    for (j = stack.length - 1; j >= 0; j--) {
      if (stack[j].level <= thisLevel) { break; }
    }
    stack.length = j + 1;

    if (token.type !== 'text') { continue; }

    text = token.content;
    pos = 0;
    max = text.length;

    /*eslint no-labels:0,block-scoped-var:0*/
    OUTER:
    while (pos < max) {
      QUOTE_RE.lastIndex = pos;
      t = QUOTE_RE.exec(text);
      if (!t) { break; }

      canOpen = canClose = true;
      pos = t.index + 1;
      isSingle = (t[0] === "'");

      // Previous character; defaults to space at the beginning of the line.
      lastChar = t.index - 1 >= 0 ?
        text.charCodeAt(t.index - 1) :
        adjacentCharCode(tokens, i, -1);

      // Next character; defaults to space at the end of the line.
      nextChar = pos < max ?
        text.charCodeAt(pos) :
        adjacentCharCode(tokens, i, 1);

      isLastPunctChar = isMdAsciiPunct(lastChar) || isPunctChar(String.fromCharCode(lastChar));
      isNextPunctChar = isMdAsciiPunct(nextChar) || isPunctChar(String.fromCharCode(nextChar));

      isLastWhiteSpace = isWhiteSpace(lastChar);
      isNextWhiteSpace = isWhiteSpace(nextChar);

      if (isNextWhiteSpace) {
        canOpen = false;
      } else if (isNextPunctChar) {
        if (!(isLastWhiteSpace || isLastPunctChar)) {
          canOpen = false;
        }
      }

      if (isLastWhiteSpace) {
        canClose = false;
      } else if (isLastPunctChar) {
        if (!(isNextWhiteSpace || isNextPunctChar)) {
          canClose = false;
        }
      }

      if (nextChar === 0x22 /* " */ && t[0] === '"') {
        if (lastChar >= 0x30 /* 0 */ && lastChar <= 0x39 /* 9 */) {
          // special case: 1"" - count first quote as an inch
          canClose = canOpen = false;
        }
      }

      if (canOpen && canClose) {
        // Replace quotes in the middle of punctuation sequence, but not
        // in the middle of the words, i.e.:
        //
        // 1. foo " bar " baz - not replaced
        // 2. foo-"-bar-"-baz - replaced
        // 3. foo"bar"baz     - not replaced
        //
        canOpen = isLastPunctChar;
        canClose = isNextPunctChar;
      }

      if (!canOpen && !canClose) {
        // middle of word
        if (isSingle) {
          token.content = replaceAt(token.content, t.index, APOSTROPHE);
        }
        continue;
      }

      if (canClose) {
        // this could be a closing quote, rewind the stack to get a match
        for (j = stack.length - 1; j >= 0; j--) {
          item = stack[j];
          if (item.level < thisLevel) { break; }
          if (item.single === isSingle && item.level === thisLevel) {
            if (isSingle) {
              openQuote = state.md.options.quotes[2];
              closeQuote = state.md.options.quotes[3];
            } else {
              openQuote = state.md.options.quotes[0];
              closeQuote = state.md.options.quotes[1];
            }

            // replace token.content *before* tokens[item.token].content,
            // because, if they are pointing at the same token, replaceAt
            // could mess up indices when quote length != 1
            token.content = replaceAt(token.content, t.index, closeQuote);
            tokens[item.token].content = replaceAt(
              tokens[item.token].content, item.pos, openQuote);

            // Keep the scan position in step with the text, which may have
            // grown or shrunk if the replacements are not one character long.
            pos += closeQuote.length - 1;
            if (item.token === i) { pos += openQuote.length - 1; }

            text = token.content;
            max = text.length;

            // Discard the matched opener and every unmatched opener above it.
            stack.length = j;
            continue OUTER;
          }
        }
      }

      if (canOpen) {
        stack.push({
          token: i,
          pos: t.index,
          single: isSingle,
          level: thisLevel
        });
      } else if (canClose && isSingle) {
        // Unmatched closing single quote: treat it as an apostrophe.
        token.content = replaceAt(token.content, t.index, APOSTROPHE);
      }
    }
  }
}
- module.exports = function smartquotes(state) {
- /*eslint max-depth:0*/
- var blkIdx;
- if (!state.md.options.typographer) { return; }
- for (blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) {
- if (state.tokens[blkIdx].type !== 'inline' ||
- !QUOTE_TEST_RE.test(state.tokens[blkIdx].content)) {
- continue;
- }
- process_inlines(state.tokens[blkIdx].children, state);
- }
- };
|