parse.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. var Tokenizer = require('./tokenizer');
  // Character codes treated as whitespace by the parser.
  var TAB = 9;    // horizontal tab
  var N = 10;     // line feed
  var F = 12;     // form feed
  var R = 13;     // carriage return
  var SPACE = 32;

  // Character codes of the punctuation the grammar gives a meaning to.
  var EXCLAMATIONMARK = 33;     // !
  var NUMBERSIGN = 35;          // #
  var AMPERSAND = 38;           // &
  var APOSTROPHE = 39;          // '
  var LEFTPARENTHESIS = 40;     // (
  var RIGHTPARENTHESIS = 41;    // )
  var ASTERISK = 42;            // *
  var PLUSSIGN = 43;            // +
  var COMMA = 44;               // ,
  var LESSTHANSIGN = 60;        // <
  var GREATERTHANSIGN = 62;     // >
  var QUESTIONMARK = 63;        // ?
  var COMMERCIALAT = 64;        // @
  var LEFTSQUAREBRACKET = 91;   // [
  var RIGHTSQUAREBRACKET = 93;  // ]
  var LEFTCURLYBRACKET = 123;   // {
  var VERTICALLINE = 124;       // |
  var RIGHTCURLYBRACKET = 125;  // }

  // Lookup table indexed by ASCII code: 1 for characters allowed in a name
  // (latin letters, digits and hyphen), 0 for everything else.
  var NAME_CHAR = createCharMap(function(ch) {
      var namePattern = /[a-zA-Z0-9\-]/;

      return namePattern.test(ch);
  });

  // Order in which combinators are processed when terms are regrouped:
  // the combinator with the lowest number is grouped first.
  var COMBINATOR_PRECEDENCE = {
      ' ': 1,
      '&&': 2,
      '||': 3,
      '|': 4
  };
  /**
   * Build a 128-entry lookup table for the ASCII range.
   *
   * @param {function(string): *} fn Predicate called with each single-character
   *   string for codes 0..127.
   * @returns {Uint32Array|Array} Table holding 1 at every index whose character
   *   made `fn` return a truthy value and 0 elsewhere. A typed array is used
   *   when `Uint32Array` is available, a plain array otherwise.
   */
  function createCharMap(fn) {
      var SIZE = 128;
      var table;

      if (typeof Uint32Array === 'function') {
          table = new Uint32Array(SIZE);
      } else {
          table = new Array(SIZE);
      }

      for (var code = 0; code < SIZE; code++) {
          var ch = String.fromCharCode(code);

          table[code] = fn(ch) ? 1 : 0;
      }

      return table;
  }
  /**
   * Consume a run of whitespace starting at the current position.
   *
   * The search for the end of the run starts one character past the current
   * position, so the current character is taken as whitespace without a check.
   *
   * @param {Object} tokenizer Tokenizer providing `pos`, `findWsEnd()` and
   *   `substringToPos()`.
   * @returns {*} Whatever `tokenizer.substringToPos()` returns for the offset
   *   reported by `findWsEnd()` (presumably the consumed whitespace; confirm
   *   against the tokenizer module).
   */
  function scanSpaces(tokenizer) {
      var searchFrom = tokenizer.pos + 1;
      var whitespaceEnd = tokenizer.findWsEnd(searchFrom);

      return tokenizer.substringToPos(whitespaceEnd);
  }
  /**
   * Consume the longest run of name characters (as defined by NAME_CHAR)
   * starting at the current position.
   *
   * @param {Object} tokenizer Tokenizer providing `str`, `pos`, `error()` and
   *   `substringToPos()`.
   * @returns {*} Result of `tokenizer.substringToPos()` for the end of the run.
   *   `tokenizer.error('Expect a keyword')` is called first when the run is
   *   empty.
   */
  function scanWord(tokenizer) {
      var source = tokenizer.str;
      var start = tokenizer.pos;
      var end = start;

      while (end < source.length) {
          var code = source.charCodeAt(end);

          // stop at the first non-ASCII or non-name character
          if (code >= 128 || NAME_CHAR[code] === 0) {
              break;
          }

          end++;
      }

      if (end === start) {
          tokenizer.error('Expect a keyword');
      }

      return tokenizer.substringToPos(end);
  }
  /**
   * Consume the longest run of decimal digits starting at the current position.
   *
   * @param {Object} tokenizer Tokenizer providing `str`, `pos`, `error()` and
   *   `substringToPos()`.
   * @returns {*} Result of `tokenizer.substringToPos()` for the end of the
   *   digit run. `tokenizer.error('Expect a number')` is called first when
   *   there is no digit at the current position.
   */
  function scanNumber(tokenizer) {
      var source = tokenizer.str;
      var start = tokenizer.pos;
      var end = start;

      while (end < source.length) {
          var code = source.charCodeAt(end);

          // 48..57 are the codes of '0'..'9'
          if (code < 48 || code > 57) {
              break;
          }

          end++;
      }

      if (end === start) {
          tokenizer.error('Expect a number');
      }

      return tokenizer.substringToPos(end);
  }
  /**
   * Consume an apostrophe-quoted string whose opening quote is at the current
   * position. The closing quote is the next apostrophe in the input; escape
   * sequences are not interpreted.
   *
   * @param {Object} tokenizer Tokenizer providing `str`, `pos`, `error()` and
   *   `substringToPos()`.
   * @returns {*} Result of `tokenizer.substringToPos()` for the offset just
   *   past the closing apostrophe. When no closing apostrophe exists the
   *   position is moved to the end of input and
   *   `tokenizer.error('Expect an apostrophe')` is called.
   */
  function scanString(tokenizer) {
      var source = tokenizer.str;
      var closingQuote = source.indexOf('\'', tokenizer.pos + 1);

      if (closingQuote === -1) {
          // report the problem at the end of the input
          tokenizer.pos = source.length;
          tokenizer.error('Expect an apostrophe');
      }

      return tokenizer.substringToPos(closingQuote + 1);
  }
  /**
   * Read a curly-bracket range multiplier: `{A}`, `{A,B}` or `{A,}`.
   *
   * @param {Object} tokenizer Tokenizer positioned at the opening `{`.
   * @returns {{min: number, max: number}} `min` is A. `max` is A for `{A}`,
   *   B for `{A,B}` and 0 for `{A,}`.
   */
  function readMultiplierRange(tokenizer) {
      var lower;
      var upper = null;

      tokenizer.eat(LEFTCURLYBRACKET);
      lower = scanNumber(tokenizer);

      if (tokenizer.charCode() !== COMMA) {
          // {A}: a single number is both the lower and the upper bound
          upper = lower;
      } else {
          tokenizer.pos++;

          // {A,}: the upper bound is omitted and `upper` stays null
          if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
              upper = scanNumber(tokenizer);
          }
      }

      tokenizer.eat(RIGHTCURLYBRACKET);

      return {
          min: Number(lower),
          max: upper ? Number(upper) : 0
      };
  }
  /**
   * Read a multiplier at the current position, if there is one.
   *
   * Recognised forms and the bounds they produce:
   *   `*`      -> min 0, max 0
   *   `+`      -> min 1, max 0
   *   `?`      -> min 0, max 1
   *   `#`      -> min 1, max 0, comma flag set
   *   `#{...}` -> bounds from readMultiplierRange(), comma flag set
   *   `{...}`  -> bounds from readMultiplierRange()
   * (a max of 0 appears to stand for "no upper limit"; confirm with consumers)
   *
   * @param {Object} tokenizer Tokenizer providing `charCode()` and `pos`.
   * @returns {Object|null} A `Multiplier` node with `term` set to null, or
   *   null (nothing consumed) when the current character does not start a
   *   multiplier.
   */
  function readMultiplier(tokenizer) {
      var code = tokenizer.charCode();
      var commaSeparated = false;
      var bounds;

      if (code === ASTERISK) {
          tokenizer.pos++;
          bounds = { min: 0, max: 0 };
      } else if (code === PLUSSIGN) {
          tokenizer.pos++;
          bounds = { min: 1, max: 0 };
      } else if (code === QUESTIONMARK) {
          tokenizer.pos++;
          bounds = { min: 0, max: 1 };
      } else if (code === NUMBERSIGN) {
          tokenizer.pos++;
          commaSeparated = true;

          // `#` may be followed directly by an explicit range
          if (tokenizer.charCode() === LEFTCURLYBRACKET) {
              bounds = readMultiplierRange(tokenizer);
          } else {
              bounds = { min: 1, max: 0 };
          }
      } else if (code === LEFTCURLYBRACKET) {
          bounds = readMultiplierRange(tokenizer);
      } else {
          return null;
      }

      return {
          type: 'Multiplier',
          comma: commaSeparated,
          min: bounds.min,
          max: bounds.max,
          term: null
      };
  }
  /**
   * Wrap `node` into a multiplier when one follows it in the input.
   *
   * @param {Object} tokenizer Tokenizer positioned right after `node`.
   * @param {Object} node Term that has just been read.
   * @returns {Object} The multiplier node with its `term` set to `node`, or
   *   `node` itself when readMultiplier() finds no multiplier.
   */
  function maybeMultiplied(tokenizer, node) {
      var multiplier = readMultiplier(tokenizer);

      if (multiplier === null) {
          return node;
      }

      multiplier.term = node;

      return multiplier;
  }
  /**
   * Turn whatever `tokenizer.peek()` yields into a generic `Token` node.
   *
   * @param {Object} tokenizer Tokenizer providing `peek()`.
   * @returns {{type: string, value: string}|null} A `Token` node carrying the
   *   value returned by `peek()`, or null when that value is an empty string
   *   (presumably the end of input; confirm against the tokenizer module).
   */
  function maybeToken(tokenizer) {
      var ch = tokenizer.peek();

      return ch === ''
          ? null
          : { type: 'Token', value: ch };
  }
  /**
   * Read a property reference of the form `<'name'>`, optionally followed by
   * a multiplier.
   *
   * @param {Object} tokenizer Tokenizer positioned at the opening `<`.
   * @returns {Object} A `Property` node, wrapped into a multiplier node when
   *   one follows.
   */
  function readProperty(tokenizer) {
      // opening <'
      tokenizer.eat(LESSTHANSIGN);
      tokenizer.eat(APOSTROPHE);

      var property = {
          type: 'Property',
          name: scanWord(tokenizer)
      };

      // closing '>
      tokenizer.eat(APOSTROPHE);
      tokenizer.eat(GREATERTHANSIGN);

      return maybeMultiplied(tokenizer, property);
  }
  /**
   * Read a type reference of the form `<name>` or `<name()>`, optionally
   * followed by a multiplier.
   *
   * @param {Object} tokenizer Tokenizer positioned at the opening `<`.
   * @returns {Object} A `Type` node (its name keeps the `()` suffix when
   *   present), wrapped into a multiplier node when one follows.
   */
  function readType(tokenizer) {
      tokenizer.eat(LESSTHANSIGN);

      var typeName = scanWord(tokenizer);
      var hasParentheses =
          tokenizer.charCode() === LEFTPARENTHESIS &&
          tokenizer.nextCharCode() === RIGHTPARENTHESIS;

      if (hasParentheses) {
          // functional type, e.g. <calc()>
          tokenizer.pos += 2;
          typeName += '()';
      }

      tokenizer.eat(GREATERTHANSIGN);

      return maybeMultiplied(tokenizer, {
          type: 'Type',
          name: typeName
      });
  }
  /**
   * Read a word and classify it as a function name or a keyword.
   *
   * @param {Object} tokenizer Tokenizer positioned at the first character of
   *   the word.
   * @returns {Object} A `Function` node when the word is immediately followed
   *   by `(` (the parenthesis is consumed and no multiplier is read);
   *   otherwise a `Keyword` node, wrapped into a multiplier node when one
   *   follows.
   */
  function readKeywordOrFunction(tokenizer) {
      var word = scanWord(tokenizer);

      if (tokenizer.charCode() !== LEFTPARENTHESIS) {
          return maybeMultiplied(tokenizer, {
              type: 'Keyword',
              name: word
          });
      }

      // skip the opening parenthesis of a function
      tokenizer.pos++;

      return {
          type: 'Function',
          name: word
      };
  }
  /**
   * Rewrite a flat list of terms and `Combinator` entries, in place, into
   * nested groups.
   *
   * The combinators that occur in the list are processed in the order given
   * by COMBINATOR_PRECEDENCE. For each one, its `Combinator` entries are
   * removed from `terms`. Every run of terms it joined is replaced by a single
   * implicit `Group` node, except while the last combinator is processed:
   * the terms it joins stay directly in `terms`.
   *
   * @param {Array<Object>} terms Terms interleaved with `Combinator` nodes;
   *   modified in place.
   * @param {Object} combinators Object whose keys are the combinator values
   *   present in `terms`.
   * @returns {string|undefined} The combinator processed last (the one joining
   *   what is left in `terms`), or undefined when `combinators` has no keys.
   */
  function regroupTerms(terms, combinators) {
      // replace terms[from..to) with one implicit group holding those terms
      function wrapRange(from, to, combinator) {
          terms.splice(from, to - from, {
              type: 'Group',
              terms: terms.slice(from, to),
              combinator: combinator,
              disallowEmpty: false,
              explicit: false
          });
      }

      var ordered = Object.keys(combinators).sort(function(a, b) {
          return COMBINATOR_PRECEDENCE[a] - COMBINATOR_PRECEDENCE[b];
      });
      var current;

      for (var step = 0; step < ordered.length; step++) {
          var isLast = step === ordered.length - 1;
          var runStart = 0;   // start of the run being collected, -1 when none
          var i = 0;

          current = ordered[step];

          for (; i < terms.length; i++) {
              var term = terms[i];

              if (term.type !== 'Combinator') {
                  continue;
              }

              if (term.value === current) {
                  if (runStart === -1) {
                      runStart = i - 1;
                  }

                  // drop the combinator and revisit the same index
                  terms.splice(i, 1);
                  i--;
                  continue;
              }

              // another combinator ends the current run
              if (runStart !== -1 && i - runStart > 1) {
                  wrapRange(runStart, i, current);
                  i = runStart + 1;
              }

              runStart = -1;
          }

          // a run reaching the end of the list is wrapped too, unless this is
          // the last combinator
          if (runStart !== -1 && !isLast) {
              wrapRange(runStart, i, current);
          }
      }

      return current;
  }
  /**
   * Read terms until peek() stops yielding tokens and collect them into an
   * implicit group.
   *
   * Whitespace tokens are skipped. Two neighbouring terms with no explicit
   * combinator between them get an implicit `' '` combinator. A combinator at
   * the start of the group, right after another combinator, or at the end of
   * the group is reported through `tokenizer.error('Unexpected combinator')`.
   *
   * @param {Object} tokenizer Tokenizer providing `pos` and `error()`.
   * @returns {Object} `Group` node with `explicit` and `disallowEmpty` set to
   *   false. Its `combinator` is the value returned by regroupTerms(), or
   *   `' '` when that value is falsy.
   */
  function readImplicitGroup(tokenizer) {
      var terms = [];
      var combinators = {};
      var token;
      var prevToken = null;
      var prevTokenPos = tokenizer.pos;

      while (token = peek(tokenizer)) {
          if (token.type === 'Spaces') {
              continue;
          }

          if (token.type === 'Combinator') {
              // check for combinator in group beginning and double combinator sequence
              if (prevToken === null || prevToken.type === 'Combinator') {
                  tokenizer.pos = prevTokenPos;
                  tokenizer.error('Unexpected combinator');
              }

              combinators[token.value] = true;
          } else if (prevToken !== null && prevToken.type !== 'Combinator') {
              combinators[' '] = true;  // a b
              terms.push({
                  type: 'Combinator',
                  value: ' '
              });
          }

          terms.push(token);
          prevToken = token;
          prevTokenPos = tokenizer.pos;
      }

      // check for combinator in group ending
      if (prevToken !== null && prevToken.type === 'Combinator') {
          // Report at the end of the dangling combinator. The position must be
          // assigned, as in the check above; subtracting prevTokenPos from it
          // would yield an offset unrelated to the combinator.
          tokenizer.pos = prevTokenPos;
          tokenizer.error('Unexpected combinator');
      }

      return {
          type: 'Group',
          terms: terms,
          combinator: regroupTerms(terms, combinators) || ' ',
          disallowEmpty: false,
          explicit: false
      };
  }
  /**
   * Read an explicit group: `[ ... ]`, optionally followed by `!`.
   *
   * @param {Object} tokenizer Tokenizer positioned at the opening `[`.
   * @returns {Object} The group produced by readImplicitGroup() for the
   *   bracket content, with `explicit` set to true and `disallowEmpty` set to
   *   true when a `!` follows the closing bracket.
   */
  function readGroup(tokenizer) {
      tokenizer.eat(LEFTSQUAREBRACKET);
      var group = readImplicitGroup(tokenizer);
      tokenizer.eat(RIGHTSQUAREBRACKET);

      group.explicit = true;

      var hasBang = tokenizer.charCode() === EXCLAMATIONMARK;

      if (hasBang) {
          tokenizer.pos++;
          group.disallowEmpty = true;
      }

      return group;
  }
  /**
   * Read the next token of a definition at the current position.
   *
   * @param {Object} tokenizer Tokenizer providing `charCode()`,
   *   `nextCharCode()`, `substringToPos()`, `eat()` and `pos`.
   * @returns {Object|null|undefined} The node that was read. Nothing is
   *   consumed and undefined is returned for `]` (end of a group), for the
   *   characters reserved for multipliers (`*`, `+`, `?`, `#`, `!`) and for a
   *   `{` that is not followed by a non-digit character. Null is returned when
   *   maybeToken() has nothing to yield.
   */
  function peek(tokenizer) {
      var code = tokenizer.charCode();
      var lookahead;
      var width;

      // a name character starts a keyword or a function
      if (code < 128 && NAME_CHAR[code] === 1) {
          return readKeywordOrFunction(tokenizer);
      }

      switch (code) {
          case SPACE:
          case TAB:
          case N:
          case R:
          case F:
              return {
                  type: 'Spaces',
                  value: scanSpaces(tokenizer)
              };

          case COMMA:
              tokenizer.pos++;
              return {
                  type: 'Comma'
              };

          case APOSTROPHE:
              return maybeMultiplied(tokenizer, {
                  type: 'String',
                  value: scanString(tokenizer)
              });

          case LESSTHANSIGN:
              // <'property'> vs <type>
              if (tokenizer.nextCharCode() === APOSTROPHE) {
                  return readProperty(tokenizer);
              }
              return readType(tokenizer);

          case LEFTSQUAREBRACKET:
              return maybeMultiplied(tokenizer, readGroup(tokenizer));

          case VERTICALLINE:
              // `||` takes two characters, `|` takes one
              width = tokenizer.nextCharCode() === VERTICALLINE ? 2 : 1;
              return {
                  type: 'Combinator',
                  value: tokenizer.substringToPos(tokenizer.pos + width)
              };

          case AMPERSAND:
              // a single `&` is not valid, a second one must follow
              tokenizer.pos++;
              tokenizer.eat(AMPERSAND);
              return {
                  type: 'Combinator',
                  value: '&&'
              };

          case COMMERCIALAT:
              lookahead = tokenizer.nextCharCode();
              if (lookahead < 128 && NAME_CHAR[lookahead] === 1) {
                  tokenizer.pos++;
                  return {
                      type: 'AtKeyword',
                      name: scanWord(tokenizer)
                  };
              }
              return maybeToken(tokenizer);

          case LEFTCURLYBRACKET:
              // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
              // check next char isn't a number, because it's likely a disjoined multiplier
              lookahead = tokenizer.nextCharCode();
              if (lookahead < 48 || lookahead > 57) {
                  return maybeToken(tokenizer);
              }
              return undefined;

          case RIGHTSQUAREBRACKET:
              // don't eat, stop scan a group
          case ASTERISK:
          case PLUSSIGN:
          case QUESTIONMARK:
          case NUMBERSIGN:
          case EXCLAMATIONMARK:
              // prohibited tokens (used as a multiplier start)
              return undefined;

          default:
              return maybeToken(tokenizer);
      }
  }
  /**
   * Parse a definition string into a tree of nodes.
   *
   * @param {string} str Definition source.
   * @returns {Object} Root `Group` node. When the top-level group consists of
   *   exactly one term that is itself a `Group`, that inner group is returned
   *   instead. `tokenizer.error('Unexpected input')` is called when the input
   *   is not consumed up to its end.
   */
  function parse(str) {
      var tokenizer = new Tokenizer(str);
      var root = readImplicitGroup(tokenizer);

      if (tokenizer.pos !== str.length) {
          tokenizer.error('Unexpected input');
      }

      // reduce redundant groups with single group term
      var children = root.terms;

      if (children.length === 1 && children[0].type === 'Group') {
          return children[0];
      }

      return root;
  }
  // warm up parse to eliminate code branches that never execute
  // fix soft deoptimizations (insufficient type feedback)
  // NOTE: the call below runs once when the module is loaded and its result is discarded
  parse('[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!');
  module.exports = parse;