// regex-tokeniser.test.js
  1. var RegexTokeniser = require("../lib/regex-tokeniser").RegexTokeniser;
  2. var Token = require("../lib/Token");
  3. var StringSource = require("../lib/StringSource");
  4. exports.emptyStringIsTokenisedToEndToken = stringIsTokenisedTo("", [
  5. endToken("")
  6. ]);
  7. exports.canMatchSingleToken = stringIsTokenisedTo("blah", [
  8. new Token("identifier", "blah", stringSourceRange("blah", 0, 4)),
  9. endToken("blah")
  10. ]);
  11. exports.canMatchMultipleTokens = stringIsTokenisedTo("a.btn", [
  12. new Token("identifier", "a", stringSourceRange("a.btn", 0, 1)),
  13. new Token("dot", ".", stringSourceRange("a.btn", 1, 2)),
  14. new Token("identifier", "btn", stringSourceRange("a.btn", 2, 5)),
  15. endToken("a.btn")
  16. ]);
  17. exports.unrecognisedCharactersAreTokenised = stringIsTokenisedTo("!btn", [
  18. new Token("unrecognisedCharacter", "!", stringSourceRange("!btn", 0, 1)),
  19. new Token("identifier", "btn", stringSourceRange("!btn", 1, 4)),
  20. endToken("!btn")
  21. ]);
  22. exports.firstMatchingRuleIsUsed = stringIsTokenisedTo(":", [
  23. new Token("colon1", ":", stringSourceRange(":", 0, 1)),
  24. endToken(":")
  25. ]);
  26. exports.valuesOfZeroLengthAreIgnored = function(test) {
  27. var expectedTokens = [
  28. new Token("unrecognisedCharacter", "!", stringSourceRange("!btn", 0, 1)),
  29. new Token("identifier", "btn", stringSourceRange("!btn", 1, 4)),
  30. endToken("!btn")
  31. ];
  32. var rules = [
  33. {
  34. name: "identifier",
  35. regex: /([a-z]*)/
  36. }
  37. ];
  38. var tokeniser = new RegexTokeniser(rules);
  39. test.deepEqual(expectedTokens, tokeniser.tokenise("!btn"));
  40. test.done();
  41. };
  42. exports.tokenValueIsFirstCaptureOfRegex = stringIsTokenisedTo('"a"', [
  43. new Token("string", "a", stringSourceRange('"a"', 0, 3)),
  44. endToken('"a"')
  45. ]);
  46. exports.tokenWithNoCaptureHasUndefinedValue = function(test) {
  47. var expectedTokens = [
  48. new Token("bang", undefined, stringSourceRange("!", 0, 1)),
  49. endToken("!")
  50. ];
  51. var rules = [
  52. {
  53. name: "bang",
  54. regex: /!/
  55. }
  56. ];
  57. var tokeniser = new RegexTokeniser(rules);
  58. test.deepEqual(expectedTokens, tokeniser.tokenise("!"));
  59. test.done();
  60. };
  61. function endToken(input) {
  62. var source = stringSourceRange(input, input.length, input.length);
  63. return new Token("end", null, source);
  64. }
  65. function stringIsTokenisedTo(input, expected) {
  66. return function(test) {
  67. test.deepEqual(expected, tokenise(input));
  68. test.done();
  69. };
  70. };
  71. function stringSourceRange(string, startIndex, endIndex) {
  72. return new StringSource(string).range(startIndex, endIndex);
  73. };
  74. function tokenise(input) {
  75. var rules = [
  76. {
  77. name: "identifier",
  78. regex: /([a-z]+)/
  79. },
  80. {
  81. name: "dot",
  82. regex: /(\.)/
  83. },
  84. {
  85. name: "colon1",
  86. regex: /(:)/
  87. },
  88. {
  89. name: "colon2",
  90. regex: /(:)/
  91. },
  92. {
  93. name: "string",
  94. regex: /"([a-z]*)"/
  95. }
  96. ];
  97. var tokeniser = new RegexTokeniser(rules);
  98. return tokeniser.tokenise(input);
  99. };