- var Token = require("./Token");
- var StringSource = require("./StringSource");
- exports.RegexTokeniser = RegexTokeniser;
- function RegexTokeniser(rules) {
- rules = rules.map(function(rule) {
- return {
- name: rule.name,
- regex: new RegExp(rule.regex.source, "g")
- };
- });
-
- function tokenise(input, description) {
- var source = new StringSource(input, description);
- var index = 0;
- var tokens = [];
-
- while (index < input.length) {
- var result = readNextToken(input, index, source);
- index = result.endIndex;
- tokens.push(result.token);
- }
-
- tokens.push(endToken(input, source));
- return tokens;
- }
- function readNextToken(string, startIndex, source) {
- for (var i = 0; i < rules.length; i++) {
- var regex = rules[i].regex;
- regex.lastIndex = startIndex;
- var result = regex.exec(string);
-
- if (result) {
- var endIndex = startIndex + result[0].length;
- if (result.index === startIndex && endIndex > startIndex) {
- var value = result[1];
- var token = new Token(
- rules[i].name,
- value,
- source.range(startIndex, endIndex)
- );
- return {token: token, endIndex: endIndex};
- }
- }
- }
- var endIndex = startIndex + 1;
- var token = new Token(
- "unrecognisedCharacter",
- string.substring(startIndex, endIndex),
- source.range(startIndex, endIndex)
- );
- return {token: token, endIndex: endIndex};
- }
-
- function endToken(input, source) {
- return new Token(
- "end",
- null,
- source.range(input.length, input.length)
- );
- }
-
- return {
- tokenise: tokenise
- }
- }