| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 | var util      = require('./util');var types     = require('./types');var sets      = require('./sets');var positions = require('./positions');module.exports = function(regexpStr) {  var i = 0, l, c,      start = { type: types.ROOT, stack: []},      // Keep track of last clause/group and stack.      lastGroup = start,      last = start.stack,      groupStack = [];  var repeatErr = function(i) {    util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1));  };  // Decode a few escaped characters.  var str = util.strToChars(regexpStr);  l = str.length;  // Iterate through each character in string.  while (i < l) {    c = str[i++];    switch (c) {      // Handle escaped characters, inclues a few sets.      case '\\':        c = str[i++];        switch (c) {          case 'b':            last.push(positions.wordBoundary());            break;          case 'B':            last.push(positions.nonWordBoundary());            break;          case 'w':            last.push(sets.words());            break;          case 'W':            last.push(sets.notWords());            break;          case 'd':            last.push(sets.ints());            break;          case 'D':            last.push(sets.notInts());            break;          case 's':            last.push(sets.whitespace());            break;          case 'S':            last.push(sets.notWhitespace());            break;          default:            // Check if c is integer.            // In which case it's a reference.            if (/\d/.test(c)) {              last.push({ type: types.REFERENCE, value: parseInt(c, 10) });            // Escaped character.            } else {              last.push({ type: types.CHAR, value: c.charCodeAt(0) });            }        }        break;      // Positionals.      case '^':          last.push(positions.begin());        break;      case '$':          last.push(positions.end());        break;      // Handle custom sets.      case '[':        // Check if this class is 'anti' i.e. [^abc].        var not;        if (str[i] === '^') {          not = true;          i++;        } else {          not = false;        }        // Get all the characters in class.        var classTokens = util.tokenizeClass(str.slice(i), regexpStr);        // Increase index by length of class.        i += classTokens[1];        last.push({          type: types.SET,          set: classTokens[0],          not: not,        });        break;      // Class of any character except \n.      case '.':        last.push(sets.anyChar());        break;      // Push group onto stack.      case '(':        // Create group.        var group = {          type: types.GROUP,          stack: [],          remember: true,        };        c = str[i];        // If if this is a special kind of group.        if (c === '?') {          c = str[i + 1];          i += 2;          // Match if followed by.          if (c === '=') {            group.followedBy = true;          // Match if not followed by.          } else if (c === '!') {            group.notFollowedBy = true;          } else if (c !== ':') {            util.error(regexpStr,              'Invalid group, character \'' + c +              '\' after \'?\' at column ' + (i - 1));          }          group.remember = false;        }        // Insert subgroup into current group stack.        last.push(group);        // Remember the current group for when the group closes.        groupStack.push(lastGroup);        // Make this new group the current group.        lastGroup = group;        last = group.stack;        break;      // Pop group out of stack.      case ')':        if (groupStack.length === 0) {          util.error(regexpStr, 'Unmatched ) at column ' + (i - 1));        }        lastGroup = groupStack.pop();        // Check if this group has a PIPE.        // To get back the correct last stack.        last = lastGroup.options ?          lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack;        break;      // Use pipe character to give more choices.      case '|':        // Create array where options are if this is the first PIPE        // in this clause.        if (!lastGroup.options) {          lastGroup.options = [lastGroup.stack];          delete lastGroup.stack;        }        // Create a new stack and add to options for rest of clause.        var stack = [];        lastGroup.options.push(stack);        last = stack;        break;      // Repetition.      // For every repetition, remove last element from last stack      // then insert back a RANGE object.      // This design is chosen because there could be more than      // one repetition symbols in a regex i.e. `a?+{2,3}`.      case '{':        var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;        if (rs !== null) {          if (last.length === 0) {            repeatErr(i);          }          min = parseInt(rs[1], 10);          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;          i += rs[0].length;          last.push({            type: types.REPETITION,            min: min,            max: max,            value: last.pop(),          });        } else {          last.push({            type: types.CHAR,            value: 123,          });        }        break;      case '?':        if (last.length === 0) {          repeatErr(i);        }        last.push({          type: types.REPETITION,          min: 0,          max: 1,          value: last.pop(),        });        break;      case '+':        if (last.length === 0) {          repeatErr(i);        }        last.push({          type: types.REPETITION,          min: 1,          max: Infinity,          value: last.pop(),        });        break;      case '*':        if (last.length === 0) {          repeatErr(i);        }        last.push({          type: types.REPETITION,          min: 0,          max: Infinity,          value: last.pop(),        });        break;      // Default is a character that is not `\[](){}?+*^$`.      default:        last.push({          type: types.CHAR,          value: c.charCodeAt(0),        });    }  }  // Check if any groups have not been closed.  if (groupStack.length !== 0) {    util.error(regexpStr, 'Unterminated group');  }  return start;};module.exports.types = types;
 |