string-parse.js 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. // adapted from https://github.com/jridgewell/string-dedent
  2. var getBuiltIn = require('../internals/get-built-in');
  3. var uncurryThis = require('../internals/function-uncurry-this');
  // Static String helpers. `fromCodePoint` is looked up through the project's
  // `getBuiltIn` helper rather than read off `String` directly.
  var fromCharCode = String.fromCharCode;
  var fromCodePoint = getBuiltIn('String', 'fromCodePoint');

  // String.prototype methods passed through `uncurryThis`; every call site in this
  // file passes the string as the first argument, e.g. `charAt(str, index)`.
  var charAt = uncurryThis(String.prototype.charAt);
  var charCodeAt = uncurryThis(String.prototype.charCodeAt);
  var stringIndexOf = uncurryThis(String.prototype.indexOf);
  var stringSlice = uncurryThis(String.prototype.slice);
  // UTF-16 code units that bound the decimal-digit and hex-letter ranges.
  var ZERO_CODE = 0x30; // '0'
  var NINE_CODE = 0x39; // '9'
  var LOWER_A_CODE = 0x61; // 'a'
  var LOWER_F_CODE = 0x66; // 'f'
  var UPPER_A_CODE = 0x41; // 'A'
  var UPPER_F_CODE = 0x46; // 'F'
  /**
   * Tells whether the code unit of `str` at `index` lies in the range '0'..'9'.
   *
   * @param {string} str - String to inspect.
   * @param {number} index - Position of the code unit to test.
   * @returns {boolean} `true` only when the code unit is between ZERO_CODE and
   *   NINE_CODE inclusive.
   */
  var isDigit = function (str, index) {
    var code = charCodeAt(str, index);
    // A value that compares false against both bounds (such as NaN) falls through
    // this guard and is rejected by the final comparison.
    if (code < ZERO_CODE) return false;
    return code <= NINE_CODE;
  };
  /**
   * Parses the hexadecimal digits of `str` in the half-open range [index, end).
   *
   * @param {string} str - String to read from.
   * @param {number} index - Position of the first hex digit.
   * @param {number} end - Position one past the last hex digit (exclusive).
   * @returns {number} The parsed value, or -1 when the range is empty, extends past
   *   the end of `str`, or contains a code unit that `hexToInt` rejects.
   */
  var parseHex = function (str, index, end) {
    // `end` is exclusive, so `end === str.length` is a valid range whose last digit
    // is the last char of the string (e.g. a `\x41` escape ending the input).
    if (end > str.length) return -1;
    // Zero digits (e.g. the inside of `\u{}`) is not a hex number.
    if (index >= end) return -1;
    var value = 0;
    for (var pos = index; pos < end; pos++) {
      var digit = hexToInt(charCodeAt(str, pos));
      if (digit === -1) return -1;
      value = value * 16 + digit;
    }
    return value;
  };
  /**
   * Maps the code unit of a single hexadecimal digit to its numeric value.
   *
   * @param {number} c - Code unit to convert.
   * @returns {number} 0-9 for '0'..'9', 10-15 for 'a'..'f' or 'A'..'F', and -1 for
   *   anything else.
   */
  var hexToInt = function (c) {
    var value = -1;
    if (ZERO_CODE <= c && c <= NINE_CODE) {
      value = c - ZERO_CODE;
    } else if (LOWER_A_CODE <= c && c <= LOWER_F_CODE) {
      value = 10 + (c - LOWER_A_CODE);
    } else if (UPPER_A_CODE <= c && c <= UPPER_F_CODE) {
      value = 10 + (c - UPPER_A_CODE);
    }
    return value;
  };
  36. module.exports = function (raw) {
  37. var out = '';
  38. var start = 0;
  39. // We need to find every backslash escape sequence, and cook the escape into a real char.
  40. var i = 0;
  41. var n;
  42. while ((i = stringIndexOf(raw, '\\', i)) > -1) {
  43. out += stringSlice(raw, start, i);
  44. // If the backslash is the last char of the string, then it was an invalid sequence.
  45. // This can't actually happen in a tagged template literal, but could happen if you manually
  46. // invoked the tag with an array.
  47. if (++i === raw.length) return;
  48. var next = charAt(raw, i++);
  49. switch (next) {
  50. // Escaped control codes need to be individually processed.
  51. case 'b':
  52. out += '\b';
  53. break;
  54. case 't':
  55. out += '\t';
  56. break;
  57. case 'n':
  58. out += '\n';
  59. break;
  60. case 'v':
  61. out += '\v';
  62. break;
  63. case 'f':
  64. out += '\f';
  65. break;
  66. case 'r':
  67. out += '\r';
  68. break;
  69. // Escaped line terminators just skip the char.
  70. case '\r':
  71. // Treat `\r\n` as a single terminator.
  72. if (i < raw.length && charAt(raw, i) === '\n') ++i;
  73. // break omitted
  74. case '\n':
  75. case '\u2028':
  76. case '\u2029':
  77. break;
  78. // `\0` is a null control char, but `\0` followed by another digit is an illegal octal escape.
  79. case '0':
  80. if (isDigit(raw, i)) return;
  81. out += '\0';
  82. break;
  83. // Hex escapes must contain 2 hex chars.
  84. case 'x':
  85. n = parseHex(raw, i, i + 2);
  86. if (n === -1) return;
  87. i += 2;
  88. out += fromCharCode(n);
  89. break;
  90. // Unicode escapes contain either 4 chars, or an unlimited number between `{` and `}`.
  91. // The hex value must not overflow 0x10FFFF.
  92. case 'u':
  93. if (i < raw.length && charAt(raw, i) === '{') {
  94. var end = stringIndexOf(raw, '}', ++i);
  95. if (end === -1) return;
  96. n = parseHex(raw, i, end);
  97. i = end + 1;
  98. } else {
  99. n = parseHex(raw, i, i + 4);
  100. i += 4;
  101. }
  102. if (n === -1 || n > 0x10FFFF) return;
  103. out += fromCodePoint(n);
  104. break;
  105. default:
  106. if (isDigit(next, 0)) return;
  107. out += next;
  108. }
  109. start = i;
  110. }
  111. return out + stringSlice(raw, start);
  112. };