'use strict';

const generate = require('regjsgen').generate;
const parse = require('regjsparser').parse;
const regenerate = require('regenerate');
const unicodeMatchProperty = require('unicode-match-property-ecmascript');
const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
const iuMappings = require('./data/iu-mappings.js');
const ESCAPE_SETS = require('./data/character-class-escape-sets.js');

// Prepare a Regenerate set containing all code points. It is the base for the
// `/./u` set below and for negated Unicode property escapes.
const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);
// Without the `u` flag, the range stops at 0xFFFF.
// https://mths.be/es6#sec-pattern-semantics
// NOTE(review): `BMP_SET` is not referenced anywhere else in this file.
const BMP_SET = regenerate().addRange(0x0, 0xFFFF);

// Prepare a Regenerate set containing all code points that are supposed to be
// matched by `/./u`. https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
	.remove(
		// minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
		0x000A, // Line Feed <LF>
		0x000D, // Carriage Return <CR>
		0x2028, // Line Separator <LS>
		0x2029  // Paragraph Separator <PS>
	);

// Look up the precomputed set for a character class escape. `character` is
// the key into the `ESCAPE_SETS` tables; `unicode` and `ignoreCase` select
// which table (`UNICODE_IGNORE_CASE`, `UNICODE` or `REGULAR`) is consulted.
const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
	if (unicode) {
		if (ignoreCase) {
			return ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character);
		}
		return ESCAPE_SETS.UNICODE.get(character);
	}
	return ESCAPE_SETS.REGULAR.get(character);
};

// Return the set that `.` stands for in Unicode mode: every code point when
// `dotAll` is truthy, otherwise every code point except line terminators.
const getUnicodeDotSet = (dotAll) => {
	return dotAll ? UNICODE_SET : DOT_SET_UNICODE;
};

// Load the data module for a Unicode property from
// `regenerate-unicode-properties`. With a `value` the module path is
// `<property>/<value>`; without one, `property` is treated as a binary
// property and the path is `Binary_Property/<property>`.
// Throws an `Error` if the module cannot be loaded.
const getUnicodePropertyValueSet = (property, value) => {
	const path = value ?
		`${ property }/${ value }` :
		`Binary_Property/${ property }`;
	try {
		return require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		throw new Error(
			`Failed to recognize value \`${ value }\` for property ` +
			`\`${ property }\`.`
		);
	}
};

// Resolve the contents of a `\p{…}` escape that has no `=` in it.
const handleLoneUnicodePropertyNameOrValue = (value) => {
	// It could be a `General_Category` value or a binary property.
	// Note: `unicodeMatchPropertyValue` throws on invalid values.
	try {
		const property = 'General_Category';
		const category = unicodeMatchPropertyValue(property, value);
		return getUnicodePropertyValueSet(property, category);
	} catch (exception) {
		// Deliberately ignored: failing here only means that `value` is not a
		// `General_Category` value, so fall through to the binary property
		// lookup below (which throws on its own if that fails as well).
	}
	// It’s not a `General_Category` value, so check if it’s a binary
	// property. Note: `unicodeMatchProperty` throws on invalid properties.
	const property = unicodeMatchProperty(value);
	return getUnicodePropertyValueSet(property);
};

// Build the set for a Unicode property escape. `value` is the text between
// the braces (either `Name` or `Property=Value`). When `isNegative` is truthy
// the complement relative to `UNICODE_SET` is returned. The result is always
// a fresh set, so callers may mutate it.
const getUnicodePropertyEscapeSet = (value, isNegative) => {
	const parts = value.split('=');
	const firstPart = parts[0];
	let set;
	if (parts.length === 1) {
		set = handleLoneUnicodePropertyNameOrValue(firstPart);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const property = unicodeMatchProperty(firstPart);
		const propertyValue = unicodeMatchPropertyValue(property, parts[1]);
		set = getUnicodePropertyValueSet(property, propertyValue);
	}
	if (isNegative) {
		return UNICODE_SET.clone().remove(set);
	}
	return set.clone();
};

// Given a range of code points, add any case-folded code points in that range
// to a set. The loop body runs at least once, i.e. `min` itself is always
// looked up. Returns the set for chaining.
regenerate.prototype.iuAddRange = function(min, max) {
	const $this = this;
	do {
		const folded = caseFold(min);
		if (folded) {
			$this.add(folded);
		}
	} while (++min <= max);
	return $this;
};

// Replace the contents of the AST node `item` with the result of parsing
// `pattern`. The pattern is parsed with the `u` flag when
// `config.useUnicodeFlag` is set. Properties are copied onto `item` in place
// (via `Object.assign`) so that existing references to the node stay valid.
const update = (item, pattern) => {
	let tree = parse(pattern, config.useUnicodeFlag ? 'u' : '');
	switch (tree.type) {
		case 'characterClass':
		case 'group':
		case 'value':
			// No wrapping needed.
			break;
		default:
			// Wrap the pattern in a non-capturing group.
			tree = wrap(tree, pattern);
	}
	Object.assign(item, tree);
};

// Build a non-capturing group node whose only child is `tree` and whose raw
// source is `(?:pattern)`.
const wrap = (tree, pattern) => {
	// Wrap the pattern in a non-capturing group.
	return {
		'type': 'group',
		'behavior': 'ignore',
		'body': [tree],
		'raw': `(?:${ pattern })`
	};
};

// Return the `iuMappings` entry for `codePoint`, or `false` if there is none
// (or the entry is falsy).
const caseFold = (codePoint) => {
	return iuMappings.get(codePoint) || false;
};

// Rewrite a character class node in place and return it. All members of the
// class are collected into a single Regenerate set; the node is then replaced
// by that set's string form. A negated class becomes a negative lookahead on
// the set followed by `[\s\S]`.
// Throws an `Error` on an unknown member type.
const processCharacterClass = (characterClassItem, regenerateOptions) => {
	const set = regenerate();
	for (const item of characterClassItem.body) {
		switch (item.type) {
			case 'value':
				set.add(item.codePoint);
				if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
					const folded = caseFold(item.codePoint);
					if (folded) {
						set.add(folded);
					}
				}
				break;
			case 'characterClassRange': {
				const min = item.min.codePoint;
				const max = item.max.codePoint;
				set.addRange(min, max);
				if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
					set.iuAddRange(min, max);
				}
				break;
			}
			case 'characterClassEscape':
				set.add(getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				));
				break;
			case 'unicodePropertyEscape':
				set.add(getUnicodePropertyEscapeSet(item.value, item.negative));
				break;
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* istanbul ignore next */
			default:
				throw new Error(`Unknown term type: ${ item.type }`);
		}
	}
	if (characterClassItem.negative) {
		update(characterClassItem, `(?!${set.toString(regenerateOptions)})[\\s\\S]`);
	} else {
		update(characterClassItem, set.toString(regenerateOptions));
	}
	return characterClassItem;
};

// Turn a named reference node into a numbered one pointing at group `index`.
const updateNamedReference = (item, index) => {
	delete item.name;
	item.matchIndex = index;
};

// Throw an `Error` if any named reference was recorded for which no group
// with that name was ever seen.
const assertNoUnmatchedReferences = (groups) => {
	const unmatchedReferencesNames = Object.keys(groups.unmatchedReferences);
	if (unmatchedReferencesNames.length > 0) {
		throw new Error(`Unknown group names: ${unmatchedReferencesNames}`);
	}
};

// Recursively rewrite the AST node `item` according to `config` and return
// it. `regenerateOptions` is passed through to every `set.toString()` call.
// `groups` is the shared bookkeeping object for capturing groups (see
// `rewritePattern`); it is mutated.
// Throws an `Error` on duplicate group names or on an unknown node type.
const processTerm = (item, regenerateOptions, groups) => {
	switch (item.type) {
		case 'dot':
			if (config.useDotAllFlag) {
				// The node is left untouched in this mode.
				break;
			} else if (config.unicode) {
				update(
					item,
					getUnicodeDotSet(config.dotAll).toString(regenerateOptions)
				);
			} else if (config.dotAll) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[\\s\\S]');
			}
			break;
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape':
			if (config.unicodePropertyEscape) {
				update(
					item,
					getUnicodePropertyEscapeSet(item.value, item.negative)
						.toString(regenerateOptions)
				);
			}
			break;
		case 'characterClassEscape':
			update(
				item,
				getCharacterClassEscapeSet(
					item.value,
					config.unicode,
					config.ignoreCase
				).toString(regenerateOptions)
			);
			break;
		case 'group':
			// Only groups with `normal` behavior advance the group counter.
			if (item.behavior === 'normal') {
				groups.lastIndex++;
			}
			if (item.name && config.namedGroup) {
				const name = item.name.value;

				if (groups.names[name]) {
					throw new Error(
						`Multiple groups with the same name (${ name }) are not allowed.`
					);
				}

				const index = groups.lastIndex;
				delete item.name;

				groups.names[name] = index;
				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}

				// Resolve references that appeared before this group.
				if (groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name].forEach(reference => {
						updateNamedReference(reference, index);
					});
					delete groups.unmatchedReferences[name];
				}
			}
			/* falls through */
		case 'alternative':
		case 'disjunction':
		case 'quantifier':
			item.body = item.body.map(term => {
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		case 'value': {
			const codePoint = item.codePoint;
			const set = regenerate(codePoint);
			if (config.ignoreCase && config.unicode && !config.useUnicodeFlag) {
				const folded = caseFold(codePoint);
				if (folded) {
					set.add(folded);
				}
			}
			update(item, set.toString(regenerateOptions));
			break;
		}
		case 'reference':
			// Named references are only rewritten when named groups are being
			// rewritten. Group names are registered in `groups.names` only under
			// `config.namedGroup`, so without this guard every named reference
			// would be recorded as unmatched and make
			// `assertNoUnmatchedReferences` throw for a valid pattern.
			if (item.name && config.namedGroup) {
				const name = item.name.value;
				const index = groups.names[name];
				if (index) {
					updateNamedReference(item, index);
					break;
				}

				if (!groups.unmatchedReferences[name]) {
					groups.unmatchedReferences[name] = [];
				}
				// Keep track of references used before the corresponding group.
				groups.unmatchedReferences[name].push(item);
			}
			break;
		case 'anchor':
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};

// Module-level settings for the rewrite in progress. Every field is
// overwritten at the start of each `rewritePattern` call and read by the
// helpers above.
const config = {
	'ignoreCase': false,
	'unicode': false,
	'dotAll': false,
	'useDotAllFlag': false,
	'useUnicodeFlag': false,
	'unicodePropertyEscape': false,
	'namedGroup': false
};

// Rewrite the regular expression source `pattern` and return the new source.
//
// `flags` is the flag string of the original expression; the `u`, `i` and `s`
// flags are looked at (`s` only when `options.dotAllFlag` is set).
// `options` is optional. The keys read from it are `lookbehind`,
// `dotAllFlag`, `namedGroup`, `useDotAllFlag`, `useUnicodeFlag`,
// `unicodePropertyEscape` and `onNamedGroup` (called with `(name, index)` for
// every named group that gets rewritten).
//
// Throws an `Error` when `dotAllFlag` and `useDotAllFlag` are both set, when
// a group name is used twice, or when a named reference has no matching
// group (the latter two only while `namedGroup` is enabled).
const rewritePattern = (pattern, flags, options) => {
	config.unicode = flags && flags.includes('u');
	const regjsparserFeatures = {
		'unicodePropertyEscape': config.unicode,
		'namedGroups': true,
		'lookbehind': options && options.lookbehind
	};
	config.ignoreCase = flags && flags.includes('i');
	const supportDotAllFlag = options && options.dotAllFlag;
	config.dotAll = supportDotAllFlag && flags && flags.includes('s');
	config.namedGroup = options && options.namedGroup;
	config.useDotAllFlag = options && options.useDotAllFlag;
	config.useUnicodeFlag = options && options.useUnicodeFlag;
	config.unicodePropertyEscape = options && options.unicodePropertyEscape;
	if (supportDotAllFlag && config.useDotAllFlag) {
		throw new Error('`useDotAllFlag` and `dotAllFlag` cannot both be true!');
	}
	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !config.unicode
	};
	const groups = {
		'onNamedGroup': options && options.onNamedGroup,
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: index }
		'unmatchedReferences': Object.create(null) // { [name]: Array<reference> }
	};
	const tree = parse(pattern, flags, regjsparserFeatures);
	// Note: `processTerm` mutates `tree` and `groups`.
	processTerm(tree, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);
	return generate(tree);
};

module.exports = rewritePattern;