Commit 10ca6433b4b0b25d10c97c06fd6e9f121d88beed

Michael Schmidt 2020-05-04T23:52:49

Better error messages for pattern tests (#2364)

diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js
index beb33c7..b35fdb9 100644
--- a/tests/pattern-tests.js
+++ b/tests/pattern-tests.js
@@ -81,7 +81,7 @@ function testPatterns(Prism) {
 		BFS(Prism.languages, path => {
 			const { key, value } = path[path.length - 1];
 
-			let tokenPath = '<languages>';
+			let tokenPath = 'Prism.languages';
 			for (const { key } of path) {
 				if (!key) {
 					// do nothing
@@ -172,11 +172,46 @@ function testPatterns(Prism) {
 		}
 	}
 
+	/**
+	 * Returns whether the given element will always at the start of the whole match.
+	 *
+	 * @param {Element} element
+	 * @returns {boolean}
+	 */
+	function isFirstMatch(element) {
+		const parent = element.parent;
+		switch (parent.type) {
+			case 'Alternative':
+				// all elements before this element have to of zero length
+				if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
+					return false;
+				}
+				const grandParent = parent.parent;
+				if (grandParent.type === 'Pattern') {
+					return true;
+				} else {
+					return isFirstMatch(grandParent);
+				}
+
+			case 'Quantifier':
+				if (parent.max >= 2) {
+					return false;
+				} else {
+					return isFirstMatch(parent);
+				}
+
+			default:
+				throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
+		}
+	}
+
 
 	it('- should not match the empty string', function () {
 		forEachPattern(({ pattern, tokenPath }) => {
 			// test for empty string
-			assert.notMatch('', pattern, `Token ${tokenPath}: ${pattern} should not match the empty string.`);
+			assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
+				+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
+				+ `Make sure that all patterns always consume at least one character.`);
 		});
 	});
 
@@ -187,54 +222,26 @@ function testPatterns(Prism) {
 				forEachCapturingGroup(ast.pattern, () => { hasCapturingGroup = true; });
 
 				if (!hasCapturingGroup) {
-					assert.fail(`Token ${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.`);
+					assert.fail(`${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.\n\n`
+						+ `Prism lookbehind groups use the captured text of the first capturing group to simulate a lookbehind. `
+						+ `Without a capturing group, a lookbehind is not possible.\n`
+						+ `To fix this, either add a capturing group for the lookbehind or remove the 'lookbehind' property.`);
 				}
 			}
 		});
 	});
 
 	it('- should not have lookbehind groups that can be preceded by other some characters', function () {
-		/**
-		 * Returns whether the given element will always match the start of the string.
-		 *
-		 * @param {Element} element
-		 * @returns {boolean}
-		 */
-		function isFirstMatch(element) {
-			const parent = element.parent;
-			switch (parent.type) {
-				case 'Alternative':
-					// all elements before this element have to of zero length
-					if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
-						return false;
-					}
-					const grandParent = parent.parent;
-					if (grandParent.type === 'Pattern') {
-						return true;
-					} else {
-						return isFirstMatch(grandParent);
-					}
-
-				case 'Quantifier':
-					if (parent.max >= 2) {
-						return false;
-					} else {
-						return isFirstMatch(parent);
-					}
-
-				default:
-					throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
-			}
-		}
-
 		forEachPattern(({ ast, tokenPath, lookbehind }) => {
 			if (!lookbehind) {
 				return;
 			}
 			forEachCapturingGroup(ast.pattern, ({ group, number }) => {
 				if (number === 1 && !isFirstMatch(group)) {
-					assert.fail(`Token ${tokenPath}: `
-						+ `The lookbehind group (if matched) always has to be at index 0 relative to the whole match.`);
+					assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n`
+						+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
+						+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
+						+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
 				}
 			});
 		});
@@ -249,9 +256,9 @@ function testPatterns(Prism) {
 				if (number === 1 && isAlwaysZeroWidth(group)) {
 					const groupContent = group.raw.substr(1, group.raw.length - 2);
 					const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
-					reportError(`Token ${tokenPath}: The lookbehind group ${group.raw} does not consume characters. `
-						+ `Therefor it is not necessary to use a lookbehind group. `
-						+ `Replacing the lookbehind group with: ${replacement}`);
+					reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n`
+						+ `Therefor it is not necessary to use a lookbehind group.\n`
+						+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
 				}
 			});
 		});
@@ -262,7 +269,22 @@ function testPatterns(Prism) {
 			forEachCapturingGroup(ast.pattern, ({ group, number }) => {
 				const isLookbehindGroup = lookbehind && number === 1;
 				if (group.references.length === 0 && !isLookbehindGroup) {
-					reportError(`Token ${tokenPath}: Unused capturing group ${group.raw}. All capturing groups have to be either referenced or used as a Prism lookbehind group.`);
+					const fixes = [];
+					fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`);
+					fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`);
+					if (number === 1 && !lookbehind) {
+						if (isFirstMatch(group)) {
+							fixes.push(`Add a 'lookbehind: true' declaration.`);
+						} else {
+							fixes.push(`Add a 'lookbehind: true' declaration. (This group is not a valid lookbehind group because it can be preceded by some characters.)`);
+						}
+					}
+
+					reportError(`${tokenPath}: Unused capturing group ${group.raw}.\n\n`
+						+ `Unused capturing groups generally degrade the performance of regular expressions. `
+						+ `They might also be a sign that a backreference is incorrect or that a 'lookbehind: true' declaration in missing.\n`
+						+ `To fix this, do one of the following:\n`
+						+ fixes.map(f => '- ' + f).join('\n'));
 				}
 			});
 		});
@@ -272,7 +294,8 @@ function testPatterns(Prism) {
 		const niceName = /^[a-z][a-z\d]*(?:[-_][a-z\d]+)*$/;
 		function testName(name, desc = 'token name') {
 			if (!niceName.test(name)) {
-				assert.fail(`The ${desc} '${name}' does not match ${niceName}`);
+				assert.fail(`The ${desc} '${name}' does not match ${niceName}.\n\n`
+					+ `To fix this, choose a name that matches the above regular expression.`);
 			}
 		}
 
@@ -305,7 +328,10 @@ function testPatterns(Prism) {
 			visitRegExpAST(ast.pattern, {
 				onCharacterEnter(node) {
 					if (/^\\(?:[1-9]|\d{2,})$/.test(node.raw)) {
-						reportError(`Token ${tokenPath}: Octal escape ${node.raw}.`);
+						reportError(`${tokenPath}: Octal escape ${node.raw}.\n\n`
+							+ `Octal escapes can be confused with backreferences, so please do not use them.\n`
+							+ `To fix this, use a different escape method. `
+							+ `Note that this could also be an invalid backreference, so be sure to carefully analyse the pattern.`);
 					}
 				}
 			});