kmx git

Commit 5333e28106b8438f76b07b855c0c5fe4b3570947

2021-10-19T19:34:41
Added regex coverage (#3138)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c8de6ee..f8f4941 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -57,3 +57,16 @@ jobs:
           node-version: 14.x
       - run: npm ci
       - run: npm run lint:ci
+
+  coverage:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Use Node.js 14.x
+        uses: actions/setup-node@v1
+        with:
+          node-version: 14.x
+      - run: npm ci
+      - run: npm run regex-coverage
diff --git a/package.json b/package.json
index 1961040..e15083f 100755
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
 		"lint": "eslint . --cache",
 		"lint:fix": "npm run lint -- --fix",
 		"lint:ci": "eslint . --max-warnings 0",
+		"regex-coverage": "mocha tests/coverage.js",
 		"test:aliases": "mocha tests/aliases-test.js",
 		"test:core": "mocha tests/core/**/*.js",
 		"test:dependencies": "mocha tests/dependencies-test.js",
diff --git a/tests/coverage.js b/tests/coverage.js
new file mode 100644
index 0000000..cd71757
--- /dev/null
+++ b/tests/coverage.js
@@ -0,0 +1,260 @@
+'use strict';
+
+const TestDiscovery = require('./helper/test-discovery');
+const TestCase = require('./helper/test-case');
+const PrismLoader = require('./helper/prism-loader');
+const { BFS, BFSPathToPrismTokenPath } = require('./helper/util');
+const { assert } = require('chai');
+const components = require('../components.json');
+const ALL_LANGUAGES = [...Object.keys(components.languages).filter(k => k !== 'meta')];
+
+
+describe('Pattern test coverage', function () {
+	/**
+	 * @type {Map<string, PatternData>}
+	 * @typedef PatternData
+	 * @property {RegExp} pattern
+	 * @property {string} language
+	 * @property {Set<string>} from
+	 * @property {RegExpExecArray[]} matches
+	 */
+	const patterns = new Map();
+
+	/**
+	 * @param {string | string[]} languages
+	 * @returns {import("./helper/prism-loader").Prism}
+	 */
+	function createInstance(languages) {
+		const Prism = PrismLoader.createInstance(languages);
+
+		BFS(Prism.languages, (path, object) => {
+			const { key, value } = path[path.length - 1];
+			const tokenPath = BFSPathToPrismTokenPath(path);
+
+			if (Object.prototype.toString.call(value) == '[object RegExp]') {
+				const regex = makeGlobal(value);
+				object[key] = regex;
+
+				const patternKey = String(regex);
+				let data = patterns.get(patternKey);
+				if (!data) {
+					data = {
+						pattern: regex,
+						language: path[1].key,
+						from: new Set([tokenPath]),
+						matches: []
+					};
+					patterns.set(patternKey, data);
+				} else {
+					data.from.add(tokenPath);
+				}
+
+				regex.exec = string => {
+					let match = RegExp.prototype.exec.call(regex, string);
+					if (match) {
+						data.matches.push(match);
+					}
+					return match;
+				};
+			}
+		});
+
+		return Prism;
+	}
+
+	describe('Register all patterns', function () {
+		it('all', function () {
+			this.slow(10 * 1000);
+			// This will cause ALL regexes of Prism to be registered in the patterns map.
+			// (Languages that don't have any tests can't be caught otherwise.)
+			createInstance(ALL_LANGUAGES);
+		});
+	});
+
+	describe('Run all language tests', function () {
+		// define tests for all tests in all languages in the test suite
+		for (const [languageIdentifier, files] of TestDiscovery.loadAllTests()) {
+			it(languageIdentifier, function () {
+				this.timeout(10 * 1000);
+
+				for (const filePath of files) {
+					try {
+						TestCase.run({
+							languageIdentifier,
+							filePath,
+							updateMode: 'none',
+							createInstance
+						});
+					} catch (error) {
+						// we don't care about whether the test succeeds,
+						// we just want to gather usage data
+					}
+				}
+			});
+		}
+	});
+
+	describe('Coverage', function () {
+		for (const language of ALL_LANGUAGES) {
+			describe(language, function () {
+				it(`- should cover all patterns`, function () {
+					const untested = getAllOf(language).filter(d => d.matches.length === 0);
+					if (untested.length === 0) {
+						return;
+					}
+
+					const problems = untested.map(data => {
+						return formatProblem(data, [
+							'This pattern is completely untested. Add test files that match this pattern.'
+						]);
+					});
+
+					assert.fail([
+						`${problems.length} pattern(s) are untested:\n`
+						+ 'You can learn more about writing tests at https://prismjs.com/test-suite.html#writing-tests',
+						...problems
+					].join('\n\n'));
+				});
+
+				it(`- should exhaustively cover all keywords in keyword lists`, function () {
+					const problems = [];
+
+					for (const data of getAllOf(language)) {
+						if (data.matches.length === 0) {
+							// don't report the same pattern twice
+							continue;
+						}
+
+						const keywords = getKeywordList(data.pattern);
+						if (!keywords) {
+							continue;
+						}
+						const keywordCount = keywords.size;
+
+						data.matches.forEach(([m]) => {
+							if (data.pattern.ignoreCase) {
+								m = m.toUpperCase();
+							}
+							keywords.delete(m);
+						});
+
+						if (keywords.size > 0) {
+							problems.push(formatProblem(data, [
+								`Add test files to test all keywords. The following keywords (${keywords.size}/${keywordCount}) are untested:`,
+								...[...keywords].map(k => `    ${k}`)
+							]));
+						}
+					}
+
+					if (problems.length === 0) {
+						return;
+					}
+
+					assert.fail([
+						`${problems.length} keyword list(s) are not exhaustively tested:\n`
+						+ 'You can learn more about writing tests at https://prismjs.com/test-suite.html#writing-tests',
+						...problems
+					].join('\n\n'));
+				});
+			});
+		}
+	});
+
+	/**
+	 * @param {string} language
+	 * @returns {PatternData[]}
+	 */
+	function getAllOf(language) {
+		return [...patterns.values()].filter(d => d.language === language);
+	}
+
+	/**
+	 * @param {string} string
+	 * @param {number} maxLength
+	 * @returns {string}
+	 */
+	function short(string, maxLength) {
+		if (string.length > maxLength) {
+			return string.slice(0, maxLength - 1) + '…';
+		} else {
+			return string;
+		}
+	}
+
+	/**
+	 * If the given pattern string describes a keyword list, all keywords will be returned. Otherwise, `null` will be
+	 * returned.
+	 *
+	 * @param {RegExp} pattern
+	 * @returns {Set<string> | null}
+	 */
+	function getKeywordList(pattern) {
+		// Right now, only keyword lists of the form /\b(?:foo|bar)\b/ are supported.
+		// In the future, we might want to convert these regexes to NFAs and iterate all words to cover more complex
+		// keyword lists and even operator and punctuation lists.
+
+		let source = pattern.source.replace(/^\\b|\\b$/g, '');
+		if (source.startsWith('(?:') && source.endsWith(')')) {
+			source = source.slice('(?:'.length, source.length - ')'.length);
+		}
+
+		if (/^\w+(?:\|\w+)*$/.test(source)) {
+			if (pattern.ignoreCase) {
+				source = source.toUpperCase();
+			}
+			return new Set(source.split(/\|/g));
+		} else {
+			return null;
+		}
+	}
+
+	/**
+	 * @param {Iterable<string>} occurrences
+	 * @returns {{ origin: string; otherOccurrences: string[] }}
+	 */
+	function splitOccurrences(occurrences) {
+		const all = [...occurrences];
+		return {
+			origin: all[0],
+			otherOccurrences: all.slice(1),
+		};
+	}
+
+	/**
+	 * @param {PatternData} data
+	 * @param {string[]} messageLines
+	 * @returns {string}
+	 */
+	function formatProblem(data, messageLines) {
+		const { origin, otherOccurrences } = splitOccurrences(data.from);
+
+		const lines = [
+			`${origin}:`,
+			short(String(data.pattern), 100),
+			'',
+			...messageLines,
+		];
+
+		if (otherOccurrences.length) {
+			lines.push(
+				'',
+				'Other occurrences of this pattern:',
+				...otherOccurrences.map(o => `- ${o}`)
+			);
+		}
+
+		return lines.join('\n    ');
+	}
+});
+
+/**
+ * @param {RegExp} regex
+ * @returns {RegExp}
+ */
+function makeGlobal(regex) {
+	if (regex.global) {
+		return regex;
+	} else {
+		return RegExp(regex.source, regex.flags + 'g');
+	}
+}
diff --git a/tests/helper/test-case.js b/tests/helper/test-case.js
index 3454d77..d69e2c3 100644
--- a/tests/helper/test-case.js
+++ b/tests/helper/test-case.js
@@ -1,6 +1,7 @@
 'use strict';
 
 const fs = require('fs');
+const path = require('path');
 const { assert } = require('chai');
 const Prettier = require('prettier');
 const PrismLoader = require('./prism-loader');
@@ -12,6 +13,12 @@ const TokenStreamTransformer = require('./token-stream-transformer');
  */
 
 /**
+ * @param {string[]} languages
+ * @returns {Prism}
+ */
+const defaultCreateInstance = (languages) => PrismLoader.createInstance(languages);
+
+/**
  * Handles parsing and printing of a test case file.
  *
  * A test case file consists of at most three parts, separated by a line of at least 10 dashes.
@@ -298,6 +305,29 @@ module.exports = {
 	TestCaseFile,
 
 	/**
+	 * Runs the given test file and asserts the result.
+	 *
+	 * This function will determine what kind of test file the given file is and call the appropriate method to run the
+	 * test.
+	 *
+	 * @param {RunOptions} options
+	 * @returns {void}
+	 *
+	 * @typedef RunOptions
+	 * @property {string} languageIdentifier
+	 * @property {string} filePath
+	 * @property {"none" | "insert" | "update"} updateMode
+	 * @property {(languages: string[]) => Prism} [createInstance]
+	 */
+	run(options) {
+		if (path.extname(options.filePath) === '.test') {
+			this.runTestCase(options.languageIdentifier, options.filePath, options.updateMode, options.createInstance);
+		} else {
+			this.runTestsWithHooks(options.languageIdentifier, require(options.filePath), options.createInstance);
+		}
+	},
+
+	/**
 	 * Runs the given test case file and asserts the result
 	 *
 	 * The passed language identifier can either be a language like "css" or a composed language
@@ -312,13 +342,16 @@ module.exports = {
 	 * @param {string} languageIdentifier
 	 * @param {string} filePath
 	 * @param {"none" | "insert" | "update"} updateMode
+	 * @param {(languages: string[]) => Prism} [createInstance]
 	 */
-	runTestCase(languageIdentifier, filePath, updateMode) {
+	runTestCase(languageIdentifier, filePath, updateMode, createInstance = defaultCreateInstance) {
+		let runner;
 		if (/\.html\.test$/i.test(filePath)) {
-			this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new HighlightHTMLRunner());
+			runner = new HighlightHTMLRunner();
 		} else {
-			this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new TokenizeJSONRunner());
+			runner = new TokenizeJSONRunner();
 		}
+		this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance);
 	},
 
 	/**
@@ -326,13 +359,14 @@ module.exports = {
 	 * @param {string} filePath
 	 * @param {"none" | "insert" | "update"} updateMode
 	 * @param {Runner<T>} runner
+	 * @param {(languages: string[]) => Prism} createInstance
 	 * @template T
 	 */
-	runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner) {
+	runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance) {
 		const testCase = TestCaseFile.readFromFile(filePath);
 		const usedLanguages = this.parseLanguageNames(languageIdentifier);
 
-		const Prism = PrismLoader.createInstance(usedLanguages.languages);
+		const Prism = createInstance(usedLanguages.languages);
 
 		// the first language is the main language to highlight
 		const actualValue = runner.run(Prism, testCase.code, usedLanguages.mainLanguage);
diff --git a/tests/helper/test-discovery.js b/tests/helper/test-discovery.js
index 4d97873..3e374a3 100644
--- a/tests/helper/test-discovery.js
+++ b/tests/helper/test-discovery.js
@@ -3,41 +3,37 @@
 const fs = require('fs');
 const path = require('path');
 
+const LANGUAGES_DIR = path.join(__dirname, '..', 'languages');
+
 module.exports = {
 
 	/**
 	 * Loads the list of all available tests
 	 *
-	 * @param {string} rootDir
-	 * @returns {Object<string, string[]>}
+	 * @param {string} [rootDir]
+	 * @returns {Map<string, string[]>}
 	 */
 	loadAllTests(rootDir) {
-		/** @type {Object.<string, string[]>} */
-		const testSuite = {};
-
-		for (const language of this.getAllDirectories(rootDir)) {
-			testSuite[language] = this.getAllFiles(path.join(rootDir, language));
-		}
+		rootDir = rootDir || LANGUAGES_DIR;
 
-		return testSuite;
+		return new Map(this.getAllDirectories(rootDir).map(language => {
+			return [language, this.getAllFiles(path.join(rootDir, language))];
+		}));
 	},
 
 	/**
 	 * Loads the list of available tests that match the given languages
 	 *
-	 * @param {string} rootDir
 	 * @param {string|string[]} languages
-	 * @returns {Object<string, string[]>}
+	 * @param {string} [rootDir]
+	 * @returns {Map<string, string[]>}
 	 */
-	loadSomeTests(rootDir, languages) {
-		/** @type {Object.<string, string[]>} */
-		const testSuite = {};
-
-		for (const language of this.getSomeDirectories(rootDir, languages)) {
-			testSuite[language] = this.getAllFiles(path.join(rootDir, language));
-		}
+	loadSomeTests(languages, rootDir) {
+		rootDir = rootDir || LANGUAGES_DIR;
 
-		return testSuite;
+		return new Map(this.getSomeDirectories(rootDir, languages).map(language => {
+			return [language, this.getAllFiles(path.join(rootDir, language))];
+		}));
 	},
 
 
diff --git a/tests/helper/util.js b/tests/helper/util.js
index da3a18d..e4c7287 100644
--- a/tests/helper/util.js
+++ b/tests/helper/util.js
@@ -19,7 +19,7 @@ module.exports = {
 	 * Performs a breadth-first search on the given start element.
 	 *
 	 * @param {any} start
-	 * @param {(path: { key: string, value: any }[]) => void} callback
+	 * @param {(path: { key: string, value: any }[], obj: Record<string, any>) => void} callback
 	 */
 	BFS(start, callback) {
 		const visited = new Set();
@@ -28,8 +28,6 @@ module.exports = {
 			[{ key: null, value: start }]
 		];
 
-		callback(toVisit[0]);
-
 		while (toVisit.length > 0) {
 			/** @type {{ key: string, value: any }[][]} */
 			const newToVisit = [];
@@ -43,7 +41,7 @@ module.exports = {
 						const value = obj[key];
 
 						path.push({ key, value });
-						callback(path);
+						callback(path, obj);
 
 						if (Array.isArray(value) || Object.prototype.toString.call(value) == '[object Object]') {
 							newToVisit.push([...path]);
@@ -59,6 +57,30 @@ module.exports = {
 	},
 
 	/**
+	 * Given the path passed to a `BFS` callback, this will return the Prism language token path of the current
+	 * value (e.g. `Prism.languages.xml.tag.pattern`).
+	 *
+	 * @param {readonly{ key: string, value: any }[]} path
+	 * @param {string} [root]
+	 * @returns {string}
+	 */
+	BFSPathToPrismTokenPath(path, root = 'Prism.languages') {
+		let tokenPath = root;
+		for (const { key } of path) {
+			if (!key) {
+				// do nothing
+			} else if (/^\d+$/.test(key)) {
+				tokenPath += `[${key}]`;
+			} else if (/^[a-z]\w*$/i.test(key)) {
+				tokenPath += `.${key}`;
+			} else {
+				tokenPath += `[${JSON.stringify(key)}]`;
+			}
+		}
+		return tokenPath;
+	},
+
+	/**
 	 * Returns the AST of a given pattern.
 	 *
 	 * @param {RegExp} regex
diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js
index 2dc9ee4..91ce21e 100644
--- a/tests/pattern-tests.js
+++ b/tests/pattern-tests.js
@@ -5,7 +5,7 @@ const { assert } = require('chai');
 const PrismLoader = require('./helper/prism-loader');
 const TestDiscovery = require('./helper/test-discovery');
 const TestCase = require('./helper/test-case');
-const { BFS, parseRegex } = require('./helper/util');
+const { BFS, BFSPathToPrismTokenPath, parseRegex } = require('./helper/util');
 const { languages } = require('../components.json');
 const { visitRegExpAST } = require('regexpp');
 const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA, Transformers } = require('refa');
@@ -19,8 +19,8 @@ const RAA = require('regexp-ast-analysis');
  * @type {Map<string, string[]>}
  */
 const testSnippets = new Map();
-const testSuite = TestDiscovery.loadAllTests(__dirname + '/languages');
-for (const languageIdentifier in testSuite) {
+const testSuite = TestDiscovery.loadAllTests();
+for (const [languageIdentifier, files] of testSuite) {
 	const lang = TestCase.parseLanguageNames(languageIdentifier).mainLanguage;
 	let snippets = testSnippets.get(lang);
 	if (snippets === undefined) {
@@ -28,7 +28,7 @@ for (const languageIdentifier in testSuite) {
 		testSnippets.set(lang, snippets);
 	}
 
-	for (const file of testSuite[languageIdentifier]) {
+	for (const file of files) {
 		snippets.push(TestCase.TestCaseFile.readFromFile(file).code);
 	}
 }
@@ -91,27 +91,6 @@ function testPatterns(Prism, mainLanguage) {
 	}
 
 	/**
-	 * @param {string} root
-	 * @param {Parameters<Parameters<typeof BFS>[1]>[0]} path
-	 * @returns {string}
-	 */
-	function BFSPathToString(root, path) {
-		let pathStr = root;
-		for (const { key } of path) {
-			if (!key) {
-				// do nothing
-			} else if (/^\d+$/.test(key)) {
-				pathStr += `[${key}]`;
-			} else if (/^[a-z]\w*$/i.test(key)) {
-				pathStr += `.${key}`;
-			} else {
-				pathStr += `[${JSON.stringify(key)}]`;
-			}
-		}
-		return pathStr;
-	}
-
-	/**
 	 * Invokes the given function on every pattern in `Prism.languages`.
 	 *
 	 * _Note:_ This will aggregate all errors thrown by the given callback and throw an aggregated error at the end
@@ -146,10 +125,9 @@ function testPatterns(Prism, mainLanguage) {
 
 			BFS(root, path => {
 				const { key, value } = path[path.length - 1];
+				const tokenPath = BFSPathToPrismTokenPath(path, rootStr);
 				visited.add(value);
 
-				const tokenPath = BFSPathToString(rootStr, path);
-
 				if (Object.prototype.toString.call(value) == '[object RegExp]') {
 					try {
 						let ast;
diff --git a/tests/run.js b/tests/run.js
index f030fbc..390d771 100644
--- a/tests/run.js
+++ b/tests/run.js
@@ -8,29 +8,23 @@ const { argv } = require('yargs');
 
 const testSuite =
 	(argv.language)
-		? TestDiscovery.loadSomeTests(__dirname + '/languages', argv.language)
+		? TestDiscovery.loadSomeTests(argv.language)
 		// load complete test suite
-		: TestDiscovery.loadAllTests(__dirname + '/languages');
+		: TestDiscovery.loadAllTests();
 
 const update = !!argv.update;
 
 // define tests for all tests in all languages in the test suite
-for (const language in testSuite) {
-	if (!testSuite.hasOwnProperty(language)) {
-		continue;
-	}
+for (const [languageIdentifier, files] of testSuite) {
+	describe("Testing language '" + languageIdentifier + "'", function () {
+		this.timeout(10000);
 
-	(function (language, testFiles) {
-		describe("Testing language '" + language + "'", function () {
-			this.timeout(10000);
+		for (const filePath of files) {
+			const fileName = path.basename(filePath, path.extname(filePath));
 
-			for (const filePath of testFiles) {
-				const fileName = path.basename(filePath, path.extname(filePath));
-
-				it("– should pass test case '" + fileName + "'", function () {
-					TestCase.runTestCase(language, filePath, update ? 'update' : 'insert');
-				});
-			}
-		});
-	}(language, testSuite[language]));
+			it("– should pass test case '" + fileName + "'", function () {
+				TestCase.runTestCase(languageIdentifier, filePath, update ? 'update' : 'insert');
+			});
+		}
+	});
 }
kmx.io/prism.js

Commit 5333e28106b8438f76b07b855c0c5fe4b3570947