Commit 8a72830ab4b08d8d21ea4fc75fa27ba1ea872da2

Michael Schmidt 2020-05-07T22:44:16

Regex: Added aliases and minor improvements (#2325) This adds a lot of aliases to the regex tokens, so themes can apply their styles. It also makes a few improvements. See the PR for more details.

diff --git a/components/prism-regex.js b/components/prism-regex.js
index ec94277..7b4d144 100644
--- a/components/prism-regex.js
+++ b/components/prism-regex.js
@@ -4,8 +4,15 @@
 		pattern: /\\[\\(){}[\]^$+*?|.]/,
 		alias: 'escape'
 	};
-	var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/
-	var charClass = /\\[wsd]|\.|\\p{[^{}]+}/i
+	var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/;
+	var charClass = {
+		pattern: /\.|\\[wsd]|\\p{[^{}]+}/i,
+		alias: 'class-name'
+	};
+	var charClassWithoutDot = {
+		pattern: /\\[wsd]|\\p{[^{}]+}/i,
+		alias: 'class-name'
+	};
 
 	var rangeChar = '(?:[^\\\\-]|' + escape.source + ')';
 	var range = RegExp(rangeChar + '-' + rangeChar);
@@ -17,16 +24,6 @@
 		alias: 'variable'
 	};
 
-	var backreference = [
-		/\\(?![123][0-7]{2})[1-9]/, // a backreference which is not an octal escape
-		{
-			pattern: /\\k<[^<>']+>/,
-			inside: {
-				'group-name': groupName
-			}
-		}
-	];
-
 	Prism.languages.regex = {
 		'charset': {
 			pattern: /((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,
@@ -35,25 +32,47 @@
 				'charset-negation': {
 					pattern: /(^\[)\^/,
 					lookbehind: true,
+					alias: 'operator'
+				},
+				'charset-punctuation': {
+					pattern: /^\[|\]$/,
+					alias: 'punctuation'
 				},
-				'charset-punctuation': /^\[|\]$/,
 				'range': {
 					pattern: range,
 					inside: {
 						'escape': escape,
-						'range-punctuation': /-/
+						'range-punctuation': {
+							pattern: /-/,
+							alias: 'operator'
+						}
 					}
 				},
 				'special-escape': specialEscape,
-				'charclass': charClass,
-				'backreference': backreference,
+				'charclass': charClassWithoutDot,
 				'escape': escape
 			}
 		},
 		'special-escape': specialEscape,
 		'charclass': charClass,
-		'backreference': backreference,
-		'anchor': /[$^]|\\[ABbGZz]/,
+		'backreference': [
+			{
+				// a backreference which is not an octal escape
+				pattern: /\\(?![123][0-7]{2})[1-9]/,
+				alias: 'keyword'
+			},
+			{
+				pattern: /\\k<[^<>']+>/,
+				alias: 'keyword',
+				inside: {
+					'group-name': groupName
+				}
+			}
+		],
+		'anchor': {
+			pattern: /[$^]|\\[ABbGZz]/,
+			alias: 'function'
+		},
 		'escape': escape,
 		'group': [
 			{
@@ -62,14 +81,24 @@
 
 				// (), (?<name>), (?'name'), (?>), (?:), (?=), (?!), (?<=), (?<!), (?is-m), (?i-m:)
 				pattern: /\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,
+				alias: 'punctuation',
 				inside: {
 					'group-name': groupName
 				}
 			},
-			/\)/
+			{
+				pattern: /\)/,
+				alias: 'punctuation'
+			}
 		],
-		'quantifier': /[+*?]|\{(?:\d+,?\d*)\}/,
-		'alternation': /\|/
+		'quantifier': {
+			pattern: /(?:[+*?]|\{(?:\d+,?\d*)\})[?+]?/,
+			alias: 'number'
+		},
+		'alternation': {
+			pattern: /\|/,
+			alias: 'keyword'
+		}
 	};
 
 
@@ -84,12 +113,13 @@
 		var grammar = Prism.languages[lang];
 		if (grammar) {
 			grammar['regex'].inside = {
-				'regex-flags': /[a-z]+$/,
-				'regex-delimiter': /^\/|\/$/,
 				'language-regex': {
-					pattern: /[\s\S]+/,
+					pattern: /^(\/)[\s\S]+(?=\/[a-z]*$)/i,
+					lookbehind: true,
 					inside: Prism.languages.regex
-				}
+				},
+				'regex-flags': /[a-z]+$/i,
+				'regex-delimiter': /^\/|\/$/,
 			};
 		}
 	});
diff --git a/components/prism-regex.min.js b/components/prism-regex.min.js
index cfb8877..e2a76df 100644
--- a/components/prism-regex.min.js
+++ b/components/prism-regex.min.js
@@ -1 +1 @@
-!function(n){var e={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},a=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/,r=/\\[wsd]|\.|\\p{[^{}]+}/i,i="(?:[^\\\\-]|"+a.source+")",s=RegExp(i+"-"+i),t={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"},c=[/\\(?![123][0-7]{2})[1-9]/,{pattern:/\\k<[^<>']+>/,inside:{"group-name":t}}];n.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0},"charset-punctuation":/^\[|\]$/,range:{pattern:s,inside:{escape:a,"range-punctuation":/-/}},"special-escape":e,charclass:r,backreference:c,escape:a}},"special-escape":e,charclass:r,backreference:c,anchor:/[$^]|\\[ABbGZz]/,escape:a,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,inside:{"group-name":t}},/\)/],quantifier:/[+*?]|\{(?:\d+,?\d*)\}/,alternation:/\|/},["actionscript","coffescript","flow","javascript","typescript","vala"].forEach(function(e){var a=n.languages[e];a&&(a.regex.inside={"regex-flags":/[a-z]+$/,"regex-delimiter":/^\/|\/$/,"language-regex":{pattern:/[\s\S]+/,inside:n.languages.regex}})})}(Prism);
\ No newline at end of file
+!function(n){var a={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},e=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/,t="(?:[^\\\\-]|"+e.source+")",i=RegExp(t+"-"+t),r={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"};n.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"charset-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:i,inside:{escape:e,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":a,charclass:{pattern:/\\[wsd]|\\p{[^{}]+}/i,alias:"class-name"},escape:e}},"special-escape":a,charclass:{pattern:/\.|\\[wsd]|\\p{[^{}]+}/i,alias:"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":r}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:e,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,alias:"punctuation",inside:{"group-name":r}},{pattern:/\)/,alias:"punctuation"}],quantifier:{pattern:/(?:[+*?]|\{(?:\d+,?\d*)\})[?+]?/,alias:"number"},alternation:{pattern:/\|/,alias:"keyword"}},["actionscript","coffescript","flow","javascript","typescript","vala"].forEach(function(a){var e=n.languages[a];e&&(e.regex.inside={"language-regex":{pattern:/^(\/)[\s\S]+(?=\/[a-z]*$)/i,lookbehind:!0,inside:n.languages.regex},"regex-flags":/[a-z]+$/i,"regex-delimiter":/^\/|\/$/})})}(Prism);
\ No newline at end of file
diff --git a/examples/prism-regex.html b/examples/prism-regex.html
new file mode 100644
index 0000000..41a59c1
--- /dev/null
+++ b/examples/prism-regex.html
@@ -0,0 +1,46 @@
+<p>The regex language can be used for inline regex snippets like <code>(?&lt;number>\d+)[-_ ]\k&lt;number></code> but it mainly adds itself to other languages such as:</p>
+
+<h2>JavaScript</h2>
+<pre class="language-javascript" data-dependencies="regex"><code>Prism.languages.markup = {
+	'comment': /&lt;!--[\s\S]*?-->/,
+	'prolog': /&lt;\?[\s\S]+?\?>/,
+	'doctype': {
+		pattern: /&lt;!DOCTYPE(?:[^>"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:[^&lt;"'\]]|"[^"]*"|'[^']*'|&lt;(?!!--)|&lt;!--(?:[^-]|-(?!->))*-->)*\]\s*)?>/i,
+		greedy: true
+	},
+	'cdata': /&lt;!\[CDATA\[[\s\S]*?]]>/i,
+	'tag': {
+		pattern: /&lt;\/?(?!\d)[^\s>\/=$&lt;%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/i,
+		greedy: true,
+		inside: {
+			'tag': {
+				pattern: /^&lt;\/?[^\s>\/]+/i,
+				inside: {
+					'punctuation': /^&lt;\/?/,
+					'namespace': /^[^\s>\/:]+:/
+				}
+			},
+			'attr-value': {
+				pattern: /=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/i,
+				inside: {
+					'punctuation': [
+						/^=/,
+						{
+							pattern: /^(\s*)["']|["']$/,
+							lookbehind: true
+						}
+					]
+				}
+			},
+			'punctuation': /\/?>/,
+			'attr-name': {
+				pattern: /[^\s>\/]+/,
+				inside: {
+					'namespace': /^[^\s>\/:]+:/
+				}
+			}
+
+		}
+	},
+	'entity': /&amp;#?[\da-z]{1,8};/i
+};</code></pre>
diff --git a/tests/examples-test.js b/tests/examples-test.js
index b45c684..0a4f3e4 100644
--- a/tests/examples-test.js
+++ b/tests/examples-test.js
@@ -13,9 +13,7 @@ describe('Examples', function () {
 		'markup-templating',
 		't4-templating',
 		// this does alter some languages but it's mainly a library
-		'javadoclike',
-		// Regex doesn't have any classes supported by our themes and mainly extends other languages
-		'regex'
+		'javadoclike'
 	]);
 	const validFiles = new Set();
 
diff --git a/tests/languages/regex/charset_feature.test b/tests/languages/regex/charset_feature.test
index 0a970da..317216e 100644
--- a/tests/languages/regex/charset_feature.test
+++ b/tests/languages/regex/charset_feature.test
@@ -2,6 +2,7 @@
 [^]
 [foo]
 [\]\b]
+[.^$\1]
 
 ----------------------------------------------------
 
@@ -28,6 +29,13 @@
 		["special-escape", "\\]"],
 		["escape", "\\b"],
 		["charset-punctuation", "]"]
+	]],
+
+	["charset", [
+		["charset-punctuation", "["],
+		".^$",
+		["escape", "\\1"],
+		["charset-punctuation", "]"]
 	]]
 ]
 
diff --git a/tests/languages/regex/quantifier_feature.test b/tests/languages/regex/quantifier_feature.test
index 87246d6..bd4e9bf 100644
--- a/tests/languages/regex/quantifier_feature.test
+++ b/tests/languages/regex/quantifier_feature.test
@@ -1,6 +1,12 @@
 * + ?
 {2} {2,} {0,1}
 
+*? +? ??
+{2}? {2,}? {0,1}?
+
+*+ ++ ?+
+{2}+ {2,}+ {0,1}+
+
 ----------------------------------------------------
 
 [
@@ -9,7 +15,21 @@
 	["quantifier", "?"],
 	["quantifier", "{2}"],
 	["quantifier", "{2,}"],
-	["quantifier", "{0,1}"]
+	["quantifier", "{0,1}"],
+
+	["quantifier", "*?"],
+	["quantifier", "+?"],
+	["quantifier", "??"],
+	["quantifier", "{2}?"],
+	["quantifier", "{2,}?"],
+	["quantifier", "{0,1}?"],
+
+	["quantifier", "*+"],
+	["quantifier", "++"],
+	["quantifier", "?+"],
+	["quantifier", "{2}+"],
+	["quantifier", "{2,}+"],
+	["quantifier", "{0,1}+"]
 ]
 
 ----------------------------------------------------