Commit 4dde2e20e68b8d91a1cfb01020954e473522696c

Michael Schmidt 2021-10-05T21:30:45

Regex: Fixed char-class/char-set confusion (#3124)

diff --git a/components/prism-regex.js b/components/prism-regex.js
index 6a1020d..a565e23 100644
--- a/components/prism-regex.js
+++ b/components/prism-regex.js
@@ -5,11 +5,11 @@
 		alias: 'escape'
 	};
 	var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/;
-	var charClass = {
+	var charSet = {
 		pattern: /\.|\\[wsd]|\\p\{[^{}]+\}/i,
 		alias: 'class-name'
 	};
-	var charClassWithoutDot = {
+	var charSetWithoutDot = {
 		pattern: /\\[wsd]|\\p\{[^{}]+\}/i,
 		alias: 'class-name'
 	};
@@ -25,16 +25,16 @@
 	};
 
 	Prism.languages.regex = {
-		'charset': {
+		'char-class': {
 			pattern: /((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,
 			lookbehind: true,
 			inside: {
-				'charset-negation': {
+				'char-class-negation': {
 					pattern: /(^\[)\^/,
 					lookbehind: true,
 					alias: 'operator'
 				},
-				'charset-punctuation': {
+				'char-class-punctuation': {
 					pattern: /^\[|\]$/,
 					alias: 'punctuation'
 				},
@@ -49,12 +49,12 @@
 					}
 				},
 				'special-escape': specialEscape,
-				'charclass': charClassWithoutDot,
+				'char-set': charSetWithoutDot,
 				'escape': escape
 			}
 		},
 		'special-escape': specialEscape,
-		'charclass': charClass,
+		'char-set': charSet,
 		'backreference': [
 			{
 				// a backreference which is not an octal escape
diff --git a/components/prism-regex.min.js b/components/prism-regex.min.js
index 044af82..f14e7f0 100644
--- a/components/prism-regex.min.js
+++ b/components/prism-regex.min.js
@@ -1 +1 @@
-!function(a){var e={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},n=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/,t="(?:[^\\\\-]|"+n.source+")",s=RegExp(t+"-"+t),i={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"};a.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"charset-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:s,inside:{escape:n,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":e,charclass:{pattern:/\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},escape:n}},"special-escape":e,charclass:{pattern:/\.|\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":i}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:n,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,alias:"punctuation",inside:{"group-name":i}},{pattern:/\)/,alias:"punctuation"}],quantifier:{pattern:/(?:[+*?]|\{\d+(?:,\d*)?\})[?+]?/,alias:"number"},alternation:{pattern:/\|/,alias:"keyword"}}}(Prism);
\ No newline at end of file
+!function(a){var e={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},n=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/,t="(?:[^\\\\-]|"+n.source+")",s=RegExp(t+"-"+t),i={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"};a.languages.regex={"char-class":{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"char-class-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"char-class-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:s,inside:{escape:n,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":e,"char-set":{pattern:/\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},escape:n}},"special-escape":e,"char-set":{pattern:/\.|\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":i}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:n,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,alias:"punctuation",inside:{"group-name":i}},{pattern:/\)/,alias:"punctuation"}],quantifier:{pattern:/(?:[+*?]|\{\d+(?:,\d*)?\})[?+]?/,alias:"number"},alternation:{pattern:/\|/,alias:"keyword"}}}(Prism);
\ No newline at end of file
diff --git a/tests/languages/javascript!+regex/regex_inclusion.test b/tests/languages/javascript!+regex/regex_inclusion.test
index 3720fe7..bb787a2 100644
--- a/tests/languages/javascript!+regex/regex_inclusion.test
+++ b/tests/languages/javascript!+regex/regex_inclusion.test
@@ -9,17 +9,17 @@
 			"a",
 			["quantifier", "+"],
 			["group", ["(?:"]],
-			["charset", [
-				["charset-punctuation", "["],
+			["char-class", [
+				["char-class-punctuation", "["],
 				["range", [
 					"a",
 					["range-punctuation", "-"],
 					"z"
 				]],
-				["charset-punctuation", "]"]
+				["char-class-punctuation", "]"]
 			]],
 			["alternation", "|"],
-			["charclass", "\\d"],
+			["char-set", "\\d"],
 			["group", ")"],
 			["quantifier", "?"]
 		]],
diff --git a/tests/languages/regex/char-class_feature.test b/tests/languages/regex/char-class_feature.test
new file mode 100644
index 0000000..1c278a5
--- /dev/null
+++ b/tests/languages/regex/char-class_feature.test
@@ -0,0 +1,48 @@
+[]
+[^]
+[foo]
+[\]\b]
+[.^$\1]
+[\d\D\p{L}]
+
+----------------------------------------------------
+
+[
+	["char-class", [
+		["char-class-punctuation", "["],
+		["char-class-punctuation", "]"]
+	]],
+	["char-class", [
+		["char-class-punctuation", "["],
+		["char-class-negation", "^"],
+		["char-class-punctuation", "]"]
+	]],
+	["char-class", [
+		["char-class-punctuation", "["],
+		"foo",
+		["char-class-punctuation", "]"]
+	]],
+	["char-class", [
+		["char-class-punctuation", "["],
+		["special-escape", "\\]"],
+		["escape", "\\b"],
+		["char-class-punctuation", "]"]
+	]],
+	["char-class", [
+		["char-class-punctuation", "["],
+		".^$",
+		["escape", "\\1"],
+		["char-class-punctuation", "]"]
+	]],
+	["char-class", [
+		["char-class-punctuation", "["],
+		["char-set", "\\d"],
+		["char-set", "\\D"],
+		["char-set", "\\p{L}"],
+		["char-class-punctuation", "]"]
+	]]
+]
+
+----------------------------------------------------
+
+Checks for character classes.
diff --git a/tests/languages/regex/char-set_feature.test b/tests/languages/regex/char-set_feature.test
new file mode 100644
index 0000000..6ea56dd
--- /dev/null
+++ b/tests/languages/regex/char-set_feature.test
@@ -0,0 +1,21 @@
+.
+\w \W
+\s \S
+\d \D
+\p{ASCII}
+\P{ASCII}
+
+----------------------------------------------------
+
+[
+	["char-set", "."],
+	["char-set", "\\w"], ["char-set", "\\W"],
+	["char-set", "\\s"], ["char-set", "\\S"],
+	["char-set", "\\d"], ["char-set", "\\D"],
+	["char-set", "\\p{ASCII}"],
+	["char-set", "\\P{ASCII}"]
+]
+
+----------------------------------------------------
+
+Checks for character sets.
diff --git a/tests/languages/regex/charclass_feature.test b/tests/languages/regex/charclass_feature.test
deleted file mode 100644
index 4a045fa..0000000
--- a/tests/languages/regex/charclass_feature.test
+++ /dev/null
@@ -1,25 +0,0 @@
-.
-\w \W
-\s \S
-\d \D
-\p{ASCII}
-\P{ASCII}
-
-----------------------------------------------------
-
-[
-	["charclass", "."],
-	["charclass", "\\w"],
-	["charclass", "\\W"],
-	["charclass", "\\s"],
-	["charclass", "\\S"],
-	["charclass", "\\d"],
-	["charclass", "\\D"],
-
-	["charclass", "\\p{ASCII}"],
-	["charclass", "\\P{ASCII}"]
-]
-
-----------------------------------------------------
-
-Checks for character classes.
diff --git a/tests/languages/regex/charset_feature.test b/tests/languages/regex/charset_feature.test
deleted file mode 100644
index 317216e..0000000
--- a/tests/languages/regex/charset_feature.test
+++ /dev/null
@@ -1,44 +0,0 @@
-[]
-[^]
-[foo]
-[\]\b]
-[.^$\1]
-
-----------------------------------------------------
-
-[
-	["charset", [
-		["charset-punctuation", "["],
-		["charset-punctuation", "]"]
-	]],
-
-	["charset", [
-		["charset-punctuation", "["],
-		["charset-negation", "^"],
-		["charset-punctuation", "]"]
-	]],
-
-	["charset", [
-		["charset-punctuation", "["],
-		"foo",
-		["charset-punctuation", "]"]
-	]],
-
-	["charset", [
-		["charset-punctuation", "["],
-		["special-escape", "\\]"],
-		["escape", "\\b"],
-		["charset-punctuation", "]"]
-	]],
-
-	["charset", [
-		["charset-punctuation", "["],
-		".^$",
-		["escape", "\\1"],
-		["charset-punctuation", "]"]
-	]]
-]
-
-----------------------------------------------------
-
-Checks for character sets.
diff --git a/tests/languages/regex/range_feature.test b/tests/languages/regex/range_feature.test
index 6de46dd..fc1fb68 100644
--- a/tests/languages/regex/range_feature.test
+++ b/tests/languages/regex/range_feature.test
@@ -5,8 +5,8 @@
 ----------------------------------------------------
 
 [
-	["charset", [
-		["charset-punctuation", "["],
+	["char-class", [
+		["char-class-punctuation", "["],
 		["range", [
 			"a",
 			["range-punctuation", "-"],
@@ -22,11 +22,10 @@
 			["range-punctuation", "-"],
 			"9"
 		]],
-		["charset-punctuation", "]"]
+		["char-class-punctuation", "]"]
 	]],
-
-	["charset", [
-		["charset-punctuation", "["],
+	["char-class", [
+		["char-class-punctuation", "["],
 		["range", [
 			["escape", "\\xa1"],
 			["range-punctuation", "-"],
@@ -37,14 +36,13 @@
 			["range-punctuation", "-"],
 			["escape", "\\u{256}"]
 		]],
-		["charset-punctuation", "]"]
+		["char-class-punctuation", "]"]
 	]],
-
-	["charset", [
-		["charset-punctuation", "["],
-		["charset-negation", "^"],
+	["char-class", [
+		["char-class-punctuation", "["],
+		["char-class-negation", "^"],
 		"-aaa-",
-		["charset-punctuation", "]"]
+		["char-class-punctuation", "]"]
 	]]
 ]