Commit d04d166df80f8cbe79e723c602f30416fef21d76

Michael Schmidt 2021-12-05T14:54:45

Structured text: Improved tokenization (#3213)

diff --git a/components/prism-iecst.js b/components/prism-iecst.js
index 4456ad3..3e0fb97 100644
--- a/components/prism-iecst.js
+++ b/components/prism-iecst.js
@@ -3,6 +3,7 @@ Prism.languages.iecst = {
 		{
 			pattern: /(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/,
 			lookbehind: true,
+			greedy: true,
 		},
 		{
 			pattern: /(^|[^\\:])\/\/.*/,
@@ -14,17 +15,18 @@ Prism.languages.iecst = {
 		pattern: /(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,
 		greedy: true,
 	},
-	'class-name': /\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|PUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,
-	'keyword': /\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/,
-	'variable': /\b(?:ANY|ARRAY|AT|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,
-	'symbol': /%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,
+	'keyword': [
+		/\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|OUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,
+		/\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|OF|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/
+	],
+	'class-name': /\b(?:ANY|ARRAY|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,
+	'address': {
+		pattern: /%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,
+		alias: 'symbol'
+	},
 	'number': /\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,
 	'boolean': /\b(?:FALSE|NULL|TRUE)\b/,
-	'function': /\w+(?=\()/,
-	'operator': /(?:S?R?:?=>?|&&?|\*\*?|<=?|>=?|[-:^/+])|\b(?:AND|EQ|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,
-	'punctuation': /[();]/,
-	'type': {
-		'pattern': /#/,
-		'alias': 'selector',
-	},
+	'operator': /S?R?:?=>?|&&?|\*\*?|<[=>]?|>=?|[-:^/+#]|\b(?:AND|EQ|EXPT|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,
+	'function': /\b[a-z_]\w*(?=\s*\()/i,
+	'punctuation': /[()[\].,;]/,
 };
diff --git a/components/prism-iecst.min.js b/components/prism-iecst.min.js
index 97757be..270815b 100644
--- a/components/prism-iecst.min.js
+++ b/components/prism-iecst.min.js
@@ -1 +1 @@
-Prism.languages.iecst={comment:[{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"class-name":/\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|PUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,keyword:/\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/,variable:/\b(?:ANY|ARRAY|AT|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,symbol:/%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,number:/\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,boolean:/\b(?:FALSE|NULL|TRUE)\b/,function:/\w+(?=\()/,operator:/(?:S?R?:?=>?|&&?|\*\*?|<=?|>=?|[-:^/+])|\b(?:AND|EQ|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,punctuation:/[();]/,type:{pattern:/#/,alias:"selector"}};
\ No newline at end of file
+Prism.languages.iecst={comment:[{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/,lookbehind:!0,greedy:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},keyword:[/\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|OUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,/\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|OF|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/],"class-name":/\b(?:ANY|ARRAY|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,address:{pattern:/%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,alias:"symbol"},number:/\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,boolean:/\b(?:FALSE|NULL|TRUE)\b/,operator:/S?R?:?=>?|&&?|\*\*?|<[=>]?|>=?|[-:^/+#]|\b(?:AND|EQ|EXPT|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,function:/\b[a-z_]\w*(?=\s*\()/i,punctuation:/[()[\].,;]/};
\ No newline at end of file
diff --git a/tests/languages/iecst/address_feature.test b/tests/languages/iecst/address_feature.test
new file mode 100644
index 0000000..e556407
--- /dev/null
+++ b/tests/languages/iecst/address_feature.test
@@ -0,0 +1,24 @@
+VAR
+    varname AT %QX1.0.0: BOOL := TRUE;
+END_VAR
+
+----------------------------------------------------
+
+[
+	["keyword", "VAR"],
+
+	"\r\n    varname ",
+	["keyword", "AT"],
+	["address", "%QX1.0.0"],
+	["operator", ":"],
+	["class-name", "BOOL"],
+	["operator", ":="],
+	["boolean", "TRUE"],
+	["punctuation", ";"],
+
+	["keyword", "END_VAR"]
+]
+
+----------------------------------------------------
+
+Checks addresses.
diff --git a/tests/languages/iecst/number.test b/tests/languages/iecst/number.test
deleted file mode 100644
index 666f8fb..0000000
--- a/tests/languages/iecst/number.test
+++ /dev/null
@@ -1,13 +0,0 @@
-a := 100
-
-----------------------------------------------------
-
-[
-	"a ",
-	["operator", ":="],
-	["number", "100"]
-]
-
-----------------------------------------------------
-
-Checks number.
diff --git a/tests/languages/iecst/number_feature.test b/tests/languages/iecst/number_feature.test
new file mode 100644
index 0000000..666f8fb
--- /dev/null
+++ b/tests/languages/iecst/number_feature.test
@@ -0,0 +1,13 @@
+a := 100
+
+----------------------------------------------------
+
+[
+	"a ",
+	["operator", ":="],
+	["number", "100"]
+]
+
+----------------------------------------------------
+
+Checks number.
diff --git a/tests/languages/iecst/operator_feature.test b/tests/languages/iecst/operator_feature.test
new file mode 100644
index 0000000..68b5b74
--- /dev/null
+++ b/tests/languages/iecst/operator_feature.test
@@ -0,0 +1,54 @@
+= <> < <= > >=
++ - * / ** ^ & &&
+
+: :=
+#
+
+AND
+EQ
+EXPT
+GE
+GT
+LE
+LT
+MOD
+NE
+NOT
+OR
+XOR
+
+----------------------------------------------------
+
+[
+	["operator", "="],
+	["operator", "<>"],
+	["operator", "<"],
+	["operator", "<="],
+	["operator", ">"],
+	["operator", ">="],
+
+	["operator", "+"],
+	["operator", "-"],
+	["operator", "*"],
+	["operator", "/"],
+	["operator", "**"],
+	["operator", "^"],
+	["operator", "&"],
+	["operator", "&&"],
+
+	["operator", ":"], ["operator", ":="],
+	["operator", "#"],
+
+	["operator", "AND"],
+	["operator", "EQ"],
+	["operator", "EXPT"],
+	["operator", "GE"],
+	["operator", "GT"],
+	["operator", "LE"],
+	["operator", "LT"],
+	["operator", "MOD"],
+	["operator", "NE"],
+	["operator", "NOT"],
+	["operator", "OR"],
+	["operator", "XOR"]
+]
diff --git a/tests/languages/iecst/punctuation_feature.test b/tests/languages/iecst/punctuation_feature.test
new file mode 100644
index 0000000..b1400d0
--- /dev/null
+++ b/tests/languages/iecst/punctuation_feature.test
@@ -0,0 +1,15 @@
+( ) [ ]
+, ; .
+
+----------------------------------------------------
+
+[
+	["punctuation", "("],
+	["punctuation", ")"],
+	["punctuation", "["],
+	["punctuation", "]"],
+
+	["punctuation", ","],
+	["punctuation", ";"],
+	["punctuation", "."]
+]
diff --git a/tests/languages/iecst/symbol.test b/tests/languages/iecst/symbol.test
deleted file mode 100644
index a55153d..0000000
--- a/tests/languages/iecst/symbol.test
+++ /dev/null
@@ -1,24 +0,0 @@
-VAR
-    varname AT %QX1.0.0: BOOL := TRUE;
-END_VAR
-
-----------------------------------------------------
-
-[
-	["class-name", "VAR"],
-
-	"\r\n    varname ",
-	["keyword", "AT"],
-	["symbol", "%QX1.0.0"],
-	["operator", ":"],
-	["variable", "BOOL"],
-	["operator", ":="],
-	["boolean", "TRUE"],
-	["punctuation", ";"],
-
-	["class-name", "END_VAR"]
-]
-
-----------------------------------------------------
-
-Checks symbols.