Commit 7b34e65d73134c8824a5f37151159709e054a0eb

Michael Schmidt 2021-11-22T13:12:41

Concurnas: Improved tokenization (#3189)

diff --git a/components/prism-concurnas.js b/components/prism-concurnas.js
index 499355c..476d0b4 100644
--- a/components/prism-concurnas.js
+++ b/components/prism-concurnas.js
@@ -1,19 +1,20 @@
 Prism.languages.concurnas = {
-	'comment': [
-		{
-			pattern: /(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,
-			lookbehind: true
-		},
-		{
-			pattern: /(^|[^\\:])\/\/.*/,
-			lookbehind: true,
-			greedy: true
-		}
-	],
+	'comment': {
+		pattern: /(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\/\/.*)/,
+		lookbehind: true,
+		greedy: true
+	},
 	'langext': {
 		pattern: /\b\w+\s*\|\|[\s\S]+?\|\|/,
 		greedy: true,
-		alias: 'string'
+		inside: {
+			'class-name': /^\w+/,
+			'string': {
+				pattern: /(^\s*\|\|)[\s\S]+(?=\|\|$)/,
+				lookbehind: true
+			},
+			'punctuation': /\|\|/
+		}
 	},
 	'function': {
 		pattern: /((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,
@@ -23,7 +24,7 @@ Prism.languages.concurnas = {
 	'boolean': /\b(?:false|true)\b/,
 	'number': /\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,
 	'punctuation': /[{}[\];(),.:]/,
-	'operator': /<==|>==|=>|->|<-|<>|\^|&==|&<>|!|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,
+	'operator': /<==|>==|=>|->|<-|<>|&==|&<>|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|[!^~]|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,
 	'annotation': {
 		pattern: /@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,
 		alias: 'builtin'
@@ -31,8 +32,20 @@ Prism.languages.concurnas = {
 };
 
 Prism.languages.insertBefore('concurnas', 'langext', {
-	'string': {
-		pattern: /[rs]?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
+	'regex-literal': {
+		pattern: /\br("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
+		greedy: true,
+		inside: {
+			'interpolation': {
+				pattern: /((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,
+				lookbehind: true,
+				inside: Prism.languages.concurnas
+			},
+			'regex': /[\s\S]+/
+		}
+	},
+	'string-literal': {
+		pattern: /(?:\B|\bs)("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
 		greedy: true,
 		inside: {
 			'interpolation': {
diff --git a/components/prism-concurnas.min.js b/components/prism-concurnas.min.js
index fcce1fc..60294b7 100644
--- a/components/prism-concurnas.min.js
+++ b/components/prism-concurnas.min.js
@@ -1 +1 @@
-Prism.languages.concurnas={comment:[{pattern:/(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],langext:{pattern:/\b\w+\s*\|\|[\s\S]+?\|\|/,greedy:!0,alias:"string"},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,lookbehind:!0},keyword:/\b(?:abstract|actor|also|annotation|assert|async|await|bool|boolean|break|byte|case|catch|changed|char|class|closed|constant|continue|def|default|del|double|elif|else|enum|every|extends|false|finally|float|for|from|global|gpudef|gpukernel|if|import|in|init|inject|int|lambda|local|long|loop|match|new|nodefault|null|of|onchange|open|out|override|package|parfor|parforsync|post|pre|private|protected|provide|provider|public|return|shared|short|single|size_t|sizeof|super|sync|this|throw|trait|trans|transient|true|try|typedef|unchecked|using|val|var|void|while|with)\b/,boolean:/\b(?:false|true)\b/,number:/\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,punctuation:/[{}[\];(),.:]/,operator:/<==|>==|=>|->|<-|<>|\^|&==|&<>|!|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,annotation:{pattern:/@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,alias:"builtin"}},Prism.languages.insertBefore("concurnas","langext",{string:{pattern:/[rs]?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},string:/[\s\S]+/}}}),Prism.languages.conc=Prism.languages.concurnas;
\ No newline at end of file
+Prism.languages.concurnas={comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\/\/.*)/,lookbehind:!0,greedy:!0},langext:{pattern:/\b\w+\s*\|\|[\s\S]+?\|\|/,greedy:!0,inside:{"class-name":/^\w+/,string:{pattern:/(^\s*\|\|)[\s\S]+(?=\|\|$)/,lookbehind:!0},punctuation:/\|\|/}},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,lookbehind:!0},keyword:/\b(?:abstract|actor|also|annotation|assert|async|await|bool|boolean|break|byte|case|catch|changed|char|class|closed|constant|continue|def|default|del|double|elif|else|enum|every|extends|false|finally|float|for|from|global|gpudef|gpukernel|if|import|in|init|inject|int|lambda|local|long|loop|match|new|nodefault|null|of|onchange|open|out|override|package|parfor|parforsync|post|pre|private|protected|provide|provider|public|return|shared|short|single|size_t|sizeof|super|sync|this|throw|trait|trans|transient|true|try|typedef|unchecked|using|val|var|void|while|with)\b/,boolean:/\b(?:false|true)\b/,number:/\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,punctuation:/[{}[\];(),.:]/,operator:/<==|>==|=>|->|<-|<>|&==|&<>|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|[!^~]|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,annotation:{pattern:/@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,alias:"builtin"}},Prism.languages.insertBefore("concurnas","langext",{"regex-literal":{pattern:/\br("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},regex:/[\s\S]+/}},"string-literal":{pattern:/(?:\B|\bs)("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},string:/[\s\S]+/}}}),Prism.languages.conc=Prism.languages.concurnas;
\ No newline at end of file
diff --git a/tests/languages/concurnas/comment_feature.test b/tests/languages/concurnas/comment_feature.test
new file mode 100644
index 0000000..cbcafa7
--- /dev/null
+++ b/tests/languages/concurnas/comment_feature.test
@@ -0,0 +1,11 @@
+// comment
+/*
+comment
+*/
+
+----------------------------------------------------
+
+[
+	["comment", "// comment"],
+	["comment", "/*\r\ncomment\r\n*/"]
+]
diff --git a/tests/languages/concurnas/function_feature.test b/tests/languages/concurnas/function_feature.test
index 32611b7..9f9905a 100644
--- a/tests/languages/concurnas/function_feature.test
+++ b/tests/languages/concurnas/function_feature.test
@@ -4,10 +4,18 @@ myfunc()
 ----------------------------------------------------
 
 [
-	["keyword", "def"], ["function", "myfunc"], ["punctuation", "("], ["punctuation", ")"], ["operator", "=>"], ["number", "12"],
-	"\r\nmyfunc" , ["punctuation", "("], ["punctuation", ")"]
+	["keyword", "def"],
+	["function", "myfunc"],
+	["punctuation", "("],
+	["punctuation", ")"],
+	["operator", "=>"],
+	["number", "12"],
+
+	"\r\nmyfunc",
+	["punctuation", "("],
+	["punctuation", ")"]
 ]
 
 ----------------------------------------------------
 
-Checks for functions.
\ No newline at end of file
+Checks for functions.
diff --git a/tests/languages/concurnas/langext_feature.test b/tests/languages/concurnas/langext_feature.test
new file mode 100644
index 0000000..3460688
--- /dev/null
+++ b/tests/languages/concurnas/langext_feature.test
@@ -0,0 +1,23 @@
+myAPL || x[⍋x←6?40] ||
+SimpleLisp||(+ 1 2 (* 3 3 ) )||
+
+ || invalid ||
+
+----------------------------------------------------
+
+[
+	["langext", [
+		["class-name", "myAPL"],
+		["punctuation", "||"],
+		["string", " x[⍋x←6?40] "],
+		["punctuation", "||"]
+	]],
+	["langext", [
+		["class-name", "SimpleLisp"],
+		["punctuation", "||"],
+		["string", "(+ 1 2 (* 3 3 ) )"],
+		["punctuation", "||"]
+	]],
+
+	"\r\n\r\n || invalid ||"
+]
diff --git a/tests/languages/concurnas/operator_feature.test b/tests/languages/concurnas/operator_feature.test
index 4854a85..f19761e 100644
--- a/tests/languages/concurnas/operator_feature.test
+++ b/tests/languages/concurnas/operator_feature.test
@@ -10,7 +10,7 @@ mod mod=
 < <== > >==
 and or
 band bor bxor
-^
+^ ~
 
 ----------------------------------------------------
 
@@ -18,34 +18,47 @@ band bor bxor
 	["operator", "+"],
 	["operator", "++"],
 	["operator", "+="],
+
 	["operator", "-"],
 	["operator", "--"],
 	["operator", "-="],
+
 	["operator", "="],
 	["operator", "=="],
 	["operator", "<>"],
+
 	["operator", "&=="],
 	["operator", "&<>"],
+
 	["operator", "isnot"],
+
 	["operator", "is"],
 	["operator", "as"],
+
 	["operator", "comp"],
+
 	["operator", "/"],
 	["operator", "/="],
 	["operator", "*"],
 	["operator", "*="],
+
 	["operator", "mod"],
 	["operator", "mod="],
+
 	["operator", "<"],
 	["operator", "<=="],
 	["operator", ">"],
 	["operator", ">=="],
+
 	["operator", "and"],
 	["operator", "or"],
+
 	["operator", "band"],
 	["operator", "bor"],
 	["operator", "bxor"],
-	["operator", "^"]
+
+	["operator", "^"],
+	["operator", "~"]
 ]
 
 ----------------------------------------------------
diff --git a/tests/languages/concurnas/regex_feature.test b/tests/languages/concurnas/regex_feature.test
new file mode 100644
index 0000000..5615bdc
--- /dev/null
+++ b/tests/languages/concurnas/regex_feature.test
@@ -0,0 +1,13 @@
+r'say'
+r"hello"
+
+----------------------------------------------------
+
+[
+	["regex-literal", [
+		["regex", "r'say'"]
+	]],
+	["regex-literal", [
+		["regex", "r\"hello\""]
+	]]
+]
diff --git a/tests/languages/concurnas/string_feature.test b/tests/languages/concurnas/string_feature.test
index a9930c8..ee0c237 100644
--- a/tests/languages/concurnas/string_feature.test
+++ b/tests/languages/concurnas/string_feature.test
@@ -1,19 +1,15 @@
 "hi"
 "addition result: {1+2}"
 'hi'
-r'say'
-r"hello"
 'contains: "'
-myAPL || x[⍋x←6?40] ||
- || invalid ||
 
 ----------------------------------------------------
 
 [
-	["string", [
+	["string-literal", [
 		["string", "\"hi\""]
 	]],
-	["string", [
+	["string-literal", [
 		["string", "\"addition result: "],
 		["interpolation", [
 			["punctuation", "{"],
@@ -24,20 +20,12 @@ myAPL || x[⍋x←6?40] ||
 		]],
 		["string", "\""]
 	]],
-	["string", [
+	["string-literal", [
 		["string", "'hi'"]
 	]],
-	["string", [
-		["string", "r'say'"]
-	]],
-	["string", [
-		["string", "r\"hello\""]
-	]],
-	["string", [
+	["string-literal", [
 		["string", "'contains: \"'"]
-	]],
-	["langext", "myAPL || x[⍋x←6?40] ||"],
-	"\r\n || invalid ||"
+	]]
 ]
 
 ----------------------------------------------------