Commit a69c2b6259e80862c56f7ce13df052b6101ecac7

Michael Schmidt 2018-12-02T16:48:53

Improvements to Python F-strings and string prefixes (#1642) This PR adds support for [string interpolation](https://www.python.org/dev/peps/pep-0498/) (a.k.a. f-strings) and makes the [string prefixes](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals) part of the string. Resolves #1636. ### Known issues The implementation assumes that strings inside the interpolation expression are 'nice', so strings with an unfortunate number of curly braces will cause incorrect highlighting, e.g. `f"{'}'}"`.

diff --git a/components/prism-python.js b/components/prism-python.js
index b326a5c..ea1a515 100644
--- a/components/prism-python.js
+++ b/components/prism-python.js
@@ -3,13 +3,36 @@ Prism.languages.python = {
 		pattern: /(^|[^\\])#.*/,
 		lookbehind: true
 	},
+	'string-interpolation': {
+		pattern: /(?:f|rf|fr)(?:("""|''')[\s\S]+?\1|("|')(?:\\.|(?!\2)[^\\\r\n])*\2)/i,
+		greedy: true,
+		inside: {
+			'interpolation': {
+				// "{" <expression> <optional "!s", "!r", or "!a"> <optional ":" format specifier> "}"
+				pattern: /((?:^|[^{])(?:{{)*){(?!{)(?:[^{}]|{(?!{)(?:[^{}]|{(?!{)(?:[^{}])+})+})+}/,
+				lookbehind: true,
+				inside: {
+					'format-spec': {
+						pattern: /(:)[^:(){}]+(?=}$)/,
+						lookbehind: true
+					},
+					'conversion-option': {
+						pattern: /![sra](?=[:}]$)/,
+						alias: 'punctuation'
+					},
+					rest: null
+				}
+			},
+			'string': /[\s\S]+/
+		}
+	},
 	'triple-quoted-string': {
-		pattern: /("""|''')[\s\S]+?\1/,
+		pattern: /(?:[rub]|rb|br)?("""|''')[\s\S]+?\1/i,
 		greedy: true,
 		alias: 'string'
 	},
 	'string': {
-		pattern: /("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
+		pattern: /(?:[rub]|rb|br)?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/i,
 		greedy: true
 	},
 	'function': {
@@ -35,3 +58,5 @@ Prism.languages.python = {
 	'operator': /[-+%=]=?|!=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,
 	'punctuation': /[{}[\];(),.:]/
 };
+
+Prism.languages.python['string-interpolation'].inside['interpolation'].inside.rest = Prism.languages.python;
diff --git a/components/prism-python.min.js b/components/prism-python.min.js
index 8956dae..7f9ee42 100644
--- a/components/prism-python.min.js
+++ b/components/prism-python.min.js
@@ -1 +1 @@
-Prism.languages.python={comment:{pattern:/(^|[^\\])#.*/,lookbehind:!0},"triple-quoted-string":{pattern:/("""|''')[\s\S]+?\1/,greedy:!0,alias:"string"},string:{pattern:/("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0},"function":{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/g,lookbehind:!0},"class-name":{pattern:/(\bclass\s+)\w+/i,lookbehind:!0},decorator:{pattern:/(^\s*)@\w+(?:\.\w+)*/i,lookbehind:!0,alias:["annotation","punctuation"],inside:{punctuation:/\./}},keyword:/\b(?:and|as|assert|async|await|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\b/,builtin:/\b(?:__import__|abs|all|any|apply|ascii|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|memoryview|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b/,"boolean":/\b(?:True|False|None)\b/,number:/(?:\b(?=\d)|\B(?=\.))(?:0[bo])?(?:(?:\d|0x[\da-f])[\da-f]*\.?\d*|\.\d+)(?:e[+-]?\d+)?j?\b/i,operator:/[-+%=]=?|!=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,punctuation:/[{}[\];(),.:]/};
\ No newline at end of file
+Prism.languages.python={comment:{pattern:/(^|[^\\])#.*/,lookbehind:!0},"string-interpolation":{pattern:/(?:f|rf|fr)(?:("""|''')[\s\S]+?\1|("|')(?:\\.|(?!\2)[^\\\r\n])*\2)/i,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^{])(?:{{)*){(?!{)(?:[^{}]|{(?!{)(?:[^{}]|{(?!{)(?:[^{}])+})+})+}/,lookbehind:!0,inside:{"format-spec":{pattern:/(:)[^:(){}]+(?=}$)/,lookbehind:!0},"conversion-option":{pattern:/![sra](?=[:}]$)/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}},"triple-quoted-string":{pattern:/(?:[rub]|rb|br)?("""|''')[\s\S]+?\1/i,greedy:!0,alias:"string"},string:{pattern:/(?:[rub]|rb|br)?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/i,greedy:!0},"function":{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/g,lookbehind:!0},"class-name":{pattern:/(\bclass\s+)\w+/i,lookbehind:!0},decorator:{pattern:/(^\s*)@\w+(?:\.\w+)*/i,lookbehind:!0,alias:["annotation","punctuation"],inside:{punctuation:/\./}},keyword:/\b(?:and|as|assert|async|await|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\b/,builtin:/\b(?:__import__|abs|all|any|apply|ascii|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|memoryview|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b/,"boolean":/\b(?:True|False|None)\b/,number:/(?:\b(?=\d)|\B(?=\.))(?:0[bo])?(?:(?:\d|0x[\da-f])[\da-f]*\.?\d*|\.\d+)(?:e[+-]?\d+)?j?\b/i,operator:/[-+%=]=?|!=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,punctuation:/[{}[\];(),.:]/},Prism.languages.python["string-interpolation"].inside.interpolation.inside.rest=Prism.languages.python;
\ No newline at end of file
diff --git a/examples/prism-python.html b/examples/prism-python.html
index ca7a397..ce6efc0 100644
--- a/examples/prism-python.html
+++ b/examples/prism-python.html
@@ -57,8 +57,5 @@ if __name__ == '__main__':
 	If a failure is listed here, it doesn’t mean it will never be fixed. This is more of a “known bugs” list, just with a certain type of bug.
 </p>
 
-<h3>Triple-quoted strings with what look like strings inside</h3>
-<pre><code>def antique(string):
-    """Replace anachronistic Latin "j" with "i"."""
-    return string.replace("j", "i").replace("J", "I")
-    </code></pre>
+<h3>Interpolation expressions containing strings with <code>{</code> or <code>}</code></h3>
+<pre><code>f"{'}'}"</code></pre>
diff --git a/tests/languages/python/string-interpolation_feature.test b/tests/languages/python/string-interpolation_feature.test
new file mode 100644
index 0000000..789cb83
--- /dev/null
+++ b/tests/languages/python/string-interpolation_feature.test
@@ -0,0 +1,147 @@
+f'The value is {value}.'
+
+f"The value is {'4'}."
+
+f'input={value!s:#06x}'
+
+f'{{{4*10}}}'
+
+fr'x={4*10}\n'
+
+f'''{x
++1}'''
+
+f'mapping is { {a:b for (a, b) in ((1, 2), (3, 4))} }'
+
+f'{(lambda x: x*2)(3)}'
+
+----------------------------------------------------
+
+[
+	["string-interpolation", [
+		["string", "f'The value is "],
+		["interpolation", [
+			["punctuation", "{"],
+			"value",
+			["punctuation", "}"]
+		]],
+		["string", ".'"]
+	]],
+
+	["string-interpolation", [
+		["string", "f\"The value is "],
+		["interpolation", [
+			["punctuation", "{"],
+			["string", "'4'"],
+			["punctuation", "}"]
+		]],
+		["string", ".\""]
+	]],
+
+	["string-interpolation", [
+		["string", "f'input="],
+		["interpolation", [
+			["punctuation", "{"],
+			"value",
+			["conversion-option", "!s"],
+			["punctuation", ":"],
+			["format-spec", "#06x"],
+			["punctuation", "}"]
+		]],
+		["string", "'"]
+	]],
+
+	["string-interpolation", [
+		["string", "f'{{"],
+		["interpolation", [
+			["punctuation", "{"],
+			["number", "4"],
+			["operator", "*"],
+			["number", "10"],
+			["punctuation", "}"]
+		]],
+		["string", "}}'"]
+	]],
+
+	["string-interpolation", [
+		["string", "fr'x="],
+		["interpolation", [
+			["punctuation", "{"],
+			["number", "4"],
+			["operator", "*"],
+			["number", "10"],
+			["punctuation", "}"]
+		]],
+		["string", "\\n'"]
+	]],
+
+	["string-interpolation", [
+		["string", "f'''"],
+		["interpolation", [
+			["punctuation", "{"],
+			"x\r\n",
+			["operator", "+"],
+			["number", "1"],
+			["punctuation", "}"]
+		]],
+		["string", "'''"]
+	]],
+
+	["string-interpolation", [
+		["string", "f'mapping is "],
+		["interpolation", [
+			["punctuation", "{"],
+			["punctuation", "{"],
+			"a",
+			["punctuation", ":"],
+			"b ",
+			["keyword", "for"],
+			["punctuation", "("],
+			"a",
+			["punctuation", ","],
+			" b",
+			["punctuation", ")"],
+			["keyword", "in"],
+			["punctuation", "("],
+			["punctuation", "("],
+			["number", "1"],
+			["punctuation", ","],
+			["number", "2"],
+			["punctuation", ")"],
+			["punctuation", ","],
+			["punctuation", "("],
+			["number", "3"],
+			["punctuation", ","],
+			["number", "4"],
+			["punctuation", ")"],
+			["punctuation", ")"],
+			["punctuation", "}"],
+			["punctuation", "}"]
+		]],
+		["string", "'"]
+	]],
+
+	["string-interpolation", [
+		["string", "f'"],
+		["interpolation", [
+			["punctuation", "{"],
+			["punctuation", "("],
+			["keyword", "lambda"],
+			" x",
+			["punctuation", ":"],
+			" x",
+			["operator", "*"],
+			["number", "2"],
+			["punctuation", ")"],
+			["punctuation", "("],
+			["number", "3"],
+			["punctuation", ")"],
+			["punctuation", "}"]
+		]],
+		["string", "'"]
+	]]
+]
+
+----------------------------------------------------
+
+Checks for string interpolation.
\ No newline at end of file
diff --git a/tests/languages/python/string_feature.test b/tests/languages/python/string_feature.test
index 9e21c4d..0bb5119 100644
--- a/tests/languages/python/string_feature.test
+++ b/tests/languages/python/string_feature.test
@@ -4,6 +4,11 @@
 'fo\'obar'
 "fo\" # comment obar"
 
+r"\n"
+b'foo'
+rb"foo\n"
+u"foo"
+
 ----------------------------------------------------
 
 [
@@ -11,7 +16,12 @@
 	["string", "\"fo\\\"obar\""],
 	["string", "''"],
 	["string", "'fo\\'obar'"],
-	["string", "\"fo\\\" # comment obar\""]
+	["string", "\"fo\\\" # comment obar\""],
+
+	["string", "r\"\\n\""],
+	["string", "b'foo'"],
+	["string", "rb\"foo\\n\""],
+	["string", "u\"foo\""]
 ]
 
 ----------------------------------------------------