Commit 0a00d7f728e6d7fe4b1e0873e3cccac8300b9a83

Andreas Rohner 2017-02-10T17:10:19

Fix the PHP language. This patch is an attempt to fix the PHP language when it is combined with markup. The problem is that markup has a higher priority than all other tokens. This leads to weird errors where HTML tags are highlighted inside comments (#197). One solution to this was to set the `greedy` flag on the comment token, but that leads to far worse errors such as #1097. This patch should fix both issues, #197 and #1097, by switching the grammar to markup on the fly. One potential problem is that it relies on the `<?php` (or short `<?`) tag to detect whether markup is present, so if a PHP file contains only markup and no PHP code at all, the result will look broken.

diff --git a/components/prism-php.js b/components/prism-php.js
index b135fbf..240ea11 100644
--- a/components/prism-php.js
+++ b/components/prism-php.js
@@ -16,8 +16,7 @@ Prism.languages.php = Prism.languages.extend('clike', {
 	'constant': /\b[A-Z0-9_]{2,}\b/,
 	'comment': {
 		pattern: /(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,
-		lookbehind: true,
-		greedy: true
+		lookbehind: true
 	}
 });
 
@@ -32,7 +31,10 @@ Prism.languages.insertBefore('php', 'class-name', {
 });
 
 Prism.languages.insertBefore('php', 'keyword', {
-	'delimiter': /\?>|<\?(?:php)?/i,
+	'delimiter': {
+		pattern: /\?>|<\?(?:php)?/i,
+		alias: 'important'
+	},
 	'variable': /\$\w+\b/i,
 	'package': {
 		pattern: /(\\|namespace\s+|use\s+)[\w\\]+/,
@@ -51,29 +53,32 @@ Prism.languages.insertBefore('php', 'operator', {
 	}
 });
 
-// Add HTML support of the markup language exists
+// Add HTML support if the markup language exists
 if (Prism.languages.markup) {
 
 	// Tokenize all inline PHP blocks that are wrapped in <?php ?>
 	// This allows for easy PHP + markup highlighting
 	Prism.hooks.add('before-highlight', function(env) {
-		if (env.language !== 'php') {
+		if (env.language !== 'php' || !/(?:<\?php|<\?)/ig.test(env.code)) {
 			return;
 		}
 
 		env.tokenStack = [];
 
 		env.backupCode = env.code;
-		env.code = env.code.replace(/(?:<\?php|<\?)[\s\S]*?(?:\?>)/ig, function(match) {
+		env.code = env.code.replace(/(?:<\?php|<\?)[\s\S]*?(?:\?>|$)/ig, function(match) {
 			env.tokenStack.push(match);
 
 			return '{{{PHP' + env.tokenStack.length + '}}}';
 		});
+
+		// Switch the grammar to markup
+		env.grammar = Prism.languages.markup;
 	});
 
 	// Restore env.code for other plugins (e.g. line-numbers)
 	Prism.hooks.add('before-insert', function(env) {
-		if (env.language === 'php') {
+		if (env.language === 'php' && env.backupCode) {
 			env.code = env.backupCode;
 			delete env.backupCode;
 		}
@@ -81,31 +86,21 @@ if (Prism.languages.markup) {
 
 	// Re-insert the tokens after highlighting
 	Prism.hooks.add('after-highlight', function(env) {
-		if (env.language !== 'php') {
+		if (env.language !== 'php' || !env.tokenStack) {
 			return;
 		}
 
+		// Switch the grammar back
+		env.grammar = Prism.languages.php;
+
 		for (var i = 0, t; t = env.tokenStack[i]; i++) {
 			// The replace prevents $$, $&, $`, $', $n, $nn from being interpreted as special patterns
-			env.highlightedCode = env.highlightedCode.replace('{{{PHP' + (i + 1) + '}}}', Prism.highlight(t, env.grammar, 'php').replace(/\$/g, '$$$$'));
+			env.highlightedCode = env.highlightedCode.replace('{{{PHP' + (i + 1) + '}}}',
+					"<span class=\"token php\">" +
+					Prism.highlight(t, env.grammar, 'php').replace(/\$/g, '$$$$') +
+					"</span>");
 		}
 
 		env.element.innerHTML = env.highlightedCode;
 	});
-
-	// Wrap tokens in classes that are missing them
-	Prism.hooks.add('wrap', function(env) {
-		if (env.language === 'php' && env.type === 'markup') {
-			env.content = env.content.replace(/(\{\{\{PHP\d+\}\}\})/g, "<span class=\"token php\">$1</span>");
-		}
-	});
-
-	// Add the rules before all others
-	Prism.languages.insertBefore('php', 'comment', {
-		'markup': {
-			pattern: /<[^?]\/?(.*?)>/,
-			inside: Prism.languages.markup
-		},
-		'php': /\{\{\{PHP\d+\}\}\}/
-	});
 }
diff --git a/components/prism-php.min.js b/components/prism-php.min.js
index 8e795ce..bf09544 100644
--- a/components/prism-php.min.js
+++ b/components/prism-php.min.js
@@ -1 +1 @@
-Prism.languages.php=Prism.languages.extend("clike",{keyword:/\b(and|or|xor|array|as|break|case|cfunction|class|const|continue|declare|default|die|do|else|elseif|enddeclare|endfor|endforeach|endif|endswitch|endwhile|extends|for|foreach|function|include|include_once|global|if|new|return|static|switch|use|require|require_once|var|while|abstract|interface|public|implements|private|protected|parent|throw|null|echo|print|trait|namespace|final|yield|goto|instanceof|finally|try|catch)\b/i,constant:/\b[A-Z0-9_]{2,}\b/,comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0,greedy:!0}}),Prism.languages.insertBefore("php","class-name",{"shell-comment":{pattern:/(^|[^\\])#.*/,lookbehind:!0,alias:"comment"}}),Prism.languages.insertBefore("php","keyword",{delimiter:/\?>|<\?(?:php)?/i,variable:/\$\w+\b/i,"package":{pattern:/(\\|namespace\s+|use\s+)[\w\\]+/,lookbehind:!0,inside:{punctuation:/\\/}}}),Prism.languages.insertBefore("php","operator",{property:{pattern:/(->)[\w]+/,lookbehind:!0}}),Prism.languages.markup&&(Prism.hooks.add("before-highlight",function(e){"php"===e.language&&(e.tokenStack=[],e.backupCode=e.code,e.code=e.code.replace(/(?:<\?php|<\?)[\s\S]*?(?:\?>)/gi,function(a){return e.tokenStack.push(a),"{{{PHP"+e.tokenStack.length+"}}}"}))}),Prism.hooks.add("before-insert",function(e){"php"===e.language&&(e.code=e.backupCode,delete e.backupCode)}),Prism.hooks.add("after-highlight",function(e){if("php"===e.language){for(var a,n=0;a=e.tokenStack[n];n++)e.highlightedCode=e.highlightedCode.replace("{{{PHP"+(n+1)+"}}}",Prism.highlight(a,e.grammar,"php").replace(/\$/g,"$$$$"));e.element.innerHTML=e.highlightedCode}}),Prism.hooks.add("wrap",function(e){"php"===e.language&&"markup"===e.type&&(e.content=e.content.replace(/(\{\{\{PHP\d+\}\}\})/g,'<span class="token php">$1</span>'))}),Prism.languages.insertBefore("php","comment",{markup:{pattern:/<[^?]\/?(.*?)>/,inside:Prism.languages.markup},php:/\{\{\{PHP\d+\}\}\}/}));
\ No newline at end of file
+Prism.languages.php=Prism.languages.extend("clike",{keyword:/\b(and|or|xor|array|as|break|case|cfunction|class|const|continue|declare|default|die|do|else|elseif|enddeclare|endfor|endforeach|endif|endswitch|endwhile|extends|for|foreach|function|include|include_once|global|if|new|return|static|switch|use|require|require_once|var|while|abstract|interface|public|implements|private|protected|parent|throw|null|echo|print|trait|namespace|final|yield|goto|instanceof|finally|try|catch)\b/i,constant:/\b[A-Z0-9_]{2,}\b/,comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0}}),Prism.languages.insertBefore("php","class-name",{"shell-comment":{pattern:/(^|[^\\])#.*/,lookbehind:!0,alias:"comment"}}),Prism.languages.insertBefore("php","keyword",{delimiter:{pattern:/\?>|<\?(?:php)?/i,alias:"important"},variable:/\$\w+\b/i,"package":{pattern:/(\\|namespace\s+|use\s+)[\w\\]+/,lookbehind:!0,inside:{punctuation:/\\/}}}),Prism.languages.insertBefore("php","operator",{property:{pattern:/(->)[\w]+/,lookbehind:!0}}),Prism.languages.markup&&(Prism.hooks.add("before-highlight",function(e){"php"===e.language&&/(?:<\?php|<\?)/gi.test(e.code)&&(e.tokenStack=[],e.backupCode=e.code,e.code=e.code.replace(/(?:<\?php|<\?)[\s\S]*?(?:\?>|$)/gi,function(a){return e.tokenStack.push(a),"{{{PHP"+e.tokenStack.length+"}}}"}),e.grammar=Prism.languages.markup)}),Prism.hooks.add("before-insert",function(e){"php"===e.language&&e.backupCode&&(e.code=e.backupCode,delete e.backupCode)}),Prism.hooks.add("after-highlight",function(e){if("php"===e.language&&e.tokenStack){e.grammar=Prism.languages.php;for(var a,n=0;a=e.tokenStack[n];n++)e.highlightedCode=e.highlightedCode.replace("{{{PHP"+(n+1)+"}}}",'<span class="token php">'+Prism.highlight(a,e.grammar,"php").replace(/\$/g,"$$$$")+"</span>");e.element.innerHTML=e.highlightedCode}}));
\ No newline at end of file
diff --git a/tests/helper/test-case.js b/tests/helper/test-case.js
index 27cb5b9..c92ca11 100644
--- a/tests/helper/test-case.js
+++ b/tests/helper/test-case.js
@@ -175,7 +175,7 @@ module.exports = {
 					code: code
 				};
 				Prism.hooks.run('before-highlight', env);
-				env.highlightedCode = Prism.highlight(env.code, Prism.languages[usedLanguages.mainLanguage], usedLanguages.mainLanguage);
+				env.highlightedCode = Prism.highlight(env.code, env.grammar, env.language);
 				Prism.hooks.run('before-insert', env);
 				env.element.innerHTML = env.highlightedCode;
 				Prism.hooks.run('after-highlight', env);
diff --git a/tests/languages/markup+php/markup_feature.test b/tests/languages/markup+php/markup_feature.test
deleted file mode 100644
index b771f46..0000000
--- a/tests/languages/markup+php/markup_feature.test
+++ /dev/null
@@ -1,37 +0,0 @@
-<div class="foo"></div>
-
-----------------------------------------------------
-
-[
-	["markup", [
-		["tag", [
-			["tag", [
-				["punctuation", "<"],
-				"div"
-			]],
-			["attr-name", [
-				"class"
-			]],
-			["attr-value", [
-				["punctuation", "="],
-				["punctuation", "\""],
-				"foo",
-				["punctuation", "\""]
-			]],
-			["punctuation", ">"]
-		]]
-	]],
-	["markup", [
-		["tag", [
-			["tag", [
-				["punctuation", "</"],
-				"div"
-			]],
-			["punctuation", ">"]
-		]]
-	]]
-]
-
-----------------------------------------------------
-
-Checks for markup in PHP.
\ No newline at end of file
diff --git a/tests/languages/markup+php/php_in_markup_feature.js b/tests/languages/markup+php/php_in_markup_feature.js
index 3b94913..b20d50a 100644
--- a/tests/languages/markup+php/php_in_markup_feature.js
+++ b/tests/languages/markup+php/php_in_markup_feature.js
@@ -1,6 +1,6 @@
 module.exports = {
-	'<div><?php echo $foo; ?></div>': '<span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span><span class="token punctuation">></span></span></span><span class="token php"><span class="token delimiter">&lt;?php</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter">?></span></span><span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>div</span><span class="token punctuation">></span></span></span>',
-	'<div><? echo $foo; ?></div>': '<span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span><span class="token punctuation">></span></span></span><span class="token php"><span class="token delimiter">&lt;?</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter">?></span></span><span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>div</span><span class="token punctuation">></span></span></span>',
-	'<div class="<?php echo $foo; ?>">': '<span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span> <span class="token attr-name">class</span><span class="token attr-value"><span class="token punctuation">=</span><span class="token punctuation">"</span><span class="token php"><span class="token delimiter">&lt;?php</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter">?></span></span><span class="token punctuation">"</span></span><span class="token punctuation">></span></span></span>',
-	'<div class="<? echo $foo; ?>">': '<span class="token markup"><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span> <span class="token attr-name">class</span><span class="token attr-value"><span class="token punctuation">=</span><span class="token punctuation">"</span><span class="token php"><span class="token delimiter">&lt;?</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter">?></span></span><span class="token punctuation">"</span></span><span class="token punctuation">></span></span></span>'
+	'<div><?php echo $foo; ?></div>': '<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span><span class="token punctuation">></span></span><span class="token php"><span class="token delimiter important">&lt;?php</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter important">?></span></span><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>div</span><span class="token punctuation">></span></span>',
+	'<div><? echo $foo; ?></div>': '<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span><span class="token punctuation">></span></span><span class="token php"><span class="token delimiter important">&lt;?</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter important">?></span></span><span class="token tag"><span class="token tag"><span class="token punctuation">&lt;/</span>div</span><span class="token punctuation">></span></span>',
+	'<div class="<?php echo $foo; ?>">': '<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span> <span class="token attr-name">class</span><span class="token attr-value"><span class="token punctuation">=</span><span class="token punctuation">"</span><span class="token php"><span class="token delimiter important">&lt;?php</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter important">?></span></span><span class="token punctuation">"</span></span><span class="token punctuation">></span></span>',
+	'<div class="<? echo $foo; ?>">': '<span class="token tag"><span class="token tag"><span class="token punctuation">&lt;</span>div</span> <span class="token attr-name">class</span><span class="token attr-value"><span class="token punctuation">=</span><span class="token punctuation">"</span><span class="token php"><span class="token delimiter important">&lt;?</span> <span class="token keyword">echo</span> <span class="token variable">$foo</span><span class="token punctuation">;</span> <span class="token delimiter important">?></span></span><span class="token punctuation">"</span></span><span class="token punctuation">></span></span>'
 };
\ No newline at end of file
diff --git a/tests/languages/php/comment_feature.test b/tests/languages/php/comment_feature.test
index c728423..0db0474 100644
--- a/tests/languages/php/comment_feature.test
+++ b/tests/languages/php/comment_feature.test
@@ -3,6 +3,7 @@
 /**/
 /* foo
 bar */
+/* <me@example.com> */
 
 ----------------------------------------------------
 
@@ -10,7 +11,8 @@ bar */
 	["comment", "//"],
 	["comment", "// foobar"],
 	["comment", "/**/"],
-	["comment", "/* foo\r\nbar */"]
+	["comment", "/* foo\r\nbar */"],
+	["comment", "/* <me@example.com> */"]
 ]
 
 ----------------------------------------------------
diff --git a/tests/languages/php/string_feature.test b/tests/languages/php/string_feature.test
new file mode 100644
index 0000000..94db8b0
--- /dev/null
+++ b/tests/languages/php/string_feature.test
@@ -0,0 +1,13 @@
+"https://example.com"
+" /* not a comment */ "
+
+----------------------------------------------------
+
+[
+	["string", "\"https://example.com\""],
+	["string", "\" /* not a comment */ \""]
+]
+
+----------------------------------------------------
+
+Checks for strings with comments.
\ No newline at end of file