Fixed regexes further, documented lookbehind feature
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
diff --git a/components/prism-core.js b/components/prism-core.js
index 5d684a9..21134f9 100644
--- a/components/prism-core.js
+++ b/components/prism-core.js
@@ -17,7 +17,7 @@ var _ = self.Prism = {
var elements = document.querySelectorAll('pre.prism, pre.prism > code, code.prism');
for (var i=0, element; element = elements[i++];) {
- if(/pre/i.test(element.nodeName) && element.children.length > 0) {
+ if (/pre/i.test(element.nodeName) && element.children.length > 0) {
continue;
}
@@ -98,7 +98,7 @@ var _ = self.Prism = {
var pattern = tokens[token],
inside = pattern.inside,
- lookbehind = pattern.lookbehind || 0;
+ lookbehind = !!pattern.lookbehind || 0;
pattern = pattern.pattern || pattern;
@@ -120,7 +120,11 @@ var _ = self.Prism = {
var match = pattern.exec(str);
if (match) {
- var from = match.index - 1 + lookbehind;
+ if(lookbehind) {
+ lookbehind = match[1].length;
+ }
+
+ var from = match.index - 1 + lookbehind,
match = match[0].slice(lookbehind),
len = match.length,
to = from + len,
diff --git a/components/prism-javascript.js b/components/prism-javascript.js
index 1d2589a..f84b281 100644
--- a/components/prism-javascript.js
+++ b/components/prism-javascript.js
@@ -1,16 +1,16 @@
Prism.languages.javascript = {
'comment': /\/\*[\w\W]*?\*\//g,
'regex': {
- pattern: /[^/]\/(\\?.)+?\/[gim]{0,3}/g,
- lookbehind: 1
+ pattern: /(^|[^/])\/(?!\/)(\[.+?]|\\.|[^/\r\n])+\/[gim]{0,3}/g,
+ lookbehind: true
},
'line-comment': /\/\/.*?(\r?\n|$)/g,
'string': /("|')(\\?.)*?\1/g,
'keyword': /\b(var|let|if|else|while|do|for|return|in|instanceof|function|new|with|typeof|try|catch|finally|null|break|continue)\b/g,
'boolean': /\b(true|false)\b/g,
'number': /\b-?(0x)?\d*\.?\d+\b/g,
- 'operator': /[-+]{1,2}|!|=?<|=?>|={1,2}|(&){1,2}|\|?\||\?|:|\*|\//g,
+ 'operator': /[-+]{1,2}|!|=?<|=?>|={1,2}|(&){1,2}|\|?\||\?|\*|\//g,
'ignore': /&(lt|gt|amp);/gi,
- 'punctuation': /[{}[\];(),.]/g,
+ 'punctuation': /[{}[\];(),.:]/g,
'tab': /\t/g
};
\ No newline at end of file
diff --git a/examples.html b/examples.html
index 3a638b5..7ade7a9 100644
--- a/examples.html
+++ b/examples.html
@@ -112,6 +112,10 @@ ol {}</code></pre>
}
}</code></pre>
+ <h2>Regexes</h2>
+ <pre class="prism"><code class="language-javascript">/([^/])\/(\\?.|\[.+?])+?\/[gim]{0,3}/g</code></pre>
+ <pre class="prism"><code class="language-javascript">/\/\*[\w\W]*?\*\//g</code></pre>
+
<h2>Single line comments & regexes</h2>
<pre class="prism"><code class="language-javascript">// http://lea.verou.me
var comment = /\/\*[\w\W]*?\*\//g;</code></pre>
@@ -119,6 +123,9 @@ var comment = /\/\*[\w\W]*?\*\//g;</code></pre>
<h2>Link in comment</h2>
<pre class="prism"><code class="language-javascript">// http://lea.verou.me
/* http://lea.verou.me */</code></pre>
+
+ <h2>Strings with slashes</h2>
+ <pre class="prism"><code class="language-javascript">env.content + '</' + env.tag + '>'</code></pre>
</section>
<footer><div class="wrapper">
diff --git a/index.html b/index.html
index 4487176..bfcf7ea 100644
--- a/index.html
+++ b/index.html
@@ -111,13 +111,30 @@
<section id="language-definitions">
<h1>Language definitions</h1>
- <p>Every language is defined as a set of tokens, which are expressed as regular expressions. For example, this is the language definition for JavaScript:</p>
- <pre data-src="components/prism-javascript.js"></pre>
+ <p>Every language is defined as a set of tokens, which are expressed as regular expressions. For example, this is the language definition for CSS:</p>
+ <pre data-src="components/prism-css.js"></pre>
+
+ <p>A regular expression literal is the simplest way to express a token. An alternative, with more options, is to use an object literal. With that notation, the regular expression describing the token is the value of the <code>pattern</code> property:</p>
+ <pre class="prism"><code class="language-javascript">...
+'tokenname': {
+ pattern: /regex/
+}
+...</code></pre>
+ <p>So far the functionality is exactly the same between the short and extended notations. However, the extended notation allows for additional options:</p>
+
+ <dl>
+ <dt>inside</dt>
+ <dd>This property accepts another object literal, with tokens that are allowed to be nested in this token.
+ This makes it easier to define certain languages. However, keep in mind that nested tokens are slower and, if coded poorly, can even result in infinite recursion.
+ For an example of nested tokens, check out the Markup language definition:
+ <pre data-src="components/prism-markup.js"></pre></dd>
+
+ <dt>lookbehind</dt>
+ <dd>This option mitigates JavaScript’s lack of lookbehind. When set to <code class="prism language-javascript">true</code>, the first capturing group in the regex <code>pattern</code> is discarded when matching this token, so it effectively behaves as if it were a lookbehind.</dd>
+ </dl>
- <p>Each token cannot contain other tokens, so their order is significant. Although per the ECMAScript specification, objects are not required to have a specific ordering of their properties, in practice they do in every modern browser.</p>
- <p>In some cases, it’s easier to define a language when certain tokens can only be nested inside other tokens. This is allowed by using an object as the token value, instead of a regular expression. This object has a <code>pattern</code> property,
- which contains the regular expression that describes the entire token and an <code>inside</code> property that contains the tokens that can be nested inside that token. For example, the tokens for HTML are defined in that way:</p>
- <pre data-src="components/prism-markup.js"></pre>
+ <p>Unless explicitly allowed through the <code>inside</code> property, each token cannot contain other tokens, so their order is significant. Although per the ECMAScript specification, objects are not required to have a specific ordering of their properties, in practice they do in every modern browser.</p>
+
<p>Nested definitions can help you get around JavaScript’s lack of lookbehind assertions and usually result in simpler regular expressions. However, keep in mind that they’re slower and if coded poorly, can even result in infinite recursion.</p>
</section>
diff --git a/prism.css b/prism.css
index 5a138b2..2cc8c46 100644
--- a/prism.css
+++ b/prism.css
@@ -70,6 +70,7 @@ code.prism {
.operator {
color: #a67f59;
+ background: hsla(0,0%,100%,.5);
}
.atrule,
diff --git a/prism.js b/prism.js
index 39227a4..db76c93 100644
--- a/prism.js
+++ b/prism.js
@@ -17,7 +17,7 @@ var _ = self.Prism = {
var elements = document.querySelectorAll('pre.prism, pre.prism > code, code.prism');
for (var i=0, element; element = elements[i++];) {
- if(/pre/i.test(element.nodeName) && element.children.length > 0) {
+ if (/pre/i.test(element.nodeName) && element.children.length > 0) {
continue;
}
@@ -98,7 +98,7 @@ var _ = self.Prism = {
var pattern = tokens[token],
inside = pattern.inside,
- lookbehind = pattern.lookbehind || 0;
+ lookbehind = !!pattern.lookbehind || 0;
pattern = pattern.pattern || pattern;
@@ -120,7 +120,11 @@ var _ = self.Prism = {
var match = pattern.exec(str);
if (match) {
- var from = match.index - 1 + lookbehind;
+ if(lookbehind) {
+ lookbehind = match[1].length;
+ }
+
+ var from = match.index - 1 + lookbehind,
match = match[0].slice(lookbehind),
len = match.length,
to = from + len,
@@ -260,17 +264,17 @@ Prism.languages.css = {
Prism.languages.javascript = {
'comment': /\/\*[\w\W]*?\*\//g,
'regex': {
- pattern: /[^/]\/(\\?.)+?\/[gim]{0,3}/g,
- lookbehind: 1
+ pattern: /(^|[^/])\/(?!\/)(\[.+?]|\\.|[^/\r\n])+\/[gim]{0,3}/g,
+ lookbehind: true
},
'line-comment': /\/\/.*?(\r?\n|$)/g,
'string': /("|')(\\?.)*?\1/g,
'keyword': /\b(var|let|if|else|while|do|for|return|in|instanceof|function|new|with|typeof|try|catch|finally|null|break|continue)\b/g,
'boolean': /\b(true|false)\b/g,
'number': /\b-?(0x)?\d*\.?\d+\b/g,
- 'operator': /[-+]{1,2}|!|=?<|=?>|={1,2}|(&){1,2}|\|?\||\?|:|\*|\//g,
+ 'operator': /[-+]{1,2}|!|=?<|=?>|={1,2}|(&){1,2}|\|?\||\?|\*|\//g,
'ignore': /&(lt|gt|amp);/gi,
- 'punctuation': /[{}[\];(),.]/g,
+ 'punctuation': /[{}[\];(),.:]/g,
'tab': /\t/g
};