Commit b4c30cd6e6a6ed1c7b355abbad7dfeda79d15605

Martin Mitas 2020-02-13T02:23:03

Improve wiki-link parsing. * Get rid of MD_LINE::total_indent. * Remove some special complicated branching for nested images: Instead we call md_rollback() on the wiki-link destination span to discard _any_ marks resolved so far, including the images. * Remove any length limit from the label. Only the destination length is limited, regardless of whether '|' is present or not. * Move the special handling of `[[foo|]]` from md_process_inlines() into md_resolve_links(). We simply expand the closer mark to consume the `|`. * Do not modify the opener and closer marks until we really know it is indeed a wiki-link.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 4a92b90..43de7aa 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -214,14 +214,12 @@ struct MD_LINE_ANALYSIS_tag {
     OFF beg;
     OFF end;
     unsigned indent;        /* Indentation level. */
-    unsigned total_indent;  /* Total indent in characters. */
 };
 
 typedef struct MD_LINE_tag MD_LINE;
 struct MD_LINE_tag {
     OFF beg;
     OFF end;
-    unsigned total_indent;  /* Total indent in characters. */
 };
 
 typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
@@ -2638,7 +2636,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
             chain->head = -1;
     }
 
-    /* Go backwards so that un-resolved openers are re-added into their
+    /* Go backwards so that unresolved openers are re-added into their
      * respective chains, in the right order. */
     mark_index = closer_index - 1;
     while(mark_index > opener_index) {
@@ -3429,89 +3427,87 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
             continue;
         }
 
-        /* Detect and resolve wiki links. */
+        /* Recognize and resolve wiki links.
+         * Wiki-links may be '[[destination]]' or '[[destination|label]]'.
+         */
         if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
-            next_opener != NULL && next_closer != NULL &&
-            (opener->end - opener->beg == 1) &&
+            (opener->end - opener->beg == 1) &&         /* not image */
+            next_opener != NULL &&                      /* double '[' opener */
+            next_opener->ch == '[' &&
             (next_opener->beg == opener->beg - 1) &&
-            (next_closer->beg == closer->beg + 1) &&
             (next_opener->end - next_opener->beg == 1) &&
-            (next_closer->end - next_closer->beg == 1) &&
-            (next_opener->ch == '[' && next_closer->ch == ']'))
+            next_closer != NULL &&                      /* double ']' closer */
+            next_closer->ch == ']' &&
+            (next_closer->beg == closer->beg + 1) &&
+            (next_closer->end - next_closer->beg == 1))
         {
-            is_link = TRUE;
+            MD_MARK* delim = NULL;
+            int delim_index;
+            OFF dest_beg, dest_end;
+            OFF off;
 
-            if (opener->end == closer->beg)
-                is_link = FALSE;
-
-            int delim_index = opener_index;
-            MD_MARK* delim = &ctx->marks[delim_index];
+            is_link = TRUE;
 
-            /* To prevent runaway O(n^2) performance, don't look too far for the delimiter (.. < 100). */
-            while(is_link && delim_index < closer_index && (delim_index - opener_index) < 100 ) {
-                if(delim->ch == '|' && delim->beg == opener->end) {
-                    is_link = FALSE;
-                } else if(delim->ch == '|' && delim->end == closer->beg) {
+            /* We don't allow the destination to be longer than 100 characters.
+             * Let's scan to see whether there is a '|'. (If not, the whole
+             * wiki-link content must be at most 100 characters long.) */
+            delim_index = opener_index + 1;
+            while(delim_index < closer_index) {
+                if(ctx->marks[delim_index].beg - opener->end > 100)
                     break;
-                } else if(delim->ch == '|') {
-                    opener->end = delim->beg;
+                if(ctx->marks[delim_index].ch == '|') {
+                    delim = &ctx->marks[delim_index];
                     break;
                 }
                 delim_index++;
-                delim = &ctx->marks[delim_index];
             }
-
-            OFF off = closer->beg-1;
-            int count = 0;
-            int has_label = (opener->end - opener->beg > 2);
-            const MD_LINE* line;
-            int line_index = n_lines-1;
-
-            /* An image inside the link target disables the wiki link. */
-            if( (has_label && last_img_beg >= opener->beg && last_img_end <= opener->end) ||
-                (!has_label && last_img_beg >= opener->beg && last_img_end <= closer->end))
+            dest_beg = opener->end;
+            dest_end = (delim != NULL) ? delim->beg : closer->beg;
+            if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
                 is_link = FALSE;
 
-            while(is_link && off > opener->beg && count++ < 100) {
-
-                /* Newline not allowed in link target. */
-                if(has_label && (off <= opener->end) && ISNEWLINE(off))
-                    is_link = FALSE;
-                else if(!has_label && off > opener->end && ISNEWLINE(off))
-                    is_link = FALSE;
-                else if(ISNEWLINE(off)) {
-                    line = &lines[line_index--];
-                    count = count - line->total_indent - 1;  /* Count newline too. */
+            /* The destination must not contain any newline. */
+            if(is_link) {
+                for(off = dest_beg; off < dest_end; off++) {
+                    if(ISNEWLINE(off)) {
+                        is_link = FALSE;
+                        break;
+                    }
                 }
-
-                off--;
             }
 
-            if(off > opener->beg)
-                is_link = FALSE;
-
             if(is_link) {
-                if(delim->ch == '|')
-                    delim->flags |= MD_MARK_RESOLVED;
+                if(delim != NULL) {
+                    if(delim->end < closer->beg) {
+                        opener->end = delim->beg;
+                    } else {
+                        closer->beg = delim->beg;
+                        delim = NULL;
+                    }
+                }
 
                 opener->beg = next_opener->beg;
-                closer->end = next_closer->end;
-
                 opener->next = closer_index;
                 opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
+
+                closer->end = next_closer->end;
                 closer->prev = opener_index;
                 closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
 
                 last_link_beg = opener->beg;
                 last_link_end = closer->end;
 
-                if ((opener->end - opener->beg > 2))
+                if(delim != NULL) {
+                    delim->flags |= MD_MARK_RESOLVED;
+                    md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
                     md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
+                } else {
+                    md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
+                }
 
-                opener_index = next_index;
+                opener_index = next_opener->prev;
                 continue;
             }
-
         }
 
         if(next_opener != NULL  &&  next_opener->beg == closer->end) {
@@ -4194,16 +4190,13 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 
                     if ((opener->ch == '[' && closer->ch == ']') &&
                         opener->end - opener->beg >= 2 &&
-                        closer->end - closer->beg == 2)
+                        closer->end - closer->beg >= 2)
                     {
-                        const MD_MARK* delim = opener+3;  /* Scan past the two dummy marks. */
                         int has_label = (opener->end - opener->beg > 2);
-                        int target_sz;
+                        SZ target_sz;
 
                         if(has_label)
                             target_sz = opener->end - (opener->beg+2);
-                        else if(delim->ch == '|')
-                            target_sz = (closer->beg-1) - opener->end;
                         else
                             target_sz = closer->beg - opener->end;
 
@@ -5036,7 +5029,6 @@ md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
 
         line->beg = analysis->beg;
         line->end = analysis->end;
-        line->total_indent = analysis->total_indent;
     }
     ctx->current_block->n_lines++;
 
@@ -6043,8 +6035,6 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
         break;
     }
 
-    line->total_indent = total_indent;
-
     /* Scan for end of the line.
      *
      * Note this is quite a bottleneck of the parsing as we here iterate almost
diff --git a/test/wiki-links.txt b/test/wiki-links.txt
index 3210354..c8afe71 100644
--- a/test/wiki-links.txt
+++ b/test/wiki-links.txt
@@ -3,7 +3,8 @@
 
 With the flag `MD_FLAG_WIKILINKS`, MD4C recognizes wiki links.
 
-A wiki link is ...
+A simple wiki-link is a wiki-link destination enclosed between `[[` and
+`]]`.
 
 ```````````````````````````````` example
 [[foo]]
@@ -11,7 +12,16 @@ A wiki link is ...
 <p><x-wikilink data-target="foo">foo</x-wikilink></p>
 ````````````````````````````````
 
-A wiki link cannot be empty.
+However, a wiki-link may contain an explicit label, separated from the
+destination with `|`.
+
+```````````````````````````````` example
+[[foo|bar]]
+.
+<p><x-wikilink data-target="foo">bar</x-wikilink></p>
+````````````````````````````````
+
+A wiki-link destination cannot be empty.
 
 ```````````````````````````````` example
 [[]]
@@ -19,7 +29,14 @@ A wiki link cannot be empty.
 <p>[[]]</p>
 ````````````````````````````````
 
-The link target of a wiki link cannot span more than one line:
+```````````````````````````````` example
+[[|foo]]
+.
+<p>[[|foo]]</p>
+````````````````````````````````
+
+
+The wiki-link destination cannot contain a new line.
 
 ```````````````````````````````` example
 [[foo
@@ -29,31 +46,31 @@ bar]]
 bar]]</p>
 ````````````````````````````````
 
-The link target is rendered verbatim; inline markup is not recognized.
-
 ```````````````````````````````` example
-[[*foo*]]
+[[foo
+bar|baz]]
 .
-<p><x-wikilink data-target="*foo*">*foo*</x-wikilink></p>
+<p>[[foo
+bar|baz]]</p>
 ````````````````````````````````
 
-Wiki links can have a label that is delimited by a `|`.
+The wiki-link destination is rendered verbatim; inline markup in it is not
+recognized.
 
 ```````````````````````````````` example
-[[foo|bar]]
+[[*foo*]]
 .
-<p><x-wikilink data-target="foo">bar</x-wikilink></p>
+<p><x-wikilink data-target="*foo*">*foo*</x-wikilink></p>
 ````````````````````````````````
 
-A wiki link with a label but without an id is not a wiki link.
-
 ```````````````````````````````` example
-[[|foo]]
+[[foo|![bar](bar.jpg)]]
 .
-<p>[[|foo]]</p>
+<p><x-wikilink data-target="foo"><img src="bar.jpg" alt="bar"></x-wikilink></p>
 ````````````````````````````````
 
-With multiple `|` delimiters, only the first is recognized.
+With multiple `|` delimiters, only the first one is recognized and the other
+ones are part of the label.
 
 ```````````````````````````````` example
 [[foo|bar|baz]]
@@ -61,7 +78,7 @@ With multiple `|` delimiters, only the first is recognized.
 <p><x-wikilink data-target="foo">bar|baz</x-wikilink></p>
 ````````````````````````````````
 
-The delimiter `|` can be escaped with `/`.
+However, the delimiter `|` can be escaped with `\`.
 
 ```````````````````````````````` example
 [[foo\|bar|baz]]
@@ -77,17 +94,8 @@ The label can contain inline elements.
 <p><x-wikilink data-target="foo"><em>bar</em></x-wikilink></p>
 ````````````````````````````````
 
-The label can contain at most one linebreak.
-
-```````````````````````````````` example
-[[foo|*bar
-baz*]]
-.
-<p><x-wikilink data-target="foo"><em>bar
-baz</em></x-wikilink></p>
-````````````````````````````````
-
-A wiki link with an empty label is considered invalid.
+An empty explicit label is the same as using the implicit label; i.e. the
+verbatim destination string is used as the label.
 
 ```````````````````````````````` example
 [[foo|]]
@@ -107,7 +115,7 @@ bar
 baz</x-wikilink></p>
 ````````````````````````````````
 
-Wiki links are prioritized over regular links.
+Wiki-links have higher priority than regular links.
 
 ```````````````````````````````` example
 [[foo]](foo.jpg)
@@ -115,6 +123,14 @@ Wiki links are prioritized over regular links.
 <p><x-wikilink data-target="foo">foo</x-wikilink>(foo.jpg)</p>
 ````````````````````````````````
 
+```````````````````````````````` example
+[foo]: /url
+
+[[foo]]
+.
+<p><x-wikilink data-target="foo">foo</x-wikilink></p>
+````````````````````````````````
+
 Wiki links can be inlined in tables.
 
 ```````````````````````````````` example
@@ -138,7 +154,7 @@ Wiki links can be inlined in tables.
 </table>
 ````````````````````````````````
 
-Wiki links not prioritzed over images.
+Wiki-links are not prioritized over images.
 
 ```````````````````````````````` example
 ![[foo]](foo.jpg)
@@ -146,30 +162,7 @@ Wiki links not prioritzed over images.
 <p><img src="foo.jpg" alt="[foo]"></p>
 ````````````````````````````````
 
-An image placed inside a wiki link target disables the wiki link.
-
-```````````````````````````````` example
-[[![foo](foo.jpg)]]
-.
-<p>[[<img src="foo.jpg" alt="foo">]]</p>
-
-````````````````````````````````
-
-```````````````````````````````` example
-[[![foo](foo.jpg)|test]]
-.
-<p>[[<img src="foo.jpg" alt="foo">|test]]</p>
-````````````````````````````````
-
-But images can be placed in the wiki link label.
-
-```````````````````````````````` example
-[[foo|![bar](bar.jpg)]]
-.
-<p><x-wikilink data-target="foo"><img src="bar.jpg" alt="bar"></x-wikilink></p>
-````````````````````````````````
-
-Something that may look like a wiki link at first, but turns out not to be,
+Something that may look like a wiki-link at first, but turns out not to be,
 is recognized as a normal link.
 
 ```````````````````````````````` example
@@ -180,7 +173,8 @@ is recognized as a normal link.
 <p>[<a href="/url">foo</a></p>
 ````````````````````````````````
 
-Escaping the opening `[`, we should get a normal link.
+Escaping the opening `[` escapes only that one character, not the whole `[[`
+opener:
 
 ```````````````````````````````` example
 \[[foo]]
@@ -190,7 +184,7 @@ Escaping the opening `[`, we should get a normal link.
 <p>[<a href="/url">foo</a>]</p>
 ````````````````````````````````
 
-Like normal links, the innermost wiki link is preferred.
+Like with other inline links, the innermost wiki-link is preferred.
 
 ```````````````````````````````` example
 [[foo[[bar]]]]
@@ -198,40 +192,24 @@ Like normal links, the innermost wiki link is preferred.
 <p>[[foo<x-wikilink data-target="bar">bar</x-wikilink>]]</p>
 ````````````````````````````````
 
-
-There is limit of 100 characters inside a wiki link target (or id); 101
-characters is too much.
+There is a limit of 100 characters for the wiki-link destination.
 
 ```````````````````````````````` example
 [[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
+[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]
 .
-<p>[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]</p>
+<p>[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
+[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]</p>
 ````````````````````````````````
 
 100 characters inside a wiki link target works.
 
 ```````````````````````````````` example
 [[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890]]
+[[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890|foo]]
 .
-<p><x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890</x-wikilink></p>
-````````````````````````````````
-
-If a label is included, the limit is 99 characters in total, excluding the `|`
-delimiter.
-
-```````````````````````````````` example
-[[12345678901234567890123456789012345678901234567890|1234567890123456789012345678901234567890123456789]]
-.
-<p><x-wikilink data-target="12345678901234567890123456789012345678901234567890">1234567890123456789012345678901234567890123456789</x-wikilink></p>
-````````````````````````````````
-
-This example shows that 101 characters (or 100 exclusive the delimiter) is one
-too many.
-
-```````````````````````````````` example
-[[12345678901234567890123456789012345678901234567890|12345678901234567890123456789012345678901234567890]]
-.
-<p>[[12345678901234567890123456789012345678901234567890|12345678901234567890123456789012345678901234567890]]</p>
+<p><x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890</x-wikilink>
+<x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">foo</x-wikilink></p>
 ````````````````````````````````
 
 The limit on link content does not include any characters belonging to a block