Commit c51fb310580415526deb26eeb14c9947cf5d0b6d

Martin Mitas 2017-07-24T23:14:05

md_analyze_marks: Walk only the required range of marks. With this change, when recursing into the analysis of link contents, md_analyze_marks() iterates only over the marks between the link opener and closer. Fixes #22

diff --git a/md4c/md4c.c b/md4c/md4c.c
index f41a088..f56ba3e 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -3270,7 +3270,8 @@ md_analyze_bracket(MD_CTX* ctx, int mark_index)
 }
 
 /* Forward declaration. */
-static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF end);
+static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                                     OFF beg, OFF end, int mark_beg, int mark_end);
 
 static int
 md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
@@ -3404,7 +3405,7 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
                 last_img_end = closer->end;
             }
 
-            md_analyze_link_contents(ctx, lines, n_lines, opener->end, closer->beg);
+            md_analyze_link_contents(ctx, lines, n_lines, opener->end, closer->beg, opener_index+1, closer_index);
         }
 
         opener_index = next_index;
@@ -3685,11 +3686,13 @@ md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
 }
 
 static void
-md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF end, const CHAR* mark_chars)
+md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                 OFF beg, OFF end, int mark_beg, int mark_end,
+                 const CHAR* mark_chars)
 {
-    int i = 0;
+    int i = mark_beg;
 
-    while(i < ctx->n_marks) {
+    while(i < mark_end) {
         MD_MARK* mark = &ctx->marks[i];
 
         /* Do not care about marks outside the given range. */
@@ -3756,13 +3759,13 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mod
 
     /* We analyze marks in few groups to handle their precedence. */
     /* (1) Entities; code spans; autolinks; raw HTML. */
-    md_analyze_marks(ctx, lines, n_lines, beg, end, _T("&`<>"));
+    md_analyze_marks(ctx, lines, n_lines, beg, end, 0, ctx->n_marks, _T("&`<>"));
     BACKTICK_OPENERS.head = -1;
     BACKTICK_OPENERS.tail = -1;
     LOWERTHEN_OPENERS.head = -1;
     LOWERTHEN_OPENERS.tail = -1;
     /* (2) Links. */
-    md_analyze_marks(ctx, lines, n_lines, beg, end, _T("[]!"));
+    md_analyze_marks(ctx, lines, n_lines, beg, end, 0, ctx->n_marks, _T("[]!"));
     MD_CHECK(md_resolve_links(ctx, lines, n_lines));
     BRACKET_OPENERS.head = -1;
     BRACKET_OPENERS.tail = -1;
@@ -3776,10 +3779,10 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mod
         TABLECELLBOUNDARIES.head = -1;
         TABLECELLBOUNDARIES.tail = -1;
         ctx->n_table_cell_boundaries = 0;
-        md_analyze_marks(ctx, lines, n_lines, beg, end, _T("|"));
+        md_analyze_marks(ctx, lines, n_lines, beg, end, 0, ctx->n_marks, _T("|"));
     } else {
         /* (3b) Emphasis and strong emphasis; permissive autolinks. */
-        md_analyze_link_contents(ctx, lines, n_lines, beg, end);
+        md_analyze_link_contents(ctx, lines, n_lines, beg, end, 0, ctx->n_marks);
     }
 
 abort:
@@ -3787,9 +3790,10 @@ abort:
 }
 
 static void
-md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF end)
+md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                         OFF beg, OFF end, int mark_beg, int mark_end)
 {
-    md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_~@:."));
+    md_analyze_marks(ctx, lines, n_lines, beg, end, mark_beg, mark_end, _T("*_~@:."));
     ASTERISK_OPENERS.head = -1;
     ASTERISK_OPENERS.tail = -1;
     UNDERSCORE_OPENERS.head = -1;
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
index 6f12379..1c94b55 100644
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -58,7 +58,10 @@ pathological = {
                   re.compile("abc\ufffd?de\ufffd?")),
     "backticks":
                  ("".join(map(lambda x: ("e" + "`" * x), range(1,10000))),
-                  re.compile("^<p>[e`]*</p>\r?\n$"))
+                  re.compile("^<p>[e`]*</p>\r?\n$")),
+    "many links":
+                 ("[t](/u) " * 50000,
+                  re.compile("(<a href=\"/u\">t</a> ?){50000}"))
     }
 
 whitespace_re = re.compile('/s+/')