md_analyze_line: Fix O(n^2) in thematic break handling. Fixes #66.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23b521a..6524a72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,10 @@ Fixes:
* Fixed some quadratic behaviors:
[#58](https://github.com/mity/md4c/issues/58),
[#59](https://github.com/mity/md4c/issues/59),
- [#60](https://github.com/mity/md4c/issues/60)
+ [#60](https://github.com/mity/md4c/issues/60),
+ [#66](https://github.com/mity/md4c/issues/66)
+
+ Thanks to Anders Kaseorg for finding all those issues.
* [#61](https://github.com/mity/md4c/issues/61): Flag `MD_FLAG_NOHTMLSPANS`
also erroneously affected the recognition of CommonMark autolinks.
diff --git a/md4c/md4c.c b/md4c/md4c.c
index fc5a72a..d1ad2ea 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -4799,7 +4799,7 @@ abort:
***********************/
static int
-md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
+md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
{
OFF off = beg + 1;
int n = 1;
@@ -4810,12 +4810,16 @@ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
off++;
}
- if(n < 3)
+ if(n < 3) {
+ *p_killer = off;
return FALSE;
+ }
/* Nothing else can be present on the line. */
- if(off < ctx->size && !ISNEWLINE(off))
+ if(off < ctx->size && !ISNEWLINE(off)) {
+ *p_killer = off;
return FALSE;
+ }
*p_end = off;
return TRUE;
@@ -5395,6 +5399,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
MD_CONTAINER container = { 0 };
int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
OFF off = beg;
+ OFF hr_killer = 0;
int ret = 0;
line->indent = md_line_indentation(ctx, total_indent, off, &off);
@@ -5570,8 +5575,8 @@ redo:
}
/* Check for thematic break line. */
- if(line->indent < ctx->code_indent_offset && ISANYOF(off, _T("-_*"))) {
- if(md_is_hr_line(ctx, off, &off)) {
+ if(line->indent < ctx->code_indent_offset && ISANYOF(off, _T("-_*")) && off >= hr_killer) {
+ if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
line->type = MD_LINE_HR;
goto done;
}
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
index 2d7c700..71fcaff 100755
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -77,7 +77,10 @@ pathological = {
re.compile("(``){50000}")),
"many broken link titles":
(("[ (](" * 50000),
- re.compile("(\[ \(\]\(){50000}"))
+ re.compile("(\[ \(\]\(){50000}")),
+ "broken thematic break":
+ (("* " * 50000 + "a"),
+ re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}"))
}
whitespace_re = re.compile('/s+/')