Commit 65957f5369eba29a66ece08733cfb59be1858d2e

Martin Mitas 2024-01-19T10:37:33

Limit number of table columns to prevent explosion of output... with the input pattern of the form generated by this one-liner: $ python3 -c 'N=1000; print("x|" * N + "\n" + "-|" * N + "\n" + "x\n" * N)' Here the amount of HTML output grows with N^2.

diff --git a/src/md4c.c b/src/md4c.c
index 47b392c..17709a0 100644
--- a/src/md4c.c
+++ b/src/md4c.c
@@ -118,6 +118,22 @@
 #define MD_UNUSED(x)                ((void)x)
 
 
+/******************************
+ ***  Some internal limits  ***
+ ******************************/
+
+/* We limit code span marks to lower than 32 backticks. This solves the
+ * pathologic case of too many openers, each of different length: Their
+ * resolving would be then O(n^2). */
+#define CODESPAN_MARK_MAXLEN    32
+
+/* We limit column count of tables to prevent quadratic explosion of output
+ * from pathological input of a table with thousands of columns and thousands
+ * of rows where rows are requested with as little as a single character
+ * per line, relying on us to "helpfully" fill all the missing "<td></td>". */
+#define TABLE_MAXCOLCOUNT       128
+
+
 /************************
  ***  Internal Types  ***
  ************************/
@@ -2724,11 +2740,6 @@ md_build_mark_char_map(MD_CTX* ctx)
     }
 }
 
-/* We limit code span marks to lower than 32 backticks. This solves the
- * pathologic case of too many openers, each of different length: Their
- * resolving would be then O(n^2). */
-#define CODESPAN_MARK_MAXLEN    32
-
 static int
 md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
                 MD_MARK* opener, MD_MARK* closer,
@@ -5303,6 +5314,10 @@ md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
             off++;
 
         col_count++;
+        if(col_count > TABLE_MAXCOLCOUNT) {
+            MD_LOG("Suppressing table (column_count >" STRINGIZE(TABLE_MAXCOLCOUNT) ")");
+            return FALSE;
+        }
 
         /* Pipe delimiter (optional at the end of line). */
         while(off < ctx->size  &&  ISWHITESPACE(off))
diff --git a/test/pathological-tests.py b/test/pathological-tests.py
index 4ec175a..d72453d 100644
--- a/test/pathological-tests.py
+++ b/test/pathological-tests.py
@@ -95,7 +95,11 @@ pathological = {
     "many broken permissive autolinks":
             (("www._" * 50000 + "x"),
             re.compile("<p>(www._){50000}x</p>"),
-            "--fpermissive-www-autolinks")
+            "--fpermissive-www-autolinks"),
+    "huge table":
+            (("th|" * 10000 + "\n" + "-|" * 10000 + "\n" + "td\n" * 10000),
+            re.compile(""),
+            "--ftables")
 }
 
 whitespace_re = re.compile('/s+/')