Introduce an overall limit to link. ref. defs instantiations. This is to prevent time and output size explosion in case of input pattern generated by this: $ python -c 'N=1000; print("[x]: " + "x" * N + "\n[x]" * N)' We roughly allow to blowing up the input size of the document 16 times by link reference definitions or up to 1 MB, whatever is smaller. When the threshold is reached, following reference definitions are sent to output unresolved as a text. Fixes #238.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
diff --git a/src/md4c.c b/src/md4c.c
index 3038863..4a4e401 100644
--- a/src/md4c.c
+++ b/src/md4c.c
@@ -26,6 +26,7 @@
#include "md4c.h"
#include <limits.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -143,6 +144,9 @@
#define SZ MD_SIZE
#define OFF MD_OFFSET
+#define SZ_MAX (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
+#define OFF_MAX (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)
+
typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
@@ -180,6 +184,7 @@ struct MD_CTX_tag {
int alloc_ref_defs;
void** ref_def_hashtable;
int ref_def_hashtable_size;
+ SZ max_ref_def_output;
/* Stack of inline/span markers.
* This is only used for parsing a single block contents but by storing it
@@ -2283,11 +2288,14 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
int is_multiline;
CHAR* label;
SZ label_size;
- int ret;
+ int ret = FALSE;
MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
MD_ASSERT(CH(end-1) == _T(']'));
+ if(ctx->max_ref_def_output == 0)
+ return FALSE;
+
beg += (CH(beg) == _T('!') ? 2 : 1);
end--;
@@ -2315,7 +2323,17 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
if(is_multiline)
free(label);
- ret = (def != NULL);
+ if(def != NULL) {
+ /* See https://github.com/mity/md4c/issues/238 */
+ MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
+ if(output_size_estimation < ctx->max_ref_def_output) {
+ ctx->max_ref_def_output -= output_size_estimation;
+ ret = TRUE;
+ } else {
+ MD_LOG("Too many link reference definition instantiations.");
+ ctx->max_ref_def_output = 0;
+ }
+ }
abort:
return ret;
@@ -6470,6 +6488,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
md_build_mark_char_map(&ctx);
ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
+ ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);
/* Reset all mark stacks and lists. */
for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
diff --git a/test/pathological-tests.py b/test/pathological-tests.py
index 924cbe9..66b85ad 100644
--- a/test/pathological-tests.py
+++ b/test/pathological-tests.py
@@ -102,7 +102,10 @@ pathological = {
"--ftables"),
"many broken links":
(("]([\n" * 50000),
- re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>"))
+ re.compile("<p>(\]\(\[\r?\n){49999}\]\(\[</p>")),
+ "many link ref. def. instantiations":
+ (("[x]: " + "x" * 50000 + "\n[x]" * 50000),
+ re.compile(""))
}
whitespace_re = re.compile('/s+/')