Implement and improve some optimizations in both the parser and the HTML renderer.
diff --git a/md2html/render_html.c b/md2html/render_html.c
index 12bb1cb..b8046e0 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -50,6 +50,7 @@ struct MD_RENDER_HTML_tag {
void* userdata;
unsigned flags;
int image_nesting_level;
+ char escape_map[256];
};
@@ -79,12 +80,16 @@ render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
MD_OFFSET off = 0;
/* Some characters need to be escaped in normal HTML text. */
- #define HTML_NEED_ESCAPE(ch) \
- ((ch) == '&' || (ch) == '<' || (ch) == '>' || (ch) == '"')
+ #define HTML_NEED_ESCAPE(ch) (r->escape_map[(unsigned char)(ch)] != 0)
while(1) {
+ /* Optimization: Use some loop unrolling. */
+ while(off + 3 < size && !HTML_NEED_ESCAPE(data[off+0]) && !HTML_NEED_ESCAPE(data[off+1])
+ && !HTML_NEED_ESCAPE(data[off+2]) && !HTML_NEED_ESCAPE(data[off+3]))
+ off += 4;
while(off < size && !HTML_NEED_ESCAPE(data[off]))
off++;
+
if(off > beg)
render_text(r, data + beg, off - beg);
@@ -487,7 +492,7 @@ md_render_html(const MD_CHAR* input, MD_SIZE input_size,
void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
void* userdata, unsigned parser_flags, unsigned renderer_flags)
{
- MD_RENDER_HTML render = { process_output, userdata, renderer_flags, 0 };
+ MD_RENDER_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
MD_PARSER parser = {
0,
@@ -501,6 +506,11 @@ md_render_html(const MD_CHAR* input, MD_SIZE input_size,
NULL
};
+ render.escape_map[(unsigned char)'"'] = 1;
+ render.escape_map[(unsigned char)'&'] = 1;
+ render.escape_map[(unsigned char)'<'] = 1;
+ render.escape_map[(unsigned char)'>'] = 1;
+
return md_parse(input, input_size, &parser, (void*) &render);
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
index e54e681..8222e0f 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -98,6 +98,9 @@ struct MD_CTX_tag {
MD_PARSER parser;
void* userdata;
+ /* Non-zero if the input document ends with a newline character; this allows a faster scan for line ends. */
+ int doc_ends_with_newline;
+
/* Helper temporary growing buffer. */
CHAR* buffer;
unsigned alloc_buffer;
@@ -2957,8 +2960,8 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
#define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
#endif
- /* Optimization: Fast path (with some loop unrolling). */
- while(off + 4 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1)
+ /* Optimization: Use some loop unrolling. */
+ while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1)
&& !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3))
off += 4;
while(off < line_end && !IS_MARK_CHAR(off+0))
@@ -5832,8 +5835,21 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
}
/* Scan for end of the line. */
- while(off < ctx->size && !ISNEWLINE(off))
- off++;
+ if(ctx->doc_ends_with_newline && off < ctx->size) {
+ /* libc likely has well-optimized strcspn()/wcscspn(). The trailing newline bounds the scan; note they also stop at an embedded NUL. */
+#ifdef MD4C_USE_UTF16
+ off += (OFF) wcscspn(STR(off), _T("\r\n"));
+#else
+ off += (OFF) strcspn(STR(off), "\r\n");
+#endif
+ } else {
+ /* Optimization: Use some loop unrolling. */
+ while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1)
+ && !ISNEWLINE(off+2) && !ISNEWLINE(off+3))
+ off += 4;
+ while(off < ctx->size && !ISNEWLINE(off))
+ off++;
+ }
/* Set end of the line. */
line->end = off;
@@ -6053,6 +6069,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
ctx.userdata = userdata;
ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
md_build_mark_char_map(&ctx);
+ ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
/* Reset all unresolved opener mark chains. */
for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {