Commit 56f8bbb19bf524116a62b701d9e4f3f3cf0fde6f

Martin Mitas 2019-05-19T15:12:32

Implement/improve some optimizations, both in the parser and HTML renderer.

diff --git a/md2html/render_html.c b/md2html/render_html.c
index 12bb1cb..b8046e0 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -50,6 +50,7 @@ struct MD_RENDER_HTML_tag {
     void* userdata;
     unsigned flags;
     int image_nesting_level;
+    char escape_map[256];
 };
 
 
@@ -79,12 +80,16 @@ render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
     MD_OFFSET off = 0;
 
     /* Some characters need to be escaped in normal HTML text. */
-    #define HTML_NEED_ESCAPE(ch)                                            \
-            ((ch) == '&' || (ch) == '<' || (ch) == '>' || (ch) == '"')
+    #define HTML_NEED_ESCAPE(ch)        (r->escape_map[(unsigned char)(ch)] != 0)
 
     while(1) {
+        /* Optimization: Use some loop unrolling. */
+        while(off + 3 < size  &&  !HTML_NEED_ESCAPE(data[off+0])  &&  !HTML_NEED_ESCAPE(data[off+1])
+                              &&  !HTML_NEED_ESCAPE(data[off+2])  &&  !HTML_NEED_ESCAPE(data[off+3]))
+            off += 4;
         while(off < size  &&  !HTML_NEED_ESCAPE(data[off]))
             off++;
+
         if(off > beg)
             render_text(r, data + beg, off - beg);
 
@@ -487,7 +492,7 @@ md_render_html(const MD_CHAR* input, MD_SIZE input_size,
                void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
                void* userdata, unsigned parser_flags, unsigned renderer_flags)
 {
-    MD_RENDER_HTML render = { process_output, userdata, renderer_flags, 0 };
+    MD_RENDER_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
 
     MD_PARSER parser = {
         0,
@@ -501,6 +506,11 @@ md_render_html(const MD_CHAR* input, MD_SIZE input_size,
         NULL
     };
 
+    render.escape_map[(unsigned char)'"'] = 1;
+    render.escape_map[(unsigned char)'&'] = 1;
+    render.escape_map[(unsigned char)'<'] = 1;
+    render.escape_map[(unsigned char)'>'] = 1;
+
     return md_parse(input, input_size, &parser, (void*) &render);
 }
 
diff --git a/md4c/md4c.c b/md4c/md4c.c
index e54e681..8222e0f 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -98,6 +98,9 @@ struct MD_CTX_tag {
     MD_PARSER parser;
     void* userdata;
 
+    /* Non-zero when the document's last character is a newline; this allows some optimizations. */
+    int doc_ends_with_newline;
+
     /* Helper temporary growing buffer. */
     CHAR* buffer;
     unsigned alloc_buffer;
@@ -2957,8 +2960,8 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
     #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
 #endif
 
-            /* Optimization: Fast path (with some loop unrolling). */
-            while(off + 4 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
+            /* Optimization: Use some loop unrolling. */
+            while(off + 3 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
                                       &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
                 off += 4;
             while(off < line_end  &&  !IS_MARK_CHAR(off+0))
@@ -5832,8 +5835,21 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
     }
 
     /* Scan for end of the line. */
-    while(off < ctx->size  &&  !ISNEWLINE(off))
-        off++;
+    if(ctx->doc_ends_with_newline  &&  off < ctx->size) {
+        /* libc likely has well optimized strcspn()/wcscspn(). NOTE(review): both also stop at a NUL char. */
+#ifdef MD4C_USE_UTF16
+        off += (OFF) wcscspn(STR(off), _T("\r\n"));
+#else
+        off += (OFF) strcspn(STR(off), "\r\n");
+#endif
+    } else {
+        /* Optimization: Use some loop unrolling. */
+        while(off + 3 < ctx->size  &&  !ISNEWLINE(off+0)  &&  !ISNEWLINE(off+1)
+                                   &&  !ISNEWLINE(off+2)  &&  !ISNEWLINE(off+3))
+            off += 4;
+        while(off < ctx->size  &&  !ISNEWLINE(off))
+            off++;
+    }
 
     /* Set end of the line. */
     line->end = off;
@@ -6053,6 +6069,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
     ctx.userdata = userdata;
     ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
     md_build_mark_char_map(&ctx);
+    ctx.doc_ends_with_newline = (size > 0  &&  ISNEWLINE_(text[size-1]));
 
     /* Reset all unresolved opener mark chains. */
     for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {