Commit 0225f337b416aee0c295e02a0431bd86c5dbcc63

Martin Mitas 2016-10-04T00:55:32

Implement ATX headers.

diff --git a/README.md b/README.md
index 559f8f4..ad74f0f 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ more or less forms our to do list.
 
 - **Leaf Blocks:**
   - [x] 4.1 Thematic breaks
-  - [ ] 4.2 ATX headings
+  - [x] 4.2 ATX headings
   - [ ] 4.3 Setext headings
   - [ ] 4.4 Indented code blocks
   - [ ] 4.5 Fenced code blocks
diff --git a/md2html/md2html.c b/md2html/md2html.c
index c5718dd..481f3e8 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -128,11 +128,13 @@ membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
 static int
 enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
 {
+    static const char* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
     struct membuffer* out = (struct membuffer*) userdata;
 
     switch(type) {
     case MD_BLOCK_DOC:      /* noop */ break;
     case MD_BLOCK_HR:       MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break;
+    case MD_BLOCK_H:        MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
     case MD_BLOCK_P:        MEMBUF_APPEND_LITERAL(out, "<p>"); break;
     }
 
@@ -142,11 +144,13 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
 static int
 leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
 {
+    static const char* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
     struct membuffer* out = (struct membuffer*) userdata;
 
     switch(type) {
     case MD_BLOCK_DOC:      /*noop*/ break;
     case MD_BLOCK_HR:       /*noop*/ break;
+    case MD_BLOCK_H:        MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
     case MD_BLOCK_P:        MEMBUF_APPEND_LITERAL(out, "</p>\n"); break;
     }
 
@@ -281,6 +285,7 @@ static const option cmdline_options[] = {
     { "full-html",                  'f', 'f', OPTION_ARG_NONE },
     { "stat",                       's', 's', OPTION_ARG_NONE },
     { "help",                       'h', 'h', OPTION_ARG_NONE },
+    { "fpermissive-atx-headers",     0,  'A', OPTION_ARG_NONE },
     { 0 }
 };
 
@@ -296,11 +301,15 @@ usage(void)
         "  -f, --full-html          generate full HTML document, including header\n"
         "  -s, --stat               measure time of input parsing\n"
         "  -h, --help               display this help and exit\n"
+        "\n"
+        "Markdown dialect options:\n"
+        "      --fpermissive-atx-headers    allow ATX headers without delimiting space\n"
     );
 }
 
 static const char* input_path = NULL;
 static const char* output_path = NULL;
+static unsigned renderer_flags = 0;
 static int want_fullhtml = 0;
 static int want_stat = 0;
 
@@ -322,6 +331,8 @@ cmdline_callback(int opt, char const* value, void* data)
     case 's':   want_stat = 1; break;
     case 'h':   usage(); exit(0); break;
 
+    case 'A':   renderer_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
+
     default:
         fprintf(stderr, "Illegal option: %s\n", value);
         fprintf(stderr, "Use --help for more info.\n");
@@ -359,7 +370,7 @@ main(int argc, char** argv)
         }
     }
 
-    ret = process_file(in, out, 0, want_fullhtml, want_stat);
+    ret = process_file(in, out, renderer_flags, want_fullhtml, want_stat);
     if(in != stdin)
         fclose(in);
     if(out != stdout)
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 3136b53..3a33fc9 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -75,12 +75,16 @@ struct MD_CTX_tag {
     SZ size;
     MD_RENDERER r;
     void* userdata;
+
+    /* For MD_BLOCK_H: header level set by md_is_atxheader_line(). */
+    unsigned header_level;
 };
 
 typedef enum MD_LINETYPE_tag MD_LINETYPE;
 enum MD_LINETYPE_tag {
     MD_LINE_BLANK,
     MD_LINE_HR,
+    MD_LINE_ATXHEADER,
     MD_LINE_TEXT
 };
 
@@ -277,6 +281,29 @@ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
     return 0;
 }
 
+/* Check whether the line starting at beg (where a '#' is) opens an ATX header.
+ * Returns 0 and stores the level (1-6) in ctx->header_level and the offset of
+ * the header text in *p_beg; returns -1 otherwise. p_end is never written. */
+static int
+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
+{
+    OFF off = beg + 1;
+    /* Count the opening '#' run; seeing a 7th mark means it is no header. */
+    while(off < ctx->size  &&  CH(off) == _T('#')  &&  off - beg < 7)
+        off++;
+    if(off - beg > 6)
+        return -1;
+    /* Strict mode: the marks must be followed by a space or the line end. */
+    if(!(ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx->size  &&
+       CH(off) != _T(' ')  &&  CH(off) != _T('\n')  &&  CH(off) != _T('\r'))
+        return -1;
+    ctx->header_level = off - beg;
+    while(off < ctx->size  &&  CH(off) == _T(' '))
+        off++;
+    *p_beg = off;
+    return 0;
+}
+
 /* Analyze type of the line and find some its properties. This serves as a
  * main input for determining type and boundaries of a block. */
 static void
@@ -300,6 +327,14 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
         goto done;
     }
 
+    /* Check whether we are ATX header. */
+    if(CH(off) == _T('#')) {
+        if(md_is_atxheader_line(ctx, off, &line->beg, &off) == 0) {
+            line->type = MD_LINE_ATXHEADER;
+            goto done;
+        }
+    }
+
     /* Check whether we are thematic break line. */
     if(ISANYOF(off, _T("-_*"))) {
         if(md_is_hr_line(ctx, off, &off) == 0) {
@@ -319,6 +354,19 @@ done:
     /* Set end of the line. */
     line->end = off;
 
+    /* But for ATX header, we should not include the optional trailing mark. */
+    if(line->type == MD_LINE_ATXHEADER) {
+        OFF tmp = line->end;
+        while(tmp > line->beg  &&  CH(tmp-1) == _T(' '))
+            tmp--;
+        while(tmp > line->beg  &&  CH(tmp-1) == _T('#'))
+            tmp--;
+        while(tmp > line->beg  &&  CH(tmp-1) == _T(' '))
+            tmp--;
+        if(CH(tmp) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS))
+            line->end = tmp;
+    }
+
     /* Eat also the new line. */
     if(off < ctx->size  &&  CH(off) == _T('\r'))
         off++;
@@ -336,6 +384,9 @@ static int
 md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 {
     MD_BLOCKTYPE block_type;
+    union {
+        MD_BLOCK_H_DETAIL header;
+    } det;
     int ret = 0;
 
     if(n_lines == 0)
@@ -345,18 +396,24 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
     switch(lines[0].type) {
     case MD_LINE_BLANK:     return 0;
     case MD_LINE_HR:        block_type = MD_BLOCK_HR; break;
+
+    case MD_LINE_ATXHEADER:
+        block_type = MD_BLOCK_H;
+        det.header.level = ctx->header_level;
+        break;
+
     case MD_LINE_TEXT:      block_type = MD_BLOCK_P; break;
     }
 
     /* Process the block accordingly to is type. */
-    MD_ENTER_BLOCK(block_type, NULL);
+    MD_ENTER_BLOCK(block_type, (void*) &det);
     switch(block_type) {
     case MD_BLOCK_HR:   /* Noop. */ break;
     default:            ret = md_process_normal_block(ctx, lines, n_lines); break;
     }
     if(ret != 0)
         goto abort;
-    MD_LEAVE_BLOCK(block_type, NULL);
+    MD_LEAVE_BLOCK(block_type, (void*) &det);
 
 abort:
     return ret;
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 3065c28..56d8d47 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -64,11 +64,14 @@ enum MD_BLOCKTYPE_tag {
     /* <hr> */
     MD_BLOCK_HR,
 
+    /* <h1>...</h1> (for levels up to 6)
+     * Detail: See structure MD_BLOCK_H_DETAIL. */
+    MD_BLOCK_H,
+
     /* <p>...</p> */
     MD_BLOCK_P
 };
 
-
 /* Span represents an in-line piece of a document which should be rendered with
  * the same font, color and other attributes. A sequence of spans forms a block
  * like paragraph or list item. */
@@ -77,7 +80,6 @@ enum MD_SPANTYPE_tag {
     MD_SPAN_DUMMY = 0       /* not yet used... */
 };
 
-
 /* Text is the actual textual contents of span. */
 typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE;
 enum MD_TEXTTYPE_tag {
@@ -86,6 +88,20 @@ enum MD_TEXTTYPE_tag {
 };
 
 
+/* Detail passed to the block callbacks for MD_BLOCK_H (cast from void*). */
+typedef struct MD_BLOCK_H_DETAIL_tag MD_BLOCK_H_DETAIL;
+struct MD_BLOCK_H_DETAIL_tag {
+    unsigned level;         /* Header level (1 - 6), i.e. <h1> ... <h6> */
+};
+
+
+/* Flags specifying Markdown dialect.
+ *
+ * By default (when MD_RENDERER::flags == 0), we follow CommonMark specification.
+ * The following flags may allow some extensions or deviations from it.
+ */
+#define MD_FLAG_PERMISSIVEATXHEADERS    0x0001  /* Do not require space in ATX headers ( ###header ) */
+
 /* Caller-provided callbacks.
  *
  * For some block/span types, more detailed information is provided in a