Commit 43bd28445b639afbd91e0155ffb5b5072c88e181

Martin Mitas 2016-10-04T03:26:56

Implemented indented code blocks.

diff --git a/README.md b/README.md
index 8558700..63c3920 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ more or less forms our to do list.
   - [x] 4.1 Thematic breaks
   - [x] 4.2 ATX headings
   - [x] 4.3 Setext headings
-  - [ ] 4.4 Indented code blocks
+  - [x] 4.4 Indented code blocks
   - [ ] 4.5 Fenced code blocks
   - [ ] 4.6 HTML blocks
   - [ ] 4.7 Link reference definitions
@@ -141,7 +141,7 @@ consideration.
 - **Miscellaneous:**
   - [x] Permissive ATX headers: `###Header` (without space)
   - [ ] Permissive autolinks: `http://google.com` (without `<`...`>`)
-  - [ ] Disabling indented code blocks
+  - [x] Disabling indented code blocks
   - [ ] Disabling raw HTML blocks/spans
 
 
diff --git a/md2html/md2html.c b/md2html/md2html.c
index 1779306..74e3387 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -135,6 +135,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
         case MD_BLOCK_DOC:      /* noop */ break;
         case MD_BLOCK_HR:       MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break;
         case MD_BLOCK_H:        MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
+        case MD_BLOCK_CODE:     MEMBUF_APPEND_LITERAL(out, "<pre><code>"); break;
         case MD_BLOCK_P:        MEMBUF_APPEND_LITERAL(out, "<p>"); break;
     }
 
@@ -151,6 +152,7 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
         case MD_BLOCK_DOC:      /*noop*/ break;
         case MD_BLOCK_HR:       /*noop*/ break;
         case MD_BLOCK_H:        MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
+        case MD_BLOCK_CODE:     MEMBUF_APPEND_LITERAL(out, "</code></pre>\n"); break;
         case MD_BLOCK_P:        MEMBUF_APPEND_LITERAL(out, "</p>\n"); break;
     }
 
@@ -286,6 +288,7 @@ static const option cmdline_options[] = {
     { "stat",                       's', 's', OPTION_ARG_NONE },
     { "help",                       'h', 'h', OPTION_ARG_NONE },
     { "fpermissive-atx-headers",     0,  'A', OPTION_ARG_NONE },
+    { "fno-indented-code",           0,  'I', OPTION_ARG_NONE },
     { 0 }
 };
 
@@ -304,6 +307,7 @@ usage(void)
         "\n"
         "Markdown dialect options:\n"
         "      --fpermissive-atx-headers    allow ATX headers without delimiting space\n"
+        "      --fno-indented-code          disabled indented code blocks\n"
     );
 }
 
@@ -332,6 +336,7 @@ cmdline_callback(int opt, char const* value, void* data)
         case 'h':   usage(); exit(0); break;
 
         case 'A':   renderer_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
+        case 'I':   renderer_flags |= MD_FLAG_NOINDENTEDCODE; break;
 
         default:
             fprintf(stderr, "Illegal option: %s\n", value);
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 9eaa4a6..1a1173c 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -76,6 +76,9 @@ struct MD_CTX_tag {
     MD_RENDERER r;
     void* userdata;
 
+    /* Minimal indentation to call the block "indented code". */
+    unsigned code_indent_offset;
+
     /* For MD_BLOCK_HEADER. */
     unsigned header_level;
 };
@@ -87,6 +90,7 @@ enum MD_LINETYPE_tag {
     MD_LINE_ATXHEADER,
     MD_LINE_SETEXTHEADER,
     MD_LINE_SETEXTUNDERLINE,
+    MD_LINE_INDENTEDCODE,
     MD_LINE_TEXT
 };
 
@@ -95,6 +99,7 @@ struct MD_LINE_tag {
     MD_LINETYPE type;
     OFF beg;
     OFF end;
+    unsigned indent;        /* Indentation level. */
 };
 
 
@@ -259,6 +264,36 @@ abort:
     return ret;
 }
 
+/* Emit the lines of a code block as MD_TEXT_CODEBLOCK text: for each line its
+ * leftover indentation (as spaces), then its contents, then a '\n'.
+ * NOTE(review): MD_TEXT is defined outside this hunk; it presumably stores the
+ * callback result in `ret` and jumps to `abort` on failure -- confirm. */
+static int
+md_process_verbatim_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    static const CHAR indent_str[16] = _T("                ");
+    int ret = 0;
+    int i;
+
+    for(i = 0; i < n_lines; i++) {
+        const MD_LINE* line = &lines[i];
+        unsigned indent = line->indent;    /* Unsigned, like line->indent. */
+        /* Output code indentation, at most one indent_str per call. */
+        while(indent > SIZEOF_ARRAY(indent_str)) {
+            MD_TEXT(MD_TEXT_CODEBLOCK, indent_str, SIZEOF_ARRAY(indent_str));
+            indent -= SIZEOF_ARRAY(indent_str);
+        }
+        if(indent > 0)
+            MD_TEXT(MD_TEXT_CODEBLOCK, indent_str, indent);
+        /* Output the code line itself, then enforce end-of-line. */
+        MD_TEXT(MD_TEXT_CODEBLOCK, STR(line->beg), line->end - line->beg);
+        MD_TEXT(MD_TEXT_CODEBLOCK, _T("\n"), 1);
+    }
+
+abort:
+    return ret;
+}
+
 
 /***************************************
  ***  Breaking Document into Blocks  ***
@@ -337,18 +372,51 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
     OFF off = beg;
 
     line->type = MD_LINE_BLANK;
+    line->indent = 0;
 
     /* Eat indentation. */
     while(off < ctx->size  &&  ISBLANK(off)) {
+        if(CH(off) == _T('\t'))
+            line->indent = (line->indent + 4) & ~3;
+        else
+            line->indent++;
         off++;
     }
 
     line->beg = off;
 
-    /* Check whether we are blank line. Note we fall here even if we are beyond
-     * the document end. */
+    /* Check whether this is a blank line.
+     * Note blank lines after indented code are treated as part of that block.
+     * If they are at the end of the block, they are discarded by the caller.
+     */
     if(off >= ctx->size  ||  ISNEWLINE(off)) {
-        line->type = MD_LINE_BLANK;
+        line->indent = 0;
+        if(pivot_line->type == MD_LINE_INDENTEDCODE)
+            line->type = MD_LINE_INDENTEDCODE;
+        else
+            line->type = MD_LINE_BLANK;
+        goto done;
+    }
+
+    /* Check whether this is an indented code line.
+     * Note an indented code block cannot interrupt a paragraph.
+     * Keep this as the first check after the blank line check: the checks
+     * below then do not need to verify that indentation < 4. */
+    if((pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE)
+        && line->indent >= ctx->code_indent_offset) {
+        line->type = MD_LINE_INDENTEDCODE;
+        line->indent -= ctx->code_indent_offset;
+        goto done;
+    }
+
+    /* NOTE(review): this block repeats the indented code check just above
+     * verbatim. It can never match: whenever its condition holds, the first
+     * copy has already jumped to "done". It looks like an accidental
+     * duplicate and is a candidate for removal. */
+    if((pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE)
+        && line->indent >= ctx->code_indent_offset) {
+        line->type = MD_LINE_INDENTEDCODE;
+        line->indent -= ctx->code_indent_offset;
         goto done;
     }
 
@@ -394,17 +462,18 @@ done:
             tmp--;
         while(tmp > line->beg && CH(tmp-1) == _T('#'))
             tmp--;
-        if(tmp == line->beg  ||  CH(tmp-1) == _T(' ')  ||  (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)) {
-            while(tmp > line->beg && CH(tmp-1) == _T(' '))
-                tmp--;
+        if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS))
             line->end = tmp;
-        }
     }
 
+    /* Trim trailing spaces. */
+    while(line->end > line->beg && CH(line->end-1) == _T(' '))
+        line->end--;
+
     /* Eat also the new line. */
-    if(off < ctx->size  &&  CH(off) == _T('\r'))
+    if(off < ctx->size && CH(off) == _T('\r'))
         off++;
-    if(off < ctx->size  &&  CH(off) == _T('\n'))
+    if(off < ctx->size && CH(off) == _T('\n'))
         off++;
 
     *p_end = off;
@@ -441,6 +510,10 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
             det.header.level = ctx->header_level;
             break;
 
+        case MD_LINE_INDENTEDCODE:
+            block_type = MD_BLOCK_CODE;
+            break;
+
         case MD_LINE_TEXT:
             block_type = MD_BLOCK_P;
             break;
@@ -459,6 +532,10 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
             /* Noop. */
             break;
 
+        case MD_BLOCK_CODE:
+            ret = md_process_verbatim_block(ctx, lines, n_lines);
+            break;
+
         default:
             ret = md_process_normal_block(ctx, lines, n_lines);
             break;
@@ -585,6 +662,9 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* u
     memcpy(&ctx.r, renderer, sizeof(MD_RENDERER));
     ctx.userdata = userdata;
 
-    /* Doo all the hard work. */
+    /* Offset for indented code block. */
+    ctx.code_indent_offset = (ctx.r.flags & MD_FLAG_NOINDENTEDCODE) ? (OFF)(-1) : 4;
+
+    /* Do all the hard work. */
     return md_process_doc(&ctx);
 }
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 56d8d47..80529de 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -68,6 +68,10 @@ enum MD_BLOCKTYPE_tag {
      * Detail: See structure MD_BLOCK_H_DETAIL. */
     MD_BLOCK_H,
 
+    /* <pre><code>...</code></pre>
+     * Note the text lines (spans) within blocks are terminated with '\n'. */
+    MD_BLOCK_CODE,
+
     /* <p>...</p> */
     MD_BLOCK_P
 };
@@ -84,7 +88,12 @@ enum MD_SPANTYPE_tag {
 typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE;
 enum MD_TEXTTYPE_tag {
     /* Normal text. */
-    MD_TEXT_NORMAL = 0
+    MD_TEXT_NORMAL = 0,
+
+    /* Text in a code block (inside MD_BLOCK_CODE).
+     * Includes spaces for indentation and '\n' for new lines.
+     * MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this kind of text. */
+    MD_TEXT_CODEBLOCK
 };
 
 
@@ -101,6 +110,7 @@ struct MD_BLOCK_H_DETAIL_tag {
  * The following flags may allow some extensions or deviations from it.
  */
 #define MD_FLAG_PERMISSIVEATXHEADERS    0x0001  /* Do not require space in ATX headers ( ###header ) */
+#define MD_FLAG_NOINDENTEDCODE          0x0002  /* Recognize only fenced code blocks. */
 
 /* Caller-provided callbacks.
  *