Commit 232ceeac24980bbe05c22d23f68c7d907fb98260

Martin Mitas 2016-12-16T00:11:23

md_analyze_line: A list item can begin with at most one blank line (issue #6). Hacky. I very dislike it, but it brings us full CommonMark 0.27 compliance. Hallelujah.

diff --git a/README.md b/README.md
index dfb2475..f49a436 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ MD4C is C Markdown parser with the following features:
 
 * **Compliance:** Generally MD4C aims to be compliant to the latest version of
   [CommonMark specification](http://spec.commonmark.org/). Right now we are
-  very close to CommonMark 0.27.
+  fully compliant to CommonMark 0.27.
 
 * **Extensions:** MD4C supports some commonly requested and accepted extensions.
   See below.
diff --git a/md4c/md4c.c b/md4c/md4c.c
index dd5edee..bae422e 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -147,6 +147,7 @@ struct MD_CTX_tag {
     int alloc_containers;
 
     int last_line_has_list_loosening_effect;
+    int last_list_item_starts_with_two_blank_lines;
 
     /* Minimal indentation to call the block "indented code block". */
     unsigned code_indent_offset;
@@ -4942,10 +4943,47 @@ redo:
             ctx->last_line_has_list_loosening_effect = (n_parents > 0  &&
                     n_brothers + n_children == 0  &&
                     ctx->containers[n_parents-1].ch != _T('>'));
+
+#if 1
+            /* See https://github.com/mity/md4c/issues/6
+             *
+             * This ugly checking tests we are in (yet empty) list item but not
+             * its very first line (with the list item mark).
+             *
+             * If we are such blank line, then any following non-blank line
+             * which would be part of this list item actually ends the list
+             * because "a list item can begin with at most one blank line."
+             */
+            if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
+               n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
+               ctx->n_block_bytes > sizeof(MD_BLOCK))
+            {
+                MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
+                if(top_block->type == MD_BLOCK_LI)
+                    ctx->last_list_item_starts_with_two_blank_lines = TRUE;
+            }
+#endif
         }
         goto done;
     } else {
+#if 1
+        /* This is 2nd half of the hack. If the flag is set (that is there
+         * were 2nd blank line at the start of the list item) and we would also
+         * belonging to such list item, then interrupt the list. */
         ctx->last_line_has_list_loosening_effect = FALSE;
+        if(ctx->last_list_item_starts_with_two_blank_lines) {
+            if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
+               n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
+               ctx->n_block_bytes > sizeof(MD_BLOCK))
+            {
+                MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
+                if(top_block->type == MD_BLOCK_LI)
+                    n_parents--;
+            }
+
+            ctx->last_list_item_starts_with_two_blank_lines = FALSE;
+        }
+#endif
     }
 
     /* Check whether we are Setext underline. */