Commit 25a156ee1b21c7cb8d6cc82029c181fd15bccee8

Martin Mitas 2017-07-12T23:30:14

Implement strikethrough extension.

diff --git a/README.md b/README.md
index f49a436..371bb54 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,9 @@ some extensions or allowing some deviations from the specification.
  * With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are
    disabled.
 
+ * With the flag `MD_FLAG_STRIKETHROUGH`, strikethrough spans are enabled
+   (text enclosed in tilde marks, e.g. `~foo bar~`).
+
 
 ## Input/Output Encoding
 
diff --git a/md2html/md2html.c b/md2html/md2html.c
index cc038e3..16f1beb 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -206,6 +206,7 @@ static const option cmdline_options[] = {
     { "fno-html",                    0,  'H', OPTION_ARG_NONE },
     { "fcollapse-whitespace",        0,  'W', OPTION_ARG_NONE },
     { "ftables",                     0,  'T', OPTION_ARG_NONE },
+    { "fstrikethrough",              0,  'S', OPTION_ARG_NONE },
     { 0 }
 };
 
@@ -249,6 +250,7 @@ usage(void)
         "                       Disable raw HTML spans\n"
         "      --fno-html       Same as --fno-html-blocks --fno-html-spans\n"
         "      --ftables        Enable tables\n"
+        "      --fstrikethrough Enable strikethrough spans\n"
     );
 }
 
@@ -294,6 +296,7 @@ cmdline_callback(int opt, char const* value, void* data)
         case '@':   parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
         case 'V':   parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
         case 'T':   parser_flags |= MD_FLAG_TABLES; break;
+        case 'S':   parser_flags |= MD_FLAG_STRIKETHROUGH; break;
 
         default:
             fprintf(stderr, "Illegal option: %s\n", value);
diff --git a/md2html/render_html.c b/md2html/render_html.c
index 00024e0..fccddee 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -410,6 +410,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
         case MD_SPAN_A:         render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
         case MD_SPAN_IMG:       render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
         case MD_SPAN_CODE:      RENDER_LITERAL(r, "<code>"); break;
+        case MD_SPAN_DEL:       RENDER_LITERAL(r, "<del>"); break;
     }
 
     return 0;
@@ -434,6 +435,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
         case MD_SPAN_A:         RENDER_LITERAL(r, "</a>"); break;
         case MD_SPAN_IMG:       /*noop, handled above*/ break;
         case MD_SPAN_CODE:      RENDER_LITERAL(r, "</code>"); break;
+        case MD_SPAN_DEL:       RENDER_LITERAL(r, "</del>"); break;
     }
 
     return 0;
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 24d9bee..744c00e 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -121,14 +121,15 @@ struct MD_CTX_tag {
 #endif
 
     /* For resolving of inline spans. */
-    MD_MARKCHAIN mark_chains[7];
+    MD_MARKCHAIN mark_chains[8];
 #define PTR_CHAIN               ctx->mark_chains[0]
 #define BACKTICK_OPENERS        ctx->mark_chains[1]
 #define LOWERTHEN_OPENERS       ctx->mark_chains[2]
 #define ASTERISK_OPENERS        ctx->mark_chains[3]
 #define UNDERSCORE_OPENERS      ctx->mark_chains[4]
-#define BRACKET_OPENERS         ctx->mark_chains[5]
-#define TABLECELLBOUNDARIES     ctx->mark_chains[6]
+#define TILDE_OPENERS           ctx->mark_chains[5]
+#define BRACKET_OPENERS         ctx->mark_chains[6]
+#define TABLECELLBOUNDARIES     ctx->mark_chains[7]
 
     int n_table_cell_boundaries;
 
@@ -2131,6 +2132,8 @@ md_free_link_ref_defs(MD_CTX* ctx)
  * '\\': Maybe escape sequence.
  * '\0': NULL char.
  *  '*': Maybe (strong) emphasis start/end.
+ *  '_': Maybe (strong) emphasis start/end.
+ *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
  *  '`': Maybe code span start/end.
  *  '&': Maybe start of entity.
  *  ';': Maybe end of entity.
@@ -2348,6 +2351,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
                     case '_':   chain = &UNDERSCORE_OPENERS; break;
                     case '`':   chain = &BACKTICK_OPENERS; break;
                     case '<':   chain = &LOWERTHEN_OPENERS; break;
+                    case '~':   chain = &TILDE_OPENERS; break;
                     default:    MD_UNREACHABLE(); break;
                 }
                 md_mark_chain_append(ctx, chain, mark_opener_index);
@@ -2395,6 +2399,9 @@ md_build_mark_char_map(MD_CTX* ctx)
     ctx->mark_char_map[']'] = 1;
     ctx->mark_char_map['\0'] = 1;
 
+    if(ctx->r.flags & MD_FLAG_STRIKETHROUGH)
+        ctx->mark_char_map['~'] = 1;
+
     if(ctx->r.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
         ctx->mark_char_map[':'] = 1;
 
@@ -2653,6 +2660,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
                 continue;
             }
 
+            /* A potential strikethrough start/end. */
+            if(ch == _T('~')) {
+                OFF tmp = off+1;
+
+                while(tmp < line_end  &&  CH(tmp) == _T('~'))
+                    tmp++;
+
+                PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
+                off = tmp;
+            }
+
             /* Turn non-trivial whitespace into single space. */
             if(ISWHITESPACE_(ch)) {
                 OFF tmp = off+1;
@@ -3233,6 +3251,25 @@ md_analyze_underscore(MD_CTX* ctx, int mark_index)
 }
 
 static void
+md_analyze_tilde(MD_CTX* ctx, int mark_index)
+{
+    /* We attempt to be GitHub Flavored Markdown compatible here. GFM says
+     * that the length of the tilde sequence does not matter at all, so any
+     * run can close a pending opener: TILDE_OPENERS holds at most one item. */
+
+    if(TILDE_OPENERS.head >= 0) {
+        /* An opener is already pending; pair it with this mark as its closer. */
+        int opener_index = TILDE_OPENERS.head;
+
+        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
+        md_resolve_range(ctx, &TILDE_OPENERS, opener_index, mark_index);
+    } else {
+        /* No opener is pending, so this mark can only act as an opener. */
+        md_mark_chain_append(ctx, &TILDE_OPENERS, mark_index);
+    }
+}
+
+static void
 md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
 {
     MD_MARK* opener = &ctx->marks[mark_index];
@@ -3386,6 +3423,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF en
             case '|':   md_analyze_table_cell_boundary(ctx, i); break;
             case '*':   md_analyze_asterisk(ctx, i); break;
             case '_':   md_analyze_underscore(ctx, i); break;
+            case '~':   md_analyze_tilde(ctx, i); break;
             case ':':   md_analyze_permissive_url_autolink(ctx, i); break;
             case '@':   md_analyze_permissive_email_autolink(ctx, i); break;
         }
@@ -3434,11 +3472,7 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mod
         md_analyze_marks(ctx, lines, n_lines, beg, end, _T("|"));
     } else {
         /* (3b) Emphasis and strong emphasis; permissive autolinks. */
-        md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:"));
-        ASTERISK_OPENERS.head = -1;
-        ASTERISK_OPENERS.tail = -1;
-        UNDERSCORE_OPENERS.head = -1;
-        UNDERSCORE_OPENERS.tail = -1;
+        md_analyze_link_contents(ctx, lines, n_lines, beg, end);
     }
 
 abort:
@@ -3448,11 +3482,13 @@ abort:
 static void
 md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF end)
 {
-    md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:"));
+    md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_~@:"));
     ASTERISK_OPENERS.head = -1;
     ASTERISK_OPENERS.tail = -1;
     UNDERSCORE_OPENERS.head = -1;
     UNDERSCORE_OPENERS.tail = -1;
+    TILDE_OPENERS.head = -1;
+    TILDE_OPENERS.tail = -1;
 }
 
 static int
@@ -3558,6 +3594,13 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
                     }
                     break;
 
+                case '~':
+                    if(mark->flags & MD_MARK_OPENER)
+                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
+                    else
+                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
+                    break;
+
                 case '[':       /* Link, image. */
                 case '!':
                 case ']':
diff --git a/md4c/md4c.h b/md4c/md4c.h
index a3b00fc..a58cc74 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -126,7 +126,12 @@ typedef enum MD_SPANTYPE {
     MD_SPAN_IMG,
 
     /* <code>...</code> */
-    MD_SPAN_CODE
+    MD_SPAN_CODE,
+
+    /* <del>...</del>
+     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
+     */
+    MD_SPAN_DEL
 } MD_SPANTYPE;
 
 /* Text is the actual textual contents of span. */
@@ -263,6 +268,7 @@ typedef struct MD_SPAN_IMG_DETAIL {
 #define MD_FLAG_NOHTMLSPANS                 0x0040  /* Disable raw HTML (inline). */
 #define MD_FLAG_NOHTML                      (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
 #define MD_FLAG_TABLES                      0x0100  /* Enable tables extension. */
+#define MD_FLAG_STRIKETHROUGH               0x0200  /* Enable strikethrough extension. */
 
 /* Convenient sets of flags corresponding to well-known Markdown dialects.
  * Note we may only support subset of features of the referred dialect.
@@ -270,7 +276,7 @@ typedef struct MD_SPAN_IMG_DETAIL {
  * possible given what features we implement.
  */
 #define MD_DIALECT_COMMONMARK               0
-#define MD_DIALECT_GITHUB                   (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES)
+#define MD_DIALECT_GITHUB                   (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH)
 
 /* Renderer structure.
  */
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
index d333c7a..8dd42be 100755
--- a/scripts/run-tests.sh
+++ b/scripts/run-tests.sh
@@ -35,3 +35,4 @@ $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/coverage.txt" -p "$PROGRAM"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-email-autolinks.txt" -p "$PROGRAM --fpermissive-email-autolinks"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-url-autolinks.txt" -p "$PROGRAM --fpermissive-url-autolinks"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/tables.txt" -p "$PROGRAM --ftables"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough"
diff --git a/test/strikethrough.txt b/test/strikethrough.txt
new file mode 100644
index 0000000..ffb0d59
--- /dev/null
+++ b/test/strikethrough.txt
@@ -0,0 +1,34 @@
+
+# Strikethrough
+
+With the flag `MD_FLAG_STRIKETHROUGH`, MD4C enables an extension that
+recognizes strikethrough spans.
+
+Strikethrough text is any text wrapped in tildes (~).
+
+```````````````````````````````` example
+~Hi~ Hello, world!
+.
+<p><del>Hi</del> Hello, world!</p>
+````````````````````````````````
+
+Any number of tildes may be used on either side of the text; they do not need
+to match, and they cannot be nested.
+
+```````````````````````````````` example
+This ~text~~~~ is ~~~~curious~.
+.
+<p>This <del>text</del> is <del>curious</del>.</p>
+````````````````````````````````
+
+As with regular emphasis delimiters, a new paragraph causes strikethrough
+parsing to cease:
+
+```````````````````````````````` example
+This ~~has a
+
+new paragraph~~.
+.
+<p>This ~~has a</p>
+<p>new paragraph~~.</p>
+````````````````````````````````
\ No newline at end of file