Commit e6661f23dc3eb1373e1af314ea3d6057e4a78caa

Martin Mitáš 2020-01-10T19:27:10

Implement an underline extension. (#103) Closes #101.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index afd674c..8aec50a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,12 @@
 
 ## Next Version (Work in Progress)
 
+New features:
+
+ * With `MD_FLAG_UNDERLINE`, spans enclosed in underscores (`_foo_`) are seen
+   as underline (`MD_SPAN_U`) rather than an ordinary emphasis or
+   strong emphasis.
+
 Changes:
 
  * Recognition of strike-through spans (with the flag `MD_FLAG_STRIKETHROUGH`)
diff --git a/README.md b/README.md
index 4c11514..881d04c 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,9 @@ additional extensions:
   `[[target article|link label]]`) are supported. (Note that the HTML renderer
   outputs them in a custom tag `<x-wikilink>`.)
 
+* With the flag `MD_FLAG_UNDERLINE`, underscore (`_`) denotes an underline
+  instead of an ordinary emphasis or strong emphasis.
+
 Few features of CommonMark (those some people see as mis-features) may be
 disabled:
 
diff --git a/md2html/md2html.c b/md2html/md2html.c
index c199664..4afd305 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -211,6 +211,7 @@ static const option cmdline_options[] = {
     { "flatex-math",                 0,  'L', OPTION_ARG_NONE },
     { "fwiki-links",                 0,  'K', OPTION_ARG_NONE },
     { "ftasklists",                  0,  'X', OPTION_ARG_NONE },
+    { "funderline",                  0,  '_', OPTION_ARG_NONE },
     { 0 }
 };
 
@@ -261,6 +262,7 @@ usage(void)
         "      --flatex-math    Enable LaTeX style mathematics spans (e.g. $a+b=c$ becomes <equation>a+b=c</equation>)\n"
         "      --fwiki-links    Enable wiki links\n"
         "      --ftasklists     Enable task lists\n"
+        "      --funderline     Enable underline spans\n"
     );
 }
 
@@ -311,6 +313,7 @@ cmdline_callback(int opt, char const* value, void* data)
         case 'L':   parser_flags |= MD_FLAG_LATEXMATHSPANS; break;
         case 'K':   parser_flags |= MD_FLAG_WIKILINKS; break;
         case 'X':   parser_flags |= MD_FLAG_TASKLISTS; break;
+        case '_':   parser_flags |= MD_FLAG_UNDERLINE; break;
 
         default:
             fprintf(stderr, "Illegal option: %s\n", value);
diff --git a/md2html/render_html.c b/md2html/render_html.c
index 0caea41..a1f96b2 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -455,6 +455,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
     switch(type) {
         case MD_SPAN_EM:                RENDER_VERBATIM(r, "<em>"); break;
         case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "<strong>"); break;
+        case MD_SPAN_U:                 RENDER_VERBATIM(r, "<u>"); break;
         case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
         case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
         case MD_SPAN_CODE:              RENDER_VERBATIM(r, "<code>"); break;
@@ -483,6 +484,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
     switch(type) {
         case MD_SPAN_EM:                RENDER_VERBATIM(r, "</em>"); break;
         case MD_SPAN_STRONG:            RENDER_VERBATIM(r, "</strong>"); break;
+        case MD_SPAN_U:                 RENDER_VERBATIM(r, "</u>"); break;
         case MD_SPAN_A:                 RENDER_VERBATIM(r, "</a>"); break;
         case MD_SPAN_IMG:               /*noop, handled above*/ break;
         case MD_SPAN_CODE:              RENDER_VERBATIM(r, "</code>"); break;
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 934bc5f..1520545 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -4111,7 +4111,23 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
                     }
                     break;
 
-                case '_':
+                case '_':       /* Underline (or emphasis if we fall through). */
+                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
+                        if(mark->flags & MD_MARK_OPENER) {
+                            while(off < mark->end) {
+                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
+                                off++;
+                            }
+                        } else {
+                            while(off < mark->end) {
+                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
+                                off++;
+                            }
+                        }
+                        break;
+                    }
+                    /* Fall through. */
+
                 case '*':       /* Emphasis, strong emphasis. */
                     if(mark->flags & MD_MARK_OPENER) {
                         if((mark->end - off) % 2) {
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 994467a..c2c4311 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -140,7 +140,11 @@ typedef enum MD_SPANTYPE {
     /* Wiki links
      * Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
      */
-    MD_SPAN_WIKILINK
+    MD_SPAN_WIKILINK,
+
+    /* <u>...</u>
+     * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
+    MD_SPAN_U
 } MD_SPANTYPE;
 
 /* Text is the actual textual contents of span. */
@@ -296,6 +300,7 @@ typedef struct MD_SPAN_WIKILINK {
 #define MD_FLAG_TASKLISTS                   0x0800  /* Enable task list extension. */
 #define MD_FLAG_LATEXMATHSPANS              0x1000  /* Enable $ and $$ containing LaTeX equations. */
 #define MD_FLAG_WIKILINKS                   0x2000  /* Enable wiki links extension. */
+#define MD_FLAG_UNDERLINE                   0x4000  /* Enable underline extension (and disables '_' for normal emphasis). */
 
 #define MD_FLAG_PERMISSIVEAUTOLINKS         (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
 #define MD_FLAG_NOHTML                      (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
index 7e6fe73..c00b36a 100755
--- a/scripts/run-tests.sh
+++ b/scripts/run-tests.sh
@@ -67,5 +67,9 @@ echo "Wiki links extension:"
 $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/wiki-links.txt" -p "$PROGRAM --fwiki-links --ftables"
 
 echo
+echo "Underline extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/underline.txt" -p "$PROGRAM --funderline"
+
+echo
 echo "Pathological input:"
 $PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
diff --git a/test/underline.txt b/test/underline.txt
new file mode 100644
index 0000000..35e80b6
--- /dev/null
+++ b/test/underline.txt
@@ -0,0 +1,39 @@
+
+# Underline
+
+With the flag `MD_FLAG_UNDERLINE`, MD4C sees underscore `_` as a mark
+denoting an underlined span rather than an ordinary emphasis (or a strong
+emphasis).
+
+```````````````````````````````` example
+_foo_
+.
+<p><u>foo</u></p>
+````````````````````````````````
+
+In sequences of multiple underscores, each single one translates into an
+underline span mark.
+
+```````````````````````````````` example
+___foo___
+.
+<p><u><u><u>foo</u></u></u></p>
+````````````````````````````````
+
+Intra-word underscores are not recognized as underline marks:
+
+```````````````````````````````` example
+foo_bar_baz
+.
+<p>foo_bar_baz</p>
+````````````````````````````````
+
+Also the parser follows the standard rules for when an underscore can
+or cannot open or close a span. Therefore there is no underline in the following
+example because no underscore can be seen as a closing mark.
+
+```````````````````````````````` example
+_foo _bar
+.
+<p>_foo _bar</p>
+````````````````````````````````