Rework/improve recognition of strike-through spans. Closes #102.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a83c44b..afd674c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,18 @@
Changes:
+ * Recognition of strike-through spans (with the flag `MD_FLAG_STRIKETHROUGH`)
+ has become much stricter and, arguably, more reasonable.
+
+ - Only single tildes (`~`) and double tildes (`~~`) are recognized as
+ strike-through marks. Longer ones are not anymore.
+ - The opener and closer marks have to be of the same length.
+ - The tildes cannot open a strike-through span if whitespace follows.
+ - The tildes cannot close a strike-through span if whitespace precedes.
+
+ This change follows the change of behavior made in cmark-gfm some time ago,
+ so it is also beneficial from a compatibility point of view.
+
* When building MD4C by hand instead of using its CMake-based build, the UTF-8
support was by default disabled, unless explicitly asked for by defining
a preprocessor macro `MD4C_USE_UTF8`.
diff --git a/md4c/md4c.c b/md4c/md4c.c
index dc76fb8..c293424 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -133,7 +133,7 @@ struct MD_CTX_tag {
#endif
/* For resolving of inline spans. */
- MD_MARKCHAIN mark_chains[12];
+ MD_MARKCHAIN mark_chains[13];
#define PTR_CHAIN ctx->mark_chains[0]
#define TABLECELLBOUNDARIES ctx->mark_chains[1]
#define ASTERISK_OPENERS_extraword_mod3_0 ctx->mark_chains[2]
@@ -143,11 +143,12 @@ struct MD_CTX_tag {
#define ASTERISK_OPENERS_intraword_mod3_1 ctx->mark_chains[6]
#define ASTERISK_OPENERS_intraword_mod3_2 ctx->mark_chains[7]
#define UNDERSCORE_OPENERS ctx->mark_chains[8]
-#define TILDE_OPENERS ctx->mark_chains[9]
-#define BRACKET_OPENERS ctx->mark_chains[10]
-#define DOLLAR_OPENERS ctx->mark_chains[11]
+#define TILDE_OPENERS_1 ctx->mark_chains[9]
+#define TILDE_OPENERS_2 ctx->mark_chains[10]
+#define BRACKET_OPENERS ctx->mark_chains[11]
+#define DOLLAR_OPENERS ctx->mark_chains[12]
#define OPENERS_CHAIN_FIRST 2
-#define OPENERS_CHAIN_LAST 11
+#define OPENERS_CHAIN_LAST 12
int n_table_cell_boundaries;
@@ -2474,7 +2475,7 @@ md_mark_chain(MD_CTX* ctx, int mark_index)
switch(mark->ch) {
case _T('*'): return md_asterisk_chain(ctx, mark->flags);
case _T('_'): return &UNDERSCORE_OPENERS;
- case _T('~'): return &TILDE_OPENERS;
+ case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
case _T('['): return &BRACKET_OPENERS;
case _T('|'): return &TABLECELLBOUNDARIES;
default: return NULL;
@@ -3254,7 +3255,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
while(tmp < line_end && CH(tmp) == _T('~'))
tmp++;
- PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
+ if(tmp - off < 3) {
+ unsigned flags = 0;
+
+ if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
+ flags |= MD_MARK_POTENTIAL_OPENER;
+ if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
+ flags |= MD_MARK_POTENTIAL_CLOSER;
+ if(flags != 0)
+ PUSH_MARK(ch, off, tmp, flags);
+ }
+
off = tmp;
continue;
}
@@ -3724,20 +3735,23 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
static void
md_analyze_tilde(MD_CTX* ctx, int mark_index)
{
- /* We attempt to be Github Flavored Markdown compatible here. GFM says
- * that length of the tilde sequence is not important at all. Note that
- * implies the TILDE_OPENERS chain can have at most one item. */
+ MD_MARK* mark = &ctx->marks[mark_index];
+ MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
+
+ /* We attempt to be Github Flavored Markdown compatible here. GFM accepts
+ * only tilde sequences of length 1 and 2, and the lengths of the opener
+ * and closer have to match. */
- if(TILDE_OPENERS.head >= 0) {
- /* The chain already contains an opener, so we may resolve the span. */
- int opener_index = TILDE_OPENERS.head;
+ if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) {
+ int opener_index = chain->head;
md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
- md_resolve_range(ctx, &TILDE_OPENERS, opener_index, mark_index);
- } else {
- /* We can only be opener. */
- md_mark_chain_append(ctx, &TILDE_OPENERS, mark_index);
+ md_resolve_range(ctx, chain, opener_index, mark_index);
+ return;
}
+
+ if(mark->flags & MD_MARK_POTENTIAL_OPENER)
+ md_mark_chain_append(ctx, chain, mark_index);
}
static void
@@ -3997,8 +4011,10 @@ md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
ASTERISK_OPENERS_intraword_mod3_2.tail = -1;
UNDERSCORE_OPENERS.head = -1;
UNDERSCORE_OPENERS.tail = -1;
- TILDE_OPENERS.head = -1;
- TILDE_OPENERS.tail = -1;
+ TILDE_OPENERS_1.head = -1;
+ TILDE_OPENERS_1.tail = -1;
+ TILDE_OPENERS_2.head = -1;
+ TILDE_OPENERS_2.tail = -1;
DOLLAR_OPENERS.head = -1;
DOLLAR_OPENERS.tail = -1;
}
diff --git a/test/strikethrough.txt b/test/strikethrough.txt
index 0b21b7e..884ce59 100644
--- a/test/strikethrough.txt
+++ b/test/strikethrough.txt
@@ -1,10 +1,10 @@
-# Strikethrough
+# Strike-Through
With the flag `MD_FLAG_STRIKETHROUGH`, MD4C enables extension for recognition
of strike-through spans.
-Strikethrough text is any text wrapped in tildes (~).
+Strike-through text is any text wrapped in one or two tildes (`~`).
```````````````````````````````` example
~Hi~ Hello, world!
@@ -12,17 +12,36 @@ Strikethrough text is any text wrapped in tildes (~).
<p><del>Hi</del> Hello, world!</p>
````````````````````````````````
-Any number of tildes may be used on either side of the text; they do not need
-to match, and they cannot be nested.
+If the lengths of the opener and closer don't match, the strike-through is
+not recognized.
```````````````````````````````` example
-This ~text~~~~ is ~~~~curious~.
+This ~text~~ is curious.
.
-<p>This <del>text</del> is <del>curious</del>.</p>
+<p>This ~text~~ is curious.</p>
````````````````````````````````
+A tilde sequence that is too long won't be recognized:
+
+```````````````````````````````` example
+foo ~~~bar~~~
+.
+<p>foo ~~~bar~~~</p>
+````````````````````````````````
+
+Also note the markers cannot open a strike-through span if they are followed
+by whitespace; and similarly, they cannot close the span if they are
+preceded by whitespace:
+
+```````````````````````````````` example
+~foo ~bar
+.
+<p>~foo ~bar</p>
+````````````````````````````````
+
+
As with regular emphasis delimiters, a new paragraph will cause the cessation
-of parsing a strikethrough:
+of parsing a strike-through:
```````````````````````````````` example
This ~~has a