Fix some emphasis parsing issues. * We incorrectly applied the infamous rule of three only to asterisk-encoded emphasis; it has to be applied to underscore-encoded emphasis as well. * We incorrectly applied the rule of three only if the opener and/or closer was inside a word. It also has to be applied if the mark is both preceded and followed by punctuation. Fixes #217.

diff --git a/src/md4c.c b/src/md4c.c
index e25d266..502d5a7 100644
--- a/src/md4c.c
+++ b/src/md4c.c
@@ -178,22 +178,27 @@ struct MD_CTX_tag {
#endif
/* For resolving of inline spans. */
- MD_MARKCHAIN mark_chains[13];
+ MD_MARKCHAIN mark_chains[18];
#define PTR_CHAIN (ctx->mark_chains[0])
#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
-#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
-#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
-#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
-#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
-#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
-#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
-#define UNDERSCORE_OPENERS (ctx->mark_chains[8])
-#define TILDE_OPENERS_1 (ctx->mark_chains[9])
-#define TILDE_OPENERS_2 (ctx->mark_chains[10])
-#define BRACKET_OPENERS (ctx->mark_chains[11])
-#define DOLLAR_OPENERS (ctx->mark_chains[12])
+#define ASTERISK_OPENERS_oo_mod3_0 (ctx->mark_chains[2]) /* Opener-only */
+#define ASTERISK_OPENERS_oo_mod3_1 (ctx->mark_chains[3])
+#define ASTERISK_OPENERS_oo_mod3_2 (ctx->mark_chains[4])
+#define ASTERISK_OPENERS_oc_mod3_0 (ctx->mark_chains[5]) /* Both opener and closer candidate */
+#define ASTERISK_OPENERS_oc_mod3_1 (ctx->mark_chains[6])
+#define ASTERISK_OPENERS_oc_mod3_2 (ctx->mark_chains[7])
+#define UNDERSCORE_OPENERS_oo_mod3_0 (ctx->mark_chains[8]) /* Opener-only */
+#define UNDERSCORE_OPENERS_oo_mod3_1 (ctx->mark_chains[9])
+#define UNDERSCORE_OPENERS_oo_mod3_2 (ctx->mark_chains[10])
+#define UNDERSCORE_OPENERS_oc_mod3_0 (ctx->mark_chains[11]) /* Both opener and closer candidate */
+#define UNDERSCORE_OPENERS_oc_mod3_1 (ctx->mark_chains[12])
+#define UNDERSCORE_OPENERS_oc_mod3_2 (ctx->mark_chains[13])
+#define TILDE_OPENERS_1 (ctx->mark_chains[14])
+#define TILDE_OPENERS_2 (ctx->mark_chains[15])
+#define BRACKET_OPENERS (ctx->mark_chains[16])
+#define DOLLAR_OPENERS (ctx->mark_chains[17])
#define OPENERS_CHAIN_FIRST 2 /* [0] and [1] are special. */
-#define OPENERS_CHAIN_LAST 12
+#define OPENERS_CHAIN_LAST 17
int n_table_cell_boundaries;
@@ -2482,7 +2487,7 @@ struct MD_MARK_tag {
#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
/* Mark flags specific for various mark types (so they can share bits). */
-#define MD_MARK_EMPH_INTRAWORD 0x20 /* Helper for the "rule of 3". */
+#define MD_MARK_EMPH_OC 0x20 /* Opener/closer mixed candidate. Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0 0x40
#define MD_MARK_EMPH_MOD3_1 0x80
#define MD_MARK_EMPH_MOD3_2 (0x40 | 0x80)
@@ -2492,18 +2497,27 @@ struct MD_MARK_tag {
#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */
static MD_MARKCHAIN*
-md_asterisk_chain(MD_CTX* ctx, unsigned flags)
+md_emph_chain(MD_CTX* ctx, MD_CHAR ch, unsigned flags)
{
- switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0;
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1;
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2;
- case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0;
- case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1;
- case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2;
- default: MD_UNREACHABLE();
+ MD_MARKCHAIN* chain;
+
+ switch(ch) {
+ case '*': chain = &ASTERISK_OPENERS_oo_mod3_0; break;
+ case '_': chain = &UNDERSCORE_OPENERS_oo_mod3_0; break;
+ default: MD_UNREACHABLE();
}
- return NULL;
+
+ if(flags & MD_MARK_EMPH_OC)
+ chain += 3;
+
+ switch(flags & MD_MARK_EMPH_MOD3_MASK) {
+ case MD_MARK_EMPH_MOD3_0: chain += 0; break;
+ case MD_MARK_EMPH_MOD3_1: chain += 1; break;
+ case MD_MARK_EMPH_MOD3_2: chain += 2; break;
+ default: MD_UNREACHABLE();
+ }
+
+ return chain;
}
static MD_MARKCHAIN*
@@ -2512,11 +2526,14 @@ md_mark_chain(MD_CTX* ctx, int mark_index)
MD_MARK* mark = &ctx->marks[mark_index];
switch(mark->ch) {
- case _T('*'): return md_asterisk_chain(ctx, mark->flags);
- case _T('_'): return &UNDERSCORE_OPENERS;
+ case _T('*'):
+ case _T('_'): return md_emph_chain(ctx, mark->ch, mark->flags);
+
case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
+
case _T('!'):
case _T('['): return &BRACKET_OPENERS;
+
default: return NULL;
}
}
@@ -3075,8 +3092,8 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
flags |= MD_MARK_POTENTIAL_CLOSER;
if(right_level > 0 && right_level >= left_level)
flags |= MD_MARK_POTENTIAL_OPENER;
- if(left_level == 2 && right_level == 2)
- flags |= MD_MARK_EMPH_INTRAWORD;
+ if(flags == (MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER))
+ flags |= MD_MARK_EMPH_OC;
/* For "the rule of three" we need to remember the original
* size of the mark (modulo three), before we potentially
@@ -3745,48 +3762,39 @@ static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
MD_MARK* mark = &ctx->marks[mark_index];
- MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
/* If we can be a closer, try to resolve with the preceding opener. */
if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
MD_MARK* opener = NULL;
int opener_index = 0;
-
- if(mark->ch == _T('*')) {
- MD_MARKCHAIN* opener_chains[6];
- int i, n_opener_chains;
- unsigned flags = mark->flags;
-
- /* Apply the "rule of three". */
- n_opener_chains = 0;
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
- if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
- if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
- if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
- if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
-
- /* Opener is the most recent mark from the allowed chains. */
- for(i = 0; i < n_opener_chains; i++) {
- if(opener_chains[i]->tail >= 0) {
- int tmp_index = opener_chains[i]->tail;
- MD_MARK* tmp_mark = &ctx->marks[tmp_index];
- if(opener == NULL || tmp_mark->end > opener->end) {
- opener_index = tmp_index;
- opener = tmp_mark;
- }
+ MD_MARKCHAIN* opener_chains[6];
+ int i, n_opener_chains;
+ unsigned flags = mark->flags;
+
+ n_opener_chains = 0;
+
+ /* Apply the rule of 3 */
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_OC);
+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_1 | MD_MARK_EMPH_OC);
+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_2 | MD_MARK_EMPH_OC);
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_0);
+ if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_1);
+ if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_2);
+
+ /* Opener is the most recent mark from the allowed chains. */
+ for(i = 0; i < n_opener_chains; i++) {
+ if(opener_chains[i]->tail >= 0) {
+ int tmp_index = opener_chains[i]->tail;
+ MD_MARK* tmp_mark = &ctx->marks[tmp_index];
+ if(opener == NULL || tmp_mark->end > opener->end) {
+ opener_index = tmp_index;
+ opener = tmp_mark;
}
}
- } else {
- /* Simple emph. mark */
- if(chain->tail >= 0) {
- opener_index = chain->tail;
- opener = &ctx->marks[opener_index];
- }
}
/* Resolve, if we have found matching opener. */
@@ -3810,7 +3818,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
/* If we could not resolve as closer, we may be yet be an opener. */
if(mark->flags & MD_MARK_POTENTIAL_OPENER)
- md_mark_chain_append(ctx, chain, mark_index);
+ md_mark_chain_append(ctx, md_emph_chain(ctx, mark->ch, mark->flags), mark_index);
}
static void
diff --git a/test/coverage.txt b/test/coverage.txt
index 1d4fdf5..5d79bdb 100644
--- a/test/coverage.txt
+++ b/test/coverage.txt
@@ -420,6 +420,36 @@ x <!A>
````````````````````````````````
+### [Issue 217](https://github.com/mity/md4c/issues/217)
+
+```````````````````````````````` example
+__!_!__
+
+__!x!__
+
+**!*!**
+
+---
+
+_*__*_*
+
+_*xx*_*
+
+_*__-_-
+
+_*xx-_-
+.
+<p><strong>!_!</strong></p>
+<p><strong>!x!</strong></p>
+<p><strong>!*!</strong></p>
+<hr />
+<p><em><em>__</em></em>*</p>
+<p><em><em>xx</em></em>*</p>
+<p><em>*__-</em>-</p>
+<p><em>*xx-</em>-</p>
+````````````````````````````````
+
+
## Code coverage
### `md_is_unicode_whitespace__()`