Fix some emphasis parsing issues. * We incorrectly applied the infamous rule of three only to asterisk-encoded emphasis; it has to be applied to underscore-encoded emphasis as well. * We incorrectly applied the rule of three only if the opener and/or closer was inside a word. It also has to be applied if the mark is both preceded and followed by punctuation. Fixes #217.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
diff --git a/src/md4c.c b/src/md4c.c
index e25d266..502d5a7 100644
--- a/src/md4c.c
+++ b/src/md4c.c
@@ -178,22 +178,27 @@ struct MD_CTX_tag {
#endif
/* For resolving of inline spans. */
- MD_MARKCHAIN mark_chains[13];
+ MD_MARKCHAIN mark_chains[18];
#define PTR_CHAIN (ctx->mark_chains[0])
#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
-#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
-#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
-#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
-#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
-#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
-#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
-#define UNDERSCORE_OPENERS (ctx->mark_chains[8])
-#define TILDE_OPENERS_1 (ctx->mark_chains[9])
-#define TILDE_OPENERS_2 (ctx->mark_chains[10])
-#define BRACKET_OPENERS (ctx->mark_chains[11])
-#define DOLLAR_OPENERS (ctx->mark_chains[12])
+#define ASTERISK_OPENERS_oo_mod3_0 (ctx->mark_chains[2]) /* Opener-only */
+#define ASTERISK_OPENERS_oo_mod3_1 (ctx->mark_chains[3])
+#define ASTERISK_OPENERS_oo_mod3_2 (ctx->mark_chains[4])
+#define ASTERISK_OPENERS_oc_mod3_0 (ctx->mark_chains[5]) /* Both opener and closer candidate */
+#define ASTERISK_OPENERS_oc_mod3_1 (ctx->mark_chains[6])
+#define ASTERISK_OPENERS_oc_mod3_2 (ctx->mark_chains[7])
+#define UNDERSCORE_OPENERS_oo_mod3_0 (ctx->mark_chains[8]) /* Opener-only */
+#define UNDERSCORE_OPENERS_oo_mod3_1 (ctx->mark_chains[9])
+#define UNDERSCORE_OPENERS_oo_mod3_2 (ctx->mark_chains[10])
+#define UNDERSCORE_OPENERS_oc_mod3_0 (ctx->mark_chains[11]) /* Both opener and closer candidate */
+#define UNDERSCORE_OPENERS_oc_mod3_1 (ctx->mark_chains[12])
+#define UNDERSCORE_OPENERS_oc_mod3_2 (ctx->mark_chains[13])
+#define TILDE_OPENERS_1 (ctx->mark_chains[14])
+#define TILDE_OPENERS_2 (ctx->mark_chains[15])
+#define BRACKET_OPENERS (ctx->mark_chains[16])
+#define DOLLAR_OPENERS (ctx->mark_chains[17])
#define OPENERS_CHAIN_FIRST 2 /* [0] and [1] are special. */
-#define OPENERS_CHAIN_LAST 12
+#define OPENERS_CHAIN_LAST 17
int n_table_cell_boundaries;
@@ -2482,7 +2487,7 @@ struct MD_MARK_tag {
#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
/* Mark flags specific for various mark types (so they can share bits). */
-#define MD_MARK_EMPH_INTRAWORD 0x20 /* Helper for the "rule of 3". */
+#define MD_MARK_EMPH_OC 0x20 /* Opener/closer mixed candidate. Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0 0x40
#define MD_MARK_EMPH_MOD3_1 0x80
#define MD_MARK_EMPH_MOD3_2 (0x40 | 0x80)
@@ -2492,18 +2497,27 @@ struct MD_MARK_tag {
#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */
static MD_MARKCHAIN*
-md_asterisk_chain(MD_CTX* ctx, unsigned flags)
+md_emph_chain(MD_CTX* ctx, MD_CHAR ch, unsigned flags)
{
- switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0;
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1;
- case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2;
- case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0;
- case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1;
- case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2;
- default: MD_UNREACHABLE();
+ MD_MARKCHAIN* chain;
+
+ switch(ch) {
+ case '*': chain = &ASTERISK_OPENERS_oo_mod3_0; break;
+ case '_': chain = &UNDERSCORE_OPENERS_oo_mod3_0; break;
+ default: MD_UNREACHABLE();
}
- return NULL;
+
+ if(flags & MD_MARK_EMPH_OC)
+ chain += 3;
+
+ switch(flags & MD_MARK_EMPH_MOD3_MASK) {
+ case MD_MARK_EMPH_MOD3_0: chain += 0; break;
+ case MD_MARK_EMPH_MOD3_1: chain += 1; break;
+ case MD_MARK_EMPH_MOD3_2: chain += 2; break;
+ default: MD_UNREACHABLE();
+ }
+
+ return chain;
}
static MD_MARKCHAIN*
@@ -2512,11 +2526,14 @@ md_mark_chain(MD_CTX* ctx, int mark_index)
MD_MARK* mark = &ctx->marks[mark_index];
switch(mark->ch) {
- case _T('*'): return md_asterisk_chain(ctx, mark->flags);
- case _T('_'): return &UNDERSCORE_OPENERS;
+ case _T('*'):
+ case _T('_'): return md_emph_chain(ctx, mark->ch, mark->flags);
+
case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
+
case _T('!'):
case _T('['): return &BRACKET_OPENERS;
+
default: return NULL;
}
}
@@ -3075,8 +3092,8 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
flags |= MD_MARK_POTENTIAL_CLOSER;
if(right_level > 0 && right_level >= left_level)
flags |= MD_MARK_POTENTIAL_OPENER;
- if(left_level == 2 && right_level == 2)
- flags |= MD_MARK_EMPH_INTRAWORD;
+ if(flags == (MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER))
+ flags |= MD_MARK_EMPH_OC;
/* For "the rule of three" we need to remember the original
* size of the mark (modulo three), before we potentially
@@ -3745,48 +3762,39 @@ static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
MD_MARK* mark = &ctx->marks[mark_index];
- MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
/* If we can be a closer, try to resolve with the preceding opener. */
if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
MD_MARK* opener = NULL;
int opener_index = 0;
-
- if(mark->ch == _T('*')) {
- MD_MARKCHAIN* opener_chains[6];
- int i, n_opener_chains;
- unsigned flags = mark->flags;
-
- /* Apply the "rule of three". */
- n_opener_chains = 0;
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
- if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
- if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
- if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
- if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
- opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
-
- /* Opener is the most recent mark from the allowed chains. */
- for(i = 0; i < n_opener_chains; i++) {
- if(opener_chains[i]->tail >= 0) {
- int tmp_index = opener_chains[i]->tail;
- MD_MARK* tmp_mark = &ctx->marks[tmp_index];
- if(opener == NULL || tmp_mark->end > opener->end) {
- opener_index = tmp_index;
- opener = tmp_mark;
- }
+ MD_MARKCHAIN* opener_chains[6];
+ int i, n_opener_chains;
+ unsigned flags = mark->flags;
+
+ n_opener_chains = 0;
+
+ /* Apply the rule of 3 */
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_OC);
+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_1 | MD_MARK_EMPH_OC);
+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_2 | MD_MARK_EMPH_OC);
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_0);
+ if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_1);
+ if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+ opener_chains[n_opener_chains++] = md_emph_chain(ctx, mark->ch, MD_MARK_EMPH_MOD3_2);
+
+ /* Opener is the most recent mark from the allowed chains. */
+ for(i = 0; i < n_opener_chains; i++) {
+ if(opener_chains[i]->tail >= 0) {
+ int tmp_index = opener_chains[i]->tail;
+ MD_MARK* tmp_mark = &ctx->marks[tmp_index];
+ if(opener == NULL || tmp_mark->end > opener->end) {
+ opener_index = tmp_index;
+ opener = tmp_mark;
}
}
- } else {
- /* Simple emph. mark */
- if(chain->tail >= 0) {
- opener_index = chain->tail;
- opener = &ctx->marks[opener_index];
- }
}
/* Resolve, if we have found matching opener. */
@@ -3810,7 +3818,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
/* If we could not resolve as closer, we may be yet be an opener. */
if(mark->flags & MD_MARK_POTENTIAL_OPENER)
- md_mark_chain_append(ctx, chain, mark_index);
+ md_mark_chain_append(ctx, md_emph_chain(ctx, mark->ch, mark->flags), mark_index);
}
static void
diff --git a/test/coverage.txt b/test/coverage.txt
index 1d4fdf5..5d79bdb 100644
--- a/test/coverage.txt
+++ b/test/coverage.txt
@@ -420,6 +420,36 @@ x <!A>
````````````````````````````````
+### [Issue 217](https://github.com/mity/md4c/issues/217)
+
+```````````````````````````````` example
+__!_!__
+
+__!x!__
+
+**!*!**
+
+---
+
+_*__*_*
+
+_*xx*_*
+
+_*__-_-
+
+_*xx-_-
+.
+<p><strong>!_!</strong></p>
+<p><strong>!x!</strong></p>
+<p><strong>!*!</strong></p>
+<hr />
+<p><em><em>__</em></em>*</p>
+<p><em><em>xx</em></em>*</p>
+<p><em>*__-</em>-</p>
+<p><em>*xx-</em>-</p>
+````````````````````````````````
+
+
## Code coverage
### `md_is_unicode_whitespace__()`