Fix "rule of three" for emphasis resolution (issue #14).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 02a0639..0b9219b 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -2160,8 +2160,12 @@ struct MD_MARK_tag {
#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
/* Mark flags specific for various mark types (so they can share bits). */
-#define MD_MARK_INTRAWORD 0x40 /* Helper for emphasis '*', '_' ("the rule of 3"). */
-#define MD_MARK_AUTOLINK 0x40 /* Distinguisher for '<', '>'. */
+#define MD_MARK_EMPH_INTRAWORD 0x20 /* Helper for the "rule of 3". */
+#define MD_MARK_EMPH_MODULO3_0 0x40
+#define MD_MARK_EMPH_MODULO3_1 0x80
+#define MD_MARK_EMPH_MODULO3_2 (0x40 | 0x80)
+#define MD_MARK_EMPH_MODULO3_MASK (0x40 | 0x80)
+#define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */
static MD_MARK*
@@ -2483,7 +2487,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
if(right_level > 0 && right_level >= left_level)
flags |= MD_MARK_POTENTIAL_OPENER;
if(left_level == 2 && right_level == 2)
- flags |= MD_MARK_INTRAWORD;
+ flags |= MD_MARK_EMPH_INTRAWORD;
+
+ /* For "the rule of three" we need to remember the original
+ * size of the mark (modulo three), before we potentially
+ * split the mark when being later resolved partially by some
+ * shorter closer. */
+ switch((tmp - off) % 3) {
+ case 0: flags |= MD_MARK_EMPH_MODULO3_0; break;
+ case 1: flags |= MD_MARK_EMPH_MODULO3_1; break;
+ case 2: flags |= MD_MARK_EMPH_MODULO3_2; break;
+ }
if(flags != 0) {
PUSH_MARK(ch, off, tmp, flags);
@@ -3148,8 +3162,18 @@ md_analyze_simple_pairing_mark(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index,
SZ opener_size = opener->end - opener->beg;
SZ closer_size = mark->end - mark->beg;
- if(apply_rule_of_three && ((mark->flags & MD_MARK_INTRAWORD) || (opener->flags & MD_MARK_INTRAWORD))) {
- while((opener_size + closer_size) % 3 == 0) {
+ if(apply_rule_of_three &&
+ ((mark->flags & MD_MARK_EMPH_INTRAWORD) || (opener->flags & MD_MARK_EMPH_INTRAWORD)))
+ {
+ SZ opener_orig_size_modulo3;
+
+ switch(opener->flags & MD_MARK_EMPH_MODULO3_MASK) {
+ case MD_MARK_EMPH_MODULO3_0: opener_orig_size_modulo3 = 0; break;
+ case MD_MARK_EMPH_MODULO3_1: opener_orig_size_modulo3 = 1; break;
+ case MD_MARK_EMPH_MODULO3_2: opener_orig_size_modulo3 = 2; break;
+ }
+
+ while((opener_orig_size_modulo3 + closer_size) % 3 == 0) {
if(opener->prev < 0)
goto cannot_resolve;
diff --git a/test/coverage.txt b/test/coverage.txt
index aeb000c..a801300 100644
--- a/test/coverage.txt
+++ b/test/coverage.txt
@@ -94,6 +94,15 @@ x [link](/url "foo – bar") x
````````````````````````````````
+### [Issue 14](https://github.com/mity/md4c/issues/14)
+```````````````````````````````` example
+a***b* c*
+.
+<p>a*<em><em>b</em> c</em></p>
+````````````````````````````````
+
+
+
## Code coverage
### `md_is_unicode_whitespace__()`