Implement code spans.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
diff --git a/README.md b/README.md
index 011ce0e..1ca3bb6 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ more or less forms our to do list.
- **Inlines:**
- [x] 6.1 Backslash escapes
- [ ] 6.2 Entity and numeric character references
- - [ ] 6.3 Code spans
+ - [x] 6.3 Code spans
- [ ] 6.4 Emphasis and strong emphasis
- [ ] 6.5 Links
- [ ] 6.6 Images
diff --git a/md2html/md2html.c b/md2html/md2html.c
index 1b79cc5..66e76f7 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -181,12 +181,24 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
static int
enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
+ struct membuffer* out = (struct membuffer*) userdata; /* userdata carries the output buffer. */
+
+ switch(type) {
+ case MD_SPAN_CODE: MEMBUF_APPEND_LITERAL(out, "<code>"); break; /* Emit the opening tag of a code span. */
+ }
+
return 0;
}
static int
leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
+ struct membuffer* out = (struct membuffer*) userdata; /* userdata carries the output buffer. */
+
+ switch(type) {
+ case MD_SPAN_CODE: MEMBUF_APPEND_LITERAL(out, "</code>"); break; /* Emit the closing tag of a code span. */
+ }
+
return 0;
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
index d6a8781..9426a26 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -305,8 +305,8 @@ md_str_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
/* Structure marking an offset which needs special attention. The type
* of the attention is determined by the member ch:
*
- * '\\': Escape sequence.
- * (beg points to '\\'; beg+1 to the escaped char.)
+ * '\\': Maybe escape sequence.
+ * '`': Maybe code span start/end.
*
* Note that not all instances of these chars in the text imply creation of the
* structure. Only those which have (or may have, after we see more context)
@@ -315,14 +315,20 @@ md_str_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
struct MD_MARK_tag {
OFF beg;
OFF end;
+
+ /* Index of another mark. Before resolving, the member may be used for
+ * arbitrary purposes during the analysis phase.
+ * For resolved openers, it has to point to the corresponding closer. */
+ int next;
+
MD_CHAR ch;
unsigned short flags;
};
/* Mark flags. */
-#define MD_MARK_RESOLVED 0x0001
-#define MD_MARK_OPENER 0x0002
-#define MD_MARK_CLOSER 0x0004
+#define MD_MARK_RESOLVED 0x0001 /* Yes, the special meaning is indeed recognized. */
+#define MD_MARK_OPENER 0x0002 /* This opens (or potentially may open) a span. */
+#define MD_MARK_CLOSER 0x0004 /* This closes (or potentially may close) a span. */
static MD_MARK*
@@ -374,9 +380,6 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
int ret = 0;
MD_MARK* mark;
- /* Reset the previously collected stack of marks. */
- ctx->n_marks = 0;
-
for(i = 0; i < n_lines; i++) {
const MD_LINE* line = &lines[i];
OFF off = line->beg;
@@ -384,14 +387,53 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
while(off < end) {
CHAR ch = CH(off);
- /* Analyze backslash escapes.
- * Note it can go beyond line->end as it may involve
- * escaped new line to form a hard break. */
+ /* A backslash escape.
+ * It can go beyond line->end as it may involve escaped new
+ * line to form a hard break. */
if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
/* Hard-break cannot be on the last line of the block. */
if(!ISNEWLINE(off+1) || i+1 < n_lines)
PUSH(ch, off, off+2, MD_MARK_RESOLVED);
- off += 2;
+
+ /* If '`' follows, we need both marks as the backslash may be
+ * inside a code span. */
+ if(CH(off+1) == _T('`'))
+ off++;
+ else
+ off += 2;
+ continue;
+ }
+
+ /* Turn non-trivial whitespace (a run longer than one char, or anything but ' ') into single space. */
+ if(ISWHITESPACE_(ch)) {
+ OFF tmp = off+1;
+
+ while(tmp < end && ISWHITESPACE(tmp))
+ tmp++;
+ /* The run spans [off, tmp): measure its length from off, not from the line end. */
+ if(tmp - off > 1 || ch != _T(' ')) {
+ PUSH(ch, off, tmp, MD_MARK_RESOLVED);
+ off = tmp;
+ continue;
+ }
+ }
+
+ /* A potential code span start/end. */
+ if(ch == _T('`')) {
+ unsigned flags;
+ OFF tmp = off+1;
+
+ /* It may be opener only if it is not escaped. */
+ if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].beg == off-1 && CH(off-1) == _T('\\'))
+ flags = MD_MARK_CLOSER;
+ else
+ flags = MD_MARK_OPENER | MD_MARK_CLOSER;
+
+ while(tmp < end && CH(tmp) == _T('`'))
+ tmp++;
+ PUSH(ch, off, tmp, flags);
+
+ off = tmp;
continue;
}
@@ -409,23 +451,115 @@ abort:
return ret;
}
+
+/* Mark characters per precedence level; md_analyze_marks() handles one entry of this table per pass. */
+static const CHAR* md_precedence_table[] = {
+ _T("`"), /* Code spans. */
+ _T("\\") /* Backslash escapes. */
+};
+
+/* Pair the backtick run at mark_index with an unresolved opener of equal length, or queue it as one. */
+static void
+md_analyze_backtick(MD_CTX* ctx, int mark_index, int* p_unresolved_openers)
+{
+ MD_MARK* mark = &ctx->marks[mark_index];
+ int opener = *p_unresolved_openers;
+
+ /* Walk the chain of unresolved openers (linked through the next member)
+ * looking for one of the same length. If found, they form a code span. */
+ while(opener >= 0) {
+ MD_MARK* op = &ctx->marks[opener];
+
+ if(op->end - op->beg == mark->end - mark->beg) {
+ /* Resolve the span. */
+ op->flags = MD_MARK_OPENER | MD_MARK_RESOLVED;
+ mark->flags = MD_MARK_CLOSER | MD_MARK_RESOLVED;
+
+ /* Drop this opener and every later one from the chain of unresolved openers. */
+ *p_unresolved_openers = op->next;
+
+ /* Make the opener point to us as its closer. */
+ op->next = mark_index;
+
+ /* Cancel every mark between opener and closer (e.g. escapes) by zeroing it. */
+ if(mark_index - opener > 1)
+ memset(ctx->marks + opener + 1, 0, sizeof(MD_MARK) * (mark_index - opener - 1));
+
+ /* Extend both marks over any space or new line just inside the span
+ * so that this whitespace is swallowed together with the backticks. */
+ while(CH(op->end) == _T(' ') || ISNEWLINE(op->end))
+ op->end++;
+ while(CH(mark->beg-1) == _T(' ') || ISNEWLINE(mark->beg-1))
+ mark->beg--;
+
+ /* Done. */
+ return;
+ }
+
+ opener = ctx->marks[opener].next;
+ }
+
+ /* No opener matched; if this mark may open a span, push it onto the chain. */
+ if(mark->flags & MD_MARK_OPENER) {
+ mark->next = *p_unresolved_openers;
+ *p_unresolved_openers = mark_index;
+ }
+}
+/* Run one analysis pass over ctx->marks, handling only the mark characters of precedence_level. */
+static void
+md_analyze_marks(MD_CTX* ctx, int precedence_level)
+{
+ const CHAR* mark_chars = md_precedence_table[precedence_level];
+ /* Head of the chain of potential/unresolved code span openers; -1 means empty. */
+ int code_span_unresolved_openers = -1;
+ int i = 0;
+
+ while(i < ctx->n_marks) {
+ MD_MARK* mark = &ctx->marks[i];
+
+ /* Skip resolved marks; for a resolved opener, skip everything up to its closer. */
+ if(mark->flags & MD_MARK_RESOLVED) {
+ if(mark->flags & MD_MARK_OPENER)
+ i = mark->next + 1;
+ else
+ i++;
+ continue;
+ }
+
+ /* Skip marks which do not belong to this precedence level. */
+ if(!ISANYOF_(mark->ch, mark_chars)) {
+ i++;
+ continue;
+ }
+
+ /* Analyze the mark. */
+ switch(mark->ch) {
+ case _T('`'):
+ md_analyze_backtick(ctx, i, &code_span_unresolved_openers);
+ break;
+ }
+
+ i++;
+ }
+}
+
/* Analyze marks (build ctx->marks). */
-static int
+static void
md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
- int ret = 0;
-
- MD_CHECK(md_collect_marks(ctx, lines, n_lines));
+ int i;
-abort:
- return ret;
+ for(i = 0; i < SIZEOF_ARRAY(md_precedence_table); i++)
+ md_analyze_marks(ctx, i); /* One pass per precedence level, in table order. */
}
/* Render the output, accordingly to the analyzed ctx->marks. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
+ MD_TEXTTYPE text_type;
const MD_LINE* line = lines;
+ const MD_MARK* prev_mark = NULL;
const MD_MARK* mark;
OFF off = lines[0].beg;
OFF end = lines[n_lines-1].end;
@@ -440,28 +574,45 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
while(!(mark->flags & MD_MARK_RESOLVED))
mark++;
+ text_type = MD_TEXT_NORMAL;
+
while(1) {
/* Process the text up to the next mark or end-of-line. */
OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
if(tmp > off) {
- MD_TEXT(MD_TEXT_NORMAL, STR(off), tmp - off);
+ MD_TEXT(text_type, STR(off), tmp - off);
off = tmp;
}
/* If reached the mark, process it and move to next one. */
if(off >= mark->beg) {
switch(mark->ch) {
- case _T('\\'): /* Backslash escape. */
- if(ISNEWLINE(mark->beg+1))
- enforce_hardbreak = 1;
- else
- MD_TEXT(MD_TEXT_NORMAL, STR(mark->beg+1), 1);
- break;
+ case _T('\\'): /* Backslash escape. */
+ if(ISNEWLINE(mark->beg+1))
+ enforce_hardbreak = 1;
+ else
+ MD_TEXT(text_type, STR(mark->beg+1), 1);
+ break;
+
+ case _T(' '): /* Non-trivial space. */
+ MD_TEXT(text_type, _T(" "), 1);
+ break;
+
+ case _T('`'): /* Code span. */
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
+ text_type = MD_TEXT_CODE;
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
+ text_type = MD_TEXT_NORMAL;
+ }
+ break;
}
off = mark->end;
/* Move to next resolved mark. */
+ prev_mark = mark;
mark++;
while(!(mark->flags & MD_MARK_RESOLVED))
mark++;
@@ -475,12 +626,23 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
if(off >= end)
break;
- /* Output soft or hard line break. */
- if(enforce_hardbreak || (CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
- break_type = MD_TEXT_BR;
- else
- break_type = MD_TEXT_SOFTBR;
- MD_TEXT(break_type, _T("\n"), 1);
+ if(text_type == MD_TEXT_CODE) {
+ /* Inside code spans, new lines are transformed into single
+ * spaces. */
+ MD_ASSERT(prev_mark != NULL);
+ MD_ASSERT(prev_mark->ch == _T('`') && (prev_mark->flags & MD_MARK_OPENER));
+ MD_ASSERT(mark->ch == _T('`') && (mark->flags & MD_MARK_CLOSER));
+ /* Emit the space as code text, unless the opener or closer mark already covers this line break. */
+ if(prev_mark->end < off && off < mark->beg)
+ MD_TEXT(MD_TEXT_CODE, _T(" "), 1);
+ } else {
+ /* Output soft or hard line break. */
+ if(enforce_hardbreak || (CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
+ break_type = MD_TEXT_BR;
+ else
+ break_type = MD_TEXT_SOFTBR;
+ MD_TEXT(break_type, _T("\n"), 1);
+ }
/* Switch to the following line. */
line++;
@@ -504,7 +666,13 @@ md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
int ret;
- MD_CHECK(md_analyze_inlines(ctx, lines, n_lines));
+ /* Reset the previously collected stack of marks. */
+ ctx->n_marks = 0;
+
+ MD_CHECK(md_collect_marks(ctx, lines, n_lines));
+
+ md_analyze_inlines(ctx, lines, n_lines);
+
MD_CHECK(md_process_inlines(ctx, lines, n_lines));
abort:
@@ -616,8 +784,6 @@ md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
{
OFF off = beg;
- MD_ASSERT(CH(beg) == _T('`') || CH(beg) == _T('~'));
-
while(off < ctx->size && CH(off) == CH(beg))
off++;
@@ -728,8 +894,6 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
OFF off = beg + 1;
int i;
- MD_ASSERT(CH(beg) == _T('<'));
-
/* Check for type 1: <script, <pre, or <style */
for(i = 0; t1[i].name != NULL; i++) {
if(off + t1[i].len < ctx->size) {
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 941a9ed..929eb24 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -89,7 +89,7 @@ enum MD_BLOCKTYPE_tag {
* like paragraph or list item. */
typedef enum MD_SPANTYPE_tag MD_SPANTYPE;
enum MD_SPANTYPE_tag {
- MD_SPAN_DUMMY = 0 /* not yet used... */
+ MD_SPAN_CODE /* Inline code span delimited by backtick runs. */
};
/* Text is the actual textual contents of span. */