Implement fenced code blocks.
diff --git a/md2html/md2html.c b/md2html/md2html.c
index 74e3387..0761c3c 100644
--- a/md2html/md2html.c
+++ b/md2html/md2html.c
@@ -125,6 +125,21 @@ membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
*** HTML renderer implementation ***
**************************************/
+/* Emit the opening tags of a code block: "<pre><code", an optional
+ * class="language-LANGNAME" attribute, and the closing ">".
+ *
+ * out: Output buffer the HTML is appended to.
+ * det: Code block detail; det->lang may be NULL when no info string is known,
+ *      otherwise it holds det->lang_size characters (not zero-terminated).
+ */
+static void
+open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
+{
+    MEMBUF_APPEND_LITERAL(out, "<pre><code");
+
+    /* If known, output the HTML 5 attribute class="language-LANGNAME". */
+    if(det->lang != NULL) {
+        MD_SIZE lang_size = 0;
+
+        /* Only the first word of the info string names the language; anything
+         * after the first space or tab (e.g. "ruby startline=3") must not
+         * leak into the class attribute. */
+        while(lang_size < det->lang_size  &&
+              det->lang[lang_size] != ' '  &&  det->lang[lang_size] != '\t')
+            lang_size++;
+
+        /* Do not emit an empty class="language-" attribute. */
+        if(lang_size > 0) {
+            MEMBUF_APPEND_LITERAL(out, " class=\"language-");
+            membuf_append_escaped(out, det->lang, lang_size);
+            MEMBUF_APPEND_LITERAL(out, "\"");
+        }
+    }
+
+    MEMBUF_APPEND_LITERAL(out, ">");
+}
+
static int
enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
@@ -135,7 +150,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
case MD_BLOCK_DOC: /* noop */ break;
case MD_BLOCK_HR: MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break;
case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
- case MD_BLOCK_CODE: MEMBUF_APPEND_LITERAL(out, "<pre><code>"); break;
+ case MD_BLOCK_CODE: open_code_block(out, (const MD_BLOCK_CODE_DETAIL*) detail); break;
case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break;
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 1a1173c..a50fe6a 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -81,6 +81,13 @@ struct MD_CTX_tag {
/* For MD_BLOCK_HEADER. */
unsigned header_level;
+
+ /* For MD_BLOCK_CODE (fenced). */
+ CHAR code_fence_char; /* '~' or '`' */
+ SZ code_fence_length;
+ OFF code_fence_indent;
+ OFF code_fence_info_beg;
+ OFF code_fence_info_end;
};
typedef enum MD_LINETYPE_tag MD_LINETYPE;
@@ -91,6 +98,8 @@ enum MD_LINETYPE_tag {
MD_LINE_SETEXTHEADER,
MD_LINE_SETEXTUNDERLINE,
MD_LINE_INDENTEDCODE,
+ MD_LINE_CODEFENCE,
+ MD_LINE_FENCEDCODE,
MD_LINE_TEXT
};
@@ -364,6 +373,74 @@ md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end)
return 0;
}
+/* Check whether the line starting at offset beg is an opening code fence,
+ * i.e. a run of at least three '`' or '~' characters optionally followed by
+ * an info string.
+ *
+ * The caller must guarantee that CH(beg) is '`' or '~'.
+ *
+ * On success, returns 0, stores the offset of the end of the line into *p_end
+ * and records the fence character, fence length and the (space-trimmed) info
+ * string boundaries in ctx. On failure, returns -1 and leaves both *p_end and
+ * the fence state in ctx untouched.
+ */
+static int
+md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    OFF off = beg;
+    OFF info_beg;
+    SZ fence_length;
+
+    MD_ASSERT(CH(beg) == _T('`') || CH(beg) == _T('~'));
+
+    while(off < ctx->size && CH(off) == CH(beg))
+        off++;
+
+    /* Fence must have at least three characters. */
+    if(off - beg < 3)
+        return -1;
+    fence_length = off - beg;
+
+    /* Optionally, space(s) can follow. */
+    while(off < ctx->size && CH(off) == _T(' '))
+        off++;
+
+    /* Optionally, an info string can follow. Only a backtick-based fence
+     * forbids '`' inside it (it would be ambiguous with a code span); a
+     * tilde-based fence may carry any characters up to the end of line. */
+    info_beg = off;
+    while(off < ctx->size && !ISNEWLINE(off)) {
+        if(CH(beg) == _T('`') && CH(off) == _T('`'))
+            return -1;
+        off++;
+    }
+
+    *p_end = off;
+
+    /* Right trim of the info string. */
+    while(off > info_beg && CH(off-1) == _T(' '))
+        off--;
+
+    /* Commit the fence state only now, when we know the fence is valid. */
+    ctx->code_fence_char = CH(beg);
+    ctx->code_fence_length = fence_length;
+    ctx->code_fence_info_beg = info_beg;
+    ctx->code_fence_info_end = off;
+    return 0;
+}
+
+/* Check whether the line starting at offset beg closes the code fence
+ * currently recorded in ctx (ctx->code_fence_char, ctx->code_fence_length).
+ *
+ * Returns 0 if it does, -1 otherwise. In both cases *p_end receives the
+ * offset where scanning stopped: if the line is not a closing fence, the
+ * caller consumes it as plain code content without any further parsing.
+ */
+static int
+md_is_closing_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    OFF pos;
+    int is_closing = 0;
+
+    /* Count the run of fence characters; it has to use the same character
+     * as the opening fence. */
+    for(pos = beg; pos < ctx->size && CH(pos) == ctx->code_fence_char; pos++)
+        ;
+
+    /* The run must be at least as long as the opening fence. */
+    if(pos - beg >= ctx->code_fence_length) {
+        /* Trailing spaces are tolerated... */
+        while(pos < ctx->size && CH(pos) == _T(' '))
+            pos++;
+
+        /* ...but then the line (or the whole input) has to end. */
+        is_closing = (pos >= ctx->size || ISNEWLINE(pos));
+    }
+
+    *p_end = pos;
+    return is_closing ? 0 : -1;
+}
+
/* Analyze type of the line and find some its properties. This serves as a
* main input for determining type and boundaries of a block. */
static void
@@ -385,6 +462,21 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
line->beg = off;
+ /* Check whether we are fenced code continuation. */
+ if(pivot_line->type == MD_LINE_FENCEDCODE || pivot_line->type == MD_LINE_CODEFENCE) {
+ /* We are another MD_LINE_FENCEDCODE unless we are closing fence
+ * which we transform into MD_LINE_BLANK. */
+ if(line->indent < ctx->code_indent_offset) {
+ if(md_is_closing_code_fence(ctx, off, &off) == 0) {
+ line->type = MD_LINE_BLANK;
+ goto done;
+ }
+ }
+
+ line->type = MD_LINE_FENCEDCODE;
+ goto done;
+ }
+
/* Check whether we are blank line.
* Note blank lines after indented code are treated as part of that block.
* If they are at the end of the block, it is discarded by caller.
@@ -428,7 +520,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
}
}
- /* Check whether we are setext underline. */
+ /* Check whether we are Setext underline. */
if(pivot_line->type == MD_LINE_TEXT && (CH(off) == _T('=') || CH(off) == _T('-'))) {
if(md_is_setext_underline(ctx, off, &off) == 0) {
line->type = MD_LINE_SETEXTUNDERLINE;
@@ -436,7 +528,8 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
}
}
- /* Check whether we are thematic break line. */
+ /* Check whether we are thematic break line.
+ * (Keep this after check for Setext underline as that one has higher priority). */
if(ISANYOF(off, _T("-_*"))) {
if(md_is_hr_line(ctx, off, &off) == 0) {
line->type = MD_LINE_HR;
@@ -444,6 +537,15 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
}
}
+ /* Check whether we are starting code fence. */
+ if(CH(off) == _T('`') || CH(off) == _T('~')) {
+ if(md_is_opening_code_fence(ctx, off, &off) == 0) {
+ ctx->code_fence_indent = line->indent;
+ line->type = MD_LINE_CODEFENCE;
+ goto done;
+ }
+ }
+
/* By default, we are normal text line. */
line->type = MD_LINE_TEXT;
@@ -489,6 +591,7 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
MD_BLOCKTYPE block_type;
union {
MD_BLOCK_H_DETAIL header;
+ MD_BLOCK_CODE_DETAIL code;
} det;
int ret = 0;
@@ -511,7 +614,18 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
break;
case MD_LINE_INDENTEDCODE:
+ det.code.lang = NULL;
+ det.code.lang_size = 0;
+ block_type = MD_BLOCK_CODE;
+ break;
+
+ case MD_LINE_FENCEDCODE:
block_type = MD_BLOCK_CODE;
+ if(ctx->code_fence_info_beg < ctx->code_fence_info_end)
+ det.code.lang = STR(ctx->code_fence_info_beg);
+ else
+ det.code.lang = NULL;
+ det.code.lang_size = ctx->code_fence_info_end - ctx->code_fence_info_beg;
break;
case MD_LINE_TEXT:
@@ -519,6 +633,10 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
break;
case MD_LINE_SETEXTUNDERLINE:
+ case MD_LINE_CODEFENCE:
+ /* Noop. */
+ return 0;
+
default:
MD_UNREACHABLE();
break;
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 80529de..0aab4a0 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -103,6 +103,13 @@ struct MD_BLOCK_H_DETAIL_tag {
unsigned level; /* Header level (1 - 6) */
};
+/* Detailed info for MD_BLOCK_CODE.
+ *
+ * Passed as the detail argument of the block callbacks for code blocks.
+ * For indented code blocks, and for fenced ones without any info string,
+ * lang is NULL and lang_size is 0.
+ */
+typedef struct MD_BLOCK_CODE_DETAIL_tag MD_BLOCK_CODE_DETAIL;
+struct MD_BLOCK_CODE_DETAIL_tag {
+ const MD_CHAR* lang; /* Info string of the fence, or NULL. Not zero-terminated, use lang_size. */
+ MD_SIZE lang_size; /* Count of characters in lang. */
+};
+
/* Flags specifying Markdown dialect.
*