md_build_attribute: Handle U+0000 character.
diff --git a/md2html/render_html.c b/md2html/render_html.c
index ca28089..8f0b22c 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -245,6 +245,7 @@ render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr,
const MD_CHAR* text = attr->text + off;
switch(type) {
+ case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_text); break;
case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break;
default: fn_append(r, text, size); break;
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 7b2c186..8e07945 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -250,36 +250,38 @@ struct MD_VERBATIMLINE_tag {
/* Character classification.
* Note we assume ASCII compatibility of code points < 128 here. */
-#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
-#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL)
-#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2))
-#define ISASCII_(ch) ((unsigned)(ch) <= 127)
-#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t')))
-#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n')))
-#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
-#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
-#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
-#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z')))
-#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z')))
-#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch))
-#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9')))
-#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
-#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch))
-
-#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette))
-#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2))
-#define ISASCII(off) ISASCII_(CH(off))
-#define ISBLANK(off) ISBLANK_(CH(off))
-#define ISNEWLINE(off) ISNEWLINE_(CH(off))
-#define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
-#define ISCNTRL(off) ISCNTRL_(CH(off))
-#define ISPUNCT(off) ISPUNCT_(CH(off))
-#define ISUPPER(off) ISUPPER_(CH(off))
-#define ISLOWER(off) ISLOWER_(CH(off))
-#define ISALPHA(off) ISALPHA_(CH(off))
-#define ISDIGIT(off) ISDIGIT_(CH(off))
-#define ISXDIGIT(off) ISXDIGIT_(CH(off))
-#define ISALNUM(off) ISALNUM_(CH(off))
+#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
+#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL)
+#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2))
+#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
+#define ISASCII_(ch) ((unsigned)(ch) <= 127)
+#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t')))
+#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n')))
+#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
+#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
+#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
+#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z')))
+#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z')))
+#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch))
+#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9')))
+#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
+#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch))
+
+#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette))
+#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2))
+#define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
+#define ISASCII(off) ISASCII_(CH(off))
+#define ISBLANK(off) ISBLANK_(CH(off))
+#define ISNEWLINE(off) ISNEWLINE_(CH(off))
+#define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
+#define ISCNTRL(off) ISCNTRL_(CH(off))
+#define ISPUNCT(off) ISPUNCT_(CH(off))
+#define ISUPPER(off) ISUPPER_(CH(off))
+#define ISLOWER(off) ISLOWER_(CH(off))
+#define ISALPHA(off) ISALPHA_(CH(off))
+#define ISDIGIT(off) ISDIGIT_(CH(off))
+#define ISXDIGIT(off) ISXDIGIT_(CH(off))
+#define ISALNUM(off) ISALNUM_(CH(off))
static inline const CHAR*
md_strchr(const CHAR* str, CHAR ch)
{
@@ -1428,7 +1430,7 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
* without any malloc(). */
is_trivial = TRUE;
for(raw_off = 0; raw_off < raw_size; raw_off++) {
- if(ISANYOF2_(raw_text[raw_off], _T('\\'), _T('&'))) {
+ if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
is_trivial = FALSE;
break;
}
@@ -1455,6 +1457,14 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
off = 0;
while(raw_off < raw_size) {
+ if(raw_text[raw_off] == _T('\0')) {
+ MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
+ memcpy(build->text + off, raw_text + raw_off, 1);
+ off++;
+ raw_off++;
+ continue;
+ }
+
if(raw_text[raw_off] == _T('&')) {
OFF ent_end;
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 46acbee..1d67073 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -201,7 +201,7 @@ typedef enum MD_ALIGN {
* Note that these conditions are guaranteed:
* -- substr_offsets[0] == 0
* -- substr_offsets[LAST+1] == size
- * -- Only MD_TEXT_NORMAL and MD_TEXT_ENTITY substrings can appear.
+ * -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY and MD_TEXT_NULLCHAR substrings can appear.
*/
typedef struct MD_ATTRIBUTE {
const MD_CHAR* text;