Commit 5f47a5cbfa4bc83390185d7b9fa8af7f4cfb809f

Martin Mitas 2017-09-11T10:55:21

md_build_attribute: Handle U+0000 character.

diff --git a/md2html/render_html.c b/md2html/render_html.c
index ca28089..8f0b22c 100644
--- a/md2html/render_html.c
+++ b/md2html/render_html.c
@@ -245,6 +245,7 @@ render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr,
         const MD_CHAR* text = attr->text + off;
 
         switch(type) {
+            case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, render_text); break;
             case MD_TEXT_ENTITY:    render_entity(r, text, size, fn_append); break;
             default:                fn_append(r, text, size); break;
         }
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 7b2c186..8e07945 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -250,36 +250,38 @@ struct MD_VERBATIMLINE_tag {
 
 /* Character classification.
  * Note we assume ASCII compatibility of code points < 128 here. */
-#define ISIN_(ch, ch_min, ch_max)   ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
-#define ISANYOF_(ch, palette)       (md_strchr((palette), (ch)) != NULL)
-#define ISANYOF2_(ch, ch1, ch2)     ((ch) == (ch1) || (ch) == (ch2))
-#define ISASCII_(ch)                ((unsigned)(ch) <= 127)
-#define ISBLANK_(ch)                (ISANYOF2_((ch), _T(' '), _T('\t')))
-#define ISNEWLINE_(ch)              (ISANYOF2_((ch), _T('\r'), _T('\n')))
-#define ISWHITESPACE_(ch)           (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
-#define ISCNTRL_(ch)                ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
-#define ISPUNCT_(ch)                (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
-#define ISUPPER_(ch)                (ISIN_(ch, _T('A'), _T('Z')))
-#define ISLOWER_(ch)                (ISIN_(ch, _T('a'), _T('z')))
-#define ISALPHA_(ch)                (ISUPPER_(ch) || ISLOWER_(ch))
-#define ISDIGIT_(ch)                (ISIN_(ch, _T('0'), _T('9')))
-#define ISXDIGIT_(ch)               (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
-#define ISALNUM_(ch)                (ISALPHA_(ch) || ISDIGIT_(ch))
-
-#define ISANYOF(off, palette)       ISANYOF_(CH(off), (palette))
-#define ISANYOF2(off, ch1, ch2)     ISANYOF2_(CH(off), (ch1), (ch2))
-#define ISASCII(off)                ISASCII_(CH(off))
-#define ISBLANK(off)                ISBLANK_(CH(off))
-#define ISNEWLINE(off)              ISNEWLINE_(CH(off))
-#define ISWHITESPACE(off)           ISWHITESPACE_(CH(off))
-#define ISCNTRL(off)                ISCNTRL_(CH(off))
-#define ISPUNCT(off)                ISPUNCT_(CH(off))
-#define ISUPPER(off)                ISUPPER_(CH(off))
-#define ISLOWER(off)                ISLOWER_(CH(off))
-#define ISALPHA(off)                ISALPHA_(CH(off))
-#define ISDIGIT(off)                ISDIGIT_(CH(off))
-#define ISXDIGIT(off)               ISXDIGIT_(CH(off))
-#define ISALNUM(off)                ISALNUM_(CH(off))
+#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
+#define ISANYOF_(ch, palette)           (md_strchr((palette), (ch)) != NULL)
+#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
+#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
+#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
+#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
+#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
+#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
+#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
+#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
+#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
+#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
+#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
+#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
+#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
+#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))
+
+#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
+#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
+#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
+#define ISASCII(off)                    ISASCII_(CH(off))
+#define ISBLANK(off)                    ISBLANK_(CH(off))
+#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
+#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
+#define ISCNTRL(off)                    ISCNTRL_(CH(off))
+#define ISPUNCT(off)                    ISPUNCT_(CH(off))
+#define ISUPPER(off)                    ISUPPER_(CH(off))
+#define ISLOWER(off)                    ISLOWER_(CH(off))
+#define ISALPHA(off)                    ISALPHA_(CH(off))
+#define ISDIGIT(off)                    ISDIGIT_(CH(off))
+#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
+#define ISALNUM(off)                    ISALNUM_(CH(off))
 static inline const CHAR*
 md_strchr(const CHAR* str, CHAR ch)
 {
@@ -1428,7 +1430,7 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
      * without any malloc(). */
     is_trivial = TRUE;
     for(raw_off = 0; raw_off < raw_size; raw_off++) {
-        if(ISANYOF2_(raw_text[raw_off], _T('\\'), _T('&'))) {
+        if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
             is_trivial = FALSE;
             break;
         }
@@ -1455,6 +1457,14 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
         off = 0;
 
         while(raw_off < raw_size) {
+            if(raw_text[raw_off] == _T('\0')) {
+                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
+                memcpy(build->text + off, raw_text + raw_off, 1);
+                off++;
+                raw_off++;
+                continue;
+            }
+
             if(raw_text[raw_off] == _T('&')) {
                 OFF ent_end;
 
diff --git a/md4c/md4c.h b/md4c/md4c.h
index 46acbee..1d67073 100644
--- a/md4c/md4c.h
+++ b/md4c/md4c.h
@@ -201,7 +201,7 @@ typedef enum MD_ALIGN {
  * Note that these conditions are guaranteed:
  *  -- substr_offsets[0] == 0
  *  -- substr_offsets[LAST+1] == size
- *  -- Only MD_TEXT_NORMAL and MD_TEXT_ENTITY substrings can appear.
+ *  -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY and MD_TEXT_NULLCHAR substrings can appear.
  */
 typedef struct MD_ATTRIBUTE {
     const MD_CHAR* text;