Simplify parsing of tables (#97) We do so by removing the function md_is_table_row(). md_is_table_row() did some crazy inline parsing to detect whether the line contains at least one pipe which is not inside a code span or other high-priority inline element. This was very complicated under the hood and it was actually breaking the clean design which separates the block analysis pass from the inline analysis of each block's contents. We now just use the table underline for determining that the block is a table and its properties, e.g. the column count. This means a paragraph now cannot interrupt a table. This is a change in behavior but likely an acceptable one as it actually brings the behavior closer to the behavior of tables in cmark-gfm in this regard. Last but not least, the old approach seems to prevent adoption of other useful features; for more about that, see the discussion in PR #92.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c11aa1..a5104e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,13 @@
Changes:
+ * Parsing of tables (with `MD_FLAG_TABLES`) is now closer to the way
+ cmark-gfm parses tables as we do not require every row of the table to
+ contain a pipe `|` anymore.
+
+ As a consequence, paragraphs now cannot interrupt tables. A paragraph which
+ follows the table has to be delimited with a blank line.
+
* With `MD_FLAG_LATEXMATHSPANS`, LaTeX math spans (`$...$`) and LaTeX display
math spans (`$$...$$`) are recognized. (Note though that the HTML renderer
outputs them verbatim.) Thanks for the feature belong to [Tilman Roeder](
diff --git a/md4c/md4c.c b/md4c/md4c.c
index 2b4bfd5..ecd9656 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -4251,7 +4251,7 @@ md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
{
MD_LINE line;
OFF* pipe_offs = NULL;
- int i, j, n;
+ int i, j, k, n;
int ret = 0;
line.beg = beg;
@@ -4263,32 +4263,32 @@ md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
/* We have to remember the cell boundaries in local buffer because
* ctx->marks[] shall be reused during cell contents processing. */
- n = ctx->n_table_cell_boundaries;
+ n = ctx->n_table_cell_boundaries + 2;
pipe_offs = (OFF*) malloc(n * sizeof(OFF));
if(pipe_offs == NULL) {
MD_LOG("malloc() failed.");
ret = -1;
goto abort;
}
- for(i = TABLECELLBOUNDARIES.head, j = 0; i >= 0; i = ctx->marks[i].next) {
+ j = 0;
+ pipe_offs[j++] = beg;
+ for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) {
MD_MARK* mark = &ctx->marks[i];
- pipe_offs[j++] = mark->beg;
+ pipe_offs[j++] = mark->end;
}
+ pipe_offs[j++] = end+1;
/* Process cells. */
MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
- j = 0;
- if(beg < pipe_offs[0] && j < col_count)
- MD_CHECK(md_process_table_cell(ctx, cell_type, align[j++], beg, pipe_offs[0]));
- for(i = 0; i < n-1 && j < col_count; i++)
- MD_CHECK(md_process_table_cell(ctx, cell_type, align[j++], pipe_offs[i]+1, pipe_offs[i+1]));
- if(pipe_offs[n-1] < end-1 && j < col_count)
- MD_CHECK(md_process_table_cell(ctx, cell_type, align[j++], pipe_offs[n-1]+1, end));
+ k = 0;
+ for(i = 0; i < j-1 && k < col_count; i++) {
+ if(pipe_offs[i] < pipe_offs[i+1]-1)
+ MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
+ }
/* Make sure we call enough table cells even if the current table contains
* too few of them. */
- while(j < col_count)
- MD_CHECK(md_process_table_cell(ctx, cell_type, align[j++], 0, 0));
-
+ while(k < col_count)
+ MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
abort:
@@ -4340,38 +4340,6 @@ abort:
return ret;
}
-static int
-md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
-{
- MD_LINE line;
- int i;
- int ret = FALSE;
-
- line.beg = beg;
- line.end = beg;
-
- /* Find end of line. */
- while(line.end < ctx->size && !ISNEWLINE(line.end))
- line.end++;
-
- MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
-
- if(TABLECELLBOUNDARIES.head >= 0) {
- if(p_end != NULL)
- *p_end = line.end;
- ret = TRUE;
- }
-
-abort:
- /* Free any temporary memory blocks stored within some dummy marks. */
- for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
- free(md_mark_get_ptr(ctx, i));
- PTR_CHAIN.head = -1;
- PTR_CHAIN.tail = -1;
-
- return ret;
-}
-
/**************************
*** Processing Block ***
@@ -5803,9 +5771,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
}
/* Check whether we are table continuation. */
- if(pivot_line->type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off) &&
- n_parents == ctx->n_containers)
- {
+ if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
line->type = MD_LINE_TABLE;
break;
}
@@ -5859,8 +5825,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
unsigned col_count;
if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 &&
- md_is_table_underline(ctx, off, &off, &col_count) &&
- md_is_table_row(ctx, pivot_line->beg, NULL))
+ md_is_table_underline(ctx, off, &off, &col_count))
{
line->data = col_count;
line->type = MD_LINE_TABLEUNDERLINE;
diff --git a/test/tables.txt b/test/tables.txt
index b883e82..de61f7d 100644
--- a/test/tables.txt
+++ b/test/tables.txt
@@ -85,8 +85,9 @@ quux | quuz
</table>
````````````````````````````````
-However for one-column table, at least one of those has to be used, otherwise
-it would be parsed as a Setext title followed by paragraph.
+However, for a one-column table, at least one pipe has to be used in the table
+header underline, otherwise it would be parsed as a Setext title followed by
+a paragraph.
```````````````````````````````` example
Column 1
@@ -141,8 +142,7 @@ Lorem ipsum dolor sit amet.
| quux | quuz</p>
````````````````````````````````
-But paragraph or other block can interrupt a table as a line without any pipe
-ends the table.
+Similarly, a paragraph cannot interrupt a table:
```````````````````````````````` example
Column 1 | Column 2
@@ -160,15 +160,15 @@ Lorem ipsum dolor sit amet.
<tr><td>foo</td><td>bar</td></tr>
<tr><td>baz</td><td>qux</td></tr>
<tr><td>quux</td><td>quuz</td></tr>
+<tr><td>Lorem ipsum dolor sit amet.</td><td></td></tr>
</tbody>
</table>
-<p>Lorem ipsum dolor sit amet.</p>
````````````````````````````````
-The ruling line between head and body of the table must include the same amount
-of cells as the line with column names, and each cell has to consist of three
-dash (`-`) characters. However first, last or both dashes may be replaced with
-colon to denote column alignment.
+The underline of the table is crucial for recognition of the table, count of
+its columns and their alignment: The line has to contain at least one pipe,
+and it has to provide at least three dash (`-`) characters for every column in
+the table.
Thus this is not a table because there are too few dashes for Column 2.
@@ -186,7 +186,9 @@ Thus this is not a table because there are too few dashes for Column 2.
| quux | quuz</p>
````````````````````````````````
-And this is a table where each of the four column uses different alignment.
+The first, the last or both the first and the last dash in each column
+underline can be replaced with a colon (`:`) to request left, right or middle
+alignment of the respective column:
```````````````````````````````` example
| Column 1 | Column 2 | Column 3 | Column 4 |
@@ -263,12 +265,11 @@ quux | quuz
<tr><th>Column 1</th><th>Column 2</th></tr>
</thead>
<tbody>
+<tr><td><code>foo | bar</code></td><td></td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
</tbody>
</table>
-<p><code>foo | bar</code>
-baz | qux
-quux | quuz</p>
-
````````````````````````````````