/* kc3
* Copyright from 2022 to 2025 kmx.io <contact@kmx.io>
*
* Permission is hereby granted to use this software granted the above
* copyright notice and this permission paragraph are included in all
* copies and substantial portions of this software.
*
* THIS SOFTWARE IS PROVIDED "AS-IS" WITHOUT ANY GUARANTEE OF
* PURPOSE AND PERFORMANCE. IN NO EVENT WHATSOEVER SHALL THE
* AUTHOR BE CONSIDERED LIABLE FOR THE USE AND PERFORMANCE OF
* THIS SOFTWARE.
*/
#include "../libkc3/kc3.h"
#include "pdf_buf_parse.h"
#include "pdf_file.h"
// TODO: use xref instead
/* Advance through buf one unit at a time until `token` can be parsed
   at the current position. On success returns the total skipped
   length plus the value returned for the token. Returns the failing
   call's value when pdf_buf_parse_token reports < 0 or buf_ignore
   reports <= 0. */
sw pdf_buf_ignore_until_token (s_buf *buf, const char *token)
{
  sw skipped = 0;
  sw step;
  assert(buf);
  assert(token);
  for (;;) {
    step = pdf_buf_parse_token(buf, token);
    if (step < 0)
      return step;
    if (step > 0)
      return skipped + step;
    /* Token not here: drop one unit and try again. */
    step = buf_ignore(buf, 1);
    if (step <= 0)
      return step;
    skipped += step;
  }
}
/* Parse one PDF object at the current read position into dest.
   Leading white space and PDF comments ('%' to end of line) are
   skipped first, then each object parser is tried in turn and the
   first one returning > 0 wins. Zero or negative results from the
   individual object parsers are treated as "no match".
   Returns the accumulated consumed length, or < 0 when skipping
   white space or comments fails. dest is written only when the
   returned length is non-zero.
   NOTE(review): when only white space was consumed and no parser
   matched, the zero-initialized tag is still stored in dest and the
   white-space length is returned. This is kept unchanged here so
   that what callers observe does not change; confirm it is wanted. */
sw pdf_buf_parse (s_buf *buf, s_tag *dest)
{
  sw r;
  sw result = 0;
  s_tag tmp = {0};
  assert(buf);
  assert(dest);
  if ((r = buf_ignore_spaces(buf)) < 0)
    return r;
  result += r;
  /* Use the PDF comment parser of this file and report its errors
     instead of turning them into a successful result. */
  if ((r = pdf_buf_parse_comments(buf)) < 0)
    return r;
  result += r;
  /* Indirect objects start with integers, so they must be tried
     before plain numbers. */
  if ((r = pdf_buf_parse_indirect_object(buf, &tmp.data.tuple)) > 0) {
    result += r;
    tmp.type = TAG_TUPLE;
    goto ok;
  }
  if ((r = pdf_buf_parse_bool(buf, &tmp.data.bool_)) > 0) {
    result += r;
    tmp.type = TAG_BOOL;
    goto ok;
  }
  if ((r = pdf_buf_parse_null(buf, &tmp)) > 0) {
    result += r;
    goto ok;
  }
  if ((r = pdf_buf_parse_number(buf, &tmp)) > 0) {
    result += r;
    goto ok;
  }
  /* "<<" must be tried before the "<" of hexadecimal strings. */
  if ((r = pdf_buf_parse_dictionnary(buf, &tmp.data.map)) > 0) {
    result += r;
    tmp.type = TAG_MAP;
    goto ok;
  }
  if ((r = pdf_buf_parse_string(buf, &tmp)) > 0) {
    result += r;
    goto ok;
  }
  if ((r = pdf_buf_parse_name(buf, &tmp.data.psym)) > 0) {
    tmp.type = TAG_PSYM;
    result += r;
    goto ok;
  }
  if ((r = pdf_buf_parse_array(buf, &tmp.data.plist)) > 0) {
    tmp.type = TAG_PLIST;
    result += r;
    goto ok;
  }
 ok:
  if (result)
    *dest = tmp;
  return result;
}
/* Parse a PDF array "[ obj obj ... ]" into a newly allocated list.
   Returns the consumed length (> 0) and writes dest on success.
   On failure the list is freed, the read position is restored and
   the failing value (0 or < 0) is returned. */
sw pdf_buf_parse_array (s_buf *buf, p_list *dest)
{
  bool end;
  sw r;
  sw result = 0;
  p_list *tail;
  p_list tmp = NULL;
  s_buf_save save = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "[")) <= 0)
    goto clean;
  result += r;
  tail = &tmp;
  while (1) {
    /* Skip white space and comments before testing for "]", so that
       "[ ]" or a trailing space does not produce an extra element. */
    if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
      result += r;
    if ((r = buf_read_1(buf, "]")) < 0)
      goto restore;
    if (r) {
      /* Count the closing bracket exactly once. */
      result += r;
      if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
        result += r;
      if (! end) {
        r = 0;
        goto restore;
      }
      break;
    }
    if (! (*tail = list_new(NULL))) {
      r = -2;
      goto restore;
    }
    if ((r = pdf_buf_parse(buf, &(*tail)->tag)) <= 0)
      goto restore;
    result += r;
    tail = &(*tail)->next.data.plist;
  }
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  list_delete_all(tmp);
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse the PDF keywords "true" or "false" followed by an object
   end (white space, comment, delimiter or end of input).
   Returns the consumed length (> 0) and writes dest on success,
   0 when the keyword is absent or not properly terminated (read
   position restored), or the negative value of the failing read. */
sw pdf_buf_parse_bool (s_buf *buf, bool *dest)
{
  bool end;
  sw r;
  sw result = 0;
  s_buf_save save = {0};
  bool tmp;
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "true")) > 0)
    tmp = true;
  else if ((r = buf_read_1(buf, "false")) > 0)
    tmp = false;
  else
    goto clean;
  result += r;
  /* The trailing length is added once only. */
  if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
    result += r;
  if (! end) {
    /* e.g. "trueish": not a boolean keyword. */
    r = 0;
    goto restore;
  }
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse a single PDF comment: '%' followed by everything up to the
   next "\n". The comment text is read and discarded.
   Returns the consumed length (> 0) when a comment was found, or the
   value of buf_read_1 (<= 0) when the input does not start with '%'.
   If reading the rest of the line fails, only the length of '%' is
   returned. */
sw pdf_buf_parse_comment (s_buf *buf)
{
  sw r;
  sw result = 0;
  s_str str = {0};
  assert(buf);
  if ((r = buf_read_1(buf, "%")) <= 0)
    return r;
  result += r;
  if ((r = buf_read_until_1_into_str(buf, "\n", &str)) < 0)
    return result;
  /* Count the comment text as consumed too; presumably r is the
     length read by buf_read_until_1_into_str, as for the other
     buf_* readers used in this file. */
  result += r;
  str_clean(&str);
  return result;
}
/* Skip any run of PDF comments and white space. Each pass tries one
   comment, then white space; the loop stops after the first pass in
   which neither consumed anything. Returns the total consumed
   length, or the negative value of the first failing call. */
sw pdf_buf_parse_comments (s_buf *buf)
{
  sw comment;
  sw spaces;
  sw total = 0;
  assert(buf);
  do {
    comment = pdf_buf_parse_comment(buf);
    if (comment < 0)
      return comment;
    total += comment;
    spaces = buf_ignore_spaces(buf);
    if (spaces < 0)
      return spaces;
    total += spaces;
  } while (comment != 0 || spaces != 0);
  return total;
}
/* Parse a PDF dictionary "<< /Name value ... >>" into a map.
   Keys and values are collected into two temporary lists which are
   passed to map_init_from_lists and then deleted on every path.
   Returns the consumed length (> 0) and writes dest on success.
   On any failure after the opening "<<" the read position is
   restored and the failing value (0 or < 0) is returned. */
sw pdf_buf_parse_dictionnary (s_buf *buf, s_map *dest)
{
  p_list keys = NULL;
  p_list *keys_tail = &keys;
  const s_sym *name;
  sw r;
  sw result = 0;
  s_buf_save save;
  s_map tmp = {0};
  p_list values = NULL;
  p_list *values_tail = &values;
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = pdf_buf_parse_token(buf, "<<")) <= 0)
    goto clean;
  result += r;
  while (1) {
    if ((r = pdf_buf_parse_token(buf, ">>")) < 0)
      goto restore;
    if (r) {
      result += r;
      break;
    }
    if ((r = pdf_buf_parse_name(buf, &name)) <= 0)
      goto restore;
    result += r;
    if (! (*keys_tail = list_new_psym(name, NULL))) {
      r = -1;
      goto restore;
    }
    if (! (*values_tail = list_new(NULL))) {
      r = -1;
      goto restore;
    }
    if ((r = pdf_buf_parse(buf, &(*values_tail)->tag)) <= 0)
      goto restore;
    result += r;
    keys_tail = &(*keys_tail)->next.data.plist;
    values_tail = &(*values_tail)->next.data.plist;
  }
  if (! map_init_from_lists(&tmp, keys, values)) {
    err_puts("pdf_buf_parse_dictionnary: map_init_from_lists");
    assert(! "pdf_buf_parse_dictionnary: map_init_from_lists");
    r = -1;
    goto restore;
  }
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  /* Do not leave a half-read dictionary consumed. */
  buf_save_restore_rpos(buf, &save);
 clean:
  list_delete_all(values);
  list_delete_all(keys);
  buf_save_clean(buf, &save);
  return r;
}
/* Parse the PDF file header "%PDF-<major>.<minor>" and copy the
   matched text into dest. The major version must be at least 1.
   Returns the consumed length (> 0) on success, the value of
   buf_read_1 (<= 0) when "%PDF-" is absent, and a value <= 0 with
   the read position restored when the version is malformed or the
   copy fails. */
sw pdf_buf_parse_file_header (s_buf *buf, s_str *dest)
{
  u16 i;
  sw r;
  sw result = 0;
  s_buf_save save;
  s_str tmp = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "%PDF-")) <= 0)
    goto clean;
  result += r;
  if ((r = buf_parse_u16_decimal(buf, &i)) <= 0 || i < 1) {
    err_puts("pdf_buf_parse_file_header: buf_parse_u16_decimal 1");
    assert(! "pdf_buf_parse_file_header: buf_parse_u16_decimal 1");
    /* A parsed but invalid major version must not be reported as a
       positive (successful) length. */
    if (r > 0)
      r = -1;
    goto restore;
  }
  result += r;
  if ((r = buf_read_1(buf, ".")) <= 0) {
    err_puts("pdf_buf_parse_file_header: buf_read_1 2");
    assert(! "pdf_buf_parse_file_header: buf_read_1 2");
    goto restore;
  }
  result += r;
  if ((r = buf_parse_u16_decimal(buf, &i)) <= 0) {
    err_puts("pdf_buf_parse_file_header: buf_parse_u16_decimal");
    assert(! "pdf_buf_parse_file_header: buf_parse_u16_decimal");
    goto restore;
  }
  result += r;
  /* The header text is the span from the saved read position. */
  if (! str_init_alloc_copy(&tmp, result, buf->ptr.pchar + save.rpos)) {
    r = -1;
    goto restore;
  }
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse the structure of a PDF file: the header, then the trailer
   (located from the end of the buffer), then the cross-reference
   table found at the trailer's startxref offset.
   Returns the accumulated length (> 0) and writes dest on success,
   a value <= 0 otherwise. Everything parsed so far is released with
   pdf_file_clean on failure. */
sw pdf_buf_parse_file (s_buf *buf, s_pdf_file *dest)
{
  sw r;
  sw result = 0;
  s_pdf_file tmp = {0};
  assert(buf);
  assert(dest);
  if ((r = pdf_buf_parse_file_header(buf, &tmp.header)) <= 0) {
    err_puts("pdf_buf_parse_file: pdf_buf_parse_file_header");
    assert(! "pdf_buf_parse_file: pdf_buf_parse_file_header");
    return r;
  }
  result += r;
  /* The comparison must apply to r, not be assigned to it:
     otherwise a failed trailer parse returns 1. */
  if ((r = pdf_buf_parse_trailer(buf, &tmp.trailer)) <= 0) {
    err_puts("pdf_buf_parse_file: pdf_buf_parse_trailer");
    assert(! "pdf_buf_parse_file: pdf_buf_parse_trailer");
    pdf_file_clean(&tmp);
    return r;
  }
  result += r;
  if ((r = buf_seek(buf, tmp.trailer.startxref, SEEK_SET)) < 0 ||
      (u64) r != tmp.trailer.startxref) {
    err_puts("pdf_buf_parse_file: buf_seek");
    assert(! "pdf_buf_parse_file: buf_seek");
    pdf_file_clean(&tmp);
    return -1;
  }
  if ((r = pdf_buf_parse_xref(buf, &tmp.xref)) <= 0) {
    err_puts("pdf_buf_parse_file: pdf_buf_parse_xref");
    assert(! "pdf_buf_parse_file: pdf_buf_parse_xref");
    pdf_file_clean(&tmp);
    return -1;
  }
  result += r;
  *dest = tmp;
  return result;
}
/* Parse a PDF real number: optional sign, one or more decimal
   digits, optionally '.' and more digits, followed by an object end
   (white space, comment, delimiter or end of input).
   Returns the consumed length (> 0) and writes dest on success.
   Returns 0 when no digit follows the sign and -1 when the number
   is not properly terminated (read position restored in both
   cases), or the negative value of a failing sign read. */
sw pdf_buf_parse_float (s_buf *buf, f64 *dest)
{
  u8 digit;
  bool end;
  f64 i;
  bool negative = false;
  sw r;
  sw result = 0;
  s_buf_save save;
  f64 tmp;
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "+")) < 0)
    goto clean;
  if (r)
    result += r;
  else {
    if ((r = buf_read_1(buf, "-")) < 0)
      goto clean;
    if (r) {
      negative = true;
      result += r;
    }
  }
  if ((r = buf_parse_digit_dec(buf, &digit)) <= 0)
    goto restore;
  result += r;
  tmp = digit;
  while ((r = buf_parse_digit_dec(buf, &digit)) > 0) {
    result += r;
    tmp = tmp * 10 + digit;
  }
  /* A negative r here is not treated as fatal: the number simply
     ends and the terminator check below decides. */
  if (r == 0 &&
      (r = buf_read_1(buf, ".")) > 0) {
    result += r;
    i = 10;
    while ((r = buf_parse_digit_dec(buf, &digit)) > 0) {
      result += r;
      tmp += (f64) digit / i;
      i *= 10;
    }
  }
  /* The terminator is required with or without a fractional part,
     and its length is added once only. */
  if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
    result += r;
  if (! end) {
    r = -1;
    goto restore;
  }
  if (negative)
    tmp = -tmp;
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse either an indirect object definition "N G obj <object>" or
   an indirect reference "N G R".
   On success dest is a tuple whose first three elements are the
   symbol indirect_object, the object number and the generation
   number; a definition has a fourth element holding the stream or
   object that follows "obj".
   Returns the consumed length (> 0) on success; otherwise the read
   position is restored and the failing value (0 or < 0) returned.
   NOTE(review): the "endobj" keyword is not consumed here. */
sw pdf_buf_parse_indirect_object (s_buf *buf, s_tuple *dest)
{
  s_tag generation_number = {0};
  s_tag object_number = {0};
  sw r;
  sw result = 0;
  s_buf_save save = {0};
  const s_sym *sym_indirect_object = sym_1("indirect_object");
  s_tuple tmp = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = pdf_buf_parse_integer(buf, &object_number)) <= 0)
    goto clean;
  result += r;
  if ((r = pdf_buf_parse_integer(buf, &generation_number)) <= 0)
    goto restore;
  result += r;
  if ((r = pdf_buf_parse_token(buf, "obj")) < 0)
    goto restore;
  if (r) {
    result += r;
    if (! tuple_init(&tmp, 4)) {
      r = -1;
      goto restore;
    }
    /* A stream starts with a dictionary, so try it first and fall
       back to any other object. */
    if ((r = pdf_buf_parse_stream(buf, tmp.tag + 3)) < 0)
      goto restore;
    if (! r &&
        (r = pdf_buf_parse(buf, tmp.tag + 3)) <= 0)
      goto restore;
    /* Count the object body as consumed. */
    result += r;
    goto ok;
  }
  if ((r = pdf_buf_parse_token(buf, "R")) < 0)
    goto restore;
  if (r) {
    result += r;
    if (! tuple_init(&tmp, 3)) {
      r = -1;
      goto restore;
    }
    goto ok;
  }
  /* Two integers followed by neither "obj" nor "R": r is 0. */
  goto restore;
 ok:
  tag_init_psym(tmp.tag, sym_indirect_object);
  tmp.tag[1] = object_number;
  tmp.tag[2] = generation_number;
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  tuple_clean(&tmp);
  tag_clean(&object_number);
  tag_clean(&generation_number);
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse a PDF integer: optional sign, decimal digits, followed by
   an object end (white space, comment, delimiter or end of input).
   The value is stored as TAG_INTEGER and then passed to
   tag_integer_reduce.
   Returns the consumed length (> 0) and writes dest on success.
   Returns -1 when the digits are not properly terminated (for
   example "1.5"), otherwise the failing value (0 or < 0). The read
   position is restored on failure. */
sw pdf_buf_parse_integer (s_buf *buf, s_tag *dest)
{
  bool end;
  bool negative = false;
  sw r;
  sw result = 0;
  s_buf_save save = {0};
  s_tag tmp = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "+")) < 0)
    goto clean;
  if (! r) {
    if ((r = buf_read_1(buf, "-")) < 0)
      goto clean;
    if (r)
      negative = true;
  }
  result += r;
  if ((r = buf_parse_integer_decimal(buf, negative,
                                     &tmp.data.integer)) <= 0)
    goto restore;
  result += r;
  tmp.type = TAG_INTEGER;
  if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
    result += r;
  if (! end) {
    /* Release the integer that was just parsed; presumably
       buf_parse_integer_decimal initialized it and it owns
       memory — TODO confirm. */
    tag_clean(&tmp);
    r = -1;
    goto restore;
  }
  tag_integer_reduce(&tmp);
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
sw pdf_buf_parse_name (s_buf *buf, p_sym *dest)
{
char a[BUF_SIZE];
character c;
u8 d;
u8 d1;
bool end;
sw r;
sw result = 0;
s_buf_save save = {0};
s_str str = {0};
p_sym tmp = {0};
s_buf tmp_buf;
assert(buf);
assert(dest);
buf_save_init(buf, &save);
if ((r = buf_read_1(buf, "/")) <= 0)
goto clean;
result += r;
buf_init(&tmp_buf, false, sizeof(a) - 1, a);
while (1) {
if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
result += r;
if (end)
goto ok;
if ((r = buf_read_character_utf8(buf, &c)) <= 0)
break;
result += r;
if (c == '#') {
if ((r = buf_read_character_utf8(buf, &c)) <= 0)
break;
result += r;
if (c >= '0' && c <= '9')
d = c - '0';
else if (c >= 'A' && c <= 'F')
d = c - 'A';
else if (c >= 'a' && c <= 'f')
d = c - 'a';
else {
r = 0;
goto restore;
}
if ((r = buf_read_character_utf8(buf, &c)) <= 0)
break;
result += r;
if (c >= '0' && c <= '9')
d1 = c - '0';
else if (c >= 'A' && c <= 'F')
d1 = c - 'A';
else if (c >= 'a' && c <= 'f')
d1 = c - 'a';
else {
r = 0;
goto restore;
}
c = d * 0x10 + d1;
}
if ((r = buf_write_character_utf8(&tmp_buf, c)) <= 0)
goto restore;
}
ok:
if (! buf_read_to_str(&tmp_buf, &str)) {
r = -2;
goto clean;
}
if (! (tmp = str_to_sym(&str))) {
r = -2;
str_clean(&str);
goto clean;
}
str_clean(&str);
*dest = tmp;
r = result;
goto clean;
restore:
buf_save_restore_rpos(buf, &save);
clean:
buf_save_clean(buf, &save);
return r;
}
/* Parse the PDF keyword "null". When the token is found dest is
   initialized with tag_init_void. Returns whatever
   pdf_buf_parse_token returned. */
sw pdf_buf_parse_null (s_buf *buf, s_tag *dest)
{
  sw r = pdf_buf_parse_token(buf, "null");
  if (r <= 0)
    return r;
  tag_init_void(dest);
  return r;
}
/* Parse a PDF numeric object: an integer when possible, otherwise a
   real number stored as TAG_F64.
   Returns the consumed length (> 0) and writes dest on success,
   0 when neither form matches. */
sw pdf_buf_parse_number (s_buf *buf, s_tag *dest)
{
  sw r;
  s_tag tmp = {0};
  assert(buf);
  assert(dest);
  if ((r = pdf_buf_parse_integer(buf, &tmp)) <= 0) {
    /* Only a positive result means a real number was parsed. */
    if ((r = pdf_buf_parse_float(buf, &tmp.data.f64)) <= 0)
      return 0;
    tmp.type = TAG_F64;
  }
  *dest = tmp;
  return r;
}
/* Skip white space and comments after an object and tell whether
   the object is terminated at this point.
   *end is set to true when skipping or peeking fails, when the next
   character is a delimiter, or when anything was skipped; it is set
   to false otherwise. Returns the skipped length, never a negative
   value. */
sw pdf_buf_parse_object_end (s_buf *buf, bool *end)
{
  character next;
  sw skipped = 0;
  sw r;
  r = buf_ignore_spaces(buf);
  if (r < 0) {
    *end = true;
    return skipped;
  }
  skipped += r;
  r = pdf_buf_parse_comments(buf);
  if (r < 0) {
    *end = true;
    return skipped;
  }
  skipped += r;
  r = buf_peek_character_utf8(buf, &next);
  if (r < 0 || (r > 0 && pdf_character_is_delimiter(next)))
    *end = true;
  else
    *end = (skipped > 0);
  return skipped;
}
/* Move the read position backwards, one step at a time, until it
   sits on white space or comments immediately followed by the
   "trailer" keyword, then consume both.
   The search starts 10 positions before the current read position.
   Returns the length consumed by the successful attempt (> 0), or
   -1 when the start of the buffer is reached. */
sw pdf_buf_parse_rewind_to_trailer (s_buf *buf)
{
  bool end;
  sw pos;
  sw r;
  sw result;
  assert(buf);
  if (false) {
    err_write_1("pdf_buf_parse_rewind_to_trailer: rpos = ");
    err_inspect_sw_decimal(buf->rpos);
    err_write_1("\n");
  }
  if (buf->rpos < 9)
    return -1;
  buf->rpos -= 9;
  while (1) {
    if (! buf->rpos)
      return -1;
    buf->rpos--;
    pos = buf->rpos;
    /* Start each attempt from zero so that failed attempts do not
       inflate the returned length. */
    result = 0;
    if ((r = pdf_buf_parse_object_end(buf, &end)) > 0 && end) {
      result += r;
      if ((r = pdf_buf_parse_token(buf, "trailer")) > 0)
        return result + r;
    }
    buf->rpos = pos;
  }
}
/* Parse a PDF stream: a dictionary, the keyword "stream", an
   optional "\r", a mandatory "\n", then data up to "endstream".
   dest becomes a PDF.Stream struct holding the dictionary and the
   read offset of the first data byte; the data itself is skipped.
   Returns the consumed length (> 0) on success, 0 when the input is
   not a stream, < 0 on error. The read position is restored on
   every failure after the dictionary was parsed. */
sw pdf_buf_parse_stream (s_buf *buf, s_tag *dest)
{
  s_pdf_stream *pdf_stream;
  s_map map = {0};
  sw r;
  sw result = 0;
  s_buf_save save;
  const s_sym *sym_PDF_Stream = sym_1("PDF.Stream");
  s_tag tmp = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = pdf_buf_parse_dictionnary(buf, &map)) <= 0)
    goto clean;
  result += r;
  if ((r = buf_read_1(buf, "stream")) <= 0)
    goto restore_map;
  result += r;
  if ((r = buf_read_1(buf, "\r")) < 0)
    goto restore_map;
  result += r;
  if ((r = buf_read_1(buf, "\n")) <= 0)
    goto restore_map;
  result += r;
  if (! tag_init_pstruct(&tmp, sym_PDF_Stream) ||
      ! struct_allocate(tmp.data.pstruct) ||
      ! (pdf_stream = tmp.data.pstruct->data)) {
    tag_clean(&tmp);
    r = -1;
    goto restore_map;
  }
  /* From here on the dictionary belongs to the struct in tmp. */
  pdf_stream->dictionnary = map;
  if (! buf_tell_r(buf, &pdf_stream->offset)) {
    tag_clean(&tmp);
    r = -1;
    goto restore;
  }
  if ((r = pdf_buf_ignore_until_token(buf, "endstream")) <= 0) {
    tag_clean(&tmp);
    goto restore;
  }
  result += r;
  *dest = tmp;
  r = result;
  goto clean;
 restore_map:
  map_clean(&map);
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse a PDF string object, either literal "(...)" or hexadecimal
   "<...>", into a TAG_STR tag. The literal form is tried first.
   Returns the value of the parser that matched (> 0) and writes
   dest, 0 when neither form matches, or the negative value of the
   parser that failed. */
sw pdf_buf_parse_string (s_buf *buf, s_tag *dest)
{
  sw r;
  s_buf_save save = {0};
  s_str str = {0};
  s_tag tmp = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  r = pdf_buf_parse_string_paren(buf, &str);
  if (r == 0)
    r = pdf_buf_parse_string_hex(buf, &str);
  if (r <= 0)
    goto clean;
  /* Disabled debug trace. */
  if (false) {
    err_write_1("pdf_buf_parse_string: str = ");
    err_inspect_str(&str);
    err_write_1("\n");
  }
  // TODO: fix
  /* Disabled: evaluating the string contents. */
  if (false && ! str_parse_eval(&str, &tmp)) {
    err_puts("pdf_buf_parse_string: str_parse_eval");
    assert(! "pdf_buf_parse_string: str_parse_eval");
    r = -1;
    str_clean(&str);
    goto restore;
  }
  tmp.type = TAG_STR;
  tmp.data.str = str;
  *dest = tmp;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
sw pdf_buf_parse_string_hex (s_buf *buf, s_str *dest)
{
char a[BUF_SIZE] = {0};
character c;
u8 d;
u8 d1;
bool end;
sw r;
sw result = 0;
s_buf_save save = {0};
s_str tmp = {0};
s_buf tmp_buf = {0};
assert(buf);
assert(dest);
buf_save_init(buf, &save);
if ((r = buf_read_1(buf, "<")) <= 0)
goto clean;
result += r;
buf_init(&tmp_buf, false, sizeof(a) - 1, a);
while (1) {
if ((r = buf_read_character_utf8(buf, &c)) <= 0)
goto restore;
result += r;
if (c == '>')
break;
if (c >= '0' && c <= '9')
d = c - '0';
else if (c >= 'a' && c <= 'f')
d = c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
d = c - 'A' + 10;
else {
r = -1;
goto restore;
}
if ((r = buf_read_character_utf8(buf, &c)) <= 0)
goto restore;
result += r;
if (c >= '0' && c <= '9')
d1 = c - '0';
else if (c >= 'a' && c <= 'f')
d1 = c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
d1 = c - 'A' + 10;
else {
r = -1;
goto restore;
}
if ((r = buf_write_u8(&tmp_buf, (d << 4) | d1)) <= 0)
goto restore;
}
if ((r = pdf_buf_parse_object_end(buf, &end)) > 0)
result += r;
if (! end) {
r = -1;
goto restore;
}
if (! buf_read_to_str(&tmp_buf, &tmp)) {
r = -1;
goto restore;
}
buf_clean(&tmp_buf);
*dest = tmp;
r = result;
goto clean;
restore:
buf_clean(&tmp_buf);
buf_save_restore_rpos(buf, &save);
clean:
buf_save_clean(buf, &save);
return r;
}
// parentheses must be balanced or backslashed
/* Parse a PDF literal string "(...)" into dest. Unescaped
   parentheses nest and are kept in the result; the string ends at
   the ')' matching the opening one.
   Backslash escapes: \n \r \t \b \f, \( \) \\, an octal character
   code of one to three digits (only the low 8 bits are kept), and
   backslash followed by "\n" which continues the line and produces
   nothing. For any other character the backslash is dropped and the
   character is kept.
   Returns the consumed length (> 0) and writes dest on success,
   the value of buf_read_1 (<= 0) when the input does not start with
   '(', and a value <= 0 with the read position restored on any
   read or write failure. */
sw pdf_buf_parse_string_paren (s_buf *buf, s_str *dest)
{
  char a[BUF_SIZE] = {0};
  character c;
  character c1;
  u8 i;
  s32 paren = 0;
  sw r;
  sw result = 0;
  s_buf_save save = {0};
  s_str tmp = {0};
  s_buf tmp_buf = {0};
  assert(buf);
  assert(dest);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, "(")) <= 0)
    goto clean;
  result += r;
  paren++;
  buf_init(&tmp_buf, false, sizeof(a) - 1, a);
  while (1) {
    if ((r = buf_read_character_utf8(buf, &c)) <= 0)
      goto restore;
    result += r;
    if (c == '(')
      paren++;
    else if (c == ')') {
      paren--;
      if (! paren)
        break;
    }
    else if (c == '\\') {
      if ((r = buf_read_character_utf8(buf, &c)) <= 0)
        goto restore;
      result += r;
      switch (c) {
      case 'n': c = '\n'; break;
      case 'r': c = '\r'; break;
      case 't': c = '\t'; break;
      case 'b': c = '\b'; break;
      case 'f': c = '\f'; break;
      case '0': case '1': case '2': case '3':
      case '4': case '5': case '6': case '7':
        c -= '0';
        i = 0;
        /* Peek first so that a character which is not an octal
           digit stays in the input instead of being lost. */
        while (i < 2 &&
               buf_peek_character_utf8(buf, &c1) > 0 &&
               c1 >= '0' && c1 <= '7') {
          if ((r = buf_read_character_utf8(buf, &c1)) <= 0)
            goto restore;
          result += r;
          c = c * 010 + (c1 - '0');
          i++;
        }
        /* High-order overflow is ignored. */
        c &= 0xFF;
        break;
      case '\n':
        /* Line continuation: nothing is written. */
        continue;
      default:
        /* '(' , ')' , '\\' and unknown escapes: keep c itself. */
        break;
      }
    }
    if ((r = buf_write_character_utf8(&tmp_buf, c)) <= 0)
      goto restore;
  }
  /* NOTE(review): the result of buf_read_to_str is compared with
     < 0 here; confirm this matches its declared return type. */
  if ((r = buf_read_to_str(&tmp_buf, &tmp)) < 0)
    goto restore;
  *dest = tmp;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Parse the literal keyword pchar followed by an object end (white
   space, comment, delimiter or end of input). The trailing white
   space and comments are consumed as well.
   Returns the consumed length (> 0) on success, 0 when the keyword
   is absent or runs into other characters (read position restored),
   or the value of buf_read_1 when it is <= 0. */
sw pdf_buf_parse_token (s_buf *buf, const char *pchar)
{
  bool end;
  sw r;
  sw result = 0;
  s_buf_save save;
  assert(buf);
  assert(pchar);
  buf_save_init(buf, &save);
  if ((r = buf_read_1(buf, pchar)) <= 0)
    goto clean;
  result += r;
  if ((r = pdf_buf_parse_object_end(buf, &end)) < 0)
    goto restore;
  if (! end) {
    r = 0;
    goto restore;
  }
  /* Report what was skipped after the keyword too, like the other
     parsers of this file do. */
  result += r;
  r = result;
  goto clean;
 restore:
  buf_save_restore_rpos(buf, &save);
 clean:
  buf_save_clean(buf, &save);
  return r;
}
/* Locate and parse the PDF trailer at the end of the buffer:
   "trailer", the trailer dictionary, "startxref" and the offset of
   the cross-reference table.
   The buffer is positioned so that its last buf->size bytes (or the
   whole content if smaller) are loaded, then searched backwards for
   the "trailer" keyword. Nothing may remain unread after the
   startxref value once trailing white space and comments have been
   skipped.
   Returns the accumulated length (> 0) and writes dest on success,
   a value <= 0 otherwise; the dictionary is released on failure. */
sw pdf_buf_parse_trailer (s_buf *buf, s_pdf_trailer *dest)
{
  u64 pos;
  sw r;
  sw result = 0;
  sw size;
  const s_sym *sym_U64 = &g_sym_U64;
  s_tag tag = {0};
  s_pdf_trailer tmp = {0};
  assert(buf);
  assert(dest);
  if (! buf_total_size(buf, &pos) || ! pos) {
    err_puts("pdf_buf_parse_trailer: invalid buf_total_size");
    assert(! "pdf_buf_parse_trailer: invalid buf_total_size");
    return -1;
  }
  if (pos < buf->size) {
    size = pos;
    pos = 0;
  }
  else {
    size = buf->size;
    pos -= size;
  }
  if ((r = buf_seek(buf, pos, SEEK_SET)) < 0 ||
      (uw) r != pos) {
    err_puts("pdf_buf_parse_trailer: buf_seek");
    assert(! "pdf_buf_parse_trailer: buf_seek");
    /* A seek that landed elsewhere must not look like success. */
    return r < 0 ? r : -1;
  }
  if (buf_refill(buf, size) < size) {
    err_puts("pdf_buf_parse_trailer: buf_refill");
    assert(! "pdf_buf_parse_trailer: buf_refill");
    return -1;
  }
  /* Start the backwards search from the end of the loaded data. */
  buf->rpos = buf->wpos;
  if ((r = pdf_buf_parse_rewind_to_trailer(buf)) <= 0) {
    err_puts("pdf_buf_parse_trailer: pdf_buf_parse_rewind_to_trailer");
    assert(! "pdf_buf_parse_trailer: pdf_buf_parse_rewind_to_trailer");
    return r;
  }
  result += r;
  if ((r = pdf_buf_parse_dictionnary(buf, &tmp.dictionnary)) <= 0) {
    err_puts("pdf_buf_parse_trailer: pdf_buf_parse_dictionnary");
    err_inspect_buf(buf);
    assert(! "pdf_buf_parse_trailer: pdf_buf_parse_dictionnary");
    return r;
  }
  result += r;
  if ((r = pdf_buf_parse_token(buf, "startxref")) <= 0) {
    err_puts("pdf_buf_parse_trailer: pdf_buf_parse_token startxref");
    assert(! "pdf_buf_parse_trailer: pdf_buf_parse_token startxref");
    goto ko;
  }
  result += r;
  if ((r = pdf_buf_parse_integer(buf, &tag)) <= 0) {
    err_puts("pdf_buf_parse_trailer: pdf_buf_parse_integer");
    assert(! "pdf_buf_parse_trailer: pdf_buf_parse_integer");
    goto ko;
  }
  result += r;
  if (! u64_init_cast(&tmp.startxref, &sym_U64, &tag)) {
    err_puts("pdf_buf_parse_trailer: u64_init_cast");
    assert(! "pdf_buf_parse_trailer: u64_init_cast");
    tag_clean(&tag);
    r = -1;
    goto ko;
  }
  if (false) {
    err_write_1("pdf_buf_parse_trailer: startxref = ");
    err_inspect_u64_decimal(tmp.startxref);
    err_write_1("\n");
  }
  tag_clean(&tag);
  if (buf->rpos < buf->wpos) {
    err_puts("pdf_buf_parse_trailer: data after %%EOF");
    assert(! "pdf_buf_parse_trailer: data after %%EOF");
    r = -1;
    goto ko;
  }
  *dest = tmp;
  return result;
 ko:
  /* The dictionary was parsed successfully and is owned by tmp. */
  map_clean(&tmp.dictionnary);
  return r;
}
/* Parse a PDF cross-reference table: the keyword "xref" followed by
   one or more subsections. A subsection is a header line
   "<first object number> <count>" followed by <count> entries of
   the form "<10-digit offset> <5-digit generation> <type> " ended
   by " \n" or "\r\n".
   dest becomes a map from {object number, generation number}
   tuples to offsets. The entry type character is read but not
   interpreted here.
   Parsing stops at the first line which is not a subsection header;
   the read position is left just before that line.
   Returns a length > 0 and writes dest on success, the value of
   pdf_buf_parse_token (<= 0) when "xref" is absent, -1 on a
   malformed entry, an empty table or an allocation failure. */
sw pdf_buf_parse_xref (s_buf *buf, s_map *dest)
{
  char c;
  u32 count;
  u16 generation_number;
  u32 i;
  p_list keys;
  p_list *keys_tail;
  u32 object_number;
  u32 offset;
  sw r;
  sw r1;
  sw r2;
  sw r3;
  sw r4;
  sw r5;
  sw result = 0;
  sw result_inner;
  s_buf_save save;
  s_map tmp = {0};
  p_list values;
  p_list *values_tail;
  assert(buf);
  assert(dest);
  if ((r = pdf_buf_parse_token(buf, "xref")) <= 0) {
    err_puts(" pdf_buf_parse_xref: pdf_buf_parse_token");
    assert(! " pdf_buf_parse_xref: pdf_buf_parse_token");
    return r;
  }
  result += r;
  buf_save_init(buf, &save);
  result_inner = result;
  keys = NULL;
  keys_tail = &keys;
  values = NULL;
  values_tail = &values;
  while (1) {
    /* Remember where this subsection starts so that a line which is
       not a subsection header can be given back. */
    buf_save_update(buf, &save);
    result = result_inner;
    if ((r = buf_parse_u32_decimal(buf, &object_number)) <= 0 ||
        (r1 = buf_read_1(buf, " ")) <= 0 ||
        (r2 = buf_parse_u32_decimal(buf, &count)) <= 0 ||
        ((r3 = buf_read_1(buf, "\n")) <= 0 &&
         (r3 = buf_read_1(buf, "\r\n")) <= 0)) {
      break;
    }
    result_inner += r + r1 + r2 + r3;
    i = 0;
    while (i < count) {
      if ((r = buf_parse_u32_decimal(buf, &offset)) != 10 ||
          (r1 = buf_read_1(buf, " ")) != 1 ||
          (r2 = buf_parse_u16_decimal(buf, &generation_number)) != 5 ||
          (r3 = buf_read_1(buf, " ")) != 1 ||
          (r4 = buf_read_u8(buf, (u8 *) &c)) != 1 ||
          ((r5 = buf_read_1(buf, " \n")) <= 0 &&
           (r5 = buf_read_1(buf, "\r\n")) != 2)) {
        err_puts("pdf_buf_parse_xref: invalid xref");
        err_inspect_buf(buf);
        assert(! "pdf_buf_parse_xref: invalid xref");
        goto error;
      }
      result_inner += r + r1 + r2 + r3 + r4 + r5;
      if (! (*keys_tail = list_new_tuple(2, NULL)) ||
          ! (*values_tail = list_new_u64(offset, NULL)))
        goto error;
      tag_init_u32((*keys_tail)->tag.data.tuple.tag, object_number);
      tag_init_u16((*keys_tail)->tag.data.tuple.tag + 1, generation_number);
      keys_tail = &(*keys_tail)->next.data.plist;
      values_tail = &(*values_tail)->next.data.plist;
      i++;
      object_number++;
    }
  }
  buf_save_restore_rpos(buf, &save);
  buf_save_clean(buf, &save);
  if (! keys || ! values) {
    err_puts("pdf_buf_parse_xref: empty xref");
    err_inspect_buf(buf);
    assert(! "pdf_buf_parse_xref: empty xref");
    list_delete_all(keys);
    list_delete_all(values);
    return -1;
  }
  if (! map_init_from_lists(&tmp, keys, values)) {
    err_puts("pdf_buf_parse_xref: map_init_from_lists");
    assert(! "pdf_buf_parse_xref: map_init_from_lists");
    list_delete_all(keys);
    list_delete_all(values);
    return -1;
  }
  /* The temporary lists are released once the map is built, as
     pdf_buf_parse_dictionnary does; presumably map_init_from_lists
     copies their contents — TODO confirm. */
  list_delete_all(keys);
  list_delete_all(values);
  *dest = tmp;
  return result;
 error:
  list_delete_all(keys);
  list_delete_all(values);
  buf_save_restore_rpos(buf, &save);
  buf_save_clean(buf, &save);
  return -1;
}
/* Return true when c ends the token before it: c is white space
   (per character_is_space) or one of the PDF delimiter characters
   ( ) < > [ ] { } / %.
   Opening delimiters are included so that, for example, a name
   directly followed by '[' , '(' or '<' ends at that character. */
bool pdf_character_is_delimiter (character c)
{
  switch (c) {
  case '(':
  case ')':
  case '<':
  case '>':
  case '[':
  case ']':
  case '{':
  case '}':
  case '/':
  case '%':
    return true;
  default:
    return character_is_space(c);
  }
}