Branch
Hash :
0a577a09
Author :
Date :
2023-11-07T12:58:41
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
/*
* Copyright © 2023 Pierre Le Marre <dev@wismill.eu>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef COMPOSE_DUMP_H
#define COMPOSE_DUMP_H
#include "config.h"
#include <stdlib.h>
#include "src/utils.h"
/* Ad-hoc escaping for UTF-8 string
*
* Note that it only escapes the strict minimum to get a valid Compose file.
* It also escapes hexadecimal digits after an hexadecimal escape. This is not
* strictly needed by the current implementation: "\x0abcg" parses as "␊bcg",
* but better be cautious than sorry and produce "\x0a\x62\x63g" instead.
* In the latter string there is no ambiguity and no need to know the maximum
* number of digits supported by the escape sequence.
*/
static inline char*
escape_utf8_string_literal(const char *from)
{
const size_t length = strlen(from);
/* Longest escape is converting ASCII character to "\xNN" */
char* to = calloc(4 * length + 1, sizeof(to));
if (!to)
return NULL;
size_t t = 0;
bool previous_is_hex_escape = false;
uint8_t nbytes = 0;
for (size_t f = 0; f < length;) {
if ((unsigned char) from[f] < 0x80) {
/* ASCII */
if (from[f] <= 0x10 || from[f] == 0x7f ||
(is_xdigit(from[f]) && previous_is_hex_escape))
{
/* Control character or
hexadecimal digit following an hexadecimal escape */
snprintf_safe(&to[t], 5, "\\x%02x", from[f]);
t += 4;
previous_is_hex_escape = true;
} else if (from[f] == '"' || from[f] == '\\') {
/* Quote and backslash */
snprintf_safe(&to[t], 3, "\\%c", from[f]);
t += 2;
previous_is_hex_escape = false;
} else {
/* Other characters */
to[t++] = from[f];
previous_is_hex_escape = false;
}
f++;
continue;
}
/* Test next byte for the next Unicode codepoint’s bytes count */
else if ((unsigned char) from[f] < 0xe0)
nbytes = 2;
else if ((unsigned char) from[f] < 0xf0)
nbytes = 3;
else
nbytes = 4;
memcpy(&to[t], &from[f], nbytes);
t += nbytes;
f += nbytes;
previous_is_hex_escape = false;
}
to[t++] = '\0';
return realloc(to, t);
}
#endif