Hash :
df9634eb
Author :
Thomas de Grivel
Date :
2023-12-14T01:13:47
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
/* c3
* Copyright 2022,2023 kmx.io <contact@kmx.io>
*
* Permission is hereby granted to use this software granted the above
* copyright notice and this permission paragraph are included in all
* copies and substantial portions of this software.
*
* THIS SOFTWARE IS PROVIDED "AS-IS" WITHOUT ANY GUARANTEE OF
* PURPOSE AND PERFORMANCE. IN NO EVENT WHATSOEVER SHALL THE
* AUTHOR BE CONSIDERED LIABLE FOR THE USE AND PERFORMANCE OF
* THIS SOFTWARE.
*/
#include <assert.h>
#include "character.h"
#include "str.h"
#include "ucd.h"
/* Returns the character obtained by calling str_peek_character_utf8
 * on a temporary s_str initialized from the C string p.
 *
 * p must not be NULL (checked with assert).
 *
 * The result starts out as -1, the value that character_utf8 and
 * character_utf8_size treat as an invalid character, so that a
 * well-defined value is returned even if the peek does not store
 * anything (presumably on empty or malformed input -- confirm against
 * str.h). */
character character_1 (const s8 *p)
{
  character c = -1;
  s_str stra;
  assert(p);
  /* NOTE(review): the NULL second argument presumably means there is
   * no buffer to free -- confirm against str_init_1. */
  str_init_1(&stra, NULL, p);
  str_peek_character_utf8(&stra, &c);
  return c;
}
/* Copies the character pointed to by src into *c.
 *
 * Both pointers must be non-NULL (checked with assert).
 * Returns c. */
character * character_init_copy (character *c, const character *src)
{
  character value;
  assert(c);
  assert(src);
  value = *src;
  *c = value;
  return c;
}
/* Returns true if c is one of the ASCII digits '0' through '9'.
 * Only this ASCII range is tested; g_ucd is not consulted. */
bool character_is_digit (character c)
{
  if (c < '0')
    return 0;
  return c <= '9';
}
/* Returns true if c indexes an entry of g_ucd whose flags have
 * UCD_LETTER_LOWERCASE set. Values outside [0, UCD_MAX) yield false. */
bool character_is_lowercase (character c)
{
  /* Bounds check first: g_ucd must only be indexed inside the table. */
  if (c < 0 || c >= UCD_MAX)
    return 0;
  return (g_ucd[c].flags & UCD_LETTER_LOWERCASE) != 0;
}
/* Returns true if c indexes an entry of g_ucd whose flags intersect
 * the letter, mark, number, punctuation, symbol or space-separator
 * flags. Values outside [0, UCD_MAX) yield false. */
bool character_is_printable (character c)
{
  const u64 printable_mask =
    UCD_LETTER |
    UCD_MARK |
    UCD_NUMBER |
    UCD_PUNCTUATION |
    UCD_SYMBOL |
    UCD_SEPARATOR_SPACE;
  /* Bounds check first: g_ucd must only be indexed inside the table. */
  if (c < 0 || c >= UCD_MAX)
    return 0;
  return (g_ucd[c].flags & printable_mask) != 0;
}
/* Returns true if c indexes an entry of g_ucd whose flags have
 * UCD_OTHER_CONTROL or UCD_SEPARATOR_SPACE set, i.e. entries flagged
 * as control are reported as space too.
 * Values outside [0, UCD_MAX) yield false. */
bool character_is_space (character c)
{
  /* Bounds check first: g_ucd must only be indexed inside the table. */
  if (c < 0 || c >= UCD_MAX)
    return 0;
  return (g_ucd[c].flags &
          (UCD_OTHER_CONTROL | UCD_SEPARATOR_SPACE)) != 0;
}
/* Returns true if c indexes an entry of g_ucd whose flags have
 * UCD_LETTER_UPPERCASE set. Values outside [0, UCD_MAX) yield false. */
bool character_is_uppercase (character c)
{
  /* Bounds check first: g_ucd must only be indexed inside the table. */
  if (c < 0 || c >= UCD_MAX)
    return 0;
  return (g_ucd[c].flags & UCD_LETTER_UPPERCASE) != 0;
}
/* Returns c with its case switched, using the to_lower and to_upper
 * mappings stored in g_ucd.
 *
 * - c outside [0, UCD_MAX): returned unchanged.
 * - entry has a non-zero to_lower: that mapping is returned.
 * - otherwise, entry has a non-zero to_upper: that mapping is returned.
 * - no mapping: c is returned unchanged.
 *
 * The two mappings are examined separately instead of being OR-ed
 * together: should an entry define both (Unicode titlecase letters
 * have both a lowercase and an uppercase mapping), a bitwise OR of
 * two code points would not be a meaningful character. */
character character_switch_case (character c)
{
  character lower;
  character upper;
  /* Bounds check first: g_ucd must only be indexed inside the table. */
  if (c < 0 || c >= UCD_MAX)
    return c;
  lower = g_ucd[c].to_lower;
  if (lower)
    return lower;
  upper = g_ucd[c].to_upper;
  if (upper)
    return upper;
  return c;
}
/* Returns the to_lower mapping stored in g_ucd for c.
 * c is returned unchanged when it lies outside [0, UCD_MAX) or when
 * its entry has a zero to_lower. */
character character_to_lower (character c)
{
  /* Short-circuit order matters: the table is only read once c is
   * known to be a valid index. */
  if (c < 0 || c >= UCD_MAX || ! g_ucd[c].to_lower)
    return c;
  return g_ucd[c].to_lower;
}
/* Returns the to_upper mapping stored in g_ucd for c.
 * c is returned unchanged when it lies outside [0, UCD_MAX) or when
 * its entry has a zero to_upper. */
character character_to_upper (character c)
{
  /* Short-circuit order matters: the table is only read once c is
   * known to be a valid index. */
  if (c < 0 || c >= UCD_MAX || ! g_ucd[c].to_upper)
    return c;
  return g_ucd[c].to_upper;
}
/* Encodes c as UTF-8 into dest and returns the number of bytes
 * written (1 to 4). No terminating NUL is written.
 *
 * Returns -1 without touching dest when c is -1 or when c, viewed as
 * an unsigned 64-bit value, is 0x110000 or above.
 *
 * dest must have room for the bytes written; no size is passed in, so
 * nothing is checked here. Values in the range 0xD800..0xDFFF are not
 * rejected: they take the three-byte branch like any other value
 * below 0x10000.
 *
 * Byte layout:
 *   1 byte : 0xxxxxxx
 *   2 bytes: 110xxxxx 10xxxxxx
 *   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
sw character_utf8 (character c, s8 *dest)
{
  u64 code;
  if (c == -1)
    return -1;
  /* Range tests are done on the unsigned 64-bit view of c. */
  code = (u64) c;
  if (code < 0x80) {
    dest[0] = (s8) c;
    return 1;
  }
  if (code < 0x800) {
    dest[0] = 0xC0 | ((c >> 6) & 0x1F);
    dest[1] = 0x80 | (c & 0x3F);
    return 2;
  }
  if (code < 0x10000) {
    dest[0] = 0xE0 | ((c >> 12) & 0x0F);
    dest[1] = 0x80 | ((c >> 6) & 0x3F);
    dest[2] = 0x80 | (c & 0x3F);
    return 3;
  }
  if (code < 0x110000) {
    dest[0] = 0xF0 | ((c >> 18) & 0x07);
    dest[1] = 0x80 | ((c >> 12) & 0x3F);
    dest[2] = 0x80 | ((c >> 6) & 0x3F);
    dest[3] = 0x80 | (c & 0x3F);
    return 4;
  }
  return -1;
}
/* Returns the number of bytes (1 to 4) that character_utf8 writes for
 * c, using the same thresholds.
 *
 * Returns -1 when c is -1 or when c, viewed as an unsigned 64-bit
 * value, is 0x110000 or above. */
sw character_utf8_size (character c)
{
  u64 code;
  if (c == -1)
    return -1;
  code = (u64) c;
  /* Thresholds are tested from the largest down. */
  if (code >= 0x110000)
    return -1;
  if (code >= 0x10000)
    return 4;
  if (code >= 0x800)
    return 3;
  if (code >= 0x80)
    return 2;
  return 1;
}