Hash :
7b089321
Author :
Date :
2025-01-01T09:24:36
maint: run 'make update-copyright'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
/* Line breaking auxiliary tables.
Copyright (C) 2001-2003, 2006-2025 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This file is free software.
It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
You can redistribute it and/or modify it under either
- the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3, or (at your
option) any later version, or
- the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option)
any later version, or
- the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License and the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public
License and of the GNU General Public License along with this
program. If not, see <https://www.gnu.org/licenses/>. */
#include "unitypes.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Line breaking classification.
   Every enumerator has an explicit value, so the listing order carries no
   meaning; the enumerators are listed here in increasing numeric order.  */
enum
{
  /* Values < 41.  */
  LBP_WJ  =  0, /* word joiner */
  LBP_GL  =  1, /* non-breaking (glue) */
  LBP_B2  =  2, /* break opportunity before and after */
  LBP_BA  =  3, /* break opportunity after */
  LBP_BB  =  4, /* break opportunity before */
  LBP_HY  =  5, /* hyphen */
  LBP_CL  =  6, /* closing punctuation */
  LBP_CP1 =  7, /* closing parenthesis, non-EastAsian character */
  LBP_CP2 =  8, /* closing parenthesis, EastAsian character */
  LBP_EX  =  9, /* exclamation/interrogation */
  LBP_IN  = 10, /* inseparable */
  LBP_NS  = 11, /* non starter */
  LBP_OP1 = 12, /* opening punctuation, non-EastAsian character */
  LBP_OP2 = 13, /* opening punctuation, EastAsian character */
  LBP_QU1 = 14, /* ambiguous quotation, neither initial nor final punctuation */
  LBP_QU2 = 15, /* ambiguous quotation, initial punctuation */
  LBP_QU3 = 16, /* ambiguous quotation, final punctuation */
  LBP_IS  = 17, /* infix separator (numeric) */
  LBP_NU  = 18, /* numeric */
  LBP_PO  = 19, /* postfix (numeric) */
  LBP_PR  = 20, /* prefix (numeric) */
  LBP_SY  = 21, /* symbols allowing breaks */
  LBP_AL1 = 22, /* ordinary alphabetic and symbol characters, != U+25CC */
  LBP_AL2 = 23, /* ordinary alphabetic and symbol characters, == U+25CC */
  LBP_H2  = 24, /* Hangul LV syllable */
  LBP_H3  = 25, /* Hangul LVT syllable */
  LBP_ID1 = 26, /* ideographic */
  LBP_ID2 = 27, /* ideographic and potential future emoji */
  LBP_JL  = 28, /* Hangul L Jamo */
  LBP_JV  = 29, /* Hangul V Jamo */
  LBP_JT  = 30, /* Hangul T Jamo */
  LBP_HL  = 31, /* Hebrew letter */
  LBP_AP  = 32, /* Brahmic scripts: pre-base repha */
  LBP_AK  = 33, /* Brahmic scripts: consonants */
  LBP_AS  = 34, /* Brahmic scripts: independent vowels */
  LBP_VI  = 35, /* Brahmic scripts: conjoining viramas */
  LBP_VF  = 36, /* Brahmic scripts: viramas for final consonants */
  LBP_RI  = 37, /* regional indicator */
  LBP_ZWJ = 38, /* zero width joiner */
  LBP_EB  = 39, /* emoji base */
  LBP_EM  = 40, /* emoji modifier */

  /* Values >= 41 are resolved at run time.  */
  LBP_BK  = 41, /* mandatory break */
  LBP_CR  = 42, /* carriage return */
  LBP_LF  = 43, /* line feed */
  LBP_CM  = 44, /* attached characters and combining marks */
  LBP_ZW  = 45, /* zero width space */
  LBP_SP  = 46, /* space */
  LBP_CB  = 47, /* contingent break opportunity */
  LBP_AI  = 48, /* ambiguous (alphabetic or ideograph) */
  LBP_SA  = 49, /* complex context (South East Asian) */
  LBP_XX  = 50, /* unknown */

  /* Classes that deliberately have no enumerator:
       LBP_NL  next line - not used here because it's equivalent to LBP_BK
       LBP_SG  surrogates - not used here because they are not characters
       LBP_CJ  conditional Japanese starter, resolved to NS  */

  /* Artificial values that exist only at runtime, not in the tables.  */
  LBP_AKLS_VI = 100,
  LBP_HL_BA   = 101
};
#include "lbrkprop1.h"
/* Packs a line breaking property and an EastAsian bit into one table entry:
   the property is shifted up by one bit and the EastAsian bit fills bit 0.  */
#define PROP_EA(lbp,eastasian) ((eastasian) | ((lbp) << 1))
/* Unpacks a table entry: PROP yields the line breaking property,
   EA yields the EastAsian bit.  */
#define PROP(packed) ((packed) >> 1)
#define EA(packed) ((packed) & 1)
/* Looks up UC in the three-level table unilbrkprop.
   Returns the table entry (prop << 1) | ea, where
     - prop is the line breaking property,
     - ea is the EastAsian property (1 bit)
   of UC.
   When UC lies beyond the first level, or when the first or second level
   holds a negative value for it, the result is PROP_EA (LBP_XX, 0).  */
static inline unsigned char
unilbrkprop_lookup (ucs4_t uc)
{
  /* Level 1: selected by the high bits of UC.  */
  unsigned int top = uc >> lbrkprop_header_0;
  if (top >= lbrkprop_header_1)
    return PROP_EA (LBP_XX, 0);

  int mid_base = unilbrkprop.level1[top];
  if (mid_base < 0)
    return PROP_EA (LBP_XX, 0);

  /* Level 2: selected by the middle bits of UC, relative to mid_base.  */
  unsigned int mid = (uc >> lbrkprop_header_2) & lbrkprop_header_3;
  int leaf_base = unilbrkprop.level2[mid_base + mid];
  if (leaf_base < 0)
    return PROP_EA (LBP_XX, 0);

  /* Level 3: selected by the low bits of UC, relative to leaf_base.  */
  unsigned int leaf = uc & lbrkprop_header_4;
  return unilbrkprop.level3[leaf_base + leaf];
}
/* Table indexed by two line breaking classifications.
   The constants D, I and P name the three kinds of break opportunity
   that appear in table 7.3 of UTR #14.
   NOTE(review): presumably these are the values stored in unilbrk_table;
   the table's definition is not in this file - confirm there.  */
#define D 1 /* direct break opportunity, empty in table 7.3 of UTR #14 */
#define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */
#define P 3 /* prohibited break, '^' in table 7.3 of UTR #14 */
/* Declaration only; the table is defined elsewhere.
   Both dimensions have 41 elements, i.e. valid indices are 0..40.
   NOTE(review): 41 is also the bound from which LBP_* values are "resolved
   at run time" (see the enum above), which suggests that only LBP_* values
   below 41 are meant to be used as indices - confirm against the callers.  */
extern const unsigned char unilbrk_table[41][41];
/* We don't support line breaking of complex-context dependent characters
(Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */
#ifdef __cplusplus
}
#endif