Hash :
d2777e80
Author :
Date :
2023-07-11T00:52:23
dfa: Overcome wchar_t limitations. * lib/localeinfo.h: Include <uchar.h>. Add special definitions for GAWK. (case_folded_counterparts): Change array element type to char32_t. * lib/localeinfo.c: Include <uchar.h>. Add special definitions for GAWK. (is_using_utf8, init_localeinfo): Use mbrtoc32 instead of mbrtowc. (lonesome_lower): Change element type to 'unsigned short'. (case_folded_counterparts): Change array element type to char32_t. Use c32toupper instead of towupper. Use c32tolower instead of towlower. * lib/dfa.c: Include <uchar.h>. Add special definitions for GAWK. (struct mb_char_classes): Change element type of 'chars' to char32_t. (mbs_to_wchar): Use mbrtoc32 instead of mbrtowc. (setbit_wc): Change type of first argument to char32_t. Use c32tob instead of wctob. (parse_bracket_exp): Update. (lex): Use c32isprint instead of iswprint. Use c32isspace instead of iswspace. Use c32rtomb instead of a %lc directive. (addtok_wc): Use c32rtomb instead of wcrtomb. (atom): Update. * modules/dfa (Depends-on): Remove wctype-h. Add uchar, mbrtoc32-regular, c32rtomb, c32tob, c32tolower, c32toupper, c32isprint, c32isspace. (Link): Add $(LIBUNISTRING) $(LIBC32CONV). * modules/dfa-tests (Makefile.am): Link test-dfa-match-aux with $(LIBUNISTRING) $(LIBC32CONV). * NEWS: Mention the change.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
/* locale information
Copyright 2016-2023 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
02110-1301, USA. */
/* Written by Paul Eggert. */
#include <limits.h>
#include <wchar.h>
#if GAWK
/* Use ISO C 99 API. */
# define char32_t wchar_t
#else
/* Use ISO C 11 + gnulib API. */
# include <uchar.h>
#endif
/* Locale information: a few encoding properties plus two lookup
   tables indexed by byte value.  */
struct localeinfo
{
  /* True when MB_CUR_MAX > 1.  */
  bool multibyte;

  /* True when the locale is simple, as the C locale is.  Such a locale
     is single-byte, its native character set is in collating-sequence
     order, and it has no multi-character collating elements, so it can
     be processed more efficiently.  */
  bool simple;

  /* True when the locale uses UTF-8.  */
  bool using_utf8;

  /* Classification of each byte value B, indexed by B:
        1  B is a single-byte character;
       -1  B is an encoding error;
       -2  B is the leading byte of a multibyte character that
           contains more than one byte.  */
  signed char sbclen[UCHAR_MAX + 1];

  /* Indexed by byte value B: the corresponding 32-bit wide character
     (if any) for B when sbclen[B] == 1.  The entry is WEOF when B is
     not a valid single-byte character, i.e., when sbclen[B] is -1
     or -2.  */
  wint_t sbctowc[UCHAR_MAX + 1];
};
/* Declared here; the definition is not part of this header.
   NOTE(review): presumably fills in *LOCALEINFO for the locale in
   effect at the time of the call -- confirm against the definition.  */
extern void init_localeinfo (struct localeinfo *localeinfo);
/* Upper bound, chosen generously, on how many case-folded counterparts
   a single character can have.  The character itself is not included
   in the count.  */
enum
{
  CASE_FOLDED_BUFSIZE = 32
};
/* Declared here; the definition is not part of this header.  FOLDED is
   declared with room for CASE_FOLDED_BUFSIZE elements.
   NOTE(review): presumably stores the case-folded counterparts of C,
   other than C itself, into FOLDED and returns how many were stored --
   confirm against the definition.  */
extern int case_folded_counterparts (wint_t c,
                                     char32_t folded[CASE_FOLDED_BUFSIZE]);