/* Test of conversion to UTF-32 from legacy encodings.
Copyright (C) 2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by Bruno Haible <bruno@clisp.org>, 2007. */
#include <config.h>
#include "uniconv.h"
#include <stdio.h>
#include <stdlib.h>
#include "unistr.h"
/* Number of elements of ARRAY.  ARRAY must be a true array object, not a
   pointer (an array received as a function parameter has already decayed
   to a pointer and would yield a wrong count).
   The argument is parenthesized before it is subscripted, so that an
   argument such as a cast or a dereference expression is indexed as a
   whole rather than having the [0] bind to its last operand.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))
/* Check that EXPR is true.  If it is not, print the source file and line
   of the failed check to stderr and terminate the program via abort ().
   Wrapped in do ... while (0) so that it behaves as a single statement,
   e.g. as the body of an if/else without braces.  */
#define ASSERT(expr) \
  do \
    { \
      if (expr) \
        break; \
      fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
      abort (); \
    } \
  while (0)
/* Entry point of the test.  Converts fixed sample strings from several
   legacy encodings to UTF-32 with u32_strconv_from_encoding () and compares
   each result with the expected sequence of code points.  Every sample is
   run once per ilseq handler; the samples are meant to convert without
   errors, so the same output is expected for each handler.
   Returns 0 if no check fails; a failing ASSERT aborts the program.
   When HAVE_ICONV is not set, nothing is tested and 0 is returned.  */
int
main (void)
{
#if HAVE_ICONV
  /* The handlers for unconvertible input that every sample is tried with.
     Declared inside the conditional because they are only used here.  */
  static const enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;

  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from ISO-8859-1 to UTF-32 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
      static const uint32_t expected[] = /* Ärger mit bösen Bübchen ohne Augenmaß */
        {
          0xC4, 'r', 'g', 'e', 'r', ' ', 'm', 'i', 't', ' ', 'b', 0xF6, 's',
          'e', 'n', ' ', 'B', 0xFC, 'b', 'c', 'h', 'e', 'n', ' ', 'o', 'h',
          'n', 'e', ' ', 'A', 'u', 'g', 'e', 'n', 'm', 'a', 0xDF, 0
        };
      uint32_t *result = u32_strconv_from_encoding (input, "ISO-8859-1", handler);
      ASSERT (result != NULL);
      ASSERT (u32_strcmp (result, expected) == 0);
      /* The result is released with free () after each check.  */
      free (result);
    }

  /* Test conversion from ISO-8859-2 to UTF-32 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
      static const uint32_t expected[] =
        {
          'R', 'a', 'f', 'a', 0x0142, ' ', 'M', 'a', 's', 'z', 'k', 'o', 'w',
          's', 'k', 'i', 0
        };
      uint32_t *result = u32_strconv_from_encoding (input, "ISO-8859-2", handler);
      ASSERT (result != NULL);
      ASSERT (u32_strcmp (result, expected) == 0);
      free (result);
    }

  /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2.  */
# if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__)
  /* Test conversions from autodetect_jp to UTF-32.  The same text is fed
     in three different Japanese encodings; each must yield the same
     code points.  */

  /* Input in EUC-JP.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
      static const uint32_t expected[] = /* こんにちは */
        {
          0x3053, 0x3093, 0x306B, 0x3061, 0x306F, 0
        };
      uint32_t *result = u32_strconv_from_encoding (input, "autodetect_jp", handler);
      ASSERT (result != NULL);
      ASSERT (u32_strcmp (result, expected) == 0);
      free (result);
    }

  /* Input in Shift_JIS.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
      static const uint32_t expected[] = /* こんにちは */
        {
          0x3053, 0x3093, 0x306B, 0x3061, 0x306F, 0
        };
      uint32_t *result = u32_strconv_from_encoding (input, "autodetect_jp", handler);
      ASSERT (result != NULL);
      ASSERT (u32_strcmp (result, expected) == 0);
      free (result);
    }

  /* Input in ISO-2022-JP-2.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
      static const uint32_t expected[] = /* こんにちは */
        {
          0x3053, 0x3093, 0x306B, 0x3061, 0x306F, 0
        };
      uint32_t *result = u32_strconv_from_encoding (input, "autodetect_jp", handler);
      ASSERT (result != NULL);
      ASSERT (u32_strcmp (result, expected) == 0);
      free (result);
    }
# endif

#endif

  return 0;
}