/* Copyright (C) 1999-2001, 2003, 2011 Bruno Haible.
   This file is not part of the GNU LIBICONV Library.
   This file is put into the public domain. */
#include "iconv_string.h"
#include <iconv.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
/* Size of the scratch buffer used while measuring the output length. */
#define tmpbufsize 4096

/* Closes CD and returns -1 with errno set to ERR.
   iconv_close may overwrite errno, so the error to report is passed in
   explicitly and stored only after the descriptor has been closed. */
static int iconv_string_fail (iconv_t cd, int err)
{
  iconv_close(cd);
  errno = err;
  return -1;
}

/* Returns the NULL-terminated list of encodings to try, in order, for the
   pseudo-encoding FROMCODE, or NULL if FROMCODE is not one of the
   "autodetect_*" names handled here. */
static const char* const* iconv_string_candidates (const char* fromcode)
{
  /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
     be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
  static const char* const utf8_candidates[] =
    { "UTF-8", "ISO-8859-1", NULL };
  /* Try the 7-bit encoding first: it fails if the input contains bytes
     >= 0x80. Then EUC-JP, and only then SHIFT_JIS. Short SHIFT_JIS inputs
     may come out wrong; this is unavoidable. Trying SHIFT_JIS first would
     instead make some short EUC-JP inputs come out wrong. */
  static const char* const jp_candidates[] =
    { "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", NULL };
  /* Try the 7-bit encoding first: it fails if the input contains bytes
     >= 0x80. Then EUC-KR. */
  static const char* const kr_candidates[] =
    { "ISO-2022-KR", "EUC-KR", NULL };

  if (strcmp(fromcode,"autodetect_utf8") == 0)
    return utf8_candidates;
  if (strcmp(fromcode,"autodetect_jp") == 0)
    return jp_candidates;
  if (strcmp(fromcode,"autodetect_kr") == 0)
    return kr_candidates;
  return NULL;
}

/* Converts the bytes in [START, END) from encoding FROMCODE to encoding
   TOCODE.

   FROMCODE may also be "autodetect_utf8", "autodetect_jp" or
   "autodetect_kr" when iconv_open itself does not accept that name; a fixed
   list of candidate encodings is then tried in order, moving on to the next
   candidate only when the previous one failed with EILSEQ.

   If LENGTHP is not NULL, the number of output bytes is stored in *LENGTHP.
   If RESULTP is NULL, only the length is computed.
   Otherwise *RESULTP must be NULL or a pointer obtained from malloc/realloc;
   it is (re)allocated to hold the output, which is exactly *LENGTHP bytes
   long and is not NUL-terminated by this function. An empty output still
   yields a valid non-NULL one-byte allocation. The caller frees *RESULTP.

   Returns 0 on success. Returns -1 with errno set on failure:
     EINVAL  the conversion is not supported,
     EILSEQ  the input is invalid or truncated in FROMCODE,
     ENOMEM  allocation failed (*RESULTP is left unchanged in this case),
     or whatever errno iconv_open/iconv reported. */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  iconv_t cd = iconv_open(tocode,fromcode);
  size_t length;
  char* result;

  if (cd == (iconv_t)(-1)) {
    const char* const* candidate;

    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection. */
    candidate = iconv_string_candidates(fromcode);
    if (candidate == NULL) {
      errno = EINVAL;
      return -1;
    }
    for (;;) {
      int ret = iconv_string(tocode,*candidate,start,end,resultp,lengthp);
      candidate++;
      /* The last candidate's outcome is final; earlier ones are only
         skipped when they rejected the input with EILSEQ. */
      if (*candidate == NULL || !(ret < 0 && errno == EILSEQ))
        return ret;
    }
  }

  /* Determine the length we need, by converting into a scratch buffer. */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    /* iconv never writes through the input pointer, but POSIX declares the
       argument as char**, while some older implementations use
       const char**. A char* cursor passed via void* satisfies both. */
    char* inptr = (char*) start;
    size_t insize = end-start;

    while (insize > 0) {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      /* E2BIG only means the scratch buffer is full: keep going.
         EINVAL (input ends in the middle of a character) is reported to
         the caller as EILSEQ. */
      if (res == (size_t)(-1) && errno != E2BIG)
        return iconv_string_fail(cd, errno == EINVAL ? EILSEQ : errno);
      count += outptr-tmpbuf;
    }
    /* Account for whatever is emitted when the shift state is flushed. */
    {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
        return iconv_string_fail(cd, errno);
      count += outptr-tmpbuf;
    }
    length = count;
  }

  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    iconv_close(cd);
    return 0;
  }

  /* realloc(NULL, n) behaves like malloc(n). Ask for at least one byte so
     that an empty result never triggers a zero-size (re)allocation, and
     keep the caller's pointer intact if the allocation fails. */
  result = realloc(*resultp, length > 0 ? length : 1);
  if (result == NULL)
    return iconv_string_fail(cd, ENOMEM);
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }

  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */

  /* Do the conversion for real. */
  {
    char* inptr = (char*) start;
    size_t insize = end-start;
    char* outptr = result;
    size_t outsize = length;

    while (insize > 0) {
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break;
        return iconv_string_fail(cd, errno);
      }
    }
    if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
      return iconv_string_fail(cd, errno);
    /* Both passes must produce the same number of bytes. */
    if (outsize != 0) abort();
  }

  iconv_close(cd);
  return 0;
}