Edit

kc3-lang/libxml2/encoding.c

Branch :

  • Show log

    Commit

  • Author : Darin Adler
    Date : 2001-07-27 22:47:14
    Hash : 699613b1
    Message : Add cast to fix warning. Add include of <libxml/parser.h>. * encoding.c: (xmlIconvWrapper): Add cast to fix warning. * testCatalog.c: Add include of <libxml/parser.h>.

  • encoding.c
  • /*
     * encoding.c : implements the encoding conversion functions needed for XML
     *
     * Related specs: 
     * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
     * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
     * [ISO-10646]    UTF-8 and UTF-16 in Annexes
     * [ISO-8859-1]   ISO Latin-1 characters codes.
     * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
     *                Worldwide Character Encoding -- Version 1.0", Addison-
     *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
     *                described in Unicode Technical Report #4.
     * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
     *                Information Interchange, ANSI X3.4-1986.
     *
     * See Copyright for the status of this software.
     *
     * daniel@veillard.com
     *
     * UTF8 string routines from:
     * "William M. Brack" <wbrack@mmm.com.hk>
     *
     * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
     */
    
    #include "libxml.h"
    
    #include <string.h>
    
    #ifdef HAVE_CTYPE_H
    #include <ctype.h>
    #endif
    #ifdef HAVE_STDLIB_H
    #include <stdlib.h>
    #endif
    #ifdef LIBXML_ICONV_ENABLED
    #ifdef HAVE_ERRNO_H
    #include <errno.h>
    #endif
    #endif
    #include <libxml/encoding.h>
    #include <libxml/xmlmemory.h>
    #ifdef LIBXML_HTML_ENABLED
    #include <libxml/HTMLparser.h>
    #endif
    #include <libxml/xmlerror.h>
    
    /* Handlers for the two UTF-16 byte orders; both start out unset (NULL). */
    static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
    static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
    
    /*
     * One entry of the encoding alias table: associates a @name string with
     * an @alias string.
     * NOTE(review): presumably @alias is an alternate spelling resolving to
     * the encoding called @name -- confirm against the alias lookup code.
     */
    typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
    typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
    struct _xmlCharEncodingAlias {
        const char *name;
        const char *alias;
    };
    
    /*
     * State of the alias table; empty (NULL / 0 / 0) at start-up.
     * NOTE(review): the Nb / Max counters look like "entries in use" and
     * "allocated capacity" -- confirm against the code that fills the table.
     */
    static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
    static int xmlCharEncodingAliasesNb = 0;
    static int xmlCharEncodingAliasesMax = 0;
    
    /* Encoding traces are compiled out unless the "#if 0" below is enabled. */
    #ifdef LIBXML_ICONV_ENABLED
    #if 0
    #define DEBUG_ENCODING  /* Define this to get encoding traces */
    #endif
    #endif
    
    /*
     * Byte-order flag, non-zero meaning "little endian"; defaults to 1.
     * NOTE(review): presumably refreshed at initialisation time to reflect
     * the host byte order -- confirm, the code doing so is not in this part
     * of the file.
     */
    static int xmlLittleEndian = 1;
    
    /************************************************************************
     *									*
     *			Generic UTF8 handling routines			*
     *									*
     * From rfc2044: encoding of the Unicode values on UTF-8:		*
     *									*
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)		*
     * 0000 0000-0000 007F   0xxxxxxx					*
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx				*
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 			*
     *									*
     * I hope we won't use values > 0xFFFF anytime soon !			*
     *									*
     ************************************************************************/
    
    /**
     * xmlUTF8Strlen:
     * @utf:  a sequence of UTF-8 encoded bytes
     *
     * Count the characters of a zero-terminated UTF-8 string.  Only the
     * framing of each sequence is examined (every continuation byte must
     * look like 10xxxxxx and a 4-byte lead must look like 11110xxx); this is
     * not a full UTF-8 validation.
     *
     * Returns the number of characters in the string, or -1 if @utf is NULL
     * or a badly framed sequence is met
     */
    int
    xmlUTF8Strlen(const xmlChar *utf) {
        const xmlChar *cur;
        int count = 0;
        int width;

        if (utf == NULL)
            return(-1);

        for (cur = utf; cur[0] != 0; cur += width) {
            if (!(cur[0] & 0x80)) {
                /* plain ASCII */
                width = 1;
            } else if ((cur[1] & 0xc0) != 0x80) {
                return(-1);
            } else if ((cur[0] & 0xe0) != 0xe0) {
                width = 2;
            } else if ((cur[2] & 0xc0) != 0x80) {
                return(-1);
            } else if ((cur[0] & 0xf0) != 0xf0) {
                width = 3;
            } else if (((cur[0] & 0xf8) != 0xf0) ||
                       ((cur[3] & 0xc0) != 0x80)) {
                return(-1);
            } else {
                width = 4;
            }
            count++;
        }
        return(count);
    }
    
    /**
     * xmlGetUTF8Char:
     * @utf:  a sequence of UTF-8 encoded bytes
     * @len:  in: number of bytes available at @utf; out: number of bytes
     *        used by the decoded character (0 on error)
     *
     * Read one UTF-8 character from @utf.  Sequences of 1 to 4 bytes are
     * decoded; continuation bytes are checked for the 10xxxxxx pattern.
     *
     * Returns the character value, or -1 in case of error (NULL argument,
     *         not enough bytes available, or a badly framed sequence)
     */
    static int
    xmlGetUTF8Char(const unsigned char *utf, int *len) {
        unsigned int c;

        if (utf == NULL)
            goto error;
        if (len == NULL)
            goto error;
        if (*len < 1)
            goto error;

        c = utf[0];
        if (c & 0x80) {
            if (*len < 2)
                goto error;
            if ((utf[1] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xe0) == 0xe0) {
                if (*len < 3)
                    goto error;
                if ((utf[2] & 0xc0) != 0x80)
                    goto error;
                if ((c & 0xf0) == 0xf0) {
                    if (*len < 4)
                        goto error;
                    if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                        goto error;
                    /* 4-byte code */
                    *len = 4;
                    c = (utf[0] & 0x7) << 18;
                    c |= (utf[1] & 0x3f) << 12;
                    c |= (utf[2] & 0x3f) << 6;
                    c |= utf[3] & 0x3f;
                } else {
                    /* 3-byte code */
                    *len = 3;
                    c = (utf[0] & 0xf) << 12;
                    c |= (utf[1] & 0x3f) << 6;
                    c |= utf[2] & 0x3f;
                }
            } else {
                /* 2-byte code */
                *len = 2;
                c = (utf[0] & 0x1f) << 6;
                c |= utf[1] & 0x3f;
            }
        } else {
            /* 1-byte code */
            *len = 1;
        }
        return(c);

    error:
        /* this label is also reached when len itself is NULL */
        if (len != NULL)
            *len = 0;
        return(-1);
    }
    
    /**
     * xmlCheckUTF8: Check utf-8 string for legality.
     * @utf: Pointer to putative utf-8 encoded string.
     *
     * Checks @utf for being valid utf-8. @utf is assumed to be
     * null-terminated. This function is not super-strict, as it will
     * allow longer utf-8 sequences than necessary. Note that Java is
     * capable of producing these sequences if provoked. Also note, this
     * routine checks for the 4-byte maximum size, but does not check for
     * 0x10ffff maximum value.
     *
     * A lead byte must match one of 0xxxxxxx, 110xxxxx, 1110xxxx or
     * 11110xxx; in particular a continuation byte (10xxxxxx) found where a
     * lead byte is expected makes the string invalid.
     *
     * Return value: true if @utf is valid, false otherwise (including when
     * @utf is NULL).
     **/
    int
    xmlCheckUTF8(const unsigned char *utf)
    {
        int ix = 0;
        unsigned char c;

        if (utf == NULL)
            return(0);

        while ((c = utf[ix]) != 0) {
            if ((c & 0x80) == 0x00) {
                /* 1-byte code */
                ix += 1;
            } else if ((c & 0xe0) == 0xc0) {
                /* 2-byte code */
                if ((utf[ix + 1] & 0xc0) != 0x80)
                    return(0);
                ix += 2;
            } else if ((c & 0xf0) == 0xe0) {
                /*
                 * 3-byte code; the tests are ordered so that nothing past
                 * the terminating 0 is ever read
                 */
                if (((utf[ix + 1] & 0xc0) != 0x80) ||
                    ((utf[ix + 2] & 0xc0) != 0x80))
                    return(0);
                ix += 3;
            } else if ((c & 0xf8) == 0xf0) {
                /* 4-byte code */
                if (((utf[ix + 1] & 0xc0) != 0x80) ||
                    ((utf[ix + 2] & 0xc0) != 0x80) ||
                    ((utf[ix + 3] & 0xc0) != 0x80))
                    return(0);
                ix += 4;
            } else {
                /* stray continuation byte or lead of a 5+ byte sequence */
                return(0);
            }
        }
        return(1);
    }
    
    /**
     * xmlUTF8Strsize:
     * @utf:  a sequence of UTF-8 encoded bytes
     * @len:  the number of characters to measure
     *
     * Compute the storage size, in bytes, of the first @len characters of
     * the zero-terminated UTF-8 string @utf.  The width of each character
     * is taken from the number of leading 1 bits of its first byte;
     * counting stops early when the terminating 0 is reached, even in the
     * middle of a truncated sequence.
     *
     * Returns the number of bytes used by the first @len characters, or 0
     * if @utf is NULL or @len is not positive
     */

    int
    xmlUTF8Strsize(const xmlChar *utf, int len) {
        const xmlChar *ptr = utf;
        xmlChar ch;

        if (utf == NULL)
            return(0);
        if (len <= 0)
            return(0);

        while (len-- > 0) {
            if (!*ptr)
                break;
            ch = *ptr++;
            if (ch & 0x80) {
                /* one continuation byte per extra leading 1 bit */
                while ((ch <<= 1) & 0x80) {
                    /* never step over the end of the string */
                    if (*ptr == 0)
                        break;
                    ptr++;
                }
            }
        }
        return (ptr - utf);
    }
    
    
    /**
     * xmlUTF8Strndup:
     * @utf:  the input UTF8 *
     * @len:  the len of @utf (in chars)
     *
     * a strndup for array of UTF8's: copies the bytes making up the first
     * @len characters of @utf into a newly allocated, zero-terminated
     * buffer obtained from xmlMalloc().
     *
     * Returns a new UTF8 * or NULL (@utf is NULL, @len is negative, or the
     * allocation failed)
     */
    xmlChar *
    xmlUTF8Strndup(const xmlChar *utf, int len) {
        xmlChar *ret;
        int i;

        if ((utf == NULL) || (len < 0)) return(NULL);
        /* i is a byte count, len a character count */
        i = xmlUTF8Strsize(utf, len);
        ret = (xmlChar *) xmlMalloc((i + 1) * sizeof(xmlChar));
        if (ret == NULL) {
            /* report the size that was really requested */
            xmlGenericError(xmlGenericErrorContext,
                    "malloc of %ld byte failed\n",
                    (i + 1) * (long)sizeof(xmlChar));
            return(NULL);
        }
        memcpy(ret, utf, i * sizeof(xmlChar));
        ret[i] = 0;
        return(ret);
    }
    
    /**
     * xmlUTF8Strpos:
     * @utf:  the input UTF8 *
     * @pos:  the position of the desired UTF8 char (in chars)
     *
     * The UTF-8 counterpart of indexing a character array: walks over @pos
     * characters of @utf, checking the framing of each one on the way.
     *
     * Returns a pointer to the UTF8 character or NULL (@utf is NULL, @pos
     * is out of range, or a badly framed sequence was met)
     */
    xmlChar *
    xmlUTF8Strpos(const xmlChar *utf, int pos) {
        const xmlChar *cur = utf;
        xmlChar lead;
        int remaining;

        if (utf == NULL)
            return(NULL);
        if (pos < 0)
            return(NULL);
        if (pos >= xmlUTF8Strlen(utf))
            return(NULL);

        for (remaining = pos; remaining != 0; remaining--) {
            lead = *cur++;
            if (lead == 0)
                return(NULL);
            if (!(lead & 0x80))
                continue;               /* simple ascii */
            /* a multi-byte lead needs its two top bits set */
            if ((lead & 0xc0) != 0xc0)
                return(NULL);
            /* one continuation byte per additional leading 1 bit */
            for (lead <<= 1; lead & 0x80; lead <<= 1) {
                if ((*cur++ & 0xc0) != 0x80)
                    return(NULL);
            }
        }
        return((xmlChar *) cur);
    }
    
    /**
     * xmlUTF8Strloc:
     * @utf:  the input UTF8 *
     * @utfchar:  the UTF8 character to be found
     *
     * Search @utf, character by character, for the first occurrence of the
     * (possibly multi-byte) character starting at @utfchar.
     *
     * Returns the relative character position of the desired char
     * or -1 if not found (also when an argument is NULL or a badly framed
     * sequence is met in @utf)
     */
    int
    xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
        const xmlChar *cur;
        xmlChar lead;
        int index, width;

        if ((utf == NULL) || (utfchar == NULL))
            return(-1);

        /* number of bytes used by the character we are looking for */
        width = xmlUTF8Strsize(utfchar, 1);

        cur = utf;
        index = 0;
        while ((lead = *cur) != 0) {
            if (xmlStrncmp(cur, utfchar, width) == 0)
                return(index);
            cur++;
            if (lead & 0x80) {
                /* a multi-byte lead needs its two top bits set */
                if ((lead & 0xc0) != 0xc0)
                    return(-1);
                /* step over the continuation bytes of this character */
                for (lead <<= 1; lead & 0x80; lead <<= 1) {
                    if ((*cur++ & 0xc0) != 0x80)
                        return(-1);
                }
            }
            index++;
        }

        return(-1);
    }
    /**
     * xmlUTF8Strsub:
     * @utf:  a sequence of UTF-8 encoded bytes
     * @start: relative pos of first char
     * @len:   total number to copy
     *
     * Extract a substring of @utf.  Both @start and @len are expressed in
     * UTF-8 characters, not in bytes.
     *
     * Returns a pointer to a newly created string
     * or NULL if any problem (NULL input, negative @start or @len, string
     * shorter than @start characters, badly framed sequence)
     */

    xmlChar *
    xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
        const xmlChar *cur = utf;
        xmlChar lead;
        int skip;

        if ((utf == NULL) || (start < 0) || (len < 0))
            return(NULL);

        /*
         * Walk over the characters preceding the substring
         */
        for (skip = start; skip > 0; skip--) {
            lead = *cur++;
            if (lead == 0)
                return(NULL);
            if (!(lead & 0x80))
                continue;               /* simple ascii */
            /* a multi-byte lead needs its two top bits set */
            if ((lead & 0xc0) != 0xc0)
                return(NULL);
            /* step over the continuation bytes of this character */
            for (lead <<= 1; lead & 0x80; lead <<= 1) {
                if ((*cur++ & 0xc0) != 0x80)
                    return(NULL);
            }
        }

        return(xmlUTF8Strndup(cur, len));
    }
    
    /************************************************************************
     *									*
     *		Conversions To/From UTF8 encoding			*
     *									*
     ************************************************************************/
    
    /**
     * asciiToUTF8:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of ASCII chars
     * @inlen:  the length of @in
     *
     * Take a block of ASCII chars in and try to convert it to an UTF-8
     * block of chars out.  ASCII bytes are copied unchanged; conversion
     * stops at the first byte >= 0x80, or when fewer than six bytes of
     * output head-room remain.
     *
     * Returns 0 if success, or -1 if a non-ASCII byte was met
     * The value of @inlen after return is the number of octets consumed.
     * The value of @outlen after return is the number of octets produced.
     */
    static int
    asciiToUTF8(unsigned char* out, int *outlen,
                  const unsigned char* in, int *inlen) {
        unsigned char *dst = out;
        unsigned char *const dstLimit = out + *outlen;
        const unsigned char *src = in;
        const unsigned char *const srcLimit = in + *inlen;
        const unsigned char *done = in;
        int status = 0;

        while ((src < srcLimit) && (dst - out + 5 < *outlen)) {
            unsigned int ch = *src++;

            if (dst >= dstLimit)
                break;
            if (ch >= 0x80) {
                /* not an ASCII character */
                status = -1;
                break;
            }
            *dst++ = ch;
            done = src;
        }

        *outlen = dst - out;
        *inlen = done - in;
        return(status);
    }
    
    /**
     * UTF8Toascii:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of UTF-8 chars
     * @inlen:  the length of @in
     *
     * Take a block of UTF-8 chars in and try to convert it to an ASCII
     * block of chars out.  When @in is NULL the call is an initialization
     * request and nothing is produced.  An input ending in the middle of a
     * sequence is not an error: the incomplete sequence is simply left
     * unconsumed.
     *
     * Returns 0 if success, -2 if the transcoding fails (badly formed
     *     UTF-8 or a character which has no ASCII equivalent)
     * The value of @inlen after return is the number of octets consumed.
     * The value of @outlen after return is the number of octets produced.
     */
    static int
    UTF8Toascii(unsigned char* out, int *outlen,
                  const unsigned char* in, int *inlen) {
        const unsigned char* processed = in;
        const unsigned char* outend;
        const unsigned char* outstart = out;
        const unsigned char* instart = in;
        const unsigned char* inend;
        unsigned int c, d;
        int trailing;

        if (in == NULL) {
            /*
             * initialization nothing to do
             */
            *outlen = 0;
            *inlen = 0;
            return(0);
        }
        inend = in + (*inlen);
        outend = out + (*outlen);
        while (in < inend) {
            d = *in++;
            if (d < 0x80) {
                c = d;
                trailing = 0;
            } else if (d < 0xC0) {
                /* trailing byte in leading position */
                goto failed;
            } else if (d < 0xE0) {
                c = d & 0x1F;
                trailing = 1;
            } else if (d < 0xF0) {
                c = d & 0x0F;
                trailing = 2;
            } else if (d < 0xF8) {
                c = d & 0x07;
                trailing = 3;
            } else {
                /* no chance for this in Ascii */
                goto failed;
            }

            /* incomplete sequence: wait for more input */
            if (inend - in < trailing)
                break;

            for ( ; trailing; trailing--) {
                d = *in++;
                /*
                 * A byte which is not a continuation makes the sequence
                 * invalid: never emit a partially decoded value nor
                 * swallow that byte.
                 */
                if ((d & 0xC0) != 0x80)
                    goto failed;
                c <<= 6;
                c |= d & 0x3F;
            }

            /* assertion: c is a single UCS-4 value */
            if (c >= 0x80) {
                /* no chance for this in Ascii */
                goto failed;
            }
            if (out >= outend)
                break;
            *out++ = c;
            processed = in;
        }
        *outlen = out - outstart;
        *inlen = processed - instart;
        return(0);

    failed:
        *outlen = out - outstart;
        *inlen = processed - instart;
        return(-2);
    }
    
    /**
     * isolat1ToUTF8:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of ISO Latin 1 chars
     * @inlen:  the length of @in
     *
     * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
     * block of chars out.  Bytes below 0x80 are copied, the others become a
     * 2-byte sequence.  A character is converted only if its whole UTF-8
     * form fits in the remaining output space, so @out never ends with a
     * truncated sequence.
     *
     * Returns 0
     * The value of @inlen after return is the number of octets consumed.
     * The value of @outlen after return is the number of octets produced.
     */
    int
    isolat1ToUTF8(unsigned char* out, int *outlen,
                  const unsigned char* in, int *inlen) {
        unsigned char* outstart = out;
        unsigned char* outend = out + *outlen;
        const unsigned char* base = in;
        const unsigned char* inend = in + (*inlen);
        unsigned int c;

        while (in < inend) {
            c = *in;

            if (c < 0x80) {
                if (out >= outend)
                    break;
                *out++ = c;
            } else {
                /* both bytes or nothing */
                if (outend - out < 2)
                    break;
                *out++ = ((c >> 6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            }
            /* the input byte is consumed only once fully converted */
            in++;
        }
        *outlen = out - outstart;
        *inlen = in - base;
        return(0);
    }
    
    /**
     * UTF8Toisolat1:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of UTF-8 chars
     * @inlen:  the length of @in
     *
     * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
     * block of chars out.  A NULL @in is an initialization request which
     * produces nothing.  An input ending in the middle of a sequence, or an
     * exhausted output buffer, stops the conversion without error.
     *
     * Returns 0 if success, -2 if the transcoding fails (badly formed
     *     UTF-8 or a character above 0xFF)
     * The value of @inlen after return is the number of octets consumed.
     * The value of @outlen after return is the number of octets produced.
     */
    int
    UTF8Toisolat1(unsigned char* out, int *outlen,
                  const unsigned char* in, int *inlen) {
        const unsigned char *src, *srcEnd, *consumed;
        unsigned char *dst, *dstEnd;
        unsigned int lead, cont, value;
        int extra;
        int status = 0;

        if (in == NULL) {
            /* initialization: nothing to emit */
            *outlen = 0;
            *inlen = 0;
            return(0);
        }

        src = in;
        consumed = in;
        srcEnd = in + (*inlen);
        dst = out;
        dstEnd = out + (*outlen);

        while (src < srcEnd) {
            lead = *src++;

            /* classify the lead byte */
            if (lead < 0x80) {
                value = lead;
                extra = 0;
            } else if (lead < 0xC0) {
                /* trailing byte in leading position */
                status = -2;
                goto done;
            } else if (lead < 0xE0) {
                value = lead & 0x1F;
                extra = 1;
            } else if (lead < 0xF0) {
                value = lead & 0x0F;
                extra = 2;
            } else if (lead < 0xF8) {
                value = lead & 0x07;
                extra = 3;
            } else {
                /* no chance for this in IsoLat1 */
                status = -2;
                goto done;
            }

            /* sequence cut by the end of the input: leave it unconsumed */
            if (srcEnd - src < extra)
                goto done;

            /* fold in the continuation bytes */
            while (extra > 0) {
                cont = *src++;
                if ((cont & 0xC0) != 0x80) {
                    status = -2;
                    goto done;
                }
                value = (value << 6) | (cont & 0x3F);
                extra--;
            }

            if (value > 0xFF) {
                /* no chance for this in IsoLat1 */
                status = -2;
                goto done;
            }
            if (dst >= dstEnd)
                goto done;
            *dst++ = value;
            consumed = src;
        }

    done:
        *outlen = dst - out;
        *inlen = consumed - in;
        return(status);
    }
    
    /**
     * UTF16LEToUTF8:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @inb:  a pointer to an array of UTF-16LE passed as a byte array
     * @inlenb:  the length of @inb in bytes
     *
     * Take a block of UTF-16LE code units in and try to convert it to an
     * UTF-8 block of chars out.  The input is decoded one byte at a time
     * (low byte first), so the result depends neither on the byte order of
     * the host nor on the alignment of @inb.  A trailing odd byte is
     * ignored.  Conversion stops, without error, when fewer than six bytes
     * of output head-room remain or when the input ends between the two
     * halves of a surrogate pair.
     *
     * Returns 0 if success, or -2 if the transcoding fails (a high
     *     surrogate is not followed by a low surrogate)
     * The value of *@inlenb after return is the number of octets consumed.
     * The value of *@outlen after return is the number of octets produced.
     */
    static int
    UTF16LEToUTF8(unsigned char* out, int *outlen,
                const unsigned char* inb, int *inlenb)
    {
        unsigned char* outstart = out;
        unsigned char* outend = out + *outlen;
        const unsigned char* in = inb;
        const unsigned char* processed = inb;
        const unsigned char* inend;
        unsigned int c, d;
        int bits;

        if ((*inlenb % 2) == 1)
            (*inlenb)--;
        inend = inb + *inlenb;
        while ((in < inend) && (out - outstart + 5 < *outlen)) {
            c = in[0] | (((unsigned int) in[1]) << 8);
            in += 2;
            if ((c & 0xFC00) == 0xD800) {    /* surrogates */
                if (in >= inend) {
                    /* second half not available yet */
                    break;
                }
                d = in[0] | (((unsigned int) in[1]) << 8);
                in += 2;
                if ((d & 0xFC00) == 0xDC00) {
                    c &= 0x03FF;
                    c <<= 10;
                    c |= d & 0x03FF;
                    c += 0x10000;
                } else {
                    *outlen = out - outstart;
                    *inlenb = processed - inb;
                    return(-2);
                }
            }

            /* assertion: c is a single UCS-4 value */
            if (out >= outend)
                break;
            if      (c <    0x80) {  *out++=  c;                bits= -6; }
            else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
            else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
            else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

            /* continuation bytes, 6 bits each, most significant first */
            for ( ; bits >= 0; bits-= 6) {
                if (out >= outend)
                    break;
                *out++= ((c >> bits) & 0x3F) | 0x80;
            }
            processed = in;
        }
        *outlen = out - outstart;
        *inlenb = processed - inb;
        return(0);
    }
    
    /**
     * UTF8ToUTF16LE:
     * @outb:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @outb
     * @in:  a pointer to an array of UTF-8 chars
     * @inlen:  the length of @in
     *
     * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
     * block of chars out.  The output is written one byte at a time (low
     * byte first), so the result depends neither on the byte order of the
     * host nor on the alignment of @outb.  When @in is NULL the call is an
     * initialization request and the Byte Order Mark FF FE is emitted if
     * there is room for it.  An input ending in the middle of a sequence,
     * or an exhausted output buffer, stops the conversion without error.
     *
     * Returns 0 if success, 2 when the Byte Order Mark was written, or -2
     *     if the transcoding failed (badly formed UTF-8)
     * The value of *@inlen after return is the number of octets consumed.
     * The value of *@outlen after return is the number of octets produced.
     */
    static int
    UTF8ToUTF16LE(unsigned char* outb, int *outlen,
                const unsigned char* in, int *inlen)
    {
        unsigned char* out = outb;
        unsigned char* outend;
        const unsigned char* instart = in;
        const unsigned char* processed = in;
        const unsigned char* inend;
        unsigned int c, d;
        int trailing;

        if (in == NULL) {
            /*
             * initialization, add the Byte Order Mark
             */
            if (*outlen >= 2) {
                outb[0] = 0xFF;
                outb[1] = 0xFE;
                *outlen = 2;
                *inlen = 0;
#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "Added FFFE Byte Order Mark\n");
#endif
                return(2);
            }
            *outlen = 0;
            *inlen = 0;
            return(0);
        }
        inend = in + *inlen;
        /* only whole 16-bit units can be stored */
        outend = outb + (*outlen / 2) * 2;
        while (in < inend) {
            d = *in++;
            if (d < 0x80) {
                c = d;
                trailing = 0;
            } else if (d < 0xC0) {
                /* trailing byte in leading position */
                goto failed;
            } else if (d < 0xE0) {
                c = d & 0x1F;
                trailing = 1;
            } else if (d < 0xF0) {
                c = d & 0x0F;
                trailing = 2;
            } else if (d < 0xF8) {
                c = d & 0x07;
                trailing = 3;
            } else {
                /* no chance for this in UTF-16 */
                goto failed;
            }

            /* incomplete sequence: wait for more input */
            if (inend - in < trailing)
                break;

            for ( ; trailing; trailing--) {
                d = *in++;
                /* anything but a continuation byte is malformed input */
                if ((d & 0xC0) != 0x80)
                    goto failed;
                c <<= 6;
                c |= d & 0x3F;
            }

            /* assertion: c is a single UCS-4 value */
            if (c < 0x10000) {
                if (outend - out < 2)
                    break;
                out[0] = c & 0xFF;
                out[1] = (c >> 8) & 0xFF;
                out += 2;
            } else if (c < 0x110000) {
                /* needs a surrogate pair, i.e. two units */
                if (outend - out < 4)
                    break;
                c -= 0x10000;
                d = 0xD800 | (c >> 10);
                out[0] = d & 0xFF;
                out[1] = (d >> 8) & 0xFF;
                d = 0xDC00 | (c & 0x03FF);
                out[2] = d & 0xFF;
                out[3] = (d >> 8) & 0xFF;
                out += 4;
            } else
                break;
            processed = in;
        }
        *outlen = out - outb;
        /* consumed input is measured from the start of the buffer */
        *inlen = processed - instart;
        return(0);

    failed:
        *outlen = out - outb;
        *inlen = processed - instart;
        return(-2);
    }
    
    /**
     * UTF16BEToUTF8:
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @inb:  a pointer to an array of UTF-16BE passed as a byte array
     * @inlenb:  the length of @inb in bytes
     *
     * Take a block of UTF-16BE code units in and try to convert it to an
     * UTF-8 block of chars out.  The input is decoded one byte at a time
     * (high byte first), so the result depends neither on the byte order of
     * the host nor on the alignment of @inb.  A trailing odd byte is
     * ignored.  Conversion stops, without error, when fewer than six bytes
     * of output head-room remain, which guarantees that @out never ends
     * with a truncated UTF-8 sequence.
     *
     * Returns 0 if success, or -2 if the transcoding fails (a high
     *     surrogate is the last unit of the input or is not followed by a
     *     low surrogate)
     * The value of *@inlenb after return is the number of octets consumed.
     * The value of *@outlen after return is the number of octets produced.
     */
    static int
    UTF16BEToUTF8(unsigned char* out, int *outlen,
                const unsigned char* inb, int *inlenb)
    {
        unsigned char* outstart = out;
        unsigned char* outend = out + *outlen;
        const unsigned char* in = inb;
        const unsigned char* processed = inb;
        const unsigned char* inend;
        unsigned int c, d;
        int bits;

        if ((*inlenb % 2) == 1)
            (*inlenb)--;
        inend = inb + *inlenb;
        /*
         * Same head-room rule as the little-endian decoder: a character is
         * started only if its longest possible encoding fits.
         */
        while ((in < inend) && (out - outstart + 5 < *outlen)) {
            c = (((unsigned int) in[0]) << 8) | in[1];
            in += 2;
            if ((c & 0xFC00) == 0xD800) {    /* surrogates */
                if (in >= inend) {
                    *outlen = out - outstart;
                    *inlenb = processed - inb;
                    return(-2);
                }
                d = (((unsigned int) in[0]) << 8) | in[1];
                in += 2;
                if ((d & 0xFC00) == 0xDC00) {
                    c &= 0x03FF;
                    c <<= 10;
                    c |= d & 0x03FF;
                    c += 0x10000;
                } else {
                    *outlen = out - outstart;
                    *inlenb = processed - inb;
                    return(-2);
                }
            }

            /* assertion: c is a single UCS-4 value */
            if (out >= outend)
                break;
            if      (c <    0x80) {  *out++=  c;                bits= -6; }
            else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
            else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
            else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }

            /* continuation bytes, 6 bits each, most significant first */
            for ( ; bits >= 0; bits-= 6) {
                if (out >= outend)
                    break;
                *out++= ((c >> bits) & 0x3F) | 0x80;
            }
            processed = in;
        }
        *outlen = out - outstart;
        *inlenb = processed - inb;
        return(0);
    }
    
    /**
     * UTF8ToUTF16BE:
     * @outb:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @outb
     * @in:  a pointer to an array of UTF-8 chars
     * @inlen:  the length of @in
     *
     * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
     * block of chars out.  The output is written one byte at a time (high
     * byte first), so the result depends neither on the byte order of the
     * host nor on the alignment of @outb.  When @in is NULL the call is an
     * initialization request and the Byte Order Mark FE FF is emitted if
     * there is room for it.  An input ending in the middle of a sequence,
     * or an exhausted output buffer, stops the conversion without error.
     *
     * Returns 0 if success, 2 when the Byte Order Mark was written, or -2
     *     if the transcoding failed (badly formed UTF-8)
     * The value of *@inlen after return is the number of octets consumed.
     * The value of *@outlen after return is the number of octets produced.
     */
    static int
    UTF8ToUTF16BE(unsigned char* outb, int *outlen,
                const unsigned char* in, int *inlen)
    {
        unsigned char* out = outb;
        unsigned char* outend;
        const unsigned char* instart = in;
        const unsigned char* processed = in;
        const unsigned char* inend;
        unsigned int c, d;
        int trailing;

        if (in == NULL) {
            /*
             * initialization, add the Byte Order Mark
             */
            if (*outlen >= 2) {
                outb[0] = 0xFE;
                outb[1] = 0xFF;
                *outlen = 2;
                *inlen = 0;
#ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "Added FEFF Byte Order Mark\n");
#endif
                return(2);
            }
            *outlen = 0;
            *inlen = 0;
            return(0);
        }
        inend = in + *inlen;
        /* only whole 16-bit units can be stored */
        outend = outb + (*outlen / 2) * 2;
        while (in < inend) {
            d = *in++;
            if (d < 0x80) {
                c = d;
                trailing = 0;
            } else if (d < 0xC0) {
                /* trailing byte in leading position */
                goto failed;
            } else if (d < 0xE0) {
                c = d & 0x1F;
                trailing = 1;
            } else if (d < 0xF0) {
                c = d & 0x0F;
                trailing = 2;
            } else if (d < 0xF8) {
                c = d & 0x07;
                trailing = 3;
            } else {
                /* no chance for this in UTF-16 */
                goto failed;
            }

            /* incomplete sequence: wait for more input */
            if (inend - in < trailing)
                break;

            for ( ; trailing; trailing--) {
                d = *in++;
                /* anything but a continuation byte is malformed input */
                if ((d & 0xC0) != 0x80)
                    goto failed;
                c <<= 6;
                c |= d & 0x3F;
            }

            /* assertion: c is a single UCS-4 value */
            if (c < 0x10000) {
                if (outend - out < 2)
                    break;
                out[0] = (c >> 8) & 0xFF;
                out[1] = c & 0xFF;
                out += 2;
            } else if (c < 0x110000) {
                /* needs a surrogate pair, i.e. two units */
                if (outend - out < 4)
                    break;
                c -= 0x10000;
                d = 0xD800 | (c >> 10);
                out[0] = (d >> 8) & 0xFF;
                out[1] = d & 0xFF;
                d = 0xDC00 | (c & 0x03FF);
                out[2] = (d >> 8) & 0xFF;
                out[3] = d & 0xFF;
                out += 4;
            } else
                break;
            processed = in;
        }
        *outlen = out - outb;
        /* consumed input is measured from the start of the buffer */
        *inlen = processed - instart;
        return(0);

    failed:
        /* like the success path, report the output size in bytes */
        *outlen = out - outb;
        *inlen = processed - instart;
        return(-2);
    }
    
    /************************************************************************
     *									*
     *		Generic encoding handling routines			*
     *									*
     ************************************************************************/
    
    /**
     * xmlDetectCharEncoding:
     * @in:  a pointer to the first bytes of the XML entity, ideally at least
     *       4 bytes long; NULL is accepted and yields XML_CHAR_ENCODING_NONE
     * @len:  the number of bytes available at @in
     *
     * Guess the encoding of the entity using the first bytes of the entity
     * content, following the non-normative appendix F of the XML-1.0
     * recommendation.  The 4-byte signatures are tried first, then the 3-byte
     * UTF-8 Byte Order Mark, then the 2-byte UTF-16 Byte Order Marks; a buffer
     * too short for a given group simply skips that group.
     *
     * Returns one of the XML_CHAR_ENCODING_... values, XML_CHAR_ENCODING_NONE
     * when no signature was recognized.
     */
    xmlCharEncoding
    xmlDetectCharEncoding(const unsigned char* in, int len)
    {
        /* Nothing to look at: avoid dereferencing a NULL buffer. */
        if (in == NULL)
            return(XML_CHAR_ENCODING_NONE);

        if (len >= 4) {
            /* "<" encoded as UCS-4 in the four possible byte orders */
            if ((in[0] == 0x00) && (in[1] == 0x00) &&
                (in[2] == 0x00) && (in[3] == 0x3C))
                return(XML_CHAR_ENCODING_UCS4BE);
            if ((in[0] == 0x3C) && (in[1] == 0x00) &&
                (in[2] == 0x00) && (in[3] == 0x00))
                return(XML_CHAR_ENCODING_UCS4LE);
            if ((in[0] == 0x00) && (in[1] == 0x00) &&
                (in[2] == 0x3C) && (in[3] == 0x00))
                return(XML_CHAR_ENCODING_UCS4_2143);
            if ((in[0] == 0x00) && (in[1] == 0x3C) &&
                (in[2] == 0x00) && (in[3] == 0x00))
                return(XML_CHAR_ENCODING_UCS4_3412);
            /* "<?xm" in EBCDIC */
            if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
                (in[2] == 0xA7) && (in[3] == 0x94))
                return(XML_CHAR_ENCODING_EBCDIC);
            /* "<?xm" in an ASCII compatible encoding, assume UTF-8 */
            if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
                (in[2] == 0x78) && (in[3] == 0x6D))
                return(XML_CHAR_ENCODING_UTF8);
        }
        if (len >= 3) {
            /*
             * Errata on XML-1.0 June 20 2001
             * We now allow an UTF8 encoded BOM
             */
            if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
                (in[2] == 0xBF))
                return(XML_CHAR_ENCODING_UTF8);
        }
        if (len >= 2) {
            /* UTF-16 Byte Order Mark, big endian then little endian */
            if ((in[0] == 0xFE) && (in[1] == 0xFF))
                return(XML_CHAR_ENCODING_UTF16BE);
            if ((in[0] == 0xFF) && (in[1] == 0xFE))
                return(XML_CHAR_ENCODING_UTF16LE);
        }
        return(XML_CHAR_ENCODING_NONE);
    }
    
    /**
     * xmlCleanupEncodingAliases:
     *
     * Unregisters all aliases: every stored name and alias string is freed,
     * then the alias table itself.  The table pointer is reset to NULL so
     * that later calls to the alias API start again from an empty table
     * instead of touching freed memory.
     */
    void
    xmlCleanupEncodingAliases(void) {
        int i;

        if (xmlCharEncodingAliases == NULL)
            return;

        for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
            if (xmlCharEncodingAliases[i].name != NULL)
                xmlFree((char *) xmlCharEncodingAliases[i].name);
            if (xmlCharEncodingAliases[i].alias != NULL)
                xmlFree((char *) xmlCharEncodingAliases[i].alias);
        }
        xmlCharEncodingAliasesNb = 0;
        xmlCharEncodingAliasesMax = 0;
        xmlFree(xmlCharEncodingAliases);
        /* the other alias functions test this pointer against NULL */
        xmlCharEncodingAliases = NULL;
    }
    
    /**
     * xmlGetEncodingAlias:
     * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
     *
     * Lookup an encoding name for the given alias.  The comparison is done
     * on an upper-cased copy of @alias limited to 99 characters.
     *
     * Returns the encoding name registered for @alias, or NULL if @alias is
     * NULL, no alias table exists, or the alias is not found.
     */
    const char *
    xmlGetEncodingAlias(const char *alias) {
        int i;
        char upper[100];

        if (alias == NULL)
            return(NULL);

        if (xmlCharEncodingAliases == NULL)
            return(NULL);

        /*
         * toupper() is only defined for values representable as
         * unsigned char (or EOF), so go through unsigned char.
         */
        for (i = 0;i < 99;i++) {
            upper[i] = (char) toupper((unsigned char) alias[i]);
            if (upper[i] == 0) break;
        }
        upper[i] = 0;

        /*
         * Walk down the list looking for a definition of the alias
         */
        for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
            if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
                return(xmlCharEncodingAliases[i].name);
            }
        }
        return(NULL);
    }
    
    /**
     * xmlAddEncodingAlias:
     * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
     * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
     *
     * Registers and alias @alias for an encoding named @name. Existing alias
     * will be overwritten.
     * 
     * Returns 0 in case of success, -1 in case of error
     */
    int
    xmlAddEncodingAlias(const char *name, const char *alias) {
        int i;
        char upper[100];
    
        if ((name == NULL) || (alias == NULL))
    	return(-1);
    
        for (i = 0;i < 99;i++) {
            upper[i] = toupper(alias[i]);
    	if (upper[i] == 0) break;
        }
        upper[i] = 0;
    
        if (xmlCharEncodingAliases == NULL) {
    	xmlCharEncodingAliasesNb = 0;
    	xmlCharEncodingAliasesMax = 20;
    	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 
    	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
    	if (xmlCharEncodingAliases == NULL)
    	    return(-1);
        } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
    	xmlCharEncodingAliasesMax *= 2;
    	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr) 
    	      xmlRealloc(xmlCharEncodingAliases,
    		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
        }
        /*
         * Walk down the list looking for a definition of the alias
         */
        for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
    	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
    	    /*
    	     * Replace the definition.
    	     */
    	    xmlFree((char *) xmlCharEncodingAliases[i].name);
    	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
    	    return(0);
    	}
        }
        /*
         * Add the definition
         */
        xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
        xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
        xmlCharEncodingAliasesNb++;
        return(0);
    }
    
    /**
     * xmlDelEncodingAlias:
     * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
     *
     * Unregisters an encoding alias @alias.  Aliases are stored upper-cased
     * (and limited to 99 characters) by xmlAddEncodingAlias(), so @alias is
     * normalized the same way before being compared.
     *
     * Returns 0 in case of success, -1 in case of error (NULL @alias, no
     * alias table, or alias not registered)
     */
    int
    xmlDelEncodingAlias(const char *alias) {
        int i;
        char upper[100];

        if (alias == NULL)
            return(-1);

        if (xmlCharEncodingAliases == NULL)
            return(-1);

        /* same normalization as the one applied when the alias was added */
        for (i = 0;i < 99;i++) {
            upper[i] = (char) toupper((unsigned char) alias[i]);
            if (upper[i] == 0) break;
        }
        upper[i] = 0;

        /*
         * Walk down the list looking for a definition of the alias
         */
        for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
            if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
                xmlFree((char *) xmlCharEncodingAliases[i].name);
                xmlFree((char *) xmlCharEncodingAliases[i].alias);
                xmlCharEncodingAliasesNb--;
                /* close the gap left by the removed entry */
                memmove(&xmlCharEncodingAliases[i],
                        &xmlCharEncodingAliases[i + 1],
                        sizeof(xmlCharEncodingAlias) *
                            (xmlCharEncodingAliasesNb - i));
                return(0);
            }
        }
        return(-1);
    }
    
    /**
     * xmlParseCharEncoding:
     * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
     *
     * Compare the string to the encoding schemes already known. Note
     * that the comparison is case insensitive accordingly to the section
     * [XML] 4.3.3 Character Encoding in Entities.  Registered aliases are
     * resolved first; only the first 499 characters of the name are used.
     *
     * Returns one of the XML_CHAR_ENCODING_... values: XML_CHAR_ENCODING_NONE
     * if @name is NULL or empty, XML_CHAR_ENCODING_ERROR if it is not
     * recognized.
     */
    xmlCharEncoding
    xmlParseCharEncoding(const char* name)
    {
        /*
         * NOTE: for UTF-16 and UCS-4, if we were able to parse this, the
         *       endianness is already found and in use, so the little
         *       endian value is returned as a generic answer.
         */
        static const struct {
            const char *name;
            xmlCharEncoding enc;
        } known[] = {
            { "UTF-8", XML_CHAR_ENCODING_UTF8 },
            { "UTF8", XML_CHAR_ENCODING_UTF8 },
            { "UTF-16", XML_CHAR_ENCODING_UTF16LE },
            { "UTF16", XML_CHAR_ENCODING_UTF16LE },
            { "ISO-10646-UCS-2", XML_CHAR_ENCODING_UCS2 },
            { "UCS-2", XML_CHAR_ENCODING_UCS2 },
            { "UCS2", XML_CHAR_ENCODING_UCS2 },
            { "ISO-10646-UCS-4", XML_CHAR_ENCODING_UCS4LE },
            { "UCS-4", XML_CHAR_ENCODING_UCS4LE },
            { "UCS4", XML_CHAR_ENCODING_UCS4LE },
            { "ISO-8859-1", XML_CHAR_ENCODING_8859_1 },
            { "ISO-LATIN-1", XML_CHAR_ENCODING_8859_1 },
            { "ISO LATIN 1", XML_CHAR_ENCODING_8859_1 },
            { "ISO-8859-2", XML_CHAR_ENCODING_8859_2 },
            { "ISO-LATIN-2", XML_CHAR_ENCODING_8859_2 },
            { "ISO LATIN 2", XML_CHAR_ENCODING_8859_2 },
            { "ISO-8859-3", XML_CHAR_ENCODING_8859_3 },
            { "ISO-8859-4", XML_CHAR_ENCODING_8859_4 },
            { "ISO-8859-5", XML_CHAR_ENCODING_8859_5 },
            { "ISO-8859-6", XML_CHAR_ENCODING_8859_6 },
            { "ISO-8859-7", XML_CHAR_ENCODING_8859_7 },
            { "ISO-8859-8", XML_CHAR_ENCODING_8859_8 },
            { "ISO-8859-9", XML_CHAR_ENCODING_8859_9 },
            { "ISO-2022-JP", XML_CHAR_ENCODING_2022_JP },
            { "SHIFT_JIS", XML_CHAR_ENCODING_SHIFT_JIS },
            { "EUC-JP", XML_CHAR_ENCODING_EUC_JP }
        };
        const char *alias;
        char upper[500];
        int i;
        unsigned int k;

        if (name == NULL)
            return(XML_CHAR_ENCODING_NONE);

        /*
         * Do the alias resolution
         */
        alias = xmlGetEncodingAlias(name);
        if (alias != NULL)
            name = alias;

        /* toupper() needs a value representable as unsigned char */
        for (i = 0;i < 499;i++) {
            upper[i] = (char) toupper((unsigned char) name[i]);
            if (upper[i] == 0) break;
        }
        upper[i] = 0;

        if (upper[0] == 0)
            return(XML_CHAR_ENCODING_NONE);

        for (k = 0;k < sizeof(known) / sizeof(known[0]);k++) {
            if (!strcmp(upper, known[k].name))
                return(known[k].enc);
        }

    #ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
    #endif
        return(XML_CHAR_ENCODING_ERROR);
    }
    
    /**
     * xmlGetCharEncodingName:
     * @enc:  the encoding
     *
     * The "canonical" name for XML encoding.
     * C.f. http://www.w3.org/TR/REC-xml#charencoding
     * Section 4.3.3  Character Encoding in Entities
     *
     * Both UTF-16 byte orders map to "UTF-16" and all four UCS-4 byte orders
     * map to "ISO-10646-UCS-4".
     *
     * Returns the canonical name for the given encoding, or NULL for
     * XML_CHAR_ENCODING_ERROR, XML_CHAR_ENCODING_NONE,
     * XML_CHAR_ENCODING_ASCII and any value not listed below.
     */

    const char*
    xmlGetCharEncodingName(xmlCharEncoding enc) {
        const char *canonical = NULL;

        switch (enc) {
            case XML_CHAR_ENCODING_ERROR:
            case XML_CHAR_ENCODING_NONE:
            case XML_CHAR_ENCODING_ASCII:
                /* no canonical name */
                break;
            case XML_CHAR_ENCODING_UTF8:
                canonical = "UTF-8";
                break;
            case XML_CHAR_ENCODING_UTF16LE:
            case XML_CHAR_ENCODING_UTF16BE:
                canonical = "UTF-16";
                break;
            case XML_CHAR_ENCODING_EBCDIC:
                canonical = "EBCDIC";
                break;
            case XML_CHAR_ENCODING_UCS4LE:
            case XML_CHAR_ENCODING_UCS4BE:
            case XML_CHAR_ENCODING_UCS4_2143:
            case XML_CHAR_ENCODING_UCS4_3412:
                canonical = "ISO-10646-UCS-4";
                break;
            case XML_CHAR_ENCODING_UCS2:
                canonical = "ISO-10646-UCS-2";
                break;
            case XML_CHAR_ENCODING_8859_1:
                canonical = "ISO-8859-1";
                break;
            case XML_CHAR_ENCODING_8859_2:
                canonical = "ISO-8859-2";
                break;
            case XML_CHAR_ENCODING_8859_3:
                canonical = "ISO-8859-3";
                break;
            case XML_CHAR_ENCODING_8859_4:
                canonical = "ISO-8859-4";
                break;
            case XML_CHAR_ENCODING_8859_5:
                canonical = "ISO-8859-5";
                break;
            case XML_CHAR_ENCODING_8859_6:
                canonical = "ISO-8859-6";
                break;
            case XML_CHAR_ENCODING_8859_7:
                canonical = "ISO-8859-7";
                break;
            case XML_CHAR_ENCODING_8859_8:
                canonical = "ISO-8859-8";
                break;
            case XML_CHAR_ENCODING_8859_9:
                canonical = "ISO-8859-9";
                break;
            case XML_CHAR_ENCODING_2022_JP:
                canonical = "ISO-2022-JP";
                break;
            case XML_CHAR_ENCODING_SHIFT_JIS:
                canonical = "Shift-JIS";
                break;
            case XML_CHAR_ENCODING_EUC_JP:
                canonical = "EUC-JP";
                break;
        }
        return(canonical);
    }
    
    /************************************************************************
     *									*
     *			Char encoding handlers				*
     *									*
     ************************************************************************/
    
    
    /*
     * Registry of the char encoding handlers.
     *
     * The table is allocated by xmlInitCharEncodingHandlers() with room for
     * MAX_ENCODING_HANDLERS entries and released by
     * xmlCleanupCharEncodingHandlers(); nbCharEncodingHandler is the number
     * of slots currently in use.  xmlRegisterCharEncodingHandler() refuses
     * new entries once the table is full.
     */
    /* the size should be growable, but it's not a big deal ... */
    #define MAX_ENCODING_HANDLERS 50
    static xmlCharEncodingHandlerPtr *handlers = NULL;
    static int nbCharEncodingHandler = 0;
    
    /*
     * The default is UTF-8 for XML, that's also the default used for the
     * parser internals, so the default encoding handler is NULL.
     * This is the value xmlFindCharEncodingHandler() returns when asked
     * for a NULL or empty encoding name.
     */
    
    static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
    
    /**
     * xmlNewCharEncodingHandler:
     * @name:  the encoding name, in UTF-8 format (ASCII actually)
     * @input:  the xmlCharEncodingInputFunc to read that encoding
     * @output:  the xmlCharEncodingOutputFunc to write that encoding
     *
     * Create and registers an xmlCharEncodingHandler.  Aliases are resolved
     * first, and the handler keeps an upper-cased copy of the resulting name
     * (limited to 499 characters).
     *
     * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error,
     * i.e. a NULL @name or an allocation failure).
     */
    static xmlCharEncodingHandlerPtr
    xmlNewCharEncodingHandler(const char *name, 
                              xmlCharEncodingInputFunc input,
                              xmlCharEncodingOutputFunc output) {
        xmlCharEncodingHandlerPtr handler;
        const char *alias;
        char upper[500];
        int i;
        char *up = NULL;

        if (name == NULL) {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlNewCharEncodingHandler : no name !\n");
            return(NULL);
        }

        /*
         * Do the alias resolution
         */
        alias = xmlGetEncodingAlias(name);
        if (alias != NULL)
            name = alias;

        /*
         * Keep only the uppercase version of the encoding.
         * toupper() needs a value representable as unsigned char.
         */
        for (i = 0;i < 499;i++) {
            upper[i] = (char) toupper((unsigned char) name[i]);
            if (upper[i] == 0) break;
        }
        upper[i] = 0;
        up = xmlMemStrdup(upper);
        if (up == NULL) {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlNewCharEncodingHandler : out of memory !\n");
            return(NULL);
        }

        /*
         * allocate and fill-up an handler block.
         */
        handler = (xmlCharEncodingHandlerPtr)
                  xmlMalloc(sizeof(xmlCharEncodingHandler));
        if (handler == NULL) {
            /* the name copy has no owner yet, release it */
            xmlFree(up);
            xmlGenericError(xmlGenericErrorContext,
                    "xmlNewCharEncodingHandler : out of memory !\n");
            return(NULL);
        }
        handler->input = input;
        handler->output = output;
        handler->name = up;

    #ifdef LIBXML_ICONV_ENABLED
        handler->iconv_in = NULL;
        handler->iconv_out = NULL;
    #endif /* LIBXML_ICONV_ENABLED */

        /*
         * registers and returns the handler.
         */
        xmlRegisterCharEncodingHandler(handler);
    #ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "Registered encoding handler for %s\n", name);
    #endif
        return(handler);
    }
    
    /**
     * xmlInitCharEncodingHandlers:
     *
     * Initialize the char encoding support: allocate the handler table,
     * detect the endianness of the machine and register the encodings
     * supported by default.  Calling it again once the table exists is a
     * no-op.
     * NOTE: while public, this function usually doesn't need to be called
     *       in normal processing.
     */
    void
    xmlInitCharEncodingHandlers(void) {
        unsigned short int probe = 0x1234;
        unsigned char *firstByte = (unsigned char *) &probe;

        if (handlers != NULL)
            return;

        handlers = (xmlCharEncodingHandlerPtr *)
            xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));

        /* the first byte in memory of 0x1234 tells the byte order */
        switch (*firstByte) {
            case 0x12:
                xmlLittleEndian = 0;
                break;
            case 0x34:
                xmlLittleEndian = 1;
                break;
            default:
                xmlGenericError(xmlGenericErrorContext,
                        "Odd problem at endianness detection\n");
                break;
        }

        if (handlers == NULL) {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlInitCharEncodingHandlers : out of memory !\n");
            return;
        }

        /* default set of handlers, the UTF-16 ones are kept for direct access */
        xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
        xmlUTF16LEHandler =
              xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
        xmlUTF16BEHandler =
              xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
        xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
        xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
        xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
    #ifdef LIBXML_HTML_ENABLED
        xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
    #endif
    }
    
    /**
     * xmlCleanupCharEncodingHandlers:
     *
     * Cleanup the memory allocated for the char encoding support, it
     * unregisters all the encoding handlers and the aliases.
     */
    void
    xmlCleanupCharEncodingHandlers(void) {
        xmlCleanupEncodingAliases();
    
        if (handlers == NULL) return;
    
        for (;nbCharEncodingHandler > 0;) {
            nbCharEncodingHandler--;
    	if (handlers[nbCharEncodingHandler] != NULL) {
    	    if (handlers[nbCharEncodingHandler]->name != NULL)
    		xmlFree(handlers[nbCharEncodingHandler]->name);
    	    xmlFree(handlers[nbCharEncodingHandler]);
    	}
        }
        xmlFree(handlers);
        handlers = NULL;
        nbCharEncodingHandler = 0;
        xmlDefaultCharEncodingHandler = NULL;
    }
    
    /**
     * xmlRegisterCharEncodingHandler:
     * @handler:  the xmlCharEncodingHandlerPtr handler block
     *
     * Register the char encoding handler by appending it to the handler
     * table, initializing the table first if needed.  Nothing is registered
     * when @handler is NULL, when the table could not be allocated, or when
     * MAX_ENCODING_HANDLERS handlers are already registered.
     */
    void
    xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
        if (handlers == NULL) xmlInitCharEncodingHandlers();
        if (handler == NULL) {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlRegisterCharEncodingHandler: NULL handler !\n");
            return;
        }

        /*
         * The initialization reports its own out of memory error and
         * leaves the table NULL in that case: do not write through it.
         */
        if (handlers == NULL)
            return;

        if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
            xmlGenericError(xmlGenericErrorContext, 
            "xmlRegisterCharEncodingHandler: Too many handler registered\n");
            xmlGenericError(xmlGenericErrorContext,
                    "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
            return;
        }
        handlers[nbCharEncodingHandler++] = handler;
    }
    
    /**
     * xmlGetCharEncodingHandler:
     * @enc:  an xmlCharEncoding value.
     *
     * Search in the registered set the handler able to read/write that
     * encoding.  Each encoding value is mapped to up to three candidate
     * names which are looked up in order with xmlFindCharEncodingHandler();
     * the first handler found wins.  The two UTF-16 values directly return
     * the handlers recorded at initialization time.
     *
     * Returns the handler or NULL if not found
     */
    xmlCharEncodingHandlerPtr
    xmlGetCharEncodingHandler(xmlCharEncoding enc) {
        const char *candidates[3] = { NULL, NULL, NULL };
        xmlCharEncodingHandlerPtr found;
        int idx;

        if (handlers == NULL) xmlInitCharEncodingHandlers();

        switch (enc) {
            case XML_CHAR_ENCODING_ERROR:
            case XML_CHAR_ENCODING_NONE:
            case XML_CHAR_ENCODING_UTF8:
                return(NULL);
            case XML_CHAR_ENCODING_UTF16LE:
                return(xmlUTF16LEHandler);
            case XML_CHAR_ENCODING_UTF16BE:
                return(xmlUTF16BEHandler);
            case XML_CHAR_ENCODING_EBCDIC:
                candidates[0] = "EBCDIC";
                candidates[1] = "ebcdic";
                break;
            case XML_CHAR_ENCODING_UCS4BE:
            case XML_CHAR_ENCODING_UCS4LE:
                candidates[0] = "ISO-10646-UCS-4";
                candidates[1] = "UCS-4";
                candidates[2] = "UCS4";
                break;
            case XML_CHAR_ENCODING_UCS4_2143:
            case XML_CHAR_ENCODING_UCS4_3412:
                /* unusual byte orders: no name to look for */
                break;
            case XML_CHAR_ENCODING_UCS2:
                candidates[0] = "ISO-10646-UCS-2";
                candidates[1] = "UCS-2";
                candidates[2] = "UCS2";
                break;

                /*
                 * We used to keep ISO Latin encodings native in the
                 * generated data. This led to so many problems that
                 * this has been removed. One can still change this
                 * back by registering no-ops encoders for those
                 */
            case XML_CHAR_ENCODING_8859_1:
                candidates[0] = "ISO-8859-1";
                break;
            case XML_CHAR_ENCODING_8859_2:
                candidates[0] = "ISO-8859-2";
                break;
            case XML_CHAR_ENCODING_8859_3:
                candidates[0] = "ISO-8859-3";
                break;
            case XML_CHAR_ENCODING_8859_4:
                candidates[0] = "ISO-8859-4";
                break;
            case XML_CHAR_ENCODING_8859_5:
                candidates[0] = "ISO-8859-5";
                break;
            case XML_CHAR_ENCODING_8859_6:
                candidates[0] = "ISO-8859-6";
                break;
            case XML_CHAR_ENCODING_8859_7:
                candidates[0] = "ISO-8859-7";
                break;
            case XML_CHAR_ENCODING_8859_8:
                candidates[0] = "ISO-8859-8";
                break;
            case XML_CHAR_ENCODING_8859_9:
                candidates[0] = "ISO-8859-9";
                break;

            case XML_CHAR_ENCODING_2022_JP:
                candidates[0] = "ISO-2022-JP";
                break;
            case XML_CHAR_ENCODING_SHIFT_JIS:
                candidates[0] = "SHIFT-JIS";
                candidates[1] = "SHIFT_JIS";
                candidates[2] = "Shift_JIS";
                break;
            case XML_CHAR_ENCODING_EUC_JP:
                candidates[0] = "EUC-JP";
                break;
            default:
                break;
        }

        /* try the candidate names in order, stop at the first hit */
        for (idx = 0;(idx < 3) && (candidates[idx] != NULL);idx++) {
            found = xmlFindCharEncodingHandler(candidates[idx]);
            if (found != NULL)
                return(found);
        }

    #ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "No handler found for encoding %d\n", enc);
    #endif
        return(NULL);
    }
    
    /**
     * xmlFindCharEncodingHandler:
     * @name:  a string describing the char encoding.
     *
     * Search in the registered set the handler able to read/write that
     * encoding.  The lookup proceeds as follows:
     *   - a NULL or empty @name yields the default handler;
     *   - aliases are resolved and the upper-cased name (limited to 99
     *     characters) is compared with the registered handlers;
     *   - when iconv support is compiled in, a new handler wrapping a pair
     *     of iconv converters is allocated if iconv knows the encoding;
     *     that handler is not added to the registered set;
     *   - finally the canonical name of the encoding, if it differs from
     *     @name, is tried recursively.
     *
     * Returns the handler or NULL if not found
     */
    xmlCharEncodingHandlerPtr
    xmlFindCharEncodingHandler(const char *name) {
        const char *nalias;
        const char *norig;
        xmlCharEncoding alias;
    #ifdef LIBXML_ICONV_ENABLED
        xmlCharEncodingHandlerPtr enc;
        iconv_t icv_in, icv_out;
    #endif /* LIBXML_ICONV_ENABLED */
        char upper[100];
        int i;

        if (handlers == NULL) xmlInitCharEncodingHandlers();
        if (name == NULL) return(xmlDefaultCharEncodingHandler);
        if (name[0] == 0) return(xmlDefaultCharEncodingHandler);

        /*
         * Do the alias resolution
         */
        norig = name;
        nalias = xmlGetEncodingAlias(name);
        if (nalias != NULL)
            name = nalias;

        /*
         * Check first for directly registered encoding names
         * (toupper() needs a value representable as unsigned char)
         */
        for (i = 0;i < 99;i++) {
            upper[i] = (char) toupper((unsigned char) name[i]);
            if (upper[i] == 0) break;
        }
        upper[i] = 0;

        for (i = 0;i < nbCharEncodingHandler; i++)
            if (!strcmp(upper, handlers[i]->name)) {
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "Found registered handler for encoding %s\n", name);
    #endif
                return(handlers[i]);
            }

    #ifdef LIBXML_ICONV_ENABLED
        /* check whether iconv can handle this */
        icv_in = iconv_open("UTF-8", name);
        icv_out = iconv_open(name, "UTF-8");
        if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
            enc = (xmlCharEncodingHandlerPtr)
                  xmlMalloc(sizeof(xmlCharEncodingHandler));
            if (enc == NULL) {
                iconv_close(icv_in);
                iconv_close(icv_out);
                return(NULL);
            }
            enc->name = xmlMemStrdup(name);
            if (enc->name == NULL) {
                /* do not hand out a half built handler */
                xmlFree(enc);
                iconv_close(icv_in);
                iconv_close(icv_out);
                return(NULL);
            }
            enc->input = NULL;
            enc->output = NULL;
            enc->iconv_in = icv_in;
            enc->iconv_out = icv_out;
    #ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "Found iconv handler for encoding %s\n", name);
    #endif
            return enc;
        } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
            xmlGenericError(xmlGenericErrorContext,
                    "iconv : problems with filters for '%s'\n", name);
            /* only one direction opened: release it, it is of no use */
            if (icv_in != (iconv_t) -1)
                iconv_close(icv_in);
            if (icv_out != (iconv_t) -1)
                iconv_close(icv_out);
        }
    #endif /* LIBXML_ICONV_ENABLED */

    #ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "No handler found for encoding %s\n", name);
    #endif

        /*
         * Fallback using the canonical names
         */
        alias = xmlParseCharEncoding(norig);
        if (alias != XML_CHAR_ENCODING_ERROR) {
            const char* canon;
            canon = xmlGetCharEncodingName(alias);
            /* recursing on the very same name would never end */
            if ((canon != NULL) && (strcmp(name, canon))) {
                return(xmlFindCharEncodingHandler(canon));
            }
        }

        return(NULL);
    }
    
    /************************************************************************
     *									*
     *		ICONV based generic conversion functions		*
     *									*
     ************************************************************************/
    
    #ifdef LIBXML_ICONV_ENABLED
    /**
     * xmlIconvWrapper:
     * @cd:		iconv converter data structure
     * @out:  a pointer to an array of bytes to store the result
     * @outlen:  the length of @out
     * @in:  a pointer to an array of bytes to convert
     * @inlen:  the length of @in
     *
     * Run a single iconv() call from @in to @out and translate its outcome
     * into the return codes used by the encoding handlers.
     *
     * Returns 0 if success, or 
     *     -1 by lack of space (errno is E2BIG), or
     *     -2 if the transcoding fails (errno is EILSEQ: the input is not
     *        valid or the result can't fit into the target encoding), or
     *     -3 for any other failure, including input left unconverted
     *        (e.g. an incomplete sequence at the end of @in).
     *     
     * The value of @inlen after return is the number of octets consumed,
     * the value of @outlen after return is the number of octets produced;
     * both are set to 0 when @in is NULL.
     */
    static int
    xmlIconvWrapper(iconv_t cd,
        unsigned char *out, int *outlen,
        const unsigned char *in, int *inlen) {

        size_t inleft = *inlen;
        size_t outleft = *outlen;
        const char *src = (const char *) in;
        char *dst = (char *) out;
        int rc;

        rc = iconv(cd, (char **) &src, &inleft, &dst, &outleft);

        /* report how much was actually consumed and produced */
        if (in == NULL) {
            *inlen = 0;
            *outlen = 0;
        } else {
            *inlen -= inleft;
            *outlen -= outleft;
        }

        /* everything converted and no error reported */
        if ((inleft == 0) && (rc != -1))
            return 0;

    #ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
    #endif
    #ifdef E2BIG
        if (errno == E2BIG)
            return -1;
    #endif
        /* EINVAL and anything else */
        return -3;
    }
    #endif /* LIBXML_ICONV_ENABLED */
    
    /************************************************************************
     *									*
     *		The real API used by libxml for on-the-fly conversion	*
     *									*
     ************************************************************************/
    
    /**
     * xmlCharEncFirstLine:
     * @handler:	char encoding transformation data structure
     * @out:  an xmlBuffer for the output.
     * @in:  an xmlBuffer for the input
     *     
     * Front-end for the encoding handler input function, but handle only
     * the very first line, i.e. limit itself to 45 bytes of output.  The
     * converted bytes are appended to @out (which is kept 0 terminated) and
     * the consumed bytes are removed from @in.  The output limit is never
     * larger than the space really available in @out.
     *     
     * Returns the value returned by the conversion function, where "lack of
     *     space" (-1) and "input not on a character boundary" (-3) are
     *     reported as 0, or
     *     -1 general error (NULL argument, no room at all in @out)
     *     -2 if the transcoding fails (for *in is not valid utf8 string or
     *        the result of transformation can't fit into the encoding we
     *        want), or if @handler has no input conversion at all
     */
    int
    xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                     xmlBufferPtr in) {
        int ret = -2;
        int written;
        int toconv;
        int limit = 45;

        if (handler == NULL) return(-1);
        if (out == NULL) return(-1);
        if (in == NULL) return(-1);

        written = out->size - out->use;
        toconv = in->use;
        if (toconv * 2 >= written) {
            xmlBufferGrow(out, toconv);
        }

        /*
         * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
         * 45 chars should be sufficient to reach the end of the encoding
         * declaration without going too far inside the document content.
         *
         * The conversion function trusts the length it is given, so make
         * sure @out really has room for that many bytes plus the final 0,
         * and shrink the limit if the buffer could not be grown.
         */
        written = out->size - out->use;
        if (written <= limit) {
            xmlBufferGrow(out, limit);
            written = out->size - out->use;
        }
        written--;		/* keep one byte for the terminating 0 */
        if (written < 1)
            return(-1);
        if (written > limit)
            written = limit;

        if (handler->input != NULL) {
            ret = handler->input(&out->content[out->use], &written,
                                 in->content, &toconv);
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
        }
    #ifdef LIBXML_ICONV_ENABLED
        else if (handler->iconv_in != NULL) {
            ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                                  &written, in->content, &toconv);
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
            /* stopping for lack of space is expected here */
            if (ret == -1) ret = -3;
        }
    #endif /* LIBXML_ICONV_ENABLED */
    #ifdef DEBUG_ENCODING
        switch (ret) {
            case 0:
                xmlGenericError(xmlGenericErrorContext,
                        "converted %d bytes to %d bytes of input\n",
                        toconv, written);
                break;
            case -1:
                xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
                        toconv, written, in->use);
                break;
            case -2:
                xmlGenericError(xmlGenericErrorContext,
                        "input conversion failed due to input error\n");
                break;
            case -3:
                xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
                        toconv, written, in->use);
                break;
            default:
                xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
        }
    #endif /* DEBUG_ENCODING */
        /*
         * Ignore when input buffer is not on a boundary
         */
        if (ret == -3) ret = 0;
        if (ret == -1) ret = 0;
        return(ret);
    }
    
    /**
     * xmlCharEncInFunc:
     * @handler:	char encoding transformation data structure
     * @out:  an xmlBuffer for the output.
     * @in:  an xmlBuffer for the input
     *
     * Generic front-end for the encoding handler input function.
     * The input consumed by the conversion is removed from @in and the
     * result is appended to @out, which is kept zero terminated.
     *
     * Returns the value reported by the conversion routine (0 when @in is
     * empty, and an iconv partial conversion is reported as 0), or
     *     -1 general error (NULL argument, no room available in @out)
     *     -2 if the transcoding fails (for *in is not valid utf8 string or
     *        the result of transformation can't fit into the encoding we want),
     *        or if the handler provides no input conversion routine
     */
    int
    xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
                     xmlBufferPtr in)
    {
        int ret = -2;
        int written;
        int toconv;

        if (handler == NULL)
            return (-1);
        if (out == NULL)
            return (-1);
        if (in == NULL)
            return (-1);

        toconv = in->use;
        if (toconv == 0)
            return (0);
        written = out->size - out->use;
        if (toconv * 2 >= written) {
            xmlBufferGrow(out, out->size + toconv * 2);
        }
        /*
         * One byte is always kept aside for the 0 stored after the converted
         * data. The test also catches a failed grow on a full buffer.
         */
        if (out->size <= out->use)
            return (-1);
        written = out->size - out->use - 1;

        if (handler->input != NULL) {
            ret = handler->input(&out->content[out->use], &written,
                                 in->content, &toconv);
            /* drop what was consumed, account for what was produced */
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
        }
    #ifdef LIBXML_ICONV_ENABLED
        else if (handler->iconv_in != NULL) {
            ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                                  &written, in->content, &toconv);
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
            /* iconv stopping early is flagged with a code of its own */
            if (ret == -1)
                ret = -3;
        }
    #endif /* LIBXML_ICONV_ENABLED */
        switch (ret) {
            case 0:
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                                "converted %d bytes to %d bytes of input\n",
                                toconv, written);
    #endif
                break;
            case -1:
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                             "converted %d bytes to %d bytes of input, %d left\n",
                                toconv, written, in->use);
    #endif
                break;
            case -3:
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                            "converted %d bytes to %d bytes of input, %d left\n",
                                toconv, written, in->use);
    #endif
                break;
            case -2: {
                /*
                 * Dump the first bytes left in the input; positions past the
                 * used part of the buffer are shown as 0 instead of being read.
                 */
                unsigned int dump[4] = { 0, 0, 0, 0 };
                unsigned int i;

                for (i = 0; (i < 4) && (i < in->use); i++)
                    dump[i] = in->content[i];
                xmlGenericError(xmlGenericErrorContext,
                                "input conversion failed due to input error\n");
                xmlGenericError(xmlGenericErrorContext,
                                "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                                dump[0], dump[1], dump[2], dump[3]);
                break;
            }
            default:
                break;
        }
        /*
         * Ignore when input buffer is not on a boundary
         */
        if (ret == -3)
            ret = 0;
        return (ret);
    }
    
    /**
     * xmlCharEncOutFunc:
     * @handler:	char encoding transformation data structure
     * @out:  an xmlBuffer for the output.
     * @in:  an xmlBuffer for the input
     *
     * Generic front-end for the encoding handler output function.
     * A first call with @in == NULL has to be made first to initiate the
     * output in case of non-stateless encoding needing to initiate their
     * state or the output (like the BOM in UTF16).
     * In case of UTF8 sequence conversion errors for the given encoder,
     * the content will be automatically remapped to a CharRef sequence.
     * The input consumed by the conversion is removed from @in and the
     * result is appended to @out, which is kept zero terminated.
     *
     * Returns the value reported by the conversion routine (always 0 for the
     * initialization call and when @in is empty), or
     *     -1 general error (NULL argument, no output routine, no room in @out)
     *     -2 if the transcoding fails (for *in is not valid utf8 string or
     *        the result of transformation can't fit into the encoding we want), or
     *     -3 if iconv stopped without producing any output
     */
    int
    xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                      xmlBufferPtr in) {
        int ret = -2;
        int written;
        int toconv;

        if (handler == NULL) return(-1);
        if (out == NULL) return(-1);

    retry:

        /*
         * Free room in the output, minus the byte needed for the 0 stored
         * after the converted data.
         */
        written = out->size - out->use;
        if (written > 0)
            written--;

        /*
         * First specific handling of in = NULL, i.e. the initialization call
         */
        if (in == NULL) {
            toconv = 0;
            if (handler->output != NULL) {
                ret = handler->output(&out->content[out->use], &written,
                                      NULL, &toconv);
                out->use += written;
                out->content[out->use] = 0;
            }
    #ifdef LIBXML_ICONV_ENABLED
            else if (handler->iconv_out != NULL) {
                ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
                                      &written, NULL, &toconv);
                out->use += written;
                out->content[out->use] = 0;
            }
    #endif /* LIBXML_ICONV_ENABLED */
    #ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "initialized encoder\n");
    #endif
            /* the result of the initialization is deliberately not reported */
            return(0);
        }

        /*
         * Conversion itself.
         */
        toconv = in->use;
        if (toconv == 0)
            return(0);
        if (toconv * 2 >= written) {
            xmlBufferGrow(out, toconv * 2);
            /* a failed grow on a full buffer leaves nothing to write into */
            if (out->size <= out->use)
                return(-1);
            written = out->size - out->use - 1;
        }
        if (handler->output != NULL) {
            ret = handler->output(&out->content[out->use], &written,
                                  in->content, &toconv);
            /* drop what was consumed, account for what was produced */
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
        }
    #ifdef LIBXML_ICONV_ENABLED
        else if (handler->iconv_out != NULL) {
            ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
                                  &written, in->content, &toconv);
            xmlBufferShrink(in, toconv);
            out->use += written;
            out->content[out->use] = 0;
            if (ret == -1) {
                if (written > 0) {
                    /*
                     * Can be a limitation of iconv: some output was
                     * produced, so try again with the remaining input.
                     */
                    goto retry;
                }
                ret = -3;
            }
        }
    #endif /* LIBXML_ICONV_ENABLED */
        else {
            xmlGenericError(xmlGenericErrorContext,
                    "xmlCharEncOutFunc: no output function !\n");
            return(-1);
        }

        /*
         * Attempt to handle error cases
         */
        switch (ret) {
            case 0:
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "converted %d bytes to %d bytes of output\n",
                        toconv, written);
    #endif
                break;
            case -1:
    #ifdef DEBUG_ENCODING
                xmlGenericError(xmlGenericErrorContext,
                        "output conversion failed by lack of space\n");
    #endif
                break;
            case -3:
                xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                        toconv, written, in->use);
                break;
            case -2: {
                int len = in->use;
                const xmlChar *utf = (const xmlChar *) in->content;
                /*
                 * First bytes left in the input, for the diagnostics below;
                 * positions past the used part of the buffer are shown as 0
                 * instead of being read.
                 */
                unsigned int dump[4] = { 0, 0, 0, 0 };
                unsigned int i;
                int cur;

                for (i = 0; (i < 4) && (i < in->use); i++)
                    dump[i] = in->content[i];

                cur = xmlGetUTF8Char(utf, &len);
                if (cur > 0) {
                    /* "&#x" + at most 8 hex digits + ";" + 0 fits easily */
                    xmlChar charref[20];

    #ifdef DEBUG_ENCODING
                    xmlGenericError(xmlGenericErrorContext,
                            "handling output conversion error\n");
                    xmlGenericError(xmlGenericErrorContext,
                            "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                            dump[0], dump[1], dump[2], dump[3]);
    #endif
                    /*
                     * Removes the UTF8 sequence, and replace it by a charref
                     * and continue the transcoding phase, hoping the error
                     * did not mangle the encoder state.
                     */
                    sprintf((char *) charref, "&#x%X;", cur);
                    xmlBufferShrink(in, len);
                    xmlBufferAddHead(in, charref, -1);

                    goto retry;
                } else {
                    xmlGenericError(xmlGenericErrorContext,
                            "output conversion failed due to conv error\n");
                    xmlGenericError(xmlGenericErrorContext,
                            "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                            dump[0], dump[1], dump[2], dump[3]);
                    /* overwrite the offending byte with a blank */
                    in->content[0] = ' ';
                }
                break;
            }
            default:
                break;
        }
        return(ret);
    }
    
    /**
     * xmlCharEncCloseFunc:
     * @handler:	char encoding transformation data structure
     *
     * Generic front-end for the encoding handler close function.
     * Only handlers carrying an iconv descriptor are released: their name,
     * their iconv descriptors and the handler block itself are freed, so
     * @handler must not be used afterwards. Other handlers are left untouched.
     *
     * Returns 0 if success, or -1 in case of error (NULL handler, handler
     * without a name, or iconv_close() failure)
     */
    int
    xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
        int status = 0;

        if ((handler == NULL) || (handler->name == NULL))
            return(-1);

    #ifdef LIBXML_ICONV_ENABLED
        /*
         * Iconv handlers can be used only once: free the whole block
         * and the associated iconv resources.
         */
        if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
            /* name is known to be non NULL at this point */
            xmlFree(handler->name);
            handler->name = NULL;

            if (handler->iconv_out != NULL) {
                if (iconv_close(handler->iconv_out) != 0)
                    status = -1;
                handler->iconv_out = NULL;
            }

            if (handler->iconv_in != NULL) {
                if (iconv_close(handler->iconv_in) != 0)
                    status = -1;
                handler->iconv_in = NULL;
            }

            xmlFree(handler);
        }
    #endif /* LIBXML_ICONV_ENABLED */

    #ifdef DEBUG_ENCODING
        if (status != 0) {
            xmlGenericError(xmlGenericErrorContext,
                    "failed to close the encoding handler\n");
        } else {
            xmlGenericError(xmlGenericErrorContext,
                    "closed the encoding handler\n");
        }
    #endif

        return(status);
    }