iconv_open1.h

Branch :
Show log
Commit
Author : Bruno Haible
Date : 2024-10-22 12:28:36
Hash : f017cafc
Message : Spelling fixes. From Mike Taves <mwtoews@gmail.com> in <https://lists.gnu.org/archive/html/bug-gnu-libiconv/2024-10/msg00002.html>. * NOTES: Fix grammar. * lib/iconv_open1.h: Fix a typo in comments. * lib/iso2022_jp2.h: Likewise. * lib/iso2022_jpms.h: Fix English spelling in comments.
lib/iconv_open1.h
/*
 * Copyright (C) 1999-2008, 2011, 2018, 2020, 2023-2024 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */

/* Part 1 of iconv_open: resolve the two encoding names.
   Reads:
     const char* tocode, const char* fromcode.
   Assigns:
     unsigned int from_index;
     int from_wchar;
     unsigned int from_surface;
     unsigned int to_index;
     int to_wchar;
     unsigned int to_surface;
     int transliterate;
     int discard_ilseq;
   Control transfers to the label 'invalid' whenever a name cannot be
   resolved.

   Each name is handled by the same sequence of steps:
     1. Copy it into a local buffer, folding a-z to A-Z.  A byte >= 0x80 or
        a name that does not fit into the buffer is rejected.
     2. Strip at most one trailing suffix.  An optional "/TRANSLIT" or
        "/IGNORE" is only honored when what precedes it ends in "/" or in
        "/ZOS_UNIX"; a lone trailing "/" or "/ZOS_UNIX" is stripped as well.
        This is done for fromcode in the same way as for tocode, so a
        suffix on either name sets transliterate / discard_ilseq.
     3. An empty name, or an alias that maps to ei_local_char, is replaced
        by locale_charset() and the steps are repeated.
     4. An alias that maps to ei_local_wchar_t is mapped to a concrete
        encoding where the platform permits, see below.
 */
{
  char buf[MAX_WORD_LENGTH+9+9+1];
  const struct alias * ap;
  const char* src;
  char* tail;
  unsigned int avail;

  transliterate = 0;
  discard_ilseq = 0;
  to_surface = ICONV_SURFACE_NONE;
  from_surface = ICONV_SURFACE_NONE;

  /* Before calling aliases_lookup, the name is converted to upper case and
   * verified to be entirely ASCII (gperf is invoked with option "-7" to get
   * a smaller table).  A name that is not entirely ASCII, or that is too
   * long, is not a valid encoding name.
   */

  /* ------------------------------ tocode ------------------------------ */
  to_wchar = 0;
  for (;;) {
    char *cut;
    int saw_translit;
    int saw_ignore;
    int stripped;

    /* Step 1: upper-cased copy of tocode.  On normal exit, 'tail' points
       at the terminating NUL inside buf. */
    src = tocode;
    tail = buf;
    avail = MAX_WORD_LENGTH+9+9+1;
    for (;;) {
      unsigned char ch = (unsigned char) *src;
      if (ch >= 0x80)
        goto invalid;
      if (ch >= 'a' && ch <= 'z')
        ch -= 'a'-'A';
      *tail = ch;
      if (ch == '\0')
        break;
      /* Running out of room before seeing the NUL means: too long. */
      if (--avail == 0)
        goto invalid;
      src++;
      tail++;
    }

    /* Step 2: suffix handling.  'cut' is where the name would end if the
       optional "/TRANSLIT" or "/IGNORE" part were removed. */
    cut = tail;
    saw_translit = 0;
    saw_ignore = 0;
    if (cut-buf > 9 && memcmp(cut-9,"/TRANSLIT",9) == 0) {
      saw_translit = 1;
      cut -= 9;
    } else if (cut-buf > 7 && memcmp(cut-7,"/IGNORE",7) == 0) {
      saw_ignore = 1;
      cut -= 7;
    }
    stripped = 1;
    if (cut > buf && cut[-1] == '/') {
      tail = cut - 1;
    } else if (cut-buf >= 9 && memcmp(cut-9,"/ZOS_UNIX",9) == 0) {
      tail = cut - 9;
      to_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX;
    } else {
      /* No separator in front: leave buf and the flags untouched. */
      stripped = 0;
    }
    if (stripped) {
      *tail = '\0';
      if (saw_translit)
        transliterate = 1;
      if (saw_ignore)
        discard_ilseq = 1;
    }

    /* Step 3a: an empty name stands for the locale's charset. */
    if (buf[0] == '\0') {
      tocode = locale_charset();
      /* An older version of localcharset.c could make this loop forever;
         bail out on an empty answer. */
      if (tocode[0] == '\0')
        goto invalid;
      continue;
    }

    /* Look the name up, first in the main table, then in the secondary. */
    ap = aliases_lookup(buf,tail-buf);
    if (ap == NULL)
      ap = aliases2_lookup(buf);
    if (ap == NULL)
      goto invalid;

    /* Step 3b: alias for the locale's char encoding. */
    if (ap->encoding_index == ei_local_char) {
      tocode = locale_charset();
      /* An older version of localcharset.c could make this loop forever;
         bail out on an empty answer. */
      if (tocode[0] == '\0')
        goto invalid;
      continue;
    }

    /* Ordinary encoding: done. */
    if (ap->encoding_index != ei_local_wchar_t) {
      to_index = ap->encoding_index;
      break;
    }

    /* Step 4: alias for wchar_t.
       On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
       The same holds on native Woe32 systems and Cygwin >= 1.7, where it
       is known to be UTF-16.  */
#if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
    if (sizeof(wchar_t) == 4) {
      to_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
      to_index = ei_utf16le;
# else
      to_index = ei_utf16be;
# endif
      break;
    }
#elif __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      to_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
      to_index = ei_ucs2internal;
      break;
    }
    if (sizeof(wchar_t) == 1) {
      to_index = ei_iso8859_1;
      break;
    }
#endif
    /* No fixed mapping applied: with mbrtowc available, flag the wchar_t
       case and resolve the locale's charset instead; otherwise fail. */
#if HAVE_MBRTOWC
    to_wchar = 1;
    tocode = locale_charset();
    continue;
#else
    goto invalid;
#endif
  }

  /* ----------------------------- fromcode ----------------------------- */
  from_wchar = 0;
  for (;;) {
    char *cut;
    int saw_translit;
    int saw_ignore;
    int stripped;

    /* Step 1: upper-cased copy of fromcode.  On normal exit, 'tail' points
       at the terminating NUL inside buf. */
    src = fromcode;
    tail = buf;
    avail = MAX_WORD_LENGTH+9+9+1;
    for (;;) {
      unsigned char ch = (unsigned char) *src;
      if (ch >= 0x80)
        goto invalid;
      if (ch >= 'a' && ch <= 'z')
        ch -= 'a'-'A';
      *tail = ch;
      if (ch == '\0')
        break;
      /* Running out of room before seeing the NUL means: too long. */
      if (--avail == 0)
        goto invalid;
      src++;
      tail++;
    }

    /* Step 2: suffix handling, same rules as for tocode. */
    cut = tail;
    saw_translit = 0;
    saw_ignore = 0;
    if (cut-buf > 9 && memcmp(cut-9,"/TRANSLIT",9) == 0) {
      saw_translit = 1;
      cut -= 9;
    } else if (cut-buf > 7 && memcmp(cut-7,"/IGNORE",7) == 0) {
      saw_ignore = 1;
      cut -= 7;
    }
    stripped = 1;
    if (cut > buf && cut[-1] == '/') {
      tail = cut - 1;
    } else if (cut-buf >= 9 && memcmp(cut-9,"/ZOS_UNIX",9) == 0) {
      tail = cut - 9;
      from_surface = ICONV_SURFACE_EBCDIC_ZOS_UNIX;
    } else {
      /* No separator in front: leave buf and the flags untouched. */
      stripped = 0;
    }
    if (stripped) {
      *tail = '\0';
      if (saw_translit)
        transliterate = 1;
      if (saw_ignore)
        discard_ilseq = 1;
    }

    /* Step 3a: an empty name stands for the locale's charset. */
    if (buf[0] == '\0') {
      fromcode = locale_charset();
      /* An older version of localcharset.c could make this loop forever;
         bail out on an empty answer. */
      if (fromcode[0] == '\0')
        goto invalid;
      continue;
    }

    /* Look the name up, first in the main table, then in the secondary. */
    ap = aliases_lookup(buf,tail-buf);
    if (ap == NULL)
      ap = aliases2_lookup(buf);
    if (ap == NULL)
      goto invalid;

    /* Step 3b: alias for the locale's char encoding. */
    if (ap->encoding_index == ei_local_char) {
      fromcode = locale_charset();
      /* An older version of localcharset.c could make this loop forever;
         bail out on an empty answer. */
      if (fromcode[0] == '\0')
        goto invalid;
      continue;
    }

    /* Ordinary encoding: done. */
    if (ap->encoding_index != ei_local_wchar_t) {
      from_index = ap->encoding_index;
      break;
    }

    /* Step 4: alias for wchar_t.
       On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
       The same holds on native Woe32 systems and Cygwin >= 1.7, where it
       is known to be UTF-16.  */
#if (defined _WIN32 && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
    if (sizeof(wchar_t) == 4) {
      from_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
# if WORDS_LITTLEENDIAN
      from_index = ei_utf16le;
# else
      from_index = ei_utf16be;
# endif
      break;
    }
#elif __STDC_ISO_10646__
    if (sizeof(wchar_t) == 4) {
      from_index = ei_ucs4internal;
      break;
    }
    if (sizeof(wchar_t) == 2) {
      from_index = ei_ucs2internal;
      break;
    }
    if (sizeof(wchar_t) == 1) {
      from_index = ei_iso8859_1;
      break;
    }
#endif
    /* No fixed mapping applied: with wcrtomb available, flag the wchar_t
       case and resolve the locale's charset instead; otherwise fail. */
#if HAVE_WCRTOMB
    from_wchar = 1;
    fromcode = locale_charset();
    continue;
#else
    goto invalid;
#endif
  }
}
thodg/libiconv/lib/iconv_open1.h

Commit

thodg/libiconv /lib/iconv_open1.h