Edit

thodg/libiconv/lib/gentranslit.c

Branch :

  • Show log

    Commit

  • Author : Bruno Haible
    Date : 2021-06-06 11:51:12
    Hash : 91f96be0
    Message : Change the license of the library from LGPL 2.0 to LGPL 2.1.

  • lib/gentranslit.c
  • /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
       This file is part of the GNU LIBICONV Library.
    
       The GNU LIBICONV Library is free software; you can redistribute it
       and/or modify it under the terms of the GNU Lesser General Public
       License as published by the Free Software Foundation; either version 2.1
       of the License, or (at your option) any later version.
    
       The GNU LIBICONV Library is distributed in the hope that it will be
       useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       Lesser General Public License for more details.
    
       You should have received a copy of the GNU Lesser General Public
       License along with the GNU LIBICONV Library; see the file COPYING.LIB.
       If not, see <https://www.gnu.org/licenses/>.  */
    
    /*
     * Generates a table of small strings, used for transliteration, from a table
     * containing lines of the form
     *   Unicode <tab> utf-8 replacement <tab> # comment
     */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>
    
    /* Table dimensions.  */
    enum {
      DATA_CAPACITY = 0x100000,        /* number of elements allocated for data[] */
      UNICODE_LIMIT = 0x110000,        /* code points are 0 .. UNICODE_LIMIT-1 */
      LINE_COUNT = UNICODE_LIMIT / 8,  /* a "line" is a group of 8 code points */
      SUFFIX_SIZE = 4+1+2+1            /* up to 4 hex digits, '_', 2 digits, NUL */
    };

    /* A run of lines minline..maxline that is emitted together.
       usecount is the number of code points in the run that have a
       replacement; suffix names the translit_page array (NULL if none).  */
    struct page_table { int minline; int maxline; int usecount; const char* suffix; };

    /* Prints MESSAGE and a newline to stderr, then exits with status 1.  */
    static void fail (const char *message)
    {
      fprintf(stderr, "%s\n", message);
      exit(1);
    }

    /*
     * Reads the transliteration table from stdin and writes C source to stdout:
     *   - translit_data[]: for each code point that has a replacement, a length
     *     element followed by that many replacement code points,
     *   - translit_page<suffix>[]: arrays mapping code points to indices into
     *     translit_data[] (-1 where there is no replacement),
     *   - translit_index(wc): a macro that selects the index for a code point.
     * Takes no command-line arguments.
     * Exits with status 0 on success and 1 on bad usage, malformed input,
     * exhausted memory or table space, or an I/O error.
     */
    int main (int argc, char *argv[])
    {
      unsigned int *data;
      int *uni2index;
      int *line;
      struct page_table *tables;
      int index;

      (void) argv;
      if (argc != 1)
        fail("unexpected arguments; the table is read from standard input");

      /* The large work arrays live on the heap rather than on the stack.
         tables[] gets one slot per line: a new table is started at most once
         per line, so it cannot overflow.  */
      data = malloc(DATA_CAPACITY * sizeof(*data));
      uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
      line = malloc(LINE_COUNT * sizeof(*line));
      tables = malloc(LINE_COUNT * sizeof(*tables));
      if (data == NULL || uni2index == NULL || line == NULL || tables == NULL)
        fail("out of memory");

      /* Header of the generated file.  The license version matches the one
         this generator itself is distributed under (LGPL 2.1 or later).  */
      fputs("/*\n"
            " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n"
            " * This file is part of the GNU LIBICONV Library.\n"
            " *\n"
            " * The GNU LIBICONV Library is free software; you can redistribute it\n"
            " * and/or modify it under the terms of the GNU Lesser General Public\n"
            " * License as published by the Free Software Foundation; either version 2.1\n"
            " * of the License, or (at your option) any later version.\n"
            " *\n"
            " * The GNU LIBICONV Library is distributed in the hope that it will be\n"
            " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
            " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
            " * Lesser General Public License for more details.\n"
            " *\n"
            " * You should have received a copy of the GNU Lesser General Public\n"
            " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n"
            " * If not, see <https://www.gnu.org/licenses/>.\n"
            " */\n"
            "\n"
            "/*\n"
            " * Transliteration table\n"
            " */\n"
            "\n",
            stdout);

      /* Step 1: Parse the input into data[] and uni2index[].  */
      {
        int c;
        int j;
        for (j = 0; j < UNICODE_LIMIT; j++)
          uni2index[j] = -1;
        index = 0;
        for (;;) {
          unsigned int code;

          c = getc(stdin);
          if (c == EOF)
            break;
          if (c == '#') {
            /* Comment line: skip it entirely.  */
            do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
            continue;
          }
          ungetc(c,stdin);
          /* %x requires an unsigned int; the value is range-checked before
             it is used as an array index.  */
          if (scanf("%x",&code) != 1)
            fail("malformed input: expected a hexadecimal code point");
          if (code >= UNICODE_LIMIT)
            fail("malformed input: code point out of range");
          j = (int) code;
          c = getc(stdin);
          if (c != '\t')
            fail("malformed input: expected a tab after the code point");
          /* Read the UTF-8 encoded replacement, up to the next tab.
             NOTE(review): a code point that occurs on more than one input
             line is not detected here; presumably the input never contains
             such duplicates - confirm.  */
          for (;;) {
            c = getc(stdin);
            if (c == EOF || c == '\n')
              fail("malformed input: expected a tab after the replacement");
            if (c == '\t')
              break;
            if (uni2index[j] < 0) {
              /* First character of the replacement: reserve the length
                 element, which is filled in after the loop.  */
              if (index >= DATA_CAPACITY)
                fail("too much replacement data");
              uni2index[j] = index;
              data[index++] = 0;
            }
            if (c >= 0x80) {
              /* Finish reading an UTF-8 character. */
              if (c < 0xc0)
                fail("malformed input: invalid UTF-8 lead byte");
              else {
                unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
                c &= (1 << (8-i)) - 1;
                while (--i > 0) {
                  int cc = getc(stdin);
                  if (!(cc >= 0x80 && cc < 0xc0))
                    fail("malformed input: invalid UTF-8 continuation byte");
                  c <<= 6; c |= (cc & 0x3f);
                }
              }
            }
            if (index >= DATA_CAPACITY)
              fail("too much replacement data");
            data[index++] = (unsigned int) c;
          }
          if (uni2index[j] >= 0)
            data[uni2index[j]] = index - uni2index[j] - 1;
          /* Skip the rest of the line (the trailing comment).  */
          do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        }
        /* Do not mistake a read error for a regular end of input.  */
        if (ferror(stdin))
          fail("error reading standard input");
      }

      /* Step 2: Emit translit_data[].  An element below 32 starts a new output
         line; this is the case for the length elements of short
         replacements.  */
      printf("static const unsigned int translit_data[%d] = {",index);
      {
        int i;
        for (i = 0; i < index; i++) {
          if (data[i] < 32)
            printf("\n %3u,",data[i]);
          else if (data[i] == '\'')
            printf("'\\'',");
          else if (data[i] == '\\')
            printf("'\\\\',");
          else if (data[i] < 127)
            printf(" '%c',",(int) data[i]);
          else if (data[i] < 256)
            printf("0x%02X,",data[i]);
          else
            printf("0x%04X,",data[i]);
        }
        printf("\n};\n");
      }
      printf("\n");

      /* Step 3: Group the used lines into tables and emit the page arrays
         and the translit_index macro.  */
      {
        int tableno;
        int i, j, p, j1, j2, t;

        /* Mark each line: -1 if none of its 8 code points has a replacement,
           0 (to be replaced by a table number below) otherwise.  */
        for (j1 = 0; j1 < LINE_COUNT; j1++) {
          bool all_invalid = true;
          for (j2 = 0; j2 < 8; j2++) {
            j = 8*j1+j2;
            if (uni2index[j] >= 0)
              all_invalid = false;
          }
          line[j1] = (all_invalid ? -1 : 0);
        }
        /* Assign table numbers.  A used line joins the most recent table if
           it directly follows a line of that table, or if it lies in the same
           block of 32 lines as the table's last line and at most 8 lines
           after it.  Otherwise it starts a new table.  */
        tableno = 0;
        for (j1 = 0; j1 < LINE_COUNT; j1++) {
          if (line[j1] >= 0) {
            if (tableno > 0
                && ((j1 > 0 && line[j1-1] == tableno-1)
                    || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                        && j1 - tables[tableno-1].maxline <= 8))) {
              line[j1] = tableno-1;
              tables[tableno-1].maxline = j1;
            } else {
              tableno++;
              line[j1] = tableno-1;
              tables[tableno-1].minline = tables[tableno-1].maxline = j1;
            }
          }
        }
        /* Count the code points with a replacement in each table.  */
        for (t = 0; t < tableno; t++) {
          tables[t].usecount = 0;
          j1 = 8*tables[t].minline;
          j2 = 8*(tables[t].maxline+1);
          for (j = j1; j < j2; j++)
            if (uni2index[j] >= 0)
              tables[t].usecount++;
        }
        /* Name the tables that get a page array (usecount > 1).  */
        for (t = 0, p = -1, i = 0; t < tableno; t++) {
          if (tables[t].usecount > 1) {
            char* s = malloc(SUFFIX_SIZE);
            if (s == NULL)
              fail("out of memory");
            if (p == tables[t].minline >> 5) {
              i++;
              /* i is the number of tables with the same (tables[t].minline >> 5)
                 that we have seen so far. Since the tables[t].minline values are
                 strongly monotonically increasing, there are at most 32 of them. */
              if (!(i >= 0 && i <= 32)) abort();
              snprintf(s, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, i);
            } else {
              p = tables[t].minline >> 5;
              i = 0;
              snprintf(s, SUFFIX_SIZE, "%02x", (unsigned int) p);
            }
            tables[t].suffix = s;
          } else
            tables[t].suffix = NULL;
        }
        /* Emit one translit_page array per named table.  */
        {
          p = -1;
          for (t = 0; t < tableno; t++)
            if (tables[t].usecount > 1) {
              p = 0;
              printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
              for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
                if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
                  printf("  /* 0x%04x */\n", 8*j1);
                printf(" ");
                for (j2 = 0; j2 < 8; j2++) {
                  j = 8*j1+j2;
                  printf(" %4d,", uni2index[j]);
                }
                printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
              }
              printf("};\n");
            }
          /* Separate the arrays from the macro only if any array was emitted.  */
          if (p >= 0)
            printf("\n");
        }
        /* Emit translit_index(wc) as a chain of conditional expressions, one
           per table, ending in -1.  */
        printf("#define translit_index(wc) \\\n  (");
        for (j1 = 0; j1 < LINE_COUNT;) {
          t = line[j1];
          for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++);
          if (t >= 0) {
            if (j1 != tables[t].minline) abort();
            if (j2 > tables[t].maxline+1) abort();
            /* A table may contain unused lines; step over the whole table.  */
            j2 = tables[t].maxline+1;
          }
          if (t == -1) {
          } else {
            if (t >= 0 && tables[t].usecount == 0) abort();
            if (t >= 0 && tables[t].usecount == 1) {
              /* A single code point: compare directly, no page array.  */
              if (j2 != j1+1) abort();
              for (j = 8*j1; j < 8*j2; j++)
                if (uni2index[j] >= 0) {
                  printf("wc == 0x%04x ? %d", j, uni2index[j]);
                  break;
                }
            } else {
              if (j1 == 0) {
                printf("wc < 0x%04x", 8*j2);
              } else {
                printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
              }
              printf(" ? translit_page%s[wc", tables[t].suffix);
              if (tables[t].minline > 0)
                printf("-0x%04x", 8*j1);
              printf("]");
            }
            printf(" : \\\n   ");
          }
          j1 = j2;
        }
        printf("-1)\n");
      }

      if (ferror(stdout) || fclose(stdout))
        exit(1);
      exit(0);
    }