Edit

thodg/libiconv/lib/gentranslit.c

Branch :

  • Show log

    Commit

  • Author : Ben Noordhuis
    Date : 2016-10-01 19:24:59
    Hash : 19970423
    Message : Remove large stack requirement from gentranslit.

  • lib/gentranslit.c
    /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016 Free Software Foundation, Inc.
       This file is part of the GNU LIBICONV Library.
    
       The GNU LIBICONV Library is free software; you can redistribute it
       and/or modify it under the terms of the GNU Library General Public
       License as published by the Free Software Foundation; either version 2
       of the License, or (at your option) any later version.
    
       The GNU LIBICONV Library is distributed in the hope that it will be
       useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       Library General Public License for more details.
    
       You should have received a copy of the GNU Library General Public
       License along with the GNU LIBICONV Library; see the file COPYING.LIB.
       If not, see <http://www.gnu.org/licenses/>.  */
    
    /*
     * Generates a table of small strings, used for transliteration, from a table
     * containing lines of the form
     *   Unicode <tab> utf-8 replacement <tab> # comment
     */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>
    
    /* Sizes of the working tables used by main(). */
    enum {
      DATA_CAPACITY = 0x100000,        /* entries allocated for the string data */
      UNICODE_LIMIT = 0x110000,        /* entries allocated for uni2index */
      LINE_COUNT = UNICODE_LIMIT / 8,  /* 0x22000 "lines" of 8 code points each */
      MAX_TABLES = 0x2000,             /* page tables that can be tracked */
      SUFFIX_SIZE = 32                 /* room for "<hex page>_<counter>" + NUL */
    };

    /* One emitted page table: a run of lines [minline, maxline]. */
    struct page_table {
      int minline;    /* first line (code point / 8) covered by the table */
      int maxline;    /* last line covered by the table */
      int usecount;   /* number of code points in the range that have an entry */
      char *suffix;   /* name suffix of translit_page<suffix>, NULL if no array */
    };

    /* Prints MESSAGE and a newline to stderr, then exits with status 1. */
    static void fail (const char *message)
    {
      fprintf(stderr, "%s\n", message);
      exit(1);
    }

    /*
     * Reads the transliteration table from stdin and writes the generated C
     * tables (translit_data, the translit_page* arrays and the translit_index
     * macro) to stdout.  Takes no command line arguments.
     *
     * Exit status is 0 on success and 1 on a usage error, malformed input,
     * table overflow, memory exhaustion or a write error on stdout.
     */
    int main (int argc, char *argv[])
    {
      unsigned int *data;         /* length-prefixed replacement strings */
      int *uni2index;             /* code point -> index into data, or -1 */
      int *line;                  /* line -> table number, or -1 if unused */
      struct page_table *tables;
      int index;                  /* number of entries used in data */

      (void) argv;
      if (argc != 1)
        exit(1);

      /* Everything lives on the heap so that no large stack is required, and
         all allocations happen before any output is produced. */
      data = malloc(DATA_CAPACITY * sizeof(*data));
      uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
      line = malloc(LINE_COUNT * sizeof(*line));
      tables = malloc(MAX_TABLES * sizeof(*tables));
      if (data == NULL || uni2index == NULL || line == NULL || tables == NULL)
        fail("out of memory");

      printf("/*\n");
      printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
      printf(" * This file is part of the GNU LIBICONV Library.\n");
      printf(" *\n");
      printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
      printf(" * and/or modify it under the terms of the GNU Library General Public\n");
      printf(" * License as published by the Free Software Foundation; either version 2\n");
      printf(" * of the License, or (at your option) any later version.\n");
      printf(" *\n");
      printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
      printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
      printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
      printf(" * Library General Public License for more details.\n");
      printf(" *\n");
      printf(" * You should have received a copy of the GNU Library General Public\n");
      printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
      printf(" * If not, see <http://www.gnu.org/licenses/>.\n");
      printf(" */\n");
      printf("\n");
      printf("/*\n");
      printf(" * Transliteration table\n");
      printf(" */\n");
      printf("\n");

      /* Pass 1: parse stdin into data[] and uni2index[]. */
      {
        int c;
        int j;
        for (j = 0; j < UNICODE_LIMIT; j++)
          uni2index[j] = -1;
        index = 0;
        for (;;) {
          unsigned int code;

          c = getc(stdin);
          if (c == EOF)
            break;
          if (c == '#') {
            /* Comment line: skip to end of line. */
            do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
            continue;
          }
          ungetc(c,stdin);
          /* %x stores through an unsigned int pointer. */
          if (scanf("%x",&code) != 1)
            exit(1);
          /* The code point indexes uni2index, so it must be in range. */
          if (code >= (unsigned int) UNICODE_LIMIT)
            fail("code point out of range");
          j = (int) code;
          c = getc(stdin);
          if (c != '\t')
            exit(1);
          /* Read the UTF-8 replacement, one character per iteration, up to the
             terminating tab. */
          for (;;) {
            unsigned int wc;

            c = getc(stdin);
            if (c == EOF || c == '\n')
              exit(1);
            if (c == '\t')
              break;
            if (uni2index[j] < 0) {
              /* First character for this code point: reserve the slot that
                 will receive the string length. */
              if (index >= DATA_CAPACITY)
                fail("transliteration data table overflow");
              uni2index[j] = index;
              data[index++] = 0;
            }
            wc = (unsigned int) c;
            if (c >= 0x80) {
              /* Finish reading an UTF-8 character. */
              if (c < 0xc0)
                exit(1);
              else {
                unsigned int i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
                /* Accumulate in unsigned arithmetic: a 6-byte sequence can
                   reach bit 31, which would overflow a signed int. */
                wc &= (1u << (8-i)) - 1;
                while (--i > 0) {
                  int cc = getc(stdin);
                  if (!(cc >= 0x80 && cc < 0xc0))
                    exit(1);
                  wc = (wc << 6) | ((unsigned int) cc & 0x3f);
                }
              }
            }
            if (index >= DATA_CAPACITY)
              fail("transliteration data table overflow");
            data[index++] = wc;
          }
          /* Store the length: everything from the slot after the length entry
             up to the current end of data. */
          if (uni2index[j] >= 0)
            data[uni2index[j]] = index - uni2index[j] - 1;
          /* Ignore the rest of the line (the comment field). */
          do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        }
      }

      /* Emit the string data.  Values below 32 start a new output line. */
      printf("static const unsigned int translit_data[%d] = {",index);
      {
        int i;
        for (i = 0; i < index; i++) {
          if (data[i] < 32)
            printf("\n %3u,",data[i]);
          else if (data[i] == '\'')
            printf("'\\'',");
          else if (data[i] == '\\')
            printf("'\\\\',");
          else if (data[i] < 127)
            printf(" '%c',",(int) data[i]);
          else if (data[i] < 256)
            printf("0x%02X,",data[i]);
          else
            printf("0x%04X,",data[i]);
        }
        printf("\n};\n");
      }
      printf("\n");

      /* Pass 2: group used lines into page tables and emit them. */
      {
        int tableno;
        int i, j, p, j1, j2, t;

        /* Mark each line of 8 code points as unused (-1) or used (0 for now). */
        for (j1 = 0; j1 < LINE_COUNT; j1++) {
          bool all_invalid = true;
          for (j2 = 0; j2 < 8; j2++) {
            j = 8*j1+j2;
            if (uni2index[j] >= 0)
              all_invalid = false;
          }
          line[j1] = all_invalid ? -1 : 0;
        }

        /* Assign used lines to tables.  A line joins the previous table if it
           directly follows it, or if it lies in the same group of 32 lines and
           at most 8 lines after that table's last line. */
        tableno = 0;
        for (j1 = 0; j1 < LINE_COUNT; j1++) {
          if (line[j1] >= 0) {
            if (tableno > 0
                && ((j1 > 0 && line[j1-1] == tableno-1)
                    || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                        && j1 - tables[tableno-1].maxline <= 8))) {
              line[j1] = tableno-1;
              tables[tableno-1].maxline = j1;
            } else {
              if (tableno >= MAX_TABLES)
                fail("too many page tables");
              tableno++;
              line[j1] = tableno-1;
              tables[tableno-1].minline = tables[tableno-1].maxline = j1;
            }
          }
        }

        /* Count the code points with an entry in each table. */
        for (t = 0; t < tableno; t++) {
          tables[t].usecount = 0;
          j1 = 8*tables[t].minline;
          j2 = 8*(tables[t].maxline+1);
          for (j = j1; j < j2; j++)
            if (uni2index[j] >= 0)
              tables[t].usecount++;
        }

        /* Name the tables that get an array: "<page>" for the first table in a
           group of 32 lines, "<page>_<n>" for further ones.  The counter i is
           deliberately never reset, which keeps the generated names unchanged. */
        for (t = 0, p = -1, i = 0; t < tableno; t++) {
          if (tables[t].usecount > 1) {
            char *s = malloc(SUFFIX_SIZE);
            if (s == NULL)
              fail("out of memory");
            if (p == tables[t].minline >> 5) {
              snprintf(s, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, ++i);
            } else {
              p = tables[t].minline >> 5;
              snprintf(s, SUFFIX_SIZE, "%02x", (unsigned int) p);
            }
            tables[t].suffix = s;
          } else
            tables[t].suffix = NULL;
        }

        /* Emit one array per table that has more than one entry.
           NOTE(review): entries are indices into translit_data but the arrays
           are declared as short; nothing here checks that they fit. */
        {
          p = -1;
          for (t = 0; t < tableno; t++)
            if (tables[t].usecount > 1) {
              p = 0;
              printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
              for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
                if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
                  printf("  /* 0x%04x */\n", (unsigned int) (8*j1));
                printf(" ");
                for (j2 = 0; j2 < 8; j2++) {
                  j = 8*j1+j2;
                  printf(" %4d,", uni2index[j]);
                }
                printf(" /* 0x%02x-0x%02x */\n", (unsigned int) (8*(j1 % 0x20)), (unsigned int) (8*(j1 % 0x20)+7));
              }
              printf("};\n");
            }
          /* Blank separator line only if at least one array was emitted. */
          if (p >= 0)
            printf("\n");
        }

        /* Emit the lookup macro as a chain of conditional expressions, one per
           table, ending in -1 for code points without an entry. */
        printf("#define translit_index(wc) \\\n  (");
        for (j1 = 0; j1 < LINE_COUNT;) {
          t = line[j1];
          for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++);
          /* line[] holds only -1 or a table number, so t >= 0 means "in a table". */
          if (t >= 0) {
            /* Consistency checks, then jump to the end of the table; this
               also skips unused lines enclosed by the table. */
            if (j1 != tables[t].minline) abort();
            if (j2 > tables[t].maxline+1) abort();
            j2 = tables[t].maxline+1;

            if (tables[t].usecount == 0) abort();
            if (tables[t].usecount == 1) {
              /* A single entry is tested for directly, without an array. */
              if (j2 != j1+1) abort();
              for (j = 8*j1; j < 8*j2; j++)
                if (uni2index[j] >= 0) {
                  printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
                  break;
                }
            } else {
              if (j1 == 0) {
                printf("wc < 0x%04x", (unsigned int) (8*j2));
              } else {
                printf("wc >= 0x%04x && wc < 0x%04x", (unsigned int) (8*j1), (unsigned int) (8*j2));
              }
              printf(" ? translit_page%s[wc", tables[t].suffix);
              if (tables[t].minline > 0)
                printf("-0x%04x", (unsigned int) (8*j1));
              printf("]");
            }
            printf(" : \\\n   ");
          }
          j1 = j2;
        }
        printf("-1)\n");

        for (t = 0; t < tableno; t++)
          free(tables[t].suffix);
      }

      free(tables);
      free(line);
      free(uni2index);
      free(data);

      if (ferror(stdout) || fclose(stdout))
        exit(1);
      exit(0);
    }