Edit

kc3-lang/freetype/src/tools/afblue.pl

Branch :

  • Show log

    Commit

  • Author : Werner Lemberg
    Date : 2014-04-15 16:47:05
    Hash : 181fd071
    Message : [afblue.pl]: Minor improvements. * src/tools/afblue.pl: Allow whitespace before comments. Ignore whitespace in strings.

  • src/tools/afblue.pl
  • #! /usr/bin/perl -w
    # -*- Perl -*-
    #
    # afblue.pl
    #
    # Process a blue zone character data file.
    #
    # Copyright 2013, 2014 by
    # David Turner, Robert Wilhelm, and Werner Lemberg.
    #
    # This file is part of the FreeType project, and may only be used,
    # modified, and distributed under the terms of the FreeType project
    # license, LICENSE.TXT.  By continuing to use, modify, or distribute
    # this file you indicate that you have read the license and
    # understand and accept it fully.
    
    use strict;
    use warnings;
    use English '-no_match_vars';
    use open ':std', ':encoding(UTF-8)';
    
    
    my $prog = $PROGRAM_NAME;
    $prog =~ s| .* / ||x;      # Remove path.
    
    die "usage: $prog datafile < infile > outfile\n" if $#ARGV != 0;
    
    
    my $datafile = $ARGV[0];
    
    my %diversions;        # The extracted and massaged data from `datafile'.
    my @else_stack;        # Booleans to track else-clauses.
    my @name_stack;        # Stack of integers used for names of aux. variables.
    
    my $curr_enum;         # Name of the current enumeration.
    my $curr_array;        # Name of the current array.
    my $curr_max;          # Name of the current maximum value.
    
    my $curr_enum_element; # Name of the current enumeration element.
    my $curr_offset;       # The offset relative to current aux. variable.
    my $curr_elem_size;    # The size of the current string or block.
    
    my $have_sections = 0; # Boolean; set if start of a section has been seen.
    my $have_strings;      # Boolean; set if current section contains strings.
    my $have_blocks;       # Boolean; set if current section contains blocks.
    
    my $have_enum_element; # Boolean; set if we have an enumeration element.
    my $in_string;         # Boolean; set if a string has been parsed.
    
    my $num_sections = 0;  # Number of sections seen so far.
    
    my $last_aux;          # Name of last auxiliary variable.
    
    
    # Regular expressions.
    
    # [<ws>] <enum_name> <ws> <array_name> <ws> <max_name> [<ws>] ':' [<ws>] '\n'
    my $section_re = qr/ ^ \s* (\S+) \s+ (\S+) \s+ (\S+) \s* : \s* $ /x;
    
    # [<ws>] <enum_element_name> [<ws>] '\n'
    my $enum_element_re = qr/ ^ \s* ( [A-Za-z0-9_]+ ) \s* $ /x;
    
    # '#' <preprocessor directive> '\n'
    my $preprocessor_re = qr/ ^ \# /x;
    
    # [<ws>] '/' '/' <comment> '\n'
    my $comment_re = qr| ^ \s* // |x;
    
    # empty line
    my $whitespace_only_re = qr/ ^ \s* $ /x;
    
    # [<ws>] '"' <string> '"' [<ws>] '\n'  (<string> doesn't contain newlines)
    my $string_re = qr/ ^ \s*
                           " ( (?> (?: (?> [^"\\]+ ) | \\. )* ) ) "
                           \s* $ /x;
    
    # [<ws>] '{' <block> '}' [<ws>] '\n'  (<block> can contain newlines)
    my $block_start_re = qr/ ^ \s* \{ /x;
    
    # We need the capturing group for `split' to make it return the separator
    # tokens (i.e., the opening and closing brace) also.
    my $brace_re = qr/ ( [{}] ) /x;
    
    
    sub Warn
    {
      my $message = shift;
      warn "$datafile:$INPUT_LINE_NUMBER: warning: $message\n";
    }
    
    
    sub Die
    {
      my $message = shift;
      die "$datafile:$INPUT_LINE_NUMBER: error: $message\n";
    }
    
    
    my $warned_before = 0;
    
    sub warn_before
    {
      Warn("data before first section gets ignored") unless $warned_before;
      $warned_before = 1;
    }
    
    
    sub strip_newline
    {
      chomp;
      s/ \x0D $ //x;
    }
    
    
    sub end_curr_string
    {
      # Append final null byte to string.
      if ($have_strings)
      {
        push @{$diversions{$curr_array}}, "    '\\0',\n" if $in_string;
    
        $curr_offset++;
        $in_string = 0;
      }
    }
    
    
    sub update_max_elem_size
    {
      if ($curr_elem_size)
      {
        my $max = pop @{$diversions{$curr_max}};
        $max = $curr_elem_size if $curr_elem_size > $max;
        push @{$diversions{$curr_max}}, $max;
      }
    }
    
    
    sub convert_non_ascii_char
    {
      # A UTF-8 character outside of the printable ASCII range, with possibly a
      # leading backslash character.
      my $s = shift;
    
      # Here we count characters, not bytes.
      $curr_elem_size += length $s;
    
      utf8::encode($s);
      $s = uc unpack 'H*', $s;
    
      $curr_offset += $s =~ s/\G(..)/'\\x$1', /sg;
    
      return $s;
    }
    
    
    sub convert_ascii_chars
    {
      # A series of ASCII characters in the printable range.
      my $s = shift;
    
      # We ignore spaces.
      $s =~ s/ //g;
    
      my $count = $s =~ s/\G(.)/'$1', /g;
      $curr_offset += $count;
      $curr_elem_size += $count;
    
      return $s;
    }
    
    
    sub convert_literal
    {
      my $s = shift;
      my $orig = $s;
    
      # ASCII printables and space
      my $safe_re = '\x20-\x7E';
      # ASCII printables and space, no backslash
      my $safe_no_backslash_re = '\x20-\x5B\x5D-\x7E';
    
      $s =~ s{
               (?: \\? ( [^$safe_re] )
                   | ( (?: [$safe_no_backslash_re]
                           | \\ [$safe_re] )+ ) )
             }
             {
               defined($1) ? convert_non_ascii_char($1)
                           : convert_ascii_chars($2)
             }egx;
    
       # We assume that `$orig' doesn't contain `*/'
       return $s . " /* $orig */";
    }
    
    
    sub aux_name
    {
      return "af_blue_" . $num_sections. "_" . join('_', @name_stack);
    }
    
    
    sub aux_name_next
    {
      $name_stack[$#name_stack]++;
      my $name = aux_name();
      $name_stack[$#name_stack]--;
    
      return $name;
    }
    
    
    sub enum_val_string
    {
      # Build string that holds code to save the current offset in an
      # enumeration element.
      my $aux = shift;
    
      my $add = ($last_aux eq "af_blue_" . $num_sections . "_0" )
                  ? ""
                  : "$last_aux + ";
    
      return "    $aux = $add$curr_offset,\n";
    }
    
    
    
    # Process data file.
    
    open(DATA, $datafile) || die "$prog: can't open \`$datafile': $OS_ERROR\n";
    
    while (<DATA>)
    {
      strip_newline();
    
      next if /$comment_re/;
      next if /$whitespace_only_re/;
    
      if (/$section_re/)
      {
        Warn("previous section is empty") if ($have_sections
                                              && !$have_strings
                                              && !$have_blocks);
    
        end_curr_string();
        update_max_elem_size();
    
        # Save captured groups from `section_re'.
        $curr_enum = $1;
        $curr_array = $2;
        $curr_max = $3;
    
        $curr_enum_element = "";
        $curr_offset = 0;
    
        Warn("overwriting already defined enumeration \`$curr_enum'")
          if exists($diversions{$curr_enum});
        Warn("overwriting already defined array \`$curr_array'")
          if exists($diversions{$curr_array});
        Warn("overwriting already defined maximum value \`$curr_max'")
          if exists($diversions{$curr_max});
    
        $diversions{$curr_enum} = [];
        $diversions{$curr_array} = [];
        $diversions{$curr_max} = [];
    
        push @{$diversions{$curr_max}}, 0;
    
        @name_stack = ();
        push @name_stack, 0;
    
        $have_sections = 1;
        $have_strings = 0;
        $have_blocks = 0;
    
        $have_enum_element = 0;
        $in_string = 0;
    
        $num_sections++;
        $curr_elem_size = 0;
    
        $last_aux = aux_name();
    
        next;
      }
    
      if (/$preprocessor_re/)
      {
        if ($have_sections)
        {
          # Having preprocessor conditionals complicates the computation of
          # correct offset values.  We have to introduce auxiliary enumeration
          # elements with the name `af_blue_<s>_<n1>_<n2>_...' that store
          # offsets to be used in conditional clauses.  `<s>' is the number of
          # sections seen so far, `<n1>' is the number of `#if' and `#endif'
          # conditionals seen so far in the topmost level, `<n2>' the number of
          # `#if' and `#endif' conditionals seen so far one level deeper, etc.
          # As a consequence, uneven values are used within a clause, and even
          # values after a clause, since the C standard doesn't allow the
          # redefinition of an enumeration value.  For example, the name
          # `af_blue_5_1_6' is used to construct enumeration values in the fifth
          # section after the third (second-level) if-clause within the first
          # (top-level) if-clause.  After the first top-level clause has
          # finished, `af_blue_5_2' is used.  The current offset is then
          # relative to the value stored in the current auxiliary element.
    
          if (/ ^ \# \s* if /x)
          {
            push @else_stack, 0;
    
            $name_stack[$#name_stack]++;
    
            push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
            $last_aux = aux_name();
    
            push @name_stack, 0;
    
            $curr_offset = 0;
          }
          elsif (/ ^ \# \s* elif /x)
          {
            Die("unbalanced #elif") unless @else_stack;
    
            pop @name_stack;
    
            push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
            $last_aux = aux_name();
    
            push @name_stack, 0;
    
            $curr_offset = 0;
          }
          elsif (/ ^ \# \s* else /x)
          {
            my $prev_else = pop @else_stack;
            Die("unbalanced #else") unless defined($prev_else);
            Die("#else already seen") if $prev_else;
            push @else_stack, 1;
    
            pop @name_stack;
    
            push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
            $last_aux = aux_name();
    
            push @name_stack, 0;
    
            $curr_offset = 0;
          }
          elsif (/ ^ (\# \s*) endif /x)
          {
            my $prev_else = pop @else_stack;
            Die("unbalanced #endif") unless defined($prev_else);
    
            pop @name_stack;
    
            # If there is no else-clause for an if-clause, we add one.  This is
            # necessary to have correct offsets.
            if (!$prev_else)
            {
              # Use amount of whitespace from `endif'.
              push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next())
                                               . $1 . "else\n";
              $last_aux = aux_name();
    
              $curr_offset = 0;
            }
    
            $name_stack[$#name_stack]++;
    
            push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
            $last_aux = aux_name();
    
            $curr_offset = 0;
          }
    
          # Handle (probably continued) preprocessor lines.
        CONTINUED_LOOP:
          {
            do
            {
              strip_newline();
    
              push @{$diversions{$curr_enum}}, $ARG . "\n";
              push @{$diversions{$curr_array}}, $ARG . "\n";
    
              last CONTINUED_LOOP unless / \\ $ /x;
    
            } while (<DATA>);
          }
        }
        else
        {
          warn_before();
        }
    
        next;
      }
    
      if (/$enum_element_re/)
      {
        end_curr_string();
        update_max_elem_size();
    
        $curr_enum_element = $1;
        $have_enum_element = 1;
        $curr_elem_size = 0;
    
        next;
      }
    
      if (/$string_re/)
      {
        if ($have_sections)
        {
          Die("strings and blocks can't be mixed in a section") if $have_blocks;
    
          # Save captured group from `string_re'.
          my $string = $1;
    
          if ($have_enum_element)
          {
            push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
            $have_enum_element = 0;
          }
    
          $string = convert_literal($string);
    
          push @{$diversions{$curr_array}}, "    $string\n";
    
          $have_strings = 1;
          $in_string = 1;
        }
        else
        {
          warn_before();
        }
    
        next;
      }
    
      if (/$block_start_re/)
      {
        if ($have_sections)
        {
          Die("strings and blocks can't be mixed in a section") if $have_strings;
    
          my $depth = 0;
          my $block = "";
          my $block_end = 0;
    
          # Count braces while getting the block.
        BRACE_LOOP:
          {
            do
            {
              strip_newline();
    
              foreach my $substring (split(/$brace_re/))
              {
                if ($block_end)
                {
                  Die("invalid data after last matching closing brace")
                    if $substring !~ /$whitespace_only_re/;
                }
    
                $block .= $substring;
    
                if ($substring eq '{')
                {
                  $depth++;
                }
                elsif ($substring eq '}')
                {
                  $depth--;
    
                  $block_end = 1 if $depth == 0;
                }
              }
    
              # If we are here, we have run out of substrings, so get next line
              # or exit.
              last BRACE_LOOP if $block_end;
    
              $block .= "\n";
    
            } while (<DATA>);
          }
    
          if ($have_enum_element)
          {
            push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
            $have_enum_element = 0;
          }
    
          push @{$diversions{$curr_array}}, $block . ",\n";
    
          $curr_offset++;
          $curr_elem_size++;
    
          $have_blocks = 1;
        }
        else
        {
          warn_before();
        }
    
        next;
      }
    
      # Garbage.  We weren't able to parse the data.
      Die("syntax error");
    }
    
    # Finalize data.
    end_curr_string();
    update_max_elem_size();
    
    
    # Filter stdin to stdout, replacing `@...@' templates.
    
    sub emit_diversion
    {
      my $diversion_name = shift;
      return (exists($diversions{$1})) ? "@{$diversions{$1}}"
                                       : "@" . $diversion_name . "@";
    }
    
    
    $LIST_SEPARATOR = '';
    
    my $s1 = "This file has been generated by the Perl script \`$prog',";
    my $s1len = length $s1;
    my $s2 = "using data from file \`$datafile'.";
    my $s2len = length $s2;
    my $slen = ($s1len > $s2len) ? $s1len : $s2len;
    
    print "/* " . $s1 . " " x ($slen - $s1len) . " */\n"
          . "/* " . $s2 . " " x ($slen - $s2len) . " */\n"
          . "\n";
    
    while (<STDIN>)
    {
      s/ @ ( [A-Za-z0-9_]+? ) @ / emit_diversion($1) /egx;
      print;
    }
    
    # EOF