Edit

kc3-lang/libxml2/genUnicode.py

Branch :

  • Show log

    Commit

  • Author : Igor Zlatkovic
    Date : 2003-08-25 09:05:12
    Hash : 76874e45
    Message : Exportability taint of the headers

  • genUnicode.py
  • #!/usr/bin/python -u
    import sys
    import string
    import time
    
    # Names of the input data files; this string is interpolated into the
    # "Sources:" line of the banners written to the generated files.
    sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

    try:
        blocks = open("Blocks-4.txt", "r")
    except IOError:
        print("Missing Blocks-4.txt, aborting ...")
        sys.exit(1)

    # BlockNames maps a block name (with spaces removed) to a pair of
    # ("0x<start>", "0x<end>") strings. The bounds stay textual because they
    # are pasted verbatim into the generated C comparisons.
    BlockNames = {}
    for line in blocks.readlines():
        # Comment lines are recognised before stripping, i.e. only when the
        # '#' is in the very first column.
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            # Expected record shape: "<start>..<end>; <block name>"
            fields = line.split(';')
            (start, end) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (IndexError, ValueError):
            # IndexError: no ';' separated name field.
            # ValueError: the range did not split into exactly two bounds.
            print("Failed to process line: %s" % (line))
            continue
        BlockNames[name] = ("0x"+start, "0x"+end)
    blocks.close()
    print("Parsed %d blocks descriptions" % (len(BlockNames.keys())))
    
    try:
        data = open("UnicodeData-3.1.0.txt", "r")
    except IOError:
        print("Missing UnicodeData-3.1.0.txt, aborting ...")
        sys.exit(1)

    # Categories maps the third ';'-separated field of each record, and also
    # the first character of that field, to the list of code point values
    # seen for it, in file order.
    nbchar = 0
    Categories = {}
    for line in data.readlines():
        # Comment lines are recognised only when '#' is in the first column.
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            # The first field is the code point in hexadecimal. int() raises
            # ValueError on a malformed value, so a bad record is reported
            # below instead of silently yielding a bogus number.
            value = int(fields[0].strip(), 16)
            name = fields[2]
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1
        # Record the code point under the full category name ...
        Categories.setdefault(name, []).append(value)
        # ... and under its first letter; an empty category field has no
        # first letter, which is reported as a processing failure.
        if name:
            Categories.setdefault(name[0], []).append(value)
        else:
            print("Failed to process line: %s" % (line))

    # Close the UnicodeData file (the Blocks file was closed after parsing).
    data.close()
    print("Parsed %d char generating %d categories" % (nbchar, len(Categories.keys())))
    # Collapse each category's list of code points into a list of inclusive
    # (first, last) tuples, merging runs of consecutive values. A lone value
    # becomes (v, v). The values are consumed in stored order; no sorting is
    # done here.
    for cat in Categories.keys():
        values = Categories[cat]
        ranges = []
        # -1 marks "no run open yet"; it is only true before the first value.
        start = -1
        prev = -1
        for val in values:
            if start == -1:
                # First value: open the first run.
                start = prev = val
            elif val == prev + 1:
                # Contiguous with the current run: extend it.
                prev = val
            else:
                # Gap: flush the finished run and open a new one at val.
                ranges.append((start, prev))
                start = prev = val
        # Flush the run that was still open when the values ran out.
        ranges.append((start, prev))
        Categories[cat] = ranges
            
    #
    # Generate the resulting files
    #
    # Both output files are opened up front; the script exits with status 1
    # if either cannot be created.
    try:
        header = open("xmlunicode.h", "w")
    except IOError:
        print("Failed to open xmlunicode.h")
        sys.exit(1)

    try:
        output = open("xmlunicode.c", "w")
    except IOError:
        # Do not leave the already-opened header handle dangling.
        header.close()
        print("Failed to open xmlunicode.c")
        sys.exit(1)

    # Human-readable local timestamp, interpolated into both file banners.
    date = time.asctime()
    
    # Write the opening of xmlunicode.h: a comment banner carrying the
    # generation date and source file names, the __XML_UNICODE_H__ include
    # guard, the xmlversion.h include and the opening of the extern "C" block.
    header.write(
    """/*
     * xmlunicode.h: this header exports interfaces for the Unicode character APIs
     *
     * This file is automatically generated from the
     * UCS description files of the Unicode Character Database
     * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
     * using the genUnicode.py Python script.
     *
     * Generation date: %s
     * Sources: %s
     * Daniel Veillard <veillard@redhat.com>
     */
    
    #ifndef __XML_UNICODE_H__
    #define __XML_UNICODE_H__
    
    #include <libxml/xmlversion.h>
    
    #ifdef __cplusplus
    extern "C" {
    #endif
    
    """ % (date, sources));
    # Write the opening of xmlunicode.c: the same style of banner, the
    # IN_LIBXML define, the includes, and the opening of the
    # LIBXML_UNICODE_ENABLED conditional that wraps the generated code.
    output.write(
    """/*
     * xmlunicode.c: this module implements the Unicode character APIs
     *
     * This file is automatically generated from the
     * UCS description files of the Unicode Character Database
     * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
     * using the genUnicode.py Python script.
     *
     * Generation date: %s
     * Sources: %s
     * Daniel Veillard <veillard@redhat.com>
     */
    
    #define IN_LIBXML
    #include "libxml.h"
    
    #ifdef LIBXML_UNICODE_ENABLED
    
    #include <string.h>
    #include <libxml/xmlversion.h>
    #include <libxml/xmlunicode.h>
    
    """ % (date, sources));
    
    # Emit one xmlUCSIs<Block>() predicate per Unicode block, in sorted
    # block-name order: the prototype goes to the header, the documented
    # implementation to the C file. '-' is dropped from the block name to
    # form the function-name suffix.
    block_keys = sorted(BlockNames.keys())
    for block in block_keys:
        (lo, hi) = BlockNames[block]
        ident = block.replace('-', '')
        header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % ident)
        output.writelines([
            "/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (ident),
            " *\n * Check whether the character is part of %s UCS Block\n" % (block),
            " *\n * Returns 1 if true 0 otherwise\n */\n",
            "int\nxmlUCSIs%s(int code) {\n" % ident,
            "    return((code >= %s) && (code <= %s));\n" % (lo, hi),
            "}\n\n",
        ])

    # Emit xmlUCSIsBlock(), which dispatches on the block name with a chain
    # of strcmp() tests and returns -1 when no name matches.
    header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
    output.writelines([
        "/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n",
        " * @block: UCS block name\n",
        " *\n * Check whether the caracter is part of the UCS Block\n",
        " *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n",
        "int\nxmlUCSIsBlock(int code, const char *block) {\n",
    ])
    for block in block_keys:
        ident = block.replace('-', '')
        output.write("    if (!strcmp(block, \"%s\"))\n        return(xmlUCSIs%s(code));\n" %
                     (block, ident))
    output.write("    return(-1);\n}\n\n")
    
    
    # Emit one xmlUCSIsCat<Name>() predicate per category key, in sorted
    # order. Each body is a single return of the ||-joined range tests.
    cat_keys = sorted(Categories.keys())
    for name in cat_keys:
        header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
        output.writelines([
            "/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name),
            " *\n * Check whether the character is part of %s UCS Category\n" % (name),
            " *\n * Returns 1 if true 0 otherwise\n */\n",
            "int\nxmlUCSIsCat%s(int code) {\n" % name,
        ])
        # One C test per (begin, end) pair: an equality test for a single
        # code point, a bounds test for a wider range.
        terms = []
        for (begin, end) in Categories[name]:
            if begin == end:
                terms.append("(code == %s)" % (hex(begin)))
            else:
                terms.append("((code >= %s) && (code <= %s))" % (
                             hex(begin), hex(end)))
        # The "return(" opener is only written when there is at least one
        # term; the closing text is written unconditionally.
        if terms:
            output.write("    return(" + " ||\n           ".join(terms))
        output.write(");\n}\n\n")

    # Emit xmlUCSIsCat(), which dispatches on the category name with a chain
    # of strcmp() tests and returns -1 when no name matches.
    header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
    output.writelines([
        "/**\n * xmlUCSIsCat:\n * @code: UCS code point\n",
        " * @cat: UCS Category name\n",
        " *\n * Check whether the caracter is part of the UCS Category\n",
        " *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n",
        "int\nxmlUCSIsCat(int code, const char *cat) {\n",
    ])
    for name in cat_keys:
        output.write("    if (!strcmp(cat, \"%s\"))\n        return(xmlUCSIsCat%s(code));\n" %
                     (name, name))
    output.write("    return(-1);\n}\n\n")
    
    # Close the extern "C" block and the __XML_UNICODE_H__ include guard in
    # the generated header.
    header.write("""
    #ifdef __cplusplus
    }
    #endif
    #endif /* __XML_UNICODE_H__ */
    """);
    # Close the LIBXML_UNICODE_ENABLED conditional in the generated C file.
    output.write("""
    #endif /* LIBXML_UNICODE_ENABLED */
    """);
    # Close both generated files.
    header.close()
    output.close()