Edit

kc3-lang/libxml2/genChRanges.py

Branch :

  • Show log

    Commit

  • Author : Daniel Veillard
    Date : 2003-11-18 20:56:51
    Hash : be586972
    Message : modified the file header to add more information, painful... updated to * include/libxml/*.h include/libxml/*.h.in: modified the file header to add more information, painful... * genChRanges.py genUnicode.py: updated to generate said changes in headers * doc/apibuild.py: extract headers, add them to libxml2-api.xml * *.html *.xsl *.xml: updated the stylesheets to flag deprecated APIs modules. Updated the stylesheets, some cleanups, regenerated * doc/html/*.html: regenerated added back book1 and libxml-lib.html Daniel

  • genChRanges.py
  • #!/usr/bin/python -u
    #
    # Portions of this script have been (shamelessly) stolen from the
    # prior work of Daniel Veillard (genUnicode.py)
    #
    # I, however, take full credit for any bugs, errors or difficulties :-)
    #
    # William Brack
    # October 2003
    #
    # 18 October 2003
    # Modified to maintain binary compatibility with previous library versions
    # by adding a suffix 'Q' ('quick') to the macro generated for the original,
    # function, and adding generation of a function (with the original name) which
    # instantiates the macro.
    #
    
    import sys
    import string
    import time
    
    #
    # A routine to take a list of yes/no (1, 0) values and turn it
    # into a list of ranges.  This will later be used to determine whether
    # to generate single-byte lookup tables, or inline comparisons
    #
    def makeRange(lst):
        """Collapse a list of yes/no (1, 0) flags into inclusive index ranges.

        Each run that starts at an entry equal to 1 and stops just before
        the next entry equal to 0 produces one (start, end) tuple.  Tuples
        are returned in ascending order; an empty list, or one containing
        no 1, gives an empty result.
        """
        ret = []
        size = len(lst)
        pos = 0
        while pos < size:
            try:
                # index() raises ValueError when no further 1 is present
                start = lst.index(1, pos)
            except ValueError:
                break                   # no more ranges, finished
            try:
                # the first 0 at or after 'start' is one past the range end
                end = lst.index(0, start)
            except ValueError:
                end = size              # no end found: range runs to the end
            ret.append((start, end - 1))
            pos = end + 1               # lst[end] is known to be 0, skip it
        return ret
    
    sources = "chvalid.def"         # input filename

    # minTableSize gives the minimum number of ranges which must be present
    # before a 256-byte lookup table is produced.  If there are fewer than
    # this number, a macro with inline comparisons is generated instead.
    minTableSize = 6

    # dictionary of functions, key=name, element contains char-map and
    # range-list
    Functs = {}

    # parser state: 0 = outside any function block, 1 = inside a 'name' block
    state = 0

    # Open the file named by 'sources' (rather than repeating the literal) so
    # the name written into the generated banners is always the file read.
    try:
        defines = open(sources, "r")
    except IOError:
        print("Missing %s, aborting ..." % sources)
        sys.exit(1)
    
    #
    # The lines in the .def file have three types:-
    #   name:   Defines a new function block
    #   ur:	    Defines individual or ranges of unicode values
    #   end:    Indicates the end of the function block
    #
    # These lines are processed below.
    #
    def _parseValue(text):
        """Convert one value token to an int.

        Accepts the three spellings used on 'ur' lines: hexadecimal with a
        '0x' prefix, a character introduced by a single quote, or decimal.
        """
        if text[0:2] == '0x':
            return int(text[2:], 16)
        if text[0] == "'":
            return ord(text[1])
        return int(text)

    for line in defines.readlines():
        # ignore lines beginning with '#', and blank lines
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            # split line into space-separated fields, then dispatch on type
            fields = line.split(' ')
            #
            # name line:
            #   validate any previous function block already ended
            #   initialize an entry in the function dictionary (unless the
            #       name is already present), including a mask table with
            #       no values yet defined
            #
            if fields[0] == 'name':
                if state != 0:
                    print("'name' %s found before previous name"
                          " completed" % (fields[1]))
                    continue
                # Only adopt the new name once the line has been accepted;
                # a rejected 'name' line must not redirect later 'ur' lines
                # to a key that was never added to Functs.
                name = fields[1]
                state = 1
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements (chdata, rangedata)
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif fields[0] == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0
            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #       individual unicode values or ranges of values
            #
            elif fields[0] == 'ur':
                if state != 1:
                    raise ValueError("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    if pos <= 0:
                        # pos <= 0 means not a range: an individual value,
                        # which is stored as a one-element range
                        value = _parseValue(el)
                        if value < 0 or value > 0x1fffff:
                            raise ValueError('Illegal value (%s) in ch for'
                                             ' name %s' % (el, name))
                        currange = (value, value)
                    else:
                        # pos > 0 means a range: split it into its
                        # first-val, last-val and validate the interval
                        (first, last) = el.split("..")
                        start = _parseValue(first)
                        end = _parseValue(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise ValueError("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now store it.
                    # We split on single-byte values vs. multibyte
                    if currange[1] < 0x100:     # single-byte
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if Functs[name][0][ch]:
                                raise ValueError(
                                    "Duplicate ch value '%s' for name '%s'"
                                    % (el, name))
                            Functs[name][0][ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise ValueError("range already defined in"
                                             " function")
                        Functs[name][1].append(currange)
        except Exception:
            # report which input line was at fault, then let the error abort
            print("Failed to process line: %s" % (line))
            raise
    # the definition file is fully consumed; release the handle
    defines.close()
    #
    # At this point, the entire definition file has been processed.  Now we
    # enter the output phase, where we generate the two files chvalid.c and
    # chvalid.h
    #
    # To do this, we first output the 'static' data (heading, fixed
    # definitions, etc.), then output the 'dynamic' data (the results
    # of the above processing), and finally output closing 'static' data
    # (e.g. the subroutine to process the ranges)
    #
    
    #
    # Generate the headings:
    #
    def _openOutput(path):
        """Open 'path' for writing; on failure report it and exit with 1."""
        try:
            return open(path, "w")
        except IOError:
            print("Failed to open %s" % path)
            sys.exit(1)

    # 'header' receives the public declarations, 'output' the C source
    header = _openOutput("include/libxml/chvalid.h")
    output = _openOutput("chvalid.c")

    # local time of this run, stamped into both generated banners
    date = time.asctime()
    
    # Write the fixed opening of chvalid.h: the banner comment (stamped with
    # the generation date and the source file name), the include guard, the
    # extern "C" opener, the short/long range and range-group structures,
    # and the prototype of xmlCharInRange.
    header.write(
    """/*
     * Summary: Unicode character range checking
     * Description: this module exports interfaces for the character
     *               range validation APIs
     *
     * This file is automatically generated from the cvs source
     * definition files using the genChRanges.py Python script
     *
     * Generation date: %s
     * Sources: %s
     * Author: William Brack <wbrack@mmm.com.hk>
     */
    
    #ifndef __XML_CHVALID_H__
    #define __XML_CHVALID_H__
    
    #include <libxml/xmlversion.h>
    
    #ifdef __cplusplus
    extern "C" {
    #endif
    
    /*
     * Define our typedefs and structures
     *
     */
    typedef struct _xmlChSRange xmlChSRange;
    typedef xmlChSRange *xmlChSRangePtr;
    struct _xmlChSRange {
        unsigned short	low;
        unsigned short	high;
    };
    
    typedef struct _xmlChLRange xmlChLRange;
    typedef xmlChLRange *xmlChLRangePtr;
    struct _xmlChLRange {
        unsigned int	low;
        unsigned int	high;
    };
    
    typedef struct _xmlChRangeGroup xmlChRangeGroup;
    typedef xmlChRangeGroup *xmlChRangeGroupPtr;
    struct _xmlChRangeGroup {
        int			nbShortRange;
        int			nbLongRange;
        xmlChSRangePtr	shortRange;	/* points to an array of ranges */
        xmlChLRangePtr	longRange;
    };
    
    /**
     * Range checking routine
     */
    XMLPUBFUN int XMLCALL
    		xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
    
    """ % (date, sources));
    # Write the fixed opening of chvalid.c: the banner comment (same date and
    # source stamps), the IN_LIBXML define and includes, and the comment that
    # introduces the single-byte lookup tables emitted afterwards.
    output.write(
    """/*
     * chvalid.c:	this module implements the character range
     *		validation APIs
     *
     * This file is automatically generated from the cvs source
     * definition files using the genChRanges.py Python script
     *
     * Generation date: %s
     * Sources: %s
     * William Brack <wbrack@mmm.com.hk>
     */
    
    #define IN_LIBXML
    #include "libxml.h"
    #include <libxml/chvalid.h>
    
    /*
     * The initial tables ({func_name}_tab) are used to validate whether a
     * single-byte character is within the specified group.  Each table
     * contains 256 bytes, with each byte representing one of the 256
     * possible characters.  If the table byte is set, the character is
     * allowed.
     *
     */
    """ % (date, sources));
    
    #
    # Now output the generated data.
    # We try to produce the best execution times.  Tests have shown that validation
    # with direct table lookup is, when there are a "small" number of valid items,
    # still not as fast as a sequence of inline compares.  So, if the single-byte
    # portion of a range has a "small" number of ranges, we output a macro for inline
    # compares, otherwise we output a 256-byte table and a macro to use it.
    #
    
    def _padToColumn(text):
        """Return 'text' followed by the tabs that position to col. 33.

        One tab is added per 8 columns still missing; nothing is added when
        'text' is already 32 or more characters long.
        """
        # floor division keeps the tab count an integer on every Python
        ntab = 4 - len(text) // 8
        if ntab < 0:
            ntab = 0
        return text + "\t" * ntab

    def _rangeTest(rg):
        """Return the C expression that tests (c) against one range tuple."""
        if rg[0] == rg[1]:              # single value - check equal
            return "((c) == 0x%x)" % rg[0]
        return "((0x%x <= (c)) && ((c) <= 0x%x))" % (rg[0], rg[1])

    fkeys = list(Functs.keys())     # names of all defined functions
    fkeys.sort()                    # put some order to our output

    for f in fkeys:
        charMap = Functs[f][0]      # 256 flags for the single-byte values
        wideRanges = Functs[f][1]   # (low, high) tuples for larger values
        hasChars = max(charMap) > 0

        # First we convert the specified single-byte values into a group of
        # ranges.  If the total number of such ranges is less than
        # minTableSize, we generate an inline macro for direct comparisons;
        # otherwise we generate a lookup table.
        if hasChars:                # only check if at least one entry
            rangeTable = makeRange(charMap)
            chDoc = """
    /**
     * %s_ch:
     * @c: char to validate
     *
     * Automatically generated by genChRanges.py
     */
    """ % f
            if len(rangeTable) >= minTableSize:     # table is worthwhile
                header.write("XMLPUBVAR unsigned char %s_tab[256];\n" % f)
                header.write(chDoc)
                header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

                # write the constant data to the code file, starting a new
                # line whenever the current one has passed 72 characters
                output.write("unsigned char %s_tab[256] = {\n" % f)
                pline = "   "
                for n in range(255):
                    pline += " 0x%02x," % charMap[n]
                    if len(pline) > 72:
                        output.write(pline + "\n")
                        pline = "   "
                output.write(pline + " 0x%02x };\n\n" % charMap[255])
            else:                   # inline check is used
                # a little optimisation - if space is present, put it at
                # the front of the list so it is checked first
                if (0x20, 0x20) in rangeTable:
                    rangeTable.remove((0x20, 0x20))
                    rangeTable.insert(0, (0x20, 0x20))

                header.write(chDoc)
                tests = []
                for rg in rangeTable:
                    if rg[0] != rg[1] and rg[1] == 0xff:
                        # since we are doing char, a range ending in 0xff
                        # only needs its lower bound checked
                        tests.append(" (0x%x <= (c))" % rg[0])
                    else:
                        tests.append(_rangeTest(rg))
                pline = _padToColumn("#define %s_ch(c)" % f) + "("
                pline += " || \\\n\t\t\t\t ".join(tests) + ")\n"
                header.write(pline)

        # The 'Q' macro: single-byte values go through the _ch macro above
        # (or are rejected when none are defined), larger ones are checked
        # against the range list.
        header.write("""
    /**
     * %sQ:
     * @c: char to validate
     *
     * Automatically generated by genChRanges.py
     */
    """ % f)
        header.write(_padToColumn("#define %sQ(c)" % f)
                     + "(((c) < 0x100) ? \\\n\t\t\t\t ")
        if hasChars:
            header.write("%s_ch((c)) :" % f)
        else:
            header.write("0 :")

        numRanges = len(wideRanges)
        if numRanges == 0:
            # if no ranges defined, value invalid if >= 0x100
            header.write(" 0)\n\n")
        elif numRanges >= minTableSize:
            header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n"
                         % f)
        else:                       # if < minTableSize, generate inline code
            tests = [_rangeTest(rg) for rg in wideRanges]
            header.write("\\\n\t\t\t\t("
                         + " || \\\n\t\t\t\t ".join(tests)
                         + "))\n\n")

        # the range group itself is emitted into chvalid.c; declare it here
        if numRanges > 0:
            header.write("XMLPUBVAR xmlChRangeGroup %sGroup;\n" % f)
    
    
    #
    # Next we do the unicode ranges
    #
    
    def _writeRangeArray(out, ctype, cname, ranges):
        """Write 'ranges' to 'out' as the static C array 'cname' of 'ctype'.

        Each range becomes a {low, high} initialiser; a new output line is
        started whenever the current one has grown past 60 characters.
        """
        pline = "static %s %s[] = { " % (ctype, cname)
        needSep = 0
        for rg in ranges:
            if needSep:
                pline += ", "
            needSep = 1
            if len(pline) > 60:
                out.write(pline + "\n")
                pline = "    "
            pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
        out.write(pline + "};\n")

    for f in fkeys:
        rangeTable = Functs[f][1]
        if len(rangeTable) == 0:    # only generate if unicode ranges present
            continue
        rangeTable.sort()           # ascending tuple sequence

        # Partition before emitting: sorting by tuple does not guarantee that
        # every short range precedes every long one, and emitting the two
        # arrays from a single interleaved pass would mix their entries.
        # NOTE(review): a range starting below 0x10000 but ending at or above
        # it is placed in the long array, which the emitted xmlCharInRange
        # only searches for values >= 0x10000 - confirm the definition file
        # never contains such a range.
        shortRanges = [rg for rg in rangeTable if rg[1] < 0x10000]
        longRanges = [rg for rg in rangeTable if rg[1] >= 0x10000]
        numShort = len(shortRanges)
        numLong = len(longRanges)

        if numShort > 0:
            _writeRangeArray(output, "xmlChSRange", "%s_srng" % f,
                             shortRanges)
        if numLong > 0:
            _writeRangeArray(output, "xmlChLRange", "%s_lrng" % f,
                             longRanges)

        # the group ties both arrays together; a missing array is emitted as
        # a null pointer of the matching type
        pline = "xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort,
                                                           numLong)
        if numShort > 0:
            pline += "%s_srng" % f
        else:
            pline += "(xmlChSRangePtr)0"
        if numLong > 0:
            pline += ", %s_lrng" % f
        else:
            pline += ", (xmlChLRangePtr)0"

        output.write(pline + "};\n\n")
    
    # Write the fixed C implementation of xmlCharInRange to chvalid.c.  The
    # emitted function selects the short or the long range array of the
    # group depending on whether val is below 0x10000, then binary-searches
    # that array, returning 1 on a hit and 0 otherwise.
    output.write(
    """
    /**
     * xmlCharInRange:
     * @val: character to be validated
     * @rptr: pointer to range to be used to validate
     *
     * Does a binary search of the range table to determine if char
     * is valid
     *
     * Returns: true if character valid, false otherwise
     */
    int
    xmlCharInRange (unsigned int val, const xmlChRangeGroupPtr rptr) {
        int low, high, mid;
        xmlChSRangePtr sptr;
        xmlChLRangePtr lptr;
        if (val < 0x10000) {	/* is val in 'short' or 'long'  array? */
    	if (rptr->nbShortRange == 0)
    	    return 0;
    	low = 0;
    	high = rptr->nbShortRange - 1;
    	sptr = rptr->shortRange;
    	while (low <= high) {
    	    mid = (low + high) / 2;
    	    if ((unsigned short) val < sptr[mid].low) {
    		high = mid - 1;
    	    } else {
    	        if ((unsigned short) val > sptr[mid].high) {
    		    low = mid + 1;
    		} else {
    		    return 1;
    		}
    	    }
    	}
        } else {
    	if (rptr->nbLongRange == 0) {
    	    return 0;
    	}
    	low = 0;
    	high = rptr->nbLongRange - 1;
    	lptr = rptr->longRange;
    	while (low <= high) {
    	    mid = (low + high) / 2;
    	    if (val < lptr[mid].low) {
    		high = mid - 1;
    	    } else {
    	        if (val > lptr[mid].high) {
    		    low = mid + 1;
    		} else {
    		    return 1;
    		}
    	    }
    	}
        }
        return 0;
    }
    
    """);
    
    #
    # finally, generate the ABI compatibility functions
    #
    for fname in fkeys:
        # pick the replacement advice up front: the _ch macro is only
        # mentioned when single-byte values are defined for this function
        if max(Functs[fname][0]) > 0:
            advice = " * Use %s_ch or %sQ instead" % (fname, fname)
        else:
            advice = " * Use %sQ instead" % fname

        # doc comment for the deprecated wrapper, in three pieces
        output.write("""
    /**
     * %s:
     * @ch:  character to validate
     *
     * This function is DEPRECATED.
    """ % fname)
        output.write(advice)
        output.write("""
     *
     * Returns true if argument valid, false otherwise
     */
    """)

        # the wrapper keeps the original function name and simply
        # instantiates the 'Q' macro; its prototype goes into the header
        body = "int\n%s(unsigned int ch) {\n    return(%sQ(ch));\n}\n\n"
        output.write(body % (fname, fname))
        proto = "XMLPUBFUN int XMLCALL\n\t\t%s(unsigned int ch);\n"
        header.write(proto % fname)
    #
    # Run complete - write trailers and close the output files
    #
    
    # close the extern "C" section and the include guard of the header
    trailer = """
    #ifdef __cplusplus
    }
    #endif
    #endif /* __XML_CHVALID_H__ */
    """
    header.write(trailer)

    # both generated files are complete
    for stream in (header, output):
        stream.close()