Edit

kc3-lang/libxml2/parser.h

Branch :

  • Show log

    Commit

  • Author : Daniel Veillard
    Date : 2000-06-30 17:58:25
    Hash : 3f6f7f64
    Message : - win32config.h.in: updated - xmlversion.h.in: crap forgot to update this, this mean 2.1.0 lacks iconv support :-( need to release 2.1.1 - configure.in: release 2.1.1 - HTMLparser: fixed bug #14784 - xpath.c HTMLparser.c encoding.c parser.c: fix warning raised by Windows compiler - HTMLparser.c SAX.c HTMLtree.h tree.h: create HTML document in the SAX startDocument() callback. - TODO: updated - rebuild doc Daniel

  • parser.h
  • /*
     * parser.h : Interfaces, constants and types related to the XML parser.
     *
     * See Copyright for the status of this software.
     *
     * Daniel.Veillard@w3.org
     */
    
    #ifndef __XML_PARSER_H__
    #define __XML_PARSER_H__
    
    #include <libxml/tree.h>
    #include <libxml/valid.h>
    #include <libxml/xmlIO.h>
    #include <libxml/entities.h>
    
    
    #ifdef __cplusplus
    extern "C" {
    #endif
    
    /*
     * Constants.
     */
    #define XML_DEFAULT_VERSION	"1.0"
    
    /**
     * an xmlParserInput is an input flow for the XML processor.
     * Each entity parsed is associated an xmlParserInput (except the
     * few predefined ones). This is the case both for internal entities
     * - in which case the flow is already completely in memory - or
     * external entities - in which case we use the buf structure for
     * progressive reading and I18N conversions to the internal UTF-8 format.
     */
    
    typedef void (* xmlParserInputDeallocate)(xmlChar *);
    typedef struct _xmlParserInput xmlParserInput;
    typedef xmlParserInput *xmlParserInputPtr;
    struct _xmlParserInput {
        /* Input buffer */
        xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */
    
        const char *filename;             /* The file analyzed, if any */
        const char *directory;            /* the directory/base of teh file */
        const xmlChar *base;              /* Base of the array to parse */
        const xmlChar *cur;               /* Current char being parsed */
        int length;                       /* length if known */
        int line;                         /* Current line */
        int col;                          /* Current column */
        int consumed;                     /* How many xmlChars already consumed */
        xmlParserInputDeallocate free;    /* function to deallocate the base */
        const xmlChar *encoding;          /* the encoding string for entity */
        const xmlChar *version;           /* the version string for entity */
        int standalone;                   /* Was that entity marked standalone */
    };
    
    /**
     * the parser can be asked to collect Node informations, i.e. at what
     * place in the file they were detected. 
     * NOTE: This is off by default and not very well tested.
     */
    typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
    typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
    
    struct _xmlParserNodeInfo {
      const struct _xmlNode* node;
      /* Position & line # that text that created the node begins & ends on */
      unsigned long begin_pos;
      unsigned long begin_line;
      unsigned long end_pos;
      unsigned long end_line;
    };
    
    typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
    typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
    struct _xmlParserNodeInfoSeq {
      unsigned long maximum;
      unsigned long length;
      xmlParserNodeInfo* buffer;
    };
    
    /**
     * The parser is now working also as a state based parser
     * The recursive one use the stagte info for entities processing
     */
    typedef enum {
        XML_PARSER_EOF = -1,	/* nothing is to be parsed */
        XML_PARSER_START = 0,	/* nothing has been parsed */
        XML_PARSER_MISC,		/* Misc* before int subset */
        XML_PARSER_PI,		/* Whithin a processing instruction */
        XML_PARSER_DTD,		/* within some DTD content */
        XML_PARSER_PROLOG,		/* Misc* after internal subset */
        XML_PARSER_COMMENT,		/* within a comment */
        XML_PARSER_START_TAG,	/* within a start tag */
        XML_PARSER_CONTENT,		/* within the content */
        XML_PARSER_CDATA_SECTION,	/* within a CDATA section */
        XML_PARSER_END_TAG,		/* within a closing tag */
        XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
        XML_PARSER_ENTITY_VALUE,	/* within an entity value in a decl */
        XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
        XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
        XML_PARSER_EPILOG 		/* the Misc* after the last end tag */
    } xmlParserInputState;
    
    /**
     * The parser context.
     * NOTE This doesn't completely defines the parser state, the (current ?)
     *      design of the parser uses recursive function calls since this allow
     *      and easy mapping from the production rules of the specification
     *      to the actual code. The drawback is that the actual function call
     *      also reflect the parser state. However most of the parsing routines
     *      takes as the only argument the parser context pointer, so migrating
     *      to a state based parser for progressive parsing shouldn't be too hard.
     */
    typedef struct _xmlParserCtxt xmlParserCtxt;
    typedef xmlParserCtxt *xmlParserCtxtPtr;
    struct _xmlParserCtxt {
        struct _xmlSAXHandler *sax;       /* The SAX handler */
        void            *userData;        /* the document being built */
        xmlDocPtr           myDoc;        /* the document being built */
        int            wellFormed;        /* is the document well formed */
        int       replaceEntities;        /* shall we replace entities ? */
        const xmlChar    *version;        /* the XML version string */
        const xmlChar   *encoding;        /* the declared encoding, if any */
        int            standalone;        /* standalone document */
        int                  html;        /* are we parsing an HTML document */
    
        /* Input stream stack */
        xmlParserInputPtr  input;         /* Current input stream */
        int                inputNr;       /* Number of current input streams */
        int                inputMax;      /* Max number of input streams */
        xmlParserInputPtr *inputTab;      /* stack of inputs */
    
        /* Node analysis stack only used for DOM building */
        xmlNodePtr         node;          /* Current parsed Node */
        int                nodeNr;        /* Depth of the parsing stack */
        int                nodeMax;       /* Max depth of the parsing stack */
        xmlNodePtr        *nodeTab;       /* array of nodes */
    
        int record_info;                  /* Whether node info should be kept */
        xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */
    
        int errNo;                        /* error code */
    
        int     hasExternalSubset;        /* reference and external subset */
        int             hasPErefs;        /* the internal subset has PE refs */
        int              external;        /* are we parsing an external entity */
    
        int                 valid;        /* is the document valid */
        int              validate;        /* shall we try to validate ? */
        xmlValidCtxt        vctxt;        /* The validity context */
    
        xmlParserInputState instate;      /* current type of input */
        int                 token;        /* next char look-ahead */    
    
        char           *directory;        /* the data directory */
    
        /* Node name stack */
        xmlChar           *name;          /* Current parsed Node */
        int                nameNr;        /* Depth of the parsing stack */
        int                nameMax;       /* Max depth of the parsing stack */
        xmlChar *         *nameTab;       /* array of nodes */
    
        long               nbChars;       /* number of xmlChar processed */
        long            checkIndex;       /* used by progressive parsing lookup */
        int             keepBlanks;       /* ugly but ... */
        int             disableSAX;       /* SAX callbacks are disabled */
        int               inSubset;       /* Parsing is in int 1/ext 2 subset */
        xmlChar *          intSubName;    /* name of subset */
        xmlChar *          extSubURI;     /* URI of external subset */
        xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */
    
        /* xml:space values */
        int *              space;         /* Should the parser preserve spaces */
        int                spaceNr;       /* Depth of the parsing stack */
        int                spaceMax;      /* Max depth of the parsing stack */
        int *              spaceTab;      /* array of space infos */
    
        int                depth;         /* to prevent entity substitution loops */
        xmlParserInputPtr  entity;        /* used to check entities boundaries */
        int                charset;       /* encoding of the in-memory content
    				         actually an xmlCharEncoding */
        int                nodelen;       /* Those two fields are there to */
        int                nodemem;       /* Speed up large node parsing */
    };
    
    /**
     * a SAX Locator.
     */
    typedef struct _xmlSAXLocator xmlSAXLocator;
    typedef xmlSAXLocator *xmlSAXLocatorPtr;
    struct _xmlSAXLocator {
        const xmlChar *(*getPublicId)(void *ctx);
        const xmlChar *(*getSystemId)(void *ctx);
        int (*getLineNumber)(void *ctx);
        int (*getColumnNumber)(void *ctx);
    };
    
    /**
     * a SAX handler is bunch of callbacks called by the parser when processing
     * of the input generate data or structure informations.
     */
    
    typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
    			    const xmlChar *publicId, const xmlChar *systemId);
    typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
                                const xmlChar *ExternalID, const xmlChar *SystemID);
    typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
                                const xmlChar *ExternalID, const xmlChar *SystemID);
    typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
                                const xmlChar *name);
    typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
                                const xmlChar *name);
    typedef void (*entityDeclSAXFunc) (void *ctx,
                                const xmlChar *name, int type, const xmlChar *publicId,
    			    const xmlChar *systemId, xmlChar *content);
    typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name,
    			    const xmlChar *publicId, const xmlChar *systemId);
    typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem,
                                const xmlChar *name, int type, int def,
    			    const xmlChar *defaultValue, xmlEnumerationPtr tree);
    typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name,
    			    int type, xmlElementContentPtr content);
    typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
                                const xmlChar *name, const xmlChar *publicId,
    			    const xmlChar *systemId, const xmlChar *notationName);
    typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
                                xmlSAXLocatorPtr loc);
    typedef void (*startDocumentSAXFunc) (void *ctx);
    typedef void (*endDocumentSAXFunc) (void *ctx);
    typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name,
                                const xmlChar **atts);
    typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
    typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name,
                                      const xmlChar *value);
    typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
    typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch,
    		            int len);
    typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
    			    const xmlChar *ch, int len);
    typedef void (*processingInstructionSAXFunc) (void *ctx,
                                const xmlChar *target, const xmlChar *data);
    typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value);
    typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
    typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
    typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
    typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
    typedef int (*isStandaloneSAXFunc) (void *ctx);
    typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
    typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
    
    typedef struct _xmlSAXHandler xmlSAXHandler;
    typedef xmlSAXHandler *xmlSAXHandlerPtr;
    struct _xmlSAXHandler {
        internalSubsetSAXFunc internalSubset;
        isStandaloneSAXFunc isStandalone;
        hasInternalSubsetSAXFunc hasInternalSubset;
        hasExternalSubsetSAXFunc hasExternalSubset;
        resolveEntitySAXFunc resolveEntity;
        getEntitySAXFunc getEntity;
        entityDeclSAXFunc entityDecl;
        notationDeclSAXFunc notationDecl;
        attributeDeclSAXFunc attributeDecl;
        elementDeclSAXFunc elementDecl;
        unparsedEntityDeclSAXFunc unparsedEntityDecl;
        setDocumentLocatorSAXFunc setDocumentLocator;
        startDocumentSAXFunc startDocument;
        endDocumentSAXFunc endDocument;
        startElementSAXFunc startElement;
        endElementSAXFunc endElement;
        referenceSAXFunc reference;
        charactersSAXFunc characters;
        ignorableWhitespaceSAXFunc ignorableWhitespace;
        processingInstructionSAXFunc processingInstruction;
        commentSAXFunc comment;
        warningSAXFunc warning;
        errorSAXFunc error;
        fatalErrorSAXFunc fatalError;
        getParameterEntitySAXFunc getParameterEntity;
        cdataBlockSAXFunc cdataBlock;
        externalSubsetSAXFunc externalSubset;
    };
    
    /**
     * External entity loaders types
     */
    typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
    						     const char *ID,
    						     xmlParserCtxtPtr context);
    
    /**
     * Global variables: just the default SAX interface tables and XML
     * version infos.
     */
    extern const char *xmlParserVersion;
    
    extern xmlSAXLocator xmlDefaultSAXLocator;
    extern xmlSAXHandler xmlDefaultSAXHandler;
    extern xmlSAXHandler htmlDefaultSAXHandler;
    
    /**
     * entity substitution default behaviour.
     */
    
    extern int xmlSubstituteEntitiesDefaultValue;
    extern int xmlGetWarningsDefaultValue;
    
    
    /**
     * Cleanup
     */
    void		xmlCleanupParser	(void);
    
    /**
     * Input functions
     */
    int		xmlParserInputRead	(xmlParserInputPtr in,
    					 int len);
    int		xmlParserInputGrow	(xmlParserInputPtr in,
    					 int len);
    
    /**
     * xmlChar handling
     */
    xmlChar *	xmlStrdup		(const xmlChar *cur);
    xmlChar *	xmlStrndup		(const xmlChar *cur,
    					 int len);
    xmlChar *	xmlStrsub		(const xmlChar *str,
    					 int start,
    					 int len);
    const xmlChar *	xmlStrchr		(const xmlChar *str,
    					 xmlChar val);
    const xmlChar *	xmlStrstr		(const xmlChar *str,
    					 xmlChar *val);
    int		xmlStrcmp		(const xmlChar *str1,
    					 const xmlChar *str2);
    int		xmlStrncmp		(const xmlChar *str1,
    					 const xmlChar *str2,
    					 int len);
    int		xmlStrlen		(const xmlChar *str);
    xmlChar *	xmlStrcat		(xmlChar *cur,
    					 const xmlChar *add);
    xmlChar *	xmlStrncat		(xmlChar *cur,
    					 const xmlChar *add,
    					 int len);
    
    /**
     * Basic parsing Interfaces
     */
    xmlDocPtr	xmlParseDoc		(xmlChar *cur);
    xmlDocPtr	xmlParseMemory		(char *buffer,
    					 int size);
    xmlDocPtr	xmlParseFile		(const char *filename);
    int		xmlSubstituteEntitiesDefault(int val);
    int		xmlKeepBlanksDefault	(int val);
    void		xmlStopParser		(xmlParserCtxtPtr ctxt);
    
    /**
     * Recovery mode 
     */
    xmlDocPtr	xmlRecoverDoc		(xmlChar *cur);
    xmlDocPtr	xmlRecoverMemory	(char *buffer,
    					 int size);
    xmlDocPtr	xmlRecoverFile		(const char *filename);
    
    /**
     * Less common routines and SAX interfaces
     */
    int		xmlParseDocument	(xmlParserCtxtPtr ctxt);
    xmlDocPtr	xmlSAXParseDoc		(xmlSAXHandlerPtr sax,
    					 xmlChar *cur,
    					 int recovery);
    int		xmlSAXUserParseFile	(xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 const char *filename);
    int		xmlSAXUserParseMemory	(xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 char *buffer,
    					 int size);
    xmlDocPtr	xmlSAXParseMemory	(xmlSAXHandlerPtr sax,
    					 char *buffer,
                                       	 int size,
    					 int recovery);
    xmlDocPtr	xmlSAXParseFile		(xmlSAXHandlerPtr sax,
    					 const char *filename,
    					 int recovery);
    xmlDtdPtr	xmlParseDTD		(const xmlChar *ExternalID,
    					 const xmlChar *SystemID);
    xmlDtdPtr	xmlSAXParseDTD		(xmlSAXHandlerPtr sax,
    					 const xmlChar *ExternalID,
    					 const xmlChar *SystemID);
    int		xmlParseBalancedChunkMemory(xmlDocPtr doc,
    					 xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 int depth,
    					 const xmlChar *string,
    					 xmlNodePtr *list);
    int		xmlParseExternalEntity	(xmlDocPtr doc,
    					 xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 int depth,
    					 const xmlChar *URL,
    					 const xmlChar *ID,
    					 xmlNodePtr *list);
    
    /**
     * SAX initialization routines
     */
    void		xmlDefaultSAXHandlerInit(void);
    void		htmlDefaultSAXHandlerInit(void);
    
    /**
     * Parser contexts handling.
     */
    void		xmlInitParserCtxt	(xmlParserCtxtPtr ctxt);
    void		xmlClearParserCtxt	(xmlParserCtxtPtr ctxt);
    void		xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
    void		xmlSetupParserForBuffer	(xmlParserCtxtPtr ctxt,
    					 const xmlChar* buffer,
    					 const char* filename);
    xmlParserCtxtPtr xmlCreateDocParserCtxt	(xmlChar *cur);
    
    /**
     * Interfaces for the Push mode
     */
    xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 const char *chunk,
    					 int size,
    					 const char *filename);
    int		 xmlParseChunk		(xmlParserCtxtPtr ctxt,
    					 const char *chunk,
    					 int size,
    					 int terminate);
    
    /**
     * Special I/O mode
     */
    
    xmlParserCtxtPtr xmlCreateIOParserCtxt	(xmlSAXHandlerPtr sax,
    					 void *user_data,
    					 xmlInputReadCallback   ioread,
    					 xmlInputCloseCallback  ioclose,
    					 void *ioctx,
    					 xmlCharEncoding enc);
    
    xmlParserInputPtr xmlNewIOInputStream	(xmlParserCtxtPtr ctxt,
    					 xmlParserInputBufferPtr input,
    					 xmlCharEncoding enc);
    
    /**
     * Node infos
     */
    const xmlParserNodeInfo*
    		xmlParserFindNodeInfo	(const xmlParserCtxt* ctxt,
                                                   const xmlNode* node);
    void		xmlInitNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
    void		xmlClearNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
    unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
                                             const xmlNode* node);
    void		xmlParserAddNodeInfo	(xmlParserCtxtPtr ctxt,
    					 const xmlParserNodeInfo* info);
    
    /*
     * External entities handling actually implemented in xmlIO
     */
    
    void		xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
    xmlExternalEntityLoader
    		xmlGetExternalEntityLoader(void);
    xmlParserInputPtr
    		xmlLoadExternalEntity	(const char *URL,
    					 const char *ID,
    					 xmlParserCtxtPtr context);
    
    #ifdef __cplusplus
    }
    #endif
    
    #endif /* __XML_PARSER_H__ */