Edit

kc3-lang/libxml2/xmlIO.c

Branch :

  • Show log

    Commit

  • Author : Daniel Veillard
    Date : 2000-01-25 18:31:22
    Hash : da07c34a
    Message : - added a nano FTP module - removed SNAP from RPM - updated the status in doc

  • xmlIO.c
  • /*
     * xmlIO.c : implementation of the I/O interfaces used by the parser
     *
     * See Copyright for the status of this software.
     *
     * Daniel.Veillard@w3.org
     */
    
    #ifdef WIN32
    #include "win32config.h"
    #else
    #include "config.h"
    #endif
    
    #include <stdio.h>
    #include <string.h>
    
    #ifdef HAVE_SYS_TYPES_H
    #include <sys/types.h>
    #endif
    #ifdef HAVE_SYS_STAT_H
    #include <sys/stat.h>
    #endif
    #ifdef HAVE_FCNTL_H
    #include <fcntl.h>
    #endif
    #ifdef HAVE_UNISTD_H
    #include <unistd.h>
    #endif
    #ifdef HAVE_STDLIB_H
    #include <stdlib.h>
    #endif
    #ifdef HAVE_ZLIB_H
    #include <zlib.h>
    #endif
    
    #include "xmlmemory.h"
    #include "parser.h"
    #include "parserInternals.h"
    #include "xmlIO.h"
    #include "nanohttp.h"
    #include "nanoftp.h"
    
    /* #define DEBUG_INPUT */
    /* #define VERBOSE_FAILURE */
    /* #define DEBUG_EXTERNAL_ENTITIES */
    
    #ifdef DEBUG_INPUT
    #define MINLEN 40
    #else
    #define MINLEN 4000
    #endif
    
    /**
     * xmlAllocParserInputBuffer:
     * @enc:  the charset encoding if known
     *
     * Allocate an empty buffered parser input: a zeroed structure with a
     * fresh xmlBuffer, the encoding handler looked up for @enc, and no
     * input source attached yet (fd set to -1, HTTP/FTP handles NULL).
     *
     * Returns the new parser input or NULL if an allocation failed
     */
    xmlParserInputBufferPtr
    xmlAllocParserInputBuffer(xmlCharEncoding enc) {
        xmlParserInputBufferPtr input;

        input = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
        if (input == NULL) {
            fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
            return(NULL);
        }
        memset(input, 0, (size_t) sizeof(xmlParserInputBuffer));

        input->buffer = xmlBufferCreate();
        if (input->buffer != NULL) {
            input->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
            input->encoder = xmlGetCharEncodingHandler(enc);
            /* mark every possible source as "not attached" */
            input->fd = -1;
            input->httpIO = NULL;
            input->ftpIO = NULL;
            return(input);
        }

        /* no content buffer: give the structure back */
        xmlFree(input);
        return(NULL);
    }
    
    /**
     * xmlFreeParserInputBuffer:
     * @in:  a buffered parser input, may be NULL
     *
     * Free up the memory used by a buffered parser input and close the
     * sources it holds: the gzFile (when built with zlib), the HTTP and
     * FTP handles and the file descriptor. A FILE * stored in @in is not
     * closed by this routine.
     * Calling it with NULL is a no-op.
     */
    void
    xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
        if (in == NULL)
            return;

        if (in->buffer != NULL) {
            xmlBufferFree(in->buffer);
            in->buffer = NULL;
        }
    #ifdef HAVE_ZLIB_H
        if (in->gzfile != NULL)
            gzclose(in->gzfile);
    #endif
        if (in->httpIO != NULL)
            xmlNanoHTTPClose(in->httpIO);
        if (in->ftpIO != NULL)
            xmlNanoFTPClose(in->ftpIO);
        if (in->fd >= 0)
            close(in->fd);
        /* overwrite the structure with a recognizable pattern before freeing */
        memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
        xmlFree(in);
    }
    
    /**
     * xmlParserInputBufferCreateFilename:
     * @filename:  a C string containing the filename or URL
     * @enc:  the charset encoding if known
     *
     * Create a buffered parser input for the progressive parsing of a file.
     * Names starting with "http://" or "ftp://" are opened through the nano
     * HTTP / FTP modules. If filename is "-" then we use stdin as the input.
     * Automatic support for ZLIB/Compress compressed document is provided
     * by default if found at compile-time.
     * On success the first 4 bytes of the input are read into the buffer.
     *
     * Returns the new parser input, or NULL if @filename is NULL, the source
     *         cannot be opened or the buffer cannot be allocated
     */
    xmlParserInputBufferPtr
    xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
        xmlParserInputBufferPtr ret;
    #ifdef HAVE_ZLIB_H
        gzFile input = 0;
    #else
        int input = -1;
    #endif
        void *httpIO = NULL;
        void *ftpIO = NULL;

        if (filename == NULL) return(NULL);

        if (!strncmp(filename, "http://", 7)) {
            httpIO = xmlNanoHTTPOpen(filename, NULL);
            if (httpIO == NULL) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read URL %s\n", filename);
                perror ("xmlNanoHTTPOpen failed");
    #endif
                return(NULL);
            }
        } else if (!strncmp(filename, "ftp://", 6)) {
            ftpIO = xmlNanoFTPOpen(filename);
            if (ftpIO == NULL) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read URL %s\n", filename);
                perror ("xmlNanoFTPOpen failed");
    #endif
                return(NULL);
            }
        } else if (!strcmp(filename, "-")) {
    #ifdef HAVE_ZLIB_H
            input = gzdopen (fileno(stdin), "r");
            if (input == NULL) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read from stdin\n");
                perror ("gzdopen failed");
    #endif
                return(NULL);
            }
    #else
    #ifdef WIN32
            input = -1;
    #else
            input = fileno(stdin);
    #endif
            if (input < 0) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read from stdin\n");
                perror ("open failed");
    #endif
                return(NULL);
            }
    #endif
        } else {
    #ifdef HAVE_ZLIB_H
            input = gzopen (filename, "r");
            if (input == NULL) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read file %s :\n", filename);
                perror ("gzopen failed");
    #endif
                return(NULL);
            }
    #else
    #ifdef WIN32
            input = _open (filename, O_RDONLY | _O_BINARY);
    #else
            input = open (filename, O_RDONLY);
    #endif
            if (input < 0) {
    #ifdef VERBOSE_FAILURE
                fprintf (stderr, "Cannot read file %s :\n", filename);
                perror ("open failed");
    #endif
                return(NULL);
            }
    #endif
        }
        /* 
         * TODO : get the 4 first bytes and decode the charset
         * if enc == XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines here. !!!
         * enc = xmlDetectCharEncoding(buffer);
         */

        ret = xmlAllocParserInputBuffer(enc);
        if (ret == NULL) {
            /*
             * Never hand a NULL buffer to xmlParserInputBufferRead(), it
             * dereferences its argument.
             * NOTE(review): the source opened above is not released on this
             * out-of-memory path - to be closed here in a follow-up.
             */
            return(NULL);
        }
    #ifdef HAVE_ZLIB_H
        ret->gzfile = input;
    #else
        ret->fd = input;
    #endif
        ret->httpIO = httpIO;
        ret->ftpIO = ftpIO;

        /* prefetch the first 4 bytes - presumably for the detection in the TODO */
        xmlParserInputBufferRead(ret, 4);

        return(ret);
    }
    
    /**
     * xmlParserInputBufferCreateFile:
     * @file:  a FILE* 
     * @enc:  the charset encoding if known
     *
     * Create a buffered parser input reading progressively from a FILE *
     * (buffered C I/O). The FILE * is only stored in the new input.
     *
     * Returns the new parser input, or NULL if @file is NULL or the
     *         allocation failed
     */
    xmlParserInputBufferPtr
    xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
        xmlParserInputBufferPtr input = NULL;

        if (file != NULL) {
            input = xmlAllocParserInputBuffer(enc);
            if (input != NULL) {
                input->file = file;
            }
        }
        return(input);
    }
    
    /**
     * xmlParserInputBufferCreateFd:
     * @fd:  a file descriptor number
     * @enc:  the charset encoding if known
     *
     * Create a buffered parser input reading progressively from a file
     * descriptor. The descriptor is stored in the new input.
     *
     * Returns the new parser input, or NULL if @fd is negative or the
     *         allocation failed
     */
    xmlParserInputBufferPtr
    xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
        xmlParserInputBufferPtr input;

        if (fd < 0)
            return(NULL);

        input = xmlAllocParserInputBuffer(enc);
        if (input == NULL)
            return(NULL);

        input->fd = fd;
        return(input);
    }
    
    /**
     * xmlParserInputBufferPush:
     * @in:  a buffered parser input
     * @len:  the size in bytes of the array.
     * @buf:  an char array
     *
     * Push the content of the array in the input buffer
     * This routine handle the I18N transcoding to internal UTF-8
     * This is used when operating the parser in progressive (push) mode.
     *
     * Returns the number of chars read and stored in the buffer (0 when
     *         @len is negative), or -1 in case of error: @in is NULL, out
     *         of memory, or the encoder reported a failure.
     */
    int
    xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
        int nbchars = 0;

        if (len < 0) return(0);
        if (in == NULL) return(-1);
        if (in->encoder != NULL) {
            xmlChar *buffer;
            /* size of the area offered to the encoder for its output */
            size_t outlen = (len + 1) * 2 * sizeof(xmlChar);

            /* one extra byte so the terminator fits even if the area is filled */
            buffer = (xmlChar *) xmlMalloc(outlen + 1);
            if (buffer == NULL) {
                fprintf(stderr, "xmlParserInputBufferPush : out of memory !\n");
                return(-1);
            }
            nbchars = in->encoder->input(buffer, outlen,
                                         (xmlChar *) buf, len);
            /*
             * TODO : we really need to have something atomic or the 
             *        encoder must report the number of bytes read
             */
            if (nbchars < 0) {
                /* a negative count must not be used as an index or a length */
                xmlFree(buffer);
                return(-1);
            }
            buffer[nbchars] = 0;
            xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
            xmlFree(buffer);
        } else {
            /* no encoder: the bytes are stored as they come */
            nbchars = len;
            xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
        }
    #ifdef DEBUG_INPUT
        fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
                nbchars, in->buffer->use, in->buffer->size);
    #endif
        return(nbchars);
    }
    
    /**
     * xmlParserInputBufferGrow:
     * @in:  a buffered parser input
     * @len:  indicative value of the amount of chars to read
     *
     * Grow up the content of the input buffer, the old data are preserved
     * This routine handle the I18N transcoding to internal UTF-8
     * This routine is used when operating the parser in normal (pull) mode
     * The source is picked in this order: HTTP, FTP, FILE *, gzFile (when
     * built with zlib), file descriptor.
     * TODO: one should be able to remove one extra copy
     *
     * Returns the number of chars read and stored in the buffer, 0 when
     *         nothing was read or the buffer has no free room, or -1 in
     *         case of error (NULL @in, no input source, out of memory,
     *         read error or encoder failure).
     */
    int
    xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
        char *buffer = NULL;
        int res = 0;
        int nbchars = 0;
        int buffree;

        if (in == NULL) return(-1);

        /* small requests are raised to MINLEN, except the 4 bytes prefetch */
        if ((len <= MINLEN) && (len != 4)) 
            len = MINLEN;
        buffree = in->buffer->size - in->buffer->use;
        if (buffree <= 0) {
            fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
            return(0);
        }
        if (len > buffree) 
            len = buffree;

        buffer = xmlMalloc((len + 1) * sizeof(char));
        if (buffer == NULL) {
            fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
            return(-1);
        }
        if (in->httpIO != NULL) {
            res = xmlNanoHTTPRead(in->httpIO, &buffer[0], len);
        } else if (in->ftpIO != NULL) {
            res = xmlNanoFTPRead(in->ftpIO, &buffer[0], len);
        } else if (in->file != NULL) {
            res = fread(&buffer[0], 1, len, in->file);
    #ifdef HAVE_ZLIB_H
        } else if (in->gzfile != NULL) {
            res = gzread((gzFile) in->gzfile, &buffer[0], len);
    #endif
        } else if (in->fd >= 0) {
            res = read(in->fd, &buffer[0], len);
        } else {
            fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
            xmlFree(buffer);
            return(-1);
        }
        if (res == 0) {
            xmlFree(buffer);
            return(0);
        }
        if (res < 0) {
            perror ("read error");
            xmlFree(buffer);
            return(-1);
        }
        if (in->encoder != NULL) {
            xmlChar *buf;
            /* size of the area offered to the encoder for its output */
            size_t outlen = (res + 1) * 2 * sizeof(xmlChar);

            /* one extra byte so the terminator fits even if the area is filled */
            buf = (xmlChar *) xmlMalloc(outlen + 1);
            if (buf == NULL) {
                fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
                xmlFree(buffer);
                return(-1);
            }
            nbchars = in->encoder->input(buf, outlen,
                                         BAD_CAST buffer, res);
            if (nbchars < 0) {
                /* a negative count must not be used as an index or a length */
                xmlFree(buf);
                xmlFree(buffer);
                return(-1);
            }
            buf[nbchars] = 0;
            xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
            xmlFree(buf);
        } else {
            /* no encoder: the bytes are stored as they were read */
            nbchars = res;
            buffer[nbchars] = 0;
            xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
        }
    #ifdef DEBUG_INPUT
        fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
                nbchars, in->buffer->use, in->buffer->size);
    #endif
        xmlFree(buffer);
        return(nbchars);
    }
    
    /**
     * xmlParserInputBufferRead:
     * @in:  a buffered parser input
     * @len:  indicative value of the amount of chars to read
     *
     * Refresh the content of the input buffer when an input source (HTTP,
     * FTP, FILE *, gzFile or file descriptor) is attached; the work is
     * delegated to xmlParserInputBufferGrow().
     * Existing buffer content is left in place, the xmlBufferEmpty() call
     * being disabled.
     * This routine handle the I18N transcoding to internal UTF-8
     *
     * Returns the number of chars read and stored in the buffer, 0 when no
     *         source is attached, or -1 in case of error.
     */
    int
    xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
        int has_source;

        /* xmlBufferEmpty(in->buffer); */
        has_source = (in->httpIO != NULL) || (in->ftpIO != NULL) ||
                     (in->file != NULL) ||
    #ifdef HAVE_ZLIB_H
                     (in->gzfile != NULL) ||
    #endif
                     (in->fd >= 0);

        if (!has_source)
            return(0);
        return(xmlParserInputBufferGrow(in, len));
    }
    
    /*
     * xmlParserGetDirectory:
     * @filename:  the path to a file
     *
     * lookup the directory for that file: everything before the last
     * separator ('/', or '\\' when WIN32 is defined). A path whose only
     * separator is its first character yields that separator alone.
     * When @filename holds no separator the current working directory
     * is returned instead.
     * The path is searched in full, it is not truncated to a fixed size.
     *
     * Returns a new allocated string containing the directory, or NULL.
     */
    char *
    xmlParserGetDirectory(const char *filename) {
        char *ret = NULL;
        char dir[1024];
        const char *last;
        char sep = '/';

        if (filename == NULL) return(NULL);
    #ifdef WIN32
        sep = '\\';
    #endif

        last = strrchr(filename, sep);
        if (last != NULL) {
            /* work on a heap copy of the whole name and cut it in place */
            ret = xmlMemStrdup(filename);
            if (ret != NULL) {
                if (last == filename)
                    ret[1] = 0;     /* keep the leading separator */
                else
                    ret[last - filename] = 0;
            }
        } else if (getcwd(dir, sizeof(dir)) != NULL) {
            dir[sizeof(dir) - 1] = 0;
            ret = xmlMemStrdup(dir);
        }
        return(ret);
    }
    
    /****************************************************************
     *								*
     *		External entities loading			*
     *								*
     ****************************************************************/
    
    /*
     * xmlDefaultExternalEntityLoader:
     * @URL:  the URL for the entity to load
     * @ID:  the System ID for the entity to load
     * @ctxt:  the context in which the entity is called or NULL
     *
     * Default loader: the entity is opened with xmlNewInputFromFile() from
     * @URL. On failure, or when @URL is NULL, a warning is sent to the SAX
     * warning handler of @ctxt if there is one; a NULL @ctxt or a NULL @ID
     * is tolerated when reporting.
     *
     * Returns a new allocated xmlParserInputPtr, or NULL.
     */
    static
    xmlParserInputPtr
    xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
                                   xmlParserCtxtPtr ctxt) {
        xmlParserInputPtr ret = NULL;
        const char *failed;     /* name quoted in the warning */
    #ifdef DEBUG_EXTERNAL_ENTITIES
        fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n",
                (URL != NULL) ? URL : "NULL");
    #endif
        if (URL != NULL) {
            /* NOTE(review): ctxt is passed through unchecked, as before */
            ret = xmlNewInputFromFile(ctxt, URL);
            if (ret != NULL)
                return(ret);
            failed = URL;
        } else {
            /* never hand a NULL pointer to a "%s" conversion */
            failed = (ID != NULL) ? ID : "NULL";
        }

        if ((ctxt != NULL) && (ctxt->sax != NULL) &&
            (ctxt->sax->warning != NULL))
            ctxt->sax->warning(ctxt, "failed to load external entity \"%s\"\n",
                               failed);
        return(NULL);
    }
    
    /*
     * Loader used by xmlLoadExternalEntity(); starts as the default loader
     * and is replaced through xmlSetExternalEntityLoader().
     */
    static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
           xmlDefaultExternalEntityLoader;
    
    /*
     * xmlSetExternalEntityLoader:
     * @f:  the new entity resolver function
     *
     * Install @f as the external entity resolver function for the
     * application; it is stored as is and used by later calls to
     * xmlLoadExternalEntity().
     */
    void
    xmlSetExternalEntityLoader(xmlExternalEntityLoader f)
    {
        xmlCurrentExternalEntityLoader = f;
    }
    
    /*
     * xmlGetExternalEntityLoader:
     *
     * Give back the external entity resolver function currently installed
     * for the application.
     *
     * Returns the xmlExternalEntityLoader function pointer
     */
    xmlExternalEntityLoader
    xmlGetExternalEntityLoader(void)
    {
        xmlExternalEntityLoader current = xmlCurrentExternalEntityLoader;

        return(current);
    }
    
    /*
     * xmlLoadExternalEntity:
     * @URL:  the URL for the entity to load
     * @ID:  the System ID for the entity to load
     * @ctxt:  the context in which the entity is called or NULL
     *
     * Load an external entity by handing the three arguments, unchanged,
     * to the currently installed external entity loader. Note that the
     * use of this function for unparsed entities may generate problems
     * TODO: a more generic External entity API must be designed
     *
     * Returns the xmlParserInputPtr or NULL
     */
    xmlParserInputPtr
    xmlLoadExternalEntity(const char *URL, const char *ID,
                          xmlParserCtxtPtr ctxt)
    {
        xmlExternalEntityLoader loader = xmlCurrentExternalEntityLoader;

        return(loader(URL, ID, ctxt));
    }