Hash :
ec808a44
Author :
Date :
2021-02-07T13:57:49
Speed up HTML fuzzer htmlDocDumpMemory uses the "HTML" encoding if no other encoding was specified in the source HTML. This encoding can be extremely slow because of an inefficiency in htmlEntityValueLookup. Stop encoding the output for now.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/*
* html.c: a libFuzzer target to test several HTML parser interfaces.
*
* See Copyright for the status of this software.
*/
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include "fuzz.h"
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
char ***argv ATTRIBUTE_UNUSED) {
xmlInitParser();
xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
return 0;
}
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
static const size_t maxChunkSize = 128;
htmlDocPtr doc;
htmlParserCtxtPtr ctxt;
xmlOutputBufferPtr out;
const char *docBuffer;
size_t docSize, consumed, chunkSize;
int opts, outSize;
xmlFuzzDataInit(data, size);
opts = xmlFuzzReadInt();
docBuffer = xmlFuzzReadRemaining(&docSize);
if (docBuffer == NULL) {
xmlFuzzDataCleanup();
return(0);
}
/* Pull parser */
doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
/*
* Also test the serializer. Call htmlDocContentDumpOutput with our
* own buffer to avoid encoding the output. The HTML encoding is
* excruciatingly slow (see htmlEntityValueLookup).
*/
out = xmlAllocOutputBuffer(NULL);
htmlDocContentDumpOutput(out, doc, NULL);
xmlOutputBufferClose(out);
xmlFreeDoc(doc);
/* Push parser */
ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
XML_CHAR_ENCODING_NONE);
htmlCtxtUseOptions(ctxt, opts);
for (consumed = 0; consumed < docSize; consumed += chunkSize) {
chunkSize = docSize - consumed;
if (chunkSize > maxChunkSize)
chunkSize = maxChunkSize;
htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
}
htmlParseChunk(ctxt, NULL, 0, 1);
xmlFreeDoc(ctxt->myDoc);
htmlFreeParserCtxt(ctxt);
/* Cleanup */
xmlFuzzDataCleanup();
return(0);
}