From 23f05e0c33987d6605387b300c4be5da2120a7ab Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Tue, 19 Feb 2013 02:21:49 +0000 Subject: Detect excessive entities expansion upon replacement If entities expansion in the XML parser is asked for, it is possble to craft relatively small input document leading to excessive on-the-fly content generation. This patch accounts for those replacement and stop parsing after a given threshold. it can be bypassed as usual with the HUGE parser option. --- diff --git a/include/libxml/parser.h b/include/libxml/parser.h index e1346e4..3f5730d 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -308,6 +308,8 @@ struct _xmlParserCtxt { int nodeInfoNr; /* Depth of the parsing stack */ int nodeInfoMax; /* Max depth of the parsing stack */ xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */ + + unsigned long sizeentcopy; /* volume of entity copy */ }; /** diff --git a/parser.c b/parser.c index 91f8c90..ddf3b5b 100644 --- a/parser.c +++ b/parser.c @@ -122,7 +122,7 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, */ static int xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, - xmlEntityPtr ent) + xmlEntityPtr ent, size_t replacement) { size_t consumed = 0; @@ -130,7 +130,24 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, return (0); if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) return (1); - if (size != 0) { + if (replacement != 0) { + if (replacement < XML_MAX_TEXT_LENGTH) + return(0); + + /* + * If the volume of entity copy reaches 10 times the + * amount of parsed data and over the large text threshold + * then that's very likely to be an abuse. + */ + if (ctxt->input != NULL) { + consumed = ctxt->input->consumed + + (ctxt->input->cur - ctxt->input->base); + } + consumed += ctxt->sizeentities; + + if (replacement < XML_PARSER_NON_LINEAR * consumed) + return(0); + } else if (size != 0) { /* * Do the check based on the replacement size of the entity */ @@ -176,7 +193,6 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, */ return (0); } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); return (1); } @@ -2743,7 +2759,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -2785,7 +2801,7 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent)) + if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) goto int_error; growBuffer(buffer, XML_PARSER_BUFFER_SIZE); } @@ -7203,7 +7219,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlFreeNodeList(list); return; } - if (xmlParserEntityCheck(ctxt, 0, ent)) { + if (xmlParserEntityCheck(ctxt, 0, ent, 0)) { xmlFreeNodeList(list); return; } @@ -7361,6 +7377,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { xmlNodePtr nw = NULL, cur, firstChild = NULL; /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + + /* * when operating on a reader, the entities definitions * are always owning the entities subtree. if (ctxt->parseMode == XML_PARSE_READER) @@ -7400,6 +7423,14 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { } else if ((list == NULL) || (ctxt->inputNr > 0)) { xmlNodePtr nw = NULL, cur, next, last, firstChild = NULL; + + /* + * We are copying here, make sure there is no abuse + */ + ctxt->sizeentcopy += ent->length; + if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy)) + return; + /* * Copy the entity child list and make it the new * entity child list. The goal is to make sure any @@ -14767,6 +14798,7 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->catalogs = NULL; ctxt->nbentities = 0; ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); if (ctxt->attsDefault != NULL) { diff --git a/parserInternals.c b/parserInternals.c index 02032d5..f8a7041 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1757,6 +1757,8 @@ ctxt->charset = XML_CHAR_ENCODING_UTF8; ctxt->catalogs = NULL; ctxt->nbentities = 0; + ctxt->sizeentities = 0; + ctxt->sizeentcopy = 0; xmlInitNodeInfoSeq(&ctxt->node_seq); return(0); } --