Revert "Add support for parsing of large XML data (>= 10MB)"
author: Michael Paquier <michael@paquier.xyz>
Fri, 26 Jan 2024 01:15:32 +0000 (10:15 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Fri, 26 Jan 2024 01:15:32 +0000 (10:15 +0900)
This reverts commit 2197d06224a1, following a discussion over a Coverity
report where issues like the "billion laughs" attack could cause the
backend to waste CPU and memory even if a client applied checks on the
size of the data given as input, and libxml2 does not offer guarantees
that input limits are respected under XML_PARSE_HUGE.

Discussion: https://postgr.es/m/ZbHlgrPLtBZyr_QW@paquier.xyz

contrib/xml2/xpath.c
contrib/xml2/xslt_proc.c
src/backend/utils/adt/xml.c

index a2cec95f3fac5afeec6931ed7e34f0b6f71f5955..a967257546ad9a541ed057bd2956adc0303635c8 100644 (file)
@@ -381,7 +381,7 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
        {
                workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
                                                                                   docsize, NULL, NULL,
-                                                                                  XML_PARSE_HUGE | XML_PARSE_NOENT);
+                                                                                  XML_PARSE_NOENT);
                if (workspace->doctree != NULL)
                {
                        workspace->ctxt = xmlXPathNewContext(workspace->doctree);
@@ -626,7 +626,7 @@ xpath_table(PG_FUNCTION_ARGS)
                        if (xmldoc)
                                doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
                                                                                NULL, NULL,
-                                                                               XML_PARSE_HUGE | XML_PARSE_NOENT);
+                                                                               XML_PARSE_NOENT);
                        else                            /* treat NULL as not well-formed */
                                doctree = NULL;
 
index 9cbc05db1abc90a4afbc749c64a7e7d515a9d60c..f30a3a42c03e9328d25f44a5d87b8eee76919938 100644 (file)
@@ -87,7 +87,7 @@ xslt_process(PG_FUNCTION_ARGS)
                /* Parse document */
                doctree = xmlReadMemory((char *) VARDATA_ANY(doct),
                                                                VARSIZE_ANY_EXHDR(doct), NULL, NULL,
-                                                               XML_PARSE_HUGE | XML_PARSE_NOENT);
+                                                               XML_PARSE_NOENT);
 
                if (doctree == NULL)
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
@@ -96,7 +96,7 @@ xslt_process(PG_FUNCTION_ARGS)
                /* Same for stylesheet */
                ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet),
                                                          VARSIZE_ANY_EXHDR(ssheet), NULL, NULL,
-                                                         XML_PARSE_HUGE | XML_PARSE_NOENT);
+                                                         XML_PARSE_NOENT);
 
                if (ssdoc == NULL)
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
index d3db75eb8772c69d9c3936e30aa2fad6636053cc..f869c680afda645fac9aea5a89843ff515b16061 100644 (file)
@@ -1688,8 +1688,8 @@ xml_doctype_in_content(const xmlChar *str)
  * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
  *
  * If parsed_nodes isn't NULL and the input is not an XML document, the list
- * of parsed nodes from the xmlParseInNodeContext call will be returned to
- * *parsed_nodes.
+ * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
+ * to *parsed_nodes.
  *
  * Errors normally result in ereport(ERROR), but if escontext is an
  * ErrorSaveContext, then "safe" errors are reported there instead, and the
@@ -1795,7 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                        doc = xmlCtxtReadDoc(ctxt, utf8string,
                                                                 NULL,
                                                                 "UTF-8",
-                                                                XML_PARSE_NOENT | XML_PARSE_DTDATTR | XML_PARSE_HUGE
+                                                                XML_PARSE_NOENT | XML_PARSE_DTDATTR
                                                                 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
                        if (doc == NULL || xmlerrcxt->err_occurred)
                        {
@@ -1828,30 +1828,10 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
                        /* allow empty content */
                        if (*(utf8string + count))
                        {
-                               const char *data;
-                               xmlNodePtr      root;
-                               xmlNodePtr      lst;
-                               xmlParserErrors xml_error;
-
-                               data = (const char *) (utf8string + count);
-
-                               /*
-                                * Create a fake root node.  The xmlNewDoc() function creates
-                                * an XML document without any nodes, and this is required for
-                                * xmlParseInNodeContext() that is able to handle
-                                * XML_PARSE_HUGE.
-                                */
-                               root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-                               if (root == NULL || xmlerrcxt->err_occurred)
-                                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                                                               "could not allocate xml node");
-                               xmlDocSetRootElement(doc, root);
-
-                               /* Try to parse string with using root node context. */
-                               xml_error = xmlParseInNodeContext(root, data, strlen(data),
-                                                                                                 XML_PARSE_HUGE,
-                                                                                                 parsed_nodes ? parsed_nodes : &lst);
-                               if (xml_error != XML_ERR_OK || xmlerrcxt->err_occurred)
+                               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+                                                                                                          utf8string + count,
+                                                                                                          parsed_nodes);
+                               if (res_code != 0 || xmlerrcxt->err_occurred)
                                {
                                        xml_errsave(escontext, xmlerrcxt,
                                                                ERRCODE_INVALID_XML_CONTENT,
@@ -4364,7 +4344,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate parser context");
                doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
-                                                               len - xmldecl_len, NULL, NULL, XML_PARSE_HUGE);
+                                                               len - xmldecl_len, NULL, NULL, 0);
                if (doc == NULL || xmlerrcxt->err_occurred)
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
                                                "could not parse XML document");
@@ -4695,7 +4675,7 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value)
 
        PG_TRY();
        {
-               doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, XML_PARSE_HUGE);
+               doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
                if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
                        xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
                                                "could not parse XML document");