summaryrefslogtreecommitdiff
path: root/contrib/xml
diff options
context:
space:
mode:
authorBruce Momjian2004-03-05 03:57:58 +0000
committerBruce Momjian2004-03-05 03:57:58 +0000
commit31f4b59a464808ab0fec0ffb2eaa723321ea1af7 (patch)
tree004f71d1eb77899fa9e16ac8047189dcde6576e5 /contrib/xml
parentadca025c9ec4b3050411eb74a5b4f9c20a4ce2b5 (diff)
Move new version of contrib/ xml into xml2, keep old version in /xml.
Diffstat (limited to 'contrib/xml')
-rw-r--r--contrib/xml/Makefile18
-rw-r--r--contrib/xml/README.pgxml179
-rw-r--r--contrib/xml/TODO78
-rw-r--r--contrib/xml/pgxml.c352
-rw-r--r--contrib/xml/pgxml.h42
-rw-r--r--contrib/xml/pgxml.sql.in57
-rw-r--r--contrib/xml/pgxml_dom.c265
-rw-r--r--contrib/xml/pgxml_dom.sql.in10
-rw-r--r--contrib/xml/xpath.c893
-rw-r--r--contrib/xml/xslt_proc.c184
10 files changed, 747 insertions, 1331 deletions
diff --git a/contrib/xml/Makefile b/contrib/xml/Makefile
deleted file mode 100644
index 9177ca865c..0000000000
--- a/contrib/xml/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-# This makefile will build the new XML and XSLT routines.
-subdir = contrib/xml
-top_builddir = ../../
-include $(top_builddir)/src/Makefile.global
-
-MODULE_big = pgxml
-
-# Remove xslt_proc.o from the following line if you don't have libxslt
-OBJS = xpath.o xslt_proc.o
-
-# Remove -lxslt from the following line if you don't have libxslt.
-SHLIB_LINK = -lxml2 -lxslt
-
-DATA_built = pgxml.sql
-DOCS = README.pgxml
-
-include $(top_builddir)contrib/contrib-global.mk
-
diff --git a/contrib/xml/README.pgxml b/contrib/xml/README.pgxml
deleted file mode 100644
index f29d071722..0000000000
--- a/contrib/xml/README.pgxml
+++ /dev/null
@@ -1,179 +0,0 @@
-XML-handling functions for PostgreSQL
-=====================================
-
-Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com)
-
-This version of the XML functions provides both XPath querying and
-XSLT functionality. There is also a new table function which allows
-the straightforward return of multiple XML results. Note that the current code
-doesn't take any particular care over character sets - this is
-something that should be fixed at some point!
-
-Installation
-------------
-
-The current build process will only work if the files are in
-contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been
-configured and built (If you alter the subdir value in the Makefile
-you can place it in a different directory in a PostgreSQL tree).
-
-Before you begin, just check the Makefile, and then just 'make' and
-'make install'.
-
-This code requires libxml to be previously installed.
-
-Description of functions
-------------------------
-
-The first set of functions are straightforward XML parsing and XPath queries:
-
-pgxml_parse(document) RETURNS bool
-
-This parses the document text in its parameter and returns true if the
-document is well-formed XML.
-
-xpath_string(document,query) RETURNS text
-xpath_number(document,query) RETURNS float4
-xpath_bool(document,query) RETURNS bool
-
-These functions evaluate the XPath query on the supplied document, and
-cast the result to the specified type.
-
-
-xpath_nodeset(document,query,toptag,itemtag) RETURNS text
-
-This evaluates query on document and wraps the result in XML tags. If
-the result is multivalued, the output will look like:
-
-<toptag>
-<itemtag>Value 1 which could be an XML fragment</itemtag>
-<itemtag>Value 2....</itemtag>
-</toptag>
-
-If either toptag or itemtag is an empty string, the relevant tag is omitted.
-There are also wrapper functions for this operation:
-
-xpath_nodeset(document,query) RETURNS text omits both tags.
-xpath_nodeset(document,query,itemtag) RETURNS text omits toptag.
-
-
-xpath_list(document,query,separator) RETURNS text
-
-This function returns multiple values separated by the specified
-separator, e.g. Value 1,Value 2,Value 3 if separator=','.
-
-xpath_list(document,query) RETURNS text
-
-This is a wrapper for the above function that uses ',' as the separator.
-
-
-xpath_table
------------
-
-This is a table function which evaluates a set of XPath queries on
-each of a set of documents and returns the results as a table. The
-primary key field from the original document table is returned as the
-first column of the result so that the resultset from xpath_table can
-be readily used in joins.
-
-The function itself takes 5 arguments, all text.
-
-xpath_table(key,document,relation,xpaths,criteria)
-
-key - the name of the "key" field - this is just a field to be used as
-the first column of the output table i.e. it identifies the record from
-which each output row came.
-
-document - the name of the field containing the XML document
-
-relation - the name of the table or view containing the documents
-
-xpaths - multiple xpath expressions separated by |
-
-criteria - The contents of the where clause. This needs to be specified,
-so use "true" or "1=1" here if you want to process all the rows in the
-relation.
-
-NB These parameters (except the XPath strings) are just substituted
-into a plain SQL SELECT statement, so you have some flexibility - the
-statement is
-
-SELECT <key>,<document> FROM <relation> WHERE <criteria>
-
-so those parameters can be *anything* valid in those particular
-locations. The result from this SELECT needs to return exactly two
-columns (which it will unless you try to list multiple fields for key
-or document). Beware that this simplistic approach requires that you
-validate any user-supplied values to avoid SQL injection attacks.
-
-Using the function
-
-The function has to be used in a FROM expression. This gives the following
-form:
-
-SELECT * FROM
-xpath_table('article_id',
- 'article_xml',
- 'articles',
- '/article/author|/article/pages|/article/title',
- 'date_entered > ''2003-01-01'' ')
-AS t(article_id integer, author text, page_count integer, title text);
-
-The AS clause defines the names and types of the columns in the
-virtual table. If there are more XPath queries than result columns,
-the extra queries will be ignored. If there are more result columns
-than XPath queries, the extra columns will be NULL.
-
-Note that I've said in this example that pages is an integer. The
-function deals internally with string representations, so when you say
-you want an integer in the output, it will take the string
-representation of the XPath result and use PostgreSQL input functions
-to transform it into an integer (or whatever type the AS clause
-requests). An error will result if it can't do this - for example if
-the result is empty - so you may wish to just stick to 'text' as the
-column type if you think your data has any problems.
-
-The select statement doesn't need to use * alone - it can reference the
-columns by name or join them to other tables. The function produces a
-virtual table with which you can perform any operation you wish (e.g.
-aggregation, joining, sorting etc). So we could also have:
-
-SELECT t.title, p.fullname, p.email
-FROM xpath_table('article_id','article_xml','articles',
- '/article/title|/article/author/@id',
- 'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
- AS t(article_id integer, title text, author_id integer),
- tblPeopleInfo AS p
-WHERE t.author_id = p.person_id;
-
-as a more complicated example. Of course, you could wrap all
-of this in a view for convenience.
-
-XSLT functions
---------------
-
-The following functions are available if libxslt is installed (this is
-not currently detected automatically, so you will have to amend the
-Makefile)
-
-xslt_process(document,stylesheet,paramlist) RETURNS text
-
-This function applies the XSL stylesheet to the document and returns
-the transformed result. The paramlist is a list of parameter
-assignments to be used in the transformation, specified in the form
-'a=1,b=2'. Note that this is also proof-of-concept code and the
-parameter parsing is very simple-minded (e.g. parameter values cannot
-contain commas!)
-
-Also note that if either the document or stylesheet values do not
-begin with a < then they will be treated as URLs and libxslt will
-fetch them. It thus follows that you can use xslt_process as a means
-to fetch the contents of URLs - you should be aware of the security
-implications of this.
-
-There is also a two-parameter version of xslt_process which does not
-pass any parameters to the transformation.
-
-If you have any comments or suggestions, please do contact me at
-jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't
-guarantee a rapid response to your query!
diff --git a/contrib/xml/TODO b/contrib/xml/TODO
new file mode 100644
index 0000000000..5ddd62a658
--- /dev/null
+++ b/contrib/xml/TODO
@@ -0,0 +1,78 @@
+PGXML TODO List
+===============
+
+Some of these items still require much more thought! Since the first
+release, the XPath support has improved (because I'm no longer using a
+homemade algorithm!).
+
+1. Performance considerations
+
+At present each document is parsed to produce the DOM tree on every query.
+
+Pros:
+ Easy
+ No persistent memory or storage allocation for parsed trees
+ (libxml docs suggest representation of a document might
+ be 4 times the size of the text)
+
+Cons:
+ Slow/ CPU intensive to parse.
+ Makes it difficult for PLs to apply libxml manipulations to create
+ new documents or amend existing ones.
+
+
+2. XQuery
+
+I'm not sure if the addition of XQuery would be best as a function or
+as a new front-end parser. This is one to think about, but with a
+decent implementation of XPath, one of the prerequisites is covered.
+
+3. DOM Interfaces
+
+Expose more aspects of the DOM to user functions/ PLs. This would
+allow a procedure in a PL to run some queries and then use exposed
+interfaces to libxml to create an XML document out of the query
+results. I accept the argument that this might be more properly
+performed on the client side.
+
+4. Returning sets of documents from XPath queries.
+
+Although the current implementation allows you to amalgamate the
+returned results into a single document, it's quite possible that
+you'd like to use the returned set of nodes as a source for FROM.
+
+Is there a good way to optimise/index the results of certain XPath
+operations to make them faster? For example:
+
+select docid, pgxml_xpath(document,'//site/location/text()','','') as location
+where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
+
+and with multiple element occurrences in a document?
+
+select d.docid, pgxml_xpath(d.document,'//site/location/text()','','')
+from docstore d,
+pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft
+where ft.key = d.docid and ft.value ='Limekiln';
+
+pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
+return a set of two-element tuples (key,value) consisting of the value of
+returnkey, and the cdata value of the xpath. The XML document would be
+defined by relname and attrname.
+
+The pgxml_xpaths function could be the basis of a functional index,
+which could speed up the above query very substantially, working
+through the normal query planner mechanism.
+
+5. Return type support.
+
+Better support for returning e.g. numeric or boolean values. I need to
+get to grips with the returned data from libxml first.
+
+
+John Gray <jgray@azuli.co.uk> 16 August 2001
+
+
+
+
+
+
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c
new file mode 100644
index 0000000000..4d8c3b96bc
--- /dev/null
+++ b/contrib/xml/pgxml.c
@@ -0,0 +1,352 @@
+/********************************************************
+ * Interface code to parse an XML document using expat
+ ********************************************************/
+
+#include "postgres.h"
+#include "fmgr.h"
+
+#include "expat.h"
+#include "pgxml.h"
+
+/* Memory management - we make expat use standard pg MM */
+
+XML_Memory_Handling_Suite mhs;
+
+/* passthrough functions (palloc is a macro) */
+
+/* Allocation hook handed to expat: forwards the request to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+    void       *chunk = palloc(size);
+
+    return chunk;
+}
+
+/* Reallocation hook handed to expat: forwards the request to repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+    void       *resized = repalloc(ptr, size);
+
+    return resized;
+}
+
+/*
+ * Free hook handed to expat: forwards the request to pfree().
+ *
+ * pfree() is invoked as a plain statement; C does not permit
+ * "return <expression>;" in a function whose return type is void.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+    pfree(ptr);
+}
+
+/*
+ * Fill in the file-level memory handling suite "mhs" so that every slot
+ * refers to one of the passthrough functions defined above.
+ */
+static void
+pgxml_mhs_init()
+{
+    mhs.free_fcn = pgxml_pfree;
+    mhs.realloc_fcn = pgxml_repalloc;
+    mhs.malloc_fcn = pgxml_palloc;
+}
+
+/*
+ * Placeholder for handler configuration. The body is currently empty, so
+ * calling this function has no effect.
+ */
+static void
+pgxml_handler_init()
+{
+ /*
+ * This code should set up the relevant handlers from user-supplied
+ * settings. Quite how these settings are made is another matter :)
+ */
+}
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document)
+ *
+ * Runs the whole document through an expat parser in a single call and
+ * returns true when expat accepts it, false otherwise. Parse errors are
+ * not reported to the client; only the boolean outcome is returned.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+    text       *doc = PG_GETARG_TEXT_P(0);      /* document buffer */
+    int32       doclen = VARSIZE(doc) - VARHDRSZ;
+    XML_Parser  parser;
+    bool        wellformed;
+
+    pgxml_mhs_init();
+
+    pgxml_handler_init();
+
+    parser = XML_ParserCreate_MM(NULL, &mhs, NULL);
+    if (parser == NULL)
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+                 errmsg("could not create expat parser")));
+        PG_RETURN_NULL();       /* seems appropriate if we couldn't parse */
+    }
+
+    /* The final argument (1) tells expat this is the last chunk of input. */
+    wellformed = (XML_Parse(parser, (char *) VARDATA(doc), doclen, 1) != 0);
+
+    XML_ParserFree(parser);
+
+    PG_RETURN_BOOL(wellformed);
+}
+
+/* XPath handling functions */
+
+/* XPath support here is for a very skeletal kind of XPath!
+ It was easy to program though... */
+
+/* This first is the core function that builds a result set. The
+ actual functions called by the user manipulate that result set
+ in various ways.
+*/
+
+/*
+ * Parse "doc" with expat and collect the character data of the elements
+ * selected by "pathstr".
+ *
+ * On success returns a palloc'd XPath_Results; the caller is expected to
+ * pfree() its resbuf member and then the struct itself. Returns NULL when
+ * expat rejects the document; in that case everything allocated here,
+ * including the result buffer, has already been released.
+ *
+ * NOTE(review): the result buffer is sized to the document length. This
+ * assumes the character-data callback never delivers more bytes than the
+ * document contains -- confirm this holds when entities are expanded.
+ */
+static XPath_Results *
+build_xpath_results(text *doc, text *pathstr)
+{
+    XPath_Results *xpr;
+    char       *res;
+    pgxml_udata *udata;
+    XML_Parser  p;
+    int32       docsize;
+    int32       pathlen;
+    int         parsed_ok;
+
+    docsize = VARSIZE(doc) - VARHDRSZ;
+    pathlen = VARSIZE(pathstr) - VARHDRSZ;
+
+    xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
+    memset((void *) xpr, 0, sizeof(XPath_Results));
+    xpr->rescount = 0;
+
+    /* res isn't going to be the real return type, it is just a buffer */
+    res = (char *) palloc(docsize);
+    memset((void *) res, 0, docsize);
+    xpr->resbuf = res;
+
+    udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
+    memset((void *) udata, 0, sizeof(pgxml_udata));
+
+    udata->currentpath[0] = '\0';
+    udata->textgrab = 0;
+
+    /* NUL-terminated private copy of the requested path */
+    udata->path = (char *) palloc(pathlen + 1);
+    memcpy(udata->path, VARDATA(pathstr), pathlen);
+    udata->path[pathlen] = '\0';
+
+    udata->resptr = res;
+    udata->reslen = 0;
+    udata->xpres = xpr;
+
+    /* Now fire up the parser */
+    pgxml_mhs_init();
+
+    p = XML_ParserCreate_MM(NULL, &mhs, NULL);
+    if (!p)
+    {
+        ereport(ERROR,
+                (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+                 errmsg("could not create expat parser")));
+        pfree(xpr);
+        pfree(udata->path);
+        pfree(udata);
+        pfree(res);
+        return NULL;
+    }
+    XML_SetUserData(p, (void *) udata);
+
+    /* Set the handlers */
+    XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
+    XML_SetCharacterDataHandler(p, pgxml_charhandler);
+
+    parsed_ok = (XML_Parse(p, (char *) VARDATA(doc), docsize, 1) != 0);
+
+    /* The parser and the per-parse state are finished with either way. */
+    XML_ParserFree(p);
+    pfree(udata->path);
+    pfree(udata);
+
+    if (!parsed_ok)
+    {
+        /* Release the result buffer too; it used to be leaked here. */
+        pfree(res);
+        pfree(xpr);
+        return NULL;
+    }
+
+    return xpr;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+/*
+ * pgxml_xpath(document, pathstr, index)
+ *
+ * Returns the index'th (1-based) result of evaluating pathstr against the
+ * document, as text. Returns SQL NULL when the document cannot be parsed
+ * or when index does not designate an existing result; this now includes
+ * zero and negative values, which used to index before the start of the
+ * result arrays.
+ */
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+    text       *t = PG_GETARG_TEXT_P(0);        /* document buffer */
+    text       *t2 = PG_GETARG_TEXT_P(1);       /* path expression */
+    int32       index = PG_GETARG_INT32(2);     /* 1-based, from the user */
+    int32       ind;
+    XPath_Results *xpresults;
+    text       *restext;
+
+    xpresults = build_xpath_results(t, t2);
+
+    /*
+     * This needs to be changed depending on the mechanism for returning
+     * our set of results.
+     */
+
+    if (xpresults == NULL)      /* parse error (not WF or parser failure) */
+        PG_RETURN_NULL();
+
+    /* Validate before subtracting so the lowest int32 cannot overflow. */
+    if (index < 1 || index > xpresults->rescount)
+    {
+        pfree(xpresults->resbuf);
+        pfree(xpresults);
+        PG_RETURN_NULL();
+    }
+    ind = index - 1;
+
+    restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
+    memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
+
+    VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
+
+    pfree(xpresults->resbuf);
+    pfree(xpresults);
+
+    PG_RETURN_TEXT_P(restext);
+}
+
+
+/*
+ * Compare the current element path with the requested path and update the
+ * text-capture flag accordingly.
+ *
+ * If the requested path does not occur anywhere in the current path and
+ * capture is on, capture is switched off and the accumulated text is
+ * recorded as one result. If it does occur, capture is switched on when
+ * the occurrence found ends exactly at the end of the current path and
+ * either covers the whole current path (requested path begins with '/')
+ * or is immediately preceded by a '/'.
+ *
+ * NOTE(review): strstr() reports only the first occurrence, so a later
+ * occurrence that is anchored at the end is never examined -- confirm
+ * whether that is intended.
+ */
+static void
+pgxml_pathcompare(void *userData)
+{
+ char *matchpos;
+
+ matchpos = strstr(UD->currentpath, UD->path);
+
+ if (matchpos == NULL)
+ { /* Should we have more logic here ? */
+ if (UD->textgrab)
+ {
+ UD->textgrab = 0;
+ pgxml_finalisegrabbedtext(userData);
+ }
+ return;
+ }
+
+ /*
+ * OK, we have a match of some sort. Now we need to check that our
+ * match is anchored to the *end* of the string AND that it is
+ * immediately preceded by a '/'
+ */
+
+ /*
+ * This test wouldn't work if strlen (UD->path) overran the length of
+ * the currentpath, but that's not possible because we got a match!
+ */
+
+ if ((matchpos + strlen(UD->path))[0] == '\0')
+ {
+ if ((UD->path)[0] == '/')
+ {
+ if (matchpos == UD->currentpath)
+ UD->textgrab = 1;
+ }
+ else
+ {
+ /*
+ * NOTE(review): reads the byte before matchpos; presumably
+ * matchpos is never the start of currentpath on this branch --
+ * verify.
+ */
+ if ((matchpos - 1)[0] == '/')
+ UD->textgrab = 1;
+ }
+ }
+}
+
+/*
+ * Start-element callback: append "/name" to the current path (or warn if
+ * it would not fit) and, unless text is already being captured, check
+ * the path for a match. "atts" is not used.
+ */
+static void
+pgxml_starthandler(void *userData, const XML_Char * name,
+                   const XML_Char ** atts)
+{
+    size_t      pathlen = strlen(UD->currentpath);
+    size_t      namelen = strlen(name);
+
+    if (namelen + pathlen > MAXPATHLENGTH - 2)
+        elog(WARNING, "path too long");
+    else
+    {
+        /* Write the separator, then the name with its terminating NUL. */
+        UD->currentpath[pathlen] = '/';
+        memcpy(UD->currentpath + pathlen + 1, name, namelen + 1);
+    }
+
+    /*
+     * While capturing, a nested start tag leaves the match state alone.
+     * (Depending on user preference, the element could be "reconstituted"
+     * into the result text here.)
+     */
+    if (!UD->textgrab)
+        pgxml_pathcompare(userData);
+}
+
+/*
+ * End-element callback: remove the last component from the current path
+ * when it matches the closing element's name (warn otherwise), then
+ * re-check the match state if text is being captured.
+ */
+static void
+pgxml_endhandler(void *userData, const XML_Char * name)
+{
+    char       *lastsep = strrchr(UD->currentpath, '/');
+
+    if (lastsep == NULL)
+    {
+        /* internal error */
+        elog(ERROR, "did not find '/'");
+        lastsep = UD->currentpath;
+    }
+
+    if (strcmp(name, lastsep + 1) == 0)
+        *lastsep = '\0';        /* Chop that element off the end */
+    else
+    {
+        /* unmatched entry, so leave the path untouched */
+        elog(WARNING, "wanted [%s], got [%s]", lastsep, name);
+    }
+
+    if (UD->textgrab)
+        pgxml_pathcompare(userData);
+}
+
+/*
+ * Character-data callback: while capture is on, append the "len" bytes at
+ * "s" to the result buffer and advance the write position and the length
+ * of the result in progress.
+ */
+static void
+pgxml_charhandler(void *userData, const XML_Char * s, int len)
+{
+    /* Nothing to do unless capturing and there is data to copy. */
+    if (!UD->textgrab || len <= 0)
+        return;
+
+    memcpy(UD->resptr, s, len);
+    UD->reslen += len;
+    UD->resptr += len;
+}
+
+/* Should I be using PG list types here? */
+
+/*
+ * Record the text captured since the previous result as one more entry
+ * in the result set.
+ *
+ * The text stays where it is in the shared result buffer; only its start
+ * address and length are stored, so consecutive results lie back to back
+ * but their boundaries remain known.
+ *
+ * The result arrays hold MAXRESULTS entries. Once they are full, further
+ * matches are discarded with a warning rather than written past the end
+ * of the arrays.
+ */
+static void
+pgxml_finalisegrabbedtext(void *userData)
+{
+    XPath_Results *xpr = UD->xpres;
+
+    if (xpr->rescount >= MAXRESULTS)
+    {
+        elog(WARNING, "too many results (limit is %d), ignoring extra match",
+             MAXRESULTS);
+        UD->reslen = 0;
+        return;
+    }
+
+    /* In resptr/reslen, we have a single result. */
+    xpr->results[xpr->rescount] = UD->resptr - UD->reslen;
+    xpr->reslens[xpr->rescount] = UD->reslen;
+    xpr->rescount++;
+
+    UD->reslen = 0;
+}
diff --git a/contrib/xml/pgxml.h b/contrib/xml/pgxml.h
new file mode 100644
index 0000000000..2b80124b77
--- /dev/null
+++ b/contrib/xml/pgxml.h
@@ -0,0 +1,42 @@
+/* Header for pg xml parser interface */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static void pgxml_mhs_init();
+static void pgxml_handler_init();
+Datum pgxml_parse(PG_FUNCTION_ARGS);
+Datum pgxml_xpath(PG_FUNCTION_ARGS);
+static void pgxml_starthandler(void *userData, const XML_Char * name,
+ const XML_Char ** atts);
+static void pgxml_endhandler(void *userData, const XML_Char * name);
+static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
+static void pgxml_pathcompare(void *userData);
+static void pgxml_finalisegrabbedtext(void *userData);
+
+#define MAXPATHLENGTH 512
+#define MAXRESULTS 100
+
+
+/*
+ * Set of results produced by one path evaluation. Each result is a
+ * (start pointer, length) pair; the text itself lives in resbuf.
+ */
+typedef struct
+{
+ /* number of entries filled in results[] and reslens[] */
+ int rescount;
+ /* start of each result's text */
+ char *results[MAXRESULTS];
+ /* length in bytes of each result's text */
+ int32 reslens[MAXRESULTS];
+ char *resbuf; /* pointer to the result buffer for pfree */
+} XPath_Results;
+
+
+
+/*
+ * Per-parse state passed to the expat callbacks as their userData
+ * argument (see the UD macro).
+ */
+typedef struct
+{
+ /* '/'-separated names of the elements currently open */
+ char currentpath[MAXPATHLENGTH];
+ /* NUL-terminated path being searched for */
+ char *path;
+ /* nonzero while character data is being copied to the result buffer */
+ int textgrab;
+ /* next write position in the result buffer */
+ char *resptr;
+ /* bytes accumulated for the result in progress */
+ int32 reslen;
+ /* result set being filled in */
+ XPath_Results *xpres;
+} pgxml_udata;
+
+
+#define UD ((pgxml_udata *) userData)
diff --git a/contrib/xml/pgxml.sql.in b/contrib/xml/pgxml.sql.in
deleted file mode 100644
index ff46e845b1..0000000000
--- a/contrib/xml/pgxml.sql.in
+++ /dev/null
@@ -1,57 +0,0 @@
---SQL for XML parser
-
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
--- List function
-
-CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
- AS 'MODULE_PATHNAME'
- LANGUAGE 'c' WITH (isStrict);
-
-
-CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text
-AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
-
-
-
--- Wrapper functions for nodeset where no tags needed.
-
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
-'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
-
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
-'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
-
--- Table function
-
-CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
- AS 'MODULE_PATHNAME'
- LANGUAGE 'c' WITH (isStrict);
-
--- XSLT functions
--- Delete from here to the end of the file if you are not compiling with
--- XSLT support.
-
-
-CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
--- the function checks for the correct argument count
-
-CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
diff --git a/contrib/xml/pgxml_dom.c b/contrib/xml/pgxml_dom.c
new file mode 100644
index 0000000000..2b11b1d646
--- /dev/null
+++ b/contrib/xml/pgxml_dom.c
@@ -0,0 +1,265 @@
+/* Parser interface for DOM-based parser (libxml) rather than
+ stream-based SAX-type parser */
+
+#include "postgres.h"
+#include "fmgr.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* declarations */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static char *pgxml_pstrdup(const char *string);
+
+static void pgxml_parser_init();
+
+static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
+ xmlChar * toptagname, xmlChar * septagname,
+ int format);
+
+static xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+
+Datum pgxml_parse(PG_FUNCTION_ARGS);
+Datum pgxml_xpath(PG_FUNCTION_ARGS);
+
+/* memory handling passthrough functions (e.g. palloc, pstrdup are
+ currently macros, and the others might become so...) */
+
+/* Allocation hook registered with libxml: forwards to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+    void       *chunk = palloc(size);
+
+    return chunk;
+}
+
+/* Reallocation hook registered with libxml: forwards to repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+    void       *resized = repalloc(ptr, size);
+
+    return resized;
+}
+
+/*
+ * Free hook registered with libxml: forwards to pfree().
+ *
+ * pfree() is invoked as a plain statement; C does not permit
+ * "return <expression>;" in a function whose return type is void.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+    pfree(ptr);
+}
+
+/* String-duplication hook registered with libxml: forwards to pstrdup(). */
+static char *
+pgxml_pstrdup(const char *string)
+{
+    char       *copy = pstrdup(string);
+
+    return copy;
+}
+
+/*
+ * Prepare libxml for use: register the four passthrough functions above
+ * as its free/malloc/realloc/strdup routines, then initialise the parser.
+ */
+static void
+pgxml_parser_init()
+{
+ /*
+ * This code should also set parser settings from user-supplied info.
+ * Quite how these settings are made is another matter :)
+ */
+
+ /* Argument order here is free, malloc, realloc, strdup. */
+ xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
+ xmlInitParser();
+
+}
+
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document)
+ *
+ * Returns true if libxml can parse the document, false otherwise.
+ *
+ * The document tree is freed before xmlCleanupParser() is called, so the
+ * library-wide cleanup runs only once no document is still allocated.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+    /* called as pgxml_parse(document) */
+    text       *t = PG_GETARG_TEXT_P(0);        /* document buffer */
+    int32       docsize = VARSIZE(t) - VARHDRSZ;
+    xmlDocPtr   doctree;
+    bool        wellformed;
+
+    pgxml_parser_init();
+
+    doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+    wellformed = (doctree != NULL);     /* NULL means not well-formed */
+
+    if (wellformed)
+        xmlFreeDoc(doctree);
+    xmlCleanupParser();
+
+    PG_RETURN_BOOL(wellformed);
+}
+
+/* Write opener (either "<" or "</"), then tagname, then ">" to buf. */
+static void
+pgxml_write_tag(xmlBufferPtr buf, const char *opener, xmlChar * tagname)
+{
+    xmlBufferWriteChar(buf, opener);
+    xmlBufferWriteCHAR(buf, tagname);
+    xmlBufferWriteChar(buf, ">");
+}
+
+/*
+ * Translate a nodeset into a text representation.
+ *
+ * Every node is dumped with xmlNodeDump into one xmlBuffer, and a copy of
+ * the buffer contents is returned. When toptagname is non-empty the whole
+ * output is wrapped in <toptagname> ... </toptagname>; when septagname is
+ * non-empty each node is wrapped in <septagname> ... </septagname>. A
+ * nonzero format adds a newline after each node, and format == 2 is
+ * additionally passed to xmlNodeDump as its format flag.
+ */
+static xmlChar *
+pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+                   xmlDocPtr doc,
+                   xmlChar * toptagname,
+                   xmlChar * septagname,
+                   int format)
+{
+    xmlBufferPtr outbuf = xmlBufferCreate();
+    int         wrap_all = (toptagname != NULL) && (xmlStrlen(toptagname) > 0);
+    xmlChar    *text_out;
+    int         n;
+
+    if (wrap_all)
+        pgxml_write_tag(outbuf, "<", toptagname);
+
+    if (nodeset != NULL)
+    {
+        int         wrap_each = (septagname != NULL) && (xmlStrlen(septagname) > 0);
+
+        for (n = 0; n < nodeset->nodeNr; n++)
+        {
+            if (wrap_each)
+                pgxml_write_tag(outbuf, "<", septagname);
+
+            xmlNodeDump(outbuf, doc, nodeset->nodeTab[n], 1, (format == 2));
+
+            if (wrap_each)
+                pgxml_write_tag(outbuf, "</", septagname);
+
+            if (format)
+                xmlBufferWriteChar(outbuf, "\n");
+        }
+    }
+
+    if (wrap_all)
+        pgxml_write_tag(outbuf, "</", toptagname);
+
+    text_out = xmlStrdup(outbuf->content);
+    xmlBufferFree(outbuf);
+    return text_out;
+}
+
+/*
+ * Return a palloc'd, NUL-terminated xmlChar copy of the payload of a
+ * text value.
+ */
+static xmlChar *
+pgxml_texttoxmlchar(text *textstring)
+{
+    int32       nbytes = VARSIZE(textstring) - VARHDRSZ;
+    xmlChar    *copy = (xmlChar *) palloc(nbytes + 1);
+
+    memcpy((char *) copy, VARDATA(textstring), nbytes);
+    copy[nbytes] = '\0';
+
+    return copy;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+/*
+ * pgxml_xpath(document, xpath, toptag, septag)
+ *
+ * Parses the document, evaluates the XPath expression against it with the
+ * root element as context node, and returns the result as text. A nodeset
+ * result is rendered by pgxmlNodeSetToText using toptag and septag; a
+ * string result is returned as is; any other result type yields
+ * "<unsupported/>" plus a warning.
+ *
+ * Returns SQL NULL when the document is not well-formed, when the
+ * expression does not compile, or when evaluation yields no result.
+ *
+ * All libxml objects created here (XPath result, XPath context, document)
+ * and the converted argument strings are released on every exit path,
+ * and the document is freed before xmlCleanupParser() is called.
+ */
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+    xmlDocPtr   doctree = NULL;
+    xmlXPathContextPtr ctxt = NULL;
+    xmlXPathObjectPtr res = NULL;
+    xmlXPathCompExprPtr comppath;
+    xmlChar    *xpath,
+               *toptag,
+               *septag;
+    xmlChar    *xpresstr = NULL;
+    int32       docsize,
+                ressize;
+    text       *t;
+    text       *xpres = NULL;   /* stays NULL when we must return NULL */
+
+    t = PG_GETARG_TEXT_P(0);    /* document buffer */
+    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));   /* XPath expression */
+    toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+    septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
+
+    docsize = VARSIZE(t) - VARHDRSZ;
+
+    pgxml_parser_init();
+
+    doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+    if (doctree == NULL)
+        goto cleanup;           /* not well-formed */
+
+    ctxt = xmlXPathNewContext(doctree);
+    if (ctxt == NULL)
+        goto cleanup;
+    ctxt->node = xmlDocGetRootElement(doctree);
+
+    /* compile the path */
+    comppath = xmlXPathCompile(xpath);
+    if (comppath == NULL)
+    {
+        elog(WARNING, "XPath syntax error");
+        goto cleanup;
+    }
+
+    /* Now evaluate the path expression. */
+    res = xmlXPathCompiledEval(comppath, ctxt);
+    xmlXPathFreeCompExpr(comppath);
+    if (res == NULL)
+        goto cleanup;           /* NULL seems appropriate */
+
+    /* How the result is rendered depends on its type. */
+    switch (res->type)
+    {
+        case XPATH_NODESET:
+            xpresstr = pgxmlNodeSetToText(res->nodesetval,
+                                          doctree,
+                                          toptag, septag, 0);
+            break;
+        case XPATH_STRING:
+            xpresstr = xmlStrdup(res->stringval);
+            break;
+        default:
+            elog(WARNING, "Unsupported XQuery result: %d", res->type);
+            xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
+            break;
+    }
+
+    /* Now convert this result back to text; a NULL string becomes empty. */
+    ressize = (xpresstr != NULL) ? xmlStrlen(xpresstr) : 0;
+    xpres = (text *) palloc(ressize + VARHDRSZ);
+    if (ressize > 0)
+        memcpy(VARDATA(xpres), xpresstr, ressize);
+    VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
+
+cleanup:
+    /* Free various storage; the document goes before the library cleanup. */
+    if (xpresstr != NULL)
+        xmlFree(xpresstr);
+    if (res != NULL)
+        xmlXPathFreeObject(res);
+    if (ctxt != NULL)
+        xmlXPathFreeContext(ctxt);
+    if (doctree != NULL)
+        xmlFreeDoc(doctree);
+    pfree((void *) septag);
+    pfree((void *) toptag);
+    pfree((void *) xpath);
+    xmlCleanupParser();
+
+    if (xpres == NULL)
+        PG_RETURN_NULL();
+    PG_RETURN_TEXT_P(xpres);
+}
diff --git a/contrib/xml/pgxml_dom.sql.in b/contrib/xml/pgxml_dom.sql.in
new file mode 100644
index 0000000000..514643b936
--- /dev/null
+++ b/contrib/xml/pgxml_dom.sql.in
@@ -0,0 +1,10 @@
+-- SQL for XML parser
+
+-- Adjust this setting to control where the objects get created.
+SET search_path TO public;
+
+-- pgxml_parse(document): true if the document parses as XML.
+CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
+ AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+
+-- pgxml_xpath(document, xpath, toptag, septag): result of evaluating the
+-- XPath expression on the document, returned as text.
+CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
+ AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
diff --git a/contrib/xml/xpath.c b/contrib/xml/xpath.c
deleted file mode 100644
index b4fc828798..0000000000
--- a/contrib/xml/xpath.c
+++ /dev/null
@@ -1,893 +0,0 @@
-/* Parser interface for DOM-based parser (libxml) rather than
- stream-based SAX-type parser */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "executor/spi.h"
-#include "funcapi.h"
-#include "miscadmin.h"
-#include "lib/stringinfo.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-#include <libxml/xmlerror.h>
-#include <libxml/parserInternals.h>
-
-/* Forward declarations: libxml memory hooks, the libxml error callback,
- * shared helpers, and the SQL-callable entry points defined in this file. */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
-static void pgxml_errorHandler (void * ctxt, const char *msg, ...);
-
-void elog_error(int level, char *explain, int force);
-void pgxml_parser_init(void);
-
-static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
- xmlChar * toptagname, xmlChar * septagname,
- xmlChar * plainsep);
-
-text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
- xmlChar *septag, xmlChar *plainsep);
-
-xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath);
-
-
-Datum pgxml_parse(PG_FUNCTION_ARGS);
-Datum xpath_nodeset(PG_FUNCTION_ARGS);
-Datum xpath_string(PG_FUNCTION_ARGS);
-Datum xpath_number(PG_FUNCTION_ARGS);
-Datum xpath_bool(PG_FUNCTION_ARGS);
-Datum xpath_list(PG_FUNCTION_ARGS);
-Datum xpath_table(PG_FUNCTION_ARGS);
-
-/* Global variables: state shared by pgxml_errorHandler and elog_error */
-char *errbuf; /* per line error buffer; allocated by pgxml_parser_init */
-char *pgxml_errorMsg = NULL; /* overall error message; built up by pgxml_errorHandler, reset by pgxml_parser_init */
-
-/* Convenience macros: convert a C string to a text datum (GET_TEXT, via
- * textin) and a text datum to a C string (GET_STR, via textout). */
-
-#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
-#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
-
-#define ERRBUF_SIZE 200
-
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
- currently macros, and the others might become so...) */
-
-/* libxml allocation hook: forwards the request to palloc(). */
-static void *
-pgxml_palloc(size_t size)
-{
-    void       *chunk = palloc(size);
-
-    return chunk;
-}
-
-/* libxml reallocation hook: forwards the request to repalloc(). */
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-    void       *resized = repalloc(ptr, size);
-
-    return resized;
-}
-
-/*
- * libxml free hook: forwards the pointer to pfree().
- *
- * The call is a plain statement; a void function must not return an
- * expression.
- */
-static void
-pgxml_pfree(void *ptr)
-{
-    pfree(ptr);
-}
-
-/* libxml strdup hook: forwards the string to pstrdup(). */
-static char *
-pgxml_pstrdup(const char *string)
-{
-    char       *copy = pstrdup(string);
-
-    return copy;
-}
-
-/*
- * The error handling function, installed with xmlSetGenericErrorFunc().
- *
- * Formats one message into the scratch buffer errbuf (at most ERRBUF_SIZE
- * bytes including the terminator) and appends it to pgxml_errorMsg.  No
- * report is issued here; elog_error() reports the accumulated text later.
- *
- * ctxt is not used.  msg and the variadic arguments are a printf-style
- * format and its arguments.
- */
-
-static void
-pgxml_errorHandler(void *ctxt, const char *msg, ...)
-{
-    va_list     args;
-    size_t      addlen;
-
-    va_start(args, msg);
-    vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
-    va_end(args);
-
-    addlen = strlen(errbuf);
-
-    if (pgxml_errorMsg == NULL)
-    {
-        pgxml_errorMsg = pstrdup(errbuf);
-    }
-    else
-    {
-        size_t      oldlen = strlen(pgxml_errorMsg);
-
-        /*
-         * Append at oldlen, not oldlen - 1: starting one byte earlier would
-         * overwrite the last character already collected and would write
-         * before the start of the buffer when the collected text is empty.
-         * Copying addlen + 1 bytes brings the terminator along.
-         */
-        pgxml_errorMsg = repalloc(pgxml_errorMsg, oldlen + addlen + 1);
-        memcpy(pgxml_errorMsg + oldlen, errbuf, addlen + 1);
-    }
-
-    /* Leave the scratch buffer clean for the next message */
-    memset(errbuf, 0, ERRBUF_SIZE);
-}
-
-/*
- * This function reports the current message at the level specified.
- *
- * level   - elog/ereport severity to use.
- * explain - caller-supplied description; reported verbatim, never treated
- *           as a format string.
- * force   - when nonzero, report even if no libxml message was collected.
- *
- * If a libxml message was collected in pgxml_errorMsg it is appended to
- * explain as "explain:message" and the collected message is released.
- * Nothing is reported when there is no collected message and force is zero.
- */
-void
-elog_error(int level, char *explain, int force)
-{
-    char       *detail = pgxml_errorMsg;
-
-    if (detail == NULL)
-    {
-        if (force)
-        {
-            ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-                            errmsg("%s", explain)));
-        }
-        return;
-    }
-
-    /*
-     * Detach the message from the global before reporting, so the global
-     * never points at freed memory, whether or not ereport() returns.
-     */
-    pgxml_errorMsg = NULL;
-
-    ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-                    errmsg("%s:%s", explain, detail)));
-    pfree(detail);
-}
-
-/*
- * Prepare libxml for one round of parsing.
- *
- * Routes libxml memory management through the pgxml_* passthroughs,
- * initialises the parser, installs pgxml_errorHandler as the generic error
- * callback, enables entity substitution and external DTD loading, and resets
- * the error state (pgxml_errorMsg and a freshly zeroed errbuf).
- *
- * This code could also set parser settings from user-supplied info.
- * Quite how these settings are made is another matter :)
- */
-void
-pgxml_parser_init(void)
-{
-    xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
-    xmlInitParser();
-
-    xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
-
-    xmlSubstituteEntitiesDefault(1);
-    xmlLoadExtDtdDefaultValue = 1;
-
-    pgxml_errorMsg = NULL;
-
-    /* Size must match the limit pgxml_errorHandler passes to vsnprintf() */
-    errbuf = palloc(ERRBUF_SIZE);
-    memset(errbuf, 0, ERRBUF_SIZE);
-}
-
-
-/*
- * pgxml_parse(document): returns true if document is well-formed, i.e. if
- * xmlParseMemory() can build a tree from it.
- */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-    text       *t = PG_GETARG_TEXT_P(0);    /* document buffer */
-    int32       docsize = VARSIZE(t) - VARHDRSZ;
-    xmlDocPtr   doctree;
-    bool        wellformed;
-
-    pgxml_parser_init();
-
-    doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-    wellformed = (doctree != NULL);
-
-    /* Release the tree before the library-wide cleanup, not after it */
-    if (wellformed)
-        xmlFreeDoc(doctree);
-    xmlCleanupParser();
-
-    PG_RETURN_BOOL(wellformed);
-}
-
-
-/*
- * Append "<name>" (or "</name>" when closing is nonzero) to buf.
- * Does nothing when name is NULL or empty.
- */
-static void
-pgxml_write_tag(xmlBufferPtr buf, const xmlChar *name, int closing)
-{
-    if ((name == NULL) || (xmlStrlen(name) == 0))
-        return;
-
-    xmlBufferWriteChar(buf, closing ? "</" : "<");
-    xmlBufferWriteCHAR(buf, name);
-    xmlBufferWriteChar(buf, ">");
-}
-
-/*
- * Translate a nodeset into a text representation.
- *
- * nodeset    - nodes to render; NULL is treated as an empty set.
- * toptagname - if non-empty, the whole output is wrapped in this tag.
- * septagname - if non-empty, each dumped node is wrapped in this tag
- *              (only used when plainsep is NULL).
- * plainsep   - an ordinary (not tag) separator; when non-NULL each node is
- *              cast to its string value and the separator is written
- *              between consecutive entries.
- *
- * When plainsep is NULL each node is written with xmlNodeDump().
- * Returns a copy of the buffer contents made with xmlStrdup().
- */
-static xmlChar *
-pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
-                   xmlChar *toptagname,
-                   xmlChar *septagname,
-                   xmlChar *plainsep)
-{
-    xmlBufferPtr buf;
-    xmlChar    *result;
-    int         i;
-
-    buf = xmlBufferCreate();
-
-    pgxml_write_tag(buf, toptagname, 0);
-
-    if (nodeset != NULL)
-    {
-        for (i = 0; i < nodeset->nodeNr; i++)
-        {
-            if (plainsep != NULL)
-            {
-                xmlChar    *nodestr = xmlXPathCastNodeToString(nodeset->nodeTab[i]);
-
-                xmlBufferWriteCHAR(buf, nodestr);
-
-                /* The cast returns a fresh copy that we own */
-                if (nodestr != NULL)
-                    xmlFree(nodestr);
-
-                /* If this isn't the last entry, write the plain sep. */
-                if (i < (nodeset->nodeNr) - 1)
-                {
-                    /* plainsep is an xmlChar string, hence the CHAR variant */
-                    xmlBufferWriteCHAR(buf, plainsep);
-                }
-            }
-            else
-            {
-                pgxml_write_tag(buf, septagname, 0);
-                xmlNodeDump(buf,
-                            nodeset->nodeTab[i]->doc,
-                            nodeset->nodeTab[i],
-                            1, 0);
-                pgxml_write_tag(buf, septagname, 1);
-            }
-        }
-    }
-
-    pgxml_write_tag(buf, toptagname, 1);
-
-    result = xmlStrdup(buf->content);
-    xmlBufferFree(buf);
-    return result;
-}
-
-
-/*
- * Copy the payload of a PostgreSQL "varlena" text value into a freshly
- * palloc'd, NUL-terminated buffer typed as the libxml2 string type.
- */
-
-xmlChar *
-pgxml_texttoxmlchar(text *textstring)
-{
-    int32       nbytes = VARSIZE(textstring) - VARHDRSZ;
-    xmlChar    *copy = (xmlChar *) palloc(nbytes + 1);
-
-    memcpy((char *) copy, VARDATA(textstring), nbytes);
-    copy[nbytes] = '\0';
-
-    return copy;
-}
-
-/* Public visible XPath functions */
-
-/*
- * xpath_nodeset(document, xpath, toptag, septag)
- *
- * This is a "raw" xpath function: it evaluates xpath against document and
- * renders the result through pgxml_result_to_text() with the given tags.
- * Returns SQL NULL when no result text is produced.
- * (Original note: check that it returns child elements properly.)
- */
-
-PG_FUNCTION_INFO_V1(xpath_nodeset);
-
-Datum
-xpath_nodeset(PG_FUNCTION_ARGS)
-{
-    xmlChar    *xpath;
-    xmlChar    *toptag;
-    xmlChar    *septag;
-    text       *xpres;
-
-    /* PG_GETARG_TEXT_P(0) is document buffer, (1) is the XPath expression */
-    toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-    septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
-    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-    /* xmlCleanupParser() is done by pgxml_result_to_text for a non-NULL result */
-    xpres = pgxml_result_to_text(
-                                 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
-                                 toptag, septag, NULL);
-
-    /* Release the NUL-terminated copies made above */
-    pfree((void *) xpath);
-    pfree((void *) septag);
-    pfree((void *) toptag);
-
-    if (xpres == NULL)
-    {
-        PG_RETURN_NULL();
-    }
-    PG_RETURN_TEXT_P(xpres);
-}
-
-/*
- * xpath_list(document, xpath, plainsep)
- *
- * Almost identical to xpath_nodeset, but returns the elements as a list:
- * the result is rendered with plainsep as a plain separator and no tags.
- * Returns SQL NULL when no result text is produced.
- */
-
-PG_FUNCTION_INFO_V1(xpath_list);
-
-Datum
-xpath_list(PG_FUNCTION_ARGS)
-{
-    xmlChar    *xpath;
-    xmlChar    *plainsep;
-    text       *xpres;
-
-    /* PG_GETARG_TEXT_P(0) is document buffer, (1) is the XPath expression */
-    plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-    /* xmlCleanupParser() is done by pgxml_result_to_text for a non-NULL result */
-    xpres = pgxml_result_to_text(
-                                 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
-                                 NULL, NULL, plainsep);
-
-    /* Release the NUL-terminated copies made above */
-    pfree((void *) xpath);
-    pfree((void *) plainsep);
-
-    if (xpres == NULL)
-    {
-        PG_RETURN_NULL();
-    }
-    PG_RETURN_TEXT_P(xpres);
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_string);
-
-/*
- * xpath_string(document, xpath)
- *
- * Wraps the supplied path in "string(...)" and returns the evaluation
- * result as text, or SQL NULL when no result text is produced.
- */
-Datum
-xpath_string(PG_FUNCTION_ARGS)
-{
-    /* PG_GETARG_TEXT_P(0) is document buffer */
-    text       *xpathsupp = PG_GETARG_TEXT_P(1);    /* XPath expression */
-    int32       pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
-    xmlChar    *xpath;
-    text       *xpres;
-
-    /*
-     * "string(" is 7 characters, ")" is 1, and 1 more is needed for the
-     * NUL at the end: pathsize + 9 bytes in total.
-     */
-    xpath = (xmlChar *) palloc(pathsize + 9);
-    memcpy((char *) xpath, "string(", 7);
-    memcpy((char *) (xpath + 7), VARDATA(xpathsupp), pathsize);
-    xpath[pathsize + 7] = ')';
-    xpath[pathsize + 8] = '\0';
-
-    xpres = pgxml_result_to_text(
-                                 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
-                                 NULL, NULL, NULL);
-
-    xmlCleanupParser();
-    pfree((void *) xpath);
-
-    if (xpres != NULL)
-    {
-        PG_RETURN_TEXT_P(xpres);
-    }
-    PG_RETURN_NULL();
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_number);
-
-/*
- * xpath_number(document, xpath)
- *
- * Evaluates xpath against document and casts the result to a number.
- * Returns SQL NULL when evaluation yields no result object or when the
- * cast yields NaN; otherwise returns the value as float4.
- */
-Datum
-xpath_number(PG_FUNCTION_ARGS)
-{
-    xmlChar    *xpath;
-    double      dres;
-    xmlXPathObjectPtr res;
-
-    /* PG_GETARG_TEXT_P(0) is document buffer, (1) is the XPath expression */
-    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-    res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
-    pfree((void *) xpath);
-
-    if (res == NULL)
-    {
-        xmlCleanupParser();
-        PG_RETURN_NULL();
-    }
-
-    /* Keep the double libxml returns so the NaN test sees it unnarrowed */
-    dres = xmlXPathCastToNumber(res);
-    xmlCleanupParser();
-    if (xmlXPathIsNaN(dres))
-    {
-        PG_RETURN_NULL();
-    }
-
-    PG_RETURN_FLOAT4((float4) dres);
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_bool);
-
-/*
- * xpath_bool(document, xpath)
- *
- * Evaluates xpath against document and casts the result to a boolean.
- * Returns false when evaluation yields no result object.
- */
-Datum
-xpath_bool(PG_FUNCTION_ARGS)
-{
-    xmlChar    *xpath;
-    int         bRes;
-    xmlXPathObjectPtr res;
-
-    /* PG_GETARG_TEXT_P(0) is document buffer, (1) is the XPath expression */
-    xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-    res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
-    pfree((void *) xpath);
-
-    if (res == NULL)
-    {
-        xmlCleanupParser();
-        PG_RETURN_BOOL(false);
-    }
-
-    bRes = xmlXPathCastToBoolean(res);
-    xmlCleanupParser();
-    PG_RETURN_BOOL(bRes);
-}
-
-
-
-/*
- * Core function to evaluate XPath query.
- *
- * Parses document, compiles xpath and evaluates it with the document's
- * root element as the context node.
- *
- * Returns the XPath result object, or NULL when the document is not
- * well-formed, no XPath context could be created, or evaluation yields no
- * result.  A path that fails to compile is reported through
- * elog_error(ERROR, ...).
- *
- * On success the parsed document and the XPath context are deliberately
- * left alive, exactly as before: the original code had the xmlFreeDoc()
- * call on this path commented out.
- * NOTE(review): presumably because a nodeset result points into the
- * document tree - confirm before freeing either here.
- */
-
-static xmlXPathObjectPtr
-pgxml_xpath(text *document, xmlChar *xpath)
-{
-    xmlDocPtr   doctree;
-    xmlXPathContextPtr ctxt;
-    xmlXPathCompExprPtr comppath;
-    xmlXPathObjectPtr res;
-    int32       docsize = VARSIZE(document) - VARHDRSZ;
-
-    pgxml_parser_init();
-
-    doctree = xmlParseMemory((char *) VARDATA(document), docsize);
-    if (doctree == NULL)
-    {
-        /* not well-formed */
-        return NULL;
-    }
-
-    ctxt = xmlXPathNewContext(doctree);
-    if (ctxt == NULL)
-    {
-        xmlFreeDoc(doctree);
-        return NULL;
-    }
-    ctxt->node = xmlDocGetRootElement(doctree);
-
-    /* compile the path */
-    comppath = xmlXPathCompile(xpath);
-    if (comppath == NULL)
-    {
-        /* Release what we built, document before library cleanup */
-        xmlXPathFreeContext(ctxt);
-        xmlFreeDoc(doctree);
-        xmlCleanupParser();
-        elog_error(ERROR, "XPath Syntax Error", 1);
-
-        return NULL;
-    }
-
-    /* Now evaluate the path expression. */
-    res = xmlXPathCompiledEval(comppath, ctxt);
-    xmlXPathFreeCompExpr(comppath);
-
-    if (res == NULL)
-    {
-        xmlXPathFreeContext(ctxt);
-        xmlFreeDoc(doctree);
-
-        return NULL;
-    }
-
-    return res;
-}
-
-/*
- * Convert an XPath result object into a text value.
- *
- * Nodesets are rendered with pgxmlNodeSetToText() using toptag, septag and
- * plainsep; string results are copied as they are; any other result type
- * raises a NOTICE and yields "<unsupported/>".  Returns NULL when res is
- * NULL.  After building the text this runs xmlCleanupParser() and lets
- * elog_error() report any collected libxml message at ERROR level.
- */
-text *
-pgxml_result_to_text(xmlXPathObjectPtr res,
-                     xmlChar *toptag,
-                     xmlChar *septag,
-                     xmlChar *plainsep)
-{
-    xmlChar    *rendered;
-    int32       nbytes;
-    text       *out;
-
-    if (res == NULL)
-        return NULL;
-
-    if (res->type == XPATH_NODESET)
-    {
-        rendered = pgxmlNodeSetToText(res->nodesetval, toptag, septag, plainsep);
-    }
-    else if (res->type == XPATH_STRING)
-    {
-        rendered = xmlStrdup(res->stringval);
-    }
-    else
-    {
-        elog(NOTICE, "Unsupported XQuery result: %d", res->type);
-        rendered = xmlStrdup("<unsupported/>");
-    }
-
-    /* Now convert this result back to text */
-    nbytes = strlen(rendered);
-    out = (text *) palloc(nbytes + VARHDRSZ);
-    memcpy(VARDATA(out), rendered, nbytes);
-    VARATT_SIZEP(out) = nbytes + VARHDRSZ;
-
-    /* Free various storage; the document tree is not freed here */
-    xmlCleanupParser();
-    xmlFree(rendered);
-
-    elog_error(ERROR, "XPath error", 0);
-
-    return out;
-}
-
-/*
- * xpath_table(pkeyfield, xmlfield, relname, xpathset, condition)
- *
- * A table function.  It runs
- *     SELECT pkeyfield, xmlfield FROM relname WHERE condition
- * through SPI, parses the second column of every row as an XML document and
- * evaluates the '|'-separated XPath expressions in xpathset against it.
- * Output column 0 receives the key; column j+1 receives the result of
- * path j.  A nodeset result contributes one output row per node: rows are
- * emitted for rownr = 0, 1, 2, ... for as long as at least one path still
- * has a node at that position.  A document that cannot be parsed (or is
- * SQL NULL) yields one row with the key and NULL in every other column.
- *
- * Results are returned in Materialize mode through a tuplestore, so the
- * function itself returns a null Datum.
- *
- * NOTE(review): the four field/relation/condition arguments are pasted into
- * the query text as given, so callers must only pass trusted values.
- * NOTE(review): a path whose result is a string never sets had_values, so
- * by itself it never causes a row to be emitted - confirm this is intended.
- */
-
-PG_FUNCTION_INFO_V1(xpath_table);
-
-Datum
-xpath_table(PG_FUNCTION_ARGS)
-{
-    /* SPI (input tuple) support */
-    SPITupleTable *tuptable;
-    HeapTuple   spi_tuple;
-    TupleDesc   spi_tupdesc;
-
-    /* Output tuple (tuplestore) support */
-    Tuplestorestate *tupstore = NULL;
-    TupleDesc   ret_tupdesc;
-    HeapTuple   ret_tuple;
-
-    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-    AttInMetadata *attinmeta;
-    MemoryContext per_query_ctx;
-    MemoryContext oldcontext;
-
-    /* Function parameters */
-    char       *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
-    char       *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
-    char       *relname = GET_STR(PG_GETARG_TEXT_P(2));
-    char       *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
-    char       *condition = GET_STR(PG_GETARG_TEXT_P(4));
-
-    char      **values;
-    xmlChar   **xpaths;
-    xmlChar    *pos;
-    xmlChar    *pathsep = "|";
-
-    int         numpaths;
-    int         ret;
-    int         proc;
-    int         i;
-    int         j;
-    int         rownr;          /* For issuing multiple rows from one
-                                 * original document */
-    int         had_values;     /* To determine end of nodeset results */
-
-    StringInfo  querysql;
-
-    /*
-     * Validate the calling context before reading any field of rsinfo.  We
-     * only have a valid tuple description in table function mode.
-     */
-    if (rsinfo == NULL || rsinfo->expectedDesc == NULL)
-    {
-        ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
-                        errmsg("xpath_table must be called as a table function")));
-    }
-
-    /*
-     * We want to materialise because it means that we don't have to carry
-     * libxml2 parser state between invocations of this function.  Check
-     * that the caller supports us returning a tuplestore.
-     */
-    if (!(rsinfo->allowedModes & SFRM_Materialize))
-    {
-        ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
-                        errmsg("xpath_table requires Materialize mode, but it is not "
-                               "allowed in this context")));
-    }
-
-    /*
-     * The tuplestore must exist in a higher context than this function call
-     * (per_query_ctx is used).
-     */
-    per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
-    oldcontext = MemoryContextSwitchTo(per_query_ctx);
-
-    /*
-     * Create the tuplestore - SortMem is the max in-memory size before it is
-     * shipped to a disk heap file.  Just like ... SortMem!
-     */
-    tupstore = tuplestore_begin_heap(true, false, SortMem);
-
-    MemoryContextSwitchTo(oldcontext);
-
-    /* get the requested return tuple description */
-    ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
-
-    /*
-     * At the moment we assume that the returned attributes make sense for
-     * the XPath specified (i.e. we trust the caller).  It's not fatal if
-     * they get it wrong - the input function for the column type will raise
-     * an error if the path result can't be converted into the correct
-     * binary representation.
-     */
-    attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
-
-    /* Set return mode and allocate value space. */
-    rsinfo->returnMode = SFRM_Materialize;
-    rsinfo->setDesc = ret_tupdesc;
-
-    values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
-
-    xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
-
-    /*
-     * Split XPaths.  xpathset is a writable CString.
-     *
-     * Note that we stop splitting once we've done all needed for tupdesc.
-     * The bound is tested before the first store, so a one-column result
-     * gets no paths at all; values[j + 1] below therefore always stays
-     * inside the values array.
-     */
-    numpaths = 0;
-    pos = xpathset;
-    while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)))
-    {
-        xpaths[numpaths] = pos;
-        pos = strstr(pos, pathsep);
-        if (pos != NULL)
-        {
-            *pos = '\0';
-            pos++;
-        }
-        numpaths++;
-    }
-
-    /* Now build query */
-    querysql = makeStringInfo();
-
-    /* Build initial sql statement */
-    appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s",
-                     pkeyfield,
-                     xmlfield,
-                     relname,
-                     condition
-        );
-
-    if ((ret = SPI_connect()) < 0)
-    {
-        elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
-    }
-
-    if ((ret = SPI_exec(querysql->data, 0)) != SPI_OK_SELECT)
-    {
-        elog(ERROR, "xpath_table: SPI execution failed for query %s", querysql->data);
-    }
-
-    proc = SPI_processed;
-    tuptable = SPI_tuptable;
-    spi_tupdesc = tuptable->tupdesc;
-
-    /* Switch out of SPI context */
-    MemoryContextSwitchTo(oldcontext);
-
-    /*
-     * Check that SPI returned correct result.  If you put a comma into one
-     * of the function parameters, this will catch it when the SPI query
-     * returns e.g. 3 columns.
-     */
-    if (spi_tupdesc->natts != 2)
-    {
-        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                        errmsg("Expression returning multiple columns is not valid in parameter list"),
-                        errdetail("Expected two columns in SPI result, got %d", spi_tupdesc->natts)));
-    }
-
-    /*
-     * Setup the parser.  Beware that this must happen in the same context
-     * as the cleanup - which means that any error from here on must do
-     * cleanup to ensure that the entity table doesn't get freed by being
-     * out of context.
-     */
-    pgxml_parser_init();
-
-    /* For each row i.e. document returned from SPI */
-    for (i = 0; i < proc; i++)
-    {
-        char       *pkey;
-        char       *xmldoc;
-
-        xmlDocPtr   doctree;
-        xmlXPathContextPtr ctxt;
-        xmlXPathObjectPtr res;
-        xmlChar    *resstr;
-
-        xmlXPathCompExprPtr comppath;
-
-        /* Extract the row data as C Strings */
-        spi_tuple = tuptable->vals[i];
-        pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
-        xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
-
-        /*
-         * Clear the values array, so that not-well-formed documents return
-         * NULL in all columns.  Note that this also means that spare
-         * columns will be NULL.
-         */
-        for (j = 0; j < ret_tupdesc->natts; j++)
-        {
-            values[j] = NULL;
-        }
-
-        /* Insert primary key */
-        values[0] = pkey;
-
-        /*
-         * Parse the document.  SPI_getvalue returns NULL for a SQL NULL
-         * column; such a row is handled like a document that fails to parse.
-         */
-        if (xmldoc != NULL)
-            doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
-        else
-            doctree = NULL;
-
-        if (doctree == NULL)
-        {
-            /* not well-formed, so output all-NULL tuple */
-            ret_tuple = BuildTupleFromCStrings(attinmeta, values);
-            oldcontext = MemoryContextSwitchTo(per_query_ctx);
-            tuplestore_puttuple(tupstore, ret_tuple);
-            MemoryContextSwitchTo(oldcontext);
-            heap_freetuple(ret_tuple);
-        }
-        else
-        {
-            /* New loop here - we have to deal with nodeset results */
-            rownr = 0;
-
-            do
-            {
-                /* Now evaluate the set of xpaths. */
-                had_values = 0;
-                for (j = 0; j < numpaths; j++)
-                {
-                    ctxt = xmlXPathNewContext(doctree);
-                    ctxt->node = xmlDocGetRootElement(doctree);
-                    xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
-
-                    /* compile the path */
-                    comppath = xmlXPathCompile(xpaths[j]);
-                    if (comppath == NULL)
-                    {
-                        /* Release context and document before cleanup */
-                        xmlXPathFreeContext(ctxt);
-                        xmlFreeDoc(doctree);
-                        xmlCleanupParser();
-
-                        elog_error(ERROR, "XPath Syntax Error", 1);
-
-                        PG_RETURN_NULL();   /* Keep compiler happy */
-                    }
-
-                    /* Now evaluate the path expression. */
-                    res = xmlXPathCompiledEval(comppath, ctxt);
-                    xmlXPathFreeCompExpr(comppath);
-
-                    if (res != NULL)
-                    {
-                        switch (res->type)
-                        {
-                            case XPATH_NODESET:
-                                /* We see if this nodeset has enough nodes */
-                                if ((res->nodesetval != NULL) &&
-                                    (rownr < res->nodesetval->nodeNr))
-                                {
-                                    resstr =
-                                        xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
-                                    had_values = 1;
-                                }
-                                else
-                                {
-                                    resstr = NULL;
-                                }
-                                break;
-
-                            case XPATH_STRING:
-                                resstr = xmlStrdup(res->stringval);
-                                break;
-
-                            default:
-                                elog(NOTICE, "Unsupported XQuery result: %d", res->type);
-                                resstr = xmlStrdup("<unsupported/>");
-                                break;
-                        }
-
-                        /*
-                         * Insert this into the appropriate column in the
-                         * result tuple.
-                         */
-                        values[j + 1] = resstr;
-
-                        /* resstr is an independent copy; drop the object */
-                        xmlXPathFreeObject(res);
-                    }
-                    xmlXPathFreeContext(ctxt);
-                }
-
-                /* Now add the tuple to the output, if there is one. */
-                if (had_values)
-                {
-                    ret_tuple = BuildTupleFromCStrings(attinmeta, values);
-                    oldcontext = MemoryContextSwitchTo(per_query_ctx);
-                    tuplestore_puttuple(tupstore, ret_tuple);
-                    MemoryContextSwitchTo(oldcontext);
-                    heap_freetuple(ret_tuple);
-                }
-
-                rownr++;
-
-            } while (had_values);
-        }
-
-        if (doctree != NULL)
-            xmlFreeDoc(doctree);
-
-        /* Either string is NULL when the source column was SQL NULL */
-        if (pkey != NULL)
-            pfree(pkey);
-        if (xmldoc != NULL)
-            pfree(xmldoc);
-    }
-
-    xmlCleanupParser();
-
-    /* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
-    tuplestore_donestoring(tupstore);
-
-    SPI_finish();
-
-    rsinfo->setResult = tupstore;
-
-    /*
-     * SFRM_Materialize mode expects us to return a NULL Datum. The actual
-     * tuples are in our tuplestore and passed back through
-     * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
-     * that we actually used to build our tuples with, so the caller can
-     * verify we did what it was expecting.
-     */
-    return (Datum) 0;
-}
diff --git a/contrib/xml/xslt_proc.c b/contrib/xml/xslt_proc.c
deleted file mode 100644
index 64f9736622..0000000000
--- a/contrib/xml/xslt_proc.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* XSLT processing functions (requiring libxslt) */
-/* John Gray, for Torchbox 2003-04-01 */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "executor/spi.h"
-#include "funcapi.h"
-#include "miscadmin.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-
-/* libxslt includes */
-
-#include <libxslt/xslt.h>
-#include <libxslt/xsltInternals.h>
-#include <libxslt/transform.h>
-#include <libxslt/xsltutils.h>
-
-
-/* declarations to come from xpath.c (error reporting, parser setup, text conversion) */
-
-extern void elog_error(int level, char *explain, int force);
-extern void pgxml_parser_init();
-extern xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
-
-/* local defs: the parameter-string splitter and the SQL-callable entry point.
- * MAXPARAMS below sizes the params array in xslt_process, which adds one
- * extra slot for the NULL terminator. */
-static void parse_params(const char **params, text *paramstr);
-
-Datum xslt_process(PG_FUNCTION_ARGS);
-
-
-#define MAXPARAMS 20
-
-PG_FUNCTION_INFO_V1(xslt_process);
-
-/*
- * xslt_process(document, stylesheet [, params])
- *
- * Applies an XSLT stylesheet to an XML document and returns the serialised
- * result as text.
- *
- * document, stylesheet - each is parsed from memory when its first
- *     character is '<'; otherwise its content is passed to the libxml /
- *     libxslt file-parsing call as a name.
- *     NOTE(review): that lets the caller make the server read any path it
- *     names - confirm this is acceptable for the intended users.
- * params - optional "name=value,name=value" list, see parse_params().
- *
- * Returns SQL NULL when the transformation or its serialisation fails.
- * Parse failures are reported through elog_error(ERROR, ...), which only
- * raises if libxml supplied a message; otherwise NULL is returned.
- */
-Datum
-xslt_process(PG_FUNCTION_ARGS)
-{
-    const char *params[MAXPARAMS + 1];  /* +1 for the terminator */
-    xsltStylesheetPtr stylesheet = NULL;
-    xmlDocPtr   doctree;
-    xmlDocPtr   restree;
-    xmlDocPtr   ssdoc = NULL;
-    xmlChar    *resstr = NULL;
-    int         resstat;
-    int         reslen = 0;
-
-    text       *doct = PG_GETARG_TEXT_P(0);
-    text       *ssheet = PG_GETARG_TEXT_P(1);
-    text       *tres;
-
-    if (fcinfo->nargs == 3)
-    {
-        parse_params(params, PG_GETARG_TEXT_P(2));
-    }
-    else
-    {
-        /* No parameters */
-        params[0] = NULL;
-    }
-
-    /* Setup parser */
-    pgxml_parser_init();
-
-    /*
-     * Check to see if document is a file or a literal.  The length test
-     * keeps us from reading the first byte of an empty value.
-     */
-    if ((VARSIZE(doct) > VARHDRSZ) && (VARDATA(doct)[0] == '<'))
-    {
-        doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct) - VARHDRSZ);
-    }
-    else
-    {
-        doctree = xmlParseFile(GET_STR(doct));
-    }
-
-    if (doctree == NULL)
-    {
-        xmlCleanupParser();
-        elog_error(ERROR, "Error parsing XML document", 0);
-
-        PG_RETURN_NULL();
-    }
-
-    /* Same for stylesheet */
-    if ((VARSIZE(ssheet) > VARHDRSZ) && (VARDATA(ssheet)[0] == '<'))
-    {
-        ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
-                               VARSIZE(ssheet) - VARHDRSZ);
-        if (ssdoc == NULL)
-        {
-            xmlFreeDoc(doctree);
-            xmlCleanupParser();
-            elog_error(ERROR, "Error parsing stylesheet as XML document", 0);
-            PG_RETURN_NULL();
-        }
-
-        stylesheet = xsltParseStylesheetDoc(ssdoc);
-    }
-    else
-    {
-        stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
-    }
-
-    if (stylesheet == NULL)
-    {
-        /* The stylesheet did not take ownership of ssdoc, so free it here */
-        if (ssdoc != NULL)
-            xmlFreeDoc(ssdoc);
-        xmlFreeDoc(doctree);
-        xsltCleanupGlobals();
-        xmlCleanupParser();
-        elog_error(ERROR, "Failed to parse stylesheet", 0);
-        PG_RETURN_NULL();
-    }
-
-    restree = xsltApplyStylesheet(stylesheet, doctree, params);
-
-    /* A failed transformation yields no tree; do not try to serialise it */
-    if (restree != NULL)
-        resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
-    else
-        resstat = -1;
-
-    xsltFreeStylesheet(stylesheet);
-    if (restree != NULL)
-        xmlFreeDoc(restree);
-    xmlFreeDoc(doctree);
-
-    if (resstat < 0)
-    {
-        if (resstr != NULL)
-            xmlFree(resstr);
-        xsltCleanupGlobals();
-        xmlCleanupParser();
-        PG_RETURN_NULL();
-    }
-
-    /* Copy the serialised output while it is still owned by us */
-    tres = palloc(reslen + VARHDRSZ);
-    if (resstr != NULL)
-    {
-        memcpy(VARDATA(tres), resstr, reslen);
-        xmlFree(resstr);
-    }
-    VARATT_SIZEP(tres) = reslen + VARHDRSZ;
-
-    xsltCleanupGlobals();
-    xmlCleanupParser();
-
-    PG_RETURN_TEXT_P(tres);
-}
-
-
-/*
- * Split a "name=value,name=value,..." string into the alternating
- * name/value array that xsltApplyStylesheet() takes.
- *
- * params   - output array with room for MAXPARAMS entries plus one
- *            terminator; filled as name, value, name, value, ..., NULL.
- * paramstr - the parameter list; a C-string copy of it is cut up in place
- *            and the array entries point into that copy.
- *
- * Parsing stops at the first name that has no '=' (that name is dropped),
- * after the last value, or once MAXPARAMS entries have been stored.  The
- * array is NULL-terminated on every path, including when it is full.
- */
-static void
-parse_params(const char **params, text *paramstr)
-{
-    char       *pos = GET_STR(paramstr);
-    int         n = 0;
-
-    /* Each round stores a name and a value, so two free slots are needed */
-    while (n + 1 < MAXPARAMS)
-    {
-        char       *name = pos;
-        char       *eq = strchr(pos, '=');
-
-        if (eq == NULL)
-            break;              /* name without a value: ignore it */
-        *eq = '\0';
-
-        params[n++] = name;
-        params[n++] = eq + 1;
-
-        pos = strchr(eq + 1, ',');
-        if (pos == NULL)
-            break;              /* that was the last pair */
-        *pos = '\0';
-        pos++;
-    }
-
-    /* n never exceeds MAXPARAMS, so this slot always exists */
-    params[n] = NULL;
-}