Move new version of contrib/ xml into xml2, keep old version in /xml.

author: Bruce Momjian 2004-03-05 03:57:58 +0000
committer: Bruce Momjian 2004-03-05 03:57:58 +0000
commit: 31f4b59a464808ab0fec0ffb2eaa723321ea1af7 (patch)
tree: 004f71d1eb77899fa9e16ac8047189dcde6576e5 /contrib/xml
parent: adca025c9ec4b3050411eb74a5b4f9c20a4ce2b5 (diff)
10 files changed, 747 insertions, 1331 deletions
diff --git a/contrib/xml/Makefile b/contrib/xml/Makefile
deleted file mode 100644
index 9177ca865c..0000000000
--- a/contrib/xml/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-# This makefile will build the new XML and XSLT routines.
-subdir = contrib/xml
-top_builddir = ../../
-include $(top_builddir)/src/Makefile.global
-
-MODULE_big = pgxml
-
-# Remove xslt_proc.o from the following line if you don't have libxslt
-OBJS = xpath.o xslt_proc.o
-
-# Remove -lxslt from the following line if you don't have libxslt.
-SHLIB_LINK = -lxml2 -lxslt
-
-DATA_built = pgxml.sql
-DOCS = README.pgxml
-
-include $(top_builddir)contrib/contrib-global.mk
-
diff --git a/contrib/xml/README.pgxml b/contrib/xml/README.pgxml
deleted file mode 100644
index f29d071722..0000000000
--- a/contrib/xml/README.pgxml
+++ /dev/null
@@ -1,179 +0,0 @@
-XML-handling functions for PostgreSQL
-=====================================
-
-Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com)
-
-This version of the XML functions provides both XPath querying and
-XSLT functionality. There is also a new table function which allows
-the straightforward return of multiple XML results. Note that the current code
-doesn't take any particular care over character sets - this is
-something that should be fixed at some point!
-
-Installation
-------------
-
-The current build process will only work if the files are in
-contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been
-configured and built (If you alter the subdir value in the Makefile
-you can place it in a different directory in a PostgreSQL tree).
-
-Before you begin, just check the Makefile, and then just 'make' and
-'make install'.
-
-This code requires libxml to be previously installed.
-
-Description of functions
-------------------------
-
-The first set of functions are straightforward XML parsing and XPath queries:
-
-pgxml_parse(document) RETURNS bool
-
-This parses the document text in its parameter and returns true if the
-document is well-formed XML.
-
-xpath_string(document,query) RETURNS text
-xpath_number(document,query) RETURNS float4
-xpath_bool(document,query) RETURNS bool
-
-These functions evaluate the XPath query on the supplied document, and
-cast the result to the specified type.
-
-
-xpath_nodeset(document,query,toptag,itemtag) RETURNS text
-
-This evaluates query on document and wraps the result in XML tags. If
-the result is multivalued, the output will look like:
-
-<toptag>
-<itemtag>Value 1 which could be an XML fragment</itemtag>
-<itemtag>Value 2....</itemtag>
-</toptag>
-
-If either toptag or itemtag is an empty string, the relevant tag is omitted.
-There are also wrapper functions for this operation:
-
-xpath_nodeset(document,query) RETURNS text omits both tags.
-xpath_nodeset(document,query,itemtag) RETURNS text omits toptag.
-
-
-xpath_list(document,query,separator) RETURNS text
-
-This function returns multiple values separated by the specified
-separator, e.g. Value 1,Value 2,Value 3 if separator=','.
-
-xpath_list(document,query) RETURNS text
-
-This is a wrapper for the above function that uses ',' as the separator.
-
-
-xpath_table
------------
-
-This is a table function which evaluates a set of XPath queries on
-each of a set of documents and returns the results as a table. The
-primary key field from the original document table is returned as the
-first column of the result so that the resultset from xpath_table can
-be readily used in joins.
-
-The function itself takes 5 arguments, all text.
-
-xpath_table(key,document,relation,xpaths,criteria)
-
-key - the name of the "key" field - this is just a field to be used as
-the first column of the output table i.e. it identifies the record from
-which each output row came.
-
-document - the name of the field containing the XML document
-
-relation - the name of the table or view containing the documents
-
-xpaths - multiple xpath expressions separated by |
-
-criteria - The contents of the where clause. This needs to be specified,
-so use "true" or "1=1" here if you want to process all the rows in the
-relation.
-
-NB These parameters (except the XPath strings) are just substituted
-into a plain SQL SELECT statement, so you have some flexibility - the
-statement is
-
-SELECT <key>,<document> FROM <relation> WHERE <criteria>
-
-so those parameters can be *anything* valid in those particular
-locations. The result from this SELECT needs to return exactly two
-columns (which it will unless you try to list multiple fields for key
-or document). Beware that this simplistic approach requires that you
-validate any user-supplied values to avoid SQL injection attacks.
-
-Using the function
-
-The function has to be used in a FROM expression. This gives the following
-form:
-
-SELECT * FROM
-xpath_table('article_id', 
-	'article_xml',
-	'articles', 
-	'/article/author|/article/pages|/article/title',
-	'date_entered > ''2003-01-01'' ') 
-AS t(article_id integer, author text, page_count integer, title text);
-
-The AS clause defines the names and types of the columns in the
-virtual table. If there are more XPath queries than result columns,
-the extra queries will be ignored. If there are more result columns
-than XPath queries, the extra columns will be NULL.
-
-Note that I've said in this example that pages is an integer.  The
-function deals internally with string representations, so when you say
-you want an integer in the output, it will take the string
-representation of the XPath result and use PostgreSQL input functions
-to transform it into an integer (or whatever type the AS clause
-requests). An error will result if it can't do this - for example if
-the result is empty - so you may wish to just stick to 'text' as the
-column type if you think your data has any problems.
-
-The select statement doesn't need to use * alone - it can reference the
-columns by name or join them to other tables. The function produces a
-virtual table with which you can perform any operation you wish (e.g.
-aggregation, joining, sorting etc). So we could also have:
-
-SELECT t.title, p.fullname, p.email 
-FROM xpath_table('article_id','article_xml','articles',
-            '/article/title|/article/author/@id',
-            'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
-            AS t(article_id integer, title text, author_id integer), 
-     tblPeopleInfo AS p 
-WHERE t.author_id = p.person_id;
-
-as a more complicated example. Of course, you could wrap all
-of this in a view for convenience.
-
-XSLT functions
---------------
-
-The following functions are available if libxslt is installed (this is
-not currently detected automatically, so you will have to amend the
-Makefile)
-
-xslt_process(document,stylesheet,paramlist) RETURNS text
-
-This function applies the XSL stylesheet to the document and returns
-the transformed result. The paramlist is a list of parameter
-assignments to be used in the transformation, specified in the form
-'a=1,b=2'. Note that this is also proof-of-concept code and the
-parameter parsing is very simple-minded (e.g. parameter values cannot
-contain commas!)
-
-Also note that if either the document or stylesheet values do not
-begin with a < then they will be treated as URLs and libxslt will
-fetch them. It thus follows that you can use xslt_process as a means
-to fetch the contents of URLs - you should be aware of the security
-implications of this.
-
-There is also a two-parameter version of xslt_process which does not
-pass any parameters to the transformation.
-
-If you have any comments or suggestions, please do contact me at
-jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't
-guarantee a rapid response to your query!
diff --git a/contrib/xml/TODO b/contrib/xml/TODO
new file mode 100644
index 0000000000..5ddd62a658
--- /dev/null
+++ b/contrib/xml/TODO
@@ -0,0 +1,78 @@
+PGXML TODO List
+===============
+
+Some of these items still require much more thought! Since the first
+release, the XPath support has improved (because I'm no longer using a
+homemade algorithm!).
+
+1. Performance considerations
+
+At present each document is parsed to produce the DOM tree on every query.
+
+Pros: 
+	Easy
+	No persistent memory or storage allocation for parsed trees
+		(libxml docs suggest representation of a document might
+		 be 4 times the size of the text)
+
+Cons:
+	Slow/ CPU intensive to parse.
+	Makes it difficult for PLs to apply libxml manipulations to create
+		new documents or amend existing ones.
+
+
+2. XQuery 
+
+I'm not sure if the addition of XQuery would be best as a function or
+as a new front-end parser. This is one to think about, but with a
+decent implementation of XPath, one of the prerequisites is covered.
+
+3. DOM Interfaces
+
+Expose more aspects of the DOM to user functions/ PLs. This would
+allow a procedure in a PL to run some queries and then use exposed
+interfaces to libxml to create an XML document out of the query
+results. I accept the argument that this might be more properly
+performed on the client side.
+
+4. Returning sets of documents from XPath queries.
+
+Although the current implementation allows you to amalgamate the
+returned results into a single document, it's quite possible that
+you'd like to use the returned set of nodes as a source for FROM.
+ 
+Is there a good way to optimise/index the results of certain XPath
+operations to make them faster?:
+
+select docid, pgxml_xpath(document,'//site/location/text()','','') as location 
+where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
+
+and with multiple element occurrences in a document?
+
+select d.docid, pgxml_xpath(d.document,'//site/location/text()','','') 
+from docstore d, 
+pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft 
+where ft.key = d.docid and ft.value ='Limekiln';
+
+pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
+return a set of two-element tuples (key,value) consisting of the value of
+returnkey, and the cdata value of the xpath. The XML document would be
+defined by relname and attrname.
+
+The pgxml_xpaths function could be the basis of a functional index,
+which could speed up the above query very substantially, working
+through the normal query planner mechanism.
+
+5. Return type support.
+
+Better support for returning e.g. numeric or boolean values. I need to
+get to grips with the returned data from libxml first.
+
+ 
+John Gray <jgray@azuli.co.uk> 16 August 2001
+
+
+
+
+
+
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c
new file mode 100644
index 0000000000..4d8c3b96bc
--- /dev/null
+++ b/contrib/xml/pgxml.c
@@ -0,0 +1,352 @@
+/********************************************************
+ * Interface code to parse an XML document using expat
+ ********************************************************/
+
+#include "postgres.h"
+#include "fmgr.h"
+
+#include "expat.h"
+#include "pgxml.h"
+
+/* Memory management - we make expat use standard pg MM */
+
+XML_Memory_Handling_Suite mhs;
+
+/* passthrough functions (palloc is a macro) */
+
+/* Allocation callback handed to expat: forwards the request to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+	void	   *chunk = palloc(size);
+
+	return chunk;
+}
+
+/* Resize callback handed to expat: forwards the request to repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+	void	   *resized = repalloc(ptr, size);
+
+	return resized;
+}
+
+/*
+ * Release callback handed to expat: forwards to pfree().
+ *
+ * pfree() is invoked as a plain statement; a return statement carrying an
+ * expression is not permitted in a function whose return type is void.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+	pfree(ptr);
+}
+
+/*
+ * Populate the file-level expat memory-handling suite so that the parser's
+ * allocate / reallocate / free requests go through the pgxml_* wrappers.
+ */
+static void
+pgxml_mhs_init()
+{
+	mhs.free_fcn = pgxml_pfree;
+	mhs.realloc_fcn = pgxml_repalloc;
+	mhs.malloc_fcn = pgxml_palloc;
+}
+
+static void
+pgxml_handler_init()
+{
+	/*
+	 * Placeholder, intentionally empty for now. This code should set up
+	 * the relevant handlers from user-supplied settings. Quite how these
+	 * settings are made is another matter :)
+	 */
+}
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document) - SQL-callable well-formedness check.
+ *
+ * Hands the whole text argument to a fresh expat parser in a single call
+ * and returns true when expat accepts it, false otherwise.  Parse errors
+ * are not reported to the user; they only produce the false result.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+	text	   *doc = PG_GETARG_TEXT_P(0);		/* document buffer */
+	int32		doclen = VARSIZE(doc) - VARHDRSZ;
+	XML_Parser	parser;
+	bool		wellformed;
+
+	pgxml_mhs_init();
+
+	pgxml_handler_init();
+
+	parser = XML_ParserCreate_MM(NULL, &mhs, NULL);
+	if (parser == NULL)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+				 errmsg("could not create expat parser")));
+		PG_RETURN_NULL();		/* seems appropriate if we couldn't parse */
+	}
+
+	/* The document is passed as one final chunk, hence the trailing 1. */
+	wellformed = (XML_Parse(parser, (char *) VARDATA(doc), doclen, 1) != 0);
+
+	XML_ParserFree(parser);
+	PG_RETURN_BOOL(wellformed);
+}
+
+/* XPath handling functions */
+
+/* XPath support here is for a very skeletal kind of XPath!
+   It was easy to program though... */
+
+/* This first is the core function that builds a result set. The
+   actual functions called by the user manipulate that result set
+   in various ways.
+*/
+
+/*
+ * build_xpath_results - parse doc with expat and collect the character
+ * data of every element whose path matches pathstr.
+ *
+ * Returns a palloc'd XPath_Results whose resbuf holds the matches
+ * concatenated back to back; the caller is expected to pfree both resbuf
+ * and the struct.  Returns NULL when the document fails to parse, in which
+ * case everything allocated here (including the result buffer) has already
+ * been released.
+ */
+static XPath_Results *
+build_xpath_results(text *doc, text *pathstr)
+{
+	XPath_Results *xpr;
+	char	   *res;
+	pgxml_udata *udata;
+	XML_Parser	p;
+	int32		docsize;
+	int32		pathlen;
+
+	xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
+	memset((void *) xpr, 0, sizeof(XPath_Results));
+	xpr->rescount = 0;
+
+	docsize = VARSIZE(doc) - VARHDRSZ;
+
+	/* res isn't going to be the real return type, it is just a buffer */
+
+	res = (char *) palloc(docsize);
+	memset((void *) res, 0, docsize);
+
+	xpr->resbuf = res;
+
+	udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
+	memset((void *) udata, 0, sizeof(pgxml_udata));
+
+	udata->currentpath[0] = '\0';
+	udata->textgrab = 0;
+
+	/* NUL-terminated copy of the path expression for the handlers */
+	pathlen = VARSIZE(pathstr) - VARHDRSZ;
+	udata->path = (char *) palloc(pathlen + 1);
+	memcpy(udata->path, VARDATA(pathstr), pathlen);
+	udata->path[pathlen] = '\0';
+
+	udata->resptr = res;
+	udata->reslen = 0;
+
+	udata->xpres = xpr;
+
+	/* Now fire up the parser */
+	pgxml_mhs_init();
+
+	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
+	if (!p)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+				 errmsg("could not create expat parser")));
+		pfree(xpr);
+		pfree(udata->path);
+		pfree(udata);
+		pfree(res);
+		return NULL;
+	}
+	XML_SetUserData(p, (void *) udata);
+
+	/* Set the handlers */
+
+	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
+	XML_SetCharacterDataHandler(p, pgxml_charhandler);
+
+	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
+	{
+		/*
+		 * Parse failure is reported to the caller as NULL rather than as a
+		 * warning.  The caller never sees xpr on this path, so the result
+		 * buffer has to be released here as well.
+		 */
+		XML_ParserFree(p);
+		pfree(res);
+		pfree(xpr);
+		pfree(udata->path);
+		pfree(udata);
+
+		return NULL;
+	}
+
+	pfree(udata->path);
+	pfree(udata);
+	XML_ParserFree(p);
+	return xpr;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+/*
+ * pgxml_xpath(document, pathstr, index) - return one XPath match as text.
+ *
+ * index is 1-based (it is decremented before use).  Returns SQL NULL when
+ * the document cannot be parsed or when index does not select an existing
+ * result, which includes zero and negative values.
+ */
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+	/* called as pgxml_xpath(document,pathstr, index) for the moment */
+
+	XPath_Results *xpresults;
+	text	   *restext;
+
+	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
+	text	   *t2 = PG_GETARG_TEXT_P(1);
+	int32		ind = PG_GETARG_INT32(2) - 1;
+
+	xpresults = build_xpath_results(t, t2);
+
+	/*
+	 * This needs to be changed depending on the mechanism for returning
+	 * our set of results.
+	 */
+
+	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
+		PG_RETURN_NULL();
+
+	/* Reject indexes on either side of the collected results. */
+	if (ind < 0 || ind >= (xpresults->rescount))
+	{
+		pfree(xpresults->resbuf);
+		pfree(xpresults);
+		PG_RETURN_NULL();
+	}
+
+	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
+	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
+
+	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
+
+	pfree(xpresults->resbuf);
+	pfree(xpresults);
+
+	PG_RETURN_TEXT_P(restext);
+}
+
+
+/*
+ * pgxml_pathcompare - decide whether text grabbing should start or stop.
+ *
+ * Looks for the requested path inside the current element path:
+ *	- not present at all: if grabbing was on, switch it off and finalise
+ *	  the text collected so far;
+ *	- present and running to the very end of the current path: switch
+ *	  grabbing on, provided the match is the whole current path (when the
+ *	  requested path starts with '/') or is immediately preceded by '/';
+ *	- present elsewhere: the grab flag is left exactly as it was.
+ *
+ * NOTE(review): strstr() reports only the first occurrence, so a requested
+ * path that also occurs earlier in the current path is not recognised at
+ * the end - confirm whether that is intended.
+ */
+static void
+pgxml_pathcompare(void *userData)
+{
+	char	   *hit = strstr(UD->currentpath, UD->path);
+
+	if (hit != NULL)
+	{
+		/*
+		 * Indexing past the match cannot overrun currentpath: strstr()
+		 * only succeeds when the whole of path fits inside it.
+		 */
+		if (hit[strlen(UD->path)] != '\0')
+			return;				/* match is not anchored to the end */
+
+		if ((UD->path)[0] == '/')
+		{
+			/* absolute path: must cover the current path completely */
+			if (hit == UD->currentpath)
+				UD->textgrab = 1;
+		}
+		else if (hit[-1] == '/')
+		{
+			/* relative path: must start on an element boundary */
+			UD->textgrab = 1;
+		}
+		return;
+	}
+
+	/* Should we have more logic here ? */
+	if (UD->textgrab)
+	{
+		UD->textgrab = 0;
+		pgxml_finalisegrabbedtext(userData);
+	}
+}
+
+/*
+ * pgxml_starthandler - element-start callback registered with expat.
+ *
+ * Appends "/name" to the current element path, or emits a warning and
+ * leaves the path alone when the result would not fit.  Unless text is
+ * already being grabbed, the updated path is then checked against the
+ * requested one.
+ */
+static void
+pgxml_starthandler(void *userData, const XML_Char * name,
+				   const XML_Char ** atts)
+{
+	size_t		curlen = strlen(UD->currentpath);
+	size_t		namelen = strlen(name);
+
+	if (namelen + curlen > MAXPATHLENGTH - 2)
+		elog(WARNING, "path too long");
+	else
+	{
+		/* separator first, then the name together with its terminator */
+		UD->currentpath[curlen] = '/';
+		memcpy(UD->currentpath + curlen + 1, name, namelen + 1);
+	}
+
+	/*
+	 * While grabbing, nothing is done with the tag itself. Depending on
+	 * user preference, should we "reconstitute" the element into the
+	 * result text?
+	 */
+	if (!UD->textgrab)
+		pgxml_pathcompare(userData);
+}
+
+/*
+ * pgxml_endhandler - element-end callback registered with expat.
+ *
+ * Removes the closing element from the end of the current path and, when
+ * text is being grabbed, re-evaluates the path so that grabbing can stop.
+ */
+static void
+pgxml_endhandler(void *userData, const XML_Char * name)
+{
+	char	   *lastsep = strrchr(UD->currentpath, '/');
+
+	if (lastsep == NULL)
+	{
+		/* internal error */
+		elog(ERROR, "did not find '/'");
+		lastsep = UD->currentpath;
+	}
+
+	if (strcmp(name, lastsep + 1) == 0)
+		*lastsep = '\0';		/* Chop that element off the end */
+	else
+	{
+		/* unmatched entry, so the path is left as it is */
+		elog(WARNING, "wanted [%s], got [%s]", lastsep, name);
+	}
+
+	if (UD->textgrab)
+		pgxml_pathcompare(userData);
+
+}
+
+/*
+ * pgxml_charhandler - character-data callback registered with expat.
+ *
+ * While grabbing is switched on, appends the len bytes at s to the result
+ * buffer and advances the write position and the running length.
+ *
+ * NOTE(review): the copy is not checked against the size of the result
+ * buffer; this presumably relies on the grabbed text never exceeding the
+ * document size - confirm, especially for expanded entities.
+ */
+static void
+pgxml_charhandler(void *userData, const XML_Char * s, int len)
+{
+	if (!UD->textgrab || len <= 0)
+		return;
+
+	memcpy(UD->resptr, s, len);
+	UD->reslen += len;
+	UD->resptr += len;
+}
+
+/* Should I be using PG list types here? */
+
+/*
+ * pgxml_finalisegrabbedtext - record the text grabbed so far as one result.
+ *
+ * The result starts reslen bytes before the current write position.  The
+ * results are therefore stored back to back in the buffer, but we do know
+ * where one ends and the next begins.
+ *
+ * The result arrays hold MAXRESULTS entries; once they are full, further
+ * results are dropped with a warning instead of being written past the
+ * end of the arrays.
+ */
+static void
+pgxml_finalisegrabbedtext(void *userData)
+{
+	XPath_Results *xpr = UD->xpres;
+
+	if (xpr->rescount >= MAXRESULTS)
+	{
+		elog(WARNING, "more than %d XPath results, extra results ignored",
+			 MAXRESULTS);
+		UD->reslen = 0;
+		return;
+	}
+
+	/* In res/reslen, we have a single result. */
+	xpr->results[xpr->rescount] = UD->resptr - UD->reslen;
+	xpr->reslens[xpr->rescount] = UD->reslen;
+	UD->reslen = 0;
+	xpr->rescount++;
+}
diff --git a/contrib/xml/pgxml.h b/contrib/xml/pgxml.h
new file mode 100644
index 0000000000..2b80124b77
--- /dev/null
+++ b/contrib/xml/pgxml.h
@@ -0,0 +1,42 @@
+/* Header for pg xml parser interface */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static void pgxml_mhs_init();
+static void pgxml_handler_init();
+Datum		pgxml_parse(PG_FUNCTION_ARGS);
+Datum		pgxml_xpath(PG_FUNCTION_ARGS);
+static void pgxml_starthandler(void *userData, const XML_Char * name,
+				   const XML_Char ** atts);
+static void pgxml_endhandler(void *userData, const XML_Char * name);
+static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
+static void pgxml_pathcompare(void *userData);
+static void pgxml_finalisegrabbedtext(void *userData);
+
+#define MAXPATHLENGTH 512
+#define MAXRESULTS 100
+
+
+typedef struct
+{
+	int			rescount;		/* number of entries filled in below */
+	char	   *results[MAXRESULTS];	/* start of each result inside resbuf */
+	int32		reslens[MAXRESULTS];	/* length in bytes of each result */
+	char	   *resbuf;			/* pointer to the result buffer for pfree */
+}	XPath_Results;
+
+
+
+typedef struct
+{
+	char		currentpath[MAXPATHLENGTH];	/* '/'-separated path of the open elements */
+	char	   *path;			/* NUL-terminated path being searched for */
+	int			textgrab;		/* nonzero while character data is collected */
+	char	   *resptr;			/* next write position in the result buffer */
+	int32		reslen;			/* bytes collected for the current result */
+	XPath_Results *xpres;		/* result set being filled in */
+}	pgxml_udata;
+
+
+#define UD ((pgxml_udata *) userData)
diff --git a/contrib/xml/pgxml.sql.in b/contrib/xml/pgxml.sql.in
deleted file mode 100644
index ff46e845b1..0000000000
--- a/contrib/xml/pgxml.sql.in
+++ /dev/null
@@ -1,57 +0,0 @@
---SQL for XML parser
-
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
-CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
--- List function
-
-CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
-	AS 'MODULE_PATHNAME'
-	LANGUAGE 'c' WITH (isStrict);
-
-
-CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text 
-AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
-
-
-
--- Wrapper functions for nodeset where no tags needed.
-
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
-'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
-
-
-CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
-'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
-
--- Table function
-
-CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
-	AS 'MODULE_PATHNAME'
-	LANGUAGE 'c' WITH (isStrict);
-
--- XSLT functions
--- Delete from here to the end of the file if you are not compiling with
--- XSLT support.
-
-
-CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text 
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-
--- the function checks for the correct argument count
-
-CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text 
-	AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
diff --git a/contrib/xml/pgxml_dom.c b/contrib/xml/pgxml_dom.c
new file mode 100644
index 0000000000..2b11b1d646
--- /dev/null
+++ b/contrib/xml/pgxml_dom.c
@@ -0,0 +1,265 @@
+/* Parser interface for DOM-based parser (libxml) rather than
+   stream-based SAX-type parser */
+
+#include "postgres.h"
+#include "fmgr.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* declarations */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static char *pgxml_pstrdup(const char *string);
+
+static void pgxml_parser_init();
+
+static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
+				   xmlChar * toptagname, xmlChar * septagname,
+				   int format);
+
+static xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+
+Datum		pgxml_parse(PG_FUNCTION_ARGS);
+Datum		pgxml_xpath(PG_FUNCTION_ARGS);
+
+/* memory handling passthrough functions (e.g. palloc, pstrdup are
+   currently macros, and the others might become so...) */
+
+/* Allocation callback handed to libxml: forwards the request to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+	void	   *chunk = palloc(size);
+
+	return chunk;
+}
+
+/* Resize callback handed to libxml: forwards the request to repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+	void	   *resized = repalloc(ptr, size);
+
+	return resized;
+}
+
+/*
+ * Release callback handed to libxml: forwards to pfree().
+ *
+ * pfree() is invoked as a plain statement; a return statement carrying an
+ * expression is not permitted in a function whose return type is void.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+	pfree(ptr);
+}
+
+/* String-copy callback handed to libxml: forwards to pstrdup(). */
+static char *
+pgxml_pstrdup(const char *string)
+{
+	char	   *copy = pstrdup(string);
+
+	return copy;
+}
+
+static void
+pgxml_parser_init()
+{
+	/*
+	 * Point libxml's memory management at the palloc-based wrappers and
+	 * then initialise the parser. This code should also set parser
+	 * settings from user-supplied info. Quite how these settings are made
+	 * is another matter :)
+	 */
+
+	xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
+	xmlInitParser();
+
+}
+
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document) - SQL-callable well-formedness check (libxml).
+ *
+ * Parses the text argument into a document tree and returns true when
+ * that succeeds, false otherwise.  The tree is released before the parser
+ * library is cleaned up, the same order pgxml_xpath uses.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+	/* called as pgxml_parse(document) */
+	xmlDocPtr	doctree;
+	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
+	int32		docsize = VARSIZE(t) - VARHDRSZ;
+	bool		wellformed;
+
+	pgxml_parser_init();
+
+	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+	wellformed = (doctree != NULL);		/* NULL means not well-formed */
+
+	if (wellformed)
+		xmlFreeDoc(doctree);
+	xmlCleanupParser();
+
+	PG_RETURN_BOOL(wellformed);
+}
+
+/*
+ * pgxmlNodeSetToText - turn an XPath node set into its text form.
+ *
+ * Each node is written with xmlNodeDump() into an xmlBuffer.  Every node
+ * is wrapped in <septagname>...</septagname> and the whole output in
+ * <toptagname>...</toptagname>; a NULL or empty tag name leaves the
+ * corresponding wrapper out.  A nonzero format adds a newline after each
+ * node, and (format == 2) is what gets passed on to xmlNodeDump() as its
+ * formatting flag.  Returns an xmlStrdup'd copy of the buffer contents.
+ */
+static xmlChar *
+pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+				   xmlDocPtr doc,
+				   xmlChar * toptagname,
+				   xmlChar * septagname,
+				   int format)
+{
+	xmlBufferPtr out = xmlBufferCreate();
+	int			wrap_all = (toptagname != NULL) && (xmlStrlen(toptagname) > 0);
+	int			wrap_each = (septagname != NULL) && (xmlStrlen(septagname) > 0);
+	int			count = (nodeset != NULL) ? nodeset->nodeNr : 0;
+	int			idx;
+	xmlChar    *copy;
+
+	if (wrap_all)
+	{
+		xmlBufferWriteChar(out, "<");
+		xmlBufferWriteCHAR(out, toptagname);
+		xmlBufferWriteChar(out, ">");
+	}
+
+	for (idx = 0; idx < count; idx++)
+	{
+		if (wrap_each)
+		{
+			xmlBufferWriteChar(out, "<");
+			xmlBufferWriteCHAR(out, septagname);
+			xmlBufferWriteChar(out, ">");
+		}
+
+		xmlNodeDump(out, doc, nodeset->nodeTab[idx], 1, (format == 2));
+
+		if (wrap_each)
+		{
+			xmlBufferWriteChar(out, "</");
+			xmlBufferWriteCHAR(out, septagname);
+			xmlBufferWriteChar(out, ">");
+		}
+		if (format)
+			xmlBufferWriteChar(out, "\n");
+	}
+
+	if (wrap_all)
+	{
+		xmlBufferWriteChar(out, "</");
+		xmlBufferWriteCHAR(out, toptagname);
+		xmlBufferWriteChar(out, ">");
+	}
+
+	copy = xmlStrdup(out->content);
+	xmlBufferFree(out);
+	return copy;
+}
+
+/*
+ * pgxml_texttoxmlchar - copy the payload of a text value into a freshly
+ * palloc'd, NUL-terminated xmlChar string.
+ */
+static xmlChar *
+pgxml_texttoxmlchar(text *textstring)
+{
+	int32		nbytes = VARSIZE(textstring) - VARHDRSZ;
+	xmlChar    *copy = (xmlChar *) palloc(nbytes + 1);
+
+	memcpy((char *) copy, VARDATA(textstring), nbytes);
+	copy[nbytes] = '\0';
+
+	return copy;
+}
+
+
+PG_FUNCTION_INFO_V1(pgxml_xpath);
+
+/*
+ * pgxml_xpath(document, xpath, toptag, septag) - evaluate an XPath
+ * expression against a document and return the result as text.
+ *
+ * Node-set results are serialised by pgxmlNodeSetToText() using the two
+ * tag arguments; string results are returned as they are; any other
+ * result type produces a warning and the text "<unsupported/>".  Returns
+ * SQL NULL when the document is not well-formed, when the expression does
+ * not compile, or when evaluation yields no result.
+ *
+ * Every exit goes through the cleanup label so that the XPath result
+ * object, the XPath context, the document tree and the converted
+ * arguments are released on all paths.
+ */
+Datum
+pgxml_xpath(PG_FUNCTION_ARGS)
+{
+	xmlDocPtr	doctree = NULL;
+	xmlXPathContextPtr ctxt = NULL;
+	xmlXPathObjectPtr res = NULL;
+	xmlXPathCompExprPtr comppath;
+	xmlChar    *xpath,
+			   *toptag,
+			   *septag,
+			   *xpresstr = NULL;
+
+	int32		docsize,
+				ressize;
+	text	   *t,
+			   *xpres = NULL;
+
+	t = PG_GETARG_TEXT_P(0);	/* document buffer */
+	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
+	toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+	septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
+
+	docsize = VARSIZE(t) - VARHDRSZ;
+
+	pgxml_parser_init();
+
+	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+	if (doctree == NULL)
+		goto cleanup;			/* not well-formed */
+
+	ctxt = xmlXPathNewContext(doctree);
+	if (ctxt == NULL)
+		goto cleanup;
+	ctxt->node = xmlDocGetRootElement(doctree);
+
+	/* compile the path */
+	comppath = xmlXPathCompile(xpath);
+	if (comppath == NULL)
+	{
+		elog(WARNING, "XPath syntax error");
+		goto cleanup;
+	}
+
+	/* Now evaluate the path expression. */
+	res = xmlXPathCompiledEval(comppath, ctxt);
+	xmlXPathFreeCompExpr(comppath);
+
+	if (res == NULL)
+		goto cleanup;			/* NULL result seems appropriate */
+
+	/* How the result is rendered depends on its type */
+	switch (res->type)
+	{
+		case XPATH_NODESET:
+			xpresstr = pgxmlNodeSetToText(res->nodesetval,
+										  doctree,
+										  toptag, septag, 0);
+			break;
+		case XPATH_STRING:
+			xpresstr = xmlStrdup(res->stringval);
+			break;
+		default:
+			elog(WARNING, "Unsupported XQuery result: %d", res->type);
+			xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
+			break;
+	}
+
+	/* Now convert this result back to text (NULL stays SQL NULL) */
+	if (xpresstr != NULL)
+	{
+		ressize = xmlStrlen(xpresstr);
+		xpres = (text *) palloc(ressize + VARHDRSZ);
+		memcpy(VARDATA(xpres), xpresstr, ressize);
+		VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
+		xmlFree(xpresstr);
+	}
+
+cleanup:
+	/* Free various storage; the document goes last among libxml objects */
+	if (res != NULL)
+		xmlXPathFreeObject(res);
+	if (ctxt != NULL)
+		xmlXPathFreeContext(ctxt);
+	if (doctree != NULL)
+		xmlFreeDoc(doctree);
+	pfree((void *) xpath);
+	pfree((void *) toptag);
+	pfree((void *) septag);
+	xmlCleanupParser();
+
+	if (xpres == NULL)
+		PG_RETURN_NULL();
+	PG_RETURN_TEXT_P(xpres);
+}
diff --git a/contrib/xml/pgxml_dom.sql.in b/contrib/xml/pgxml_dom.sql.in
new file mode 100644
index 0000000000..514643b936
--- /dev/null
+++ b/contrib/xml/pgxml_dom.sql.in
@@ -0,0 +1,10 @@
+-- SQL for XML parser
+
+-- Adjust this setting to control where the objects get created.
+SET search_path TO public;
+
+CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
+    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+
+CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
+    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
diff --git a/contrib/xml/xpath.c b/contrib/xml/xpath.c
deleted file mode 100644
index b4fc828798..0000000000
--- a/contrib/xml/xpath.c
+++ /dev/null
@@ -1,893 +0,0 @@
-/* Parser interface for DOM-based parser (libxml) rather than
-   stream-based SAX-type parser */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "executor/spi.h"
-#include "funcapi.h"
-#include "miscadmin.h"
-#include "lib/stringinfo.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-#include <libxml/xmlerror.h>
-#include <libxml/parserInternals.h>
-
-/* declarations */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
-static void pgxml_errorHandler (void * ctxt, const char *msg, ...);
-
-void elog_error(int level, char *explain, int force);
-void pgxml_parser_init(void);
-
-static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
-				   xmlChar * toptagname, xmlChar * septagname,
-				   xmlChar * plainsep);
-
-text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, 
-			   xmlChar *septag, xmlChar *plainsep);
-
-xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath);
-
-
-Datum		pgxml_parse(PG_FUNCTION_ARGS);
-Datum           xpath_nodeset(PG_FUNCTION_ARGS);
-Datum		xpath_string(PG_FUNCTION_ARGS);
-Datum		xpath_number(PG_FUNCTION_ARGS);
-Datum           xpath_bool(PG_FUNCTION_ARGS);
-Datum           xpath_list(PG_FUNCTION_ARGS);
-Datum           xpath_table(PG_FUNCTION_ARGS);
-
-/* Global variables: state shared by the libxml error handler and elog_error() */
-char *errbuf; /* scratch buffer for one formatted message; allocated in pgxml_parser_init() */
-char *pgxml_errorMsg = NULL; /* text accumulated by pgxml_errorHandler(); NULL when nothing is pending */
-
-/* Convenience macros: convert between C strings and text values via textin/textout */
-
-#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
-#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
-
-#define ERRBUF_SIZE 200 /* byte limit passed to vsnprintf when filling errbuf */
-
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
-   currently macros, and the others might become so...) */
-
-/*
- * Allocation shim with an ordinary function signature: forwards the
- * request to palloc. Handed to xmlMemSetup() by pgxml_parser_init().
- */
-static void *
-pgxml_palloc(size_t size)
-{
-	void	   *chunk;
-
-	chunk = palloc(size);
-	return chunk;
-}
-
-/*
- * Reallocation shim with an ordinary function signature: forwards to
- * repalloc. Handed to xmlMemSetup() by pgxml_parser_init().
- */
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-	void	   *resized;
-
-	resized = repalloc(ptr, size);
-	return resized;
-}
-
-/*
- * Release shim with an ordinary function signature: forwards to pfree.
- * Handed to xmlMemSetup() by pgxml_parser_init().
- *
- * A NULL pointer is ignored, matching the free() contract that libxml
- * expects from its deallocator; pfree itself must not be given NULL.
- * pfree yields no value, so it is called as a plain statement rather than
- * as the operand of "return" (not permitted in a void function).
- */
-static void
-pgxml_pfree(void *ptr)
-{
-	if (ptr != NULL)
-		pfree(ptr);
-}
-
-/*
- * String-duplication shim with an ordinary function signature: forwards
- * to pstrdup. Handed to xmlMemSetup() by pgxml_parser_init().
- */
-static char *
-pgxml_pstrdup(const char *string)
-{
-	char	   *copy;
-
-	copy = pstrdup(string);
-	return copy;
-}
-
-/*
- * Error callback installed with xmlSetGenericErrorFunc().
- *
- * Formats the printf-style message into the scratch buffer errbuf and
- * appends it to pgxml_errorMsg. Nothing is reported from here; the
- * collected text is issued later by elog_error().
- *
- * ctxt		unused.
- * msg, ...	printf-style format and its arguments.
- */
-static void
-pgxml_errorHandler(void *ctxt, const char *msg,...)
-{
-	va_list		args;
-	size_t		addlen;
-
-	va_start(args, msg);
-	vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
-	va_end(args);
-
-	addlen = strlen(errbuf);
-
-	if (pgxml_errorMsg == NULL)
-	{
-		/* First fragment: start the accumulated message. */
-		pgxml_errorMsg = pstrdup(errbuf);
-	}
-	else
-	{
-		size_t		oldlen = strlen(pgxml_errorMsg);
-
-		/*
-		 * As before, the new fragment is written on top of the last
-		 * character of the existing text (presumably to drop a trailing
-		 * newline - NOTE(review): confirm). An empty existing message has
-		 * no last character, so start at offset 0 instead of indexing
-		 * before the buffer.
-		 */
-		size_t		start = (oldlen > 0) ? oldlen - 1 : 0;
-
-		pgxml_errorMsg = repalloc(pgxml_errorMsg, oldlen + addlen + 1);
-		memcpy(pgxml_errorMsg + start, errbuf, addlen);
-		pgxml_errorMsg[start + addlen] = '\0';
-	}
-
-	/* Leave the scratch buffer clean for the next message. */
-	memset(errbuf, 0, ERRBUF_SIZE);
-}
-
-/*
- * Report explain, together with any text collected by the libxml error
- * handler, at the given level.
- *
- * level	severity passed to ereport.
- * explain	caller's description of the failure; emitted verbatim.
- * force	non-zero: report even when no libxml message is pending.
- *
- * Does nothing when force is zero and no message has been collected.
- */
-void
-elog_error(int level, char *explain, int force)
-{
-	char	   *pending = pgxml_errorMsg;
-
-	if (pending == NULL)
-	{
-		/* "%s" keeps any '%' inside explain from being read as a format. */
-		if (force)
-			ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-							errmsg("%s", explain)));
-		return;
-	}
-
-	/*
-	 * Detach the message before reporting: ereport does not come back at
-	 * ERROR level, and the global must never be left pointing at storage
-	 * that has been (or is about to be) released.
-	 */
-	pgxml_errorMsg = NULL;
-	ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-					errmsg("%s:%s", explain, pending)));
-	pfree(pending);
-}
-
-/*
- * Prepare libxml for one round of use: route its allocations through the
- * palloc-based shims, initialise the parser, install pgxml_errorHandler as
- * the generic error callback, and reset the error-collection state.
- *
- * This code could also set parser settings from user-supplied info.
- * Quite how these settings are made is another matter :)
- */
-void
-pgxml_parser_init(void)
-{
-	xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
-	xmlInitParser();
-
-	xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
-
-	/*
-	 * NOTE(review): entity substitution plus external DTD loading lets a
-	 * document pull in external resources; reconsider if documents can
-	 * come from untrusted sources.
-	 */
-	xmlSubstituteEntitiesDefault(1);
-	xmlLoadExtDtdDefaultValue = 1;
-
-	pgxml_errorMsg = NULL;
-
-	/* Size must agree with the limit pgxml_errorHandler gives vsnprintf. */
-	errbuf = palloc(ERRBUF_SIZE);
-	memset(errbuf, 0, ERRBUF_SIZE);
-}
-
-
-/*
- * pgxml_parse(document) returns boolean
- *
- * True when libxml can parse the document, i.e. it is well-formed;
- * false otherwise.
- */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-	text	   *t = PG_GETARG_TEXT_P(0);		/* document buffer */
-	int32		docsize = VARSIZE(t) - VARHDRSZ;
-	xmlDocPtr	doctree;
-	bool		wellformed;
-
-	pgxml_parser_init();
-
-	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-	wellformed = (doctree != NULL);
-
-	/* Release the document before tearing down the parser globals. */
-	if (doctree != NULL)
-		xmlFreeDoc(doctree);
-	xmlCleanupParser();
-
-	PG_RETURN_BOOL(wellformed);
-}
-
-
-/*
- * Append "<name>" (closing == 0) or "</name>" (closing != 0) to buf.
- * Writes nothing when name is NULL or empty.
- */
-static void
-pgxml_write_tag(xmlBufferPtr buf, xmlChar *name, int closing)
-{
-	if (name == NULL || xmlStrlen(name) <= 0)
-		return;
-
-	xmlBufferWriteChar(buf, closing ? "</" : "<");
-	xmlBufferWriteCHAR(buf, name);
-	xmlBufferWriteChar(buf, ">");
-}
-
-/*
- * Translate a node set into a text representation.
- *
- * nodeset		nodes to render; NULL is treated as an empty set.
- * toptagname	if non-empty, the whole output is wrapped in this tag.
- * septagname	if non-empty, each dumped node is wrapped in this tag
- *				(only used when plainsep is NULL).
- * plainsep		ordinary (non-tag) separator. When not NULL, each node is
- *				cast to its string value instead of being dumped as XML,
- *				and the separator is written between consecutive values.
- *
- * Returns a string obtained from xmlStrdup; the caller releases it with
- * xmlFree.
- */
-static xmlChar *
-pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
-				   xmlChar * toptagname,
-				   xmlChar * septagname,
-				   xmlChar * plainsep)
-{
-	xmlBufferPtr buf;
-	xmlChar    *result;
-	int			i;
-
-	buf = xmlBufferCreate();
-
-	pgxml_write_tag(buf, toptagname, 0);
-
-	if (nodeset != NULL)
-	{
-		for (i = 0; i < nodeset->nodeNr; i++)
-		{
-			xmlNodePtr	node = nodeset->nodeTab[i];
-
-			if (plainsep != NULL)
-			{
-				xmlChar    *nodestr = xmlXPathCastNodeToString(node);
-
-				xmlBufferWriteCHAR(buf, nodestr);
-				/* the cast returns a fresh string that we own */
-				if (nodestr != NULL)
-					xmlFree(nodestr);
-
-				/* If this isn't the last entry, write the plain sep. */
-				if (i < nodeset->nodeNr - 1)
-					xmlBufferWriteCHAR(buf, plainsep);
-			}
-			else
-			{
-				pgxml_write_tag(buf, septagname, 0);
-				xmlNodeDump(buf, node->doc, node, 1, 0);
-				pgxml_write_tag(buf, septagname, 1);
-			}
-		}
-	}
-
-	pgxml_write_tag(buf, toptagname, 1);
-
-	result = xmlStrdup(buf->content);
-	xmlBufferFree(buf);
-	return result;
-}
-
-
-/*
- * Copy the payload of a PostgreSQL text value (a "varlena", i.e. a
- * variable-length datum) into a freshly palloc'd, NUL-terminated xmlChar
- * string, the representation libxml2 works with.
- */
-
-xmlChar *
-pgxml_texttoxmlchar(text *textstring)
-{
-	int32		len = VARSIZE(textstring) - VARHDRSZ;
-	xmlChar    *copy = (xmlChar *) palloc(len + 1);
-
-	memcpy((char *) copy, VARDATA(textstring), len);
-	copy[len] = '\0';
-
-	return copy;
-}
-
-/* Public visible XPath functions */
-
-/*
- * xpath_nodeset(document, xpath, toptag, septag) returns text
- *
- * This is a "raw" xpath function: the expression is evaluated against the
- * document as given and the result is rendered by pgxml_result_to_text().
- * toptag and septag are passed through as the wrapper tags for a node-set
- * result. Returns NULL when there is no result to render.
- *
- * TODO (from the original author): check that it returns child elements
- * properly.
- */
-
-PG_FUNCTION_INFO_V1(xpath_nodeset);
-
-Datum
-xpath_nodeset(PG_FUNCTION_ARGS)
-{
-	xmlChar    *xpath;
-	xmlChar    *toptag;
-	xmlChar    *septag;
-	text	   *xpres;
-
-	/* argument 0 is the document buffer, argument 1 the XPath expression */
-	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-	toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-	septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
-
-	xpres = pgxml_result_to_text(
-								 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
-								 toptag, septag, NULL);
-
-	/* xmlCleanupParser(); done by result_to_text routine */
-	pfree((void *) xpath);
-	pfree((void *) toptag);
-	pfree((void *) septag);
-
-	if (xpres == NULL)
-		PG_RETURN_NULL();
-
-	PG_RETURN_TEXT_P(xpres);
-}
-
-/*
- * xpath_list(document, xpath, plainsep) returns text
- *
- * Almost identical to xpath_nodeset, but the nodes of a node-set result
- * are returned as a list: each node is converted to its string value and
- * the values are joined with the plain separator. Returns NULL when there
- * is no result to render.
- */
-
-PG_FUNCTION_INFO_V1(xpath_list);
-
-Datum
-xpath_list(PG_FUNCTION_ARGS)
-{
-	xmlChar    *xpath;
-	xmlChar    *plainsep;
-	text	   *xpres;
-
-	/* argument 0 is the document buffer, argument 1 the XPath expression */
-	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-	plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-
-	xpres = pgxml_result_to_text(
-								 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
-								 NULL, NULL, plainsep);
-
-	/* xmlCleanupParser(); done by result_to_text routine */
-	pfree((void *) xpath);
-	pfree((void *) plainsep);
-
-	if (xpres == NULL)
-		PG_RETURN_NULL();
-
-	PG_RETURN_TEXT_P(xpres);
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_string);
-
-/*
- * xpath_string(document, xpath) returns text
- *
- * Evaluates "string(<xpath>)" against the document, so whatever the
- * supplied expression selects is converted to a string by the XPath
- * engine itself. Returns NULL when there is no result to render.
- */
-Datum
-xpath_string(PG_FUNCTION_ARGS)
-{
-	/* argument 0 is the document buffer, argument 1 the XPath expression */
-	text	   *xpathsupp = PG_GETARG_TEXT_P(1);
-	int32		pathsize = VARSIZE(xpathsupp) - VARHDRSZ;
-	xmlChar    *wrapped;
-	text	   *rendered;
-
-	/*
-	 * Room for the supplied path plus "string(" and ")" (8 chars) and the
-	 * terminating NUL.
-	 */
-	wrapped = (xmlChar *) palloc(pathsize + 9);
-	memcpy((char *) wrapped, "string(", 7);
-	memcpy((char *) (wrapped + 7), VARDATA(xpathsupp), pathsize);
-	wrapped[pathsize + 7] = ')';
-	wrapped[pathsize + 8] = '\0';
-
-	rendered = pgxml_result_to_text(
-									pgxml_xpath(PG_GETARG_TEXT_P(0), wrapped),
-									NULL, NULL, NULL);
-
-	xmlCleanupParser();
-	pfree((void *) wrapped);
-
-	if (rendered != NULL)
-		PG_RETURN_TEXT_P(rendered);
-
-	PG_RETURN_NULL();
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_number);
-
-/*
- * xpath_number(document, xpath) returns float4
- *
- * Evaluates the expression and casts the result to a number with
- * xmlXPathCastToNumber. Returns NULL when evaluation yields no result or
- * the number is NaN.
- */
-Datum
-xpath_number(PG_FUNCTION_ARGS)
-{
-	xmlChar    *xpath;
-	xmlXPathObjectPtr res;
-	float4		fRes;
-
-	/* argument 0 is the document buffer, argument 1 the XPath expression */
-	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-	res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
-	pfree((void *) xpath);
-
-	if (res == NULL)
-	{
-		xmlCleanupParser();
-		PG_RETURN_NULL();
-	}
-
-	fRes = xmlXPathCastToNumber(res);
-	xmlCleanupParser();
-
-	if (xmlXPathIsNaN(fRes))
-		PG_RETURN_NULL();
-
-	PG_RETURN_FLOAT4(fRes);
-}
-
-
-PG_FUNCTION_INFO_V1(xpath_bool);
-
-/*
- * xpath_bool(document, xpath) returns boolean
- *
- * Evaluates the expression and casts the result to a boolean with
- * xmlXPathCastToBoolean. Returns false when evaluation yields no result.
- */
-Datum
-xpath_bool(PG_FUNCTION_ARGS)
-{
-	xmlChar    *xpath;
-	xmlXPathObjectPtr res;
-	int			bRes;
-
-	/* argument 0 is the document buffer, argument 1 the XPath expression */
-	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));
-
-	res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
-	pfree((void *) xpath);
-
-	if (res == NULL)
-	{
-		xmlCleanupParser();
-		PG_RETURN_BOOL(false);
-	}
-
-	bRes = xmlXPathCastToBoolean(res);
-	xmlCleanupParser();
-
-	PG_RETURN_BOOL(bRes);
-}
-
-
-
-/*
- * Core function to evaluate an XPath query.
- *
- * document	text value holding the XML document.
- * xpath	NUL-terminated XPath expression.
- *
- * Initialises the parser, parses the document and evaluates the
- * expression with the document's root element as context node.
- *
- * Returns the XPath result object, or NULL when the document is not
- * well-formed or evaluation produces no result. A syntax error in the
- * expression is reported through elog_error() at ERROR level.
- *
- * On success the parsed document is deliberately left allocated: a
- * node-set result refers to nodes inside it, and callers render those
- * nodes after this function returns.
- */
-
-static xmlXPathObjectPtr
-pgxml_xpath(text *document, xmlChar *xpath)
-{
-	xmlDocPtr	doctree;
-	xmlXPathContextPtr ctxt;
-	xmlXPathCompExprPtr comppath;
-	xmlXPathObjectPtr res;
-	int32		docsize = VARSIZE(document) - VARHDRSZ;
-
-	pgxml_parser_init();
-
-	doctree = xmlParseMemory((char *) VARDATA(document), docsize);
-	if (doctree == NULL)
-		return NULL;			/* not well-formed */
-
-	ctxt = xmlXPathNewContext(doctree);
-	if (ctxt == NULL)
-	{
-		xmlFreeDoc(doctree);
-		return NULL;
-	}
-	ctxt->node = xmlDocGetRootElement(doctree);
-
-	/* compile the path */
-	comppath = xmlXPathCompile(xpath);
-	if (comppath == NULL)
-	{
-		/* release everything we own, document before parser globals */
-		xmlXPathFreeContext(ctxt);
-		xmlFreeDoc(doctree);
-		xmlCleanupParser();
-		elog_error(ERROR, "XPath Syntax Error", 1);
-
-		return NULL;
-	}
-
-	/* Now evaluate the path expression. */
-	res = xmlXPathCompiledEval(comppath, ctxt);
-	xmlXPathFreeCompExpr(comppath);
-
-	if (res == NULL)
-	{
-		xmlXPathFreeContext(ctxt);
-		xmlFreeDoc(doctree);
-
-		return NULL;
-	}
-
-	return res;
-}
-
-/*
- * Render an XPath result object as a PostgreSQL text value.
- *
- * res		result to render; NULL yields NULL.
- * toptag, septag, plainsep
- *			passed to pgxmlNodeSetToText() for a node-set result.
- *
- * A string result is copied as is; any other result type produces a
- * NOTICE and the placeholder "<unsupported/>". Before returning, the
- * parser globals are cleaned up and any pending libxml error text is
- * reported through elog_error() at ERROR level.
- */
-text *
-pgxml_result_to_text(xmlXPathObjectPtr res,
-					 xmlChar *toptag,
-					 xmlChar *septag,
-					 xmlChar *plainsep)
-{
-	xmlChar    *rendered;
-	int32		len;
-	text	   *out;
-
-	if (res == NULL)
-		return NULL;
-
-	if (res->type == XPATH_NODESET)
-	{
-		rendered = pgxmlNodeSetToText(res->nodesetval,
-									  toptag,
-									  septag, plainsep);
-	}
-	else if (res->type == XPATH_STRING)
-	{
-		rendered = xmlStrdup(res->stringval);
-	}
-	else
-	{
-		elog(NOTICE, "Unsupported XQuery result: %d", res->type);
-		rendered = xmlStrdup("<unsupported/>");
-	}
-
-	/* Now convert this result back to text */
-	len = strlen(rendered);
-	out = (text *) palloc(len + VARHDRSZ);
-	memcpy(VARDATA(out), rendered, len);
-	VARATT_SIZEP(out) = len + VARHDRSZ;
-
-	/* Free various storage; the document itself is left to the caller's context */
-	xmlCleanupParser();
-	xmlFree(rendered);
-
-	elog_error(ERROR, "XPath error", 0);
-
-	return out;
-}
-
-/*
- * xpath_table(pkeyfield, xmlfield, relname, xpathset, condition)
- *
- * Table function. Runs
- *		SELECT pkeyfield, xmlfield FROM relname WHERE condition
- * through SPI and, for every row returned, parses the second column as an
- * XML document and evaluates the '|'-separated XPath expressions in
- * xpathset against it.
- *
- * Output: column 1 receives the key value, columns 2.. receive the results
- * of the XPaths in order. Spare columns, and every XPath column of a
- * document that cannot be parsed, are NULL. When a path returns a node
- * set, one output row is produced per node position, until no path has a
- * node left at that position.
- *
- * The rows are returned in a tuplestore through rsinfo->setResult, so the
- * function must be called where a materialized set result is accepted.
- *
- * NOTE(review): the query text is assembled by plain substitution of the
- * arguments; callers must not pass untrusted text for them.
- */
-
-PG_FUNCTION_INFO_V1(xpath_table);
-
-Datum
-xpath_table(PG_FUNCTION_ARGS)
-{
-	/* SPI (input tuple) support */
-	SPITupleTable *tuptable;
-	HeapTuple	spi_tuple;
-	TupleDesc	spi_tupdesc;
-
-	/* Output tuple (tuplestore) support */
-	Tuplestorestate *tupstore = NULL;
-	TupleDesc	ret_tupdesc;
-	HeapTuple	ret_tuple;
-
-	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	AttInMetadata *attinmeta;
-	MemoryContext per_query_ctx;
-	MemoryContext oldcontext;
-
-	/* Function parameters */
-	char	   *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
-	char	   *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
-	char	   *relname = GET_STR(PG_GETARG_TEXT_P(2));
-	char	   *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
-	char	   *condition = GET_STR(PG_GETARG_TEXT_P(4));
-
-	char	  **values;
-	xmlChar   **xpaths;
-	char	   *pos;
-	const char *pathsep = "|";
-
-	int			numpaths;
-	int			ret;
-	int			proc;
-	int			i;
-	int			j;
-	int			rownr;			/* For issuing multiple rows from one
-								 * original document */
-	int			had_values;		/* To determine end of nodeset results */
-
-	StringInfo	querysql;
-
-	/*
-	 * We only have a valid tuple description in table function mode. Check
-	 * rsinfo itself before looking inside it.
-	 */
-	if (rsinfo == NULL || rsinfo->expectedDesc == NULL)
-		ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
-						errmsg("xpath_table must be called as a table function")));
-
-	/*
-	 * We want to materialise because it means that we don't have to carry
-	 * libxml2 parser state between invocations of this function, so check
-	 * that the caller supports us returning a tuplestore.
-	 */
-	if (!(rsinfo->allowedModes & SFRM_Materialize))
-		ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
-						errmsg("xpath_table requires Materialize mode, but it is not "
-							   "allowed in this context")));
-
-	/*
-	 * The tuplestore must exist in a higher context than this function
-	 * call (per_query_ctx is used).
-	 */
-	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
-	oldcontext = MemoryContextSwitchTo(per_query_ctx);
-
-	/*
-	 * Create the tuplestore - SortMem is the max in-memory size before it
-	 * is shipped to a disk heap file. Just like ... SortMem!
-	 */
-	tupstore = tuplestore_begin_heap(true, false, SortMem);
-
-	MemoryContextSwitchTo(oldcontext);
-
-	/* get the requested return tuple description */
-	ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
-
-	/*
-	 * At the moment we assume that the returned attributes make sense for
-	 * the XPath specified (i.e. we trust the caller). It's not fatal if
-	 * they get it wrong - the input function for the column type will
-	 * raise an error if the path result can't be converted into the
-	 * correct binary representation.
-	 */
-	attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
-
-	/* Set return mode and allocate value space. */
-	rsinfo->returnMode = SFRM_Materialize;
-	rsinfo->setDesc = ret_tupdesc;
-
-	values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
-	xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
-
-	/*
-	 * Split XPaths. xpathset is a writable CString. We stop splitting once
-	 * there is one path per column after the key column, so values[j + 1]
-	 * below can never run past the end of the values array (with a single
-	 * output column no path is kept at all).
-	 */
-	numpaths = 0;
-	pos = xpathset;
-	while (pos != NULL && numpaths < ret_tupdesc->natts - 1)
-	{
-		xpaths[numpaths] = (xmlChar *) pos;
-		pos = strstr(pos, pathsep);
-		if (pos != NULL)
-		{
-			*pos = '\0';
-			pos++;
-		}
-		numpaths++;
-	}
-
-	/* Now build query */
-	querysql = makeStringInfo();
-
-	/* Build initial sql statement */
-	appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s",
-					 pkeyfield,
-					 xmlfield,
-					 relname,
-					 condition
-		);
-
-	if ((ret = SPI_connect()) < 0)
-		elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
-
-	if ((ret = SPI_exec(querysql->data, 0)) != SPI_OK_SELECT)
-		elog(ERROR, "xpath_table: SPI execution failed for query %s", querysql->data);
-
-	proc = SPI_processed;
-	tuptable = SPI_tuptable;
-	spi_tupdesc = tuptable->tupdesc;
-
-	/* Switch out of SPI context */
-	MemoryContextSwitchTo(oldcontext);
-
-	/*
-	 * Check that SPI returned correct result. If you put a comma into one
-	 * of the function parameters, this will catch it when the SPI query
-	 * returns e.g. 3 columns.
-	 */
-	if (spi_tupdesc->natts != 2)
-		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-						errmsg("Expression returning multiple columns is not valid in parameter list"),
-						errdetail("Expected two columns in SPI result, got %d", spi_tupdesc->natts)));
-
-	/*
-	 * Setup the parser. Beware that this must happen in the same context
-	 * as the cleanup - which means that any error from here on must do
-	 * cleanup to ensure that the entity table doesn't get freed by being
-	 * out of context.
-	 */
-	pgxml_parser_init();
-
-	/* For each row i.e. document returned from SPI */
-	for (i = 0; i < proc; i++)
-	{
-		char	   *pkey;
-		char	   *xmldoc;
-
-		xmlDocPtr	doctree;
-		xmlXPathContextPtr ctxt;
-		xmlXPathObjectPtr res;
-		xmlChar    *resstr;
-		xmlXPathCompExprPtr comppath;
-
-		/* Extract the row data as C Strings (NULL for SQL NULL) */
-		spi_tuple = tuptable->vals[i];
-		pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
-		xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
-
-		/*
-		 * Clear the values array, so that not-well-formed documents return
-		 * NULL in all columns. Note that this also means that spare
-		 * columns will be NULL.
-		 */
-		for (j = 0; j < ret_tupdesc->natts; j++)
-			values[j] = NULL;
-
-		/* Insert primary key */
-		values[0] = pkey;
-
-		/* Parse the document; a NULL document is treated as unparsable */
-		if (xmldoc != NULL)
-			doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
-		else
-			doctree = NULL;
-
-		if (doctree == NULL)
-		{
-			/* not well-formed, so output all-NULL tuple */
-			ret_tuple = BuildTupleFromCStrings(attinmeta, values);
-			oldcontext = MemoryContextSwitchTo(per_query_ctx);
-			tuplestore_puttuple(tupstore, ret_tuple);
-			MemoryContextSwitchTo(oldcontext);
-			heap_freetuple(ret_tuple);
-		}
-		else
-		{
-			/* New loop here - we have to deal with nodeset results */
-			rownr = 0;
-
-			do
-			{
-				/* Now evaluate the set of xpaths. */
-				had_values = 0;
-				for (j = 0; j < numpaths; j++)
-				{
-					ctxt = xmlXPathNewContext(doctree);
-					ctxt->node = xmlDocGetRootElement(doctree);
-					xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
-
-					/* compile the path */
-					comppath = xmlXPathCompile(xpaths[j]);
-					if (comppath == NULL)
-					{
-						/* release what we own, document before globals */
-						xmlXPathFreeContext(ctxt);
-						xmlFreeDoc(doctree);
-						xmlCleanupParser();
-
-						elog_error(ERROR, "XPath Syntax Error", 1);
-
-						PG_RETURN_NULL();		/* Keep compiler happy */
-					}
-
-					/* Now evaluate the path expression. */
-					res = xmlXPathCompiledEval(comppath, ctxt);
-					xmlXPathFreeCompExpr(comppath);
-
-					if (res != NULL)
-					{
-						switch (res->type)
-						{
-							case XPATH_NODESET:
-								/* We see if this nodeset has enough nodes */
-								if ((res->nodesetval != NULL) &&
-									(rownr < res->nodesetval->nodeNr))
-								{
-									resstr =
-										xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
-									had_values = 1;
-								}
-								else
-									resstr = NULL;
-								break;
-
-							case XPATH_STRING:
-								resstr = xmlStrdup(res->stringval);
-								break;
-
-							default:
-								elog(NOTICE, "Unsupported XQuery result: %d", res->type);
-								resstr = xmlStrdup("<unsupported/>");
-						}
-
-						/*
-						 * Insert this into the appropriate column in the
-						 * result tuple. resstr is a separate copy, so the
-						 * result object can be released right away.
-						 */
-						values[j + 1] = (char *) resstr;
-						xmlXPathFreeObject(res);
-					}
-					xmlXPathFreeContext(ctxt);
-				}
-
-				/* Now add the tuple to the output, if there is one. */
-				if (had_values)
-				{
-					ret_tuple = BuildTupleFromCStrings(attinmeta, values);
-					oldcontext = MemoryContextSwitchTo(per_query_ctx);
-					tuplestore_puttuple(tupstore, ret_tuple);
-					MemoryContextSwitchTo(oldcontext);
-					heap_freetuple(ret_tuple);
-				}
-
-				rownr++;
-
-			} while (had_values);
-
-			xmlFreeDoc(doctree);
-		}
-
-		/* SPI_getvalue hands back NULL for SQL NULL; nothing to free then */
-		if (pkey != NULL)
-			pfree(pkey);
-		if (xmldoc != NULL)
-			pfree(xmldoc);
-	}
-
-	xmlCleanupParser();
-
-	/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
-	tuplestore_donestoring(tupstore);
-
-	SPI_finish();
-
-	rsinfo->setResult = tupstore;
-
-	/*
-	 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
-	 * tuples are in our tuplestore and passed back through
-	 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
-	 * that we actually used to build our tuples with, so the caller can
-	 * verify we did what it was expecting.
-	 */
-	return (Datum) 0;
-}
diff --git a/contrib/xml/xslt_proc.c b/contrib/xml/xslt_proc.c
deleted file mode 100644
index 64f9736622..0000000000
--- a/contrib/xml/xslt_proc.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* XSLT processing functions (requiring libxslt) */
-/* John Gray, for Torchbox 2003-04-01 */
-
-#include "postgres.h"
-#include "fmgr.h"
-#include "executor/spi.h"
-#include "funcapi.h"
-#include "miscadmin.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-
-/* libxslt includes */
-
-#include <libxslt/xslt.h>
-#include <libxslt/xsltInternals.h>
-#include <libxslt/transform.h>
-#include <libxslt/xsltutils.h>
-
-
-/* declarations to come from xpath.c */
-
-extern void elog_error(int level, char *explain, int force);
-extern void pgxml_parser_init();
-extern xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
-
-/* local defs */
-static void parse_params(const char **params, text *paramstr);
-
-Datum xslt_process(PG_FUNCTION_ARGS);
-
-
-#define MAXPARAMS 20
-
-PG_FUNCTION_INFO_V1(xslt_process);
-
-/*
- * xslt_process(document, stylesheet [, params]) returns text
- *
- * Applies the stylesheet to the document and returns the serialized
- * result. An argument whose first character is '<' is parsed as literal
- * XML; anything else is handed to the libxml/libxslt file-parsing
- * routines as a file name. The optional third argument is a
- * "name=value,name=value" list split by parse_params().
- *
- * Returns NULL when the document or stylesheet cannot be parsed (after
- * reporting any collected libxml error text at ERROR level), when the
- * transformation produces no result document, or when the result cannot
- * be serialized.
- *
- * NOTE(review): because non-'<' arguments are opened as files, callers can
- * make the server read whatever it has access to; restrict who may call
- * this if that matters.
- */
-Datum
-xslt_process(PG_FUNCTION_ARGS)
-{
-	const char *params[MAXPARAMS + 1];	/* +1 for the terminator */
-	xsltStylesheetPtr stylesheet = NULL;
-	xmlDocPtr	doctree;
-	xmlDocPtr	restree;
-	xmlDocPtr	ssdoc = NULL;
-	xmlChar    *resstr = NULL;
-	int			resstat;
-	int			reslen = 0;
-
-	text	   *doct = PG_GETARG_TEXT_P(0);
-	text	   *ssheet = PG_GETARG_TEXT_P(1);
-	text	   *tres = NULL;
-
-	if (fcinfo->nargs == 3)
-		parse_params(params, PG_GETARG_TEXT_P(2));
-	else
-		params[0] = NULL;		/* No parameters */
-
-	/* Setup parser */
-	pgxml_parser_init();
-
-	/*
-	 * Check to see if document is a file or a literal. Only look at the
-	 * first byte when there is one.
-	 */
-	if (VARSIZE(doct) > VARHDRSZ && VARDATA(doct)[0] == '<')
-		doctree = xmlParseMemory((char *) VARDATA(doct),
-								 VARSIZE(doct) - VARHDRSZ);
-	else
-		doctree = xmlParseFile(GET_STR(doct));
-
-	if (doctree == NULL)
-	{
-		xmlCleanupParser();
-		elog_error(ERROR, "Error parsing XML document", 0);
-
-		PG_RETURN_NULL();
-	}
-
-	/* Same for stylesheet */
-	if (VARSIZE(ssheet) > VARHDRSZ && VARDATA(ssheet)[0] == '<')
-	{
-		ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
-							   VARSIZE(ssheet) - VARHDRSZ);
-		if (ssdoc == NULL)
-		{
-			xmlFreeDoc(doctree);
-			xmlCleanupParser();
-			elog_error(ERROR, "Error parsing stylesheet as XML document", 0);
-			PG_RETURN_NULL();
-		}
-
-		stylesheet = xsltParseStylesheetDoc(ssdoc);
-	}
-	else
-		stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
-
-	if (stylesheet == NULL)
-	{
-		/* a stylesheet that failed to compile does not take over ssdoc */
-		if (ssdoc != NULL)
-			xmlFreeDoc(ssdoc);
-		xmlFreeDoc(doctree);
-		xsltCleanupGlobals();
-		xmlCleanupParser();
-		elog_error(ERROR, "Failed to parse stylesheet", 0);
-		PG_RETURN_NULL();
-	}
-
-	restree = xsltApplyStylesheet(stylesheet, doctree, params);
-
-	/* Only serialize when the transformation produced a document. */
-	if (restree != NULL)
-		resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
-	else
-		resstat = -1;
-
-	/* Copy the output into a text value before releasing libxml storage. */
-	if (resstat >= 0)
-	{
-		tres = palloc(reslen + VARHDRSZ);
-		if (reslen > 0)
-			memcpy(VARDATA(tres), resstr, reslen);
-		VARATT_SIZEP(tres) = reslen + VARHDRSZ;
-	}
-
-	if (resstr != NULL)
-		xmlFree(resstr);
-	xsltFreeStylesheet(stylesheet);
-	if (restree != NULL)
-		xmlFreeDoc(restree);
-	xmlFreeDoc(doctree);
-
-	xsltCleanupGlobals();
-	xmlCleanupParser();
-
-	if (tres == NULL)
-		PG_RETURN_NULL();
-
-	PG_RETURN_TEXT_P(tres);
-}
-
-
-/*
- * Split a "name=value,name=value,..." parameter string into the
- * NULL-terminated name/value array that is passed to
- * xsltApplyStylesheet.
- *
- * params	output array with room for MAXPARAMS + 1 entries; receives
- *			name, value, name, value, ... followed by a NULL terminator.
- * paramstr	the parameter list as a text value.
- *
- * The entries point into a C-string copy of paramstr that is cut up in
- * place. Splitting stops at the first item without an '=' or once
- * MAXPARAMS entries are filled; anything beyond that is ignored. The
- * array is always NULL-terminated, including when it is completely full.
- */
-void
-parse_params(const char **params, text *paramstr)
-{
-	char	   *pos = GET_STR(paramstr);
-	int			nfilled = 0;	/* entries of params[] written so far */
-
-	/* each parameter needs two free entries: its name and its value */
-	while (nfilled + 2 <= MAXPARAMS)
-	{
-		char	   *eq = strchr(pos, '=');
-
-		if (eq == NULL)
-			break;				/* no further name=value item */
-
-		*eq = '\0';
-		params[nfilled++] = pos;		/* name */
-		params[nfilled++] = eq + 1;		/* value */
-
-		pos = strchr(eq + 1, ',');
-		if (pos == NULL)
-			break;				/* that was the last item */
-
-		*pos = '\0';
-		pos++;
-	}
-
-	params[nfilled] = NULL;
-}
author	Bruce Momjian	2004-03-05 03:57:58 +0000
committer	Bruce Momjian	2004-03-05 03:57:58 +0000
commit	31f4b59a464808ab0fec0ffb2eaa723321ea1af7 (patch)
tree	004f71d1eb77899fa9e16ac8047189dcde6576e5 /contrib/xml
parent	adca025c9ec4b3050411eb74a5b4f9c20a4ce2b5 (diff)