From 31f4b59a464808ab0fec0ffb2eaa723321ea1af7 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Fri, 5 Mar 2004 03:57:58 +0000 Subject: Move new version of contrib/ xml into xml2, keep old version in /xml. --- contrib/README | 4 + contrib/xml/Makefile | 18 - contrib/xml/README.pgxml | 179 --------- contrib/xml/TODO | 78 ++++ contrib/xml/pgxml.c | 352 +++++++++++++++++ contrib/xml/pgxml.h | 42 ++ contrib/xml/pgxml.sql.in | 57 --- contrib/xml/pgxml_dom.c | 265 +++++++++++++ contrib/xml/pgxml_dom.sql.in | 10 + contrib/xml/xpath.c | 893 ------------------------------------------- contrib/xml/xslt_proc.c | 184 --------- contrib/xml2/Makefile | 18 + contrib/xml2/README.pgxml | 179 +++++++++ contrib/xml2/pgxml.sql.in | 57 +++ contrib/xml2/xpath.c | 893 +++++++++++++++++++++++++++++++++++++++++++ contrib/xml2/xslt_proc.c | 184 +++++++++ 16 files changed, 2082 insertions(+), 1331 deletions(-) delete mode 100644 contrib/xml/Makefile delete mode 100644 contrib/xml/README.pgxml create mode 100644 contrib/xml/TODO create mode 100644 contrib/xml/pgxml.c create mode 100644 contrib/xml/pgxml.h delete mode 100644 contrib/xml/pgxml.sql.in create mode 100644 contrib/xml/pgxml_dom.c create mode 100644 contrib/xml/pgxml_dom.sql.in delete mode 100644 contrib/xml/xpath.c delete mode 100644 contrib/xml/xslt_proc.c create mode 100644 contrib/xml2/Makefile create mode 100644 contrib/xml2/README.pgxml create mode 100644 contrib/xml2/pgxml.sql.in create mode 100644 contrib/xml2/xpath.c create mode 100644 contrib/xml2/xslt_proc.c diff --git a/contrib/README b/contrib/README index a8a2c6c968..0071f43b85 100644 --- a/contrib/README +++ b/contrib/README @@ -217,5 +217,9 @@ vacuumlo - by Peter T Mount xml - + Storing XML in PostgreSQL (obsolete version) + by John Gray + +xml2 - Storing XML in PostgreSQL by John Gray diff --git a/contrib/xml/Makefile b/contrib/xml/Makefile deleted file mode 100644 index 9177ca865c..0000000000 --- a/contrib/xml/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# This 
makefile will build the new XML and XSLT routines. -subdir = contrib/xml -top_builddir = ../../ -include $(top_builddir)/src/Makefile.global - -MODULE_big = pgxml - -# Remove xslt_proc.o from the following line if you don't have libxslt -OBJS = xpath.o xslt_proc.o - -# Remove -lxslt from the following line if you don't have libxslt. -SHLIB_LINK = -lxml2 -lxslt - -DATA_built = pgxml.sql -DOCS = README.pgxml - -include $(top_builddir)contrib/contrib-global.mk - diff --git a/contrib/xml/README.pgxml b/contrib/xml/README.pgxml deleted file mode 100644 index f29d071722..0000000000 --- a/contrib/xml/README.pgxml +++ /dev/null @@ -1,179 +0,0 @@ -XML-handling functions for PostgreSQL -===================================== - -Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com) - -This version of the XML functions provides both XPath querying and -XSLT functionality. There is also a new table function which allows -the straightforward return of multiple XML results. Note that the current code -doesn't take any particular care over character sets - this is -something that should be fixed at some point! - -Installation ------------- - -The current build process will only work if the files are in -contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been -configured and built (If you alter the subdir value in the Makefile -you can place it in a different directory in a PostgreSQL tree). - -Before you begin, just check the Makefile, and then just 'make' and -'make install'. - -This code requires libxml to be previously installed. - -Description of functions ------------------------- - -The first set of functions are straightforward XML parsing and XPath queries: - -pgxml_parse(document) RETURNS bool - -This parses the document text in its parameter and returns true if the -document is well-formed XML. 
- -xpath_string(document,query) RETURNS text -xpath_number(document,query) RETURNS float4 -xpath_bool(document,query) RETURNS bool - -These functions evaluate the XPath query on the supplied document, and -cast the result to the specified type. - - -xpath_nodeset(document,query,toptag,itemtag) RETURNS text - -This evaluates query on document and wraps the result in XML tags. If -the result is multivalued, the output will look like: - - -Value 1 which could be an XML fragment -Value 2.... - - -If either toptag or itemtag is an empty string, the relevant tag is omitted. -There are also wrapper functions for this operation: - -xpath_nodeset(document,query) RETURNS text omits both tags. -xpath_nodeset(document,query,itemtag) RETURNS text omits toptag. - - -xpath_list(document,query,seperator) RETURNS text - -This function returns multiple values seperated by the specified -seperator, e.g. Value 1,Value 2,Value 3 if seperator=','. - -xpath_list(document,query) RETURNS text - -This is a wrapper for the above function that uses ',' as the seperator. - - -xpath_table ------------ - -This is a table function which evaluates a set of XPath queries on -each of a set of documents and returns the results as a table. The -primary key field from the original document table is returned as the -first column of the result so that the resultset from xpath_table can -be readily used in joins. - -The function itself takes 5 arguments, all text. - -xpath_table(key,document,relation,xpaths,criteria) - -key - the name of the "key" field - this is just a field to be used as -the first column of the output table i.e. it identifies the record from -which each output row came. - -document - the name of the field containing the XML document - -relation - the name of the table or view containing the documents - -xpaths - multiple xpath expressions separated by | - -criteria - The contents of the where clause. 
This needs to be specified, -so use "true" or "1=1" here if you want to process all the rows in the -relation. - -NB These parameters (except the XPath strings) are just substituted -into a plain SQL SELECT statement, so you have some flexibility - the -statement is - -SELECT , FROM WHERE - -so those parameters can be *anything* valid in those particular -locations. The result from this SELECT needs to return exactly two -columns (which it will unless you try to list multiple fields for key -or document). Beware that this simplistic approach requires that you -validate any user-supplied values to avoid SQL injection attacks. - -Using the function - -The function has to be used in a FROM expression. This gives the following -form: - -SELECT * FROM -xpath_table('article_id', - 'article_xml', - 'articles', - '/article/author|/article/pages|/article/title', - 'date_entered > ''2003-01-01'' ') -AS t(article_id integer, author text, page_count integer, title text); - -The AS clause defines the names and types of the columns in the -virtual table. If there are more XPath queries than result columns, -the extra queries will be ignored. If there are more result columns -than XPath queries, the extra columns will be NULL. - -Note that I've said in this example that pages is an integer. The -function deals internally with string representations, so when you say -you want an integer in the output, it will take the string -representation of the XPath result and use PostgreSQL input functions -to transform it into an integer (or whatever type the AS clause -requests). An error will result if it can't do this - for example if -the result is empty - so you may wish to just stick to 'text' as the -column type if you think your data has any problems. - -The select statement doesn't need to use * alone - it can reference the -columns by name or join them to other tables. The function produces a -virtual table with which you can perform any operation you wish (e.g. 
-aggregation, joining, sorting etc). So we could also have: - -SELECT t.title, p.fullname, p.email -FROM xpath_table('article_id','article_xml','articles', - '/article/title|/article/author/@id', - 'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ') - AS t(article_id integer, title text, author_id integer), - tblPeopleInfo AS p -WHERE t.author_id = p.person_id; - -as a more complicated example. Of course, you could wrap all -of this in a view for convenience. - -XSLT functions --------------- - -The following functions are available if libxslt is installed (this is -not currently detected automatically, so you will have to amend the -Makefile) - -xslt_process(document,stylesheet,paramlist) RETURNS text - -This function appplies the XSL stylesheet to the document and returns -the transformed result. The paramlist is a list of parameter -assignments to be used in the transformation, specified in the form -'a=1,b=2'. Note that this is also proof-of-concept code and the -parameter parsing is very simple-minded (e.g. parameter values cannot -contain commas!) - -Also note that if either the document or stylesheet values do not -begin with a < then they will be treated as URLs and libxslt will -fetch them. It thus follows that you can use xslt_process as a means -to fetch the contents of URLs - you should be aware of the security -implications of this. - -There is also a two-parameter version of xslt_process which does not -pass any parameters to the transformation. - -If you have any comments or suggestions, please do contact me at -jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't -guarantee a rapid response to your query! diff --git a/contrib/xml/TODO b/contrib/xml/TODO new file mode 100644 index 0000000000..5ddd62a658 --- /dev/null +++ b/contrib/xml/TODO @@ -0,0 +1,78 @@ +PGXML TODO List +=============== + +Some of these items still require much more thought! 
Since the first +release, the XPath support has improved (because I'm no longer using a +homemade algorithm!). + +1. Performance considerations + +At present each document is parsed to produce the DOM tree on every query. + +Pros: + Easy + No persistent memory or storage allocation for parsed trees + (libxml docs suggest representation of a document might + be 4 times the size of the text) + +Cons: + Slow/ CPU intensive to parse. + Makes it difficult for PLs to apply libxml manipulations to create + new documents or amend existing ones. + + +2. XQuery + +I'm not sure if the addition of XQuery would be best as a function or +as a new front-end parser. This is one to think about, but with a +decent implementation of XPath, one of the prerequisites is covered. + +3. DOM Interfaces + +Expose more aspects of the DOM to user functions/ PLs. This would +allow a procedure in a PL to run some queries and then use exposed +interfaces to libxml to create an XML document out of the query +results. I accept the argument that this might be more properly +performed on the client side. + +4. Returning sets of documents from XPath queries. + +Although the current implementation allows you to amalgamate the +returned results into a single document, it's quite possible that +you'd like to use the returned set of nodes as a source for FROM. + +Is there a good way to optimise/index the results of certain XPath +operations to make them faster?: + +select docid, pgxml_xpath(document,'//site/location/text()','','') as location +where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm'; + +and with multiple element occurrences in a document? + +select d.docid, pgxml_xpath(d.document,'//site/location/text()','','') +from docstore d, +pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft +where ft.key = d.docid and ft.value ='Limekiln'; + +pgxml_xpaths params are relname, attrname, xpath, returnkey.
It would +return a set of two-element tuples (key,value) consisting of the value of +returnkey, and the cdata value of the xpath. The XML document would be +defined by relname and attrname. + +The pgxml_xpaths function could be the basis of a functional index, +which could speed up the above query very substantially, working +through the normal query planner mechanism. + +5. Return type support. + +Better support for returning e.g. numeric or boolean values. I need to +get to grips with the returned data from libxml first. + + +John Gray 16 August 2001 + + + + + + diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c new file mode 100644 index 0000000000..4d8c3b96bc --- /dev/null +++ b/contrib/xml/pgxml.c @@ -0,0 +1,352 @@ +/******************************************************** + * Interface code to parse an XML document using expat + ********************************************************/ + +#include "postgres.h" +#include "fmgr.h" + +#include "expat.h" +#include "pgxml.h" + +/* Memory management - we make expat use standard pg MM */ + +XML_Memory_Handling_Suite mhs; + +/* passthrough functions (palloc is a macro) */ + +static void * +pgxml_palloc(size_t size) +{ + return palloc(size); +} + +static void * +pgxml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + +static void +pgxml_pfree(void *ptr) +{ + return pfree(ptr); +} + +static void +pgxml_mhs_init() +{ + mhs.malloc_fcn = pgxml_palloc; + mhs.realloc_fcn = pgxml_repalloc; + mhs.free_fcn = pgxml_pfree; +} + +static void +pgxml_handler_init() +{ + /* + * This code should set up the relevant handlers from user-supplied + * settings. 
Quite how these settings are made is another matter :) + */ +} + +/* Returns true if document is well-formed */ + +PG_FUNCTION_INFO_V1(pgxml_parse); + +Datum +pgxml_parse(PG_FUNCTION_ARGS) +{ + /* called as pgxml_parse(document) */ + XML_Parser p; + text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + int32 docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_mhs_init(); + + pgxml_handler_init(); + + p = XML_ParserCreate_MM(NULL, &mhs, NULL); + if (!p) + { + ereport(ERROR, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg("could not create expat parser"))); + PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */ + } + + if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1)) + { + /* + * elog(WARNING, "Parse error at line %d:%s", + * XML_GetCurrentLineNumber(p), + * XML_ErrorString(XML_GetErrorCode(p))); + */ + XML_ParserFree(p); + PG_RETURN_BOOL(false); + } + + XML_ParserFree(p); + PG_RETURN_BOOL(true); +} + +/* XPath handling functions */ + +/* XPath support here is for a very skeletal kind of XPath! + It was easy to program though... */ + +/* This first is the core function that builds a result set. The + actual functions called by the user manipulate that result set + in various ways. 
+*/ + +static XPath_Results * +build_xpath_results(text *doc, text *pathstr) +{ + XPath_Results *xpr; + char *res; + pgxml_udata *udata; + XML_Parser p; + int32 docsize; + + xpr = (XPath_Results *) palloc((sizeof(XPath_Results))); + memset((void *) xpr, 0, sizeof(XPath_Results)); + xpr->rescount = 0; + + docsize = VARSIZE(doc) - VARHDRSZ; + + /* res isn't going to be the real return type, it is just a buffer */ + + res = (char *) palloc(docsize); + memset((void *) res, 0, docsize); + + xpr->resbuf = res; + + udata = (pgxml_udata *) palloc((sizeof(pgxml_udata))); + memset((void *) udata, 0, sizeof(pgxml_udata)); + + udata->currentpath[0] = '\0'; + udata->textgrab = 0; + + udata->path = (char *) palloc(VARSIZE(pathstr)); + memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ); + + udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0'; + + udata->resptr = res; + udata->reslen = 0; + + udata->xpres = xpr; + + /* Now fire up the parser */ + pgxml_mhs_init(); + + p = XML_ParserCreate_MM(NULL, &mhs, NULL); + if (!p) + { + ereport(ERROR, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg("could not create expat parser"))); + pfree(xpr); + pfree(udata->path); + pfree(udata); + pfree(res); + return NULL; + } + XML_SetUserData(p, (void *) udata); + + /* Set the handlers */ + + XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler); + XML_SetCharacterDataHandler(p, pgxml_charhandler); + + if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1)) + { + /* + * elog(WARNING, "Parse error at line %d:%s", + * XML_GetCurrentLineNumber(p), + * XML_ErrorString(XML_GetErrorCode(p))); + */ + XML_ParserFree(p); + pfree(xpr); + pfree(udata->path); + pfree(udata); + + return NULL; + } + + pfree(udata->path); + pfree(udata); + XML_ParserFree(p); + return xpr; +} + + +PG_FUNCTION_INFO_V1(pgxml_xpath); + +Datum +pgxml_xpath(PG_FUNCTION_ARGS) +{ + /* called as pgxml_xpath(document,pathstr, index) for the moment */ + + XPath_Results *xpresults; + text *restext; + + text *t = 
PG_GETARG_TEXT_P(0); /* document buffer */ + text *t2 = PG_GETARG_TEXT_P(1); + int32 ind = PG_GETARG_INT32(2) - 1; + + xpresults = build_xpath_results(t, t2); + + /* + * This needs to be changed depending on the mechanism for returning + * our set of results. + */ + + if (xpresults == NULL) /* parse error (not WF or parser failure) */ + PG_RETURN_NULL(); + + if (ind >= (xpresults->rescount)) + PG_RETURN_NULL(); + + restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ); + memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]); + + VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ; + + pfree(xpresults->resbuf); + pfree(xpresults); + + PG_RETURN_TEXT_P(restext); +} + + +static void +pgxml_pathcompare(void *userData) +{ + char *matchpos; + + matchpos = strstr(UD->currentpath, UD->path); + + if (matchpos == NULL) + { /* Should we have more logic here ? */ + if (UD->textgrab) + { + UD->textgrab = 0; + pgxml_finalisegrabbedtext(userData); + } + return; + } + + /* + * OK, we have a match of some sort. Now we need to check that our + * match is anchored to the *end* of the string AND that it is + * immediately preceded by a '/' + */ + + /* + * This test wouldn't work if strlen (UD->path) overran the length of + * the currentpath, but that's not possible because we got a match! + */ + + if ((matchpos + strlen(UD->path))[0] == '\0') + { + if ((UD->path)[0] == '/') + { + if (matchpos == UD->currentpath) + UD->textgrab = 1; + } + else + { + if ((matchpos - 1)[0] == '/') + UD->textgrab = 1; + } + } +} + +static void +pgxml_starthandler(void *userData, const XML_Char * name, + const XML_Char ** atts) +{ + + char sepstr[] = "/"; + + if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2) + elog(WARNING, "path too long"); + else + { + strncat(UD->currentpath, sepstr, 1); + strcat(UD->currentpath, name); + } + if (UD->textgrab) + { + /* + * Depending on user preference, should we "reconstitute" the + * element into the result text? 
+ */ + } + else + pgxml_pathcompare(userData); +} + +static void +pgxml_endhandler(void *userData, const XML_Char * name) +{ + /* + * Start by removing the current element off the end of the + * currentpath + */ + + char *sepptr; + + sepptr = strrchr(UD->currentpath, '/'); + if (sepptr == NULL) + { + /* internal error */ + elog(ERROR, "did not find '/'"); + sepptr = UD->currentpath; + } + if (strcmp(name, sepptr + 1) != 0) + { + elog(WARNING, "wanted [%s], got [%s]", sepptr, name); + /* unmatched entry, so do nothing */ + } + else + { + sepptr[0] = '\0'; /* Chop that element off the end */ + } + + if (UD->textgrab) + pgxml_pathcompare(userData); + +} + +static void +pgxml_charhandler(void *userData, const XML_Char * s, int len) +{ + if (UD->textgrab) + { + if (len > 0) + { + memcpy(UD->resptr, s, len); + UD->resptr += len; + UD->reslen += len; + } + } +} + +/* Should I be using PG list types here? */ + +static void +pgxml_finalisegrabbedtext(void *userData) +{ + /* In res/reslen, we have a single result. 
*/ + UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen; + UD->xpres->reslens[UD->xpres->rescount] = UD->reslen; + UD->reslen = 0; + UD->xpres->rescount++; + + /* + * This effectively concatenates all the results together but we do + * know where one ends and the next begins + */ +} diff --git a/contrib/xml/pgxml.h b/contrib/xml/pgxml.h new file mode 100644 index 0000000000..2b80124b77 --- /dev/null +++ b/contrib/xml/pgxml.h @@ -0,0 +1,42 @@ +/* Header for pg xml parser interface */ + +static void *pgxml_palloc(size_t size); +static void *pgxml_repalloc(void *ptr, size_t size); +static void pgxml_pfree(void *ptr); +static void pgxml_mhs_init(); +static void pgxml_handler_init(); +Datum pgxml_parse(PG_FUNCTION_ARGS); +Datum pgxml_xpath(PG_FUNCTION_ARGS); +static void pgxml_starthandler(void *userData, const XML_Char * name, + const XML_Char ** atts); +static void pgxml_endhandler(void *userData, const XML_Char * name); +static void pgxml_charhandler(void *userData, const XML_Char * s, int len); +static void pgxml_pathcompare(void *userData); +static void pgxml_finalisegrabbedtext(void *userData); + +#define MAXPATHLENGTH 512 +#define MAXRESULTS 100 + + +typedef struct +{ + int rescount; + char *results[MAXRESULTS]; + int32 reslens[MAXRESULTS]; + char *resbuf; /* pointer to the result buffer for pfree */ +} XPath_Results; + + + +typedef struct +{ + char currentpath[MAXPATHLENGTH]; + char *path; + int textgrab; + char *resptr; + int32 reslen; + XPath_Results *xpres; +} pgxml_udata; + + +#define UD ((pgxml_udata *) userData) diff --git a/contrib/xml/pgxml.sql.in b/contrib/xml/pgxml.sql.in deleted file mode 100644 index ff46e845b1..0000000000 --- a/contrib/xml/pgxml.sql.in +++ /dev/null @@ -1,57 +0,0 @@ ---SQL for XML parser - -CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); - -CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); 
- -CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); - -CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4 - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); - -CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); - --- List function - -CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text - AS 'MODULE_PATHNAME' - LANGUAGE 'c' WITH (isStrict); - - -CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text -AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict); - - - --- Wrapper functions for nodeset where no tags needed. - - -CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS -'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict); - - -CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS -'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict); - --- Table function - -CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record - AS 'MODULE_PATHNAME' - LANGUAGE 'c' WITH (isStrict); - --- XSLT functions --- Delete from here to the end of the file if you are not compiling with --- XSLT support. 
- - -CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); - --- the function checks for the correct argument count - -CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text - AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); diff --git a/contrib/xml/pgxml_dom.c b/contrib/xml/pgxml_dom.c new file mode 100644 index 0000000000..2b11b1d646 --- /dev/null +++ b/contrib/xml/pgxml_dom.c @@ -0,0 +1,265 @@ +/* Parser interface for DOM-based parser (libxml) rather than + stream-based SAX-type parser */ + +#include "postgres.h" +#include "fmgr.h" + +/* libxml includes */ + +#include +#include +#include + +/* declarations */ + +static void *pgxml_palloc(size_t size); +static void *pgxml_repalloc(void *ptr, size_t size); +static void pgxml_pfree(void *ptr); +static char *pgxml_pstrdup(const char *string); + +static void pgxml_parser_init(); + +static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc, + xmlChar * toptagname, xmlChar * septagname, + int format); + +static xmlChar *pgxml_texttoxmlchar(text *textstring); + + +Datum pgxml_parse(PG_FUNCTION_ARGS); +Datum pgxml_xpath(PG_FUNCTION_ARGS); + +/* memory handling passthrough functions (e.g. palloc, pstrdup are + currently macros, and the others might become so...) */ + +static void * +pgxml_palloc(size_t size) +{ + return palloc(size); +} + +static void * +pgxml_repalloc(void *ptr, size_t size) +{ + return repalloc(ptr, size); +} + +static void +pgxml_pfree(void *ptr) +{ + return pfree(ptr); +} + +static char * +pgxml_pstrdup(const char *string) +{ + return pstrdup(string); +} + +static void +pgxml_parser_init() +{ + /* + * This code should also set parser settings from user-supplied info. 
+ * Quite how these settings are made is another matter :) + */ + + xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup); + xmlInitParser(); + +} + + +/* Returns true if document is well-formed */ + +PG_FUNCTION_INFO_V1(pgxml_parse); + +Datum +pgxml_parse(PG_FUNCTION_ARGS) +{ + /* called as pgxml_parse(document) */ + xmlDocPtr doctree; + text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + int32 docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_parser_init(); + + doctree = xmlParseMemory((char *) VARDATA(t), docsize); + if (doctree == NULL) + { + xmlCleanupParser(); + PG_RETURN_BOOL(false); /* i.e. not well-formed */ + } + xmlCleanupParser(); + xmlFreeDoc(doctree); + PG_RETURN_BOOL(true); +} + +static xmlChar +* +pgxmlNodeSetToText(xmlNodeSetPtr nodeset, + xmlDocPtr doc, + xmlChar * toptagname, + xmlChar * septagname, + int format) +{ + /* Function translates a nodeset into a text representation */ + + /* + * iterates over each node in the set and calls xmlNodeDump to write + * it to an xmlBuffer -from which an xmlChar * string is returned. + */ + /* each representation is surrounded by ... */ + /* if format==0, add a newline between nodes?? 
*/ + + xmlBufferPtr buf; + xmlChar *result; + int i; + + buf = xmlBufferCreate(); + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, toptagname); + xmlBufferWriteChar(buf, ">"); + } + if (nodeset != NULL) + { + for (i = 0; i < nodeset->nodeNr; i++) + { + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, septagname); + xmlBufferWriteChar(buf, ">"); + } + xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2)); + + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } + if (format) + xmlBufferWriteChar(buf, "\n"); + } + } + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } + result = xmlStrdup(buf->content); + xmlBufferFree(buf); + return result; +} + +static xmlChar * +pgxml_texttoxmlchar(text *textstring) +{ + xmlChar *res; + int32 txsize; + + txsize = VARSIZE(textstring) - VARHDRSZ; + res = (xmlChar *) palloc(txsize + 1); + memcpy((char *) res, VARDATA(textstring), txsize); + res[txsize] = '\0'; + return res; +} + + +PG_FUNCTION_INFO_V1(pgxml_xpath); + +Datum +pgxml_xpath(PG_FUNCTION_ARGS) +{ + xmlDocPtr doctree; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr res; + xmlChar *xpath, + *xpresstr, + *toptag, + *septag; + xmlXPathCompExprPtr comppath; + + int32 docsize, + ressize; + text *t, + *xpres; + + t = PG_GETARG_TEXT_P(0); /* document buffer */ + xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1)); /* XPath expression */ + toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); + septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3)); + + docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_parser_init(); + + doctree = xmlParseMemory((char *) VARDATA(t), docsize); + if (doctree == NULL) + { /* not well-formed */ + xmlCleanupParser(); + PG_RETURN_NULL(); + } + + ctxt = xmlXPathNewContext(doctree); + ctxt->node = xmlDocGetRootElement(doctree); + + /* 
compile the path */ + comppath = xmlXPathCompile(xpath); + if (comppath == NULL) + { + elog(WARNING, "XPath syntax error"); + xmlFreeDoc(doctree); + pfree((void *) xpath); + xmlCleanupParser(); + PG_RETURN_NULL(); + } + + /* Now evaluate the path expression. */ + res = xmlXPathCompiledEval(comppath, ctxt); + xmlXPathFreeCompExpr(comppath); + + if (res == NULL) + { + xmlFreeDoc(doctree); + pfree((void *) xpath); + xmlCleanupParser(); + PG_RETURN_NULL(); /* seems appropriate */ + } + /* now we dump this node, ?surrounding by tags? */ + /* To do this, we look first at the type */ + switch (res->type) + { + case XPATH_NODESET: + xpresstr = pgxmlNodeSetToText(res->nodesetval, + doctree, + toptag, septag, 0); + break; + case XPATH_STRING: + xpresstr = xmlStrdup(res->stringval); + break; + default: + elog(WARNING, "Unsupported XQuery result: %d", res->type); + xpresstr = xmlStrdup(""); + } + + + /* Now convert this result back to text */ + ressize = strlen(xpresstr); + xpres = (text *) palloc(ressize + VARHDRSZ); + memcpy(VARDATA(xpres), xpresstr, ressize); + VARATT_SIZEP(xpres) = ressize + VARHDRSZ; + + /* Free various storage */ + xmlFreeDoc(doctree); + pfree((void *) xpath); + xmlFree(xpresstr); + xmlCleanupParser(); + PG_RETURN_TEXT_P(xpres); +} diff --git a/contrib/xml/pgxml_dom.sql.in b/contrib/xml/pgxml_dom.sql.in new file mode 100644 index 0000000000..514643b936 --- /dev/null +++ b/contrib/xml/pgxml_dom.sql.in @@ -0,0 +1,10 @@ +-- SQL for XML parser + +-- Adjust this setting to control where the objects get created. 
+SET search_path TO public; + +CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean + AS 'MODULE_PATHNAME' LANGUAGE c STRICT; + +CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE c STRICT; diff --git a/contrib/xml/xpath.c b/contrib/xml/xpath.c deleted file mode 100644 index b4fc828798..0000000000 --- a/contrib/xml/xpath.c +++ /dev/null @@ -1,893 +0,0 @@ -/* Parser interface for DOM-based parser (libxml) rather than - stream-based SAX-type parser */ - -#include "postgres.h" -#include "fmgr.h" -#include "executor/spi.h" -#include "funcapi.h" -#include "miscadmin.h" -#include "lib/stringinfo.h" - -/* libxml includes */ - -#include -#include -#include -#include -#include - -/* declarations */ - -static void *pgxml_palloc(size_t size); -static void *pgxml_repalloc(void *ptr, size_t size); -static void pgxml_pfree(void *ptr); -static char *pgxml_pstrdup(const char *string); -static void pgxml_errorHandler (void * ctxt, const char *msg, ...); - -void elog_error(int level, char *explain, int force); -void pgxml_parser_init(void); - -static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, - xmlChar * toptagname, xmlChar * septagname, - xmlChar * plainsep); - -text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, - xmlChar *septag, xmlChar *plainsep); - -xmlChar *pgxml_texttoxmlchar(text *textstring); - -static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath); - - -Datum pgxml_parse(PG_FUNCTION_ARGS); -Datum xpath_nodeset(PG_FUNCTION_ARGS); -Datum xpath_string(PG_FUNCTION_ARGS); -Datum xpath_number(PG_FUNCTION_ARGS); -Datum xpath_bool(PG_FUNCTION_ARGS); -Datum xpath_list(PG_FUNCTION_ARGS); -Datum xpath_table(PG_FUNCTION_ARGS); - -/* Global variables */ -char *errbuf; /* per line error buffer */ -char *pgxml_errorMsg = NULL; /* overall error message */ - -/* Convenience macros */ - -#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp))) -#define 
GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) - -#define ERRBUF_SIZE 200 - -/* memory handling passthrough functions (e.g. palloc, pstrdup are - currently macros, and the others might become so...) */ - -static void * -pgxml_palloc(size_t size) -{ -/* elog(DEBUG1,"Alloc %d in CMC %x",size,CurrentMemoryContext); */ - return palloc(size); -} - -static void * -pgxml_repalloc(void *ptr, size_t size) -{ -/* elog(DEBUG1,"ReAlloc in CMC %x",CurrentMemoryContext);*/ - return repalloc(ptr, size); -} - -static void -pgxml_pfree(void *ptr) -{ -/* elog(DEBUG1,"Free in CMC %x",CurrentMemoryContext); */ - return pfree(ptr); -} - -static char * -pgxml_pstrdup(const char *string) -{ - return pstrdup(string); -} - -/* The error handling function. This formats an error message and sets - * a flag - an ereport will be issued prior to return - */ - -static void -pgxml_errorHandler (void * ctxt, const char *msg, ...) -{ - va_list args; - - va_start(args, msg); - vsnprintf(errbuf, ERRBUF_SIZE, msg, args); - va_end(args); - /* Now copy the argument across */ - if (pgxml_errorMsg == NULL) - { - pgxml_errorMsg = pstrdup(errbuf); - } -else - { - int32 xsize = strlen(pgxml_errorMsg); - pgxml_errorMsg = repalloc(pgxml_errorMsg, - (size_t) (xsize + strlen(errbuf) + 1)); - strncpy(&pgxml_errorMsg[xsize-1],errbuf,strlen(errbuf)); - pgxml_errorMsg[xsize+strlen(errbuf)-1]='\0'; - - } - memset(errbuf,0,ERRBUF_SIZE); -} - -/* This function reports the current message at the level specified */ -void elog_error(int level, char *explain, int force) -{ - if (force || (pgxml_errorMsg != NULL)) - { - if (pgxml_errorMsg == NULL) - { - ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), - errmsg(explain))); - } - else - { - ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), - errmsg("%s:%s",explain,pgxml_errorMsg))); - pfree(pgxml_errorMsg); - } - } -} - -void -pgxml_parser_init() -{ - /* - * This code could also set parser settings from user-supplied 
info. - * Quite how these settings are made is another matter :) - */ - - xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup); - xmlInitParser(); - - xmlSetGenericErrorFunc(NULL, pgxml_errorHandler); - - xmlSubstituteEntitiesDefault(1); - xmlLoadExtDtdDefaultValue = 1; - - pgxml_errorMsg = NULL; - - errbuf = palloc(200); - memset(errbuf,0,200); - -} - - -/* Returns true if document is well-formed */ - -PG_FUNCTION_INFO_V1(pgxml_parse); - -Datum -pgxml_parse(PG_FUNCTION_ARGS) -{ - /* called as pgxml_parse(document) */ - xmlDocPtr doctree; - text *t = PG_GETARG_TEXT_P(0); /* document buffer */ - int32 docsize = VARSIZE(t) - VARHDRSZ; - - pgxml_parser_init(); - - doctree = xmlParseMemory((char *) VARDATA(t), docsize); - if (doctree == NULL) - { - xmlCleanupParser(); - PG_RETURN_BOOL(false); /* i.e. not well-formed */ - } - xmlCleanupParser(); - xmlFreeDoc(doctree); - PG_RETURN_BOOL(true); -} - - -static xmlChar -* -pgxmlNodeSetToText(xmlNodeSetPtr nodeset, - xmlChar * toptagname, - xmlChar * septagname, - xmlChar * plainsep) -{ - /* Function translates a nodeset into a text representation */ - - /* - * iterates over each node in the set and calls xmlNodeDump to write - * it to an xmlBuffer -from which an xmlChar * string is returned. - */ - - /* each representation is surrounded by ... */ - /* plainsep is an ordinary (not tag) seperator - if used, then - * nodes are cast to string as output method */ - - - xmlBufferPtr buf; - xmlChar *result; - int i; - - buf = xmlBufferCreate(); - - if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) - { - xmlBufferWriteChar(buf, "<"); - xmlBufferWriteCHAR(buf, toptagname); - xmlBufferWriteChar(buf, ">"); - } - if (nodeset != NULL) - { - for (i = 0; i < nodeset->nodeNr; i++) - { - - if (plainsep != NULL) { - xmlBufferWriteCHAR(buf, - xmlXPathCastNodeToString(nodeset->nodeTab[i])); - - /* If this isn't the last entry, write the plain sep. 
*/ - if (i < (nodeset->nodeNr)-1) { - xmlBufferWriteChar(buf, plainsep); - } - } else { - - - if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) - { - xmlBufferWriteChar(buf, "<"); - xmlBufferWriteCHAR(buf, septagname); - xmlBufferWriteChar(buf, ">"); - } - xmlNodeDump(buf, - nodeset->nodeTab[i]->doc, - nodeset->nodeTab[i], - 1, 0); - - if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) - { - xmlBufferWriteChar(buf, ""); - } - } - } - } - - if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) - { - xmlBufferWriteChar(buf, ""); - } - result = xmlStrdup(buf->content); - xmlBufferFree(buf); - return result; -} - - -/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter - * into the libxml2 representation - */ - -xmlChar * -pgxml_texttoxmlchar(text *textstring) -{ - xmlChar *res; - int32 txsize; - - txsize = VARSIZE(textstring) - VARHDRSZ; - res = (xmlChar *) palloc(txsize + 1); - memcpy((char *) res, VARDATA(textstring), txsize); - res[txsize] = '\0'; - return res; -} - -/* Public visible XPath functions */ - -/* This is a "raw" xpath function. Check that it returns child elements - * properly - */ - -PG_FUNCTION_INFO_V1(xpath_nodeset); - -Datum -xpath_nodeset(PG_FUNCTION_ARGS) -{ - xmlChar *xpath, *toptag, *septag; - int32 pathsize; - text - *xpathsupp, - *xpres; - - /* PG_GETARG_TEXT_P(0) is document buffer */ - xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ - - toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); - septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3)); - - pathsize = VARSIZE(xpathsupp) - VARHDRSZ; - - xpath = pgxml_texttoxmlchar(xpathsupp); - - xpres = pgxml_result_to_text( - pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), - toptag,septag,NULL); - - /* xmlCleanupParser(); done by result_to_text routine */ - pfree((void *) xpath); - - if (xpres == NULL) - { - PG_RETURN_NULL(); - } - PG_RETURN_TEXT_P(xpres); -} - -// The following function is almost identical, but returns the elements in -// a list. 
- -PG_FUNCTION_INFO_V1(xpath_list); - -Datum -xpath_list(PG_FUNCTION_ARGS) -{ - xmlChar *xpath, *plainsep; - int32 pathsize; - text - *xpathsupp, - *xpres; - - /* PG_GETARG_TEXT_P(0) is document buffer */ - xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ - - plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); - - pathsize = VARSIZE(xpathsupp) - VARHDRSZ; - - xpath = pgxml_texttoxmlchar(xpathsupp); - - xpres = pgxml_result_to_text( - pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), - NULL,NULL,plainsep); - - /* xmlCleanupParser(); done by result_to_text routine */ - pfree((void *) xpath); - - if (xpres == NULL) - { - PG_RETURN_NULL(); - } - PG_RETURN_TEXT_P(xpres); -} - - -PG_FUNCTION_INFO_V1(xpath_string); - -Datum -xpath_string(PG_FUNCTION_ARGS) -{ - xmlChar *xpath; - int32 pathsize; - text - *xpathsupp, - *xpres; - - /* PG_GETARG_TEXT_P(0) is document buffer */ - xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ - - pathsize = VARSIZE(xpathsupp) - VARHDRSZ; - - /* We encapsulate the supplied path with "string()" - * = 8 chars + 1 for NUL at end */ - /* We could try casting to string using the libxml function? 
*/ - - xpath =(xmlChar *) palloc(pathsize + 9); - memcpy((char *) (xpath+7), VARDATA(xpathsupp), pathsize); - strncpy((char *) xpath, "string(",7); - xpath[pathsize+7] = ')'; - xpath[pathsize+8] = '\0'; - - xpres = pgxml_result_to_text( - pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), - NULL,NULL,NULL); - - xmlCleanupParser(); - pfree((void *) xpath); - - if (xpres == NULL) - { - PG_RETURN_NULL(); - } - PG_RETURN_TEXT_P(xpres); -} - - -PG_FUNCTION_INFO_V1(xpath_number); - -Datum -xpath_number(PG_FUNCTION_ARGS) -{ - xmlChar *xpath; - int32 pathsize; - text - *xpathsupp; - - float4 fRes; - - xmlXPathObjectPtr res; - - /* PG_GETARG_TEXT_P(0) is document buffer */ - xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ - - pathsize = VARSIZE(xpathsupp) - VARHDRSZ; - - xpath = pgxml_texttoxmlchar(xpathsupp); - - res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath); - pfree((void *) xpath); - - if (res == NULL) - { - xmlCleanupParser(); - PG_RETURN_NULL(); - } - - fRes = xmlXPathCastToNumber(res); - xmlCleanupParser(); - if (xmlXPathIsNaN(fRes)) - { - PG_RETURN_NULL(); - } - - PG_RETURN_FLOAT4(fRes); - -} - - -PG_FUNCTION_INFO_V1(xpath_bool); - -Datum -xpath_bool(PG_FUNCTION_ARGS) -{ - xmlChar *xpath; - int32 pathsize; - text - *xpathsupp; - - int bRes; - - xmlXPathObjectPtr res; - - /* PG_GETARG_TEXT_P(0) is document buffer */ - xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ - - pathsize = VARSIZE(xpathsupp) - VARHDRSZ; - - xpath = pgxml_texttoxmlchar(xpathsupp); - - res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath); - pfree((void *) xpath); - - if (res == NULL) - { - xmlCleanupParser(); - PG_RETURN_BOOL(false); - } - - bRes = xmlXPathCastToBoolean(res); - xmlCleanupParser(); - PG_RETURN_BOOL(bRes); - -} - - - -/* Core function to evaluate XPath query */ - -xmlXPathObjectPtr - pgxml_xpath(text *document, xmlChar *xpath) - { - - xmlDocPtr doctree; - xmlXPathContextPtr ctxt; - xmlXPathObjectPtr res; - - xmlXPathCompExprPtr comppath; - - int32 docsize; - - - docsize = 
VARSIZE(document) - VARHDRSZ; - - pgxml_parser_init(); - - doctree = xmlParseMemory((char *) VARDATA(document), docsize); - if (doctree == NULL) - { /* not well-formed */ - return NULL; - } - - ctxt = xmlXPathNewContext(doctree); - ctxt->node = xmlDocGetRootElement(doctree); - - - /* compile the path */ - comppath = xmlXPathCompile(xpath); - if (comppath == NULL) - { - xmlCleanupParser(); - xmlFreeDoc(doctree); - elog_error(ERROR,"XPath Syntax Error",1); - - return NULL; - } - - /* Now evaluate the path expression. */ - res = xmlXPathCompiledEval(comppath, ctxt); - xmlXPathFreeCompExpr(comppath); - - if (res == NULL) - { - xmlXPathFreeContext(ctxt); - // xmlCleanupParser(); - xmlFreeDoc(doctree); - - return NULL; - } - /* xmlFreeDoc(doctree); */ - return res; - } - -text -*pgxml_result_to_text(xmlXPathObjectPtr res, - xmlChar *toptag, - xmlChar *septag, - xmlChar *plainsep) -{ - xmlChar *xpresstr; - int32 ressize; - text *xpres; - - if (res == NULL) - { - return NULL; - } - switch (res->type) - { - case XPATH_NODESET: - xpresstr = pgxmlNodeSetToText(res->nodesetval, - toptag, - septag, plainsep); - break; - - case XPATH_STRING: - xpresstr = xmlStrdup(res->stringval); - break; - - default: - elog(NOTICE, "Unsupported XQuery result: %d", res->type); - xpresstr = xmlStrdup(""); - } - - - /* Now convert this result back to text */ - ressize = strlen(xpresstr); - xpres = (text *) palloc(ressize + VARHDRSZ); - memcpy(VARDATA(xpres), xpresstr, ressize); - VARATT_SIZEP(xpres) = ressize + VARHDRSZ; - - /* Free various storage */ - xmlCleanupParser(); - /* xmlFreeDoc(doctree); -- will die at end of tuple anyway */ - - xmlFree(xpresstr); - - elog_error(ERROR,"XPath error",0); - - - return xpres; -} - -/* xpath_table is a table function. It needs some tidying (as do the - * other functions here! 
- */ - -PG_FUNCTION_INFO_V1(xpath_table); - -Datum xpath_table(PG_FUNCTION_ARGS) -{ -/* SPI (input tuple) support */ - SPITupleTable *tuptable; - HeapTuple spi_tuple; - TupleDesc spi_tupdesc; - -/* Output tuple (tuplestore) support */ - Tuplestorestate *tupstore = NULL; - TupleDesc ret_tupdesc; - HeapTuple ret_tuple; - - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - AttInMetadata *attinmeta; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - -/* Function parameters */ - char *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0)); - char *xmlfield = GET_STR(PG_GETARG_TEXT_P(1)); - char *relname = GET_STR(PG_GETARG_TEXT_P(2)); - char *xpathset = GET_STR(PG_GETARG_TEXT_P(3)); - char *condition = GET_STR(PG_GETARG_TEXT_P(4)); - - char **values; - xmlChar **xpaths; - xmlChar *pos; - xmlChar *pathsep= "|"; - - int numpaths; - int ret; - int proc; - int i; - int j; - int rownr; /* For issuing multiple rows from one original document */ - int had_values; /* To determine end of nodeset results */ - - StringInfo querysql; - -/* We only have a valid tuple description in table function mode */ - if (rsinfo->expectedDesc == NULL) { - ereport(ERROR,(errcode(ERRCODE_SYNTAX_ERROR), - errmsg("xpath_table must be called as a table function"))); - } - -/* The tuplestore must exist in a higher context than - * this function call (per_query_ctx is used) */ - - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - -/* Create the tuplestore - SortMem is the max in-memory size before it is - * shipped to a disk heap file. Just like ... SortMem! - */ - - tupstore = tuplestore_begin_heap(true, false, SortMem); - - MemoryContextSwitchTo(oldcontext); - - /* get the requested return tuple description */ - ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc); - - /* At the moment we assume that the returned attributes make sense - * for the XPath specififed (i.e. we trust the caller). 
- * It's not fatal if they get it wrong - the input function for the - * column type will raise an error if the path result can't be converted - * into the correct binary representation. - */ - - attinmeta = TupleDescGetAttInMetadata(ret_tupdesc); - - /* We want to materialise because it means that we don't have to - * carry libxml2 parser state between invocations of this function - */ - - /* check to see if caller supports us returning a tuplestore */ - if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) - ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("xpath_table requires Materialize mode, but it is not " - "allowed in this context"))); - - // Set return mode and allocate value space. - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setDesc = ret_tupdesc; - - values = (char **) palloc(ret_tupdesc->natts * sizeof(char *)); - - xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *)); - - /* Split XPaths. xpathset is a writable CString. */ - - /* Note that we stop splitting once we've done all needed for tupdesc */ - - numpaths=0; - pos = xpathset; - do { - xpaths[numpaths] = pos; - pos = strstr(pos,pathsep); - if (pos != NULL) { - *pos = '\0'; - pos++; - } - numpaths++; - } while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1) )); - - /* Now build query */ - - querysql = makeStringInfo(); - - /* Build initial sql statement */ - appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s", - pkeyfield, - xmlfield, - relname, - condition - ); - - - if ((ret = SPI_connect()) < 0) { - elog(ERROR, "xpath_table: SPI_connect returned %d", ret); - } - - if ((ret = SPI_exec(querysql->data,0)) != SPI_OK_SELECT) { - elog(ERROR,"xpath_table: SPI execution failed for query %s",querysql->data); - } - - proc= SPI_processed; - /* elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */ - tuptable = SPI_tuptable; - spi_tupdesc = tuptable->tupdesc; - -/* Switch out of SPI context */ - MemoryContextSwitchTo(oldcontext); - - -/* Check that SPI 
returned correct result. If you put a comma into one of - * the function parameters, this will catch it when the SPI query returns - * e.g. 3 columns. - */ - - if (spi_tupdesc->natts != 2) { - ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("Expression returning multiple columns is not valid in parameter list"), - errdetail("Expected two columns in SPI result, got %d",spi_tupdesc->natts))); - } - -/* Setup the parser. Beware that this must happen in the same context as the - * cleanup - which means that any error from here on must do cleanup to - * ensure that the entity table doesn't get freed by being out of context. - */ - pgxml_parser_init(); - - /* For each row i.e. document returned from SPI */ - for (i=0; i < proc; i++) { - char *pkey; - char *xmldoc; - - xmlDocPtr doctree; - xmlXPathContextPtr ctxt; - xmlXPathObjectPtr res; - xmlChar *resstr; - - - xmlXPathCompExprPtr comppath; - - /* Extract the row data as C Strings */ - - spi_tuple = tuptable->vals[i]; - pkey = SPI_getvalue(spi_tuple, spi_tupdesc,1); - xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc,2); - - - /* Clear the values array, so that not-well-formed documents - * return NULL in all columns. - */ - - /* Note that this also means that spare columns will be NULL. */ - for (j=0; j < ret_tupdesc->natts; j++) { - values[j]= NULL; - } - - /* Insert primary key */ - values[0]=pkey; - - /* Parse the document */ - doctree = xmlParseMemory(xmldoc, strlen(xmldoc)); - - if (doctree == NULL) - { /* not well-formed, so output all-NULL tuple */ - - ret_tuple = BuildTupleFromCStrings(attinmeta, values); - oldcontext = MemoryContextSwitchTo(per_query_ctx); - tuplestore_puttuple(tupstore, ret_tuple); - MemoryContextSwitchTo(oldcontext); - heap_freetuple(ret_tuple); - } - else - { - /* New loop here - we have to deal with nodeset results */ - rownr=0; - - do { - /* Now evaluate the set of xpaths. 
*/ - had_values=0; - for (j=0; j < numpaths; j++) { - - ctxt = xmlXPathNewContext(doctree); - ctxt->node = xmlDocGetRootElement(doctree); - xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler); - - /* compile the path */ - comppath = xmlXPathCompile(xpaths[j]); - if (comppath == NULL) - { - xmlCleanupParser(); - xmlFreeDoc(doctree); - - elog_error(ERROR,"XPath Syntax Error",1); - - PG_RETURN_NULL(); /* Keep compiler happy */ - } - - /* Now evaluate the path expression. */ - res = xmlXPathCompiledEval(comppath, ctxt); - xmlXPathFreeCompExpr(comppath); - - if (res != NULL) - { - switch (res->type) - { - case XPATH_NODESET: - /* We see if this nodeset has enough nodes */ - if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr)) { - resstr = - xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); - had_values=1; - } else { - resstr = NULL; - } - - break; - - case XPATH_STRING: - resstr = xmlStrdup(res->stringval); - break; - - default: - elog(NOTICE, "Unsupported XQuery result: %d", res->type); - resstr = xmlStrdup(""); - } - - - // Insert this into the appropriate column in the result tuple. - values[j+1] = resstr; - } - xmlXPathFreeContext(ctxt); - } - // Now add the tuple to the output, if there is one. - if (had_values) { - ret_tuple = BuildTupleFromCStrings(attinmeta, values); - oldcontext = MemoryContextSwitchTo(per_query_ctx); - tuplestore_puttuple(tupstore, ret_tuple); - MemoryContextSwitchTo(oldcontext); - heap_freetuple(ret_tuple); - } - - rownr++; - - } while (had_values); - - } - - xmlFreeDoc(doctree); - - pfree(pkey); - pfree(xmldoc); - } - - xmlCleanupParser(); -/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */ - tuplestore_donestoring(tupstore); - - SPI_finish(); - - rsinfo->setResult=tupstore; - - /* - * SFRM_Materialize mode expects us to return a NULL Datum. The actual - * tuples are in our tuplestore and passed back through - * rsinfo->setResult. 
rsinfo->setDesc is set to the tuple description - * that we actually used to build our tuples with, so the caller can - * verify we did what it was expecting. - */ - return (Datum) 0; - -} diff --git a/contrib/xml/xslt_proc.c b/contrib/xml/xslt_proc.c deleted file mode 100644 index 64f9736622..0000000000 --- a/contrib/xml/xslt_proc.c +++ /dev/null @@ -1,184 +0,0 @@ -/* XSLT processing functions (requiring libxslt) */ -/* John Gray, for Torchbox 2003-04-01 */ - -#include "postgres.h" -#include "fmgr.h" -#include "executor/spi.h" -#include "funcapi.h" -#include "miscadmin.h" - -/* libxml includes */ - -#include -#include -#include - -/* libxslt includes */ - -#include -#include -#include -#include - - -/* declarations to come from xpath.c */ - -extern void elog_error(int level, char *explain, int force); -extern void pgxml_parser_init(); -extern xmlChar *pgxml_texttoxmlchar(text *textstring); - -#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) - -/* local defs */ -static void parse_params(const char **params, text *paramstr); - -Datum xslt_process(PG_FUNCTION_ARGS); - - -#define MAXPARAMS 20 - -PG_FUNCTION_INFO_V1(xslt_process); - -Datum xslt_process(PG_FUNCTION_ARGS) { - - - const char *params[MAXPARAMS + 1]; /* +1 for the terminator */ - xsltStylesheetPtr stylesheet = NULL; - xmlDocPtr doctree; - xmlDocPtr restree; - xmlDocPtr ssdoc = NULL; - xmlChar *resstr; - int resstat; - int reslen; - - text *doct = PG_GETARG_TEXT_P(0); - text *ssheet = PG_GETARG_TEXT_P(1); - text *paramstr; - text *tres; - - - if (fcinfo->nargs == 3) - { - paramstr = PG_GETARG_TEXT_P(2); - parse_params(params,paramstr); - } - else /* No parameters */ - { - params[0] = NULL; - } - - /* Setup parser */ - pgxml_parser_init(); - - /* Check to see if document is a file or a literal */ - - if (VARDATA(doct)[0] == '<') - { - doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct)-VARHDRSZ); - } - else - { - doctree = xmlParseFile(GET_STR(doct)); - } 
- - if (doctree == NULL) - { - xmlCleanupParser(); - elog_error(ERROR,"Error parsing XML document",0); - - PG_RETURN_NULL(); - } - - /* Same for stylesheet */ - if (VARDATA(ssheet)[0] == '<') - { - ssdoc = xmlParseMemory((char *) VARDATA(ssheet), - VARSIZE(ssheet)-VARHDRSZ); - if (ssdoc == NULL) - { - xmlFreeDoc(doctree); - xmlCleanupParser(); - elog_error(ERROR,"Error parsing stylesheet as XML document",0); - PG_RETURN_NULL(); - } - - stylesheet = xsltParseStylesheetDoc(ssdoc); - } - else - { - stylesheet = xsltParseStylesheetFile(GET_STR(ssheet)); - } - - - if (stylesheet == NULL) - { - xmlFreeDoc(doctree); - xsltCleanupGlobals(); - xmlCleanupParser(); - elog_error(ERROR,"Failed to parse stylesheet",0); - PG_RETURN_NULL(); - } - - restree = xsltApplyStylesheet(stylesheet, doctree, params); - resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); - - xsltFreeStylesheet(stylesheet); - xmlFreeDoc(restree); - xmlFreeDoc(doctree); - - xsltCleanupGlobals(); - xmlCleanupParser(); - - if (resstat < 0) { - PG_RETURN_NULL(); - } - - tres = palloc(reslen + VARHDRSZ); - memcpy(VARDATA(tres),resstr,reslen); - VARATT_SIZEP(tres) = reslen + VARHDRSZ; - - PG_RETURN_TEXT_P(tres); -} - - -void parse_params(const char **params, text *paramstr) -{ - char *pos; - char *pstr; - - int i; - char *nvsep="="; - char *itsep=","; - - pstr = GET_STR(paramstr); - - pos=pstr; - - for (i=0; i < MAXPARAMS; i++) - { - params[i] = pos; - pos = strstr(pos,nvsep); - if (pos != NULL) { - *pos = '\0'; - pos++; - } else { - params[i]=NULL; - break; - } - /* Value */ - i++; - params[i]=pos; - pos = strstr(pos,itsep); - if (pos != NULL) { - *pos = '\0'; - pos++; - } else { - break; - } - - } - if (i < MAXPARAMS) - { - params[i+1]=NULL; - } -} diff --git a/contrib/xml2/Makefile b/contrib/xml2/Makefile new file mode 100644 index 0000000000..9177ca865c --- /dev/null +++ b/contrib/xml2/Makefile @@ -0,0 +1,18 @@ +# This makefile will build the new XML and XSLT routines. 
+subdir = contrib/xml +top_builddir = ../../ +include $(top_builddir)/src/Makefile.global + +MODULE_big = pgxml + +# Remove xslt_proc.o from the following line if you don't have libxslt +OBJS = xpath.o xslt_proc.o + +# Remove -lxslt from the following line if you don't have libxslt. +SHLIB_LINK = -lxml2 -lxslt + +DATA_built = pgxml.sql +DOCS = README.pgxml + +include $(top_builddir)contrib/contrib-global.mk + diff --git a/contrib/xml2/README.pgxml b/contrib/xml2/README.pgxml new file mode 100644 index 0000000000..f29d071722 --- /dev/null +++ b/contrib/xml2/README.pgxml @@ -0,0 +1,179 @@ +XML-handling functions for PostgreSQL +===================================== + +Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com) + +This version of the XML functions provides both XPath querying and +XSLT functionality. There is also a new table function which allows +the straightforward return of multiple XML results. Note that the current code +doesn't take any particular care over character sets - this is +something that should be fixed at some point! + +Installation +------------ + +The current build process will only work if the files are in +contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been +configured and built (If you alter the subdir value in the Makefile +you can place it in a different directory in a PostgreSQL tree). + +Before you begin, just check the Makefile, and then just 'make' and +'make install'. + +This code requires libxml to be previously installed. + +Description of functions +------------------------ + +The first set of functions are straightforward XML parsing and XPath queries: + +pgxml_parse(document) RETURNS bool + +This parses the document text in its parameter and returns true if the +document is well-formed XML. 
+ +xpath_string(document,query) RETURNS text +xpath_number(document,query) RETURNS float4 +xpath_bool(document,query) RETURNS bool + +These functions evaluate the XPath query on the supplied document, and +cast the result to the specified type. + + +xpath_nodeset(document,query,toptag,itemtag) RETURNS text + +This evaluates query on document and wraps the result in XML tags. If +the result is multivalued, the output will look like: + +<toptag> +<itemtag>Value 1 which could be an XML fragment</itemtag> +<itemtag>Value 2....</itemtag> +</toptag> + +If either toptag or itemtag is an empty string, the relevant tag is omitted. +There are also wrapper functions for this operation: + +xpath_nodeset(document,query) RETURNS text omits both tags. +xpath_nodeset(document,query,itemtag) RETURNS text omits toptag. + + +xpath_list(document,query,separator) RETURNS text + +This function returns multiple values separated by the specified +separator, e.g. Value 1,Value 2,Value 3 if separator=','. + +xpath_list(document,query) RETURNS text + +This is a wrapper for the above function that uses ',' as the separator. + + +xpath_table +----------- + +This is a table function which evaluates a set of XPath queries on +each of a set of documents and returns the results as a table. The +primary key field from the original document table is returned as the +first column of the result so that the resultset from xpath_table can +be readily used in joins. + +The function itself takes 5 arguments, all text. + +xpath_table(key,document,relation,xpaths,criteria) + +key - the name of the "key" field - this is just a field to be used as +the first column of the output table i.e. it identifies the record from +which each output row came. + +document - the name of the field containing the XML document + +relation - the name of the table or view containing the documents + +xpaths - multiple xpath expressions separated by | + +criteria - The contents of the where clause.
This needs to be specified, +so use "true" or "1=1" here if you want to process all the rows in the +relation. + +NB These parameters (except the XPath strings) are just substituted +into a plain SQL SELECT statement, so you have some flexibility - the +statement is + +SELECT <key>,<document> FROM <relation> WHERE <criteria> + +so those parameters can be *anything* valid in those particular +locations. The result from this SELECT needs to return exactly two +columns (which it will unless you try to list multiple fields for key +or document). Beware that this simplistic approach requires that you +validate any user-supplied values to avoid SQL injection attacks. + +Using the function + +The function has to be used in a FROM expression. This gives the following +form: + +SELECT * FROM +xpath_table('article_id', + 'article_xml', + 'articles', + '/article/author|/article/pages|/article/title', + 'date_entered > ''2003-01-01'' ') +AS t(article_id integer, author text, page_count integer, title text); + +The AS clause defines the names and types of the columns in the +virtual table. If there are more XPath queries than result columns, +the extra queries will be ignored. If there are more result columns +than XPath queries, the extra columns will be NULL. + +Note that I've said in this example that pages is an integer. The +function deals internally with string representations, so when you say +you want an integer in the output, it will take the string +representation of the XPath result and use PostgreSQL input functions +to transform it into an integer (or whatever type the AS clause +requests). An error will result if it can't do this - for example if +the result is empty - so you may wish to just stick to 'text' as the +column type if you think your data has any problems. + +The select statement doesn't need to use * alone - it can reference the +columns by name or join them to other tables. The function produces a +virtual table with which you can perform any operation you wish (e.g.
+aggregation, joining, sorting etc). So we could also have: + +SELECT t.title, p.fullname, p.email +FROM xpath_table('article_id','article_xml','articles', + '/article/title|/article/author/@id', + 'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ') + AS t(article_id integer, title text, author_id integer), + tblPeopleInfo AS p +WHERE t.author_id = p.person_id; + +as a more complicated example. Of course, you could wrap all +of this in a view for convenience. + +XSLT functions +-------------- + +The following functions are available if libxslt is installed (this is +not currently detected automatically, so you will have to amend the +Makefile) + +xslt_process(document,stylesheet,paramlist) RETURNS text + +This function applies the XSL stylesheet to the document and returns +the transformed result. The paramlist is a list of parameter +assignments to be used in the transformation, specified in the form +'a=1,b=2'. Note that this is also proof-of-concept code and the +parameter parsing is very simple-minded (e.g. parameter values cannot +contain commas!) + +Also note that if either the document or stylesheet values do not +begin with a < then they will be treated as URLs and libxslt will +fetch them. It thus follows that you can use xslt_process as a means +to fetch the contents of URLs - you should be aware of the security +implications of this. + +There is also a two-parameter version of xslt_process which does not +pass any parameters to the transformation. + +If you have any comments or suggestions, please do contact me at +jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't +guarantee a rapid response to your query!
diff --git a/contrib/xml2/pgxml.sql.in b/contrib/xml2/pgxml.sql.in new file mode 100644 index 0000000000..ff46e845b1 --- /dev/null +++ b/contrib/xml2/pgxml.sql.in @@ -0,0 +1,57 @@ +--SQL for XML parser + +CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4 + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +-- List function + +CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text + AS 'MODULE_PATHNAME' + LANGUAGE 'c' WITH (isStrict); + + +CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text +AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict); + + + +-- Wrapper functions for nodeset where no tags needed. + + +CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS +'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict); + + +CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS +'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict); + +-- Table function + +CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record + AS 'MODULE_PATHNAME' + LANGUAGE 'c' WITH (isStrict); + +-- XSLT functions +-- Delete from here to the end of the file if you are not compiling with +-- XSLT support. 
+ + +CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); + +-- the function checks for the correct argument count + +CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text + AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict); diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c new file mode 100644 index 0000000000..b4fc828798 --- /dev/null +++ b/contrib/xml2/xpath.c @@ -0,0 +1,893 @@ +/* Parser interface for DOM-based parser (libxml) rather than + stream-based SAX-type parser */ + +#include "postgres.h" +#include "fmgr.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "lib/stringinfo.h" + +/* libxml includes */ + +#include +#include +#include +#include +#include + +/* declarations */ + +static void *pgxml_palloc(size_t size); +static void *pgxml_repalloc(void *ptr, size_t size); +static void pgxml_pfree(void *ptr); +static char *pgxml_pstrdup(const char *string); +static void pgxml_errorHandler (void * ctxt, const char *msg, ...); + +void elog_error(int level, char *explain, int force); +void pgxml_parser_init(void); + +static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, + xmlChar * toptagname, xmlChar * septagname, + xmlChar * plainsep); + +text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, + xmlChar *septag, xmlChar *plainsep); + +xmlChar *pgxml_texttoxmlchar(text *textstring); + +static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath); + + +Datum pgxml_parse(PG_FUNCTION_ARGS); +Datum xpath_nodeset(PG_FUNCTION_ARGS); +Datum xpath_string(PG_FUNCTION_ARGS); +Datum xpath_number(PG_FUNCTION_ARGS); +Datum xpath_bool(PG_FUNCTION_ARGS); +Datum xpath_list(PG_FUNCTION_ARGS); +Datum xpath_table(PG_FUNCTION_ARGS); + +/* Global variables */ +char *errbuf; /* per line error buffer */ +char *pgxml_errorMsg = NULL; /* overall error message */ + +/* Convenience macros */ + +#define GET_TEXT(cstrp) 
DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp))) +#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) + +#define ERRBUF_SIZE 200 + +/* memory handling passthrough functions (e.g. palloc, pstrdup are + currently macros, and the others might become so...) */ + +static void * +pgxml_palloc(size_t size) +{ +/* elog(DEBUG1,"Alloc %d in CMC %x",size,CurrentMemoryContext); */ + return palloc(size); +} + +static void * +pgxml_repalloc(void *ptr, size_t size) +{ +/* elog(DEBUG1,"ReAlloc in CMC %x",CurrentMemoryContext);*/ + return repalloc(ptr, size); +} + +static void +pgxml_pfree(void *ptr) +{ +/* elog(DEBUG1,"Free in CMC %x",CurrentMemoryContext); */ + return pfree(ptr); +} + +static char * +pgxml_pstrdup(const char *string) +{ + return pstrdup(string); +} + +/* The error handling function. This formats an error message and sets + * a flag - an ereport will be issued prior to return + */ + +static void +pgxml_errorHandler (void * ctxt, const char *msg, ...) 
+{ + va_list args; + + va_start(args, msg); + vsnprintf(errbuf, ERRBUF_SIZE, msg, args); + va_end(args); + /* Now copy the argument across */ + if (pgxml_errorMsg == NULL) + { + pgxml_errorMsg = pstrdup(errbuf); + } +else + { + int32 xsize = strlen(pgxml_errorMsg); + pgxml_errorMsg = repalloc(pgxml_errorMsg, + (size_t) (xsize + strlen(errbuf) + 1)); + strncpy(&pgxml_errorMsg[xsize-1],errbuf,strlen(errbuf)); + pgxml_errorMsg[xsize+strlen(errbuf)-1]='\0'; + + } + memset(errbuf,0,ERRBUF_SIZE); +} + +/* This function reports the current message at the level specified */ +void elog_error(int level, char *explain, int force) +{ + if (force || (pgxml_errorMsg != NULL)) + { + if (pgxml_errorMsg == NULL) + { + ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg(explain))); + } + else + { + ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg("%s:%s",explain,pgxml_errorMsg))); + pfree(pgxml_errorMsg); + } + } +} + +void +pgxml_parser_init() +{ + /* + * This code could also set parser settings from user-supplied info. + * Quite how these settings are made is another matter :) + */ + + xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup); + xmlInitParser(); + + xmlSetGenericErrorFunc(NULL, pgxml_errorHandler); + + xmlSubstituteEntitiesDefault(1); + xmlLoadExtDtdDefaultValue = 1; + + pgxml_errorMsg = NULL; + + errbuf = palloc(200); + memset(errbuf,0,200); + +} + + +/* Returns true if document is well-formed */ + +PG_FUNCTION_INFO_V1(pgxml_parse); + +Datum +pgxml_parse(PG_FUNCTION_ARGS) +{ + /* called as pgxml_parse(document) */ + xmlDocPtr doctree; + text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + int32 docsize = VARSIZE(t) - VARHDRSZ; + + pgxml_parser_init(); + + doctree = xmlParseMemory((char *) VARDATA(t), docsize); + if (doctree == NULL) + { + xmlCleanupParser(); + PG_RETURN_BOOL(false); /* i.e. 
not well-formed */ + } + xmlCleanupParser(); + xmlFreeDoc(doctree); + PG_RETURN_BOOL(true); +} + + +static xmlChar +* +pgxmlNodeSetToText(xmlNodeSetPtr nodeset, + xmlChar * toptagname, + xmlChar * septagname, + xmlChar * plainsep) +{ + /* Function translates a nodeset into a text representation */ + + /* + * iterates over each node in the set and calls xmlNodeDump to write + * it to an xmlBuffer -from which an xmlChar * string is returned. + */ + + /* each representation is surrounded by ... */ + /* plainsep is an ordinary (not tag) seperator - if used, then + * nodes are cast to string as output method */ + + + xmlBufferPtr buf; + xmlChar *result; + int i; + + buf = xmlBufferCreate(); + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, toptagname); + xmlBufferWriteChar(buf, ">"); + } + if (nodeset != NULL) + { + for (i = 0; i < nodeset->nodeNr; i++) + { + + if (plainsep != NULL) { + xmlBufferWriteCHAR(buf, + xmlXPathCastNodeToString(nodeset->nodeTab[i])); + + /* If this isn't the last entry, write the plain sep. */ + if (i < (nodeset->nodeNr)-1) { + xmlBufferWriteChar(buf, plainsep); + } + } else { + + + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, "<"); + xmlBufferWriteCHAR(buf, septagname); + xmlBufferWriteChar(buf, ">"); + } + xmlNodeDump(buf, + nodeset->nodeTab[i]->doc, + nodeset->nodeTab[i], + 1, 0); + + if ((septagname != NULL) && (xmlStrlen(septagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } + } + } + } + + if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0)) + { + xmlBufferWriteChar(buf, ""); + } + result = xmlStrdup(buf->content); + xmlBufferFree(buf); + return result; +} + + +/* Translate a PostgreSQL "varlena" -i.e. 
a variable length parameter + * into the libxml2 representation + */ + +xmlChar * +pgxml_texttoxmlchar(text *textstring) +{ + xmlChar *res; + int32 txsize; + + txsize = VARSIZE(textstring) - VARHDRSZ; + res = (xmlChar *) palloc(txsize + 1); + memcpy((char *) res, VARDATA(textstring), txsize); + res[txsize] = '\0'; + return res; +} + +/* Public visible XPath functions */ + +/* This is a "raw" xpath function. Check that it returns child elements + * properly + */ + +PG_FUNCTION_INFO_V1(xpath_nodeset); + +Datum +xpath_nodeset(PG_FUNCTION_ARGS) +{ + xmlChar *xpath, *toptag, *septag; + int32 pathsize; + text + *xpathsupp, + *xpres; + + /* PG_GETARG_TEXT_P(0) is document buffer */ + xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ + + toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); + septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3)); + + pathsize = VARSIZE(xpathsupp) - VARHDRSZ; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + xpres = pgxml_result_to_text( + pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), + toptag,septag,NULL); + + /* xmlCleanupParser(); done by result_to_text routine */ + pfree((void *) xpath); + + if (xpres == NULL) + { + PG_RETURN_NULL(); + } + PG_RETURN_TEXT_P(xpres); +} + +// The following function is almost identical, but returns the elements in +// a list. 
+ +PG_FUNCTION_INFO_V1(xpath_list); + +Datum +xpath_list(PG_FUNCTION_ARGS) +{ + xmlChar *xpath, *plainsep; + int32 pathsize; + text + *xpathsupp, + *xpres; + + /* PG_GETARG_TEXT_P(0) is document buffer */ + xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ + + plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2)); + + pathsize = VARSIZE(xpathsupp) - VARHDRSZ; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + xpres = pgxml_result_to_text( + pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), + NULL,NULL,plainsep); + + /* xmlCleanupParser(); done by result_to_text routine */ + pfree((void *) xpath); + + if (xpres == NULL) + { + PG_RETURN_NULL(); + } + PG_RETURN_TEXT_P(xpres); +} + + +PG_FUNCTION_INFO_V1(xpath_string); + +Datum +xpath_string(PG_FUNCTION_ARGS) +{ + xmlChar *xpath; + int32 pathsize; + text + *xpathsupp, + *xpres; + + /* PG_GETARG_TEXT_P(0) is document buffer */ + xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ + + pathsize = VARSIZE(xpathsupp) - VARHDRSZ; + + /* We encapsulate the supplied path with "string()" + * = 8 chars + 1 for NUL at end */ + /* We could try casting to string using the libxml function? 
*/ + + xpath =(xmlChar *) palloc(pathsize + 9); + memcpy((char *) (xpath+7), VARDATA(xpathsupp), pathsize); + strncpy((char *) xpath, "string(",7); + xpath[pathsize+7] = ')'; + xpath[pathsize+8] = '\0'; + + xpres = pgxml_result_to_text( + pgxml_xpath(PG_GETARG_TEXT_P(0),xpath), + NULL,NULL,NULL); + + xmlCleanupParser(); + pfree((void *) xpath); + + if (xpres == NULL) + { + PG_RETURN_NULL(); + } + PG_RETURN_TEXT_P(xpres); +} + + +PG_FUNCTION_INFO_V1(xpath_number); + +Datum +xpath_number(PG_FUNCTION_ARGS) +{ + xmlChar *xpath; + int32 pathsize; + text + *xpathsupp; + + float4 fRes; + + xmlXPathObjectPtr res; + + /* PG_GETARG_TEXT_P(0) is document buffer */ + xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ + + pathsize = VARSIZE(xpathsupp) - VARHDRSZ; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath); + pfree((void *) xpath); + + if (res == NULL) + { + xmlCleanupParser(); + PG_RETURN_NULL(); + } + + fRes = xmlXPathCastToNumber(res); + xmlCleanupParser(); + if (xmlXPathIsNaN(fRes)) + { + PG_RETURN_NULL(); + } + + PG_RETURN_FLOAT4(fRes); + +} + + +PG_FUNCTION_INFO_V1(xpath_bool); + +Datum +xpath_bool(PG_FUNCTION_ARGS) +{ + xmlChar *xpath; + int32 pathsize; + text + *xpathsupp; + + int bRes; + + xmlXPathObjectPtr res; + + /* PG_GETARG_TEXT_P(0) is document buffer */ + xpathsupp = PG_GETARG_TEXT_P(1); /* XPath expression */ + + pathsize = VARSIZE(xpathsupp) - VARHDRSZ; + + xpath = pgxml_texttoxmlchar(xpathsupp); + + res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath); + pfree((void *) xpath); + + if (res == NULL) + { + xmlCleanupParser(); + PG_RETURN_BOOL(false); + } + + bRes = xmlXPathCastToBoolean(res); + xmlCleanupParser(); + PG_RETURN_BOOL(bRes); + +} + + + +/* Core function to evaluate XPath query */ + +xmlXPathObjectPtr + pgxml_xpath(text *document, xmlChar *xpath) + { + + xmlDocPtr doctree; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr res; + + xmlXPathCompExprPtr comppath; + + int32 docsize; + + + docsize = 
VARSIZE(document) - VARHDRSZ; + + pgxml_parser_init(); + + doctree = xmlParseMemory((char *) VARDATA(document), docsize); + if (doctree == NULL) + { /* not well-formed */ + return NULL; + } + + ctxt = xmlXPathNewContext(doctree); + ctxt->node = xmlDocGetRootElement(doctree); + + + /* compile the path */ + comppath = xmlXPathCompile(xpath); + if (comppath == NULL) + { + xmlCleanupParser(); + xmlFreeDoc(doctree); + elog_error(ERROR,"XPath Syntax Error",1); + + return NULL; + } + + /* Now evaluate the path expression. */ + res = xmlXPathCompiledEval(comppath, ctxt); + xmlXPathFreeCompExpr(comppath); + + if (res == NULL) + { + xmlXPathFreeContext(ctxt); + // xmlCleanupParser(); + xmlFreeDoc(doctree); + + return NULL; + } + /* xmlFreeDoc(doctree); */ + return res; + } + +text +*pgxml_result_to_text(xmlXPathObjectPtr res, + xmlChar *toptag, + xmlChar *septag, + xmlChar *plainsep) +{ + xmlChar *xpresstr; + int32 ressize; + text *xpres; + + if (res == NULL) + { + return NULL; + } + switch (res->type) + { + case XPATH_NODESET: + xpresstr = pgxmlNodeSetToText(res->nodesetval, + toptag, + septag, plainsep); + break; + + case XPATH_STRING: + xpresstr = xmlStrdup(res->stringval); + break; + + default: + elog(NOTICE, "Unsupported XQuery result: %d", res->type); + xpresstr = xmlStrdup(""); + } + + + /* Now convert this result back to text */ + ressize = strlen(xpresstr); + xpres = (text *) palloc(ressize + VARHDRSZ); + memcpy(VARDATA(xpres), xpresstr, ressize); + VARATT_SIZEP(xpres) = ressize + VARHDRSZ; + + /* Free various storage */ + xmlCleanupParser(); + /* xmlFreeDoc(doctree); -- will die at end of tuple anyway */ + + xmlFree(xpresstr); + + elog_error(ERROR,"XPath error",0); + + + return xpres; +} + +/* xpath_table is a table function. It needs some tidying (as do the + * other functions here! 
+ */ + +PG_FUNCTION_INFO_V1(xpath_table); + +Datum xpath_table(PG_FUNCTION_ARGS) +{ +/* SPI (input tuple) support */ + SPITupleTable *tuptable; + HeapTuple spi_tuple; + TupleDesc spi_tupdesc; + +/* Output tuple (tuplestore) support */ + Tuplestorestate *tupstore = NULL; + TupleDesc ret_tupdesc; + HeapTuple ret_tuple; + + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + AttInMetadata *attinmeta; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + +/* Function parameters */ + char *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0)); + char *xmlfield = GET_STR(PG_GETARG_TEXT_P(1)); + char *relname = GET_STR(PG_GETARG_TEXT_P(2)); + char *xpathset = GET_STR(PG_GETARG_TEXT_P(3)); + char *condition = GET_STR(PG_GETARG_TEXT_P(4)); + + char **values; + xmlChar **xpaths; + xmlChar *pos; + xmlChar *pathsep= "|"; + + int numpaths; + int ret; + int proc; + int i; + int j; + int rownr; /* For issuing multiple rows from one original document */ + int had_values; /* To determine end of nodeset results */ + + StringInfo querysql; + +/* We only have a valid tuple description in table function mode */ + if (rsinfo->expectedDesc == NULL) { + ereport(ERROR,(errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table must be called as a table function"))); + } + +/* The tuplestore must exist in a higher context than + * this function call (per_query_ctx is used) */ + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + +/* Create the tuplestore - SortMem is the max in-memory size before it is + * shipped to a disk heap file. Just like ... SortMem! + */ + + tupstore = tuplestore_begin_heap(true, false, SortMem); + + MemoryContextSwitchTo(oldcontext); + + /* get the requested return tuple description */ + ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc); + + /* At the moment we assume that the returned attributes make sense + * for the XPath specififed (i.e. we trust the caller). 
+ * It's not fatal if they get it wrong - the input function for the + * column type will raise an error if the path result can't be converted + * into the correct binary representation. + */ + + attinmeta = TupleDescGetAttInMetadata(ret_tupdesc); + + /* We want to materialise because it means that we don't have to + * carry libxml2 parser state between invocations of this function + */ + + /* check to see if caller supports us returning a tuplestore */ + if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("xpath_table requires Materialize mode, but it is not " + "allowed in this context"))); + + // Set return mode and allocate value space. + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setDesc = ret_tupdesc; + + values = (char **) palloc(ret_tupdesc->natts * sizeof(char *)); + + xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *)); + + /* Split XPaths. xpathset is a writable CString. */ + + /* Note that we stop splitting once we've done all needed for tupdesc */ + + numpaths=0; + pos = xpathset; + do { + xpaths[numpaths] = pos; + pos = strstr(pos,pathsep); + if (pos != NULL) { + *pos = '\0'; + pos++; + } + numpaths++; + } while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1) )); + + /* Now build query */ + + querysql = makeStringInfo(); + + /* Build initial sql statement */ + appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s", + pkeyfield, + xmlfield, + relname, + condition + ); + + + if ((ret = SPI_connect()) < 0) { + elog(ERROR, "xpath_table: SPI_connect returned %d", ret); + } + + if ((ret = SPI_exec(querysql->data,0)) != SPI_OK_SELECT) { + elog(ERROR,"xpath_table: SPI execution failed for query %s",querysql->data); + } + + proc= SPI_processed; + /* elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */ + tuptable = SPI_tuptable; + spi_tupdesc = tuptable->tupdesc; + +/* Switch out of SPI context */ + MemoryContextSwitchTo(oldcontext); + + +/* Check that SPI 
returned correct result. If you put a comma into one of + * the function parameters, this will catch it when the SPI query returns + * e.g. 3 columns. + */ + + if (spi_tupdesc->natts != 2) { + ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Expression returning multiple columns is not valid in parameter list"), + errdetail("Expected two columns in SPI result, got %d",spi_tupdesc->natts))); + } + +/* Setup the parser. Beware that this must happen in the same context as the + * cleanup - which means that any error from here on must do cleanup to + * ensure that the entity table doesn't get freed by being out of context. + */ + pgxml_parser_init(); + + /* For each row i.e. document returned from SPI */ + for (i=0; i < proc; i++) { + char *pkey; + char *xmldoc; + + xmlDocPtr doctree; + xmlXPathContextPtr ctxt; + xmlXPathObjectPtr res; + xmlChar *resstr; + + + xmlXPathCompExprPtr comppath; + + /* Extract the row data as C Strings */ + + spi_tuple = tuptable->vals[i]; + pkey = SPI_getvalue(spi_tuple, spi_tupdesc,1); + xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc,2); + + + /* Clear the values array, so that not-well-formed documents + * return NULL in all columns. + */ + + /* Note that this also means that spare columns will be NULL. */ + for (j=0; j < ret_tupdesc->natts; j++) { + values[j]= NULL; + } + + /* Insert primary key */ + values[0]=pkey; + + /* Parse the document */ + doctree = xmlParseMemory(xmldoc, strlen(xmldoc)); + + if (doctree == NULL) + { /* not well-formed, so output all-NULL tuple */ + + ret_tuple = BuildTupleFromCStrings(attinmeta, values); + oldcontext = MemoryContextSwitchTo(per_query_ctx); + tuplestore_puttuple(tupstore, ret_tuple); + MemoryContextSwitchTo(oldcontext); + heap_freetuple(ret_tuple); + } + else + { + /* New loop here - we have to deal with nodeset results */ + rownr=0; + + do { + /* Now evaluate the set of xpaths. 
*/ + had_values=0; + for (j=0; j < numpaths; j++) { + + ctxt = xmlXPathNewContext(doctree); + ctxt->node = xmlDocGetRootElement(doctree); + xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler); + + /* compile the path */ + comppath = xmlXPathCompile(xpaths[j]); + if (comppath == NULL) + { + xmlCleanupParser(); + xmlFreeDoc(doctree); + + elog_error(ERROR,"XPath Syntax Error",1); + + PG_RETURN_NULL(); /* Keep compiler happy */ + } + + /* Now evaluate the path expression. */ + res = xmlXPathCompiledEval(comppath, ctxt); + xmlXPathFreeCompExpr(comppath); + + if (res != NULL) + { + switch (res->type) + { + case XPATH_NODESET: + /* We see if this nodeset has enough nodes */ + if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr)) { + resstr = + xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); + had_values=1; + } else { + resstr = NULL; + } + + break; + + case XPATH_STRING: + resstr = xmlStrdup(res->stringval); + break; + + default: + elog(NOTICE, "Unsupported XQuery result: %d", res->type); + resstr = xmlStrdup(""); + } + + + // Insert this into the appropriate column in the result tuple. + values[j+1] = resstr; + } + xmlXPathFreeContext(ctxt); + } + // Now add the tuple to the output, if there is one. + if (had_values) { + ret_tuple = BuildTupleFromCStrings(attinmeta, values); + oldcontext = MemoryContextSwitchTo(per_query_ctx); + tuplestore_puttuple(tupstore, ret_tuple); + MemoryContextSwitchTo(oldcontext); + heap_freetuple(ret_tuple); + } + + rownr++; + + } while (had_values); + + } + + xmlFreeDoc(doctree); + + pfree(pkey); + pfree(xmldoc); + } + + xmlCleanupParser(); +/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */ + tuplestore_donestoring(tupstore); + + SPI_finish(); + + rsinfo->setResult=tupstore; + + /* + * SFRM_Materialize mode expects us to return a NULL Datum. The actual + * tuples are in our tuplestore and passed back through + * rsinfo->setResult. 
rsinfo->setDesc is set to the tuple description + * that we actually used to build our tuples with, so the caller can + * verify we did what it was expecting. + */ + return (Datum) 0; + +} diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c new file mode 100644 index 0000000000..64f9736622 --- /dev/null +++ b/contrib/xml2/xslt_proc.c @@ -0,0 +1,184 @@ +/* XSLT processing functions (requiring libxslt) */ +/* John Gray, for Torchbox 2003-04-01 */ + +#include "postgres.h" +#include "fmgr.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "miscadmin.h" + +/* libxml includes */ + +#include +#include +#include + +/* libxslt includes */ + +#include +#include +#include +#include + + +/* declarations to come from xpath.c */ + +extern void elog_error(int level, char *explain, int force); +extern void pgxml_parser_init(); +extern xmlChar *pgxml_texttoxmlchar(text *textstring); + +#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) + +/* local defs */ +static void parse_params(const char **params, text *paramstr); + +Datum xslt_process(PG_FUNCTION_ARGS); + + +#define MAXPARAMS 20 + +PG_FUNCTION_INFO_V1(xslt_process); + +Datum xslt_process(PG_FUNCTION_ARGS) { + + + const char *params[MAXPARAMS + 1]; /* +1 for the terminator */ + xsltStylesheetPtr stylesheet = NULL; + xmlDocPtr doctree; + xmlDocPtr restree; + xmlDocPtr ssdoc = NULL; + xmlChar *resstr; + int resstat; + int reslen; + + text *doct = PG_GETARG_TEXT_P(0); + text *ssheet = PG_GETARG_TEXT_P(1); + text *paramstr; + text *tres; + + + if (fcinfo->nargs == 3) + { + paramstr = PG_GETARG_TEXT_P(2); + parse_params(params,paramstr); + } + else /* No parameters */ + { + params[0] = NULL; + } + + /* Setup parser */ + pgxml_parser_init(); + + /* Check to see if document is a file or a literal */ + + if (VARDATA(doct)[0] == '<') + { + doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct)-VARHDRSZ); + } + else + { + doctree = xmlParseFile(GET_STR(doct)); + } 
+ + if (doctree == NULL) + { + xmlCleanupParser(); + elog_error(ERROR,"Error parsing XML document",0); + + PG_RETURN_NULL(); + } + + /* Same for stylesheet */ + if (VARDATA(ssheet)[0] == '<') + { + ssdoc = xmlParseMemory((char *) VARDATA(ssheet), + VARSIZE(ssheet)-VARHDRSZ); + if (ssdoc == NULL) + { + xmlFreeDoc(doctree); + xmlCleanupParser(); + elog_error(ERROR,"Error parsing stylesheet as XML document",0); + PG_RETURN_NULL(); + } + + stylesheet = xsltParseStylesheetDoc(ssdoc); + } + else + { + stylesheet = xsltParseStylesheetFile(GET_STR(ssheet)); + } + + + if (stylesheet == NULL) + { + xmlFreeDoc(doctree); + xsltCleanupGlobals(); + xmlCleanupParser(); + elog_error(ERROR,"Failed to parse stylesheet",0); + PG_RETURN_NULL(); + } + + restree = xsltApplyStylesheet(stylesheet, doctree, params); + resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); + + xsltFreeStylesheet(stylesheet); + xmlFreeDoc(restree); + xmlFreeDoc(doctree); + + xsltCleanupGlobals(); + xmlCleanupParser(); + + if (resstat < 0) { + PG_RETURN_NULL(); + } + + tres = palloc(reslen + VARHDRSZ); + memcpy(VARDATA(tres),resstr,reslen); + VARATT_SIZEP(tres) = reslen + VARHDRSZ; + + PG_RETURN_TEXT_P(tres); +} + + +void parse_params(const char **params, text *paramstr) +{ + char *pos; + char *pstr; + + int i; + char *nvsep="="; + char *itsep=","; + + pstr = GET_STR(paramstr); + + pos=pstr; + + for (i=0; i < MAXPARAMS; i++) + { + params[i] = pos; + pos = strstr(pos,nvsep); + if (pos != NULL) { + *pos = '\0'; + pos++; + } else { + params[i]=NULL; + break; + } + /* Value */ + i++; + params[i]=pos; + pos = strstr(pos,itsep); + if (pos != NULL) { + *pos = '\0'; + pos++; + } else { + break; + } + + } + if (i < MAXPARAMS) + { + params[i+1]=NULL; + } +} -- cgit v1.2.3