Line data Source code
1 : /*
2 : * contrib/xml2/xpath.c
3 : *
4 : * Parser interface for DOM-based parser (libxml) rather than
5 : * stream-based SAX-type parser
6 : */
7 : #include "postgres.h"
8 :
9 : #include "access/htup_details.h"
10 : #include "executor/spi.h"
11 : #include "fmgr.h"
12 : #include "funcapi.h"
13 : #include "lib/stringinfo.h"
14 : #include "utils/builtins.h"
15 : #include "utils/xml.h"
16 :
17 : /* libxml includes */
18 :
19 : #include <libxml/xpath.h>
20 : #include <libxml/tree.h>
21 : #include <libxml/xmlmemory.h>
22 : #include <libxml/xmlerror.h>
23 : #include <libxml/parserInternals.h>
24 :
25 2 : PG_MODULE_MAGIC_EXT(
26 : .name = "xml2",
27 : .version = PG_VERSION
28 : );
29 :
30 : /* exported for use by xslt_proc.c */
31 :
32 : PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
33 :
34 : /* workspace for pgxml_xpath() */
35 :
36 : typedef struct
37 : {
38 : xmlDocPtr doctree;
39 : xmlXPathContextPtr ctxt;
40 : xmlXPathObjectPtr res;
41 : } xpath_workspace;
42 :
43 : /* local declarations */
44 :
45 : static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
46 : xmlChar *toptagname, xmlChar *septagname,
47 : xmlChar *plainsep);
48 :
49 : static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
50 : xmlChar *septag, xmlChar *plainsep);
51 :
52 : static xmlChar *pgxml_texttoxmlchar(text *textstring);
53 :
54 : static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
55 : PgXmlErrorContext *xmlerrcxt);
56 :
57 : static void cleanup_workspace(volatile xpath_workspace *workspace);
58 :
59 :
60 : /*
61 : * Initialize for xml parsing.
62 : *
63 : * As with the underlying pg_xml_init function, calls to this MUST be followed
64 : * by a PG_TRY block that guarantees that pg_xml_done is called.
65 : */
66 : PgXmlErrorContext *
67 22 : pgxml_parser_init(PgXmlStrictness strictness)
68 : {
69 : PgXmlErrorContext *xmlerrcxt;
70 :
71 : /* Set up error handling (we share the core's error handler) */
72 22 : xmlerrcxt = pg_xml_init(strictness);
73 :
74 : /* Note: we're assuming an elog cannot be thrown by the following calls */
75 :
76 : /* Initialize libxml */
77 22 : xmlInitParser();
78 :
79 22 : return xmlerrcxt;
80 : }
81 :
82 :
83 : /* Encodes special characters (<, >, &, " and \r) as XML entities */
84 :
85 2 : PG_FUNCTION_INFO_V1(xml_encode_special_chars);
86 :
87 : Datum
88 0 : xml_encode_special_chars(PG_FUNCTION_ARGS)
89 : {
90 0 : text *tin = PG_GETARG_TEXT_PP(0);
91 : text *tout;
92 0 : volatile xmlChar *tt = NULL;
93 : PgXmlErrorContext *xmlerrcxt;
94 :
95 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
96 :
97 0 : PG_TRY();
98 : {
99 : xmlChar *ts;
100 :
101 0 : ts = pgxml_texttoxmlchar(tin);
102 :
103 0 : tt = xmlEncodeSpecialChars(NULL, ts);
104 0 : if (tt == NULL || pg_xml_error_occurred(xmlerrcxt))
105 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
106 : "could not allocate xmlChar");
107 0 : pfree(ts);
108 :
109 0 : tout = cstring_to_text((char *) tt);
110 : }
111 0 : PG_CATCH();
112 : {
113 0 : if (tt != NULL)
114 0 : xmlFree((xmlChar *) tt);
115 :
116 0 : pg_xml_done(xmlerrcxt, true);
117 :
118 0 : PG_RE_THROW();
119 : }
120 0 : PG_END_TRY();
121 :
122 0 : if (tt != NULL)
123 0 : xmlFree((xmlChar *) tt);
124 :
125 0 : pg_xml_done(xmlerrcxt, false);
126 :
127 0 : PG_RETURN_TEXT_P(tout);
128 : }
129 :
130 : /*
131 : * Function translates a nodeset into a text representation
132 : *
133 : * iterates over each node in the set and calls xmlNodeDump to write it to
134 : * an xmlBuffer -from which an xmlChar * string is returned.
135 : *
136 : * each representation is surrounded by <tagname> ... </tagname>
137 : *
138 : * plainsep is an ordinary (not tag) separator - if used, then nodes are
139 : * cast to string as output method
140 : */
141 : static xmlChar *
142 10 : pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
143 : xmlChar *toptagname,
144 : xmlChar *septagname,
145 : xmlChar *plainsep)
146 : {
147 10 : volatile xmlBufferPtr buf = NULL;
148 : xmlChar *result;
149 : int i;
150 : PgXmlErrorContext *xmlerrcxt;
151 :
152 : /* spin some error handling */
153 10 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
154 :
155 10 : PG_TRY();
156 : {
157 10 : buf = xmlBufferCreate();
158 :
159 10 : if (buf == NULL || pg_xml_error_occurred(xmlerrcxt))
160 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
161 : "could not allocate xmlBuffer");
162 :
163 10 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
164 : {
165 2 : xmlBufferWriteChar(buf, "<");
166 2 : xmlBufferWriteCHAR(buf, toptagname);
167 2 : xmlBufferWriteChar(buf, ">");
168 : }
169 10 : if (nodeset != NULL)
170 : {
171 30 : for (i = 0; i < nodeset->nodeNr; i++)
172 : {
173 20 : if (plainsep != NULL)
174 : {
175 8 : xmlBufferWriteCHAR(buf,
176 8 : xmlXPathCastNodeToString(nodeset->nodeTab[i]));
177 :
178 : /* If this isn't the last entry, write the plain sep. */
179 8 : if (i < (nodeset->nodeNr) - 1)
180 4 : xmlBufferWriteChar(buf, (char *) plainsep);
181 : }
182 : else
183 : {
184 12 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
185 : {
186 8 : xmlBufferWriteChar(buf, "<");
187 8 : xmlBufferWriteCHAR(buf, septagname);
188 8 : xmlBufferWriteChar(buf, ">");
189 : }
190 12 : xmlNodeDump(buf,
191 12 : nodeset->nodeTab[i]->doc,
192 12 : nodeset->nodeTab[i],
193 : 1, 0);
194 :
195 12 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
196 : {
197 8 : xmlBufferWriteChar(buf, "</");
198 8 : xmlBufferWriteCHAR(buf, septagname);
199 8 : xmlBufferWriteChar(buf, ">");
200 : }
201 : }
202 : }
203 : }
204 :
205 10 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
206 : {
207 2 : xmlBufferWriteChar(buf, "</");
208 2 : xmlBufferWriteCHAR(buf, toptagname);
209 2 : xmlBufferWriteChar(buf, ">");
210 : }
211 :
212 10 : result = xmlStrdup(xmlBufferContent(buf));
213 10 : if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
214 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
215 : "could not allocate result");
216 : }
217 0 : PG_CATCH();
218 : {
219 0 : if (buf)
220 0 : xmlBufferFree(buf);
221 :
222 0 : pg_xml_done(xmlerrcxt, true);
223 :
224 0 : PG_RE_THROW();
225 : }
226 10 : PG_END_TRY();
227 :
228 10 : xmlBufferFree(buf);
229 10 : pg_xml_done(xmlerrcxt, false);
230 :
231 10 : return result;
232 : }
233 :
234 :
235 : /* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
236 : * into the libxml2 representation
237 : */
238 : static xmlChar *
239 26 : pgxml_texttoxmlchar(text *textstring)
240 : {
241 26 : return (xmlChar *) text_to_cstring(textstring);
242 : }
243 :
244 : /* Publicly visible XPath functions */
245 :
246 : /*
247 : * This is a "raw" xpath function. Check that it returns child elements
248 : * properly
249 : */
250 4 : PG_FUNCTION_INFO_V1(xpath_nodeset);
251 :
252 : Datum
253 6 : xpath_nodeset(PG_FUNCTION_ARGS)
254 : {
255 6 : text *document = PG_GETARG_TEXT_PP(0);
256 6 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
257 6 : xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
258 6 : xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
259 : xmlChar *xpath;
260 : text *xpres;
261 : volatile xpath_workspace *workspace;
262 : PgXmlErrorContext *xmlerrcxt;
263 :
264 6 : xpath = pgxml_texttoxmlchar(xpathsupp);
265 6 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
266 :
267 6 : PG_TRY();
268 : {
269 6 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
270 6 : xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
271 : }
272 0 : PG_CATCH();
273 : {
274 0 : if (workspace)
275 0 : cleanup_workspace(workspace);
276 :
277 0 : pg_xml_done(xmlerrcxt, true);
278 0 : PG_RE_THROW();
279 : }
280 6 : PG_END_TRY();
281 :
282 6 : cleanup_workspace(workspace);
283 6 : pg_xml_done(xmlerrcxt, false);
284 :
285 6 : pfree(xpath);
286 :
287 6 : if (xpres == NULL)
288 0 : PG_RETURN_NULL();
289 6 : PG_RETURN_TEXT_P(xpres);
290 : }
291 :
292 : /*
293 : * The following function is almost identical, but returns the elements in
294 : * a list.
295 : */
296 4 : PG_FUNCTION_INFO_V1(xpath_list);
297 :
298 : Datum
299 4 : xpath_list(PG_FUNCTION_ARGS)
300 : {
301 4 : text *document = PG_GETARG_TEXT_PP(0);
302 4 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
303 4 : xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
304 : xmlChar *xpath;
305 : text *xpres;
306 : volatile xpath_workspace *workspace;
307 : PgXmlErrorContext *xmlerrcxt;
308 :
309 4 : xpath = pgxml_texttoxmlchar(xpathsupp);
310 4 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
311 :
312 4 : PG_TRY();
313 : {
314 4 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
315 4 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep);
316 : }
317 0 : PG_CATCH();
318 : {
319 0 : if (workspace)
320 0 : cleanup_workspace(workspace);
321 :
322 0 : pg_xml_done(xmlerrcxt, true);
323 0 : PG_RE_THROW();
324 : }
325 4 : PG_END_TRY();
326 :
327 4 : cleanup_workspace(workspace);
328 4 : pg_xml_done(xmlerrcxt, false);
329 :
330 4 : pfree(xpath);
331 :
332 4 : if (xpres == NULL)
333 0 : PG_RETURN_NULL();
334 4 : PG_RETURN_TEXT_P(xpres);
335 : }
336 :
337 :
338 4 : PG_FUNCTION_INFO_V1(xpath_string);
339 :
340 : Datum
341 2 : xpath_string(PG_FUNCTION_ARGS)
342 : {
343 2 : text *document = PG_GETARG_TEXT_PP(0);
344 2 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
345 : xmlChar *xpath;
346 : int32 pathsize;
347 : text *xpres;
348 : volatile xpath_workspace *workspace;
349 : PgXmlErrorContext *xmlerrcxt;
350 :
351 2 : pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
352 :
353 : /*
354 : * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
355 : * at end
356 : */
357 : /* We could try casting to string using the libxml function? */
358 :
359 2 : xpath = (xmlChar *) palloc(pathsize + 9);
360 2 : memcpy(xpath, "string(", 7);
361 2 : memcpy(xpath + 7, VARDATA_ANY(xpathsupp), pathsize);
362 2 : xpath[pathsize + 7] = ')';
363 2 : xpath[pathsize + 8] = '\0';
364 :
365 2 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
366 :
367 2 : PG_TRY();
368 : {
369 2 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
370 2 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
371 : }
372 0 : PG_CATCH();
373 : {
374 0 : if (workspace)
375 0 : cleanup_workspace(workspace);
376 :
377 0 : pg_xml_done(xmlerrcxt, true);
378 0 : PG_RE_THROW();
379 : }
380 2 : PG_END_TRY();
381 :
382 2 : cleanup_workspace(workspace);
383 2 : pg_xml_done(xmlerrcxt, false);
384 :
385 2 : pfree(xpath);
386 :
387 2 : if (xpres == NULL)
388 2 : PG_RETURN_NULL();
389 0 : PG_RETURN_TEXT_P(xpres);
390 : }
391 :
392 :
393 2 : PG_FUNCTION_INFO_V1(xpath_number);
394 :
395 : Datum
396 0 : xpath_number(PG_FUNCTION_ARGS)
397 : {
398 0 : text *document = PG_GETARG_TEXT_PP(0);
399 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
400 : xmlChar *xpath;
401 0 : float4 fRes = 0.0;
402 0 : bool isNull = false;
403 0 : volatile xpath_workspace *workspace = NULL;
404 : PgXmlErrorContext *xmlerrcxt;
405 :
406 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
407 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
408 :
409 0 : PG_TRY();
410 : {
411 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
412 0 : pfree(xpath);
413 :
414 0 : if (workspace->res == NULL)
415 0 : isNull = true;
416 : else
417 0 : fRes = xmlXPathCastToNumber(workspace->res);
418 : }
419 0 : PG_CATCH();
420 : {
421 0 : if (workspace)
422 0 : cleanup_workspace(workspace);
423 :
424 0 : pg_xml_done(xmlerrcxt, true);
425 0 : PG_RE_THROW();
426 : }
427 0 : PG_END_TRY();
428 :
429 0 : cleanup_workspace(workspace);
430 0 : pg_xml_done(xmlerrcxt, false);
431 :
432 0 : if (isNull || xmlXPathIsNaN(fRes))
433 0 : PG_RETURN_NULL();
434 :
435 0 : PG_RETURN_FLOAT4(fRes);
436 : }
437 :
438 :
439 2 : PG_FUNCTION_INFO_V1(xpath_bool);
440 :
441 : Datum
442 0 : xpath_bool(PG_FUNCTION_ARGS)
443 : {
444 0 : text *document = PG_GETARG_TEXT_PP(0);
445 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
446 : xmlChar *xpath;
447 : int bRes;
448 0 : volatile xpath_workspace *workspace = NULL;
449 : PgXmlErrorContext *xmlerrcxt;
450 :
451 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
452 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
453 :
454 0 : PG_TRY();
455 : {
456 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
457 0 : pfree(xpath);
458 :
459 0 : if (workspace->res == NULL)
460 0 : bRes = 0;
461 : else
462 0 : bRes = xmlXPathCastToBoolean(workspace->res);
463 : }
464 0 : PG_CATCH();
465 : {
466 0 : if (workspace)
467 0 : cleanup_workspace(workspace);
468 :
469 0 : pg_xml_done(xmlerrcxt, true);
470 0 : PG_RE_THROW();
471 : }
472 0 : PG_END_TRY();
473 :
474 0 : cleanup_workspace(workspace);
475 0 : pg_xml_done(xmlerrcxt, false);
476 :
477 0 : PG_RETURN_BOOL(bRes);
478 : }
479 :
480 :
481 :
482 : /* Core function to evaluate XPath query */
483 :
484 : static xpath_workspace *
485 12 : pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
486 : {
487 12 : int32 docsize = VARSIZE_ANY_EXHDR(document);
488 : xmlXPathCompExprPtr comppath;
489 : xpath_workspace *workspace = (xpath_workspace *)
490 12 : palloc0(sizeof(xpath_workspace));
491 :
492 12 : workspace->doctree = NULL;
493 12 : workspace->ctxt = NULL;
494 12 : workspace->res = NULL;
495 :
496 12 : workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
497 : docsize, NULL, NULL,
498 : XML_PARSE_NOENT);
499 12 : if (workspace->doctree != NULL)
500 : {
501 10 : workspace->ctxt = xmlXPathNewContext(workspace->doctree);
502 10 : workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
503 :
504 : /* compile the path */
505 10 : comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
506 10 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
507 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
508 : "XPath Syntax Error");
509 :
510 : /* Now evaluate the path expression. */
511 10 : workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
512 :
513 10 : xmlXPathFreeCompExpr(comppath);
514 : }
515 :
516 12 : return workspace;
517 : }
518 :
519 : /* Clean up after processing the result of pgxml_xpath() */
520 : static void
521 12 : cleanup_workspace(volatile xpath_workspace *workspace)
522 : {
523 12 : if (workspace->res)
524 10 : xmlXPathFreeObject(workspace->res);
525 12 : workspace->res = NULL;
526 12 : if (workspace->ctxt)
527 10 : xmlXPathFreeContext(workspace->ctxt);
528 12 : workspace->ctxt = NULL;
529 12 : if (workspace->doctree)
530 10 : xmlFreeDoc(workspace->doctree);
531 12 : workspace->doctree = NULL;
532 12 : }
533 :
534 : static text *
535 12 : pgxml_result_to_text(xmlXPathObjectPtr res,
536 : xmlChar *toptag,
537 : xmlChar *septag,
538 : xmlChar *plainsep)
539 : {
540 12 : volatile xmlChar *xpresstr = NULL;
541 : PgXmlErrorContext *xmlerrcxt;
542 : text *xpres;
543 :
544 12 : if (res == NULL)
545 2 : return NULL;
546 :
547 : /* spin some error handling */
548 10 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
549 :
550 10 : PG_TRY();
551 : {
552 10 : switch (res->type)
553 : {
554 10 : case XPATH_NODESET:
555 10 : xpresstr = pgxmlNodeSetToText(res->nodesetval,
556 : toptag,
557 : septag, plainsep);
558 10 : break;
559 :
560 0 : case XPATH_STRING:
561 0 : xpresstr = xmlStrdup(res->stringval);
562 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
563 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
564 : "could not allocate result");
565 0 : break;
566 :
567 0 : default:
568 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
569 0 : xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
570 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
571 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
572 : "could not allocate result");
573 : }
574 :
575 : /* Now convert this result back to text */
576 10 : xpres = cstring_to_text((char *) xpresstr);
577 : }
578 0 : PG_CATCH();
579 : {
580 0 : if (xpresstr != NULL)
581 0 : xmlFree((xmlChar *) xpresstr);
582 :
583 0 : pg_xml_done(xmlerrcxt, true);
584 :
585 0 : PG_RE_THROW();
586 : }
587 10 : PG_END_TRY();
588 :
589 : /* Free various storage */
590 10 : xmlFree((xmlChar *) xpresstr);
591 :
592 10 : pg_xml_done(xmlerrcxt, false);
593 :
594 10 : return xpres;
595 : }
596 :
597 : /*
598 : * xpath_table is a table function. It needs some tidying (as do the
599 : * other functions here!
600 : */
601 4 : PG_FUNCTION_INFO_V1(xpath_table);
602 :
603 : Datum
604 10 : xpath_table(PG_FUNCTION_ARGS)
605 : {
606 : /* Function parameters */
607 10 : char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
608 10 : char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
609 10 : char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
610 10 : char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
611 10 : char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
612 :
613 : /* SPI (input tuple) support */
614 : SPITupleTable *tuptable;
615 : HeapTuple spi_tuple;
616 : TupleDesc spi_tupdesc;
617 :
618 :
619 10 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
620 : AttInMetadata *attinmeta;
621 :
622 : char **values;
623 : xmlChar **xpaths;
624 : char *pos;
625 10 : const char *pathsep = "|";
626 :
627 : int numpaths;
628 : int ret;
629 : uint64 proc;
630 : int j;
631 : int rownr; /* For issuing multiple rows from one original
632 : * document */
633 : bool had_values; /* To determine end of nodeset results */
634 : StringInfoData query_buf;
635 : PgXmlErrorContext *xmlerrcxt;
636 10 : volatile xmlDocPtr doctree = NULL;
637 :
638 10 : InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
639 :
640 : /* must have at least one output column (for the pkey) */
641 10 : if (rsinfo->setDesc->natts < 1)
642 0 : ereport(ERROR,
643 : (errcode(ERRCODE_SYNTAX_ERROR),
644 : errmsg("xpath_table must have at least one output column")));
645 :
646 : /*
647 : * At the moment we assume that the returned attributes make sense for the
648 : * XPath specified (i.e. we trust the caller). It's not fatal if they get
649 : * it wrong - the input function for the column type will raise an error
650 : * if the path result can't be converted into the correct binary
651 : * representation.
652 : */
653 :
654 10 : attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
655 :
656 10 : values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
657 10 : xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
658 :
659 : /*
660 : * Split XPaths. xpathset is a writable CString.
661 : *
662 : * Note that we stop splitting once we've done all needed for tupdesc
663 : */
664 10 : numpaths = 0;
665 10 : pos = xpathset;
666 14 : while (numpaths < (rsinfo->setDesc->natts - 1))
667 : {
668 10 : xpaths[numpaths++] = (xmlChar *) pos;
669 10 : pos = strstr(pos, pathsep);
670 10 : if (pos != NULL)
671 : {
672 4 : *pos = '\0';
673 4 : pos++;
674 : }
675 : else
676 6 : break;
677 : }
678 :
679 : /* Now build query */
680 10 : initStringInfo(&query_buf);
681 :
682 : /* Build initial sql statement */
683 10 : appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
684 : pkeyfield,
685 : xmlfield,
686 : relname,
687 : condition);
688 :
689 10 : SPI_connect();
690 :
691 10 : if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
692 0 : elog(ERROR, "xpath_table: SPI execution failed for query %s",
693 : query_buf.data);
694 :
695 10 : proc = SPI_processed;
696 10 : tuptable = SPI_tuptable;
697 10 : spi_tupdesc = tuptable->tupdesc;
698 :
699 : /*
700 : * Check that SPI returned correct result. If you put a comma into one of
701 : * the function parameters, this will catch it when the SPI query returns
702 : * e.g. 3 columns.
703 : */
704 10 : if (spi_tupdesc->natts != 2)
705 : {
706 0 : ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
707 : errmsg("expression returning multiple columns is not valid in parameter list"),
708 : errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
709 : }
710 :
711 : /*
712 : * Setup the parser. This should happen after we are done evaluating the
713 : * query, in case it calls functions that set up libxml differently.
714 : */
715 10 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
716 :
717 10 : PG_TRY();
718 : {
719 : /* For each row i.e. document returned from SPI */
720 : uint64 i;
721 :
722 20 : for (i = 0; i < proc; i++)
723 : {
724 : char *pkey;
725 : char *xmldoc;
726 : xmlXPathContextPtr ctxt;
727 : xmlXPathObjectPtr res;
728 : xmlChar *resstr;
729 : xmlXPathCompExprPtr comppath;
730 : HeapTuple ret_tuple;
731 :
732 : /* Extract the row data as C Strings */
733 10 : spi_tuple = tuptable->vals[i];
734 10 : pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
735 10 : xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
736 :
737 : /*
738 : * Clear the values array, so that not-well-formed documents
739 : * return NULL in all columns. Note that this also means that
740 : * spare columns will be NULL.
741 : */
742 30 : for (j = 0; j < rsinfo->setDesc->natts; j++)
743 20 : values[j] = NULL;
744 :
745 : /* Insert primary key */
746 10 : values[0] = pkey;
747 :
748 : /* Parse the document */
749 10 : if (xmldoc)
750 10 : doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
751 : NULL, NULL,
752 : XML_PARSE_NOENT);
753 : else /* treat NULL as not well-formed */
754 0 : doctree = NULL;
755 :
756 10 : if (doctree == NULL)
757 : {
758 : /* not well-formed, so output all-NULL tuple */
759 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
760 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
761 0 : heap_freetuple(ret_tuple);
762 : }
763 : else
764 : {
765 : /* New loop here - we have to deal with nodeset results */
766 10 : rownr = 0;
767 :
768 : do
769 : {
770 : /* Now evaluate the set of xpaths. */
771 16 : had_values = false;
772 36 : for (j = 0; j < numpaths; j++)
773 : {
774 20 : ctxt = xmlXPathNewContext(doctree);
775 20 : if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
776 0 : xml_ereport(xmlerrcxt,
777 : ERROR, ERRCODE_OUT_OF_MEMORY,
778 : "could not allocate XPath context");
779 :
780 20 : ctxt->node = xmlDocGetRootElement(doctree);
781 :
782 : /* compile the path */
783 20 : comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
784 20 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
785 0 : xml_ereport(xmlerrcxt, ERROR,
786 : ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
787 : "XPath Syntax Error");
788 :
789 : /* Now evaluate the path expression. */
790 20 : res = xmlXPathCompiledEval(comppath, ctxt);
791 20 : xmlXPathFreeCompExpr(comppath);
792 :
793 20 : if (res != NULL)
794 : {
795 20 : switch (res->type)
796 : {
797 20 : case XPATH_NODESET:
798 : /* We see if this nodeset has enough nodes */
799 20 : if (res->nodesetval != NULL &&
800 20 : rownr < res->nodesetval->nodeNr)
801 : {
802 8 : resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
803 8 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
804 0 : xml_ereport(xmlerrcxt,
805 : ERROR, ERRCODE_OUT_OF_MEMORY,
806 : "could not allocate result");
807 8 : had_values = true;
808 : }
809 : else
810 12 : resstr = NULL;
811 :
812 20 : break;
813 :
814 0 : case XPATH_STRING:
815 0 : resstr = xmlStrdup(res->stringval);
816 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
817 0 : xml_ereport(xmlerrcxt,
818 : ERROR, ERRCODE_OUT_OF_MEMORY,
819 : "could not allocate result");
820 0 : break;
821 :
822 0 : default:
823 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
824 0 : resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
825 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
826 0 : xml_ereport(xmlerrcxt,
827 : ERROR, ERRCODE_OUT_OF_MEMORY,
828 : "could not allocate result");
829 : }
830 :
831 : /*
832 : * Insert this into the appropriate column in the
833 : * result tuple.
834 : */
835 20 : values[j + 1] = (char *) resstr;
836 : }
837 20 : xmlXPathFreeContext(ctxt);
838 : }
839 :
840 : /* Now add the tuple to the output, if there is one. */
841 16 : if (had_values)
842 : {
843 6 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
844 6 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
845 6 : heap_freetuple(ret_tuple);
846 : }
847 :
848 16 : rownr++;
849 16 : } while (had_values);
850 : }
851 :
852 10 : if (doctree != NULL)
853 10 : xmlFreeDoc(doctree);
854 10 : doctree = NULL;
855 :
856 10 : if (pkey)
857 10 : pfree(pkey);
858 10 : if (xmldoc)
859 10 : pfree(xmldoc);
860 : }
861 : }
862 0 : PG_CATCH();
863 : {
864 0 : if (doctree != NULL)
865 0 : xmlFreeDoc(doctree);
866 :
867 0 : pg_xml_done(xmlerrcxt, true);
868 :
869 0 : PG_RE_THROW();
870 : }
871 10 : PG_END_TRY();
872 :
873 10 : if (doctree != NULL)
874 0 : xmlFreeDoc(doctree);
875 :
876 10 : pg_xml_done(xmlerrcxt, false);
877 :
878 10 : SPI_finish();
879 :
880 : /*
881 : * SFRM_Materialize mode expects us to return a NULL Datum. The actual
882 : * tuples are in our tuplestore and passed back through rsinfo->setResult.
883 : * rsinfo->setDesc is set to the tuple description that we actually used
884 : * to build our tuples with, so the caller can verify we did what it was
885 : * expecting.
886 : */
887 10 : return (Datum) 0;
888 : }
|