Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "commands/dbcommands.h"
88 : #include "executor/spi.h"
89 : #include "executor/tablefunc.h"
90 : #include "fmgr.h"
91 : #include "lib/stringinfo.h"
92 : #include "libpq/pqformat.h"
93 : #include "mb/pg_wchar.h"
94 : #include "miscadmin.h"
95 : #include "nodes/execnodes.h"
96 : #include "nodes/miscnodes.h"
97 : #include "nodes/nodeFuncs.h"
98 : #include "utils/array.h"
99 : #include "utils/builtins.h"
100 : #include "utils/date.h"
101 : #include "utils/datetime.h"
102 : #include "utils/lsyscache.h"
103 : #include "utils/rel.h"
104 : #include "utils/syscache.h"
105 : #include "utils/xml.h"
106 :
107 :
108 : /* GUC variables */
109 : int xmlbinary = XMLBINARY_BASE64;
110 : int xmloption = XMLOPTION_CONTENT;
111 :
112 : #ifdef USE_LIBXML
113 :
114 : /* random number to identify PgXmlErrorContext */
115 : #define ERRCXT_MAGIC 68275028
116 :
/*
 * Error-handling state for one use of libxml.  Per the field comments
 * below, it records the strictness requested from pg_xml_init, the error
 * status/message accumulated so far, and the libxml handler settings that
 * were in force beforehand so that they can be put back later.
 */
117 : struct PgXmlErrorContext
118 : {
/* identification tag; see ERRCXT_MAGIC (presumably used for sanity checks -- verify) */
119 : int magic;
120 : /* strictness argument passed to pg_xml_init */
121 : PgXmlStrictness strictness;
122 : /* current error status and accumulated message, if any */
123 : bool err_occurred;
124 : StringInfoData err_buf;
125 : /* previous libxml error handling state (saved by pg_xml_init) */
126 : xmlStructuredErrorFunc saved_errfunc;
127 : void *saved_errcxt;
128 : /* previous libxml entity handler (saved by pg_xml_init) */
129 : xmlExternalEntityLoader saved_entityfunc;
130 : };
131 :
132 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133 : xmlParserCtxtPtr ctxt);
134 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135 : int sqlcode, const char *msg);
136 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137 : static int errdetail_for_xml_code(int code);
138 : static void chopStringInfoNewlines(StringInfo str);
139 : static void appendStringInfoLineSeparator(StringInfo str);
140 :
141 : #ifdef USE_LIBXMLCONTEXT
142 :
143 : static MemoryContext LibxmlContext = NULL;
144 :
145 : static void xml_memory_init(void);
146 : static void *xml_palloc(size_t size);
147 : static void *xml_repalloc(void *ptr, size_t size);
148 : static void xml_pfree(void *ptr);
149 : static char *xml_pstrdup(const char *string);
150 : #endif /* USE_LIBXMLCONTEXT */
151 :
152 : static xmlChar *xml_text2xmlChar(text *in);
153 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
154 : xmlChar **version, xmlChar **encoding, int *standalone);
155 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156 : pg_enc encoding, int standalone);
157 : static bool xml_doctype_in_content(const xmlChar *str);
158 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159 : bool preserve_whitespace, int encoding,
160 : XmlOptionType *parsed_xmloptiontype,
161 : xmlNodePtr *parsed_nodes,
162 : Node *escontext);
163 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165 : ArrayBuildState *astate,
166 : PgXmlErrorContext *xmlerrcxt);
167 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168 : #endif /* USE_LIBXML */
169 :
170 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
171 : const char *xmlschema, const char *targetns,
172 : bool top_level);
173 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
174 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
175 : const char *xmlschema, bool nulls, bool tableforest,
176 : const char *targetns, bool top_level);
177 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178 : bool nulls, bool tableforest, const char *targetns);
179 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180 : List *relid_list, bool nulls,
181 : bool tableforest, const char *targetns);
182 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183 : bool nulls, bool tableforest,
184 : const char *targetns);
185 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189 : char *tablename, bool nulls, bool tableforest,
190 : const char *targetns, bool top_level);
191 :
192 : /* XMLTABLE support */
193 : #ifdef USE_LIBXML
194 : /* random number to identify XmlTableContext */
195 : #define XMLTABLE_CONTEXT_MAGIC 46922182
/*
 * Working state used by the XMLTABLE support routines declared below.
 * NOTE(review): the per-field notes are inferred from the field names and
 * types only; confirm against the XmlTable* functions before relying on them.
 */
196 : typedef struct XmlTableBuilderData
197 : {
/* identification tag; see XMLTABLE_CONTEXT_MAGIC */
198 : int magic;
/* presumably the number of output columns (cf. XmlTableInitOpaque's natts) -- verify */
199 : int natts;
/* presumably the count of rows fetched so far -- verify */
200 : long int row_count;
/* libxml error-handling context */
201 : PgXmlErrorContext *xmlerrcxt;
/* libxml parser context, parsed document, and XPath evaluation objects */
202 : xmlParserCtxtPtr ctxt;
203 : xmlDocPtr doc;
204 : xmlXPathContextPtr xpathcxt;
205 : xmlXPathCompExprPtr xpathcomp;
206 : xmlXPathObjectPtr xpathobj;
/* array of compiled XPath expressions; presumably one per column -- verify */
207 : xmlXPathCompExprPtr *xpathscomp;
208 : } XmlTableBuilderData;
209 : #endif
210 :
211 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214 : const char *uri);
215 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217 : const char *path, int colnum);
218 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
219 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220 : Oid typid, int32 typmod, bool *isnull);
221 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222 :
/*
 * Callback table that exposes the XmlTable* functions declared above
 * through the TableFuncRoutine interface.  Each member is bound to the
 * static function of the matching name in this file.
 */
223 : const TableFuncRoutine XmlTableRoutine =
224 : {
225 : .InitOpaque = XmlTableInitOpaque,
226 : .SetDocument = XmlTableSetDocument,
227 : .SetNamespace = XmlTableSetNamespace,
228 : .SetRowFilter = XmlTableSetRowFilter,
229 : .SetColumnFilter = XmlTableSetColumnFilter,
230 : .FetchRow = XmlTableFetchRow,
231 : .GetValue = XmlTableGetValue,
232 : .DestroyOpaque = XmlTableDestroyOpaque
233 : };
234 :
/*
 * Report ERROR with ERRCODE_FEATURE_NOT_SUPPORTED.  The functions in this
 * file invoke it in their "#else" branches, i.e. when USE_LIBXML is not
 * defined.
 */
235 : #define NO_XML_SUPPORT() \
236 : ereport(ERROR, \
237 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
238 : errmsg("unsupported XML feature"), \
239 : errdetail("This functionality requires the server to be built with libxml support.")))
240 :
241 :
242 : /* from SQL/XML:2008 section 4.9 */
243 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
244 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
245 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
246 :
247 :
248 : #ifdef USE_LIBXML
249 :
250 : static int
251 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
252 : {
253 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
254 :
255 0 : if (encoding < 0)
256 0 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
258 : errmsg("invalid encoding name \"%s\"",
259 : (const char *) encoding_name)));
260 0 : return encoding;
261 : }
262 : #endif
263 :
264 :
265 : /*
266 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
267 : * we use the conversion function for the text datatype.
268 : *
269 : * This is only acceptable so long as xmltype and text use the same
270 : * representation.
271 : */
272 : Datum
273 898 : xml_in(PG_FUNCTION_ARGS)
274 : {
275 : #ifdef USE_LIBXML
276 898 : char *s = PG_GETARG_CSTRING(0);
277 : xmltype *vardata;
278 : xmlDocPtr doc;
279 :
280 : /* Build the result object. */
281 898 : vardata = (xmltype *) cstring_to_text(s);
282 :
283 : /*
284 : * Parse the data to check if it is well-formed XML data.
285 : *
286 : * Note: we don't need to worry about whether a soft error is detected.
287 : */
288 898 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289 898 : NULL, NULL, fcinfo->context);
290 850 : if (doc != NULL)
291 838 : xmlFreeDoc(doc);
292 :
293 850 : PG_RETURN_XML_P(vardata);
294 : #else
295 : NO_XML_SUPPORT();
296 : return 0;
297 : #endif
298 : }
299 :
300 :
301 : #define PG_XML_DEFAULT_VERSION "1.0"
302 :
303 :
304 : /*
305 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306 : * time being we use the conversion function for the text datatype.
307 : *
308 : * This is only acceptable so long as xmltype and text use the same
309 : * representation.
310 : */
311 : static char *
312 23752 : xml_out_internal(xmltype *x, pg_enc target_encoding)
313 : {
314 23752 : char *str = text_to_cstring((text *) x);
315 :
316 : #ifdef USE_LIBXML
317 23752 : size_t len = strlen(str);
318 : xmlChar *version;
319 : int standalone;
320 : int res_code;
321 :
322 23752 : if ((res_code = parse_xml_decl((xmlChar *) str,
323 : &len, &version, NULL, &standalone)) == 0)
324 : {
325 : StringInfoData buf;
326 :
327 23752 : initStringInfo(&buf);
328 :
329 23752 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
330 : {
331 : /*
332 : * If we are not going to produce an XML declaration, eat a single
333 : * newline in the original string to prevent empty first lines in
334 : * the output.
335 : */
336 23704 : if (*(str + len) == '\n')
337 6 : len += 1;
338 : }
339 23752 : appendStringInfoString(&buf, str + len);
340 :
341 23752 : pfree(str);
342 :
343 23752 : return buf.data;
344 : }
345 :
346 0 : ereport(WARNING,
347 : errcode(ERRCODE_DATA_CORRUPTED),
348 : errmsg_internal("could not parse XML declaration in stored value"),
349 : errdetail_for_xml_code(res_code));
350 : #endif
351 0 : return str;
352 : }
353 :
354 :
355 : Datum
356 23488 : xml_out(PG_FUNCTION_ARGS)
357 : {
358 23488 : xmltype *x = PG_GETARG_XML_P(0);
359 :
360 : /*
361 : * xml_out removes the encoding property in all cases. This is because we
362 : * cannot control from here whether the datum will be converted to a
363 : * different client encoding, so we'd do more harm than good by including
364 : * it.
365 : */
366 23488 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
367 : }
368 :
369 :
370 : Datum
371 0 : xml_recv(PG_FUNCTION_ARGS)
372 : {
373 : #ifdef USE_LIBXML
374 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
375 : xmltype *result;
376 : char *str;
377 : char *newstr;
378 : int nbytes;
379 : xmlDocPtr doc;
380 0 : xmlChar *encodingStr = NULL;
381 : int encoding;
382 :
383 : /*
384 : * Read the data in raw format. We don't know yet what the encoding is, as
385 : * that information is embedded in the xml declaration; so we have to
386 : * parse that before converting to server encoding.
387 : */
388 0 : nbytes = buf->len - buf->cursor;
389 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
390 :
391 : /*
392 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
393 : * than make a separate copy, make the temporary result one byte bigger
394 : * than it needs to be.
395 : */
396 0 : result = palloc(nbytes + 1 + VARHDRSZ);
397 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
398 0 : memcpy(VARDATA(result), str, nbytes);
399 0 : str = VARDATA(result);
400 0 : str[nbytes] = '\0';
401 :
402 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403 :
404 : /*
405 : * If encoding wasn't explicitly specified in the XML header, treat it as
406 : * UTF-8, as that's the default in XML. This is different from xml_in(),
407 : * where the input has to go through the normal client to server encoding
408 : * conversion.
409 : */
410 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411 :
412 : /*
413 : * Parse the data to check if it is well-formed XML data. Assume that
414 : * xml_parse will throw ERROR if not.
415 : */
416 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417 0 : xmlFreeDoc(doc);
418 :
419 : /* Now that we know what we're dealing with, convert to server encoding */
420 0 : newstr = pg_any_to_server(str, nbytes, encoding);
421 :
422 0 : if (newstr != str)
423 : {
424 0 : pfree(result);
425 0 : result = (xmltype *) cstring_to_text(newstr);
426 0 : pfree(newstr);
427 : }
428 :
429 0 : PG_RETURN_XML_P(result);
430 : #else
431 : NO_XML_SUPPORT();
432 : return 0;
433 : #endif
434 : }
435 :
436 :
437 : Datum
438 0 : xml_send(PG_FUNCTION_ARGS)
439 : {
440 0 : xmltype *x = PG_GETARG_XML_P(0);
441 : char *outval;
442 : StringInfoData buf;
443 :
444 : /*
445 : * xml_out_internal doesn't convert the encoding, it just prints the right
446 : * declaration. pq_sendtext will do the conversion.
447 : */
448 0 : outval = xml_out_internal(x, pg_get_client_encoding());
449 :
450 0 : pq_begintypsend(&buf);
451 0 : pq_sendtext(&buf, outval, strlen(outval));
452 0 : pfree(outval);
453 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454 : }
455 :
456 :
457 : #ifdef USE_LIBXML
458 : static void
459 132 : appendStringInfoText(StringInfo str, const text *t)
460 : {
461 132 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462 132 : }
463 : #endif
464 :
465 :
466 : static xmltype *
467 22568 : stringinfo_to_xmltype(StringInfo buf)
468 : {
469 22568 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470 : }
471 :
472 :
473 : static xmltype *
474 78 : cstring_to_xmltype(const char *string)
475 : {
476 78 : return (xmltype *) cstring_to_text(string);
477 : }
478 :
479 :
480 : #ifdef USE_LIBXML
481 : static xmltype *
482 22666 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
483 : {
484 22666 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485 : xmlBufferLength(buf));
486 : }
487 : #endif
488 :
489 :
490 : Datum
491 42 : xmlcomment(PG_FUNCTION_ARGS)
492 : {
493 : #ifdef USE_LIBXML
494 42 : text *arg = PG_GETARG_TEXT_PP(0);
495 42 : char *argdata = VARDATA_ANY(arg);
496 42 : int len = VARSIZE_ANY_EXHDR(arg);
497 : StringInfoData buf;
498 : int i;
499 :
500 : /* check for "--" in string or "-" at the end */
501 180 : for (i = 1; i < len; i++)
502 : {
503 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
504 6 : ereport(ERROR,
505 : (errcode(ERRCODE_INVALID_XML_COMMENT),
506 : errmsg("invalid XML comment")));
507 : }
508 36 : if (len > 0 && argdata[len - 1] == '-')
509 6 : ereport(ERROR,
510 : (errcode(ERRCODE_INVALID_XML_COMMENT),
511 : errmsg("invalid XML comment")));
512 :
513 30 : initStringInfo(&buf);
514 30 : appendStringInfoString(&buf, "<!--");
515 30 : appendStringInfoText(&buf, arg);
516 30 : appendStringInfoString(&buf, "-->");
517 :
518 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519 : #else
520 : NO_XML_SUPPORT();
521 : return 0;
522 : #endif
523 : }
524 :
525 :
526 : Datum
527 30 : xmltext(PG_FUNCTION_ARGS)
528 : {
529 : #ifdef USE_LIBXML
530 30 : text *arg = PG_GETARG_TEXT_PP(0);
531 : text *result;
532 30 : volatile xmlChar *xmlbuf = NULL;
533 : PgXmlErrorContext *xmlerrcxt;
534 :
535 : /* First we gotta spin up some error handling. */
536 30 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
537 :
538 30 : PG_TRY();
539 : {
540 30 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
541 :
542 30 : if (xmlbuf == NULL || xmlerrcxt->err_occurred)
543 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
544 : "could not allocate xmlChar");
545 :
546 30 : result = cstring_to_text_with_len((const char *) xmlbuf,
547 : xmlStrlen((const xmlChar *) xmlbuf));
548 : }
549 0 : PG_CATCH();
550 : {
551 0 : if (xmlbuf)
552 0 : xmlFree((xmlChar *) xmlbuf);
553 :
554 0 : pg_xml_done(xmlerrcxt, true);
555 0 : PG_RE_THROW();
556 : }
557 30 : PG_END_TRY();
558 :
559 30 : xmlFree((xmlChar *) xmlbuf);
560 30 : pg_xml_done(xmlerrcxt, false);
561 :
562 30 : PG_RETURN_XML_P(result);
563 : #else
564 : NO_XML_SUPPORT();
565 : return 0;
566 : #endif /* not USE_LIBXML */
567 : }
568 :
569 :
570 : /*
571 : * TODO: xmlconcat needs to merge the notations and unparsed entities
572 : * of the argument values. Not very important in practice, though.
573 : */
574 : xmltype *
575 22318 : xmlconcat(List *args)
576 : {
577 : #ifdef USE_LIBXML
578 22318 : int global_standalone = 1;
579 22318 : xmlChar *global_version = NULL;
580 22318 : bool global_version_no_value = false;
581 : StringInfoData buf;
582 : ListCell *v;
583 :
584 22318 : initStringInfo(&buf);
585 66960 : foreach(v, args)
586 : {
587 44642 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
588 : size_t len;
589 : xmlChar *version;
590 : int standalone;
591 : char *str;
592 :
593 44642 : len = VARSIZE(x) - VARHDRSZ;
594 44642 : str = text_to_cstring((text *) x);
595 :
596 44642 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
597 :
598 44642 : if (standalone == 0 && global_standalone == 1)
599 0 : global_standalone = 0;
600 44642 : if (standalone < 0)
601 44630 : global_standalone = -1;
602 :
603 44642 : if (!version)
604 44624 : global_version_no_value = true;
605 18 : else if (!global_version)
606 12 : global_version = version;
607 6 : else if (xmlStrcmp(version, global_version) != 0)
608 0 : global_version_no_value = true;
609 :
610 44642 : appendStringInfoString(&buf, str + len);
611 44642 : pfree(str);
612 : }
613 :
614 22318 : if (!global_version_no_value || global_standalone >= 0)
615 : {
616 : StringInfoData buf2;
617 :
618 6 : initStringInfo(&buf2);
619 :
620 6 : print_xml_decl(&buf2,
621 6 : (!global_version_no_value) ? global_version : NULL,
622 : 0,
623 : global_standalone);
624 :
625 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
626 6 : buf = buf2;
627 : }
628 :
629 22318 : return stringinfo_to_xmltype(&buf);
630 : #else
631 : NO_XML_SUPPORT();
632 : return NULL;
633 : #endif
634 : }
635 :
636 :
637 : /*
638 : * XMLAGG support
639 : */
640 : Datum
641 22294 : xmlconcat2(PG_FUNCTION_ARGS)
642 : {
643 22294 : if (PG_ARGISNULL(0))
644 : {
645 18 : if (PG_ARGISNULL(1))
646 0 : PG_RETURN_NULL();
647 : else
648 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
649 : }
650 22276 : else if (PG_ARGISNULL(1))
651 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
652 : else
653 22276 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
654 : PG_GETARG_XML_P(1))));
655 : }
656 :
657 :
658 : Datum
659 6 : texttoxml(PG_FUNCTION_ARGS)
660 : {
661 6 : text *data = PG_GETARG_TEXT_PP(0);
662 :
663 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
664 : }
665 :
666 :
667 : Datum
668 0 : xmltotext(PG_FUNCTION_ARGS)
669 : {
670 0 : xmltype *data = PG_GETARG_XML_P(0);
671 :
672 : /* It's actually binary compatible. */
673 0 : PG_RETURN_TEXT_P((text *) data);
674 : }
675 :
676 :
677 : text *
678 180 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
679 : {
680 : #ifdef USE_LIBXML
681 : text *volatile result;
682 : xmlDocPtr doc;
683 : XmlOptionType parsed_xmloptiontype;
684 : xmlNodePtr content_nodes;
685 180 : volatile xmlBufferPtr buf = NULL;
686 180 : volatile xmlSaveCtxtPtr ctxt = NULL;
687 180 : ErrorSaveContext escontext = {T_ErrorSaveContext};
688 180 : PgXmlErrorContext *volatile xmlerrcxt = NULL;
689 : #endif
690 :
691 180 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
692 : {
693 : /*
694 : * We don't actually need to do anything, so just return the
695 : * binary-compatible input. For backwards-compatibility reasons,
696 : * allow such cases to succeed even without USE_LIBXML.
697 : */
698 36 : return (text *) data;
699 : }
700 :
701 : #ifdef USE_LIBXML
702 :
703 : /*
704 : * Parse the input according to the xmloption.
705 : *
706 : * preserve_whitespace is set to false in case we are indenting, otherwise
707 : * libxml2 will fail to indent elements that have whitespace between them.
708 : */
709 144 : doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
710 : &parsed_xmloptiontype, &content_nodes,
711 144 : (Node *) &escontext);
712 144 : if (doc == NULL || escontext.error_occurred)
713 : {
714 30 : if (doc)
715 0 : xmlFreeDoc(doc);
716 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
717 30 : ereport(ERROR,
718 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
719 : errmsg("not an XML document")));
720 : }
721 :
722 : /* If we weren't asked to indent, we're done. */
723 114 : if (!indent)
724 : {
725 18 : xmlFreeDoc(doc);
726 18 : return (text *) data;
727 : }
728 :
729 : /*
730 : * Otherwise, we gotta spin up some error handling. Unlike most other
731 : * routines in this module, we already have a libxml "doc" structure to
732 : * free, so we need to call pg_xml_init() inside the PG_TRY and be
733 : * prepared for it to fail (typically due to palloc OOM).
734 : */
735 96 : PG_TRY();
736 : {
737 96 : size_t decl_len = 0;
738 :
739 96 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
740 :
741 : /* The serialized data will go into this buffer. */
742 96 : buf = xmlBufferCreate();
743 :
744 96 : if (buf == NULL || xmlerrcxt->err_occurred)
745 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746 : "could not allocate xmlBuffer");
747 :
748 : /* Detect whether there's an XML declaration */
749 96 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
750 :
751 : /*
752 : * Emit declaration only if the input had one. Note: some versions of
753 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
754 : * passed, so don't do that. We don't want any encoding conversion
755 : * anyway.
756 : */
757 96 : if (decl_len == 0)
758 84 : ctxt = xmlSaveToBuffer(buf, NULL,
759 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
760 : else
761 12 : ctxt = xmlSaveToBuffer(buf, NULL,
762 : XML_SAVE_FORMAT);
763 :
764 96 : if (ctxt == NULL || xmlerrcxt->err_occurred)
765 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
766 : "could not allocate xmlSaveCtxt");
767 :
768 96 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
769 : {
770 : /* If it's a document, saving is easy. */
771 42 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
772 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
773 : "could not save document to xmlBuffer");
774 : }
775 54 : else if (content_nodes != NULL)
776 : {
777 : /*
778 : * Deal with the case where we have non-singly-rooted XML.
779 : * libxml's dump functions don't work well for that without help.
780 : * We build a fake root node that serves as a container for the
781 : * content nodes, and then iterate over the nodes.
782 : */
783 : xmlNodePtr root;
784 : xmlNodePtr oldroot;
785 : xmlNodePtr newline;
786 :
787 48 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
788 48 : if (root == NULL || xmlerrcxt->err_occurred)
789 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
790 : "could not allocate xml node");
791 :
792 : /*
793 : * This attaches root to doc, so we need not free it separately...
794 : * but instead, we have to free the old root if there was one.
795 : */
796 48 : oldroot = xmlDocSetRootElement(doc, root);
797 48 : if (oldroot != NULL)
798 48 : xmlFreeNode(oldroot);
799 :
800 48 : if (xmlAddChildList(root, content_nodes) == NULL ||
801 48 : xmlerrcxt->err_occurred)
802 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
803 : "could not append xml node list");
804 :
805 : /*
806 : * We use this node to insert newlines in the dump. Note: in at
807 : * least some libxml versions, xmlNewDocText would not attach the
808 : * node to the document even if we passed it. Therefore, manage
809 : * freeing of this node manually, and pass NULL here to make sure
810 : * there's not a dangling link.
811 : */
812 48 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
813 48 : if (newline == NULL || xmlerrcxt->err_occurred)
814 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
815 : "could not allocate xml node");
816 :
817 126 : for (xmlNodePtr node = root->children; node; node = node->next)
818 : {
819 : /* insert newlines between nodes */
820 78 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
821 : {
822 24 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
823 : {
824 0 : xmlFreeNode(newline);
825 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
826 : "could not save newline to xmlBuffer");
827 : }
828 : }
829 :
830 78 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
831 : {
832 0 : xmlFreeNode(newline);
833 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
834 : "could not save content to xmlBuffer");
835 : }
836 : }
837 :
838 48 : xmlFreeNode(newline);
839 : }
840 :
841 96 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
842 : {
843 0 : ctxt = NULL; /* don't try to close it again */
844 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
845 : "could not close xmlSaveCtxtPtr");
846 : }
847 :
848 : /*
849 : * xmlDocContentDumpOutput may add a trailing newline, so remove that.
850 : */
851 96 : if (xmloption_arg == XMLOPTION_DOCUMENT)
852 : {
853 36 : const char *str = (const char *) xmlBufferContent(buf);
854 36 : int len = xmlBufferLength(buf);
855 :
856 72 : while (len > 0 && (str[len - 1] == '\n' ||
857 36 : str[len - 1] == '\r'))
858 36 : len--;
859 :
860 36 : result = cstring_to_text_with_len(str, len);
861 : }
862 : else
863 60 : result = (text *) xmlBuffer_to_xmltype(buf);
864 : }
865 0 : PG_CATCH();
866 : {
867 0 : if (ctxt)
868 0 : xmlSaveClose(ctxt);
869 0 : if (buf)
870 0 : xmlBufferFree(buf);
871 0 : xmlFreeDoc(doc);
872 :
873 0 : if (xmlerrcxt)
874 0 : pg_xml_done(xmlerrcxt, true);
875 :
876 0 : PG_RE_THROW();
877 : }
878 96 : PG_END_TRY();
879 :
880 96 : xmlBufferFree(buf);
881 96 : xmlFreeDoc(doc);
882 :
883 96 : pg_xml_done(xmlerrcxt, false);
884 :
885 96 : return result;
886 : #else
887 : NO_XML_SUPPORT();
888 : return NULL;
889 : #endif
890 : }
891 :
892 :
893 : xmltype *
894 22450 : xmlelement(XmlExpr *xexpr,
895 : Datum *named_argvalue, bool *named_argnull,
896 : Datum *argvalue, bool *argnull)
897 : {
898 : #ifdef USE_LIBXML
899 : xmltype *result;
900 : List *named_arg_strings;
901 : List *arg_strings;
902 : int i;
903 : ListCell *arg;
904 : ListCell *narg;
905 : PgXmlErrorContext *xmlerrcxt;
906 22450 : volatile xmlBufferPtr buf = NULL;
907 22450 : volatile xmlTextWriterPtr writer = NULL;
908 :
909 : /*
910 : * All arguments are already evaluated, and their values are passed in the
911 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
912 : * issues if one of the arguments involves a call to some other function
913 : * or subsystem that wants to use libxml on its own terms. We examine the
914 : * original XmlExpr to identify the numbers and types of the arguments.
915 : */
916 22450 : named_arg_strings = NIL;
917 22450 : i = 0;
918 22498 : foreach(arg, xexpr->named_args)
919 : {
920 54 : Expr *e = (Expr *) lfirst(arg);
921 : char *str;
922 :
923 54 : if (named_argnull[i])
924 0 : str = NULL;
925 : else
926 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
927 : exprType((Node *) e),
928 : false);
929 48 : named_arg_strings = lappend(named_arg_strings, str);
930 48 : i++;
931 : }
932 :
933 22444 : arg_strings = NIL;
934 22444 : i = 0;
935 44864 : foreach(arg, xexpr->args)
936 : {
937 22420 : Expr *e = (Expr *) lfirst(arg);
938 : char *str;
939 :
940 : /* here we can just forget NULL elements immediately */
941 22420 : if (!argnull[i])
942 : {
943 22420 : str = map_sql_value_to_xml_value(argvalue[i],
944 : exprType((Node *) e),
945 : true);
946 22420 : arg_strings = lappend(arg_strings, str);
947 : }
948 22420 : i++;
949 : }
950 :
951 22444 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
952 :
953 22444 : PG_TRY();
954 : {
955 22444 : buf = xmlBufferCreate();
956 22444 : if (buf == NULL || xmlerrcxt->err_occurred)
957 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
958 : "could not allocate xmlBuffer");
959 22444 : writer = xmlNewTextWriterMemory(buf, 0);
960 22444 : if (writer == NULL || xmlerrcxt->err_occurred)
961 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
962 : "could not allocate xmlTextWriter");
963 :
964 22444 : if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
965 22444 : xmlerrcxt->err_occurred)
966 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
967 : "could not start xml element");
968 :
969 22492 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
970 : {
971 48 : char *str = (char *) lfirst(arg);
972 48 : char *argname = strVal(lfirst(narg));
973 :
974 48 : if (str)
975 : {
976 48 : if (xmlTextWriterWriteAttribute(writer,
977 : (xmlChar *) argname,
978 48 : (xmlChar *) str) < 0 ||
979 48 : xmlerrcxt->err_occurred)
980 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
981 : "could not write xml attribute");
982 : }
983 : }
984 :
985 44864 : foreach(arg, arg_strings)
986 : {
987 22420 : char *str = (char *) lfirst(arg);
988 :
989 22420 : if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
990 22420 : xmlerrcxt->err_occurred)
991 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
992 : "could not write raw xml text");
993 : }
994 :
995 22444 : if (xmlTextWriterEndElement(writer) < 0 ||
996 22444 : xmlerrcxt->err_occurred)
997 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
998 : "could not end xml element");
999 :
1000 : /* we MUST do this now to flush data out to the buffer ... */
1001 22444 : xmlFreeTextWriter(writer);
1002 22444 : writer = NULL;
1003 :
1004 22444 : result = xmlBuffer_to_xmltype(buf);
1005 : }
1006 0 : PG_CATCH();
1007 : {
1008 0 : if (writer)
1009 0 : xmlFreeTextWriter(writer);
1010 0 : if (buf)
1011 0 : xmlBufferFree(buf);
1012 :
1013 0 : pg_xml_done(xmlerrcxt, true);
1014 :
1015 0 : PG_RE_THROW();
1016 : }
1017 22444 : PG_END_TRY();
1018 :
1019 22444 : xmlBufferFree(buf);
1020 :
1021 22444 : pg_xml_done(xmlerrcxt, false);
1022 :
1023 22444 : return result;
1024 : #else
1025 : NO_XML_SUPPORT();
1026 : return NULL;
1027 : #endif
1028 : }
1029 :
1030 :
1031 : xmltype *
1032 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
1033 : {
1034 : #ifdef USE_LIBXML
1035 : xmlDocPtr doc;
1036 :
1037 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
1038 : GetDatabaseEncoding(), NULL, NULL, NULL);
1039 90 : xmlFreeDoc(doc);
1040 :
1041 90 : return (xmltype *) data;
1042 : #else
1043 : NO_XML_SUPPORT();
1044 : return NULL;
1045 : #endif
1046 : }
1047 :
1048 :
1049 : xmltype *
1050 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1051 : {
1052 : #ifdef USE_LIBXML
1053 : xmltype *result;
1054 : StringInfoData buf;
1055 :
1056 72 : if (pg_strcasecmp(target, "xml") == 0)
1057 12 : ereport(ERROR,
1058 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1059 : errmsg("invalid XML processing instruction"),
1060 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1061 :
1062 : /*
1063 : * Following the SQL standard, the null check comes after the syntax check
1064 : * above.
1065 : */
1066 60 : *result_is_null = arg_is_null;
1067 60 : if (*result_is_null)
1068 12 : return NULL;
1069 :
1070 48 : initStringInfo(&buf);
1071 :
1072 48 : appendStringInfo(&buf, "<?%s", target);
1073 :
1074 48 : if (arg != NULL)
1075 : {
1076 : char *string;
1077 :
1078 24 : string = text_to_cstring(arg);
1079 24 : if (strstr(string, "?>") != NULL)
1080 6 : ereport(ERROR,
1081 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1082 : errmsg("invalid XML processing instruction"),
1083 : errdetail("XML processing instruction cannot contain \"?>\".")));
1084 :
1085 18 : appendStringInfoChar(&buf, ' ');
1086 18 : appendStringInfoString(&buf, string + strspn(string, " "));
1087 18 : pfree(string);
1088 : }
1089 42 : appendStringInfoString(&buf, "?>");
1090 :
1091 42 : result = stringinfo_to_xmltype(&buf);
1092 42 : pfree(buf.data);
1093 42 : return result;
1094 : #else
1095 : NO_XML_SUPPORT();
1096 : return NULL;
1097 : #endif
1098 : }
1099 :
1100 :
1101 : xmltype *
1102 60 : xmlroot(xmltype *data, text *version, int standalone)
1103 : {
1104 : #ifdef USE_LIBXML
1105 : char *str;
1106 : size_t len;
1107 : xmlChar *orig_version;
1108 : int orig_standalone;
1109 : StringInfoData buf;
1110 :
1111 60 : len = VARSIZE(data) - VARHDRSZ;
1112 60 : str = text_to_cstring((text *) data);
1113 :
1114 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1115 :
1116 60 : if (version)
1117 24 : orig_version = xml_text2xmlChar(version);
1118 : else
1119 36 : orig_version = NULL;
1120 :
1121 60 : switch (standalone)
1122 : {
1123 18 : case XML_STANDALONE_YES:
1124 18 : orig_standalone = 1;
1125 18 : break;
1126 12 : case XML_STANDALONE_NO:
1127 12 : orig_standalone = 0;
1128 12 : break;
1129 12 : case XML_STANDALONE_NO_VALUE:
1130 12 : orig_standalone = -1;
1131 12 : break;
1132 18 : case XML_STANDALONE_OMITTED:
1133 : /* leave original value */
1134 18 : break;
1135 : }
1136 :
1137 60 : initStringInfo(&buf);
1138 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1139 60 : appendStringInfoString(&buf, str + len);
1140 :
1141 60 : return stringinfo_to_xmltype(&buf);
1142 : #else
1143 : NO_XML_SUPPORT();
1144 : return NULL;
1145 : #endif
1146 : }
1147 :
1148 :
1149 : /*
1150 : * Validate document (given as string) against DTD (given as external link)
1151 : *
1152 : * This has been removed because it is a security hole: unprivileged users
1153 : * should not be able to use Postgres to fetch arbitrary external files,
1154 : * which unfortunately is exactly what libxml is willing to do with the DTD
1155 : * parameter.
1156 : */
1157 : Datum
1158 0 : xmlvalidate(PG_FUNCTION_ARGS)
1159 : {
1160 0 : ereport(ERROR,
1161 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1162 : errmsg("xmlvalidate is not implemented")));
1163 : return 0;
1164 : }
1165 :
1166 :
1167 : bool
1168 24 : xml_is_document(xmltype *arg)
1169 : {
1170 : #ifdef USE_LIBXML
1171 : xmlDocPtr doc;
1172 24 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1173 :
1174 : /*
1175 : * We'll report "true" if no soft error is reported by xml_parse().
1176 : */
1177 24 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1178 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1179 24 : if (doc)
1180 12 : xmlFreeDoc(doc);
1181 :
1182 24 : return !escontext.error_occurred;
1183 : #else /* not USE_LIBXML */
1184 : NO_XML_SUPPORT();
1185 : return false;
1186 : #endif /* not USE_LIBXML */
1187 : }
1188 :
1189 :
1190 : #ifdef USE_LIBXML
1191 :
1192 : /*
1193 : * pg_xml_init_library --- set up for use of libxml
1194 : *
1195 : * This should be called by each function that is about to use libxml
1196 : * facilities but doesn't require error handling. It initializes libxml
1197 : * and verifies compatibility with the loaded libxml version. These are
1198 : * once-per-session activities.
1199 : *
1200 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1201 : * check)
1202 : */
1203 : void
1204 94908 : pg_xml_init_library(void)
1205 : {
1206 : static bool first_time = true;
1207 :
1208 94908 : if (first_time)
1209 : {
1210 : /* Stuff we need do only once per session */
1211 :
1212 : /*
1213 : * Currently, we have no pure UTF-8 support for internals -- check if
1214 : * we can work.
1215 : */
1216 : if (sizeof(char) != sizeof(xmlChar))
1217 : ereport(ERROR,
1218 : (errmsg("could not initialize XML library"),
1219 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1220 : sizeof(char), sizeof(xmlChar))));
1221 :
1222 : #ifdef USE_LIBXMLCONTEXT
1223 : /* Set up libxml's memory allocation our way */
1224 : xml_memory_init();
1225 : #endif
1226 :
1227 : /* Check library compatibility */
1228 42 : LIBXML_TEST_VERSION;
1229 :
1230 42 : first_time = false;
1231 : }
1232 94908 : }
1233 :
1234 : /*
1235 : * pg_xml_init --- set up for use of libxml and register an error handler
1236 : *
1237 : * This should be called by each function that is about to use libxml
1238 : * facilities and requires error handling. It initializes libxml with
1239 : * pg_xml_init_library() and establishes our libxml error handler.
1240 : *
1241 : * strictness determines which errors are reported and which are ignored.
1242 : *
1243 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1244 : * that pg_xml_done() is called during either normal or error exit.
1245 : *
1246 : * This is exported for use by contrib/xml2, as well as other code that might
1247 : * wish to share use of this module's libxml error handler.
1248 : */
1249 : PgXmlErrorContext *
1250 24782 : pg_xml_init(PgXmlStrictness strictness)
1251 : {
1252 : PgXmlErrorContext *errcxt;
1253 : void *new_errcxt;
1254 :
1255 : /* Do one-time setup if needed */
1256 24782 : pg_xml_init_library();
1257 :
1258 : /* Create error handling context structure */
1259 24782 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1260 24782 : errcxt->magic = ERRCXT_MAGIC;
1261 24782 : errcxt->strictness = strictness;
1262 24782 : errcxt->err_occurred = false;
1263 24782 : initStringInfo(&errcxt->err_buf);
1264 :
1265 : /*
1266 : * Save original error handler and install ours. libxml originally didn't
1267 : * distinguish between the contexts for generic and for structured error
1268 : * handlers. If we're using an old libxml version, we must thus save the
1269 : * generic error context, even though we're using a structured error
1270 : * handler.
1271 : */
1272 24782 : errcxt->saved_errfunc = xmlStructuredError;
1273 :
1274 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1275 24782 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1276 : #else
1277 : errcxt->saved_errcxt = xmlGenericErrorContext;
1278 : #endif
1279 :
1280 24782 : xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1281 :
1282 : /*
1283 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1284 : * expected it to. If not, the error context pointer we just saved is not
1285 : * the correct thing to restore, and since that leaves us without a way to
1286 : * restore the context in pg_xml_done, we must fail.
1287 : *
1288 : * The only known situation in which this test fails is if we compile with
1289 : * headers from a libxml2 that doesn't track the structured error context
1290 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1291 : * versa. The libxml2 authors did not treat that change as constituting
1292 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1293 : * fails to protect us from this.
1294 : */
1295 :
1296 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1297 24782 : new_errcxt = xmlStructuredErrorContext;
1298 : #else
1299 : new_errcxt = xmlGenericErrorContext;
1300 : #endif
1301 :
1302 24782 : if (new_errcxt != errcxt)
1303 0 : ereport(ERROR,
1304 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1305 : errmsg("could not set up XML error handler"),
1306 : errhint("This probably indicates that the version of libxml2"
1307 : " being used is not compatible with the libxml2"
1308 : " header files that PostgreSQL was built with.")));
1309 :
1310 : /*
1311 : * Also, install an entity loader to prevent unwanted fetches of external
1312 : * files and URLs.
1313 : */
1314 24782 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1315 24782 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1316 :
1317 24782 : return errcxt;
1318 : }
1319 :
1320 :
1321 : /*
1322 : * pg_xml_done --- restore previous libxml error handling
1323 : *
1324 : * Resets libxml's global error-handling state to what it was before
1325 : * pg_xml_init() was called.
1326 : *
1327 : * This routine verifies that all pending errors have been dealt with
1328 : * (in assert-enabled builds, anyway).
1329 : */
1330 : void
1331 24782 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1332 : {
1333 : void *cur_errcxt;
1334 :
1335 : /* An assert seems like enough protection here */
1336 : Assert(errcxt->magic == ERRCXT_MAGIC);
1337 :
1338 : /*
1339 : * In a normal exit, there should be no un-handled libxml errors. But we
1340 : * shouldn't try to enforce this during error recovery, since the longjmp
1341 : * could have been thrown before xml_ereport had a chance to run.
1342 : */
1343 : Assert(!errcxt->err_occurred || isError);
1344 :
1345 : /*
1346 : * Check that libxml's global state is correct, warn if not. This is a
1347 : * real test and not an Assert because it has a higher probability of
1348 : * happening.
1349 : */
1350 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1351 24782 : cur_errcxt = xmlStructuredErrorContext;
1352 : #else
1353 : cur_errcxt = xmlGenericErrorContext;
1354 : #endif
1355 :
1356 24782 : if (cur_errcxt != errcxt)
1357 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1358 :
1359 : /* Restore the saved handlers */
1360 24782 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1361 24782 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1362 :
1363 : /*
1364 : * Mark the struct as invalid, just in case somebody somehow manages to
1365 : * call xml_errorHandler or xml_ereport with it.
1366 : */
1367 24782 : errcxt->magic = 0;
1368 :
1369 : /* Release memory */
1370 24782 : pfree(errcxt->err_buf.data);
1371 24782 : pfree(errcxt);
1372 24782 : }
1373 :
1374 :
1375 : /*
1376 : * pg_xml_error_occurred() --- test the error flag
1377 : */
1378 : bool
1379 78 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1380 : {
1381 78 : return errcxt->err_occurred;
1382 : }
1383 :
1384 :
1385 : /*
1386 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1387 : * documents" are specified by the XML specification and are parsed
1388 : * easily by libxml. "XML content" is specified by SQL/XML as the
1389 : * production "XMLDecl? content". But libxml can only parse the
1390 : * "content" part, so we have to parse the XML declaration ourselves
1391 : * to complete this.
1392 : */
1393 :
/*
 * Return XML_ERR_SPACE_REQUIRED from the enclosing function unless the
 * character at p is XML whitespace.  Does not advance p.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML whitespace */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/* The argument is parenthesized wherever this macro itself uses it */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1411 :
1412 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1413 : static xmlChar *
1414 192 : xml_pnstrdup(const xmlChar *str, size_t len)
1415 : {
1416 : xmlChar *result;
1417 :
1418 192 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1419 192 : memcpy(result, str, len * sizeof(xmlChar));
1420 192 : result[len] = 0;
1421 192 : return result;
1422 : }
1423 :
1424 : /* Ditto, except input is char* */
1425 : static xmlChar *
1426 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1427 : {
1428 : xmlChar *result;
1429 :
1430 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1431 2424 : memcpy(result, str, len);
1432 2424 : result[len] = '\0';
1433 :
1434 2424 : return result;
1435 : }
1436 :
1437 : /*
1438 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1439 : *
1440 : * The input xmlChar is freed regardless of success of the copy.
1441 : */
1442 : static char *
1443 112826 : xml_pstrdup_and_free(xmlChar *str)
1444 : {
1445 : char *result;
1446 :
1447 112826 : if (str)
1448 : {
1449 112826 : PG_TRY();
1450 : {
1451 112826 : result = pstrdup((char *) str);
1452 : }
1453 0 : PG_FINALLY();
1454 : {
1455 112826 : xmlFree(str);
1456 : }
1457 112826 : PG_END_TRY();
1458 : }
1459 : else
1460 0 : result = NULL;
1461 :
1462 112826 : return result;
1463 : }
1464 :
1465 : /*
1466 : * str is the null-terminated input string. Remaining arguments are
1467 : * output arguments; each can be NULL if value is not wanted.
1468 : * version and encoding are returned as locally-palloc'd strings.
1469 : * Result is 0 if OK, an error code if not.
 *
 * On success, *lenp receives the length of the XML declaration found at the
 * start of str, or 0 if str does not start with one (this includes input
 * that starts with a processing instruction such as <?xml-stylesheet ...?>).
 * On an error return *lenp is not written, and *version, *encoding and
 * *standalone hold whatever had been stored up to the point of failure
 * (they start out as NULL, NULL and -1).
 *
 * *standalone is set to 1 for "yes" and 0 for "no"; it stays -1 if the
 * declaration has no standalone attribute.
1470 : */
1471 : static int
1472 70126 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1473 : xmlChar **version, xmlChar **encoding, int *standalone)
1474 : {
1475 : const xmlChar *p;
1476 : const xmlChar *save_p;
1477 : size_t len;
1478 : int utf8char;
1479 : int utf8len;
1480 :
1481 : /*
1482 : * Only initialize libxml. We don't need error handling here, but we do
1483 : * need to make sure libxml is initialized before calling any of its
1484 : * functions. Note that this is safe (and a no-op) if caller has already
1485 : * done pg_xml_init().
1486 : */
1487 70126 : pg_xml_init_library();
1488 :
1489 : /* Initialize output arguments to "not present" */
1490 70126 : if (version)
1491 69478 : *version = NULL;
1492 70126 : if (encoding)
1493 0 : *encoding = NULL;
1494 70126 : if (standalone)
1495 69478 : *standalone = -1;
1496 :
1497 70126 : p = str;
1498 :
/* No "<?xml" prefix: there is no declaration, so finish with p == str */
1499 70126 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1500 69904 : goto finished;
1501 :
1502 : /*
1503 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1504 : * rather than an XMLDecl, so we have done what we came to do and found no
1505 : * XMLDecl.
1506 : *
1507 : * We need an input length value for xmlGetUTF8Char, but there's no need
1508 : * to count the whole document size, so use strnlen not strlen.
1509 : */
1510 222 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1511 222 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1512 222 : if (PG_XMLISNAMECHAR(utf8char))
1513 12 : goto finished;
1514 :
1515 210 : p += 5;
1516 :
1517 : /* version */
/* CHECK_XML_SPACE returns XML_ERR_SPACE_REQUIRED from this function if *p is not a blank */
1518 210 : CHECK_XML_SPACE(p);
1519 420 : SKIP_XML_SPACE(p);
1520 210 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1521 0 : return XML_ERR_VERSION_MISSING;
1522 210 : p += 7;
1523 210 : SKIP_XML_SPACE(p);
1524 210 : if (*p != '=')
1525 0 : return XML_ERR_VERSION_MISSING;
1526 210 : p += 1;
1527 210 : SKIP_XML_SPACE(p);
1528 :
/* The value must be quoted; the closing quote must match the opening one */
1529 210 : if (*p == '\'' || *p == '"')
1530 210 : {
1531 : const xmlChar *q;
1532 :
1533 210 : q = xmlStrchr(p + 1, *p);
1534 210 : if (!q)
1535 0 : return XML_ERR_VERSION_MISSING;
1536 :
1537 210 : if (version)
1538 192 : *version = xml_pnstrdup(p + 1, q - p - 1);
1539 210 : p = q + 1;
1540 : }
1541 : else
1542 0 : return XML_ERR_VERSION_MISSING;
1543 :
1544 : /* encoding */
/* save_p marks where the blanks begin, so that CHECK_XML_SPACE below can insist on at least one blank before the attribute name; if the attribute is absent we rewind to save_p */
1545 210 : save_p = p;
1546 372 : SKIP_XML_SPACE(p);
1547 210 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1548 : {
1549 54 : CHECK_XML_SPACE(save_p);
1550 54 : p += 8;
1551 54 : SKIP_XML_SPACE(p);
1552 54 : if (*p != '=')
1553 0 : return XML_ERR_MISSING_ENCODING;
1554 54 : p += 1;
1555 54 : SKIP_XML_SPACE(p);
1556 :
1557 54 : if (*p == '\'' || *p == '"')
1558 54 : {
1559 : const xmlChar *q;
1560 :
1561 54 : q = xmlStrchr(p + 1, *p);
1562 54 : if (!q)
1563 0 : return XML_ERR_MISSING_ENCODING;
1564 :
1565 54 : if (encoding)
1566 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1567 54 : p = q + 1;
1568 : }
1569 : else
1570 0 : return XML_ERR_MISSING_ENCODING;
1571 : }
1572 : else
1573 : {
1574 156 : p = save_p;
1575 : }
1576 :
1577 : /* standalone */
/* same save_p / rewind scheme as for encoding above */
1578 210 : save_p = p;
1579 318 : SKIP_XML_SPACE(p);
1580 210 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1581 : {
1582 108 : CHECK_XML_SPACE(save_p);
1583 108 : p += 10;
1584 108 : SKIP_XML_SPACE(p);
1585 108 : if (*p != '=')
1586 0 : return XML_ERR_STANDALONE_VALUE;
1587 108 : p += 1;
1588 108 : SKIP_XML_SPACE(p);
/* only the exact quoted literals yes and no are accepted, in either quote style */
1589 216 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1590 108 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1591 : {
1592 60 : if (standalone)
1593 60 : *standalone = 1;
1594 60 : p += 5;
1595 : }
1596 96 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1597 48 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1598 : {
1599 36 : if (standalone)
1600 36 : *standalone = 0;
1601 36 : p += 4;
1602 : }
1603 : else
1604 12 : return XML_ERR_STANDALONE_VALUE;
1605 : }
1606 : else
1607 : {
1608 102 : p = save_p;
1609 : }
1610 :
/* the declaration must now close with ?> */
1611 198 : SKIP_XML_SPACE(p);
1612 198 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1613 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1614 198 : p += 2;
1615 :
1616 70114 : finished:
/* p is just past the declaration, or still equal to str if none was found */
1617 70114 : len = p - str;
1618 :
/* every byte of the declaration itself must be 127 or below */
1619 76846 : for (p = str; p < str + len; p++)
1620 6732 : if (*p > 127)
1621 0 : return XML_ERR_INVALID_CHAR;
1622 :
1623 70114 : if (lenp)
1624 70114 : *lenp = len;
1625 :
1626 70114 : return XML_ERR_OK;
1627 : }
1628 :
1629 :
1630 : /*
1631 : * Write an XML declaration. On output, we adjust the XML declaration
1632 : * as follows. (These rules are the moral equivalent of the clause
1633 : * "Serialization of an XML value" in the SQL standard.)
1634 : *
1635 : * We try to avoid generating an XML declaration if possible. This is
1636 : * so that you don't get trivial things like xml '<foo/>' resulting in
1637 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1638 : * must provide a declaration if the standalone property is specified
1639 : * or if we include an encoding declaration. If we have a
1640 : * declaration, we must specify a version (XML requires this).
1641 : * Otherwise we only make a declaration if the version is not "1.0",
1642 : * which is the default version specified in SQL:2003.
1643 : */
1644 : static bool
1645 23818 : print_xml_decl(StringInfo buf, const xmlChar *version,
1646 : pg_enc encoding, int standalone)
1647 : {
1648 23818 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1649 23782 : || (encoding && encoding != PG_UTF8)
1650 23782 : || standalone != -1)
1651 : {
1652 96 : appendStringInfoString(buf, "<?xml");
1653 :
1654 96 : if (version)
1655 72 : appendStringInfo(buf, " version=\"%s\"", version);
1656 : else
1657 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1658 :
1659 96 : if (encoding && encoding != PG_UTF8)
1660 : {
1661 : /*
1662 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1663 : * instead of LATIN1 etc.); needs field experience
1664 : */
1665 0 : appendStringInfo(buf, " encoding=\"%s\"",
1666 : pg_encoding_to_char(encoding));
1667 : }
1668 :
1669 96 : if (standalone == 1)
1670 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1671 48 : else if (standalone == 0)
1672 24 : appendStringInfoString(buf, " standalone=\"no\"");
1673 96 : appendStringInfoString(buf, "?>");
1674 :
1675 96 : return true;
1676 : }
1677 : else
1678 23722 : return false;
1679 : }
1680 :
1681 : /*
1682 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1683 : *
1684 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1685 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1686 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1687 : * later fix that, by redefining content with reference to the "more
1688 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1689 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1690 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1691 : * pg_restore).
1692 : *
1693 : * As used below in xml_parse when parsing for CONTENT, libxml does not give
1694 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1695 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1696 : * by detecting this case first and simply doing the parse as DOCUMENT.
1697 : *
1698 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1699 : * it will ordinarily start within a few dozen characters. The only things
1700 : * that can precede it are an XMLDecl (here, the caller will have called
1701 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1702 : * This function need only return true if it sees a valid sequence of such
1703 : * things leading to <!DOCTYPE. It can simply return false in any other
1704 : * cases, including malformed input; that will mean the input gets parsed as
1705 : * CONTENT as originally planned, with libxml reporting any errors.
1706 : *
1707 : * This is only to be called from xml_parse, when pg_xml_init has already
1708 : * been called. The input is already in UTF8 encoding.
1709 : */
1710 : static bool
1711 1012 : xml_doctype_in_content(const xmlChar *str)
1712 : {
1713 1012 : const xmlChar *p = str;
1714 :
1715 : for (;;)
1716 36 : {
1717 : const xmlChar *e;
1718 :
1719 1138 : SKIP_XML_SPACE(p);
1720 1048 : if (*p != '<')
1721 214 : return false;
1722 834 : p++;
1723 :
1724 834 : if (*p == '!')
1725 : {
1726 72 : p++;
1727 :
1728 : /* if we see <!DOCTYPE, we can return true */
1729 72 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1730 42 : return true;
1731 :
1732 : /* otherwise, if it's not a comment, fail */
1733 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1734 0 : return false;
1735 : /* find end of comment: find -- and a > must follow */
1736 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1737 30 : if (!p || p[2] != '>')
1738 0 : return false;
1739 : /* advance over comment, and keep scanning */
1740 30 : p += 3;
1741 30 : continue;
1742 : }
1743 :
1744 : /* otherwise, if it's not a PI <?target something?>, fail */
1745 762 : if (*p != '?')
1746 756 : return false;
1747 6 : p++;
1748 :
1749 : /* find end of PI (the string ?> is forbidden within a PI) */
1750 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1751 6 : if (!e)
1752 0 : return false;
1753 :
1754 : /* advance over PI, keep scanning */
1755 6 : p = e + 2;
1756 : }
1757 : }
1758 :
1759 :
1760 : /*
1761 : * Convert a text object to XML internal representation
1762 : *
1763 : * data is the source data (must not be toasted!), encoding is its encoding,
1764 : * and xmloption_arg and preserve_whitespace are options for the
1765 : * transformation.
1766 : *
1767 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1768 : * XmlOptionType actually used to parse the input (typically the same as
1769 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1770 : *
1771 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1772 : * of parsed nodes from the xmlParseInNodeContext call will be returned
1773 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1774 : *
1775 : * Errors normally result in ereport(ERROR), but if escontext is an
1776 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1777 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1778 : *
1779 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1780 : * else a permanent memory leak will ensue! But note the result could
1781 : * be NULL after a soft error.
 *
 * NOTE(review): the result can also be non-NULL after a soft error.  In
 * CONTENT mode the skeleton document has already been created when content
 * parsing fails, and it is still what gets returned.  Callers should thus
 * decide success from escontext, not from the pointer, and free any
 * non-NULL result -- confirm all callers do so.
1782 : *
1783 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1784 : * yet do not use SAX - see xmlreader.c)
1785 : */
1786 : static xmlDocPtr
1787 1318 : xml_parse(text *data, XmlOptionType xmloption_arg,
1788 : bool preserve_whitespace, int encoding,
1789 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1790 : Node *escontext)
1791 : {
1792 : int32 len;
1793 : xmlChar *string;
1794 : xmlChar *utf8string;
1795 : PgXmlErrorContext *xmlerrcxt;
/* volatile: both are assigned inside PG_TRY and examined in PG_CATCH */
1796 1318 : volatile xmlParserCtxtPtr ctxt = NULL;
1797 1318 : volatile xmlDocPtr doc = NULL;
1798 :
1799 : /*
1800 : * This step looks annoyingly redundant, but we must do it to have a
1801 : * null-terminated string in case encoding conversion isn't required.
1802 : */
1803 1318 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1804 1318 : string = xml_text2xmlChar(data);
1805 :
1806 : /*
1807 : * If the data isn't UTF8, we must translate before giving it to libxml.
1808 : *
1809 : * XXX ideally, we'd catch any encoding conversion failure and return a
1810 : * soft error. However, failure to convert to UTF8 should be pretty darn
1811 : * rare, so for now this is left undone.
1812 : */
1813 1318 : utf8string = pg_do_encoding_conversion(string,
1814 : len,
1815 : encoding,
1816 : PG_UTF8);
1817 :
1818 : /* Start up libxml and its parser */
1819 1318 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1820 :
1821 : /* Use a TRY block to ensure we clean up correctly */
1822 1318 : PG_TRY();
1823 : {
1824 1318 : bool parse_as_document = false;
1825 : int options;
1826 : int res_code;
/* count = length of the XML declaration skipped by parse_xml_decl; stays 0 in DOCUMENT mode */
1827 1318 : size_t count = 0;
1828 1318 : xmlChar *version = NULL;
1829 1318 : int standalone = 0;
1830 :
1831 : /* Any errors here are reported as hard ereport's */
1832 1318 : xmlInitParser();
1833 :
1834 : /* Decide whether to parse as document or content */
1835 1318 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1836 294 : parse_as_document = true;
1837 : else
1838 : {
1839 : /* Parse and skip over the XML declaration, if any */
1840 1024 : res_code = parse_xml_decl(utf8string,
1841 : &count, &version, NULL, &standalone);
1842 1024 : if (res_code != 0)
1843 : {
1844 12 : errsave(escontext,
1845 : errcode(ERRCODE_INVALID_XML_CONTENT),
1846 : errmsg_internal("invalid XML content: invalid XML declaration"),
1847 : errdetail_for_xml_code(res_code));
1848 12 : goto fail;
1849 : }
1850 :
1851 : /* Is there a DOCTYPE element? */
1852 1012 : if (xml_doctype_in_content(utf8string + count))
1853 42 : parse_as_document = true;
1854 : }
1855 :
1856 : /*
1857 : * Select parse options.
1858 : *
1859 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1860 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1861 : * internal DTD are applied'. As for external DTDs, we try to support
1862 : * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1863 : * happen because xmlPgEntityLoader prevents it.
1864 : */
1865 1306 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1866 1306 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1867 :
1868 : /* initialize output parameters */
1869 1306 : if (parsed_xmloptiontype != NULL)
1870 144 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1871 : XMLOPTION_CONTENT;
1872 1306 : if (parsed_nodes != NULL)
1873 144 : *parsed_nodes = NULL;
1874 :
1875 1306 : if (parse_as_document)
1876 : {
/* DOCUMENT mode: parse the whole string, including any XML declaration */
1877 336 : ctxt = xmlNewParserCtxt();
1878 336 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1879 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1880 : "could not allocate parser context");
1881 :
1882 336 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1883 : NULL, /* no URL */
1884 : "UTF-8",
1885 : options);
1886 :
1887 336 : if (doc == NULL || xmlerrcxt->err_occurred)
1888 : {
1889 : /* Use original option to decide which error code to report */
1890 144 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1891 138 : xml_errsave(escontext, xmlerrcxt,
1892 : ERRCODE_INVALID_XML_DOCUMENT,
1893 : "invalid XML document");
1894 : else
1895 6 : xml_errsave(escontext, xmlerrcxt,
1896 : ERRCODE_INVALID_XML_CONTENT,
1897 : "invalid XML content");
1898 96 : goto fail;
1899 : }
1900 : }
1901 : else
1902 : {
/* CONTENT mode: build a skeleton document and parse the input as children of its root */
1903 : xmlNodePtr root;
1904 : xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY;
1905 :
1906 : /* set up document with empty root node to be the context node */
1907 970 : doc = xmlNewDoc(version);
1908 970 : if (doc == NULL || xmlerrcxt->err_occurred)
1909 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1910 : "could not allocate XML document");
1911 :
1912 : Assert(doc->encoding == NULL);
1913 970 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1914 970 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1915 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1916 : "could not allocate XML document");
1917 970 : doc->standalone = standalone;
1918 :
1919 970 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1920 970 : if (root == NULL || xmlerrcxt->err_occurred)
1921 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1922 : "could not allocate xml node");
1923 :
1924 : /*
1925 : * This attaches root to doc, so we need not free it separately;
1926 : * and there can't yet be any old root to free.
1927 : */
1928 970 : oldroot = xmlDocSetRootElement(doc, root);
1929 : Assert(oldroot == NULL);
1930 :
1931 : /* allow empty content */
1932 970 : if (*(utf8string + count))
1933 : {
1934 946 : xmlNodePtr node_list = NULL;
1935 : xmlParserErrors res;
1936 :
1937 1892 : res = xmlParseInNodeContext(root,
1938 : (char *) utf8string + count,
1939 946 : strlen((char *) utf8string + count),
1940 : options,
1941 : &node_list);
1942 :
1943 946 : if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1944 : {
1945 60 : xmlFreeNodeList(node_list);
1946 60 : xml_errsave(escontext, xmlerrcxt,
1947 : ERRCODE_INVALID_XML_CONTENT,
1948 : "invalid XML content");
1949 12 : goto fail;
1950 : }
1951 :
/* hand the node list to the caller if it asked for it, else free it here */
1952 886 : if (parsed_nodes != NULL)
1953 48 : *parsed_nodes = node_list;
1954 : else
1955 838 : xmlFreeNodeList(node_list);
1956 : }
1957 : }
1958 :
/* soft-error exits jump here; the success path falls through to the same point */
1959 1222 : fail:
1960 : ;
1961 : }
1962 96 : PG_CATCH();
1963 : {
/* hard error: release whatever libxml objects exist, restore handlers, rethrow */
1964 96 : if (doc != NULL)
1965 48 : xmlFreeDoc(doc);
1966 96 : if (ctxt != NULL)
1967 48 : xmlFreeParserCtxt(ctxt);
1968 :
1969 96 : pg_xml_done(xmlerrcxt, true);
1970 :
1971 96 : PG_RE_THROW();
1972 : }
1973 1222 : PG_END_TRY();
1974 :
/* the parser context is no longer needed; doc is kept and returned to the caller */
1975 1222 : if (ctxt != NULL)
1976 288 : xmlFreeParserCtxt(ctxt);
1977 :
1978 1222 : pg_xml_done(xmlerrcxt, false);
1979 :
1980 1222 : return doc;
1981 : }
1982 :
1983 :
1984 : /*
1985 : * xmlChar<->text conversions
1986 : */
1987 : static xmlChar *
1988 1468 : xml_text2xmlChar(text *in)
1989 : {
1990 1468 : return (xmlChar *) text_to_cstring(in);
1991 : }
1992 :
1993 :
1994 : #ifdef USE_LIBXMLCONTEXT
1995 :
1996 : /*
1997 : * Manage the special context used for all libxml allocations (but only
1998 : * in special debug builds; see notes at top of file)
1999 : */
2000 : static void
2001 : xml_memory_init(void)
2002 : {
2003 : /* Create memory context if not there already */
2004 : if (LibxmlContext == NULL)
2005 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
2006 : "Libxml context",
2007 : ALLOCSET_DEFAULT_SIZES);
2008 :
2009 : /* Re-establish the callbacks even if already set */
2010 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
2011 : }
2012 :
2013 : /*
2014 : * Wrappers for memory management functions
2015 : */
2016 : static void *
2017 : xml_palloc(size_t size)
2018 : {
2019 : return MemoryContextAlloc(LibxmlContext, size);
2020 : }
2021 :
2022 :
2023 : static void *
2024 : xml_repalloc(void *ptr, size_t size)
2025 : {
2026 : return repalloc(ptr, size);
2027 : }
2028 :
2029 :
/*
 * xml_pfree: release hook handed to libxml via xmlMemSetup().
 */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
2037 :
2038 :
2039 : static char *
2040 : xml_pstrdup(const char *string)
2041 : {
2042 : return MemoryContextStrdup(LibxmlContext, string);
2043 : }
2044 : #endif /* USE_LIBXMLCONTEXT */
2045 :
2046 :
2047 : /*
2048 : * xmlPgEntityLoader --- entity loader callback function
2049 : *
2050 : * Silently prevent any external entity URL from being loaded. We don't want
2051 : * to throw an error, so instead make the entity appear to expand to an empty
2052 : * string.
2053 : *
2054 : * We would prefer to allow loading entities that exist in the system's
2055 : * global XML catalog; but the available libxml2 APIs make that a complex
2056 : * and fragile task. For now, just shut down all external access.
2057 : */
2058 : static xmlParserInputPtr
2059 18 : xmlPgEntityLoader(const char *URL, const char *ID,
2060 : xmlParserCtxtPtr ctxt)
2061 : {
2062 18 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2063 : }
2064 :
2065 :
2066 : /*
2067 : * xml_ereport --- report an XML-related error
2068 : *
2069 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2070 : * standard. This function adds libxml's native error message, if any, as
2071 : * detail.
2072 : *
2073 : * This is exported for modules that want to share the core libxml error
2074 : * handler. Note that pg_xml_init() *must* have been called previously.
2075 : */
2076 : void
2077 12 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2078 : {
2079 : char *detail;
2080 :
2081 : /* Defend against someone passing us a bogus context struct */
2082 12 : if (errcxt->magic != ERRCXT_MAGIC)
2083 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2084 :
2085 : /* Flag that the current libxml error has been reported */
2086 12 : errcxt->err_occurred = false;
2087 :
2088 : /* Include detail only if we have some text from libxml */
2089 12 : if (errcxt->err_buf.len > 0)
2090 12 : detail = errcxt->err_buf.data;
2091 : else
2092 0 : detail = NULL;
2093 :
2094 12 : ereport(level,
2095 : (errcode(sqlcode),
2096 : errmsg_internal("%s", msg),
2097 : detail ? errdetail_internal("%s", detail) : 0));
2098 0 : }
2099 :
2100 :
2101 : /*
2102 : * xml_errsave --- save an XML-related error
2103 : *
2104 : * If escontext is an ErrorSaveContext, error details are saved into it,
2105 : * and control returns normally.
2106 : *
2107 : * Otherwise, the error is thrown, so that this is equivalent to
2108 : * xml_ereport() with level == ERROR.
2109 : *
2110 : * This should be used only for errors that we're sure we do not need
2111 : * a transaction abort to clean up after.
2112 : */
2113 : static void
2114 204 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2115 : int sqlcode, const char *msg)
2116 : {
2117 : char *detail;
2118 :
2119 : /* Defend against someone passing us a bogus context struct */
2120 204 : if (errcxt->magic != ERRCXT_MAGIC)
2121 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2122 :
2123 : /* Flag that the current libxml error has been reported */
2124 204 : errcxt->err_occurred = false;
2125 :
2126 : /* Include detail only if we have some text from libxml */
2127 204 : if (errcxt->err_buf.len > 0)
2128 204 : detail = errcxt->err_buf.data;
2129 : else
2130 0 : detail = NULL;
2131 :
2132 204 : errsave(escontext,
2133 : (errcode(sqlcode),
2134 : errmsg_internal("%s", msg),
2135 : detail ? errdetail_internal("%s", detail) : 0));
2136 108 : }
2137 :
2138 :
2139 : /*
2140 : * Error handler for libxml errors and warnings
2141 : */
2142 : static void
2143 398 : xml_errorHandler(void *data, PgXmlErrorPtr error)
2144 : {
2145 398 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2146 398 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2147 398 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2148 398 : xmlNodePtr node = error->node;
2149 398 : const xmlChar *name = (node != NULL &&
2150 398 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2151 398 : int domain = error->domain;
2152 398 : int level = error->level;
2153 : StringInfo errorBuf;
2154 :
2155 : /*
2156 : * Defend against someone passing us a bogus context struct.
2157 : *
2158 : * We force a backend exit if this check fails because longjmp'ing out of
2159 : * libxml would likely render it unsafe to use further.
2160 : */
2161 398 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
2162 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2163 :
2164 : /*----------
2165 : * Older libxml versions report some errors differently.
2166 : * First, some errors were previously reported as coming from the parser
2167 : * domain but are now reported as coming from the namespace domain.
2168 : * Second, some warnings were upgraded to errors.
2169 : * We attempt to compensate for that here.
2170 : *----------
2171 : */
2172 398 : switch (error->code)
2173 : {
2174 30 : case XML_WAR_NS_URI:
2175 30 : level = XML_ERR_ERROR;
2176 30 : domain = XML_FROM_NAMESPACE;
2177 30 : break;
2178 :
2179 54 : case XML_ERR_NS_DECL_ERROR:
2180 : case XML_WAR_NS_URI_RELATIVE:
2181 : case XML_WAR_NS_COLUMN:
2182 : case XML_NS_ERR_XML_NAMESPACE:
2183 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2184 : case XML_NS_ERR_QNAME:
2185 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2186 : case XML_NS_ERR_EMPTY:
2187 54 : domain = XML_FROM_NAMESPACE;
2188 54 : break;
2189 : }
2190 :
2191 : /* Decide whether to act on the error or not */
2192 398 : switch (domain)
2193 : {
2194 314 : case XML_FROM_PARSER:
2195 :
2196 : /*
2197 : * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2198 : * other, more on-point error. Furthermore, libxml2 2.13 reports
2199 : * it under a completely different set of rules than prior
2200 : * versions. To avoid cross-version behavioral differences,
2201 : * suppress it so long as we already logged some error.
2202 : */
2203 314 : if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2204 30 : xmlerrcxt->err_occurred)
2205 30 : return;
2206 : /* fall through */
2207 :
2208 : case XML_FROM_NONE:
2209 : case XML_FROM_MEMORY:
2210 : case XML_FROM_IO:
2211 :
2212 : /*
2213 : * Suppress warnings about undeclared entities. We need to do
2214 : * this to avoid problems due to not loading DTD definitions.
2215 : */
2216 284 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
2217 6 : return;
2218 :
2219 : /* Otherwise, accept error regardless of the parsing purpose */
2220 278 : break;
2221 :
2222 84 : default:
2223 : /* Ignore error if only doing well-formedness check */
2224 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2225 66 : return;
2226 18 : break;
2227 : }
2228 :
2229 : /* Prepare error message in errorBuf */
2230 296 : errorBuf = makeStringInfo();
2231 :
2232 296 : if (error->line > 0)
2233 296 : appendStringInfo(errorBuf, "line %d: ", error->line);
2234 296 : if (name != NULL)
2235 0 : appendStringInfo(errorBuf, "element %s: ", name);
2236 296 : if (error->message != NULL)
2237 296 : appendStringInfoString(errorBuf, error->message);
2238 : else
2239 0 : appendStringInfoString(errorBuf, "(no message provided)");
2240 :
2241 : /*
2242 : * Append context information to errorBuf.
2243 : *
2244 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2245 : * write the context. Since we don't want to duplicate libxml
2246 : * functionality here, we set up a generic error handler temporarily.
2247 : *
2248 : * We use appendStringInfo() directly as libxml's generic error handler.
2249 : * This should work because it has essentially the same signature as
2250 : * libxml expects, namely (void *ptr, const char *msg, ...).
2251 : */
2252 296 : if (input != NULL)
2253 : {
2254 296 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2255 296 : void *errCtxSaved = xmlGenericErrorContext;
2256 :
2257 296 : xmlSetGenericErrorFunc(errorBuf,
2258 : (xmlGenericErrorFunc) appendStringInfo);
2259 :
2260 : /* Add context information to errorBuf */
2261 296 : appendStringInfoLineSeparator(errorBuf);
2262 :
2263 296 : xmlParserPrintFileContext(input);
2264 :
2265 : /* Restore generic error func */
2266 296 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2267 : }
2268 :
2269 : /* Get rid of any trailing newlines in errorBuf */
2270 296 : chopStringInfoNewlines(errorBuf);
2271 :
2272 : /*
2273 : * Legacy error handling mode. err_occurred is never set, we just add the
2274 : * message to err_buf. This mode exists because the xml2 contrib module
2275 : * uses our error-handling infrastructure, but we don't want to change its
2276 : * behaviour since it's deprecated anyway. This is also why we don't
2277 : * distinguish between notices, warnings and errors here --- the old-style
2278 : * generic error handler wouldn't have done that either.
2279 : */
2280 296 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2281 : {
2282 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2283 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2284 : errorBuf->len);
2285 :
2286 2 : destroyStringInfo(errorBuf);
2287 2 : return;
2288 : }
2289 :
2290 : /*
2291 : * We don't want to ereport() here because that'd probably leave libxml in
2292 : * an inconsistent state. Instead, we remember the error and ereport()
2293 : * from xml_ereport().
2294 : *
2295 : * Warnings and notices can be reported immediately since they won't cause
2296 : * a longjmp() out of libxml.
2297 : */
2298 294 : if (level >= XML_ERR_ERROR)
2299 : {
2300 288 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2301 288 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2302 : errorBuf->len);
2303 :
2304 288 : xmlerrcxt->err_occurred = true;
2305 : }
2306 6 : else if (level >= XML_ERR_WARNING)
2307 : {
2308 6 : ereport(WARNING,
2309 : (errmsg_internal("%s", errorBuf->data)));
2310 : }
2311 : else
2312 : {
2313 0 : ereport(NOTICE,
2314 : (errmsg_internal("%s", errorBuf->data)));
2315 : }
2316 :
2317 294 : destroyStringInfo(errorBuf);
2318 : }
2319 :
2320 :
2321 : /*
2322 : * Convert libxml error codes into textual errdetail messages.
2323 : *
2324 : * This should be called within an ereport or errsave invocation,
2325 : * just as errdetail would be.
2326 : *
2327 : * At the moment, we only need to cover those codes that we
2328 : * may raise in this file.
2329 : */
2330 : static int
2331 6 : errdetail_for_xml_code(int code)
2332 : {
2333 : const char *det;
2334 :
2335 6 : switch (code)
2336 : {
2337 0 : case XML_ERR_INVALID_CHAR:
2338 0 : det = gettext_noop("Invalid character value.");
2339 0 : break;
2340 0 : case XML_ERR_SPACE_REQUIRED:
2341 0 : det = gettext_noop("Space required.");
2342 0 : break;
2343 6 : case XML_ERR_STANDALONE_VALUE:
2344 6 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2345 6 : break;
2346 0 : case XML_ERR_VERSION_MISSING:
2347 0 : det = gettext_noop("Malformed declaration: missing version.");
2348 0 : break;
2349 0 : case XML_ERR_MISSING_ENCODING:
2350 0 : det = gettext_noop("Missing encoding in text declaration.");
2351 0 : break;
2352 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2353 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2354 0 : break;
2355 0 : default:
2356 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2357 0 : break;
2358 : }
2359 :
2360 6 : return errdetail(det, code);
2361 : }
2362 :
2363 :
2364 : /*
2365 : * Remove all trailing newlines from a StringInfo string
2366 : */
2367 : static void
2368 882 : chopStringInfoNewlines(StringInfo str)
2369 : {
2370 1474 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2371 592 : str->data[--str->len] = '\0';
2372 882 : }
2373 :
2374 :
2375 : /*
2376 : * Append a newline after removing any existing trailing newlines
2377 : */
2378 : static void
2379 586 : appendStringInfoLineSeparator(StringInfo str)
2380 : {
2381 586 : chopStringInfoNewlines(str);
2382 586 : if (str->len > 0)
2383 368 : appendStringInfoChar(str, '\n');
2384 586 : }
2385 :
2386 :
2387 : /*
2388 : * Convert one char in the current server encoding to a Unicode codepoint.
2389 : */
2390 : static pg_wchar
2391 18350 : sqlchar_to_unicode(const char *s)
2392 : {
2393 : char *utf8string;
2394 : pg_wchar ret[2]; /* need space for trailing zero */
2395 :
2396 : /* note we're not assuming s is null-terminated */
2397 18350 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2398 :
2399 18350 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2400 : pg_encoding_mblen(PG_UTF8, utf8string));
2401 :
2402 18350 : if (utf8string != s)
2403 0 : pfree(utf8string);
2404 :
2405 18350 : return ret[0];
2406 : }
2407 :
2408 :
2409 : static bool
2410 3652 : is_valid_xml_namefirst(pg_wchar c)
2411 : {
2412 : /* (Letter | '_' | ':') */
2413 3658 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2414 7310 : || c == '_' || c == ':');
2415 : }
2416 :
2417 :
2418 : static bool
2419 14698 : is_valid_xml_namechar(pg_wchar c)
2420 : {
2421 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2422 15590 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2423 892 : || xmlIsDigitQ(c)
2424 256 : || c == '.' || c == '-' || c == '_' || c == ':'
2425 12 : || xmlIsCombiningQ(c)
2426 31180 : || xmlIsExtenderQ(c));
2427 : }
2428 : #endif /* USE_LIBXML */
2429 :
2430 :
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * The identifier is walked one (possibly multibyte) character at a time.
 * Characters that need it are replaced by an "_xHHHH_" escape; everything
 * else is copied through unchanged.  The result is a newly built string.
 *
 * Without libxml support this reports NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cur = ident;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	while (*cur != '\0')
	{
		bool		at_start = (cur == ident);
		int			clen = pg_mblen(cur);

		if (*cur == ':' && (at_start || fully_escaped))
		{
			/* colon: always when fully escaping, else only in first place */
			appendStringInfoString(&out, "_x003A_");
		}
		else if (*cur == '_' && cur[1] == 'x')
		{
			/* keep a literal "_x" from being mistaken for an escape */
			appendStringInfoString(&out, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* leading "xml" in any case: escape its first letter */
			appendStringInfoString(&out,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
		{
			appendStringInfoString(&out, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		name_ok;

			name_ok = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (name_ok)
				appendBinaryStringInfo(&out, cur, clen);
			else
				appendStringInfo(&out, "_x%04X_", (unsigned int) u);
		}

		cur += clen;
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2485 :
2486 :
2487 : /*
2488 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2489 : */
2490 : char *
2491 176 : map_xml_name_to_sql_identifier(const char *name)
2492 : {
2493 : StringInfoData buf;
2494 : const char *p;
2495 :
2496 176 : initStringInfo(&buf);
2497 :
2498 968 : for (p = name; *p; p += pg_mblen(p))
2499 : {
2500 792 : if (*p == '_' && *(p + 1) == 'x'
2501 22 : && isxdigit((unsigned char) *(p + 2))
2502 22 : && isxdigit((unsigned char) *(p + 3))
2503 22 : && isxdigit((unsigned char) *(p + 4))
2504 22 : && isxdigit((unsigned char) *(p + 5))
2505 22 : && *(p + 6) == '_')
2506 22 : {
2507 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2508 : unsigned int u;
2509 :
2510 22 : sscanf(p + 2, "%X", &u);
2511 22 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2512 22 : appendStringInfoString(&buf, cbuf);
2513 22 : p += 6;
2514 : }
2515 : else
2516 770 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2517 : }
2518 :
2519 176 : return buf.data;
2520 : }
2521 :
2522 : /*
2523 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2524 : *
2525 : * When xml_escape_strings is true, then certain characters in string
2526 : * values are replaced by entity references (< etc.), as specified
2527 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2528 : * wanted. The false case is mainly useful when the resulting value
2529 : * is used with xmlTextWriterWriteAttribute() to write out an
2530 : * attribute, because that function does the escaping itself.
2531 : */
2532 : char *
2533 134916 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2534 : {
2535 134916 : if (type_is_array_domain(type))
2536 : {
2537 : ArrayType *array;
2538 : Oid elmtype;
2539 : int16 elmlen;
2540 : bool elmbyval;
2541 : char elmalign;
2542 : int num_elems;
2543 : Datum *elem_values;
2544 : bool *elem_nulls;
2545 : StringInfoData buf;
2546 : int i;
2547 :
2548 6 : array = DatumGetArrayTypeP(value);
2549 6 : elmtype = ARR_ELEMTYPE(array);
2550 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2551 :
2552 6 : deconstruct_array(array, elmtype,
2553 : elmlen, elmbyval, elmalign,
2554 : &elem_values, &elem_nulls,
2555 : &num_elems);
2556 :
2557 6 : initStringInfo(&buf);
2558 :
2559 24 : for (i = 0; i < num_elems; i++)
2560 : {
2561 18 : if (elem_nulls[i])
2562 0 : continue;
2563 18 : appendStringInfoString(&buf, "<element>");
2564 18 : appendStringInfoString(&buf,
2565 18 : map_sql_value_to_xml_value(elem_values[i],
2566 : elmtype, true));
2567 18 : appendStringInfoString(&buf, "</element>");
2568 : }
2569 :
2570 6 : pfree(elem_values);
2571 6 : pfree(elem_nulls);
2572 :
2573 6 : return buf.data;
2574 : }
2575 : else
2576 : {
2577 : Oid typeOut;
2578 : bool isvarlena;
2579 : char *str;
2580 :
2581 : /*
2582 : * Flatten domains; the special-case treatments below should apply to,
2583 : * eg, domains over boolean not just boolean.
2584 : */
2585 134910 : type = getBaseType(type);
2586 :
2587 : /*
2588 : * Special XSD formatting for some data types
2589 : */
2590 134910 : switch (type)
2591 : {
2592 66 : case BOOLOID:
2593 66 : if (DatumGetBool(value))
2594 60 : return "true";
2595 : else
2596 6 : return "false";
2597 :
2598 48 : case DATEOID:
2599 : {
2600 : DateADT date;
2601 : struct pg_tm tm;
2602 : char buf[MAXDATELEN + 1];
2603 :
2604 48 : date = DatumGetDateADT(value);
2605 : /* XSD doesn't support infinite values */
2606 48 : if (DATE_NOT_FINITE(date))
2607 0 : ereport(ERROR,
2608 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2609 : errmsg("date out of range"),
2610 : errdetail("XML does not support infinite date values.")));
2611 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2612 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2613 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2614 :
2615 48 : return pstrdup(buf);
2616 : }
2617 :
2618 36 : case TIMESTAMPOID:
2619 : {
2620 : Timestamp timestamp;
2621 : struct pg_tm tm;
2622 : fsec_t fsec;
2623 : char buf[MAXDATELEN + 1];
2624 :
2625 36 : timestamp = DatumGetTimestamp(value);
2626 :
2627 : /* XSD doesn't support infinite values */
2628 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2629 6 : ereport(ERROR,
2630 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2631 : errmsg("timestamp out of range"),
2632 : errdetail("XML does not support infinite timestamp values.")));
2633 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2634 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2635 : else
2636 0 : ereport(ERROR,
2637 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2638 : errmsg("timestamp out of range")));
2639 :
2640 30 : return pstrdup(buf);
2641 : }
2642 :
2643 24 : case TIMESTAMPTZOID:
2644 : {
2645 : TimestampTz timestamp;
2646 : struct pg_tm tm;
2647 : int tz;
2648 : fsec_t fsec;
2649 24 : const char *tzn = NULL;
2650 : char buf[MAXDATELEN + 1];
2651 :
2652 24 : timestamp = DatumGetTimestamp(value);
2653 :
2654 : /* XSD doesn't support infinite values */
2655 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2656 0 : ereport(ERROR,
2657 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2658 : errmsg("timestamp out of range"),
2659 : errdetail("XML does not support infinite timestamp values.")));
2660 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2661 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2662 : else
2663 0 : ereport(ERROR,
2664 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2665 : errmsg("timestamp out of range")));
2666 :
2667 24 : return pstrdup(buf);
2668 : }
2669 :
2670 : #ifdef USE_LIBXML
2671 36 : case BYTEAOID:
2672 : {
2673 36 : bytea *bstr = DatumGetByteaPP(value);
2674 : PgXmlErrorContext *xmlerrcxt;
2675 36 : volatile xmlBufferPtr buf = NULL;
2676 36 : volatile xmlTextWriterPtr writer = NULL;
2677 : char *result;
2678 :
2679 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2680 :
2681 36 : PG_TRY();
2682 : {
2683 36 : buf = xmlBufferCreate();
2684 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2685 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2686 : "could not allocate xmlBuffer");
2687 36 : writer = xmlNewTextWriterMemory(buf, 0);
2688 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2689 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2690 : "could not allocate xmlTextWriter");
2691 :
2692 36 : if (xmlbinary == XMLBINARY_BASE64)
2693 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2694 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2695 : else
2696 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2697 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2698 :
2699 : /* we MUST do this now to flush data out to the buffer */
2700 36 : xmlFreeTextWriter(writer);
2701 36 : writer = NULL;
2702 :
2703 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2704 : }
2705 0 : PG_CATCH();
2706 : {
2707 0 : if (writer)
2708 0 : xmlFreeTextWriter(writer);
2709 0 : if (buf)
2710 0 : xmlBufferFree(buf);
2711 :
2712 0 : pg_xml_done(xmlerrcxt, true);
2713 :
2714 0 : PG_RE_THROW();
2715 : }
2716 36 : PG_END_TRY();
2717 :
2718 36 : xmlBufferFree(buf);
2719 :
2720 36 : pg_xml_done(xmlerrcxt, false);
2721 :
2722 36 : return result;
2723 : }
2724 : #endif /* USE_LIBXML */
2725 :
2726 : }
2727 :
2728 : /*
2729 : * otherwise, just use the type's native text representation
2730 : */
2731 134700 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2732 134700 : str = OidOutputFunctionCall(typeOut, value);
2733 :
2734 : /* ... exactly as-is for XML, and when escaping is not wanted */
2735 134700 : if (type == XMLOID || !xml_escape_strings)
2736 22360 : return str;
2737 :
2738 : /* otherwise, translate special characters as needed */
2739 112340 : return escape_xml(str);
2740 : }
2741 : }
2742 :
2743 :
2744 : /*
2745 : * Escape characters in text that have special meanings in XML.
2746 : *
2747 : * Returns a palloc'd string.
2748 : *
2749 : * NB: this is intentionally not dependent on libxml.
2750 : */
2751 : char *
2752 112812 : escape_xml(const char *str)
2753 : {
2754 : StringInfoData buf;
2755 : const char *p;
2756 :
2757 112812 : initStringInfo(&buf);
2758 708650 : for (p = str; *p; p++)
2759 : {
2760 595838 : switch (*p)
2761 : {
2762 0 : case '&':
2763 0 : appendStringInfoString(&buf, "&");
2764 0 : break;
2765 36 : case '<':
2766 36 : appendStringInfoString(&buf, "<");
2767 36 : break;
2768 24 : case '>':
2769 24 : appendStringInfoString(&buf, ">");
2770 24 : break;
2771 0 : case '\r':
2772 0 : appendStringInfoString(&buf, "
");
2773 0 : break;
2774 595778 : default:
2775 595778 : appendStringInfoCharMacro(&buf, *p);
2776 595778 : break;
2777 : }
2778 : }
2779 112812 : return buf.data;
2780 : }
2781 :
2782 :
/*
 * Duplicate a C string, terminator included, into memory obtained from
 * SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the trailing NUL */
	char	   *copy;

	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2792 :
2793 :
2794 : /*
2795 : * SQL to XML mapping functions
2796 : *
2797 : * What follows below was at one point intentionally organized so that
2798 : * you can read along in the SQL/XML standard. The functions are
2799 : * mostly split up the way the clauses lay out in the standards
2800 : * document, and the identifiers are also aligned with the standard
2801 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2802 : * differently than SQL/XML:2003, so the order below doesn't make much
2803 : * sense anymore.
2804 : *
2805 : * There are many things going on there:
2806 : *
2807 : * There are two kinds of mappings: Mapping SQL data (table contents)
2808 : * to XML documents, and mapping SQL structure (the "schema") to XML
2809 : * Schema. And there are functions that do both at the same time.
2810 : *
2811 : * Then you can map a database, a schema, or a table, each in both
2812 : * ways. This breaks down recursively: Mapping a database invokes
2813 : * mapping schemas, which invokes mapping tables, which invokes
2814 : * mapping rows, which invokes mapping columns, although you can't
2815 : * call the last two from the outside. Because of this, there are a
2816 : * number of xyz_internal() functions which are to be called both from
2817 : * the function manager wrapper and from some upper layer in a
2818 : * recursive call.
2819 : *
2820 : * See the documentation about what the common function arguments
2821 : * nulls, tableforest, and targetns mean.
2822 : *
2823 : * Some style guidelines for XML output: Use double quotes for quoting
2824 : * XML attributes. Indent XML elements by two spaces, but remember
2825 : * that a lot of code is called recursively at different levels, so
2826 : * it's better not to indent rather than create output that indents
2827 : * and outdents weirdly. Add newlines to make the output look nice.
2828 : */
2829 :
2830 :
2831 : /*
2832 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2833 : * 4.10.8.
2834 : */
2835 :
2836 : /*
2837 : * Given a query, which must return type oid as first column, produce
2838 : * a list of Oids with the query results.
2839 : */
2840 : static List *
2841 36 : query_to_oid_list(const char *query)
2842 : {
2843 : uint64 i;
2844 36 : List *list = NIL;
2845 : int spi_result;
2846 :
2847 36 : spi_result = SPI_execute(query, true, 0);
2848 36 : if (spi_result != SPI_OK_SELECT)
2849 0 : elog(ERROR, "SPI_execute returned %s for %s",
2850 : SPI_result_code_string(spi_result), query);
2851 :
2852 108 : for (i = 0; i < SPI_processed; i++)
2853 : {
2854 : Datum oid;
2855 : bool isnull;
2856 :
2857 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2858 72 : SPI_tuptable->tupdesc,
2859 : 1,
2860 : &isnull);
2861 72 : if (!isnull)
2862 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2863 : }
2864 :
2865 36 : return list;
2866 : }
2867 :
2868 :
2869 : static List *
2870 36 : schema_get_xml_visible_tables(Oid nspid)
2871 : {
2872 : StringInfoData query;
2873 :
2874 36 : initStringInfo(&query);
2875 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2876 : " WHERE relnamespace = %u AND relkind IN ("
2877 : CppAsString2(RELKIND_RELATION) ","
2878 : CppAsString2(RELKIND_MATVIEW) ","
2879 : CppAsString2(RELKIND_VIEW) ")"
2880 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2881 : " ORDER BY relname;", nspid);
2882 :
2883 36 : return query_to_oid_list(query.data);
2884 : }
2885 :
2886 :
/*
 * Including the system schemas is probably not useful for a database
 * mapping, so this condition matches them (names starting with "pg_",
 * plus information_schema) for exclusion.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * Query text selecting the OIDs of all schemas on which
 * has_schema_privilege() reports USAGE, minus the excluded ones above.
 * There is deliberately no trailing ORDER BY or semicolon, so the text can
 * be extended or embedded as a subquery.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2894 :
2895 :
2896 : static List *
2897 0 : database_get_xml_visible_schemas(void)
2898 : {
2899 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2900 : }
2901 :
2902 :
2903 : static List *
2904 0 : database_get_xml_visible_tables(void)
2905 : {
2906 : /* At the moment there is no order required here. */
2907 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2908 : " WHERE relkind IN ("
2909 : CppAsString2(RELKIND_RELATION) ","
2910 : CppAsString2(RELKIND_MATVIEW) ","
2911 : CppAsString2(RELKIND_VIEW) ")"
2912 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2913 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2914 : }
2915 :
2916 :
2917 : /*
2918 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2919 : * section 9.11.
2920 : */
2921 :
2922 : static StringInfo
2923 96 : table_to_xml_internal(Oid relid,
2924 : const char *xmlschema, bool nulls, bool tableforest,
2925 : const char *targetns, bool top_level)
2926 : {
2927 : StringInfoData query;
2928 :
2929 96 : initStringInfo(&query);
2930 96 : appendStringInfo(&query, "SELECT * FROM %s",
2931 : DatumGetCString(DirectFunctionCall1(regclassout,
2932 : ObjectIdGetDatum(relid))));
2933 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2934 : xmlschema, nulls, tableforest,
2935 : targetns, top_level);
2936 : }
2937 :
2938 :
2939 : Datum
2940 36 : table_to_xml(PG_FUNCTION_ARGS)
2941 : {
2942 36 : Oid relid = PG_GETARG_OID(0);
2943 36 : bool nulls = PG_GETARG_BOOL(1);
2944 36 : bool tableforest = PG_GETARG_BOOL(2);
2945 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2946 :
2947 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2948 : nulls, tableforest,
2949 : targetns, true)));
2950 : }
2951 :
2952 :
2953 : Datum
2954 10 : query_to_xml(PG_FUNCTION_ARGS)
2955 : {
2956 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2957 10 : bool nulls = PG_GETARG_BOOL(1);
2958 10 : bool tableforest = PG_GETARG_BOOL(2);
2959 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2960 :
2961 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2962 : NULL, nulls, tableforest,
2963 : targetns, true)));
2964 : }
2965 :
2966 :
2967 : Datum
2968 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2969 : {
2970 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2971 12 : int32 count = PG_GETARG_INT32(1);
2972 12 : bool nulls = PG_GETARG_BOOL(2);
2973 12 : bool tableforest = PG_GETARG_BOOL(3);
2974 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2975 :
2976 : StringInfoData result;
2977 : Portal portal;
2978 : uint64 i;
2979 :
2980 12 : initStringInfo(&result);
2981 :
2982 12 : if (!tableforest)
2983 : {
2984 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2985 6 : appendStringInfoChar(&result, '\n');
2986 : }
2987 :
2988 12 : SPI_connect();
2989 12 : portal = SPI_cursor_find(name);
2990 12 : if (portal == NULL)
2991 0 : ereport(ERROR,
2992 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2993 : errmsg("cursor \"%s\" does not exist", name)));
2994 :
2995 12 : SPI_cursor_fetch(portal, true, count);
2996 48 : for (i = 0; i < SPI_processed; i++)
2997 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2998 : tableforest, targetns, true);
2999 :
3000 12 : SPI_finish();
3001 :
3002 12 : if (!tableforest)
3003 6 : xmldata_root_element_end(&result, "table");
3004 :
3005 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
3006 : }
3007 :
3008 :
3009 : /*
3010 : * Write the start tag of the root element of a data mapping.
3011 : *
3012 : * top_level means that this is the very top level of the eventual
3013 : * output. For example, when the user calls table_to_xml, then a call
3014 : * with a table name to this function is the top level. When the user
3015 : * calls database_to_xml, then a call with a schema name to this
3016 : * function is not the top level. If top_level is false, then the XML
3017 : * namespace declarations are omitted, because they supposedly already
3018 : * appeared earlier in the output. Repeating them is not wrong, but
3019 : * it looks ugly.
3020 : */
3021 : static void
3022 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
3023 : const char *xmlschema, const char *targetns,
3024 : bool top_level)
3025 : {
3026 : /* This isn't really wrong but currently makes no sense. */
3027 : Assert(top_level || !xmlschema);
3028 :
3029 238 : appendStringInfo(result, "<%s", eltname);
3030 238 : if (top_level)
3031 : {
3032 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
3033 178 : if (strlen(targetns) > 0)
3034 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
3035 : }
3036 238 : if (xmlschema)
3037 : {
3038 : /* FIXME: better targets */
3039 18 : if (strlen(targetns) > 0)
3040 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
3041 : else
3042 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
3043 : }
3044 238 : appendStringInfoString(result, ">\n");
3045 238 : }
3046 :
3047 :
3048 : static void
3049 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
3050 : {
3051 238 : appendStringInfo(result, "</%s>\n", eltname);
3052 238 : }
3053 :
3054 :
3055 : static StringInfo
3056 112 : query_to_xml_internal(const char *query, char *tablename,
3057 : const char *xmlschema, bool nulls, bool tableforest,
3058 : const char *targetns, bool top_level)
3059 : {
3060 : StringInfo result;
3061 : char *xmltn;
3062 : uint64 i;
3063 :
3064 112 : if (tablename)
3065 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3066 : else
3067 16 : xmltn = "table";
3068 :
3069 112 : result = makeStringInfo();
3070 :
3071 112 : SPI_connect();
3072 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3073 0 : ereport(ERROR,
3074 : (errcode(ERRCODE_DATA_EXCEPTION),
3075 : errmsg("invalid query")));
3076 :
3077 112 : if (!tableforest)
3078 : {
3079 52 : xmldata_root_element_start(result, xmltn, xmlschema,
3080 : targetns, top_level);
3081 52 : appendStringInfoChar(result, '\n');
3082 : }
3083 :
3084 112 : if (xmlschema)
3085 30 : appendStringInfo(result, "%s\n\n", xmlschema);
3086 :
3087 388 : for (i = 0; i < SPI_processed; i++)
3088 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3089 : tableforest, targetns, top_level);
3090 :
3091 112 : if (!tableforest)
3092 52 : xmldata_root_element_end(result, xmltn);
3093 :
3094 112 : SPI_finish();
3095 :
3096 112 : return result;
3097 : }
3098 :
3099 :
3100 : Datum
3101 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
3102 : {
3103 30 : Oid relid = PG_GETARG_OID(0);
3104 30 : bool nulls = PG_GETARG_BOOL(1);
3105 30 : bool tableforest = PG_GETARG_BOOL(2);
3106 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3107 : const char *result;
3108 : Relation rel;
3109 :
3110 30 : rel = table_open(relid, AccessShareLock);
3111 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3112 : tableforest, targetns);
3113 30 : table_close(rel, NoLock);
3114 :
3115 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3116 : }
3117 :
3118 :
3119 : Datum
3120 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
3121 : {
3122 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3123 6 : bool nulls = PG_GETARG_BOOL(1);
3124 6 : bool tableforest = PG_GETARG_BOOL(2);
3125 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3126 : const char *result;
3127 : SPIPlanPtr plan;
3128 : Portal portal;
3129 :
3130 6 : SPI_connect();
3131 :
3132 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3133 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3134 :
3135 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3136 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3137 :
3138 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3139 : InvalidOid, nulls,
3140 : tableforest, targetns));
3141 6 : SPI_cursor_close(portal);
3142 6 : SPI_finish();
3143 :
3144 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3145 : }
3146 :
3147 :
3148 : Datum
3149 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3150 : {
3151 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3152 12 : bool nulls = PG_GETARG_BOOL(1);
3153 12 : bool tableforest = PG_GETARG_BOOL(2);
3154 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3155 : const char *xmlschema;
3156 : Portal portal;
3157 :
3158 12 : SPI_connect();
3159 12 : portal = SPI_cursor_find(name);
3160 12 : if (portal == NULL)
3161 0 : ereport(ERROR,
3162 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3163 : errmsg("cursor \"%s\" does not exist", name)));
3164 12 : if (portal->tupDesc == NULL)
3165 0 : ereport(ERROR,
3166 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3167 : errmsg("portal \"%s\" does not return tuples", name)));
3168 :
3169 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3170 : InvalidOid, nulls,
3171 : tableforest, targetns));
3172 12 : SPI_finish();
3173 :
3174 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3175 : }
3176 :
3177 :
3178 : Datum
3179 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3180 : {
3181 24 : Oid relid = PG_GETARG_OID(0);
3182 24 : bool nulls = PG_GETARG_BOOL(1);
3183 24 : bool tableforest = PG_GETARG_BOOL(2);
3184 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3185 : Relation rel;
3186 : const char *xmlschema;
3187 :
3188 24 : rel = table_open(relid, AccessShareLock);
3189 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3190 : tableforest, targetns);
3191 24 : table_close(rel, NoLock);
3192 :
3193 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3194 : xmlschema, nulls, tableforest,
3195 : targetns, true)));
3196 : }
3197 :
3198 :
3199 : Datum
3200 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3201 : {
3202 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3203 6 : bool nulls = PG_GETARG_BOOL(1);
3204 6 : bool tableforest = PG_GETARG_BOOL(2);
3205 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3206 :
3207 : const char *xmlschema;
3208 : SPIPlanPtr plan;
3209 : Portal portal;
3210 :
3211 6 : SPI_connect();
3212 :
3213 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3214 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3215 :
3216 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3217 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3218 :
3219 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3220 : InvalidOid, nulls, tableforest, targetns));
3221 6 : SPI_cursor_close(portal);
3222 6 : SPI_finish();
3223 :
3224 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3225 : xmlschema, nulls, tableforest,
3226 : targetns, true)));
3227 : }
3228 :
3229 :
3230 : /*
3231 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3232 : * sections 9.13, 9.14.
3233 : */
3234 :
3235 : static StringInfo
3236 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3237 : bool tableforest, const char *targetns, bool top_level)
3238 : {
3239 : StringInfo result;
3240 : char *xmlsn;
3241 : List *relid_list;
3242 : ListCell *cell;
3243 :
3244 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3245 : true, false);
3246 18 : result = makeStringInfo();
3247 :
3248 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3249 18 : appendStringInfoChar(result, '\n');
3250 :
3251 18 : if (xmlschema)
3252 6 : appendStringInfo(result, "%s\n\n", xmlschema);
3253 :
3254 18 : SPI_connect();
3255 :
3256 18 : relid_list = schema_get_xml_visible_tables(nspid);
3257 :
3258 54 : foreach(cell, relid_list)
3259 : {
3260 36 : Oid relid = lfirst_oid(cell);
3261 : StringInfo subres;
3262 :
3263 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3264 : targetns, false);
3265 :
3266 36 : appendBinaryStringInfo(result, subres->data, subres->len);
3267 36 : appendStringInfoChar(result, '\n');
3268 : }
3269 :
3270 18 : SPI_finish();
3271 :
3272 18 : xmldata_root_element_end(result, xmlsn);
3273 :
3274 18 : return result;
3275 : }
3276 :
3277 :
3278 : Datum
3279 12 : schema_to_xml(PG_FUNCTION_ARGS)
3280 : {
3281 12 : Name name = PG_GETARG_NAME(0);
3282 12 : bool nulls = PG_GETARG_BOOL(1);
3283 12 : bool tableforest = PG_GETARG_BOOL(2);
3284 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3285 :
3286 : char *schemaname;
3287 : Oid nspid;
3288 :
3289 12 : schemaname = NameStr(*name);
3290 12 : nspid = LookupExplicitNamespace(schemaname, false);
3291 :
3292 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3293 : nulls, tableforest, targetns, true)));
3294 : }
3295 :
3296 :
3297 : /*
3298 : * Write the start element of the root element of an XML Schema mapping.
3299 : */
3300 : static void
3301 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
3302 : {
3303 96 : appendStringInfoString(result,
3304 : "<xsd:schema\n"
3305 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3306 96 : if (strlen(targetns) > 0)
3307 18 : appendStringInfo(result,
3308 : "\n"
3309 : " targetNamespace=\"%s\"\n"
3310 : " elementFormDefault=\"qualified\"",
3311 : targetns);
3312 96 : appendStringInfoString(result,
3313 : ">\n\n");
3314 96 : }
3315 :
3316 :
3317 : static void
3318 96 : xsd_schema_element_end(StringInfo result)
3319 : {
3320 96 : appendStringInfoString(result, "</xsd:schema>");
3321 96 : }
3322 :
3323 :
3324 : static StringInfo
3325 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3326 : bool tableforest, const char *targetns)
3327 : {
3328 : Oid nspid;
3329 : List *relid_list;
3330 : List *tupdesc_list;
3331 : ListCell *cell;
3332 : StringInfo result;
3333 :
3334 18 : result = makeStringInfo();
3335 :
3336 18 : nspid = LookupExplicitNamespace(schemaname, false);
3337 :
3338 18 : xsd_schema_element_start(result, targetns);
3339 :
3340 18 : SPI_connect();
3341 :
3342 18 : relid_list = schema_get_xml_visible_tables(nspid);
3343 :
3344 18 : tupdesc_list = NIL;
3345 54 : foreach(cell, relid_list)
3346 : {
3347 : Relation rel;
3348 :
3349 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3350 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3351 36 : table_close(rel, NoLock);
3352 : }
3353 :
3354 18 : appendStringInfoString(result,
3355 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3356 :
3357 18 : appendStringInfoString(result,
3358 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3359 : nulls, tableforest, targetns));
3360 :
3361 18 : xsd_schema_element_end(result);
3362 :
3363 18 : SPI_finish();
3364 :
3365 18 : return result;
3366 : }
3367 :
3368 :
3369 : Datum
3370 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3371 : {
3372 12 : Name name = PG_GETARG_NAME(0);
3373 12 : bool nulls = PG_GETARG_BOOL(1);
3374 12 : bool tableforest = PG_GETARG_BOOL(2);
3375 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3376 :
3377 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3378 : nulls, tableforest, targetns)));
3379 : }
3380 :
3381 :
3382 : Datum
3383 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3384 : {
3385 6 : Name name = PG_GETARG_NAME(0);
3386 6 : bool nulls = PG_GETARG_BOOL(1);
3387 6 : bool tableforest = PG_GETARG_BOOL(2);
3388 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3389 : char *schemaname;
3390 : Oid nspid;
3391 : StringInfo xmlschema;
3392 :
3393 6 : schemaname = NameStr(*name);
3394 6 : nspid = LookupExplicitNamespace(schemaname, false);
3395 :
3396 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3397 : tableforest, targetns);
3398 :
3399 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3400 : xmlschema->data, nulls,
3401 : tableforest, targetns, true)));
3402 : }
3403 :
3404 :
3405 : /*
3406 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3407 : * sections 9.16, 9.17.
3408 : */
3409 :
3410 : static StringInfo
3411 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3412 : bool tableforest, const char *targetns)
3413 : {
3414 : StringInfo result;
3415 : List *nspid_list;
3416 : ListCell *cell;
3417 : char *xmlcn;
3418 :
3419 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3420 : true, false);
3421 0 : result = makeStringInfo();
3422 :
3423 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3424 0 : appendStringInfoChar(result, '\n');
3425 :
3426 0 : if (xmlschema)
3427 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3428 :
3429 0 : SPI_connect();
3430 :
3431 0 : nspid_list = database_get_xml_visible_schemas();
3432 :
3433 0 : foreach(cell, nspid_list)
3434 : {
3435 0 : Oid nspid = lfirst_oid(cell);
3436 : StringInfo subres;
3437 :
3438 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3439 : tableforest, targetns, false);
3440 :
3441 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3442 0 : appendStringInfoChar(result, '\n');
3443 : }
3444 :
3445 0 : SPI_finish();
3446 :
3447 0 : xmldata_root_element_end(result, xmlcn);
3448 :
3449 0 : return result;
3450 : }
3451 :
3452 :
3453 : Datum
3454 0 : database_to_xml(PG_FUNCTION_ARGS)
3455 : {
3456 0 : bool nulls = PG_GETARG_BOOL(0);
3457 0 : bool tableforest = PG_GETARG_BOOL(1);
3458 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3459 :
3460 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3461 : tableforest, targetns)));
3462 : }
3463 :
3464 :
3465 : static StringInfo
3466 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3467 : const char *targetns)
3468 : {
3469 : List *relid_list;
3470 : List *nspid_list;
3471 : List *tupdesc_list;
3472 : ListCell *cell;
3473 : StringInfo result;
3474 :
3475 0 : result = makeStringInfo();
3476 :
3477 0 : xsd_schema_element_start(result, targetns);
3478 :
3479 0 : SPI_connect();
3480 :
3481 0 : relid_list = database_get_xml_visible_tables();
3482 0 : nspid_list = database_get_xml_visible_schemas();
3483 :
3484 0 : tupdesc_list = NIL;
3485 0 : foreach(cell, relid_list)
3486 : {
3487 : Relation rel;
3488 :
3489 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3490 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3491 0 : table_close(rel, NoLock);
3492 : }
3493 :
3494 0 : appendStringInfoString(result,
3495 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3496 :
3497 0 : appendStringInfoString(result,
3498 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3499 :
3500 0 : xsd_schema_element_end(result);
3501 :
3502 0 : SPI_finish();
3503 :
3504 0 : return result;
3505 : }
3506 :
3507 :
3508 : Datum
3509 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3510 : {
3511 0 : bool nulls = PG_GETARG_BOOL(0);
3512 0 : bool tableforest = PG_GETARG_BOOL(1);
3513 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3514 :
3515 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3516 : tableforest, targetns)));
3517 : }
3518 :
3519 :
3520 : Datum
3521 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3522 : {
3523 0 : bool nulls = PG_GETARG_BOOL(0);
3524 0 : bool tableforest = PG_GETARG_BOOL(1);
3525 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3526 : StringInfo xmlschema;
3527 :
3528 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3529 :
3530 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3531 : nulls, tableforest, targetns)));
3532 : }
3533 :
3534 :
3535 : /*
3536 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3537 : * 9.2.
3538 : */
3539 : static char *
3540 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3541 : {
3542 : StringInfoData result;
3543 :
3544 384 : initStringInfo(&result);
3545 :
3546 384 : if (a)
3547 384 : appendStringInfoString(&result,
3548 384 : map_sql_identifier_to_xml_name(a, true, true));
3549 384 : if (b)
3550 384 : appendStringInfo(&result, ".%s",
3551 : map_sql_identifier_to_xml_name(b, true, true));
3552 384 : if (c)
3553 384 : appendStringInfo(&result, ".%s",
3554 : map_sql_identifier_to_xml_name(c, true, true));
3555 384 : if (d)
3556 366 : appendStringInfo(&result, ".%s",
3557 : map_sql_identifier_to_xml_name(d, true, true));
3558 :
3559 384 : return result.data;
3560 : }
3561 :
3562 :
3563 : /*
3564 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3565 : * section 9.11.
3566 : *
3567 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3568 : * 9.9.
3569 : */
3570 : static const char *
3571 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3572 : bool tableforest, const char *targetns)
3573 : {
3574 : int i;
3575 : char *xmltn;
3576 : char *tabletypename;
3577 : char *rowtypename;
3578 : StringInfoData result;
3579 :
3580 78 : initStringInfo(&result);
3581 :
3582 78 : if (OidIsValid(relid))
3583 : {
3584 : HeapTuple tuple;
3585 : Form_pg_class reltuple;
3586 :
3587 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3588 54 : if (!HeapTupleIsValid(tuple))
3589 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3590 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3591 :
3592 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3593 : true, false);
3594 :
3595 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3596 54 : get_database_name(MyDatabaseId),
3597 54 : get_namespace_name(reltuple->relnamespace),
3598 54 : NameStr(reltuple->relname));
3599 :
3600 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3601 54 : get_database_name(MyDatabaseId),
3602 54 : get_namespace_name(reltuple->relnamespace),
3603 54 : NameStr(reltuple->relname));
3604 :
3605 54 : ReleaseSysCache(tuple);
3606 : }
3607 : else
3608 : {
3609 24 : if (tableforest)
3610 12 : xmltn = "row";
3611 : else
3612 12 : xmltn = "table";
3613 :
3614 24 : tabletypename = "TableType";
3615 24 : rowtypename = "RowType";
3616 : }
3617 :
3618 78 : xsd_schema_element_start(&result, targetns);
3619 :
3620 78 : appendStringInfoString(&result,
3621 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3622 :
3623 78 : appendStringInfo(&result,
3624 : "<xsd:complexType name=\"%s\">\n"
3625 : " <xsd:sequence>\n",
3626 : rowtypename);
3627 :
3628 324 : for (i = 0; i < tupdesc->natts; i++)
3629 : {
3630 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3631 :
3632 246 : if (att->attisdropped)
3633 6 : continue;
3634 480 : appendStringInfo(&result,
3635 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3636 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3637 : true, false),
3638 : map_sql_type_to_xml_name(att->atttypid, -1),
3639 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3640 : }
3641 :
3642 78 : appendStringInfoString(&result,
3643 : " </xsd:sequence>\n"
3644 : "</xsd:complexType>\n\n");
3645 :
3646 78 : if (!tableforest)
3647 : {
3648 42 : appendStringInfo(&result,
3649 : "<xsd:complexType name=\"%s\">\n"
3650 : " <xsd:sequence>\n"
3651 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3652 : " </xsd:sequence>\n"
3653 : "</xsd:complexType>\n\n",
3654 : tabletypename, rowtypename);
3655 :
3656 42 : appendStringInfo(&result,
3657 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3658 : xmltn, tabletypename);
3659 : }
3660 : else
3661 36 : appendStringInfo(&result,
3662 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3663 : xmltn, rowtypename);
3664 :
3665 78 : xsd_schema_element_end(&result);
3666 :
3667 78 : return result.data;
3668 : }
3669 :
3670 :
3671 : /*
3672 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3673 : * section 9.12.
3674 : */
3675 : static const char *
3676 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3677 : bool tableforest, const char *targetns)
3678 : {
3679 : char *dbname;
3680 : char *nspname;
3681 : char *xmlsn;
3682 : char *schematypename;
3683 : StringInfoData result;
3684 : ListCell *cell;
3685 :
3686 18 : dbname = get_database_name(MyDatabaseId);
3687 18 : nspname = get_namespace_name(nspid);
3688 :
3689 18 : initStringInfo(&result);
3690 :
3691 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3692 :
3693 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3694 : dbname,
3695 : nspname,
3696 : NULL);
3697 :
3698 18 : appendStringInfo(&result,
3699 : "<xsd:complexType name=\"%s\">\n", schematypename);
3700 18 : if (!tableforest)
3701 6 : appendStringInfoString(&result,
3702 : " <xsd:all>\n");
3703 : else
3704 12 : appendStringInfoString(&result,
3705 : " <xsd:sequence>\n");
3706 :
3707 54 : foreach(cell, relid_list)
3708 : {
3709 36 : Oid relid = lfirst_oid(cell);
3710 36 : char *relname = get_rel_name(relid);
3711 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3712 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3713 : dbname,
3714 : nspname,
3715 : relname);
3716 :
3717 36 : if (!tableforest)
3718 12 : appendStringInfo(&result,
3719 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3720 : xmltn, tabletypename);
3721 : else
3722 24 : appendStringInfo(&result,
3723 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3724 : xmltn, tabletypename);
3725 : }
3726 :
3727 18 : if (!tableforest)
3728 6 : appendStringInfoString(&result,
3729 : " </xsd:all>\n");
3730 : else
3731 12 : appendStringInfoString(&result,
3732 : " </xsd:sequence>\n");
3733 18 : appendStringInfoString(&result,
3734 : "</xsd:complexType>\n\n");
3735 :
3736 18 : appendStringInfo(&result,
3737 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3738 : xmlsn, schematypename);
3739 :
3740 18 : return result.data;
3741 : }
3742 :
3743 :
3744 : /*
3745 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3746 : * section 9.15.
3747 : */
3748 : static const char *
3749 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3750 : bool tableforest, const char *targetns)
3751 : {
3752 : char *dbname;
3753 : char *xmlcn;
3754 : char *catalogtypename;
3755 : StringInfoData result;
3756 : ListCell *cell;
3757 :
3758 0 : dbname = get_database_name(MyDatabaseId);
3759 :
3760 0 : initStringInfo(&result);
3761 :
3762 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3763 :
3764 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3765 : dbname,
3766 : NULL,
3767 : NULL);
3768 :
3769 0 : appendStringInfo(&result,
3770 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3771 0 : appendStringInfoString(&result,
3772 : " <xsd:all>\n");
3773 :
3774 0 : foreach(cell, nspid_list)
3775 : {
3776 0 : Oid nspid = lfirst_oid(cell);
3777 0 : char *nspname = get_namespace_name(nspid);
3778 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3779 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3780 : dbname,
3781 : nspname,
3782 : NULL);
3783 :
3784 0 : appendStringInfo(&result,
3785 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3786 : xmlsn, schematypename);
3787 : }
3788 :
3789 0 : appendStringInfoString(&result,
3790 : " </xsd:all>\n");
3791 0 : appendStringInfoString(&result,
3792 : "</xsd:complexType>\n\n");
3793 :
3794 0 : appendStringInfo(&result,
3795 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3796 : xmlcn, catalogtypename);
3797 :
3798 0 : return result.data;
3799 : }
3800 :
3801 :
3802 : /*
3803 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3804 : */
3805 : static const char *
3806 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3807 : {
3808 : StringInfoData result;
3809 :
3810 810 : initStringInfo(&result);
3811 :
3812 810 : switch (typeoid)
3813 : {
3814 30 : case BPCHAROID:
3815 30 : if (typmod == -1)
3816 30 : appendStringInfoString(&result, "CHAR");
3817 : else
3818 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3819 30 : break;
3820 54 : case VARCHAROID:
3821 54 : if (typmod == -1)
3822 54 : appendStringInfoString(&result, "VARCHAR");
3823 : else
3824 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3825 54 : break;
3826 30 : case NUMERICOID:
3827 30 : if (typmod == -1)
3828 30 : appendStringInfoString(&result, "NUMERIC");
3829 : else
3830 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3831 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3832 0 : (typmod - VARHDRSZ) & 0xffff);
3833 30 : break;
3834 174 : case INT4OID:
3835 174 : appendStringInfoString(&result, "INTEGER");
3836 174 : break;
3837 30 : case INT2OID:
3838 30 : appendStringInfoString(&result, "SMALLINT");
3839 30 : break;
3840 30 : case INT8OID:
3841 30 : appendStringInfoString(&result, "BIGINT");
3842 30 : break;
3843 30 : case FLOAT4OID:
3844 30 : appendStringInfoString(&result, "REAL");
3845 30 : break;
3846 0 : case FLOAT8OID:
3847 0 : appendStringInfoString(&result, "DOUBLE");
3848 0 : break;
3849 30 : case BOOLOID:
3850 30 : appendStringInfoString(&result, "BOOLEAN");
3851 30 : break;
3852 30 : case TIMEOID:
3853 30 : if (typmod == -1)
3854 30 : appendStringInfoString(&result, "TIME");
3855 : else
3856 0 : appendStringInfo(&result, "TIME_%d", typmod);
3857 30 : break;
3858 30 : case TIMETZOID:
3859 30 : if (typmod == -1)
3860 30 : appendStringInfoString(&result, "TIME_WTZ");
3861 : else
3862 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3863 30 : break;
3864 30 : case TIMESTAMPOID:
3865 30 : if (typmod == -1)
3866 30 : appendStringInfoString(&result, "TIMESTAMP");
3867 : else
3868 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3869 30 : break;
3870 30 : case TIMESTAMPTZOID:
3871 30 : if (typmod == -1)
3872 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3873 : else
3874 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3875 30 : break;
3876 30 : case DATEOID:
3877 30 : appendStringInfoString(&result, "DATE");
3878 30 : break;
3879 30 : case XMLOID:
3880 30 : appendStringInfoString(&result, "XML");
3881 30 : break;
3882 222 : default:
3883 : {
3884 : HeapTuple tuple;
3885 : Form_pg_type typtuple;
3886 :
3887 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3888 222 : if (!HeapTupleIsValid(tuple))
3889 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3890 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3891 :
3892 222 : appendStringInfoString(&result,
3893 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3894 222 : get_database_name(MyDatabaseId),
3895 222 : get_namespace_name(typtuple->typnamespace),
3896 222 : NameStr(typtuple->typname)));
3897 :
3898 222 : ReleaseSysCache(tuple);
3899 : }
3900 : }
3901 :
3902 810 : return result.data;
3903 : }
3904 :
3905 :
3906 : /*
3907 : * Map a collection of SQL data types to XML Schema data types; see
3908 : * SQL/XML:2008 section 9.7.
3909 : */
3910 : static const char *
3911 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3912 : {
3913 96 : List *uniquetypes = NIL;
3914 : int i;
3915 : StringInfoData result;
3916 : ListCell *cell0;
3917 :
3918 : /* extract all column types used in the set of TupleDescs */
3919 210 : foreach(cell0, tupdesc_list)
3920 : {
3921 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3922 :
3923 702 : for (i = 0; i < tupdesc->natts; i++)
3924 : {
3925 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3926 :
3927 588 : if (att->attisdropped)
3928 24 : continue;
3929 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3930 : }
3931 : }
3932 :
3933 : /* add base types of domains */
3934 642 : foreach(cell0, uniquetypes)
3935 : {
3936 546 : Oid typid = lfirst_oid(cell0);
3937 546 : Oid basetypid = getBaseType(typid);
3938 :
3939 546 : if (basetypid != typid)
3940 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3941 : }
3942 :
3943 : /* Convert to textual form */
3944 96 : initStringInfo(&result);
3945 :
3946 642 : foreach(cell0, uniquetypes)
3947 : {
3948 546 : appendStringInfo(&result, "%s\n",
3949 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3950 : -1));
3951 : }
3952 :
3953 96 : return result.data;
3954 : }
3955 :
3956 :
3957 : /*
3958 : * Map an SQL data type to a named XML Schema data type; see
3959 : * SQL/XML:2008 sections 9.5 and 9.6.
3960 : *
3961 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3962 : * a name attribute, which this function does. The name-less version
3963 : * 9.5 doesn't appear to be required anywhere.)
3964 : */
3965 : static const char *
3966 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3967 : {
3968 : StringInfoData result;
3969 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3970 :
3971 546 : initStringInfo(&result);
3972 :
3973 546 : if (typeoid == XMLOID)
3974 : {
3975 24 : appendStringInfoString(&result,
3976 : "<xsd:complexType mixed=\"true\">\n"
3977 : " <xsd:sequence>\n"
3978 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3979 : " </xsd:sequence>\n"
3980 : "</xsd:complexType>\n");
3981 : }
3982 : else
3983 : {
3984 522 : appendStringInfo(&result,
3985 : "<xsd:simpleType name=\"%s\">\n", typename);
3986 :
3987 522 : switch (typeoid)
3988 : {
3989 138 : case BPCHAROID:
3990 : case VARCHAROID:
3991 : case TEXTOID:
3992 138 : appendStringInfoString(&result,
3993 : " <xsd:restriction base=\"xsd:string\">\n");
3994 138 : if (typmod != -1)
3995 0 : appendStringInfo(&result,
3996 : " <xsd:maxLength value=\"%d\"/>\n",
3997 : typmod - VARHDRSZ);
3998 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3999 138 : break;
4000 :
4001 24 : case BYTEAOID:
4002 24 : appendStringInfo(&result,
4003 : " <xsd:restriction base=\"xsd:%s\">\n"
4004 : " </xsd:restriction>\n",
4005 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
4006 24 : break;
4007 :
4008 24 : case NUMERICOID:
4009 24 : if (typmod != -1)
4010 0 : appendStringInfo(&result,
4011 : " <xsd:restriction base=\"xsd:decimal\">\n"
4012 : " <xsd:totalDigits value=\"%d\"/>\n"
4013 : " <xsd:fractionDigits value=\"%d\"/>\n"
4014 : " </xsd:restriction>\n",
4015 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
4016 0 : (typmod - VARHDRSZ) & 0xffff);
4017 24 : break;
4018 :
4019 24 : case INT2OID:
4020 24 : appendStringInfo(&result,
4021 : " <xsd:restriction base=\"xsd:short\">\n"
4022 : " <xsd:maxInclusive value=\"%d\"/>\n"
4023 : " <xsd:minInclusive value=\"%d\"/>\n"
4024 : " </xsd:restriction>\n",
4025 : SHRT_MAX, SHRT_MIN);
4026 24 : break;
4027 :
4028 96 : case INT4OID:
4029 96 : appendStringInfo(&result,
4030 : " <xsd:restriction base=\"xsd:int\">\n"
4031 : " <xsd:maxInclusive value=\"%d\"/>\n"
4032 : " <xsd:minInclusive value=\"%d\"/>\n"
4033 : " </xsd:restriction>\n",
4034 : INT_MAX, INT_MIN);
4035 96 : break;
4036 :
4037 24 : case INT8OID:
4038 24 : appendStringInfo(&result,
4039 : " <xsd:restriction base=\"xsd:long\">\n"
4040 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
4041 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
4042 : " </xsd:restriction>\n",
4043 : PG_INT64_MAX,
4044 : PG_INT64_MIN);
4045 24 : break;
4046 :
4047 24 : case FLOAT4OID:
4048 24 : appendStringInfoString(&result,
4049 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4050 24 : break;
4051 :
4052 0 : case FLOAT8OID:
4053 0 : appendStringInfoString(&result,
4054 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4055 0 : break;
4056 :
4057 24 : case BOOLOID:
4058 24 : appendStringInfoString(&result,
4059 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4060 24 : break;
4061 :
4062 48 : case TIMEOID:
4063 : case TIMETZOID:
4064 : {
4065 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4066 :
4067 48 : if (typmod == -1)
4068 48 : appendStringInfo(&result,
4069 : " <xsd:restriction base=\"xsd:time\">\n"
4070 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4071 : " </xsd:restriction>\n", tz);
4072 0 : else if (typmod == 0)
4073 0 : appendStringInfo(&result,
4074 : " <xsd:restriction base=\"xsd:time\">\n"
4075 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4076 : " </xsd:restriction>\n", tz);
4077 : else
4078 0 : appendStringInfo(&result,
4079 : " <xsd:restriction base=\"xsd:time\">\n"
4080 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4081 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4082 48 : break;
4083 : }
4084 :
4085 48 : case TIMESTAMPOID:
4086 : case TIMESTAMPTZOID:
4087 : {
4088 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4089 :
4090 48 : if (typmod == -1)
4091 48 : appendStringInfo(&result,
4092 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4093 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4094 : " </xsd:restriction>\n", tz);
4095 0 : else if (typmod == 0)
4096 0 : appendStringInfo(&result,
4097 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4098 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4099 : " </xsd:restriction>\n", tz);
4100 : else
4101 0 : appendStringInfo(&result,
4102 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4103 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4104 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4105 48 : break;
4106 : }
4107 :
4108 24 : case DATEOID:
4109 24 : appendStringInfoString(&result,
4110 : " <xsd:restriction base=\"xsd:date\">\n"
4111 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4112 : " </xsd:restriction>\n");
4113 24 : break;
4114 :
4115 24 : default:
4116 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4117 : {
4118 : Oid base_typeoid;
4119 24 : int32 base_typmod = -1;
4120 :
4121 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4122 :
4123 24 : appendStringInfo(&result,
4124 : " <xsd:restriction base=\"%s\"/>\n",
4125 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4126 : }
4127 24 : break;
4128 : }
4129 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4130 : }
4131 :
4132 546 : return result.data;
4133 : }
4134 :
4135 :
4136 : /*
4137 : * Map an SQL row to an XML element, taking the row from the active
4138 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4139 : */
4140 : static void
4141 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4142 : bool nulls, bool tableforest,
4143 : const char *targetns, bool top_level)
4144 : {
4145 : int i;
4146 : char *xmltn;
4147 :
4148 312 : if (tablename)
4149 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4150 : else
4151 : {
4152 84 : if (tableforest)
4153 36 : xmltn = "row";
4154 : else
4155 48 : xmltn = "table";
4156 : }
4157 :
4158 312 : if (tableforest)
4159 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4160 : else
4161 150 : appendStringInfoString(result, "<row>\n");
4162 :
4163 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4164 : {
4165 : char *colname;
4166 : Datum colval;
4167 : bool isnull;
4168 :
4169 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4170 : true, false);
4171 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4172 960 : SPI_tuptable->tupdesc,
4173 : i,
4174 : &isnull);
4175 960 : if (isnull)
4176 : {
4177 114 : if (nulls)
4178 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4179 : }
4180 : else
4181 846 : appendStringInfo(result, " <%s>%s</%s>\n",
4182 : colname,
4183 : map_sql_value_to_xml_value(colval,
4184 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4185 : colname);
4186 : }
4187 :
4188 312 : if (tableforest)
4189 : {
4190 162 : xmldata_root_element_end(result, xmltn);
4191 162 : appendStringInfoChar(result, '\n');
4192 : }
4193 : else
4194 150 : appendStringInfoString(result, "</row>\n\n");
4195 312 : }
4196 :
4197 :
4198 : /*
4199 : * XPath related functions
4200 : */
4201 :
4202 : #ifdef USE_LIBXML
4203 :
4204 : /*
4205 : * Convert XML node to text.
4206 : *
4207 : * For attribute and text nodes, return the escaped text. For anything else,
4208 : * dump the whole subtree.
4209 : */
4210 : static text *
4211 192 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4212 : {
4213 192 : xmltype *result = NULL;
4214 :
4215 192 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4216 162 : {
4217 162 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4218 162 : volatile xmlBufferPtr buf = NULL;
4219 162 : volatile xmlNodePtr cur_copy = NULL;
4220 :
4221 162 : PG_TRY();
4222 : {
4223 : int bytes;
4224 :
4225 162 : buf = xmlBufferCreate();
4226 162 : if (buf == NULL || xmlerrcxt->err_occurred)
4227 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4228 : "could not allocate xmlBuffer");
4229 :
4230 : /*
4231 : * Produce a dump of the node that we can serialize. xmlNodeDump
4232 : * does that, but the result of that function won't contain
4233 : * namespace definitions from ancestor nodes, so we first do a
4234 : * xmlCopyNode() which duplicates the node along with its required
4235 : * namespace definitions.
4236 : *
4237 : * Some old libxml2 versions such as 2.7.6 produce partially
4238 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4239 : * copying them. xmlNodeDump of such a node works fine, but
4240 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4241 : */
4242 162 : cur_copy = xmlCopyNode(cur, 1);
4243 162 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4244 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4245 : "could not copy node");
4246 324 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4247 162 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4248 :
4249 162 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4250 162 : if (bytes == -1 || xmlerrcxt->err_occurred)
4251 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4252 : "could not dump node");
4253 :
4254 162 : result = xmlBuffer_to_xmltype(buf);
4255 : }
4256 0 : PG_FINALLY();
4257 : {
4258 162 : if (nodefree)
4259 162 : nodefree(cur_copy);
4260 162 : if (buf)
4261 162 : xmlBufferFree(buf);
4262 : }
4263 162 : PG_END_TRY();
4264 : }
4265 : else
4266 : {
4267 30 : volatile xmlChar *str = NULL;
4268 :
4269 30 : PG_TRY();
4270 : {
4271 : char *escaped;
4272 :
4273 30 : str = xmlXPathCastNodeToString(cur);
4274 30 : if (str == NULL || xmlerrcxt->err_occurred)
4275 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4276 : "could not allocate xmlChar");
4277 :
4278 : /* Here we rely on XML having the same representation as TEXT */
4279 30 : escaped = escape_xml((char *) str);
4280 :
4281 30 : result = (xmltype *) cstring_to_text(escaped);
4282 30 : pfree(escaped);
4283 : }
4284 0 : PG_FINALLY();
4285 : {
4286 30 : if (str)
4287 30 : xmlFree((xmlChar *) str);
4288 : }
4289 30 : PG_END_TRY();
4290 : }
4291 :
4292 192 : return result;
4293 : }
4294 :
4295 : /*
4296 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4297 : * to an array of xml values, which are appended to astate. The function
4298 : * result value is the number of elements in the array.
4299 : *
4300 : * If "astate" is NULL then we don't generate the array value, but we still
4301 : * return the number of elements it would have had.
4302 : *
4303 : * Nodesets are converted to an array containing the nodes' textual
4304 : * representations. Primitive values (float, double, string) are converted
4305 : * to a single-element array containing the value's string representation.
4306 : */
4307 : static int
4308 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4309 : ArrayBuildState *astate,
4310 : PgXmlErrorContext *xmlerrcxt)
4311 : {
4312 540 : int result = 0;
4313 : Datum datum;
4314 : Oid datumtype;
4315 : char *result_str;
4316 :
4317 540 : switch (xpathobj->type)
4318 : {
4319 498 : case XPATH_NODESET:
4320 498 : if (xpathobj->nodesetval != NULL)
4321 : {
4322 354 : result = xpathobj->nodesetval->nodeNr;
4323 354 : if (astate != NULL)
4324 : {
4325 : int i;
4326 :
4327 168 : for (i = 0; i < result; i++)
4328 : {
4329 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4330 : xmlerrcxt));
4331 90 : (void) accumArrayResult(astate, datum, false,
4332 : XMLOID, CurrentMemoryContext);
4333 : }
4334 : }
4335 : }
4336 498 : return result;
4337 :
4338 12 : case XPATH_BOOLEAN:
4339 12 : if (astate == NULL)
4340 0 : return 1;
4341 12 : datum = BoolGetDatum(xpathobj->boolval);
4342 12 : datumtype = BOOLOID;
4343 12 : break;
4344 :
4345 18 : case XPATH_NUMBER:
4346 18 : if (astate == NULL)
4347 12 : return 1;
4348 6 : datum = Float8GetDatum(xpathobj->floatval);
4349 6 : datumtype = FLOAT8OID;
4350 6 : break;
4351 :
4352 12 : case XPATH_STRING:
4353 12 : if (astate == NULL)
4354 0 : return 1;
4355 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
4356 12 : datumtype = CSTRINGOID;
4357 12 : break;
4358 :
4359 0 : default:
4360 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4361 : xpathobj->type);
4362 : return 0; /* keep compiler quiet */
4363 : }
4364 :
4365 : /* Common code for scalar-value cases */
4366 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4367 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4368 30 : (void) accumArrayResult(astate, datum, false,
4369 : XMLOID, CurrentMemoryContext);
4370 30 : return 1;
4371 : }
4372 :
4373 :
4374 : /*
4375 : * Common code for xpath() and xmlexists()
4376 : *
4377 : * Evaluate XPath expression and return number of nodes in res_nitems
4378 : * and array of XML values in astate. Either of those pointers can be
4379 : * NULL if the corresponding result isn't wanted.
4380 : *
4381 : * It is up to the user to ensure that the XML passed is in fact
4382 : * an XML document - XPath doesn't work easily on fragments without
4383 : * a context node being known.
4384 : */
4385 : static void
4386 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4387 : int *res_nitems, ArrayBuildState *astate)
4388 : {
4389 : PgXmlErrorContext *xmlerrcxt;
4390 558 : volatile xmlParserCtxtPtr ctxt = NULL;
4391 558 : volatile xmlDocPtr doc = NULL;
4392 558 : volatile xmlXPathContextPtr xpathctx = NULL;
4393 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4394 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
4395 : char *datastr;
4396 : int32 len;
4397 : int32 xpath_len;
4398 : xmlChar *string;
4399 : xmlChar *xpath_expr;
4400 558 : size_t xmldecl_len = 0;
4401 : int i;
4402 : int ndim;
4403 : Datum *ns_names_uris;
4404 : bool *ns_names_uris_nulls;
4405 : int ns_count;
4406 :
4407 : /*
4408 : * Namespace mappings are passed as text[]. If an empty array is passed
4409 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4410 : * Else, a 2-dimensional array with length of the second axis being equal
4411 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4412 : * first element defining the name, the second one the URI. Example:
4413 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4414 : * 'http://example2.com']].
4415 : */
4416 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4417 558 : if (ndim != 0)
4418 : {
4419 : int *dims;
4420 :
4421 126 : dims = ARR_DIMS(namespaces);
4422 :
4423 126 : if (ndim != 2 || dims[1] != 2)
4424 0 : ereport(ERROR,
4425 : (errcode(ERRCODE_DATA_EXCEPTION),
4426 : errmsg("invalid array for XML namespace mapping"),
4427 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4428 :
4429 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4430 :
4431 126 : deconstruct_array_builtin(namespaces, TEXTOID,
4432 : &ns_names_uris, &ns_names_uris_nulls,
4433 : &ns_count);
4434 :
4435 : Assert((ns_count % 2) == 0); /* checked above */
4436 126 : ns_count /= 2; /* count pairs only */
4437 : }
4438 : else
4439 : {
4440 432 : ns_names_uris = NULL;
4441 432 : ns_names_uris_nulls = NULL;
4442 432 : ns_count = 0;
4443 : }
4444 :
4445 558 : datastr = VARDATA(data);
4446 558 : len = VARSIZE(data) - VARHDRSZ;
4447 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4448 558 : if (xpath_len == 0)
4449 6 : ereport(ERROR,
4450 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4451 : errmsg("empty XPath expression")));
4452 :
4453 552 : string = pg_xmlCharStrndup(datastr, len);
4454 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4455 :
4456 : /*
4457 : * In a UTF8 database, skip any xml declaration, which might assert
4458 : * another encoding. Ignore parse_xml_decl() failure, letting
4459 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4460 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4461 : * those scenarios bug-compatible with historical behavior.
4462 : */
4463 552 : if (GetDatabaseEncoding() == PG_UTF8)
4464 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4465 :
4466 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4467 :
4468 552 : PG_TRY();
4469 : {
4470 552 : xmlInitParser();
4471 :
4472 : /*
4473 : * redundant XML parsing (two parsings for the same value during one
4474 : * command execution are possible)
4475 : */
4476 552 : ctxt = xmlNewParserCtxt();
4477 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4478 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4479 : "could not allocate parser context");
4480 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4481 552 : len - xmldecl_len, NULL, NULL, 0);
4482 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4483 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4484 : "could not parse XML document");
4485 540 : xpathctx = xmlXPathNewContext(doc);
4486 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4487 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4488 : "could not allocate XPath context");
4489 540 : xpathctx->node = (xmlNodePtr) doc;
4490 :
4491 : /* register namespaces, if any */
4492 540 : if (ns_count > 0)
4493 : {
4494 252 : for (i = 0; i < ns_count; i++)
4495 : {
4496 : char *ns_name;
4497 : char *ns_uri;
4498 :
4499 126 : if (ns_names_uris_nulls[i * 2] ||
4500 126 : ns_names_uris_nulls[i * 2 + 1])
4501 0 : ereport(ERROR,
4502 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4503 : errmsg("neither namespace name nor URI may be null")));
4504 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4505 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4506 126 : if (xmlXPathRegisterNs(xpathctx,
4507 : (xmlChar *) ns_name,
4508 : (xmlChar *) ns_uri) != 0)
4509 0 : ereport(ERROR, /* is this an internal error??? */
4510 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4511 : ns_name, ns_uri)));
4512 : }
4513 : }
4514 :
4515 : /*
4516 : * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4517 : * xmlXPathCompile. In libxml2 2.13.3 and older, the latter function
4518 : * fails to defend itself against recursion-to-stack-overflow. See
4519 : * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4520 : */
4521 540 : xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4522 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4523 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4524 : "invalid XPath expression");
4525 :
4526 : /*
4527 : * Version 2.6.27 introduces a function named
4528 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4529 : * but we can derive the existence by whether any nodes are returned,
4530 : * thereby preventing a library version upgrade and keeping the code
4531 : * the same.
4532 : */
4533 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4534 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4535 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4536 : "could not create XPath object");
4537 :
4538 : /*
4539 : * Extract the results as requested.
4540 : */
4541 540 : if (res_nitems != NULL)
4542 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4543 : else
4544 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4545 : }
4546 12 : PG_CATCH();
4547 : {
4548 12 : if (xpathobj)
4549 0 : xmlXPathFreeObject(xpathobj);
4550 12 : if (xpathcomp)
4551 0 : xmlXPathFreeCompExpr(xpathcomp);
4552 12 : if (xpathctx)
4553 0 : xmlXPathFreeContext(xpathctx);
4554 12 : if (doc)
4555 12 : xmlFreeDoc(doc);
4556 12 : if (ctxt)
4557 12 : xmlFreeParserCtxt(ctxt);
4558 :
4559 12 : pg_xml_done(xmlerrcxt, true);
4560 :
4561 12 : PG_RE_THROW();
4562 : }
4563 540 : PG_END_TRY();
4564 :
4565 540 : xmlXPathFreeObject(xpathobj);
4566 540 : xmlXPathFreeCompExpr(xpathcomp);
4567 540 : xmlXPathFreeContext(xpathctx);
4568 540 : xmlFreeDoc(doc);
4569 540 : xmlFreeParserCtxt(ctxt);
4570 :
4571 540 : pg_xml_done(xmlerrcxt, false);
4572 540 : }
4573 : #endif /* USE_LIBXML */
4574 :
4575 : /*
4576 : * Evaluate XPath expression and return array of XML values.
4577 : *
4578 : * As we have no support of XQuery sequences yet, this function seems
4579 : * to be the most useful one (array of XML functions plays a role of
4580 : * some kind of substitution for XQuery sequences).
4581 : */
4582 : Datum
4583 126 : xpath(PG_FUNCTION_ARGS)
4584 : {
4585 : #ifdef USE_LIBXML
4586 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4587 126 : xmltype *data = PG_GETARG_XML_P(1);
4588 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4589 : ArrayBuildState *astate;
4590 :
4591 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4592 126 : xpath_internal(xpath_expr_text, data, namespaces,
4593 : NULL, astate);
4594 108 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4595 : #else
4596 : NO_XML_SUPPORT();
4597 : return 0;
4598 : #endif
4599 : }
4600 :
4601 : /*
4602 : * Determines if the node specified by the supplied XPath exists
4603 : * in a given XML document, returning a boolean.
4604 : */
4605 : Datum
4606 198 : xmlexists(PG_FUNCTION_ARGS)
4607 : {
4608 : #ifdef USE_LIBXML
4609 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4610 198 : xmltype *data = PG_GETARG_XML_P(1);
4611 : int res_nitems;
4612 :
4613 198 : xpath_internal(xpath_expr_text, data, NULL,
4614 : &res_nitems, NULL);
4615 :
4616 198 : PG_RETURN_BOOL(res_nitems > 0);
4617 : #else
4618 : NO_XML_SUPPORT();
4619 : return 0;
4620 : #endif
4621 : }
4622 :
4623 : /*
4624 : * Determines if the node specified by the supplied XPath exists
4625 : * in a given XML document, returning a boolean. Differs from
4626 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4627 : */
4628 : Datum
4629 234 : xpath_exists(PG_FUNCTION_ARGS)
4630 : {
4631 : #ifdef USE_LIBXML
4632 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4633 234 : xmltype *data = PG_GETARG_XML_P(1);
4634 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4635 : int res_nitems;
4636 :
4637 234 : xpath_internal(xpath_expr_text, data, namespaces,
4638 : &res_nitems, NULL);
4639 :
4640 234 : PG_RETURN_BOOL(res_nitems > 0);
4641 : #else
4642 : NO_XML_SUPPORT();
4643 : return 0;
4644 : #endif
4645 : }
4646 :
4647 : /*
4648 : * Functions for checking well-formed-ness
4649 : */
4650 :
4651 : #ifdef USE_LIBXML
4652 : static bool
4653 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4654 : {
4655 : xmlDocPtr doc;
4656 114 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4657 :
4658 : /*
4659 : * We'll report "true" if no soft error is reported by xml_parse().
4660 : */
4661 114 : doc = xml_parse(data, xmloption_arg, true,
4662 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4663 114 : if (doc)
4664 60 : xmlFreeDoc(doc);
4665 :
4666 114 : return !escontext.error_occurred;
4667 : }
4668 : #endif
4669 :
4670 : Datum
4671 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4672 : {
4673 : #ifdef USE_LIBXML
4674 90 : text *data = PG_GETARG_TEXT_PP(0);
4675 :
4676 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4677 : #else
4678 : NO_XML_SUPPORT();
4679 : return 0;
4680 : #endif /* not USE_LIBXML */
4681 : }
4682 :
4683 : Datum
4684 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4685 : {
4686 : #ifdef USE_LIBXML
4687 12 : text *data = PG_GETARG_TEXT_PP(0);
4688 :
4689 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4690 : #else
4691 : NO_XML_SUPPORT();
4692 : return 0;
4693 : #endif /* not USE_LIBXML */
4694 : }
4695 :
4696 : Datum
4697 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4698 : {
4699 : #ifdef USE_LIBXML
4700 12 : text *data = PG_GETARG_TEXT_PP(0);
4701 :
4702 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4703 : #else
4704 : NO_XML_SUPPORT();
4705 : return 0;
4706 : #endif /* not USE_LIBXML */
4707 : }
4708 :
4709 : /*
4710 : * support functions for XMLTABLE
4711 : *
4712 : */
4713 : #ifdef USE_LIBXML
4714 :
4715 : /*
4716 : * Returns private data from executor state. Ensure validity by check with
4717 : * MAGIC number.
4718 : */
4719 : static inline XmlTableBuilderData *
4720 160270 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4721 : {
4722 : XmlTableBuilderData *result;
4723 :
4724 160270 : if (!IsA(state, TableFuncScanState))
4725 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4726 160270 : result = (XmlTableBuilderData *) state->opaque;
4727 160270 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4728 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4729 :
4730 160270 : return result;
4731 : }
4732 : #endif
4733 :
4734 : /*
4735 : * XmlTableInitOpaque
4736 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4737 : * the XML parser.
4738 : *
4739 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4740 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4741 : * executor nodes run until this node is processed to completion. Caller
4742 : * must execute this to completion (probably filling a tuplestore to exhaust
4743 : * this node in a single pass) instead of using row-per-call mode.
4744 : */
4745 : static void
4746 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4747 : {
4748 : #ifdef USE_LIBXML
4749 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4750 : XmlTableBuilderData *xtCxt;
4751 : PgXmlErrorContext *xmlerrcxt;
4752 :
4753 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4754 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4755 264 : xtCxt->natts = natts;
4756 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4757 :
4758 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4759 :
4760 264 : PG_TRY();
4761 : {
4762 264 : xmlInitParser();
4763 :
4764 264 : ctxt = xmlNewParserCtxt();
4765 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4766 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4767 : "could not allocate parser context");
4768 : }
4769 0 : PG_CATCH();
4770 : {
4771 0 : if (ctxt != NULL)
4772 0 : xmlFreeParserCtxt(ctxt);
4773 :
4774 0 : pg_xml_done(xmlerrcxt, true);
4775 :
4776 0 : PG_RE_THROW();
4777 : }
4778 264 : PG_END_TRY();
4779 :
4780 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4781 264 : xtCxt->ctxt = ctxt;
4782 :
4783 264 : state->opaque = xtCxt;
4784 : #else
4785 : NO_XML_SUPPORT();
4786 : #endif /* not USE_LIBXML */
4787 264 : }
4788 :
4789 : /*
4790 : * XmlTableSetDocument
4791 : * Install the input document
4792 : */
4793 : static void
4794 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4795 : {
4796 : #ifdef USE_LIBXML
4797 : XmlTableBuilderData *xtCxt;
4798 264 : xmltype *xmlval = DatumGetXmlP(value);
4799 : char *str;
4800 : xmlChar *xstr;
4801 : int length;
4802 264 : volatile xmlDocPtr doc = NULL;
4803 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4804 :
4805 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4806 :
4807 : /*
4808 : * Use out function for casting to string (remove encoding property). See
4809 : * comment in xml_out.
4810 : */
4811 264 : str = xml_out_internal(xmlval, 0);
4812 :
4813 264 : length = strlen(str);
4814 264 : xstr = pg_xmlCharStrndup(str, length);
4815 :
4816 264 : PG_TRY();
4817 : {
4818 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4819 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4820 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4821 : "could not parse XML document");
4822 264 : xpathcxt = xmlXPathNewContext(doc);
4823 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4824 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4825 : "could not allocate XPath context");
4826 264 : xpathcxt->node = (xmlNodePtr) doc;
4827 : }
4828 0 : PG_CATCH();
4829 : {
4830 0 : if (xpathcxt != NULL)
4831 0 : xmlXPathFreeContext(xpathcxt);
4832 0 : if (doc != NULL)
4833 0 : xmlFreeDoc(doc);
4834 :
4835 0 : PG_RE_THROW();
4836 : }
4837 264 : PG_END_TRY();
4838 :
4839 264 : xtCxt->doc = doc;
4840 264 : xtCxt->xpathcxt = xpathcxt;
4841 : #else
4842 : NO_XML_SUPPORT();
4843 : #endif /* not USE_LIBXML */
4844 264 : }
4845 :
4846 : /*
4847 : * XmlTableSetNamespace
4848 : * Add a namespace declaration
4849 : */
4850 : static void
4851 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4852 : {
4853 : #ifdef USE_LIBXML
4854 : XmlTableBuilderData *xtCxt;
4855 :
4856 18 : if (name == NULL)
4857 6 : ereport(ERROR,
4858 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4859 : errmsg("DEFAULT namespace is not supported")));
4860 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4861 :
4862 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4863 12 : pg_xmlCharStrndup(name, strlen(name)),
4864 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4865 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4866 : "could not set XML namespace");
4867 : #else
4868 : NO_XML_SUPPORT();
4869 : #endif /* not USE_LIBXML */
4870 12 : }
4871 :
4872 : /*
4873 : * XmlTableSetRowFilter
4874 : * Install the row-filter Xpath expression.
4875 : */
4876 : static void
4877 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4878 : {
4879 : #ifdef USE_LIBXML
4880 : XmlTableBuilderData *xtCxt;
4881 : xmlChar *xstr;
4882 :
4883 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4884 :
4885 258 : if (*path == '\0')
4886 0 : ereport(ERROR,
4887 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4888 : errmsg("row path filter must not be empty string")));
4889 :
4890 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4891 :
4892 : /* We require XmlTableSetDocument to have been done already */
4893 : Assert(xtCxt->xpathcxt != NULL);
4894 :
4895 258 : xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4896 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4897 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4898 : "invalid XPath expression");
4899 : #else
4900 : NO_XML_SUPPORT();
4901 : #endif /* not USE_LIBXML */
4902 258 : }
4903 :
4904 : /*
4905 : * XmlTableSetColumnFilter
4906 : * Install the column-filter Xpath expression, for the given column.
4907 : */
4908 : static void
4909 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4910 : {
4911 : #ifdef USE_LIBXML
4912 : XmlTableBuilderData *xtCxt;
4913 : xmlChar *xstr;
4914 :
4915 : Assert(PointerIsValid(path));
4916 :
4917 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4918 :
4919 774 : if (*path == '\0')
4920 0 : ereport(ERROR,
4921 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4922 : errmsg("column path filter must not be empty string")));
4923 :
4924 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4925 :
4926 : /* We require XmlTableSetDocument to have been done already */
4927 : Assert(xtCxt->xpathcxt != NULL);
4928 :
4929 774 : xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4930 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4931 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4932 : "invalid XPath expression");
4933 : #else
4934 : NO_XML_SUPPORT();
4935 : #endif /* not USE_LIBXML */
4936 774 : }
4937 :
4938 : /*
4939 : * XmlTableFetchRow
4940 : * Prepare the next "current" tuple for upcoming GetValue calls.
4941 : * Returns false if the row-filter expression returned no more rows.
4942 : */
4943 : static bool
4944 23032 : XmlTableFetchRow(TableFuncScanState *state)
4945 : {
4946 : #ifdef USE_LIBXML
4947 : XmlTableBuilderData *xtCxt;
4948 :
4949 23032 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4950 :
4951 : /* Propagate our own error context to libxml2 */
4952 23032 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4953 :
4954 23032 : if (xtCxt->xpathobj == NULL)
4955 : {
4956 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4957 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4958 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4959 : "could not create XPath object");
4960 :
4961 258 : xtCxt->row_count = 0;
4962 : }
4963 :
4964 23032 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4965 : {
4966 23032 : if (xtCxt->xpathobj->nodesetval != NULL)
4967 : {
4968 23032 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4969 22786 : return true;
4970 : }
4971 : }
4972 :
4973 246 : return false;
4974 : #else
4975 : NO_XML_SUPPORT();
4976 : return false;
4977 : #endif /* not USE_LIBXML */
4978 : }
4979 :
4980 : /*
4981 : * XmlTableGetValue
4982 : * Return the value for column number 'colnum' for the current row. If
4983 : * column -1 is requested, return representation of the whole row.
4984 : *
4985 : * This leaks memory, so be sure to reset often the context in which it's
4986 : * called.
4987 : */
4988 : static Datum
4989 135666 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4990 : Oid typid, int32 typmod, bool *isnull)
4991 : {
4992 : #ifdef USE_LIBXML
4993 135666 : Datum result = (Datum) 0;
4994 : XmlTableBuilderData *xtCxt;
4995 135666 : volatile xmlXPathObjectPtr xpathobj = NULL;
4996 :
4997 135666 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4998 :
4999 : Assert(xtCxt->xpathobj &&
5000 : xtCxt->xpathobj->type == XPATH_NODESET &&
5001 : xtCxt->xpathobj->nodesetval != NULL);
5002 :
5003 : /* Propagate our own error context to libxml2 */
5004 135666 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5005 :
5006 135666 : *isnull = false;
5007 :
5008 : Assert(xtCxt->xpathscomp[colnum] != NULL);
5009 :
5010 135666 : PG_TRY();
5011 : {
5012 : xmlNodePtr cur;
5013 135666 : char *cstr = NULL;
5014 :
5015 : /* Set current node as entry point for XPath evaluation */
5016 135666 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
5017 135666 : xtCxt->xpathcxt->node = cur;
5018 :
5019 : /* Evaluate column path */
5020 135666 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
5021 135666 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
5022 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
5023 : "could not create XPath object");
5024 :
5025 : /*
5026 : * There are four possible cases, depending on the number of nodes
5027 : * returned by the XPath expression and the type of the target column:
5028 : * a) XPath returns no nodes. b) The target type is XML (return all
5029 : * as XML). For non-XML return types: c) One node (return content).
5030 : * d) Multiple nodes (error).
5031 : */
5032 135666 : if (xpathobj->type == XPATH_NODESET)
5033 : {
5034 135636 : int count = 0;
5035 :
5036 135636 : if (xpathobj->nodesetval != NULL)
5037 135426 : count = xpathobj->nodesetval->nodeNr;
5038 :
5039 135636 : if (xpathobj->nodesetval == NULL || count == 0)
5040 : {
5041 22744 : *isnull = true;
5042 : }
5043 : else
5044 : {
5045 112892 : if (typid == XMLOID)
5046 : {
5047 : text *textstr;
5048 : StringInfoData str;
5049 :
5050 : /* Concatenate serialized values */
5051 72 : initStringInfo(&str);
5052 174 : for (int i = 0; i < count; i++)
5053 : {
5054 : textstr =
5055 102 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5056 : xtCxt->xmlerrcxt);
5057 :
5058 102 : appendStringInfoText(&str, textstr);
5059 : }
5060 72 : cstr = str.data;
5061 : }
5062 : else
5063 : {
5064 : xmlChar *str;
5065 :
5066 112820 : if (count > 1)
5067 6 : ereport(ERROR,
5068 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
5069 : errmsg("more than one value returned by column XPath expression")));
5070 :
5071 112814 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5072 112814 : cstr = str ? xml_pstrdup_and_free(str) : "";
5073 : }
5074 : }
5075 : }
5076 30 : else if (xpathobj->type == XPATH_STRING)
5077 : {
5078 : /* Content should be escaped when target will be XML */
5079 18 : if (typid == XMLOID)
5080 6 : cstr = escape_xml((char *) xpathobj->stringval);
5081 : else
5082 12 : cstr = (char *) xpathobj->stringval;
5083 : }
5084 12 : else if (xpathobj->type == XPATH_BOOLEAN)
5085 : {
5086 : char typcategory;
5087 : bool typispreferred;
5088 : xmlChar *str;
5089 :
5090 : /* Allow implicit casting from boolean to numbers */
5091 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
5092 :
5093 6 : if (typcategory != TYPCATEGORY_NUMERIC)
5094 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
5095 : else
5096 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5097 :
5098 6 : cstr = xml_pstrdup_and_free(str);
5099 : }
5100 6 : else if (xpathobj->type == XPATH_NUMBER)
5101 : {
5102 : xmlChar *str;
5103 :
5104 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5105 6 : cstr = xml_pstrdup_and_free(str);
5106 : }
5107 : else
5108 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5109 :
5110 : /*
5111 : * By here, either cstr contains the result value, or the isnull flag
5112 : * has been set.
5113 : */
5114 : Assert(cstr || *isnull);
5115 :
5116 135660 : if (!*isnull)
5117 112916 : result = InputFunctionCall(&state->in_functions[colnum],
5118 : cstr,
5119 112916 : state->typioparams[colnum],
5120 : typmod);
5121 : }
5122 6 : PG_FINALLY();
5123 : {
5124 135666 : if (xpathobj != NULL)
5125 135666 : xmlXPathFreeObject(xpathobj);
5126 : }
5127 135666 : PG_END_TRY();
5128 :
5129 135660 : return result;
5130 : #else
5131 : NO_XML_SUPPORT();
5132 : return 0;
5133 : #endif /* not USE_LIBXML */
5134 : }
5135 :
5136 : /*
5137 : * XmlTableDestroyOpaque
5138 : * Release all libxml2 resources
5139 : */
5140 : static void
5141 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
5142 : {
5143 : #ifdef USE_LIBXML
5144 : XmlTableBuilderData *xtCxt;
5145 :
5146 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5147 :
5148 : /* Propagate our own error context to libxml2 */
5149 264 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5150 :
5151 264 : if (xtCxt->xpathscomp != NULL)
5152 : {
5153 : int i;
5154 :
5155 1116 : for (i = 0; i < xtCxt->natts; i++)
5156 852 : if (xtCxt->xpathscomp[i] != NULL)
5157 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5158 : }
5159 :
5160 264 : if (xtCxt->xpathobj != NULL)
5161 258 : xmlXPathFreeObject(xtCxt->xpathobj);
5162 264 : if (xtCxt->xpathcomp != NULL)
5163 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5164 264 : if (xtCxt->xpathcxt != NULL)
5165 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
5166 264 : if (xtCxt->doc != NULL)
5167 264 : xmlFreeDoc(xtCxt->doc);
5168 264 : if (xtCxt->ctxt != NULL)
5169 264 : xmlFreeParserCtxt(xtCxt->ctxt);
5170 :
5171 264 : pg_xml_done(xtCxt->xmlerrcxt, true);
5172 :
5173 : /* not valid anymore */
5174 264 : xtCxt->magic = 0;
5175 264 : state->opaque = NULL;
5176 :
5177 : #else
5178 : NO_XML_SUPPORT();
5179 : #endif /* not USE_LIBXML */
5180 264 : }
|