Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/parser.h>
51 : #include <libxml/parserInternals.h>
52 : #include <libxml/tree.h>
53 : #include <libxml/uri.h>
54 : #include <libxml/xmlerror.h>
55 : #include <libxml/xmlsave.h>
56 : #include <libxml/xmlversion.h>
57 : #include <libxml/xmlwriter.h>
58 : #include <libxml/xpath.h>
59 : #include <libxml/xpathInternals.h>
60 :
61 : /*
62 : * We used to check for xmlStructuredErrorContext via a configure test; but
63 : * that doesn't work on Windows, so instead use this grottier method of
64 : * testing the library version number.
65 : */
66 : #if LIBXML_VERSION >= 20704
67 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
68 : #endif
69 : #endif /* USE_LIBXML */
70 :
71 : #include "access/htup_details.h"
72 : #include "access/table.h"
73 : #include "catalog/namespace.h"
74 : #include "catalog/pg_class.h"
75 : #include "catalog/pg_type.h"
76 : #include "commands/dbcommands.h"
77 : #include "executor/spi.h"
78 : #include "executor/tablefunc.h"
79 : #include "fmgr.h"
80 : #include "lib/stringinfo.h"
81 : #include "libpq/pqformat.h"
82 : #include "mb/pg_wchar.h"
83 : #include "miscadmin.h"
84 : #include "nodes/execnodes.h"
85 : #include "nodes/miscnodes.h"
86 : #include "nodes/nodeFuncs.h"
87 : #include "utils/array.h"
88 : #include "utils/builtins.h"
89 : #include "utils/date.h"
90 : #include "utils/datetime.h"
91 : #include "utils/lsyscache.h"
92 : #include "utils/memutils.h"
93 : #include "utils/rel.h"
94 : #include "utils/syscache.h"
95 : #include "utils/xml.h"
96 :
97 :
98 : /* GUC variables */
99 : int xmlbinary = XMLBINARY_BASE64;
100 : int xmloption = XMLOPTION_CONTENT;
101 :
102 : #ifdef USE_LIBXML
103 :
104 : /* random number to identify PgXmlErrorContext */
105 : #define ERRCXT_MAGIC 68275028
106 :
/*
 * State carried through one libxml error-handling session.  Functions in
 * this file obtain it from pg_xml_init() and release it with pg_xml_done();
 * in between they test err_occurred after libxml calls.
 */
struct PgXmlErrorContext
{
	/* expected to hold ERRCXT_MAGIC, identifying this struct type */
	int			magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool		err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void	   *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
121 :
122 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
123 : xmlParserCtxtPtr ctxt);
124 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
125 : int sqlcode, const char *msg);
126 : static void xml_errorHandler(void *data, xmlErrorPtr error);
127 : static int errdetail_for_xml_code(int code);
128 : static void chopStringInfoNewlines(StringInfo str);
129 : static void appendStringInfoLineSeparator(StringInfo str);
130 :
131 : #ifdef USE_LIBXMLCONTEXT
132 :
133 : static MemoryContext LibxmlContext = NULL;
134 :
135 : static void xml_memory_init(void);
136 : static void *xml_palloc(size_t size);
137 : static void *xml_repalloc(void *ptr, size_t size);
138 : static void xml_pfree(void *ptr);
139 : static char *xml_pstrdup(const char *string);
140 : #endif /* USE_LIBXMLCONTEXT */
141 :
142 : static xmlChar *xml_text2xmlChar(text *in);
143 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
144 : xmlChar **version, xmlChar **encoding, int *standalone);
145 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
146 : pg_enc encoding, int standalone);
147 : static bool xml_doctype_in_content(const xmlChar *str);
148 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
149 : bool preserve_whitespace, int encoding,
150 : XmlOptionType *parsed_xmloptiontype,
151 : xmlNodePtr *parsed_nodes,
152 : Node *escontext);
153 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
154 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
155 : ArrayBuildState *astate,
156 : PgXmlErrorContext *xmlerrcxt);
157 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
158 : #endif /* USE_LIBXML */
159 :
160 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
161 : const char *xmlschema, const char *targetns,
162 : bool top_level);
163 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
164 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
165 : const char *xmlschema, bool nulls, bool tableforest,
166 : const char *targetns, bool top_level);
167 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
168 : bool nulls, bool tableforest, const char *targetns);
169 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
170 : List *relid_list, bool nulls,
171 : bool tableforest, const char *targetns);
172 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
173 : bool nulls, bool tableforest,
174 : const char *targetns);
175 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
176 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
177 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
178 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
179 : char *tablename, bool nulls, bool tableforest,
180 : const char *targetns, bool top_level);
181 :
182 : /* XMLTABLE support */
183 : #ifdef USE_LIBXML
184 : /* random number to identify XmlTableContext */
185 : #define XMLTABLE_CONTEXT_MAGIC 46922182
/*
 * Working state for the XMLTABLE support routines declared below.
 *
 * NOTE(review): the routines that fill and read these fields are outside
 * this part of the file, so the per-field notes are assumptions to confirm.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* presumably XMLTABLE_CONTEXT_MAGIC */
	int			natts;			/* presumably the natts given to
								 * XmlTableInitOpaque -- TODO confirm */
	long int	row_count;
	PgXmlErrorContext *xmlerrcxt;	/* libxml error-handling state */
	xmlParserCtxtPtr ctxt;
	xmlDocPtr	doc;
	xmlXPathContextPtr xpathcxt;
	xmlXPathCompExprPtr xpathcomp;
	xmlXPathObjectPtr xpathobj;
	xmlXPathCompExprPtr *xpathscomp;	/* array of compiled expressions;
										 * presumably one per column --
										 * TODO confirm */
} XmlTableBuilderData;
199 : #endif
200 :
201 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
202 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
203 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
204 : const char *uri);
205 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
206 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
207 : const char *path, int colnum);
208 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
209 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
210 : Oid typid, int32 typmod, bool *isnull);
211 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
212 :
213 : const TableFuncRoutine XmlTableRoutine =
214 : {
215 : XmlTableInitOpaque,
216 : XmlTableSetDocument,
217 : XmlTableSetNamespace,
218 : XmlTableSetRowFilter,
219 : XmlTableSetColumnFilter,
220 : XmlTableFetchRow,
221 : XmlTableGetValue,
222 : XmlTableDestroyOpaque
223 : };
224 :
/*
 * Raise the standard "unsupported XML feature" error.  Functions in this
 * file invoke it in their #else branch, i.e. when USE_LIBXML is not defined.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support.")))
230 :
231 :
232 : /* from SQL/XML:2008 section 4.9 */
233 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
234 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
235 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
236 :
237 :
238 : #ifdef USE_LIBXML
239 :
240 : static int
5079 tgl 241 UIC 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
242 : {
243 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
244 :
5657 245 0 : if (encoding < 0)
246 0 : ereport(ERROR,
5657 tgl 247 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
248 : errmsg("invalid encoding name \"%s\"",
5079 249 : (const char *) encoding_name)));
5657 tgl 250 UIC 0 : return encoding;
5657 tgl 251 EUB : }
252 : #endif
253 :
254 :
255 : /*
5493 256 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
257 : * we use the conversion function for the text datatype.
258 : *
259 : * This is only acceptable so long as xmltype and text use the same
260 : * representation.
261 : */
262 : Datum
5953 peter_e 263 GIC 411 : xml_in(PG_FUNCTION_ARGS)
264 : {
265 : #ifdef USE_LIBXML
5624 bruce 266 411 : char *s = PG_GETARG_CSTRING(0);
267 : xmltype *vardata;
268 : xmlDocPtr doc;
5953 peter_e 269 ECB :
270 : /* Build the result object. */
5493 tgl 271 GIC 411 : vardata = (xmltype *) cstring_to_text(s);
272 :
5953 peter_e 273 ECB : /*
274 : * Parse the data to check if it is well-formed XML data.
275 : *
276 : * Note: we don't need to worry about whether a soft error is detected.
277 : */
114 tgl 278 GNC 411 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
25 279 411 : NULL, NULL, fcinfo->context);
114 280 387 : if (doc != NULL)
281 381 : xmlFreeDoc(doc);
282 :
5953 peter_e 283 GIC 387 : PG_RETURN_XML_P(vardata);
284 : #else
285 : NO_XML_SUPPORT();
286 : return 0;
287 : #endif
5953 peter_e 288 ECB : }
289 :
290 :
5925 291 : #define PG_XML_DEFAULT_VERSION "1.0"
292 :
293 :
294 : /*
295 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
296 : * time being we use the conversion function for the text datatype.
297 : *
298 : * This is only acceptable so long as xmltype and text use the same
299 : * representation.
300 : */
301 : static char *
5624 bruce 302 GIC 11386 : xml_out_internal(xmltype *x, pg_enc target_encoding)
303 : {
5318 tgl 304 11386 : char *str = text_to_cstring((text *) x);
305 :
306 : #ifdef USE_LIBXML
307 11386 : size_t len = strlen(str);
308 : xmlChar *version;
309 : int standalone;
310 : int res_code;
311 :
5633 tgl 312 CBC 11386 : if ((res_code = parse_xml_decl((xmlChar *) str,
313 : &len, &version, NULL, &standalone)) == 0)
5925 peter_e 314 ECB : {
315 : StringInfoData buf;
316 :
5925 peter_e 317 CBC 11386 : initStringInfo(&buf);
318 :
5918 peter_e 319 GIC 11386 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
320 : {
321 : /*
5624 bruce 322 ECB : * If we are not going to produce an XML declaration, eat a single
323 : * newline in the original string to prevent empty first lines in
324 : * the output.
325 : */
5925 peter_e 326 GIC 11362 : if (*(str + len) == '\n')
5925 peter_e 327 CBC 5 : len += 1;
328 : }
329 11386 : appendStringInfoString(&buf, str + len);
330 :
5318 tgl 331 GIC 11386 : pfree(str);
332 :
5925 peter_e 333 11386 : return buf.data;
334 : }
335 :
114 tgl 336 UNC 0 : ereport(WARNING,
337 : errcode(ERRCODE_INTERNAL_ERROR),
338 : errmsg_internal("could not parse XML declaration in stored value"),
339 : errdetail_for_xml_code(res_code));
5925 peter_e 340 ECB : #endif
5925 peter_e 341 UIC 0 : return str;
5925 peter_e 342 ECB : }
343 :
344 :
345 : Datum
5953 peter_e 346 GIC 11254 : xml_out(PG_FUNCTION_ARGS)
5953 peter_e 347 EUB : {
5624 bruce 348 GIC 11254 : xmltype *x = PG_GETARG_XML_P(0);
349 :
350 : /*
351 : * xml_out removes the encoding property in all cases. This is because we
5624 bruce 352 EUB : * cannot control from here whether the datum will be converted to a
353 : * different client encoding, so we'd do more harm than good by including
354 : * it.
355 : */
5925 peter_e 356 GIC 11254 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
5953 peter_e 357 ECB : }
358 :
359 :
360 : Datum
5946 peter_e 361 UIC 0 : xml_recv(PG_FUNCTION_ARGS)
362 : {
363 : #ifdef USE_LIBXML
364 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
365 : xmltype *result;
366 : char *str;
5925 peter_e 367 ECB : char *newstr;
368 : int nbytes;
369 : xmlDocPtr doc;
4965 heikki.linnakangas 370 UIC 0 : xmlChar *encodingStr = NULL;
371 : int encoding;
5946 peter_e 372 EUB :
373 : /*
374 : * Read the data in raw format. We don't know yet what the encoding is, as
5624 bruce 375 : * that information is embedded in the xml declaration; so we have to
376 : * parse that before converting to server encoding.
377 : */
5677 tgl 378 UIC 0 : nbytes = buf->len - buf->cursor;
379 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
380 :
5677 tgl 381 EUB : /*
382 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
383 : * than make a separate copy, make the temporary result one byte bigger
384 : * than it needs to be.
385 : */
5677 tgl 386 UIC 0 : result = palloc(nbytes + 1 + VARHDRSZ);
5885 387 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
5946 peter_e 388 0 : memcpy(VARDATA(result), str, nbytes);
5677 tgl 389 UBC 0 : str = VARDATA(result);
390 0 : str[nbytes] = '\0';
391 :
4058 peter_e 392 UIC 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
393 :
394 : /*
395 : * If encoding wasn't explicitly specified in the XML header, treat it as
396 : * UTF-8, as that's the default in XML. This is different from xml_in(),
4965 heikki.linnakangas 397 EUB : * where the input has to go through the normal client to server encoding
398 : * conversion.
399 : */
4965 heikki.linnakangas 400 UBC 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
5946 peter_e 401 EUB :
402 : /*
5624 bruce 403 : * Parse the data to check if it is well-formed XML data. Assume that
404 : * xml_parse will throw ERROR if not.
405 : */
25 tgl 406 UNC 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
5937 peter_e 407 UIC 0 : xmlFreeDoc(doc);
408 :
409 : /* Now that we know what we're dealing with, convert to server encoding */
3332 tgl 410 0 : newstr = pg_any_to_server(str, nbytes, encoding);
5925 peter_e 411 EUB :
5925 peter_e 412 UIC 0 : if (newstr != str)
413 : {
5677 tgl 414 0 : pfree(result);
5453 415 0 : result = (xmltype *) cstring_to_text(newstr);
5677 416 0 : pfree(newstr);
5925 peter_e 417 EUB : }
418 :
5946 peter_e 419 UIC 0 : PG_RETURN_XML_P(result);
420 : #else
5946 peter_e 421 EUB : NO_XML_SUPPORT();
422 : return 0;
423 : #endif
424 : }
425 :
426 :
427 : Datum
5946 peter_e 428 UIC 0 : xml_send(PG_FUNCTION_ARGS)
429 : {
5624 bruce 430 UBC 0 : xmltype *x = PG_GETARG_XML_P(0);
431 : char *outval;
432 : StringInfoData buf;
433 :
434 : /*
435 : * xml_out_internal doesn't convert the encoding, it just prints the right
436 : * declaration. pq_sendtext will do the conversion.
437 : */
5677 tgl 438 UIC 0 : outval = xml_out_internal(x, pg_get_client_encoding());
5946 peter_e 439 EUB :
5946 peter_e 440 UIC 0 : pq_begintypsend(&buf);
5677 tgl 441 UBC 0 : pq_sendtext(&buf, outval, strlen(outval));
5677 tgl 442 UIC 0 : pfree(outval);
5946 peter_e 443 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
444 : }
445 :
446 :
447 : #ifdef USE_LIBXML
448 : static void
5953 peter_e 449 GBC 66 : appendStringInfoText(StringInfo str, const text *t)
450 : {
2219 noah 451 66 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
5953 peter_e 452 66 : }
5896 peter_e 453 EUB : #endif
5953 454 :
455 :
456 : static xmltype *
5953 peter_e 457 GIC 10889 : stringinfo_to_xmltype(StringInfo buf)
458 : {
5453 tgl 459 10889 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
5953 peter_e 460 ECB : }
461 :
5937 462 :
5925 463 : static xmltype *
5925 peter_e 464 GIC 39 : cstring_to_xmltype(const char *string)
465 : {
5453 tgl 466 39 : return (xmltype *) cstring_to_text(string);
467 : }
5925 peter_e 468 ECB :
469 :
5896 470 : #ifdef USE_LIBXML
471 : static xmltype *
5937 peter_e 472 GIC 10950 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
473 : {
4228 474 10950 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
5453 tgl 475 ECB : xmlBufferLength(buf));
476 : }
5953 peter_e 477 : #endif
478 :
479 :
480 : Datum
5953 peter_e 481 GIC 21 : xmlcomment(PG_FUNCTION_ARGS)
482 : {
5953 peter_e 483 ECB : #ifdef USE_LIBXML
2219 noah 484 GIC 21 : text *arg = PG_GETARG_TEXT_PP(0);
2219 noah 485 CBC 21 : char *argdata = VARDATA_ANY(arg);
2219 noah 486 GIC 21 : int len = VARSIZE_ANY_EXHDR(arg);
487 : StringInfoData buf;
488 : int i;
489 :
490 : /* check for "--" in string or "-" at the end */
5953 peter_e 491 90 : for (i = 1; i < len; i++)
5633 tgl 492 ECB : {
5633 tgl 493 GIC 72 : if (argdata[i] == '-' && argdata[i - 1] == '-')
494 3 : ereport(ERROR,
5633 tgl 495 ECB : (errcode(ERRCODE_INVALID_XML_COMMENT),
496 : errmsg("invalid XML comment")));
497 : }
5633 tgl 498 GIC 18 : if (len > 0 && argdata[len - 1] == '-')
499 3 : ereport(ERROR,
500 : (errcode(ERRCODE_INVALID_XML_COMMENT),
501 : errmsg("invalid XML comment")));
5953 peter_e 502 ECB :
5953 peter_e 503 GIC 15 : initStringInfo(&buf);
3447 rhaas 504 CBC 15 : appendStringInfoString(&buf, "<!--");
5953 peter_e 505 15 : appendStringInfoText(&buf, arg);
3447 rhaas 506 GIC 15 : appendStringInfoString(&buf, "-->");
507 :
5953 peter_e 508 15 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
5953 peter_e 509 ECB : #else
510 : NO_XML_SUPPORT();
511 : return 0;
512 : #endif
513 : }
514 :
515 :
5923 516 :
517 : /*
518 : * TODO: xmlconcat needs to merge the notations and unparsed entities
3260 bruce 519 : * of the argument values. Not very important in practice, though.
520 : */
521 : xmltype *
5923 peter_e 522 GIC 10764 : xmlconcat(List *args)
523 : {
524 : #ifdef USE_LIBXML
525 10764 : int global_standalone = 1;
5624 bruce 526 10764 : xmlChar *global_version = NULL;
5923 peter_e 527 10764 : bool global_version_no_value = false;
528 : StringInfoData buf;
529 : ListCell *v;
530 :
531 10764 : initStringInfo(&buf);
532 32295 : foreach(v, args)
5923 peter_e 533 ECB : {
5624 bruce 534 GIC 21531 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
535 : size_t len;
5624 bruce 536 ECB : xmlChar *version;
5923 peter_e 537 : int standalone;
538 : char *str;
539 :
5923 peter_e 540 GIC 21531 : len = VARSIZE(x) - VARHDRSZ;
5453 tgl 541 21531 : str = text_to_cstring((text *) x);
5923 peter_e 542 ECB :
5923 peter_e 543 CBC 21531 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
544 :
545 21531 : if (standalone == 0 && global_standalone == 1)
5923 peter_e 546 UIC 0 : global_standalone = 0;
5923 peter_e 547 GIC 21531 : if (standalone < 0)
548 21525 : global_standalone = -1;
549 :
5918 550 21531 : if (!version)
5918 peter_e 551 CBC 21522 : global_version_no_value = true;
552 9 : else if (!global_version)
5080 tgl 553 GIC 6 : global_version = version;
5918 peter_e 554 CBC 3 : else if (xmlStrcmp(version, global_version) != 0)
5923 peter_e 555 UIC 0 : global_version_no_value = true;
5923 peter_e 556 ECB :
5923 peter_e 557 GBC 21531 : appendStringInfoString(&buf, str + len);
5923 peter_e 558 CBC 21531 : pfree(str);
5923 peter_e 559 ECB : }
560 :
5923 peter_e 561 CBC 10764 : if (!global_version_no_value || global_standalone >= 0)
5923 peter_e 562 ECB : {
563 : StringInfoData buf2;
564 :
5923 peter_e 565 CBC 3 : initStringInfo(&buf2);
5923 peter_e 566 EUB :
5918 peter_e 567 GIC 3 : print_xml_decl(&buf2,
5633 tgl 568 CBC 3 : (!global_version_no_value) ? global_version : NULL,
5918 peter_e 569 ECB : 0,
570 : global_standalone);
571 :
1356 drowley 572 CBC 3 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
5923 peter_e 573 GIC 3 : buf = buf2;
574 : }
575 :
5923 peter_e 576 CBC 10764 : return stringinfo_to_xmltype(&buf);
577 : #else
5923 peter_e 578 ECB : NO_XML_SUPPORT();
579 : return NULL;
580 : #endif
581 : }
582 :
583 :
584 : /*
585 : * XMLAGG support
586 : */
587 : Datum
5923 peter_e 588 GIC 10752 : xmlconcat2(PG_FUNCTION_ARGS)
589 : {
590 10752 : if (PG_ARGISNULL(0))
591 : {
592 9 : if (PG_ARGISNULL(1))
5923 peter_e 593 UIC 0 : PG_RETURN_NULL();
594 : else
5923 peter_e 595 GIC 9 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
596 : }
597 10743 : else if (PG_ARGISNULL(1))
5923 peter_e 598 UIC 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
5923 peter_e 599 ECB : else
5633 tgl 600 GIC 10743 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
5633 tgl 601 ECB : PG_GETARG_XML_P(1))));
602 : }
5923 peter_e 603 :
5923 peter_e 604 EUB :
605 : Datum
5950 tgl 606 CBC 5 : texttoxml(PG_FUNCTION_ARGS)
607 : {
2219 noah 608 5 : text *data = PG_GETARG_TEXT_PP(0);
5953 peter_e 609 EUB :
5909 peter_e 610 GIC 5 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
5909 peter_e 611 ECB : }
612 :
613 :
614 : Datum
5909 peter_e 615 UIC 0 : xmltotext(PG_FUNCTION_ARGS)
616 : {
5624 bruce 617 LBC 0 : xmltype *data = PG_GETARG_XML_P(0);
618 :
5612 peter_e 619 ECB : /* It's actually binary compatible. */
5612 tgl 620 UIC 0 : PG_RETURN_TEXT_P((text *) data);
5909 peter_e 621 ECB : }
622 :
623 :
624 : text *
25 tgl 625 GNC 84 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
5909 peter_e 626 EUB : {
627 : #ifdef USE_LIBXML
628 : text *volatile result;
629 : xmlDocPtr doc;
630 : XmlOptionType parsed_xmloptiontype;
631 : xmlNodePtr content_nodes;
25 tgl 632 GNC 84 : volatile xmlBufferPtr buf = NULL;
633 84 : volatile xmlSaveCtxtPtr ctxt = NULL;
634 84 : ErrorSaveContext escontext = {T_ErrorSaveContext};
635 : PgXmlErrorContext *xmlerrcxt;
636 : #endif
637 :
638 84 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
639 : {
640 : /*
641 : * We don't actually need to do anything, so just return the
642 : * binary-compatible input. For backwards-compatibility reasons,
643 : * allow such cases to succeed even without USE_LIBXML.
644 : */
645 18 : return (text *) data;
646 : }
647 :
648 : #ifdef USE_LIBXML
649 : /* Parse the input according to the xmloption */
650 66 : doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
651 : &parsed_xmloptiontype, &content_nodes,
652 : (Node *) &escontext);
653 66 : if (doc == NULL || escontext.error_occurred)
654 : {
655 15 : if (doc)
25 tgl 656 UNC 0 : xmlFreeDoc(doc);
657 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
5909 peter_e 658 GBC 15 : ereport(ERROR,
659 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
660 : errmsg("not an XML document")));
661 : }
25 tgl 662 EUB :
663 : /* If we weren't asked to indent, we're done. */
25 tgl 664 GNC 51 : if (!indent)
665 : {
666 9 : xmlFreeDoc(doc);
667 9 : return (text *) data;
668 : }
669 :
670 : /* Otherwise, we gotta spin up some error handling. */
671 42 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
672 :
673 42 : PG_TRY();
674 : {
675 42 : size_t decl_len = 0;
676 :
677 : /* The serialized data will go into this buffer. */
678 42 : buf = xmlBufferCreate();
679 :
680 42 : if (buf == NULL || xmlerrcxt->err_occurred)
25 tgl 681 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
682 : "could not allocate xmlBuffer");
683 :
684 : /* Detect whether there's an XML declaration */
25 tgl 685 GNC 42 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
686 :
687 : /*
688 : * Emit declaration only if the input had one. Note: some versions of
689 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
690 : * passed, so don't do that. We don't want any encoding conversion
691 : * anyway.
692 : */
693 42 : if (decl_len == 0)
694 36 : ctxt = xmlSaveToBuffer(buf, NULL,
695 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
696 : else
697 6 : ctxt = xmlSaveToBuffer(buf, NULL,
698 : XML_SAVE_FORMAT);
699 :
700 42 : if (ctxt == NULL || xmlerrcxt->err_occurred)
25 tgl 701 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
702 : "could not allocate xmlSaveCtxt");
703 :
25 tgl 704 GNC 42 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
705 : {
706 : /* If it's a document, saving is easy. */
707 18 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
25 tgl 708 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
709 : "could not save document to xmlBuffer");
710 : }
25 tgl 711 GNC 24 : else if (content_nodes != NULL)
712 : {
713 : /*
714 : * Deal with the case where we have non-singly-rooted XML.
715 : * libxml's dump functions don't work well for that without help.
716 : * We build a fake root node that serves as a container for the
717 : * content nodes, and then iterate over the nodes.
718 : */
719 : xmlNodePtr root;
720 : xmlNodePtr newline;
721 :
722 21 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
723 21 : if (root == NULL || xmlerrcxt->err_occurred)
25 tgl 724 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
725 : "could not allocate xml node");
726 :
727 : /* This attaches root to doc, so we need not free it separately. */
25 tgl 728 GNC 21 : xmlDocSetRootElement(doc, root);
729 21 : xmlAddChild(root, content_nodes);
730 :
731 : /*
732 : * We use this node to insert newlines in the dump. Note: in at
733 : * least some libxml versions, xmlNewDocText would not attach the
734 : * node to the document even if we passed it. Therefore, manage
735 : * freeing of this node manually, and pass NULL here to make sure
736 : * there's not a dangling link.
737 : */
738 21 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
739 21 : if (newline == NULL || xmlerrcxt->err_occurred)
25 tgl 740 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
741 : "could not allocate xml node");
742 :
25 tgl 743 GNC 54 : for (xmlNodePtr node = root->children; node; node = node->next)
744 : {
745 : /* insert newlines between nodes */
746 33 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
747 : {
748 9 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
749 : {
25 tgl 750 UNC 0 : xmlFreeNode(newline);
751 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
752 : "could not save newline to xmlBuffer");
753 : }
754 : }
755 :
25 tgl 756 GNC 33 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
757 : {
25 tgl 758 UNC 0 : xmlFreeNode(newline);
759 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
760 : "could not save content to xmlBuffer");
761 : }
762 : }
763 :
25 tgl 764 GNC 21 : xmlFreeNode(newline);
765 : }
766 :
767 42 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
768 : {
25 tgl 769 UNC 0 : ctxt = NULL; /* don't try to close it again */
770 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
771 : "could not close xmlSaveCtxtPtr");
772 : }
773 :
25 tgl 774 GNC 42 : result = (text *) xmlBuffer_to_xmltype(buf);
775 : }
25 tgl 776 UNC 0 : PG_CATCH();
777 : {
778 0 : if (ctxt)
779 0 : xmlSaveClose(ctxt);
780 0 : if (buf)
781 0 : xmlBufferFree(buf);
782 0 : if (doc)
783 0 : xmlFreeDoc(doc);
784 :
785 0 : pg_xml_done(xmlerrcxt, true);
786 :
787 0 : PG_RE_THROW();
788 : }
25 tgl 789 GNC 42 : PG_END_TRY();
790 :
791 42 : xmlBufferFree(buf);
792 42 : xmlFreeDoc(doc);
793 :
794 42 : pg_xml_done(xmlerrcxt, false);
795 :
796 42 : return result;
797 : #else
798 : NO_XML_SUPPORT();
799 : return NULL;
800 : #endif
801 : }
802 :
5953 peter_e 803 ECB :
804 : xmltype *
2217 andres 805 GIC 10830 : xmlelement(XmlExpr *xexpr,
806 : Datum *named_argvalue, bool *named_argnull,
807 : Datum *argvalue, bool *argnull)
808 : {
809 : #ifdef USE_LIBXML
5624 bruce 810 ECB : xmltype *result;
5634 tgl 811 : List *named_arg_strings;
812 : List *arg_strings;
813 : int i;
814 : ListCell *arg;
815 : ListCell *narg;
4281 816 : PgXmlErrorContext *xmlerrcxt;
4281 tgl 817 GIC 10830 : volatile xmlBufferPtr buf = NULL;
818 10830 : volatile xmlTextWriterPtr writer = NULL;
819 :
820 : /*
821 : * All arguments are already evaluated, and their values are passed in the
822 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
2217 andres 823 ECB : * issues if one of the arguments involves a call to some other function
824 : * or subsystem that wants to use libxml on its own terms. We examine the
825 : * original XmlExpr to identify the numbers and types of the arguments.
826 : */
5634 tgl 827 GIC 10830 : named_arg_strings = NIL;
5933 peter_e 828 CBC 10830 : i = 0;
2217 andres 829 GIC 10854 : foreach(arg, xexpr->named_args)
830 : {
2217 andres 831 CBC 27 : Expr *e = (Expr *) lfirst(arg);
832 : char *str;
5933 peter_e 833 ECB :
2217 andres 834 GBC 27 : if (named_argnull[i])
5634 tgl 835 UIC 0 : str = NULL;
5634 tgl 836 ECB : else
2217 andres 837 GIC 27 : str = map_sql_value_to_xml_value(named_argvalue[i],
838 : exprType((Node *) e),
839 : false);
5634 tgl 840 24 : named_arg_strings = lappend(named_arg_strings, str);
5933 peter_e 841 24 : i++;
5933 peter_e 842 ECB : }
843 :
5634 tgl 844 CBC 10827 : arg_strings = NIL;
2217 andres 845 10827 : i = 0;
2217 andres 846 GIC 21642 : foreach(arg, xexpr->args)
847 : {
848 10815 : Expr *e = (Expr *) lfirst(arg);
5634 tgl 849 ECB : char *str;
850 :
851 : /* here we can just forget NULL elements immediately */
2217 andres 852 GIC 10815 : if (!argnull[i])
5634 tgl 853 ECB : {
2217 andres 854 GIC 10815 : str = map_sql_value_to_xml_value(argvalue[i],
855 : exprType((Node *) e),
2217 andres 856 ECB : true);
5634 tgl 857 GIC 10815 : arg_strings = lappend(arg_strings, str);
5634 tgl 858 ECB : }
2217 andres 859 GBC 10815 : i++;
860 : }
861 :
4281 tgl 862 GIC 10827 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
5634 tgl 863 ECB :
5079 tgl 864 GIC 10827 : PG_TRY();
865 : {
5050 bruce 866 10827 : buf = xmlBufferCreate();
4281 tgl 867 10827 : if (buf == NULL || xmlerrcxt->err_occurred)
4281 tgl 868 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
869 : "could not allocate xmlBuffer");
5050 bruce 870 GIC 10827 : writer = xmlNewTextWriterMemory(buf, 0);
4281 tgl 871 CBC 10827 : if (writer == NULL || xmlerrcxt->err_occurred)
4281 tgl 872 LBC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
873 : "could not allocate xmlTextWriter");
874 :
5050 bruce 875 CBC 10827 : xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
876 :
5050 bruce 877 GIC 10851 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
5050 bruce 878 ECB : {
5050 bruce 879 GBC 24 : char *str = (char *) lfirst(arg);
5050 bruce 880 GIC 24 : char *argname = strVal(lfirst(narg));
881 :
5050 bruce 882 CBC 24 : if (str)
5050 bruce 883 GIC 24 : xmlTextWriterWriteAttribute(writer,
884 : (xmlChar *) argname,
5050 bruce 885 ECB : (xmlChar *) str);
5050 bruce 886 EUB : }
887 :
5050 bruce 888 GIC 21642 : foreach(arg, arg_strings)
5050 bruce 889 ECB : {
5050 bruce 890 GIC 10815 : char *str = (char *) lfirst(arg);
891 :
892 10815 : xmlTextWriterWriteRaw(writer, (xmlChar *) str);
893 : }
894 :
895 10827 : xmlTextWriterEndElement(writer);
896 :
897 : /* we MUST do this now to flush data out to the buffer ... */
898 10827 : xmlFreeTextWriter(writer);
899 10827 : writer = NULL;
5933 peter_e 900 ECB :
5050 bruce 901 CBC 10827 : result = xmlBuffer_to_xmltype(buf);
5079 tgl 902 EUB : }
5079 tgl 903 UIC 0 : PG_CATCH();
904 : {
905 0 : if (writer)
5079 tgl 906 LBC 0 : xmlFreeTextWriter(writer);
907 0 : if (buf)
5079 tgl 908 UIC 0 : xmlBufferFree(buf);
909 :
4281 910 0 : pg_xml_done(xmlerrcxt, true);
911 :
5079 912 0 : PG_RE_THROW();
913 : }
5079 tgl 914 GIC 10827 : PG_END_TRY();
915 :
5933 peter_e 916 CBC 10827 : xmlBufferFree(buf);
5634 tgl 917 ECB :
4281 tgl 918 GBC 10827 : pg_xml_done(xmlerrcxt, false);
919 :
5933 peter_e 920 GIC 10827 : return result;
5933 peter_e 921 ECB : #else
922 : NO_XML_SUPPORT();
923 : return NULL;
924 : #endif
925 : }
926 :
927 :
5950 tgl 928 EUB : xmltype *
5909 peter_e 929 GBC 71 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
930 : {
931 : #ifdef USE_LIBXML
932 : xmlDocPtr doc;
933 :
4965 heikki.linnakangas 934 CBC 71 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
935 : GetDatabaseEncoding(), NULL, NULL, NULL);
5937 peter_e 936 GBC 47 : xmlFreeDoc(doc);
5953 peter_e 937 EUB :
5950 tgl 938 GIC 47 : return (xmltype *) data;
939 : #else
940 : NO_XML_SUPPORT();
941 : return NULL;
5953 peter_e 942 ECB : #endif
943 : }
944 :
945 :
946 : xmltype *
1986 peter_e 947 GBC 36 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
5953 peter_e 948 EUB : {
949 : #ifdef USE_LIBXML
950 : xmltype *result;
951 : StringInfoData buf;
5953 peter_e 952 ECB :
5630 peter_e 953 GIC 36 : if (pg_strcasecmp(target, "xml") == 0)
5953 peter_e 954 GBC 6 : ereport(ERROR,
955 : (errcode(ERRCODE_SYNTAX_ERROR), /* really */
5953 peter_e 956 EUB : errmsg("invalid XML processing instruction"),
5630 957 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
5953 958 :
5936 959 : /*
5624 bruce 960 : * Following the SQL standard, the null check comes after the syntax check
961 : * above.
962 : */
5936 peter_e 963 GBC 30 : *result_is_null = arg_is_null;
5936 peter_e 964 GIC 30 : if (*result_is_null)
5624 bruce 965 GBC 6 : return NULL;
966 :
5953 peter_e 967 CBC 24 : initStringInfo(&buf);
968 :
5950 tgl 969 24 : appendStringInfo(&buf, "<?%s", target);
5950 tgl 970 ECB :
5950 tgl 971 GIC 24 : if (arg != NULL)
5953 peter_e 972 ECB : {
973 : char *string;
974 :
5493 tgl 975 GIC 12 : string = text_to_cstring(arg);
5950 976 12 : if (strstr(string, "?>") != NULL)
5624 bruce 977 3 : ereport(ERROR,
978 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
979 : errmsg("invalid XML processing instruction"),
980 : errdetail("XML processing instruction cannot contain \"?>\".")));
981 :
5950 tgl 982 9 : appendStringInfoChar(&buf, ' ');
5936 peter_e 983 CBC 9 : appendStringInfoString(&buf, string + strspn(string, " "));
5950 tgl 984 GIC 9 : pfree(string);
985 : }
5953 peter_e 986 21 : appendStringInfoString(&buf, "?>");
987 :
5950 tgl 988 21 : result = stringinfo_to_xmltype(&buf);
989 21 : pfree(buf.data);
990 21 : return result;
991 : #else
992 : NO_XML_SUPPORT();
993 : return NULL;
994 : #endif
5953 peter_e 995 ECB : }
996 :
997 :
998 : xmltype *
5624 bruce 999 GIC 30 : xmlroot(xmltype *data, text *version, int standalone)
1000 : {
1001 : #ifdef USE_LIBXML
1002 : char *str;
1003 : size_t len;
1004 : xmlChar *orig_version;
5918 peter_e 1005 ECB : int orig_standalone;
1006 : StringInfoData buf;
5953 1007 :
5918 peter_e 1008 GIC 30 : len = VARSIZE(data) - VARHDRSZ;
5453 tgl 1009 CBC 30 : str = text_to_cstring((text *) data);
1010 :
5918 peter_e 1011 GIC 30 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
5953 peter_e 1012 ECB :
5950 tgl 1013 GBC 30 : if (version)
5918 peter_e 1014 GIC 12 : orig_version = xml_text2xmlChar(version);
5937 peter_e 1015 ECB : else
5918 peter_e 1016 GIC 18 : orig_version = NULL;
1017 :
5937 peter_e 1018 CBC 30 : switch (standalone)
5950 tgl 1019 ECB : {
5918 peter_e 1020 GIC 9 : case XML_STANDALONE_YES:
1021 9 : orig_standalone = 1;
5918 peter_e 1022 CBC 9 : break;
1023 6 : case XML_STANDALONE_NO:
1024 6 : orig_standalone = 0;
5937 peter_e 1025 GIC 6 : break;
5918 peter_e 1026 CBC 6 : case XML_STANDALONE_NO_VALUE:
5918 peter_e 1027 GIC 6 : orig_standalone = -1;
5937 1028 6 : break;
5918 1029 9 : case XML_STANDALONE_OMITTED:
5918 peter_e 1030 ECB : /* leave original value */
5937 peter_e 1031 GIC 9 : break;
5953 peter_e 1032 ECB : }
1033 :
5918 peter_e 1034 GIC 30 : initStringInfo(&buf);
5918 peter_e 1035 CBC 30 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
5918 peter_e 1036 GIC 30 : appendStringInfoString(&buf, str + len);
5937 peter_e 1037 ECB :
5918 peter_e 1038 GIC 30 : return stringinfo_to_xmltype(&buf);
1039 : #else
5953 peter_e 1040 ECB : NO_XML_SUPPORT();
1041 : return NULL;
1042 : #endif
1043 : }
1044 :
1045 :
5953 peter_e 1046 EUB : /*
1047 : * Validate document (given as string) against DTD (given as external link)
5517 tgl 1048 ECB : *
1049 : * This has been removed because it is a security hole: unprivileged users
5517 tgl 1050 EUB : * should not be able to use Postgres to fetch arbitrary external files,
1051 : * which unfortunately is exactly what libxml is willing to do with the DTD
1052 : * parameter.
5953 peter_e 1053 ECB : */
1054 : Datum
5953 peter_e 1055 LBC 0 : xmlvalidate(PG_FUNCTION_ARGS)
1056 : {
5517 tgl 1057 0 : ereport(ERROR,
5517 tgl 1058 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1059 : errmsg("xmlvalidate is not implemented")));
5953 peter_e 1060 : return 0;
1061 : }
1062 :
1063 :
1064 : bool
5624 bruce 1065 GIC 12 : xml_is_document(xmltype *arg)
5929 peter_e 1066 ECB : {
1067 : #ifdef USE_LIBXML
1068 : xmlDocPtr doc;
114 tgl 1069 GNC 12 : ErrorSaveContext escontext = {T_ErrorSaveContext};
5929 peter_e 1070 ECB :
1071 : /*
1072 : * We'll report "true" if no soft error is reported by xml_parse().
1073 : */
114 tgl 1074 GNC 12 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1075 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
5929 peter_e 1076 GIC 12 : if (doc)
1077 6 : xmlFreeDoc(doc);
1078 :
114 tgl 1079 GNC 12 : return !escontext.error_occurred;
1080 : #else /* not USE_LIBXML */
1081 : NO_XML_SUPPORT();
1082 : return false;
1083 : #endif /* not USE_LIBXML */
5929 peter_e 1084 ECB : }
1085 :
1086 :
1087 : #ifdef USE_LIBXML
1088 :
5953 1089 : /*
1090 : * pg_xml_init_library --- set up for use of libxml
4785 tgl 1091 : *
1092 : * This should be called by each function that is about to use libxml
4281 1093 : * facilities but doesn't require error handling. It initializes libxml
1094 : * and verifies compatibility with the loaded libxml version. These are
1095 : * once-per-session activities.
1096 : *
1097 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1098 : * check)
1099 : */
1100 : void
4281 tgl 1101 GIC 45661 : pg_xml_init_library(void)
5953 peter_e 1102 ECB : {
1103 : static bool first_time = true;
1104 :
5634 tgl 1105 GIC 45661 : if (first_time)
1106 : {
1107 : /* Stuff we need do only once per session */
5950 tgl 1108 ECB :
5634 1109 : /*
1110 : * Currently, we have no pure UTF-8 support for internals -- check if
1111 : * we can work.
1112 : */
1113 : if (sizeof(char) != sizeof(xmlChar))
1114 : ereport(ERROR,
1115 : (errmsg("could not initialize XML library"),
1116 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1117 : sizeof(char), sizeof(xmlChar))));
1118 :
5079 1119 : #ifdef USE_LIBXMLCONTEXT
4281 1120 : /* Set up libxml's memory allocation our way */
1121 : xml_memory_init();
5079 1122 : #endif
1123 :
5634 1124 : /* Check library compatibility */
5634 tgl 1125 GIC 13 : LIBXML_TEST_VERSION;
5634 tgl 1126 ECB :
5634 tgl 1127 GIC 13 : first_time = false;
1128 : }
4281 1129 45661 : }
5916 peter_e 1130 ECB :
4281 tgl 1131 : /*
1132 : * pg_xml_init --- set up for use of libxml and register an error handler
1133 : *
1134 : * This should be called by each function that is about to use libxml
1135 : * facilities and requires error handling. It initializes libxml with
1136 : * pg_xml_init_library() and establishes our libxml error handler.
1137 : *
1138 : * strictness determines which errors are reported and which are ignored.
1139 : *
1140 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1141 : * that pg_xml_done() is called during either normal or error exit.
1142 : *
1143 : * This is exported for use by contrib/xml2, as well as other code that might
1144 : * wish to share use of this module's libxml error handler.
1145 : */
1146 : PgXmlErrorContext *
4281 tgl 1147 GIC 11923 : pg_xml_init(PgXmlStrictness strictness)
1148 : {
1149 : PgXmlErrorContext *errcxt;
1150 : void *new_errcxt;
1151 :
1152 : /* Do one-time setup if needed */
1153 11923 : pg_xml_init_library();
4281 tgl 1154 ECB :
1155 : /* Create error handling context structure */
4281 tgl 1156 GIC 11923 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1157 11923 : errcxt->magic = ERRCXT_MAGIC;
1158 11923 : errcxt->strictness = strictness;
1159 11923 : errcxt->err_occurred = false;
1160 11923 : initStringInfo(&errcxt->err_buf);
1161 :
1162 : /*
4281 tgl 1163 ECB : * Save original error handler and install ours. libxml originally didn't
1164 : * distinguish between the contexts for generic and for structured error
1165 : * handlers. If we're using an old libxml version, we must thus save the
3955 bruce 1166 : * generic error context, even though we're using a structured error
1167 : * handler.
4281 tgl 1168 : */
4281 tgl 1169 CBC 11923 : errcxt->saved_errfunc = xmlStructuredError;
1170 :
4281 tgl 1171 ECB : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
4281 tgl 1172 GIC 11923 : errcxt->saved_errcxt = xmlStructuredErrorContext;
4281 tgl 1173 ECB : #else
1174 : errcxt->saved_errcxt = xmlGenericErrorContext;
1175 : #endif
1176 :
4281 tgl 1177 CBC 11923 : xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
4281 tgl 1178 ECB :
4275 1179 : /*
1180 : * Verify that xmlSetStructuredErrorFunc set the context variable we
3260 bruce 1181 : * expected it to. If not, the error context pointer we just saved is not
4275 tgl 1182 : * the correct thing to restore, and since that leaves us without a way to
1183 : * restore the context in pg_xml_done, we must fail.
1184 : *
1185 : * The only known situation in which this test fails is if we compile with
1186 : * headers from a libxml2 that doesn't track the structured error context
1187 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1188 : * versa. The libxml2 authors did not treat that change as constituting
1189 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1190 : * fails to protect us from this.
1191 : */
1192 :
1193 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
4275 tgl 1194 GIC 11923 : new_errcxt = xmlStructuredErrorContext;
1195 : #else
1196 : new_errcxt = xmlGenericErrorContext;
1197 : #endif
1198 :
1199 11923 : if (new_errcxt != (void *) errcxt)
4275 tgl 1200 UIC 0 : ereport(ERROR,
1201 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1202 : errmsg("could not set up XML error handler"),
1203 : errhint("This probably indicates that the version of libxml2"
1204 : " being used is not compatible with the libxml2"
1205 : " header files that PostgreSQL was built with.")));
1206 :
1207 : /*
1208 : * Also, install an entity loader to prevent unwanted fetches of external
1209 : * files and URLs.
3890 tgl 1210 EUB : */
3890 tgl 1211 GIC 11923 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
3890 tgl 1212 GBC 11923 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1213 :
4281 tgl 1214 GIC 11923 : return errcxt;
1215 : }
1216 :
1217 :
1218 : /*
1219 : * pg_xml_done --- restore previous libxml error handling
4281 tgl 1220 ECB : *
1221 : * Resets libxml's global error-handling state to what it was before
1222 : * pg_xml_init() was called.
1223 : *
1224 : * This routine verifies that all pending errors have been dealt with
1225 : * (in assert-enabled builds, anyway).
1226 : */
1227 : void
4281 tgl 1228 GIC 11923 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
4281 tgl 1229 ECB : {
1230 : void *cur_errcxt;
1231 :
1232 : /* An assert seems like enough protection here */
4281 tgl 1233 GIC 11923 : Assert(errcxt->magic == ERRCXT_MAGIC);
4281 tgl 1234 ECB :
1235 : /*
1236 : * In a normal exit, there should be no un-handled libxml errors. But we
1237 : * shouldn't try to enforce this during error recovery, since the longjmp
1238 : * could have been thrown before xml_ereport had a chance to run.
1239 : */
4281 tgl 1240 GIC 11923 : Assert(!errcxt->err_occurred || isError);
1241 :
1242 : /*
1243 : * Check that libxml's global state is correct, warn if not. This is a
1244 : * real test and not an Assert because it has a higher probability of
1245 : * happening.
1246 : */
1247 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1248 11923 : cur_errcxt = xmlStructuredErrorContext;
1249 : #else
1250 : cur_errcxt = xmlGenericErrorContext;
1251 : #endif
1252 :
1253 11923 : if (cur_errcxt != (void *) errcxt)
4281 tgl 1254 UIC 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1255 :
3890 tgl 1256 ECB : /* Restore the saved handlers */
4281 tgl 1257 GIC 11923 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
3890 1258 11923 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1259 :
4281 tgl 1260 ECB : /*
1261 : * Mark the struct as invalid, just in case somebody somehow manages to
1262 : * call xml_errorHandler or xml_ereport with it.
1263 : */
4281 tgl 1264 GIC 11923 : errcxt->magic = 0;
1265 :
1266 : /* Release memory */
1267 11923 : pfree(errcxt->err_buf.data);
1268 11923 : pfree(errcxt);
1269 11923 : }
1270 :
1271 :
1272 : /*
1273 : * pg_xml_error_occurred() --- test the error flag
1274 : */
1275 : bool
4281 tgl 1276 UIC 0 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1277 : {
1278 0 : return errcxt->err_occurred;
1279 : }
5953 peter_e 1280 ECB :
1281 :
5936 1282 : /*
1283 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1284 : * documents" are specified by the XML specification and are parsed
1285 : * easily by libxml. "XML content" is specified by SQL/XML as the
1286 : * production "XMLDecl? content". But libxml can only parse the
1287 : * "content" part, so we have to parse the XML declaration ourselves
1288 : * to complete this.
1289 : */
1290 :
/*
 * CHECK_XML_SPACE: make the *enclosing function* return
 * XML_ERR_SPACE_REQUIRED unless the character at p is XML whitespace.
 * Does not advance p.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * SKIP_XML_SPACE: advance p past any run of XML whitespace.  Expands to a
 * bare "while" statement, so use it only as a complete statement followed
 * by a semicolon.
 */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/*
 * Every use of the argument is parenthesized so that an expression argument
 * (e.g. "x & 0xFF") is compared as a whole rather than being split up by
 * operator precedence.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
5630 peter_e 1308 :
1309 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1310 : static xmlChar *
5080 tgl 1311 CBC 100 : xml_pnstrdup(const xmlChar *str, size_t len)
5080 tgl 1312 ECB : {
5050 bruce 1313 : xmlChar *result;
5080 tgl 1314 :
5080 tgl 1315 CBC 100 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
5080 tgl 1316 GIC 100 : memcpy(result, str, len * sizeof(xmlChar));
1317 100 : result[len] = 0;
1318 100 : return result;
1319 : }
1320 :
1321 : /* Ditto, except input is char* */
1322 : static xmlChar *
1986 peter_e 1323 1212 : pg_xmlCharStrndup(const char *str, size_t len)
2223 alvherre 1324 ECB : {
1325 : xmlChar *result;
1326 :
2223 alvherre 1327 CBC 1212 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
2223 alvherre 1328 GIC 1212 : memcpy(result, str, len);
1329 1212 : result[len] = '\0';
1330 :
1331 1212 : return result;
2223 alvherre 1332 ECB : }
1333 :
1334 : /*
1335 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1336 : *
1337 : * The input xmlChar is freed regardless of success of the copy.
1338 : */
1339 : static char *
1494 alvherre 1340 GIC 54293 : xml_pstrdup_and_free(xmlChar *str)
1341 : {
1342 : char *result;
1343 :
1344 54293 : if (str)
1345 : {
1346 54293 : PG_TRY();
1347 : {
1348 54293 : result = pstrdup((char *) str);
1494 alvherre 1349 ECB : }
1255 peter 1350 UIC 0 : PG_FINALLY();
1351 : {
1494 alvherre 1352 GIC 54293 : xmlFree(str);
1353 : }
1494 alvherre 1354 CBC 54293 : PG_END_TRY();
1494 alvherre 1355 EUB : }
1356 : else
1494 alvherre 1357 UIC 0 : result = NULL;
1358 :
1494 alvherre 1359 GIC 54293 : return result;
1360 : }
1361 :
1362 : /*
1363 : * str is the null-terminated input string. Remaining arguments are
1364 : * output arguments; each can be NULL if value is not wanted.
1365 : * version and encoding are returned as locally-palloc'd strings.
5080 tgl 1366 ECB : * Result is 0 if OK, an error code if not.
1367 : */
1368 : static int
5050 bruce 1369 CBC 33738 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1370 : xmlChar **version, xmlChar **encoding, int *standalone)
1371 : {
1372 : const xmlChar *p;
1373 : const xmlChar *save_p;
1374 : size_t len;
1375 : int utf8char;
1376 : int utf8len;
1377 :
1378 : /*
1379 : * Only initialize libxml. We don't need error handling here, but we do
1380 : * need to make sure libxml is initialized before calling any of its
1381 : * functions. Note that this is safe (and a no-op) if caller has already
1382 : * done pg_xml_init().
4281 tgl 1383 ECB : */
4281 tgl 1384 GIC 33738 : pg_xml_init_library();
1385 :
1386 : /* Initialize output arguments to "not present" */
5925 peter_e 1387 33738 : if (version)
5925 peter_e 1388 CBC 33420 : *version = NULL;
5925 peter_e 1389 GIC 33738 : if (encoding)
5925 peter_e 1390 UIC 0 : *encoding = NULL;
5925 peter_e 1391 GIC 33738 : if (standalone)
1392 33420 : *standalone = -1;
1393 :
5633 tgl 1394 33738 : p = str;
5633 tgl 1395 ECB :
5624 bruce 1396 GIC 33738 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
5936 peter_e 1397 33623 : goto finished;
1398 :
1399 : /*
1400 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1401 : * rather than an XMLDecl, so we have done what we came to do and found no
1402 : * XMLDecl.
1478 tgl 1403 ECB : *
1404 : * We need an input length value for xmlGetUTF8Char, but there's no need
1405 : * to count the whole document size, so use strnlen not strlen.
1406 : */
1478 tgl 1407 GIC 115 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
5624 bruce 1408 CBC 115 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
5629 tgl 1409 GBC 115 : if (PG_XMLISNAMECHAR(utf8char))
5630 peter_e 1410 GIC 6 : goto finished;
1411 :
5936 peter_e 1412 CBC 109 : p += 5;
5936 peter_e 1413 ECB :
1414 : /* version */
5936 peter_e 1415 GIC 109 : CHECK_XML_SPACE(p);
1416 218 : SKIP_XML_SPACE(p);
5624 bruce 1417 109 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
5936 peter_e 1418 UIC 0 : return XML_ERR_VERSION_MISSING;
5936 peter_e 1419 CBC 109 : p += 7;
5936 peter_e 1420 GIC 109 : SKIP_XML_SPACE(p);
1421 109 : if (*p != '=')
5936 peter_e 1422 LBC 0 : return XML_ERR_VERSION_MISSING;
5936 peter_e 1423 CBC 109 : p += 1;
1424 109 : SKIP_XML_SPACE(p);
1425 :
5925 peter_e 1426 GIC 109 : if (*p == '\'' || *p == '"')
1427 109 : {
1428 : const xmlChar *q;
1429 :
1430 109 : q = xmlStrchr(p + 1, *p);
5925 peter_e 1431 GBC 109 : if (!q)
5925 peter_e 1432 UIC 0 : return XML_ERR_VERSION_MISSING;
5925 peter_e 1433 EUB :
5925 peter_e 1434 GIC 109 : if (version)
5080 tgl 1435 100 : *version = xml_pnstrdup(p + 1, q - p - 1);
5925 peter_e 1436 109 : p = q + 1;
1437 : }
1438 : else
5936 peter_e 1439 UIC 0 : return XML_ERR_VERSION_MISSING;
1440 :
1441 : /* encoding */
5936 peter_e 1442 GIC 109 : save_p = p;
1443 190 : SKIP_XML_SPACE(p);
5624 bruce 1444 109 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1445 : {
5936 peter_e 1446 27 : CHECK_XML_SPACE(save_p);
1447 27 : p += 8;
1448 27 : SKIP_XML_SPACE(p);
1449 27 : if (*p != '=')
5936 peter_e 1450 UIC 0 : return XML_ERR_MISSING_ENCODING;
5936 peter_e 1451 GIC 27 : p += 1;
1452 27 : SKIP_XML_SPACE(p);
1453 :
1454 27 : if (*p == '\'' || *p == '"')
1455 27 : {
1456 : const xmlChar *q;
1457 :
1458 27 : q = xmlStrchr(p + 1, *p);
1459 27 : if (!q)
5936 peter_e 1460 UIC 0 : return XML_ERR_MISSING_ENCODING;
1461 :
5925 peter_e 1462 GIC 27 : if (encoding)
5080 tgl 1463 UIC 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
5936 peter_e 1464 GIC 27 : p = q + 1;
1465 : }
5936 peter_e 1466 ECB : else
5936 peter_e 1467 UIC 0 : return XML_ERR_MISSING_ENCODING;
1468 : }
1469 : else
5936 peter_e 1470 ECB : {
5936 peter_e 1471 CBC 82 : p = save_p;
5936 peter_e 1472 ECB : }
1473 :
1474 : /* standalone */
5936 peter_e 1475 GIC 109 : save_p = p;
1476 163 : SKIP_XML_SPACE(p);
5624 bruce 1477 109 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
5936 peter_e 1478 ECB : {
5936 peter_e 1479 GIC 54 : CHECK_XML_SPACE(save_p);
1480 54 : p += 10;
1481 54 : SKIP_XML_SPACE(p);
5936 peter_e 1482 CBC 54 : if (*p != '=')
5936 peter_e 1483 LBC 0 : return XML_ERR_STANDALONE_VALUE;
5936 peter_e 1484 CBC 54 : p += 1;
5936 peter_e 1485 GIC 54 : SKIP_XML_SPACE(p);
5619 tgl 1486 CBC 108 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
5619 tgl 1487 GIC 54 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1488 : {
4334 1489 30 : if (standalone)
1490 30 : *standalone = 1;
5936 peter_e 1491 30 : p += 5;
1492 : }
5619 tgl 1493 48 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1494 24 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
5936 peter_e 1495 ECB : {
4334 tgl 1496 GIC 18 : if (standalone)
1497 18 : *standalone = 0;
5936 peter_e 1498 18 : p += 4;
5936 peter_e 1499 ECB : }
1500 : else
5936 peter_e 1501 CBC 6 : return XML_ERR_STANDALONE_VALUE;
1502 : }
5936 peter_e 1503 ECB : else
1504 : {
5936 peter_e 1505 GBC 55 : p = save_p;
1506 : }
5936 peter_e 1507 ECB :
5936 peter_e 1508 GIC 103 : SKIP_XML_SPACE(p);
5624 bruce 1509 CBC 103 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
5936 peter_e 1510 UIC 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
5936 peter_e 1511 GIC 103 : p += 2;
5936 peter_e 1512 EUB :
5936 peter_e 1513 GIC 33732 : finished:
5925 peter_e 1514 CBC 33732 : len = p - str;
1515 :
5925 peter_e 1516 GIC 37182 : for (p = str; p < str + len; p++)
1517 3450 : if (*p > 127)
5925 peter_e 1518 UIC 0 : return XML_ERR_INVALID_CHAR;
1519 :
5925 peter_e 1520 GIC 33732 : if (lenp)
1521 33732 : *lenp = len;
1522 :
5936 1523 33732 : return XML_ERR_OK;
5936 peter_e 1524 ECB : }
1525 :
1526 :
1527 : /*
1528 : * Write an XML declaration. On output, we adjust the XML declaration
1529 : * as follows. (These rules are the moral equivalent of the clause
1530 : * "Serialization of an XML value" in the SQL standard.)
1531 : *
1532 : * We try to avoid generating an XML declaration if possible. This is
1533 : * so that you don't get trivial things like xml '<foo/>' resulting in
1534 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1535 : * must provide a declaration if the standalone property is specified
1536 : * or if we include an encoding declaration. If we have a
1537 : * declaration, we must specify a version (XML requires this).
1538 : * Otherwise we only make a declaration if the version is not "1.0",
5918 1539 : * which is the default version specified in SQL:2003.
1540 : */
1541 : static bool
5050 bruce 1542 CBC 11419 : print_xml_decl(StringInfo buf, const xmlChar *version,
5633 tgl 1543 ECB : pg_enc encoding, int standalone)
5918 peter_e 1544 : {
4228 peter_e 1545 GBC 11419 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
5918 peter_e 1546 CBC 11401 : || (encoding && encoding != PG_UTF8)
1547 11401 : || standalone != -1)
1548 : {
1549 48 : appendStringInfoString(buf, "<?xml");
1550 :
1551 48 : if (version)
1552 36 : appendStringInfo(buf, " version=\"%s\"", version);
1553 : else
5918 peter_e 1554 GIC 12 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1555 :
1556 48 : if (encoding && encoding != PG_UTF8)
1557 : {
1558 : /*
1559 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1560 : * instead of LATIN1 etc.); needs field experience
1561 : */
5633 tgl 1562 LBC 0 : appendStringInfo(buf, " encoding=\"%s\"",
5633 tgl 1563 ECB : pg_encoding_to_char(encoding));
1564 : }
5918 peter_e 1565 :
5918 peter_e 1566 GIC 48 : if (standalone == 1)
5918 peter_e 1567 CBC 24 : appendStringInfoString(buf, " standalone=\"yes\"");
5918 peter_e 1568 GIC 24 : else if (standalone == 0)
1569 12 : appendStringInfoString(buf, " standalone=\"no\"");
5918 peter_e 1570 CBC 48 : appendStringInfoString(buf, "?>");
5918 peter_e 1571 ECB :
5918 peter_e 1572 CBC 48 : return true;
5918 peter_e 1573 EUB : }
5918 peter_e 1574 ECB : else
5918 peter_e 1575 CBC 11371 : return false;
5918 peter_e 1576 ECB : }
5918 peter_e 1577 EUB :
1478 tgl 1578 ECB : /*
1579 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1580 : *
1581 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1582 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1583 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1584 : * later fix that, by redefining content with reference to the "more
1585 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1586 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1478 tgl 1587 EUB : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1588 : * pg_restore).
1478 tgl 1589 ECB : *
1590 : * As used below in parse_xml when parsing for CONTENT, libxml does not give
1591 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1592 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1593 : * by detecting this case first and simply doing the parse as DOCUMENT.
1478 tgl 1594 EUB : *
1595 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1596 : * it will ordinarily start within a few dozen characters. The only things
1478 tgl 1597 ECB : * that can precede it are an XMLDecl (here, the caller will have called
1598 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1599 : * This function need only return true if it sees a valid sequence of such
1600 : * things leading to <!DOCTYPE. It can simply return false in any other
1601 : * cases, including malformed input; that will mean the input gets parsed as
1602 : * CONTENT as originally planned, with libxml reporting any errors.
1603 : *
1604 : * This is only to be called from xml_parse, when pg_xml_init has already
1478 tgl 1605 EUB : * been called. The input is already in UTF8 encoding.
1478 tgl 1606 ECB : */
1607 : static bool
1478 tgl 1608 GIC 467 : xml_doctype_in_content(const xmlChar *str)
1478 tgl 1609 ECB : {
1478 tgl 1610 CBC 467 : const xmlChar *p = str;
1611 :
1612 : for (;;)
1613 18 : {
1478 tgl 1614 ECB : const xmlChar *e;
1478 tgl 1615 EUB :
1478 tgl 1616 GIC 532 : SKIP_XML_SPACE(p);
1478 tgl 1617 CBC 485 : if (*p != '<')
1478 tgl 1618 GBC 97 : return false;
1478 tgl 1619 CBC 388 : p++;
1620 :
1478 tgl 1621 GIC 388 : if (*p == '!')
1478 tgl 1622 EUB : {
1478 tgl 1623 GIC 36 : p++;
1624 :
1625 : /* if we see <!DOCTYPE, we can return true */
1478 tgl 1626 CBC 36 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1478 tgl 1627 GIC 21 : return true;
1628 :
1629 : /* otherwise, if it's not a comment, fail */
1478 tgl 1630 CBC 15 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1478 tgl 1631 LBC 0 : return false;
1478 tgl 1632 ECB : /* find end of comment: find -- and a > must follow */
1478 tgl 1633 GIC 15 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1478 tgl 1634 CBC 15 : if (!p || p[2] != '>')
1478 tgl 1635 LBC 0 : return false;
1478 tgl 1636 ECB : /* advance over comment, and keep scanning */
1478 tgl 1637 CBC 15 : p += 3;
1478 tgl 1638 GBC 15 : continue;
1478 tgl 1639 ECB : }
1640 :
1641 : /* otherwise, if it's not a PI <?target something?>, fail */
1478 tgl 1642 CBC 352 : if (*p != '?')
1478 tgl 1643 GIC 349 : return false;
1478 tgl 1644 CBC 3 : p++;
1478 tgl 1645 ECB :
1646 : /* find end of PI (the string ?> is forbidden within a PI) */
1478 tgl 1647 GIC 3 : e = xmlStrstr(p, (xmlChar *) "?>");
1478 tgl 1648 CBC 3 : if (!e)
1478 tgl 1649 LBC 0 : return false;
1650 :
1478 tgl 1651 ECB : /* advance over PI, keep scanning */
1478 tgl 1652 CBC 3 : p = e + 2;
1478 tgl 1653 ECB : }
1654 : }
1655 :
5918 peter_e 1656 :
1657 : /*
1658 : * Convert a text object to XML internal representation
1659 : *
1660 : * data is the source data (must not be toasted!), encoding is its encoding,
1661 : * and xmloption_arg and preserve_whitespace are options for the
1662 : * transformation.
1663 : *
1664 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1665 : * XmlOptionType actually used to parse the input (typically the same as
1666 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1667 : *
1668 : * If parsed_nodes isn't NULL and the input is not an XML document, the list
1669 : * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1670 : * to *parsed_nodes.
1671 : *
1672 : * Errors normally result in ereport(ERROR), but if escontext is an
1673 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1674 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1675 : *
5079 tgl 1676 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1677 : * else a permanent memory leak will ensue! But note the result could
1678 : * be NULL after a soft error.
1679 : *
1680 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
5563 1681 : * yet do not use SAX - see xmlreader.c)
5953 peter_e 1682 EUB : */
5953 peter_e 1683 ECB : static xmlDocPtr
25 tgl 1684 GNC 617 : xml_parse(text *data, XmlOptionType xmloption_arg,
1685 : bool preserve_whitespace, int encoding,
1686 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1687 : Node *escontext)
5953 peter_e 1688 ECB : {
1689 : int32 len;
5624 bruce 1690 : xmlChar *string;
1691 : xmlChar *utf8string;
4281 tgl 1692 EUB : PgXmlErrorContext *xmlerrcxt;
4281 tgl 1693 GIC 617 : volatile xmlParserCtxtPtr ctxt = NULL;
4281 tgl 1694 CBC 617 : volatile xmlDocPtr doc = NULL;
5953 peter_e 1695 ECB :
1696 : /*
1697 : * This step looks annoyingly redundant, but we must do it to have a
1698 : * null-terminated string in case encoding conversion isn't required.
1699 : */
2118 tgl 1700 GIC 617 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
5953 peter_e 1701 CBC 617 : string = xml_text2xmlChar(data);
1702 :
1703 : /*
1704 : * If the data isn't UTF8, we must translate before giving it to libxml.
1705 : *
1706 : * XXX ideally, we'd catch any encoding conversion failure and return a
1707 : * soft error. However, failure to convert to UTF8 should be pretty darn
1708 : * rare, so for now this is left undone.
1709 : */
5925 peter_e 1710 GIC 617 : utf8string = pg_do_encoding_conversion(string,
1711 : len,
1712 : encoding,
1713 : PG_UTF8);
1714 :
1715 : /* Start up libxml and its parser */
4281 tgl 1716 617 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1717 :
1718 : /* Use a TRY block to ensure we clean up correctly */
5079 1719 617 : PG_TRY();
1720 : {
1478 1721 617 : bool parse_as_document = false;
1722 : int res_code;
1723 617 : size_t count = 0;
1724 617 : xmlChar *version = NULL;
1725 617 : int standalone = 0;
1726 :
1727 : /* Any errors here are reported as hard ereport's */
4281 tgl 1728 CBC 617 : xmlInitParser();
1729 :
4281 tgl 1730 GIC 617 : ctxt = xmlNewParserCtxt();
4281 tgl 1731 CBC 617 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4281 tgl 1732 LBC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4281 tgl 1733 ECB : "could not allocate parser context");
1734 :
1478 1735 : /* Decide whether to parse as document or content */
5050 bruce 1736 GIC 617 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1478 tgl 1737 CBC 144 : parse_as_document = true;
1478 tgl 1738 ECB : else
1739 : {
1740 : /* Parse and skip over the XML declaration, if any */
1478 tgl 1741 GIC 473 : res_code = parse_xml_decl(utf8string,
1478 tgl 1742 ECB : &count, &version, NULL, &standalone);
1478 tgl 1743 GIC 473 : if (res_code != 0)
1744 : {
114 tgl 1745 GNC 6 : errsave(escontext,
1746 : errcode(ERRCODE_INVALID_XML_CONTENT),
1747 : errmsg_internal("invalid XML content: invalid XML declaration"),
1748 : errdetail_for_xml_code(res_code));
1749 6 : goto fail;
1750 : }
1751 :
1478 tgl 1752 EUB : /* Is there a DOCTYPE element? */
1478 tgl 1753 GIC 467 : if (xml_doctype_in_content(utf8string + count))
1754 21 : parse_as_document = true;
1755 : }
1478 tgl 1756 ECB :
1757 : /* initialize output parameters */
25 tgl 1758 GNC 611 : if (parsed_xmloptiontype != NULL)
1759 66 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1760 : XMLOPTION_CONTENT;
1761 611 : if (parsed_nodes != NULL)
1762 66 : *parsed_nodes = NULL;
1763 :
1478 tgl 1764 CBC 611 : if (parse_as_document)
5079 tgl 1765 ECB : {
5050 bruce 1766 : /*
1767 : * Note, that here we try to apply DTD defaults
1768 : * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
4382 1769 : * 'Default values defined by internal DTD are applied'. As for
1770 : * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1771 : * 10.16.7.e)
5050 1772 : */
5050 bruce 1773 GIC 165 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1774 : NULL,
1775 : "UTF-8",
1776 : XML_PARSE_NOENT | XML_PARSE_DTDATTR
1777 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
4281 tgl 1778 165 : if (doc == NULL || xmlerrcxt->err_occurred)
1779 : {
1780 : /* Use original option to decide which error code to report */
1478 1781 72 : if (xmloption_arg == XMLOPTION_DOCUMENT)
114 tgl 1782 GNC 69 : xml_errsave(escontext, xmlerrcxt,
1783 : ERRCODE_INVALID_XML_DOCUMENT,
1784 : "invalid XML document");
1785 : else
1786 3 : xml_errsave(escontext, xmlerrcxt,
1787 : ERRCODE_INVALID_XML_CONTENT,
1788 : "invalid XML content");
1789 48 : goto fail;
1790 : }
1791 : }
1792 : else
1793 : {
5050 bruce 1794 GIC 446 : doc = xmlNewDoc(version);
114 tgl 1795 GNC 446 : if (doc == NULL || xmlerrcxt->err_occurred)
114 tgl 1796 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1797 : "could not allocate XML document");
1798 :
5050 bruce 1799 GIC 446 : Assert(doc->encoding == NULL);
1800 446 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
114 tgl 1801 GNC 446 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
114 tgl 1802 UNC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1803 : "could not allocate XML document");
5050 bruce 1804 GIC 446 : doc->standalone = standalone;
1805 :
1806 : /* allow empty content */
3134 peter_e 1807 446 : if (*(utf8string + count))
1808 : {
1809 868 : res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
25 tgl 1810 GNC 434 : utf8string + count,
1811 : parsed_nodes);
3134 peter_e 1812 GIC 434 : if (res_code != 0 || xmlerrcxt->err_occurred)
1813 : {
114 tgl 1814 GNC 30 : xml_errsave(escontext, xmlerrcxt,
1815 : ERRCODE_INVALID_XML_CONTENT,
1816 : "invalid XML content");
1817 6 : goto fail;
1818 : }
1819 : }
5079 tgl 1820 ECB : }
1821 :
114 tgl 1822 GNC 569 : fail:
1823 : ;
1824 : }
5079 tgl 1825 CBC 48 : PG_CATCH();
1826 : {
4281 tgl 1827 GIC 48 : if (doc != NULL)
4281 tgl 1828 CBC 24 : xmlFreeDoc(doc);
4281 tgl 1829 GIC 48 : if (ctxt != NULL)
1830 48 : xmlFreeParserCtxt(ctxt);
4281 tgl 1831 ECB :
4281 tgl 1832 CBC 48 : pg_xml_done(xmlerrcxt, true);
4281 tgl 1833 ECB :
5079 tgl 1834 CBC 48 : PG_RE_THROW();
1835 : }
1836 569 : PG_END_TRY();
1837 :
5563 1838 569 : xmlFreeParserCtxt(ctxt);
1839 :
4281 tgl 1840 GIC 569 : pg_xml_done(xmlerrcxt, false);
4281 tgl 1841 ECB :
5953 peter_e 1842 CBC 569 : return doc;
1843 : }
1844 :
5953 peter_e 1845 ECB :
5951 tgl 1846 EUB : /*
1847 : * xmlChar<->text conversions
5953 peter_e 1848 ECB : */
1849 : static xmlChar *
5953 peter_e 1850 GBC 671 : xml_text2xmlChar(text *in)
1851 : {
5453 tgl 1852 CBC 671 : return (xmlChar *) text_to_cstring(in);
5953 peter_e 1853 ECB : }
1854 :
1855 :
1856 : #ifdef USE_LIBXMLCONTEXT
5079 tgl 1857 :
5563 1858 : /*
5079 1859 : * Manage the special context used for all libxml allocations (but only
1860 : * in special debug builds; see notes at top of file)
1861 : */
5563 1862 : static void
1863 : xml_memory_init(void)
5563 tgl 1864 EUB : {
1865 : /* Create memory context if not there already */
1866 : if (LibxmlContext == NULL)
5563 tgl 1867 ECB : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1868 : "Libxml context",
1869 : ALLOCSET_DEFAULT_SIZES);
1870 :
1871 : /* Re-establish the callbacks even if already set */
1872 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1873 : }
1874 :
1875 : /*
1876 : * Wrappers for memory management functions
1877 : */
1878 : static void *
1879 : xml_palloc(size_t size)
1880 : {
1881 : return MemoryContextAlloc(LibxmlContext, size);
1882 : }
1883 :
1884 :
/* Reallocation callback: resize the chunk with repalloc() */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
1890 :
1891 :
/* Free callback: pfree() the chunk, tolerating NULL */
static void
xml_pfree(void *ptr)
{
	/* Some parts of libxml rely on xmlFree(NULL) being permitted */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
5953 peter_e 1899 :
1900 :
1901 : static char *
1902 : xml_pstrdup(const char *string)
1903 : {
1904 : return MemoryContextStrdup(LibxmlContext, string);
1905 : }
1906 : #endif /* USE_LIBXMLCONTEXT */
1907 :
1908 :
3890 tgl 1909 : /*
1910 : * xmlPgEntityLoader --- entity loader callback function
1911 : *
1912 : * Silently prevent any external entity URL from being loaded. We don't want
1913 : * to throw an error, so instead make the entity appear to expand to an empty
1914 : * string.
1915 : *
1916 : * We would prefer to allow loading entities that exist in the system's
1917 : * global XML catalog; but the available libxml2 APIs make that a complex
1918 : * and fragile task. For now, just shut down all external access.
1919 : */
1920 : static xmlParserInputPtr
3890 tgl 1921 GIC 9 : xmlPgEntityLoader(const char *URL, const char *ID,
1922 : xmlParserCtxtPtr ctxt)
1923 : {
1924 9 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
3890 tgl 1925 ECB : }
1926 :
1927 :
1928 : /*
1929 : * xml_ereport --- report an XML-related error
1930 : *
4785 1931 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1932 : * standard. This function adds libxml's native error message, if any, as
1933 : * detail.
1934 : *
1935 : * This is exported for modules that want to share the core libxml error
1936 : * handler. Note that pg_xml_init() *must* have been called previously.
1937 : */
1938 : void
4281 tgl 1939 CBC 7 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
5953 peter_e 1940 ECB : {
1941 : char *detail;
1942 :
4281 tgl 1943 : /* Defend against someone passing us a bogus context struct */
4281 tgl 1944 GIC 7 : if (errcxt->magic != ERRCXT_MAGIC)
4281 tgl 1945 LBC 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
4281 tgl 1946 ECB :
4281 tgl 1947 EUB : /* Flag that the current libxml error has been reported */
4281 tgl 1948 GIC 7 : errcxt->err_occurred = false;
1949 :
1950 : /* Include detail only if we have some text from libxml */
4281 tgl 1951 CBC 7 : if (errcxt->err_buf.len > 0)
1952 6 : detail = errcxt->err_buf.data;
1953 : else
5920 peter_e 1954 GIC 1 : detail = NULL;
1955 :
4281 tgl 1956 CBC 7 : ereport(level,
1957 : (errcode(sqlcode),
4281 tgl 1958 ECB : errmsg_internal("%s", msg),
1959 : detail ? errdetail_internal("%s", detail) : 0));
4281 tgl 1960 LBC 0 : }
1961 :
1962 :
1963 : /*
1964 : * xml_errsave --- save an XML-related error
1965 : *
1966 : * If escontext is an ErrorSaveContext, error details are saved into it,
1967 : * and control returns normally.
1968 : *
1969 : * Otherwise, the error is thrown, so that this is equivalent to
1970 : * xml_ereport() with level == ERROR.
1971 : *
1972 : * This should be used only for errors that we're sure we do not need
1973 : * a transaction abort to clean up after.
1974 : */
1975 : static void
114 tgl 1976 GNC 102 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
1977 : int sqlcode, const char *msg)
1978 : {
1979 : char *detail;
1980 :
1981 : /* Defend against someone passing us a bogus context struct */
1982 102 : if (errcxt->magic != ERRCXT_MAGIC)
114 tgl 1983 UNC 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
1984 :
1985 : /* Flag that the current libxml error has been reported */
114 tgl 1986 GNC 102 : errcxt->err_occurred = false;
1987 :
1988 : /* Include detail only if we have some text from libxml */
1989 102 : if (errcxt->err_buf.len > 0)
1990 102 : detail = errcxt->err_buf.data;
1991 : else
114 tgl 1992 UNC 0 : detail = NULL;
1993 :
114 tgl 1994 GNC 102 : errsave(escontext,
1995 : (errcode(sqlcode),
1996 : errmsg_internal("%s", msg),
1997 : detail ? errdetail_internal("%s", detail) : 0));
1998 54 : }
1999 :
2000 :
2001 : /*
4281 tgl 2002 ECB : * Error handler for libxml errors and warnings
2003 : */
2004 : static void
4281 tgl 2005 GIC 199 : xml_errorHandler(void *data, xmlErrorPtr error)
4281 tgl 2006 ECB : {
4281 tgl 2007 CBC 199 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
4281 tgl 2008 GIC 199 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2009 199 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
3955 bruce 2010 199 : xmlNodePtr node = error->node;
4281 tgl 2011 CBC 199 : const xmlChar *name = (node != NULL &&
2118 2012 199 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
4281 tgl 2013 GIC 199 : int domain = error->domain;
4281 tgl 2014 CBC 199 : int level = error->level;
4281 tgl 2015 ECB : StringInfo errorBuf;
2016 :
4275 2017 : /*
2018 : * Defend against someone passing us a bogus context struct.
2019 : *
2020 : * We force a backend exit if this check fails because longjmp'ing out of
2021 : * libxml would likely render it unsafe to use further.
2022 : */
4281 tgl 2023 GIC 199 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
4275 tgl 2024 UIC 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2025 :
4281 tgl 2026 ECB : /*----------
2027 : * Older libxml versions report some errors differently.
2028 : * First, some errors were previously reported as coming from the parser
2029 : * domain but are now reported as coming from the namespace domain.
2030 : * Second, some warnings were upgraded to errors.
2031 : * We attempt to compensate for that here.
2032 : *----------
2033 : */
4281 tgl 2034 CBC 199 : switch (error->code)
5953 peter_e 2035 ECB : {
4281 tgl 2036 GIC 15 : case XML_WAR_NS_URI:
2037 15 : level = XML_ERR_ERROR;
2038 15 : domain = XML_FROM_NAMESPACE;
4281 tgl 2039 CBC 15 : break;
2040 :
4281 tgl 2041 GIC 27 : case XML_ERR_NS_DECL_ERROR:
4281 tgl 2042 ECB : case XML_WAR_NS_URI_RELATIVE:
2043 : case XML_WAR_NS_COLUMN:
2044 : case XML_NS_ERR_XML_NAMESPACE:
2045 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2046 : case XML_NS_ERR_QNAME:
2047 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2048 : case XML_NS_ERR_EMPTY:
4281 tgl 2049 GBC 27 : domain = XML_FROM_NAMESPACE;
4281 tgl 2050 GIC 27 : break;
2051 : }
5920 peter_e 2052 ECB :
4281 tgl 2053 : /* Decide whether to act on the error or not */
4281 tgl 2054 CBC 199 : switch (domain)
4281 tgl 2055 EUB : {
4281 tgl 2056 GIC 157 : case XML_FROM_PARSER:
4281 tgl 2057 ECB : case XML_FROM_NONE:
2058 : case XML_FROM_MEMORY:
2059 : case XML_FROM_IO:
3602 bruce 2060 :
2061 : /*
3890 tgl 2062 : * Suppress warnings about undeclared entities. We need to do
2063 : * this to avoid problems due to not loading DTD definitions.
2064 : */
3890 tgl 2065 CBC 157 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
3890 tgl 2066 GIC 3 : return;
3890 tgl 2067 ECB :
2068 : /* Otherwise, accept error regardless of the parsing purpose */
4281 tgl 2069 GIC 154 : break;
5920 peter_e 2070 ECB :
4281 tgl 2071 GIC 42 : default:
2072 : /* Ignore error if only doing well-formedness check */
2073 42 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2074 33 : return;
4281 tgl 2075 CBC 9 : break;
2076 : }
2077 :
4281 tgl 2078 ECB : /* Prepare error message in errorBuf */
4281 tgl 2079 GIC 163 : errorBuf = makeStringInfo();
4281 tgl 2080 ECB :
4281 tgl 2081 CBC 163 : if (error->line > 0)
2082 163 : appendStringInfo(errorBuf, "line %d: ", error->line);
2083 163 : if (name != NULL)
4281 tgl 2084 UIC 0 : appendStringInfo(errorBuf, "element %s: ", name);
1521 tgl 2085 CBC 163 : if (error->message != NULL)
1521 tgl 2086 GIC 163 : appendStringInfoString(errorBuf, error->message);
1521 tgl 2087 ECB : else
1521 tgl 2088 UIC 0 : appendStringInfoString(errorBuf, "(no message provided)");
4281 tgl 2089 ECB :
2090 : /*
2091 : * Append context information to errorBuf.
2092 : *
2093 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2094 : * write the context. Since we don't want to duplicate libxml
2095 : * functionality here, we set up a generic error handler temporarily.
2096 : *
2097 : * We use appendStringInfo() directly as libxml's generic error handler.
2098 : * This should work because it has essentially the same signature as
2099 : * libxml expects, namely (void *ptr, const char *msg, ...).
2100 : */
4281 tgl 2101 GIC 163 : if (input != NULL)
2102 : {
4281 tgl 2103 CBC 163 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
3955 bruce 2104 GIC 163 : void *errCtxSaved = xmlGenericErrorContext;
4281 tgl 2105 ECB :
4281 tgl 2106 GIC 163 : xmlSetGenericErrorFunc((void *) errorBuf,
2107 : (xmlGenericErrorFunc) appendStringInfo);
2108 :
2109 : /* Add context information to errorBuf */
2110 163 : appendStringInfoLineSeparator(errorBuf);
2111 :
2112 163 : xmlParserPrintFileContext(input);
2113 :
2114 : /* Restore generic error func */
2115 163 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2116 : }
2117 :
2118 : /* Get rid of any trailing newlines in errorBuf */
2119 163 : chopStringInfoNewlines(errorBuf);
2120 :
2121 : /*
2122 : * Legacy error handling mode. err_occurred is never set, we just add the
2123 : * message to err_buf. This mode exists because the xml2 contrib module
2124 : * uses our error-handling infrastructure, but we don't want to change its
2125 : * behaviour since it's deprecated anyway. This is also why we don't
2126 : * distinguish between notices, warnings and errors here --- the old-style
2127 : * generic error handler wouldn't have done that either.
2128 : */
2129 163 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2130 : {
2131 1 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1356 drowley 2132 1 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2133 : errorBuf->len);
2134 :
4281 tgl 2135 1 : pfree(errorBuf->data);
2136 1 : pfree(errorBuf);
2137 1 : return;
2138 : }
2139 :
2140 : /*
2141 : * We don't want to ereport() here because that'd probably leave libxml in
2142 : * an inconsistent state. Instead, we remember the error and ereport()
2143 : * from xml_ereport().
2144 : *
2145 : * Warnings and notices can be reported immediately since they won't cause
2146 : * a longjmp() out of libxml.
2147 : */
2148 162 : if (level >= XML_ERR_ERROR)
2149 : {
2150 159 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1356 drowley 2151 159 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2152 : errorBuf->len);
2153 :
4281 tgl 2154 159 : xmlerrcxt->err_occurred = true;
2155 : }
2156 3 : else if (level >= XML_ERR_WARNING)
2157 : {
2158 3 : ereport(WARNING,
2159 : (errmsg_internal("%s", errorBuf->data)));
2160 : }
2161 : else
2162 : {
4281 tgl 2163 UIC 0 : ereport(NOTICE,
2164 : (errmsg_internal("%s", errorBuf->data)));
2165 : }
2166 :
4281 tgl 2167 GIC 162 : pfree(errorBuf->data);
2168 162 : pfree(errorBuf);
2169 : }
2170 :
2171 :
2172 : /*
2173 : * Convert libxml error codes into textual errdetail messages.
2174 : *
2175 : * This should be called within an ereport or errsave invocation,
2176 : * just as errdetail would be.
2177 : *
2178 : * At the moment, we only need to cover those codes that we
5920 peter_e 2179 ECB : * may raise in this file.
2180 : */
2181 : static int
114 tgl 2182 GNC 3 : errdetail_for_xml_code(int code)
2183 : {
2184 : const char *det;
2185 :
5624 bruce 2186 GIC 3 : switch (code)
2187 : {
5920 peter_e 2188 UIC 0 : case XML_ERR_INVALID_CHAR:
5494 tgl 2189 0 : det = gettext_noop("Invalid character value.");
5920 peter_e 2190 0 : break;
2191 0 : case XML_ERR_SPACE_REQUIRED:
5494 tgl 2192 0 : det = gettext_noop("Space required.");
5920 peter_e 2193 LBC 0 : break;
5920 peter_e 2194 GIC 3 : case XML_ERR_STANDALONE_VALUE:
5494 tgl 2195 3 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
5920 peter_e 2196 3 : break;
5920 peter_e 2197 UIC 0 : case XML_ERR_VERSION_MISSING:
5494 tgl 2198 LBC 0 : det = gettext_noop("Malformed declaration: missing version.");
5920 peter_e 2199 UBC 0 : break;
5920 peter_e 2200 UIC 0 : case XML_ERR_MISSING_ENCODING:
5494 tgl 2201 0 : det = gettext_noop("Missing encoding in text declaration.");
5920 peter_e 2202 LBC 0 : break;
5920 peter_e 2203 UIC 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
5494 tgl 2204 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
5920 peter_e 2205 LBC 0 : break;
5624 bruce 2206 0 : default:
5494 tgl 2207 UIC 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
5950 tgl 2208 LBC 0 : break;
2209 : }
5951 tgl 2210 ECB :
114 tgl 2211 GNC 3 : return errdetail(det, code);
2212 : }
2213 :
2214 :
2215 : /*
2216 : * Remove all trailing newlines from a StringInfo string
2217 : */
2218 : static void
4281 tgl 2219 GIC 486 : chopStringInfoNewlines(StringInfo str)
2220 : {
2221 812 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2222 326 : str->data[--str->len] = '\0';
2223 486 : }
2224 :
2225 :
2226 : /*
4281 tgl 2227 ECB : * Append a newline after removing any existing trailing newlines
2228 : */
2229 : static void
4281 tgl 2230 GIC 323 : appendStringInfoLineSeparator(StringInfo str)
2231 : {
2232 323 : chopStringInfoNewlines(str);
4281 tgl 2233 CBC 323 : if (str->len > 0)
4281 tgl 2234 GBC 214 : appendStringInfoChar(str, '\n');
4281 tgl 2235 GIC 323 : }
2236 :
4281 tgl 2237 ECB :
2238 : /*
2239 : * Convert one char in the current server encoding to a Unicode codepoint.
5953 peter_e 2240 : */
2241 : static pg_wchar
1986 peter_e 2242 GIC 9140 : sqlchar_to_unicode(const char *s)
5953 peter_e 2243 EUB : {
2244 : char *utf8string;
5624 bruce 2245 ECB : pg_wchar ret[2]; /* need space for trailing zero */
2246 :
2247 : /* note we're not assuming s is null-terminated */
3332 tgl 2248 GIC 9140 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
5953 peter_e 2249 ECB :
5263 tgl 2250 GIC 9140 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2251 : pg_encoding_mblen(PG_UTF8, utf8string));
2252 :
2253 9140 : if (utf8string != s)
5263 tgl 2254 UIC 0 : pfree(utf8string);
2255 :
5950 tgl 2256 CBC 9140 : return ret[0];
2257 : }
5953 peter_e 2258 ECB :
2259 :
2260 : static bool
5953 peter_e 2261 CBC 1819 : is_valid_xml_namefirst(pg_wchar c)
5953 peter_e 2262 ECB : {
2263 : /* (Letter | '_' | ':') */
5953 peter_e 2264 CBC 1822 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2265 3641 : || c == '_' || c == ':');
2266 : }
2267 :
2268 :
2269 : static bool
5953 peter_e 2270 GIC 7321 : is_valid_xml_namechar(pg_wchar c)
2271 : {
2272 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2273 7766 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
5953 peter_e 2274 CBC 445 : || xmlIsDigitQ(c)
5953 peter_e 2275 GBC 127 : || c == '.' || c == '-' || c == '_' || c == ':'
5953 peter_e 2276 GIC 6 : || xmlIsCombiningQ(c)
2277 15532 : || xmlIsExtenderQ(c));
2278 : }
2279 : #endif /* USE_LIBXML */
2280 :
2281 :
/*
 * Map an SQL identifier to an XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that need escaping are written as _xHHHH_ sequences.  The
 * result is a freshly built string.  Without libxml support this calls
 * NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cur;

	/*
	 * Nothing in SQL/XML uses escape_period without fully_escaped, so that
	 * combination is probably a mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	cur = ident;
	while (*cur)
	{
		int			chlen = pg_mblen(cur);
		bool		at_start = (cur == ident);

		if (*cur == ':' && (at_start || fully_escaped))
			appendStringInfoString(&out, "_x003A_");
		else if (*cur == '_' && *(cur + 1) == 'x')
			appendStringInfoString(&out, "_x005F_");
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* Escape only the leading letter, keeping its case */
			appendStringInfoString(&out,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
			appendStringInfoString(&out, "_x002E_");
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		valid;

			if (at_start)
				valid = is_valid_xml_namefirst(u);
			else
				valid = is_valid_xml_namechar(u);

			if (valid)
				appendBinaryStringInfo(&out, cur, chlen);
			else
				appendStringInfo(&out, "_x%04X_", (unsigned int) u);
		}

		cur += chlen;
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
5945 peter_e 2336 ECB :
2337 :
2338 : /*
4559 peter_e 2339 EUB : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2340 : */
2341 : char *
1986 peter_e 2342 GIC 64 : map_xml_name_to_sql_identifier(const char *name)
2343 : {
2344 : StringInfoData buf;
2345 : const char *p;
2346 :
5945 2347 64 : initStringInfo(&buf);
2348 :
2349 352 : for (p = name; *p; p += pg_mblen(p))
2350 : {
5624 bruce 2351 288 : if (*p == '_' && *(p + 1) == 'x'
5624 bruce 2352 CBC 8 : && isxdigit((unsigned char) *(p + 2))
5624 bruce 2353 GIC 8 : && isxdigit((unsigned char) *(p + 3))
5624 bruce 2354 CBC 8 : && isxdigit((unsigned char) *(p + 4))
2355 8 : && isxdigit((unsigned char) *(p + 5))
5624 bruce 2356 GIC 8 : && *(p + 6) == '_')
5945 peter_e 2357 CBC 8 : {
2358 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2359 : unsigned int u;
2360 :
2361 8 : sscanf(p + 2, "%X", &u);
1129 tgl 2362 GIC 8 : pg_unicode_to_server(u, (unsigned char *) cbuf);
1129 tgl 2363 CBC 8 : appendStringInfoString(&buf, cbuf);
5945 peter_e 2364 GIC 8 : p += 6;
2365 : }
5945 peter_e 2366 ECB : else
5945 peter_e 2367 GIC 280 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2368 : }
2369 :
5945 peter_e 2370 CBC 64 : return buf.data;
2371 : }
2372 :
2373 : /*
2374 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2375 : *
2376 : * When xml_escape_strings is true, then certain characters in string
2377 : * values are replaced by entity references (< etc.), as specified
2378 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2379 : * wanted. The false case is mainly useful when the resulting value
5052 peter_e 2380 ECB : * is used with xmlTextWriterWriteAttribute() to write out an
2381 : * attribute, because that function does the escaping itself.
5931 2382 : */
2383 : char *
5052 peter_e 2384 GIC 64943 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2385 : {
4553 tgl 2386 CBC 64943 : if (type_is_array_domain(type))
5931 peter_e 2387 ECB : {
5624 bruce 2388 : ArrayType *array;
2389 : Oid elmtype;
2390 : int16 elmlen;
2391 : bool elmbyval;
2392 : char elmalign;
2393 : int num_elems;
2394 : Datum *elem_values;
2395 : bool *elem_nulls;
2396 : StringInfoData buf;
2397 : int i;
2398 :
5931 peter_e 2399 CBC 3 : array = DatumGetArrayTypeP(value);
5931 peter_e 2400 GIC 3 : elmtype = ARR_ELEMTYPE(array);
5931 peter_e 2401 CBC 3 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
5931 peter_e 2402 ECB :
5633 tgl 2403 GIC 3 : deconstruct_array(array, elmtype,
2404 : elmlen, elmbyval, elmalign,
5633 tgl 2405 ECB : &elem_values, &elem_nulls,
2406 : &num_elems);
5931 peter_e 2407 :
5295 tgl 2408 GIC 3 : initStringInfo(&buf);
5295 tgl 2409 ECB :
5633 tgl 2410 GIC 12 : for (i = 0; i < num_elems; i++)
2411 : {
2412 9 : if (elem_nulls[i])
5633 tgl 2413 UIC 0 : continue;
5931 peter_e 2414 GBC 9 : appendStringInfoString(&buf, "<element>");
5633 tgl 2415 GIC 9 : appendStringInfoString(&buf,
2416 9 : map_sql_value_to_xml_value(elem_values[i],
2417 : elmtype, true));
5931 peter_e 2418 CBC 9 : appendStringInfoString(&buf, "</element>");
5931 peter_e 2419 ECB : }
2420 :
5633 tgl 2421 GIC 3 : pfree(elem_values);
2422 3 : pfree(elem_nulls);
2423 :
5295 2424 3 : return buf.data;
2425 : }
2426 : else
2427 : {
2428 : Oid typeOut;
2429 : bool isvarlena;
2430 : char *str;
2431 :
2432 : /*
3602 bruce 2433 ECB : * Flatten domains; the special-case treatments below should apply to,
2434 : * eg, domains over boolean not just boolean.
2435 : */
3689 tgl 2436 GIC 64940 : type = getBaseType(type);
3689 tgl 2437 ECB :
2438 : /*
5883 peter_e 2439 EUB : * Special XSD formatting for some data types
2440 : */
5883 peter_e 2441 GBC 64940 : switch (type)
5902 peter_e 2442 EUB : {
5883 peter_e 2443 GBC 33 : case BOOLOID:
2444 33 : if (DatumGetBool(value))
5883 peter_e 2445 CBC 30 : return "true";
5883 peter_e 2446 ECB : else
5883 peter_e 2447 CBC 3 : return "false";
5883 peter_e 2448 EUB :
5883 peter_e 2449 GBC 24 : case DATEOID:
5624 bruce 2450 EUB : {
2451 : DateADT date;
2452 : struct pg_tm tm;
2453 : char buf[MAXDATELEN + 1];
5883 peter_e 2454 :
5624 bruce 2455 GBC 24 : date = DatumGetDateADT(value);
5290 tgl 2456 EUB : /* XSD doesn't support infinite values */
5290 tgl 2457 GBC 24 : if (DATE_NOT_FINITE(date))
5290 tgl 2458 UBC 0 : ereport(ERROR,
5290 tgl 2459 EUB : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2460 : errmsg("date out of range"),
2461 : errdetail("XML does not support infinite date values.")));
5624 bruce 2462 CBC 24 : j2date(date + POSTGRES_EPOCH_JDATE,
2463 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
5624 bruce 2464 GIC 24 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2465 :
2466 24 : return pstrdup(buf);
2467 : }
2468 :
5883 peter_e 2469 18 : case TIMESTAMPOID:
5624 bruce 2470 ECB : {
2471 : Timestamp timestamp;
2472 : struct pg_tm tm;
2473 : fsec_t fsec;
2474 : char buf[MAXDATELEN + 1];
2475 :
5624 bruce 2476 GIC 18 : timestamp = DatumGetTimestamp(value);
2477 :
2478 : /* XSD doesn't support infinite values */
2479 18 : if (TIMESTAMP_NOT_FINITE(timestamp))
2480 3 : ereport(ERROR,
5624 bruce 2481 ECB : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2482 : errmsg("timestamp out of range"),
5126 tgl 2483 : errdetail("XML does not support infinite timestamp values.")));
5624 bruce 2484 CBC 15 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
4043 peter_e 2485 15 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
5624 bruce 2486 ECB : else
5624 bruce 2487 UIC 0 : ereport(ERROR,
2488 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2489 : errmsg("timestamp out of range")));
2490 :
5624 bruce 2491 GIC 15 : return pstrdup(buf);
2492 : }
5883 peter_e 2493 ECB :
5883 peter_e 2494 GIC 12 : case TIMESTAMPTZOID:
2495 : {
2496 : TimestampTz timestamp;
2497 : struct pg_tm tm;
2498 : int tz;
5624 bruce 2499 ECB : fsec_t fsec;
4042 peter_e 2500 GIC 12 : const char *tzn = NULL;
5624 bruce 2501 ECB : char buf[MAXDATELEN + 1];
2502 :
5624 bruce 2503 GIC 12 : timestamp = DatumGetTimestamp(value);
5624 bruce 2504 ECB :
5624 bruce 2505 EUB : /* XSD doesn't support infinite values */
5624 bruce 2506 GIC 12 : if (TIMESTAMP_NOT_FINITE(timestamp))
5624 bruce 2507 LBC 0 : ereport(ERROR,
2508 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2509 : errmsg("timestamp out of range"),
2510 : errdetail("XML does not support infinite timestamp values.")));
5624 bruce 2511 GIC 12 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
4043 peter_e 2512 CBC 12 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2513 : else
5624 bruce 2514 UIC 0 : ereport(ERROR,
5624 bruce 2515 ECB : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2516 : errmsg("timestamp out of range")));
2517 :
5624 bruce 2518 GIC 12 : return pstrdup(buf);
2519 : }
2520 :
5295 tgl 2521 ECB : #ifdef USE_LIBXML
5295 tgl 2522 GIC 18 : case BYTEAOID:
2523 : {
5295 tgl 2524 CBC 18 : bytea *bstr = DatumGetByteaPP(value);
4281 tgl 2525 ECB : PgXmlErrorContext *xmlerrcxt;
4281 tgl 2526 CBC 18 : volatile xmlBufferPtr buf = NULL;
2527 18 : volatile xmlTextWriterPtr writer = NULL;
5295 tgl 2528 ECB : char *result;
2529 :
4281 tgl 2530 GIC 18 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2531 :
5079 2532 18 : PG_TRY();
2533 : {
2534 18 : buf = xmlBufferCreate();
4281 2535 18 : if (buf == NULL || xmlerrcxt->err_occurred)
4281 tgl 2536 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
5079 tgl 2537 ECB : "could not allocate xmlBuffer");
5079 tgl 2538 GIC 18 : writer = xmlNewTextWriterMemory(buf, 0);
4281 2539 18 : if (writer == NULL || xmlerrcxt->err_occurred)
4281 tgl 2540 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2541 : "could not allocate xmlTextWriter");
2542 :
5079 tgl 2543 GIC 18 : if (xmlbinary == XMLBINARY_BASE64)
2544 15 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2118 2545 15 : 0, VARSIZE_ANY_EXHDR(bstr));
2546 : else
5079 2547 3 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2118 tgl 2548 CBC 3 : 0, VARSIZE_ANY_EXHDR(bstr));
2549 :
5079 tgl 2550 ECB : /* we MUST do this now to flush data out to the buffer */
5079 tgl 2551 GIC 18 : xmlFreeTextWriter(writer);
5079 tgl 2552 CBC 18 : writer = NULL;
2553 :
2554 18 : result = pstrdup((const char *) xmlBufferContent(buf));
5079 tgl 2555 ECB : }
5079 tgl 2556 LBC 0 : PG_CATCH();
5079 tgl 2557 ECB : {
5079 tgl 2558 LBC 0 : if (writer)
2559 0 : xmlFreeTextWriter(writer);
5079 tgl 2560 UIC 0 : if (buf)
5079 tgl 2561 UBC 0 : xmlBufferFree(buf);
4281 tgl 2562 EUB :
4281 tgl 2563 UIC 0 : pg_xml_done(xmlerrcxt, true);
4281 tgl 2564 EUB :
5079 tgl 2565 UIC 0 : PG_RE_THROW();
5079 tgl 2566 ECB : }
5079 tgl 2567 GBC 18 : PG_END_TRY();
2568 :
5295 tgl 2569 GIC 18 : xmlBufferFree(buf);
5079 tgl 2570 ECB :
4281 tgl 2571 GIC 18 : pg_xml_done(xmlerrcxt, false);
4281 tgl 2572 ECB :
5295 tgl 2573 CBC 18 : return result;
5295 tgl 2574 ECB : }
2118 2575 : #endif /* USE_LIBXML */
2576 :
5902 peter_e 2577 : }
2578 :
2579 : /*
2580 : * otherwise, just use the type's native text representation
5295 tgl 2581 : */
5931 peter_e 2582 GIC 64835 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2583 64835 : str = OidOutputFunctionCall(typeOut, value);
2584 :
2585 : /* ... exactly as-is for XML, and when escaping is not wanted */
5052 2586 64835 : if (type == XMLOID || !xml_escape_strings)
5931 2587 10785 : return str;
2588 :
2589 : /* otherwise, translate special characters as needed */
4990 tgl 2590 54050 : return escape_xml(str);
2591 : }
2592 : }
4990 tgl 2593 ECB :
2594 :
2595 : /*
2596 : * Escape characters in text that have special meanings in XML.
2597 : *
2598 : * Returns a palloc'd string.
2599 : *
2600 : * NB: this is intentionally not dependent on libxml.
2601 : */
2602 : char *
4990 tgl 2603 CBC 54173 : escape_xml(const char *str)
4990 tgl 2604 ECB : {
2605 : StringInfoData buf;
2606 : const char *p;
2607 :
4990 tgl 2608 CBC 54173 : initStringInfo(&buf);
4990 tgl 2609 GIC 334366 : for (p = str; *p; p++)
2610 : {
2611 280193 : switch (*p)
5931 peter_e 2612 ECB : {
4990 tgl 2613 LBC 0 : case '&':
2614 0 : appendStringInfoString(&buf, "&");
2615 0 : break;
4990 tgl 2616 GIC 18 : case '<':
2617 18 : appendStringInfoString(&buf, "<");
4990 tgl 2618 CBC 18 : break;
4990 tgl 2619 GIC 12 : case '>':
2620 12 : appendStringInfoString(&buf, ">");
4990 tgl 2621 CBC 12 : break;
4990 tgl 2622 UIC 0 : case '\r':
2623 0 : appendStringInfoString(&buf, "
");
2624 0 : break;
4990 tgl 2625 GIC 280163 : default:
2626 280163 : appendStringInfoCharMacro(&buf, *p);
2627 280163 : break;
2628 : }
2629 : }
2630 54173 : return buf.data;
2631 : }
2632 :
2633 :
/*
 * Copy a NUL-terminated string into memory obtained from SPI_palloc().
 *
 * The terminator is copied along with the characters.
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* +1 for the trailing NUL */
	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2643 :
2644 :
2645 : /*
2646 : * SQL to XML mapping functions
2647 : *
2648 : * What follows below was at one point intentionally organized so that
2649 : * you can read along in the SQL/XML standard. The functions are
4559 peter_e 2650 ECB : * mostly split up the way the clauses lay out in the standards
5852 2651 : * document, and the identifiers are also aligned with the standard
4559 2652 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2653 : * differently than SQL/XML:2003, so the order below doesn't make much
2654 : * sense anymore.
2655 : *
2656 : * There are many things going on there:
2657 : *
2658 : * There are two kinds of mappings: Mapping SQL data (table contents)
5852 2659 : * to XML documents, and mapping SQL structure (the "schema") to XML
2660 : * Schema. And there are functions that do both at the same time.
2661 : *
2662 : * Then you can map a database, a schema, or a table, each in both
2663 : * ways. This breaks down recursively: Mapping a database invokes
5852 peter_e 2664 EUB : * mapping schemas, which invokes mapping tables, which invokes
5852 peter_e 2665 ECB : * mapping rows, which invokes mapping columns, although you can't
3260 bruce 2666 : * call the last two from the outside. Because of this, there are a
5852 peter_e 2667 : * number of xyz_internal() functions which are to be called both from
2668 : * the function manager wrapper and from some upper layer in a
2669 : * recursive call.
2670 : *
2671 : * See the documentation about what the common function arguments
2672 : * nulls, tableforest, and targetns mean.
2673 : *
2674 : * Some style guidelines for XML output: Use double quotes for quoting
3260 bruce 2675 : * XML attributes. Indent XML elements by two spaces, but remember
2676 : * that a lot of code is called recursively at different levels, so
2677 : * it's better not to indent rather than create output that indents
2678 : * and outdents weirdly. Add newlines to make the output look nice.
2679 : */
2680 :
2681 :
2682 : /*
2683 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2684 : * 4.10.8.
2685 : */
2686 :
5852 peter_e 2687 : /*
2688 : * Given a query, which must return type oid as first column, produce
2689 : * a list of Oids with the query results.
2690 : */
2691 : static List *
5852 peter_e 2692 CBC 18 : query_to_oid_list(const char *query)
2693 : {
2584 tgl 2694 ECB : uint64 i;
5852 peter_e 2695 CBC 18 : List *list = NIL;
1249 michael 2696 ECB : int spi_result;
2697 :
1249 michael 2698 CBC 18 : spi_result = SPI_execute(query, true, 0);
1249 michael 2699 GIC 18 : if (spi_result != SPI_OK_SELECT)
1249 michael 2700 LBC 0 : elog(ERROR, "SPI_execute returned %s for %s",
2701 : SPI_result_code_string(spi_result), query);
2702 :
5852 peter_e 2703 GIC 54 : for (i = 0; i < SPI_processed; i++)
2704 : {
2705 : Datum oid;
5624 bruce 2706 ECB : bool isnull;
2707 :
5633 tgl 2708 CBC 36 : oid = SPI_getbinval(SPI_tuptable->vals[i],
5633 tgl 2709 GBC 36 : SPI_tuptable->tupdesc,
2710 : 1,
2711 : &isnull);
5633 tgl 2712 GIC 36 : if (!isnull)
5633 tgl 2713 CBC 36 : list = lappend_oid(list, DatumGetObjectId(oid));
2714 : }
5852 peter_e 2715 ECB :
5852 peter_e 2716 GIC 18 : return list;
5852 peter_e 2717 ECB : }
2718 :
2719 :
2720 : static List *
5852 peter_e 2721 GIC 18 : schema_get_xml_visible_tables(Oid nspid)
2722 : {
2723 : StringInfoData query;
2724 :
2725 18 : initStringInfo(&query);
2222 tgl 2726 18 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2222 tgl 2727 ECB : " WHERE relnamespace = %u AND relkind IN ("
2728 : CppAsString2(RELKIND_RELATION) ","
2729 : CppAsString2(RELKIND_MATVIEW) ","
2730 : CppAsString2(RELKIND_VIEW) ")"
2731 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2732 : " ORDER BY relname;", nspid);
2733 :
5852 peter_e 2734 GIC 18 : return query_to_oid_list(query.data);
5852 peter_e 2735 ECB : }
2736 :
2737 :
/*
 * Query fragments selecting the schemas used for a database mapping.
 *
 * Including the system schemas is probably not useful, so schemas whose
 * name starts with "pg_" and information_schema are excluded.  Only
 * schemas on which the current user has USAGE privilege are selected.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
5852 peter_e 2745 :
2746 :
2747 : static List *
5852 peter_e 2748 UIC 0 : database_get_xml_visible_schemas(void)
2749 : {
2750 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
5852 peter_e 2751 ECB : }
2752 :
2753 :
2754 : static List *
5852 peter_e 2755 UIC 0 : database_get_xml_visible_tables(void)
2756 : {
5852 peter_e 2757 ECB : /* At the moment there is no order required here. */
2222 tgl 2758 UBC 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2759 : " WHERE relkind IN ("
2760 : CppAsString2(RELKIND_RELATION) ","
2761 : CppAsString2(RELKIND_MATVIEW) ","
2222 tgl 2762 ECB : CppAsString2(RELKIND_VIEW) ")"
2118 2763 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2764 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
5852 peter_e 2765 EUB : }
2766 :
2767 :
2768 : /*
4559 peter_e 2769 ECB : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2770 : * section 9.11.
2771 : */
2772 :
5852 2773 : static StringInfo
5633 tgl 2774 GIC 48 : table_to_xml_internal(Oid relid,
5633 tgl 2775 ECB : const char *xmlschema, bool nulls, bool tableforest,
2776 : const char *targetns, bool top_level)
5852 peter_e 2777 : {
2778 : StringInfoData query;
2779 :
5852 peter_e 2780 GIC 48 : initStringInfo(&query);
5633 tgl 2781 CBC 48 : appendStringInfo(&query, "SELECT * FROM %s",
2782 : DatumGetCString(DirectFunctionCall1(regclassout,
2118 tgl 2783 ECB : ObjectIdGetDatum(relid))));
5633 tgl 2784 GIC 48 : return query_to_xml_internal(query.data, get_rel_name(relid),
5633 tgl 2785 ECB : xmlschema, nulls, tableforest,
2786 : targetns, top_level);
5852 peter_e 2787 EUB : }
2788 :
5852 peter_e 2789 ECB :
5896 2790 : Datum
5896 peter_e 2791 GBC 18 : table_to_xml(PG_FUNCTION_ARGS)
2792 : {
5896 peter_e 2793 GIC 18 : Oid relid = PG_GETARG_OID(0);
5896 peter_e 2794 CBC 18 : bool nulls = PG_GETARG_BOOL(1);
2795 18 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 2796 18 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2797 :
5633 2798 18 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2118 tgl 2799 ECB : nulls, tableforest,
2800 : targetns, true)));
2801 : }
5896 peter_e 2802 :
2803 :
2804 : Datum
5896 peter_e 2805 CBC 5 : query_to_xml(PG_FUNCTION_ARGS)
2806 : {
5493 tgl 2807 GBC 5 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
5896 peter_e 2808 GIC 5 : bool nulls = PG_GETARG_BOOL(1);
5896 peter_e 2809 GBC 5 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 2810 5 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
5896 peter_e 2811 EUB :
5633 tgl 2812 GBC 5 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2813 : NULL, nulls, tableforest,
2118 tgl 2814 EUB : targetns, true)));
2815 : }
5896 peter_e 2816 :
2817 :
5896 peter_e 2818 ECB : Datum
5896 peter_e 2819 GIC 6 : cursor_to_xml(PG_FUNCTION_ARGS)
5896 peter_e 2820 ECB : {
5493 tgl 2821 GIC 6 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
5896 peter_e 2822 CBC 6 : int32 count = PG_GETARG_INT32(1);
5896 peter_e 2823 GIC 6 : bool nulls = PG_GETARG_BOOL(2);
5896 peter_e 2824 CBC 6 : bool tableforest = PG_GETARG_BOOL(3);
5493 tgl 2825 GIC 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2826 :
2827 : StringInfoData result;
2828 : Portal portal;
2829 : uint64 i;
2830 :
5896 peter_e 2831 6 : initStringInfo(&result);
2832 :
2167 peter_e 2833 CBC 6 : if (!tableforest)
2167 peter_e 2834 ECB : {
2167 peter_e 2835 GIC 3 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2836 3 : appendStringInfoChar(&result, '\n');
2167 peter_e 2837 ECB : }
2838 :
5896 peter_e 2839 GIC 6 : SPI_connect();
2840 6 : portal = SPI_cursor_find(name);
5896 peter_e 2841 CBC 6 : if (portal == NULL)
5896 peter_e 2842 UIC 0 : ereport(ERROR,
2843 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2844 : errmsg("cursor \"%s\" does not exist", name)));
2845 :
5896 peter_e 2846 GIC 6 : SPI_cursor_fetch(portal, true, count);
2847 24 : for (i = 0; i < SPI_processed; i++)
5633 tgl 2848 18 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2849 : tableforest, targetns, true);
2850 :
5896 peter_e 2851 6 : SPI_finish();
2852 :
2167 2853 6 : if (!tableforest)
2167 peter_e 2854 CBC 3 : xmldata_root_element_end(&result, "table");
2855 :
5896 peter_e 2856 GIC 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2857 : }
2858 :
5896 peter_e 2859 ECB :
5852 2860 : /*
2861 : * Write the start tag of the root element of a data mapping.
2862 : *
2863 : * top_level means that this is the very top level of the eventual
3260 bruce 2864 EUB : * output. For example, when the user calls table_to_xml, then a call
5852 peter_e 2865 : * with a table name to this function is the top level. When the user
2866 : * calls database_to_xml, then a call with a schema name to this
5852 peter_e 2867 ECB : * function is not the top level. If top_level is false, then the XML
2868 : * namespace declarations are omitted, because they supposedly already
3260 bruce 2869 : * appeared earlier in the output. Repeating them is not wrong, but
5852 peter_e 2870 : * it looks ugly.
5633 tgl 2871 : */
5852 peter_e 2872 : static void
5633 tgl 2873 GBC 119 : xmldata_root_element_start(StringInfo result, const char *eltname,
5633 tgl 2874 EUB : const char *xmlschema, const char *targetns,
2875 : bool top_level)
5852 peter_e 2876 ECB : {
2877 : /* This isn't really wrong but currently makes no sense. */
5852 peter_e 2878 CBC 119 : Assert(top_level || !xmlschema);
2879 :
5852 peter_e 2880 GIC 119 : appendStringInfo(result, "<%s", eltname);
5852 peter_e 2881 CBC 119 : if (top_level)
2882 : {
5852 peter_e 2883 GIC 89 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2884 89 : if (strlen(targetns) > 0)
2885 15 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
5852 peter_e 2886 ECB : }
5852 peter_e 2887 GIC 119 : if (xmlschema)
5852 peter_e 2888 ECB : {
2889 : /* FIXME: better targets */
5852 peter_e 2890 GIC 9 : if (strlen(targetns) > 0)
5852 peter_e 2891 CBC 3 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
5852 peter_e 2892 ECB : else
3447 rhaas 2893 GIC 6 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2894 : }
2895 119 : appendStringInfoString(result, ">\n");
5852 peter_e 2896 119 : }
2897 :
2898 :
2899 : static void
2900 119 : xmldata_root_element_end(StringInfo result, const char *eltname)
2901 : {
2902 119 : appendStringInfo(result, "</%s>\n", eltname);
2903 119 : }
2904 :
2905 :
2906 : static StringInfo
5633 tgl 2907 56 : query_to_xml_internal(const char *query, char *tablename,
2908 : const char *xmlschema, bool nulls, bool tableforest,
2909 : const char *targetns, bool top_level)
2910 : {
2911 : StringInfo result;
2912 : char *xmltn;
2913 : uint64 i;
2914 :
5896 peter_e 2915 56 : if (tablename)
2916 48 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2917 : else
2918 8 : xmltn = "table";
2919 :
2920 56 : result = makeStringInfo();
2921 :
2922 56 : SPI_connect();
2923 56 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
5896 peter_e 2924 UIC 0 : ereport(ERROR,
2925 : (errcode(ERRCODE_DATA_EXCEPTION),
2926 : errmsg("invalid query")));
2927 :
5896 peter_e 2928 GIC 56 : if (!tableforest)
2929 : {
5633 tgl 2930 26 : xmldata_root_element_start(result, xmltn, xmlschema,
2931 : targetns, top_level);
2838 heikki.linnakangas 2932 26 : appendStringInfoChar(result, '\n');
2933 : }
2934 :
5896 peter_e 2935 56 : if (xmlschema)
2936 15 : appendStringInfo(result, "%s\n\n", xmlschema);
2937 :
5624 bruce 2938 194 : for (i = 0; i < SPI_processed; i++)
5633 tgl 2939 138 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2940 : tableforest, targetns, top_level);
2941 :
5896 peter_e 2942 56 : if (!tableforest)
5852 peter_e 2943 CBC 26 : xmldata_root_element_end(result, xmltn);
2944 :
5896 peter_e 2945 GIC 56 : SPI_finish();
5896 peter_e 2946 ECB :
5896 peter_e 2947 GIC 56 : return result;
2948 : }
5896 peter_e 2949 ECB :
2950 :
5896 peter_e 2951 EUB : Datum
5896 peter_e 2952 GIC 15 : table_to_xmlschema(PG_FUNCTION_ARGS)
2953 : {
5896 peter_e 2954 CBC 15 : Oid relid = PG_GETARG_OID(0);
5896 peter_e 2955 GIC 15 : bool nulls = PG_GETARG_BOOL(1);
2956 15 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 2957 15 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2958 : const char *result;
5624 bruce 2959 ECB : Relation rel;
5896 peter_e 2960 :
1539 andres 2961 GIC 15 : rel = table_open(relid, AccessShareLock);
5633 tgl 2962 15 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
5633 tgl 2963 ECB : tableforest, targetns);
1539 andres 2964 CBC 15 : table_close(rel, NoLock);
2965 :
5896 peter_e 2966 GIC 15 : PG_RETURN_XML_P(cstring_to_xmltype(result));
5896 peter_e 2967 ECB : }
2968 :
2969 :
2970 : Datum
5896 peter_e 2971 GIC 3 : query_to_xmlschema(PG_FUNCTION_ARGS)
5896 peter_e 2972 ECB : {
5493 tgl 2973 GIC 3 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
5896 peter_e 2974 3 : bool nulls = PG_GETARG_BOOL(1);
2975 3 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 2976 CBC 3 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
5896 peter_e 2977 ECB : const char *result;
2978 : SPIPlanPtr plan;
2979 : Portal portal;
2980 :
5896 peter_e 2981 GIC 3 : SPI_connect();
2982 :
5566 neilc 2983 3 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
5566 neilc 2984 UIC 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
5566 neilc 2985 ECB :
5566 neilc 2986 GIC 3 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
5566 neilc 2987 UIC 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2988 :
5633 tgl 2989 GIC 3 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2990 : InvalidOid, nulls,
2991 : tableforest, targetns));
5896 peter_e 2992 3 : SPI_cursor_close(portal);
2993 3 : SPI_finish();
2994 :
2995 3 : PG_RETURN_XML_P(cstring_to_xmltype(result));
2996 : }
2997 :
2998 :
5896 peter_e 2999 EUB : Datum
5896 peter_e 3000 GIC 6 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
5896 peter_e 3001 EUB : {
5493 tgl 3002 GIC 6 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
5896 peter_e 3003 6 : bool nulls = PG_GETARG_BOOL(1);
3004 6 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3005 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
5896 peter_e 3006 EUB : const char *xmlschema;
3007 : Portal portal;
3008 :
5896 peter_e 3009 GBC 6 : SPI_connect();
5896 peter_e 3010 GIC 6 : portal = SPI_cursor_find(name);
3011 6 : if (portal == NULL)
5896 peter_e 3012 UIC 0 : ereport(ERROR,
3013 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3014 : errmsg("cursor \"%s\" does not exist", name)));
3015 :
5633 tgl 3016 GIC 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3017 : InvalidOid, nulls,
3018 : tableforest, targetns));
5896 peter_e 3019 6 : SPI_finish();
3020 :
3021 6 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3022 : }
3023 :
3024 :
5896 peter_e 3025 ECB : Datum
5896 peter_e 3026 GIC 12 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3027 : {
3028 12 : Oid relid = PG_GETARG_OID(0);
3029 12 : bool nulls = PG_GETARG_BOOL(1);
3030 12 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3031 CBC 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
5896 peter_e 3032 ECB : Relation rel;
3033 : const char *xmlschema;
3034 :
1539 andres 3035 CBC 12 : rel = table_open(relid, AccessShareLock);
5633 tgl 3036 GIC 12 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3037 : tableforest, targetns);
1539 andres 3038 12 : table_close(rel, NoLock);
3039 :
5633 tgl 3040 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3041 : xmlschema, nulls, tableforest,
2118 tgl 3042 ECB : targetns, true)));
3043 : }
5896 peter_e 3044 :
3045 :
3046 : Datum
5896 peter_e 3047 CBC 3 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3048 : {
5493 tgl 3049 3 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
5896 peter_e 3050 GIC 3 : bool nulls = PG_GETARG_BOOL(1);
3051 3 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3052 3 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3053 :
3054 : const char *xmlschema;
3055 : SPIPlanPtr plan;
5896 peter_e 3056 ECB : Portal portal;
3057 :
5896 peter_e 3058 CBC 3 : SPI_connect();
5566 neilc 3059 ECB :
5566 neilc 3060 CBC 3 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
5566 neilc 3061 LBC 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3062 :
5566 neilc 3063 CBC 3 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
5566 neilc 3064 UIC 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3065 :
5633 tgl 3066 GIC 3 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3067 : InvalidOid, nulls, tableforest, targetns));
5896 peter_e 3068 3 : SPI_cursor_close(portal);
3069 3 : SPI_finish();
5896 peter_e 3070 ECB :
5633 tgl 3071 GIC 3 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2118 tgl 3072 ECB : xmlschema, nulls, tableforest,
3073 : targetns, true)));
5852 peter_e 3074 : }
3075 :
3076 :
3077 : /*
3078 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3079 : * sections 9.13, 9.14.
3080 : */
3081 :
3082 : static StringInfo
5633 tgl 3083 GIC 9 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
5633 tgl 3084 ECB : bool tableforest, const char *targetns, bool top_level)
3085 : {
5852 peter_e 3086 : StringInfo result;
3087 : char *xmlsn;
3088 : List *relid_list;
3089 : ListCell *cell;
3090 :
5633 tgl 3091 CBC 9 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
5633 tgl 3092 ECB : true, false);
5852 peter_e 3093 GBC 9 : result = makeStringInfo();
3094 :
5852 peter_e 3095 GIC 9 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2838 heikki.linnakangas 3096 9 : appendStringInfoChar(result, '\n');
5852 peter_e 3097 ECB :
5852 peter_e 3098 CBC 9 : if (xmlschema)
3099 3 : appendStringInfo(result, "%s\n\n", xmlschema);
3100 :
5852 peter_e 3101 GIC 9 : SPI_connect();
5852 peter_e 3102 ECB :
5852 peter_e 3103 GIC 9 : relid_list = schema_get_xml_visible_tables(nspid);
5852 peter_e 3104 ECB :
5852 peter_e 3105 CBC 27 : foreach(cell, relid_list)
3106 : {
5624 bruce 3107 18 : Oid relid = lfirst_oid(cell);
3108 : StringInfo subres;
3109 :
5633 tgl 3110 GIC 18 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3111 : targetns, false);
3112 :
1356 drowley 3113 18 : appendBinaryStringInfo(result, subres->data, subres->len);
5852 peter_e 3114 18 : appendStringInfoChar(result, '\n');
3115 : }
3116 :
3117 9 : SPI_finish();
3118 :
3119 9 : xmldata_root_element_end(result, xmlsn);
3120 :
3121 9 : return result;
3122 : }
3123 :
5852 peter_e 3124 ECB :
3125 : Datum
5852 peter_e 3126 GIC 6 : schema_to_xml(PG_FUNCTION_ARGS)
3127 : {
3128 6 : Name name = PG_GETARG_NAME(0);
5852 peter_e 3129 CBC 6 : bool nulls = PG_GETARG_BOOL(1);
5852 peter_e 3130 GIC 6 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3131 CBC 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
5852 peter_e 3132 ECB :
3133 : char *schemaname;
3134 : Oid nspid;
3135 :
5852 peter_e 3136 CBC 6 : schemaname = NameStr(*name);
3725 bruce 3137 GIC 6 : nspid = LookupExplicitNamespace(schemaname, false);
5852 peter_e 3138 ECB :
5633 tgl 3139 GIC 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3140 : nulls, tableforest, targetns, true)));
5852 peter_e 3141 ECB : }
3142 :
3143 :
3144 : /*
3145 : * Write the start element of the root element of an XML Schema mapping.
3146 : */
3147 : static void
5852 peter_e 3148 GIC 48 : xsd_schema_element_start(StringInfo result, const char *targetns)
3149 : {
3150 48 : appendStringInfoString(result,
5852 peter_e 3151 ECB : "<xsd:schema\n"
3152 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
5852 peter_e 3153 CBC 48 : if (strlen(targetns) > 0)
3154 9 : appendStringInfo(result,
3155 : "\n"
3156 : " targetNamespace=\"%s\"\n"
3157 : " elementFormDefault=\"qualified\"",
5852 peter_e 3158 ECB : targetns);
5852 peter_e 3159 GIC 48 : appendStringInfoString(result,
3160 : ">\n\n");
3161 48 : }
3162 :
3163 :
3164 : static void
3165 48 : xsd_schema_element_end(StringInfo result)
5852 peter_e 3166 ECB : {
5633 tgl 3167 CBC 48 : appendStringInfoString(result, "</xsd:schema>");
5852 peter_e 3168 GIC 48 : }
5852 peter_e 3169 ECB :
3170 :
3171 : static StringInfo
5633 tgl 3172 GIC 9 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
5633 tgl 3173 ECB : bool tableforest, const char *targetns)
5852 peter_e 3174 : {
5852 peter_e 3175 EUB : Oid nspid;
3176 : List *relid_list;
3177 : List *tupdesc_list;
3178 : ListCell *cell;
5852 peter_e 3179 ECB : StringInfo result;
3180 :
5852 peter_e 3181 CBC 9 : result = makeStringInfo();
3182 :
3725 bruce 3183 9 : nspid = LookupExplicitNamespace(schemaname, false);
3184 :
5852 peter_e 3185 GIC 9 : xsd_schema_element_start(result, targetns);
5852 peter_e 3186 ECB :
5852 peter_e 3187 CBC 9 : SPI_connect();
3188 :
3189 9 : relid_list = schema_get_xml_visible_tables(nspid);
5852 peter_e 3190 ECB :
5852 peter_e 3191 GIC 9 : tupdesc_list = NIL;
5624 bruce 3192 27 : foreach(cell, relid_list)
5852 peter_e 3193 ECB : {
5624 bruce 3194 : Relation rel;
3195 :
1539 andres 3196 CBC 18 : rel = table_open(lfirst_oid(cell), AccessShareLock);
5633 tgl 3197 GIC 18 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
1539 andres 3198 CBC 18 : table_close(rel, NoLock);
3199 : }
3200 :
5852 peter_e 3201 GIC 9 : appendStringInfoString(result,
3202 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
5852 peter_e 3203 ECB :
5852 peter_e 3204 GIC 9 : appendStringInfoString(result,
2118 tgl 3205 ECB : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3206 : nulls, tableforest, targetns));
5852 peter_e 3207 :
5852 peter_e 3208 CBC 9 : xsd_schema_element_end(result);
3209 :
5852 peter_e 3210 GIC 9 : SPI_finish();
3211 :
5852 peter_e 3212 CBC 9 : return result;
5852 peter_e 3213 ECB : }
3214 :
3215 :
3216 : Datum
5852 peter_e 3217 CBC 6 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3218 : {
5852 peter_e 3219 GIC 6 : Name name = PG_GETARG_NAME(0);
3220 6 : bool nulls = PG_GETARG_BOOL(1);
3221 6 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3222 CBC 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3223 :
5633 3224 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2118 tgl 3225 ECB : nulls, tableforest, targetns)));
5852 peter_e 3226 : }
3227 :
3228 :
3229 : Datum
5852 peter_e 3230 GIC 3 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3231 : {
5852 peter_e 3232 CBC 3 : Name name = PG_GETARG_NAME(0);
5852 peter_e 3233 GIC 3 : bool nulls = PG_GETARG_BOOL(1);
5852 peter_e 3234 CBC 3 : bool tableforest = PG_GETARG_BOOL(2);
5493 tgl 3235 GBC 3 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3236 : char *schemaname;
5852 peter_e 3237 ECB : Oid nspid;
5852 peter_e 3238 EUB : StringInfo xmlschema;
3239 :
5852 peter_e 3240 CBC 3 : schemaname = NameStr(*name);
3725 bruce 3241 GIC 3 : nspid = LookupExplicitNamespace(schemaname, false);
3242 :
5633 tgl 3243 CBC 3 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
5633 tgl 3244 ECB : tableforest, targetns);
3245 :
5633 tgl 3246 CBC 3 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3247 : xmlschema->data, nulls,
3248 : tableforest, targetns, true)));
3249 : }
3250 :
5852 peter_e 3251 ECB :
3252 : /*
4559 3253 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3254 : * sections 9.16, 9.17.
5852 3255 : */
3256 :
3257 : static StringInfo
5633 tgl 3258 UIC 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3259 : bool tableforest, const char *targetns)
5852 peter_e 3260 ECB : {
3261 : StringInfo result;
3262 : List *nspid_list;
5852 peter_e 3263 EUB : ListCell *cell;
3264 : char *xmlcn;
3265 :
5633 tgl 3266 UIC 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
5633 tgl 3267 ECB : true, false);
5852 peter_e 3268 UIC 0 : result = makeStringInfo();
3269 :
5852 peter_e 3270 LBC 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2838 heikki.linnakangas 3271 UIC 0 : appendStringInfoChar(result, '\n');
5852 peter_e 3272 ECB :
5852 peter_e 3273 UIC 0 : if (xmlschema)
3274 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3275 :
3276 0 : SPI_connect();
5852 peter_e 3277 ECB :
5852 peter_e 3278 UIC 0 : nspid_list = database_get_xml_visible_schemas();
5852 peter_e 3279 ECB :
5852 peter_e 3280 LBC 0 : foreach(cell, nspid_list)
5852 peter_e 3281 ECB : {
5624 bruce 3282 LBC 0 : Oid nspid = lfirst_oid(cell);
3283 : StringInfo subres;
3284 :
5633 tgl 3285 UIC 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
5633 tgl 3286 ECB : tableforest, targetns, false);
5852 peter_e 3287 :
1356 drowley 3288 UIC 0 : appendBinaryStringInfo(result, subres->data, subres->len);
5852 peter_e 3289 LBC 0 : appendStringInfoChar(result, '\n');
3290 : }
5852 peter_e 3291 ECB :
5852 peter_e 3292 UIC 0 : SPI_finish();
3293 :
3294 0 : xmldata_root_element_end(result, xmlcn);
3295 :
3296 0 : return result;
3297 : }
5852 peter_e 3298 ECB :
3299 :
3300 : Datum
5852 peter_e 3301 LBC 0 : database_to_xml(PG_FUNCTION_ARGS)
5852 peter_e 3302 ECB : {
5852 peter_e 3303 LBC 0 : bool nulls = PG_GETARG_BOOL(0);
5852 peter_e 3304 UIC 0 : bool tableforest = PG_GETARG_BOOL(1);
5493 tgl 3305 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3306 :
5633 3307 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3308 : tableforest, targetns)));
5852 peter_e 3309 ECB : }
3310 :
3311 :
5852 peter_e 3312 EUB : static StringInfo
5633 tgl 3313 UIC 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
5633 tgl 3314 ECB : const char *targetns)
5852 peter_e 3315 EUB : {
3316 : List *relid_list;
5852 peter_e 3317 ECB : List *nspid_list;
3318 : List *tupdesc_list;
3319 : ListCell *cell;
3320 : StringInfo result;
3321 :
5852 peter_e 3322 LBC 0 : result = makeStringInfo();
3323 :
5852 peter_e 3324 UIC 0 : xsd_schema_element_start(result, targetns);
3325 :
3326 0 : SPI_connect();
3327 :
3328 0 : relid_list = database_get_xml_visible_tables();
3329 0 : nspid_list = database_get_xml_visible_schemas();
3330 :
3331 0 : tupdesc_list = NIL;
5624 bruce 3332 0 : foreach(cell, relid_list)
3333 : {
5624 bruce 3334 ECB : Relation rel;
3335 :
1539 andres 3336 UIC 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
5633 tgl 3337 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
1539 andres 3338 0 : table_close(rel, NoLock);
3339 : }
3340 :
5852 peter_e 3341 0 : appendStringInfoString(result,
5852 peter_e 3342 ECB : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3343 :
5852 peter_e 3344 LBC 0 : appendStringInfoString(result,
3345 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
5852 peter_e 3346 ECB :
5852 peter_e 3347 LBC 0 : xsd_schema_element_end(result);
3348 :
3349 0 : SPI_finish();
5852 peter_e 3350 ECB :
5852 peter_e 3351 UIC 0 : return result;
5852 peter_e 3352 ECB : }
3353 :
3354 :
3355 : Datum
5852 peter_e 3356 LBC 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3357 : {
3358 0 : bool nulls = PG_GETARG_BOOL(0);
5852 peter_e 3359 UIC 0 : bool tableforest = PG_GETARG_BOOL(1);
5493 tgl 3360 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
5852 peter_e 3361 ECB :
5633 tgl 3362 UIC 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3363 : tableforest, targetns)));
5852 peter_e 3364 ECB : }
3365 :
3366 :
3367 : Datum
5852 peter_e 3368 LBC 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3369 : {
3370 0 : bool nulls = PG_GETARG_BOOL(0);
5852 peter_e 3371 UIC 0 : bool tableforest = PG_GETARG_BOOL(1);
5493 tgl 3372 LBC 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3373 : StringInfo xmlschema;
3374 :
5852 peter_e 3375 UIC 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3376 :
5633 tgl 3377 LBC 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3378 : nulls, tableforest, targetns)));
5896 peter_e 3379 ECB : }
3380 :
3381 :
3382 : /*
3383 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3384 : * 9.2.
3385 : */
3386 : static char *
1986 peter_e 3387 CBC 192 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
5896 peter_e 3388 ECB : {
3389 : StringInfoData result;
3390 :
5896 peter_e 3391 GIC 192 : initStringInfo(&result);
3392 :
3393 192 : if (a)
3447 rhaas 3394 192 : appendStringInfoString(&result,
3260 bruce 3395 192 : map_sql_identifier_to_xml_name(a, true, true));
5896 peter_e 3396 192 : if (b)
5633 tgl 3397 192 : appendStringInfo(&result, ".%s",
3398 : map_sql_identifier_to_xml_name(b, true, true));
5896 peter_e 3399 CBC 192 : if (c)
5633 tgl 3400 GIC 192 : appendStringInfo(&result, ".%s",
5633 tgl 3401 ECB : map_sql_identifier_to_xml_name(c, true, true));
5896 peter_e 3402 GIC 192 : if (d)
5633 tgl 3403 183 : appendStringInfo(&result, ".%s",
5633 tgl 3404 ECB : map_sql_identifier_to_xml_name(d, true, true));
5896 peter_e 3405 :
5896 peter_e 3406 GIC 192 : return result.data;
3407 : }
3408 :
3409 :
5896 peter_e 3410 ECB : /*
3411 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
4559 3412 : * section 9.11.
3413 : *
3414 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3415 : * 9.9.
5896 3416 : */
3417 : static const char *
5633 tgl 3418 CBC 39 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
5633 tgl 3419 ECB : bool tableforest, const char *targetns)
3420 : {
3421 : int i;
3422 : char *xmltn;
5896 peter_e 3423 : char *tabletypename;
3424 : char *rowtypename;
3425 : StringInfoData result;
3426 :
5896 peter_e 3427 GIC 39 : initStringInfo(&result);
3428 :
5633 tgl 3429 39 : if (OidIsValid(relid))
3430 : {
3431 : HeapTuple tuple;
5633 tgl 3432 ECB : Form_pg_class reltuple;
3433 :
4802 rhaas 3434 CBC 27 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
5633 tgl 3435 GIC 27 : if (!HeapTupleIsValid(tuple))
5633 tgl 3436 LBC 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
5633 tgl 3437 GIC 27 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
5896 peter_e 3438 ECB :
5633 tgl 3439 GIC 27 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
5633 tgl 3440 ECB : true, false);
3441 :
5896 peter_e 3442 CBC 27 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
2118 tgl 3443 27 : get_database_name(MyDatabaseId),
2118 tgl 3444 GIC 27 : get_namespace_name(reltuple->relnamespace),
3445 27 : NameStr(reltuple->relname));
3446 :
5896 peter_e 3447 CBC 27 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
2118 tgl 3448 27 : get_database_name(MyDatabaseId),
3449 27 : get_namespace_name(reltuple->relnamespace),
2118 tgl 3450 GIC 27 : NameStr(reltuple->relname));
3451 :
5896 peter_e 3452 CBC 27 : ReleaseSysCache(tuple);
3453 : }
3454 : else
5896 peter_e 3455 ECB : {
5896 peter_e 3456 GIC 12 : if (tableforest)
3457 6 : xmltn = "row";
3458 : else
5896 peter_e 3459 CBC 6 : xmltn = "table";
3460 :
3461 12 : tabletypename = "TableType";
5896 peter_e 3462 GIC 12 : rowtypename = "RowType";
5896 peter_e 3463 ECB : }
3464 :
5852 peter_e 3465 GIC 39 : xsd_schema_element_start(&result, targetns);
3466 :
5896 3467 39 : appendStringInfoString(&result,
2118 tgl 3468 CBC 39 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3469 :
5896 peter_e 3470 39 : appendStringInfo(&result,
5896 peter_e 3471 ECB : "<xsd:complexType name=\"%s\">\n"
3472 : " <xsd:sequence>\n",
3473 : rowtypename);
3474 :
5896 peter_e 3475 CBC 162 : for (i = 0; i < tupdesc->natts; i++)
3476 : {
2058 andres 3477 GIC 123 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3478 :
3479 123 : if (att->attisdropped)
5053 peter_e 3480 3 : continue;
5896 peter_e 3481 CBC 240 : appendStringInfo(&result,
3482 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
2058 andres 3483 120 : map_sql_identifier_to_xml_name(NameStr(att->attname),
2118 tgl 3484 ECB : true, false),
2058 andres 3485 : map_sql_type_to_xml_name(att->atttypid, -1),
5896 peter_e 3486 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3487 : }
3488 :
5896 peter_e 3489 GIC 39 : appendStringInfoString(&result,
3490 : " </xsd:sequence>\n"
5896 peter_e 3491 ECB : "</xsd:complexType>\n\n");
3492 :
5896 peter_e 3493 GIC 39 : if (!tableforest)
5896 peter_e 3494 ECB : {
5896 peter_e 3495 GIC 21 : appendStringInfo(&result,
3496 : "<xsd:complexType name=\"%s\">\n"
5896 peter_e 3497 ECB : " <xsd:sequence>\n"
3498 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3499 : " </xsd:sequence>\n"
3500 : "</xsd:complexType>\n\n",
3501 : tabletypename, rowtypename);
3502 :
5896 peter_e 3503 GIC 21 : appendStringInfo(&result,
3504 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3505 : xmltn, tabletypename);
3506 : }
3507 : else
3508 18 : appendStringInfo(&result,
5896 peter_e 3509 EUB : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3510 : xmltn, rowtypename);
3511 :
5852 peter_e 3512 GIC 39 : xsd_schema_element_end(&result);
3513 :
3514 39 : return result.data;
3515 : }
3516 :
5852 peter_e 3517 EUB :
3518 : /*
4559 3519 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3520 : * section 9.12.
5852 3521 : */
3522 : static const char *
5633 tgl 3523 GIC 9 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
5633 tgl 3524 EUB : bool tableforest, const char *targetns)
5852 peter_e 3525 : {
3526 : char *dbname;
5633 tgl 3527 : char *nspname;
3528 : char *xmlsn;
5852 peter_e 3529 : char *schematypename;
3530 : StringInfoData result;
3531 : ListCell *cell;
3532 :
5633 tgl 3533 GBC 9 : dbname = get_database_name(MyDatabaseId);
5633 tgl 3534 GIC 9 : nspname = get_namespace_name(nspid);
3535 :
5852 peter_e 3536 GBC 9 : initStringInfo(&result);
3537 :
5633 tgl 3538 GIC 9 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
5852 peter_e 3539 EUB :
5852 peter_e 3540 GBC 9 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3541 : dbname,
3542 : nspname,
5852 peter_e 3543 EUB : NULL);
3544 :
5852 peter_e 3545 GBC 9 : appendStringInfo(&result,
3546 : "<xsd:complexType name=\"%s\">\n", schematypename);
3547 9 : if (!tableforest)
5852 peter_e 3548 GIC 3 : appendStringInfoString(&result,
3549 : " <xsd:all>\n");
3550 : else
3551 6 : appendStringInfoString(&result,
5852 peter_e 3552 EUB : " <xsd:sequence>\n");
3553 :
5624 bruce 3554 GBC 27 : foreach(cell, relid_list)
5852 peter_e 3555 EUB : {
5624 bruce 3556 GBC 18 : Oid relid = lfirst_oid(cell);
5624 bruce 3557 GIC 18 : char *relname = get_rel_name(relid);
5624 bruce 3558 GBC 18 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
5624 bruce 3559 GIC 18 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3560 : dbname,
3561 : nspname,
3562 : relname);
3563 :
5852 peter_e 3564 GBC 18 : if (!tableforest)
5852 peter_e 3565 GIC 6 : appendStringInfo(&result,
3566 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3567 : xmltn, tabletypename);
3568 : else
3569 12 : appendStringInfo(&result,
3570 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3571 : xmltn, tabletypename);
3572 : }
5852 peter_e 3573 EUB :
5852 peter_e 3574 GIC 9 : if (!tableforest)
5852 peter_e 3575 GBC 3 : appendStringInfoString(&result,
3576 : " </xsd:all>\n");
5852 peter_e 3577 EUB : else
5852 peter_e 3578 GIC 6 : appendStringInfoString(&result,
5852 peter_e 3579 EUB : " </xsd:sequence>\n");
5896 peter_e 3580 GBC 9 : appendStringInfoString(&result,
3581 : "</xsd:complexType>\n\n");
5852 peter_e 3582 EUB :
5852 peter_e 3583 GBC 9 : appendStringInfo(&result,
3584 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3585 : xmlsn, schematypename);
3586 :
3587 9 : return result.data;
5852 peter_e 3588 EUB : }
3589 :
3590 :
3591 : /*
4559 3592 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3593 : * section 9.15.
3594 : */
5852 3595 : static const char *
5633 tgl 3596 UIC 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3597 : bool tableforest, const char *targetns)
5852 peter_e 3598 EUB : {
3599 : char *dbname;
3600 : char *xmlcn;
3601 : char *catalogtypename;
3602 : StringInfoData result;
3603 : ListCell *cell;
3604 :
5633 tgl 3605 UIC 0 : dbname = get_database_name(MyDatabaseId);
3606 :
5852 peter_e 3607 UBC 0 : initStringInfo(&result);
3608 :
5633 tgl 3609 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
5852 peter_e 3610 EUB :
5852 peter_e 3611 UBC 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3612 : dbname,
5852 peter_e 3613 EUB : NULL,
3614 : NULL);
3615 :
5852 peter_e 3616 UIC 0 : appendStringInfo(&result,
3617 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3618 0 : appendStringInfoString(&result,
5852 peter_e 3619 EUB : " <xsd:all>\n");
3620 :
5624 bruce 3621 UBC 0 : foreach(cell, nspid_list)
5852 peter_e 3622 EUB : {
5624 bruce 3623 UBC 0 : Oid nspid = lfirst_oid(cell);
5633 tgl 3624 UIC 0 : char *nspname = get_namespace_name(nspid);
5624 bruce 3625 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
5624 bruce 3626 UBC 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3627 : dbname,
2118 tgl 3628 EUB : nspname,
3629 : NULL);
3630 :
5852 peter_e 3631 UIC 0 : appendStringInfo(&result,
3632 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3633 : xmlsn, schematypename);
3634 : }
3635 :
3636 0 : appendStringInfoString(&result,
3637 : " </xsd:all>\n");
5852 peter_e 3638 LBC 0 : appendStringInfoString(&result,
3639 : "</xsd:complexType>\n\n");
3640 :
5852 peter_e 3641 UIC 0 : appendStringInfo(&result,
5852 peter_e 3642 ECB : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3643 : xmlcn, catalogtypename);
5896 3644 :
5896 peter_e 3645 LBC 0 : return result.data;
5896 peter_e 3646 ECB : }
3647 :
3648 :
3649 : /*
4559 3650 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
5896 3651 : */
3652 : static const char *
5896 peter_e 3653 CBC 405 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
5896 peter_e 3654 ECB : {
3655 : StringInfoData result;
3656 :
5896 peter_e 3657 CBC 405 : initStringInfo(&result);
3658 :
5624 bruce 3659 GIC 405 : switch (typeoid)
3660 : {
5896 peter_e 3661 15 : case BPCHAROID:
3662 15 : if (typmod == -1)
3447 rhaas 3663 15 : appendStringInfoString(&result, "CHAR");
3664 : else
5896 peter_e 3665 UIC 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
5896 peter_e 3666 GIC 15 : break;
3667 27 : case VARCHAROID:
3668 27 : if (typmod == -1)
3447 rhaas 3669 CBC 27 : appendStringInfoString(&result, "VARCHAR");
3670 : else
5896 peter_e 3671 UIC 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
5896 peter_e 3672 GIC 27 : break;
3673 15 : case NUMERICOID:
3674 15 : if (typmod == -1)
3447 rhaas 3675 15 : appendStringInfoString(&result, "NUMERIC");
3676 : else
5896 peter_e 3677 UIC 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
5896 peter_e 3678 LBC 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
5896 peter_e 3679 UIC 0 : (typmod - VARHDRSZ) & 0xffff);
5896 peter_e 3680 CBC 15 : break;
5896 peter_e 3681 GIC 87 : case INT4OID:
3447 rhaas 3682 87 : appendStringInfoString(&result, "INTEGER");
5896 peter_e 3683 87 : break;
3684 15 : case INT2OID:
3447 rhaas 3685 CBC 15 : appendStringInfoString(&result, "SMALLINT");
5896 peter_e 3686 15 : break;
5896 peter_e 3687 GBC 15 : case INT8OID:
3447 rhaas 3688 CBC 15 : appendStringInfoString(&result, "BIGINT");
5896 peter_e 3689 GIC 15 : break;
5896 peter_e 3690 CBC 15 : case FLOAT4OID:
3447 rhaas 3691 GIC 15 : appendStringInfoString(&result, "REAL");
5896 peter_e 3692 15 : break;
5896 peter_e 3693 LBC 0 : case FLOAT8OID:
3447 rhaas 3694 0 : appendStringInfoString(&result, "DOUBLE");
5896 peter_e 3695 0 : break;
5896 peter_e 3696 CBC 15 : case BOOLOID:
3447 rhaas 3697 GIC 15 : appendStringInfoString(&result, "BOOLEAN");
5896 peter_e 3698 CBC 15 : break;
3699 15 : case TIMEOID:
3700 15 : if (typmod == -1)
3447 rhaas 3701 15 : appendStringInfoString(&result, "TIME");
3702 : else
5896 peter_e 3703 LBC 0 : appendStringInfo(&result, "TIME_%d", typmod);
5896 peter_e 3704 GIC 15 : break;
3705 15 : case TIMETZOID:
3706 15 : if (typmod == -1)
3447 rhaas 3707 CBC 15 : appendStringInfoString(&result, "TIME_WTZ");
5896 peter_e 3708 ECB : else
5896 peter_e 3709 UIC 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
5896 peter_e 3710 CBC 15 : break;
5896 peter_e 3711 GIC 15 : case TIMESTAMPOID:
5896 peter_e 3712 CBC 15 : if (typmod == -1)
3447 rhaas 3713 15 : appendStringInfoString(&result, "TIMESTAMP");
3714 : else
5896 peter_e 3715 UIC 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
5896 peter_e 3716 CBC 15 : break;
5896 peter_e 3717 GIC 15 : case TIMESTAMPTZOID:
5896 peter_e 3718 CBC 15 : if (typmod == -1)
3447 rhaas 3719 15 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3720 : else
5896 peter_e 3721 LBC 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
5896 peter_e 3722 GIC 15 : break;
3723 15 : case DATEOID:
3447 rhaas 3724 15 : appendStringInfoString(&result, "DATE");
5896 peter_e 3725 15 : break;
5896 peter_e 3726 CBC 15 : case XMLOID:
3447 rhaas 3727 GIC 15 : appendStringInfoString(&result, "XML");
5896 peter_e 3728 CBC 15 : break;
5896 peter_e 3729 GIC 111 : default:
5624 bruce 3730 ECB : {
3731 : HeapTuple tuple;
3732 : Form_pg_type typtuple;
3733 :
4802 rhaas 3734 CBC 111 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
5624 bruce 3735 GIC 111 : if (!HeapTupleIsValid(tuple))
5624 bruce 3736 UIC 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
5624 bruce 3737 GIC 111 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3738 :
3739 111 : appendStringInfoString(&result,
5624 bruce 3740 CBC 111 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
2118 tgl 3741 GIC 111 : get_database_name(MyDatabaseId),
3742 111 : get_namespace_name(typtuple->typnamespace),
3743 111 : NameStr(typtuple->typname)));
5624 bruce 3744 ECB :
5624 bruce 3745 GIC 111 : ReleaseSysCache(tuple);
5624 bruce 3746 ECB : }
3747 : }
3748 :
5896 peter_e 3749 GIC 405 : return result.data;
3750 : }
3751 :
3752 :
3753 : /*
5896 peter_e 3754 ECB : * Map a collection of SQL data types to XML Schema data types; see
3755 : * SQL/XML:2008 section 9.7.
3756 : */
3757 : static const char *
5852 peter_e 3758 GIC 48 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
5896 peter_e 3759 ECB : {
5852 peter_e 3760 GIC 48 : List *uniquetypes = NIL;
3761 : int i;
3762 : StringInfoData result;
5749 tgl 3763 ECB : ListCell *cell0;
3764 :
3765 : /* extract all column types used in the set of TupleDescs */
5749 tgl 3766 GIC 105 : foreach(cell0, tupdesc_list)
3767 : {
5624 bruce 3768 57 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3769 :
5749 tgl 3770 351 : for (i = 0; i < tupdesc->natts; i++)
3771 : {
2058 andres 3772 294 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3773 :
2058 andres 3774 CBC 294 : if (att->attisdropped)
5852 peter_e 3775 GIC 12 : continue;
2058 andres 3776 282 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3777 : }
3778 : }
3779 :
3780 : /* add base types of domains */
5749 tgl 3781 321 : foreach(cell0, uniquetypes)
3782 : {
5624 bruce 3783 273 : Oid typid = lfirst_oid(cell0);
5624 bruce 3784 CBC 273 : Oid basetypid = getBaseType(typid);
5896 peter_e 3785 ECB :
5749 tgl 3786 GIC 273 : if (basetypid != typid)
5749 tgl 3787 CBC 12 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3788 : }
5896 peter_e 3789 ECB :
3790 : /* Convert to textual form */
5852 peter_e 3791 CBC 48 : initStringInfo(&result);
3792 :
5749 tgl 3793 GIC 321 : foreach(cell0, uniquetypes)
3794 : {
3795 273 : appendStringInfo(&result, "%s\n",
5749 tgl 3796 ECB : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3797 : -1));
3798 : }
5896 peter_e 3799 :
5896 peter_e 3800 GIC 48 : return result.data;
3801 : }
5896 peter_e 3802 ECB :
3803 :
3804 : /*
4559 3805 : * Map an SQL data type to a named XML Schema data type; see
3806 : * SQL/XML:2008 sections 9.5 and 9.6.
5896 3807 : *
4559 3808 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3260 bruce 3809 : * a name attribute, which this function does. The name-less version
4559 peter_e 3810 : * 9.5 doesn't appear to be required anywhere.)
3811 : */
3812 : static const char *
5896 peter_e 3813 GIC 273 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3814 : {
5896 peter_e 3815 ECB : StringInfoData result;
5896 peter_e 3816 CBC 273 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3817 :
5896 peter_e 3818 GIC 273 : initStringInfo(&result);
3819 :
5896 peter_e 3820 CBC 273 : if (typeoid == XMLOID)
3821 : {
3447 rhaas 3822 GIC 12 : appendStringInfoString(&result,
3823 : "<xsd:complexType mixed=\"true\">\n"
3824 : " <xsd:sequence>\n"
3260 bruce 3825 ECB : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3826 : " </xsd:sequence>\n"
3827 : "</xsd:complexType>\n");
3828 : }
5896 peter_e 3829 : else
3830 : {
5896 peter_e 3831 CBC 261 : appendStringInfo(&result,
3832 : "<xsd:simpleType name=\"%s\">\n", typename);
3833 :
5624 bruce 3834 261 : switch (typeoid)
3835 : {
5896 peter_e 3836 GIC 69 : case BPCHAROID:
3837 : case VARCHAROID:
5896 peter_e 3838 ECB : case TEXTOID:
2063 peter_e 3839 GIC 69 : appendStringInfoString(&result,
3840 : " <xsd:restriction base=\"xsd:string\">\n");
5896 3841 69 : if (typmod != -1)
5896 peter_e 3842 UIC 0 : appendStringInfo(&result,
3843 : " <xsd:maxLength value=\"%d\"/>\n",
3844 : typmod - VARHDRSZ);
3447 rhaas 3845 GIC 69 : appendStringInfoString(&result, " </xsd:restriction>\n");
5896 peter_e 3846 69 : break;
5896 peter_e 3847 EUB :
5896 peter_e 3848 GIC 12 : case BYTEAOID:
3849 12 : appendStringInfo(&result,
3850 : " <xsd:restriction base=\"xsd:%s\">\n"
3851 : " </xsd:restriction>\n",
2118 tgl 3852 12 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
5611 peter_e 3853 12 : break;
3854 :
5896 3855 12 : case NUMERICOID:
5896 peter_e 3856 GBC 12 : if (typmod != -1)
5896 peter_e 3857 UIC 0 : appendStringInfo(&result,
2118 tgl 3858 EUB : " <xsd:restriction base=\"xsd:decimal\">\n"
3859 : " <xsd:totalDigits value=\"%d\"/>\n"
3860 : " <xsd:fractionDigits value=\"%d\"/>\n"
3861 : " </xsd:restriction>\n",
5896 peter_e 3862 UBC 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
5896 peter_e 3863 UIC 0 : (typmod - VARHDRSZ) & 0xffff);
5896 peter_e 3864 GIC 12 : break;
3865 :
3866 12 : case INT2OID:
5896 peter_e 3867 GBC 12 : appendStringInfo(&result,
3868 : " <xsd:restriction base=\"xsd:short\">\n"
5896 peter_e 3869 EUB : " <xsd:maxInclusive value=\"%d\"/>\n"
3870 : " <xsd:minInclusive value=\"%d\"/>\n"
3871 : " </xsd:restriction>\n",
3872 : SHRT_MAX, SHRT_MIN);
5896 peter_e 3873 GIC 12 : break;
5896 peter_e 3874 EUB :
5896 peter_e 3875 GBC 48 : case INT4OID:
3876 48 : appendStringInfo(&result,
5614 peter_e 3877 EUB : " <xsd:restriction base=\"xsd:int\">\n"
3878 : " <xsd:maxInclusive value=\"%d\"/>\n"
3879 : " <xsd:minInclusive value=\"%d\"/>\n"
3880 : " </xsd:restriction>\n",
3881 : INT_MAX, INT_MIN);
5896 peter_e 3882 GBC 48 : break;
3883 :
5896 peter_e 3884 GIC 12 : case INT8OID:
3885 12 : appendStringInfo(&result,
3886 : " <xsd:restriction base=\"xsd:long\">\n"
2118 tgl 3887 EUB : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3888 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
5896 peter_e 3889 : " </xsd:restriction>\n",
3890 : PG_INT64_MAX,
3891 : PG_INT64_MIN);
5896 peter_e 3892 GBC 12 : break;
3893 :
5896 peter_e 3894 GIC 12 : case FLOAT4OID:
3447 rhaas 3895 12 : appendStringInfoString(&result,
2118 tgl 3896 EUB : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
5896 peter_e 3897 GIC 12 : break;
3898 :
5896 peter_e 3899 UIC 0 : case FLOAT8OID:
3447 rhaas 3900 0 : appendStringInfoString(&result,
3901 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
5896 peter_e 3902 0 : break;
3903 :
5896 peter_e 3904 CBC 12 : case BOOLOID:
3447 rhaas 3905 GIC 12 : appendStringInfoString(&result,
3906 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
5896 peter_e 3907 12 : break;
5896 peter_e 3908 ECB :
5896 peter_e 3909 GIC 24 : case TIMEOID:
5896 peter_e 3910 ECB : case TIMETZOID:
3911 : {
387 tgl 3912 CBC 24 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
5624 bruce 3913 ECB :
5624 bruce 3914 CBC 24 : if (typmod == -1)
5624 bruce 3915 GIC 24 : appendStringInfo(&result,
2118 tgl 3916 EUB : " <xsd:restriction base=\"xsd:time\">\n"
5624 bruce 3917 ECB : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3918 : " </xsd:restriction>\n", tz);
5624 bruce 3919 LBC 0 : else if (typmod == 0)
3920 0 : appendStringInfo(&result,
3921 : " <xsd:restriction base=\"xsd:time\">\n"
5624 bruce 3922 EUB : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
5624 bruce 3923 ECB : " </xsd:restriction>\n", tz);
3924 : else
5624 bruce 3925 LBC 0 : appendStringInfo(&result,
2118 tgl 3926 ECB : " <xsd:restriction base=\"xsd:time\">\n"
3927 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
2118 tgl 3928 EUB : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
5624 bruce 3929 GBC 24 : break;
5624 bruce 3930 EUB : }
5896 peter_e 3931 ECB :
5896 peter_e 3932 CBC 24 : case TIMESTAMPOID:
5896 peter_e 3933 ECB : case TIMESTAMPTZOID:
5624 bruce 3934 : {
387 tgl 3935 CBC 24 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
5624 bruce 3936 ECB :
5624 bruce 3937 CBC 24 : if (typmod == -1)
3938 24 : appendStringInfo(&result,
2118 tgl 3939 ECB : " <xsd:restriction base=\"xsd:dateTime\">\n"
5624 bruce 3940 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3941 : " </xsd:restriction>\n", tz);
5624 bruce 3942 LBC 0 : else if (typmod == 0)
3943 0 : appendStringInfo(&result,
2118 tgl 3944 EUB : " <xsd:restriction base=\"xsd:dateTime\">\n"
5624 bruce 3945 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3946 : " </xsd:restriction>\n", tz);
5624 bruce 3947 ECB : else
5624 bruce 3948 LBC 0 : appendStringInfo(&result,
2118 tgl 3949 ECB : " <xsd:restriction base=\"xsd:dateTime\">\n"
5624 bruce 3950 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
2118 tgl 3951 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
5624 bruce 3952 CBC 24 : break;
3953 : }
5896 peter_e 3954 EUB :
5896 peter_e 3955 CBC 12 : case DATEOID:
3447 rhaas 3956 12 : appendStringInfoString(&result,
2118 tgl 3957 ECB : " <xsd:restriction base=\"xsd:date\">\n"
3260 bruce 3958 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3959 : " </xsd:restriction>\n");
5633 tgl 3960 GBC 12 : break;
5896 peter_e 3961 ECB :
5896 peter_e 3962 CBC 12 : default:
5851 tgl 3963 12 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
5896 peter_e 3964 ECB : {
3965 : Oid base_typeoid;
5624 bruce 3966 GBC 12 : int32 base_typmod = -1;
5896 peter_e 3967 ECB :
5896 peter_e 3968 CBC 12 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
5896 peter_e 3969 ECB :
5896 peter_e 3970 CBC 12 : appendStringInfo(&result,
3971 : " <xsd:restriction base=\"%s\"/>\n",
2118 tgl 3972 EUB : map_sql_type_to_xml_name(base_typeoid, base_typmod));
5896 peter_e 3973 ECB : }
5633 tgl 3974 CBC 12 : break;
5896 peter_e 3975 ECB : }
3447 rhaas 3976 CBC 261 : appendStringInfoString(&result, "</xsd:simpleType>\n");
5896 peter_e 3977 ECB : }
3978 :
5896 peter_e 3979 CBC 273 : return result.data;
5896 peter_e 3980 ECB : }
3981 :
3982 :
3983 : /*
3984 : * Map an SQL row to an XML element, taking the row from the active
3260 bruce 3985 : * SPI cursor. See also SQL/XML:2008 section 9.10.
5896 peter_e 3986 : */
5896 peter_e 3987 EUB : static void
2584 tgl 3988 CBC 156 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3989 : bool nulls, bool tableforest,
5633 tgl 3990 ECB : const char *targetns, bool top_level)
5896 peter_e 3991 : {
3992 : int i;
3993 : char *xmltn;
3994 :
5896 peter_e 3995 GIC 156 : if (tablename)
5896 peter_e 3996 CBC 114 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3997 : else
3998 : {
5896 peter_e 3999 GIC 42 : if (tableforest)
5896 peter_e 4000 CBC 18 : xmltn = "row";
4001 : else
5896 peter_e 4002 GIC 24 : xmltn = "table";
4003 : }
4004 :
4005 156 : if (tableforest)
5852 4006 81 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4007 : else
5896 4008 75 : appendStringInfoString(result, "<row>\n");
5896 peter_e 4009 ECB :
5624 bruce 4010 GIC 636 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
5896 peter_e 4011 ECB : {
4012 : char *colname;
4013 : Datum colval;
4014 : bool isnull;
4015 :
5633 tgl 4016 GIC 480 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
5633 tgl 4017 ECB : true, false);
5633 tgl 4018 GIC 480 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
5633 tgl 4019 CBC 480 : SPI_tuptable->tupdesc,
4020 : i,
5633 tgl 4021 ECB : &isnull);
5896 peter_e 4022 GIC 480 : if (isnull)
5896 peter_e 4023 ECB : {
5896 peter_e 4024 GIC 57 : if (nulls)
5614 peter_e 4025 CBC 30 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
5896 peter_e 4026 ECB : }
4027 : else
5896 peter_e 4028 GIC 423 : appendStringInfo(result, " <%s>%s</%s>\n",
4029 : colname,
4030 : map_sql_value_to_xml_value(colval,
2118 tgl 4031 423 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
5896 peter_e 4032 ECB : colname);
4033 : }
4034 :
5896 peter_e 4035 CBC 156 : if (tableforest)
4036 : {
5852 4037 81 : xmldata_root_element_end(result, xmltn);
4038 81 : appendStringInfoChar(result, '\n');
4039 : }
4040 : else
5896 peter_e 4041 GIC 75 : appendStringInfoString(result, "</row>\n\n");
5896 peter_e 4042 CBC 156 : }
4043 :
5862 bruce 4044 ECB :
4045 : /*
4046 : * XPath related functions
4047 : */
4048 :
4049 : #ifdef USE_LIBXML
4050 :
5624 4051 : /*
4052 : * Convert XML node to text.
4053 : *
4054 : * For attribute and text nodes, return the escaped text. For anything else,
4055 : * dump the whole subtree.
4056 : */
4057 : static text *
3015 peter_e 4058 GIC 96 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4059 : {
1252 peter 4060 96 : xmltype *result = NULL;
4061 :
1494 alvherre 4062 96 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
5862 bruce 4063 81 : {
1469 tgl 4064 CBC 81 : void (*volatile nodefree) (xmlNodePtr) = NULL;
1493 alvherre 4065 GIC 81 : volatile xmlBufferPtr buf = NULL;
4066 81 : volatile xmlNodePtr cur_copy = NULL;
5079 tgl 4067 ECB :
1493 alvherre 4068 GIC 81 : PG_TRY();
1493 alvherre 4069 ECB : {
4070 : int bytes;
3015 peter_e 4071 :
1493 alvherre 4072 GIC 81 : buf = xmlBufferCreate();
1493 alvherre 4073 CBC 81 : if (buf == NULL || xmlerrcxt->err_occurred)
1493 alvherre 4074 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4075 : "could not allocate xmlBuffer");
4076 :
4077 : /*
4078 : * Produce a dump of the node that we can serialize. xmlNodeDump
4079 : * does that, but the result of that function won't contain
4080 : * namespace definitions from ancestor nodes, so we first do a
4081 : * xmlCopyNode() which duplicates the node along with its required
1493 alvherre 4082 ECB : * namespace definitions.
4083 : *
4084 : * Some old libxml2 versions such as 2.7.6 produce partially
4085 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4086 : * copying them. xmlNodeDump of such a node works fine, but
4087 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4088 : */
1493 alvherre 4089 GIC 81 : cur_copy = xmlCopyNode(cur, 1);
1493 alvherre 4090 CBC 81 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
1493 alvherre 4091 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1493 alvherre 4092 ECB : "could not copy node");
1493 alvherre 4093 GBC 162 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
1493 alvherre 4094 GIC 81 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4095 :
1447 tgl 4096 CBC 81 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
1493 alvherre 4097 81 : if (bytes == -1 || xmlerrcxt->err_occurred)
1493 alvherre 4098 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1493 alvherre 4099 ECB : "could not dump node");
3015 peter_e 4100 :
5079 tgl 4101 GIC 81 : result = xmlBuffer_to_xmltype(buf);
4102 : }
1249 peter 4103 LBC 0 : PG_FINALLY();
5079 tgl 4104 ECB : {
1493 alvherre 4105 GIC 81 : if (nodefree)
1493 alvherre 4106 CBC 81 : nodefree(cur_copy);
4107 81 : if (buf)
1493 alvherre 4108 GBC 81 : xmlBufferFree(buf);
4109 : }
5079 tgl 4110 GIC 81 : PG_END_TRY();
4111 : }
4112 : else
5862 bruce 4113 EUB : {
5079 tgl 4114 : xmlChar *str;
5079 tgl 4115 ECB :
5862 bruce 4116 GIC 15 : str = xmlXPathCastNodeToString(cur);
5079 tgl 4117 CBC 15 : PG_TRY();
5079 tgl 4118 ECB : {
4119 : /* Here we rely on XML having the same representation as TEXT */
3955 bruce 4120 GIC 15 : char *escaped = escape_xml((char *) str);
4121 :
4281 tgl 4122 15 : result = (xmltype *) cstring_to_text(escaped);
4123 15 : pfree(escaped);
5079 tgl 4124 ECB : }
1255 peter 4125 UIC 0 : PG_FINALLY();
5079 tgl 4126 ECB : {
5079 tgl 4127 CBC 15 : xmlFree(str);
4128 : }
5079 tgl 4129 GIC 15 : PG_END_TRY();
4130 : }
4131 :
5862 bruce 4132 96 : return result;
5862 bruce 4133 ECB : }
4134 :
4280 tgl 4135 : /*
4136 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4137 : * to an array of xml values, which are appended to astate. The function
4138 : * result value is the number of elements in the array.
4139 : *
4140 : * If "astate" is NULL then we don't generate the array value, but we still
4141 : * return the number of elements it would have had.
4142 : *
4143 : * Nodesets are converted to an array containing the nodes' textual
4144 : * representations. Primitive values (float, double, string) are converted
4145 : * to a single-element array containing the value's string representation.
4146 : */
4147 : static int
4280 tgl 4148 CBC 270 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4149 : ArrayBuildState *astate,
3015 peter_e 4150 EUB : PgXmlErrorContext *xmlerrcxt)
4280 tgl 4151 : {
4280 tgl 4152 GIC 270 : int result = 0;
4280 tgl 4153 EUB : Datum datum;
4154 : Oid datumtype;
4280 tgl 4155 ECB : char *result_str;
4156 :
4280 tgl 4157 GIC 270 : switch (xpathobj->type)
4280 tgl 4158 ECB : {
4280 tgl 4159 GIC 249 : case XPATH_NODESET:
4280 tgl 4160 CBC 249 : if (xpathobj->nodesetval != NULL)
4161 : {
4280 tgl 4162 GIC 177 : result = xpathobj->nodesetval->nodeNr;
4280 tgl 4163 CBC 177 : if (astate != NULL)
4164 : {
3955 bruce 4165 ECB : int i;
4280 tgl 4166 :
4280 tgl 4167 GIC 84 : for (i = 0; i < result; i++)
4168 : {
3015 peter_e 4169 45 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
2118 tgl 4170 EUB : xmlerrcxt));
3057 tgl 4171 GBC 45 : (void) accumArrayResult(astate, datum, false,
4172 : XMLOID, CurrentMemoryContext);
4173 : }
4174 : }
4175 : }
4280 4176 249 : return result;
4177 :
4280 tgl 4178 GIC 6 : case XPATH_BOOLEAN:
4179 6 : if (astate == NULL)
4280 tgl 4180 LBC 0 : return 1;
4280 tgl 4181 GIC 6 : datum = BoolGetDatum(xpathobj->boolval);
4182 6 : datumtype = BOOLOID;
4280 tgl 4183 CBC 6 : break;
4184 :
4280 tgl 4185 GIC 9 : case XPATH_NUMBER:
4280 tgl 4186 CBC 9 : if (astate == NULL)
4280 tgl 4187 GIC 6 : return 1;
4280 tgl 4188 CBC 3 : datum = Float8GetDatum(xpathobj->floatval);
4189 3 : datumtype = FLOAT8OID;
4280 tgl 4190 GIC 3 : break;
4191 :
4192 6 : case XPATH_STRING:
4280 tgl 4193 GBC 6 : if (astate == NULL)
4280 tgl 4194 UBC 0 : return 1;
4280 tgl 4195 GIC 6 : datum = CStringGetDatum((char *) xpathobj->stringval);
4196 6 : datumtype = CSTRINGOID;
4197 6 : break;
4198 :
4280 tgl 4199 UBC 0 : default:
4280 tgl 4200 UIC 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4201 : xpathobj->type);
4202 : return 0; /* keep compiler quiet */
4280 tgl 4203 ECB : }
4204 :
4205 : /* Common code for scalar-value cases */
4280 tgl 4206 CBC 15 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4207 15 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
3057 tgl 4208 GIC 15 : (void) accumArrayResult(astate, datum, false,
4209 : XMLOID, CurrentMemoryContext);
4280 4210 15 : return 1;
4280 tgl 4211 ECB : }
4212 :
5802 peter_e 4213 :
5862 bruce 4214 : /*
4215 : * Common code for xpath() and xmlexists()
4216 : *
1335 michael 4217 : * Evaluate XPath expression and return number of nodes in res_nitems
4218 : * and array of XML values in astate. Either of those pointers can be
4280 tgl 4219 : * NULL if the corresponding result isn't wanted.
4220 : *
5130 andrew 4221 : * It is up to the user to ensure that the XML passed is in fact
4222 : * an XML document - XPath doesn't work easily on fragments without
4223 : * a context node being known.
4224 : */
4630 peter_e 4225 : static void
4630 peter_e 4226 GIC 279 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3057 tgl 4227 ECB : int *res_nitems, ArrayBuildState *astate)
4228 : {
4229 : PgXmlErrorContext *xmlerrcxt;
4281 tgl 4230 CBC 279 : volatile xmlParserCtxtPtr ctxt = NULL;
4281 tgl 4231 GIC 279 : volatile xmlDocPtr doc = NULL;
4232 279 : volatile xmlXPathContextPtr xpathctx = NULL;
4233 279 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4234 279 : volatile xmlXPathObjectPtr xpathobj = NULL;
4235 : char *datastr;
4236 : int32 len;
4237 : int32 xpath_len;
4238 : xmlChar *string;
5624 bruce 4239 ECB : xmlChar *xpath_expr;
1975 noah 4240 GIC 279 : size_t xmldecl_len = 0;
4241 : int i;
4242 : int ndim;
4243 : Datum *ns_names_uris;
4244 : bool *ns_names_uris_nulls;
4245 : int ns_count;
5802 peter_e 4246 ECB :
4247 : /*
4248 : * Namespace mappings are passed as text[]. If an empty array is passed
4249 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
5624 bruce 4250 : * Else, a 2-dimensional array with length of the second axis being equal
4251 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4252 : * first element defining the name, the second one the URI. Example:
4253 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4254 : * 'http://example2.com']].
4255 : */
4630 peter_e 4256 CBC 279 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
5802 4257 279 : if (ndim != 0)
4258 : {
5802 peter_e 4259 ECB : int *dims;
4260 :
5862 bruce 4261 CBC 63 : dims = ARR_DIMS(namespaces);
4262 :
5802 peter_e 4263 GIC 63 : if (ndim != 2 || dims[1] != 2)
5633 tgl 4264 UIC 0 : ereport(ERROR,
4265 : (errcode(ERRCODE_DATA_EXCEPTION),
4266 : errmsg("invalid array for XML namespace mapping"),
5633 tgl 4267 ECB : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4268 :
5862 bruce 4269 CBC 63 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
5802 peter_e 4270 ECB :
282 peter 4271 GNC 63 : deconstruct_array_builtin(namespaces, TEXTOID,
4272 : &ns_names_uris, &ns_names_uris_nulls,
4273 : &ns_count);
4274 :
5624 bruce 4275 CBC 63 : Assert((ns_count % 2) == 0); /* checked above */
5633 tgl 4276 63 : ns_count /= 2; /* count pairs only */
4277 : }
4278 : else
5802 peter_e 4279 ECB : {
5633 tgl 4280 GIC 216 : ns_names_uris = NULL;
4281 216 : ns_names_uris_nulls = NULL;
5802 peter_e 4282 CBC 216 : ns_count = 0;
4283 : }
4284 :
5633 tgl 4285 GIC 279 : datastr = VARDATA(data);
5862 bruce 4286 CBC 279 : len = VARSIZE(data) - VARHDRSZ;
2219 noah 4287 GIC 279 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
5862 bruce 4288 CBC 279 : if (xpath_len == 0)
5633 tgl 4289 3 : ereport(ERROR,
4290 : (errcode(ERRCODE_DATA_EXCEPTION),
4291 : errmsg("empty XPath expression")));
5802 peter_e 4292 ECB :
2223 alvherre 4293 CBC 276 : string = pg_xmlCharStrndup(datastr, len);
2219 noah 4294 GIC 276 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4295 :
4296 : /*
4297 : * In a UTF8 database, skip any xml declaration, which might assert
4298 : * another encoding. Ignore parse_xml_decl() failure, letting
4299 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4300 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4301 : * those scenarios bug-compatible with historical behavior.
4302 : */
1975 4303 276 : if (GetDatabaseEncoding() == PG_UTF8)
4304 276 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4305 :
4281 tgl 4306 276 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4307 :
5079 4308 276 : PG_TRY();
5079 tgl 4309 ECB : {
4281 tgl 4310 GIC 276 : xmlInitParser();
4281 tgl 4311 ECB :
4312 : /*
5050 bruce 4313 : * redundant XML parsing (two parsings for the same value during one
4314 : * command execution are possible)
4315 : */
5050 bruce 4316 CBC 276 : ctxt = xmlNewParserCtxt();
4281 tgl 4317 276 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4281 tgl 4318 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
5050 bruce 4319 ECB : "could not allocate parser context");
1975 noah 4320 GIC 552 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4321 276 : len - xmldecl_len, NULL, NULL, 0);
4281 tgl 4322 276 : if (doc == NULL || xmlerrcxt->err_occurred)
4281 tgl 4323 CBC 6 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
5050 bruce 4324 ECB : "could not parse XML document");
5050 bruce 4325 GBC 270 : xpathctx = xmlXPathNewContext(doc);
4281 tgl 4326 GIC 270 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4281 tgl 4327 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4328 : "could not allocate XPath context");
1753 alvherre 4329 GIC 270 : xpathctx->node = (xmlNodePtr) doc;
4330 :
4331 : /* register namespaces, if any */
5050 bruce 4332 270 : if (ns_count > 0)
4333 : {
4334 126 : for (i = 0; i < ns_count; i++)
4335 : {
4336 : char *ns_name;
4337 : char *ns_uri;
4338 :
4339 63 : if (ns_names_uris_nulls[i * 2] ||
5050 bruce 4340 CBC 63 : ns_names_uris_nulls[i * 2 + 1])
5050 bruce 4341 LBC 0 : ereport(ERROR,
5050 bruce 4342 EUB : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4343 : errmsg("neither namespace name nor URI may be null")));
5050 bruce 4344 CBC 63 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4345 63 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
5050 bruce 4346 GIC 63 : if (xmlXPathRegisterNs(xpathctx,
5050 bruce 4347 ECB : (xmlChar *) ns_name,
4348 : (xmlChar *) ns_uri) != 0)
2118 tgl 4349 UBC 0 : ereport(ERROR, /* is this an internal error??? */
4350 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4351 : ns_name, ns_uri)));
5050 bruce 4352 ECB : }
4353 : }
5802 peter_e 4354 EUB :
5050 bruce 4355 GIC 270 : xpathcomp = xmlXPathCompile(xpath_expr);
4281 tgl 4356 CBC 270 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4281 tgl 4357 LBC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
5050 bruce 4358 ECB : "invalid XPath expression");
5657 tgl 4359 :
4360 : /*
4630 peter_e 4361 : * Version 2.6.27 introduces a function named
4362 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4363 : * but we can derive the existence by whether any nodes are returned,
4364 : * thereby preventing a library version upgrade and keeping the code
4365 : * the same.
4366 : */
5050 bruce 4367 CBC 270 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4281 tgl 4368 270 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4281 tgl 4369 UIC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4370 : "could not create XPath object");
5624 bruce 4371 ECB :
4372 : /*
4280 tgl 4373 : * Extract the results as requested.
4374 : */
4280 tgl 4375 GIC 270 : if (res_nitems != NULL)
3015 peter_e 4376 GBC 216 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4377 : else
3015 peter_e 4378 CBC 54 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4379 : }
5079 tgl 4380 6 : PG_CATCH();
4381 : {
5079 tgl 4382 GIC 6 : if (xpathobj)
5079 tgl 4383 LBC 0 : xmlXPathFreeObject(xpathobj);
5079 tgl 4384 GIC 6 : if (xpathcomp)
5079 tgl 4385 UIC 0 : xmlXPathFreeCompExpr(xpathcomp);
5079 tgl 4386 GIC 6 : if (xpathctx)
5079 tgl 4387 UIC 0 : xmlXPathFreeContext(xpathctx);
5079 tgl 4388 GIC 6 : if (doc)
4389 6 : xmlFreeDoc(doc);
4390 6 : if (ctxt)
4391 6 : xmlFreeParserCtxt(ctxt);
4392 :
4281 4393 6 : pg_xml_done(xmlerrcxt, true);
4394 :
5079 4395 6 : PG_RE_THROW();
4396 : }
4397 270 : PG_END_TRY();
4398 :
5563 tgl 4399 CBC 270 : xmlXPathFreeObject(xpathobj);
5079 tgl 4400 GIC 270 : xmlXPathFreeCompExpr(xpathcomp);
5563 4401 270 : xmlXPathFreeContext(xpathctx);
4402 270 : xmlFreeDoc(doc);
5563 tgl 4403 CBC 270 : xmlFreeParserCtxt(ctxt);
4404 :
4281 tgl 4405 GIC 270 : pg_xml_done(xmlerrcxt, false);
4630 peter_e 4406 270 : }
4407 : #endif /* USE_LIBXML */
4630 peter_e 4408 ECB :
4409 : /*
4410 : * Evaluate XPath expression and return array of XML values.
4411 : *
4412 : * As we have no support of XQuery sequences yet, this function seems
4413 : * to be the most useful one (array of XML functions plays a role of
4414 : * some kind of substitution for XQuery sequences).
4415 : */
4416 : Datum
4630 peter_e 4417 GIC 63 : xpath(PG_FUNCTION_ARGS)
4630 peter_e 4418 ECB : {
4419 : #ifdef USE_LIBXML
2219 noah 4420 CBC 63 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4630 peter_e 4421 GIC 63 : xmltype *data = PG_GETARG_XML_P(1);
4630 peter_e 4422 CBC 63 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4423 : ArrayBuildState *astate;
4424 :
2969 jdavis 4425 GIC 63 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4630 peter_e 4426 63 : xpath_internal(xpath_expr_text, data, namespaces,
3057 tgl 4427 ECB : NULL, astate);
224 peter 4428 GNC 54 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
5862 bruce 4429 ECB : #else
4430 : NO_XML_SUPPORT();
5862 bruce 4431 EUB : return 0;
5862 bruce 4432 ECB : #endif
4433 : }
4630 peter_e 4434 :
4435 : /*
4436 : * Determines if the node specified by the supplied XPath exists
4437 : * in a given XML document, returning a boolean.
4438 : */
4382 bruce 4439 : Datum
4382 bruce 4440 CBC 99 : xmlexists(PG_FUNCTION_ARGS)
4630 peter_e 4441 ECB : {
4442 : #ifdef USE_LIBXML
2219 noah 4443 CBC 99 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4630 peter_e 4444 99 : xmltype *data = PG_GETARG_XML_P(1);
4630 peter_e 4445 EUB : int res_nitems;
4630 peter_e 4446 ECB :
4630 peter_e 4447 CBC 99 : xpath_internal(xpath_expr_text, data, NULL,
4630 peter_e 4448 ECB : &res_nitems, NULL);
4449 :
4630 peter_e 4450 GBC 99 : PG_RETURN_BOOL(res_nitems > 0);
4630 peter_e 4451 EUB : #else
4452 : NO_XML_SUPPORT();
4453 : return 0;
4454 : #endif
4455 : }
4456 :
4627 tgl 4457 ECB : /*
4458 : * Determines if the node specified by the supplied XPath exists
4459 : * in a given XML document, returning a boolean. Differs from
4460 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4461 : */
4462 : Datum
4627 tgl 4463 GIC 117 : xpath_exists(PG_FUNCTION_ARGS)
4464 : {
4465 : #ifdef USE_LIBXML
2219 noah 4466 117 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4627 tgl 4467 117 : xmltype *data = PG_GETARG_XML_P(1);
4468 117 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4469 : int res_nitems;
4470 :
4471 117 : xpath_internal(xpath_expr_text, data, namespaces,
4472 : &res_nitems, NULL);
4473 :
4474 117 : PG_RETURN_BOOL(res_nitems > 0);
4475 : #else
4476 : NO_XML_SUPPORT();
4627 tgl 4477 ECB : return 0;
4478 : #endif
4479 : }
4480 :
4622 4481 : /*
4482 : * Functions for checking well-formed-ness
4483 : */
4484 :
4485 : #ifdef USE_LIBXML
4486 : static bool
4622 tgl 4487 GIC 57 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4488 : {
4489 : xmlDocPtr doc;
114 tgl 4490 GNC 57 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4491 :
4492 : /*
4493 : * We'll report "true" if no soft error is reported by xml_parse().
4494 : */
4495 57 : doc = xml_parse(data, xmloption_arg, true,
4496 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4622 tgl 4497 GIC 57 : if (doc)
4498 30 : xmlFreeDoc(doc);
4622 tgl 4499 ECB :
114 tgl 4500 GNC 57 : return !escontext.error_occurred;
4501 : }
4502 : #endif
4503 :
4622 tgl 4504 ECB : Datum
4622 tgl 4505 GIC 45 : xml_is_well_formed(PG_FUNCTION_ARGS)
4622 tgl 4506 ECB : {
4622 tgl 4507 EUB : #ifdef USE_LIBXML
2219 noah 4508 GIC 45 : text *data = PG_GETARG_TEXT_PP(0);
4509 :
4622 tgl 4510 45 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4511 : #else
4622 tgl 4512 ECB : NO_XML_SUPPORT();
4513 : return 0;
2118 4514 : #endif /* not USE_LIBXML */
4515 : }
4516 :
4517 : Datum
4622 tgl 4518 CBC 6 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4622 tgl 4519 ECB : {
4520 : #ifdef USE_LIBXML
2219 noah 4521 GIC 6 : text *data = PG_GETARG_TEXT_PP(0);
4522 :
4622 tgl 4523 CBC 6 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4622 tgl 4524 ECB : #else
4525 : NO_XML_SUPPORT();
4526 : return 0;
4527 : #endif /* not USE_LIBXML */
4528 : }
4529 :
4530 : Datum
4622 tgl 4531 CBC 6 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4622 tgl 4532 ECB : {
4533 : #ifdef USE_LIBXML
2219 noah 4534 GIC 6 : text *data = PG_GETARG_TEXT_PP(0);
4535 :
4622 tgl 4536 CBC 6 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4622 tgl 4537 ECB : #else
4538 : NO_XML_SUPPORT();
4539 : return 0;
4540 : #endif /* not USE_LIBXML */
4541 : }
4542 :
4543 : /*
4544 : * support functions for XMLTABLE
4545 : *
2223 alvherre 4546 : */
4547 : #ifdef USE_LIBXML
4548 :
4549 : /*
4550 : * Returns private data from executor state. Ensure validity by check with
4551 : * MAGIC number.
4552 : */
4553 : static inline XmlTableBuilderData *
2223 alvherre 4554 GIC 77370 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4555 : {
4556 : XmlTableBuilderData *result;
4557 :
4558 77370 : if (!IsA(state, TableFuncScanState))
2223 alvherre 4559 LBC 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
2223 alvherre 4560 CBC 77370 : result = (XmlTableBuilderData *) state->opaque;
2223 alvherre 4561 GBC 77370 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
2223 alvherre 4562 UIC 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
2223 alvherre 4563 ECB :
2223 alvherre 4564 CBC 77370 : return result;
2223 alvherre 4565 ECB : }
4566 : #endif
4567 :
4568 : /*
4569 : * XmlTableInitOpaque
2223 alvherre 4570 EUB : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4571 : * the XML parser.
2223 alvherre 4572 ECB : *
4573 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4574 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4575 : * executor nodes run until this node is processed to completion. Caller
4576 : * must execute this to completion (probably filling a tuplestore to exhaust
4577 : * this node in a single pass) instead of using row-per-call mode.
4578 : */
4579 : static void
2223 alvherre 4580 GIC 132 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4581 : {
2223 alvherre 4582 ECB : #ifdef USE_LIBXML
2223 alvherre 4583 CBC 132 : volatile xmlParserCtxtPtr ctxt = NULL;
2223 alvherre 4584 EUB : XmlTableBuilderData *xtCxt;
4585 : PgXmlErrorContext *xmlerrcxt;
4586 :
2223 alvherre 4587 CBC 132 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4588 132 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4589 132 : xtCxt->natts = natts;
2223 alvherre 4590 GIC 132 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4591 :
2223 alvherre 4592 GBC 132 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4593 :
2223 alvherre 4594 GIC 132 : PG_TRY();
4595 : {
4596 132 : xmlInitParser();
4597 :
2223 alvherre 4598 CBC 132 : ctxt = xmlNewParserCtxt();
4599 132 : if (ctxt == NULL || xmlerrcxt->err_occurred)
2223 alvherre 4600 UBC 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4601 : "could not allocate parser context");
4602 : }
2223 alvherre 4603 UIC 0 : PG_CATCH();
4604 : {
4605 0 : if (ctxt != NULL)
4606 0 : xmlFreeParserCtxt(ctxt);
4607 :
4608 0 : pg_xml_done(xmlerrcxt, true);
4609 :
2223 alvherre 4610 LBC 0 : PG_RE_THROW();
2223 alvherre 4611 ECB : }
2223 alvherre 4612 GBC 132 : PG_END_TRY();
4613 :
2223 alvherre 4614 GIC 132 : xtCxt->xmlerrcxt = xmlerrcxt;
4615 132 : xtCxt->ctxt = ctxt;
4616 :
4617 132 : state->opaque = xtCxt;
2223 alvherre 4618 ECB : #else
4619 : NO_XML_SUPPORT();
4620 : #endif /* not USE_LIBXML */
2223 alvherre 4621 CBC 132 : }
4622 :
2223 alvherre 4623 ECB : /*
4624 : * XmlTableSetDocument
4625 : * Install the input document
2223 alvherre 4626 EUB : */
2223 alvherre 4627 ECB : static void
2223 alvherre 4628 GBC 132 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
2223 alvherre 4629 ECB : {
2223 alvherre 4630 EUB : #ifdef USE_LIBXML
2223 alvherre 4631 ECB : XmlTableBuilderData *xtCxt;
2223 alvherre 4632 CBC 132 : xmltype *xmlval = DatumGetXmlP(value);
2223 alvherre 4633 ECB : char *str;
4634 : xmlChar *xstr;
4635 : int length;
2223 alvherre 4636 CBC 132 : volatile xmlDocPtr doc = NULL;
2223 alvherre 4637 GIC 132 : volatile xmlXPathContextPtr xpathcxt = NULL;
2223 alvherre 4638 ECB :
2223 alvherre 4639 GIC 132 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
2223 alvherre 4640 ECB :
4641 : /*
4642 : * Use out function for casting to string (remove encoding property). See
4643 : * comment in xml_out.
4644 : */
2223 alvherre 4645 CBC 132 : str = xml_out_internal(xmlval, 0);
2223 alvherre 4646 ECB :
2223 alvherre 4647 GIC 132 : length = strlen(str);
2223 alvherre 4648 CBC 132 : xstr = pg_xmlCharStrndup(str, length);
2223 alvherre 4649 ECB :
2223 alvherre 4650 GIC 132 : PG_TRY();
4651 : {
4652 132 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4653 132 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4654 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4655 : "could not parse XML document");
2223 alvherre 4656 GIC 132 : xpathcxt = xmlXPathNewContext(doc);
4657 132 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4658 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4659 : "could not allocate XPath context");
1753 alvherre 4660 CBC 132 : xpathcxt->node = (xmlNodePtr) doc;
4661 : }
2223 alvherre 4662 UIC 0 : PG_CATCH();
2223 alvherre 4663 ECB : {
2223 alvherre 4664 LBC 0 : if (xpathcxt != NULL)
4665 0 : xmlXPathFreeContext(xpathcxt);
2223 alvherre 4666 UIC 0 : if (doc != NULL)
4667 0 : xmlFreeDoc(doc);
2223 alvherre 4668 ECB :
2223 alvherre 4669 LBC 0 : PG_RE_THROW();
4670 : }
2223 alvherre 4671 CBC 132 : PG_END_TRY();
4672 :
2223 alvherre 4673 GIC 132 : xtCxt->doc = doc;
4674 132 : xtCxt->xpathcxt = xpathcxt;
4675 : #else
4676 : NO_XML_SUPPORT();
4677 : #endif /* not USE_LIBXML */
4678 132 : }
4679 :
4680 : /*
4681 : * XmlTableSetNamespace
4682 : * Add a namespace declaration
2223 alvherre 4683 ECB : */
4684 : static void
1986 peter_e 4685 GIC 9 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
2223 alvherre 4686 ECB : {
4687 : #ifdef USE_LIBXML
4688 : XmlTableBuilderData *xtCxt;
4689 :
2223 alvherre 4690 CBC 9 : if (name == NULL)
2223 alvherre 4691 GIC 3 : ereport(ERROR,
4692 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2223 alvherre 4693 ECB : errmsg("DEFAULT namespace is not supported")));
2223 alvherre 4694 GIC 6 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4695 :
4696 6 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4697 6 : pg_xmlCharStrndup(name, strlen(name)),
4698 6 : pg_xmlCharStrndup(uri, strlen(uri))))
2223 alvherre 4699 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4700 : "could not set XML namespace");
4701 : #else
4702 : NO_XML_SUPPORT();
4703 : #endif /* not USE_LIBXML */
2223 alvherre 4704 GIC 6 : }
4705 :
2223 alvherre 4706 ECB : /*
4707 : * XmlTableSetRowFilter
4708 : * Install the row-filter Xpath expression.
4709 : */
4710 : static void
1986 peter_e 4711 CBC 129 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4712 : {
4713 : #ifdef USE_LIBXML
2223 alvherre 4714 ECB : XmlTableBuilderData *xtCxt;
4715 : xmlChar *xstr;
4716 :
2223 alvherre 4717 CBC 129 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4718 :
2223 alvherre 4719 GIC 129 : if (*path == '\0')
2223 alvherre 4720 UIC 0 : ereport(ERROR,
4721 : (errcode(ERRCODE_DATA_EXCEPTION),
4722 : errmsg("row path filter must not be empty string")));
4723 :
2223 alvherre 4724 GIC 129 : xstr = pg_xmlCharStrndup(path, strlen(path));
4725 :
4726 129 : xtCxt->xpathcomp = xmlXPathCompile(xstr);
4727 129 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4728 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4729 : "invalid XPath expression");
2223 alvherre 4730 ECB : #else
4731 : NO_XML_SUPPORT();
4732 : #endif /* not USE_LIBXML */
2223 alvherre 4733 CBC 129 : }
4734 :
4735 : /*
4736 : * XmlTableSetColumnFilter
4737 : * Install the column-filter Xpath expression, for the given column.
2223 alvherre 4738 ECB : */
4739 : static void
1986 peter_e 4740 CBC 387 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
2223 alvherre 4741 ECB : {
4742 : #ifdef USE_LIBXML
4743 : XmlTableBuilderData *xtCxt;
4744 : xmlChar *xstr;
4745 :
163 peter 4746 GNC 387 : Assert(PointerIsValid(path));
4747 :
2223 alvherre 4748 CBC 387 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4749 :
2223 alvherre 4750 GIC 387 : if (*path == '\0')
2223 alvherre 4751 LBC 0 : ereport(ERROR,
4752 : (errcode(ERRCODE_DATA_EXCEPTION),
2223 alvherre 4753 ECB : errmsg("column path filter must not be empty string")));
4754 :
2223 alvherre 4755 GIC 387 : xstr = pg_xmlCharStrndup(path, strlen(path));
4756 :
4757 387 : xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4758 387 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4759 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4760 : "invalid XPath expression");
2223 alvherre 4761 ECB : #else
4762 : NO_XML_SUPPORT();
4763 : #endif /* not USE_LIBXML */
2223 alvherre 4764 CBC 387 : }
4765 :
2223 alvherre 4766 ECB : /*
4767 : * XmlTableFetchRow
4768 : * Prepare the next "current" tuple for upcoming GetValue calls.
4769 : * Returns false if the row-filter expression returned no more rows.
4770 : */
4771 : static bool
2223 alvherre 4772 GIC 11121 : XmlTableFetchRow(TableFuncScanState *state)
4773 : {
2223 alvherre 4774 ECB : #ifdef USE_LIBXML
4775 : XmlTableBuilderData *xtCxt;
4776 :
2223 alvherre 4777 CBC 11121 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4778 :
825 michael 4779 ECB : /* Propagate our own error context to libxml2 */
2223 alvherre 4780 GIC 11121 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4781 :
4782 11121 : if (xtCxt->xpathobj == NULL)
4783 : {
4784 129 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4785 129 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4786 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4787 : "could not create XPath object");
4788 :
2223 alvherre 4789 GIC 129 : xtCxt->row_count = 0;
4790 : }
4791 :
4792 11121 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4793 : {
4794 11121 : if (xtCxt->xpathobj->nodesetval != NULL)
4795 : {
4796 11121 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
2223 alvherre 4797 CBC 10998 : return true;
4798 : }
4799 : }
4800 :
4801 123 : return false;
2223 alvherre 4802 EUB : #else
2223 alvherre 4803 ECB : NO_XML_SUPPORT();
4804 : return false;
2118 tgl 4805 EUB : #endif /* not USE_LIBXML */
4806 : }
2223 alvherre 4807 ECB :
4808 : /*
4809 : * XmlTableGetValue
4810 : * Return the value for column number 'colnum' for the current row. If
4811 : * column -1 is requested, return representation of the whole row.
4812 : *
4813 : * This leaks memory, so be sure to reset often the context in which it's
4814 : * called.
4815 : */
4816 : static Datum
2223 alvherre 4817 GIC 65463 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4818 : Oid typid, int32 typmod, bool *isnull)
4819 : {
4820 : #ifdef USE_LIBXML
4821 : XmlTableBuilderData *xtCxt;
4822 65463 : Datum result = (Datum) 0;
2223 alvherre 4823 ECB : xmlNodePtr cur;
2223 alvherre 4824 GIC 65463 : char *cstr = NULL;
4825 65463 : volatile xmlXPathObjectPtr xpathobj = NULL;
2223 alvherre 4826 ECB :
2223 alvherre 4827 GIC 65463 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4828 :
4829 65463 : Assert(xtCxt->xpathobj &&
2223 alvherre 4830 ECB : xtCxt->xpathobj->type == XPATH_NODESET &&
4831 : xtCxt->xpathobj->nodesetval != NULL);
4832 :
825 michael 4833 : /* Propagate our own error context to libxml2 */
2223 alvherre 4834 GIC 65463 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
2223 alvherre 4835 ECB :
2223 alvherre 4836 GIC 65463 : *isnull = false;
2223 alvherre 4837 ECB :
2223 alvherre 4838 GIC 65463 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
2223 alvherre 4839 ECB :
2223 alvherre 4840 GIC 65463 : Assert(xtCxt->xpathscomp[colnum] != NULL);
2223 alvherre 4841 ECB :
2223 alvherre 4842 CBC 65463 : PG_TRY();
2223 alvherre 4843 EUB : {
4844 : /* Set current node as entry point for XPath evaluation */
2223 alvherre 4845 GIC 65463 : xtCxt->xpathcxt->node = cur;
2223 alvherre 4846 EUB :
4847 : /* Evaluate column path */
2223 alvherre 4848 GBC 65463 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4849 65463 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
2223 alvherre 4850 UIC 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
2223 alvherre 4851 EUB : "could not create XPath object");
4852 :
4853 : /*
4854 : * There are four possible cases, depending on the number of nodes
2223 alvherre 4855 ECB : * returned by the XPath expression and the type of the target column:
4856 : * a) XPath returns no nodes. b) The target type is XML (return all
1494 4857 : * as XML). For non-XML return types: c) One node (return content).
4858 : * d) Multiple nodes (error).
4859 : */
2223 alvherre 4860 CBC 65463 : if (xpathobj->type == XPATH_NODESET)
4861 : {
2223 alvherre 4862 GIC 65448 : int count = 0;
4863 :
2223 alvherre 4864 CBC 65448 : if (xpathobj->nodesetval != NULL)
2223 alvherre 4865 GIC 65343 : count = xpathobj->nodesetval->nodeNr;
4866 :
4867 65448 : if (xpathobj->nodesetval == NULL || count == 0)
4868 : {
4869 11122 : *isnull = true;
4870 : }
1494 alvherre 4871 ECB : else
4872 : {
1494 alvherre 4873 GIC 54326 : if (typid == XMLOID)
4874 : {
1494 alvherre 4875 ECB : text *textstr;
4876 : StringInfoData str;
4877 :
4878 : /* Concatenate serialized values */
1494 alvherre 4879 CBC 36 : initStringInfo(&str);
4880 87 : for (int i = 0; i < count; i++)
4881 : {
1494 alvherre 4882 ECB : textstr =
1494 alvherre 4883 GIC 51 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4884 : xtCxt->xmlerrcxt);
4885 :
4886 51 : appendStringInfoText(&str, textstr);
4887 : }
1494 alvherre 4888 CBC 36 : cstr = str.data;
4889 : }
2223 alvherre 4890 ECB : else
4891 : {
4892 : xmlChar *str;
1494 4893 :
1494 alvherre 4894 GIC 54290 : if (count > 1)
1494 alvherre 4895 CBC 3 : ereport(ERROR,
1494 alvherre 4896 ECB : (errcode(ERRCODE_CARDINALITY_VIOLATION),
1494 alvherre 4897 EUB : errmsg("more than one value returned by column XPath expression")));
4898 :
1494 alvherre 4899 CBC 54287 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4900 54287 : cstr = str ? xml_pstrdup_and_free(str) : "";
2223 alvherre 4901 EUB : }
4902 : }
1494 alvherre 4903 ECB : }
1494 alvherre 4904 GIC 15 : else if (xpathobj->type == XPATH_STRING)
1494 alvherre 4905 EUB : {
4906 : /* Content should be escaped when target will be XML */
1494 alvherre 4907 GBC 9 : if (typid == XMLOID)
4908 3 : cstr = escape_xml((char *) xpathobj->stringval);
2223 alvherre 4909 EUB : else
1494 alvherre 4910 GBC 6 : cstr = (char *) xpathobj->stringval;
4911 : }
4912 6 : else if (xpathobj->type == XPATH_BOOLEAN)
4913 : {
1494 alvherre 4914 ECB : char typcategory;
4915 : bool typispreferred;
4916 : xmlChar *str;
2223 4917 :
4918 : /* Allow implicit casting from boolean to numbers */
1494 alvherre 4919 GIC 3 : get_type_category_preferred(typid, &typcategory, &typispreferred);
4920 :
1494 alvherre 4921 CBC 3 : if (typcategory != TYPCATEGORY_NUMERIC)
1494 alvherre 4922 GIC 3 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
4923 : else
1494 alvherre 4924 UIC 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4925 :
1494 alvherre 4926 GIC 3 : cstr = xml_pstrdup_and_free(str);
4927 : }
1494 alvherre 4928 CBC 3 : else if (xpathobj->type == XPATH_NUMBER)
4929 : {
4930 : xmlChar *str;
4931 :
1494 alvherre 4932 GIC 3 : str = xmlXPathCastNumberToString(xpathobj->floatval);
1494 alvherre 4933 CBC 3 : cstr = xml_pstrdup_and_free(str);
2223 alvherre 4934 ECB : }
4935 : else
2223 alvherre 4936 UIC 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
2223 alvherre 4937 ECB :
4938 : /*
4939 : * By here, either cstr contains the result value, or the isnull flag
4940 : * has been set.
4941 : */
2223 alvherre 4942 GBC 65460 : Assert(cstr || *isnull);
4943 :
2223 alvherre 4944 GIC 65460 : if (!*isnull)
4945 54338 : result = InputFunctionCall(&state->in_functions[colnum],
4946 : cstr,
2223 alvherre 4947 CBC 54338 : state->typioparams[colnum],
4948 : typmod);
4949 : }
1255 peter 4950 GIC 3 : PG_FINALLY();
4951 : {
2223 alvherre 4952 65463 : if (xpathobj != NULL)
4953 65463 : xmlXPathFreeObject(xpathobj);
2223 alvherre 4954 ECB : }
2223 alvherre 4955 GIC 65463 : PG_END_TRY();
4956 :
4957 65460 : return result;
4958 : #else
4959 : NO_XML_SUPPORT();
2223 tgl 4960 ECB : return 0;
4961 : #endif /* not USE_LIBXML */
alvherre 4962 : }
2223 alvherre 4963 EUB :
4964 : /*
4965 : * XmlTableDestroyOpaque
4966 : * Release all libxml2 resources
2223 alvherre 4967 ECB : */
4968 : static void
2223 alvherre 4969 CBC 132 : XmlTableDestroyOpaque(TableFuncScanState *state)
2223 alvherre 4970 ECB : {
2223 alvherre 4971 EUB : #ifdef USE_LIBXML
4972 : XmlTableBuilderData *xtCxt;
4973 :
2223 alvherre 4974 GIC 132 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4975 :
825 michael 4976 ECB : /* Propagate our own error context to libxml2 */
2223 alvherre 4977 GIC 132 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4978 :
4979 132 : if (xtCxt->xpathscomp != NULL)
4980 : {
4981 : int i;
4982 :
2223 alvherre 4983 CBC 558 : for (i = 0; i < xtCxt->natts; i++)
2223 alvherre 4984 GIC 426 : if (xtCxt->xpathscomp[i] != NULL)
4985 387 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4986 : }
4987 :
4988 132 : if (xtCxt->xpathobj != NULL)
2223 alvherre 4989 CBC 129 : xmlXPathFreeObject(xtCxt->xpathobj);
2223 alvherre 4990 GIC 132 : if (xtCxt->xpathcomp != NULL)
2223 alvherre 4991 CBC 129 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
2223 alvherre 4992 GIC 132 : if (xtCxt->xpathcxt != NULL)
2223 alvherre 4993 CBC 132 : xmlXPathFreeContext(xtCxt->xpathcxt);
2223 alvherre 4994 GBC 132 : if (xtCxt->doc != NULL)
2223 alvherre 4995 GIC 132 : xmlFreeDoc(xtCxt->doc);
4996 132 : if (xtCxt->ctxt != NULL)
4997 132 : xmlFreeParserCtxt(xtCxt->ctxt);
2223 alvherre 4998 ECB :
2223 alvherre 4999 GIC 132 : pg_xml_done(xtCxt->xmlerrcxt, true);
2223 alvherre 5000 ECB :
5001 : /* not valid anymore */
2223 alvherre 5002 GBC 132 : xtCxt->magic = 0;
2223 alvherre 5003 GIC 132 : state->opaque = NULL;
5004 :
5005 : #else
5006 : NO_XML_SUPPORT();
2118 tgl 5007 ECB : #endif /* not USE_LIBXML */
2223 alvherre 5008 GIC 132 : }
|