Age Owner TLA Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * jsonpath_scan.l
5 : * Lexical parser for jsonpath datatype
6 : *
7 : * Splits jsonpath string into tokens represented as JsonPathString structs.
8 : * Decodes unicode and hex escaped strings.
9 : *
10 : * Copyright (c) 2019-2023, PostgreSQL Global Development Group
11 : *
12 : * IDENTIFICATION
13 : * src/backend/utils/adt/jsonpath_scan.l
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "postgres.h"
19 :
20 : /*
21 : * NB: include jsonpath_gram.h only AFTER including jsonpath_internal.h,
22 : * because jsonpath_internal.h contains the declaration for JsonPathString.
23 : */
24 : #include "jsonpath_internal.h"
25 : #include "jsonpath_gram.h"
26 :
27 : #include "mb/pg_wchar.h"
28 : #include "nodes/miscnodes.h"
29 : #include "nodes/pg_list.h"
30 : }
31 :
32 : %{
33 : static JsonPathString scanstring;
34 :
35 : /* Handles to the buffer that the lexer uses internally */
36 : static YY_BUFFER_STATE scanbufhandle;
37 : static char *scanbuf;
38 : static int scanbuflen;
39 :
40 : static void addstring(bool init, char *s, int l);
41 : static void addchar(bool init, char c);
42 : static enum yytokentype checkKeyword(void);
43 : static bool parseUnicode(char *s, int l, struct Node *escontext);
44 : static bool parseHexChar(char *s, struct Node *escontext);
45 :
46 : /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
47 : #undef fprintf
48 : #define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
49 :
50 : static void
1485 akorotkov 51 UIC 0 : fprintf_to_ereport(const char *fmt, const char *msg)
52 : {
53 0 : ereport(ERROR, (errmsg_internal("%s", msg)));
54 : }
55 :
56 : /* LCOV_EXCL_START */
57 :
58 : %}
59 :
60 : %option 8bit
1485 akorotkov 61 EUB : %option never-interactive
62 : %option nodefault
63 : %option noinput
64 : %option nounput
65 : %option noyywrap
66 : %option warn
67 : %option prefix="jsonpath_yy"
68 : %option bison-bridge
69 : %option noyyalloc
70 : %option noyyrealloc
71 : %option noyyfree
72 :
73 : /*
74 : * We use exclusive states for quoted and non-quoted strings,
75 : * quoted variable names and C-style comments.
76 : * Exclusive states:
77 : * <xq> - quoted strings
78 : * <xnq> - non-quoted strings
79 : * <xvq> - quoted variable names
80 : * <xc> - C-style comment
81 : */
82 :
83 : %x xq
84 : %x xnq
85 : %x xvq
86 : %x xc
87 :
88 : special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
89 : blank [ \t\n\r\f]
90 : /* "other" means anything that's not special, blank, or '\' or '"' */
91 : other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
92 :
93 : decdigit [0-9]
94 : hexdigit [0-9A-Fa-f]
95 : octdigit [0-7]
96 : bindigit [0-1]
97 :
98 : /* DecimalInteger in ECMAScript; must not start with 0 unless it's exactly 0 */
99 : decinteger (0|[1-9](_?{decdigit})*)
100 : /* DecimalDigits in ECMAScript; only used as part of other rules */
101 : decdigits {decdigit}(_?{decdigit})*
102 : /* Non-decimal integers; in ECMAScript, these must not have underscore after prefix */
103 : hexinteger 0[xX]{hexdigit}(_?{hexdigit})*
104 : octinteger 0[oO]{octdigit}(_?{octdigit})*
105 : bininteger 0[bB]{bindigit}(_?{bindigit})*
106 :
107 : decimal ({decinteger}\.{decdigits}?|\.{decdigits})
108 : real ({decinteger}|{decimal})[Ee][-+]?{decdigits}
109 : realfail ({decinteger}|{decimal})[Ee][-+]
110 :
111 : decinteger_junk {decinteger}{other}
112 : decimal_junk {decimal}{other}
113 : real_junk {real}{other}
114 :
115 : unicode \\u({hexdigit}{4}|\{{hexdigit}{1,6}\})
116 : unicodefail \\u({hexdigit}{0,3}|\{{hexdigit}{0,6})
117 : hex_char \\x{hexdigit}{2}
118 : hex_fail \\x{hexdigit}{0,1}
119 :
120 : %%
121 :
	/*
	 * Rules for the xnq (non-quoted string) state.  Runs of plain characters
	 * are appended to scanstring.  Whitespace, a special character, a double
	 * quote, or end of input ends the string: scanstring is handed to the
	 * parser through yylval->str, the scanner returns to INITIAL, and
	 * checkKeyword() picks the token to return.  A special character or
	 * double quote is first given back with yyless(0) so that it is scanned
	 * again in INITIAL.
	 *
	 * NOTE(review): the comment-opener rule stores scanstring in yylval->str
	 * and switches to xc without a return statement, so that rule itself
	 * returns no token for the text collected so far -- confirm intended.
	 */
122 : <xnq>{other}+ {
1476 akorotkov 123 GIC 3 : addstring(false, yytext, yyleng);
124 : }
1485 125 3 :
1476 126 1551 : <xnq>{blank}+ {
127 1551 : yylval->str = scanstring;
128 1551 : BEGIN INITIAL;
129 1551 : return checkKeyword();
130 : }
131 :
1476 akorotkov 132 UIC 0 : <xnq>\/\* {
133 0 : yylval->str = scanstring;
134 0 : BEGIN xc;
135 : }
1485 136 0 :
1297 tgl 137 GIC 1209 : <xnq>({special}|\") {
1485 akorotkov 138 1209 : yylval->str = scanstring;
1476 139 1209 : yyless(0);
140 1209 : BEGIN INITIAL;
141 1209 : return checkKeyword();
142 : }
143 :
1476 akorotkov 144 CBC 318 : <xnq><<EOF>> {
1476 akorotkov 145 GIC 318 : yylval->str = scanstring;
1476 akorotkov 146 CBC 318 : BEGIN INITIAL;
147 318 : return checkKeyword();
1485 akorotkov 148 ECB : }
149 :
	/*
	 * Backslash escapes, shared by the xnq, xq and xvq states.
	 *
	 * The single-letter escapes b, f, n, r, t and v append the matching
	 * control character to scanstring.  A run of unicode escapes is decoded
	 * by parseUnicode() and a two-digit hex escape by parseHexChar(); if
	 * either returns false the scanner stops with yyterminate().  Malformed
	 * unicode or hex escapes, and a backslash with nothing after it, are
	 * reported through jsonpath_yyerror() and then stop the scanner as well.
	 * When a unicode run is directly followed by another backslash, that
	 * backslash is given back with yyless() before the run is decoded.
	 * Any other escaped character is appended as itself (yytext[1]).
	 */
1297 tgl 150 CBC 3 : <xnq,xq,xvq>\\b { addchar(false, '\b'); }
1485 akorotkov 151 GIC 3 :
1297 tgl 152 3 : <xnq,xq,xvq>\\f { addchar(false, '\f'); }
1485 akorotkov 153 GBC 3 :
1297 tgl 154 3 : <xnq,xq,xvq>\\n { addchar(false, '\n'); }
1485 akorotkov 155 3 :
1297 tgl 156 GIC 3 : <xnq,xq,xvq>\\r { addchar(false, '\r'); }
1485 akorotkov 157 GBC 3 :
1297 tgl 158 CBC 6 : <xnq,xq,xvq>\\t { addchar(false, '\t'); }
1485 akorotkov 159 6 :
1297 tgl 160 3 : <xnq,xq,xvq>\\v { addchar(false, '\v'); }
1485 akorotkov 161 3 :
106 andrew 162 GNC 63 : <xnq,xq,xvq>{unicode}+ {
163 63 : if (!parseUnicode(yytext, yyleng, escontext))
106 andrew 164 UNC 0 : yyterminate();
165 : }
1485 akorotkov 166 GIC 27 :
106 andrew 167 GNC 6 : <xnq,xq,xvq>{hex_char} {
168 6 : if (!parseHexChar(yytext, escontext))
106 andrew 169 UNC 0 : yyterminate();
170 : }
1476 akorotkov 171 CBC 6 :
106 andrew 172 GNC 18 : <xnq,xq,xvq>{unicode}*{unicodefail} {
173 18 : jsonpath_yyerror(NULL, escontext,
174 : "invalid unicode sequence");
106 andrew 175 UNC 0 : yyterminate();
176 : }
1485 akorotkov 177 ECB :
106 andrew 178 UNC 0 : <xnq,xq,xvq>{hex_fail} {
179 0 : jsonpath_yyerror(NULL, escontext,
180 : "invalid hex character sequence");
181 0 : yyterminate();
182 : }
183 :
1297 tgl 184 GIC 3 : <xnq,xq,xvq>{unicode}+\\ {
1297 tgl 185 ECB : /* throw back the \\, and treat as unicode */
1297 tgl 186 CBC 3 : yyless(yyleng - 1);
106 andrew 187 GNC 3 : if (!parseUnicode(yytext, yyleng, escontext))
106 andrew 188 UNC 0 : yyterminate();
1297 tgl 189 ECB : }
1485 akorotkov 190 CBC 3 :
1297 tgl 191 63 : <xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
1485 akorotkov 192 63 :
106 andrew 193 UNC 0 : <xnq,xq,xvq>\\ {
194 0 : jsonpath_yyerror(NULL, escontext,
195 : "unexpected end after backslash");
196 0 : yyterminate();
197 : }
1485 akorotkov 198 ECB :
	/*
	 * Rules for the quoted states xq (quoted string) and xvq (quoted
	 * variable name).  End of input inside either state is reported as an
	 * error and stops the scanner.  The closing double quote hands
	 * scanstring to the parser through yylval->str, returns to INITIAL and
	 * yields STRING_P from xq or VARIABLE_P from xvq.  Any run of characters
	 * other than a backslash or a double quote is appended to scanstring.
	 */
106 andrew 199 UNC 0 : <xq,xvq><<EOF>> {
200 0 : jsonpath_yyerror(NULL, escontext,
201 : "unexpected end of quoted string");
202 0 : yyterminate();
203 : }
1485 akorotkov 204 ECB :
1476 akorotkov 205 CBC 651 : <xq>\" {
1485 206 651 : yylval->str = scanstring;
1476 207 651 : BEGIN INITIAL;
1476 akorotkov 208 GBC 651 : return STRING_P;
209 : }
1485 akorotkov 210 ECB :
1476 akorotkov 211 LBC 0 : <xvq>\" {
1485 212 0 : yylval->str = scanstring;
1485 akorotkov 213 UBC 0 : BEGIN INITIAL;
1476 akorotkov 214 UIC 0 : return VARIABLE_P;
1485 akorotkov 215 ECB : }
216 :
1476 akorotkov 217 CBC 687 : <xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
1476 akorotkov 218 GIC 687 :
	/*
	 * Rules for the xc (C-style comment) state.  The closing star-slash
	 * returns the scanner to INITIAL; all other comment text, including a
	 * lone star, is matched and discarded.  Reaching end of input while
	 * still inside a comment is reported as an error and stops the scanner.
	 */
1476 akorotkov 219 UBC 0 : <xc>\*\/ { BEGIN INITIAL; }
1485 akorotkov 220 UIC 0 :
1476 221 0 : <xc>[^\*]+ { }
1485 akorotkov 222 UBC 0 :
1476 223 0 : <xc>\* { }
1485 akorotkov 224 UIC 0 :
106 andrew 225 UNC 0 : <xc><<EOF>> {
226 0 : jsonpath_yyerror(
227 : NULL, escontext,
228 : "unexpected end of comment");
229 0 : yyterminate();
230 : }
	/*
	 * INITIAL-state rules for operators, variables and punctuation.
	 *
	 * Each multi-character operator returns its own token; both spellings
	 * of inequality return NOTEQUAL_P.  A dollar sign followed by plain
	 * characters yields VARIABLE_P whose string is the name without the
	 * dollar sign, NUL-terminated.  A dollar sign followed by a double quote
	 * starts a fresh, empty scanstring and enters xvq.  Any other single
	 * special character is returned as its own character code.  Whitespace
	 * is skipped, and a comment opener resets scanstring and enters xc.
	 */
1476 akorotkov 231 GIC 84 : \&\& { return AND_P; }
1485 akorotkov 232 CBC 84 :
1476 akorotkov 233 GIC 54 : \|\| { return OR_P; }
1485 akorotkov 234 ECB :
1476 akorotkov 235 CBC 12 : \! { return NOT_P; }
1485 akorotkov 236 EUB :
1476 akorotkov 237 GIC 177 : \*\* { return ANY_P; }
1485 akorotkov 238 ECB :
1476 akorotkov 239 CBC 237 : \< { return LESS_P; }
1485 akorotkov 240 ECB :
1476 akorotkov 241 GBC 12 : \<\= { return LESSEQUAL_P; }
1485 akorotkov 242 EUB :
1476 akorotkov 243 GIC 432 : \=\= { return EQUAL_P; }
1485 akorotkov 244 EUB :
1476 akorotkov 245 UIC 0 : \<\> { return NOTEQUAL_P; }
246 :
1476 akorotkov 247 GBC 6 : \!\= { return NOTEQUAL_P; }
1485 akorotkov 248 EUB :
1476 akorotkov 249 GIC 63 : \>\= { return GREATEREQUAL_P; }
1485 akorotkov 250 EUB :
1476 akorotkov 251 GIC 192 : \> { return GREATER_P; }
252 :
1297 tgl 253 CBC 162 : \${other}+ {
1476 akorotkov 254 162 : addstring(true, yytext + 1, yyleng - 1);
255 162 : addchar(false, '\0');
1485 256 162 : yylval->str = scanstring;
1476 akorotkov 257 GIC 162 : return VARIABLE_P;
258 : }
1476 akorotkov 259 EUB :
1476 akorotkov 260 UBC 0 : \$\" {
261 0 : addchar(true, '\0');
262 0 : BEGIN xvq;
263 : }
1485 akorotkov 264 UIC 0 :
1476 akorotkov 265 CBC 12030 : {special} { return *yytext; }
1476 akorotkov 266 ECB :
1476 akorotkov 267 GBC 3693 : {blank}+ { /* ignore */ }
268 3693 :
1476 akorotkov 269 UBC 0 : \/\* {
270 0 : addchar(true, '\0');
271 0 : BEGIN xc;
1476 akorotkov 272 EUB : }
1476 akorotkov 273 UBC 0 :
	/*
	 * INITIAL-state rules for numeric literals.
	 *
	 * Every accepted literal is copied into a fresh scanstring, given a NUL
	 * terminator and passed to the parser through yylval->str.  Literals
	 * matching real or decimal return NUMERIC_P; decimal, hexadecimal, octal
	 * and binary integers return INT_P.  An exponent marker with no digits
	 * after it (realfail) and a literal immediately followed by a plain
	 * character (the _junk patterns) are reported through jsonpath_yyerror()
	 * and stop the scanner.
	 */
1476 akorotkov 274 GBC 150 : {real} {
1476 akorotkov 275 GIC 150 : addstring(true, yytext, yyleng);
276 150 : addchar(false, '\0');
1485 akorotkov 277 GBC 150 : yylval->str = scanstring;
1476 akorotkov 278 GIC 150 : return NUMERIC_P;
1485 akorotkov 279 ECB : }
280 :
1476 akorotkov 281 CBC 117 : {decimal} {
1476 akorotkov 282 GIC 117 : addstring(true, yytext, yyleng);
1476 akorotkov 283 CBC 117 : addchar(false, '\0');
1485 akorotkov 284 GIC 117 : yylval->str = scanstring;
1476 akorotkov 285 CBC 117 : return NUMERIC_P;
286 : }
1485 akorotkov 287 ECB :
35 peter 288 GNC 738 : {decinteger} {
35 peter 289 CBC 738 : addstring(true, yytext, yyleng);
35 peter 290 GIC 738 : addchar(false, '\0');
35 peter 291 CBC 738 : yylval->str = scanstring;
35 peter 292 GIC 738 : return INT_P;
35 peter 293 EUB : }
294 :
35 peter 295 GNC 6 : {hexinteger} {
296 6 : addstring(true, yytext, yyleng);
297 6 : addchar(false, '\0');
298 6 : yylval->str = scanstring;
299 6 : return INT_P;
300 : }
35 peter 301 ECB :
35 peter 302 GNC 6 : {octinteger} {
303 6 : addstring(true, yytext, yyleng);
304 6 : addchar(false, '\0');
305 6 : yylval->str = scanstring;
306 6 : return INT_P;
307 : }
308 :
309 6 : {bininteger} {
1476 akorotkov 310 6 : addstring(true, yytext, yyleng);
311 6 : addchar(false, '\0');
312 6 : yylval->str = scanstring;
313 6 : return INT_P;
314 : }
315 :
106 andrew 316 UNC 0 : {realfail} {
317 0 : jsonpath_yyerror(
318 : NULL, escontext,
319 : "invalid numeric literal");
320 0 : yyterminate();
321 : }
322 : {decinteger_junk} {
106 andrew 323 GNC 45 : jsonpath_yyerror(
324 : NULL, escontext,
325 : "trailing junk after numeric literal");
326 12 : yyterminate();
327 : }
328 : {decimal_junk} {
329 21 : jsonpath_yyerror(
330 : NULL, escontext,
331 : "trailing junk after numeric literal");
106 andrew 332 UNC 0 : yyterminate();
333 : }
334 : {real_junk} {
106 andrew 335 GNC 3 : jsonpath_yyerror(
336 : NULL, escontext,
337 : "trailing junk after numeric literal");
106 andrew 338 UNC 0 : yyterminate();
339 : }
	/*
	 * Remaining INITIAL-state rules.  A double quote starts a fresh, empty
	 * scanstring and enters xq.  A backslash is given back with yyless(0),
	 * scanstring is reset, and the scanner enters xnq so that the escape
	 * rules process it there.  A run of plain characters becomes the start
	 * of a non-quoted string and also enters xnq.  End of input stops the
	 * scanner.
	 */
340 : \" {
1476 akorotkov 341 CBC 705 : addchar(true, '\0');
342 705 : BEGIN xq;
1476 akorotkov 343 ECB : }
1485 akorotkov 344 CBC 705 :
1476 akorotkov 345 LBC 0 : \\ {
1476 akorotkov 346 UIC 0 : yyless(0);
347 0 : addchar(true, '\0');
1476 akorotkov 348 UBC 0 : BEGIN xnq;
1476 akorotkov 349 EUB : }
1485 akorotkov 350 UBC 0 :
1297 tgl 351 GIC 3078 : {other}+ {
1297 tgl 352 GBC 3078 : addstring(true, yytext, yyleng);
1297 tgl 353 CBC 3078 : BEGIN xnq;
354 : }
355 3078 :
1476 akorotkov 356 2112 : <<EOF>> { yyterminate(); }
1485 akorotkov 357 EUB :
1485 akorotkov 358 UBC 0 : %%
1485 akorotkov 359 EUB :
360 : /* LCOV_EXCL_STOP */
1048 peter 361 :
1485 akorotkov 362 ECB : void
106 andrew 363 GNC 123 : jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
364 : const char *message)
1485 akorotkov 365 ECB : {
366 : /* don't overwrite escontext if it's already been set */
106 andrew 367 GNC 123 : if (SOFT_ERROR_OCCURRED(escontext))
368 6 : return;
369 :
1485 akorotkov 370 CBC 117 : if (*yytext == YY_END_OF_BUFFER_CHAR)
1485 akorotkov 371 ECB : {
106 andrew 372 GNC 30 : errsave(escontext,
373 : (errcode(ERRCODE_SYNTAX_ERROR),
1485 akorotkov 374 ECB : /* translator: %s is typically "syntax error" */
1447 375 : errmsg("%s at end of jsonpath input", _(message))));
1485 376 : }
377 : else
378 : {
106 andrew 379 GNC 87 : errsave(escontext,
380 : (errcode(ERRCODE_SYNTAX_ERROR),
1485 akorotkov 381 ECB : /* translator: first %s is typically "syntax error" */
1447 382 : errmsg("%s at or near \"%s\" of jsonpath input",
383 : _(message), yytext)));
1485 384 : }
385 : }
386 :
/* One entry of the keyword lookup table searched by checkKeyword(). */
387 : typedef struct JsonPathKeyword
388 : {
1484 389 : int16 len; /* length of "keyword" in bytes */
390 : bool lowercase; /* if true, checkKeyword() requires an exact, case-sensitive match */
391 : int val; /* token value returned by checkKeyword() on a match */
1482 392 : const char *keyword; /* keyword text */
393 : } JsonPathKeyword;
394 :
1485 395 : /*
396 : * Array of key words should be sorted by length and then
397 : * alphabetical order
398 : */
/*
 * checkKeyword() binary-searches this table, comparing entry length first
 * and keyword text second, and treats the length of the last entry as the
 * longest possible keyword.  New entries must keep that ordering or the
 * lookup will miss them.
 */
1482 399 : static const JsonPathKeyword keywords[] = {
400 : { 2, false, IS_P, "is"},
401 : { 2, false, TO_P, "to"},
1485 402 : { 3, false, ABS_P, "abs"},
403 : { 3, false, LAX_P, "lax"},
404 : { 4, false, FLAG_P, "flag"},
405 : { 4, false, LAST_P, "last"},
406 : { 4, true, NULL_P, "null"},
407 : { 4, false, SIZE_P, "size"},
408 : { 4, true, TRUE_P, "true"},
1485 akorotkov 409 EUB : { 4, false, TYPE_P, "type"},
410 : { 4, false, WITH_P, "with"},
411 : { 5, true, FALSE_P, "false"},
412 : { 5, false, FLOOR_P, "floor"},
413 : { 6, false, DOUBLE_P, "double"},
414 : { 6, false, EXISTS_P, "exists"},
415 : { 6, false, STARTS_P, "starts"},
1485 akorotkov 416 ECB : { 6, false, STRICT_P, "strict"},
417 : { 7, false, CEILING_P, "ceiling"},
418 : { 7, false, UNKNOWN_P, "unknown"},
1292 419 : { 8, false, DATETIME_P, "datetime"},
420 : { 8, false, KEYVALUE_P, "keyvalue"},
421 : { 10,false, LIKE_REGEX_P, "like_regex"},
1485 422 : };
423 :
424 : /* Check if current scanstring value is a keyword */
1476 akorotkov 425 EUB : static enum yytokentype
1476 akorotkov 426 GIC 3078 : checkKeyword()
427 : {
331 peter 428 CBC 3078 : int res = IDENT_P;
429 : int diff;
1482 akorotkov 430 GIC 3078 : const JsonPathKeyword *StopLow = keywords,
1482 akorotkov 431 GBC 3078 : *StopHigh = keywords + lengthof(keywords),
432 : *StopMiddle;
433 :
1485 akorotkov 434 CBC 3078 : if (scanstring.len > keywords[lengthof(keywords) - 1].len)
435 3 : return res;
436 :
1476 437 14787 : while (StopLow < StopHigh)
1485 akorotkov 438 EUB : {
1485 akorotkov 439 GBC 13488 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1485 akorotkov 440 EUB :
1485 akorotkov 441 GBC 13488 : if (StopMiddle->len == scanstring.len)
1485 akorotkov 442 GIC 4167 : diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val,
1485 akorotkov 443 GBC 4167 : scanstring.len);
1485 akorotkov 444 ECB : else
1485 akorotkov 445 CBC 9321 : diff = StopMiddle->len - scanstring.len;
1485 akorotkov 446 ECB :
1485 akorotkov 447 GIC 13488 : if (diff < 0)
1485 akorotkov 448 CBC 3069 : StopLow = StopMiddle + 1;
449 10419 : else if (diff > 0)
1485 akorotkov 450 GIC 8643 : StopHigh = StopMiddle;
1485 akorotkov 451 EUB : else
452 : {
1485 akorotkov 453 GIC 1776 : if (StopMiddle->lowercase)
454 147 : diff = strncmp(StopMiddle->keyword, scanstring.val,
455 147 : scanstring.len);
1485 akorotkov 456 ECB :
1485 akorotkov 457 GIC 1776 : if (diff == 0)
458 1776 : res = StopMiddle->val;
459 :
1485 akorotkov 460 CBC 1776 : break;
1485 akorotkov 461 ECB : }
462 : }
463 :
1485 akorotkov 464 GIC 3075 : return res;
1485 akorotkov 465 ECB : }
466 :
467 : /*
468 : * Called before any actual parsing is done
469 : */
470 : static void
1485 akorotkov 471 GIC 2280 : jsonpath_scanner_init(const char *str, int slen)
1485 akorotkov 472 ECB : {
1485 akorotkov 473 GIC 2280 : if (slen <= 0)
474 3 : slen = strlen(str);
475 :
476 : /*
477 : * Might be left over after ereport()
478 : */
479 2280 : yy_init_globals();
480 :
481 : /*
482 : * Make a scan buffer with special termination needed by flex.
483 : */
484 :
485 2280 : scanbuflen = slen;
486 2280 : scanbuf = palloc(slen + 2);
487 2280 : memcpy(scanbuf, str, slen);
488 2280 : scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
489 2280 : scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
490 :
491 2280 : BEGIN(INITIAL);
492 2280 : }
493 :
494 :
495 : /*
496 : * Called after parsing is done to clean up after jsonpath_scanner_init()
497 : */
498 : static void
499 2130 : jsonpath_scanner_finish(void)
500 : {
501 2130 : yy_delete_buffer(scanbufhandle);
502 2130 : pfree(scanbuf);
503 2130 : }
504 :
505 : /*
506 : * Resize scanstring so that it can append string of given length.
507 : * Reinitialize if required.
508 : */
509 : static void
1476 510 6987 : resizeString(bool init, int appendLen)
511 : {
1485 512 6987 : if (init)
513 : {
1476 514 4968 : scanstring.total = Max(32, appendLen);
515 4968 : scanstring.val = (char *) palloc(scanstring.total);
1485 516 4968 : scanstring.len = 0;
517 : }
518 : else
1485 akorotkov 519 ECB : {
1476 akorotkov 520 GIC 2019 : if (scanstring.len + appendLen >= scanstring.total)
1485 akorotkov 521 ECB : {
1476 akorotkov 522 UIC 0 : while (scanstring.len + appendLen >= scanstring.total)
1476 akorotkov 523 LBC 0 : scanstring.total *= 2;
1485 524 0 : scanstring.val = repalloc(scanstring.val, scanstring.total);
525 : }
526 : }
1485 akorotkov 527 CBC 6987 : }
1485 akorotkov 528 ECB :
529 : /* Add set of bytes at "s" of length "l" to scanstring */
530 : static void
1476 akorotkov 531 GIC 5013 : addstring(bool init, char *s, int l)
1485 akorotkov 532 ECB : {
1476 akorotkov 533 GIC 5013 : resizeString(init, l + 1);
1476 akorotkov 534 CBC 5013 : memcpy(scanstring.val + scanstring.len, s, l);
535 5013 : scanstring.len += l;
536 5013 : }
537 :
1476 akorotkov 538 ECB : /* Add single byte "c" to scanstring */
539 : static void
1476 akorotkov 540 CBC 1974 : addchar(bool init, char c)
1476 akorotkov 541 ECB : {
1476 akorotkov 542 CBC 1974 : resizeString(init, 1);
543 1974 : scanstring.val[scanstring.len] = c;
1476 akorotkov 544 GIC 1974 : if (c != '\0')
1485 545 84 : scanstring.len++;
1485 akorotkov 546 CBC 1974 : }
1485 akorotkov 547 ECB :
1476 548 : /* Interface to jsonpath parser */
549 : JsonPathParseResult *
106 andrew 550 GNC 2280 : parsejsonpath(const char *str, int len, struct Node *escontext)
1485 akorotkov 551 ECB : {
552 : JsonPathParseResult *parseresult;
553 :
1485 akorotkov 554 GIC 2280 : jsonpath_scanner_init(str, len);
555 :
106 andrew 556 GNC 2280 : if (jsonpath_yyparse((void *) &parseresult, escontext) != 0)
557 6 : jsonpath_yyerror(NULL, escontext, "bogus input"); /* shouldn't happen */
558 :
1485 akorotkov 559 GIC 2130 : jsonpath_scanner_finish();
560 :
561 2130 : return parseresult;
562 : }
563 :
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * On success the value (0..15) is stored in *result and true is returned.
 * For any other character an error is reported through jsonpath_yyerror()
 * and false is returned; *result is left untouched in that case.
 */
static bool
hexval(char c, int *result, struct Node *escontext)
{
	int			digit;

	if (c >= '0' && c <= '9')
		digit = c - '0';
	else if (c >= 'a' && c <= 'f')
		digit = c - 'a' + 0xA;
	else if (c >= 'A' && c <= 'F')
		digit = c - 'A' + 0xA;
	else
	{
		jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
		return false;
	}

	*result = digit;
	return true;
}
586 :
1476 akorotkov 587 ECB : /* Add given unicode character to scanstring */
588 : static bool
106 andrew 589 GNC 72 : addUnicodeChar(int ch, struct Node *escontext)
1485 akorotkov 590 ECB : {
1485 akorotkov 591 CBC 72 : if (ch == 0)
592 : {
1485 akorotkov 593 ECB : /* We can't allow this, since our TEXT type doesn't */
106 andrew 594 GNC 12 : ereturn(escontext, false,
595 : (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
596 : errmsg("unsupported Unicode escape sequence"),
597 : errdetail("\\u0000 cannot be converted to text.")));
598 : }
599 : else
600 : {
1129 tgl 601 ECB : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
602 :
603 : /*
604 : * If we're trapping the error status, call the noerror form of the
605 : * conversion function. Otherwise call the normal form which provides
606 : * more detailed errors.
607 : */
608 :
106 andrew 609 GNC 60 : if (! escontext || ! IsA(escontext, ErrorSaveContext))
610 60 : pg_unicode_to_server(ch, (unsigned char *) cbuf);
106 andrew 611 UNC 0 : else if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
612 0 : ereturn(escontext, false,
613 : (errcode(ERRCODE_SYNTAX_ERROR),
614 : errmsg("could not convert unicode to server encoding")));
1129 tgl 615 CBC 60 : addstring(false, cbuf, strlen(cbuf));
1485 akorotkov 616 ECB : }
106 andrew 617 GNC 60 : return true;
618 : }
619 :
620 : /* Add unicode character, processing any surrogate pairs */
621 : static bool
622 108 : addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
623 : {
1129 tgl 624 CBC 108 : if (is_utf16_surrogate_first(ch))
625 : {
1485 akorotkov 626 30 : if (*hi_surrogate != -1)
106 andrew 627 GNC 6 : ereturn(escontext, false,
1485 akorotkov 628 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1424 alvherre 629 : errmsg("invalid input syntax for type %s", "jsonpath"),
1485 akorotkov 630 : errdetail("Unicode high surrogate must not follow "
631 : "a high surrogate.")));
1129 tgl 632 GIC 24 : *hi_surrogate = ch;
106 andrew 633 GNC 24 : return true;
1485 akorotkov 634 ECB : }
1129 tgl 635 GIC 78 : else if (is_utf16_surrogate_second(ch))
1485 akorotkov 636 EUB : {
1485 akorotkov 637 GBC 24 : if (*hi_surrogate == -1)
106 andrew 638 GNC 12 : ereturn(escontext, false,
639 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
640 : errmsg("invalid input syntax for type %s", "jsonpath"),
1485 akorotkov 641 ECB : errdetail("Unicode low surrogate must follow a high "
642 : "surrogate.")));
1129 tgl 643 GIC 12 : ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
1485 akorotkov 644 12 : *hi_surrogate = -1;
1485 akorotkov 645 ECB : }
1485 akorotkov 646 GIC 54 : else if (*hi_surrogate != -1)
1485 akorotkov 647 ECB : {
106 andrew 648 UNC 0 : ereturn(escontext, false,
1485 akorotkov 649 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1424 alvherre 650 : errmsg("invalid input syntax for type %s", "jsonpath"),
651 : errdetail("Unicode low surrogate must follow a high "
652 : "surrogate.")));
653 : }
1485 akorotkov 654 :
106 andrew 655 GNC 66 : return addUnicodeChar(ch, escontext);
1485 akorotkov 656 ECB : }
657 :
658 : /*
659 : * parseUnicode was adopted from json_lex_string() in
660 : * src/backend/utils/adt/json.c
661 : */
662 : static bool
106 andrew 663 GNC 66 : parseUnicode(char *s, int l, struct Node *escontext)
1485 akorotkov 664 ECB : {
1476 akorotkov 665 GIC 66 : int i = 2;
1485 666 66 : int hi_surrogate = -1;
667 :
1485 akorotkov 668 CBC 144 : for (i = 2; i < l; i += 2) /* skip '\u' */
669 : {
670 108 : int ch = 0;
671 : int j, si;
672 :
673 108 : if (s[i] == '{') /* parse '\u{XX...}' */
674 : {
675 84 : while (s[++i] != '}' && i < l)
676 : {
106 andrew 677 GNC 66 : if (!hexval(s[i], &si, escontext))
106 andrew 678 UNC 0 : return false;
106 andrew 679 GNC 66 : ch = (ch << 4) | si;
680 : }
1414 akapila 681 GIC 18 : i++; /* skip '}' */
682 : }
683 : else /* parse '\uXXXX' */
1485 akorotkov 684 ECB : {
1485 akorotkov 685 GIC 450 : for (j = 0; j < 4 && i < l; j++)
686 : {
106 andrew 687 GNC 360 : if (!hexval(s[i++], &si, escontext))
106 andrew 688 UNC 0 : return false;
106 andrew 689 GNC 360 : ch = (ch << 4) | si;
690 : }
691 : }
1485 akorotkov 692 ECB :
106 andrew 693 GNC 108 : if (! addUnicode(ch, &hi_surrogate, escontext))
106 andrew 694 UNC 0 : return false;
695 : }
1485 akorotkov 696 ECB :
1485 akorotkov 697 GIC 36 : if (hi_surrogate != -1)
1485 akorotkov 698 ECB : {
106 andrew 699 GNC 6 : ereturn(escontext, false,
700 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1424 alvherre 701 ECB : errmsg("invalid input syntax for type %s", "jsonpath"),
702 : errdetail("Unicode low surrogate must follow a high "
1485 akorotkov 703 : "surrogate.")));
704 : }
705 :
106 andrew 706 GNC 30 : return true;
707 : }
1485 akorotkov 708 EUB :
/*
 * Decode a single \xHH escape and append the resulting character.
 *
 * "s" points at the backslash; the two hex digits are read from s[2] and
 * s[3].  Returns false if either digit is rejected by hexval() or if
 * addUnicodeChar() fails, true otherwise.
 */
static bool
parseHexChar(char *s, struct Node *escontext)
{
	int			hi,
				lo;

	if (!hexval(s[2], &hi, escontext) ||
		!hexval(s[3], &lo, escontext))
		return false;

	return addUnicodeChar((hi << 4) | lo, escontext);
}
723 :
1485 akorotkov 724 ECB : /*
725 : * Interface functions to make flex use palloc() instead of malloc().
726 : * It'd be better to make these static, but flex insists otherwise.
727 : */
728 :
729 : void *
1485 akorotkov 730 GIC 4560 : jsonpath_yyalloc(yy_size_t bytes)
731 : {
732 4560 : return palloc(bytes);
733 : }
734 :
735 : void *
1485 akorotkov 736 UIC 0 : jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
737 : {
738 0 : if (ptr)
1485 akorotkov 739 LBC 0 : return repalloc(ptr, bytes);
1485 akorotkov 740 ECB : else
1485 akorotkov 741 UBC 0 : return palloc(bytes);
1485 akorotkov 742 EUB : }
743 :
/*
 * Release hook for the generated scanner.  A null pointer is ignored;
 * anything else is passed to pfree().
 */
void
jsonpath_yyfree(void *ptr)
{
	if (!ptr)
		return;

	pfree(ptr);
}
|