Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * jsonb.h
4 : : * Declarations for jsonb data type support.
5 : : *
6 : : * Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : *
8 : : * src/include/utils/jsonb.h
9 : : *
10 : : *-------------------------------------------------------------------------
11 : : */
12 : : #ifndef __JSONB_H__
13 : : #define __JSONB_H__
14 : :
15 : : #include "lib/stringinfo.h"
16 : : #include "utils/array.h"
17 : : #include "utils/numeric.h"
18 : :
/*
 * Tokens used when sequentially processing a jsonb value.
 *
 * No explicit values are assigned, so the enumerators are numbered
 * consecutively from WJB_DONE (0) through WJB_END_OBJECT (7).
 */
typedef enum
{
	WJB_DONE,
	WJB_KEY,
	WJB_VALUE,
	WJB_ELEM,
	WJB_BEGIN_ARRAY,
	WJB_END_ARRAY,
	WJB_BEGIN_OBJECT,
	WJB_END_OBJECT,
} JsonbIteratorToken;
31 : :
/*
 * Strategy numbers for GIN index opclasses.
 *
 * The numbers are deliberately not contiguous (7, 9, 10, 11, 15, 16).
 * NOTE(review): presumably they have to agree with the operator class
 * definitions elsewhere -- confirm before renumbering any of them.
 */
#define JsonbContainsStrategyNumber				7
#define JsonbExistsStrategyNumber				9
#define JsonbExistsAnyStrategyNumber			10
#define JsonbExistsAllStrategyNumber			11
#define JsonbJsonpathExistsStrategyNumber		15
#define JsonbJsonpathPredicateStrategyNumber	16
39 : :
40 : :
41 : : /*
42 : : * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
43 : : * keys and values. The storage format is text. The first byte of the text
44 : : * string distinguishes whether this is a key (always a string), null value,
45 : : * boolean value, numeric value, or string value. However, array elements
46 : : * that are strings are marked as though they were keys; this imprecision
47 : : * supports the definition of the "exists" operator, which treats array
48 : : * elements like keys. The remainder of the text string is empty for a null
49 : : * value, "t" or "f" for a boolean value, a normalized print representation of
50 : : * a numeric value, or the text of a string value. However, if the length of
51 : : * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
52 : : * the text representation and store an 8-hex-digit representation of the
53 : : * uint32 hash value, marking the prefix byte with an additional bit to
54 : : * distinguish that this has happened. Hashing long strings saves space and
55 : : * ensures that we won't overrun the maximum entry length for a GIN index.
56 : : * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
57 : : * to ensure that the on-disk text datum will have a short varlena header.)
58 : : * Note that when any hashed item appears in a query, we must recheck index
59 : : * matches against the heap tuple; currently, this costs nothing because we
60 : : * must always recheck for other reasons.
61 : : */
/*
 * Prefix-byte values for jsonb_ops GIN entries.
 *
 * JGINFLAG_KEY .. JGINFLAG_STR are the item-kind codes (0x01 .. 0x05).
 * JGINFLAG_HASHED is a separate bit (0x10) that is OR'd into the kind code
 * when the text part was replaced by its hash; it does not collide with any
 * of the kind codes.
 */
#define JGINFLAG_KEY	0x01	/* key (or string array element) */
#define JGINFLAG_NULL	0x02	/* null value */
#define JGINFLAG_BOOL	0x03	/* boolean value */
#define JGINFLAG_NUM	0x04	/* numeric value */
#define JGINFLAG_STR	0x05	/* string value (if not an array element) */
#define JGINFLAG_HASHED 0x10	/* OR'd into flag if value was hashed */
#define JGIN_MAXLENGTH	125		/* max length of text part before hashing */
69 : :
/*
 * Forward declarations: struct JsonbValue holds a JsonbPair pointer and
 * struct JsonbPair embeds JsonbValues, so both typedef names must exist
 * before either struct body is written out.
 */
typedef struct JsonbPair JsonbPair;
typedef struct JsonbValue JsonbValue;
72 : :
73 : : /*
74 : : * Jsonbs are varlena objects, so must meet the varlena convention that the
75 : : * first int32 of the object contains the total object size in bytes. Be sure
76 : : * to use VARSIZE() and SET_VARSIZE() to access it, though!
77 : : *
78 : : * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
79 : : * representation. Often, JsonbValues are just shims through which a Jsonb
80 : : * buffer is accessed, but they can also be deep copied and passed around.
81 : : *
82 : : * Jsonb is a tree structure. Each node in the tree consists of a JEntry
83 : : * header and a variable-length content (possibly of zero size). The JEntry
84 : : * header indicates what kind of a node it is, e.g. a string or an array,
85 : : * and provides the length of its variable-length portion.
86 : : *
87 : : * The JEntry and the content of a node are not stored physically together.
88 : : * Instead, the container array or object has an array that holds the JEntrys
89 : : * of all the child nodes, followed by their variable-length portions.
90 : : *
91 : : * The root node is an exception; it has no parent array or object that could
92 : : * hold its JEntry. Hence, no JEntry header is stored for the root node. It
93 : : * is implicitly known that the root node must be an array or an object,
94 : : * so we can get away without the type indicator as long as we can distinguish
95 : : * the two. For that purpose, both an array and an object begin with a uint32
96 : : * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
97 : : * scalar value needs to be stored as a Jsonb value, what we actually store is
98 : : * an array with one element, with the flags in the array's header field set
99 : : * to JB_FSCALAR | JB_FARRAY.
100 : : *
101 : : * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
102 : : * the variable-length portion of some node types is aligned to a 4-byte
103 : : * boundary, while others are not. When alignment is needed, the padding is
104 : : * in the beginning of the node that requires it. For example, if a numeric
105 : : * node is stored after a string node, so that the numeric node begins at
106 : : * offset 3, the variable-length portion of the numeric node will begin with
107 : : * one padding byte so that the actual numeric data is 4-byte aligned.
108 : : */
109 : :
110 : : /*
111 : : * JEntry format.
112 : : *
113 : : * The least significant 28 bits store either the data length of the entry,
114 : : * or its end+1 offset from the start of the variable-length portion of the
115 : : * containing object. The next three bits store the type of the entry, and
116 : : * the high-order bit tells whether the least significant bits store a length
117 : : * or an offset.
118 : : *
119 : : * The reason for the offset-or-length complication is to compromise between
120 : : * access speed and data compressibility. In the initial design each JEntry
121 : : * always stored an offset, but this resulted in JEntry arrays with horrible
122 : : * compressibility properties, so that TOAST compression of a JSONB did not
123 : : * work well. Storing only lengths would greatly improve compressibility,
124 : : * but it makes random access into large arrays expensive (O(N) not O(1)).
125 : : * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
126 : : * a length in the rest. This results in reasonably compressible data (as
127 : : * long as the stride isn't too small). We may have to examine as many as
128 : : * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
129 : : * given item, but that's still O(1) no matter how large the container is.
130 : : *
131 : : * We could avoid eating a flag bit for this purpose if we were to store
132 : : * the stride in the container header, or if we were willing to treat the
133 : : * stride as an unchangeable constant. Neither of those options is very
134 : : * attractive though.
135 : : */
136 : : typedef uint32 JEntry;
137 : :
/*
 * Bit layout of a JEntry word:
 *	 bits 0..27	 length or end+1 offset (JENTRY_OFFLENMASK)
 *	 bits 28..30 type code (JENTRY_TYPEMASK)
 *	 bit 31		 set when the low bits hold an offset rather than a length
 */
#define JENTRY_OFFLENMASK		0x0FFFFFFF
#define JENTRY_TYPEMASK			0x70000000
#define JENTRY_HAS_OFF			0x80000000

/* values stored in the type bits */
#define JENTRY_ISSTRING			0x00000000
#define JENTRY_ISNUMERIC		0x10000000
#define JENTRY_ISBOOL_FALSE		0x20000000
#define JENTRY_ISBOOL_TRUE		0x30000000
#define JENTRY_ISNULL			0x40000000
#define JENTRY_ISCONTAINER		0x50000000	/* array or object */

/*
 * Access macros.  Note possible multiple evaluations: JBE_ISBOOL expands its
 * argument twice, so do not pass an expression with side effects.
 */
#define JBE_OFFLENFLD(je_)		((je_) & JENTRY_OFFLENMASK)
#define JBE_HAS_OFF(je_)		(((je_) & JENTRY_HAS_OFF) != 0)
#define JBE_ISSTRING(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
#define JBE_ISNULL(je_)			(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
#define JBE_ISBOOL_TRUE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
#define JBE_ISBOOL_FALSE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_)			(JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))

/*
 * Macro for advancing an offset variable to the next JEntry.
 *
 * "offset" must be a modifiable lvalue.  "je" is evaluated exactly once (it
 * is copied into a local first).  If the entry carries an offset, "offset"
 * is overwritten with it; otherwise the entry's length is added to "offset".
 * The do { } while(0) wrapper makes the expansion a single statement.
 */
#define JBE_ADVANCE_OFFSET(offset, je) \
	do { \
		JEntry		je_ = (je); \
		if (JBE_HAS_OFF(je_)) \
			(offset) = JBE_OFFLENFLD(je_); \
		else \
			(offset) += JBE_OFFLENFLD(je_); \
	} while(0)
170 : :
/*
 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
 *
 * Caution: only reference this macro when creating a JSONB value.  Code that
 * examines an existing value must look at each JEntry's HAS_OFF bit instead,
 * so that the offset-placement heuristic can change without breaking on-disk
 * compatibility.
 */
#define JB_OFFSET_STRIDE		32
179 : :
180 : : /*
181 : : * A jsonb array or object node, within a Jsonb Datum.
182 : : *
183 : : * An array has one child for each element, stored in array order.
184 : : *
185 : : * An object has two children for each key/value pair. The keys all appear
186 : : * first, in key sort order; then the values appear, in an order matching the
187 : : * key order. This arrangement keeps the keys compact in memory, making a
188 : : * search for a particular key more cache-friendly.
189 : : */
190 : : typedef struct JsonbContainer
191 : : {
192 : : uint32 header; /* number of elements or key/value pairs, and
193 : : * flags */
194 : : JEntry children[FLEXIBLE_ARRAY_MEMBER];
195 : :
196 : : /* the data for each child node follows. */
197 : : } JsonbContainer;
198 : :
/*
 * Flags for the header-field in JsonbContainer.
 *
 * The low 28 bits (JB_CMASK) hold the count; the three flag bits sit above
 * the count and do not overlap it.
 */
#define JB_CMASK				0x0FFFFFFF	/* mask for count field */
#define JB_FSCALAR				0x10000000	/* flag bits */
#define JB_FOBJECT				0x20000000
#define JB_FARRAY				0x40000000

/*
 * Convenience macros for accessing a JsonbContainer struct.  "jc" must be a
 * pointer to something with a "header" member; each macro reads it once.
 */
#define JsonContainerSize(jc)		((jc)->header & JB_CMASK)
#define JsonContainerIsScalar(jc)	(((jc)->header & JB_FSCALAR) != 0)
#define JsonContainerIsObject(jc)	(((jc)->header & JB_FOBJECT) != 0)
#define JsonContainerIsArray(jc)	(((jc)->header & JB_FARRAY) != 0)
210 : :
211 : : /* The top-level on-disk format for a jsonb datum. */
212 : : typedef struct
213 : : {
214 : : int32 vl_len_; /* varlena header (do not touch directly!) */
215 : : JsonbContainer root;
216 : : } Jsonb;
217 : :
/*
 * Convenience macros for accessing the root container in a Jsonb datum.
 *
 * Each one reads the uint32 found at VARDATA(jbp_) and masks it with the
 * corresponding JB_* constant.
 */
#define JB_ROOT_COUNT(jbp_)		(*(uint32 *) VARDATA(jbp_) & JB_CMASK)
#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
#define JB_ROOT_IS_ARRAY(jbp_)	((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
223 : :
224 : :
/*
 * Kind tag for a JsonbValue.  The explicit starting values leave gaps between
 * the groups: scalars begin at 0x0, composites at 0x10, virtual types at
 * 0x20.
 */
enum jbvType
{
	/* Scalar types */
	jbvNull = 0x0,
	jbvString,
	jbvNumeric,
	jbvBool,
	/* Composite types */
	jbvArray = 0x10,
	jbvObject,
	/* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
	jbvBinary,

	/*
	 * Virtual types.
	 *
	 * These types are used only for in-memory JSON processing and serialized
	 * into JSON strings when outputted to json/jsonb.
	 */
	jbvDatetime = 0x20,
};
246 : :
247 : : /*
248 : : * JsonbValue: In-memory representation of Jsonb. This is a convenient
249 : : * deserialized representation, that can easily support using the "val"
250 : : * union across underlying types during manipulation. The Jsonb on-disk
251 : : * representation has various alignment considerations.
252 : : */
253 : : struct JsonbValue
254 : : {
255 : : enum jbvType type; /* Influences sort order */
256 : :
257 : : union
258 : : {
259 : : Numeric numeric;
260 : : bool boolean;
261 : : struct
262 : : {
263 : : int len;
264 : : char *val; /* Not necessarily null-terminated */
265 : : } string; /* String primitive type */
266 : :
267 : : struct
268 : : {
269 : : int nElems;
270 : : JsonbValue *elems;
271 : : bool rawScalar; /* Top-level "raw scalar" array? */
272 : : } array; /* Array container type */
273 : :
274 : : struct
275 : : {
276 : : int nPairs; /* 1 pair, 2 elements */
277 : : JsonbPair *pairs;
278 : : } object; /* Associative container type */
279 : :
280 : : struct
281 : : {
282 : : int len;
283 : : JsonbContainer *data;
284 : : } binary; /* Array or object, in on-disk format */
285 : :
286 : : struct
287 : : {
288 : : Datum value;
289 : : Oid typid;
290 : : int32 typmod;
291 : : int tz; /* Numeric time zone, in seconds, for
292 : : * TimestampTz data type */
293 : : } datetime;
294 : : } val;
295 : : };
296 : :
/*
 * True when the pointed-to value's type lies in jbvNull..jbvBool or equals
 * jbvDatetime.  The argument is expanded up to three times, so it must not
 * have side effects.
 */
#define IsAJsonbScalar(jsonbval)	(((jsonbval)->type >= jbvNull && \
									  (jsonbval)->type <= jbvBool) || \
									  (jsonbval)->type == jbvDatetime)
300 : :
301 : : /*
302 : : * Key/value pair within an Object.
303 : : *
304 : : * This struct type is only used briefly while constructing a Jsonb; it is
305 : : * *not* the on-disk representation.
306 : : *
307 : : * Pairs with duplicate keys are de-duplicated. We store the originally
308 : : * observed pair ordering for the purpose of removing duplicates in a
309 : : * well-defined way (which is "last observed wins").
310 : : */
311 : : struct JsonbPair
312 : : {
313 : : JsonbValue key; /* Must be a jbvString */
314 : : JsonbValue value; /* May be of any type */
315 : : uint32 order; /* Pair's index in original sequence */
316 : : };
317 : :
318 : : /* Conversion state used when parsing Jsonb from text, or for type coercion */
319 : : typedef struct JsonbParseState
320 : : {
321 : : JsonbValue contVal;
322 : : Size size;
323 : : struct JsonbParseState *next;
324 : : bool unique_keys; /* Check object key uniqueness */
325 : : bool skip_nulls; /* Skip null object fields */
326 : : } JsonbParseState;
327 : :
328 : : /*
329 : : * JsonbIterator holds details of the type for each iteration. It also stores a
330 : : * Jsonb varlena buffer, which can be directly accessed in some contexts.
331 : : */
/* Enumerators are implicitly numbered 0 (JBI_ARRAY_START) .. 4 (JBI_OBJECT_VALUE). */
typedef enum
{
	JBI_ARRAY_START,
	JBI_ARRAY_ELEM,
	JBI_OBJECT_START,
	JBI_OBJECT_KEY,
	JBI_OBJECT_VALUE,
} JsonbIterState;
340 : :
341 : : typedef struct JsonbIterator
342 : : {
343 : : /* Container being iterated */
344 : : JsonbContainer *container;
345 : : uint32 nElems; /* Number of elements in children array (will
346 : : * be nPairs for objects) */
347 : : bool isScalar; /* Pseudo-array scalar value? */
348 : : JEntry *children; /* JEntrys for child nodes */
349 : : /* Data proper. This points to the beginning of the variable-length data */
350 : : char *dataProper;
351 : :
352 : : /* Current item in buffer (up to nElems) */
353 : : int curIndex;
354 : :
355 : : /* Data offset corresponding to current item */
356 : : uint32 curDataOffset;
357 : :
358 : : /*
359 : : * If the container is an object, we want to return keys and values
360 : : * alternately; so curDataOffset points to the current key, and
361 : : * curValueOffset points to the current value.
362 : : */
363 : : uint32 curValueOffset;
364 : :
365 : : /* Private state */
366 : : JsonbIterState state;
367 : :
368 : : struct JsonbIterator *parent;
369 : : } JsonbIterator;
370 : :
371 : :
372 : : /* Convenience functions and macros */
373 : : static inline Jsonb *
565 peter@eisentraut.org 374 :CBC 476961 : DatumGetJsonbP(Datum d)
375 : : {
376 : 476961 : return (Jsonb *) PG_DETOAST_DATUM(d);
377 : : }
378 : :
379 : : static inline Jsonb *
380 : 3978 : DatumGetJsonbPCopy(Datum d)
381 : : {
382 : 3978 : return (Jsonb *) PG_DETOAST_DATUM_COPY(d);
383 : : }
384 : :
385 : : static inline Datum
386 : 4119 : JsonbPGetDatum(const Jsonb *p)
387 : : {
388 : 4119 : return PointerGetDatum(p);
389 : : }
390 : :
/*
 * Argument-fetch and return shorthands: the GETARG forms feed
 * PG_GETARG_DATUM(x) to the matching DatumGetJsonbP*() function, and
 * PG_RETURN_JSONB_P is a plain alias for PG_RETURN_POINTER.
 */
#define PG_GETARG_JSONB_P(x)		DatumGetJsonbP(PG_GETARG_DATUM(x))
#define PG_GETARG_JSONB_P_COPY(x)	DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
#define PG_RETURN_JSONB_P(x)		PG_RETURN_POINTER(x)
394 : :
395 : : /* Support functions */
396 : : extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
397 : : extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
398 : : extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
399 : : extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *container,
400 : : uint32 flags,
401 : : JsonbValue *key);
402 : : extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
403 : : const char *keyVal, int keyLen,
404 : : JsonbValue *res);
405 : : extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *container,
406 : : uint32 i);
407 : : extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
408 : : JsonbIteratorToken seq, JsonbValue *jbval);
409 : : extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
410 : : extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
411 : : bool skipNested);
412 : : extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
413 : : extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
414 : : extern bool JsonbDeepContains(JsonbIterator **val,
415 : : JsonbIterator **mContained);
416 : : extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
417 : : extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
418 : : uint64 *hash, uint64 seed);
419 : :
420 : : /* jsonb.c support functions */
421 : : extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
422 : : int estimated_len);
423 : : extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
424 : : int estimated_len);
425 : : extern char *JsonbUnquote(Jsonb *jb);
426 : : extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
427 : : extern const char *JsonbTypeName(JsonbValue *val);
428 : :
429 : : extern Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
430 : : JsonbValue *newval);
431 : : extern Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath,
432 : : bool *isnull, bool as_text);
433 : : extern bool to_jsonb_is_immutable(Oid typoid);
434 : : extern Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool *nulls,
435 : : const Oid *types, bool absent_on_null,
436 : : bool unique_keys);
437 : : extern Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls,
438 : : const Oid *types, bool absent_on_null);
439 : :
440 : : #endif /* __JSONB_H__ */
|