Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * queryjumblefuncs.c
4 : * Query normalization and fingerprinting.
5 : *
6 : * Normalization is a process whereby similar queries, typically differing only
7 : * in their constants (though the exact rules are somewhat more subtle than
8 : * that) are recognized as equivalent, and are tracked as a single entry. This
9 : * is particularly useful for non-prepared queries.
10 : *
11 : * Normalization is implemented by fingerprinting queries, selectively
12 : * serializing those fields of each query tree's nodes that are judged to be
13 : * essential to the query. This is referred to as a query jumble. This is
14 : * distinct from a regular serialization in that various extraneous
15 : * information is ignored as irrelevant or not essential to the query, such
16 : * as the collations of Vars and, most notably, the values of constants.
17 : *
18 : * This jumble is acquired at the end of parse analysis of each query, and
19 : * a 64-bit hash of it is stored into the query's Query.queryId field.
20 : * The server then copies this value around, making it available in plan
21 : * tree(s) generated from the query. The executor can then use this value
22 : * to blame query costs on the proper queryId.
23 : *
24 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : *
28 : * IDENTIFICATION
29 : * src/backend/nodes/queryjumblefuncs.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include "common/hashfn.h"
36 : #include "miscadmin.h"
37 : #include "nodes/queryjumble.h"
38 : #include "parser/scansup.h"
39 :
40 : #define JUMBLE_SIZE 1024 /* query serialization buffer size */
41 :
42 : /* GUC parameters */
43 : int compute_query_id = COMPUTE_QUERY_ID_AUTO;
44 :
45 : /* True when compute_query_id is ON, or AUTO and a module requests them */
46 : bool query_id_enabled = false;
47 :
48 : static void AppendJumble(JumbleState *jstate,
49 : const unsigned char *item, Size size);
50 : static void RecordConstLocation(JumbleState *jstate, int location);
51 : static void _jumbleNode(JumbleState *jstate, Node *node);
52 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
53 : static void _jumbleList(JumbleState *jstate, Node *node);
54 : static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
55 :
56 : /*
57 : * Given a possibly multi-statement source string, confine our attention to the
58 : * relevant part of the string.
59 : */
60 : const char *
732 bruce 61 GNC 68943 : CleanQuerytext(const char *query, int *location, int *len)
62 : {
697 tgl 63 68943 : int query_location = *location;
64 68943 : int query_len = *len;
65 :
66 : /* First apply starting offset, unless it's -1 (unknown). */
732 bruce 67 68943 : if (query_location >= 0)
68 : {
69 68759 : Assert(query_location <= strlen(query));
70 68759 : query += query_location;
71 : /* Length of 0 (or -1) means "rest of string" */
72 68759 : if (query_len <= 0)
73 3926 : query_len = strlen(query);
74 : else
75 64833 : Assert(query_len <= strlen(query));
76 : }
77 : else
78 : {
79 : /* If query location is unknown, distrust query_len as well */
80 184 : query_location = 0;
81 184 : query_len = strlen(query);
82 : }
83 :
84 : /*
85 : * Discard leading and trailing whitespace, too. Use scanner_isspace()
86 : * not libc's isspace(), because we want to match the lexer's behavior.
87 : */
88 69341 : while (query_len > 0 && scanner_isspace(query[0]))
89 398 : query++, query_location++, query_len--;
90 69415 : while (query_len > 0 && scanner_isspace(query[query_len - 1]))
91 472 : query_len--;
92 :
93 68943 : *location = query_location;
94 68943 : *len = query_len;
95 :
96 68943 : return query;
97 : }
98 :
99 : JumbleState *
100 60772 : JumbleQuery(Query *query, const char *querytext)
101 : {
102 60772 : JumbleState *jstate = NULL;
103 :
694 alvherre 104 60772 : Assert(IsQueryIdEnabled());
105 :
68 michael 106 60772 : jstate = (JumbleState *) palloc(sizeof(JumbleState));
107 :
108 : /* Set up workspace for query jumbling */
109 60772 : jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
110 60772 : jstate->jumble_len = 0;
111 60772 : jstate->clocations_buf_size = 32;
112 60772 : jstate->clocations = (LocationLen *)
113 60772 : palloc(jstate->clocations_buf_size * sizeof(LocationLen));
114 60772 : jstate->clocations_count = 0;
115 60772 : jstate->highest_extern_param_id = 0;
116 :
117 : /* Compute query ID and mark the Query node with it */
118 60772 : _jumbleNode(jstate, (Node *) query);
119 60772 : query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
120 60772 : jstate->jumble_len,
121 : 0));
122 :
123 : /*
124 : * If we are unlucky enough to get a hash of zero, use 1 instead for
125 : * normal statements and 2 for utility queries.
126 : */
127 60772 : if (query->queryId == UINT64CONST(0))
128 : {
68 michael 129 UNC 0 : if (query->utilityStmt)
130 0 : query->queryId = UINT64CONST(2);
131 : else
732 bruce 132 0 : query->queryId = UINT64CONST(1);
133 : }
134 :
732 bruce 135 GNC 60772 : return jstate;
136 : }
137 :
138 : /*
139 : * Enables query identifier computation.
140 : *
141 : * Third-party plugins can use this function to inform core that they require
142 : * a query identifier to be computed.
143 : */
144 : void
694 alvherre 145 3 : EnableQueryId(void)
146 : {
147 3 : if (compute_query_id != COMPUTE_QUERY_ID_OFF)
148 3 : query_id_enabled = true;
149 3 : }
150 :
151 : /*
152 : * AppendJumble: Append a value that is substantive in a given query to
153 : * the current jumble.
154 : */
155 : static void
732 bruce 156 2819372 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
157 : {
158 2819372 : unsigned char *jumble = jstate->jumble;
159 2819372 : Size jumble_len = jstate->jumble_len;
160 :
161 : /*
162 : * Whenever the jumble buffer is full, we hash the current contents and
163 : * reset the buffer to contain just that hash value, thus relying on the
164 : * hash to summarize everything so far.
165 : */
166 5639012 : while (size > 0)
167 : {
168 : Size part_size;
169 :
170 2819640 : if (jumble_len >= JUMBLE_SIZE)
171 : {
172 : uint64 start_hash;
173 :
174 801 : start_hash = DatumGetUInt64(hash_any_extended(jumble,
175 : JUMBLE_SIZE, 0));
176 801 : memcpy(jumble, &start_hash, sizeof(start_hash));
177 801 : jumble_len = sizeof(start_hash);
178 : }
179 2819640 : part_size = Min(size, JUMBLE_SIZE - jumble_len);
180 2819640 : memcpy(jumble + jumble_len, item, part_size);
181 2819640 : jumble_len += part_size;
182 2819640 : item += part_size;
183 2819640 : size -= part_size;
184 : }
185 2819372 : jstate->jumble_len = jumble_len;
186 2819372 : }
187 :
188 : /*
189 : * Record location of constant within query string of query tree
190 : * that is currently being walked.
191 : */
192 : static void
68 michael 193 86704 : RecordConstLocation(JumbleState *jstate, int location)
194 : {
195 : /* -1 indicates unknown or undefined location */
196 86704 : if (location >= 0)
197 : {
198 : /* enlarge array if needed */
199 82985 : if (jstate->clocations_count >= jstate->clocations_buf_size)
200 : {
201 54 : jstate->clocations_buf_size *= 2;
202 54 : jstate->clocations = (LocationLen *)
203 54 : repalloc(jstate->clocations,
204 54 : jstate->clocations_buf_size *
205 : sizeof(LocationLen));
206 : }
207 82985 : jstate->clocations[jstate->clocations_count].location = location;
208 : /* initialize lengths to -1 to simplify third-party module usage */
209 82985 : jstate->clocations[jstate->clocations_count].length = -1;
210 82985 : jstate->clocations_count++;
211 : }
732 bruce 212 86704 : }
213 :
214 : #define JUMBLE_NODE(item) \
215 : _jumbleNode(jstate, (Node *) expr->item)
216 : #define JUMBLE_LOCATION(location) \
217 : RecordConstLocation(jstate, expr->location)
218 : #define JUMBLE_FIELD(item) \
219 : AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
220 : #define JUMBLE_FIELD_SINGLE(item) \
221 : AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
222 : #define JUMBLE_STRING(str) \
223 : do { \
224 : if (expr->str) \
225 : AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
226 : } while(0)
227 :
228 : #include "queryjumblefuncs.funcs.c"
229 :
230 : static void
68 michael 231 2547341 : _jumbleNode(JumbleState *jstate, Node *node)
232 : {
233 2547341 : Node *expr = node;
234 :
235 2547341 : if (expr == NULL)
732 bruce 236 1422976 : return;
237 :
238 : /* Guard against stack overflow due to overly complex expressions */
239 1124365 : check_stack_depth();
240 :
241 : /*
242 : * We always emit the node's NodeTag, then any additional fields that are
243 : * considered significant, and then we recurse to any child nodes.
244 : */
68 michael 245 1124365 : JUMBLE_FIELD(type);
246 :
247 1124365 : switch (nodeTag(expr))
248 : {
249 : #include "queryjumblefuncs.switch.c"
250 :
251 278335 : case T_List:
252 : case T_IntList:
253 : case T_OidList:
254 : case T_XidList:
255 278335 : _jumbleList(jstate, expr);
732 bruce 256 278335 : break;
257 :
68 michael 258 UNC 0 : default:
259 : /* Only a warning, since we can stumble along anyway */
260 0 : elog(WARNING, "unrecognized node type: %d",
261 : (int) nodeTag(expr));
732 bruce 262 0 : break;
263 : }
264 :
265 : /* Special cases to handle outside the automated code */
68 michael 266 GNC 1124365 : switch (nodeTag(expr))
267 : {
732 bruce 268 5083 : case T_Param:
269 : {
270 5083 : Param *p = (Param *) node;
271 :
272 : /*
273 : * Update the highest Param id seen, in order to start
274 : * normalization correctly.
275 : */
276 5083 : if (p->paramkind == PARAM_EXTERN &&
277 4773 : p->paramid > jstate->highest_extern_param_id)
278 4010 : jstate->highest_extern_param_id = p->paramid;
279 : }
280 5083 : break;
68 michael 281 1119282 : default:
732 bruce 282 1119282 : break;
283 : }
284 : }
285 :
286 : static void
68 michael 287 278335 : _jumbleList(JumbleState *jstate, Node *node)
288 : {
289 278335 : List *expr = (List *) node;
290 : ListCell *l;
291 :
292 278335 : switch (expr->type)
293 : {
294 277949 : case T_List:
295 760339 : foreach(l, expr)
296 482390 : _jumbleNode(jstate, lfirst(l));
732 bruce 297 277949 : break;
68 michael 298 386 : case T_IntList:
299 865 : foreach(l, expr)
300 479 : JUMBLE_FIELD_SINGLE(lfirst_int(l));
732 bruce 301 386 : break;
68 michael 302 UNC 0 : case T_OidList:
303 0 : foreach(l, expr)
304 0 : JUMBLE_FIELD_SINGLE(lfirst_oid(l));
732 bruce 305 0 : break;
68 michael 306 0 : case T_XidList:
307 0 : foreach(l, expr)
308 0 : JUMBLE_FIELD_SINGLE(lfirst_xid(l));
732 bruce 309 0 : break;
68 michael 310 0 : default:
311 0 : elog(ERROR, "unrecognized list node type: %d",
312 : (int) expr->type);
313 : return;
314 : }
315 : }
316 :
317 : static void
61 michael 318 GNC 7275 : _jumbleA_Const(JumbleState *jstate, Node *node)
319 : {
320 7275 : A_Const *expr = (A_Const *) node;
321 :
322 7275 : JUMBLE_FIELD(isnull);
323 7275 : if (!expr->isnull)
324 : {
325 7202 : JUMBLE_FIELD(val.node.type);
326 7202 : switch (nodeTag(&expr->val))
327 : {
328 3524 : case T_Integer:
329 3524 : JUMBLE_FIELD(val.ival.ival);
330 3524 : break;
331 42 : case T_Float:
332 42 : JUMBLE_STRING(val.fval.fval);
333 42 : break;
334 96 : case T_Boolean:
335 96 : JUMBLE_FIELD(val.boolval.boolval);
336 96 : break;
337 3538 : case T_String:
338 3538 : JUMBLE_STRING(val.sval.sval);
339 3538 : break;
340 2 : case T_BitString:
341 2 : JUMBLE_STRING(val.bsval.bsval);
342 2 : break;
61 michael 343 UNC 0 : default:
344 0 : elog(ERROR, "unrecognized node type: %d",
345 : (int) nodeTag(&expr->val));
346 : break;
347 : }
348 : }
61 michael 349 GNC 7275 : }
350 :
351 : static void
68 352 52382 : _jumbleRangeTblEntry(JumbleState *jstate, Node *node)
353 : {
354 52382 : RangeTblEntry *expr = (RangeTblEntry *) node;
355 :
356 52382 : JUMBLE_FIELD(rtekind);
357 52382 : switch (expr->rtekind)
358 : {
359 37437 : case RTE_RELATION:
360 37437 : JUMBLE_FIELD(relid);
361 37437 : JUMBLE_NODE(tablesample);
362 37437 : JUMBLE_FIELD(inh);
194 alvherre 363 37437 : break;
68 michael 364 4577 : case RTE_SUBQUERY:
365 4577 : JUMBLE_NODE(subquery);
732 bruce 366 4577 : break;
68 michael 367 5717 : case RTE_JOIN:
368 5717 : JUMBLE_FIELD(jointype);
732 bruce 369 5717 : break;
68 michael 370 2915 : case RTE_FUNCTION:
371 2915 : JUMBLE_NODE(functions);
732 bruce 372 2915 : break;
68 michael 373 37 : case RTE_TABLEFUNC:
374 37 : JUMBLE_NODE(tablefunc);
732 bruce 375 37 : break;
68 michael 376 1175 : case RTE_VALUES:
377 1175 : JUMBLE_NODE(values_lists);
732 bruce 378 1175 : break;
68 michael 379 449 : case RTE_CTE:
380 :
381 : /*
382 : * Depending on the CTE name here isn't ideal, but it's the only
383 : * info we have to identify the referenced WITH item.
384 : */
385 449 : JUMBLE_STRING(ctename);
386 449 : JUMBLE_FIELD(ctelevelsup);
732 bruce 387 449 : break;
68 michael 388 75 : case RTE_NAMEDTUPLESTORE:
389 75 : JUMBLE_STRING(enrname);
732 bruce 390 75 : break;
68 michael 391 UNC 0 : case RTE_RESULT:
732 bruce 392 0 : break;
393 0 : default:
68 michael 394 0 : elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
395 : break;
396 : }
732 bruce 397 GNC 52382 : }
|