Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * dependencies.c
4 : : * POSTGRES functional dependencies
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : * IDENTIFICATION
10 : : * src/backend/statistics/dependencies.c
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : : #include "postgres.h"
15 : :
16 : : #include "access/htup_details.h"
17 : : #include "catalog/pg_statistic_ext.h"
18 : : #include "catalog/pg_statistic_ext_data.h"
19 : : #include "lib/stringinfo.h"
20 : : #include "nodes/nodeFuncs.h"
21 : : #include "nodes/nodes.h"
22 : : #include "nodes/pathnodes.h"
23 : : #include "optimizer/clauses.h"
24 : : #include "optimizer/optimizer.h"
25 : : #include "parser/parsetree.h"
26 : : #include "statistics/extended_stats_internal.h"
27 : : #include "statistics/statistics.h"
28 : : #include "utils/fmgroids.h"
29 : : #include "utils/fmgrprotos.h"
30 : : #include "utils/lsyscache.h"
31 : : #include "utils/memutils.h"
32 : : #include "utils/selfuncs.h"
33 : : #include "utils/syscache.h"
34 : : #include "utils/typcache.h"
35 : : #include "varatt.h"
36 : :
37 : : /* size of the struct header fields (magic, type, ndeps) */
38 : : #define SizeOfHeader (3 * sizeof(uint32))
39 : :
40 : : /* size of a serialized dependency (degree, natts, atts) */
41 : : #define SizeOfItem(natts) \
42 : : (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
43 : :
44 : : /* minimal size of a dependency (with two attributes) */
45 : : #define MinSizeOfItem SizeOfItem(2)
46 : :
47 : : /* minimal size of dependencies, when all deps are minimal */
48 : : #define MinSizeOfItems(ndeps) \
49 : : (SizeOfHeader + (ndeps) * MinSizeOfItem)
50 : :
51 : : /*
52 : : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
53 : : * k-permutations of n elements, except that the order does not matter for the
54 : : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
55 : : */
56 : : typedef struct DependencyGeneratorData
57 : : {
58 : : int k; /* size of the dependency */
59 : : int n; /* number of possible attributes */
60 : : int current; /* next dependency to return (index) */
61 : : AttrNumber ndependencies; /* number of dependencies generated */
62 : : AttrNumber *dependencies; /* array of pre-generated dependencies */
63 : : } DependencyGeneratorData;
64 : :
65 : : typedef DependencyGeneratorData *DependencyGenerator;
66 : :
67 : : static void generate_dependencies_recurse(DependencyGenerator state,
68 : : int index, AttrNumber start, AttrNumber *current);
69 : : static void generate_dependencies(DependencyGenerator state);
70 : : static DependencyGenerator DependencyGenerator_init(int n, int k);
71 : : static void DependencyGenerator_free(DependencyGenerator state);
72 : : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
73 : : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
74 : : static bool dependency_is_fully_matched(MVDependency *dependency,
75 : : Bitmapset *attnums);
76 : : static bool dependency_is_compatible_clause(Node *clause, Index relid,
77 : : AttrNumber *attnum);
78 : : static bool dependency_is_compatible_expression(Node *clause, Index relid,
79 : : List *statlist, Node **expr);
80 : : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
81 : : int ndependencies, Bitmapset *attnums);
82 : : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
83 : : int varRelid, JoinType jointype,
84 : : SpecialJoinInfo *sjinfo,
85 : : MVDependency **dependencies,
86 : : int ndependencies,
87 : : AttrNumber *list_attnums,
88 : : Bitmapset **estimatedclauses);
89 : :
90 : : static void
2566 simon@2ndQuadrant.co 91 :CBC 372 : generate_dependencies_recurse(DependencyGenerator state, int index,
92 : : AttrNumber start, AttrNumber *current)
93 : : {
94 : : /*
95 : : * The generator handles the first (k-1) elements differently from the
96 : : * last element.
97 : : */
98 [ + + ]: 372 : if (index < (state->k - 1))
99 : : {
100 : : AttrNumber i;
101 : :
102 : : /*
103 : : * The first (k-1) values have to be in ascending order, which we
104 : : * generate recursively.
105 : : */
106 : :
107 [ + + ]: 444 : for (i = start; i < state->n; i++)
108 : : {
109 : 288 : current[index] = i;
110 : 288 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
111 : : }
112 : : }
113 : : else
114 : : {
115 : : int i;
116 : :
117 : : /*
118 : : * the last element is the implied value, which does not respect the
119 : : * ascending order. We just need to check that the value is not in the
120 : : * first (k-1) elements.
121 : : */
122 : :
123 [ + + ]: 792 : for (i = 0; i < state->n; i++)
124 : : {
125 : : int j;
126 : 576 : bool match = false;
127 : :
128 : 576 : current[index] = i;
129 : :
130 [ + + ]: 1008 : for (j = 0; j < index; j++)
131 : : {
132 [ + + ]: 720 : if (current[j] == i)
133 : : {
134 : 288 : match = true;
135 : 288 : break;
136 : : }
137 : : }
138 : :
139 : : /*
140 : : * If the value is not found in the first part of the dependency,
141 : : * we're done.
142 : : */
143 [ + + ]: 576 : if (!match)
144 : : {
145 : 576 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
2489 tgl@sss.pgh.pa.us 146 : 288 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
2566 simon@2ndQuadrant.co 147 : 288 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
148 : 288 : current, state->k * sizeof(AttrNumber));
149 : 288 : state->ndependencies++;
150 : : }
151 : : }
152 : : }
153 : 372 : }
154 : :
155 : : /* generate all dependencies (k-permutations of n elements) */
156 : : static void
157 : 84 : generate_dependencies(DependencyGenerator state)
158 : : {
159 : 84 : AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
160 : :
161 : 84 : generate_dependencies_recurse(state, 0, 0, current);
162 : :
163 : 84 : pfree(current);
164 : 84 : }
165 : :
166 : : /*
167 : : * initialize the DependencyGenerator of variations, and prebuild the variations
168 : : *
169 : : * This pre-builds all the variations. We could also generate them in
170 : : * DependencyGenerator_next(), but this seems simpler.
171 : : */
172 : : static DependencyGenerator
173 : 84 : DependencyGenerator_init(int n, int k)
174 : : {
175 : : DependencyGenerator state;
176 : :
177 [ + - - + ]: 84 : Assert((n >= k) && (k > 0));
178 : :
179 : : /* allocate the DependencyGenerator state */
180 : 84 : state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
181 : 84 : state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
182 : :
183 : 84 : state->ndependencies = 0;
184 : 84 : state->current = 0;
185 : 84 : state->k = k;
186 : 84 : state->n = n;
187 : :
188 : : /* now actually pre-generate all the variations */
189 : 84 : generate_dependencies(state);
190 : :
191 : 84 : return state;
192 : : }
193 : :
194 : : /* free the DependencyGenerator state */
195 : : static void
196 : 84 : DependencyGenerator_free(DependencyGenerator state)
197 : : {
198 : 84 : pfree(state->dependencies);
199 : 84 : pfree(state);
200 : 84 : }
201 : :
202 : : /* generate next combination */
203 : : static AttrNumber *
204 : 372 : DependencyGenerator_next(DependencyGenerator state)
205 : : {
206 [ + + ]: 372 : if (state->current == state->ndependencies)
207 : 84 : return NULL;
208 : :
209 : 288 : return &state->dependencies[state->k * state->current++];
210 : : }
211 : :
212 : :
213 : : /*
214 : : * validates functional dependency on the data
215 : : *
216 : : * An actual work horse of detecting functional dependencies. Given a variation
217 : : * of k attributes, it checks that the first (k-1) are sufficient to determine
218 : : * the last one.
219 : : */
220 : : static double
1115 tomas.vondra@postgre 221 : 288 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
222 : : {
223 : : int i,
224 : : nitems;
225 : : MultiSortSupport mss;
226 : : SortItem *items;
227 : : AttrNumber *attnums_dep;
228 : :
229 : : /* counters valid within a group */
2566 simon@2ndQuadrant.co 230 : 288 : int group_size = 0;
231 : 288 : int n_violations = 0;
232 : :
233 : : /* total number of rows supporting (consistent with) the dependency */
234 : 288 : int n_supporting_rows = 0;
235 : :
236 : : /* Make sure we have at least two input attributes. */
237 [ - + ]: 288 : Assert(k >= 2);
238 : :
239 : : /* sort info for all attributes columns */
240 : 288 : mss = multi_sort_init(k);
241 : :
242 : : /*
243 : : * Translate the array of indexes to regular attnums for the dependency
244 : : * (we will need this to identify the columns in StatsBuildData).
245 : : */
1845 tomas.vondra@postgre 246 : 288 : attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
247 [ + + ]: 936 : for (i = 0; i < k; i++)
1115 248 : 648 : attnums_dep[i] = data->attnums[dependency[i]];
249 : :
250 : : /*
251 : : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
252 : : *
253 : : * (a) sort the data lexicographically
254 : : *
255 : : * (b) split the data into groups by first (k-1) columns
256 : : *
257 : : * (c) for each group count different values in the last column
258 : : *
259 : : * We use the column data types' default sort operators and collations;
260 : : * perhaps at some point it'd be worth using column-specific collations?
261 : : */
262 : :
263 : : /* prepare the sort function for the dimensions */
2566 simon@2ndQuadrant.co 264 [ + + ]: 936 : for (i = 0; i < k; i++)
265 : : {
1115 tomas.vondra@postgre 266 : 648 : VacAttrStats *colstat = data->stats[dependency[i]];
267 : : TypeCacheEntry *type;
268 : :
2566 simon@2ndQuadrant.co 269 : 648 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
270 [ - + ]: 648 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
2566 simon@2ndQuadrant.co 271 [ # # ]:UBC 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
272 : : colstat->attrtypid);
273 : :
274 : : /* prepare the sort function for this dimension */
1732 tomas.vondra@postgre 275 :CBC 648 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
276 : : }
277 : :
278 : : /*
279 : : * build an array of SortItem(s) sorted using the multi-sort support
280 : : *
281 : : * XXX This relies on all stats entries pointing to the same tuple
282 : : * descriptor. For now that assumption holds, but it might change in the
283 : : * future for example if we support statistics on multiple tables.
284 : : */
1115 285 : 288 : items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
286 : :
287 : : /*
288 : : * Walk through the sorted array, split it into rows according to the
289 : : * first (k-1) columns. If there's a single value in the last column, we
290 : : * count the group as 'supporting' the functional dependency. Otherwise we
291 : : * count it as contradicting.
292 : : */
293 : :
294 : : /* start with the first row forming a group */
2566 simon@2ndQuadrant.co 295 : 288 : group_size = 1;
296 : :
297 : : /* loop 1 beyond the end of the array so that we count the final group */
1845 tomas.vondra@postgre 298 [ + + ]: 752685 : for (i = 1; i <= nitems; i++)
299 : : {
300 : : /*
301 : : * Check if the group ended, which may be either because we processed
302 : : * all the items (i==nitems), or because the i-th item is not equal to
303 : : * the preceding one.
304 : : */
305 [ + + + + ]: 1504506 : if (i == nitems ||
2489 tgl@sss.pgh.pa.us 306 : 752109 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
307 : : {
308 : : /*
309 : : * If no violations were found in the group then track the rows of
310 : : * the group as supporting the functional dependency.
311 : : */
2566 simon@2ndQuadrant.co 312 [ + + ]: 16107 : if (n_violations == 0)
313 : 9771 : n_supporting_rows += group_size;
314 : :
315 : : /* Reset counters for the new group */
316 : 16107 : n_violations = 0;
317 : 16107 : group_size = 1;
318 : 16107 : continue;
319 : : }
320 : : /* first columns match, but the last one does not (so contradicting) */
321 [ + + ]: 736290 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
322 : 30042 : n_violations++;
323 : :
324 : 736290 : group_size++;
325 : : }
326 : :
327 : : /* Compute the 'degree of validity' as (supporting/total). */
1115 tomas.vondra@postgre 328 : 288 : return (n_supporting_rows * 1.0 / data->numrows);
329 : : }
330 : :
331 : : /*
332 : : * detects functional dependencies between groups of columns
333 : : *
334 : : * Generates all possible subsets of columns (variations) and computes
335 : : * the degree of validity for each one. For example when creating statistics
336 : : * on three columns (a,b,c) there are 9 possible dependencies
337 : : *
338 : : * two columns three columns
339 : : * ----------- -------------
340 : : * (a) -> b (a,b) -> c
341 : : * (a) -> c (a,c) -> b
342 : : * (b) -> a (b,c) -> a
343 : : * (b) -> c
344 : : * (c) -> a
345 : : * (c) -> b
346 : : */
347 : : MVDependencies *
348 : 60 : statext_dependencies_build(StatsBuildData *data)
349 : : {
350 : : int i,
351 : : k;
352 : :
353 : : /* result */
2566 simon@2ndQuadrant.co 354 : 60 : MVDependencies *dependencies = NULL;
355 : : MemoryContext cxt;
356 : :
1115 tomas.vondra@postgre 357 [ - + ]: 60 : Assert(data->nattnums >= 2);
358 : :
359 : : /* tracks memory allocated by dependency_degree calls */
936 360 : 60 : cxt = AllocSetContextCreate(CurrentMemoryContext,
361 : : "dependency_degree cxt",
362 : : ALLOCSET_DEFAULT_SIZES);
363 : :
364 : : /*
365 : : * We'll try build functional dependencies starting from the smallest ones
366 : : * covering just 2 columns, to the largest ones, covering all columns
367 : : * included in the statistics object. We start from the smallest ones
368 : : * because we want to be able to skip already implied ones.
369 : : */
1115 370 [ + + ]: 144 : for (k = 2; k <= data->nattnums; k++)
371 : : {
372 : : AttrNumber *dependency; /* array with k elements */
373 : :
374 : : /* prepare a DependencyGenerator of variation */
375 : 84 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
376 : :
377 : : /* generate all possible variations of k values (out of n) */
2566 simon@2ndQuadrant.co 378 [ + + ]: 372 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
379 : : {
380 : : double degree;
381 : : MVDependency *d;
382 : : MemoryContext oldcxt;
383 : :
384 : : /* release memory used by dependency degree calculation */
936 tomas.vondra@postgre 385 : 288 : oldcxt = MemoryContextSwitchTo(cxt);
386 : :
387 : : /* compute how valid the dependency seems */
1115 388 : 288 : degree = dependency_degree(data, k, dependency);
389 : :
936 390 : 288 : MemoryContextSwitchTo(oldcxt);
391 : 288 : MemoryContextReset(cxt);
392 : :
393 : : /*
394 : : * if the dependency seems entirely invalid, don't store it
395 : : */
2566 simon@2ndQuadrant.co 396 [ + + ]: 288 : if (degree == 0.0)
397 : 126 : continue;
398 : :
399 : 162 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
2489 tgl@sss.pgh.pa.us 400 : 162 : + k * sizeof(AttrNumber));
401 : :
402 : : /* copy the dependency (and keep the indexes into stxkeys) */
2566 simon@2ndQuadrant.co 403 : 162 : d->degree = degree;
404 : 162 : d->nattributes = k;
405 [ + + ]: 522 : for (i = 0; i < k; i++)
1115 tomas.vondra@postgre 406 : 360 : d->attributes[i] = data->attnums[dependency[i]];
407 : :
408 : : /* initialize the list of dependencies */
2566 simon@2ndQuadrant.co 409 [ + + ]: 162 : if (dependencies == NULL)
410 : : {
411 : : dependencies
412 : 51 : = (MVDependencies *) palloc0(sizeof(MVDependencies));
413 : :
414 : 51 : dependencies->magic = STATS_DEPS_MAGIC;
415 : 51 : dependencies->type = STATS_DEPS_TYPE_BASIC;
416 : 51 : dependencies->ndeps = 0;
417 : : }
418 : :
419 : 162 : dependencies->ndeps++;
420 : 162 : dependencies = (MVDependencies *) repalloc(dependencies,
421 : : offsetof(MVDependencies, deps)
1820 tomas.vondra@postgre 422 : 162 : + dependencies->ndeps * sizeof(MVDependency *));
423 : :
2566 simon@2ndQuadrant.co 424 : 162 : dependencies->deps[dependencies->ndeps - 1] = d;
425 : : }
426 : :
427 : : /*
428 : : * we're done with variations of k elements, so free the
429 : : * DependencyGenerator
430 : : */
431 : 84 : DependencyGenerator_free(DependencyGenerator);
432 : : }
433 : :
936 tomas.vondra@postgre 434 : 60 : MemoryContextDelete(cxt);
435 : :
2566 simon@2ndQuadrant.co 436 : 60 : return dependencies;
437 : : }
438 : :
439 : :
440 : : /*
441 : : * Serialize list of dependencies into a bytea value.
442 : : */
443 : : bytea *
2524 bruce@momjian.us 444 : 51 : statext_dependencies_serialize(MVDependencies *dependencies)
445 : : {
446 : : int i;
447 : : bytea *output;
448 : : char *tmp;
449 : : Size len;
450 : :
451 : : /* we need to store ndeps, with a number of attributes for each one */
1820 tomas.vondra@postgre 452 : 51 : len = VARHDRSZ + SizeOfHeader;
453 : :
454 : : /* and also include space for the actual attribute numbers and degrees */
2566 simon@2ndQuadrant.co 455 [ + + ]: 213 : for (i = 0; i < dependencies->ndeps; i++)
1820 tomas.vondra@postgre 456 : 162 : len += SizeOfItem(dependencies->deps[i]->nattributes);
457 : :
2566 simon@2ndQuadrant.co 458 : 51 : output = (bytea *) palloc0(len);
459 : 51 : SET_VARSIZE(output, len);
460 : :
461 : 51 : tmp = VARDATA(output);
462 : :
463 : : /* Store the base struct values (magic, type, ndeps) */
464 : 51 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
465 : 51 : tmp += sizeof(uint32);
466 : 51 : memcpy(tmp, &dependencies->type, sizeof(uint32));
467 : 51 : tmp += sizeof(uint32);
468 : 51 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
469 : 51 : tmp += sizeof(uint32);
470 : :
471 : : /* store number of attributes and attribute numbers for each dependency */
472 [ + + ]: 213 : for (i = 0; i < dependencies->ndeps; i++)
473 : : {
474 : 162 : MVDependency *d = dependencies->deps[i];
475 : :
1820 tomas.vondra@postgre 476 : 162 : memcpy(tmp, &d->degree, sizeof(double));
477 : 162 : tmp += sizeof(double);
478 : :
479 : 162 : memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
480 : 162 : tmp += sizeof(AttrNumber);
481 : :
2566 simon@2ndQuadrant.co 482 : 162 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483 : 162 : tmp += sizeof(AttrNumber) * d->nattributes;
484 : :
485 : : /* protect against overflow */
486 [ - + ]: 162 : Assert(tmp <= ((char *) output + len));
487 : : }
488 : :
489 : : /* make sure we've produced exactly the right amount of data */
1820 tomas.vondra@postgre 490 [ - + ]: 51 : Assert(tmp == ((char *) output + len));
491 : :
2566 simon@2ndQuadrant.co 492 : 51 : return output;
493 : : }
494 : :
495 : : /*
496 : : * Reads serialized dependencies into MVDependencies structure.
497 : : */
498 : : MVDependencies *
499 : 558 : statext_dependencies_deserialize(bytea *data)
500 : : {
501 : : int i;
502 : : Size min_expected_size;
503 : : MVDependencies *dependencies;
504 : : char *tmp;
505 : :
506 [ - + ]: 558 : if (data == NULL)
2566 simon@2ndQuadrant.co 507 :UBC 0 : return NULL;
508 : :
1820 tomas.vondra@postgre 509 [ - + - - :CBC 558 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
- - - - +
- - + ]
415 peter@eisentraut.org 510 [ # # # # :UBC 0 : elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
# # # # #
# # # ]
511 : : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
512 : :
513 : : /* read the MVDependencies header */
2566 simon@2ndQuadrant.co 514 :CBC 558 : dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
515 : :
516 : : /* initialize pointer to the data part (skip the varlena header) */
517 [ + - ]: 558 : tmp = VARDATA_ANY(data);
518 : :
519 : : /* read the header fields and perform basic sanity checks */
520 : 558 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
521 : 558 : tmp += sizeof(uint32);
522 : 558 : memcpy(&dependencies->type, tmp, sizeof(uint32));
523 : 558 : tmp += sizeof(uint32);
524 : 558 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
525 : 558 : tmp += sizeof(uint32);
526 : :
527 [ - + ]: 558 : if (dependencies->magic != STATS_DEPS_MAGIC)
2566 simon@2ndQuadrant.co 528 [ # # ]:UBC 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
529 : : dependencies->magic, STATS_DEPS_MAGIC);
530 : :
2566 simon@2ndQuadrant.co 531 [ - + ]:CBC 558 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
2566 simon@2ndQuadrant.co 532 [ # # ]:UBC 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
533 : : dependencies->type, STATS_DEPS_TYPE_BASIC);
534 : :
2566 simon@2ndQuadrant.co 535 [ - + ]:CBC 558 : if (dependencies->ndeps == 0)
1781 tomas.vondra@postgre 536 [ # # ]:UBC 0 : elog(ERROR, "invalid zero-length item array in MVDependencies");
537 : :
538 : : /* what minimum bytea size do we expect for those parameters */
1820 tomas.vondra@postgre 539 :CBC 558 : min_expected_size = SizeOfItem(dependencies->ndeps);
540 : :
2566 simon@2ndQuadrant.co 541 [ - + - - : 558 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
- - - - +
- - + ]
415 peter@eisentraut.org 542 [ # # # # :UBC 0 : elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
# # # # #
# # # ]
543 : : VARSIZE_ANY_EXHDR(data), min_expected_size);
544 : :
545 : : /* allocate space for the MCV items */
2566 simon@2ndQuadrant.co 546 :CBC 558 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
2489 tgl@sss.pgh.pa.us 547 : 558 : + (dependencies->ndeps * sizeof(MVDependency *)));
548 : :
2566 simon@2ndQuadrant.co 549 [ + + ]: 3285 : for (i = 0; i < dependencies->ndeps; i++)
550 : : {
551 : : double degree;
552 : : AttrNumber k;
553 : : MVDependency *d;
554 : :
555 : : /* degree of validity */
556 : 2727 : memcpy(°ree, tmp, sizeof(double));
557 : 2727 : tmp += sizeof(double);
558 : :
559 : : /* number of attributes */
560 : 2727 : memcpy(&k, tmp, sizeof(AttrNumber));
561 : 2727 : tmp += sizeof(AttrNumber);
562 : :
563 : : /* is the number of attributes valid? */
564 [ + - - + ]: 2727 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565 : :
566 : : /* now that we know the number of attributes, allocate the dependency */
567 : 2727 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
2489 tgl@sss.pgh.pa.us 568 : 2727 : + (k * sizeof(AttrNumber)));
569 : :
2566 simon@2ndQuadrant.co 570 : 2727 : d->degree = degree;
571 : 2727 : d->nattributes = k;
572 : :
573 : : /* copy attribute numbers */
574 : 2727 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575 : 2727 : tmp += sizeof(AttrNumber) * d->nattributes;
576 : :
577 : 2727 : dependencies->deps[i] = d;
578 : :
579 : : /* still within the bytea */
580 [ - + - - : 2727 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
- - - - +
- - + ]
581 : : }
582 : :
583 : : /* we should have consumed the whole bytea exactly */
584 [ - + - - : 558 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
- - - - +
- - + ]
585 : :
586 : 558 : return dependencies;
587 : : }
588 : :
589 : : /*
590 : : * dependency_is_fully_matched
591 : : * checks that a functional dependency is fully matched given clauses on
592 : : * attributes (assuming the clauses are suitable equality clauses)
593 : : */
594 : : static bool
2524 bruce@momjian.us 595 : 2322 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
596 : : {
597 : : int j;
598 : :
599 : : /*
600 : : * Check that the dependency actually is fully covered by clauses. We have
601 : : * to translate all attribute numbers, as those are referenced
602 : : */
2566 simon@2ndQuadrant.co 603 [ + + ]: 6057 : for (j = 0; j < dependency->nattributes; j++)
604 : : {
605 : 4767 : int attnum = dependency->attributes[j];
606 : :
607 [ + + ]: 4767 : if (!bms_is_member(attnum, attnums))
608 : 1032 : return false;
609 : : }
610 : :
611 : 1290 : return true;
612 : : }
613 : :
614 : : /*
615 : : * statext_dependencies_load
616 : : * Load the functional dependencies for the indicated pg_statistic_ext tuple
617 : : */
618 : : MVDependencies *
819 tomas.vondra@postgre 619 : 552 : statext_dependencies_load(Oid mvoid, bool inh)
620 : : {
621 : : MVDependencies *result;
622 : : bool isnull;
623 : : Datum deps;
624 : : HeapTuple htup;
625 : :
626 : 552 : htup = SearchSysCache2(STATEXTDATASTXOID,
627 : : ObjectIdGetDatum(mvoid),
628 : : BoolGetDatum(inh));
2566 simon@2ndQuadrant.co 629 [ - + ]: 552 : if (!HeapTupleIsValid(htup))
2527 tgl@sss.pgh.pa.us 630 [ # # ]:UBC 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
631 : :
1767 tomas.vondra@postgre 632 :CBC 552 : deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
633 : : Anum_pg_statistic_ext_data_stxddependencies, &isnull);
2174 tgl@sss.pgh.pa.us 634 [ - + ]: 552 : if (isnull)
2174 tgl@sss.pgh.pa.us 635 [ # # ]:UBC 0 : elog(ERROR,
636 : : "requested statistics kind \"%c\" is not yet built for statistics object %u",
637 : : STATS_EXT_DEPENDENCIES, mvoid);
638 : :
2174 tgl@sss.pgh.pa.us 639 :CBC 552 : result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
640 : :
2566 simon@2ndQuadrant.co 641 : 552 : ReleaseSysCache(htup);
642 : :
2174 tgl@sss.pgh.pa.us 643 : 552 : return result;
644 : : }
645 : :
646 : : /*
647 : : * pg_dependencies_in - input routine for type pg_dependencies.
648 : : *
649 : : * pg_dependencies is real enough to be a table column, but it has no operations
650 : : * of its own, and disallows input too
651 : : */
652 : : Datum
2566 simon@2ndQuadrant.co 653 :UBC 0 : pg_dependencies_in(PG_FUNCTION_ARGS)
654 : : {
655 : : /*
656 : : * pg_node_list stores the data in binary form and parsing text input is
657 : : * not needed, so disallow this.
658 : : */
659 [ # # ]: 0 : ereport(ERROR,
660 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
661 : : errmsg("cannot accept a value of type %s", "pg_dependencies")));
662 : :
663 : : PG_RETURN_VOID(); /* keep compiler quiet */
664 : : }
665 : :
666 : : /*
667 : : * pg_dependencies - output routine for type pg_dependencies.
668 : : */
669 : : Datum
2566 simon@2ndQuadrant.co 670 :CBC 6 : pg_dependencies_out(PG_FUNCTION_ARGS)
671 : : {
2539 alvherre@alvh.no-ip. 672 : 6 : bytea *data = PG_GETARG_BYTEA_PP(0);
673 : 6 : MVDependencies *dependencies = statext_dependencies_deserialize(data);
674 : : int i,
675 : : j;
676 : : StringInfoData str;
677 : :
2566 simon@2ndQuadrant.co 678 : 6 : initStringInfo(&str);
2539 alvherre@alvh.no-ip. 679 : 6 : appendStringInfoChar(&str, '{');
680 : :
2566 simon@2ndQuadrant.co 681 [ + + ]: 36 : for (i = 0; i < dependencies->ndeps; i++)
682 : : {
683 : 30 : MVDependency *dependency = dependencies->deps[i];
684 : :
685 [ + + ]: 30 : if (i > 0)
686 : 24 : appendStringInfoString(&str, ", ");
687 : :
2539 alvherre@alvh.no-ip. 688 : 30 : appendStringInfoChar(&str, '"');
2566 simon@2ndQuadrant.co 689 [ + + ]: 102 : for (j = 0; j < dependency->nattributes; j++)
690 : : {
691 [ + + ]: 72 : if (j == dependency->nattributes - 1)
692 : 30 : appendStringInfoString(&str, " => ");
693 [ + + ]: 42 : else if (j > 0)
694 : 12 : appendStringInfoString(&str, ", ");
695 : :
696 : 72 : appendStringInfo(&str, "%d", dependency->attributes[j]);
697 : : }
2539 alvherre@alvh.no-ip. 698 : 30 : appendStringInfo(&str, "\": %f", dependency->degree);
699 : : }
700 : :
701 : 6 : appendStringInfoChar(&str, '}');
702 : :
2566 simon@2ndQuadrant.co 703 : 6 : PG_RETURN_CSTRING(str.data);
704 : : }
705 : :
706 : : /*
707 : : * pg_dependencies_recv - binary input routine for type pg_dependencies.
708 : : */
709 : : Datum
2566 simon@2ndQuadrant.co 710 :UBC 0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
711 : : {
712 [ # # ]: 0 : ereport(ERROR,
713 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
714 : : errmsg("cannot accept a value of type %s", "pg_dependencies")));
715 : :
716 : : PG_RETURN_VOID(); /* keep compiler quiet */
717 : : }
718 : :
719 : : /*
720 : : * pg_dependencies_send - binary output routine for type pg_dependencies.
721 : : *
722 : : * Functional dependencies are serialized in a bytea value (although the type
723 : : * is named differently), so let's just send that.
724 : : */
725 : : Datum
726 : 0 : pg_dependencies_send(PG_FUNCTION_ARGS)
727 : : {
728 : 0 : return byteasend(fcinfo);
729 : : }
730 : :
731 : : /*
732 : : * dependency_is_compatible_clause
733 : : * Determines if the clause is compatible with functional dependencies
734 : : *
735 : : * Only clauses that have the form of equality to a pseudoconstant, or can be
736 : : * interpreted that way, are currently accepted. Furthermore the variable
737 : : * part of the clause must be a simple Var belonging to the specified
738 : : * relation, whose attribute number we return in *attnum on success.
739 : : */
740 : : static bool
2566 simon@2ndQuadrant.co 741 :CBC 1476 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
742 : : {
743 : : Var *var;
744 : : Node *clause_expr;
745 : :
1488 tomas.vondra@postgre 746 [ + + ]: 1476 : if (IsA(clause, RestrictInfo))
747 : : {
748 : 1416 : RestrictInfo *rinfo = (RestrictInfo *) clause;
749 : :
750 : : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
751 [ + + ]: 1416 : if (rinfo->pseudoconstant)
752 : 3 : return false;
753 : :
754 : : /* Clauses referencing multiple, or no, varnos are incompatible */
755 [ - + ]: 1413 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1488 tomas.vondra@postgre 756 :UBC 0 : return false;
757 : :
1488 tomas.vondra@postgre 758 :CBC 1413 : clause = (Node *) rinfo->clause;
759 : : }
760 : :
761 [ + + ]: 1473 : if (is_opclause(clause))
762 : : {
763 : : /* If it's an opclause, check for Var = Const or Const = Var. */
764 : 567 : OpExpr *expr = (OpExpr *) clause;
765 : :
766 : : /* Only expressions with two arguments are candidates. */
2566 simon@2ndQuadrant.co 767 [ - + ]: 567 : if (list_length(expr->args) != 2)
2566 simon@2ndQuadrant.co 768 :UBC 0 : return false;
769 : :
770 : : /* Make sure non-selected argument is a pseudoconstant. */
2323 tgl@sss.pgh.pa.us 771 [ + - ]:CBC 567 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1115 tomas.vondra@postgre 772 : 567 : clause_expr = linitial(expr->args);
2323 tgl@sss.pgh.pa.us 773 [ # # ]:UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1115 tomas.vondra@postgre 774 : 0 : clause_expr = lsecond(expr->args);
775 : : else
2566 simon@2ndQuadrant.co 776 : 0 : return false;
777 : :
778 : : /*
779 : : * If it's not an "=" operator, just ignore the clause, as it's not
780 : : * compatible with functional dependencies.
781 : : *
782 : : * This uses the function for estimating selectivity, not the operator
783 : : * directly (a bit awkward, but well ...).
784 : : *
785 : : * XXX this is pretty dubious; probably it'd be better to check btree
786 : : * or hash opclass membership, so as not to be fooled by custom
787 : : * selectivity functions, and to be more consistent with decisions
788 : : * elsewhere in the planner.
789 : : */
2566 simon@2ndQuadrant.co 790 [ + + ]:CBC 567 : if (get_oprrest(expr->opno) != F_EQSEL)
791 : 18 : return false;
792 : :
793 : : /* OK to proceed with checking "var" */
794 : : }
1488 tomas.vondra@postgre 795 [ + + ]: 906 : else if (IsA(clause, ScalarArrayOpExpr))
796 : : {
797 : : /* If it's an scalar array operator, check for Var IN Const. */
1431 tgl@sss.pgh.pa.us 798 : 870 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
799 : :
800 : : /*
801 : : * Reject ALL() variant, we only care about ANY/IN.
802 : : *
803 : : * XXX Maybe we should check if all the values are the same, and allow
804 : : * ALL in that case? Doesn't seem very practical, though.
805 : : */
1492 tomas.vondra@postgre 806 [ + + ]: 870 : if (!expr->useOr)
807 : 18 : return false;
808 : :
809 : : /* Only expressions with two arguments are candidates. */
810 [ - + ]: 852 : if (list_length(expr->args) != 2)
1492 tomas.vondra@postgre 811 :UBC 0 : return false;
812 : :
813 : : /*
814 : : * We know it's always (Var IN Const), so we assume the var is the
815 : : * first argument, and pseudoconstant is the second one.
816 : : */
1492 tomas.vondra@postgre 817 [ - + ]:CBC 852 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1492 tomas.vondra@postgre 818 :UBC 0 : return false;
819 : :
1115 tomas.vondra@postgre 820 :CBC 852 : clause_expr = linitial(expr->args);
821 : :
822 : : /*
823 : : * If it's not an "=" operator, just ignore the clause, as it's not
824 : : * compatible with functional dependencies. The operator is identified
825 : : * simply by looking at which function it uses to estimate
826 : : * selectivity. That's a bit strange, but it's what other similar
827 : : * places do.
828 : : */
1492 829 [ + + ]: 852 : if (get_oprrest(expr->opno) != F_EQSEL)
830 : 90 : return false;
831 : :
832 : : /* OK to proceed with checking "var" */
833 : : }
1488 834 [ + - ]: 36 : else if (is_orclause(clause))
835 : : {
1115 836 : 36 : BoolExpr *bool_expr = (BoolExpr *) clause;
837 : : ListCell *lc;
838 : :
839 : : /* start with no attribute number */
1488 840 : 36 : *attnum = InvalidAttrNumber;
841 : :
1115 842 [ + - + + : 75 : foreach(lc, bool_expr->args)
+ + ]
843 : : {
844 : : AttrNumber clause_attnum;
845 : :
846 : : /*
847 : : * Had we found incompatible clause in the arguments, treat the
848 : : * whole clause as incompatible.
849 : : */
1488 850 [ + + ]: 60 : if (!dependency_is_compatible_clause((Node *) lfirst(lc),
851 : : relid, &clause_attnum))
852 : 21 : return false;
853 : :
854 [ + + ]: 42 : if (*attnum == InvalidAttrNumber)
855 : 18 : *attnum = clause_attnum;
856 : :
857 : : /* ensure all the variables are the same (same attnum) */
858 [ + + ]: 42 : if (*attnum != clause_attnum)
859 : 3 : return false;
860 : : }
861 : :
862 : : /* the Var is already checked by the recursive call */
863 : 15 : return true;
864 : : }
1488 tomas.vondra@postgre 865 [ # # ]:UBC 0 : else if (is_notclause(clause))
866 : : {
867 : : /*
868 : : * "NOT x" can be interpreted as "x = false", so get the argument and
869 : : * proceed with seeing if it's a suitable Var.
870 : : */
1115 871 : 0 : clause_expr = (Node *) get_notclausearg(clause);
872 : : }
873 : : else
874 : : {
875 : : /*
876 : : * A boolean expression "x" can be interpreted as "x = true", so
877 : : * proceed with seeing if it's a suitable Var.
878 : : */
879 : 0 : clause_expr = (Node *) clause;
880 : : }
881 : :
882 : : /*
883 : : * We may ignore any RelabelType node above the operand. (There won't be
884 : : * more than one, since eval_const_expressions has been applied already.)
885 : : */
1115 tomas.vondra@postgre 886 [ - + ]:CBC 1311 : if (IsA(clause_expr, RelabelType))
1115 tomas.vondra@postgre 887 :UBC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
888 : :
889 : : /* We only support plain Vars for now */
1115 tomas.vondra@postgre 890 [ + + ]:CBC 1311 : if (!IsA(clause_expr, Var))
2323 tgl@sss.pgh.pa.us 891 : 144 : return false;
892 : :
893 : : /* OK, we know we have a Var */
1115 tomas.vondra@postgre 894 : 1167 : var = (Var *) clause_expr;
895 : :
896 : : /* Ensure Var is from the correct relation */
2323 tgl@sss.pgh.pa.us 897 [ - + ]: 1167 : if (var->varno != relid)
2323 tgl@sss.pgh.pa.us 898 :UBC 0 : return false;
899 : :
900 : : /* We also better ensure the Var is from the current level */
2323 tgl@sss.pgh.pa.us 901 [ - + ]:CBC 1167 : if (var->varlevelsup != 0)
2323 tgl@sss.pgh.pa.us 902 :UBC 0 : return false;
903 : :
904 : : /* Also ignore system attributes (we don't allow stats on those) */
2323 tgl@sss.pgh.pa.us 905 [ - + ]:CBC 1167 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
2323 tgl@sss.pgh.pa.us 906 :UBC 0 : return false;
907 : :
2323 tgl@sss.pgh.pa.us 908 :CBC 1167 : *attnum = var->varattno;
909 : 1167 : return true;
910 : : }
911 : :
912 : : /*
913 : : * find_strongest_dependency
914 : : * find the strongest dependency on the attributes
915 : : *
916 : : * When applying functional dependencies, we start with the strongest
917 : : * dependencies. That is, we select the dependency that:
918 : : *
919 : : * (a) has all attributes covered by equality clauses
920 : : *
921 : : * (b) has the most attributes
922 : : *
923 : : * (c) has the highest degree of validity
924 : : *
925 : : * This guarantees that we eliminate the most redundant conditions first
926 : : * (see the comment in dependencies_clauselist_selectivity).
927 : : */
928 : : static MVDependency *
1553 tomas.vondra@postgre 929 : 1239 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
930 : : Bitmapset *attnums)
931 : : {
932 : : int i,
933 : : j;
2566 simon@2ndQuadrant.co 934 : 1239 : MVDependency *strongest = NULL;
935 : :
936 : : /* number of attnums in clauses */
937 : 1239 : int nattnums = bms_num_members(attnums);
938 : :
939 : : /*
940 : : * Iterate over the MVDependency items and find the strongest one from the
941 : : * fully-matched dependencies. We do the cheap checks first, before
942 : : * matching it against the attnums.
943 : : */
1553 tomas.vondra@postgre 944 [ + + ]: 2496 : for (i = 0; i < ndependencies; i++)
945 : : {
946 [ + + ]: 7116 : for (j = 0; j < dependencies[i]->ndeps; j++)
947 : : {
948 : 5859 : MVDependency *dependency = dependencies[i]->deps[j];
949 : :
950 : : /*
951 : : * Skip dependencies referencing more attributes than available
952 : : * clauses, as those can't be fully matched.
953 : : */
954 [ + + ]: 5859 : if (dependency->nattributes > nattnums)
2566 simon@2ndQuadrant.co 955 : 3537 : continue;
956 : :
1553 tomas.vondra@postgre 957 [ + + ]: 2322 : if (strongest)
958 : : {
959 : : /* skip dependencies on fewer attributes than the strongest. */
960 [ - + ]: 1464 : if (dependency->nattributes < strongest->nattributes)
1553 tomas.vondra@postgre 961 :UBC 0 : continue;
962 : :
963 : : /* also skip weaker dependencies when attribute count matches */
1553 tomas.vondra@postgre 964 [ + + ]:CBC 1464 : if (strongest->nattributes == dependency->nattributes &&
965 [ - + ]: 1323 : strongest->degree > dependency->degree)
1553 tomas.vondra@postgre 966 :UBC 0 : continue;
967 : : }
968 : :
969 : : /*
970 : : * this dependency is stronger, but we must still check that it's
971 : : * fully matched to these attnums. We perform this check last as
972 : : * it's slightly more expensive than the previous checks.
973 : : */
1553 tomas.vondra@postgre 974 [ + + ]:CBC 2322 : if (dependency_is_fully_matched(dependency, attnums))
975 : 1290 : strongest = dependency; /* save new best match */
976 : : }
977 : : }
978 : :
2566 simon@2ndQuadrant.co 979 : 1239 : return strongest;
980 : : }
981 : :
982 : : /*
983 : : * clauselist_apply_dependencies
984 : : * Apply the specified functional dependencies to a list of clauses and
985 : : * return the estimated selectivity of the clauses that are compatible
986 : : * with any of the given dependencies.
987 : : *
988 : : * This will estimate all not-already-estimated clauses that are compatible
989 : : * with functional dependencies, and which have an attribute mentioned by any
990 : : * of the given dependencies (either as an implying or implied attribute).
991 : : *
992 : : * Given (lists of) clauses on attributes (a,b) and a functional dependency
993 : : * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
994 : : * using the formula
995 : : *
996 : : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
997 : : *
998 : : * where 'f' is the degree of dependency. This reflects the fact that we
999 : : * expect a fraction f of all rows to be consistent with the dependency
1000 : : * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1001 : : * treated as independent.
1002 : : *
1003 : : * In practice, we use a slightly modified version of this formula, which uses
1004 : : * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1005 : : * should obviously not exceed either column's individual selectivity. I.e.,
1006 : : * we actually combine selectivities using the formula
1007 : : *
1008 : : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1009 : : *
1010 : : * This can make quite a difference if the specific values matching the
1011 : : * clauses are not consistent with the functional dependency.
1012 : : */
1013 : : static Selectivity
1478 dean.a.rasheed@gmail 1014 : 546 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
1015 : : int varRelid, JoinType jointype,
1016 : : SpecialJoinInfo *sjinfo,
1017 : : MVDependency **dependencies, int ndependencies,
1018 : : AttrNumber *list_attnums,
1019 : : Bitmapset **estimatedclauses)
1020 : : {
1021 : : Bitmapset *attnums;
1022 : : int i;
1023 : : int j;
1024 : : int nattrs;
1025 : : Selectivity *attr_sel;
1026 : : int attidx;
1027 : : int listidx;
1028 : : ListCell *l;
1029 : : Selectivity s1;
1030 : :
1031 : : /*
1032 : : * Extract the attnums of all implying and implied attributes from all the
1033 : : * given dependencies. Each of these attributes is expected to have at
1034 : : * least 1 not-already-estimated compatible clause that we will estimate
1035 : : * here.
1036 : : */
1037 : 546 : attnums = NULL;
1038 [ + + ]: 1239 : for (i = 0; i < ndependencies; i++)
1039 : : {
1040 [ + + ]: 2220 : for (j = 0; j < dependencies[i]->nattributes; j++)
1041 : : {
1042 : 1527 : AttrNumber attnum = dependencies[i]->attributes[j];
1043 : :
1044 : 1527 : attnums = bms_add_member(attnums, attnum);
1045 : : }
1046 : : }
1047 : :
1048 : : /*
1049 : : * Compute per-column selectivity estimates for each of these attributes,
1050 : : * and mark all the corresponding clauses as estimated.
1051 : : */
1052 : 546 : nattrs = bms_num_members(attnums);
1053 : 546 : attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1054 : :
1055 : 546 : attidx = 0;
1056 : 546 : i = -1;
1057 [ + + ]: 1791 : while ((i = bms_next_member(attnums, i)) >= 0)
1058 : : {
1059 : 1245 : List *attr_clauses = NIL;
1060 : : Selectivity simple_sel;
1061 : :
1062 : 1245 : listidx = -1;
1063 [ + - + + : 4206 : foreach(l, clauses)
+ + ]
1064 : : {
1065 : 2961 : Node *clause = (Node *) lfirst(l);
1066 : :
1067 : 2961 : listidx++;
1068 [ + + ]: 2961 : if (list_attnums[listidx] == i)
1069 : : {
1070 : 1245 : attr_clauses = lappend(attr_clauses, clause);
1071 : 1245 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1072 : : }
1073 : : }
1074 : :
1228 1075 : 1245 : simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1076 : : jointype, sjinfo, false);
1478 1077 : 1245 : attr_sel[attidx++] = simple_sel;
1078 : : }
1079 : :
1080 : : /*
1081 : : * Now combine these selectivities using the dependency information. For
1082 : : * chains of dependencies such as a -> b -> c, the b -> c dependency will
1083 : : * come before the a -> b dependency in the array, so we traverse the
1084 : : * array backwards to ensure such chains are computed in the right order.
1085 : : *
1086 : : * As explained above, pairs of selectivities are combined using the
1087 : : * formula
1088 : : *
1089 : : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1090 : : *
1091 : : * to ensure that the combined selectivity is never greater than either
1092 : : * individual selectivity.
1093 : : *
1094 : : * Where multiple dependencies apply (e.g., a -> b -> c), we use
1095 : : * conditional probabilities to compute the overall result as follows:
1096 : : *
1097 : : * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1098 : : *
1099 : : * so we replace the selectivities of all implied attributes with
1100 : : * conditional probabilities, that are conditional on all their implying
1101 : : * attributes. The selectivities of all other non-implied attributes are
1102 : : * left as they are.
1103 : : */
1104 [ + + ]: 1239 : for (i = ndependencies - 1; i >= 0; i--)
1105 : : {
1106 : 693 : MVDependency *dependency = dependencies[i];
1107 : : AttrNumber attnum;
1108 : : Selectivity s2;
1109 : : double f;
1110 : :
1111 : : /* Selectivity of all the implying attributes */
1112 : 693 : s1 = 1.0;
1113 [ + + ]: 1527 : for (j = 0; j < dependency->nattributes - 1; j++)
1114 : : {
1115 : 834 : attnum = dependency->attributes[j];
1116 : 834 : attidx = bms_member_index(attnums, attnum);
1117 : 834 : s1 *= attr_sel[attidx];
1118 : : }
1119 : :
1120 : : /* Original selectivity of the implied attribute */
1121 : 693 : attnum = dependency->attributes[j];
1122 : 693 : attidx = bms_member_index(attnums, attnum);
1123 : 693 : s2 = attr_sel[attidx];
1124 : :
1125 : : /*
1126 : : * Replace s2 with the conditional probability s2 given s1, computed
1127 : : * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1128 : : *
1129 : : * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1130 : : *
1131 : : * where P(a) = s1, the selectivity of the implying attributes, and
1132 : : * P(b) = s2, the selectivity of the implied attribute.
1133 : : */
1134 : 693 : f = dependency->degree;
1135 : :
1136 [ + + ]: 693 : if (s1 <= s2)
1137 : 663 : attr_sel[attidx] = f + (1 - f) * s2;
1138 : : else
1139 : 30 : attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1140 : : }
1141 : :
1142 : : /*
1143 : : * The overall selectivity of all the clauses on all these attributes is
1144 : : * then the product of all the original (non-implied) probabilities and
1145 : : * the new conditional (implied) probabilities.
1146 : : */
1147 : 546 : s1 = 1.0;
1148 [ + + ]: 1791 : for (i = 0; i < nattrs; i++)
1149 : 1245 : s1 *= attr_sel[i];
1150 : :
1151 [ - + - + ]: 546 : CLAMP_PROBABILITY(s1);
1152 : :
1153 : 546 : pfree(attr_sel);
1154 : 546 : bms_free(attnums);
1155 : :
1156 : 546 : return s1;
1157 : : }
1158 : :
1159 : : /*
1160 : : * dependency_is_compatible_expression
1161 : : * Determines if the expression is compatible with functional dependencies
1162 : : *
1163 : : * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1164 : : * expression is a simple Var. On success, return the matching statistics
1165 : : * expression into *expr.
1166 : : */
1167 : : static bool
1115 tomas.vondra@postgre 1168 : 321 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1169 : : {
1170 : : ListCell *lc,
1171 : : *lc2;
1172 : : Node *clause_expr;
1173 : :
1174 [ + + ]: 321 : if (IsA(clause, RestrictInfo))
1175 : : {
1176 : 276 : RestrictInfo *rinfo = (RestrictInfo *) clause;
1177 : :
1178 : : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1179 [ + + ]: 276 : if (rinfo->pseudoconstant)
1180 : 3 : return false;
1181 : :
1182 : : /* Clauses referencing multiple, or no, varnos are incompatible */
1183 [ - + ]: 273 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1115 tomas.vondra@postgre 1184 :UBC 0 : return false;
1185 : :
1115 tomas.vondra@postgre 1186 :CBC 273 : clause = (Node *) rinfo->clause;
1187 : : }
1188 : :
1189 [ + + ]: 318 : if (is_opclause(clause))
1190 : : {
1191 : : /* If it's an opclause, check for Var = Const or Const = Var. */
1192 : 102 : OpExpr *expr = (OpExpr *) clause;
1193 : :
1194 : : /* Only expressions with two arguments are candidates. */
1195 [ - + ]: 102 : if (list_length(expr->args) != 2)
1115 tomas.vondra@postgre 1196 :UBC 0 : return false;
1197 : :
1198 : : /* Make sure non-selected argument is a pseudoconstant. */
1115 tomas.vondra@postgre 1199 [ + - ]:CBC 102 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1200 : 102 : clause_expr = linitial(expr->args);
1115 tomas.vondra@postgre 1201 [ # # ]:UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1202 : 0 : clause_expr = lsecond(expr->args);
1203 : : else
1204 : 0 : return false;
1205 : :
1206 : : /*
1207 : : * If it's not an "=" operator, just ignore the clause, as it's not
1208 : : * compatible with functional dependencies.
1209 : : *
1210 : : * This uses the function for estimating selectivity, not the operator
1211 : : * directly (a bit awkward, but well ...).
1212 : : *
1213 : : * XXX this is pretty dubious; probably it'd be better to check btree
1214 : : * or hash opclass membership, so as not to be fooled by custom
1215 : : * selectivity functions, and to be more consistent with decisions
1216 : : * elsewhere in the planner.
1217 : : */
1115 tomas.vondra@postgre 1218 [ + + ]:CBC 102 : if (get_oprrest(expr->opno) != F_EQSEL)
1219 : 18 : return false;
1220 : :
1221 : : /* OK to proceed with checking "var" */
1222 : : }
1223 [ + + ]: 216 : else if (IsA(clause, ScalarArrayOpExpr))
1224 : : {
1225 : : /* If it's an scalar array operator, check for Var IN Const. */
1226 : 195 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1227 : :
1228 : : /*
1229 : : * Reject ALL() variant, we only care about ANY/IN.
1230 : : *
1231 : : * FIXME Maybe we should check if all the values are the same, and
1232 : : * allow ALL in that case? Doesn't seem very practical, though.
1233 : : */
1234 [ + + ]: 195 : if (!expr->useOr)
1235 : 18 : return false;
1236 : :
1237 : : /* Only expressions with two arguments are candidates. */
1238 [ - + ]: 177 : if (list_length(expr->args) != 2)
1115 tomas.vondra@postgre 1239 :UBC 0 : return false;
1240 : :
1241 : : /*
1242 : : * We know it's always (Var IN Const), so we assume the var is the
1243 : : * first argument, and pseudoconstant is the second one.
1244 : : */
1115 tomas.vondra@postgre 1245 [ - + ]:CBC 177 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1115 tomas.vondra@postgre 1246 :UBC 0 : return false;
1247 : :
1115 tomas.vondra@postgre 1248 :CBC 177 : clause_expr = linitial(expr->args);
1249 : :
1250 : : /*
1251 : : * If it's not an "=" operator, just ignore the clause, as it's not
1252 : : * compatible with functional dependencies. The operator is identified
1253 : : * simply by looking at which function it uses to estimate
1254 : : * selectivity. That's a bit strange, but it's what other similar
1255 : : * places do.
1256 : : */
1257 [ + + ]: 177 : if (get_oprrest(expr->opno) != F_EQSEL)
1258 : 90 : return false;
1259 : :
1260 : : /* OK to proceed with checking "var" */
1261 : : }
1262 [ + - ]: 21 : else if (is_orclause(clause))
1263 : : {
1264 : 21 : BoolExpr *bool_expr = (BoolExpr *) clause;
1265 : :
1266 : : /* start with no expression (we'll use the first match) */
1267 : 21 : *expr = NULL;
1268 : :
1269 [ + - + + : 60 : foreach(lc, bool_expr->args)
+ + ]
1270 : : {
1271 : 45 : Node *or_expr = NULL;
1272 : :
1273 : : /*
1274 : : * Had we found incompatible expression in the arguments, treat
1275 : : * the whole expression as incompatible.
1276 : : */
1277 [ + + ]: 45 : if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1278 : : statlist, &or_expr))
1279 : 6 : return false;
1280 : :
1281 [ + + ]: 42 : if (*expr == NULL)
1282 : 18 : *expr = or_expr;
1283 : :
1284 : : /* ensure all the expressions are the same */
1285 [ + + ]: 42 : if (!equal(or_expr, *expr))
1286 : 3 : return false;
1287 : : }
1288 : :
1289 : : /* the expression is already checked by the recursive call */
1290 : 15 : return true;
1291 : : }
1115 tomas.vondra@postgre 1292 [ # # ]:UBC 0 : else if (is_notclause(clause))
1293 : : {
1294 : : /*
1295 : : * "NOT x" can be interpreted as "x = false", so get the argument and
1296 : : * proceed with seeing if it's a suitable Var.
1297 : : */
1298 : 0 : clause_expr = (Node *) get_notclausearg(clause);
1299 : : }
1300 : : else
1301 : : {
1302 : : /*
1303 : : * A boolean expression "x" can be interpreted as "x = true", so
1304 : : * proceed with seeing if it's a suitable Var.
1305 : : */
1306 : 0 : clause_expr = (Node *) clause;
1307 : : }
1308 : :
1309 : : /*
1310 : : * We may ignore any RelabelType node above the operand. (There won't be
1311 : : * more than one, since eval_const_expressions has been applied already.)
1312 : : */
1115 tomas.vondra@postgre 1313 [ - + ]:CBC 171 : if (IsA(clause_expr, RelabelType))
1115 tomas.vondra@postgre 1314 :UBC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1315 : :
1316 : : /*
1317 : : * Search for a matching statistics expression.
1318 : : */
1115 tomas.vondra@postgre 1319 [ + - + + :CBC 174 : foreach(lc, statlist)
+ + ]
1320 : : {
1321 : 171 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1322 : :
1323 : : /* ignore stats without dependencies */
1324 [ - + ]: 171 : if (info->kind != STATS_EXT_DEPENDENCIES)
1115 tomas.vondra@postgre 1325 :UBC 0 : continue;
1326 : :
1115 tomas.vondra@postgre 1327 [ + + + - :CBC 279 : foreach(lc2, info->exprs)
+ + ]
1328 : : {
1329 : 276 : Node *stat_expr = (Node *) lfirst(lc2);
1330 : :
1331 [ + + ]: 276 : if (equal(clause_expr, stat_expr))
1332 : : {
1333 : 168 : *expr = stat_expr;
1334 : 168 : return true;
1335 : : }
1336 : : }
1337 : : }
1338 : :
1339 : 3 : return false;
1340 : : }
1341 : :
1342 : : /*
1343 : : * dependencies_clauselist_selectivity
1344 : : * Return the estimated selectivity of (a subset of) the given clauses
1345 : : * using functional dependency statistics, or 1.0 if no useful functional
1346 : : * dependency statistic exists.
1347 : : *
1348 : : * 'estimatedclauses' is an input/output argument that gets a bit set
1349 : : * corresponding to the (zero-based) list index of each clause that is included
1350 : : * in the estimated selectivity.
1351 : : *
1352 : : * Given equality clauses on attributes (a,b) we find the strongest dependency
1353 : : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1354 : : * dependency, we then combine the per-clause selectivities using the formula
1355 : : *
1356 : : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1357 : : *
1358 : : * where 'f' is the degree of the dependency. (Actually we use a slightly
1359 : : * modified version of this formula -- see clauselist_apply_dependencies()).
1360 : : *
1361 : : * With clauses on more than two attributes, the dependencies are applied
1362 : : * recursively, starting with the widest/strongest dependencies. For example
1363 : : * P(a,b,c) is first split like this:
1364 : : *
1365 : : * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1366 : : *
1367 : : * assuming (a,b=>c) is the strongest dependency.
1368 : : */
1369 : : Selectivity
2566 simon@2ndQuadrant.co 1370 : 867 : dependencies_clauselist_selectivity(PlannerInfo *root,
1371 : : List *clauses,
1372 : : int varRelid,
1373 : : JoinType jointype,
1374 : : SpecialJoinInfo *sjinfo,
1375 : : RelOptInfo *rel,
1376 : : Bitmapset **estimatedclauses)
1377 : : {
1378 : 867 : Selectivity s1 = 1.0;
1379 : : ListCell *l;
1380 : 867 : Bitmapset *clauses_attnums = NULL;
1381 : : AttrNumber *list_attnums;
1382 : : int listidx;
1383 : : MVDependencies **func_dependencies;
1384 : : int nfunc_dependencies;
1385 : : int total_ndeps;
1386 : : MVDependency **dependencies;
1387 : : int ndependencies;
1388 : : int i;
1389 : : AttrNumber attnum_offset;
820 tomas.vondra@postgre 1390 [ + - ]: 867 : RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1391 : :
1392 : : /* unique expressions */
1393 : : Node **unique_exprs;
1394 : : int unique_exprs_cnt;
1395 : :
1396 : : /* check if there's any stats that might be useful for us. */
2566 simon@2ndQuadrant.co 1397 [ + + ]: 867 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1398 : 225 : return 1.0;
1399 : :
1478 dean.a.rasheed@gmail 1400 : 642 : list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
2566 simon@2ndQuadrant.co 1401 : 642 : list_length(clauses));
1402 : :
1403 : : /*
1404 : : * We allocate space as if every clause was a unique expression, although
1405 : : * that's probably overkill. Some will be simple column references that
1406 : : * we'll translate to attnums, and there might be duplicates. But it's
1407 : : * easier and cheaper to just do one allocation than repalloc later.
1408 : : */
1115 tomas.vondra@postgre 1409 : 642 : unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
1410 : 642 : unique_exprs_cnt = 0;
1411 : :
1412 : : /*
1413 : : * Pre-process the clauses list to extract the attnums seen in each item.
1414 : : * We need to determine if there's any clauses which will be useful for
1415 : : * dependency selectivity estimations. Along the way we'll record all of
1416 : : * the attnums for each clause in a list which we'll reference later so we
1417 : : * don't need to repeat the same work again. We'll also keep track of all
1418 : : * attnums seen.
1419 : : *
1420 : : * We also skip clauses that we already estimated using different types of
1421 : : * statistics (we treat them as incompatible).
1422 : : *
1423 : : * To handle expressions, we assign them negative attnums, as if it was a
1424 : : * system attribute (this is fine, as we only allow extended stats on user
1425 : : * attributes). And then we offset everything by the number of
1426 : : * expressions, so that we can store the values in a bitmapset.
1427 : : */
2566 simon@2ndQuadrant.co 1428 : 642 : listidx = 0;
1429 [ + - + + : 2079 : foreach(l, clauses)
+ + ]
1430 : : {
1431 : 1437 : Node *clause = (Node *) lfirst(l);
1432 : : AttrNumber attnum;
1115 tomas.vondra@postgre 1433 : 1437 : Node *expr = NULL;
1434 : :
1435 : : /* ignore clause by default */
1436 : 1437 : list_attnums[listidx] = InvalidAttrNumber;
1437 : :
1438 [ + + ]: 1437 : if (!bms_is_member(listidx, *estimatedclauses))
1439 : : {
1440 : : /*
1441 : : * If it's a simple column reference, just extract the attnum. If
1442 : : * it's an expression, assign a negative attnum as if it was a
1443 : : * system attribute.
1444 : : */
1445 [ + + ]: 1416 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1446 : : {
1447 : 1140 : list_attnums[listidx] = attnum;
1448 : : }
1449 [ + + ]: 276 : else if (dependency_is_compatible_expression(clause, rel->relid,
1450 : : rel->statlist,
1451 : : &expr))
1452 : : {
1453 : : /* special attnum assigned to this expression */
1454 : 141 : attnum = InvalidAttrNumber;
1455 : :
1456 [ - + ]: 141 : Assert(expr != NULL);
1457 : :
1458 : : /* If the expression is duplicate, use the same attnum. */
1459 [ + + ]: 237 : for (i = 0; i < unique_exprs_cnt; i++)
1460 : : {
1461 [ - + ]: 96 : if (equal(unique_exprs[i], expr))
1462 : : {
1463 : : /* negative attribute number to expression */
1115 tomas.vondra@postgre 1464 :UBC 0 : attnum = -(i + 1);
1465 : 0 : break;
1466 : : }
1467 : : }
1468 : :
1469 : : /* not found in the list, so add it */
1115 tomas.vondra@postgre 1470 [ + - ]:CBC 141 : if (attnum == InvalidAttrNumber)
1471 : : {
1472 : 141 : unique_exprs[unique_exprs_cnt++] = expr;
1473 : :
1474 : : /* after incrementing the value, to get -1, -2, ... */
1475 : 141 : attnum = (-unique_exprs_cnt);
1476 : : }
1477 : :
1478 : : /* remember which attnum was assigned to this clause */
1479 : 141 : list_attnums[listidx] = attnum;
1480 : : }
1481 : : }
1482 : :
2566 simon@2ndQuadrant.co 1483 : 1437 : listidx++;
1484 : : }
1485 : :
1115 tomas.vondra@postgre 1486 [ - + ]: 642 : Assert(listidx == list_length(clauses));
1487 : :
1488 : : /*
1489 : : * How much we need to offset the attnums? If there are no expressions,
1490 : : * then no offset is needed. Otherwise we need to offset enough for the
1491 : : * lowest value (-unique_exprs_cnt) to become 1.
1492 : : */
1493 [ + + ]: 642 : if (unique_exprs_cnt > 0)
1494 : 66 : attnum_offset = (unique_exprs_cnt + 1);
1495 : : else
1496 : 576 : attnum_offset = 0;
1497 : :
1498 : : /*
1499 : : * Now that we know how many expressions there are, we can offset the
1500 : : * values just enough to build the bitmapset.
1501 : : */
1502 [ + + ]: 2079 : for (i = 0; i < list_length(clauses); i++)
1503 : : {
1504 : : AttrNumber attnum;
1505 : :
1506 : : /* ignore incompatible or already estimated clauses */
1507 [ + + ]: 1437 : if (list_attnums[i] == InvalidAttrNumber)
1508 : 156 : continue;
1509 : :
1510 : : /* make sure the attnum is in the expected range */
1511 [ - + ]: 1281 : Assert(list_attnums[i] >= (-unique_exprs_cnt));
1512 [ - + ]: 1281 : Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1513 : :
1514 : : /* make sure the attnum is positive (valid AttrNumber) */
1515 : 1281 : attnum = list_attnums[i] + attnum_offset;
1516 : :
1517 : : /*
1518 : : * Either it's a regular attribute, or it's an expression, in which
1519 : : * case we must not have seen it before (expressions are unique).
1520 : : *
1521 : : * XXX Check whether it's a regular attribute has to be done using the
1522 : : * original attnum, while the second check has to use the value with
1523 : : * an offset.
1524 : : */
1525 [ + + - + ]: 1281 : Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1526 : : !bms_is_member(attnum, clauses_attnums));
1527 : :
1528 : : /*
1529 : : * Remember the offset attnum, both for attributes and expressions.
1530 : : * We'll pass list_attnums to clauselist_apply_dependencies, which
1531 : : * uses it to identify clauses in a bitmap. We could also pass the
1532 : : * offset, but this is more convenient.
1533 : : */
1534 : 1281 : list_attnums[i] = attnum;
1535 : :
1536 : 1281 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
1537 : : }
1538 : :
1539 : : /*
1540 : : * If there's not at least two distinct attnums and expressions, then
1541 : : * reject the whole list of clauses. We must return 1.0 so the calling
1542 : : * function's selectivity is unaffected.
1543 : : */
1327 drowley@postgresql.o 1544 [ + + ]: 642 : if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1545 : : {
1478 dean.a.rasheed@gmail 1546 : 96 : bms_free(clauses_attnums);
2566 simon@2ndQuadrant.co 1547 : 96 : pfree(list_attnums);
1548 : 96 : return 1.0;
1549 : : }
1550 : :
1551 : : /*
1552 : : * Load all functional dependencies matching at least two parameters. We
1553 : : * can simply consider all dependencies at once, without having to search
1554 : : * for the best statistics object.
1555 : : *
1556 : : * To not waste cycles and memory, we deserialize dependencies only for
1557 : : * statistics that match at least two attributes. The array is allocated
1558 : : * with the assumption that all objects match - we could grow the array to
1559 : : * make it just the right size, but it's likely wasteful anyway thanks to
1560 : : * moving the freed chunks to freelists etc.
1561 : : */
1478 dean.a.rasheed@gmail 1562 : 546 : func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1563 : 546 : list_length(rel->statlist));
1564 : 546 : nfunc_dependencies = 0;
1565 : 546 : total_ndeps = 0;
1566 : :
1567 [ + - + + : 1161 : foreach(l, rel->statlist)
+ + ]
1568 : : {
1569 : 615 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1570 : : int nmatched;
1571 : : int nexprs;
1572 : : int k;
1573 : : MVDependencies *deps;
1574 : :
1575 : : /* skip statistics that are not of the correct type */
1553 tomas.vondra@postgre 1576 [ + + ]: 615 : if (stat->kind != STATS_EXT_DEPENDENCIES)
1577 : 54 : continue;
1578 : :
1579 : : /* skip statistics with mismatching stxdinherit value */
819 1580 [ - + ]: 561 : if (stat->inherit != rte->inh)
819 tomas.vondra@postgre 1581 :UBC 0 : continue;
1582 : :
1583 : : /*
1584 : : * Count matching attributes - we have to undo the attnum offsets. The
1585 : : * input attribute numbers are not offset (expressions are not
1586 : : * included in stat->keys, so it's not necessary). But we need to
1587 : : * offset it before checking against clauses_attnums.
1588 : : */
1115 tomas.vondra@postgre 1589 :CBC 561 : nmatched = 0;
1590 : 561 : k = -1;
1591 [ + + ]: 2052 : while ((k = bms_next_member(stat->keys, k)) >= 0)
1592 : : {
1593 : 1491 : AttrNumber attnum = (AttrNumber) k;
1594 : :
1595 : : /* skip expressions */
1596 [ - + ]: 1491 : if (!AttrNumberIsForUserDefinedAttr(attnum))
1115 tomas.vondra@postgre 1597 :UBC 0 : continue;
1598 : :
1599 : : /* apply the same offset as above */
1115 tomas.vondra@postgre 1600 :CBC 1491 : attnum += attnum_offset;
1601 : :
1602 [ + + ]: 1491 : if (bms_is_member(attnum, clauses_attnums))
1603 : 1116 : nmatched++;
1604 : : }
1605 : :
1606 : : /* count matching expressions */
1607 : 561 : nexprs = 0;
1608 [ + + ]: 690 : for (i = 0; i < unique_exprs_cnt; i++)
1609 : : {
1610 : : ListCell *lc;
1611 : :
1612 [ + - + + : 516 : foreach(lc, stat->exprs)
+ + ]
1613 : : {
1614 : 387 : Node *stat_expr = (Node *) lfirst(lc);
1615 : :
1616 : : /* try to match it */
1617 [ + + ]: 387 : if (equal(stat_expr, unique_exprs[i]))
1618 : 129 : nexprs++;
1619 : : }
1620 : : }
1621 : :
1622 : : /*
1623 : : * Skip objects matching fewer than two attributes/expressions from
1624 : : * clauses.
1625 : : */
1626 [ + + ]: 561 : if (nmatched + nexprs < 2)
1553 1627 : 9 : continue;
1628 : :
819 1629 : 552 : deps = statext_dependencies_load(stat->statOid, rte->inh);
1630 : :
1631 : : /*
1632 : : * The expressions may be represented by different attnums in the
1633 : : * stats, we need to remap them to be consistent with the clauses.
1634 : : * That will make the later steps (e.g. picking the strongest item and
1635 : : * so on) much simpler and cheaper, because it won't need to care
1636 : : * about the offset at all.
1637 : : *
1638 : : * When we're at it, we can ignore dependencies that are not fully
1639 : : * matched by clauses (i.e. referencing attributes or expressions that
1640 : : * are not in the clauses).
1641 : : *
1642 : : * We have to do this for all statistics, as long as there are any
1643 : : * expressions - we need to shift the attnums in all dependencies.
1644 : : *
1645 : : * XXX Maybe we should do this always, because it also eliminates some
1646 : : * of the dependencies early. It might be cheaper than having to walk
1647 : : * the longer list in find_strongest_dependency later, especially as
1648 : : * we need to do that repeatedly?
1649 : : *
1650 : : * XXX We have to do this even when there are no expressions in
1651 : : * clauses, otherwise find_strongest_dependency may fail for stats
1652 : : * with expressions (due to lookup of negative value in bitmap). So we
1653 : : * need to at least filter out those dependencies. Maybe we could do
1654 : : * it in a cheaper way (if there are no expr clauses, we can just
1655 : : * discard all negative attnums without any lookups).
1656 : : */
1115 1657 [ + + - + ]: 552 : if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1658 : : {
1659 : 54 : int ndeps = 0;
1660 : :
1661 [ + + ]: 324 : for (i = 0; i < deps->ndeps; i++)
1662 : : {
1663 : 270 : bool skip = false;
1664 : 270 : MVDependency *dep = deps->deps[i];
1665 : : int j;
1666 : :
1667 [ + + ]: 753 : for (j = 0; j < dep->nattributes; j++)
1668 : : {
1669 : : int idx;
1670 : : Node *expr;
1671 : 615 : AttrNumber unique_attnum = InvalidAttrNumber;
1672 : : AttrNumber attnum;
1673 : :
1674 : : /* undo the per-statistics offset */
1675 : 615 : attnum = dep->attributes[j];
1676 : :
1677 : : /*
1678 : : * For regular attributes we can simply check if it
1679 : : * matches any clause. If there's no matching clause, we
1680 : : * can just ignore it. We need to offset the attnum
1681 : : * though.
1682 : : */
1683 [ - + ]: 615 : if (AttrNumberIsForUserDefinedAttr(attnum))
1684 : : {
1115 tomas.vondra@postgre 1685 :UBC 0 : dep->attributes[j] = attnum + attnum_offset;
1686 : :
1687 [ # # ]: 0 : if (!bms_is_member(dep->attributes[j], clauses_attnums))
1688 : : {
1689 : 0 : skip = true;
1690 : 0 : break;
1691 : : }
1692 : :
1693 : 0 : continue;
1694 : : }
1695 : :
1696 : : /*
1697 : : * the attnum should be a valid system attnum (-1, -2,
1698 : : * ...)
1699 : : */
1115 tomas.vondra@postgre 1700 [ - + ]:CBC 615 : Assert(AttributeNumberIsValid(attnum));
1701 : :
1702 : : /*
1703 : : * For expressions, we need to do two translations. First
1704 : : * we have to translate the negative attnum to index in
1705 : : * the list of expressions (in the statistics object).
1706 : : * Then we need to see if there's a matching clause. The
1707 : : * index of the unique expression determines the attnum
1708 : : * (and we offset it).
1709 : : */
1710 : 615 : idx = -(1 + attnum);
1711 : :
1712 : : /* Is the expression index is valid? */
1713 [ + - - + ]: 615 : Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1714 : :
1715 : 615 : expr = (Node *) list_nth(stat->exprs, idx);
1716 : :
1717 : : /* try to find the expression in the unique list */
557 drowley@postgresql.o 1718 [ + + ]: 1230 : for (int m = 0; m < unique_exprs_cnt; m++)
1719 : : {
1720 : : /*
1721 : : * found a matching unique expression, use the attnum
1722 : : * (derived from index of the unique expression)
1723 : : */
1724 [ + + ]: 1098 : if (equal(unique_exprs[m], expr))
1725 : : {
1726 : 483 : unique_attnum = -(m + 1) + attnum_offset;
1115 tomas.vondra@postgre 1727 : 483 : break;
1728 : : }
1729 : : }
1730 : :
1731 : : /*
1732 : : * Found no matching expression, so we can simply skip
1733 : : * this dependency, because there's no chance it will be
1734 : : * fully covered.
1735 : : */
1736 [ + + ]: 615 : if (unique_attnum == InvalidAttrNumber)
1737 : : {
1738 : 132 : skip = true;
1739 : 132 : break;
1740 : : }
1741 : :
1742 : : /* otherwise remap it to the new attnum */
1743 : 483 : dep->attributes[j] = unique_attnum;
1744 : : }
1745 : :
1746 : : /* if found a matching dependency, keep it */
1747 [ + + ]: 270 : if (!skip)
1748 : : {
1749 : : /* maybe we've skipped something earlier, so move it */
1750 [ - + ]: 138 : if (ndeps != i)
1115 tomas.vondra@postgre 1751 :UBC 0 : deps->deps[ndeps] = deps->deps[i];
1752 : :
1115 tomas.vondra@postgre 1753 :CBC 138 : ndeps++;
1754 : : }
1755 : : }
1756 : :
1757 : 54 : deps->ndeps = ndeps;
1758 : : }
1759 : :
1760 : : /*
1761 : : * It's possible we've removed all dependencies, in which case we
1762 : : * don't bother adding it to the list.
1763 : : */
1764 [ + - ]: 552 : if (deps->ndeps > 0)
1765 : : {
1766 : 552 : func_dependencies[nfunc_dependencies] = deps;
1767 : 552 : total_ndeps += deps->ndeps;
1768 : 552 : nfunc_dependencies++;
1769 : : }
1770 : : }
1771 : :
1772 : : /* if no matching stats could be found then we've nothing to do */
1478 dean.a.rasheed@gmail 1773 [ - + ]: 546 : if (nfunc_dependencies == 0)
1774 : : {
1478 dean.a.rasheed@gmail 1775 :UBC 0 : pfree(func_dependencies);
1776 : 0 : bms_free(clauses_attnums);
2566 simon@2ndQuadrant.co 1777 : 0 : pfree(list_attnums);
1115 tomas.vondra@postgre 1778 : 0 : pfree(unique_exprs);
2566 simon@2ndQuadrant.co 1779 : 0 : return 1.0;
1780 : : }
1781 : :
1782 : : /*
1783 : : * Work out which dependencies we can apply, starting with the
1784 : : * widest/strongest ones, and proceeding to smaller/weaker ones.
1785 : : */
1478 dean.a.rasheed@gmail 1786 :CBC 546 : dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1787 : : total_ndeps);
1788 : 546 : ndependencies = 0;
1789 : :
1790 : : while (true)
2566 simon@2ndQuadrant.co 1791 : 693 : {
1792 : : MVDependency *dependency;
1793 : : AttrNumber attnum;
1794 : :
1795 : : /* the widest/strongest dependency, fully matched by clauses */
1478 dean.a.rasheed@gmail 1796 : 1239 : dependency = find_strongest_dependency(func_dependencies,
1797 : : nfunc_dependencies,
1798 : : clauses_attnums);
2566 simon@2ndQuadrant.co 1799 [ + + ]: 1239 : if (!dependency)
1800 : 546 : break;
1801 : :
1478 dean.a.rasheed@gmail 1802 : 693 : dependencies[ndependencies++] = dependency;
1803 : :
1804 : : /* Ignore dependencies using this implied attribute in later loops */
1805 : 693 : attnum = dependency->attributes[dependency->nattributes - 1];
1806 : 693 : clauses_attnums = bms_del_member(clauses_attnums, attnum);
1807 : : }
1808 : :
1809 : : /*
1810 : : * If we found applicable dependencies, use them to estimate all
1811 : : * compatible clauses on attributes that they refer to.
1812 : : */
1813 [ + - ]: 546 : if (ndependencies != 0)
1814 : 546 : s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1815 : : sjinfo, dependencies, ndependencies,
1816 : : list_attnums, estimatedclauses);
1817 : :
1818 : : /* free deserialized functional dependencies (and then the array) */
1819 [ + + ]: 1098 : for (i = 0; i < nfunc_dependencies; i++)
1820 : 552 : pfree(func_dependencies[i]);
1821 : :
2566 simon@2ndQuadrant.co 1822 : 546 : pfree(dependencies);
1478 dean.a.rasheed@gmail 1823 : 546 : pfree(func_dependencies);
1824 : 546 : bms_free(clauses_attnums);
2566 simon@2ndQuadrant.co 1825 : 546 : pfree(list_attnums);
1115 tomas.vondra@postgre 1826 : 546 : pfree(unique_exprs);
1827 : :
2566 simon@2ndQuadrant.co 1828 : 546 : return s1;
1829 : : }
|