Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dependencies.c
4 : * POSTGRES functional dependencies
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/statistics/dependencies.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/htup_details.h"
17 : #include "access/sysattr.h"
18 : #include "catalog/pg_operator.h"
19 : #include "catalog/pg_statistic_ext.h"
20 : #include "catalog/pg_statistic_ext_data.h"
21 : #include "lib/stringinfo.h"
22 : #include "nodes/nodeFuncs.h"
23 : #include "nodes/nodes.h"
24 : #include "nodes/pathnodes.h"
25 : #include "optimizer/clauses.h"
26 : #include "optimizer/optimizer.h"
27 : #include "parser/parsetree.h"
28 : #include "statistics/extended_stats_internal.h"
29 : #include "statistics/statistics.h"
30 : #include "utils/bytea.h"
31 : #include "utils/fmgroids.h"
32 : #include "utils/fmgrprotos.h"
33 : #include "utils/lsyscache.h"
34 : #include "utils/memutils.h"
35 : #include "utils/selfuncs.h"
36 : #include "utils/syscache.h"
37 : #include "utils/typcache.h"
38 :
39 : /* size of the struct header fields (magic, type, ndeps) */
40 : #define SizeOfHeader (3 * sizeof(uint32))
41 :
42 : /* size of a serialized dependency (degree, natts, atts) */
43 : #define SizeOfItem(natts) \
44 : (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
45 :
46 : /* minimal size of a dependency (with two attributes) */
47 : #define MinSizeOfItem SizeOfItem(2)
48 :
49 : /* minimal size of dependencies, when all deps are minimal */
50 : #define MinSizeOfItems(ndeps) \
51 : (SizeOfHeader + (ndeps) * MinSizeOfItem)
52 :
53 : /*
54 : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
55 : * k-permutations of n elements, except that the order does not matter for the
56 : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
57 : */
58 : typedef struct DependencyGeneratorData
59 : {
60 : int k; /* size of the dependency */
61 : int n; /* number of possible attributes */
62 : int current; /* next dependency to return (index) */
63 : AttrNumber ndependencies; /* number of dependencies generated */
64 : AttrNumber *dependencies; /* array of pre-generated dependencies */
65 : } DependencyGeneratorData;
66 :
67 : typedef DependencyGeneratorData *DependencyGenerator;
68 :
69 : static void generate_dependencies_recurse(DependencyGenerator state,
70 : int index, AttrNumber start, AttrNumber *current);
71 : static void generate_dependencies(DependencyGenerator state);
72 : static DependencyGenerator DependencyGenerator_init(int n, int k);
73 : static void DependencyGenerator_free(DependencyGenerator state);
74 : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
75 : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
76 : static bool dependency_is_fully_matched(MVDependency *dependency,
77 : Bitmapset *attnums);
78 : static bool dependency_is_compatible_clause(Node *clause, Index relid,
79 : AttrNumber *attnum);
80 : static bool dependency_is_compatible_expression(Node *clause, Index relid,
81 : List *statlist, Node **expr);
82 : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
83 : int ndependencies, Bitmapset *attnums);
84 : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
85 : int varRelid, JoinType jointype,
86 : SpecialJoinInfo *sjinfo,
87 : MVDependency **dependencies,
88 : int ndependencies,
89 : AttrNumber *list_attnums,
90 : Bitmapset **estimatedclauses);
91 :
92 : static void
2195 simon 93 CBC 372 : generate_dependencies_recurse(DependencyGenerator state, int index,
94 : AttrNumber start, AttrNumber *current)
95 : {
96 : /*
97 : * The generator handles the first (k-1) elements differently from the
98 : * last element.
99 : */
100 372 : if (index < (state->k - 1))
101 : {
102 : AttrNumber i;
103 :
104 : /*
105 : * The first (k-1) values have to be in ascending order, which we
106 : * generate recursively.
107 : */
108 :
109 444 : for (i = start; i < state->n; i++)
110 : {
111 288 : current[index] = i;
112 288 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
113 : }
114 : }
115 : else
116 : {
117 : int i;
118 :
119 : /*
120 : * the last element is the implied value, which does not respect the
121 : * ascending order. We just need to check that the value is not in the
122 : * first (k-1) elements.
123 : */
124 :
125 792 : for (i = 0; i < state->n; i++)
126 : {
127 : int j;
128 576 : bool match = false;
129 :
130 576 : current[index] = i;
131 :
132 1008 : for (j = 0; j < index; j++)
133 : {
134 720 : if (current[j] == i)
135 : {
136 288 : match = true;
137 288 : break;
138 : }
139 : }
140 :
141 : /*
142 : * If the value is not found in the first part of the dependency,
143 : * we're done.
144 : */
145 576 : if (!match)
146 : {
147 576 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
2118 tgl 148 288 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
2195 simon 149 288 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
150 288 : current, state->k * sizeof(AttrNumber));
151 288 : state->ndependencies++;
152 : }
153 : }
154 : }
155 372 : }
156 :
157 : /* generate all dependencies (k-permutations of n elements) */
158 : static void
159 84 : generate_dependencies(DependencyGenerator state)
160 : {
161 84 : AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
162 :
163 84 : generate_dependencies_recurse(state, 0, 0, current);
164 :
165 84 : pfree(current);
166 84 : }
167 :
168 : /*
169 : * initialize the DependencyGenerator of variations, and prebuild the variations
170 : *
171 : * This pre-builds all the variations. We could also generate them in
172 : * DependencyGenerator_next(), but this seems simpler.
173 : */
174 : static DependencyGenerator
175 84 : DependencyGenerator_init(int n, int k)
176 : {
177 : DependencyGenerator state;
178 :
179 84 : Assert((n >= k) && (k > 0));
180 :
181 : /* allocate the DependencyGenerator state */
182 84 : state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
183 84 : state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
184 :
185 84 : state->ndependencies = 0;
186 84 : state->current = 0;
187 84 : state->k = k;
188 84 : state->n = n;
189 :
190 : /* now actually pre-generate all the variations */
191 84 : generate_dependencies(state);
192 :
193 84 : return state;
194 : }
195 :
196 : /* free the DependencyGenerator state */
197 : static void
198 84 : DependencyGenerator_free(DependencyGenerator state)
199 : {
200 84 : pfree(state->dependencies);
201 84 : pfree(state);
202 84 : }
203 :
204 : /* generate next combination */
205 : static AttrNumber *
206 372 : DependencyGenerator_next(DependencyGenerator state)
207 : {
208 372 : if (state->current == state->ndependencies)
209 84 : return NULL;
210 :
211 288 : return &state->dependencies[state->k * state->current++];
212 : }
213 :
214 :
215 : /*
216 : * validates functional dependency on the data
217 : *
218 : * An actual work horse of detecting functional dependencies. Given a variation
219 : * of k attributes, it checks that the first (k-1) are sufficient to determine
220 : * the last one.
221 : */
222 : static double
744 tomas.vondra 223 288 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
224 : {
225 : int i,
226 : nitems;
227 : MultiSortSupport mss;
228 : SortItem *items;
229 : AttrNumber *attnums_dep;
230 :
231 : /* counters valid within a group */
2195 simon 232 288 : int group_size = 0;
233 288 : int n_violations = 0;
234 :
235 : /* total number of rows supporting (consistent with) the dependency */
236 288 : int n_supporting_rows = 0;
237 :
238 : /* Make sure we have at least two input attributes. */
239 288 : Assert(k >= 2);
240 :
241 : /* sort info for all attributes columns */
242 288 : mss = multi_sort_init(k);
243 :
244 : /*
245 : * Translate the array of indexes to regular attnums for the dependency
246 : * (we will need this to identify the columns in StatsBuildData).
247 : */
1474 tomas.vondra 248 288 : attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
249 936 : for (i = 0; i < k; i++)
744 250 648 : attnums_dep[i] = data->attnums[dependency[i]];
251 :
252 : /*
253 : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
254 : *
255 : * (a) sort the data lexicographically
256 : *
257 : * (b) split the data into groups by first (k-1) columns
258 : *
259 : * (c) for each group count different values in the last column
260 : *
261 : * We use the column data types' default sort operators and collations;
262 : * perhaps at some point it'd be worth using column-specific collations?
263 : */
264 :
265 : /* prepare the sort function for the dimensions */
2195 simon 266 936 : for (i = 0; i < k; i++)
267 : {
744 tomas.vondra 268 648 : VacAttrStats *colstat = data->stats[dependency[i]];
269 : TypeCacheEntry *type;
270 :
2195 simon 271 648 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
272 648 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
2195 simon 273 UBC 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
274 : colstat->attrtypid);
275 :
276 : /* prepare the sort function for this dimension */
1361 tomas.vondra 277 CBC 648 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
278 : }
279 :
280 : /*
281 : * build an array of SortItem(s) sorted using the multi-sort support
282 : *
283 : * XXX This relies on all stats entries pointing to the same tuple
284 : * descriptor. For now that assumption holds, but it might change in the
285 : * future for example if we support statistics on multiple tables.
286 : */
744 287 288 : items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
288 :
289 : /*
290 : * Walk through the sorted array, split it into rows according to the
291 : * first (k-1) columns. If there's a single value in the last column, we
292 : * count the group as 'supporting' the functional dependency. Otherwise we
293 : * count it as contradicting.
294 : */
295 :
296 : /* start with the first row forming a group */
2195 simon 297 288 : group_size = 1;
298 :
299 : /* loop 1 beyond the end of the array so that we count the final group */
1474 tomas.vondra 300 752685 : for (i = 1; i <= nitems; i++)
301 : {
302 : /*
303 : * Check if the group ended, which may be either because we processed
304 : * all the items (i==nitems), or because the i-th item is not equal to
305 : * the preceding one.
306 : */
307 1504506 : if (i == nitems ||
2118 tgl 308 752109 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
309 : {
310 : /*
311 : * If no violations were found in the group then track the rows of
312 : * the group as supporting the functional dependency.
313 : */
2195 simon 314 16107 : if (n_violations == 0)
315 9771 : n_supporting_rows += group_size;
316 :
317 : /* Reset counters for the new group */
318 16107 : n_violations = 0;
319 16107 : group_size = 1;
320 16107 : continue;
321 : }
322 : /* first columns match, but the last one does not (so contradicting) */
323 736290 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
324 30042 : n_violations++;
325 :
326 736290 : group_size++;
327 : }
328 :
329 : /* Compute the 'degree of validity' as (supporting/total). */
744 tomas.vondra 330 288 : return (n_supporting_rows * 1.0 / data->numrows);
331 : }
332 :
333 : /*
334 : * detects functional dependencies between groups of columns
335 : *
336 : * Generates all possible subsets of columns (variations) and computes
337 : * the degree of validity for each one. For example when creating statistics
338 : * on three columns (a,b,c) there are 9 possible dependencies
339 : *
340 : * two columns three columns
341 : * ----------- -------------
342 : * (a) -> b (a,b) -> c
343 : * (a) -> c (a,c) -> b
344 : * (b) -> a (b,c) -> a
345 : * (b) -> c
346 : * (c) -> a
347 : * (c) -> b
348 : */
349 : MVDependencies *
350 60 : statext_dependencies_build(StatsBuildData *data)
351 : {
352 : int i,
353 : k;
354 :
355 : /* result */
2195 simon 356 60 : MVDependencies *dependencies = NULL;
357 : MemoryContext cxt;
358 :
744 tomas.vondra 359 60 : Assert(data->nattnums >= 2);
360 :
361 : /* tracks memory allocated by dependency_degree calls */
565 362 60 : cxt = AllocSetContextCreate(CurrentMemoryContext,
363 : "dependency_degree cxt",
364 : ALLOCSET_DEFAULT_SIZES);
365 :
366 : /*
367 : * We'll try build functional dependencies starting from the smallest ones
368 : * covering just 2 columns, to the largest ones, covering all columns
369 : * included in the statistics object. We start from the smallest ones
370 : * because we want to be able to skip already implied ones.
371 : */
744 372 144 : for (k = 2; k <= data->nattnums; k++)
373 : {
374 : AttrNumber *dependency; /* array with k elements */
375 :
376 : /* prepare a DependencyGenerator of variation */
377 84 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
378 :
379 : /* generate all possible variations of k values (out of n) */
2195 simon 380 372 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
381 : {
382 : double degree;
383 : MVDependency *d;
384 : MemoryContext oldcxt;
385 :
386 : /* release memory used by dependency degree calculation */
565 tomas.vondra 387 288 : oldcxt = MemoryContextSwitchTo(cxt);
388 :
389 : /* compute how valid the dependency seems */
744 390 288 : degree = dependency_degree(data, k, dependency);
391 :
565 392 288 : MemoryContextSwitchTo(oldcxt);
393 288 : MemoryContextReset(cxt);
394 :
395 : /*
396 : * if the dependency seems entirely invalid, don't store it
397 : */
2195 simon 398 288 : if (degree == 0.0)
399 126 : continue;
400 :
401 162 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
2118 tgl 402 162 : + k * sizeof(AttrNumber));
403 :
404 : /* copy the dependency (and keep the indexes into stxkeys) */
2195 simon 405 162 : d->degree = degree;
406 162 : d->nattributes = k;
407 522 : for (i = 0; i < k; i++)
744 tomas.vondra 408 360 : d->attributes[i] = data->attnums[dependency[i]];
409 :
410 : /* initialize the list of dependencies */
2195 simon 411 162 : if (dependencies == NULL)
412 : {
413 : dependencies
414 51 : = (MVDependencies *) palloc0(sizeof(MVDependencies));
415 :
416 51 : dependencies->magic = STATS_DEPS_MAGIC;
417 51 : dependencies->type = STATS_DEPS_TYPE_BASIC;
418 51 : dependencies->ndeps = 0;
419 : }
420 :
421 162 : dependencies->ndeps++;
422 162 : dependencies = (MVDependencies *) repalloc(dependencies,
423 : offsetof(MVDependencies, deps)
1449 tomas.vondra 424 162 : + dependencies->ndeps * sizeof(MVDependency *));
425 :
2195 simon 426 162 : dependencies->deps[dependencies->ndeps - 1] = d;
427 : }
428 :
429 : /*
430 : * we're done with variations of k elements, so free the
431 : * DependencyGenerator
432 : */
433 84 : DependencyGenerator_free(DependencyGenerator);
434 : }
435 :
565 tomas.vondra 436 60 : MemoryContextDelete(cxt);
437 :
2195 simon 438 60 : return dependencies;
439 : }
440 :
441 :
442 : /*
443 : * Serialize list of dependencies into a bytea value.
444 : */
445 : bytea *
2153 bruce 446 51 : statext_dependencies_serialize(MVDependencies *dependencies)
447 : {
448 : int i;
449 : bytea *output;
450 : char *tmp;
451 : Size len;
452 :
453 : /* we need to store ndeps, with a number of attributes for each one */
1449 tomas.vondra 454 51 : len = VARHDRSZ + SizeOfHeader;
455 :
456 : /* and also include space for the actual attribute numbers and degrees */
2195 simon 457 213 : for (i = 0; i < dependencies->ndeps; i++)
1449 tomas.vondra 458 162 : len += SizeOfItem(dependencies->deps[i]->nattributes);
459 :
2195 simon 460 51 : output = (bytea *) palloc0(len);
461 51 : SET_VARSIZE(output, len);
462 :
463 51 : tmp = VARDATA(output);
464 :
465 : /* Store the base struct values (magic, type, ndeps) */
466 51 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
467 51 : tmp += sizeof(uint32);
468 51 : memcpy(tmp, &dependencies->type, sizeof(uint32));
469 51 : tmp += sizeof(uint32);
470 51 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
471 51 : tmp += sizeof(uint32);
472 :
473 : /* store number of attributes and attribute numbers for each dependency */
474 213 : for (i = 0; i < dependencies->ndeps; i++)
475 : {
476 162 : MVDependency *d = dependencies->deps[i];
477 :
1449 tomas.vondra 478 162 : memcpy(tmp, &d->degree, sizeof(double));
479 162 : tmp += sizeof(double);
480 :
481 162 : memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
482 162 : tmp += sizeof(AttrNumber);
483 :
2195 simon 484 162 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
485 162 : tmp += sizeof(AttrNumber) * d->nattributes;
486 :
487 : /* protect against overflow */
488 162 : Assert(tmp <= ((char *) output + len));
489 : }
490 :
491 : /* make sure we've produced exactly the right amount of data */
1449 tomas.vondra 492 51 : Assert(tmp == ((char *) output + len));
493 :
2195 simon 494 51 : return output;
495 : }
496 :
497 : /*
498 : * Reads serialized dependencies into MVDependencies structure.
499 : */
500 : MVDependencies *
501 576 : statext_dependencies_deserialize(bytea *data)
502 : {
503 : int i;
504 : Size min_expected_size;
505 : MVDependencies *dependencies;
506 : char *tmp;
507 :
508 576 : if (data == NULL)
2195 simon 509 UBC 0 : return NULL;
510 :
1449 tomas.vondra 511 CBC 576 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
44 peter 512 UNC 0 : elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
513 : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
514 :
515 : /* read the MVDependencies header */
2195 simon 516 CBC 576 : dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
517 :
518 : /* initialize pointer to the data part (skip the varlena header) */
519 576 : tmp = VARDATA_ANY(data);
520 :
521 : /* read the header fields and perform basic sanity checks */
522 576 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
523 576 : tmp += sizeof(uint32);
524 576 : memcpy(&dependencies->type, tmp, sizeof(uint32));
525 576 : tmp += sizeof(uint32);
526 576 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
527 576 : tmp += sizeof(uint32);
528 :
529 576 : if (dependencies->magic != STATS_DEPS_MAGIC)
2195 simon 530 UBC 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
531 : dependencies->magic, STATS_DEPS_MAGIC);
532 :
2195 simon 533 CBC 576 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
2195 simon 534 UBC 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
535 : dependencies->type, STATS_DEPS_TYPE_BASIC);
536 :
2195 simon 537 CBC 576 : if (dependencies->ndeps == 0)
1410 tomas.vondra 538 UBC 0 : elog(ERROR, "invalid zero-length item array in MVDependencies");
539 :
540 : /* what minimum bytea size do we expect for those parameters */
1449 tomas.vondra 541 CBC 576 : min_expected_size = SizeOfItem(dependencies->ndeps);
542 :
2195 simon 543 576 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
44 peter 544 UNC 0 : elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
545 : VARSIZE_ANY_EXHDR(data), min_expected_size);
546 :
547 : /* allocate space for the MCV items */
2195 simon 548 CBC 576 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
2118 tgl 549 576 : + (dependencies->ndeps * sizeof(MVDependency *)));
550 :
2195 simon 551 3393 : for (i = 0; i < dependencies->ndeps; i++)
552 : {
553 : double degree;
554 : AttrNumber k;
555 : MVDependency *d;
556 :
557 : /* degree of validity */
558 2817 : memcpy(°ree, tmp, sizeof(double));
559 2817 : tmp += sizeof(double);
560 :
561 : /* number of attributes */
562 2817 : memcpy(&k, tmp, sizeof(AttrNumber));
563 2817 : tmp += sizeof(AttrNumber);
564 :
565 : /* is the number of attributes valid? */
566 2817 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
567 :
568 : /* now that we know the number of attributes, allocate the dependency */
569 2817 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
2118 tgl 570 2817 : + (k * sizeof(AttrNumber)));
571 :
2195 simon 572 2817 : d->degree = degree;
573 2817 : d->nattributes = k;
574 :
575 : /* copy attribute numbers */
576 2817 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
577 2817 : tmp += sizeof(AttrNumber) * d->nattributes;
578 :
579 2817 : dependencies->deps[i] = d;
580 :
581 : /* still within the bytea */
582 2817 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
583 : }
584 :
585 : /* we should have consumed the whole bytea exactly */
586 576 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
587 :
588 576 : return dependencies;
589 : }
590 :
591 : /*
592 : * dependency_is_fully_matched
593 : * checks that a functional dependency is fully matched given clauses on
594 : * attributes (assuming the clauses are suitable equality clauses)
595 : */
596 : static bool
2153 bruce 597 2376 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
598 : {
599 : int j;
600 :
601 : /*
602 : * Check that the dependency actually is fully covered by clauses. We have
603 : * to translate all attribute numbers, as those are referenced
604 : */
2195 simon 605 6165 : for (j = 0; j < dependency->nattributes; j++)
606 : {
607 4857 : int attnum = dependency->attributes[j];
608 :
609 4857 : if (!bms_is_member(attnum, attnums))
610 1068 : return false;
611 : }
612 :
613 1308 : return true;
614 : }
615 :
616 : /*
617 : * statext_dependencies_load
618 : * Load the functional dependencies for the indicated pg_statistic_ext tuple
619 : */
620 : MVDependencies *
448 tomas.vondra 621 570 : statext_dependencies_load(Oid mvoid, bool inh)
622 : {
623 : MVDependencies *result;
624 : bool isnull;
625 : Datum deps;
626 : HeapTuple htup;
627 :
628 570 : htup = SearchSysCache2(STATEXTDATASTXOID,
629 : ObjectIdGetDatum(mvoid),
630 : BoolGetDatum(inh));
2195 simon 631 570 : if (!HeapTupleIsValid(htup))
2156 tgl 632 UBC 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
633 :
1396 tomas.vondra 634 CBC 570 : deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
635 : Anum_pg_statistic_ext_data_stxddependencies, &isnull);
1803 tgl 636 570 : if (isnull)
1803 tgl 637 UBC 0 : elog(ERROR,
638 : "requested statistics kind \"%c\" is not yet built for statistics object %u",
639 : STATS_EXT_DEPENDENCIES, mvoid);
640 :
1803 tgl 641 CBC 570 : result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
642 :
2195 simon 643 570 : ReleaseSysCache(htup);
644 :
1803 tgl 645 570 : return result;
646 : }
647 :
648 : /*
649 : * pg_dependencies_in - input routine for type pg_dependencies.
650 : *
651 : * pg_dependencies is real enough to be a table column, but it has no operations
652 : * of its own, and disallows input too
653 : */
654 : Datum
2195 simon 655 UBC 0 : pg_dependencies_in(PG_FUNCTION_ARGS)
656 : {
657 : /*
658 : * pg_node_list stores the data in binary form and parsing text input is
659 : * not needed, so disallow this.
660 : */
661 0 : ereport(ERROR,
662 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
663 : errmsg("cannot accept a value of type %s", "pg_dependencies")));
664 :
665 : PG_RETURN_VOID(); /* keep compiler quiet */
666 : }
667 :
668 : /*
669 : * pg_dependencies - output routine for type pg_dependencies.
670 : */
671 : Datum
2195 simon 672 CBC 6 : pg_dependencies_out(PG_FUNCTION_ARGS)
673 : {
2168 alvherre 674 6 : bytea *data = PG_GETARG_BYTEA_PP(0);
675 6 : MVDependencies *dependencies = statext_dependencies_deserialize(data);
676 : int i,
677 : j;
678 : StringInfoData str;
679 :
2195 simon 680 6 : initStringInfo(&str);
2168 alvherre 681 6 : appendStringInfoChar(&str, '{');
682 :
2195 simon 683 36 : for (i = 0; i < dependencies->ndeps; i++)
684 : {
685 30 : MVDependency *dependency = dependencies->deps[i];
686 :
687 30 : if (i > 0)
688 24 : appendStringInfoString(&str, ", ");
689 :
2168 alvherre 690 30 : appendStringInfoChar(&str, '"');
2195 simon 691 102 : for (j = 0; j < dependency->nattributes; j++)
692 : {
693 72 : if (j == dependency->nattributes - 1)
694 30 : appendStringInfoString(&str, " => ");
695 42 : else if (j > 0)
696 12 : appendStringInfoString(&str, ", ");
697 :
698 72 : appendStringInfo(&str, "%d", dependency->attributes[j]);
699 : }
2168 alvherre 700 30 : appendStringInfo(&str, "\": %f", dependency->degree);
701 : }
702 :
703 6 : appendStringInfoChar(&str, '}');
704 :
2195 simon 705 6 : PG_RETURN_CSTRING(str.data);
706 : }
707 :
708 : /*
709 : * pg_dependencies_recv - binary input routine for type pg_dependencies.
710 : */
711 : Datum
2195 simon 712 UBC 0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
713 : {
714 0 : ereport(ERROR,
715 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
716 : errmsg("cannot accept a value of type %s", "pg_dependencies")));
717 :
718 : PG_RETURN_VOID(); /* keep compiler quiet */
719 : }
720 :
721 : /*
722 : * pg_dependencies_send - binary output routine for type pg_dependencies.
723 : *
724 : * Functional dependencies are serialized in a bytea value (although the type
725 : * is named differently), so let's just send that.
726 : */
727 : Datum
728 0 : pg_dependencies_send(PG_FUNCTION_ARGS)
729 : {
730 0 : return byteasend(fcinfo);
731 : }
732 :
733 : /*
734 : * dependency_is_compatible_clause
735 : * Determines if the clause is compatible with functional dependencies
736 : *
737 : * Only clauses that have the form of equality to a pseudoconstant, or can be
738 : * interpreted that way, are currently accepted. Furthermore the variable
739 : * part of the clause must be a simple Var belonging to the specified
740 : * relation, whose attribute number we return in *attnum on success.
741 : */
742 : static bool
2195 simon 743 CBC 1512 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
744 : {
745 : Var *var;
746 : Node *clause_expr;
747 :
1117 tomas.vondra 748 1512 : if (IsA(clause, RestrictInfo))
749 : {
750 1452 : RestrictInfo *rinfo = (RestrictInfo *) clause;
751 :
752 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
753 1452 : if (rinfo->pseudoconstant)
754 3 : return false;
755 :
756 : /* Clauses referencing multiple, or no, varnos are incompatible */
757 1449 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1117 tomas.vondra 758 UBC 0 : return false;
759 :
1117 tomas.vondra 760 CBC 1449 : clause = (Node *) rinfo->clause;
761 : }
762 :
763 1509 : if (is_opclause(clause))
764 : {
765 : /* If it's an opclause, check for Var = Const or Const = Var. */
766 585 : OpExpr *expr = (OpExpr *) clause;
767 :
768 : /* Only expressions with two arguments are candidates. */
2195 simon 769 585 : if (list_length(expr->args) != 2)
2195 simon 770 UBC 0 : return false;
771 :
772 : /* Make sure non-selected argument is a pseudoconstant. */
1952 tgl 773 CBC 585 : if (is_pseudo_constant_clause(lsecond(expr->args)))
744 tomas.vondra 774 585 : clause_expr = linitial(expr->args);
1952 tgl 775 UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
744 tomas.vondra 776 0 : clause_expr = lsecond(expr->args);
777 : else
2195 simon 778 0 : return false;
779 :
780 : /*
781 : * If it's not an "=" operator, just ignore the clause, as it's not
782 : * compatible with functional dependencies.
783 : *
784 : * This uses the function for estimating selectivity, not the operator
785 : * directly (a bit awkward, but well ...).
786 : *
787 : * XXX this is pretty dubious; probably it'd be better to check btree
788 : * or hash opclass membership, so as not to be fooled by custom
789 : * selectivity functions, and to be more consistent with decisions
790 : * elsewhere in the planner.
791 : */
2195 simon 792 CBC 585 : if (get_oprrest(expr->opno) != F_EQSEL)
793 18 : return false;
794 :
795 : /* OK to proceed with checking "var" */
796 : }
1117 tomas.vondra 797 924 : else if (IsA(clause, ScalarArrayOpExpr))
798 : {
799 : /* If it's an scalar array operator, check for Var IN Const. */
1060 tgl 800 888 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
801 :
802 : /*
803 : * Reject ALL() variant, we only care about ANY/IN.
804 : *
805 : * XXX Maybe we should check if all the values are the same, and allow
806 : * ALL in that case? Doesn't seem very practical, though.
807 : */
1121 tomas.vondra 808 888 : if (!expr->useOr)
809 18 : return false;
810 :
811 : /* Only expressions with two arguments are candidates. */
812 870 : if (list_length(expr->args) != 2)
1121 tomas.vondra 813 UBC 0 : return false;
814 :
815 : /*
816 : * We know it's always (Var IN Const), so we assume the var is the
817 : * first argument, and pseudoconstant is the second one.
818 : */
1121 tomas.vondra 819 CBC 870 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1121 tomas.vondra 820 UBC 0 : return false;
821 :
744 tomas.vondra 822 CBC 870 : clause_expr = linitial(expr->args);
823 :
824 : /*
825 : * If it's not an "=" operator, just ignore the clause, as it's not
826 : * compatible with functional dependencies. The operator is identified
827 : * simply by looking at which function it uses to estimate
828 : * selectivity. That's a bit strange, but it's what other similar
829 : * places do.
830 : */
1121 831 870 : if (get_oprrest(expr->opno) != F_EQSEL)
832 90 : return false;
833 :
834 : /* OK to proceed with checking "var" */
835 : }
1117 836 36 : else if (is_orclause(clause))
837 : {
744 838 36 : BoolExpr *bool_expr = (BoolExpr *) clause;
839 : ListCell *lc;
840 :
841 : /* start with no attribute number */
1117 842 36 : *attnum = InvalidAttrNumber;
843 :
744 844 75 : foreach(lc, bool_expr->args)
845 : {
846 : AttrNumber clause_attnum;
847 :
848 : /*
849 : * Had we found incompatible clause in the arguments, treat the
850 : * whole clause as incompatible.
851 : */
1117 852 60 : if (!dependency_is_compatible_clause((Node *) lfirst(lc),
853 : relid, &clause_attnum))
854 21 : return false;
855 :
856 42 : if (*attnum == InvalidAttrNumber)
857 18 : *attnum = clause_attnum;
858 :
859 : /* ensure all the variables are the same (same attnum) */
860 42 : if (*attnum != clause_attnum)
861 3 : return false;
862 : }
863 :
864 : /* the Var is already checked by the recursive call */
865 15 : return true;
866 : }
1117 tomas.vondra 867 UBC 0 : else if (is_notclause(clause))
868 : {
869 : /*
870 : * "NOT x" can be interpreted as "x = false", so get the argument and
871 : * proceed with seeing if it's a suitable Var.
872 : */
744 873 0 : clause_expr = (Node *) get_notclausearg(clause);
874 : }
875 : else
876 : {
877 : /*
878 : * A boolean expression "x" can be interpreted as "x = true", so
879 : * proceed with seeing if it's a suitable Var.
880 : */
881 0 : clause_expr = (Node *) clause;
882 : }
883 :
884 : /*
885 : * We may ignore any RelabelType node above the operand. (There won't be
886 : * more than one, since eval_const_expressions has been applied already.)
887 : */
744 tomas.vondra 888 CBC 1347 : if (IsA(clause_expr, RelabelType))
744 tomas.vondra 889 UBC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
890 :
891 : /* We only support plain Vars for now */
744 tomas.vondra 892 CBC 1347 : if (!IsA(clause_expr, Var))
1952 tgl 893 144 : return false;
894 :
895 : /* OK, we know we have a Var */
744 tomas.vondra 896 1203 : var = (Var *) clause_expr;
897 :
898 : /* Ensure Var is from the correct relation */
1952 tgl 899 1203 : if (var->varno != relid)
1952 tgl 900 UBC 0 : return false;
901 :
902 : /* We also better ensure the Var is from the current level */
1952 tgl 903 CBC 1203 : if (var->varlevelsup != 0)
1952 tgl 904 UBC 0 : return false;
905 :
906 : /* Also ignore system attributes (we don't allow stats on those) */
1952 tgl 907 CBC 1203 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
1952 tgl 908 UBC 0 : return false;
909 :
1952 tgl 910 CBC 1203 : *attnum = var->varattno;
911 1203 : return true;
912 : }
913 :
914 : /*
915 : * find_strongest_dependency
916 : * find the strongest dependency on the attributes
917 : *
918 : * When applying functional dependencies, we start with the strongest
919 : * dependencies. That is, we select the dependency that:
920 : *
921 : * (a) has all attributes covered by equality clauses
922 : *
923 : * (b) has the most attributes
924 : *
925 : * (c) has the highest degree of validity
926 : *
927 : * This guarantees that we eliminate the most redundant conditions first
928 : * (see the comment in dependencies_clauselist_selectivity).
929 : */
930 : static MVDependency *
1182 tomas.vondra 931 1275 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
932 : Bitmapset *attnums)
933 : {
934 : int i,
935 : j;
2195 simon 936 1275 : MVDependency *strongest = NULL;
937 :
938 : /* number of attnums in clauses */
939 1275 : int nattnums = bms_num_members(attnums);
940 :
941 : /*
942 : * Iterate over the MVDependency items and find the strongest one from the
943 : * fully-matched dependencies. We do the cheap checks first, before
944 : * matching it against the attnums.
945 : */
1182 tomas.vondra 946 2568 : for (i = 0; i < ndependencies; i++)
947 : {
948 7332 : for (j = 0; j < dependencies[i]->ndeps; j++)
949 : {
950 6039 : MVDependency *dependency = dependencies[i]->deps[j];
951 :
952 : /*
953 : * Skip dependencies referencing more attributes than available
954 : * clauses, as those can't be fully matched.
955 : */
956 6039 : if (dependency->nattributes > nattnums)
2195 simon 957 3663 : continue;
958 :
1182 tomas.vondra 959 2376 : if (strongest)
960 : {
961 : /* skip dependencies on fewer attributes than the strongest. */
962 1482 : if (dependency->nattributes < strongest->nattributes)
1182 tomas.vondra 963 UBC 0 : continue;
964 :
965 : /* also skip weaker dependencies when attribute count matches */
1182 tomas.vondra 966 CBC 1482 : if (strongest->nattributes == dependency->nattributes &&
967 1341 : strongest->degree > dependency->degree)
1182 tomas.vondra 968 UBC 0 : continue;
969 : }
970 :
971 : /*
972 : * this dependency is stronger, but we must still check that it's
973 : * fully matched to these attnums. We perform this check last as
974 : * it's slightly more expensive than the previous checks.
975 : */
1182 tomas.vondra 976 CBC 2376 : if (dependency_is_fully_matched(dependency, attnums))
977 1308 : strongest = dependency; /* save new best match */
978 : }
979 : }
980 :
2195 simon 981 1275 : return strongest;
982 : }
983 :
984 : /*
985 : * clauselist_apply_dependencies
986 : * Apply the specified functional dependencies to a list of clauses and
987 : * return the estimated selectivity of the clauses that are compatible
988 : * with any of the given dependencies.
989 : *
990 : * This will estimate all not-already-estimated clauses that are compatible
991 : * with functional dependencies, and which have an attribute mentioned by any
992 : * of the given dependencies (either as an implying or implied attribute).
993 : *
994 : * Given (lists of) clauses on attributes (a,b) and a functional dependency
995 : * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
996 : * using the formula
997 : *
998 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
999 : *
1000 : * where 'f' is the degree of dependency. This reflects the fact that we
1001 : * expect a fraction f of all rows to be consistent with the dependency
1002 : * (a=>b), and so have a selectivity of P(a), while the remaining rows are
1003 : * treated as independent.
1004 : *
1005 : * In practice, we use a slightly modified version of this formula, which uses
1006 : * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
1007 : * should obviously not exceed either column's individual selectivity. I.e.,
1008 : * we actually combine selectivities using the formula
1009 : *
1010 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1011 : *
1012 : * This can make quite a difference if the specific values matching the
1013 : * clauses are not consistent with the functional dependency.
1014 : */
1015 : static Selectivity
1107 dean.a.rasheed 1016 564 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
1017 : int varRelid, JoinType jointype,
1018 : SpecialJoinInfo *sjinfo,
1019 : MVDependency **dependencies, int ndependencies,
1020 : AttrNumber *list_attnums,
1021 : Bitmapset **estimatedclauses)
1022 : {
1023 : Bitmapset *attnums;
1024 : int i;
1025 : int j;
1026 : int nattrs;
1027 : Selectivity *attr_sel;
1028 : int attidx;
1029 : int listidx;
1030 : ListCell *l;
1031 : Selectivity s1;
1032 :
1033 : /*
1034 : * Extract the attnums of all implying and implied attributes from all the
1035 : * given dependencies. Each of these attributes is expected to have at
1036 : * least 1 not-already-estimated compatible clause that we will estimate
1037 : * here.
1038 : */
1039 564 : attnums = NULL;
1040 1275 : for (i = 0; i < ndependencies; i++)
1041 : {
1042 2274 : for (j = 0; j < dependencies[i]->nattributes; j++)
1043 : {
1044 1563 : AttrNumber attnum = dependencies[i]->attributes[j];
1045 :
1046 1563 : attnums = bms_add_member(attnums, attnum);
1047 : }
1048 : }
1049 :
1050 : /*
1051 : * Compute per-column selectivity estimates for each of these attributes,
1052 : * and mark all the corresponding clauses as estimated.
1053 : */
1054 564 : nattrs = bms_num_members(attnums);
1055 564 : attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
1056 :
1057 564 : attidx = 0;
1058 564 : i = -1;
1059 1845 : while ((i = bms_next_member(attnums, i)) >= 0)
1060 : {
1061 1281 : List *attr_clauses = NIL;
1062 : Selectivity simple_sel;
1063 :
1064 1281 : listidx = -1;
1065 4314 : foreach(l, clauses)
1066 : {
1067 3033 : Node *clause = (Node *) lfirst(l);
1068 :
1069 3033 : listidx++;
1070 3033 : if (list_attnums[listidx] == i)
1071 : {
1072 1281 : attr_clauses = lappend(attr_clauses, clause);
1073 1281 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1074 : }
1075 : }
1076 :
857 1077 1281 : simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
1078 : jointype, sjinfo, false);
1107 1079 1281 : attr_sel[attidx++] = simple_sel;
1080 : }
1081 :
1082 : /*
1083 : * Now combine these selectivities using the dependency information. For
1084 : * chains of dependencies such as a -> b -> c, the b -> c dependency will
1085 : * come before the a -> b dependency in the array, so we traverse the
1086 : * array backwards to ensure such chains are computed in the right order.
1087 : *
1088 : * As explained above, pairs of selectivities are combined using the
1089 : * formula
1090 : *
1091 : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
1092 : *
1093 : * to ensure that the combined selectivity is never greater than either
1094 : * individual selectivity.
1095 : *
1096 : * Where multiple dependencies apply (e.g., a -> b -> c), we use
1097 : * conditional probabilities to compute the overall result as follows:
1098 : *
1099 : * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1100 : *
1101 : * so we replace the selectivities of all implied attributes with
1102 : * conditional probabilities, that are conditional on all their implying
1103 : * attributes. The selectivities of all other non-implied attributes are
1104 : * left as they are.
1105 : */
1106 1275 : for (i = ndependencies - 1; i >= 0; i--)
1107 : {
1108 711 : MVDependency *dependency = dependencies[i];
1109 : AttrNumber attnum;
1110 : Selectivity s2;
1111 : double f;
1112 :
1113 : /* Selectivity of all the implying attributes */
1114 711 : s1 = 1.0;
1115 1563 : for (j = 0; j < dependency->nattributes - 1; j++)
1116 : {
1117 852 : attnum = dependency->attributes[j];
1118 852 : attidx = bms_member_index(attnums, attnum);
1119 852 : s1 *= attr_sel[attidx];
1120 : }
1121 :
1122 : /* Original selectivity of the implied attribute */
1123 711 : attnum = dependency->attributes[j];
1124 711 : attidx = bms_member_index(attnums, attnum);
1125 711 : s2 = attr_sel[attidx];
1126 :
1127 : /*
1128 : * Replace s2 with the conditional probability s2 given s1, computed
1129 : * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1130 : *
1131 : * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1132 : *
1133 : * where P(a) = s1, the selectivity of the implying attributes, and
1134 : * P(b) = s2, the selectivity of the implied attribute.
1135 : */
1136 711 : f = dependency->degree;
1137 :
1138 711 : if (s1 <= s2)
1139 681 : attr_sel[attidx] = f + (1 - f) * s2;
1140 : else
1141 30 : attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1142 : }
1143 :
1144 : /*
1145 : * The overall selectivity of all the clauses on all these attributes is
1146 : * then the product of all the original (non-implied) probabilities and
1147 : * the new conditional (implied) probabilities.
1148 : */
1149 564 : s1 = 1.0;
1150 1845 : for (i = 0; i < nattrs; i++)
1151 1281 : s1 *= attr_sel[i];
1152 :
1153 564 : CLAMP_PROBABILITY(s1);
1154 :
1155 564 : pfree(attr_sel);
1156 564 : bms_free(attnums);
1157 :
1158 564 : return s1;
1159 : }
1160 :
1161 : /*
1162 : * dependency_is_compatible_expression
1163 : * Determines if the expression is compatible with functional dependencies
1164 : *
1165 : * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1166 : * expression is a simple Var. On success, return the matching statistics
1167 : * expression into *expr.
1168 : */
1169 : static bool
744 tomas.vondra 1170 321 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1171 : {
1172 : ListCell *lc,
1173 : *lc2;
1174 : Node *clause_expr;
1175 :
1176 321 : if (IsA(clause, RestrictInfo))
1177 : {
1178 276 : RestrictInfo *rinfo = (RestrictInfo *) clause;
1179 :
1180 : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1181 276 : if (rinfo->pseudoconstant)
1182 3 : return false;
1183 :
1184 : /* Clauses referencing multiple, or no, varnos are incompatible */
1185 273 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
744 tomas.vondra 1186 UBC 0 : return false;
1187 :
744 tomas.vondra 1188 CBC 273 : clause = (Node *) rinfo->clause;
1189 : }
1190 :
1191 318 : if (is_opclause(clause))
1192 : {
1193 : /* If it's an opclause, check for Var = Const or Const = Var. */
1194 102 : OpExpr *expr = (OpExpr *) clause;
1195 :
1196 : /* Only expressions with two arguments are candidates. */
1197 102 : if (list_length(expr->args) != 2)
744 tomas.vondra 1198 UBC 0 : return false;
1199 :
1200 : /* Make sure non-selected argument is a pseudoconstant. */
744 tomas.vondra 1201 CBC 102 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1202 102 : clause_expr = linitial(expr->args);
744 tomas.vondra 1203 UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1204 0 : clause_expr = lsecond(expr->args);
1205 : else
1206 0 : return false;
1207 :
1208 : /*
1209 : * If it's not an "=" operator, just ignore the clause, as it's not
1210 : * compatible with functional dependencies.
1211 : *
1212 : * This uses the function for estimating selectivity, not the operator
1213 : * directly (a bit awkward, but well ...).
1214 : *
1215 : * XXX this is pretty dubious; probably it'd be better to check btree
1216 : * or hash opclass membership, so as not to be fooled by custom
1217 : * selectivity functions, and to be more consistent with decisions
1218 : * elsewhere in the planner.
1219 : */
744 tomas.vondra 1220 CBC 102 : if (get_oprrest(expr->opno) != F_EQSEL)
1221 18 : return false;
1222 :
1223 : /* OK to proceed with checking "var" */
1224 : }
1225 216 : else if (IsA(clause, ScalarArrayOpExpr))
1226 : {
1227 : /* If it's an scalar array operator, check for Var IN Const. */
1228 195 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1229 :
1230 : /*
1231 : * Reject ALL() variant, we only care about ANY/IN.
1232 : *
1233 : * FIXME Maybe we should check if all the values are the same, and
1234 : * allow ALL in that case? Doesn't seem very practical, though.
1235 : */
1236 195 : if (!expr->useOr)
1237 18 : return false;
1238 :
1239 : /* Only expressions with two arguments are candidates. */
1240 177 : if (list_length(expr->args) != 2)
744 tomas.vondra 1241 UBC 0 : return false;
1242 :
1243 : /*
1244 : * We know it's always (Var IN Const), so we assume the var is the
1245 : * first argument, and pseudoconstant is the second one.
1246 : */
744 tomas.vondra 1247 CBC 177 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
744 tomas.vondra 1248 UBC 0 : return false;
1249 :
744 tomas.vondra 1250 CBC 177 : clause_expr = linitial(expr->args);
1251 :
1252 : /*
1253 : * If it's not an "=" operator, just ignore the clause, as it's not
1254 : * compatible with functional dependencies. The operator is identified
1255 : * simply by looking at which function it uses to estimate
1256 : * selectivity. That's a bit strange, but it's what other similar
1257 : * places do.
1258 : */
1259 177 : if (get_oprrest(expr->opno) != F_EQSEL)
1260 90 : return false;
1261 :
1262 : /* OK to proceed with checking "var" */
1263 : }
1264 21 : else if (is_orclause(clause))
1265 : {
1266 21 : BoolExpr *bool_expr = (BoolExpr *) clause;
1267 :
744 tomas.vondra 1268 ECB : /* start with no expression (we'll use the first match) */
744 tomas.vondra 1269 GIC 21 : *expr = NULL;
744 tomas.vondra 1270 ECB :
744 tomas.vondra 1271 GIC 60 : foreach(lc, bool_expr->args)
744 tomas.vondra 1272 ECB : {
744 tomas.vondra 1273 GIC 45 : Node *or_expr = NULL;
1274 :
1275 : /*
1276 : * Had we found incompatible expression in the arguments, treat
1277 : * the whole expression as incompatible.
744 tomas.vondra 1278 ECB : */
744 tomas.vondra 1279 GIC 45 : if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
744 tomas.vondra 1280 ECB : statlist, &or_expr))
744 tomas.vondra 1281 GIC 6 : return false;
744 tomas.vondra 1282 ECB :
744 tomas.vondra 1283 CBC 42 : if (*expr == NULL)
744 tomas.vondra 1284 GIC 18 : *expr = or_expr;
1285 :
744 tomas.vondra 1286 ECB : /* ensure all the expressions are the same */
744 tomas.vondra 1287 CBC 42 : if (!equal(or_expr, *expr))
744 tomas.vondra 1288 GIC 3 : return false;
1289 : }
1290 :
744 tomas.vondra 1291 ECB : /* the expression is already checked by the recursive call */
744 tomas.vondra 1292 GIC 15 : return true;
744 tomas.vondra 1293 EUB : }
744 tomas.vondra 1294 UIC 0 : else if (is_notclause(clause))
1295 : {
1296 : /*
1297 : * "NOT x" can be interpreted as "x = false", so get the argument and
1298 : * proceed with seeing if it's a suitable Var.
744 tomas.vondra 1299 EUB : */
744 tomas.vondra 1300 UIC 0 : clause_expr = (Node *) get_notclausearg(clause);
1301 : }
1302 : else
1303 : {
1304 : /*
1305 : * A boolean expression "x" can be interpreted as "x = true", so
1306 : * proceed with seeing if it's a suitable Var.
744 tomas.vondra 1307 EUB : */
744 tomas.vondra 1308 UIC 0 : clause_expr = (Node *) clause;
1309 : }
1310 :
1311 : /*
1312 : * We may ignore any RelabelType node above the operand. (There won't be
1313 : * more than one, since eval_const_expressions has been applied already.)
744 tomas.vondra 1314 ECB : */
744 tomas.vondra 1315 GBC 171 : if (IsA(clause_expr, RelabelType))
744 tomas.vondra 1316 UIC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1317 :
1318 : /*
1319 : * Search for a matching statistics expression.
744 tomas.vondra 1320 ECB : */
744 tomas.vondra 1321 GIC 174 : foreach(lc, statlist)
744 tomas.vondra 1322 ECB : {
744 tomas.vondra 1323 GIC 171 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1324 :
744 tomas.vondra 1325 ECB : /* ignore stats without dependencies */
744 tomas.vondra 1326 GBC 171 : if (info->kind != STATS_EXT_DEPENDENCIES)
744 tomas.vondra 1327 UIC 0 : continue;
744 tomas.vondra 1328 ECB :
744 tomas.vondra 1329 GIC 279 : foreach(lc2, info->exprs)
744 tomas.vondra 1330 ECB : {
744 tomas.vondra 1331 GIC 276 : Node *stat_expr = (Node *) lfirst(lc2);
744 tomas.vondra 1332 ECB :
744 tomas.vondra 1333 GIC 276 : if (equal(clause_expr, stat_expr))
744 tomas.vondra 1334 ECB : {
744 tomas.vondra 1335 CBC 168 : *expr = stat_expr;
744 tomas.vondra 1336 GIC 168 : return true;
1337 : }
1338 : }
1339 : }
744 tomas.vondra 1340 ECB :
744 tomas.vondra 1341 GIC 3 : return false;
1342 : }
1343 :
1344 : /*
1345 : * dependencies_clauselist_selectivity
1346 : * Return the estimated selectivity of (a subset of) the given clauses
1347 : * using functional dependency statistics, or 1.0 if no useful functional
1348 : * dependency statistic exists.
1349 : *
1350 : * 'estimatedclauses' is an input/output argument that gets a bit set
1351 : * corresponding to the (zero-based) list index of each clause that is included
1352 : * in the estimated selectivity.
1353 : *
1354 : * Given equality clauses on attributes (a,b) we find the strongest dependency
1355 : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1356 : * dependency, we then combine the per-clause selectivities using the formula
1357 : *
1358 : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1359 : *
1360 : * where 'f' is the degree of the dependency. (Actually we use a slightly
1361 : * modified version of this formula -- see clauselist_apply_dependencies()).
1362 : *
1363 : * With clauses on more than two attributes, the dependencies are applied
1364 : * recursively, starting with the widest/strongest dependencies. For example
1365 : * P(a,b,c) is first split like this:
1366 : *
1367 : * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1368 : *
1369 : * assuming (a,b=>c) is the strongest dependency.
1370 : */
2195 simon 1371 ECB : Selectivity
2195 simon 1372 GIC 894 : dependencies_clauselist_selectivity(PlannerInfo *root,
1373 : List *clauses,
1374 : int varRelid,
1375 : JoinType jointype,
1376 : SpecialJoinInfo *sjinfo,
1377 : RelOptInfo *rel,
1378 : Bitmapset **estimatedclauses)
2195 simon 1379 ECB : {
2195 simon 1380 GIC 894 : Selectivity s1 = 1.0;
2195 simon 1381 ECB : ListCell *l;
2195 simon 1382 GIC 894 : Bitmapset *clauses_attnums = NULL;
1383 : AttrNumber *list_attnums;
1384 : int listidx;
1385 : MVDependencies **func_dependencies;
1386 : int nfunc_dependencies;
1387 : int total_ndeps;
1388 : MVDependency **dependencies;
1389 : int ndependencies;
1390 : int i;
744 tomas.vondra 1391 ECB : AttrNumber attnum_offset;
449 tomas.vondra 1392 GIC 894 : RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1393 :
1394 : /* unique expressions */
1395 : Node **unique_exprs;
1396 : int unique_exprs_cnt;
1397 :
2195 simon 1398 ECB : /* check if there's any stats that might be useful for us. */
2195 simon 1399 CBC 894 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
2195 simon 1400 GIC 234 : return 1.0;
2195 simon 1401 ECB :
1107 dean.a.rasheed 1402 CBC 660 : list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
2195 simon 1403 GIC 660 : list_length(clauses));
1404 :
1405 : /*
1406 : * We allocate space as if every clause was a unique expression, although
1407 : * that's probably overkill. Some will be simple column references that
1408 : * we'll translate to attnums, and there might be duplicates. But it's
1409 : * easier and cheaper to just do one allocation than repalloc later.
744 tomas.vondra 1410 ECB : */
744 tomas.vondra 1411 CBC 660 : unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
744 tomas.vondra 1412 GIC 660 : unique_exprs_cnt = 0;
1413 :
1414 : /*
1415 : * Pre-process the clauses list to extract the attnums seen in each item.
1416 : * We need to determine if there's any clauses which will be useful for
1417 : * dependency selectivity estimations. Along the way we'll record all of
1418 : * the attnums for each clause in a list which we'll reference later so we
1419 : * don't need to repeat the same work again. We'll also keep track of all
1420 : * attnums seen.
1421 : *
1422 : * We also skip clauses that we already estimated using different types of
1423 : * statistics (we treat them as incompatible).
1424 : *
1425 : * To handle expressions, we assign them negative attnums, as if it was a
1426 : * system attribute (this is fine, as we only allow extended stats on user
1427 : * attributes). And then we offset everything by the number of
1428 : * expressions, so that we can store the values in a bitmapset.
2195 simon 1429 ECB : */
2195 simon 1430 CBC 660 : listidx = 0;
2195 simon 1431 GIC 2133 : foreach(l, clauses)
2195 simon 1432 ECB : {
2195 simon 1433 GIC 1473 : Node *clause = (Node *) lfirst(l);
2195 simon 1434 ECB : AttrNumber attnum;
744 tomas.vondra 1435 GIC 1473 : Node *expr = NULL;
1436 :
744 tomas.vondra 1437 ECB : /* ignore clause by default */
744 tomas.vondra 1438 GIC 1473 : list_attnums[listidx] = InvalidAttrNumber;
2195 simon 1439 ECB :
744 tomas.vondra 1440 GIC 1473 : if (!bms_is_member(listidx, *estimatedclauses))
1441 : {
1442 : /*
1443 : * If it's a simple column reference, just extract the attnum. If
1444 : * it's an expression, assign a negative attnum as if it was a
1445 : * system attribute.
744 tomas.vondra 1446 ECB : */
744 tomas.vondra 1447 GIC 1452 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
744 tomas.vondra 1448 ECB : {
744 tomas.vondra 1449 GIC 1176 : list_attnums[listidx] = attnum;
744 tomas.vondra 1450 ECB : }
744 tomas.vondra 1451 GIC 276 : else if (dependency_is_compatible_expression(clause, rel->relid,
1452 : rel->statlist,
1453 : &expr))
1454 : {
744 tomas.vondra 1455 ECB : /* special attnum assigned to this expression */
744 tomas.vondra 1456 GIC 141 : attnum = InvalidAttrNumber;
744 tomas.vondra 1457 ECB :
744 tomas.vondra 1458 GIC 141 : Assert(expr != NULL);
1459 :
744 tomas.vondra 1460 ECB : /* If the expression is duplicate, use the same attnum. */
744 tomas.vondra 1461 GIC 237 : for (i = 0; i < unique_exprs_cnt; i++)
744 tomas.vondra 1462 ECB : {
744 tomas.vondra 1463 GIC 96 : if (equal(unique_exprs[i], expr))
1464 : {
744 tomas.vondra 1465 EUB : /* negative attribute number to expression */
744 tomas.vondra 1466 UBC 0 : attnum = -(i + 1);
744 tomas.vondra 1467 UIC 0 : break;
1468 : }
1469 : }
1470 :
744 tomas.vondra 1471 ECB : /* not found in the list, so add it */
744 tomas.vondra 1472 GIC 141 : if (attnum == InvalidAttrNumber)
744 tomas.vondra 1473 ECB : {
744 tomas.vondra 1474 GIC 141 : unique_exprs[unique_exprs_cnt++] = expr;
1475 :
744 tomas.vondra 1476 ECB : /* after incrementing the value, to get -1, -2, ... */
744 tomas.vondra 1477 GIC 141 : attnum = (-unique_exprs_cnt);
1478 : }
1479 :
744 tomas.vondra 1480 ECB : /* remember which attnum was assigned to this clause */
744 tomas.vondra 1481 GIC 141 : list_attnums[listidx] = attnum;
1482 : }
1483 : }
2195 simon 1484 ECB :
2195 simon 1485 GIC 1473 : listidx++;
1486 : }
2195 simon 1487 ECB :
744 tomas.vondra 1488 GIC 660 : Assert(listidx == list_length(clauses));
1489 :
1490 : /*
1491 : * How much we need to offset the attnums? If there are no expressions,
1492 : * then no offset is needed. Otherwise we need to offset enough for the
1493 : * lowest value (-unique_exprs_cnt) to become 1.
744 tomas.vondra 1494 ECB : */
744 tomas.vondra 1495 CBC 660 : if (unique_exprs_cnt > 0)
744 tomas.vondra 1496 GIC 66 : attnum_offset = (unique_exprs_cnt + 1);
744 tomas.vondra 1497 ECB : else
744 tomas.vondra 1498 GIC 594 : attnum_offset = 0;
1499 :
1500 : /*
1501 : * Now that we know how many expressions there are, we can offset the
1502 : * values just enough to build the bitmapset.
744 tomas.vondra 1503 ECB : */
744 tomas.vondra 1504 GIC 2133 : for (i = 0; i < list_length(clauses); i++)
1505 : {
1506 : AttrNumber attnum;
1507 :
744 tomas.vondra 1508 ECB : /* ignore incompatible or already estimated clauses */
744 tomas.vondra 1509 CBC 1473 : if (list_attnums[i] == InvalidAttrNumber)
744 tomas.vondra 1510 GIC 156 : continue;
1511 :
744 tomas.vondra 1512 ECB : /* make sure the attnum is in the expected range */
744 tomas.vondra 1513 CBC 1317 : Assert(list_attnums[i] >= (-unique_exprs_cnt));
744 tomas.vondra 1514 GIC 1317 : Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1515 :
744 tomas.vondra 1516 ECB : /* make sure the attnum is positive (valid AttrNumber) */
744 tomas.vondra 1517 GIC 1317 : attnum = list_attnums[i] + attnum_offset;
1518 :
1519 : /*
1520 : * Either it's a regular attribute, or it's an expression, in which
1521 : * case we must not have seen it before (expressions are unique).
1522 : *
1523 : * XXX Check whether it's a regular attribute has to be done using the
1524 : * original attnum, while the second check has to use the value with
1525 : * an offset.
744 tomas.vondra 1526 ECB : */
744 tomas.vondra 1527 GIC 1317 : Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1528 : !bms_is_member(attnum, clauses_attnums));
1529 :
1530 : /*
1531 : * Remember the offset attnum, both for attributes and expressions.
1532 : * We'll pass list_attnums to clauselist_apply_dependencies, which
1533 : * uses it to identify clauses in a bitmap. We could also pass the
1534 : * offset, but this is more convenient.
744 tomas.vondra 1535 ECB : */
744 tomas.vondra 1536 GIC 1317 : list_attnums[i] = attnum;
744 tomas.vondra 1537 ECB :
744 tomas.vondra 1538 GIC 1317 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
1539 : }
1540 :
1541 : /*
1542 : * If there's not at least two distinct attnums and expressions, then
1543 : * reject the whole list of clauses. We must return 1.0 so the calling
1544 : * function's selectivity is unaffected.
2195 simon 1545 ECB : */
956 drowley 1546 GIC 660 : if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
2195 simon 1547 ECB : {
1107 dean.a.rasheed 1548 CBC 96 : bms_free(clauses_attnums);
2195 simon 1549 96 : pfree(list_attnums);
2195 simon 1550 GIC 96 : return 1.0;
1551 : }
1552 :
1553 : /*
1554 : * Load all functional dependencies matching at least two parameters. We
1555 : * can simply consider all dependencies at once, without having to search
1556 : * for the best statistics object.
1557 : *
1558 : * To not waste cycles and memory, we deserialize dependencies only for
1559 : * statistics that match at least two attributes. The array is allocated
1560 : * with the assumption that all objects match - we could grow the array to
1561 : * make it just the right size, but it's likely wasteful anyway thanks to
1562 : * moving the freed chunks to freelists etc.
1182 tomas.vondra 1563 ECB : */
1107 dean.a.rasheed 1564 CBC 564 : func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
1565 564 : list_length(rel->statlist));
1566 564 : nfunc_dependencies = 0;
1107 dean.a.rasheed 1567 GIC 564 : total_ndeps = 0;
1182 tomas.vondra 1568 ECB :
1107 dean.a.rasheed 1569 GIC 1197 : foreach(l, rel->statlist)
1182 tomas.vondra 1570 ECB : {
1107 dean.a.rasheed 1571 GIC 633 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1572 : int nmatched;
1573 : int nexprs;
1574 : int k;
1575 : MVDependencies *deps;
1576 :
1182 tomas.vondra 1577 ECB : /* skip statistics that are not of the correct type */
1182 tomas.vondra 1578 CBC 633 : if (stat->kind != STATS_EXT_DEPENDENCIES)
1182 tomas.vondra 1579 GIC 54 : continue;
1580 :
448 tomas.vondra 1581 ECB : /* skip statistics with mismatching stxdinherit value */
448 tomas.vondra 1582 GBC 579 : if (stat->inherit != rte->inh)
448 tomas.vondra 1583 UIC 0 : continue;
1584 :
1585 : /*
1586 : * Count matching attributes - we have to undo the attnum offsets. The
1587 : * input attribute numbers are not offset (expressions are not
1588 : * included in stat->keys, so it's not necessary). But we need to
1589 : * offset it before checking against clauses_attnums.
744 tomas.vondra 1590 ECB : */
744 tomas.vondra 1591 CBC 579 : nmatched = 0;
1592 579 : k = -1;
744 tomas.vondra 1593 GIC 2124 : while ((k = bms_next_member(stat->keys, k)) >= 0)
744 tomas.vondra 1594 ECB : {
744 tomas.vondra 1595 GIC 1545 : AttrNumber attnum = (AttrNumber) k;
1596 :
744 tomas.vondra 1597 ECB : /* skip expressions */
744 tomas.vondra 1598 GBC 1545 : if (!AttrNumberIsForUserDefinedAttr(attnum))
744 tomas.vondra 1599 UIC 0 : continue;
1600 :
744 tomas.vondra 1601 ECB : /* apply the same offset as above */
744 tomas.vondra 1602 GIC 1545 : attnum += attnum_offset;
744 tomas.vondra 1603 ECB :
744 tomas.vondra 1604 CBC 1545 : if (bms_is_member(attnum, clauses_attnums))
744 tomas.vondra 1605 GIC 1152 : nmatched++;
1606 : }
1607 :
744 tomas.vondra 1608 ECB : /* count matching expressions */
744 tomas.vondra 1609 CBC 579 : nexprs = 0;
744 tomas.vondra 1610 GIC 708 : for (i = 0; i < unique_exprs_cnt; i++)
1611 : {
1612 : ListCell *lc;
744 tomas.vondra 1613 ECB :
744 tomas.vondra 1614 GIC 516 : foreach(lc, stat->exprs)
744 tomas.vondra 1615 ECB : {
744 tomas.vondra 1616 GIC 387 : Node *stat_expr = (Node *) lfirst(lc);
1617 :
744 tomas.vondra 1618 ECB : /* try to match it */
744 tomas.vondra 1619 CBC 387 : if (equal(stat_expr, unique_exprs[i]))
744 tomas.vondra 1620 GIC 129 : nexprs++;
1621 : }
1622 : }
1623 :
1624 : /*
1625 : * Skip objects matching fewer than two attributes/expressions from
1626 : * clauses.
744 tomas.vondra 1627 ECB : */
744 tomas.vondra 1628 CBC 579 : if (nmatched + nexprs < 2)
1182 tomas.vondra 1629 GIC 9 : continue;
1182 tomas.vondra 1630 ECB :
448 tomas.vondra 1631 GIC 570 : deps = statext_dependencies_load(stat->statOid, rte->inh);
1632 :
1633 : /*
1634 : * The expressions may be represented by different attnums in the
1635 : * stats, we need to remap them to be consistent with the clauses.
1636 : * That will make the later steps (e.g. picking the strongest item and
1637 : * so on) much simpler and cheaper, because it won't need to care
1638 : * about the offset at all.
1639 : *
1640 : * When we're at it, we can ignore dependencies that are not fully
1641 : * matched by clauses (i.e. referencing attributes or expressions that
1642 : * are not in the clauses).
1643 : *
1644 : * We have to do this for all statistics, as long as there are any
1645 : * expressions - we need to shift the attnums in all dependencies.
1646 : *
1647 : * XXX Maybe we should do this always, because it also eliminates some
1648 : * of the dependencies early. It might be cheaper than having to walk
1649 : * the longer list in find_strongest_dependency later, especially as
1650 : * we need to do that repeatedly?
1651 : *
1652 : * XXX We have to do this even when there are no expressions in
1653 : * clauses, otherwise find_strongest_dependency may fail for stats
1654 : * with expressions (due to lookup of negative value in bitmap). So we
1655 : * need to at least filter out those dependencies. Maybe we could do
1656 : * it in a cheaper way (if there are no expr clauses, we can just
1657 : * discard all negative attnums without any lookups).
744 tomas.vondra 1658 ECB : */
744 tomas.vondra 1659 GIC 570 : if (unique_exprs_cnt > 0 || stat->exprs != NIL)
744 tomas.vondra 1660 ECB : {
744 tomas.vondra 1661 GIC 54 : int ndeps = 0;
744 tomas.vondra 1662 ECB :
744 tomas.vondra 1663 GIC 324 : for (i = 0; i < deps->ndeps; i++)
744 tomas.vondra 1664 ECB : {
744 tomas.vondra 1665 CBC 270 : bool skip = false;
744 tomas.vondra 1666 GIC 270 : MVDependency *dep = deps->deps[i];
1667 : int j;
744 tomas.vondra 1668 ECB :
744 tomas.vondra 1669 GIC 753 : for (j = 0; j < dep->nattributes; j++)
1670 : {
1671 : int idx;
744 tomas.vondra 1672 ECB : Node *expr;
744 tomas.vondra 1673 GIC 615 : AttrNumber unique_attnum = InvalidAttrNumber;
1674 : AttrNumber attnum;
744 tomas.vondra 1675 ECB :
1676 : /* undo the per-statistics offset */
744 tomas.vondra 1677 GIC 615 : attnum = dep->attributes[j];
1678 :
1679 : /*
1680 : * For regular attributes we can simply check if it
1681 : * matches any clause. If there's no matching clause, we
1682 : * can just ignore it. We need to offset the attnum
744 tomas.vondra 1683 ECB : * though.
1684 : */
744 tomas.vondra 1685 GBC 615 : if (AttrNumberIsForUserDefinedAttr(attnum))
1686 : {
744 tomas.vondra 1687 UBC 0 : dep->attributes[j] = attnum + attnum_offset;
1688 :
1689 0 : if (!bms_is_member(dep->attributes[j], clauses_attnums))
744 tomas.vondra 1690 EUB : {
744 tomas.vondra 1691 UIC 0 : skip = true;
1692 0 : break;
744 tomas.vondra 1693 EUB : }
1694 :
744 tomas.vondra 1695 UIC 0 : continue;
1696 : }
1697 :
1698 : /*
1699 : * the attnum should be a valid system attnum (-1, -2,
744 tomas.vondra 1700 ECB : * ...)
1701 : */
744 tomas.vondra 1702 GIC 615 : Assert(AttributeNumberIsValid(attnum));
1703 :
1704 : /*
1705 : * For expressions, we need to do two translations. First
1706 : * we have to translate the negative attnum to index in
1707 : * the list of expressions (in the statistics object).
1708 : * Then we need to see if there's a matching clause. The
1709 : * index of the unique expression determines the attnum
744 tomas.vondra 1710 ECB : * (and we offset it).
1711 : */
744 tomas.vondra 1712 GIC 615 : idx = -(1 + attnum);
744 tomas.vondra 1713 ECB :
1714 : /* Is the expression index is valid? */
744 tomas.vondra 1715 CBC 615 : Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1716 :
744 tomas.vondra 1717 GIC 615 : expr = (Node *) list_nth(stat->exprs, idx);
744 tomas.vondra 1718 ECB :
1719 : /* try to find the expression in the unique list */
186 drowley 1720 GNC 1230 : for (int m = 0; m < unique_exprs_cnt; m++)
1721 : {
1722 : /*
1723 : * found a matching unique expression, use the attnum
744 tomas.vondra 1724 ECB : * (derived from index of the unique expression)
1725 : */
186 drowley 1726 GNC 1098 : if (equal(unique_exprs[m], expr))
744 tomas.vondra 1727 ECB : {
186 drowley 1728 GNC 483 : unique_attnum = -(m + 1) + attnum_offset;
744 tomas.vondra 1729 GIC 483 : break;
1730 : }
1731 : }
1732 :
1733 : /*
1734 : * Found no matching expression, so we can simply skip
1735 : * this dependency, because there's no chance it will be
744 tomas.vondra 1736 ECB : * fully covered.
1737 : */
744 tomas.vondra 1738 CBC 615 : if (unique_attnum == InvalidAttrNumber)
744 tomas.vondra 1739 ECB : {
744 tomas.vondra 1740 GIC 132 : skip = true;
1741 132 : break;
1742 : }
744 tomas.vondra 1743 ECB :
1744 : /* otherwise remap it to the new attnum */
744 tomas.vondra 1745 GIC 483 : dep->attributes[j] = unique_attnum;
1746 : }
1107 dean.a.rasheed 1747 ECB :
1748 : /* if found a matching dependency, keep it */
744 tomas.vondra 1749 GIC 270 : if (!skip)
744 tomas.vondra 1750 ECB : {
744 tomas.vondra 1751 EUB : /* maybe we've skipped something earlier, so move it */
744 tomas.vondra 1752 GIC 138 : if (ndeps != i)
744 tomas.vondra 1753 LBC 0 : deps->deps[ndeps] = deps->deps[i];
1754 :
744 tomas.vondra 1755 GIC 138 : ndeps++;
1756 : }
744 tomas.vondra 1757 ECB : }
1758 :
744 tomas.vondra 1759 GIC 54 : deps->ndeps = ndeps;
1760 : }
1761 :
1762 : /*
1763 : * It's possible we've removed all dependencies, in which case we
744 tomas.vondra 1764 ECB : * don't bother adding it to the list.
1765 : */
744 tomas.vondra 1766 CBC 570 : if (deps->ndeps > 0)
744 tomas.vondra 1767 ECB : {
744 tomas.vondra 1768 CBC 570 : func_dependencies[nfunc_dependencies] = deps;
744 tomas.vondra 1769 GIC 570 : total_ndeps += deps->ndeps;
1770 570 : nfunc_dependencies++;
1771 : }
1772 : }
2195 simon 1773 ECB :
1774 : /* if no matching stats could be found then we've nothing to do */
1107 dean.a.rasheed 1775 GBC 564 : if (nfunc_dependencies == 0)
2195 simon 1776 EUB : {
1107 dean.a.rasheed 1777 UBC 0 : pfree(func_dependencies);
1778 0 : bms_free(clauses_attnums);
2195 simon 1779 0 : pfree(list_attnums);
744 tomas.vondra 1780 UIC 0 : pfree(unique_exprs);
2195 simon 1781 0 : return 1.0;
1782 : }
1783 :
1784 : /*
1785 : * Work out which dependencies we can apply, starting with the
888 michael 1786 ECB : * widest/strongest ones, and proceeding to smaller/weaker ones.
1787 : */
1107 dean.a.rasheed 1788 CBC 564 : dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
1789 : total_ndeps);
1107 dean.a.rasheed 1790 GIC 564 : ndependencies = 0;
1107 dean.a.rasheed 1791 ECB :
1792 : while (true)
2195 simon 1793 GIC 711 : {
1794 : MVDependency *dependency;
1795 : AttrNumber attnum;
2195 simon 1796 ECB :
1797 : /* the widest/strongest dependency, fully matched by clauses */
1107 dean.a.rasheed 1798 GIC 1275 : dependency = find_strongest_dependency(func_dependencies,
1107 dean.a.rasheed 1799 ECB : nfunc_dependencies,
2195 simon 1800 : clauses_attnums);
2195 simon 1801 GIC 1275 : if (!dependency)
2195 simon 1802 CBC 564 : break;
1803 :
1107 dean.a.rasheed 1804 GIC 711 : dependencies[ndependencies++] = dependency;
2195 simon 1805 ECB :
1107 dean.a.rasheed 1806 : /* Ignore dependencies using this implied attribute in later loops */
1107 dean.a.rasheed 1807 GIC 711 : attnum = dependency->attributes[dependency->nattributes - 1];
1808 711 : clauses_attnums = bms_del_member(clauses_attnums, attnum);
1809 : }
1810 :
1811 : /*
1812 : * If we found applicable dependencies, use them to estimate all
1107 dean.a.rasheed 1813 ECB : * compatible clauses on attributes that they refer to.
1814 : */
1107 dean.a.rasheed 1815 GIC 564 : if (ndependencies != 0)
1816 564 : s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1817 : sjinfo, dependencies, ndependencies,
1818 : list_attnums, estimatedclauses);
1107 dean.a.rasheed 1819 ECB :
1182 tomas.vondra 1820 : /* free deserialized functional dependencies (and then the array) */
1107 dean.a.rasheed 1821 GIC 1134 : for (i = 0; i < nfunc_dependencies; i++)
1107 dean.a.rasheed 1822 CBC 570 : pfree(func_dependencies[i]);
1182 tomas.vondra 1823 ECB :
2195 simon 1824 CBC 564 : pfree(dependencies);
1107 dean.a.rasheed 1825 564 : pfree(func_dependencies);
1826 564 : bms_free(clauses_attnums);
2195 simon 1827 GIC 564 : pfree(list_attnums);
744 tomas.vondra 1828 CBC 564 : pfree(unique_exprs);
1829 :
2195 simon 1830 GIC 564 : return s1;
1831 : }
|