LCOV - differential code coverage report
Current view: top level - src/backend/statistics - dependencies.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 88.7 % 515 457 2 1 13 42 6 97 3 351 8 104 2 1
Current Date: 2023-04-08 15:15:32 Functions: 85.0 % 20 17 3 1 2 14 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * dependencies.c
       4                 :  *    POSTGRES functional dependencies
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *    src/backend/statistics/dependencies.c
      11                 :  *
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : #include "postgres.h"
      15                 : 
      16                 : #include "access/htup_details.h"
      17                 : #include "access/sysattr.h"
      18                 : #include "catalog/pg_operator.h"
      19                 : #include "catalog/pg_statistic_ext.h"
      20                 : #include "catalog/pg_statistic_ext_data.h"
      21                 : #include "lib/stringinfo.h"
      22                 : #include "nodes/nodeFuncs.h"
      23                 : #include "nodes/nodes.h"
      24                 : #include "nodes/pathnodes.h"
      25                 : #include "optimizer/clauses.h"
      26                 : #include "optimizer/optimizer.h"
      27                 : #include "parser/parsetree.h"
      28                 : #include "statistics/extended_stats_internal.h"
      29                 : #include "statistics/statistics.h"
      30                 : #include "utils/bytea.h"
      31                 : #include "utils/fmgroids.h"
      32                 : #include "utils/fmgrprotos.h"
      33                 : #include "utils/lsyscache.h"
      34                 : #include "utils/memutils.h"
      35                 : #include "utils/selfuncs.h"
      36                 : #include "utils/syscache.h"
      37                 : #include "utils/typcache.h"
      38                 : 
      39                 : /* size of the struct header fields (magic, type, ndeps) */
      40                 : #define SizeOfHeader        (3 * sizeof(uint32))
      41                 : 
      42                 : /* size of a serialized dependency (degree, natts, atts) */
      43                 : #define SizeOfItem(natts) \
      44                 :     (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
      45                 : 
      46                 : /* minimal size of a dependency (with two attributes) */
      47                 : #define MinSizeOfItem   SizeOfItem(2)
      48                 : 
      49                 : /* minimal size of dependencies, when all deps are minimal */
      50                 : #define MinSizeOfItems(ndeps) \
      51                 :     (SizeOfHeader + (ndeps) * MinSizeOfItem)
      52                 : 
      53                 : /*
      54                 :  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
      55                 :  * k-permutations of n elements, except that the order does not matter for the
      56                 :  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
      57                 :  */
      58                 : typedef struct DependencyGeneratorData
      59                 : {
      60                 :     int         k;              /* size of the dependency */
      61                 :     int         n;              /* number of possible attributes */
      62                 :     int         current;        /* next dependency to return (index) */
      63                 :     AttrNumber  ndependencies;  /* number of dependencies generated */
      64                 :     AttrNumber *dependencies;   /* array of pre-generated dependencies  */
      65                 : } DependencyGeneratorData;
      66                 : 
      67                 : typedef DependencyGeneratorData *DependencyGenerator;
      68                 : 
      69                 : static void generate_dependencies_recurse(DependencyGenerator state,
      70                 :                                           int index, AttrNumber start, AttrNumber *current);
      71                 : static void generate_dependencies(DependencyGenerator state);
      72                 : static DependencyGenerator DependencyGenerator_init(int n, int k);
      73                 : static void DependencyGenerator_free(DependencyGenerator state);
      74                 : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
      75                 : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
      76                 : static bool dependency_is_fully_matched(MVDependency *dependency,
      77                 :                                         Bitmapset *attnums);
      78                 : static bool dependency_is_compatible_clause(Node *clause, Index relid,
      79                 :                                             AttrNumber *attnum);
      80                 : static bool dependency_is_compatible_expression(Node *clause, Index relid,
      81                 :                                                 List *statlist, Node **expr);
      82                 : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
      83                 :                                                int ndependencies, Bitmapset *attnums);
      84                 : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
      85                 :                                                  int varRelid, JoinType jointype,
      86                 :                                                  SpecialJoinInfo *sjinfo,
      87                 :                                                  MVDependency **dependencies,
      88                 :                                                  int ndependencies,
      89                 :                                                  AttrNumber *list_attnums,
      90                 :                                                  Bitmapset **estimatedclauses);
      91                 : 
      92                 : static void
      93 CBC         372 : generate_dependencies_recurse(DependencyGenerator state, int index,
      94                 :                               AttrNumber start, AttrNumber *current)
      95                 : {
      96                 :     /*
      97                 :      * The generator handles the first (k-1) elements differently from the
      98                 :      * last element.
      99                 :      */
     100             372 :     if (index < (state->k - 1))
     101                 :     {
     102                 :         AttrNumber  i;
     103                 : 
     104                 :         /*
     105                 :          * The first (k-1) values have to be in ascending order, which we
     106                 :          * generate recursively.
     107                 :          */
     108                 : 
     109             444 :         for (i = start; i < state->n; i++)
     110                 :         {
     111             288 :             current[index] = i;
     112             288 :             generate_dependencies_recurse(state, (index + 1), (i + 1), current);
     113                 :         }
     114                 :     }
     115                 :     else
     116                 :     {
     117                 :         int         i;
     118                 : 
     119                 :         /*
     120                 :          * the last element is the implied value, which does not respect the
     121                 :          * ascending order. We just need to check that the value is not in the
     122                 :          * first (k-1) elements.
     123                 :          */
     124                 : 
     125             792 :         for (i = 0; i < state->n; i++)
     126                 :         {
     127                 :             int         j;
     128             576 :             bool        match = false;
     129                 : 
     130             576 :             current[index] = i;
     131                 : 
     132            1008 :             for (j = 0; j < index; j++)
     133                 :             {
     134             720 :                 if (current[j] == i)
     135                 :                 {
     136             288 :                     match = true;
     137             288 :                     break;
     138                 :                 }
     139                 :             }
     140                 : 
     141                 :             /*
     142                 :              * If the value is not found in the first part of the dependency,
     143                 :              * we're done.
     144                 :              */
     145             576 :             if (!match)
     146                 :             {
     147             576 :                 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
     148             288 :                                                               state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
     149             288 :                 memcpy(&state->dependencies[(state->k * state->ndependencies)],
     150             288 :                        current, state->k * sizeof(AttrNumber));
     151             288 :                 state->ndependencies++;
     152                 :             }
     153                 :         }
     154                 :     }
     155             372 : }
     156                 : 
     157                 : /* generate all dependencies (k-permutations of n elements) */
     158                 : static void
     159              84 : generate_dependencies(DependencyGenerator state)
     160                 : {
     161              84 :     AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
     162                 : 
     163              84 :     generate_dependencies_recurse(state, 0, 0, current);
     164                 : 
     165              84 :     pfree(current);
     166              84 : }
     167                 : 
     168                 : /*
     169                 :  * initialize the DependencyGenerator of variations, and prebuild the variations
     170                 :  *
     171                 :  * This pre-builds all the variations. We could also generate them in
     172                 :  * DependencyGenerator_next(), but this seems simpler.
     173                 :  */
     174                 : static DependencyGenerator
     175              84 : DependencyGenerator_init(int n, int k)
     176                 : {
     177                 :     DependencyGenerator state;
     178                 : 
     179              84 :     Assert((n >= k) && (k > 0));
     180                 : 
     181                 :     /* allocate the DependencyGenerator state */
     182              84 :     state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
     183              84 :     state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     184                 : 
     185              84 :     state->ndependencies = 0;
     186              84 :     state->current = 0;
     187              84 :     state->k = k;
     188              84 :     state->n = n;
     189                 : 
     190                 :     /* now actually pre-generate all the variations */
     191              84 :     generate_dependencies(state);
     192                 : 
     193              84 :     return state;
     194                 : }
     195                 : 
     196                 : /* free the DependencyGenerator state */
     197                 : static void
     198              84 : DependencyGenerator_free(DependencyGenerator state)
     199                 : {
     200              84 :     pfree(state->dependencies);
     201              84 :     pfree(state);
     202              84 : }
     203                 : 
     204                 : /* generate next combination */
     205                 : static AttrNumber *
     206             372 : DependencyGenerator_next(DependencyGenerator state)
     207                 : {
     208             372 :     if (state->current == state->ndependencies)
     209              84 :         return NULL;
     210                 : 
     211             288 :     return &state->dependencies[state->k * state->current++];
     212                 : }
     213                 : 
     214                 : 
     215                 : /*
     216                 :  * validates functional dependency on the data
     217                 :  *
     218                 :  * An actual work horse of detecting functional dependencies. Given a variation
     219                 :  * of k attributes, it checks that the first (k-1) are sufficient to determine
     220                 :  * the last one.
     221                 :  */
     222                 : static double
     223             288 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
     224                 : {
     225                 :     int         i,
     226                 :                 nitems;
     227                 :     MultiSortSupport mss;
     228                 :     SortItem   *items;
     229                 :     AttrNumber *attnums_dep;
     230                 : 
     231                 :     /* counters valid within a group */
     232             288 :     int         group_size = 0;
     233             288 :     int         n_violations = 0;
     234                 : 
     235                 :     /* total number of rows supporting (consistent with) the dependency */
     236             288 :     int         n_supporting_rows = 0;
     237                 : 
     238                 :     /* Make sure we have at least two input attributes. */
     239             288 :     Assert(k >= 2);
     240                 : 
     241                 :     /* sort info for all attributes columns */
     242             288 :     mss = multi_sort_init(k);
     243                 : 
     244                 :     /*
     245                 :      * Translate the array of indexes to regular attnums for the dependency
     246                 :      * (we will need this to identify the columns in StatsBuildData).
     247                 :      */
     248             288 :     attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     249             936 :     for (i = 0; i < k; i++)
     250             648 :         attnums_dep[i] = data->attnums[dependency[i]];
     251                 : 
     252                 :     /*
     253                 :      * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     254                 :      *
     255                 :      * (a) sort the data lexicographically
     256                 :      *
     257                 :      * (b) split the data into groups by first (k-1) columns
     258                 :      *
     259                 :      * (c) for each group count different values in the last column
     260                 :      *
     261                 :      * We use the column data types' default sort operators and collations;
     262                 :      * perhaps at some point it'd be worth using column-specific collations?
     263                 :      */
     264                 : 
     265                 :     /* prepare the sort function for the dimensions */
     266             936 :     for (i = 0; i < k; i++)
     267                 :     {
     268             648 :         VacAttrStats *colstat = data->stats[dependency[i]];
     269                 :         TypeCacheEntry *type;
     270                 : 
     271             648 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     272             648 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     273 UBC           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     274                 :                  colstat->attrtypid);
     275                 : 
     276                 :         /* prepare the sort function for this dimension */
     277 CBC         648 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     278                 :     }
     279                 : 
     280                 :     /*
     281                 :      * build an array of SortItem(s) sorted using the multi-sort support
     282                 :      *
     283                 :      * XXX This relies on all stats entries pointing to the same tuple
     284                 :      * descriptor.  For now that assumption holds, but it might change in the
     285                 :      * future for example if we support statistics on multiple tables.
     286                 :      */
     287             288 :     items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
     288                 : 
     289                 :     /*
     290                 :      * Walk through the sorted array, split it into rows according to the
     291                 :      * first (k-1) columns. If there's a single value in the last column, we
     292                 :      * count the group as 'supporting' the functional dependency. Otherwise we
     293                 :      * count it as contradicting.
     294                 :      */
     295                 : 
     296                 :     /* start with the first row forming a group */
     297             288 :     group_size = 1;
     298                 : 
     299                 :     /* loop 1 beyond the end of the array so that we count the final group */
     300          752685 :     for (i = 1; i <= nitems; i++)
     301                 :     {
     302                 :         /*
     303                 :          * Check if the group ended, which may be either because we processed
     304                 :          * all the items (i==nitems), or because the i-th item is not equal to
     305                 :          * the preceding one.
     306                 :          */
     307         1504506 :         if (i == nitems ||
     308          752109 :             multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
     309                 :         {
     310                 :             /*
     311                 :              * If no violations were found in the group then track the rows of
     312                 :              * the group as supporting the functional dependency.
     313                 :              */
     314           16107 :             if (n_violations == 0)
     315            9771 :                 n_supporting_rows += group_size;
     316                 : 
     317                 :             /* Reset counters for the new group */
     318           16107 :             n_violations = 0;
     319           16107 :             group_size = 1;
     320           16107 :             continue;
     321                 :         }
     322                 :         /* first columns match, but the last one does not (so contradicting) */
     323          736290 :         else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
     324           30042 :             n_violations++;
     325                 : 
     326          736290 :         group_size++;
     327                 :     }
     328                 : 
     329                 :     /* Compute the 'degree of validity' as (supporting/total). */
     330             288 :     return (n_supporting_rows * 1.0 / data->numrows);
     331                 : }
     332                 : 
     333                 : /*
     334                 :  * detects functional dependencies between groups of columns
     335                 :  *
     336                 :  * Generates all possible subsets of columns (variations) and computes
     337                 :  * the degree of validity for each one. For example when creating statistics
     338                 :  * on three columns (a,b,c) there are 9 possible dependencies
     339                 :  *
     340                 :  *     two columns            three columns
     341                 :  *     -----------            -------------
     342                 :  *     (a) -> b                (a,b) -> c
     343                 :  *     (a) -> c                (a,c) -> b
     344                 :  *     (b) -> a                (b,c) -> a
     345                 :  *     (b) -> c
     346                 :  *     (c) -> a
     347                 :  *     (c) -> b
     348                 :  */
     349                 : MVDependencies *
     350              60 : statext_dependencies_build(StatsBuildData *data)
     351                 : {
     352                 :     int         i,
     353                 :                 k;
     354                 : 
     355                 :     /* result */
     356              60 :     MVDependencies *dependencies = NULL;
     357                 :     MemoryContext cxt;
     358                 : 
     359              60 :     Assert(data->nattnums >= 2);
     360                 : 
     361                 :     /* tracks memory allocated by dependency_degree calls */
     362              60 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
     363                 :                                 "dependency_degree cxt",
     364                 :                                 ALLOCSET_DEFAULT_SIZES);
     365                 : 
     366                 :     /*
     367                 :      * We'll try build functional dependencies starting from the smallest ones
     368                 :      * covering just 2 columns, to the largest ones, covering all columns
     369                 :      * included in the statistics object.  We start from the smallest ones
     370                 :      * because we want to be able to skip already implied ones.
     371                 :      */
     372             144 :     for (k = 2; k <= data->nattnums; k++)
     373                 :     {
     374                 :         AttrNumber *dependency; /* array with k elements */
     375                 : 
     376                 :         /* prepare a DependencyGenerator of variation */
     377              84 :         DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
     378                 : 
     379                 :         /* generate all possible variations of k values (out of n) */
     380             372 :         while ((dependency = DependencyGenerator_next(DependencyGenerator)))
     381                 :         {
     382                 :             double      degree;
     383                 :             MVDependency *d;
     384                 :             MemoryContext oldcxt;
     385                 : 
     386                 :             /* release memory used by dependency degree calculation */
     387             288 :             oldcxt = MemoryContextSwitchTo(cxt);
     388                 : 
     389                 :             /* compute how valid the dependency seems */
     390             288 :             degree = dependency_degree(data, k, dependency);
     391                 : 
     392             288 :             MemoryContextSwitchTo(oldcxt);
     393             288 :             MemoryContextReset(cxt);
     394                 : 
     395                 :             /*
     396                 :              * if the dependency seems entirely invalid, don't store it
     397                 :              */
     398             288 :             if (degree == 0.0)
     399             126 :                 continue;
     400                 : 
     401             162 :             d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     402             162 :                                          + k * sizeof(AttrNumber));
     403                 : 
     404                 :             /* copy the dependency (and keep the indexes into stxkeys) */
     405             162 :             d->degree = degree;
     406             162 :             d->nattributes = k;
     407             522 :             for (i = 0; i < k; i++)
     408             360 :                 d->attributes[i] = data->attnums[dependency[i]];
     409                 : 
     410                 :             /* initialize the list of dependencies */
     411             162 :             if (dependencies == NULL)
     412                 :             {
     413                 :                 dependencies
     414              51 :                     = (MVDependencies *) palloc0(sizeof(MVDependencies));
     415                 : 
     416              51 :                 dependencies->magic = STATS_DEPS_MAGIC;
     417              51 :                 dependencies->type = STATS_DEPS_TYPE_BASIC;
     418              51 :                 dependencies->ndeps = 0;
     419                 :             }
     420                 : 
     421             162 :             dependencies->ndeps++;
     422             162 :             dependencies = (MVDependencies *) repalloc(dependencies,
     423                 :                                                        offsetof(MVDependencies, deps)
     424             162 :                                                        + dependencies->ndeps * sizeof(MVDependency *));
     425                 : 
     426             162 :             dependencies->deps[dependencies->ndeps - 1] = d;
     427                 :         }
     428                 : 
     429                 :         /*
     430                 :          * we're done with variations of k elements, so free the
     431                 :          * DependencyGenerator
     432                 :          */
     433              84 :         DependencyGenerator_free(DependencyGenerator);
     434                 :     }
     435                 : 
     436              60 :     MemoryContextDelete(cxt);
     437                 : 
     438              60 :     return dependencies;
     439                 : }
     440                 : 
     441                 : 
     442                 : /*
     443                 :  * Serialize list of dependencies into a bytea value.
     444                 :  */
     445                 : bytea *
     446              51 : statext_dependencies_serialize(MVDependencies *dependencies)
     447                 : {
     448                 :     int         i;
     449                 :     bytea      *output;
     450                 :     char       *tmp;
     451                 :     Size        len;
     452                 : 
     453                 :     /* we need to store ndeps, with a number of attributes for each one */
     454              51 :     len = VARHDRSZ + SizeOfHeader;
     455                 : 
     456                 :     /* and also include space for the actual attribute numbers and degrees */
     457             213 :     for (i = 0; i < dependencies->ndeps; i++)
     458             162 :         len += SizeOfItem(dependencies->deps[i]->nattributes);
     459                 : 
     460              51 :     output = (bytea *) palloc0(len);
     461              51 :     SET_VARSIZE(output, len);
     462                 : 
     463              51 :     tmp = VARDATA(output);
     464                 : 
     465                 :     /* Store the base struct values (magic, type, ndeps) */
     466              51 :     memcpy(tmp, &dependencies->magic, sizeof(uint32));
     467              51 :     tmp += sizeof(uint32);
     468              51 :     memcpy(tmp, &dependencies->type, sizeof(uint32));
     469              51 :     tmp += sizeof(uint32);
     470              51 :     memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
     471              51 :     tmp += sizeof(uint32);
     472                 : 
     473                 :     /* store number of attributes and attribute numbers for each dependency */
     474             213 :     for (i = 0; i < dependencies->ndeps; i++)
     475                 :     {
     476             162 :         MVDependency *d = dependencies->deps[i];
     477                 : 
     478             162 :         memcpy(tmp, &d->degree, sizeof(double));
     479             162 :         tmp += sizeof(double);
     480                 : 
     481             162 :         memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
     482             162 :         tmp += sizeof(AttrNumber);
     483                 : 
     484             162 :         memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
     485             162 :         tmp += sizeof(AttrNumber) * d->nattributes;
     486                 : 
     487                 :         /* protect against overflow */
     488             162 :         Assert(tmp <= ((char *) output + len));
     489                 :     }
     490                 : 
     491                 :     /* make sure we've produced exactly the right amount of data */
     492              51 :     Assert(tmp == ((char *) output + len));
     493                 : 
     494              51 :     return output;
     495                 : }
     496                 : 
     497                 : /*
     498                 :  * Reads serialized dependencies into MVDependencies structure.
     499                 :  */
     500                 : MVDependencies *
     501             576 : statext_dependencies_deserialize(bytea *data)
     502                 : {
     503                 :     int         i;
     504                 :     Size        min_expected_size;
     505                 :     MVDependencies *dependencies;
     506                 :     char       *tmp;
     507                 : 
     508             576 :     if (data == NULL)
     509 UBC           0 :         return NULL;
     510                 : 
     511 CBC         576 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     512 UNC           0 :         elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
     513                 :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     514                 : 
     515                 :     /* read the MVDependencies header */
     516 CBC         576 :     dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
     517                 : 
     518                 :     /* initialize pointer to the data part (skip the varlena header) */
     519             576 :     tmp = VARDATA_ANY(data);
     520                 : 
     521                 :     /* read the header fields and perform basic sanity checks */
     522             576 :     memcpy(&dependencies->magic, tmp, sizeof(uint32));
     523             576 :     tmp += sizeof(uint32);
     524             576 :     memcpy(&dependencies->type, tmp, sizeof(uint32));
     525             576 :     tmp += sizeof(uint32);
     526             576 :     memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
     527             576 :     tmp += sizeof(uint32);
     528                 : 
     529             576 :     if (dependencies->magic != STATS_DEPS_MAGIC)
     530 UBC           0 :         elog(ERROR, "invalid dependency magic %d (expected %d)",
     531                 :              dependencies->magic, STATS_DEPS_MAGIC);
     532                 : 
     533 CBC         576 :     if (dependencies->type != STATS_DEPS_TYPE_BASIC)
     534 UBC           0 :         elog(ERROR, "invalid dependency type %d (expected %d)",
     535                 :              dependencies->type, STATS_DEPS_TYPE_BASIC);
     536                 : 
     537 CBC         576 :     if (dependencies->ndeps == 0)
     538 UBC           0 :         elog(ERROR, "invalid zero-length item array in MVDependencies");
     539                 : 
     540                 :     /* what minimum bytea size do we expect for those parameters */
     541 CBC         576 :     min_expected_size = SizeOfItem(dependencies->ndeps);
     542                 : 
     543             576 :     if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
     544 UNC           0 :         elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
     545                 :              VARSIZE_ANY_EXHDR(data), min_expected_size);
     546                 : 
     547                 :     /* allocate space for the MCV items */
     548 CBC         576 :     dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
     549             576 :                             + (dependencies->ndeps * sizeof(MVDependency *)));
     550                 : 
     551            3393 :     for (i = 0; i < dependencies->ndeps; i++)
     552                 :     {
     553                 :         double      degree;
     554                 :         AttrNumber  k;
     555                 :         MVDependency *d;
     556                 : 
     557                 :         /* degree of validity */
     558            2817 :         memcpy(&degree, tmp, sizeof(double));
     559            2817 :         tmp += sizeof(double);
     560                 : 
     561                 :         /* number of attributes */
     562            2817 :         memcpy(&k, tmp, sizeof(AttrNumber));
     563            2817 :         tmp += sizeof(AttrNumber);
     564                 : 
     565                 :         /* is the number of attributes valid? */
     566            2817 :         Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
     567                 : 
     568                 :         /* now that we know the number of attributes, allocate the dependency */
     569            2817 :         d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     570            2817 :                                      + (k * sizeof(AttrNumber)));
     571                 : 
     572            2817 :         d->degree = degree;
     573            2817 :         d->nattributes = k;
     574                 : 
     575                 :         /* copy attribute numbers */
     576            2817 :         memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
     577            2817 :         tmp += sizeof(AttrNumber) * d->nattributes;
     578                 : 
     579            2817 :         dependencies->deps[i] = d;
     580                 : 
     581                 :         /* still within the bytea */
     582            2817 :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     583                 :     }
     584                 : 
     585                 :     /* we should have consumed the whole bytea exactly */
     586             576 :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     587                 : 
     588             576 :     return dependencies;
     589                 : }
     590                 : 
     591                 : /*
     592                 :  * dependency_is_fully_matched
     593                 :  *      checks that a functional dependency is fully matched given clauses on
     594                 :  *      attributes (assuming the clauses are suitable equality clauses)
     595                 :  */
     596                 : static bool
     597            2376 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
     598                 : {
     599                 :     int         j;
     600                 : 
     601                 :     /*
     602                 :      * Check that the dependency actually is fully covered by clauses. We have
     603                 :      * to translate all attribute numbers, as those are referenced
     604                 :      */
     605            6165 :     for (j = 0; j < dependency->nattributes; j++)
     606                 :     {
     607            4857 :         int         attnum = dependency->attributes[j];
     608                 : 
     609            4857 :         if (!bms_is_member(attnum, attnums))
     610            1068 :             return false;
     611                 :     }
     612                 : 
     613            1308 :     return true;
     614                 : }
     615                 : 
     616                 : /*
     617                 :  * statext_dependencies_load
     618                 :  *      Load the functional dependencies for the indicated pg_statistic_ext tuple
     619                 :  */
     620                 : MVDependencies *
     621             570 : statext_dependencies_load(Oid mvoid, bool inh)
     622                 : {
     623                 :     MVDependencies *result;
     624                 :     bool        isnull;
     625                 :     Datum       deps;
     626                 :     HeapTuple   htup;
     627                 : 
     628             570 :     htup = SearchSysCache2(STATEXTDATASTXOID,
     629                 :                            ObjectIdGetDatum(mvoid),
     630                 :                            BoolGetDatum(inh));
     631             570 :     if (!HeapTupleIsValid(htup))
     632 UBC           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     633                 : 
     634 CBC         570 :     deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     635                 :                            Anum_pg_statistic_ext_data_stxddependencies, &isnull);
     636             570 :     if (isnull)
     637 UBC           0 :         elog(ERROR,
     638                 :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     639                 :              STATS_EXT_DEPENDENCIES, mvoid);
     640                 : 
     641 CBC         570 :     result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
     642                 : 
     643             570 :     ReleaseSysCache(htup);
     644                 : 
     645             570 :     return result;
     646                 : }
     647                 : 
     648                 : /*
     649                 :  * pg_dependencies_in       - input routine for type pg_dependencies.
     650                 :  *
     651                 :  * pg_dependencies is real enough to be a table column, but it has no operations
     652                 :  * of its own, and disallows input too
     653                 :  */
     654                 : Datum
     655 UBC           0 : pg_dependencies_in(PG_FUNCTION_ARGS)
     656                 : {
     657                 :     /*
     658                 :      * pg_node_list stores the data in binary form and parsing text input is
     659                 :      * not needed, so disallow this.
     660                 :      */
     661               0 :     ereport(ERROR,
     662                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     663                 :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     664                 : 
     665                 :     PG_RETURN_VOID();           /* keep compiler quiet */
     666                 : }
     667                 : 
     668                 : /*
     669                 :  * pg_dependencies      - output routine for type pg_dependencies.
     670                 :  */
     671                 : Datum
     672 CBC           6 : pg_dependencies_out(PG_FUNCTION_ARGS)
     673                 : {
     674               6 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     675               6 :     MVDependencies *dependencies = statext_dependencies_deserialize(data);
     676                 :     int         i,
     677                 :                 j;
     678                 :     StringInfoData str;
     679                 : 
     680               6 :     initStringInfo(&str);
     681               6 :     appendStringInfoChar(&str, '{');
     682                 : 
     683              36 :     for (i = 0; i < dependencies->ndeps; i++)
     684                 :     {
     685              30 :         MVDependency *dependency = dependencies->deps[i];
     686                 : 
     687              30 :         if (i > 0)
     688              24 :             appendStringInfoString(&str, ", ");
     689                 : 
     690              30 :         appendStringInfoChar(&str, '"');
     691             102 :         for (j = 0; j < dependency->nattributes; j++)
     692                 :         {
     693              72 :             if (j == dependency->nattributes - 1)
     694              30 :                 appendStringInfoString(&str, " => ");
     695              42 :             else if (j > 0)
     696              12 :                 appendStringInfoString(&str, ", ");
     697                 : 
     698              72 :             appendStringInfo(&str, "%d", dependency->attributes[j]);
     699                 :         }
     700              30 :         appendStringInfo(&str, "\": %f", dependency->degree);
     701                 :     }
     702                 : 
     703               6 :     appendStringInfoChar(&str, '}');
     704                 : 
     705               6 :     PG_RETURN_CSTRING(str.data);
     706                 : }
     707                 : 
     708                 : /*
     709                 :  * pg_dependencies_recv     - binary input routine for type pg_dependencies.
     710                 :  */
     711                 : Datum
     712 UBC           0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
     713                 : {
     714               0 :     ereport(ERROR,
     715                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     716                 :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     717                 : 
     718                 :     PG_RETURN_VOID();           /* keep compiler quiet */
     719                 : }
     720                 : 
     721                 : /*
     722                 :  * pg_dependencies_send     - binary output routine for type pg_dependencies.
     723                 :  *
     724                 :  * Functional dependencies are serialized in a bytea value (although the type
     725                 :  * is named differently), so let's just send that.
     726                 :  */
     727                 : Datum
     728               0 : pg_dependencies_send(PG_FUNCTION_ARGS)
     729                 : {
     730               0 :     return byteasend(fcinfo);
     731                 : }
     732                 : 
     733                 : /*
     734                 :  * dependency_is_compatible_clause
     735                 :  *      Determines if the clause is compatible with functional dependencies
     736                 :  *
     737                 :  * Only clauses that have the form of equality to a pseudoconstant, or can be
     738                 :  * interpreted that way, are currently accepted.  Furthermore the variable
     739                 :  * part of the clause must be a simple Var belonging to the specified
     740                 :  * relation, whose attribute number we return in *attnum on success.
     741                 :  */
     742                 : static bool
     743 CBC        1512 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
     744                 : {
     745                 :     Var        *var;
     746                 :     Node       *clause_expr;
     747                 : 
     748            1512 :     if (IsA(clause, RestrictInfo))
     749                 :     {
     750            1452 :         RestrictInfo *rinfo = (RestrictInfo *) clause;
     751                 : 
     752                 :         /* Pseudoconstants are not interesting (they couldn't contain a Var) */
     753            1452 :         if (rinfo->pseudoconstant)
     754               3 :             return false;
     755                 : 
     756                 :         /* Clauses referencing multiple, or no, varnos are incompatible */
     757            1449 :         if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
     758 UBC           0 :             return false;
     759                 : 
     760 CBC        1449 :         clause = (Node *) rinfo->clause;
     761                 :     }
     762                 : 
     763            1509 :     if (is_opclause(clause))
     764                 :     {
     765                 :         /* If it's an opclause, check for Var = Const or Const = Var. */
     766             585 :         OpExpr     *expr = (OpExpr *) clause;
     767                 : 
     768                 :         /* Only expressions with two arguments are candidates. */
     769             585 :         if (list_length(expr->args) != 2)
     770 UBC           0 :             return false;
     771                 : 
     772                 :         /* Make sure non-selected argument is a pseudoconstant. */
     773 CBC         585 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
     774             585 :             clause_expr = linitial(expr->args);
     775 UBC           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
     776               0 :             clause_expr = lsecond(expr->args);
     777                 :         else
     778               0 :             return false;
     779                 : 
     780                 :         /*
     781                 :          * If it's not an "=" operator, just ignore the clause, as it's not
     782                 :          * compatible with functional dependencies.
     783                 :          *
     784                 :          * This uses the function for estimating selectivity, not the operator
     785                 :          * directly (a bit awkward, but well ...).
     786                 :          *
     787                 :          * XXX this is pretty dubious; probably it'd be better to check btree
     788                 :          * or hash opclass membership, so as not to be fooled by custom
     789                 :          * selectivity functions, and to be more consistent with decisions
     790                 :          * elsewhere in the planner.
     791                 :          */
     792 CBC         585 :         if (get_oprrest(expr->opno) != F_EQSEL)
     793              18 :             return false;
     794                 : 
     795                 :         /* OK to proceed with checking "var" */
     796                 :     }
     797             924 :     else if (IsA(clause, ScalarArrayOpExpr))
     798                 :     {
     799                 :         /* If it's an scalar array operator, check for Var IN Const. */
     800             888 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
     801                 : 
     802                 :         /*
     803                 :          * Reject ALL() variant, we only care about ANY/IN.
     804                 :          *
     805                 :          * XXX Maybe we should check if all the values are the same, and allow
     806                 :          * ALL in that case? Doesn't seem very practical, though.
     807                 :          */
     808             888 :         if (!expr->useOr)
     809              18 :             return false;
     810                 : 
     811                 :         /* Only expressions with two arguments are candidates. */
     812             870 :         if (list_length(expr->args) != 2)
     813 UBC           0 :             return false;
     814                 : 
     815                 :         /*
     816                 :          * We know it's always (Var IN Const), so we assume the var is the
     817                 :          * first argument, and pseudoconstant is the second one.
     818                 :          */
     819 CBC         870 :         if (!is_pseudo_constant_clause(lsecond(expr->args)))
     820 UBC           0 :             return false;
     821                 : 
     822 CBC         870 :         clause_expr = linitial(expr->args);
     823                 : 
     824                 :         /*
     825                 :          * If it's not an "=" operator, just ignore the clause, as it's not
     826                 :          * compatible with functional dependencies. The operator is identified
     827                 :          * simply by looking at which function it uses to estimate
     828                 :          * selectivity. That's a bit strange, but it's what other similar
     829                 :          * places do.
     830                 :          */
     831             870 :         if (get_oprrest(expr->opno) != F_EQSEL)
     832              90 :             return false;
     833                 : 
     834                 :         /* OK to proceed with checking "var" */
     835                 :     }
     836              36 :     else if (is_orclause(clause))
     837                 :     {
     838              36 :         BoolExpr   *bool_expr = (BoolExpr *) clause;
     839                 :         ListCell   *lc;
     840                 : 
     841                 :         /* start with no attribute number */
     842              36 :         *attnum = InvalidAttrNumber;
     843                 : 
     844              75 :         foreach(lc, bool_expr->args)
     845                 :         {
     846                 :             AttrNumber  clause_attnum;
     847                 : 
     848                 :             /*
     849                 :              * Had we found incompatible clause in the arguments, treat the
     850                 :              * whole clause as incompatible.
     851                 :              */
     852              60 :             if (!dependency_is_compatible_clause((Node *) lfirst(lc),
     853                 :                                                  relid, &clause_attnum))
     854              21 :                 return false;
     855                 : 
     856              42 :             if (*attnum == InvalidAttrNumber)
     857              18 :                 *attnum = clause_attnum;
     858                 : 
     859                 :             /* ensure all the variables are the same (same attnum) */
     860              42 :             if (*attnum != clause_attnum)
     861               3 :                 return false;
     862                 :         }
     863                 : 
     864                 :         /* the Var is already checked by the recursive call */
     865              15 :         return true;
     866                 :     }
     867 UBC           0 :     else if (is_notclause(clause))
     868                 :     {
     869                 :         /*
     870                 :          * "NOT x" can be interpreted as "x = false", so get the argument and
     871                 :          * proceed with seeing if it's a suitable Var.
     872                 :          */
     873               0 :         clause_expr = (Node *) get_notclausearg(clause);
     874                 :     }
     875                 :     else
     876                 :     {
     877                 :         /*
     878                 :          * A boolean expression "x" can be interpreted as "x = true", so
     879                 :          * proceed with seeing if it's a suitable Var.
     880                 :          */
     881               0 :         clause_expr = (Node *) clause;
     882                 :     }
     883                 : 
     884                 :     /*
     885                 :      * We may ignore any RelabelType node above the operand.  (There won't be
     886                 :      * more than one, since eval_const_expressions has been applied already.)
     887                 :      */
     888 CBC        1347 :     if (IsA(clause_expr, RelabelType))
     889 UBC           0 :         clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
     890                 : 
     891                 :     /* We only support plain Vars for now */
     892 CBC        1347 :     if (!IsA(clause_expr, Var))
     893             144 :         return false;
     894                 : 
     895                 :     /* OK, we know we have a Var */
     896            1203 :     var = (Var *) clause_expr;
     897                 : 
     898                 :     /* Ensure Var is from the correct relation */
     899            1203 :     if (var->varno != relid)
     900 UBC           0 :         return false;
     901                 : 
     902                 :     /* We also better ensure the Var is from the current level */
     903 CBC        1203 :     if (var->varlevelsup != 0)
     904 UBC           0 :         return false;
     905                 : 
     906                 :     /* Also ignore system attributes (we don't allow stats on those) */
     907 CBC        1203 :     if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     908 UBC           0 :         return false;
     909                 : 
     910 CBC        1203 :     *attnum = var->varattno;
     911            1203 :     return true;
     912                 : }
     913                 : 
     914                 : /*
     915                 :  * find_strongest_dependency
     916                 :  *      find the strongest dependency on the attributes
     917                 :  *
     918                 :  * When applying functional dependencies, we start with the strongest
     919                 :  * dependencies. That is, we select the dependency that:
     920                 :  *
     921                 :  * (a) has all attributes covered by equality clauses
     922                 :  *
     923                 :  * (b) has the most attributes
     924                 :  *
     925                 :  * (c) has the highest degree of validity
     926                 :  *
     927                 :  * This guarantees that we eliminate the most redundant conditions first
     928                 :  * (see the comment in dependencies_clauselist_selectivity).
     929                 :  */
     930                 : static MVDependency *
     931            1275 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
     932                 :                           Bitmapset *attnums)
     933                 : {
     934                 :     int         i,
     935                 :                 j;
     936            1275 :     MVDependency *strongest = NULL;
     937                 : 
     938                 :     /* number of attnums in clauses */
     939            1275 :     int         nattnums = bms_num_members(attnums);
     940                 : 
     941                 :     /*
     942                 :      * Iterate over the MVDependency items and find the strongest one from the
     943                 :      * fully-matched dependencies. We do the cheap checks first, before
     944                 :      * matching it against the attnums.
     945                 :      */
     946            2568 :     for (i = 0; i < ndependencies; i++)
     947                 :     {
     948            7332 :         for (j = 0; j < dependencies[i]->ndeps; j++)
     949                 :         {
     950            6039 :             MVDependency *dependency = dependencies[i]->deps[j];
     951                 : 
     952                 :             /*
     953                 :              * Skip dependencies referencing more attributes than available
     954                 :              * clauses, as those can't be fully matched.
     955                 :              */
     956            6039 :             if (dependency->nattributes > nattnums)
     957            3663 :                 continue;
     958                 : 
     959            2376 :             if (strongest)
     960                 :             {
     961                 :                 /* skip dependencies on fewer attributes than the strongest. */
     962            1482 :                 if (dependency->nattributes < strongest->nattributes)
     963 UBC           0 :                     continue;
     964                 : 
     965                 :                 /* also skip weaker dependencies when attribute count matches */
     966 CBC        1482 :                 if (strongest->nattributes == dependency->nattributes &&
     967            1341 :                     strongest->degree > dependency->degree)
     968 UBC           0 :                     continue;
     969                 :             }
     970                 : 
     971                 :             /*
     972                 :              * this dependency is stronger, but we must still check that it's
     973                 :              * fully matched to these attnums. We perform this check last as
     974                 :              * it's slightly more expensive than the previous checks.
     975                 :              */
     976 CBC        2376 :             if (dependency_is_fully_matched(dependency, attnums))
     977            1308 :                 strongest = dependency; /* save new best match */
     978                 :         }
     979                 :     }
     980                 : 
     981            1275 :     return strongest;
     982                 : }
     983                 : 
     984                 : /*
     985                 :  * clauselist_apply_dependencies
     986                 :  *      Apply the specified functional dependencies to a list of clauses and
     987                 :  *      return the estimated selectivity of the clauses that are compatible
     988                 :  *      with any of the given dependencies.
     989                 :  *
     990                 :  * This will estimate all not-already-estimated clauses that are compatible
     991                 :  * with functional dependencies, and which have an attribute mentioned by any
     992                 :  * of the given dependencies (either as an implying or implied attribute).
     993                 :  *
     994                 :  * Given (lists of) clauses on attributes (a,b) and a functional dependency
     995                 :  * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
     996                 :  * using the formula
     997                 :  *
     998                 :  *      P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
     999                 :  *
    1000                 :  * where 'f' is the degree of dependency.  This reflects the fact that we
    1001                 :  * expect a fraction f of all rows to be consistent with the dependency
    1002                 :  * (a=>b), and so have a selectivity of P(a), while the remaining rows are
    1003                 :  * treated as independent.
    1004                 :  *
    1005                 :  * In practice, we use a slightly modified version of this formula, which uses
    1006                 :  * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
    1007                 :  * should obviously not exceed either column's individual selectivity.  I.e.,
    1008                 :  * we actually combine selectivities using the formula
    1009                 :  *
    1010                 :  *      P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1011                 :  *
    1012                 :  * This can make quite a difference if the specific values matching the
    1013                 :  * clauses are not consistent with the functional dependency.
    1014                 :  */
    1015                 : static Selectivity
    1016             564 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
    1017                 :                               int varRelid, JoinType jointype,
    1018                 :                               SpecialJoinInfo *sjinfo,
    1019                 :                               MVDependency **dependencies, int ndependencies,
    1020                 :                               AttrNumber *list_attnums,
    1021                 :                               Bitmapset **estimatedclauses)
    1022                 : {
    1023                 :     Bitmapset  *attnums;
    1024                 :     int         i;
    1025                 :     int         j;
    1026                 :     int         nattrs;
    1027                 :     Selectivity *attr_sel;
    1028                 :     int         attidx;
    1029                 :     int         listidx;
    1030                 :     ListCell   *l;
    1031                 :     Selectivity s1;
    1032                 : 
    1033                 :     /*
    1034                 :      * Extract the attnums of all implying and implied attributes from all the
    1035                 :      * given dependencies.  Each of these attributes is expected to have at
    1036                 :      * least 1 not-already-estimated compatible clause that we will estimate
    1037                 :      * here.
    1038                 :      */
    1039             564 :     attnums = NULL;
    1040            1275 :     for (i = 0; i < ndependencies; i++)
    1041                 :     {
    1042            2274 :         for (j = 0; j < dependencies[i]->nattributes; j++)
    1043                 :         {
    1044            1563 :             AttrNumber  attnum = dependencies[i]->attributes[j];
    1045                 : 
    1046            1563 :             attnums = bms_add_member(attnums, attnum);
    1047                 :         }
    1048                 :     }
    1049                 : 
    1050                 :     /*
    1051                 :      * Compute per-column selectivity estimates for each of these attributes,
    1052                 :      * and mark all the corresponding clauses as estimated.
    1053                 :      */
    1054             564 :     nattrs = bms_num_members(attnums);
    1055             564 :     attr_sel = (Selectivity *) palloc(sizeof(Selectivity) * nattrs);
    1056                 : 
    1057             564 :     attidx = 0;
    1058             564 :     i = -1;
    1059            1845 :     while ((i = bms_next_member(attnums, i)) >= 0)
    1060                 :     {
    1061            1281 :         List       *attr_clauses = NIL;
    1062                 :         Selectivity simple_sel;
    1063                 : 
    1064            1281 :         listidx = -1;
    1065            4314 :         foreach(l, clauses)
    1066                 :         {
    1067            3033 :             Node       *clause = (Node *) lfirst(l);
    1068                 : 
    1069            3033 :             listidx++;
    1070            3033 :             if (list_attnums[listidx] == i)
    1071                 :             {
    1072            1281 :                 attr_clauses = lappend(attr_clauses, clause);
    1073            1281 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1074                 :             }
    1075                 :         }
    1076                 : 
    1077            1281 :         simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
    1078                 :                                                 jointype, sjinfo, false);
    1079            1281 :         attr_sel[attidx++] = simple_sel;
    1080                 :     }
    1081                 : 
    1082                 :     /*
    1083                 :      * Now combine these selectivities using the dependency information.  For
    1084                 :      * chains of dependencies such as a -> b -> c, the b -> c dependency will
    1085                 :      * come before the a -> b dependency in the array, so we traverse the
    1086                 :      * array backwards to ensure such chains are computed in the right order.
    1087                 :      *
    1088                 :      * As explained above, pairs of selectivities are combined using the
    1089                 :      * formula
    1090                 :      *
    1091                 :      * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
    1092                 :      *
    1093                 :      * to ensure that the combined selectivity is never greater than either
    1094                 :      * individual selectivity.
    1095                 :      *
    1096                 :      * Where multiple dependencies apply (e.g., a -> b -> c), we use
    1097                 :      * conditional probabilities to compute the overall result as follows:
    1098                 :      *
    1099                 :      * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
    1100                 :      *
    1101                 :      * so we replace the selectivities of all implied attributes with
    1102                 :      * conditional probabilities, that are conditional on all their implying
    1103                 :      * attributes.  The selectivities of all other non-implied attributes are
    1104                 :      * left as they are.
    1105                 :      */
    1106            1275 :     for (i = ndependencies - 1; i >= 0; i--)
    1107                 :     {
    1108             711 :         MVDependency *dependency = dependencies[i];
    1109                 :         AttrNumber  attnum;
    1110                 :         Selectivity s2;
    1111                 :         double      f;
    1112                 : 
    1113                 :         /* Selectivity of all the implying attributes */
    1114             711 :         s1 = 1.0;
    1115            1563 :         for (j = 0; j < dependency->nattributes - 1; j++)
    1116                 :         {
    1117             852 :             attnum = dependency->attributes[j];
    1118             852 :             attidx = bms_member_index(attnums, attnum);
    1119             852 :             s1 *= attr_sel[attidx];
    1120                 :         }
    1121                 : 
    1122                 :         /* Original selectivity of the implied attribute */
    1123             711 :         attnum = dependency->attributes[j];
    1124             711 :         attidx = bms_member_index(attnums, attnum);
    1125             711 :         s2 = attr_sel[attidx];
    1126                 : 
    1127                 :         /*
    1128                 :          * Replace s2 with the conditional probability s2 given s1, computed
    1129                 :          * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
    1130                 :          *
    1131                 :          * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
    1132                 :          *
    1133                 :          * where P(a) = s1, the selectivity of the implying attributes, and
    1134                 :          * P(b) = s2, the selectivity of the implied attribute.
    1135                 :          */
    1136             711 :         f = dependency->degree;
    1137                 : 
    1138             711 :         if (s1 <= s2)
    1139             681 :             attr_sel[attidx] = f + (1 - f) * s2;
    1140                 :         else
    1141              30 :             attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
    1142                 :     }
    1143                 : 
    1144                 :     /*
    1145                 :      * The overall selectivity of all the clauses on all these attributes is
    1146                 :      * then the product of all the original (non-implied) probabilities and
    1147                 :      * the new conditional (implied) probabilities.
    1148                 :      */
    1149             564 :     s1 = 1.0;
    1150            1845 :     for (i = 0; i < nattrs; i++)
    1151            1281 :         s1 *= attr_sel[i];
    1152                 : 
    1153             564 :     CLAMP_PROBABILITY(s1);
    1154                 : 
    1155             564 :     pfree(attr_sel);
    1156             564 :     bms_free(attnums);
    1157                 : 
    1158             564 :     return s1;
    1159                 : }
    1160                 : 
    1161                 : /*
    1162                 :  * dependency_is_compatible_expression
    1163                 :  *      Determines if the expression is compatible with functional dependencies
    1164                 :  *
    1165                 :  * Similar to dependency_is_compatible_clause, but doesn't enforce that the
    1166                 :  * expression is a simple Var.  On success, return the matching statistics
    1167                 :  * expression into *expr.
    1168                 :  */
    1169                 : static bool
    1170             321 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
    1171                 : {
    1172                 :     ListCell   *lc,
    1173                 :                *lc2;
    1174                 :     Node       *clause_expr;
    1175                 : 
    1176             321 :     if (IsA(clause, RestrictInfo))
    1177                 :     {
    1178             276 :         RestrictInfo *rinfo = (RestrictInfo *) clause;
    1179                 : 
    1180                 :         /* Pseudoconstants are not interesting (they couldn't contain a Var) */
    1181             276 :         if (rinfo->pseudoconstant)
    1182               3 :             return false;
    1183                 : 
    1184                 :         /* Clauses referencing multiple, or no, varnos are incompatible */
    1185             273 :         if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
    1186 UBC           0 :             return false;
    1187                 : 
    1188 CBC         273 :         clause = (Node *) rinfo->clause;
    1189                 :     }
    1190                 : 
    1191             318 :     if (is_opclause(clause))
    1192                 :     {
    1193                 :         /* If it's an opclause, check for Var = Const or Const = Var. */
    1194             102 :         OpExpr     *expr = (OpExpr *) clause;
    1195                 : 
    1196                 :         /* Only expressions with two arguments are candidates. */
    1197             102 :         if (list_length(expr->args) != 2)
    1198 UBC           0 :             return false;
    1199                 : 
    1200                 :         /* Make sure non-selected argument is a pseudoconstant. */
    1201 CBC         102 :         if (is_pseudo_constant_clause(lsecond(expr->args)))
    1202             102 :             clause_expr = linitial(expr->args);
    1203 UBC           0 :         else if (is_pseudo_constant_clause(linitial(expr->args)))
    1204               0 :             clause_expr = lsecond(expr->args);
    1205                 :         else
    1206               0 :             return false;
    1207                 : 
    1208                 :         /*
    1209                 :          * If it's not an "=" operator, just ignore the clause, as it's not
    1210                 :          * compatible with functional dependencies.
    1211                 :          *
    1212                 :          * This uses the function for estimating selectivity, not the operator
    1213                 :          * directly (a bit awkward, but well ...).
    1214                 :          *
    1215                 :          * XXX this is pretty dubious; probably it'd be better to check btree
    1216                 :          * or hash opclass membership, so as not to be fooled by custom
    1217                 :          * selectivity functions, and to be more consistent with decisions
    1218                 :          * elsewhere in the planner.
    1219                 :          */
    1220 CBC         102 :         if (get_oprrest(expr->opno) != F_EQSEL)
    1221              18 :             return false;
    1222                 : 
    1223                 :         /* OK to proceed with checking "var" */
    1224                 :     }
    1225             216 :     else if (IsA(clause, ScalarArrayOpExpr))
    1226                 :     {
    1227                 :         /* If it's an scalar array operator, check for Var IN Const. */
    1228             195 :         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1229                 : 
    1230                 :         /*
    1231                 :          * Reject ALL() variant, we only care about ANY/IN.
    1232                 :          *
    1233                 :          * FIXME Maybe we should check if all the values are the same, and
    1234                 :          * allow ALL in that case? Doesn't seem very practical, though.
    1235                 :          */
    1236             195 :         if (!expr->useOr)
    1237              18 :             return false;
    1238                 : 
    1239                 :         /* Only expressions with two arguments are candidates. */
    1240             177 :         if (list_length(expr->args) != 2)
    1241 UBC           0 :             return false;
    1242                 : 
    1243                 :         /*
    1244                 :          * We know it's always (Var IN Const), so we assume the var is the
    1245                 :          * first argument, and pseudoconstant is the second one.
    1246                 :          */
    1247 CBC         177 :         if (!is_pseudo_constant_clause(lsecond(expr->args)))
    1248 UBC           0 :             return false;
    1249                 : 
    1250 CBC         177 :         clause_expr = linitial(expr->args);
    1251                 : 
    1252                 :         /*
    1253                 :          * If it's not an "=" operator, just ignore the clause, as it's not
    1254                 :          * compatible with functional dependencies. The operator is identified
    1255                 :          * simply by looking at which function it uses to estimate
    1256                 :          * selectivity. That's a bit strange, but it's what other similar
    1257                 :          * places do.
    1258                 :          */
    1259             177 :         if (get_oprrest(expr->opno) != F_EQSEL)
    1260              90 :             return false;
    1261                 : 
    1262                 :         /* OK to proceed with checking "var" */
    1263                 :     }
    1264              21 :     else if (is_orclause(clause))
    1265                 :     {
    1266              21 :         BoolExpr   *bool_expr = (BoolExpr *) clause;
    1267                 : 
    1268 ECB             :         /* start with no expression (we'll use the first match) */
    1269 GIC          21 :         *expr = NULL;
    1270 ECB             : 
    1271 GIC          60 :         foreach(lc, bool_expr->args)
    1272 ECB             :         {
    1273 GIC          45 :             Node       *or_expr = NULL;
    1274                 : 
    1275                 :             /*
    1276                 :              * Had we found incompatible expression in the arguments, treat
    1277                 :              * the whole expression as incompatible.
    1278 ECB             :              */
    1279 GIC          45 :             if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
    1280 ECB             :                                                      statlist, &or_expr))
    1281 GIC           6 :                 return false;
    1282 ECB             : 
    1283 CBC          42 :             if (*expr == NULL)
    1284 GIC          18 :                 *expr = or_expr;
    1285                 : 
    1286 ECB             :             /* ensure all the expressions are the same */
    1287 CBC          42 :             if (!equal(or_expr, *expr))
    1288 GIC           3 :                 return false;
    1289                 :         }
    1290                 : 
    1291 ECB             :         /* the expression is already checked by the recursive call */
    1292 GIC          15 :         return true;
    1293 EUB             :     }
    1294 UIC           0 :     else if (is_notclause(clause))
    1295                 :     {
    1296                 :         /*
    1297                 :          * "NOT x" can be interpreted as "x = false", so get the argument and
    1298                 :          * proceed with seeing if it's a suitable Var.
    1299 EUB             :          */
    1300 UIC           0 :         clause_expr = (Node *) get_notclausearg(clause);
    1301                 :     }
    1302                 :     else
    1303                 :     {
    1304                 :         /*
    1305                 :          * A boolean expression "x" can be interpreted as "x = true", so
    1306                 :          * proceed with seeing if it's a suitable Var.
    1307 EUB             :          */
    1308 UIC           0 :         clause_expr = (Node *) clause;
    1309                 :     }
    1310                 : 
    1311                 :     /*
    1312                 :      * We may ignore any RelabelType node above the operand.  (There won't be
    1313                 :      * more than one, since eval_const_expressions has been applied already.)
    1314 ECB             :      */
    1315 GBC         171 :     if (IsA(clause_expr, RelabelType))
    1316 UIC           0 :         clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
    1317                 : 
    1318                 :     /*
    1319                 :      * Search for a matching statistics expression.
    1320 ECB             :      */
    1321 GIC         174 :     foreach(lc, statlist)
    1322 ECB             :     {
    1323 GIC         171 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
    1324                 : 
    1325 ECB             :         /* ignore stats without dependencies */
    1326 GBC         171 :         if (info->kind != STATS_EXT_DEPENDENCIES)
    1327 UIC           0 :             continue;
    1328 ECB             : 
    1329 GIC         279 :         foreach(lc2, info->exprs)
    1330 ECB             :         {
    1331 GIC         276 :             Node       *stat_expr = (Node *) lfirst(lc2);
    1332 ECB             : 
    1333 GIC         276 :             if (equal(clause_expr, stat_expr))
    1334 ECB             :             {
    1335 CBC         168 :                 *expr = stat_expr;
    1336 GIC         168 :                 return true;
    1337                 :             }
    1338                 :         }
    1339                 :     }
    1340 ECB             : 
    1341 GIC           3 :     return false;
    1342                 : }
    1343                 : 
    1344                 : /*
    1345                 :  * dependencies_clauselist_selectivity
    1346                 :  *      Return the estimated selectivity of (a subset of) the given clauses
    1347                 :  *      using functional dependency statistics, or 1.0 if no useful functional
    1348                 :  *      dependency statistic exists.
    1349                 :  *
    1350                 :  * 'estimatedclauses' is an input/output argument that gets a bit set
    1351                 :  * corresponding to the (zero-based) list index of each clause that is included
    1352                 :  * in the estimated selectivity.
    1353                 :  *
    1354                 :  * Given equality clauses on attributes (a,b) we find the strongest dependency
    1355                 :  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
    1356                 :  * dependency, we then combine the per-clause selectivities using the formula
    1357                 :  *
    1358                 :  *     P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
    1359                 :  *
    1360                 :  * where 'f' is the degree of the dependency.  (Actually we use a slightly
    1361                 :  * modified version of this formula -- see clauselist_apply_dependencies()).
    1362                 :  *
    1363                 :  * With clauses on more than two attributes, the dependencies are applied
    1364                 :  * recursively, starting with the widest/strongest dependencies. For example
    1365                 :  * P(a,b,c) is first split like this:
    1366                 :  *
    1367                 :  *     P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
    1368                 :  *
    1369                 :  * assuming (a,b=>c) is the strongest dependency.
    1370                 :  */
    1371 ECB             : Selectivity
    1372 GIC         894 : dependencies_clauselist_selectivity(PlannerInfo *root,
    1373                 :                                     List *clauses,
    1374                 :                                     int varRelid,
    1375                 :                                     JoinType jointype,
    1376                 :                                     SpecialJoinInfo *sjinfo,
    1377                 :                                     RelOptInfo *rel,
    1378                 :                                     Bitmapset **estimatedclauses)
    1379 ECB             : {
    1380 GIC         894 :     Selectivity s1 = 1.0;
    1381 ECB             :     ListCell   *l;
    1382 GIC         894 :     Bitmapset  *clauses_attnums = NULL;
    1383                 :     AttrNumber *list_attnums;
    1384                 :     int         listidx;
    1385                 :     MVDependencies **func_dependencies;
    1386                 :     int         nfunc_dependencies;
    1387                 :     int         total_ndeps;
    1388                 :     MVDependency **dependencies;
    1389                 :     int         ndependencies;
    1390                 :     int         i;
    1391 ECB             :     AttrNumber  attnum_offset;
    1392 GIC         894 :     RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
    1393                 : 
    1394                 :     /* unique expressions */
    1395                 :     Node      **unique_exprs;
    1396                 :     int         unique_exprs_cnt;
    1397                 : 
    1398 ECB             :     /* check if there's any stats that might be useful for us. */
    1399 CBC         894 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
    1400 GIC         234 :         return 1.0;
    1401 ECB             : 
    1402 CBC         660 :     list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
    1403 GIC         660 :                                          list_length(clauses));
    1404                 : 
    1405                 :     /*
    1406                 :      * We allocate space as if every clause was a unique expression, although
    1407                 :      * that's probably overkill. Some will be simple column references that
    1408                 :      * we'll translate to attnums, and there might be duplicates. But it's
    1409                 :      * easier and cheaper to just do one allocation than repalloc later.
    1410 ECB             :      */
    1411 CBC         660 :     unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
    1412 GIC         660 :     unique_exprs_cnt = 0;
    1413                 : 
    1414                 :     /*
    1415                 :      * Pre-process the clauses list to extract the attnums seen in each item.
    1416                 :      * We need to determine if there's any clauses which will be useful for
    1417                 :      * dependency selectivity estimations. Along the way we'll record all of
    1418                 :      * the attnums for each clause in a list which we'll reference later so we
    1419                 :      * don't need to repeat the same work again. We'll also keep track of all
    1420                 :      * attnums seen.
    1421                 :      *
    1422                 :      * We also skip clauses that we already estimated using different types of
    1423                 :      * statistics (we treat them as incompatible).
    1424                 :      *
    1425                 :      * To handle expressions, we assign them negative attnums, as if it was a
    1426                 :      * system attribute (this is fine, as we only allow extended stats on user
    1427                 :      * attributes). And then we offset everything by the number of
    1428                 :      * expressions, so that we can store the values in a bitmapset.
    1429 ECB             :      */
    1430 CBC         660 :     listidx = 0;
    1431 GIC        2133 :     foreach(l, clauses)
    1432 ECB             :     {
    1433 GIC        1473 :         Node       *clause = (Node *) lfirst(l);
    1434 ECB             :         AttrNumber  attnum;
    1435 GIC        1473 :         Node       *expr = NULL;
    1436                 : 
    1437 ECB             :         /* ignore clause by default */
    1438 GIC        1473 :         list_attnums[listidx] = InvalidAttrNumber;
    1439 ECB             : 
    1440 GIC        1473 :         if (!bms_is_member(listidx, *estimatedclauses))
    1441                 :         {
    1442                 :             /*
    1443                 :              * If it's a simple column reference, just extract the attnum. If
    1444                 :              * it's an expression, assign a negative attnum as if it was a
    1445                 :              * system attribute.
    1446 ECB             :              */
    1447 GIC        1452 :             if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
    1448 ECB             :             {
    1449 GIC        1176 :                 list_attnums[listidx] = attnum;
    1450 ECB             :             }
    1451 GIC         276 :             else if (dependency_is_compatible_expression(clause, rel->relid,
    1452                 :                                                          rel->statlist,
    1453                 :                                                          &expr))
    1454                 :             {
    1455 ECB             :                 /* special attnum assigned to this expression */
    1456 GIC         141 :                 attnum = InvalidAttrNumber;
    1457 ECB             : 
    1458 GIC         141 :                 Assert(expr != NULL);
    1459                 : 
    1460 ECB             :                 /* If the expression is duplicate, use the same attnum. */
    1461 GIC         237 :                 for (i = 0; i < unique_exprs_cnt; i++)
    1462 ECB             :                 {
    1463 GIC          96 :                     if (equal(unique_exprs[i], expr))
    1464                 :                     {
    1465 EUB             :                         /* negative attribute number to expression */
    1466 UBC           0 :                         attnum = -(i + 1);
    1467 UIC           0 :                         break;
    1468                 :                     }
    1469                 :                 }
    1470                 : 
    1471 ECB             :                 /* not found in the list, so add it */
    1472 GIC         141 :                 if (attnum == InvalidAttrNumber)
    1473 ECB             :                 {
    1474 GIC         141 :                     unique_exprs[unique_exprs_cnt++] = expr;
    1475                 : 
    1476 ECB             :                     /* after incrementing the value, to get -1, -2, ... */
    1477 GIC         141 :                     attnum = (-unique_exprs_cnt);
    1478                 :                 }
    1479                 : 
    1480 ECB             :                 /* remember which attnum was assigned to this clause */
    1481 GIC         141 :                 list_attnums[listidx] = attnum;
    1482                 :             }
    1483                 :         }
    1484 ECB             : 
    1485 GIC        1473 :         listidx++;
    1486                 :     }
    1487 ECB             : 
    1488 GIC         660 :     Assert(listidx == list_length(clauses));
    1489                 : 
    1490                 :     /*
    1491                 :      * How much we need to offset the attnums? If there are no expressions,
    1492                 :      * then no offset is needed. Otherwise we need to offset enough for the
    1493                 :      * lowest value (-unique_exprs_cnt) to become 1.
    1494 ECB             :      */
    1495 CBC         660 :     if (unique_exprs_cnt > 0)
    1496 GIC          66 :         attnum_offset = (unique_exprs_cnt + 1);
    1497 ECB             :     else
    1498 GIC         594 :         attnum_offset = 0;
    1499                 : 
    1500                 :     /*
    1501                 :      * Now that we know how many expressions there are, we can offset the
    1502                 :      * values just enough to build the bitmapset.
    1503 ECB             :      */
    1504 GIC        2133 :     for (i = 0; i < list_length(clauses); i++)
    1505                 :     {
    1506                 :         AttrNumber  attnum;
    1507                 : 
    1508 ECB             :         /* ignore incompatible or already estimated clauses */
    1509 CBC        1473 :         if (list_attnums[i] == InvalidAttrNumber)
    1510 GIC         156 :             continue;
    1511                 : 
    1512 ECB             :         /* make sure the attnum is in the expected range */
    1513 CBC        1317 :         Assert(list_attnums[i] >= (-unique_exprs_cnt));
    1514 GIC        1317 :         Assert(list_attnums[i] <= MaxHeapAttributeNumber);
    1515                 : 
    1516 ECB             :         /* make sure the attnum is positive (valid AttrNumber) */
    1517 GIC        1317 :         attnum = list_attnums[i] + attnum_offset;
    1518                 : 
    1519                 :         /*
    1520                 :          * Either it's a regular attribute, or it's an expression, in which
    1521                 :          * case we must not have seen it before (expressions are unique).
    1522                 :          *
    1523                 :          * XXX Check whether it's a regular attribute has to be done using the
    1524                 :          * original attnum, while the second check has to use the value with
    1525                 :          * an offset.
    1526 ECB             :          */
    1527 GIC        1317 :         Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
    1528                 :                !bms_is_member(attnum, clauses_attnums));
    1529                 : 
    1530                 :         /*
    1531                 :          * Remember the offset attnum, both for attributes and expressions.
    1532                 :          * We'll pass list_attnums to clauselist_apply_dependencies, which
    1533                 :          * uses it to identify clauses in a bitmap. We could also pass the
    1534                 :          * offset, but this is more convenient.
    1535 ECB             :          */
    1536 GIC        1317 :         list_attnums[i] = attnum;
    1537 ECB             : 
    1538 GIC        1317 :         clauses_attnums = bms_add_member(clauses_attnums, attnum);
    1539                 :     }
    1540                 : 
    1541                 :     /*
    1542                 :      * If there's not at least two distinct attnums and expressions, then
    1543                 :      * reject the whole list of clauses. We must return 1.0 so the calling
    1544                 :      * function's selectivity is unaffected.
    1545 ECB             :      */
    1546 GIC         660 :     if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
    1547 ECB             :     {
    1548 CBC          96 :         bms_free(clauses_attnums);
    1549              96 :         pfree(list_attnums);
    1550 GIC          96 :         return 1.0;
    1551                 :     }
    1552                 : 
    1553                 :     /*
    1554                 :      * Load all functional dependencies matching at least two parameters. We
    1555                 :      * can simply consider all dependencies at once, without having to search
    1556                 :      * for the best statistics object.
    1557                 :      *
    1558                 :      * To not waste cycles and memory, we deserialize dependencies only for
    1559                 :      * statistics that match at least two attributes. The array is allocated
    1560                 :      * with the assumption that all objects match - we could grow the array to
    1561                 :      * make it just the right size, but it's likely wasteful anyway thanks to
    1562                 :      * moving the freed chunks to freelists etc.
    1563 ECB             :      */
    1564 CBC         564 :     func_dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) *
    1565             564 :                                                    list_length(rel->statlist));
    1566             564 :     nfunc_dependencies = 0;
    1567 GIC         564 :     total_ndeps = 0;
    1568 ECB             : 
    1569 GIC        1197 :     foreach(l, rel->statlist)
    1570 ECB             :     {
    1571 GIC         633 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
    1572                 :         int         nmatched;
    1573                 :         int         nexprs;
    1574                 :         int         k;
    1575                 :         MVDependencies *deps;
    1576                 : 
    1577 ECB             :         /* skip statistics that are not of the correct type */
    1578 CBC         633 :         if (stat->kind != STATS_EXT_DEPENDENCIES)
    1579 GIC          54 :             continue;
    1580                 : 
    1581 ECB             :         /* skip statistics with mismatching stxdinherit value */
    1582 GBC         579 :         if (stat->inherit != rte->inh)
    1583 UIC           0 :             continue;
    1584                 : 
    1585                 :         /*
    1586                 :          * Count matching attributes - we have to undo the attnum offsets. The
    1587                 :          * input attribute numbers are not offset (expressions are not
    1588                 :          * included in stat->keys, so it's not necessary). But we need to
    1589                 :          * offset it before checking against clauses_attnums.
    1590 ECB             :          */
    1591 CBC         579 :         nmatched = 0;
    1592             579 :         k = -1;
    1593 GIC        2124 :         while ((k = bms_next_member(stat->keys, k)) >= 0)
    1594 ECB             :         {
    1595 GIC        1545 :             AttrNumber  attnum = (AttrNumber) k;
    1596                 : 
    1597 ECB             :             /* skip expressions */
    1598 GBC        1545 :             if (!AttrNumberIsForUserDefinedAttr(attnum))
    1599 UIC           0 :                 continue;
    1600                 : 
    1601 ECB             :             /* apply the same offset as above */
    1602 GIC        1545 :             attnum += attnum_offset;
    1603 ECB             : 
    1604 CBC        1545 :             if (bms_is_member(attnum, clauses_attnums))
    1605 GIC        1152 :                 nmatched++;
    1606                 :         }
    1607                 : 
    1608 ECB             :         /* count matching expressions */
    1609 CBC         579 :         nexprs = 0;
    1610 GIC         708 :         for (i = 0; i < unique_exprs_cnt; i++)
    1611                 :         {
    1612                 :             ListCell   *lc;
    1613 ECB             : 
    1614 GIC         516 :             foreach(lc, stat->exprs)
    1615 ECB             :             {
    1616 GIC         387 :                 Node       *stat_expr = (Node *) lfirst(lc);
    1617                 : 
    1618 ECB             :                 /* try to match it */
    1619 CBC         387 :                 if (equal(stat_expr, unique_exprs[i]))
    1620 GIC         129 :                     nexprs++;
    1621                 :             }
    1622                 :         }
    1623                 : 
    1624                 :         /*
    1625                 :          * Skip objects matching fewer than two attributes/expressions from
    1626                 :          * clauses.
    1627 ECB             :          */
    1628 CBC         579 :         if (nmatched + nexprs < 2)
    1629 GIC           9 :             continue;
    1630 ECB             : 
    1631 GIC         570 :         deps = statext_dependencies_load(stat->statOid, rte->inh);
    1632                 : 
    1633                 :         /*
    1634                 :          * The expressions may be represented by different attnums in the
    1635                 :          * stats, we need to remap them to be consistent with the clauses.
    1636                 :          * That will make the later steps (e.g. picking the strongest item and
    1637                 :          * so on) much simpler and cheaper, because it won't need to care
    1638                 :          * about the offset at all.
    1639                 :          *
    1640                 :          * When we're at it, we can ignore dependencies that are not fully
    1641                 :          * matched by clauses (i.e. referencing attributes or expressions that
    1642                 :          * are not in the clauses).
    1643                 :          *
    1644                 :          * We have to do this for all statistics, as long as there are any
    1645                 :          * expressions - we need to shift the attnums in all dependencies.
    1646                 :          *
    1647                 :          * XXX Maybe we should do this always, because it also eliminates some
    1648                 :          * of the dependencies early. It might be cheaper than having to walk
    1649                 :          * the longer list in find_strongest_dependency later, especially as
    1650                 :          * we need to do that repeatedly?
    1651                 :          *
    1652                 :          * XXX We have to do this even when there are no expressions in
    1653                 :          * clauses, otherwise find_strongest_dependency may fail for stats
    1654                 :          * with expressions (due to lookup of negative value in bitmap). So we
    1655                 :          * need to at least filter out those dependencies. Maybe we could do
    1656                 :          * it in a cheaper way (if there are no expr clauses, we can just
    1657                 :          * discard all negative attnums without any lookups).
    1658 ECB             :          */
    1659 GIC         570 :         if (unique_exprs_cnt > 0 || stat->exprs != NIL)
    1660 ECB             :         {
    1661 GIC          54 :             int         ndeps = 0;
    1662 ECB             : 
    1663 GIC         324 :             for (i = 0; i < deps->ndeps; i++)
    1664 ECB             :             {
    1665 CBC         270 :                 bool        skip = false;
    1666 GIC         270 :                 MVDependency *dep = deps->deps[i];
    1667                 :                 int         j;
    1668 ECB             : 
    1669 GIC         753 :                 for (j = 0; j < dep->nattributes; j++)
    1670                 :                 {
    1671                 :                     int         idx;
    1672 ECB             :                     Node       *expr;
    1673 GIC         615 :                     AttrNumber  unique_attnum = InvalidAttrNumber;
    1674                 :                     AttrNumber  attnum;
    1675 ECB             : 
    1676                 :                     /* undo the per-statistics offset */
    1677 GIC         615 :                     attnum = dep->attributes[j];
    1678                 : 
    1679                 :                     /*
    1680                 :                      * For regular attributes we can simply check if it
    1681                 :                      * matches any clause. If there's no matching clause, we
    1682                 :                      * can just ignore it. We need to offset the attnum
    1683 ECB             :                      * though.
    1684                 :                      */
    1685 GBC         615 :                     if (AttrNumberIsForUserDefinedAttr(attnum))
    1686                 :                     {
    1687 UBC           0 :                         dep->attributes[j] = attnum + attnum_offset;
    1688                 : 
    1689               0 :                         if (!bms_is_member(dep->attributes[j], clauses_attnums))
    1690 EUB             :                         {
    1691 UIC           0 :                             skip = true;
    1692               0 :                             break;
    1693 EUB             :                         }
    1694                 : 
    1695 UIC           0 :                         continue;
    1696                 :                     }
    1697                 : 
    1698                 :                     /*
    1699                 :                      * the attnum should be a valid system attnum (-1, -2,
    1700 ECB             :                      * ...)
    1701                 :                      */
    1702 GIC         615 :                     Assert(AttributeNumberIsValid(attnum));
    1703                 : 
    1704                 :                     /*
    1705                 :                      * For expressions, we need to do two translations. First
    1706                 :                      * we have to translate the negative attnum to index in
    1707                 :                      * the list of expressions (in the statistics object).
    1708                 :                      * Then we need to see if there's a matching clause. The
    1709                 :                      * index of the unique expression determines the attnum
    1710 ECB             :                      * (and we offset it).
    1711                 :                      */
    1712 GIC         615 :                     idx = -(1 + attnum);
    1713 ECB             : 
    1714                 :                     /* Is the expression index is valid? */
    1715 CBC         615 :                     Assert((idx >= 0) && (idx < list_length(stat->exprs)));
    1716                 : 
    1717 GIC         615 :                     expr = (Node *) list_nth(stat->exprs, idx);
    1718 ECB             : 
    1719                 :                     /* try to find the expression in the unique list */
    1720 GNC        1230 :                     for (int m = 0; m < unique_exprs_cnt; m++)
    1721                 :                     {
    1722                 :                         /*
    1723                 :                          * found a matching unique expression, use the attnum
    1724 ECB             :                          * (derived from index of the unique expression)
    1725                 :                          */
    1726 GNC        1098 :                         if (equal(unique_exprs[m], expr))
    1727 ECB             :                         {
    1728 GNC         483 :                             unique_attnum = -(m + 1) + attnum_offset;
    1729 GIC         483 :                             break;
    1730                 :                         }
    1731                 :                     }
    1732                 : 
    1733                 :                     /*
    1734                 :                      * Found no matching expression, so we can simply skip
    1735                 :                      * this dependency, because there's no chance it will be
    1736 ECB             :                      * fully covered.
    1737                 :                      */
    1738 CBC         615 :                     if (unique_attnum == InvalidAttrNumber)
    1739 ECB             :                     {
    1740 GIC         132 :                         skip = true;
    1741             132 :                         break;
    1742                 :                     }
    1743 ECB             : 
    1744                 :                     /* otherwise remap it to the new attnum */
    1745 GIC         483 :                     dep->attributes[j] = unique_attnum;
    1746                 :                 }
    1747 ECB             : 
    1748                 :                 /* if found a matching dependency, keep it */
    1749 GIC         270 :                 if (!skip)
    1750 ECB             :                 {
    1751 EUB             :                     /* maybe we've skipped something earlier, so move it */
    1752 GIC         138 :                     if (ndeps != i)
    1753 LBC           0 :                         deps->deps[ndeps] = deps->deps[i];
    1754                 : 
    1755 GIC         138 :                     ndeps++;
    1756                 :                 }
    1757 ECB             :             }
    1758                 : 
    1759 GIC          54 :             deps->ndeps = ndeps;
    1760                 :         }
    1761                 : 
    1762                 :         /*
    1763                 :          * It's possible we've removed all dependencies, in which case we
    1764 ECB             :          * don't bother adding it to the list.
    1765                 :          */
    1766 CBC         570 :         if (deps->ndeps > 0)
    1767 ECB             :         {
    1768 CBC         570 :             func_dependencies[nfunc_dependencies] = deps;
    1769 GIC         570 :             total_ndeps += deps->ndeps;
    1770             570 :             nfunc_dependencies++;
    1771                 :         }
    1772                 :     }
    1773 ECB             : 
    1774                 :     /* if no matching stats could be found then we've nothing to do */
    1775 GBC         564 :     if (nfunc_dependencies == 0)
    1776 EUB             :     {
    1777 UBC           0 :         pfree(func_dependencies);
    1778               0 :         bms_free(clauses_attnums);
    1779               0 :         pfree(list_attnums);
    1780 UIC           0 :         pfree(unique_exprs);
    1781               0 :         return 1.0;
    1782                 :     }
    1783                 : 
    1784                 :     /*
    1785                 :      * Work out which dependencies we can apply, starting with the
    1786 ECB             :      * widest/strongest ones, and proceeding to smaller/weaker ones.
    1787                 :      */
    1788 CBC         564 :     dependencies = (MVDependency **) palloc(sizeof(MVDependency *) *
    1789                 :                                             total_ndeps);
    1790 GIC         564 :     ndependencies = 0;
    1791 ECB             : 
    1792                 :     while (true)
    1793 GIC         711 :     {
    1794                 :         MVDependency *dependency;
    1795                 :         AttrNumber  attnum;
    1796 ECB             : 
    1797                 :         /* the widest/strongest dependency, fully matched by clauses */
    1798 GIC        1275 :         dependency = find_strongest_dependency(func_dependencies,
    1799 ECB             :                                                nfunc_dependencies,
    1800                 :                                                clauses_attnums);
    1801 GIC        1275 :         if (!dependency)
    1802 CBC         564 :             break;
    1803                 : 
    1804 GIC         711 :         dependencies[ndependencies++] = dependency;
    1805 ECB             : 
    1806                 :         /* Ignore dependencies using this implied attribute in later loops */
    1807 GIC         711 :         attnum = dependency->attributes[dependency->nattributes - 1];
    1808             711 :         clauses_attnums = bms_del_member(clauses_attnums, attnum);
    1809                 :     }
    1810                 : 
    1811                 :     /*
    1812                 :      * If we found applicable dependencies, use them to estimate all
    1813 ECB             :      * compatible clauses on attributes that they refer to.
    1814                 :      */
    1815 GIC         564 :     if (ndependencies != 0)
    1816             564 :         s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
    1817                 :                                            sjinfo, dependencies, ndependencies,
    1818                 :                                            list_attnums, estimatedclauses);
    1819 ECB             : 
    1820                 :     /* free deserialized functional dependencies (and then the array) */
    1821 GIC        1134 :     for (i = 0; i < nfunc_dependencies; i++)
    1822 CBC         570 :         pfree(func_dependencies[i]);
    1823 ECB             : 
    1824 CBC         564 :     pfree(dependencies);
    1825             564 :     pfree(func_dependencies);
    1826             564 :     bms_free(clauses_attnums);
    1827 GIC         564 :     pfree(list_attnums);
    1828 CBC         564 :     pfree(unique_exprs);
    1829                 : 
    1830 GIC         564 :     return s1;
    1831                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a