LCOV - differential code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Coverage Total Hit UNC UIC UBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 91.6 % 619 567 3 3 46 107 9 451 3 108 3 8
Current Date: 2023-04-08 15:15:32 Functions: 81.0 % 21 17 4 3 5 9 3
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * mcv.c
       4                 :  *    POSTGRES multivariate MCV lists
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/statistics/mcv.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include <math.h>
      18                 : 
      19                 : #include "access/htup_details.h"
      20                 : #include "catalog/pg_collation.h"
      21                 : #include "catalog/pg_statistic_ext.h"
      22                 : #include "catalog/pg_statistic_ext_data.h"
      23                 : #include "fmgr.h"
      24                 : #include "funcapi.h"
      25                 : #include "nodes/nodeFuncs.h"
      26                 : #include "optimizer/clauses.h"
      27                 : #include "statistics/extended_stats_internal.h"
      28                 : #include "statistics/statistics.h"
      29                 : #include "utils/array.h"
      30                 : #include "utils/builtins.h"
      31                 : #include "utils/bytea.h"
      32                 : #include "utils/fmgroids.h"
      33                 : #include "utils/fmgrprotos.h"
      34                 : #include "utils/lsyscache.h"
      35                 : #include "utils/selfuncs.h"
      36                 : #include "utils/syscache.h"
      37                 : #include "utils/typcache.h"
      38                 : 
      39                 : /*
      40                 :  * Computes size of a serialized MCV item, depending on the number of
      41                 :  * dimensions (columns) the statistic is defined on. The datum values are
      42                 :  * stored in a separate array (deduplicated, to minimize the size), and
      43                 :  * so the serialized items only store uint16 indexes into that array.
      44                 :  *
      45                 :  * Each serialized item stores (in this order):
      46                 :  *
      47                 :  * - indexes to values    (ndim * sizeof(uint16))
      48                 :  * - null flags           (ndim * sizeof(bool))
      49                 :  * - frequency            (sizeof(double))
      50                 :  * - base_frequency       (sizeof(double))
      51                 :  *
      52                 :  * There is no alignment padding within an MCV item.
      53                 :  * So in total each MCV item requires this many bytes:
      54                 :  *
      55                 :  *   ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      56                 :  */
      57                 : #define ITEM_SIZE(ndims)    \
      58                 :     ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      59                 : 
      60                 : /*
      61                 :  * Used to compute size of serialized MCV list representation.
      62                 :  */
      63                 : #define MinSizeOfMCVList        \
      64                 :     (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      65                 : 
      66                 : /*
      67                 :  * Size of the serialized MCV list, excluding the space needed for
      68                 :  * deduplicated per-dimension values. The macro is meant to be used
      69                 :  * when it's not yet safe to access the serialized info about amount
      70                 :  * of data for each column.
      71                 :  */
      72                 : #define SizeOfMCVList(ndims,nitems) \
      73                 :     ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      74                 :      ((ndims) * sizeof(DimensionInfo)) + \
      75                 :      ((nitems) * ITEM_SIZE(ndims)))
      76                 : 
      77                 : static MultiSortSupport build_mss(StatsBuildData *data);
      78                 : 
      79                 : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      80                 :                                        MultiSortSupport mss, int *ndistinct);
      81                 : 
      82                 : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      83                 :                                            MultiSortSupport mss, int *ncounts);
      84                 : 
      85                 : static int  count_distinct_groups(int numrows, SortItem *items,
      86                 :                                   MultiSortSupport mss);
      87                 : 
      88                 : /*
      89                 :  * Compute new value for bitmap item, considering whether it's used for
      90                 :  * clauses connected by AND/OR.
      91                 :  */
      92                 : #define RESULT_MERGE(value, is_or, match) \
      93                 :     ((is_or) ? ((value) || (match)) : ((value) && (match)))
      94                 : 
      95                 : /*
      96                 :  * When processing a list of clauses, the bitmap item may get set to a value
      97                 :  * such that additional clauses can't change it. For example, when processing
      98                 :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      99                 :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
     100                 :  *
     101                 :  * Returns true when the value in the bitmap can't change no matter how the
     102                 :  * remaining clauses are evaluated.
     103                 :  */
     104                 : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
     105                 : 
     106                 : /*
     107                 :  * get_mincount_for_mcv_list
     108                 :  *      Determine the minimum number of times a value needs to appear in
     109                 :  *      the sample for it to be included in the MCV list.
     110                 :  *
     111                 :  * We want to keep only values that appear sufficiently often in the
     112                 :  * sample that it is reasonable to extrapolate their sample frequencies to
     113                 :  * the entire table.  We do this by placing an upper bound on the relative
     114                 :  * standard error of the sample frequency, so that any estimates the
     115                 :  * planner generates from the MCV statistics can be expected to be
     116                 :  * reasonably accurate.
     117                 :  *
     118                 :  * Since we are sampling without replacement, the sample frequency of a
     119                 :  * particular value is described by a hypergeometric distribution.  A
     120                 :  * common rule of thumb when estimating errors in this situation is to
     121                 :  * require at least 10 instances of the value in the sample, in which case
     122                 :  * the distribution can be approximated by a normal distribution, and
     123                 :  * standard error analysis techniques can be applied.  Given a sample size
     124                 :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     125                 :  * standard error of the proportion p is given by
     126                 :  *      SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     127                 :  * where the second term is the finite population correction.  To get
     128                 :  * reasonably accurate planner estimates, we impose an upper bound on the
     129                 :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     130                 :  * error bound is fairly arbitrary, but has been found empirically to work
     131                 :  * well.  Rearranging this formula gives a lower bound on the number of
     132                 :  * instances of the value seen:
     133                 :  *      cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     134                 :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     135                 :  * case where n approaches 0 cannot happen in practice, since the sample
     136                 :  * size is at least 300.  The case where n approaches N corresponds to
     137                 :  * sampling the whole table, in which case it is reasonable to keep
     138                 :  * the whole MCV list (have no lower bound), so it makes sense to apply
     139                 :  * this formula for all inputs, even though the above derivation is
     140                 :  * technically only valid when the right hand side is at least around 10.
     141                 :  *
     142                 :  * An alternative way to look at this formula is as follows -- assume that
     143                 :  * the number of instances of the value seen scales up to the entire
     144                 :  * table, so that the population count is K=N*cnt/n. Then the distribution
     145                 :  * in the sample is a hypergeometric distribution parameterised by N, n
     146                 :  * and K, and the bound above is mathematically equivalent to demanding
     147                 :  * that the standard deviation of that distribution is less than 20% of
     148                 :  * its mean.  Thus the relative errors in any planner estimates produced
     149                 :  * from the MCV statistics are likely to be not too large.
     150                 :  */
     151                 : static double
     152 CBC          90 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     153                 : {
     154              90 :     double      n = samplerows;
     155              90 :     double      N = totalrows;
     156                 :     double      numer,
     157                 :                 denom;
     158                 : 
     159              90 :     numer = n * (N - n);
     160              90 :     denom = N - n + 0.04 * n * (N - 1);
     161                 : 
     162                 :     /* Guard against division by zero (possible if n = N = 1) */
     163              90 :     if (denom == 0.0)
     164               6 :         return 0.0;
     165                 : 
     166              84 :     return numer / denom;
     167                 : }
     168                 : 
     169                 : /*
     170                 :  * Builds MCV list from the set of sampled rows.
     171                 :  *
     172                 :  * The algorithm is quite simple:
     173                 :  *
     174                 :  *     (1) sort the data (default collation, '<' for the data type)
     175                 :  *
     176                 :  *     (2) count distinct groups, decide how many to keep
     177                 :  *
     178                 :  *     (3) build the MCV list using the threshold determined in (2)
     179                 :  *
     180                 :  *     (4) remove rows represented by the MCV from the sample
     181                 :  *
     182                 :  */
     183                 : MCVList *
     184              90 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     185                 : {
     186                 :     int         i,
     187                 :                 numattrs,
     188                 :                 numrows,
     189                 :                 ngroups,
     190                 :                 nitems;
     191                 :     double      mincount;
     192                 :     SortItem   *items;
     193                 :     SortItem   *groups;
     194              90 :     MCVList    *mcvlist = NULL;
     195                 :     MultiSortSupport mss;
     196                 : 
     197                 :     /* comparator for all the columns */
     198              90 :     mss = build_mss(data);
     199                 : 
     200                 :     /* sort the rows */
     201              90 :     items = build_sorted_items(data, &nitems, mss,
     202                 :                                data->nattnums, data->attnums);
     203                 : 
     204              90 :     if (!items)
     205 UBC           0 :         return NULL;
     206                 : 
     207                 :     /* for convenience */
     208 CBC          90 :     numattrs = data->nattnums;
     209              90 :     numrows = data->numrows;
     210                 : 
     211                 :     /* transform the sorted rows into groups (sorted by frequency) */
     212              90 :     groups = build_distinct_groups(nitems, items, mss, &ngroups);
     213                 : 
     214                 :     /*
     215                 :      * The maximum number of MCV items to store, based on the statistics
     216                 :      * target we computed for the statistics object (from the target set for
     217                 :      * the object itself, attributes and the system default). In any case, we
     218                 :      * can't keep more groups than we have available.
     219                 :      */
     220              90 :     nitems = stattarget;
     221              90 :     if (nitems > ngroups)
     222              54 :         nitems = ngroups;
     223                 : 
     224                 :     /*
     225                 :      * Decide how many items to keep in the MCV list. We can't use the same
     226                 :      * algorithm as per-column MCV lists, because that only considers the
     227                 :      * actual group frequency - but we're primarily interested in how the
     228                 :      * actual frequency differs from the base frequency (product of simple
     229                 :      * per-column frequencies, as if the columns were independent).
     230                 :      *
     231                 :      * Using the same algorithm might exclude items that are close to the
     232                 :      * "average" frequency of the sample. But that does not say whether the
     233                 :      * observed frequency is close to the base frequency or not. We also need
     234                 :      * to consider unexpectedly uncommon items (again, compared to the base
     235                 :      * frequency), and the single-column algorithm does not have to.
     236                 :      *
     237                 :      * We simply decide how many items to keep by computing the minimum count
     238                 :      * using get_mincount_for_mcv_list() and then keep all items that seem to
     239                 :      * be more common than that.
     240                 :      */
     241              90 :     mincount = get_mincount_for_mcv_list(numrows, totalrows);
     242                 : 
     243                 :     /*
     244                 :      * Walk the groups until we find the first group with a count below the
     245                 :      * mincount threshold (the index of that group is the number of groups we
     246                 :      * want to keep).
     247                 :      */
     248            4386 :     for (i = 0; i < nitems; i++)
     249                 :     {
     250            4296 :         if (groups[i].count < mincount)
     251                 :         {
     252 UBC           0 :             nitems = i;
     253               0 :             break;
     254                 :         }
     255                 :     }
     256                 : 
     257                 :     /*
     258                 :      * At this point, we know the number of items for the MCV list. There
     259                 :      * might be none (for uniform distribution with many groups), and in that
     260                 :      * case, there will be no MCV list. Otherwise, construct the MCV list.
     261                 :      */
     262 CBC          90 :     if (nitems > 0)
     263                 :     {
     264                 :         int         j;
     265                 :         SortItem    key;
     266                 :         MultiSortSupport tmp;
     267                 : 
     268                 :         /* frequencies for values in each attribute */
     269                 :         SortItem  **freqs;
     270                 :         int        *nfreqs;
     271                 : 
     272                 :         /* used to search values */
     273              90 :         tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     274                 :                                         + sizeof(SortSupportData));
     275                 : 
     276                 :         /* compute frequencies for values in each column */
     277              90 :         nfreqs = (int *) palloc0(sizeof(int) * numattrs);
     278              90 :         freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     279                 : 
     280                 :         /*
     281                 :          * Allocate the MCV list structure, set the global parameters.
     282                 :          */
     283              90 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     284              90 :                                       sizeof(MCVItem) * nitems);
     285                 : 
     286              90 :         mcvlist->magic = STATS_MCV_MAGIC;
     287              90 :         mcvlist->type = STATS_MCV_TYPE_BASIC;
     288              90 :         mcvlist->ndimensions = numattrs;
     289              90 :         mcvlist->nitems = nitems;
     290                 : 
     291                 :         /* store info about data type OIDs */
     292             345 :         for (i = 0; i < numattrs; i++)
     293             255 :             mcvlist->types[i] = data->stats[i]->attrtypid;
     294                 : 
     295                 :         /* Copy the first chunk of groups into the result. */
     296            4386 :         for (i = 0; i < nitems; i++)
     297                 :         {
     298                 :             /* just point to the proper place in the list */
     299            4296 :             MCVItem    *item = &mcvlist->items[i];
     300                 : 
     301            4296 :             item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
     302            4296 :             item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
     303                 : 
     304                 :             /* copy values for the group */
     305            4296 :             memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     306            4296 :             memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     307                 : 
     308                 :             /* groups should be sorted by frequency in descending order */
     309            4296 :             Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     310                 : 
     311                 :             /* group frequency */
     312            4296 :             item->frequency = (double) groups[i].count / numrows;
     313                 : 
     314                 :             /* base frequency, if the attributes were independent */
     315            4296 :             item->base_frequency = 1.0;
     316           16425 :             for (j = 0; j < numattrs; j++)
     317                 :             {
     318                 :                 SortItem   *freq;
     319                 : 
     320                 :                 /* single dimension */
     321           12129 :                 tmp->ndims = 1;
     322           12129 :                 tmp->ssup[0] = mss->ssup[j];
     323                 : 
     324                 :                 /* fill search key */
     325           12129 :                 key.values = &groups[i].values[j];
     326           12129 :                 key.isnull = &groups[i].isnull[j];
     327                 : 
     328           12129 :                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     329                 :                                                 sizeof(SortItem),
     330                 :                                                 multi_sort_compare, tmp);
     331                 : 
     332           12129 :                 item->base_frequency *= ((double) freq->count) / numrows;
     333                 :             }
     334                 :         }
     335                 : 
     336              90 :         pfree(nfreqs);
     337              90 :         pfree(freqs);
     338                 :     }
     339                 : 
     340              90 :     pfree(items);
     341              90 :     pfree(groups);
     342                 : 
     343              90 :     return mcvlist;
     344                 : }
     345                 : 
     346                 : /*
     347                 :  * build_mss
     348                 :  *      Build a MultiSortSupport for the given StatsBuildData.
     349                 :  */
     350                 : static MultiSortSupport
     351              90 : build_mss(StatsBuildData *data)
     352                 : {
     353                 :     int         i;
     354              90 :     int         numattrs = data->nattnums;
     355                 : 
     356                 :     /* Sort by multiple columns (using array of SortSupport) */
     357              90 :     MultiSortSupport mss = multi_sort_init(numattrs);
     358                 : 
     359                 :     /* prepare the sort functions for all the attributes */
     360             345 :     for (i = 0; i < numattrs; i++)
     361                 :     {
     362             255 :         VacAttrStats *colstat = data->stats[i];
     363                 :         TypeCacheEntry *type;
     364                 : 
     365             255 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     366             255 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     367 UBC           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     368                 :                  colstat->attrtypid);
     369                 : 
     370 CBC         255 :         multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     371                 :     }
     372                 : 
     373              90 :     return mss;
     374                 : }
     375                 : 
     376                 : /*
     377                 :  * count_distinct_groups
     378                 :  *      Count distinct combinations of SortItems in the array.
     379                 :  *
     380                 :  * The array is assumed to be sorted according to the MultiSortSupport.
     381                 :  */
     382                 : static int
     383              90 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     384                 : {
     385                 :     int         i;
     386                 :     int         ndistinct;
     387                 : 
     388              90 :     ndistinct = 1;
     389          229509 :     for (i = 1; i < numrows; i++)
     390                 :     {
     391                 :         /* make sure the array really is sorted */
     392          229419 :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     393                 : 
     394          229419 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     395           38856 :             ndistinct += 1;
     396                 :     }
     397                 : 
     398              90 :     return ndistinct;
     399                 : }
     400                 : 
     401                 : /*
     402                 :  * compare_sort_item_count
     403                 :  *      Comparator for sorting items by count (frequencies) in descending
     404                 :  *      order.
     405                 :  */
     406                 : static int
     407           39378 : compare_sort_item_count(const void *a, const void *b, void *arg)
     408                 : {
     409           39378 :     SortItem   *ia = (SortItem *) a;
     410           39378 :     SortItem   *ib = (SortItem *) b;
     411                 : 
     412           39378 :     if (ia->count == ib->count)
     413           38904 :         return 0;
     414             474 :     else if (ia->count > ib->count)
     415             315 :         return -1;
     416                 : 
     417             159 :     return 1;
     418                 : }
     419                 : 
     420                 : /*
     421                 :  * build_distinct_groups
     422                 :  *      Build an array of SortItems for distinct groups and counts matching
     423                 :  *      items.
     424                 :  *
     425                 :  * The 'items' array is assumed to be sorted.
     426                 :  */
     427                 : static SortItem *
     428              90 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     429                 :                       int *ndistinct)
     430                 : {
     431                 :     int         i,
     432                 :                 j;
     433              90 :     int         ngroups = count_distinct_groups(numrows, items, mss);
     434                 : 
     435              90 :     SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     436                 : 
     437              90 :     j = 0;
     438              90 :     groups[0] = items[0];
     439              90 :     groups[0].count = 1;
     440                 : 
     441          229509 :     for (i = 1; i < numrows; i++)
     442                 :     {
     443                 :         /* Assume sorted in ascending order. */
     444          229419 :         Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     445                 : 
     446                 :         /* New distinct group detected. */
     447          229419 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     448                 :         {
     449           38856 :             groups[++j] = items[i];
     450           38856 :             groups[j].count = 0;
     451                 :         }
     452                 : 
     453          229419 :         groups[j].count++;
     454                 :     }
     455                 : 
     456                 :     /* ensure we filled the expected number of distinct groups */
     457              90 :     Assert(j + 1 == ngroups);
     458                 : 
     459                 :     /* Sort the distinct groups by frequency (in descending order). */
     460 GNC          90 :     qsort_interruptible(groups, ngroups, sizeof(SortItem),
     461                 :                         compare_sort_item_count, NULL);
     462                 : 
     463 CBC          90 :     *ndistinct = ngroups;
     464              90 :     return groups;
     465                 : }
     466                 : 
     467                 : /* compare sort items (single dimension) */
     468                 : static int
     469          417045 : sort_item_compare(const void *a, const void *b, void *arg)
     470                 : {
     471          417045 :     SortSupport ssup = (SortSupport) arg;
     472          417045 :     SortItem   *ia = (SortItem *) a;
     473          417045 :     SortItem   *ib = (SortItem *) b;
     474                 : 
     475          834090 :     return ApplySortComparator(ia->values[0], ia->isnull[0],
     476          417045 :                                ib->values[0], ib->isnull[0],
     477                 :                                ssup);
     478                 : }
     479                 : 
     480                 : /*
     481                 :  * build_column_frequencies
     482                 :  *      Compute frequencies of values in each column.
     483                 :  *
     484                 :  * This returns an array of SortItems for each attribute the MCV is built
     485                 :  * on, with a frequency (number of occurrences) for each value. This is
     486                 :  * then used to compute "base" frequency of MCV items.
     487                 :  *
     488                 :  * All the memory is allocated in a single chunk, so that a single pfree
     489                 :  * is enough to release it. We do not allocate space for values/isnull
     490                 :  * arrays in the SortItems, because we can simply point into the input
     491                 :  * groups directly.
     492                 :  */
     493                 : static SortItem **
     494              90 : build_column_frequencies(SortItem *groups, int ngroups,
     495                 :                          MultiSortSupport mss, int *ncounts)
     496                 : {
     497                 :     int         i,
     498                 :                 dim;
     499                 :     SortItem  **result;
     500                 :     char       *ptr;
     501                 : 
     502              90 :     Assert(groups);
     503              90 :     Assert(ncounts);
     504                 : 
     505                 :     /* allocate arrays for all columns as a single chunk */
     506              90 :     ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     507              90 :                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     508                 : 
     509                 :     /* initial array of pointers */
     510              90 :     result = (SortItem **) ptr;
     511              90 :     ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     512                 : 
     513             345 :     for (dim = 0; dim < mss->ndims; dim++)
     514                 :     {
     515             255 :         SortSupport ssup = &mss->ssup[dim];
     516                 : 
     517                 :         /* array of values for a single column */
     518             255 :         result[dim] = (SortItem *) ptr;
     519             255 :         ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     520                 : 
     521                 :         /* extract data for the dimension */
     522          113484 :         for (i = 0; i < ngroups; i++)
     523                 :         {
     524                 :             /* point into the input groups */
     525          113229 :             result[dim][i].values = &groups[i].values[dim];
     526          113229 :             result[dim][i].isnull = &groups[i].isnull[dim];
     527          113229 :             result[dim][i].count = groups[i].count;
     528                 :         }
     529                 : 
     530                 :         /* sort the values, deduplicate */
     531 GNC         255 :         qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     532                 :                             sort_item_compare, ssup);
     533                 : 
     534                 :         /*
     535                 :          * Identify distinct values, compute frequency (there might be
     536                 :          * multiple MCV items containing this value, so we need to sum counts
     537                 :          * from all of them.
     538                 :          */
     539 CBC         255 :         ncounts[dim] = 1;
     540          113229 :         for (i = 1; i < ngroups; i++)
     541                 :         {
     542          112974 :             if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     543                 :             {
     544           68091 :                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     545           68091 :                 continue;
     546                 :             }
     547                 : 
     548           44883 :             result[dim][ncounts[dim]] = result[dim][i];
     549                 : 
     550           44883 :             ncounts[dim]++;
     551                 :         }
     552                 :     }
     553                 : 
     554              90 :     return result;
     555                 : }
     556                 : 
     557                 : /*
     558                 :  * statext_mcv_load
     559                 :  *      Load the MCV list for the indicated pg_statistic_ext_data tuple.
     560                 :  */
     561                 : MCVList *
     562             240 : statext_mcv_load(Oid mvoid, bool inh)
     563                 : {
     564                 :     MCVList    *result;
     565                 :     bool        isnull;
     566                 :     Datum       mcvlist;
     567             240 :     HeapTuple   htup = SearchSysCache2(STATEXTDATASTXOID,
     568                 :                                        ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     569                 : 
     570             240 :     if (!HeapTupleIsValid(htup))
     571 UBC           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     572                 : 
     573 CBC         240 :     mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     574                 :                               Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     575                 : 
     576             240 :     if (isnull)
     577 UBC           0 :         elog(ERROR,
     578                 :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     579                 :              STATS_EXT_DEPENDENCIES, mvoid);
     580                 : 
     581 CBC         240 :     result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     582                 : 
     583             240 :     ReleaseSysCache(htup);
     584                 : 
     585             240 :     return result;
     586                 : }
     587                 : 
     588                 : 
     589                 : /*
     590                 :  * statext_mcv_serialize
     591                 :  *      Serialize MCV list into a pg_mcv_list value.
     592                 :  *
     593                 :  * The MCV items may include values of various data types, and it's reasonable
     594                 :  * to expect redundancy (values for a given attribute, repeated for multiple
     595                 :  * MCV list items). So we deduplicate the values into arrays, and then replace
     596                 :  * the values by indexes into those arrays.
     597                 :  *
     598                 :  * The overall structure of the serialized representation looks like this:
     599                 :  *
     600                 :  * +---------------+----------------+---------------------+-------+
     601                 :  * | header fields | dimension info | deduplicated values | items |
     602                 :  * +---------------+----------------+---------------------+-------+
     603                 :  *
     604                 :  * Where dimension info stores information about the type of the K-th
     605                 :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     606                 :  * Deduplicated values store deduplicated values for each attribute.  And
     607                 :  * items store the actual MCV list items, with values replaced by indexes into
     608                 :  * the arrays.
     609                 :  *
     610                 :  * When serializing the items, we use uint16 indexes. The number of MCV items
     611                 :  * is limited by the statistics target (which is capped to 10k at the moment).
     612                 :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     613                 :  * slack. Furthermore, this limit is on the number of distinct values per column,
     614                 :  * and we usually have few of those (and various combinations of them for the
     615                 :  * those MCV list). So uint16 seems fine for now.
     616                 :  *
     617                 :  * We don't really expect the serialization to save as much space as for
     618                 :  * histograms, as we are not doing any bucket splits (which is the source
     619                 :  * of high redundancy in histograms).
     620                 :  *
     621                 :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     622                 :  * (or a longer type) instead of using an array of bool items.
     623                 :  */
     624                 : bytea *
     625              90 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     626                 : {
     627                 :     int         i;
     628                 :     int         dim;
     629              90 :     int         ndims = mcvlist->ndimensions;
     630                 : 
     631                 :     SortSupport ssup;
     632                 :     DimensionInfo *info;
     633                 : 
     634                 :     Size        total_length;
     635                 : 
     636                 :     /* serialized items (indexes into arrays, etc.) */
     637                 :     bytea      *raw;
     638                 :     char       *ptr;
     639                 :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     640                 : 
     641                 :     /* values per dimension (and number of non-NULL values) */
     642              90 :     Datum     **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
     643              90 :     int        *counts = (int *) palloc0(sizeof(int) * ndims);
     644                 : 
     645                 :     /*
     646                 :      * We'll include some rudimentary information about the attribute types
     647                 :      * (length, by-val flag), so that we don't have to look them up while
     648                 :      * deserializing the MCV list (we already have the type OID in the
     649                 :      * header).  This is safe because when changing the type of the attribute
     650                 :      * the statistics gets dropped automatically.  We need to store the info
     651                 :      * about the arrays of deduplicated values anyway.
     652                 :      */
     653              90 :     info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
     654                 : 
     655                 :     /* sort support data for all attributes included in the MCV list */
     656              90 :     ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
     657                 : 
     658                 :     /* collect and deduplicate values for each dimension (attribute) */
     659             345 :     for (dim = 0; dim < ndims; dim++)
     660                 :     {
     661                 :         int         ndistinct;
     662                 :         TypeCacheEntry *typentry;
     663                 : 
     664                 :         /*
     665                 :          * Lookup the LT operator (can't get it from stats extra_data, as we
     666                 :          * don't know how to interpret that - scalar vs. array etc.).
     667                 :          */
     668             255 :         typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     669                 : 
     670                 :         /* copy important info about the data type (length, by-value) */
     671             255 :         info[dim].typlen = stats[dim]->attrtype->typlen;
     672             255 :         info[dim].typbyval = stats[dim]->attrtype->typbyval;
     673                 : 
     674                 :         /* allocate space for values in the attribute and collect them */
     675             255 :         values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
     676                 : 
     677           12384 :         for (i = 0; i < mcvlist->nitems; i++)
     678                 :         {
     679                 :             /* skip NULL values - we don't need to deduplicate those */
     680           12129 :             if (mcvlist->items[i].isnull[dim])
     681              33 :                 continue;
     682                 : 
     683                 :             /* append the value at the end */
     684           12096 :             values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     685           12096 :             counts[dim] += 1;
     686                 :         }
     687                 : 
     688                 :         /* if there are just NULL values in this dimension, we're done */
     689             255 :         if (counts[dim] == 0)
     690               3 :             continue;
     691                 : 
     692                 :         /* sort and deduplicate the data */
     693             252 :         ssup[dim].ssup_cxt = CurrentMemoryContext;
     694             252 :         ssup[dim].ssup_collation = stats[dim]->attrcollid;
     695             252 :         ssup[dim].ssup_nulls_first = false;
     696                 : 
     697             252 :         PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     698                 : 
     699             252 :         qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     700             252 :                             compare_scalars_simple, &ssup[dim]);
     701                 : 
     702                 :         /*
     703                 :          * Walk through the array and eliminate duplicate values, but keep the
     704                 :          * ordering (so that we can do a binary search later). We know there's
     705                 :          * at least one item as (counts[dim] != 0), so we can skip the first
     706                 :          * element.
     707                 :          */
     708             252 :         ndistinct = 1;          /* number of distinct values */
     709           12096 :         for (i = 1; i < counts[dim]; i++)
     710                 :         {
     711                 :             /* expect sorted array */
     712           11844 :             Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     713                 : 
     714                 :             /* if the value is the same as the previous one, we can skip it */
     715           11844 :             if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     716            4977 :                 continue;
     717                 : 
     718            6867 :             values[dim][ndistinct] = values[dim][i];
     719            6867 :             ndistinct += 1;
     720                 :         }
     721                 : 
     722                 :         /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     723             252 :         Assert(ndistinct <= PG_UINT16_MAX);
     724                 : 
     725                 :         /*
     726                 :          * Store additional info about the attribute - number of deduplicated
     727                 :          * values, and also size of the serialized data. For fixed-length data
     728                 :          * types this is trivial to compute, for varwidth types we need to
     729                 :          * actually walk the array and sum the sizes.
     730                 :          */
     731             252 :         info[dim].nvalues = ndistinct;
     732                 : 
     733             252 :         if (info[dim].typbyval) /* by-value data types */
     734                 :         {
     735             186 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     736                 : 
     737                 :             /*
     738                 :              * We copy the data into the MCV item during deserialization, so
     739                 :              * we don't need to allocate any extra space.
     740                 :              */
     741             186 :             info[dim].nbytes_aligned = 0;
     742                 :         }
     743              66 :         else if (info[dim].typlen > 0)   /* fixed-length by-ref */
     744                 :         {
     745                 :             /*
     746                 :              * We don't care about alignment in the serialized data, so we
     747                 :              * pack the data as much as possible. But we also track how much
     748                 :              * data will be needed after deserialization, and in that case we
     749                 :              * need to account for alignment of each item.
     750                 :              *
     751                 :              * Note: As the items are fixed-length, we could easily compute
     752                 :              * this during deserialization, but we do it here anyway.
     753                 :              */
     754              12 :             info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     755              12 :             info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     756                 :         }
     757              54 :         else if (info[dim].typlen == -1)    /* varlena */
     758                 :         {
     759              54 :             info[dim].nbytes = 0;
     760              54 :             info[dim].nbytes_aligned = 0;
     761            1437 :             for (i = 0; i < info[dim].nvalues; i++)
     762                 :             {
     763                 :                 Size        len;
     764                 : 
     765                 :                 /*
     766                 :                  * For varlena values, we detoast the values and store the
     767                 :                  * length and data separately. We don't bother with alignment
     768                 :                  * here, which means that during deserialization we need to
     769                 :                  * copy the fields and only access the copies.
     770                 :                  */
     771            1383 :                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     772                 : 
     773                 :                 /* serialized length (uint32 length + data) */
     774            1383 :                 len = VARSIZE_ANY_EXHDR(values[dim][i]);
     775            1383 :                 info[dim].nbytes += sizeof(uint32); /* length */
     776            1383 :                 info[dim].nbytes += len;    /* value (no header) */
     777                 : 
     778                 :                 /*
     779                 :                  * During deserialization we'll build regular varlena values
     780                 :                  * with full headers, and we need to align them properly.
     781                 :                  */
     782            1383 :                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     783                 :             }
     784                 :         }
     785 UBC           0 :         else if (info[dim].typlen == -2)    /* cstring */
     786                 :         {
     787               0 :             info[dim].nbytes = 0;
     788               0 :             info[dim].nbytes_aligned = 0;
     789               0 :             for (i = 0; i < info[dim].nvalues; i++)
     790                 :             {
     791                 :                 Size        len;
     792                 : 
     793                 :                 /*
     794                 :                  * cstring is handled similar to varlena - first we store the
     795                 :                  * length as uint32 and then the data. We don't care about
     796                 :                  * alignment, which means that during deserialization we need
     797                 :                  * to copy the fields and only access the copies.
     798                 :                  */
     799                 : 
     800                 :                 /* c-strings include terminator, so +1 byte */
     801               0 :                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     802               0 :                 info[dim].nbytes += sizeof(uint32); /* length */
     803               0 :                 info[dim].nbytes += len;    /* value */
     804                 : 
     805                 :                 /* space needed for properly aligned deserialized copies */
     806               0 :                 info[dim].nbytes_aligned += MAXALIGN(len);
     807                 :             }
     808                 :         }
     809                 : 
     810                 :         /* we know (count>0) so there must be some data */
     811 CBC         252 :         Assert(info[dim].nbytes > 0);
     812                 :     }
     813                 : 
     814                 :     /*
     815                 :      * Now we can finally compute how much space we'll actually need for the
     816                 :      * whole serialized MCV list (varlena header, MCV header, dimension info
     817                 :      * for each attribute, deduplicated values and items).
     818                 :      */
     819              90 :     total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     820                 :         + sizeof(AttrNumber)    /* ndimensions */
     821              90 :         + (ndims * sizeof(Oid));    /* attribute types */
     822                 : 
     823                 :     /* dimension info */
     824              90 :     total_length += ndims * sizeof(DimensionInfo);
     825                 : 
     826                 :     /* add space for the arrays of deduplicated values */
     827             345 :     for (i = 0; i < ndims; i++)
     828             255 :         total_length += info[i].nbytes;
     829                 : 
     830                 :     /*
     831                 :      * And finally account for the items (those are fixed-length, thanks to
     832                 :      * replacing values with uint16 indexes into the deduplicated arrays).
     833                 :      */
     834              90 :     total_length += mcvlist->nitems * ITEM_SIZE(dim);
     835                 : 
     836                 :     /*
     837                 :      * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     838                 :      * we set them to zero to make the result more compressible).
     839                 :      */
     840              90 :     raw = (bytea *) palloc0(VARHDRSZ + total_length);
     841              90 :     SET_VARSIZE(raw, VARHDRSZ + total_length);
     842                 : 
     843              90 :     ptr = VARDATA(raw);
     844              90 :     endptr = ptr + total_length;
     845                 : 
     846                 :     /* copy the MCV list header fields, one by one */
     847              90 :     memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     848              90 :     ptr += sizeof(uint32);
     849                 : 
     850              90 :     memcpy(ptr, &mcvlist->type, sizeof(uint32));
     851              90 :     ptr += sizeof(uint32);
     852                 : 
     853              90 :     memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     854              90 :     ptr += sizeof(uint32);
     855                 : 
     856              90 :     memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     857              90 :     ptr += sizeof(AttrNumber);
     858                 : 
     859              90 :     memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     860              90 :     ptr += (sizeof(Oid) * ndims);
     861                 : 
     862                 :     /* store information about the attributes (data amounts, ...) */
     863              90 :     memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     864              90 :     ptr += sizeof(DimensionInfo) * ndims;
     865                 : 
     866                 :     /* Copy the deduplicated values for all attributes to the output. */
     867             345 :     for (dim = 0; dim < ndims; dim++)
     868                 :     {
     869                 :         /* remember the starting point for Asserts later */
     870             255 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     871                 : 
     872            7374 :         for (i = 0; i < info[dim].nvalues; i++)
     873                 :         {
     874            7119 :             Datum       value = values[dim][i];
     875                 : 
     876            7119 :             if (info[dim].typbyval) /* passed by value */
     877                 :             {
     878                 :                 Datum       tmp;
     879                 : 
     880                 :                 /*
     881                 :                  * For byval types, we need to copy just the significant bytes
     882                 :                  * - we can't use memcpy directly, as that assumes
     883                 :                  * little-endian behavior.  store_att_byval does almost what
     884                 :                  * we need, but it requires a properly aligned buffer - the
     885                 :                  * output buffer does not guarantee that. So we simply use a
     886                 :                  * local Datum variable (which guarantees proper alignment),
     887                 :                  * and then copy the value from it.
     888                 :                  */
     889            5181 :                 store_att_byval(&tmp, value, info[dim].typlen);
     890                 : 
     891            5181 :                 memcpy(ptr, &tmp, info[dim].typlen);
     892            5181 :                 ptr += info[dim].typlen;
     893                 :             }
     894            1938 :             else if (info[dim].typlen > 0)   /* passed by reference */
     895                 :             {
     896                 :                 /* no special alignment needed, treated as char array */
     897             555 :                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     898             555 :                 ptr += info[dim].typlen;
     899                 :             }
     900            1383 :             else if (info[dim].typlen == -1)    /* varlena */
     901                 :             {
     902            1383 :                 uint32      len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     903                 : 
     904                 :                 /* copy the length */
     905            1383 :                 memcpy(ptr, &len, sizeof(uint32));
     906            1383 :                 ptr += sizeof(uint32);
     907                 : 
     908                 :                 /* data from the varlena value (without the header) */
     909            1383 :                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     910            1383 :                 ptr += len;
     911                 :             }
     912 UBC           0 :             else if (info[dim].typlen == -2)    /* cstring */
     913                 :             {
     914               0 :                 uint32      len = (uint32) strlen(DatumGetCString(value)) + 1;
     915                 : 
     916                 :                 /* copy the length */
     917               0 :                 memcpy(ptr, &len, sizeof(uint32));
     918               0 :                 ptr += sizeof(uint32);
     919                 : 
     920                 :                 /* value */
     921               0 :                 memcpy(ptr, DatumGetCString(value), len);
     922               0 :                 ptr += len;
     923                 :             }
     924                 : 
     925                 :             /* no underflows or overflows */
     926 CBC        7119 :             Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     927                 :         }
     928                 : 
     929                 :         /* we should get exactly nbytes of data for this dimension */
     930             255 :         Assert((ptr - start) == info[dim].nbytes);
     931                 :     }
     932                 : 
     933                 :     /* Serialize the items, with uint16 indexes instead of the values. */
     934            4386 :     for (i = 0; i < mcvlist->nitems; i++)
     935                 :     {
     936            4296 :         MCVItem    *mcvitem = &mcvlist->items[i];
     937                 : 
     938                 :         /* don't write beyond the allocated space */
     939            4296 :         Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     940                 : 
     941                 :         /* copy NULL and frequency flags into the serialized MCV */
     942            4296 :         memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     943            4296 :         ptr += sizeof(bool) * ndims;
     944                 : 
     945            4296 :         memcpy(ptr, &mcvitem->frequency, sizeof(double));
     946            4296 :         ptr += sizeof(double);
     947                 : 
     948            4296 :         memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     949            4296 :         ptr += sizeof(double);
     950                 : 
     951                 :         /* store the indexes last */
     952           16425 :         for (dim = 0; dim < ndims; dim++)
     953                 :         {
     954           12129 :             uint16      index = 0;
     955                 :             Datum      *value;
     956                 : 
     957                 :             /* do the lookup only for non-NULL values */
     958           12129 :             if (!mcvitem->isnull[dim])
     959                 :             {
     960           12096 :                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     961           12096 :                                               info[dim].nvalues, sizeof(Datum),
     962           12096 :                                               compare_scalars_simple, &ssup[dim]);
     963                 : 
     964           12096 :                 Assert(value != NULL);  /* serialization or deduplication
     965                 :                                          * error */
     966                 : 
     967                 :                 /* compute index within the deduplicated array */
     968           12096 :                 index = (uint16) (value - values[dim]);
     969                 : 
     970                 :                 /* check the index is within expected bounds */
     971           12096 :                 Assert(index < info[dim].nvalues);
     972                 :             }
     973                 : 
     974                 :             /* copy the index into the serialized MCV */
     975           12129 :             memcpy(ptr, &index, sizeof(uint16));
     976           12129 :             ptr += sizeof(uint16);
     977                 :         }
     978                 : 
     979                 :         /* make sure we don't overflow the allocated value */
     980            4296 :         Assert(ptr <= endptr);
     981                 :     }
     982                 : 
     983                 :     /* at this point we expect to match the total_length exactly */
     984              90 :     Assert(ptr == endptr);
     985                 : 
     986              90 :     pfree(values);
     987              90 :     pfree(counts);
     988                 : 
     989              90 :     return raw;
     990                 : }
     991                 : 
     992                 : /*
     993                 :  * statext_mcv_deserialize
     994                 :  *      Reads serialized MCV list into MCVList structure.
     995                 :  *
     996                 :  * All the memory needed by the MCV list is allocated as a single chunk, so
     997                 :  * it's possible to simply pfree() it at once.
     998                 :  */
     999                 : MCVList *
    1000             246 : statext_mcv_deserialize(bytea *data)
    1001                 : {
    1002                 :     int         dim,
    1003                 :                 i;
    1004                 :     Size        expected_size;
    1005                 :     MCVList    *mcvlist;
    1006                 :     char       *raw;
    1007                 :     char       *ptr;
    1008                 :     char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1009                 : 
    1010                 :     int         ndims,
    1011                 :                 nitems;
    1012             246 :     DimensionInfo *info = NULL;
    1013                 : 
    1014                 :     /* local allocation buffer (used only for deserialization) */
    1015             246 :     Datum     **map = NULL;
    1016                 : 
    1017                 :     /* MCV list */
    1018                 :     Size        mcvlen;
    1019                 : 
    1020                 :     /* buffer used for the result */
    1021                 :     Size        datalen;
    1022                 :     char       *dataptr;
    1023                 :     char       *valuesptr;
    1024                 :     char       *isnullptr;
    1025                 : 
    1026             246 :     if (data == NULL)
    1027 UBC           0 :         return NULL;
    1028                 : 
    1029                 :     /*
    1030                 :      * We can't possibly deserialize a MCV list if there's not even a complete
    1031                 :      * header. We need an explicit formula here, because we serialize the
    1032                 :      * header fields one by one, so we need to ignore struct alignment.
    1033                 :      */
    1034 CBC         246 :     if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1035 UNC           0 :         elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1036                 :              VARSIZE_ANY(data), MinSizeOfMCVList);
    1037                 : 
    1038                 :     /* read the MCV list header */
    1039 CBC         246 :     mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1040                 : 
    1041                 :     /* pointer to the data part (skip the varlena header) */
    1042             246 :     raw = (char *) data;
    1043             246 :     ptr = VARDATA_ANY(raw);
    1044             246 :     endptr = (char *) raw + VARSIZE_ANY(data);
    1045                 : 
    1046                 :     /* get the header and perform further sanity checks */
    1047             246 :     memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1048             246 :     ptr += sizeof(uint32);
    1049                 : 
    1050             246 :     memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1051             246 :     ptr += sizeof(uint32);
    1052                 : 
    1053             246 :     memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1054             246 :     ptr += sizeof(uint32);
    1055                 : 
    1056             246 :     memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1057             246 :     ptr += sizeof(AttrNumber);
    1058                 : 
    1059             246 :     if (mcvlist->magic != STATS_MCV_MAGIC)
    1060 UBC           0 :         elog(ERROR, "invalid MCV magic %u (expected %u)",
    1061                 :              mcvlist->magic, STATS_MCV_MAGIC);
    1062                 : 
    1063 CBC         246 :     if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1064 UBC           0 :         elog(ERROR, "invalid MCV type %u (expected %u)",
    1065                 :              mcvlist->type, STATS_MCV_TYPE_BASIC);
    1066                 : 
    1067 CBC         246 :     if (mcvlist->ndimensions == 0)
    1068 UBC           0 :         elog(ERROR, "invalid zero-length dimension array in MCVList");
    1069 CBC         246 :     else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1070             246 :              (mcvlist->ndimensions < 0))
    1071 UBC           0 :         elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1072                 :              mcvlist->ndimensions);
    1073                 : 
    1074 CBC         246 :     if (mcvlist->nitems == 0)
    1075 UBC           0 :         elog(ERROR, "invalid zero-length item array in MCVList");
    1076 CBC         246 :     else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1077 UBC           0 :         elog(ERROR, "invalid length (%u) item array in MCVList",
    1078                 :              mcvlist->nitems);
    1079                 : 
    1080 CBC         246 :     nitems = mcvlist->nitems;
    1081             246 :     ndims = mcvlist->ndimensions;
    1082                 : 
    1083                 :     /*
    1084                 :      * Check amount of data including DimensionInfo for all dimensions and
    1085                 :      * also the serialized items (including uint16 indexes). Also, walk
    1086                 :      * through the dimension information and add it to the sum.
    1087                 :      */
    1088             246 :     expected_size = SizeOfMCVList(ndims, nitems);
    1089                 : 
    1090                 :     /*
    1091                 :      * Check that we have at least the dimension and info records, along with
    1092                 :      * the items. We don't know the size of the serialized values yet. We need
    1093                 :      * to do this check first, before accessing the dimension info.
    1094                 :      */
    1095             246 :     if (VARSIZE_ANY(data) < expected_size)
    1096 UNC           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1097                 :              VARSIZE_ANY(data), expected_size);
    1098                 : 
    1099                 :     /* Now copy the array of type Oids. */
    1100 CBC         246 :     memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1101             246 :     ptr += (sizeof(Oid) * ndims);
    1102                 : 
    1103                 :     /* Now it's safe to access the dimension info. */
    1104             246 :     info = palloc(ndims * sizeof(DimensionInfo));
    1105                 : 
    1106             246 :     memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1107             246 :     ptr += (ndims * sizeof(DimensionInfo));
    1108                 : 
    1109                 :     /* account for the value arrays */
    1110            1059 :     for (dim = 0; dim < ndims; dim++)
    1111                 :     {
    1112                 :         /*
    1113                 :          * XXX I wonder if we can/should rely on asserts here. Maybe those
    1114                 :          * checks should be done every time?
    1115                 :          */
    1116             813 :         Assert(info[dim].nvalues >= 0);
    1117             813 :         Assert(info[dim].nbytes >= 0);
    1118                 : 
    1119             813 :         expected_size += info[dim].nbytes;
    1120                 :     }
    1121                 : 
    1122                 :     /*
    1123                 :      * Now we know the total expected MCV size, including all the pieces
    1124                 :      * (header, dimension info. items and deduplicated data). So do the final
    1125                 :      * check on size.
    1126                 :      */
    1127             246 :     if (VARSIZE_ANY(data) != expected_size)
    1128 UNC           0 :         elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1129                 :              VARSIZE_ANY(data), expected_size);
    1130                 : 
    1131                 :     /*
    1132                 :      * We need an array of Datum values for each dimension, so that we can
    1133                 :      * easily translate the uint16 indexes later. We also need a top-level
    1134                 :      * array of pointers to those per-dimension arrays.
    1135                 :      *
    1136                 :      * While allocating the arrays for dimensions, compute how much space we
    1137                 :      * need for a copy of the by-ref data, as we can't simply point to the
    1138                 :      * original values (it might go away).
    1139                 :      */
    1140 CBC         246 :     datalen = 0;                /* space for by-ref data */
    1141             246 :     map = (Datum **) palloc(ndims * sizeof(Datum *));
    1142                 : 
    1143            1059 :     for (dim = 0; dim < ndims; dim++)
    1144                 :     {
    1145             813 :         map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
    1146                 : 
    1147                 :         /* space needed for a copy of data for by-ref types */
    1148             813 :         datalen += info[dim].nbytes_aligned;
    1149                 :     }
    1150                 : 
    1151                 :     /*
    1152                 :      * Now resize the MCV list so that the allocation includes all the data.
    1153                 :      *
    1154                 :      * Allocate space for a copy of the data, as we can't simply reference the
    1155                 :      * serialized data - it's not aligned properly, and it may disappear while
    1156                 :      * we're still using the MCV list, e.g. due to catcache release.
    1157                 :      *
    1158                 :      * We do care about alignment here, because we will allocate all the
    1159                 :      * pieces at once, but then use pointers to different parts.
    1160                 :      */
    1161             246 :     mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1162                 : 
    1163                 :     /* arrays of values and isnull flags for all MCV items */
    1164             246 :     mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1165             246 :     mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1166                 : 
    1167                 :     /* we don't quite need to align this, but it makes some asserts easier */
    1168             246 :     mcvlen += MAXALIGN(datalen);
    1169                 : 
    1170                 :     /* now resize the deserialized MCV list, and compute pointers to parts */
    1171             246 :     mcvlist = repalloc(mcvlist, mcvlen);
    1172                 : 
    1173                 :     /* pointer to the beginning of values/isnull arrays */
    1174             246 :     valuesptr = (char *) mcvlist
    1175             246 :         + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1176                 : 
    1177             246 :     isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1178                 : 
    1179             246 :     dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1180                 : 
    1181                 :     /*
    1182                 :      * Build mapping (index => value) for translating the serialized data into
    1183                 :      * the in-memory representation.
    1184                 :      */
    1185            1059 :     for (dim = 0; dim < ndims; dim++)
    1186                 :     {
    1187                 :         /* remember start position in the input array */
    1188             813 :         char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1189                 : 
    1190             813 :         if (info[dim].typbyval)
    1191                 :         {
    1192                 :             /* for by-val types we simply copy data into the mapping */
    1193           21963 :             for (i = 0; i < info[dim].nvalues; i++)
    1194                 :             {
    1195           21429 :                 Datum       v = 0;
    1196                 : 
    1197           21429 :                 memcpy(&v, ptr, info[dim].typlen);
    1198           21429 :                 ptr += info[dim].typlen;
    1199                 : 
    1200           21429 :                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1201                 : 
    1202                 :                 /* no under/overflow of input array */
    1203           21429 :                 Assert(ptr <= (start + info[dim].nbytes));
    1204                 :             }
    1205                 :         }
    1206                 :         else
    1207                 :         {
    1208                 :             /* for by-ref types we need to also make a copy of the data */
    1209                 : 
    1210                 :             /* passed by reference, but fixed length (name, tid, ...) */
    1211             279 :             if (info[dim].typlen > 0)
    1212                 :             {
    1213            1101 :                 for (i = 0; i < info[dim].nvalues; i++)
    1214                 :                 {
    1215            1080 :                     memcpy(dataptr, ptr, info[dim].typlen);
    1216            1080 :                     ptr += info[dim].typlen;
    1217                 : 
    1218                 :                     /* just point into the array */
    1219            1080 :                     map[dim][i] = PointerGetDatum(dataptr);
    1220            1080 :                     dataptr += MAXALIGN(info[dim].typlen);
    1221                 :                 }
    1222                 :             }
    1223             258 :             else if (info[dim].typlen == -1)
    1224                 :             {
    1225                 :                 /* varlena */
    1226            7494 :                 for (i = 0; i < info[dim].nvalues; i++)
    1227                 :                 {
    1228                 :                     uint32      len;
    1229                 : 
    1230                 :                     /* read the uint32 length */
    1231            7236 :                     memcpy(&len, ptr, sizeof(uint32));
    1232            7236 :                     ptr += sizeof(uint32);
    1233                 : 
    1234                 :                     /* the length is data-only */
    1235            7236 :                     SET_VARSIZE(dataptr, len + VARHDRSZ);
    1236            7236 :                     memcpy(VARDATA(dataptr), ptr, len);
    1237            7236 :                     ptr += len;
    1238                 : 
    1239                 :                     /* just point into the array */
    1240            7236 :                     map[dim][i] = PointerGetDatum(dataptr);
    1241                 : 
    1242                 :                     /* skip to place of the next deserialized value */
    1243            7236 :                     dataptr += MAXALIGN(len + VARHDRSZ);
    1244                 :                 }
    1245                 :             }
    1246 UBC           0 :             else if (info[dim].typlen == -2)
    1247                 :             {
    1248                 :                 /* cstring */
    1249               0 :                 for (i = 0; i < info[dim].nvalues; i++)
    1250                 :                 {
    1251                 :                     uint32      len;
    1252                 : 
    1253               0 :                     memcpy(&len, ptr, sizeof(uint32));
    1254               0 :                     ptr += sizeof(uint32);
    1255                 : 
    1256               0 :                     memcpy(dataptr, ptr, len);
    1257               0 :                     ptr += len;
    1258                 : 
    1259                 :                     /* just point into the array */
    1260               0 :                     map[dim][i] = PointerGetDatum(dataptr);
    1261               0 :                     dataptr += MAXALIGN(len);
    1262                 :                 }
    1263                 :             }
    1264                 : 
    1265                 :             /* no under/overflow of input array */
    1266 CBC         279 :             Assert(ptr <= (start + info[dim].nbytes));
    1267                 : 
    1268                 :             /* no overflow of the output mcv value */
    1269             279 :             Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1270                 :         }
    1271                 : 
    1272                 :         /* check we consumed input data for this dimension exactly */
    1273             813 :         Assert(ptr == (start + info[dim].nbytes));
    1274                 :     }
    1275                 : 
    1276                 :     /* we should have also filled the MCV list exactly */
    1277             246 :     Assert(dataptr == ((char *) mcvlist + mcvlen));
    1278                 : 
    1279                 :     /* deserialize the MCV items and translate the indexes to Datums */
    1280           15219 :     for (i = 0; i < nitems; i++)
    1281                 :     {
    1282           14973 :         MCVItem    *item = &mcvlist->items[i];
    1283                 : 
    1284           14973 :         item->values = (Datum *) valuesptr;
    1285           14973 :         valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1286                 : 
    1287           14973 :         item->isnull = (bool *) isnullptr;
    1288           14973 :         isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1289                 : 
    1290           14973 :         memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1291           14973 :         ptr += sizeof(bool) * ndims;
    1292                 : 
    1293           14973 :         memcpy(&item->frequency, ptr, sizeof(double));
    1294           14973 :         ptr += sizeof(double);
    1295                 : 
    1296           14973 :         memcpy(&item->base_frequency, ptr, sizeof(double));
    1297           14973 :         ptr += sizeof(double);
    1298                 : 
    1299                 :         /* finally translate the indexes (for non-NULL only) */
    1300           66894 :         for (dim = 0; dim < ndims; dim++)
    1301                 :         {
    1302                 :             uint16      index;
    1303                 : 
    1304           51921 :             memcpy(&index, ptr, sizeof(uint16));
    1305           51921 :             ptr += sizeof(uint16);
    1306                 : 
    1307           51921 :             if (item->isnull[dim])
    1308             153 :                 continue;
    1309                 : 
    1310           51768 :             item->values[dim] = map[dim][index];
    1311                 :         }
    1312                 : 
    1313                 :         /* check we're not overflowing the input */
    1314           14973 :         Assert(ptr <= endptr);
    1315                 :     }
    1316                 : 
    1317                 :     /* check that we processed all the data */
    1318             246 :     Assert(ptr == endptr);
    1319                 : 
    1320                 :     /* release the buffers used for mapping */
    1321            1059 :     for (dim = 0; dim < ndims; dim++)
    1322             813 :         pfree(map[dim]);
    1323                 : 
    1324             246 :     pfree(map);
    1325                 : 
    1326             246 :     return mcvlist;
    1327                 : }
    1328                 : 
    1329                 : /*
    1330                 :  * SRF with details about buckets of a histogram:
    1331                 :  *
    1332                 :  * - item ID (0...nitems)
    1333                 :  * - values (string array)
    1334                 :  * - nulls only (boolean array)
    1335                 :  * - frequency (double precision)
    1336                 :  * - base_frequency (double precision)
    1337                 :  *
    1338                 :  * The input is the OID of the statistics, and there are no rows returned if
    1339                 :  * the statistics contains no histogram.
    1340                 :  */
    1341                 : Datum
    1342              15 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1343                 : {
    1344                 :     FuncCallContext *funcctx;
    1345                 : 
    1346                 :     /* stuff done only on the first call of the function */
    1347              15 :     if (SRF_IS_FIRSTCALL())
    1348                 :     {
    1349                 :         MemoryContext oldcontext;
    1350                 :         MCVList    *mcvlist;
    1351                 :         TupleDesc   tupdesc;
    1352                 : 
    1353                 :         /* create a function context for cross-call persistence */
    1354               6 :         funcctx = SRF_FIRSTCALL_INIT();
    1355                 : 
    1356                 :         /* switch to memory context appropriate for multiple function calls */
    1357               6 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1358                 : 
    1359               6 :         mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1360                 : 
    1361               6 :         funcctx->user_fctx = mcvlist;
    1362                 : 
    1363                 :         /* total number of tuples to be returned */
    1364               6 :         funcctx->max_calls = 0;
    1365               6 :         if (funcctx->user_fctx != NULL)
    1366               6 :             funcctx->max_calls = mcvlist->nitems;
    1367                 : 
    1368                 :         /* Build a tuple descriptor for our result type */
    1369               6 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1370 UBC           0 :             ereport(ERROR,
    1371                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1372                 :                      errmsg("function returning record called in context "
    1373                 :                             "that cannot accept type record")));
    1374 CBC           6 :         tupdesc = BlessTupleDesc(tupdesc);
    1375                 : 
    1376                 :         /*
    1377                 :          * generate attribute metadata needed later to produce tuples from raw
    1378                 :          * C strings
    1379                 :          */
    1380               6 :         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1381                 : 
    1382               6 :         MemoryContextSwitchTo(oldcontext);
    1383                 :     }
    1384                 : 
    1385                 :     /* stuff done on every call of the function */
    1386              15 :     funcctx = SRF_PERCALL_SETUP();
    1387                 : 
    1388              15 :     if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1389                 :                                                      * left to send */
    1390                 :     {
    1391                 :         Datum       values[5];
    1392                 :         bool        nulls[5];
    1393                 :         HeapTuple   tuple;
    1394                 :         Datum       result;
    1395               9 :         ArrayBuildState *astate_values = NULL;
    1396               9 :         ArrayBuildState *astate_nulls = NULL;
    1397                 : 
    1398                 :         int         i;
    1399                 :         MCVList    *mcvlist;
    1400                 :         MCVItem    *item;
    1401                 : 
    1402               9 :         mcvlist = (MCVList *) funcctx->user_fctx;
    1403                 : 
    1404               9 :         Assert(funcctx->call_cntr < mcvlist->nitems);
    1405                 : 
    1406               9 :         item = &mcvlist->items[funcctx->call_cntr];
    1407                 : 
    1408              36 :         for (i = 0; i < mcvlist->ndimensions; i++)
    1409                 :         {
    1410                 : 
    1411              27 :             astate_nulls = accumArrayResult(astate_nulls,
    1412              27 :                                             BoolGetDatum(item->isnull[i]),
    1413                 :                                             false,
    1414                 :                                             BOOLOID,
    1415                 :                                             CurrentMemoryContext);
    1416                 : 
    1417              27 :             if (!item->isnull[i])
    1418                 :             {
    1419                 :                 bool        isvarlena;
    1420                 :                 Oid         outfunc;
    1421                 :                 FmgrInfo    fmgrinfo;
    1422                 :                 Datum       val;
    1423                 :                 text       *txt;
    1424                 : 
    1425                 :                 /* lookup output func for the type */
    1426              15 :                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1427              15 :                 fmgr_info(outfunc, &fmgrinfo);
    1428                 : 
    1429              15 :                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1430              15 :                 txt = cstring_to_text(DatumGetPointer(val));
    1431                 : 
    1432              15 :                 astate_values = accumArrayResult(astate_values,
    1433                 :                                                  PointerGetDatum(txt),
    1434                 :                                                  false,
    1435                 :                                                  TEXTOID,
    1436                 :                                                  CurrentMemoryContext);
    1437                 :             }
    1438                 :             else
    1439              12 :                 astate_values = accumArrayResult(astate_values,
    1440                 :                                                  (Datum) 0,
    1441                 :                                                  true,
    1442                 :                                                  TEXTOID,
    1443                 :                                                  CurrentMemoryContext);
    1444                 :         }
    1445                 : 
    1446               9 :         values[0] = Int32GetDatum(funcctx->call_cntr);
    1447 GNC           9 :         values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1448               9 :         values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1449 CBC           9 :         values[3] = Float8GetDatum(item->frequency);
    1450               9 :         values[4] = Float8GetDatum(item->base_frequency);
    1451                 : 
    1452                 :         /* no NULLs in the tuple */
    1453               9 :         memset(nulls, 0, sizeof(nulls));
    1454                 : 
    1455                 :         /* build a tuple */
    1456               9 :         tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1457                 : 
    1458                 :         /* make the tuple into a datum */
    1459               9 :         result = HeapTupleGetDatum(tuple);
    1460                 : 
    1461               9 :         SRF_RETURN_NEXT(funcctx, result);
    1462                 :     }
    1463                 :     else                        /* do when there is no more left */
    1464                 :     {
    1465               6 :         SRF_RETURN_DONE(funcctx);
    1466                 :     }
    1467                 : }
    1468                 : 
    1469                 : /*
    1470                 :  * pg_mcv_list_in       - input routine for type pg_mcv_list.
    1471                 :  *
    1472                 :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1473                 :  * of its own, and disallows input too
    1474                 :  */
    1475                 : Datum
    1476 UBC           0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1477                 : {
    1478                 :     /*
    1479                 :      * pg_mcv_list stores the data in binary form and parsing text input is
    1480                 :      * not needed, so disallow this.
    1481                 :      */
    1482               0 :     ereport(ERROR,
    1483                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1484                 :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1485                 : 
    1486                 :     PG_RETURN_VOID();           /* keep compiler quiet */
    1487                 : }
    1488                 : 
    1489                 : 
    1490                 : /*
    1491                 :  * pg_mcv_list_out      - output routine for type pg_mcv_list.
    1492                 :  *
    1493                 :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1494                 :  * to serialize the value into text. But it'd be nice to serialize that into
    1495                 :  * a meaningful representation (e.g. for inspection by people).
    1496                 :  *
    1497                 :  * XXX This should probably return something meaningful, similar to what
    1498                 :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1499                 :  * values, though - do we want to expand that or not?
    1500                 :  */
    1501                 : Datum
    1502               0 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1503                 : {
    1504               0 :     return byteaout(fcinfo);
    1505                 : }
    1506                 : 
    1507                 : /*
    1508                 :  * pg_mcv_list_recv     - binary input routine for type pg_mcv_list.
    1509                 :  */
    1510                 : Datum
    1511               0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1512                 : {
    1513               0 :     ereport(ERROR,
    1514                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1515                 :              errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1516                 : 
    1517                 :     PG_RETURN_VOID();           /* keep compiler quiet */
    1518                 : }
    1519                 : 
    1520                 : /*
    1521                 :  * pg_mcv_list_send     - binary output routine for type pg_mcv_list.
    1522                 :  *
    1523                 :  * MCV lists are serialized in a bytea value (although the type is named
    1524                 :  * differently), so let's just send that.
    1525                 :  */
    1526                 : Datum
    1527               0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1528                 : {
    1529               0 :     return byteasend(fcinfo);
    1530                 : }
    1531                 : 
    1532                 : /*
    1533                 :  * match the attribute/expression to a dimension of the statistic
    1534                 :  *
    1535                 :  * Returns the zero-based index of the matching statistics dimension.
    1536                 :  * Optionally determines the collation.
    1537                 :  */
    1538                 : static int
    1539 CBC         576 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1540                 : {
    1541                 :     int         idx;
    1542                 : 
    1543             576 :     if (IsA(expr, Var))
    1544                 :     {
    1545                 :         /* simple Var, so just lookup using varattno */
    1546             453 :         Var        *var = (Var *) expr;
    1547                 : 
    1548             453 :         if (collid)
    1549             420 :             *collid = var->varcollid;
    1550                 : 
    1551             453 :         idx = bms_member_index(keys, var->varattno);
    1552                 : 
    1553             453 :         if (idx < 0)
    1554 UBC           0 :             elog(ERROR, "variable not found in statistics object");
    1555                 :     }
    1556                 :     else
    1557                 :     {
    1558                 :         /* expression - lookup in stats expressions */
    1559                 :         ListCell   *lc;
    1560                 : 
    1561 CBC         123 :         if (collid)
    1562             120 :             *collid = exprCollation(expr);
    1563                 : 
    1564                 :         /* expressions are stored after the simple columns */
    1565             123 :         idx = bms_num_members(keys);
    1566             228 :         foreach(lc, exprs)
    1567                 :         {
    1568             228 :             Node       *stat_expr = (Node *) lfirst(lc);
    1569                 : 
    1570             228 :             if (equal(expr, stat_expr))
    1571             123 :                 break;
    1572                 : 
    1573             105 :             idx++;
    1574                 :         }
    1575                 : 
    1576             123 :         if (lc == NULL)
    1577 UBC           0 :             elog(ERROR, "expression not found in statistics object");
    1578                 :     }
    1579                 : 
    1580 CBC         576 :     return idx;
    1581                 : }
    1582                 : 
    1583                 : /*
    1584                 :  * mcv_get_match_bitmap
    1585                 :  *  Evaluate clauses using the MCV list, and update the match bitmap.
    1586                 :  *
    1587                 :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1588                 :  * update it based on additional clauses. We also use it to skip items
    1589                 :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1590                 :  * to "match" when evaluating AND clause list).
    1591                 :  *
    1592                 :  * The function also returns a flag indicating whether there was an
    1593                 :  * equality condition for all attributes, the minimum frequency in the MCV
    1594                 :  * list, and a total MCV frequency (sum of frequencies for all items).
    1595                 :  *
    1596                 :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1597                 :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1598                 :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1599                 :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1600                 :  * small, though (thanks to the cap on the MCV list size).
    1601                 :  */
    1602                 : static bool *
    1603             357 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1604                 :                      Bitmapset *keys, List *exprs,
    1605                 :                      MCVList *mcvlist, bool is_or)
    1606                 : {
    1607                 :     ListCell   *l;
    1608                 :     bool       *matches;
    1609                 : 
    1610 ECB             :     /* The bitmap may be partially built. */
    1611 CBC         357 :     Assert(clauses != NIL);
    1612             357 :     Assert(mcvlist != NULL);
    1613 GIC         357 :     Assert(mcvlist->nitems > 0);
    1614 CBC         357 :     Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1615 ECB             : 
    1616 GIC         357 :     matches = palloc(sizeof(bool) * mcvlist->nitems);
    1617             357 :     memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1618                 : 
    1619                 :     /*
    1620                 :      * Loop through the list of clauses, and for each of them evaluate all the
    1621 ECB             :      * MCV items not yet eliminated by the preceding clauses.
    1622                 :      */
    1623 CBC        1017 :     foreach(l, clauses)
    1624                 :     {
    1625 GIC         660 :         Node       *clause = (Node *) lfirst(l);
    1626 ECB             : 
    1627                 :         /* if it's a RestrictInfo, then extract the clause */
    1628 GIC         660 :         if (IsA(clause, RestrictInfo))
    1629             609 :             clause = (Node *) ((RestrictInfo *) clause)->clause;
    1630                 : 
    1631                 :         /*
    1632                 :          * Handle the various types of clauses - OpClause, NullTest and
    1633 ECB             :          * AND/OR/NOT
    1634                 :          */
    1635 CBC         660 :         if (is_opclause(clause))
    1636                 :         {
    1637 GIC         438 :             OpExpr     *expr = (OpExpr *) clause;
    1638                 :             FmgrInfo    opproc;
    1639                 : 
    1640                 :             /* valid only after examine_opclause_args returns true */
    1641                 :             Node       *clause_expr;
    1642                 :             Const      *cst;
    1643                 :             bool        expronleft;
    1644                 :             int         idx;
    1645 ECB             :             Oid         collid;
    1646                 : 
    1647 GIC         438 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1648 ECB             : 
    1649 EUB             :             /* extract the var/expr and const from the expression */
    1650 GIC         438 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1651 UIC           0 :                 elog(ERROR, "incompatible clause");
    1652 ECB             : 
    1653                 :             /* match the attribute/expression to a dimension of the statistic */
    1654 GIC         438 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1655                 : 
    1656                 :             /*
    1657                 :              * Walk through the MCV items and evaluate the current clause. We
    1658                 :              * can skip items that were already ruled out, and terminate if
    1659 ECB             :              * there are no remaining MCV items that might possibly match.
    1660                 :              */
    1661 GNC       29316 :             for (int i = 0; i < mcvlist->nitems; i++)
    1662 ECB             :             {
    1663 GIC       28878 :                 bool        match = true;
    1664 CBC       28878 :                 MCVItem    *item = &mcvlist->items[i];
    1665                 : 
    1666 GIC       28878 :                 Assert(idx >= 0);
    1667                 : 
    1668                 :                 /*
    1669                 :                  * When the MCV item or the Const value is NULL we can treat
    1670                 :                  * this as a mismatch. We must not call the operator because
    1671 ECB             :                  * of strictness.
    1672                 :                  */
    1673 CBC       28878 :                 if (item->isnull[idx] || cst->constisnull)
    1674 ECB             :                 {
    1675 GIC          24 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1676              24 :                     continue;
    1677                 :                 }
    1678                 : 
    1679                 :                 /*
    1680                 :                  * Skip MCV items that can't change result in the bitmap. Once
    1681                 :                  * the value gets false for AND-lists, or true for OR-lists,
    1682 ECB             :                  * we don't need to look at more clauses.
    1683                 :                  */
    1684 GIC       28854 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1685           13482 :                     continue;
    1686                 : 
    1687                 :                 /*
    1688                 :                  * First check whether the constant is below the lower
    1689                 :                  * boundary (in that case we can skip the bucket, because
    1690                 :                  * there's no overlap).
    1691                 :                  *
    1692                 :                  * We don't store collations used to build the statistics, but
    1693                 :                  * we can use the collation for the attribute itself, as
    1694                 :                  * stored in varcollid. We do reset the statistics after a
    1695                 :                  * type change (including collation change), so this is OK.
    1696                 :                  * For expressions, we use the collation extracted from the
    1697 ECB             :                  * expression itself.
    1698                 :                  */
    1699 GIC       15372 :                 if (expronleft)
    1700 CBC       14064 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1701 ECB             :                                                            collid,
    1702 GIC       14064 :                                                            item->values[idx],
    1703 CBC       14064 :                                                            cst->constvalue));
    1704                 :                 else
    1705            1308 :                     match = DatumGetBool(FunctionCall2Coll(&opproc,
    1706 ECB             :                                                            collid,
    1707 GIC        1308 :                                                            cst->constvalue,
    1708            1308 :                                                            item->values[idx]));
    1709 ECB             : 
    1710                 :                 /* update the match bitmap with the result */
    1711 GIC       15372 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1712 ECB             :             }
    1713                 :         }
    1714 CBC         222 :         else if (IsA(clause, ScalarArrayOpExpr))
    1715                 :         {
    1716 GIC         102 :             ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1717                 :             FmgrInfo    opproc;
    1718                 : 
    1719                 :             /* valid only after examine_opclause_args returns true */
    1720                 :             Node       *clause_expr;
    1721                 :             Const      *cst;
    1722                 :             bool        expronleft;
    1723                 :             Oid         collid;
    1724                 :             int         idx;
    1725                 : 
    1726                 :             /* array evaluation */
    1727                 :             ArrayType  *arrayval;
    1728                 :             int16       elmlen;
    1729                 :             bool        elmbyval;
    1730                 :             char        elmalign;
    1731                 :             int         num_elems;
    1732                 :             Datum      *elem_values;
    1733 ECB             :             bool       *elem_nulls;
    1734                 : 
    1735 GIC         102 :             fmgr_info(get_opcode(expr->opno), &opproc);
    1736 ECB             : 
    1737 EUB             :             /* extract the var/expr and const from the expression */
    1738 GIC         102 :             if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1739 UIC           0 :                 elog(ERROR, "incompatible clause");
    1740 ECB             : 
    1741 EUB             :             /* We expect Var on left */
    1742 GIC         102 :             if (!expronleft)
    1743 UIC           0 :                 elog(ERROR, "incompatible clause");
    1744                 : 
    1745                 :             /*
    1746                 :              * Deconstruct the array constant, unless it's NULL (we'll cover
    1747 ECB             :              * that case below)
    1748                 :              */
    1749 CBC         102 :             if (!cst->constisnull)
    1750 ECB             :             {
    1751 GIC         102 :                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1752 CBC         102 :                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1753                 :                                      &elmlen, &elmbyval, &elmalign);
    1754 GIC         102 :                 deconstruct_array(arrayval,
    1755                 :                                   ARR_ELEMTYPE(arrayval),
    1756                 :                                   elmlen, elmbyval, elmalign,
    1757                 :                                   &elem_values, &elem_nulls, &num_elems);
    1758                 :             }
    1759 ECB             : 
    1760                 :             /* match the attribute/expression to a dimension of the statistic */
    1761 GIC         102 :             idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1762                 : 
    1763                 :             /*
    1764                 :              * Walk through the MCV items and evaluate the current clause. We
    1765                 :              * can skip items that were already ruled out, and terminate if
    1766 ECB             :              * there are no remaining MCV items that might possibly match.
    1767                 :              */
    1768 GNC        7848 :             for (int i = 0; i < mcvlist->nitems; i++)
    1769 ECB             :             {
    1770                 :                 int         j;
    1771 GIC        7746 :                 bool        match = !expr->useOr;
    1772            7746 :                 MCVItem    *item = &mcvlist->items[i];
    1773                 : 
    1774                 :                 /*
    1775                 :                  * When the MCV item or the Const value is NULL we can treat
    1776                 :                  * this as a mismatch. We must not call the operator because
    1777 ECB             :                  * of strictness.
    1778                 :                  */
    1779 CBC        7746 :                 if (item->isnull[idx] || cst->constisnull)
    1780 ECB             :                 {
    1781 GIC           9 :                     matches[i] = RESULT_MERGE(matches[i], is_or, false);
    1782               9 :                     continue;
    1783                 :                 }
    1784                 : 
    1785                 :                 /*
    1786                 :                  * Skip MCV items that can't change result in the bitmap. Once
    1787                 :                  * the value gets false for AND-lists, or true for OR-lists,
    1788 ECB             :                  * we don't need to look at more clauses.
    1789                 :                  */
    1790 GIC        7737 :                 if (RESULT_IS_FINAL(matches[i], is_or))
    1791 CBC        4008 :                     continue;
    1792                 : 
    1793           14169 :                 for (j = 0; j < num_elems; j++)
    1794 ECB             :                 {
    1795 GIC       11817 :                     Datum       elem_value = elem_values[j];
    1796           11817 :                     bool        elem_isnull = elem_nulls[j];
    1797                 :                     bool        elem_match;
    1798 ECB             : 
    1799                 :                     /* NULL values always evaluate as not matching. */
    1800 CBC       11817 :                     if (elem_isnull)
    1801 ECB             :                     {
    1802 GIC        1056 :                         match = RESULT_MERGE(match, expr->useOr, false);
    1803            1056 :                         continue;
    1804                 :                     }
    1805                 : 
    1806                 :                     /*
    1807                 :                      * Stop evaluating the array elements once we reach a
    1808                 :                      * matching value that can't change - ALL() is the same as
    1809 ECB             :                      * AND-list, ANY() is the same as OR-list.
    1810                 :                      */
    1811 GIC       10761 :                     if (RESULT_IS_FINAL(match, expr->useOr))
    1812 CBC        1377 :                         break;
    1813                 : 
    1814            9384 :                     elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1815                 :                                                                 collid,
    1816 GIC        9384 :                                                                 item->values[idx],
    1817 ECB             :                                                                 elem_value));
    1818                 : 
    1819 GIC        9384 :                     match = RESULT_MERGE(match, expr->useOr, elem_match);
    1820                 :                 }
    1821 ECB             : 
    1822                 :                 /* update the match bitmap with the result */
    1823 GIC        3729 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1824 ECB             :             }
    1825                 :         }
    1826 CBC         120 :         else if (IsA(clause, NullTest))
    1827 ECB             :         {
    1828 GIC          33 :             NullTest   *expr = (NullTest *) clause;
    1829              33 :             Node       *clause_expr = (Node *) (expr->arg);
    1830 ECB             : 
    1831                 :             /* match the attribute/expression to a dimension of the statistic */
    1832 GIC          33 :             int         idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1833                 : 
    1834                 :             /*
    1835                 :              * Walk through the MCV items and evaluate the current clause. We
    1836                 :              * can skip items that were already ruled out, and terminate if
    1837 ECB             :              * there are no remaining MCV items that might possibly match.
    1838                 :              */
    1839 GNC        3039 :             for (int i = 0; i < mcvlist->nitems; i++)
    1840 ECB             :             {
    1841 GIC        3006 :                 bool        match = false;  /* assume mismatch */
    1842            3006 :                 MCVItem    *item = &mcvlist->items[i];
    1843 ECB             : 
    1844                 :                 /* if the clause mismatches the MCV item, update the bitmap */
    1845 CBC        3006 :                 switch (expr->nulltesttype)
    1846 ECB             :                 {
    1847 CBC        2106 :                     case IS_NULL:
    1848 GIC        2106 :                         match = (item->isnull[idx]) ? true : match;
    1849 CBC        2106 :                         break;
    1850 ECB             : 
    1851 CBC         900 :                     case IS_NOT_NULL:
    1852 GIC         900 :                         match = (!item->isnull[idx]) ? true : match;
    1853             900 :                         break;
    1854                 :                 }
    1855 ECB             : 
    1856                 :                 /* now, update the match bitmap, depending on OR/AND type */
    1857 GIC        3006 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1858 ECB             :             }
    1859                 :         }
    1860 GIC          87 :         else if (is_orclause(clause) || is_andclause(clause))
    1861              30 :         {
    1862                 :             /* AND/OR clause, with all subclauses being compatible */
    1863 ECB             : 
    1864                 :             int         i;
    1865 GIC          30 :             BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1866              30 :             List       *bool_clauses = bool_clause->args;
    1867 ECB             : 
    1868                 :             /* match/mismatch bitmap for each MCV item */
    1869 CBC          30 :             bool       *bool_matches = NULL;
    1870 ECB             : 
    1871 GIC          30 :             Assert(bool_clauses != NIL);
    1872              30 :             Assert(list_length(bool_clauses) >= 2);
    1873 ECB             : 
    1874                 :             /* build the match bitmap for the OR-clauses */
    1875 GIC          30 :             bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1876              30 :                                                 mcvlist, is_orclause(clause));
    1877                 : 
    1878                 :             /*
    1879                 :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1880                 :              * current one. We need to consider if we're evaluating AND or OR
    1881 ECB             :              * condition when merging the results.
    1882                 :              */
    1883 GIC        1878 :             for (i = 0; i < mcvlist->nitems; i++)
    1884 CBC        1848 :                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
    1885                 : 
    1886              30 :             pfree(bool_matches);
    1887                 :         }
    1888 GIC          57 :         else if (is_notclause(clause))
    1889                 :         {
    1890                 :             /* NOT clause, with all subclauses compatible */
    1891 ECB             : 
    1892                 :             int         i;
    1893 GIC          15 :             BoolExpr   *not_clause = ((BoolExpr *) clause);
    1894              15 :             List       *not_args = not_clause->args;
    1895 ECB             : 
    1896                 :             /* match/mismatch bitmap for each MCV item */
    1897 CBC          15 :             bool       *not_matches = NULL;
    1898 ECB             : 
    1899 GIC          15 :             Assert(not_args != NIL);
    1900              15 :             Assert(list_length(not_args) == 1);
    1901 ECB             : 
    1902                 :             /* build the match bitmap for the NOT-clause */
    1903 GIC          15 :             not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1904                 :                                                mcvlist, false);
    1905                 : 
    1906                 :             /*
    1907                 :              * Merge the bitmap produced by mcv_get_match_bitmap into the
    1908                 :              * current one. We're handling a NOT clause, so invert the result
    1909 ECB             :              * before merging it into the global bitmap.
    1910                 :              */
    1911 GIC          75 :             for (i = 0; i < mcvlist->nitems; i++)
    1912 CBC          60 :                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
    1913                 : 
    1914              15 :             pfree(not_matches);
    1915                 :         }
    1916 GIC          42 :         else if (IsA(clause, Var))
    1917                 :         {
    1918 ECB             :             /* Var (has to be a boolean Var, possibly from below NOT) */
    1919                 : 
    1920 GIC          39 :             Var        *var = (Var *) (clause);
    1921 ECB             : 
    1922                 :             /* match the attribute to a dimension of the statistic */
    1923 CBC          39 :             int         idx = bms_member_index(keys, var->varattno);
    1924                 : 
    1925 GIC          39 :             Assert(var->vartype == BOOLOID);
    1926                 : 
    1927                 :             /*
    1928                 :              * Walk through the MCV items and evaluate the current clause. We
    1929                 :              * can skip items that were already ruled out, and terminate if
    1930 ECB             :              * there are no remaining MCV items that might possibly match.
    1931                 :              */
    1932 GNC         189 :             for (int i = 0; i < mcvlist->nitems; i++)
    1933 ECB             :             {
    1934 GIC         150 :                 MCVItem    *item = &mcvlist->items[i];
    1935             150 :                 bool        match = false;
    1936 ECB             : 
    1937                 :                 /* if the item is NULL, it's a mismatch */
    1938 GIC         150 :                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1939              75 :                     match = true;
    1940 ECB             : 
    1941                 :                 /* update the result bitmap */
    1942 GIC         150 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1943                 :             }
    1944                 :         }
    1945                 :         else
    1946                 :         {
    1947                 :             /* Otherwise, it must be a bare boolean-returning expression */
    1948                 :             int         idx;
    1949 ECB             : 
    1950                 :             /* match the expression to a dimension of the statistic */
    1951 GIC           3 :             idx = mcv_match_expression(clause, keys, exprs, NULL);
    1952                 : 
    1953                 :             /*
    1954                 :              * Walk through the MCV items and evaluate the current clause. We
    1955                 :              * can skip items that were already ruled out, and terminate if
    1956 ECB             :              * there are no remaining MCV items that might possibly match.
    1957                 :              */
    1958 GNC         111 :             for (int i = 0; i < mcvlist->nitems; i++)
    1959 ECB             :             {
    1960                 :                 bool        match;
    1961 GIC         108 :                 MCVItem    *item = &mcvlist->items[i];
    1962 ECB             : 
    1963                 :                 /* "match" just means it's bool TRUE */
    1964 GIC         108 :                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1965 ECB             : 
    1966                 :                 /* now, update the match bitmap, depending on OR/AND type */
    1967 GIC         108 :                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
    1968                 :             }
    1969                 :         }
    1970 ECB             :     }
    1971                 : 
    1972 GIC         357 :     return matches;
    1973                 : }
    1974                 : 
    1975                 : 
    1976                 : /*
    1977                 :  * mcv_combine_selectivities
    1978                 :  *      Combine per-column and multi-column MCV selectivity estimates.
    1979                 :  *
    1980                 :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1981                 :  * extended statistics, essentially assuming independence of columns/clauses).
    1982                 :  *
    1983                 :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1984                 :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1985                 :  * essentially interpreted as a correction to be added to simple_sel, as
    1986                 :  * described below.
    1987                 :  *
    1988                 :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1989                 :  * matching ones).  This is used as an upper bound on the portion of the
    1990                 :  * selectivity estimates not covered by the MCV statistics.
    1991                 :  *
    1992                 :  * Note: While simple and base selectivities are defined in a quite similar
    1993                 :  * way, the values are computed differently and are not therefore equal. The
    1994                 :  * simple selectivity is computed as a product of per-clause estimates, while
    1995                 :  * the base selectivity is computed by adding up base frequencies of matching
    1996                 :  * items of the multi-column MCV list. So the values may differ for two main
    1997                 :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1998                 :  * the MCV items did not match the estimated clauses.
    1999                 :  *
    2000                 :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    2001                 :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    2002                 :  * values may be equal.
    2003                 :  *
    2004                 :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    2005                 :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2006                 :  * correction for the part covered by the MCV list. Those two statements are
    2007                 :  * actually equivalent.
    2008 ECB             :  */
    2009                 : Selectivity
    2010 GIC         336 : mcv_combine_selectivities(Selectivity simple_sel,
    2011                 :                           Selectivity mcv_sel,
    2012                 :                           Selectivity mcv_basesel,
    2013                 :                           Selectivity mcv_totalsel)
    2014                 : {
    2015                 :     Selectivity other_sel;
    2016                 :     Selectivity sel;
    2017 ECB             : 
    2018                 :     /* estimated selectivity of values not covered by MCV matches */
    2019 GIC         336 :     other_sel = simple_sel - mcv_basesel;
    2020             336 :     CLAMP_PROBABILITY(other_sel);
    2021 ECB             : 
    2022                 :     /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2023 GIC         336 :     if (other_sel > 1.0 - mcv_totalsel)
    2024             222 :         other_sel = 1.0 - mcv_totalsel;
    2025 ECB             : 
    2026                 :     /* overall selectivity is the sum of the MCV and non-MCV parts */
    2027 GIC         336 :     sel = mcv_sel + other_sel;
    2028 CBC         336 :     CLAMP_PROBABILITY(sel);
    2029                 : 
    2030 GIC         336 :     return sel;
    2031                 : }
    2032                 : 
    2033                 : 
    2034                 : /*
    2035                 :  * mcv_clauselist_selectivity
    2036                 :  *      Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2037                 :  *      list of clauses.
    2038                 :  *
    2039                 :  * This determines which MCV items match every clause in the list and returns
    2040                 :  * the sum of the frequencies of those items.
    2041                 :  *
    2042                 :  * In addition, it returns the sum of the base frequencies of each of those
    2043                 :  * items (that is the sum of the selectivities that each item would have if
    2044                 :  * the columns were independent of one another), and the total selectivity of
    2045                 :  * all the MCV items (not just the matching ones).  These are expected to be
    2046                 :  * used together with a "simple" selectivity estimate (one based only on
    2047                 :  * per-column statistics) to produce an overall selectivity estimate that
    2048                 :  * makes use of both per-column and multi-column statistics --- see
    2049                 :  * mcv_combine_selectivities().
    2050 ECB             :  */
    2051                 : Selectivity
    2052 GIC         192 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2053                 :                            List *clauses, int varRelid,
    2054                 :                            JoinType jointype, SpecialJoinInfo *sjinfo,
    2055                 :                            RelOptInfo *rel,
    2056                 :                            Selectivity *basesel, Selectivity *totalsel)
    2057                 : {
    2058 ECB             :     int         i;
    2059                 :     MCVList    *mcv;
    2060 GIC         192 :     Selectivity s = 0.0;
    2061             192 :     RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2062 ECB             : 
    2063                 :     /* match/mismatch bitmap for each MCV item */
    2064 GIC         192 :     bool       *matches = NULL;
    2065 ECB             : 
    2066                 :     /* load the MCV list stored in the statistics object */
    2067 GIC         192 :     mcv = statext_mcv_load(stat->statOid, rte->inh);
    2068 ECB             : 
    2069                 :     /* build a match bitmap for the clauses */
    2070 GIC         192 :     matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2071                 :                                    mcv, false);
    2072 ECB             : 
    2073                 :     /* sum frequencies for all the matching MCV items */
    2074 CBC         192 :     *basesel = 0.0;
    2075 GIC         192 :     *totalsel = 0.0;
    2076 CBC       12552 :     for (i = 0; i < mcv->nitems; i++)
    2077                 :     {
    2078           12360 :         *totalsel += mcv->items[i].frequency;
    2079                 : 
    2080           12360 :         if (matches[i] != false)
    2081 ECB             :         {
    2082 GIC         231 :             *basesel += mcv->items[i].base_frequency;
    2083             231 :             s += mcv->items[i].frequency;
    2084                 :         }
    2085 ECB             :     }
    2086                 : 
    2087 GIC         192 :     return s;
    2088                 : }
    2089                 : 
    2090                 : 
    2091                 : /*
    2092                 :  * mcv_clause_selectivity_or
    2093                 :  *      Use MCV statistics to estimate the selectivity of a clause that
    2094                 :  *      appears in an ORed list of clauses.
    2095                 :  *
    2096                 :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2097                 :  * the clause and returns both the sum of the frequencies and the sum of the
    2098                 :  * base frequencies of those items, as well as the sum of the frequencies of
    2099                 :  * all MCV items (not just the matching ones) so that this information can be
    2100                 :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2101                 :  * makes use of both per-column and multi-column statistics.
    2102                 :  *
    2103                 :  * Additionally, we return information to help compute the overall selectivity
    2104                 :  * of the ORed list of clauses assumed to contain this clause.  This function
    2105                 :  * is intended to be called for each clause in the ORed list of clauses,
    2106                 :  * allowing the overall selectivity to be computed using the following
    2107                 :  * algorithm:
    2108                 :  *
    2109                 :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2110                 :  * of the first n clauses in the list.  Then the combined selectivity taking
    2111                 :  * into account the next clause C[n+1] can be written as
    2112                 :  *
    2113                 :  *      P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2114                 :  *
    2115                 :  * The final term above represents the overlap between the clauses examined so
    2116                 :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2117                 :  * match bitmap for the ORed list of clauses examined so far and examine its
    2118                 :  * intersection with the match bitmap for the (n+1)'th clause.
    2119                 :  *
    2120                 :  * We then also return the sums of the MCV item frequencies and base
    2121                 :  * frequencies for the match bitmap intersection corresponding to the overlap
    2122                 :  * term above, so that they can be combined with a simple selectivity estimate
    2123                 :  * for that term.
    2124                 :  *
    2125                 :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2126                 :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2127                 :  * the first time it calls this function.
    2128 ECB             :  */
    2129                 : Selectivity
    2130 GIC         120 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2131                 :                           MCVList *mcv, Node *clause, bool **or_matches,
    2132                 :                           Selectivity *basesel, Selectivity *overlap_mcvsel,
    2133 ECB             :                           Selectivity *overlap_basesel, Selectivity *totalsel)
    2134                 : {
    2135 GIC         120 :     Selectivity s = 0.0;
    2136                 :     bool       *new_matches;
    2137                 :     int         i;
    2138 ECB             : 
    2139                 :     /* build the OR-matches bitmap, if not built already */
    2140 GIC         120 :     if (*or_matches == NULL)
    2141              48 :         *or_matches = palloc0(sizeof(bool) * mcv->nitems);
    2142 ECB             : 
    2143                 :     /* build the match bitmap for the new clause */
    2144 GIC         120 :     new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2145                 :                                        stat->exprs, mcv, false);
    2146                 : 
    2147                 :     /*
    2148                 :      * Sum the frequencies for all the MCV items matching this clause and also
    2149                 :      * those matching the overlap between this clause and any of the preceding
    2150 ECB             :      * clauses as described above.
    2151                 :      */
    2152 CBC         120 :     *basesel = 0.0;
    2153             120 :     *overlap_mcvsel = 0.0;
    2154             120 :     *overlap_basesel = 0.0;
    2155 GIC         120 :     *totalsel = 0.0;
    2156 CBC        7758 :     for (i = 0; i < mcv->nitems; i++)
    2157                 :     {
    2158            7638 :         *totalsel += mcv->items[i].frequency;
    2159                 : 
    2160            7638 :         if (new_matches[i])
    2161 ECB             :         {
    2162 GIC         168 :             s += mcv->items[i].frequency;
    2163 CBC         168 :             *basesel += mcv->items[i].base_frequency;
    2164                 : 
    2165             168 :             if ((*or_matches)[i])
    2166 ECB             :             {
    2167 GIC          72 :                 *overlap_mcvsel += mcv->items[i].frequency;
    2168              72 :                 *overlap_basesel += mcv->items[i].base_frequency;
    2169                 :             }
    2170                 :         }
    2171 ECB             : 
    2172                 :         /* update the OR-matches bitmap for the next clause */
    2173 GIC        7638 :         (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2174 ECB             :     }
    2175                 : 
    2176 CBC         120 :     pfree(new_matches);
    2177                 : 
    2178 GIC         120 :     return s;
    2179                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a