LCOV - differential code coverage report
Current view: top level - src/backend/statistics - mvdistinct.c (source / functions) Coverage Total Hit UNC UBC GNC CBC DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.0 % 213 196 2 15 1 195 2 1
Current Date: 2023-04-08 17:13:01 Functions: 82.4 % 17 14 3 2 12
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 0.0 % 2 0 2
Legend: Lines: hit not hit (60,120] days: 100.0 % 1 1 1
(240..) days: 92.9 % 210 195 15 195
Function coverage date bins:
(240..) days: 82.4 % 17 14 3 2 12

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * mvdistinct.c
                                  4                 :  *    POSTGRES multivariate ndistinct coefficients
                                  5                 :  *
                                  6                 :  * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
                                  7                 :  * is tricky, and the estimation error is often significant.
                                  8                 : 
                                  9                 :  * The multivariate ndistinct coefficients address this by storing ndistinct
                                 10                 :  * estimates for combinations of the user-specified columns.  So for example
                                 11                 :  * given a statistics object on three columns (a,b,c), this module estimates
                                 12                 :  * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
                                 13                 :  * estimates are already available in pg_statistic.
                                 14                 :  *
                                 15                 :  *
                                 16                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 17                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 18                 :  *
                                 19                 :  * IDENTIFICATION
                                 20                 :  *    src/backend/statistics/mvdistinct.c
                                 21                 :  *
                                 22                 :  *-------------------------------------------------------------------------
                                 23                 :  */
                                 24                 : #include "postgres.h"
                                 25                 : 
                                 26                 : #include <math.h>
                                 27                 : 
                                 28                 : #include "access/htup_details.h"
                                 29                 : #include "catalog/pg_statistic_ext.h"
                                 30                 : #include "catalog/pg_statistic_ext_data.h"
                                 31                 : #include "lib/stringinfo.h"
                                 32                 : #include "statistics/extended_stats_internal.h"
                                 33                 : #include "statistics/statistics.h"
                                 34                 : #include "utils/fmgrprotos.h"
                                 35                 : #include "utils/lsyscache.h"
                                 36                 : #include "utils/syscache.h"
                                 37                 : #include "utils/typcache.h"
                                 38                 : 
                                 39                 : static double ndistinct_for_combination(double totalrows, StatsBuildData *data,
                                 40                 :                                         int k, int *combination);
                                 41                 : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
                                 42                 : static int  n_choose_k(int n, int k);
                                 43                 : static int  num_combinations(int n);
                                 44                 : 
                                 45                 : /* size of the struct header fields (magic, type, nitems) */
                                 46                 : #define SizeOfHeader        (3 * sizeof(uint32))
                                 47                 : 
                                 48                 : /* size of a serialized ndistinct item (coefficient, natts, atts) */
                                 49                 : #define SizeOfItem(natts) \
                                 50                 :     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
                                 51                 : 
                                 52                 : /* minimal size of a ndistinct item (with two attributes) */
                                 53                 : #define MinSizeOfItem   SizeOfItem(2)
                                 54                 : 
                                 55                 : /* minimal size of mvndistinct, when all items are minimal */
                                 56                 : #define MinSizeOfItems(nitems)  \
                                 57                 :     (SizeOfHeader + (nitems) * MinSizeOfItem)
                                 58                 : 
                                 59                 : /* Combination generator API */
                                 60                 : 
                                 61                 : /* internal state for generator of k-combinations of n elements */
                                 62                 : typedef struct CombinationGenerator
                                 63                 : {
                                 64                 :     int         k;              /* size of the combination */
                                 65                 :     int         n;              /* total number of elements */
                                 66                 :     int         current;        /* index of the next combination to return */
                                 67                 :     int         ncombinations;  /* number of combinations (size of array) */
                                 68                 :     int        *combinations;   /* array of pre-built combinations */
                                 69                 : } CombinationGenerator;
                                 70                 : 
                                 71                 : static CombinationGenerator *generator_init(int n, int k);
                                 72                 : static void generator_free(CombinationGenerator *state);
                                 73                 : static int *generator_next(CombinationGenerator *state);
                                 74                 : static void generate_combinations(CombinationGenerator *state);
                                 75                 : 
                                 76                 : 
                                 77                 : /*
                                 78                 :  * statext_ndistinct_build
                                 79                 :  *      Compute ndistinct coefficient for the combination of attributes.
                                 80                 :  *
                                 81                 :  * This computes the ndistinct estimate using the same estimator used
                                 82                 :  * in analyze.c and then computes the coefficient.
                                 83                 :  *
                                 84                 :  * To handle expressions easily, we treat them as system attributes with
                                 85                 :  * negative attnums, and offset everything by number of expressions to
                                 86                 :  * allow using Bitmapsets.
                                 87                 :  */
                                 88                 : MVNDistinct *
  744 tomas.vondra               89 CBC          78 : statext_ndistinct_build(double totalrows, StatsBuildData *data)
                                 90                 : {
                                 91                 :     MVNDistinct *result;
                                 92                 :     int         k;
                                 93                 :     int         itemcnt;
                                 94              78 :     int         numattrs = data->nattnums;
 2207 alvherre                   95              78 :     int         numcombs = num_combinations(numattrs);
                                 96                 : 
                                 97              78 :     result = palloc(offsetof(MVNDistinct, items) +
                                 98              78 :                     numcombs * sizeof(MVNDistinctItem));
                                 99              78 :     result->magic = STATS_NDISTINCT_MAGIC;
                                100              78 :     result->type = STATS_NDISTINCT_TYPE_BASIC;
                                101              78 :     result->nitems = numcombs;
                                102                 : 
                                103              78 :     itemcnt = 0;
                                104             198 :     for (k = 2; k <= numattrs; k++)
                                105                 :     {
                                106                 :         int        *combination;
                                107                 :         CombinationGenerator *generator;
                                108                 : 
                                109                 :         /* generate combinations of K out of N elements */
                                110             120 :         generator = generator_init(numattrs, k);
                                111                 : 
                                112             372 :         while ((combination = generator_next(generator)))
                                113                 :         {
                                114             252 :             MVNDistinctItem *item = &result->items[itemcnt];
                                115                 :             int         j;
                                116                 : 
  744 tomas.vondra              117             252 :             item->attributes = palloc(sizeof(AttrNumber) * k);
                                118             252 :             item->nattributes = k;
                                119                 : 
                                120                 :             /* translate the indexes to attnums */
 2207 alvherre                  121             846 :             for (j = 0; j < k; j++)
                                122                 :             {
  744 tomas.vondra              123             594 :                 item->attributes[j] = data->attnums[combination[j]];
                                124                 : 
                                125             594 :                 Assert(AttributeNumberIsValid(item->attributes[j]));
                                126                 :             }
                                127                 : 
 2207 alvherre                  128             252 :             item->ndistinct =
  744 tomas.vondra              129             252 :                 ndistinct_for_combination(totalrows, data, k, combination);
                                130                 : 
 2207 alvherre                  131             252 :             itemcnt++;
                                132             252 :             Assert(itemcnt <= result->nitems);
                                133                 :         }
                                134                 : 
                                135             120 :         generator_free(generator);
                                136                 :     }
                                137                 : 
                                138                 :     /* must consume exactly the whole output array */
                                139              78 :     Assert(itemcnt == result->nitems);
                                140                 : 
                                141              78 :     return result;
                                142                 : }
                                143                 : 
                                144                 : /*
                                145                 :  * statext_ndistinct_load
                                146                 :  *      Load the ndistinct value for the indicated pg_statistic_ext tuple
                                147                 :  */
                                148                 : MVNDistinct *
  448 tomas.vondra              149             201 : statext_ndistinct_load(Oid mvoid, bool inh)
                                150                 : {
                                151                 :     MVNDistinct *result;
                                152                 :     bool        isnull;
                                153                 :     Datum       ndist;
                                154                 :     HeapTuple   htup;
                                155                 : 
                                156             201 :     htup = SearchSysCache2(STATEXTDATASTXOID,
                                157                 :                            ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
 1803 tgl                       158             201 :     if (!HeapTupleIsValid(htup))
 2156 tgl                       159 UBC           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
                                160                 : 
 1396 tomas.vondra              161 CBC         201 :     ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
                                162                 :                             Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
 2207 alvherre                  163             201 :     if (isnull)
 2207 alvherre                  164 UBC           0 :         elog(ERROR,
                                165                 :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
                                166                 :              STATS_EXT_NDISTINCT, mvoid);
                                167                 : 
 1803 tgl                       168 CBC         201 :     result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist));
                                169                 : 
 2207 alvherre                  170             201 :     ReleaseSysCache(htup);
                                171                 : 
 1803 tgl                       172             201 :     return result;
                                173                 : }
                                174                 : 
                                175                 : /*
                                176                 :  * statext_ndistinct_serialize
                                177                 :  *      serialize ndistinct to the on-disk bytea format
                                178                 :  */
                                179                 : bytea *
 2207 alvherre                  180              78 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
                                181                 : {
                                182                 :     int         i;
                                183                 :     bytea      *output;
                                184                 :     char       *tmp;
                                185                 :     Size        len;
                                186                 : 
                                187              78 :     Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
                                188              78 :     Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
                                189                 : 
                                190                 :     /*
                                191                 :      * Base size is size of scalar fields in the struct, plus one base struct
                                192                 :      * for each item, including number of items for each.
                                193                 :      */
 1449 tomas.vondra              194              78 :     len = VARHDRSZ + SizeOfHeader;
                                195                 : 
                                196                 :     /* and also include space for the actual attribute numbers */
 2207 alvherre                  197             330 :     for (i = 0; i < ndistinct->nitems; i++)
                                198                 :     {
                                199                 :         int         nmembers;
                                200                 : 
  744 tomas.vondra              201             252 :         nmembers = ndistinct->items[i].nattributes;
 2207 alvherre                  202             252 :         Assert(nmembers >= 2);
                                203                 : 
 1449 tomas.vondra              204             252 :         len += SizeOfItem(nmembers);
                                205                 :     }
                                206                 : 
 2207 alvherre                  207              78 :     output = (bytea *) palloc(len);
                                208              78 :     SET_VARSIZE(output, len);
                                209                 : 
                                210              78 :     tmp = VARDATA(output);
                                211                 : 
                                212                 :     /* Store the base struct values (magic, type, nitems) */
 2204                           213              78 :     memcpy(tmp, &ndistinct->magic, sizeof(uint32));
                                214              78 :     tmp += sizeof(uint32);
                                215              78 :     memcpy(tmp, &ndistinct->type, sizeof(uint32));
                                216              78 :     tmp += sizeof(uint32);
                                217              78 :     memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
                                218              78 :     tmp += sizeof(uint32);
                                219                 : 
                                220                 :     /*
                                221                 :      * store number of attributes and attribute numbers for each entry
                                222                 :      */
 2207                           223             330 :     for (i = 0; i < ndistinct->nitems; i++)
                                224                 :     {
                                225             252 :         MVNDistinctItem item = ndistinct->items[i];
  744 tomas.vondra              226             252 :         int         nmembers = item.nattributes;
                                227                 : 
 2207 alvherre                  228             252 :         memcpy(tmp, &item.ndistinct, sizeof(double));
                                229             252 :         tmp += sizeof(double);
                                230             252 :         memcpy(tmp, &nmembers, sizeof(int));
                                231             252 :         tmp += sizeof(int);
                                232                 : 
  744 tomas.vondra              233             252 :         memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
                                234             252 :         tmp += nmembers * sizeof(AttrNumber);
                                235                 : 
                                236                 :         /* protect against overflows */
 2207 alvherre                  237             252 :         Assert(tmp <= ((char *) output + len));
                                238                 :     }
                                239                 : 
                                240                 :     /* check we used exactly the expected space */
 1449 tomas.vondra              241              78 :     Assert(tmp == ((char *) output + len));
                                242                 : 
 2207 alvherre                  243              78 :     return output;
                                244                 : }
                                245                 : 
                                246                 : /*
                                247                 :  * statext_ndistinct_deserialize
                                248                 :  *      Read an on-disk bytea format MVNDistinct to in-memory format
                                249                 :  */
                                250                 : MVNDistinct *
                                251             213 : statext_ndistinct_deserialize(bytea *data)
                                252                 : {
                                253                 :     int         i;
                                254                 :     Size        minimum_size;
                                255                 :     MVNDistinct ndist;
                                256                 :     MVNDistinct *ndistinct;
                                257                 :     char       *tmp;
                                258                 : 
                                259             213 :     if (data == NULL)
 2207 alvherre                  260 UBC           0 :         return NULL;
                                261                 : 
                                262                 :     /* we expect at least the basic fields of MVNDistinct struct */
 1449 tomas.vondra              263 CBC         213 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
   44 peter                     264 UNC           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
                                265                 :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
                                266                 : 
                                267                 :     /* initialize pointer to the data part (skip the varlena header) */
 2207 alvherre                  268 CBC         213 :     tmp = VARDATA_ANY(data);
                                269                 : 
                                270                 :     /* read the header fields and perform basic sanity checks */
 2204                           271             213 :     memcpy(&ndist.magic, tmp, sizeof(uint32));
                                272             213 :     tmp += sizeof(uint32);
                                273             213 :     memcpy(&ndist.type, tmp, sizeof(uint32));
                                274             213 :     tmp += sizeof(uint32);
                                275             213 :     memcpy(&ndist.nitems, tmp, sizeof(uint32));
                                276             213 :     tmp += sizeof(uint32);
                                277                 : 
                                278             213 :     if (ndist.magic != STATS_NDISTINCT_MAGIC)
 1410 tomas.vondra              279 UBC           0 :         elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
                                280                 :              ndist.magic, STATS_NDISTINCT_MAGIC);
 2204 alvherre                  281 CBC         213 :     if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
 1410 tomas.vondra              282 UBC           0 :         elog(ERROR, "invalid ndistinct type %d (expected %d)",
                                283                 :              ndist.type, STATS_NDISTINCT_TYPE_BASIC);
 2204 alvherre                  284 CBC         213 :     if (ndist.nitems == 0)
 1410 tomas.vondra              285 UBC           0 :         elog(ERROR, "invalid zero-length item array in MVNDistinct");
                                286                 : 
                                287                 :     /* what minimum bytea size do we expect for those parameters */
 1449 tomas.vondra              288 CBC         213 :     minimum_size = MinSizeOfItems(ndist.nitems);
 2204 alvherre                  289             213 :     if (VARSIZE_ANY_EXHDR(data) < minimum_size)
   44 peter                     290 UNC           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
                                291                 :              VARSIZE_ANY_EXHDR(data), minimum_size);
                                292                 : 
                                293                 :     /*
                                294                 :      * Allocate space for the ndistinct items (no space for each item's
                                295                 :      * attnos: those live in bitmapsets allocated separately)
                                296                 :      */
 1449 tomas.vondra              297 CBC         213 :     ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
 2204 alvherre                  298             213 :                         (ndist.nitems * sizeof(MVNDistinctItem)));
                                299             213 :     ndistinct->magic = ndist.magic;
                                300             213 :     ndistinct->type = ndist.type;
                                301             213 :     ndistinct->nitems = ndist.nitems;
                                302                 : 
 2207                           303            1155 :     for (i = 0; i < ndistinct->nitems; i++)
                                304                 :     {
                                305             942 :         MVNDistinctItem *item = &ndistinct->items[i];
                                306                 : 
                                307                 :         /* ndistinct value */
                                308             942 :         memcpy(&item->ndistinct, tmp, sizeof(double));
                                309             942 :         tmp += sizeof(double);
                                310                 : 
                                311                 :         /* number of attributes */
  744 tomas.vondra              312             942 :         memcpy(&item->nattributes, tmp, sizeof(int));
 2207 alvherre                  313             942 :         tmp += sizeof(int);
  744 tomas.vondra              314             942 :         Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
                                315                 : 
                                316                 :         item->attributes
                                317             942 :             = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
                                318                 : 
                                319             942 :         memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
                                320             942 :         tmp += sizeof(AttrNumber) * item->nattributes;
                                321                 : 
                                322                 :         /* still within the bytea */
 2207 alvherre                  323             942 :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
                                324                 :     }
                                325                 : 
                                326                 :     /* we should have consumed the whole bytea exactly */
                                327             213 :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
                                328                 : 
                                329             213 :     return ndistinct;
                                330                 : }
                                331                 : 
                                332                 : /*
                                333                 :  * pg_ndistinct_in
                                334                 :  *      input routine for type pg_ndistinct
                                335                 :  *
                                336                 :  * pg_ndistinct is real enough to be a table column, but it has no
                                337                 :  * operations of its own, and disallows input (just like pg_node_tree).
                                338                 :  */
                                339                 : Datum
 2207 alvherre                  340 UBC           0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
                                341                 : {
                                342               0 :     ereport(ERROR,
                                343                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                344                 :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
                                345                 : 
                                346                 :     PG_RETURN_VOID();           /* keep compiler quiet */
                                347                 : }
                                348                 : 
                                349                 : /*
                                350                 :  * pg_ndistinct
                                351                 :  *      output routine for type pg_ndistinct
                                352                 :  *
                                353                 :  * Produces a human-readable representation of the value.
                                354                 :  */
                                355                 : Datum
 2207 alvherre                  356 CBC          12 : pg_ndistinct_out(PG_FUNCTION_ARGS)
                                357                 : {
                                358              12 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
                                359              12 :     MVNDistinct *ndist = statext_ndistinct_deserialize(data);
                                360                 :     int         i;
                                361                 :     StringInfoData str;
                                362                 : 
                                363              12 :     initStringInfo(&str);
 2168                           364              12 :     appendStringInfoChar(&str, '{');
                                365                 : 
 2207                           366              60 :     for (i = 0; i < ndist->nitems; i++)
                                367                 :     {
                                368                 :         int         j;
                                369              48 :         MVNDistinctItem item = ndist->items[i];
                                370                 : 
                                371              48 :         if (i > 0)
                                372              36 :             appendStringInfoString(&str, ", ");
                                373                 : 
  744 tomas.vondra              374             156 :         for (j = 0; j < item.nattributes; j++)
                                375                 :         {
                                376             108 :             AttrNumber  attnum = item.attributes[j];
                                377                 : 
                                378             108 :             appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
                                379                 :         }
 2168 alvherre                  380              48 :         appendStringInfo(&str, "\": %d", (int) item.ndistinct);
                                381                 :     }
                                382                 : 
                                383              12 :     appendStringInfoChar(&str, '}');
                                384                 : 
 2207                           385              12 :     PG_RETURN_CSTRING(str.data);
                                386                 : }
                                387                 : 
                                388                 : /*
                                389                 :  * pg_ndistinct_recv
                                390                 :  *      binary input routine for type pg_ndistinct
                                391                 :  */
                                392                 : Datum
 2207 alvherre                  393 UBC           0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
                                394                 : {
                                395               0 :     ereport(ERROR,
                                396                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                397                 :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
                                398                 : 
                                399                 :     PG_RETURN_VOID();           /* keep compiler quiet */
                                400                 : }
                                401                 : 
                                402                 : /*
                                403                 :  * pg_ndistinct_send
                                404                 :  *      binary output routine for type pg_ndistinct
                                405                 :  *
                                406                 :  * n-distinct is serialized into a bytea value, so let's send that.
                                407                 :  */
                                408                 : Datum
                                409               0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
                                410                 : {
                                411               0 :     return byteasend(fcinfo);
                                412                 : }
                                413                 : 
                                414                 : /*
                                415                 :  * ndistinct_for_combination
                                416                 :  *      Estimates number of distinct values in a combination of columns.
                                417                 :  *
                                418                 :  * This uses the same ndistinct estimator as compute_scalar_stats() in
                                419                 :  * ANALYZE, i.e.,
                                420                 :  *      n*d / (n - f1 + f1*n/N)
                                421                 :  *
                                422                 :  * except that instead of values in a single column we are dealing with
                                423                 :  * combination of multiple columns.
                                424                 :  */
                                425                 : static double
  744 tomas.vondra              426 CBC         252 : ndistinct_for_combination(double totalrows, StatsBuildData *data,
                                427                 :                           int k, int *combination)
                                428                 : {
                                429                 :     int         i,
                                430                 :                 j;
                                431                 :     int         f1,
                                432                 :                 cnt,
                                433                 :                 d;
                                434                 :     bool       *isnull;
                                435                 :     Datum      *values;
                                436                 :     SortItem   *items;
                                437                 :     MultiSortSupport mss;
                                438             252 :     int         numrows = data->numrows;
                                439                 : 
 2207 alvherre                  440             252 :     mss = multi_sort_init(k);
                                441                 : 
                                442                 :     /*
                                443                 :      * In order to determine the number of distinct elements, create separate
                                444                 :      * values[]/isnull[] arrays with all the data we have, then sort them
                                445                 :      * using the specified column combination as dimensions.  We could try to
                                446                 :      * sort in place, but it'd probably be more complex and bug-prone.
                                447                 :      */
                                448             252 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
                                449             252 :     values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
                                450             252 :     isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
                                451                 : 
                                452          241752 :     for (i = 0; i < numrows; i++)
                                453                 :     {
                                454          241500 :         items[i].values = &values[i * k];
                                455          241500 :         items[i].isnull = &isnull[i * k];
                                456                 :     }
                                457                 : 
                                458                 :     /*
                                459                 :      * For each dimension, set up sort-support and fill in the values from the
                                460                 :      * sample data.
                                461                 :      *
                                462                 :      * We use the column data types' default sort operators and collations;
                                463                 :      * perhaps at some point it'd be worth using column-specific collations?
                                464                 :      */
                                465             846 :     for (i = 0; i < k; i++)
                                466                 :     {
                                467                 :         Oid         typid;
                                468                 :         TypeCacheEntry *type;
  744 tomas.vondra              469             594 :         Oid         collid = InvalidOid;
                                470             594 :         VacAttrStats *colstat = data->stats[combination[i]];
                                471                 : 
                                472             594 :         typid = colstat->attrtypid;
                                473             594 :         collid = colstat->attrcollid;
                                474                 : 
                                475             594 :         type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
 2153 bruce                     476             594 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
 2207 alvherre                  477 UBC           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
                                478                 :                  typid);
                                479                 : 
                                480                 :         /* prepare the sort function for this dimension */
  744 tomas.vondra              481 CBC         594 :         multi_sort_add_dimension(mss, i, type->lt_opr, collid);
                                482                 : 
                                483                 :         /* accumulate all the data for this dimension into the arrays */
 2207 alvherre                  484          573591 :         for (j = 0; j < numrows; j++)
                                485                 :         {
  744 tomas.vondra              486          572997 :             items[j].values[i] = data->values[combination[i]][j];
                                487          572997 :             items[j].isnull[i] = data->nulls[combination[i]][j];
                                488                 :         }
                                489                 :     }
                                490                 : 
                                491                 :     /* We can sort the array now ... */
   61 peter                     492 GNC         252 :     qsort_interruptible(items, numrows, sizeof(SortItem),
                                493                 :                         multi_sort_compare, mss);
                                494                 : 
                                495                 :     /* ... and count the number of distinct combinations */
                                496                 : 
 2207 alvherre                  497 CBC         252 :     f1 = 0;
                                498             252 :     cnt = 1;
                                499             252 :     d = 1;
                                500          241500 :     for (i = 1; i < numrows; i++)
                                501                 :     {
                                502          241248 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
                                503                 :         {
                                504           23343 :             if (cnt == 1)
                                505           12909 :                 f1 += 1;
                                506                 : 
                                507           23343 :             d++;
                                508           23343 :             cnt = 0;
                                509                 :         }
                                510                 : 
                                511          241248 :         cnt += 1;
                                512                 :     }
                                513                 : 
                                514             252 :     if (cnt == 1)
                                515              45 :         f1 += 1;
                                516                 : 
                                517             252 :     return estimate_ndistinct(totalrows, numrows, d, f1);
                                518                 : }
                                519                 : 
                                520                 : /* The Duj1 estimator (already used in analyze.c). */
                                521                 : static double
                                522             252 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
                                523                 : {
                                524                 :     double      numer,
                                525                 :                 denom,
                                526                 :                 ndistinct;
                                527                 : 
 2118 tgl                       528             252 :     numer = (double) numrows * (double) d;
                                529                 : 
 2207 alvherre                  530             252 :     denom = (double) (numrows - f1) +
 2118 tgl                       531             252 :         (double) f1 * (double) numrows / totalrows;
                                532                 : 
 2207 alvherre                  533             252 :     ndistinct = numer / denom;
                                534                 : 
                                535                 :     /* Clamp to sane range in case of roundoff error */
                                536             252 :     if (ndistinct < (double) d)
 2207 alvherre                  537 UBC           0 :         ndistinct = (double) d;
                                538                 : 
 2207 alvherre                  539 CBC         252 :     if (ndistinct > totalrows)
 2207 alvherre                  540 UBC           0 :         ndistinct = totalrows;
                                541                 : 
 2207 alvherre                  542 CBC         252 :     return floor(ndistinct + 0.5);
                                543                 : }
                                544                 : 
                                545                 : /*
                                546                 :  * n_choose_k
                                547                 :  *      computes binomial coefficients using an algorithm that is both
                                548                 :  *      efficient and prevents overflows
                                549                 :  */
                                550                 : static int
                                551             120 : n_choose_k(int n, int k)
                                552                 : {
                                553                 :     int         d,
                                554                 :                 r;
                                555                 : 
                                556             120 :     Assert((k > 0) && (n >= k));
                                557                 : 
                                558                 :     /* use symmetry of the binomial coefficients */
                                559             120 :     k = Min(k, n - k);
                                560                 : 
                                561             120 :     r = 1;
                                562             174 :     for (d = 1; d <= k; ++d)
                                563                 :     {
                                564              54 :         r *= n--;
                                565              54 :         r /= d;
                                566                 :     }
                                567                 : 
                                568             120 :     return r;
                                569                 : }
                                570                 : 
                                571                 : /*
                                572                 :  * num_combinations
                                573                 :  *      number of combinations, excluding single-value combinations
                                574                 :  */
                                575                 : static int
                                576              78 : num_combinations(int n)
                                577                 : {
 1096 drowley                   578              78 :     return (1 << n) - (n + 1);
                                579                 : }
                                580                 : 
                                581                 : /*
                                582                 :  * generator_init
                                583                 :  *      initialize the generator of combinations
                                584                 :  *
                                585                 :  * The generator produces combinations of K elements in the interval (0..N).
                                586                 :  * We prebuild all the combinations in this method, which is simpler than
                                587                 :  * generating them on the fly.
                                588                 :  */
                                589                 : static CombinationGenerator *
 2207 alvherre                  590             120 : generator_init(int n, int k)
                                591                 : {
                                592                 :     CombinationGenerator *state;
                                593                 : 
                                594             120 :     Assert((n >= k) && (k > 0));
                                595                 : 
                                596                 :     /* allocate the generator state as a single chunk of memory */
                                597             120 :     state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
                                598                 : 
                                599             120 :     state->ncombinations = n_choose_k(n, k);
                                600                 : 
                                601                 :     /* pre-allocate space for all combinations */
                                602             120 :     state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
                                603                 : 
                                604             120 :     state->current = 0;
                                605             120 :     state->k = k;
                                606             120 :     state->n = n;
                                607                 : 
                                608                 :     /* now actually pre-generate all the combinations of K elements */
                                609             120 :     generate_combinations(state);
                                610                 : 
                                611                 :     /* make sure we got the expected number of combinations */
                                612             120 :     Assert(state->current == state->ncombinations);
                                613                 : 
                                614                 :     /* reset the number, so we start with the first one */
                                615             120 :     state->current = 0;
                                616                 : 
                                617             120 :     return state;
                                618                 : }
                                619                 : 
                                620                 : /*
                                621                 :  * generator_next
                                622                 :  *      returns the next combination from the prebuilt list
                                623                 :  *
                                624                 :  * Returns a combination of K array indexes (0 .. N), as specified to
                                625                 :  * generator_init), or NULL when there are no more combination.
                                626                 :  */
                                627                 : static int *
                                628             372 : generator_next(CombinationGenerator *state)
                                629                 : {
                                630             372 :     if (state->current == state->ncombinations)
                                631             120 :         return NULL;
                                632                 : 
                                633             252 :     return &state->combinations[state->k * state->current++];
                                634                 : }
                                635                 : 
                                636                 : /*
                                637                 :  * generator_free
                                638                 :  *      free the internal state of the generator
                                639                 :  *
                                640                 :  * Releases the generator internal state (pre-built combinations).
                                641                 :  */
                                642                 : static void
                                643             120 : generator_free(CombinationGenerator *state)
                                644                 : {
                                645             120 :     pfree(state->combinations);
                                646             120 :     pfree(state);
                                647             120 : }
                                648                 : 
                                649                 : /*
                                650                 :  * generate_combinations_recurse
                                651                 :  *      given a prefix, generate all possible combinations
                                652                 :  *
                                653                 :  * Given a prefix (first few elements of the combination), generate following
                                654                 :  * elements recursively. We generate the combinations in lexicographic order,
                                655                 :  * which eliminates permutations of the same combination.
                                656                 :  */
                                657                 : static void
                                658             966 : generate_combinations_recurse(CombinationGenerator *state,
                                659                 :                               int index, int start, int *current)
                                660                 : {
                                661                 :     /* If we haven't filled all the elements, simply recurse. */
                                662             966 :     if (index < state->k)
                                663                 :     {
                                664                 :         int         i;
                                665                 : 
                                666                 :         /*
                                667                 :          * The values have to be in ascending order, so make sure we start
                                668                 :          * with the value passed by parameter.
                                669                 :          */
                                670                 : 
                                671            1560 :         for (i = start; i < state->n; i++)
                                672                 :         {
                                673             846 :             current[index] = i;
                                674             846 :             generate_combinations_recurse(state, (index + 1), (i + 1), current);
                                675                 :         }
                                676                 : 
                                677             714 :         return;
                                678                 :     }
                                679                 :     else
                                680                 :     {
                                681                 :         /* we got a valid combination, add it to the array */
                                682             252 :         memcpy(&state->combinations[(state->k * state->current)],
                                683             252 :                current, state->k * sizeof(int));
                                684             252 :         state->current++;
                                685                 :     }
                                686                 : }
                                687                 : 
                                688                 : /*
                                689                 :  * generate_combinations
                                690                 :  *      generate all k-combinations of N elements
                                691                 :  */
                                692                 : static void
                                693             120 : generate_combinations(CombinationGenerator *state)
                                694                 : {
 2153 bruce                     695             120 :     int        *current = (int *) palloc0(sizeof(int) * state->k);
                                696                 : 
 2207 alvherre                  697             120 :     generate_combinations_recurse(state, 0, 0, current);
                                698                 : 
                                699             120 :     pfree(current);
                                700             120 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a