LCOV - differential code coverage report
Current view: top level - src/backend/statistics - mvdistinct.c (source / functions) Coverage Total Hit UNC UBC GNC CBC DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.0 % 213 196 2 15 1 195 2 1
Current Date: 2023-04-08 15:15:32 Functions: 82.4 % 17 14 3 2 12
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * mvdistinct.c
       4                 :  *    POSTGRES multivariate ndistinct coefficients
       5                 :  *
       6                 :  * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
       7                 :  * is tricky, and the estimation error is often significant.
       8                 : 
       9                 :  * The multivariate ndistinct coefficients address this by storing ndistinct
      10                 :  * estimates for combinations of the user-specified columns.  So for example
      11                 :  * given a statistics object on three columns (a,b,c), this module estimates
      12                 :  * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
      13                 :  * estimates are already available in pg_statistic.
      14                 :  *
      15                 :  *
      16                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      17                 :  * Portions Copyright (c) 1994, Regents of the University of California
      18                 :  *
      19                 :  * IDENTIFICATION
      20                 :  *    src/backend/statistics/mvdistinct.c
      21                 :  *
      22                 :  *-------------------------------------------------------------------------
      23                 :  */
      24                 : #include "postgres.h"
      25                 : 
      26                 : #include <math.h>
      27                 : 
      28                 : #include "access/htup_details.h"
      29                 : #include "catalog/pg_statistic_ext.h"
      30                 : #include "catalog/pg_statistic_ext_data.h"
      31                 : #include "lib/stringinfo.h"
      32                 : #include "statistics/extended_stats_internal.h"
      33                 : #include "statistics/statistics.h"
      34                 : #include "utils/fmgrprotos.h"
      35                 : #include "utils/lsyscache.h"
      36                 : #include "utils/syscache.h"
      37                 : #include "utils/typcache.h"
      38                 : 
      39                 : static double ndistinct_for_combination(double totalrows, StatsBuildData *data,
      40                 :                                         int k, int *combination);
      41                 : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
      42                 : static int  n_choose_k(int n, int k);
      43                 : static int  num_combinations(int n);
      44                 : 
      45                 : /* size of the struct header fields (magic, type, nitems) */
      46                 : #define SizeOfHeader        (3 * sizeof(uint32))
      47                 : 
      48                 : /* size of a serialized ndistinct item (coefficient, natts, atts) */
      49                 : #define SizeOfItem(natts) \
      50                 :     (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
      51                 : 
      52                 : /* minimal size of a ndistinct item (with two attributes) */
      53                 : #define MinSizeOfItem   SizeOfItem(2)
      54                 : 
      55                 : /* minimal size of mvndistinct, when all items are minimal */
      56                 : #define MinSizeOfItems(nitems)  \
      57                 :     (SizeOfHeader + (nitems) * MinSizeOfItem)
      58                 : 
      59                 : /* Combination generator API */
      60                 : 
      61                 : /* internal state for generator of k-combinations of n elements */
      62                 : typedef struct CombinationGenerator
      63                 : {
      64                 :     int         k;              /* size of the combination */
      65                 :     int         n;              /* total number of elements */
      66                 :     int         current;        /* index of the next combination to return */
      67                 :     int         ncombinations;  /* number of combinations (size of array) */
      68                 :     int        *combinations;   /* array of pre-built combinations */
      69                 : } CombinationGenerator;
      70                 : 
      71                 : static CombinationGenerator *generator_init(int n, int k);
      72                 : static void generator_free(CombinationGenerator *state);
      73                 : static int *generator_next(CombinationGenerator *state);
      74                 : static void generate_combinations(CombinationGenerator *state);
      75                 : 
      76                 : 
      77                 : /*
      78                 :  * statext_ndistinct_build
      79                 :  *      Compute ndistinct coefficient for the combination of attributes.
      80                 :  *
      81                 :  * This computes the ndistinct estimate using the same estimator used
      82                 :  * in analyze.c and then computes the coefficient.
      83                 :  *
      84                 :  * To handle expressions easily, we treat them as system attributes with
      85                 :  * negative attnums, and offset everything by number of expressions to
      86                 :  * allow using Bitmapsets.
      87                 :  */
      88                 : MVNDistinct *
      89 CBC          78 : statext_ndistinct_build(double totalrows, StatsBuildData *data)
      90                 : {
      91                 :     MVNDistinct *result;
      92                 :     int         k;
      93                 :     int         itemcnt;
      94              78 :     int         numattrs = data->nattnums;
      95              78 :     int         numcombs = num_combinations(numattrs);
      96                 : 
      97              78 :     result = palloc(offsetof(MVNDistinct, items) +
      98              78 :                     numcombs * sizeof(MVNDistinctItem));
      99              78 :     result->magic = STATS_NDISTINCT_MAGIC;
     100              78 :     result->type = STATS_NDISTINCT_TYPE_BASIC;
     101              78 :     result->nitems = numcombs;
     102                 : 
     103              78 :     itemcnt = 0;
     104             198 :     for (k = 2; k <= numattrs; k++)
     105                 :     {
     106                 :         int        *combination;
     107                 :         CombinationGenerator *generator;
     108                 : 
     109                 :         /* generate combinations of K out of N elements */
     110             120 :         generator = generator_init(numattrs, k);
     111                 : 
     112             372 :         while ((combination = generator_next(generator)))
     113                 :         {
     114             252 :             MVNDistinctItem *item = &result->items[itemcnt];
     115                 :             int         j;
     116                 : 
     117             252 :             item->attributes = palloc(sizeof(AttrNumber) * k);
     118             252 :             item->nattributes = k;
     119                 : 
     120                 :             /* translate the indexes to attnums */
     121             846 :             for (j = 0; j < k; j++)
     122                 :             {
     123             594 :                 item->attributes[j] = data->attnums[combination[j]];
     124                 : 
     125             594 :                 Assert(AttributeNumberIsValid(item->attributes[j]));
     126                 :             }
     127                 : 
     128             252 :             item->ndistinct =
     129             252 :                 ndistinct_for_combination(totalrows, data, k, combination);
     130                 : 
     131             252 :             itemcnt++;
     132             252 :             Assert(itemcnt <= result->nitems);
     133                 :         }
     134                 : 
     135             120 :         generator_free(generator);
     136                 :     }
     137                 : 
     138                 :     /* must consume exactly the whole output array */
     139              78 :     Assert(itemcnt == result->nitems);
     140                 : 
     141              78 :     return result;
     142                 : }
     143                 : 
     144                 : /*
     145                 :  * statext_ndistinct_load
     146                 :  *      Load the ndistinct value for the indicated pg_statistic_ext tuple
     147                 :  */
     148                 : MVNDistinct *
     149             201 : statext_ndistinct_load(Oid mvoid, bool inh)
     150                 : {
     151                 :     MVNDistinct *result;
     152                 :     bool        isnull;
     153                 :     Datum       ndist;
     154                 :     HeapTuple   htup;
     155                 : 
     156             201 :     htup = SearchSysCache2(STATEXTDATASTXOID,
     157                 :                            ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     158             201 :     if (!HeapTupleIsValid(htup))
     159 UBC           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     160                 : 
     161 CBC         201 :     ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     162                 :                             Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
     163             201 :     if (isnull)
     164 UBC           0 :         elog(ERROR,
     165                 :              "requested statistics kind \"%c\" is not yet built for statistics object %u",
     166                 :              STATS_EXT_NDISTINCT, mvoid);
     167                 : 
     168 CBC         201 :     result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist));
     169                 : 
     170             201 :     ReleaseSysCache(htup);
     171                 : 
     172             201 :     return result;
     173                 : }
     174                 : 
     175                 : /*
     176                 :  * statext_ndistinct_serialize
     177                 :  *      serialize ndistinct to the on-disk bytea format
     178                 :  */
     179                 : bytea *
     180              78 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
     181                 : {
     182                 :     int         i;
     183                 :     bytea      *output;
     184                 :     char       *tmp;
     185                 :     Size        len;
     186                 : 
     187              78 :     Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
     188              78 :     Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
     189                 : 
     190                 :     /*
     191                 :      * Base size is size of scalar fields in the struct, plus one base struct
     192                 :      * for each item, including number of items for each.
     193                 :      */
     194              78 :     len = VARHDRSZ + SizeOfHeader;
     195                 : 
     196                 :     /* and also include space for the actual attribute numbers */
     197             330 :     for (i = 0; i < ndistinct->nitems; i++)
     198                 :     {
     199                 :         int         nmembers;
     200                 : 
     201             252 :         nmembers = ndistinct->items[i].nattributes;
     202             252 :         Assert(nmembers >= 2);
     203                 : 
     204             252 :         len += SizeOfItem(nmembers);
     205                 :     }
     206                 : 
     207              78 :     output = (bytea *) palloc(len);
     208              78 :     SET_VARSIZE(output, len);
     209                 : 
     210              78 :     tmp = VARDATA(output);
     211                 : 
     212                 :     /* Store the base struct values (magic, type, nitems) */
     213              78 :     memcpy(tmp, &ndistinct->magic, sizeof(uint32));
     214              78 :     tmp += sizeof(uint32);
     215              78 :     memcpy(tmp, &ndistinct->type, sizeof(uint32));
     216              78 :     tmp += sizeof(uint32);
     217              78 :     memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
     218              78 :     tmp += sizeof(uint32);
     219                 : 
     220                 :     /*
     221                 :      * store number of attributes and attribute numbers for each entry
     222                 :      */
     223             330 :     for (i = 0; i < ndistinct->nitems; i++)
     224                 :     {
     225             252 :         MVNDistinctItem item = ndistinct->items[i];
     226             252 :         int         nmembers = item.nattributes;
     227                 : 
     228             252 :         memcpy(tmp, &item.ndistinct, sizeof(double));
     229             252 :         tmp += sizeof(double);
     230             252 :         memcpy(tmp, &nmembers, sizeof(int));
     231             252 :         tmp += sizeof(int);
     232                 : 
     233             252 :         memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
     234             252 :         tmp += nmembers * sizeof(AttrNumber);
     235                 : 
     236                 :         /* protect against overflows */
     237             252 :         Assert(tmp <= ((char *) output + len));
     238                 :     }
     239                 : 
     240                 :     /* check we used exactly the expected space */
     241              78 :     Assert(tmp == ((char *) output + len));
     242                 : 
     243              78 :     return output;
     244                 : }
     245                 : 
     246                 : /*
     247                 :  * statext_ndistinct_deserialize
     248                 :  *      Read an on-disk bytea format MVNDistinct to in-memory format
     249                 :  */
     250                 : MVNDistinct *
     251             213 : statext_ndistinct_deserialize(bytea *data)
     252                 : {
     253                 :     int         i;
     254                 :     Size        minimum_size;
     255                 :     MVNDistinct ndist;
     256                 :     MVNDistinct *ndistinct;
     257                 :     char       *tmp;
     258                 : 
     259             213 :     if (data == NULL)
     260 UBC           0 :         return NULL;
     261                 : 
     262                 :     /* we expect at least the basic fields of MVNDistinct struct */
     263 CBC         213 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
     264 UNC           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
     265                 :              VARSIZE_ANY_EXHDR(data), SizeOfHeader);
     266                 : 
     267                 :     /* initialize pointer to the data part (skip the varlena header) */
     268 CBC         213 :     tmp = VARDATA_ANY(data);
     269                 : 
     270                 :     /* read the header fields and perform basic sanity checks */
     271             213 :     memcpy(&ndist.magic, tmp, sizeof(uint32));
     272             213 :     tmp += sizeof(uint32);
     273             213 :     memcpy(&ndist.type, tmp, sizeof(uint32));
     274             213 :     tmp += sizeof(uint32);
     275             213 :     memcpy(&ndist.nitems, tmp, sizeof(uint32));
     276             213 :     tmp += sizeof(uint32);
     277                 : 
     278             213 :     if (ndist.magic != STATS_NDISTINCT_MAGIC)
     279 UBC           0 :         elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
     280                 :              ndist.magic, STATS_NDISTINCT_MAGIC);
     281 CBC         213 :     if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
     282 UBC           0 :         elog(ERROR, "invalid ndistinct type %d (expected %d)",
     283                 :              ndist.type, STATS_NDISTINCT_TYPE_BASIC);
     284 CBC         213 :     if (ndist.nitems == 0)
     285 UBC           0 :         elog(ERROR, "invalid zero-length item array in MVNDistinct");
     286                 : 
     287                 :     /* what minimum bytea size do we expect for those parameters */
     288 CBC         213 :     minimum_size = MinSizeOfItems(ndist.nitems);
     289             213 :     if (VARSIZE_ANY_EXHDR(data) < minimum_size)
     290 UNC           0 :         elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
     291                 :              VARSIZE_ANY_EXHDR(data), minimum_size);
     292                 : 
     293                 :     /*
     294                 :      * Allocate space for the ndistinct items (no space for each item's
     295                 :      * attnos: those live in bitmapsets allocated separately)
     296                 :      */
     297 CBC         213 :     ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
     298             213 :                         (ndist.nitems * sizeof(MVNDistinctItem)));
     299             213 :     ndistinct->magic = ndist.magic;
     300             213 :     ndistinct->type = ndist.type;
     301             213 :     ndistinct->nitems = ndist.nitems;
     302                 : 
     303            1155 :     for (i = 0; i < ndistinct->nitems; i++)
     304                 :     {
     305             942 :         MVNDistinctItem *item = &ndistinct->items[i];
     306                 : 
     307                 :         /* ndistinct value */
     308             942 :         memcpy(&item->ndistinct, tmp, sizeof(double));
     309             942 :         tmp += sizeof(double);
     310                 : 
     311                 :         /* number of attributes */
     312             942 :         memcpy(&item->nattributes, tmp, sizeof(int));
     313             942 :         tmp += sizeof(int);
     314             942 :         Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
     315                 : 
     316                 :         item->attributes
     317             942 :             = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
     318                 : 
     319             942 :         memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
     320             942 :         tmp += sizeof(AttrNumber) * item->nattributes;
     321                 : 
     322                 :         /* still within the bytea */
     323             942 :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     324                 :     }
     325                 : 
     326                 :     /* we should have consumed the whole bytea exactly */
     327             213 :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     328                 : 
     329             213 :     return ndistinct;
     330                 : }
     331                 : 
     332                 : /*
     333                 :  * pg_ndistinct_in
     334                 :  *      input routine for type pg_ndistinct
     335                 :  *
     336                 :  * pg_ndistinct is real enough to be a table column, but it has no
     337                 :  * operations of its own, and disallows input (just like pg_node_tree).
     338                 :  */
     339                 : Datum
     340 UBC           0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
     341                 : {
     342               0 :     ereport(ERROR,
     343                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     344                 :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     345                 : 
     346                 :     PG_RETURN_VOID();           /* keep compiler quiet */
     347                 : }
     348                 : 
     349                 : /*
     350                 :  * pg_ndistinct
     351                 :  *      output routine for type pg_ndistinct
     352                 :  *
     353                 :  * Produces a human-readable representation of the value.
     354                 :  */
     355                 : Datum
     356 CBC          12 : pg_ndistinct_out(PG_FUNCTION_ARGS)
     357                 : {
     358              12 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     359              12 :     MVNDistinct *ndist = statext_ndistinct_deserialize(data);
     360                 :     int         i;
     361                 :     StringInfoData str;
     362                 : 
     363              12 :     initStringInfo(&str);
     364              12 :     appendStringInfoChar(&str, '{');
     365                 : 
     366              60 :     for (i = 0; i < ndist->nitems; i++)
     367                 :     {
     368                 :         int         j;
     369              48 :         MVNDistinctItem item = ndist->items[i];
     370                 : 
     371              48 :         if (i > 0)
     372              36 :             appendStringInfoString(&str, ", ");
     373                 : 
     374             156 :         for (j = 0; j < item.nattributes; j++)
     375                 :         {
     376             108 :             AttrNumber  attnum = item.attributes[j];
     377                 : 
     378             108 :             appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
     379                 :         }
     380              48 :         appendStringInfo(&str, "\": %d", (int) item.ndistinct);
     381                 :     }
     382                 : 
     383              12 :     appendStringInfoChar(&str, '}');
     384                 : 
     385              12 :     PG_RETURN_CSTRING(str.data);
     386                 : }
     387                 : 
     388                 : /*
     389                 :  * pg_ndistinct_recv
     390                 :  *      binary input routine for type pg_ndistinct
     391                 :  */
     392                 : Datum
     393 UBC           0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
     394                 : {
     395               0 :     ereport(ERROR,
     396                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     397                 :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     398                 : 
     399                 :     PG_RETURN_VOID();           /* keep compiler quiet */
     400                 : }
     401                 : 
     402                 : /*
     403                 :  * pg_ndistinct_send
     404                 :  *      binary output routine for type pg_ndistinct
     405                 :  *
     406                 :  * n-distinct is serialized into a bytea value, so let's send that.
     407                 :  */
     408                 : Datum
     409               0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
     410                 : {
     411               0 :     return byteasend(fcinfo);
     412                 : }
     413                 : 
     414                 : /*
     415                 :  * ndistinct_for_combination
     416                 :  *      Estimates number of distinct values in a combination of columns.
     417                 :  *
     418                 :  * This uses the same ndistinct estimator as compute_scalar_stats() in
     419                 :  * ANALYZE, i.e.,
     420                 :  *      n*d / (n - f1 + f1*n/N)
     421                 :  *
     422                 :  * except that instead of values in a single column we are dealing with
     423                 :  * combination of multiple columns.
     424                 :  */
     425                 : static double
     426 CBC         252 : ndistinct_for_combination(double totalrows, StatsBuildData *data,
     427                 :                           int k, int *combination)
     428                 : {
     429                 :     int         i,
     430                 :                 j;
     431                 :     int         f1,
     432                 :                 cnt,
     433                 :                 d;
     434                 :     bool       *isnull;
     435                 :     Datum      *values;
     436                 :     SortItem   *items;
     437                 :     MultiSortSupport mss;
     438             252 :     int         numrows = data->numrows;
     439                 : 
     440             252 :     mss = multi_sort_init(k);
     441                 : 
     442                 :     /*
     443                 :      * In order to determine the number of distinct elements, create separate
     444                 :      * values[]/isnull[] arrays with all the data we have, then sort them
     445                 :      * using the specified column combination as dimensions.  We could try to
     446                 :      * sort in place, but it'd probably be more complex and bug-prone.
     447                 :      */
     448             252 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
     449             252 :     values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
     450             252 :     isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
     451                 : 
     452          241752 :     for (i = 0; i < numrows; i++)
     453                 :     {
     454          241500 :         items[i].values = &values[i * k];
     455          241500 :         items[i].isnull = &isnull[i * k];
     456                 :     }
     457                 : 
     458                 :     /*
     459                 :      * For each dimension, set up sort-support and fill in the values from the
     460                 :      * sample data.
     461                 :      *
     462                 :      * We use the column data types' default sort operators and collations;
     463                 :      * perhaps at some point it'd be worth using column-specific collations?
     464                 :      */
     465             846 :     for (i = 0; i < k; i++)
     466                 :     {
     467                 :         Oid         typid;
     468                 :         TypeCacheEntry *type;
     469             594 :         Oid         collid = InvalidOid;
     470             594 :         VacAttrStats *colstat = data->stats[combination[i]];
     471                 : 
     472             594 :         typid = colstat->attrtypid;
     473             594 :         collid = colstat->attrcollid;
     474                 : 
     475             594 :         type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
     476             594 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     477 UBC           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     478                 :                  typid);
     479                 : 
     480                 :         /* prepare the sort function for this dimension */
     481 CBC         594 :         multi_sort_add_dimension(mss, i, type->lt_opr, collid);
     482                 : 
     483                 :         /* accumulate all the data for this dimension into the arrays */
     484          573591 :         for (j = 0; j < numrows; j++)
     485                 :         {
     486          572997 :             items[j].values[i] = data->values[combination[i]][j];
     487          572997 :             items[j].isnull[i] = data->nulls[combination[i]][j];
     488                 :         }
     489                 :     }
     490                 : 
     491                 :     /* We can sort the array now ... */
     492 GNC         252 :     qsort_interruptible(items, numrows, sizeof(SortItem),
     493                 :                         multi_sort_compare, mss);
     494                 : 
     495                 :     /* ... and count the number of distinct combinations */
     496                 : 
     497 CBC         252 :     f1 = 0;
     498             252 :     cnt = 1;
     499             252 :     d = 1;
     500          241500 :     for (i = 1; i < numrows; i++)
     501                 :     {
     502          241248 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     503                 :         {
     504           23343 :             if (cnt == 1)
     505           12909 :                 f1 += 1;
     506                 : 
     507           23343 :             d++;
     508           23343 :             cnt = 0;
     509                 :         }
     510                 : 
     511          241248 :         cnt += 1;
     512                 :     }
     513                 : 
     514             252 :     if (cnt == 1)
     515              45 :         f1 += 1;
     516                 : 
     517             252 :     return estimate_ndistinct(totalrows, numrows, d, f1);
     518                 : }
     519                 : 
     520                 : /* The Duj1 estimator (already used in analyze.c). */
     521                 : static double
     522             252 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
     523                 : {
     524                 :     double      numer,
     525                 :                 denom,
     526                 :                 ndistinct;
     527                 : 
     528             252 :     numer = (double) numrows * (double) d;
     529                 : 
     530             252 :     denom = (double) (numrows - f1) +
     531             252 :         (double) f1 * (double) numrows / totalrows;
     532                 : 
     533             252 :     ndistinct = numer / denom;
     534                 : 
     535                 :     /* Clamp to sane range in case of roundoff error */
     536             252 :     if (ndistinct < (double) d)
     537 UBC           0 :         ndistinct = (double) d;
     538                 : 
     539 CBC         252 :     if (ndistinct > totalrows)
     540 UBC           0 :         ndistinct = totalrows;
     541                 : 
     542 CBC         252 :     return floor(ndistinct + 0.5);
     543                 : }
     544                 : 
     545                 : /*
     546                 :  * n_choose_k
     547                 :  *      computes binomial coefficients using an algorithm that is both
     548                 :  *      efficient and prevents overflows
     549                 :  */
     550                 : static int
     551             120 : n_choose_k(int n, int k)
     552                 : {
     553                 :     int         d,
     554                 :                 r;
     555                 : 
     556             120 :     Assert((k > 0) && (n >= k));
     557                 : 
     558                 :     /* use symmetry of the binomial coefficients */
     559             120 :     k = Min(k, n - k);
     560                 : 
     561             120 :     r = 1;
     562             174 :     for (d = 1; d <= k; ++d)
     563                 :     {
     564              54 :         r *= n--;
     565              54 :         r /= d;
     566                 :     }
     567                 : 
     568             120 :     return r;
     569                 : }
     570                 : 
     571                 : /*
     572                 :  * num_combinations
     573                 :  *      number of combinations, excluding single-value combinations
     574                 :  */
     575                 : static int
     576              78 : num_combinations(int n)
     577                 : {
     578              78 :     return (1 << n) - (n + 1);
     579                 : }
     580                 : 
     581                 : /*
     582                 :  * generator_init
     583                 :  *      initialize the generator of combinations
     584                 :  *
     585                 :  * The generator produces combinations of K elements in the interval (0..N).
     586                 :  * We prebuild all the combinations in this method, which is simpler than
     587                 :  * generating them on the fly.
     588                 :  */
     589                 : static CombinationGenerator *
     590             120 : generator_init(int n, int k)
     591                 : {
     592                 :     CombinationGenerator *state;
     593                 : 
     594             120 :     Assert((n >= k) && (k > 0));
     595                 : 
     596                 :     /* allocate the generator state as a single chunk of memory */
     597             120 :     state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
     598                 : 
     599             120 :     state->ncombinations = n_choose_k(n, k);
     600                 : 
     601                 :     /* pre-allocate space for all combinations */
     602             120 :     state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
     603                 : 
     604             120 :     state->current = 0;
     605             120 :     state->k = k;
     606             120 :     state->n = n;
     607                 : 
     608                 :     /* now actually pre-generate all the combinations of K elements */
     609             120 :     generate_combinations(state);
     610                 : 
     611                 :     /* make sure we got the expected number of combinations */
     612             120 :     Assert(state->current == state->ncombinations);
     613                 : 
     614                 :     /* reset the number, so we start with the first one */
     615             120 :     state->current = 0;
     616                 : 
     617             120 :     return state;
     618                 : }
     619                 : 
     620                 : /*
     621                 :  * generator_next
     622                 :  *      returns the next combination from the prebuilt list
     623                 :  *
     624                 :  * Returns a combination of K array indexes (0 .. N), as specified to
     625                 :  * generator_init), or NULL when there are no more combination.
     626                 :  */
     627                 : static int *
     628             372 : generator_next(CombinationGenerator *state)
     629                 : {
     630             372 :     if (state->current == state->ncombinations)
     631             120 :         return NULL;
     632                 : 
     633             252 :     return &state->combinations[state->k * state->current++];
     634                 : }
     635                 : 
     636                 : /*
     637                 :  * generator_free
     638                 :  *      free the internal state of the generator
     639                 :  *
     640                 :  * Releases the generator internal state (pre-built combinations).
     641                 :  */
     642                 : static void
     643             120 : generator_free(CombinationGenerator *state)
     644                 : {
     645             120 :     pfree(state->combinations);
     646             120 :     pfree(state);
     647             120 : }
     648                 : 
     649                 : /*
     650                 :  * generate_combinations_recurse
     651                 :  *      given a prefix, generate all possible combinations
     652                 :  *
     653                 :  * Given a prefix (first few elements of the combination), generate following
     654                 :  * elements recursively. We generate the combinations in lexicographic order,
     655                 :  * which eliminates permutations of the same combination.
     656                 :  */
     657                 : static void
     658             966 : generate_combinations_recurse(CombinationGenerator *state,
     659                 :                               int index, int start, int *current)
     660                 : {
     661                 :     /* If we haven't filled all the elements, simply recurse. */
     662             966 :     if (index < state->k)
     663                 :     {
     664                 :         int         i;
     665                 : 
     666                 :         /*
     667                 :          * The values have to be in ascending order, so make sure we start
     668                 :          * with the value passed by parameter.
     669                 :          */
     670                 : 
     671            1560 :         for (i = start; i < state->n; i++)
     672                 :         {
     673             846 :             current[index] = i;
     674             846 :             generate_combinations_recurse(state, (index + 1), (i + 1), current);
     675                 :         }
     676                 : 
     677             714 :         return;
     678                 :     }
     679                 :     else
     680                 :     {
     681                 :         /* we got a valid combination, add it to the array */
     682             252 :         memcpy(&state->combinations[(state->k * state->current)],
     683             252 :                current, state->k * sizeof(int));
     684             252 :         state->current++;
     685                 :     }
     686                 : }
     687                 : 
     688                 : /*
     689                 :  * generate_combinations
     690                 :  *      generate all k-combinations of N elements
     691                 :  */
     692                 : static void
     693             120 : generate_combinations(CombinationGenerator *state)
     694                 : {
     695             120 :     int        *current = (int *) palloc0(sizeof(int) * state->k);
     696                 : 
     697             120 :     generate_combinations_recurse(state, 0, 0, current);
     698                 : 
     699             120 :     pfree(current);
     700             120 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a