Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * mvdistinct.c
4 : * POSTGRES multivariate ndistinct coefficients
5 : *
6 : * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
7 : * is tricky, and the estimation error is often significant.
8 :
9 : * The multivariate ndistinct coefficients address this by storing ndistinct
10 : * estimates for combinations of the user-specified columns. So for example
11 : * given a statistics object on three columns (a,b,c), this module estimates
12 : * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column
13 : * estimates are already available in pg_statistic.
14 : *
15 : *
16 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
17 : * Portions Copyright (c) 1994, Regents of the University of California
18 : *
19 : * IDENTIFICATION
20 : * src/backend/statistics/mvdistinct.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/htup_details.h"
29 : #include "catalog/pg_statistic_ext.h"
30 : #include "catalog/pg_statistic_ext_data.h"
31 : #include "lib/stringinfo.h"
32 : #include "statistics/extended_stats_internal.h"
33 : #include "statistics/statistics.h"
34 : #include "utils/fmgrprotos.h"
35 : #include "utils/lsyscache.h"
36 : #include "utils/syscache.h"
37 : #include "utils/typcache.h"
38 :
39 : static double ndistinct_for_combination(double totalrows, StatsBuildData *data,
40 : int k, int *combination);
41 : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
42 : static int n_choose_k(int n, int k);
43 : static int num_combinations(int n);
44 :
45 : /* size of the struct header fields (magic, type, nitems) */
46 : #define SizeOfHeader (3 * sizeof(uint32))
47 :
48 : /* size of a serialized ndistinct item (coefficient, natts, atts) */
49 : #define SizeOfItem(natts) \
50 : (sizeof(double) + sizeof(int) + (natts) * sizeof(AttrNumber))
51 :
52 : /* minimal size of a ndistinct item (with two attributes) */
53 : #define MinSizeOfItem SizeOfItem(2)
54 :
55 : /* minimal size of mvndistinct, when all items are minimal */
56 : #define MinSizeOfItems(nitems) \
57 : (SizeOfHeader + (nitems) * MinSizeOfItem)
58 :
59 : /* Combination generator API */
60 :
61 : /* internal state for generator of k-combinations of n elements */
62 : typedef struct CombinationGenerator
63 : {
64 : int k; /* size of the combination */
65 : int n; /* total number of elements */
66 : int current; /* index of the next combination to return */
67 : int ncombinations; /* number of combinations (size of array) */
68 : int *combinations; /* array of pre-built combinations */
69 : } CombinationGenerator;
70 :
71 : static CombinationGenerator *generator_init(int n, int k);
72 : static void generator_free(CombinationGenerator *state);
73 : static int *generator_next(CombinationGenerator *state);
74 : static void generate_combinations(CombinationGenerator *state);
75 :
76 :
77 : /*
78 : * statext_ndistinct_build
79 : * Compute ndistinct coefficient for the combination of attributes.
80 : *
81 : * This computes the ndistinct estimate using the same estimator used
82 : * in analyze.c and then computes the coefficient.
83 : *
84 : * To handle expressions easily, we treat them as system attributes with
85 : * negative attnums, and offset everything by number of expressions to
86 : * allow using Bitmapsets.
87 : */
88 : MVNDistinct *
744 tomas.vondra 89 CBC 78 : statext_ndistinct_build(double totalrows, StatsBuildData *data)
90 : {
91 : MVNDistinct *result;
92 : int k;
93 : int itemcnt;
94 78 : int numattrs = data->nattnums;
2207 alvherre 95 78 : int numcombs = num_combinations(numattrs);
96 :
97 78 : result = palloc(offsetof(MVNDistinct, items) +
98 78 : numcombs * sizeof(MVNDistinctItem));
99 78 : result->magic = STATS_NDISTINCT_MAGIC;
100 78 : result->type = STATS_NDISTINCT_TYPE_BASIC;
101 78 : result->nitems = numcombs;
102 :
103 78 : itemcnt = 0;
104 198 : for (k = 2; k <= numattrs; k++)
105 : {
106 : int *combination;
107 : CombinationGenerator *generator;
108 :
109 : /* generate combinations of K out of N elements */
110 120 : generator = generator_init(numattrs, k);
111 :
112 372 : while ((combination = generator_next(generator)))
113 : {
114 252 : MVNDistinctItem *item = &result->items[itemcnt];
115 : int j;
116 :
744 tomas.vondra 117 252 : item->attributes = palloc(sizeof(AttrNumber) * k);
118 252 : item->nattributes = k;
119 :
120 : /* translate the indexes to attnums */
2207 alvherre 121 846 : for (j = 0; j < k; j++)
122 : {
744 tomas.vondra 123 594 : item->attributes[j] = data->attnums[combination[j]];
124 :
125 594 : Assert(AttributeNumberIsValid(item->attributes[j]));
126 : }
127 :
2207 alvherre 128 252 : item->ndistinct =
744 tomas.vondra 129 252 : ndistinct_for_combination(totalrows, data, k, combination);
130 :
2207 alvherre 131 252 : itemcnt++;
132 252 : Assert(itemcnt <= result->nitems);
133 : }
134 :
135 120 : generator_free(generator);
136 : }
137 :
138 : /* must consume exactly the whole output array */
139 78 : Assert(itemcnt == result->nitems);
140 :
141 78 : return result;
142 : }
143 :
144 : /*
145 : * statext_ndistinct_load
146 : * Load the ndistinct value for the indicated pg_statistic_ext tuple
147 : */
148 : MVNDistinct *
448 tomas.vondra 149 201 : statext_ndistinct_load(Oid mvoid, bool inh)
150 : {
151 : MVNDistinct *result;
152 : bool isnull;
153 : Datum ndist;
154 : HeapTuple htup;
155 :
156 201 : htup = SearchSysCache2(STATEXTDATASTXOID,
157 : ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
1803 tgl 158 201 : if (!HeapTupleIsValid(htup))
2156 tgl 159 UBC 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
160 :
1396 tomas.vondra 161 CBC 201 : ndist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
162 : Anum_pg_statistic_ext_data_stxdndistinct, &isnull);
2207 alvherre 163 201 : if (isnull)
2207 alvherre 164 UBC 0 : elog(ERROR,
165 : "requested statistics kind \"%c\" is not yet built for statistics object %u",
166 : STATS_EXT_NDISTINCT, mvoid);
167 :
1803 tgl 168 CBC 201 : result = statext_ndistinct_deserialize(DatumGetByteaPP(ndist));
169 :
2207 alvherre 170 201 : ReleaseSysCache(htup);
171 :
1803 tgl 172 201 : return result;
173 : }
174 :
175 : /*
176 : * statext_ndistinct_serialize
177 : * serialize ndistinct to the on-disk bytea format
178 : */
179 : bytea *
2207 alvherre 180 78 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
181 : {
182 : int i;
183 : bytea *output;
184 : char *tmp;
185 : Size len;
186 :
187 78 : Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
188 78 : Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
189 :
190 : /*
191 : * Base size is size of scalar fields in the struct, plus one base struct
192 : * for each item, including number of items for each.
193 : */
1449 tomas.vondra 194 78 : len = VARHDRSZ + SizeOfHeader;
195 :
196 : /* and also include space for the actual attribute numbers */
2207 alvherre 197 330 : for (i = 0; i < ndistinct->nitems; i++)
198 : {
199 : int nmembers;
200 :
744 tomas.vondra 201 252 : nmembers = ndistinct->items[i].nattributes;
2207 alvherre 202 252 : Assert(nmembers >= 2);
203 :
1449 tomas.vondra 204 252 : len += SizeOfItem(nmembers);
205 : }
206 :
2207 alvherre 207 78 : output = (bytea *) palloc(len);
208 78 : SET_VARSIZE(output, len);
209 :
210 78 : tmp = VARDATA(output);
211 :
212 : /* Store the base struct values (magic, type, nitems) */
2204 213 78 : memcpy(tmp, &ndistinct->magic, sizeof(uint32));
214 78 : tmp += sizeof(uint32);
215 78 : memcpy(tmp, &ndistinct->type, sizeof(uint32));
216 78 : tmp += sizeof(uint32);
217 78 : memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
218 78 : tmp += sizeof(uint32);
219 :
220 : /*
221 : * store number of attributes and attribute numbers for each entry
222 : */
2207 223 330 : for (i = 0; i < ndistinct->nitems; i++)
224 : {
225 252 : MVNDistinctItem item = ndistinct->items[i];
744 tomas.vondra 226 252 : int nmembers = item.nattributes;
227 :
2207 alvherre 228 252 : memcpy(tmp, &item.ndistinct, sizeof(double));
229 252 : tmp += sizeof(double);
230 252 : memcpy(tmp, &nmembers, sizeof(int));
231 252 : tmp += sizeof(int);
232 :
744 tomas.vondra 233 252 : memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
234 252 : tmp += nmembers * sizeof(AttrNumber);
235 :
236 : /* protect against overflows */
2207 alvherre 237 252 : Assert(tmp <= ((char *) output + len));
238 : }
239 :
240 : /* check we used exactly the expected space */
1449 tomas.vondra 241 78 : Assert(tmp == ((char *) output + len));
242 :
2207 alvherre 243 78 : return output;
244 : }
245 :
246 : /*
247 : * statext_ndistinct_deserialize
248 : * Read an on-disk bytea format MVNDistinct to in-memory format
249 : */
250 : MVNDistinct *
251 213 : statext_ndistinct_deserialize(bytea *data)
252 : {
253 : int i;
254 : Size minimum_size;
255 : MVNDistinct ndist;
256 : MVNDistinct *ndistinct;
257 : char *tmp;
258 :
259 213 : if (data == NULL)
2207 alvherre 260 UBC 0 : return NULL;
261 :
262 : /* we expect at least the basic fields of MVNDistinct struct */
1449 tomas.vondra 263 CBC 213 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
44 peter 264 UNC 0 : elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
265 : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
266 :
267 : /* initialize pointer to the data part (skip the varlena header) */
2207 alvherre 268 CBC 213 : tmp = VARDATA_ANY(data);
269 :
270 : /* read the header fields and perform basic sanity checks */
2204 271 213 : memcpy(&ndist.magic, tmp, sizeof(uint32));
272 213 : tmp += sizeof(uint32);
273 213 : memcpy(&ndist.type, tmp, sizeof(uint32));
274 213 : tmp += sizeof(uint32);
275 213 : memcpy(&ndist.nitems, tmp, sizeof(uint32));
276 213 : tmp += sizeof(uint32);
277 :
278 213 : if (ndist.magic != STATS_NDISTINCT_MAGIC)
1410 tomas.vondra 279 UBC 0 : elog(ERROR, "invalid ndistinct magic %08x (expected %08x)",
280 : ndist.magic, STATS_NDISTINCT_MAGIC);
2204 alvherre 281 CBC 213 : if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
1410 tomas.vondra 282 UBC 0 : elog(ERROR, "invalid ndistinct type %d (expected %d)",
283 : ndist.type, STATS_NDISTINCT_TYPE_BASIC);
2204 alvherre 284 CBC 213 : if (ndist.nitems == 0)
1410 tomas.vondra 285 UBC 0 : elog(ERROR, "invalid zero-length item array in MVNDistinct");
286 :
287 : /* what minimum bytea size do we expect for those parameters */
1449 tomas.vondra 288 CBC 213 : minimum_size = MinSizeOfItems(ndist.nitems);
2204 alvherre 289 213 : if (VARSIZE_ANY_EXHDR(data) < minimum_size)
44 peter 290 UNC 0 : elog(ERROR, "invalid MVNDistinct size %zu (expected at least %zu)",
291 : VARSIZE_ANY_EXHDR(data), minimum_size);
292 :
293 : /*
294 : * Allocate space for the ndistinct items (no space for each item's
295 : * attnos: those live in bitmapsets allocated separately)
296 : */
1449 tomas.vondra 297 CBC 213 : ndistinct = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
2204 alvherre 298 213 : (ndist.nitems * sizeof(MVNDistinctItem)));
299 213 : ndistinct->magic = ndist.magic;
300 213 : ndistinct->type = ndist.type;
301 213 : ndistinct->nitems = ndist.nitems;
302 :
2207 303 1155 : for (i = 0; i < ndistinct->nitems; i++)
304 : {
305 942 : MVNDistinctItem *item = &ndistinct->items[i];
306 :
307 : /* ndistinct value */
308 942 : memcpy(&item->ndistinct, tmp, sizeof(double));
309 942 : tmp += sizeof(double);
310 :
311 : /* number of attributes */
744 tomas.vondra 312 942 : memcpy(&item->nattributes, tmp, sizeof(int));
2207 alvherre 313 942 : tmp += sizeof(int);
744 tomas.vondra 314 942 : Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
315 :
316 : item->attributes
317 942 : = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
318 :
319 942 : memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
320 942 : tmp += sizeof(AttrNumber) * item->nattributes;
321 :
322 : /* still within the bytea */
2207 alvherre 323 942 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
324 : }
325 :
326 : /* we should have consumed the whole bytea exactly */
327 213 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
328 :
329 213 : return ndistinct;
330 : }
331 :
332 : /*
333 : * pg_ndistinct_in
334 : * input routine for type pg_ndistinct
335 : *
336 : * pg_ndistinct is real enough to be a table column, but it has no
337 : * operations of its own, and disallows input (just like pg_node_tree).
338 : */
339 : Datum
2207 alvherre 340 UBC 0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
341 : {
342 0 : ereport(ERROR,
343 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
344 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
345 :
346 : PG_RETURN_VOID(); /* keep compiler quiet */
347 : }
348 :
349 : /*
350 : * pg_ndistinct
351 : * output routine for type pg_ndistinct
352 : *
353 : * Produces a human-readable representation of the value.
354 : */
355 : Datum
2207 alvherre 356 CBC 12 : pg_ndistinct_out(PG_FUNCTION_ARGS)
357 : {
358 12 : bytea *data = PG_GETARG_BYTEA_PP(0);
359 12 : MVNDistinct *ndist = statext_ndistinct_deserialize(data);
360 : int i;
361 : StringInfoData str;
362 :
363 12 : initStringInfo(&str);
2168 364 12 : appendStringInfoChar(&str, '{');
365 :
2207 366 60 : for (i = 0; i < ndist->nitems; i++)
367 : {
368 : int j;
369 48 : MVNDistinctItem item = ndist->items[i];
370 :
371 48 : if (i > 0)
372 36 : appendStringInfoString(&str, ", ");
373 :
744 tomas.vondra 374 156 : for (j = 0; j < item.nattributes; j++)
375 : {
376 108 : AttrNumber attnum = item.attributes[j];
377 :
378 108 : appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
379 : }
2168 alvherre 380 48 : appendStringInfo(&str, "\": %d", (int) item.ndistinct);
381 : }
382 :
383 12 : appendStringInfoChar(&str, '}');
384 :
2207 385 12 : PG_RETURN_CSTRING(str.data);
386 : }
387 :
388 : /*
389 : * pg_ndistinct_recv
390 : * binary input routine for type pg_ndistinct
391 : */
392 : Datum
2207 alvherre 393 UBC 0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
394 : {
395 0 : ereport(ERROR,
396 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
397 : errmsg("cannot accept a value of type %s", "pg_ndistinct")));
398 :
399 : PG_RETURN_VOID(); /* keep compiler quiet */
400 : }
401 :
402 : /*
403 : * pg_ndistinct_send
404 : * binary output routine for type pg_ndistinct
405 : *
406 : * n-distinct is serialized into a bytea value, so let's send that.
407 : */
408 : Datum
409 0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
410 : {
411 0 : return byteasend(fcinfo);
412 : }
413 :
414 : /*
415 : * ndistinct_for_combination
416 : * Estimates number of distinct values in a combination of columns.
417 : *
418 : * This uses the same ndistinct estimator as compute_scalar_stats() in
419 : * ANALYZE, i.e.,
420 : * n*d / (n - f1 + f1*n/N)
421 : *
422 : * except that instead of values in a single column we are dealing with
423 : * combination of multiple columns.
424 : */
425 : static double
744 tomas.vondra 426 CBC 252 : ndistinct_for_combination(double totalrows, StatsBuildData *data,
427 : int k, int *combination)
428 : {
429 : int i,
430 : j;
431 : int f1,
432 : cnt,
433 : d;
434 : bool *isnull;
435 : Datum *values;
436 : SortItem *items;
437 : MultiSortSupport mss;
438 252 : int numrows = data->numrows;
439 :
2207 alvherre 440 252 : mss = multi_sort_init(k);
441 :
442 : /*
443 : * In order to determine the number of distinct elements, create separate
444 : * values[]/isnull[] arrays with all the data we have, then sort them
445 : * using the specified column combination as dimensions. We could try to
446 : * sort in place, but it'd probably be more complex and bug-prone.
447 : */
448 252 : items = (SortItem *) palloc(numrows * sizeof(SortItem));
449 252 : values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
450 252 : isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
451 :
452 241752 : for (i = 0; i < numrows; i++)
453 : {
454 241500 : items[i].values = &values[i * k];
455 241500 : items[i].isnull = &isnull[i * k];
456 : }
457 :
458 : /*
459 : * For each dimension, set up sort-support and fill in the values from the
460 : * sample data.
461 : *
462 : * We use the column data types' default sort operators and collations;
463 : * perhaps at some point it'd be worth using column-specific collations?
464 : */
465 846 : for (i = 0; i < k; i++)
466 : {
467 : Oid typid;
468 : TypeCacheEntry *type;
744 tomas.vondra 469 594 : Oid collid = InvalidOid;
470 594 : VacAttrStats *colstat = data->stats[combination[i]];
471 :
472 594 : typid = colstat->attrtypid;
473 594 : collid = colstat->attrcollid;
474 :
475 594 : type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
2153 bruce 476 594 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
2207 alvherre 477 UBC 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
478 : typid);
479 :
480 : /* prepare the sort function for this dimension */
744 tomas.vondra 481 CBC 594 : multi_sort_add_dimension(mss, i, type->lt_opr, collid);
482 :
483 : /* accumulate all the data for this dimension into the arrays */
2207 alvherre 484 573591 : for (j = 0; j < numrows; j++)
485 : {
744 tomas.vondra 486 572997 : items[j].values[i] = data->values[combination[i]][j];
487 572997 : items[j].isnull[i] = data->nulls[combination[i]][j];
488 : }
489 : }
490 :
491 : /* We can sort the array now ... */
61 peter 492 GNC 252 : qsort_interruptible(items, numrows, sizeof(SortItem),
493 : multi_sort_compare, mss);
494 :
495 : /* ... and count the number of distinct combinations */
496 :
2207 alvherre 497 CBC 252 : f1 = 0;
498 252 : cnt = 1;
499 252 : d = 1;
500 241500 : for (i = 1; i < numrows; i++)
501 : {
502 241248 : if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
503 : {
504 23343 : if (cnt == 1)
505 12909 : f1 += 1;
506 :
507 23343 : d++;
508 23343 : cnt = 0;
509 : }
510 :
511 241248 : cnt += 1;
512 : }
513 :
514 252 : if (cnt == 1)
515 45 : f1 += 1;
516 :
517 252 : return estimate_ndistinct(totalrows, numrows, d, f1);
518 : }
519 :
520 : /* The Duj1 estimator (already used in analyze.c). */
521 : static double
522 252 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
523 : {
524 : double numer,
525 : denom,
526 : ndistinct;
527 :
2118 tgl 528 252 : numer = (double) numrows * (double) d;
529 :
2207 alvherre 530 252 : denom = (double) (numrows - f1) +
2118 tgl 531 252 : (double) f1 * (double) numrows / totalrows;
532 :
2207 alvherre 533 252 : ndistinct = numer / denom;
534 :
535 : /* Clamp to sane range in case of roundoff error */
536 252 : if (ndistinct < (double) d)
2207 alvherre 537 UBC 0 : ndistinct = (double) d;
538 :
2207 alvherre 539 CBC 252 : if (ndistinct > totalrows)
2207 alvherre 540 UBC 0 : ndistinct = totalrows;
541 :
2207 alvherre 542 CBC 252 : return floor(ndistinct + 0.5);
543 : }
544 :
545 : /*
546 : * n_choose_k
547 : * computes binomial coefficients using an algorithm that is both
548 : * efficient and prevents overflows
549 : */
550 : static int
551 120 : n_choose_k(int n, int k)
552 : {
553 : int d,
554 : r;
555 :
556 120 : Assert((k > 0) && (n >= k));
557 :
558 : /* use symmetry of the binomial coefficients */
559 120 : k = Min(k, n - k);
560 :
561 120 : r = 1;
562 174 : for (d = 1; d <= k; ++d)
563 : {
564 54 : r *= n--;
565 54 : r /= d;
566 : }
567 :
568 120 : return r;
569 : }
570 :
571 : /*
572 : * num_combinations
573 : * number of combinations, excluding single-value combinations
574 : */
575 : static int
576 78 : num_combinations(int n)
577 : {
1096 drowley 578 78 : return (1 << n) - (n + 1);
579 : }
580 :
581 : /*
582 : * generator_init
583 : * initialize the generator of combinations
584 : *
585 : * The generator produces combinations of K elements in the interval (0..N).
586 : * We prebuild all the combinations in this method, which is simpler than
587 : * generating them on the fly.
588 : */
589 : static CombinationGenerator *
2207 alvherre 590 120 : generator_init(int n, int k)
591 : {
592 : CombinationGenerator *state;
593 :
594 120 : Assert((n >= k) && (k > 0));
595 :
596 : /* allocate the generator state as a single chunk of memory */
597 120 : state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
598 :
599 120 : state->ncombinations = n_choose_k(n, k);
600 :
601 : /* pre-allocate space for all combinations */
602 120 : state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
603 :
604 120 : state->current = 0;
605 120 : state->k = k;
606 120 : state->n = n;
607 :
608 : /* now actually pre-generate all the combinations of K elements */
609 120 : generate_combinations(state);
610 :
611 : /* make sure we got the expected number of combinations */
612 120 : Assert(state->current == state->ncombinations);
613 :
614 : /* reset the number, so we start with the first one */
615 120 : state->current = 0;
616 :
617 120 : return state;
618 : }
619 :
620 : /*
621 : * generator_next
622 : * returns the next combination from the prebuilt list
623 : *
624 : * Returns a combination of K array indexes (0 .. N), as specified to
625 : * generator_init), or NULL when there are no more combination.
626 : */
627 : static int *
628 372 : generator_next(CombinationGenerator *state)
629 : {
630 372 : if (state->current == state->ncombinations)
631 120 : return NULL;
632 :
633 252 : return &state->combinations[state->k * state->current++];
634 : }
635 :
636 : /*
637 : * generator_free
638 : * free the internal state of the generator
639 : *
640 : * Releases the generator internal state (pre-built combinations).
641 : */
642 : static void
643 120 : generator_free(CombinationGenerator *state)
644 : {
645 120 : pfree(state->combinations);
646 120 : pfree(state);
647 120 : }
648 :
649 : /*
650 : * generate_combinations_recurse
651 : * given a prefix, generate all possible combinations
652 : *
653 : * Given a prefix (first few elements of the combination), generate following
654 : * elements recursively. We generate the combinations in lexicographic order,
655 : * which eliminates permutations of the same combination.
656 : */
657 : static void
658 966 : generate_combinations_recurse(CombinationGenerator *state,
659 : int index, int start, int *current)
660 : {
661 : /* If we haven't filled all the elements, simply recurse. */
662 966 : if (index < state->k)
663 : {
664 : int i;
665 :
666 : /*
667 : * The values have to be in ascending order, so make sure we start
668 : * with the value passed by parameter.
669 : */
670 :
671 1560 : for (i = start; i < state->n; i++)
672 : {
673 846 : current[index] = i;
674 846 : generate_combinations_recurse(state, (index + 1), (i + 1), current);
675 : }
676 :
677 714 : return;
678 : }
679 : else
680 : {
681 : /* we got a valid combination, add it to the array */
682 252 : memcpy(&state->combinations[(state->k * state->current)],
683 252 : current, state->k * sizeof(int));
684 252 : state->current++;
685 : }
686 : }
687 :
688 : /*
689 : * generate_combinations
690 : * generate all k-combinations of N elements
691 : */
692 : static void
693 120 : generate_combinations(CombinationGenerator *state)
694 : {
2153 bruce 695 120 : int *current = (int *) palloc0(sizeof(int) * state->k);
696 :
2207 alvherre 697 120 : generate_combinations_recurse(state, 0, 0, current);
698 :
699 120 : pfree(current);
700 120 : }
|