TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bitmapset.c
4 : * PostgreSQL generic bitmap set package
5 : *
6 : * A bitmap set can represent any set of nonnegative integers, although
7 : * it is mainly intended for sets where the maximum value is not large,
8 : * say at most a few hundred. By convention, we always represent the
9 : * empty set by a NULL pointer.
10 : *
11 : *
12 : * Copyright (c) 2003-2023, PostgreSQL Global Development Group
13 : *
14 : * IDENTIFICATION
15 : * src/backend/nodes/bitmapset.c
16 : *
17 : *-------------------------------------------------------------------------
18 : */
19 : #include "postgres.h"
20 :
21 : #include "common/hashfn.h"
22 : #include "nodes/bitmapset.h"
23 : #include "nodes/pg_list.h"
24 : #include "port/pg_bitutils.h"
25 :
26 :
27 : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
28 : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
29 :
30 : #define BITMAPSET_SIZE(nwords) \
31 : (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword))
32 :
33 : /*----------
34 : * This is a well-known cute trick for isolating the rightmost one-bit
35 : * in a word. It assumes two's complement arithmetic. Consider any
36 : * nonzero value, and focus attention on the rightmost one. The value is
37 : * then something like
38 : * xxxxxx10000
39 : * where x's are unspecified bits. The two's complement negative is formed
40 : * by inverting all the bits and adding one. Inversion gives
41 : * yyyyyy01111
42 : * where each y is the inverse of the corresponding x. Incrementing gives
43 : * yyyyyy10000
44 : * and then ANDing with the original value gives
45 : * 00000010000
46 : * This works for all cases except original value = zero, where of course
47 : * we get zero.
48 : *----------
49 : */
50 : #define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x)))
51 :
52 : #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
53 :
54 : /* Select appropriate bit-twiddling functions for bitmap word size */
55 : #if BITS_PER_BITMAPWORD == 32
56 : #define bmw_leftmost_one_pos(w) pg_leftmost_one_pos32(w)
57 : #define bmw_rightmost_one_pos(w) pg_rightmost_one_pos32(w)
58 : #define bmw_popcount(w) pg_popcount32(w)
59 : #elif BITS_PER_BITMAPWORD == 64
60 : #define bmw_leftmost_one_pos(w) pg_leftmost_one_pos64(w)
61 : #define bmw_rightmost_one_pos(w) pg_rightmost_one_pos64(w)
62 : #define bmw_popcount(w) pg_popcount64(w)
63 : #else
64 : #error "invalid BITS_PER_BITMAPWORD"
65 : #endif
66 :
67 : static bool bms_is_empty_internal(const Bitmapset *a);
68 :
69 :
70 : /*
71 : * bms_copy - make a palloc'd copy of a bitmapset
72 : */
73 : Bitmapset *
74 CBC 13733411 : bms_copy(const Bitmapset *a)
75 : {
76 : Bitmapset *result;
77 : size_t size;
78 :
79 13733411 : if (a == NULL)
80 7055322 : return NULL;
81 6678089 : size = BITMAPSET_SIZE(a->nwords);
82 6678089 : result = (Bitmapset *) palloc(size);
83 6678089 : memcpy(result, a, size);
84 6678089 : return result;
85 : }
86 :
87 : /*
88 : * bms_equal - are two bitmapsets equal?
89 : *
90 : * This is logical not physical equality; in particular, a NULL pointer will
91 : * be reported as equal to a palloc'd value containing no members.
92 : */
93 : bool
94 4242628 : bms_equal(const Bitmapset *a, const Bitmapset *b)
95 : {
96 : const Bitmapset *shorter;
97 : const Bitmapset *longer;
98 : int shortlen;
99 : int longlen;
100 : int i;
101 :
102 : /* Handle cases where either input is NULL */
103 4242628 : if (a == NULL)
104 : {
105 2413524 : if (b == NULL)
106 2380390 : return true;
107 GNC 33134 : return false;
108 : }
109 CBC 1829104 : else if (b == NULL)
110 GNC 8709 : return false;
111 : /* Identify shorter and longer input */
112 CBC 1820395 : if (a->nwords <= b->nwords)
113 : {
114 1820395 : shorter = a;
115 1820395 : longer = b;
116 : }
117 : else
118 : {
119 UBC 0 : shorter = b;
120 0 : longer = a;
121 : }
122 : /* And process */
123 CBC 1820395 : shortlen = shorter->nwords;
124 2577213 : for (i = 0; i < shortlen; i++)
125 : {
126 1820395 : if (shorter->words[i] != longer->words[i])
127 1063577 : return false;
128 : }
129 756818 : longlen = longer->nwords;
130 756818 : for (; i < longlen; i++)
131 : {
132 UBC 0 : if (longer->words[i] != 0)
133 0 : return false;
134 : }
135 CBC 756818 : return true;
136 : }
137 :
138 : /*
139 : * bms_compare - qsort-style comparator for bitmapsets
140 : *
141 : * This guarantees to report values as equal iff bms_equal would say they are
142 : * equal. Otherwise, the highest-numbered bit that is set in one value but
143 : * not the other determines the result. (This rule means that, for example,
144 : * {6} is greater than {5}, which seems plausible.)
145 : */
146 : int
147 9710 : bms_compare(const Bitmapset *a, const Bitmapset *b)
148 : {
149 : int shortlen;
150 : int i;
151 :
152 : /* Handle cases where either input is NULL */
153 9710 : if (a == NULL)
154 UNC 0 : return (b == NULL) ? 0 : -1;
155 CBC 9710 : else if (b == NULL)
156 UNC 0 : return +1;
157 : /* Handle cases where one input is longer than the other */
158 CBC 9710 : shortlen = Min(a->nwords, b->nwords);
159 9710 : for (i = shortlen; i < a->nwords; i++)
160 : {
161 UBC 0 : if (a->words[i] != 0)
162 0 : return +1;
163 : }
164 CBC 9710 : for (i = shortlen; i < b->nwords; i++)
165 : {
166 UBC 0 : if (b->words[i] != 0)
167 0 : return -1;
168 : }
169 : /* Process words in common */
170 CBC 9710 : i = shortlen;
171 9710 : while (--i >= 0)
172 : {
173 9710 : bitmapword aw = a->words[i];
174 9710 : bitmapword bw = b->words[i];
175 :
176 9710 : if (aw != bw)
177 9710 : return (aw > bw) ? +1 : -1;
178 : }
179 UBC 0 : return 0;
180 : }
181 :
182 : /*
183 : * bms_make_singleton - build a bitmapset containing a single member
184 : */
185 : Bitmapset *
186 CBC 5503461 : bms_make_singleton(int x)
187 : {
188 : Bitmapset *result;
189 : int wordnum,
190 : bitnum;
191 :
192 5503461 : if (x < 0)
193 UBC 0 : elog(ERROR, "negative bitmapset member not allowed");
194 CBC 5503461 : wordnum = WORDNUM(x);
195 5503461 : bitnum = BITNUM(x);
196 5503461 : result = (Bitmapset *) palloc0(BITMAPSET_SIZE(wordnum + 1));
197 GNC 5503461 : result->type = T_Bitmapset;
198 CBC 5503461 : result->nwords = wordnum + 1;
199 5503461 : result->words[wordnum] = ((bitmapword) 1 << bitnum);
200 5503461 : return result;
201 ECB : }
202 :
203 : /*
204 : * bms_free - free a bitmapset
205 : *
206 : * Same as pfree except for allowing NULL input
207 : */
208 : void
209 GIC 11634462 : bms_free(Bitmapset *a)
210 ECB : {
211 GIC 11634462 : if (a)
212 CBC 4036168 : pfree(a);
213 11634462 : }
214 ECB :
215 :
216 : /*
217 : * These operations all make a freshly palloc'd result,
218 : * leaving their inputs untouched
219 : */
220 :
221 :
222 : /*
223 : * bms_union - set union
224 : */
225 : Bitmapset *
226 GIC 3446165 : bms_union(const Bitmapset *a, const Bitmapset *b)
227 ECB : {
228 : Bitmapset *result;
229 : const Bitmapset *other;
230 : int otherlen;
231 : int i;
232 :
233 : /* Handle cases where either input is NULL */
234 GIC 3446165 : if (a == NULL)
235 CBC 2347279 : return bms_copy(b);
236 1098886 : if (b == NULL)
237 508694 : return bms_copy(a);
238 ECB : /* Identify shorter and longer input; copy the longer one */
239 GIC 590192 : if (a->nwords <= b->nwords)
240 ECB : {
241 GIC 590192 : result = bms_copy(b);
242 CBC 590192 : other = a;
243 ECB : }
244 : else
245 : {
246 UIC 0 : result = bms_copy(a);
247 UBC 0 : other = b;
248 EUB : }
249 : /* And union the shorter input into the result */
250 GIC 590192 : otherlen = other->nwords;
251 CBC 1180384 : for (i = 0; i < otherlen; i++)
252 590192 : result->words[i] |= other->words[i];
253 590192 : return result;
254 ECB : }
255 :
256 : /*
257 : * bms_intersect - set intersection
258 : */
259 : Bitmapset *
260 GIC 1450076 : bms_intersect(const Bitmapset *a, const Bitmapset *b)
261 ECB : {
262 : Bitmapset *result;
263 : const Bitmapset *other;
264 : int resultlen;
265 : int i;
266 :
267 : /* Handle cases where either input is NULL */
268 GIC 1450076 : if (a == NULL || b == NULL)
269 CBC 758932 : return NULL;
270 ECB : /* Identify shorter and longer input; copy the shorter one */
271 GIC 691144 : if (a->nwords <= b->nwords)
272 ECB : {
273 GIC 691144 : result = bms_copy(a);
274 CBC 691144 : other = b;
275 ECB : }
276 : else
277 : {
278 UIC 0 : result = bms_copy(b);
279 UBC 0 : other = a;
280 EUB : }
281 : /* And intersect the longer input with the result */
282 GIC 691144 : resultlen = result->nwords;
283 CBC 1382288 : for (i = 0; i < resultlen; i++)
284 691144 : result->words[i] &= other->words[i];
285 : /* If we computed an empty result, we must return NULL */
286 GNC 691144 : if (bms_is_empty_internal(result))
287 : {
288 6224 : pfree(result);
289 6224 : return NULL;
290 : }
291 CBC 684920 : return result;
292 : }
293 ECB :
294 : /*
295 : * bms_difference - set difference (ie, A without members of B)
296 : */
297 : Bitmapset *
298 CBC 1221212 : bms_difference(const Bitmapset *a, const Bitmapset *b)
299 : {
300 : Bitmapset *result;
301 : int shortlen;
302 : int i;
303 :
304 : /* Handle cases where either input is NULL */
305 1221212 : if (a == NULL)
306 GIC 643548 : return NULL;
307 577664 : if (b == NULL)
308 198693 : return bms_copy(a);
309 :
310 : /*
311 : * In Postgres' usage, an empty result is a very common case, so it's
312 : * worth optimizing for that by testing bms_nonempty_difference(). This
313 : * saves us a palloc/pfree cycle compared to checking after-the-fact.
314 : */
315 GNC 378971 : if (!bms_nonempty_difference(a, b))
316 307662 : return NULL;
317 :
318 : /* Copy the left input */
319 GIC 71309 : result = bms_copy(a);
320 : /* And remove b's bits from result */
321 CBC 71309 : shortlen = Min(a->nwords, b->nwords);
322 142618 : for (i = 0; i < shortlen; i++)
323 71309 : result->words[i] &= ~b->words[i];
324 : /* Need not check for empty result, since we handled that case above */
325 71309 : return result;
326 : }
327 :
328 : /*
329 : * bms_is_subset - is A a subset of B?
330 : */
331 : bool
332 8875317 : bms_is_subset(const Bitmapset *a, const Bitmapset *b)
333 ECB : {
334 : int shortlen;
335 : int longlen;
336 : int i;
337 :
338 : /* Handle cases where either input is NULL */
339 CBC 8875317 : if (a == NULL)
340 2015013 : return true; /* empty set is a subset of anything */
341 GIC 6860304 : if (b == NULL)
342 GNC 153793 : return false;
343 : /* Check common words */
344 GIC 6706511 : shortlen = Min(a->nwords, b->nwords);
345 10981901 : for (i = 0; i < shortlen; i++)
346 : {
347 6706511 : if ((a->words[i] & ~b->words[i]) != 0)
348 2431121 : return false;
349 ECB : }
350 : /* Check extra words */
351 GIC 4275390 : if (a->nwords > b->nwords)
352 : {
353 UIC 0 : longlen = a->nwords;
354 0 : for (; i < longlen; i++)
355 : {
356 LBC 0 : if (a->words[i] != 0)
357 0 : return false;
358 ECB : }
359 : }
360 GIC 4275390 : return true;
361 ECB : }
362 :
363 : /*
364 : * bms_subset_compare - compare A and B for equality/subset relationships
365 : *
366 : * This is more efficient than testing bms_is_subset in both directions.
367 : */
368 : BMS_Comparison
369 GIC 848836 : bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
370 EUB : {
371 : BMS_Comparison result;
372 : int shortlen;
373 : int longlen;
374 : int i;
375 :
376 : /* Handle cases where either input is NULL */
377 CBC 848836 : if (a == NULL)
378 : {
379 GIC 716183 : if (b == NULL)
380 686566 : return BMS_EQUAL;
381 GNC 29617 : return BMS_SUBSET1;
382 : }
383 GIC 132653 : if (b == NULL)
384 GNC 62632 : return BMS_SUBSET2;
385 : /* Check common words */
386 CBC 70021 : result = BMS_EQUAL; /* status so far */
387 GIC 70021 : shortlen = Min(a->nwords, b->nwords);
388 124444 : for (i = 0; i < shortlen; i++)
389 : {
390 70021 : bitmapword aword = a->words[i];
391 70021 : bitmapword bword = b->words[i];
392 :
393 70021 : if ((aword & ~bword) != 0)
394 ECB : {
395 : /* a is not a subset of b */
396 CBC 16737 : if (result == BMS_SUBSET1)
397 LBC 0 : return BMS_DIFFERENT;
398 CBC 16737 : result = BMS_SUBSET2;
399 : }
400 70021 : if ((bword & ~aword) != 0)
401 ECB : {
402 : /* b is not a subset of a */
403 CBC 16773 : if (result == BMS_SUBSET2)
404 15598 : return BMS_DIFFERENT;
405 1175 : result = BMS_SUBSET1;
406 : }
407 ECB : }
408 : /* Check extra words */
409 GIC 54423 : if (a->nwords > b->nwords)
410 ECB : {
411 UIC 0 : longlen = a->nwords;
412 0 : for (; i < longlen; i++)
413 ECB : {
414 UBC 0 : if (a->words[i] != 0)
415 ECB : {
416 : /* a is not a subset of b */
417 LBC 0 : if (result == BMS_SUBSET1)
418 UIC 0 : return BMS_DIFFERENT;
419 0 : result = BMS_SUBSET2;
420 ECB : }
421 : }
422 : }
423 GIC 54423 : else if (a->nwords < b->nwords)
424 : {
425 UIC 0 : longlen = b->nwords;
426 LBC 0 : for (; i < longlen; i++)
427 : {
428 UBC 0 : if (b->words[i] != 0)
429 EUB : {
430 : /* b is not a subset of a */
431 UBC 0 : if (result == BMS_SUBSET2)
432 UIC 0 : return BMS_DIFFERENT;
433 0 : result = BMS_SUBSET1;
434 EUB : }
435 : }
436 : }
437 GIC 54423 : return result;
438 : }
439 :
440 ECB : /*
441 : * bms_is_member - is X a member of A?
442 EUB : */
443 : bool
444 GIC 6831270 : bms_is_member(int x, const Bitmapset *a)
445 EUB : {
446 : int wordnum,
447 : bitnum;
448 :
449 : /* XXX better to just return false for x<0 ? */
450 GBC 6831270 : if (x < 0)
451 UIC 0 : elog(ERROR, "negative bitmapset member not allowed");
452 GIC 6831270 : if (a == NULL)
453 4226728 : return false;
454 CBC 2604542 : wordnum = WORDNUM(x);
455 GIC 2604542 : bitnum = BITNUM(x);
456 2604542 : if (wordnum >= a->nwords)
457 113 : return false;
458 2604429 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
459 1685048 : return true;
460 919381 : return false;
461 ECB : }
462 :
463 : /*
464 : * bms_member_index
465 : * determine 0-based index of member x in the bitmap
466 : *
467 : * Returns (-1) when x is not a member.
468 EUB : */
469 ECB : int
470 CBC 2055 : bms_member_index(Bitmapset *a, int x)
471 ECB : {
472 : int i;
473 : int bitnum;
474 : int wordnum;
475 CBC 2055 : int result = 0;
476 ECB : bitmapword mask;
477 :
478 : /* return -1 if not a member of the bitmap */
479 GIC 2055 : if (!bms_is_member(x, a))
480 UIC 0 : return -1;
481 :
482 GIC 2055 : wordnum = WORDNUM(x);
483 2055 : bitnum = BITNUM(x);
484 :
485 : /* count bits in preceding words */
486 2055 : for (i = 0; i < wordnum; i++)
487 ECB : {
488 UIC 0 : bitmapword w = a->words[i];
489 :
490 : /* No need to count the bits in a zero word */
491 0 : if (w != 0)
492 LBC 0 : result += bmw_popcount(w);
493 : }
494 :
495 : /*
496 ECB : * Now add bits of the last word, but only those before the item. We can
497 EUB : * do that by applying a mask and then using popcount again. To get
498 : * 0-based index, we want to count only preceding bits, not the item
499 ECB : * itself, so we subtract 1.
500 : */
501 GIC 2055 : mask = ((bitmapword) 1 << bitnum) - 1;
502 2055 : result += bmw_popcount(a->words[wordnum] & mask);
503 ECB :
504 GIC 2055 : return result;
505 EUB : }
506 :
507 : /*
508 : * bms_overlap - do sets overlap (ie, have a nonempty intersection)?
509 : */
510 : bool
511 GIC 9122799 : bms_overlap(const Bitmapset *a, const Bitmapset *b)
512 : {
513 : int shortlen;
514 : int i;
515 :
516 : /* Handle cases where either input is NULL */
517 9122799 : if (a == NULL || b == NULL)
518 CBC 5529143 : return false;
519 ECB : /* Check words in common */
520 GIC 3593656 : shortlen = Min(a->nwords, b->nwords);
521 CBC 5080052 : for (i = 0; i < shortlen; i++)
522 : {
523 GIC 3593656 : if ((a->words[i] & b->words[i]) != 0)
524 2107260 : return true;
525 : }
526 1486396 : return false;
527 : }
528 ECB :
529 : /*
530 : * bms_overlap_list - does a set overlap an integer list?
531 : */
532 : bool
533 GIC 653 : bms_overlap_list(const Bitmapset *a, const List *b)
534 ECB : {
535 : ListCell *lc;
536 : int wordnum,
537 : bitnum;
538 :
539 GIC 653 : if (a == NULL || b == NIL)
540 CBC 584 : return false;
541 ECB :
542 GIC 129 : foreach(lc, b)
543 ECB : {
544 GIC 99 : int x = lfirst_int(lc);
545 :
546 99 : if (x < 0)
547 UIC 0 : elog(ERROR, "negative bitmapset member not allowed");
548 GIC 99 : wordnum = WORDNUM(x);
549 99 : bitnum = BITNUM(x);
550 CBC 99 : if (wordnum < a->nwords)
551 GIC 99 : if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
552 39 : return true;
553 : }
554 :
555 30 : return false;
556 ECB : }
557 :
558 : /*
559 : * bms_nonempty_difference - do sets have a nonempty difference?
560 : *
561 : * i.e., are any members set in 'a' that are not also set in 'b'.
562 : */
563 : bool
564 GBC 1147177 : bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
565 ECB : {
566 : int shortlen;
567 : int i;
568 :
569 : /* Handle cases where either input is NULL */
570 GIC 1147177 : if (a == NULL)
571 36 : return false;
572 CBC 1147141 : if (b == NULL)
573 UNC 0 : return true;
574 : /* Check words in common */
575 GIC 1147141 : shortlen = Min(a->nwords, b->nwords);
576 1793994 : for (i = 0; i < shortlen; i++)
577 : {
578 1147141 : if ((a->words[i] & ~b->words[i]) != 0)
579 500288 : return true;
580 : }
581 ECB : /* Check extra words in a */
582 GIC 646853 : for (; i < a->nwords; i++)
583 : {
584 UIC 0 : if (a->words[i] != 0)
585 0 : return true;
586 : }
587 CBC 646853 : return false;
588 ECB : }
589 :
590 EUB : /*
591 : * bms_singleton_member - return the sole integer member of set
592 ECB : *
593 : * Raises error if |a| is not 1.
594 : */
595 : int
596 CBC 176398 : bms_singleton_member(const Bitmapset *a)
597 : {
598 GIC 176398 : int result = -1;
599 ECB : int nwords;
600 : int wordnum;
601 EUB :
602 GBC 176398 : if (a == NULL)
603 UIC 0 : elog(ERROR, "bitmapset is empty");
604 CBC 176398 : nwords = a->nwords;
605 GIC 352796 : for (wordnum = 0; wordnum < nwords; wordnum++)
606 : {
607 176398 : bitmapword w = a->words[wordnum];
608 :
609 176398 : if (w != 0)
610 : {
611 176398 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
612 UIC 0 : elog(ERROR, "bitmapset has multiple members");
613 CBC 176398 : result = wordnum * BITS_PER_BITMAPWORD;
614 GIC 176398 : result += bmw_rightmost_one_pos(w);
615 ECB : }
616 : }
617 GIC 176398 : if (result < 0)
618 UIC 0 : elog(ERROR, "bitmapset is empty");
619 CBC 176398 : return result;
620 EUB : }
621 ECB :
622 : /*
623 : * bms_get_singleton_member
624 : *
625 : * Test whether the given set is a singleton.
626 : * If so, set *member to the value of its sole member, and return true.
627 : * If not, return false, without changing *member.
628 : *
629 EUB : * This is more convenient and faster than calling bms_membership() and then
630 ECB : * bms_singleton_member(), if we don't care about distinguishing empty sets
631 : * from multiple-member sets.
632 : */
633 : bool
634 CBC 481321 : bms_get_singleton_member(const Bitmapset *a, int *member)
635 EUB : {
636 CBC 481321 : int result = -1;
637 : int nwords;
638 : int wordnum;
639 :
640 GIC 481321 : if (a == NULL)
641 12 : return false;
642 481309 : nwords = a->nwords;
643 850023 : for (wordnum = 0; wordnum < nwords; wordnum++)
644 : {
645 481309 : bitmapword w = a->words[wordnum];
646 :
647 481309 : if (w != 0)
648 : {
649 481309 : if (result >= 0 || HAS_MULTIPLE_ONES(w))
650 112595 : return false;
651 CBC 368714 : result = wordnum * BITS_PER_BITMAPWORD;
652 GIC 368714 : result += bmw_rightmost_one_pos(w);
653 ECB : }
654 : }
655 GIC 368714 : if (result < 0)
656 UIC 0 : return false;
657 CBC 368714 : *member = result;
658 368714 : return true;
659 ECB : }
660 :
661 : /*
662 : * bms_num_members - count members of set
663 : */
664 : int
665 GIC 768996 : bms_num_members(const Bitmapset *a)
666 ECB : {
667 CBC 768996 : int result = 0;
668 ECB : int nwords;
669 : int wordnum;
670 :
671 GIC 768996 : if (a == NULL)
672 CBC 66885 : return 0;
673 GBC 702111 : nwords = a->nwords;
674 CBC 1404222 : for (wordnum = 0; wordnum < nwords; wordnum++)
675 ECB : {
676 GIC 702111 : bitmapword w = a->words[wordnum];
677 :
678 : /* No need to count the bits in a zero word */
679 702111 : if (w != 0)
680 702111 : result += bmw_popcount(w);
681 : }
682 CBC 702111 : return result;
683 : }
684 ECB :
685 : /*
686 : * bms_membership - does a set have zero, one, or multiple members?
687 : *
688 : * This is faster than making an exact count with bms_num_members().
689 : */
690 : BMS_Membership
691 CBC 862657 : bms_membership(const Bitmapset *a)
692 : {
693 862657 : BMS_Membership result = BMS_EMPTY_SET;
694 : int nwords;
695 : int wordnum;
696 ECB :
697 CBC 862657 : if (a == NULL)
698 GIC 158613 : return BMS_EMPTY_SET;
699 CBC 704044 : nwords = a->nwords;
700 GIC 1290873 : for (wordnum = 0; wordnum < nwords; wordnum++)
701 : {
702 704044 : bitmapword w = a->words[wordnum];
703 :
704 704044 : if (w != 0)
705 : {
706 704044 : if (result != BMS_EMPTY_SET || HAS_MULTIPLE_ONES(w))
707 117215 : return BMS_MULTIPLE;
708 CBC 586829 : result = BMS_SINGLETON;
709 : }
710 ECB : }
711 GIC 586829 : return result;
712 : }
713 :
714 ECB : /*
715 : * bms_is_empty_internal - is a set empty?
716 : *
717 : * This is now used only locally, to detect cases where a function has
718 : * computed an empty set that we must now get rid of. Hence, we can
719 : * assume the input isn't NULL.
720 : */
721 : static bool
722 GNC 1658147 : bms_is_empty_internal(const Bitmapset *a)
723 ECB : {
724 : int nwords;
725 : int wordnum;
726 :
727 GIC 1658147 : nwords = a->nwords;
728 CBC 2173489 : for (wordnum = 0; wordnum < nwords; wordnum++)
729 : {
730 GIC 1658147 : bitmapword w = a->words[wordnum];
731 :
732 1658147 : if (w != 0)
733 1142805 : return false;
734 : }
735 515342 : return true;
736 : }
737 :
738 :
739 ECB : /*
740 : * These operations all "recycle" their non-const inputs, ie, either
741 : * return the modified input or pfree it if it can't hold the result.
742 : *
743 : * These should generally be used in the style
744 : *
745 : * foo = bms_add_member(foo, x);
746 : */
747 :
748 :
749 : /*
750 : * bms_add_member - add a specified member to set
751 : *
752 : * Input set is modified or recycled!
753 : */
754 : Bitmapset *
755 GIC 7842801 : bms_add_member(Bitmapset *a, int x)
756 : {
757 : int wordnum,
758 : bitnum;
759 :
760 7842801 : if (x < 0)
761 UIC 0 : elog(ERROR, "negative bitmapset member not allowed");
762 GIC 7842801 : if (a == NULL)
763 4593519 : return bms_make_singleton(x);
764 3249282 : wordnum = WORDNUM(x);
765 3249282 : bitnum = BITNUM(x);
766 :
767 : /* enlarge the set if necessary */
768 3249282 : if (wordnum >= a->nwords)
769 : {
770 90 : int oldnwords = a->nwords;
771 : int i;
772 ECB :
773 GIC 90 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
774 90 : a->nwords = wordnum + 1;
775 : /* zero out the enlarged portion */
776 324 : for (i = oldnwords; i < a->nwords; i++)
777 CBC 234 : a->words[i] = 0;
778 EUB : }
779 ECB :
780 CBC 3249282 : a->words[wordnum] |= ((bitmapword) 1 << bitnum);
781 3249282 : return a;
782 ECB : }
783 :
784 : /*
785 : * bms_del_member - remove a specified member from set
786 : *
787 : * No error if x is not currently a member of set
788 : *
789 : * Input set is modified in-place!
790 : */
791 : Bitmapset *
792 GIC 900130 : bms_del_member(Bitmapset *a, int x)
793 ECB : {
794 : int wordnum,
795 : bitnum;
796 :
797 CBC 900130 : if (x < 0)
798 LBC 0 : elog(ERROR, "negative bitmapset member not allowed");
799 GIC 900130 : if (a == NULL)
800 503157 : return NULL;
801 396973 : wordnum = WORDNUM(x);
802 396973 : bitnum = BITNUM(x);
803 396973 : if (wordnum < a->nwords)
804 396973 : a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
805 : /* If we computed an empty result, we must return NULL */
806 GNC 396973 : if (bms_is_empty_internal(a))
807 : {
808 178350 : pfree(a);
809 178350 : return NULL;
810 : }
811 GIC 218623 : return a;
812 : }
813 :
814 : /*
815 ECB : * bms_add_members - like bms_union, but left input is recycled
816 : */
817 : Bitmapset *
818 GIC 5999328 : bms_add_members(Bitmapset *a, const Bitmapset *b)
819 : {
820 ECB : Bitmapset *result;
821 EUB : const Bitmapset *other;
822 ECB : int otherlen;
823 : int i;
824 :
825 : /* Handle cases where either input is NULL */
826 CBC 5999328 : if (a == NULL)
827 3029605 : return bms_copy(b);
828 GIC 2969723 : if (b == NULL)
829 CBC 1879942 : return a;
830 : /* Identify shorter and longer input; copy the longer one if needed */
831 1089781 : if (a->nwords < b->nwords)
832 ECB : {
833 UIC 0 : result = bms_copy(b);
834 LBC 0 : other = a;
835 : }
836 : else
837 : {
838 GIC 1089781 : result = a;
839 1089781 : other = b;
840 : }
841 ECB : /* And union the shorter input into the result */
842 GIC 1089781 : otherlen = other->nwords;
843 2179562 : for (i = 0; i < otherlen; i++)
844 1089781 : result->words[i] |= other->words[i];
845 1089781 : if (result != a)
846 UIC 0 : pfree(a);
847 GIC 1089781 : return result;
848 : }
849 ECB :
850 : /*
851 : * bms_add_range
852 : * Add members in the range of 'lower' to 'upper' to the set.
853 : *
854 : * Note this could also be done by calling bms_add_member in a loop, however,
855 : * using this function will be faster when the range is large as we work at
856 EUB : * the bitmapword level rather than at bit level.
857 : */
858 : Bitmapset *
859 GIC 12683 : bms_add_range(Bitmapset *a, int lower, int upper)
860 : {
861 ECB : int lwordnum,
862 : lbitnum,
863 : uwordnum,
864 : ushiftbits,
865 : wordnum;
866 :
867 : /* do nothing if nothing is called for, without further checking */
868 CBC 12683 : if (upper < lower)
869 GBC 12 : return a;
870 ECB :
871 GIC 12671 : if (lower < 0)
872 UIC 0 : elog(ERROR, "negative bitmapset member not allowed");
873 GIC 12671 : uwordnum = WORDNUM(upper);
874 :
875 12671 : if (a == NULL)
876 : {
877 12671 : a = (Bitmapset *) palloc0(BITMAPSET_SIZE(uwordnum + 1));
878 GNC 12671 : a->type = T_Bitmapset;
879 GIC 12671 : a->nwords = uwordnum + 1;
880 : }
881 UIC 0 : else if (uwordnum >= a->nwords)
882 : {
883 LBC 0 : int oldnwords = a->nwords;
884 : int i;
885 :
886 : /* ensure we have enough words to store the upper bit */
887 UIC 0 : a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
888 0 : a->nwords = uwordnum + 1;
889 : /* zero out the enlarged portion */
890 0 : for (i = oldnwords; i < a->nwords; i++)
891 0 : a->words[i] = 0;
892 ECB : }
893 :
894 GIC 12671 : wordnum = lwordnum = WORDNUM(lower);
895 ECB :
896 GBC 12671 : lbitnum = BITNUM(lower);
897 CBC 12671 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(upper) + 1);
898 :
899 ECB : /*
900 : * Special case when lwordnum is the same as uwordnum we must perform the
901 : * upper and lower masking on the word.
902 : */
903 CBC 12671 : if (lwordnum == uwordnum)
904 : {
905 GBC 12671 : a->words[lwordnum] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1)
906 GIC 12671 : & (~(bitmapword) 0) >> ushiftbits;
907 EUB : }
908 : else
909 : {
910 : /* turn on lbitnum and all bits left of it */
911 UBC 0 : a->words[wordnum++] |= ~(bitmapword) (((bitmapword) 1 << lbitnum) - 1);
912 EUB :
913 : /* turn on all bits for any intermediate words */
914 UBC 0 : while (wordnum < uwordnum)
915 0 : a->words[wordnum++] = ~(bitmapword) 0;
916 :
917 : /* turn on upper's bit and all bits right of it. */
918 LBC 0 : a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits;
919 : }
920 ECB :
921 CBC 12671 : return a;
922 : }
923 :
924 : /*
925 : * bms_int_members - like bms_intersect, but left input is recycled
926 : */
927 ECB : Bitmapset *
928 GIC 230590 : bms_int_members(Bitmapset *a, const Bitmapset *b)
929 ECB : {
930 : int shortlen;
931 : int i;
932 :
933 : /* Handle cases where either input is NULL */
934 GIC 230590 : if (a == NULL)
935 GBC 10022 : return NULL;
936 GIC 220568 : if (b == NULL)
937 : {
938 GBC 1590 : pfree(a);
939 1590 : return NULL;
940 : }
941 : /* Intersect b into a; we need never copy */
942 218978 : shortlen = Min(a->nwords, b->nwords);
943 GIC 437956 : for (i = 0; i < shortlen; i++)
944 218978 : a->words[i] &= b->words[i];
945 CBC 218978 : for (; i < a->nwords; i++)
946 UIC 0 : a->words[i] = 0;
947 : /* If we computed an empty result, we must return NULL */
948 GNC 218978 : if (bms_is_empty_internal(a))
949 : {
950 42801 : pfree(a);
951 42801 : return NULL;
952 : }
953 GIC 176177 : return a;
954 : }
955 :
956 : /*
957 : * bms_del_members - like bms_difference, but left input is recycled
958 ECB : */
959 : Bitmapset *
960 GIC 762242 : bms_del_members(Bitmapset *a, const Bitmapset *b)
961 : {
962 : int shortlen;
963 : int i;
964 ECB :
965 : /* Handle cases where either input is NULL */
966 CBC 762242 : if (a == NULL)
967 GIC 329588 : return NULL;
968 CBC 432654 : if (b == NULL)
969 81602 : return a;
970 : /* Remove b's bits from a; we need never copy */
971 GIC 351052 : shortlen = Min(a->nwords, b->nwords);
972 CBC 702104 : for (i = 0; i < shortlen; i++)
973 351052 : a->words[i] &= ~b->words[i];
974 : /* If we computed an empty result, we must return NULL */
975 GNC 351052 : if (bms_is_empty_internal(a))
976 : {
977 287967 : pfree(a);
978 287967 : return NULL;
979 : }
980 CBC 63085 : return a;
981 ECB : }
982 EUB :
983 : /*
984 ECB : * bms_join - like bms_union, but *both* inputs are recycled
985 : */
986 : Bitmapset *
987 CBC 734035 : bms_join(Bitmapset *a, Bitmapset *b)
988 : {
989 ECB : Bitmapset *result;
990 : Bitmapset *other;
991 : int otherlen;
992 : int i;
993 :
994 : /* Handle cases where either input is NULL */
995 GIC 734035 : if (a == NULL)
996 CBC 417262 : return b;
997 GIC 316773 : if (b == NULL)
998 62999 : return a;
999 : /* Identify shorter and longer input; use longer one as result */
1000 253774 : if (a->nwords < b->nwords)
1001 : {
1002 LBC 0 : result = b;
1003 0 : other = a;
1004 ECB : }
1005 : else
1006 : {
1007 CBC 253774 : result = a;
1008 253774 : other = b;
1009 ECB : }
1010 : /* And union the shorter input into the result */
1011 CBC 253774 : otherlen = other->nwords;
1012 GIC 507548 : for (i = 0; i < otherlen; i++)
1013 CBC 253774 : result->words[i] |= other->words[i];
1014 253774 : if (other != result) /* pure paranoia */
1015 GIC 253774 : pfree(other);
1016 CBC 253774 : return result;
1017 : }
1018 :
1019 : /*
1020 : * bms_next_member - find next member of a set
1021 : *
1022 : * Returns smallest member greater than "prevbit", or -2 if there is none.
1023 : * "prevbit" must NOT be less than -1, or the behavior is unpredictable.
1024 : *
1025 : * This is intended as support for iterating through the members of a set.
1026 : * The typical pattern is
1027 : *
1028 : * x = -1;
1029 : * while ((x = bms_next_member(inputset, x)) >= 0)
1030 : * process member x;
1031 : *
1032 : * Notice that when there are no more members, we return -2, not -1 as you
1033 ECB : * might expect. The rationale for that is to allow distinguishing the
1034 : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1035 : * It makes no difference in simple loop usage, but complex iteration logic
1036 : * might need such an ability.
1037 : */
1038 : int
1039 CBC 11462578 : bms_next_member(const Bitmapset *a, int prevbit)
1040 ECB : {
1041 : int nwords;
1042 : int wordnum;
1043 : bitmapword mask;
1044 :
1045 GIC 11462578 : if (a == NULL)
1046 CBC 3503794 : return -2;
1047 GIC 7958784 : nwords = a->nwords;
1048 7958784 : prevbit++;
1049 CBC 7958784 : mask = (~(bitmapword) 0) << BITNUM(prevbit);
1050 GIC 10441612 : for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
1051 ECB : {
1052 GIC 7958784 : bitmapword w = a->words[wordnum];
1053 :
1054 : /* ignore bits before prevbit */
1055 CBC 7958784 : w &= mask;
1056 ECB :
1057 CBC 7958784 : if (w != 0)
1058 : {
1059 : int result;
1060 :
1061 5475956 : result = wordnum * BITS_PER_BITMAPWORD;
1062 GIC 5475956 : result += bmw_rightmost_one_pos(w);
1063 CBC 5475956 : return result;
1064 : }
1065 :
1066 : /* in subsequent words, consider all bits */
1067 GIC 2482828 : mask = (~(bitmapword) 0);
1068 : }
1069 2482828 : return -2;
1070 : }
1071 :
1072 : /*
1073 : * bms_prev_member - find prev member of a set
1074 : *
1075 : * Returns largest member less than "prevbit", or -2 if there is none.
1076 : * "prevbit" must NOT be more than one above the highest possible bit that can
1077 : * be set at the Bitmapset at its current size.
1078 : *
1079 : * To ease finding the highest set bit for the initial loop, the special
1080 : * prevbit value of -1 can be passed to have the function find the highest
1081 : * valued member in the set.
1082 : *
1083 : * This is intended as support for iterating through the members of a set in
1084 : * reverse. The typical pattern is
1085 : *
1086 : * x = -1;
1087 : * while ((x = bms_prev_member(inputset, x)) >= 0)
1088 : * process member x;
1089 : *
1090 : * Notice that when there are no more members, we return -2, not -1 as you
1091 : * might expect. The rationale for that is to allow distinguishing the
1092 ECB : * loop-not-started state (x == -1) from the loop-completed state (x == -2).
1093 : * It makes no difference in simple loop usage, but complex iteration logic
1094 : * might need such an ability.
1095 : */
1096 :
1097 : int
1098 GIC 9 : bms_prev_member(const Bitmapset *a, int prevbit)
1099 : {
1100 : int wordnum;
1101 : int ushiftbits;
1102 ECB : bitmapword mask;
1103 EUB :
1104 : /*
1105 : * If set is NULL or if there are no more bits to the right then we've
1106 ECB : * nothing to do.
1107 EUB : */
1108 GIC 9 : if (a == NULL || prevbit == 0)
1109 LBC 0 : return -2;
1110 :
1111 ECB : /* transform -1 to the highest possible bit we could have set */
1112 CBC 9 : if (prevbit == -1)
1113 LBC 0 : prevbit = a->nwords * BITS_PER_BITMAPWORD - 1;
1114 : else
1115 CBC 9 : prevbit--;
1116 :
1117 GIC 9 : ushiftbits = BITS_PER_BITMAPWORD - (BITNUM(prevbit) + 1);
1118 CBC 9 : mask = (~(bitmapword) 0) >> ushiftbits;
1119 GIC 12 : for (wordnum = WORDNUM(prevbit); wordnum >= 0; wordnum--)
1120 ECB : {
1121 GIC 9 : bitmapword w = a->words[wordnum];
1122 :
1123 : /* mask out bits left of prevbit */
1124 CBC 9 : w &= mask;
1125 ECB :
1126 CBC 9 : if (w != 0)
1127 : {
1128 : int result;
1129 :
1130 6 : result = wordnum * BITS_PER_BITMAPWORD;
1131 GIC 6 : result += bmw_leftmost_one_pos(w);
1132 CBC 6 : return result;
1133 : }
1134 :
1135 : /* in subsequent words, consider all bits */
1136 GIC 3 : mask = (~(bitmapword) 0);
1137 : }
1138 3 : return -2;
1139 : }
1140 :
1141 : /*
1142 : * bms_hash_value - compute a hash key for a Bitmapset
1143 : *
1144 ECB : * Note: we must ensure that any two bitmapsets that are bms_equal() will
1145 : * hash to the same value; in practice this means that trailing all-zero
1146 : * words must not affect the result. Hence we strip those before applying
1147 : * hash_any().
1148 : */
1149 EUB : uint32
1150 CBC 2649 : bms_hash_value(const Bitmapset *a)
1151 : {
1152 ECB : int lastword;
1153 :
1154 GIC 2649 : if (a == NULL)
1155 LBC 0 : return 0; /* All empty sets hash to 0 */
1156 GBC 2649 : for (lastword = a->nwords; --lastword >= 0;)
1157 ECB : {
1158 CBC 2649 : if (a->words[lastword] != 0)
1159 GIC 2649 : break;
1160 : }
1161 2649 : if (lastword < 0)
1162 UIC 0 : return 0; /* All empty sets hash to 0 */
1163 GIC 2649 : return DatumGetUInt32(hash_any((const unsigned char *) a->words,
1164 2649 : (lastword + 1) * sizeof(bitmapword)));
1165 : }
1166 :
1167 ECB : /*
1168 : * bitmap_hash - hash function for keys that are (pointers to) Bitmapsets
1169 : *
1170 : * Note: don't forget to specify bitmap_match as the match function!
1171 : */
1172 : uint32
1173 GIC 2649 : bitmap_hash(const void *key, Size keysize)
1174 : {
1175 2649 : Assert(keysize == sizeof(Bitmapset *));
1176 2649 : return bms_hash_value(*((const Bitmapset *const *) key));
1177 ECB : }
1178 :
1179 : /*
1180 : * bitmap_match - match function to use with bitmap_hash
1181 : */
1182 : int
1183 GIC 1602 : bitmap_match(const void *key1, const void *key2, Size keysize)
1184 : {
1185 1602 : Assert(keysize == sizeof(Bitmapset *));
1186 1602 : return !bms_equal(*((const Bitmapset *const *) key1),
1187 : *((const Bitmapset *const *) key2));
1188 : }
|