Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * tidbitmap.c
4 : : * PostgreSQL tuple-id (TID) bitmap package
5 : : *
6 : : * This module provides bitmap data structures that are spiritually
7 : : * similar to Bitmapsets, but are specially adapted to store sets of
8 : : * tuple identifiers (TIDs), or ItemPointers. In particular, the division
9 : : * of an ItemPointer into BlockNumber and OffsetNumber is catered for.
10 : : * Also, since we wish to be able to store very large tuple sets in
11 : : * memory with this data structure, we support "lossy" storage, in which
12 : : * we no longer remember individual tuple offsets on a page but only the
13 : : * fact that a particular page needs to be visited.
14 : : *
15 : : * The "lossy" storage uses one bit per disk page, so at the standard 8K
16 : : * BLCKSZ, we can represent all pages in 64Gb of disk space in about 1Mb
17 : : * of memory. People pushing around tables of that size should have a
18 : : * couple of Mb to spare, so we don't worry about providing a second level
19 : : * of lossiness. In theory we could fall back to page ranges at some
20 : : * point, but for now that seems useless complexity.
21 : : *
22 : : * We also support the notion of candidate matches, or rechecking. This
23 : : * means we know that a search need visit only some tuples on a page,
24 : : * but we are not certain that all of those tuples are real matches.
25 : : * So the eventual heap scan must recheck the quals for these tuples only,
26 : : * rather than rechecking the quals for all tuples on the page as in the
27 : : * lossy-bitmap case. Rechecking can be specified when TIDs are inserted
28 : : * into a bitmap, and it can also happen internally when we AND a lossy
29 : : * and a non-lossy page.
30 : : *
31 : : *
32 : : * Copyright (c) 2003-2024, PostgreSQL Global Development Group
33 : : *
34 : : * IDENTIFICATION
35 : : * src/backend/nodes/tidbitmap.c
36 : : *
37 : : *-------------------------------------------------------------------------
38 : : */
39 : : #include "postgres.h"
40 : :
41 : : #include <limits.h>
42 : :
43 : : #include "access/htup_details.h"
44 : : #include "common/hashfn.h"
45 : : #include "common/int.h"
46 : : #include "nodes/bitmapset.h"
47 : : #include "nodes/tidbitmap.h"
48 : : #include "storage/lwlock.h"
49 : : #include "utils/dsa.h"
50 : :
51 : : /*
52 : : * The maximum number of tuples per page is not large (typically 256 with
53 : : * 8K pages, or 1024 with 32K pages). So there's not much point in making
54 : : * the per-page bitmaps variable size. We just legislate that the size
55 : : * is this:
56 : : */
57 : : #define MAX_TUPLES_PER_PAGE MaxHeapTuplesPerPage
58 : :
59 : : /*
60 : : * When we have to switch over to lossy storage, we use a data structure
61 : : * with one bit per page, where all pages having the same number DIV
62 : : * PAGES_PER_CHUNK are aggregated into one chunk. When a chunk is present
63 : : * and has the bit set for a given page, there must not be a per-page entry
64 : : * for that page in the page table.
65 : : *
66 : : * We actually store both exact pages and lossy chunks in the same hash
67 : : * table, using identical data structures. (This is because the memory
68 : : * management for hashtables doesn't easily/efficiently allow space to be
69 : : * transferred easily from one hashtable to another.) Therefore it's best
70 : : * if PAGES_PER_CHUNK is the same as MAX_TUPLES_PER_PAGE, or at least not
71 : : * too different. But we also want PAGES_PER_CHUNK to be a power of 2 to
72 : : * avoid expensive integer remainder operations. So, define it like this:
73 : : */
74 : : #define PAGES_PER_CHUNK (BLCKSZ / 32)
75 : :
76 : : /* We use BITS_PER_BITMAPWORD and typedef bitmapword from nodes/bitmapset.h */
77 : :
78 : : #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
79 : : #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
80 : :
81 : : /* number of active words for an exact page: */
82 : : #define WORDS_PER_PAGE ((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
83 : : /* number of active words for a lossy chunk: */
84 : : #define WORDS_PER_CHUNK ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
85 : :
86 : : /*
87 : : * The hashtable entries are represented by this data structure. For
88 : : * an exact page, blockno is the page number and bit k of the bitmap
89 : : * represents tuple offset k+1. For a lossy chunk, blockno is the first
90 : : * page in the chunk (this must be a multiple of PAGES_PER_CHUNK) and
91 : : * bit k represents page blockno+k. Note that it is not possible to
92 : : * have exact storage for the first page of a chunk if we are using
93 : : * lossy storage for any page in the chunk's range, since the same
94 : : * hashtable entry has to serve both purposes.
95 : : *
96 : : * recheck is used only on exact pages --- it indicates that although
97 : : * only the stated tuples need be checked, the full index qual condition
98 : : * must be checked for each (ie, these are candidate matches).
99 : : */
100 : : typedef struct PagetableEntry
101 : : {
102 : : BlockNumber blockno; /* page number (hashtable key) */
103 : : char status; /* hash entry status */
104 : : bool ischunk; /* T = lossy storage, F = exact */
105 : : bool recheck; /* should the tuples be rechecked? */
106 : : bitmapword words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
107 : : } PagetableEntry;
108 : :
109 : : /*
110 : : * Holds array of pagetable entries.
111 : : */
112 : : typedef struct PTEntryArray
113 : : {
114 : : pg_atomic_uint32 refcount; /* no. of iterator attached */
115 : : PagetableEntry ptentry[FLEXIBLE_ARRAY_MEMBER];
116 : : } PTEntryArray;
117 : :
118 : : /*
119 : : * We want to avoid the overhead of creating the hashtable, which is
120 : : * comparatively large, when not necessary. Particularly when we are using a
121 : : * bitmap scan on the inside of a nestloop join: a bitmap may well live only
122 : : * long enough to accumulate one entry in such cases. We therefore avoid
123 : : * creating an actual hashtable until we need two pagetable entries. When
124 : : * just one pagetable entry is needed, we store it in a fixed field of
125 : : * TIDBitMap. (NOTE: we don't get rid of the hashtable if the bitmap later
126 : : * shrinks down to zero or one page again. So, status can be TBM_HASH even
127 : : * when nentries is zero or one.)
128 : : */
129 : : typedef enum
130 : : {
131 : : TBM_EMPTY, /* no hashtable, nentries == 0 */
132 : : TBM_ONE_PAGE, /* entry1 contains the single entry */
133 : : TBM_HASH, /* pagetable is valid, entry1 is not */
134 : : } TBMStatus;
135 : :
136 : : /*
137 : : * Current iterating state of the TBM.
138 : : */
139 : : typedef enum
140 : : {
141 : : TBM_NOT_ITERATING, /* not yet converted to page and chunk array */
142 : : TBM_ITERATING_PRIVATE, /* converted to local page and chunk array */
143 : : TBM_ITERATING_SHARED, /* converted to shared page and chunk array */
144 : : } TBMIteratingState;
145 : :
146 : : /*
147 : : * Here is the representation for a whole TIDBitMap:
148 : : */
149 : : struct TIDBitmap
150 : : {
151 : : NodeTag type; /* to make it a valid Node */
152 : : MemoryContext mcxt; /* memory context containing me */
153 : : TBMStatus status; /* see codes above */
154 : : struct pagetable_hash *pagetable; /* hash table of PagetableEntry's */
155 : : int nentries; /* number of entries in pagetable */
156 : : int maxentries; /* limit on same to meet maxbytes */
157 : : int npages; /* number of exact entries in pagetable */
158 : : int nchunks; /* number of lossy entries in pagetable */
159 : : TBMIteratingState iterating; /* tbm_begin_iterate called? */
160 : : uint32 lossify_start; /* offset to start lossifying hashtable at */
161 : : PagetableEntry entry1; /* used when status == TBM_ONE_PAGE */
162 : : /* these are valid when iterating is true: */
163 : : PagetableEntry **spages; /* sorted exact-page list, or NULL */
164 : : PagetableEntry **schunks; /* sorted lossy-chunk list, or NULL */
165 : : dsa_pointer dsapagetable; /* dsa_pointer to the element array */
166 : : dsa_pointer dsapagetableold; /* dsa_pointer to the old element array */
167 : : dsa_pointer ptpages; /* dsa_pointer to the page array */
168 : : dsa_pointer ptchunks; /* dsa_pointer to the chunk array */
169 : : dsa_area *dsa; /* reference to per-query dsa area */
170 : : };
171 : :
172 : : /*
173 : : * When iterating over a bitmap in sorted order, a TBMIterator is used to
174 : : * track our progress. There can be several iterators scanning the same
175 : : * bitmap concurrently. Note that the bitmap becomes read-only as soon as
176 : : * any iterator is created.
177 : : */
178 : : struct TBMIterator
179 : : {
180 : : TIDBitmap *tbm; /* TIDBitmap we're iterating over */
181 : : int spageptr; /* next spages index */
182 : : int schunkptr; /* next schunks index */
183 : : int schunkbit; /* next bit to check in current schunk */
184 : : TBMIterateResult output; /* MUST BE LAST (because variable-size) */
185 : : };
186 : :
187 : : /*
188 : : * Holds the shared members of the iterator so that multiple processes
189 : : * can jointly iterate.
190 : : */
191 : : typedef struct TBMSharedIteratorState
192 : : {
193 : : int nentries; /* number of entries in pagetable */
194 : : int maxentries; /* limit on same to meet maxbytes */
195 : : int npages; /* number of exact entries in pagetable */
196 : : int nchunks; /* number of lossy entries in pagetable */
197 : : dsa_pointer pagetable; /* dsa pointers to head of pagetable data */
198 : : dsa_pointer spages; /* dsa pointer to page array */
199 : : dsa_pointer schunks; /* dsa pointer to chunk array */
200 : : LWLock lock; /* lock to protect below members */
201 : : int spageptr; /* next spages index */
202 : : int schunkptr; /* next schunks index */
203 : : int schunkbit; /* next bit to check in current schunk */
204 : : } TBMSharedIteratorState;
205 : :
206 : : /*
207 : : * pagetable iteration array.
208 : : */
209 : : typedef struct PTIterationArray
210 : : {
211 : : pg_atomic_uint32 refcount; /* no. of iterator attached */
212 : : int index[FLEXIBLE_ARRAY_MEMBER]; /* index array */
213 : : } PTIterationArray;
214 : :
215 : : /*
216 : : * same as TBMIterator, but it is used for joint iteration, therefore this
217 : : * also holds a reference to the shared state.
218 : : */
219 : : struct TBMSharedIterator
220 : : {
221 : : TBMSharedIteratorState *state; /* shared state */
222 : : PTEntryArray *ptbase; /* pagetable element array */
223 : : PTIterationArray *ptpages; /* sorted exact page index list */
224 : : PTIterationArray *ptchunks; /* sorted lossy page index list */
225 : : TBMIterateResult output; /* MUST BE LAST (because variable-size) */
226 : : };
227 : :
228 : : /* Local function prototypes */
229 : : static void tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage);
230 : : static bool tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage,
231 : : const TIDBitmap *b);
232 : : static const PagetableEntry *tbm_find_pageentry(const TIDBitmap *tbm,
233 : : BlockNumber pageno);
234 : : static PagetableEntry *tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno);
235 : : static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno);
236 : : static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
237 : : static void tbm_lossify(TIDBitmap *tbm);
238 : : static int tbm_comparator(const void *left, const void *right);
239 : : static int tbm_shared_comparator(const void *left, const void *right,
240 : : void *arg);
241 : :
242 : : /* define hashtable mapping block numbers to PagetableEntry's */
243 : : #define SH_USE_NONDEFAULT_ALLOCATOR
244 : : #define SH_PREFIX pagetable
245 : : #define SH_ELEMENT_TYPE PagetableEntry
246 : : #define SH_KEY_TYPE BlockNumber
247 : : #define SH_KEY blockno
248 : : #define SH_HASH_KEY(tb, key) murmurhash32(key)
249 : : #define SH_EQUAL(tb, a, b) a == b
250 : : #define SH_SCOPE static inline
251 : : #define SH_DEFINE
252 : : #define SH_DECLARE
253 : : #include "lib/simplehash.h"
254 : :
255 : :
256 : : /*
257 : : * tbm_create - create an initially-empty bitmap
258 : : *
259 : : * The bitmap will live in the memory context that is CurrentMemoryContext
260 : : * at the time of this call. It will be limited to (approximately) maxbytes
261 : : * total memory consumption. If the DSA passed to this function is not NULL
262 : : * then the memory for storing elements of the underlying page table will
263 : : * be allocated from the DSA.
264 : : */
265 : : TIDBitmap *
2594 rhaas@postgresql.org 266 :CBC 9486 : tbm_create(long maxbytes, dsa_area *dsa)
267 : : {
268 : : TIDBitmap *tbm;
269 : :
270 : : /* Create the TIDBitmap struct and zero all its fields */
5573 tgl@sss.pgh.pa.us 271 : 9486 : tbm = makeNode(TIDBitmap);
272 : :
6937 273 : 9486 : tbm->mcxt = CurrentMemoryContext;
6907 274 : 9486 : tbm->status = TBM_EMPTY;
275 : :
2347 rhaas@postgresql.org 276 : 9486 : tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
2739 andres@anarazel.de 277 : 9486 : tbm->lossify_start = 0;
2594 rhaas@postgresql.org 278 : 9486 : tbm->dsa = dsa;
2586 279 : 9486 : tbm->dsapagetable = InvalidDsaPointer;
280 : 9486 : tbm->dsapagetableold = InvalidDsaPointer;
281 : 9486 : tbm->ptpages = InvalidDsaPointer;
282 : 9486 : tbm->ptchunks = InvalidDsaPointer;
283 : :
6907 tgl@sss.pgh.pa.us 284 : 9486 : return tbm;
285 : : }
286 : :
287 : : /*
288 : : * Actually create the hashtable. Since this is a moderately expensive
289 : : * proposition, we don't do it until we have to.
290 : : */
291 : : static void
292 : 4137 : tbm_create_pagetable(TIDBitmap *tbm)
293 : : {
294 [ - + ]: 4137 : Assert(tbm->status != TBM_HASH);
295 [ - + ]: 4137 : Assert(tbm->pagetable == NULL);
296 : :
2594 rhaas@postgresql.org 297 : 4137 : tbm->pagetable = pagetable_create(tbm->mcxt, 128, tbm);
298 : :
299 : : /* If entry1 is valid, push it into the hashtable */
6907 tgl@sss.pgh.pa.us 300 [ + + ]: 4137 : if (tbm->status == TBM_ONE_PAGE)
301 : : {
302 : : PagetableEntry *page;
303 : : bool found;
304 : : char oldstatus;
305 : :
2739 andres@anarazel.de 306 : 2703 : page = pagetable_insert(tbm->pagetable,
307 : : tbm->entry1.blockno,
308 : : &found);
6907 tgl@sss.pgh.pa.us 309 [ - + ]: 2703 : Assert(!found);
2739 andres@anarazel.de 310 : 2703 : oldstatus = page->status;
6907 tgl@sss.pgh.pa.us 311 : 2703 : memcpy(page, &tbm->entry1, sizeof(PagetableEntry));
2739 andres@anarazel.de 312 : 2703 : page->status = oldstatus;
313 : : }
314 : :
6907 tgl@sss.pgh.pa.us 315 : 4137 : tbm->status = TBM_HASH;
6937 316 : 4137 : }
317 : :
318 : : /*
319 : : * tbm_free - free a TIDBitmap
320 : : */
321 : : void
322 : 9439 : tbm_free(TIDBitmap *tbm)
323 : : {
6907 324 [ + + ]: 9439 : if (tbm->pagetable)
2739 andres@anarazel.de 325 : 4135 : pagetable_destroy(tbm->pagetable);
6937 tgl@sss.pgh.pa.us 326 [ + + ]: 9439 : if (tbm->spages)
327 : 2624 : pfree(tbm->spages);
328 [ + + ]: 9439 : if (tbm->schunks)
329 : 1440 : pfree(tbm->schunks);
330 : 9439 : pfree(tbm);
331 : 9439 : }
332 : :
333 : : /*
334 : : * tbm_free_shared_area - free shared state
335 : : *
336 : : * Free shared iterator state, Also free shared pagetable and iterator arrays
337 : : * memory if they are not referred by any of the shared iterator i.e recount
338 : : * is becomes 0.
339 : : */
340 : : void
2594 rhaas@postgresql.org 341 : 54 : tbm_free_shared_area(dsa_area *dsa, dsa_pointer dp)
342 : : {
343 : 54 : TBMSharedIteratorState *istate = dsa_get_address(dsa, dp);
344 : : PTEntryArray *ptbase;
345 : : PTIterationArray *ptpages;
346 : : PTIterationArray *ptchunks;
347 : :
2586 348 [ + - ]: 54 : if (DsaPointerIsValid(istate->pagetable))
349 : : {
350 : 54 : ptbase = dsa_get_address(dsa, istate->pagetable);
351 [ + + ]: 54 : if (pg_atomic_sub_fetch_u32(&ptbase->refcount, 1) == 0)
352 : 27 : dsa_free(dsa, istate->pagetable);
353 : : }
354 [ + - ]: 54 : if (DsaPointerIsValid(istate->spages))
355 : : {
2594 356 : 54 : ptpages = dsa_get_address(dsa, istate->spages);
357 [ + + ]: 54 : if (pg_atomic_sub_fetch_u32(&ptpages->refcount, 1) == 0)
358 : 27 : dsa_free(dsa, istate->spages);
359 : : }
2586 360 [ - + ]: 54 : if (DsaPointerIsValid(istate->schunks))
361 : : {
2594 rhaas@postgresql.org 362 :UBC 0 : ptchunks = dsa_get_address(dsa, istate->schunks);
363 [ # # ]: 0 : if (pg_atomic_sub_fetch_u32(&ptchunks->refcount, 1) == 0)
364 : 0 : dsa_free(dsa, istate->schunks);
365 : : }
366 : :
2594 rhaas@postgresql.org 367 :CBC 54 : dsa_free(dsa, dp);
368 : 54 : }
369 : :
370 : : /*
371 : : * tbm_add_tuples - add some tuple IDs to a TIDBitmap
372 : : *
373 : : * If recheck is true, then the recheck flag will be set in the
374 : : * TBMIterateResult when any of these tuples are reported out.
375 : : */
376 : : void
5848 tgl@sss.pgh.pa.us 377 : 3215536 : tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
378 : : bool recheck)
379 : : {
3376 380 : 3215536 : BlockNumber currblk = InvalidBlockNumber;
381 : 3215536 : PagetableEntry *page = NULL; /* only valid when currblk is valid */
382 : : int i;
383 : :
2594 rhaas@postgresql.org 384 [ - + ]: 3215536 : Assert(tbm->iterating == TBM_NOT_ITERATING);
6937 tgl@sss.pgh.pa.us 385 [ + + ]: 7521801 : for (i = 0; i < ntids; i++)
386 : : {
387 : 4306265 : BlockNumber blk = ItemPointerGetBlockNumber(tids + i);
388 : 4306265 : OffsetNumber off = ItemPointerGetOffsetNumber(tids + i);
389 : : int wordnum,
390 : : bitnum;
391 : :
392 : : /* safety check to ensure we don't overrun bit array bounds */
393 [ + - - + ]: 4306265 : if (off < 1 || off > MAX_TUPLES_PER_PAGE)
6937 tgl@sss.pgh.pa.us 394 [ # # ]:UBC 0 : elog(ERROR, "tuple offset out of range: %u", off);
395 : :
396 : : /*
397 : : * Look up target page unless we already did. This saves cycles when
398 : : * the input includes consecutive tuples on the same page, which is
399 : : * common enough to justify an extra test here.
400 : : */
3376 tgl@sss.pgh.pa.us 401 [ + + ]:CBC 4306265 : if (blk != currblk)
402 : : {
andres@anarazel.de 403 [ + + ]: 3603891 : if (tbm_page_is_lossy(tbm, blk))
tgl@sss.pgh.pa.us 404 : 1428 : page = NULL; /* remember page is lossy */
405 : : else
406 : 3602463 : page = tbm_get_pageentry(tbm, blk);
407 : 3603891 : currblk = blk;
408 : : }
409 : :
410 [ + + ]: 4306265 : if (page == NULL)
411 : 1428 : continue; /* whole page is already marked */
412 : :
6937 413 [ - + ]: 4304837 : if (page->ischunk)
414 : : {
415 : : /* The page is a lossy chunk header, set bit for itself */
6937 tgl@sss.pgh.pa.us 416 :UBC 0 : wordnum = bitnum = 0;
417 : : }
418 : : else
419 : : {
420 : : /* Page is exact, so set bit for individual tuple */
6937 tgl@sss.pgh.pa.us 421 :CBC 4304837 : wordnum = WORDNUM(off - 1);
422 : 4304837 : bitnum = BITNUM(off - 1);
423 : : }
424 : 4304837 : page->words[wordnum] |= ((bitmapword) 1 << bitnum);
5848 425 : 4304837 : page->recheck |= recheck;
426 : :
6937 427 [ + + ]: 4304837 : if (tbm->nentries > tbm->maxentries)
428 : : {
429 : 12 : tbm_lossify(tbm);
430 : : /* Page could have been converted to lossy, so force new lookup */
3376 431 : 12 : currblk = InvalidBlockNumber;
432 : : }
433 : : }
6937 434 : 3215536 : }
435 : :
436 : : /*
437 : : * tbm_add_page - add a whole page to a TIDBitmap
438 : : *
439 : : * This causes the whole page to be reported (with the recheck flag)
440 : : * when the TIDBitmap is scanned.
441 : : */
442 : : void
5500 443 : 73985 : tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
444 : : {
445 : : /* Enter the page in the bitmap, or mark it lossy if already present */
446 : 73985 : tbm_mark_page_lossy(tbm, pageno);
447 : : /* If we went over the memory limit, lossify some more pages */
448 [ - + ]: 73985 : if (tbm->nentries > tbm->maxentries)
5500 tgl@sss.pgh.pa.us 449 :UBC 0 : tbm_lossify(tbm);
5500 tgl@sss.pgh.pa.us 450 :CBC 73985 : }
451 : :
452 : : /*
453 : : * tbm_union - set union
454 : : *
455 : : * a is modified in-place, b is not changed
456 : : */
457 : : void
6937 tgl@sss.pgh.pa.us 458 :UBC 0 : tbm_union(TIDBitmap *a, const TIDBitmap *b)
459 : : {
6907 460 [ # # ]: 0 : Assert(!a->iterating);
461 : : /* Nothing to do if b is empty */
462 [ # # ]: 0 : if (b->nentries == 0)
463 : 0 : return;
464 : : /* Scan through chunks and pages in b, merge into a */
465 [ # # ]: 0 : if (b->status == TBM_ONE_PAGE)
466 : 0 : tbm_union_page(a, &b->entry1);
467 : : else
468 : : {
469 : : pagetable_iterator i;
470 : : PagetableEntry *bpage;
471 : :
472 [ # # ]: 0 : Assert(b->status == TBM_HASH);
2739 andres@anarazel.de 473 : 0 : pagetable_start_iterate(b->pagetable, &i);
474 [ # # ]: 0 : while ((bpage = pagetable_iterate(b->pagetable, &i)) != NULL)
6907 tgl@sss.pgh.pa.us 475 : 0 : tbm_union_page(a, bpage);
476 : : }
477 : : }
478 : :
479 : : /* Process one page of b during a union op */
480 : : static void
481 : 0 : tbm_union_page(TIDBitmap *a, const PagetableEntry *bpage)
482 : : {
483 : : PagetableEntry *apage;
484 : : int wordnum;
485 : :
486 [ # # ]: 0 : if (bpage->ischunk)
487 : : {
488 : : /* Scan b's chunk, mark each indicated page lossy in a */
3803 489 [ # # ]: 0 : for (wordnum = 0; wordnum < WORDS_PER_CHUNK; wordnum++)
490 : : {
6907 491 : 0 : bitmapword w = bpage->words[wordnum];
492 : :
493 [ # # ]: 0 : if (w != 0)
494 : : {
495 : : BlockNumber pg;
496 : :
497 : 0 : pg = bpage->blockno + (wordnum * BITS_PER_BITMAPWORD);
498 [ # # ]: 0 : while (w != 0)
499 : : {
500 [ # # ]: 0 : if (w & 1)
501 : 0 : tbm_mark_page_lossy(a, pg);
502 : 0 : pg++;
503 : 0 : w >>= 1;
504 : : }
505 : : }
506 : : }
507 : : }
508 [ # # ]: 0 : else if (tbm_page_is_lossy(a, bpage->blockno))
509 : : {
510 : : /* page is already lossy in a, nothing to do */
511 : 0 : return;
512 : : }
513 : : else
514 : : {
515 : 0 : apage = tbm_get_pageentry(a, bpage->blockno);
516 [ # # ]: 0 : if (apage->ischunk)
517 : : {
518 : : /* The page is a lossy chunk header, set bit for itself */
519 : 0 : apage->words[0] |= ((bitmapword) 1 << 0);
520 : : }
521 : : else
522 : : {
523 : : /* Both pages are exact, merge at the bit level */
524 [ # # ]: 0 : for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
525 : 0 : apage->words[wordnum] |= bpage->words[wordnum];
5848 526 : 0 : apage->recheck |= bpage->recheck;
527 : : }
528 : : }
529 : :
6907 530 [ # # ]: 0 : if (a->nentries > a->maxentries)
531 : 0 : tbm_lossify(a);
532 : : }
533 : :
534 : : /*
535 : : * tbm_intersect - set intersection
536 : : *
537 : : * a is modified in-place, b is not changed
538 : : */
539 : : void
6937 tgl@sss.pgh.pa.us 540 :CBC 38 : tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
541 : : {
542 [ - + ]: 38 : Assert(!a->iterating);
543 : : /* Nothing to do if a is empty */
6907 544 [ - + ]: 38 : if (a->nentries == 0)
6907 tgl@sss.pgh.pa.us 545 :UBC 0 : return;
546 : : /* Scan through chunks and pages in a, try to match to b */
6907 tgl@sss.pgh.pa.us 547 [ + + ]:CBC 38 : if (a->status == TBM_ONE_PAGE)
548 : : {
6839 549 [ - + ]: 24 : if (tbm_intersect_page(a, &a->entry1, b))
550 : : {
551 : : /* Page is now empty, remove it from a */
6907 tgl@sss.pgh.pa.us 552 [ # # ]:UBC 0 : Assert(!a->entry1.ischunk);
553 : 0 : a->npages--;
554 : 0 : a->nentries--;
555 [ # # ]: 0 : Assert(a->nentries == 0);
556 : 0 : a->status = TBM_EMPTY;
557 : : }
558 : : }
559 : : else
560 : : {
561 : : pagetable_iterator i;
562 : : PagetableEntry *apage;
563 : :
6907 tgl@sss.pgh.pa.us 564 [ - + ]:CBC 14 : Assert(a->status == TBM_HASH);
2739 andres@anarazel.de 565 : 14 : pagetable_start_iterate(a->pagetable, &i);
566 [ + + ]: 2450 : while ((apage = pagetable_iterate(a->pagetable, &i)) != NULL)
567 : : {
6839 tgl@sss.pgh.pa.us 568 [ + + ]: 2422 : if (tbm_intersect_page(a, apage, b))
569 : : {
570 : : /* Page or chunk is now empty, remove it from a */
6907 571 [ - + ]: 2328 : if (apage->ischunk)
6907 tgl@sss.pgh.pa.us 572 :UBC 0 : a->nchunks--;
573 : : else
6907 tgl@sss.pgh.pa.us 574 :CBC 2328 : a->npages--;
575 : 2328 : a->nentries--;
2739 andres@anarazel.de 576 [ - + ]: 2328 : if (!pagetable_delete(a->pagetable, apage->blockno))
6937 tgl@sss.pgh.pa.us 577 [ # # ]:UBC 0 : elog(ERROR, "hash table corrupted");
578 : : }
579 : : }
580 : : }
581 : : }
582 : :
583 : : /*
584 : : * Process one page of a during an intersection op
585 : : *
586 : : * Returns true if apage is now empty and should be deleted from a
587 : : */
588 : : static bool
6839 tgl@sss.pgh.pa.us 589 :CBC 2446 : tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, const TIDBitmap *b)
590 : : {
591 : : const PagetableEntry *bpage;
592 : : int wordnum;
593 : :
6907 594 [ + + ]: 2446 : if (apage->ischunk)
595 : : {
596 : : /* Scan each bit in chunk, try to clear */
6756 bruce@momjian.us 597 : 15 : bool candelete = true;
598 : :
3803 tgl@sss.pgh.pa.us 599 [ + + ]: 75 : for (wordnum = 0; wordnum < WORDS_PER_CHUNK; wordnum++)
600 : : {
6907 601 : 60 : bitmapword w = apage->words[wordnum];
602 : :
603 [ + + ]: 60 : if (w != 0)
604 : : {
605 : 54 : bitmapword neww = w;
606 : : BlockNumber pg;
607 : : int bitnum;
608 : :
609 : 54 : pg = apage->blockno + (wordnum * BITS_PER_BITMAPWORD);
610 : 54 : bitnum = 0;
611 [ + + ]: 3303 : while (w != 0)
612 : : {
613 [ + + ]: 3249 : if (w & 1)
614 : : {
615 [ + + - + ]: 1680 : if (!tbm_page_is_lossy(b, pg) &&
616 : 126 : tbm_find_pageentry(b, pg) == NULL)
617 : : {
618 : : /* Page is not in b at all, lose lossy bit */
6907 tgl@sss.pgh.pa.us 619 :UBC 0 : neww &= ~((bitmapword) 1 << bitnum);
620 : : }
621 : : }
6907 tgl@sss.pgh.pa.us 622 :CBC 3249 : pg++;
623 : 3249 : bitnum++;
624 : 3249 : w >>= 1;
625 : : }
626 : 54 : apage->words[wordnum] = neww;
627 [ + - ]: 54 : if (neww != 0)
628 : 54 : candelete = false;
629 : : }
630 : : }
631 : 15 : return candelete;
632 : : }
633 [ - + ]: 2431 : else if (tbm_page_is_lossy(b, apage->blockno))
634 : : {
635 : : /*
636 : : * Some of the tuples in 'a' might not satisfy the quals for 'b', but
637 : : * because the page 'b' is lossy, we don't know which ones. Therefore
638 : : * we mark 'a' as requiring rechecks, to indicate that at most those
639 : : * tuples set in 'a' are matches.
640 : : */
5848 tgl@sss.pgh.pa.us 641 :UBC 0 : apage->recheck = true;
6907 642 : 0 : return false;
643 : : }
644 : : else
645 : : {
6756 bruce@momjian.us 646 :CBC 2431 : bool candelete = true;
647 : :
6907 tgl@sss.pgh.pa.us 648 : 2431 : bpage = tbm_find_pageentry(b, apage->blockno);
649 [ + + ]: 2431 : if (bpage != NULL)
650 : : {
651 : : /* Both pages are exact, merge at the bit level */
652 [ - + ]: 2162 : Assert(!bpage->ischunk);
653 [ + + ]: 12972 : for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
654 : : {
655 : 10810 : apage->words[wordnum] &= bpage->words[wordnum];
656 [ + + ]: 10810 : if (apage->words[wordnum] != 0)
657 : 109 : candelete = false;
658 : : }
5848 659 : 2162 : apage->recheck |= bpage->recheck;
660 : : }
661 : : /* If there is no matching b page, we can just delete the a page */
6907 662 : 2431 : return candelete;
663 : : }
664 : : }
665 : :
666 : : /*
667 : : * tbm_is_empty - is a TIDBitmap completely empty?
668 : : */
669 : : bool
6804 670 : 343 : tbm_is_empty(const TIDBitmap *tbm)
671 : : {
672 : 343 : return (tbm->nentries == 0);
673 : : }
674 : :
675 : : /*
676 : : * tbm_begin_iterate - prepare to iterate through a TIDBitmap
677 : : *
678 : : * The TBMIterator struct is created in the caller's memory context.
679 : : * For a clean shutdown of the iteration, call tbm_end_iterate; but it's
680 : : * okay to just allow the memory context to be released, too. It is caller's
681 : : * responsibility not to touch the TBMIterator anymore once the TIDBitmap
682 : : * is freed.
683 : : *
684 : : * NB: after this is called, it is no longer allowed to modify the contents
685 : : * of the bitmap. However, you can call this multiple times to scan the
686 : : * contents repeatedly, including parallel scans.
687 : : */
688 : : TBMIterator *
6937 689 : 18500 : tbm_begin_iterate(TIDBitmap *tbm)
690 : : {
691 : : TBMIterator *iterator;
692 : :
2594 rhaas@postgresql.org 693 [ - + ]: 18500 : Assert(tbm->iterating != TBM_ITERATING_SHARED);
694 : :
695 : : /*
696 : : * Create the TBMIterator struct, with enough trailing space to serve the
697 : : * needs of the TBMIterateResult sub-struct.
698 : : */
5573 tgl@sss.pgh.pa.us 699 : 18500 : iterator = (TBMIterator *) palloc(sizeof(TBMIterator) +
700 : : MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
701 : 18500 : iterator->tbm = tbm;
702 : :
703 : : /*
704 : : * Initialize iteration pointers.
705 : : */
706 : 18500 : iterator->spageptr = 0;
707 : 18500 : iterator->schunkptr = 0;
708 : 18500 : iterator->schunkbit = 0;
709 : :
710 : : /*
711 : : * If we have a hashtable, create and fill the sorted page lists, unless
712 : : * we already did that for a previous iterator. Note that the lists are
713 : : * attached to the bitmap not the iterator, so they can be used by more
714 : : * than one iterator.
715 : : */
2594 rhaas@postgresql.org 716 [ + + + + ]: 18500 : if (tbm->status == TBM_HASH && tbm->iterating == TBM_NOT_ITERATING)
717 : : {
718 : : pagetable_iterator i;
719 : : PagetableEntry *page;
720 : : int npages;
721 : : int nchunks;
722 : :
5573 tgl@sss.pgh.pa.us 723 [ + - + + ]: 4063 : if (!tbm->spages && tbm->npages > 0)
724 : 2626 : tbm->spages = (PagetableEntry **)
725 : 2626 : MemoryContextAlloc(tbm->mcxt,
726 : 2626 : tbm->npages * sizeof(PagetableEntry *));
727 [ + - + + ]: 4063 : if (!tbm->schunks && tbm->nchunks > 0)
728 : 1440 : tbm->schunks = (PagetableEntry **)
729 : 1440 : MemoryContextAlloc(tbm->mcxt,
730 : 1440 : tbm->nchunks * sizeof(PagetableEntry *));
731 : :
732 : 4063 : npages = nchunks = 0;
2739 andres@anarazel.de 733 : 4063 : pagetable_start_iterate(tbm->pagetable, &i);
734 [ + + ]: 107711 : while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
735 : : {
5573 tgl@sss.pgh.pa.us 736 [ + + ]: 103648 : if (page->ischunk)
737 : 1461 : tbm->schunks[nchunks++] = page;
738 : : else
739 : 102187 : tbm->spages[npages++] = page;
740 : : }
741 [ - + ]: 4063 : Assert(npages == tbm->npages);
742 [ - + ]: 4063 : Assert(nchunks == tbm->nchunks);
743 [ + + ]: 4063 : if (npages > 1)
744 : 2624 : qsort(tbm->spages, npages, sizeof(PagetableEntry *),
745 : : tbm_comparator);
746 [ + + ]: 4063 : if (nchunks > 1)
747 : 6 : qsort(tbm->schunks, nchunks, sizeof(PagetableEntry *),
748 : : tbm_comparator);
749 : : }
750 : :
2594 rhaas@postgresql.org 751 : 18500 : tbm->iterating = TBM_ITERATING_PRIVATE;
752 : :
5573 tgl@sss.pgh.pa.us 753 : 18500 : return iterator;
754 : : }
755 : :
756 : : /*
757 : : * tbm_prepare_shared_iterate - prepare shared iteration state for a TIDBitmap.
758 : : *
759 : : * The necessary shared state will be allocated from the DSA passed to
760 : : * tbm_create, so that multiple processes can attach to it and iterate jointly.
761 : : *
762 : : * This will convert the pagetable hash into page and chunk array of the index
763 : : * into pagetable array.
764 : : */
765 : : dsa_pointer
2594 rhaas@postgresql.org 766 : 72 : tbm_prepare_shared_iterate(TIDBitmap *tbm)
767 : : {
768 : : dsa_pointer dp;
769 : : TBMSharedIteratorState *istate;
2586 770 : 72 : PTEntryArray *ptbase = NULL;
2594 tgl@sss.pgh.pa.us 771 : 72 : PTIterationArray *ptpages = NULL;
772 : 72 : PTIterationArray *ptchunks = NULL;
773 : :
rhaas@postgresql.org 774 [ - + ]: 72 : Assert(tbm->dsa != NULL);
775 [ - + ]: 72 : Assert(tbm->iterating != TBM_ITERATING_PRIVATE);
776 : :
777 : : /*
778 : : * Allocate TBMSharedIteratorState from DSA to hold the shared members and
779 : : * lock, this will also be used by multiple worker for shared iterate.
780 : : */
2586 781 : 72 : dp = dsa_allocate0(tbm->dsa, sizeof(TBMSharedIteratorState));
2594 782 : 72 : istate = dsa_get_address(tbm->dsa, dp);
783 : :
784 : : /*
785 : : * If we're not already iterating, create and fill the sorted page lists.
786 : : * (If we are, the sorted page lists are already stored in the TIDBitmap,
787 : : * and we can just reuse them.)
788 : : */
789 [ + + ]: 72 : if (tbm->iterating == TBM_NOT_ITERATING)
790 : : {
791 : : pagetable_iterator i;
792 : : PagetableEntry *page;
793 : : int idx;
794 : : int npages;
795 : : int nchunks;
796 : :
797 : : /*
798 : : * Allocate the page and chunk array memory from the DSA to share
799 : : * across multiple processes.
800 : : */
801 [ + - ]: 36 : if (tbm->npages)
802 : : {
803 : 36 : tbm->ptpages = dsa_allocate(tbm->dsa, sizeof(PTIterationArray) +
804 : : tbm->npages * sizeof(int));
805 : 36 : ptpages = dsa_get_address(tbm->dsa, tbm->ptpages);
806 : 36 : pg_atomic_init_u32(&ptpages->refcount, 0);
807 : : }
808 [ + + ]: 36 : if (tbm->nchunks)
809 : : {
810 : 3 : tbm->ptchunks = dsa_allocate(tbm->dsa, sizeof(PTIterationArray) +
811 : : tbm->nchunks * sizeof(int));
812 : 3 : ptchunks = dsa_get_address(tbm->dsa, tbm->ptchunks);
813 : 3 : pg_atomic_init_u32(&ptchunks->refcount, 0);
814 : : }
815 : :
816 : : /*
817 : : * If TBM status is TBM_HASH then iterate over the pagetable and
818 : : * convert it to page and chunk arrays. But if it's in the
819 : : * TBM_ONE_PAGE mode then directly allocate the space for one entry
820 : : * from the DSA.
821 : : */
822 : 36 : npages = nchunks = 0;
823 [ + - ]: 36 : if (tbm->status == TBM_HASH)
824 : : {
825 : 36 : ptbase = dsa_get_address(tbm->dsa, tbm->dsapagetable);
826 : :
827 : 36 : pagetable_start_iterate(tbm->pagetable, &i);
828 [ + + ]: 13551 : while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
829 : : {
830 : 13515 : idx = page - ptbase->ptentry;
831 [ + + ]: 13515 : if (page->ischunk)
832 : 12 : ptchunks->index[nchunks++] = idx;
833 : : else
834 : 13503 : ptpages->index[npages++] = idx;
835 : : }
836 : :
837 [ - + ]: 36 : Assert(npages == tbm->npages);
838 [ - + ]: 36 : Assert(nchunks == tbm->nchunks);
839 : : }
2586 rhaas@postgresql.org 840 [ # # ]:UBC 0 : else if (tbm->status == TBM_ONE_PAGE)
841 : : {
842 : : /*
843 : : * In one page mode allocate the space for one pagetable entry,
844 : : * initialize it, and directly store its index (i.e. 0) in the
845 : : * page array.
846 : : */
2594 847 : 0 : tbm->dsapagetable = dsa_allocate(tbm->dsa, sizeof(PTEntryArray) +
848 : : sizeof(PagetableEntry));
849 : 0 : ptbase = dsa_get_address(tbm->dsa, tbm->dsapagetable);
2560 850 : 0 : memcpy(ptbase->ptentry, &tbm->entry1, sizeof(PagetableEntry));
2594 851 : 0 : ptpages->index[0] = 0;
852 : : }
853 : :
2586 rhaas@postgresql.org 854 [ + - ]:CBC 36 : if (ptbase != NULL)
855 : 36 : pg_atomic_init_u32(&ptbase->refcount, 0);
2594 856 [ + - ]: 36 : if (npages > 1)
432 peter@eisentraut.org 857 : 36 : qsort_arg(ptpages->index, npages, sizeof(int),
858 : 36 : tbm_shared_comparator, ptbase->ptentry);
2594 rhaas@postgresql.org 859 [ + + ]: 36 : if (nchunks > 1)
432 peter@eisentraut.org 860 : 3 : qsort_arg(ptchunks->index, nchunks, sizeof(int),
861 : 3 : tbm_shared_comparator, ptbase->ptentry);
862 : : }
863 : :
864 : : /*
865 : : * Store the TBM members in the shared state so that we can share them
866 : : * across multiple processes.
867 : : */
2594 rhaas@postgresql.org 868 : 72 : istate->nentries = tbm->nentries;
869 : 72 : istate->maxentries = tbm->maxentries;
870 : 72 : istate->npages = tbm->npages;
871 : 72 : istate->nchunks = tbm->nchunks;
872 : 72 : istate->pagetable = tbm->dsapagetable;
873 : 72 : istate->spages = tbm->ptpages;
874 : 72 : istate->schunks = tbm->ptchunks;
875 : :
876 : 72 : ptbase = dsa_get_address(tbm->dsa, tbm->dsapagetable);
877 : 72 : ptpages = dsa_get_address(tbm->dsa, tbm->ptpages);
878 : 72 : ptchunks = dsa_get_address(tbm->dsa, tbm->ptchunks);
879 : :
880 : : /*
881 : : * For every shared iterator, referring to pagetable and iterator array,
882 : : * increase the refcount by 1 so that while freeing the shared iterator we
883 : : * don't free pagetable and iterator array until its refcount becomes 0.
884 : : */
2586 885 [ + - ]: 72 : if (ptbase != NULL)
886 : 72 : pg_atomic_add_fetch_u32(&ptbase->refcount, 1);
887 [ + - ]: 72 : if (ptpages != NULL)
2594 888 : 72 : pg_atomic_add_fetch_u32(&ptpages->refcount, 1);
2586 889 [ + + ]: 72 : if (ptchunks != NULL)
2594 890 : 6 : pg_atomic_add_fetch_u32(&ptchunks->refcount, 1);
891 : :
892 : : /* Initialize the iterator lock */
1430 tgl@sss.pgh.pa.us 893 : 72 : LWLockInitialize(&istate->lock, LWTRANCHE_SHARED_TIDBITMAP);
894 : :
895 : : /* Initialize the shared iterator state */
2594 rhaas@postgresql.org 896 : 72 : istate->schunkbit = 0;
897 : 72 : istate->schunkptr = 0;
898 : 72 : istate->spageptr = 0;
899 : :
900 : 72 : tbm->iterating = TBM_ITERATING_SHARED;
901 : :
902 : 72 : return dp;
903 : : }
904 : :
905 : : /*
906 : : * tbm_extract_page_tuple - extract the tuple offsets from a page
907 : : *
908 : : * The extracted offsets are stored into TBMIterateResult.
909 : : */
910 : : static inline int
911 : 234432 : tbm_extract_page_tuple(PagetableEntry *page, TBMIterateResult *output)
912 : : {
913 : : int wordnum;
914 : 234432 : int ntuples = 0;
915 : :
916 [ + + ]: 1406592 : for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
917 : : {
918 : 1172160 : bitmapword w = page->words[wordnum];
919 : :
920 [ + + ]: 1172160 : if (w != 0)
921 : : {
922 : 550920 : int off = wordnum * BITS_PER_BITMAPWORD + 1;
923 : :
924 [ + + ]: 23908287 : while (w != 0)
925 : : {
926 [ + + ]: 23357367 : if (w & 1)
927 : 5701699 : output->offsets[ntuples++] = (OffsetNumber) off;
928 : 23357367 : off++;
929 : 23357367 : w >>= 1;
930 : : }
931 : : }
932 : : }
933 : :
934 : 234432 : return ntuples;
935 : : }
936 : :
937 : : /*
938 : : * tbm_advance_schunkbit - Advance the schunkbit
939 : : */
940 : : static inline void
941 : 166330 : tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
942 : : {
943 : 166330 : int schunkbit = *schunkbitp;
944 : :
945 [ + + ]: 763224 : while (schunkbit < PAGES_PER_CHUNK)
946 : : {
947 : 760278 : int wordnum = WORDNUM(schunkbit);
948 : 760278 : int bitnum = BITNUM(schunkbit);
949 : :
950 [ + + ]: 760278 : if ((chunk->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
951 : 163384 : break;
952 : 596894 : schunkbit++;
953 : : }
954 : :
955 : 166330 : *schunkbitp = schunkbit;
956 : 166330 : }
957 : :
958 : : /*
959 : : * tbm_iterate - scan through next page of a TIDBitmap
960 : : *
961 : : * Returns a TBMIterateResult representing one page, or NULL if there are
962 : : * no more pages to scan. Pages are guaranteed to be delivered in numerical
963 : : * order. If result->ntuples < 0, then the bitmap is "lossy" and failed to
964 : : * remember the exact tuples to look at on this page --- the caller must
965 : : * examine all tuples on the page and check if they meet the intended
966 : : * condition. If result->recheck is true, only the indicated tuples need
967 : : * be examined, but the condition must be rechecked anyway. (For ease of
968 : : * testing, recheck is always set true when ntuples < 0.)
969 : : */
970 : : TBMIterateResult *
5573 tgl@sss.pgh.pa.us 971 : 377636 : tbm_iterate(TBMIterator *iterator)
972 : : {
5421 bruce@momjian.us 973 : 377636 : TIDBitmap *tbm = iterator->tbm;
5573 tgl@sss.pgh.pa.us 974 : 377636 : TBMIterateResult *output = &(iterator->output);
975 : :
2594 rhaas@postgresql.org 976 [ - + ]: 377636 : Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
977 : :
978 : : /*
979 : : * If lossy chunk pages remain, make sure we've advanced schunkptr/
980 : : * schunkbit to the next set bit.
981 : : */
5573 tgl@sss.pgh.pa.us 982 [ + + ]: 380558 : while (iterator->schunkptr < tbm->nchunks)
983 : : {
984 : 160180 : PagetableEntry *chunk = tbm->schunks[iterator->schunkptr];
985 : 160180 : int schunkbit = iterator->schunkbit;
986 : :
2594 rhaas@postgresql.org 987 : 160180 : tbm_advance_schunkbit(chunk, &schunkbit);
6937 tgl@sss.pgh.pa.us 988 [ + + ]: 160180 : if (schunkbit < PAGES_PER_CHUNK)
989 : : {
5573 990 : 157258 : iterator->schunkbit = schunkbit;
6937 991 : 157258 : break;
992 : : }
993 : : /* advance to next chunk */
5573 994 : 2922 : iterator->schunkptr++;
995 : 2922 : iterator->schunkbit = 0;
996 : : }
997 : :
998 : : /*
999 : : * If both chunk and per-page data remain, must output the numerically
1000 : : * earlier page.
1001 : : */
1002 [ + + ]: 377636 : if (iterator->schunkptr < tbm->nchunks)
1003 : : {
1004 : 157258 : PagetableEntry *chunk = tbm->schunks[iterator->schunkptr];
1005 : : BlockNumber chunk_blockno;
1006 : :
1007 : 157258 : chunk_blockno = chunk->blockno + iterator->schunkbit;
1008 [ + + ]: 157258 : if (iterator->spageptr >= tbm->npages ||
1009 [ + + ]: 9288 : chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
1010 : : {
1011 : : /* Return a lossy page indicator from the chunk */
6937 1012 : 154180 : output->blockno = chunk_blockno;
1013 : 154180 : output->ntuples = -1;
5848 1014 : 154180 : output->recheck = true;
5573 1015 : 154180 : iterator->schunkbit++;
6937 1016 : 154180 : return output;
1017 : : }
1018 : : }
1019 : :
5573 1020 [ + + ]: 223456 : if (iterator->spageptr < tbm->npages)
1021 : : {
1022 : : PagetableEntry *page;
1023 : : int ntuples;
1024 : :
1025 : : /* In TBM_ONE_PAGE state, we don't allocate an spages[] array */
6907 1026 [ + + ]: 207426 : if (tbm->status == TBM_ONE_PAGE)
1027 : 6661 : page = &tbm->entry1;
1028 : : else
5573 1029 : 200765 : page = tbm->spages[iterator->spageptr];
1030 : :
1031 : : /* scan bitmap to extract individual offset numbers */
2594 rhaas@postgresql.org 1032 : 207426 : ntuples = tbm_extract_page_tuple(page, output);
1033 : 207426 : output->blockno = page->blockno;
1034 : 207426 : output->ntuples = ntuples;
1035 : 207426 : output->recheck = page->recheck;
1036 : 207426 : iterator->spageptr++;
1037 : 207426 : return output;
1038 : : }
1039 : :
1040 : : /* Nothing more in the bitmap */
1041 : 16030 : return NULL;
1042 : : }
1043 : :
1044 : : /*
1045 : : * tbm_shared_iterate - scan through next page of a TIDBitmap
1046 : : *
1047 : : * As above, but this will iterate using an iterator which is shared
1048 : : * across multiple processes. We need to acquire the iterator LWLock,
1049 : : * before accessing the shared members.
1050 : : */
1051 : : TBMIterateResult *
1052 : 30448 : tbm_shared_iterate(TBMSharedIterator *iterator)
1053 : : {
1054 : 30448 : TBMIterateResult *output = &iterator->output;
1055 : 30448 : TBMSharedIteratorState *istate = iterator->state;
2586 1056 : 30448 : PagetableEntry *ptbase = NULL;
1057 : 30448 : int *idxpages = NULL;
1058 : 30448 : int *idxchunks = NULL;
1059 : :
1060 [ + - ]: 30448 : if (iterator->ptbase != NULL)
1061 : 30448 : ptbase = iterator->ptbase->ptentry;
1062 [ + - ]: 30448 : if (iterator->ptpages != NULL)
1063 : 30448 : idxpages = iterator->ptpages->index;
1064 [ + + ]: 30448 : if (iterator->ptchunks != NULL)
1065 : 7440 : idxchunks = iterator->ptchunks->index;
1066 : :
1067 : : /* Acquire the LWLock before accessing the shared members */
2594 1068 : 30448 : LWLockAcquire(&istate->lock, LW_EXCLUSIVE);
1069 : :
1070 : : /*
1071 : : * If lossy chunk pages remain, make sure we've advanced schunkptr/
1072 : : * schunkbit to the next set bit.
1073 : : */
1074 [ + + ]: 30472 : while (istate->schunkptr < istate->nchunks)
1075 : : {
1076 : 6150 : PagetableEntry *chunk = &ptbase[idxchunks[istate->schunkptr]];
1077 : 6150 : int schunkbit = istate->schunkbit;
1078 : :
1079 : 6150 : tbm_advance_schunkbit(chunk, &schunkbit);
1080 [ + + ]: 6150 : if (schunkbit < PAGES_PER_CHUNK)
1081 : : {
1082 : 6126 : istate->schunkbit = schunkbit;
1083 : 6126 : break;
1084 : : }
1085 : : /* advance to next chunk */
1086 : 24 : istate->schunkptr++;
1087 : 24 : istate->schunkbit = 0;
1088 : : }
1089 : :
1090 : : /*
1091 : : * If both chunk and per-page data remain, must output the numerically
1092 : : * earlier page.
1093 : : */
1094 [ + + ]: 30448 : if (istate->schunkptr < istate->nchunks)
1095 : : {
1096 : 6126 : PagetableEntry *chunk = &ptbase[idxchunks[istate->schunkptr]];
1097 : : BlockNumber chunk_blockno;
1098 : :
1099 : 6126 : chunk_blockno = chunk->blockno + istate->schunkbit;
1100 : :
1101 [ + - ]: 6126 : if (istate->spageptr >= istate->npages ||
2593 1102 [ + + ]: 6126 : chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
1103 : : {
1104 : : /* Return a lossy page indicator from the chunk */
2594 1105 : 3102 : output->blockno = chunk_blockno;
1106 : 3102 : output->ntuples = -1;
1107 : 3102 : output->recheck = true;
1108 : 3102 : istate->schunkbit++;
1109 : :
1110 : 3102 : LWLockRelease(&istate->lock);
1111 : 3102 : return output;
1112 : : }
1113 : : }
1114 : :
1115 [ + + ]: 27346 : if (istate->spageptr < istate->npages)
1116 : : {
1117 : 27006 : PagetableEntry *page = &ptbase[idxpages[istate->spageptr]];
1118 : : int ntuples;
1119 : :
1120 : : /* scan bitmap to extract individual offset numbers */
1121 : 27006 : ntuples = tbm_extract_page_tuple(page, output);
6937 tgl@sss.pgh.pa.us 1122 : 27006 : output->blockno = page->blockno;
1123 : 27006 : output->ntuples = ntuples;
5848 1124 : 27006 : output->recheck = page->recheck;
2594 rhaas@postgresql.org 1125 : 27006 : istate->spageptr++;
1126 : :
1127 : 27006 : LWLockRelease(&istate->lock);
1128 : :
6937 tgl@sss.pgh.pa.us 1129 : 27006 : return output;
1130 : : }
1131 : :
2594 rhaas@postgresql.org 1132 : 340 : LWLockRelease(&istate->lock);
1133 : :
1134 : : /* Nothing more in the bitmap */
6937 tgl@sss.pgh.pa.us 1135 : 340 : return NULL;
1136 : : }
1137 : :
1138 : : /*
1139 : : * tbm_end_iterate - finish an iteration over a TIDBitmap
1140 : : *
1141 : : * Currently this is just a pfree, but it might do more someday. (For
1142 : : * instance, it could be useful to count open iterators and allow the
1143 : : * bitmap to return to read/write status when there are no more iterators.)
1144 : : */
1145 : : void
5573 1146 : 18446 : tbm_end_iterate(TBMIterator *iterator)
1147 : : {
1148 : 18446 : pfree(iterator);
1149 : 18446 : }
1150 : :
1151 : : /*
1152 : : * tbm_end_shared_iterate - finish a shared iteration over a TIDBitmap
1153 : : *
1154 : : * This doesn't free any of the shared state associated with the iterator,
1155 : : * just our backend-private state.
1156 : : */
1157 : : void
2594 rhaas@postgresql.org 1158 : 348 : tbm_end_shared_iterate(TBMSharedIterator *iterator)
1159 : : {
1160 : 348 : pfree(iterator);
1161 : 348 : }
1162 : :
1163 : : /*
1164 : : * tbm_find_pageentry - find a PagetableEntry for the pageno
1165 : : *
1166 : : * Returns NULL if there is no non-lossy entry for the pageno.
1167 : : */
1168 : : static const PagetableEntry *
6937 tgl@sss.pgh.pa.us 1169 : 2557 : tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)
1170 : : {
1171 : : const PagetableEntry *page;
1172 : :
6907 1173 [ - + ]: 2557 : if (tbm->nentries == 0) /* in case pagetable doesn't exist */
6907 tgl@sss.pgh.pa.us 1174 :UBC 0 : return NULL;
1175 : :
6907 tgl@sss.pgh.pa.us 1176 [ - + ]:CBC 2557 : if (tbm->status == TBM_ONE_PAGE)
1177 : : {
6907 tgl@sss.pgh.pa.us 1178 :UBC 0 : page = &tbm->entry1;
1179 [ # # ]: 0 : if (page->blockno != pageno)
1180 : 0 : return NULL;
1181 [ # # ]: 0 : Assert(!page->ischunk);
1182 : 0 : return page;
1183 : : }
1184 : :
2739 andres@anarazel.de 1185 :CBC 2557 : page = pagetable_lookup(tbm->pagetable, pageno);
6937 tgl@sss.pgh.pa.us 1186 [ + + ]: 2557 : if (page == NULL)
1187 : 269 : return NULL;
1188 [ - + ]: 2288 : if (page->ischunk)
6937 tgl@sss.pgh.pa.us 1189 :UBC 0 : return NULL; /* don't want a lossy chunk header */
6937 tgl@sss.pgh.pa.us 1190 :CBC 2288 : return page;
1191 : : }
1192 : :
1193 : : /*
1194 : : * tbm_get_pageentry - find or create a PagetableEntry for the pageno
1195 : : *
1196 : : * If new, the entry is marked as an exact (non-chunk) entry.
1197 : : *
1198 : : * This may cause the table to exceed the desired memory size. It is
1199 : : * up to the caller to call tbm_lossify() at the next safe point if so.
1200 : : */
1201 : : static PagetableEntry *
1202 : 3602463 : tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)
1203 : : {
1204 : : PagetableEntry *page;
1205 : : bool found;
1206 : :
6907 1207 [ + + ]: 3602463 : if (tbm->status == TBM_EMPTY)
1208 : : {
1209 : : /* Use the fixed slot */
1210 : 6096 : page = &tbm->entry1;
1211 : 6096 : found = false;
1212 : 6096 : tbm->status = TBM_ONE_PAGE;
1213 : : }
1214 : : else
1215 : : {
1216 [ + + ]: 3596367 : if (tbm->status == TBM_ONE_PAGE)
1217 : : {
1218 : 43040 : page = &tbm->entry1;
1219 [ + + ]: 43040 : if (page->blockno == pageno)
1220 : 40337 : return page;
1221 : : /* Time to switch from one page to a hashtable */
1222 : 2703 : tbm_create_pagetable(tbm);
1223 : : }
1224 : :
1225 : : /* Look up or create an entry */
2739 andres@anarazel.de 1226 : 3556030 : page = pagetable_insert(tbm->pagetable, pageno, &found);
1227 : : }
1228 : :
1229 : : /* Initialize it if not present before */
6937 tgl@sss.pgh.pa.us 1230 [ + + ]: 3562126 : if (!found)
1231 : : {
2739 andres@anarazel.de 1232 : 130113 : char oldstatus = page->status;
1233 : :
6937 tgl@sss.pgh.pa.us 1234 [ + - + - : 910791 : MemSet(page, 0, sizeof(PagetableEntry));
+ - + - +
+ ]
2739 andres@anarazel.de 1235 : 130113 : page->status = oldstatus;
6937 tgl@sss.pgh.pa.us 1236 : 130113 : page->blockno = pageno;
1237 : : /* must count it too */
1238 : 130113 : tbm->nentries++;
1239 : 130113 : tbm->npages++;
1240 : : }
1241 : :
1242 : 3562126 : return page;
1243 : : }
1244 : :
1245 : : /*
1246 : : * tbm_page_is_lossy - is the page marked as lossily stored?
1247 : : */
1248 : : static bool
1249 : 3607876 : tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)
1250 : : {
1251 : : PagetableEntry *page;
1252 : : BlockNumber chunk_pageno;
1253 : : int bitno;
1254 : :
1255 : : /* we can skip the lookup if there are no lossy chunks */
1256 [ + + ]: 3607876 : if (tbm->nchunks == 0)
1257 : 3547561 : return false;
6907 1258 [ - + ]: 60315 : Assert(tbm->status == TBM_HASH);
1259 : :
6937 1260 : 60315 : bitno = pageno % PAGES_PER_CHUNK;
1261 : 60315 : chunk_pageno = pageno - bitno;
1262 : :
2739 andres@anarazel.de 1263 : 60315 : page = pagetable_lookup(tbm->pagetable, chunk_pageno);
1264 : :
6937 tgl@sss.pgh.pa.us 1265 [ + - + + ]: 60315 : if (page != NULL && page->ischunk)
1266 : : {
6756 bruce@momjian.us 1267 : 6216 : int wordnum = WORDNUM(bitno);
1268 : 6216 : int bitnum = BITNUM(bitno);
1269 : :
6937 tgl@sss.pgh.pa.us 1270 [ + + ]: 6216 : if ((page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0)
1271 : 2856 : return true;
1272 : : }
1273 : 57459 : return false;
1274 : : }
1275 : :
1276 : : /*
1277 : : * tbm_mark_page_lossy - mark the page number as lossily stored
1278 : : *
1279 : : * This may cause the table to exceed the desired memory size. It is
1280 : : * up to the caller to call tbm_lossify() at the next safe point if so.
1281 : : */
1282 : : static void
1283 : 80141 : tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
1284 : : {
1285 : : PagetableEntry *page;
1286 : : bool found;
1287 : : BlockNumber chunk_pageno;
1288 : : int bitno;
1289 : : int wordnum;
1290 : : int bitnum;
1291 : :
1292 : : /* We force the bitmap into hashtable mode whenever it's lossy */
6907 1293 [ + + ]: 80141 : if (tbm->status != TBM_HASH)
1294 : 1434 : tbm_create_pagetable(tbm);
1295 : :
6937 1296 : 80141 : bitno = pageno % PAGES_PER_CHUNK;
1297 : 80141 : chunk_pageno = pageno - bitno;
1298 : :
1299 : : /*
1300 : : * Remove any extant non-lossy entry for the page. If the page is its own
1301 : : * chunk header, however, we skip this and handle the case below.
1302 : : */
1303 [ + + ]: 80141 : if (bitno != 0)
1304 : : {
2739 andres@anarazel.de 1305 [ + + ]: 78977 : if (pagetable_delete(tbm->pagetable, pageno))
1306 : : {
1307 : : /* It was present, so adjust counts */
6937 tgl@sss.pgh.pa.us 1308 : 6156 : tbm->nentries--;
1309 : 6156 : tbm->npages--; /* assume it must have been non-lossy */
1310 : : }
1311 : : }
1312 : :
1313 : : /* Look up or create entry for chunk-header page */
2739 andres@anarazel.de 1314 : 80141 : page = pagetable_insert(tbm->pagetable, chunk_pageno, &found);
1315 : :
1316 : : /* Initialize it if not present before */
6937 tgl@sss.pgh.pa.us 1317 [ + + ]: 80141 : if (!found)
1318 : : {
2739 andres@anarazel.de 1319 : 1434 : char oldstatus = page->status;
1320 : :
6937 tgl@sss.pgh.pa.us 1321 [ + - + - : 10038 : MemSet(page, 0, sizeof(PagetableEntry));
+ - + - +
+ ]
2739 andres@anarazel.de 1322 : 1434 : page->status = oldstatus;
6937 tgl@sss.pgh.pa.us 1323 : 1434 : page->blockno = chunk_pageno;
1324 : 1434 : page->ischunk = true;
1325 : : /* must count it too */
1326 : 1434 : tbm->nentries++;
1327 : 1434 : tbm->nchunks++;
1328 : : }
1329 [ + + ]: 78707 : else if (!page->ischunk)
1330 : : {
2739 andres@anarazel.de 1331 : 51 : char oldstatus = page->status;
1332 : :
1333 : : /* chunk header page was formerly non-lossy, make it lossy */
6937 tgl@sss.pgh.pa.us 1334 [ + - + - : 357 : MemSet(page, 0, sizeof(PagetableEntry));
+ - + - +
+ ]
2739 andres@anarazel.de 1335 : 51 : page->status = oldstatus;
6937 tgl@sss.pgh.pa.us 1336 : 51 : page->blockno = chunk_pageno;
1337 : 51 : page->ischunk = true;
1338 : : /* we assume it had some tuple bit(s) set, so mark it lossy */
1339 : 51 : page->words[0] = ((bitmapword) 1 << 0);
1340 : : /* adjust counts */
1341 : 51 : tbm->nchunks++;
1342 : 51 : tbm->npages--;
1343 : : }
1344 : :
1345 : : /* Now set the original target page's bit */
1346 : 80141 : wordnum = WORDNUM(bitno);
1347 : 80141 : bitnum = BITNUM(bitno);
1348 : 80141 : page->words[wordnum] |= ((bitmapword) 1 << bitnum);
1349 : 80141 : }
1350 : :
1351 : : /*
1352 : : * tbm_lossify - lose some information to get back under the memory limit
1353 : : */
1354 : : static void
1355 : 12 : tbm_lossify(TIDBitmap *tbm)
1356 : : {
1357 : : pagetable_iterator i;
1358 : : PagetableEntry *page;
1359 : :
1360 : : /*
1361 : : * XXX Really stupid implementation: this just lossifies pages in
1362 : : * essentially random order. We should be paying some attention to the
1363 : : * number of bits set in each page, instead.
1364 : : *
1365 : : * Since we are called as soon as nentries exceeds maxentries, we should
1366 : : * push nentries down to significantly less than maxentries, or else we'll
1367 : : * just end up doing this again very soon. We shoot for maxentries/2.
1368 : : */
2594 rhaas@postgresql.org 1369 [ - + ]: 12 : Assert(tbm->iterating == TBM_NOT_ITERATING);
6907 tgl@sss.pgh.pa.us 1370 [ - + ]: 12 : Assert(tbm->status == TBM_HASH);
1371 : :
2739 andres@anarazel.de 1372 : 12 : pagetable_start_iterate_at(tbm->pagetable, &i, tbm->lossify_start);
1373 [ + - ]: 6165 : while ((page = pagetable_iterate(tbm->pagetable, &i)) != NULL)
1374 : : {
6937 tgl@sss.pgh.pa.us 1375 [ - + ]: 6165 : if (page->ischunk)
6937 tgl@sss.pgh.pa.us 1376 :UBC 0 : continue; /* already a chunk header */
1377 : :
1378 : : /*
1379 : : * If the page would become a chunk header, we won't save anything by
1380 : : * converting it to lossy, so skip it.
1381 : : */
6937 tgl@sss.pgh.pa.us 1382 [ + + ]:CBC 6165 : if ((page->blockno % PAGES_PER_CHUNK) == 0)
1383 : 9 : continue;
1384 : :
1385 : : /* This does the dirty work ... */
1386 : 6156 : tbm_mark_page_lossy(tbm, page->blockno);
1387 : :
4621 1388 [ + + ]: 6156 : if (tbm->nentries <= tbm->maxentries / 2)
1389 : : {
1390 : : /*
1391 : : * We have made enough room. Remember where to start lossifying
1392 : : * next round, so we evenly iterate over the hashtable.
1393 : : */
2739 andres@anarazel.de 1394 : 12 : tbm->lossify_start = i.cur;
6198 tgl@sss.pgh.pa.us 1395 : 12 : break;
1396 : : }
1397 : :
1398 : : /*
1399 : : * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
1400 : : * hashtable and may have deleted the non-lossy chunk. We can
1401 : : * continue the same hash table scan, since failure to visit one
1402 : : * element or visiting the newly inserted element, isn't fatal.
1403 : : */
1404 : : }
1405 : :
1406 : : /*
1407 : : * With a big bitmap and small work_mem, it's possible that we cannot get
1408 : : * under maxentries. Again, if that happens, we'd end up uselessly
1409 : : * calling tbm_lossify over and over. To prevent this from becoming a
1410 : : * performance sink, force maxentries up to at least double the current
1411 : : * number of entries. (In essence, we're admitting inability to fit
1412 : : * within work_mem when we do this.) Note that this test will not fire if
1413 : : * we broke out of the loop early; and if we didn't, the current number of
1414 : : * entries is simply not reducible any further.
1415 : : */
4621 1416 [ - + ]: 12 : if (tbm->nentries > tbm->maxentries / 2)
4621 tgl@sss.pgh.pa.us 1417 :UBC 0 : tbm->maxentries = Min(tbm->nentries, (INT_MAX - 1) / 2) * 2;
6937 tgl@sss.pgh.pa.us 1418 :CBC 12 : }
1419 : :
1420 : : /*
1421 : : * qsort comparator to handle PagetableEntry pointers.
1422 : : */
1423 : : static int
1424 : 684430 : tbm_comparator(const void *left, const void *right)
1425 : : {
4326 bruce@momjian.us 1426 : 684430 : BlockNumber l = (*((PagetableEntry *const *) left))->blockno;
1427 : 684430 : BlockNumber r = (*((PagetableEntry *const *) right))->blockno;
1428 : :
58 nathan@postgresql.or 1429 :GNC 684430 : return pg_cmp_u32(l, r);
1430 : : }
1431 : :
1432 : : /*
1433 : : * As above, but this will get index into PagetableEntry array. Therefore,
1434 : : * it needs to get actual PagetableEntry using the index before comparing the
1435 : : * blockno.
1436 : : */
1437 : : static int
2594 rhaas@postgresql.org 1438 :CBC 119145 : tbm_shared_comparator(const void *left, const void *right, void *arg)
1439 : : {
1440 : 119145 : PagetableEntry *base = (PagetableEntry *) arg;
1441 : 119145 : PagetableEntry *lpage = &base[*(int *) left];
1442 : 119145 : PagetableEntry *rpage = &base[*(int *) right];
1443 : :
1444 [ + + ]: 119145 : if (lpage->blockno < rpage->blockno)
1445 : 54483 : return -1;
1446 [ + - ]: 64662 : else if (lpage->blockno > rpage->blockno)
1447 : 64662 : return 1;
2594 rhaas@postgresql.org 1448 :UBC 0 : return 0;
1449 : : }
1450 : :
1451 : : /*
1452 : : * tbm_attach_shared_iterate
1453 : : *
1454 : : * Allocate a backend-private iterator and attach the shared iterator state
1455 : : * to it so that multiple processed can iterate jointly.
1456 : : *
1457 : : * We also converts the DSA pointers to local pointers and store them into
1458 : : * our private iterator.
1459 : : */
1460 : : TBMSharedIterator *
2594 rhaas@postgresql.org 1461 :CBC 348 : tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
1462 : : {
1463 : : TBMSharedIterator *iterator;
1464 : : TBMSharedIteratorState *istate;
1465 : :
1466 : : /*
1467 : : * Create the TBMSharedIterator struct, with enough trailing space to
1468 : : * serve the needs of the TBMIterateResult sub-struct.
1469 : : */
2586 1470 : 348 : iterator = (TBMSharedIterator *) palloc0(sizeof(TBMSharedIterator) +
1471 : : MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
1472 : :
2594 1473 : 348 : istate = (TBMSharedIteratorState *) dsa_get_address(dsa, dp);
1474 : :
1475 : 348 : iterator->state = istate;
1476 : :
1477 : 348 : iterator->ptbase = dsa_get_address(dsa, istate->pagetable);
1478 : :
1479 [ + - ]: 348 : if (istate->npages)
1480 : 348 : iterator->ptpages = dsa_get_address(dsa, istate->spages);
1481 [ + + ]: 348 : if (istate->nchunks)
1482 : 30 : iterator->ptchunks = dsa_get_address(dsa, istate->schunks);
1483 : :
1484 : 348 : return iterator;
1485 : : }
1486 : :
1487 : : /*
1488 : : * pagetable_allocate
1489 : : *
1490 : : * Callback function for allocating the memory for hashtable elements.
1491 : : * Allocate memory for hashtable elements, using DSA if available.
1492 : : */
1493 : : static inline void *
1494 : 4315 : pagetable_allocate(pagetable_hash *pagetable, Size size)
1495 : : {
1496 : 4315 : TIDBitmap *tbm = (TIDBitmap *) pagetable->private_data;
1497 : : PTEntryArray *ptbase;
1498 : :
1499 [ + + ]: 4315 : if (tbm->dsa == NULL)
1500 : 4237 : return MemoryContextAllocExtended(pagetable->ctx, size,
1501 : : MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
1502 : :
1503 : : /*
1504 : : * Save the dsapagetable reference in dsapagetableold before allocating
1505 : : * new memory so that pagetable_free can free the old entry.
1506 : : */
1507 : 78 : tbm->dsapagetableold = tbm->dsapagetable;
2575 1508 : 78 : tbm->dsapagetable = dsa_allocate_extended(tbm->dsa,
1509 : : sizeof(PTEntryArray) + size,
1510 : : DSA_ALLOC_HUGE | DSA_ALLOC_ZERO);
2594 1511 : 78 : ptbase = dsa_get_address(tbm->dsa, tbm->dsapagetable);
1512 : :
1513 : 78 : return ptbase->ptentry;
1514 : : }
1515 : :
1516 : : /*
1517 : : * pagetable_free
1518 : : *
1519 : : * Callback function for freeing hash table elements.
1520 : : */
1521 : : static inline void
1522 : 4313 : pagetable_free(pagetable_hash *pagetable, void *pointer)
1523 : : {
1524 : 4313 : TIDBitmap *tbm = (TIDBitmap *) pagetable->private_data;
1525 : :
1526 : : /* pfree the input pointer if DSA is not available */
1527 [ + + ]: 4313 : if (tbm->dsa == NULL)
1528 : 4235 : pfree(pointer);
1529 [ + + ]: 78 : else if (DsaPointerIsValid(tbm->dsapagetableold))
1530 : : {
1531 : 42 : dsa_free(tbm->dsa, tbm->dsapagetableold);
1532 : 42 : tbm->dsapagetableold = InvalidDsaPointer;
1533 : : }
1534 : 4313 : }
1535 : :
1536 : : /*
1537 : : * tbm_calculate_entries
1538 : : *
1539 : : * Estimate number of hashtable entries we can have within maxbytes.
1540 : : */
1541 : : long
2347 1542 : 286980 : tbm_calculate_entries(double maxbytes)
1543 : : {
1544 : : long nbuckets;
1545 : :
1546 : : /*
1547 : : * Estimate number of hashtable entries we can have within maxbytes. This
1548 : : * estimates the hash cost as sizeof(PagetableEntry), which is good enough
1549 : : * for our purpose. Also count an extra Pointer per entry for the arrays
1550 : : * created during iteration readout.
1551 : : */
1552 : 286980 : nbuckets = maxbytes /
1553 : : (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
1554 : 286980 : nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
1555 : 286980 : nbuckets = Max(nbuckets, 16); /* sanity limit */
1556 : :
1557 : 286980 : return nbuckets;
1558 : : }
|