/*-------------------------------------------------------------------------
 *
 * hashinsert.c
 *    Item insertion in hash tables for Postgres.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hashinsert.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "access/hash_xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/lwlock.h"
#include "storage/predicate.h"
#include "utils/rel.h"

static void _hash_vacuum_one_page(Relation rel, Relation hrel,
                                  Buffer metabuf, Buffer buf);

/*
 * _hash_doinsert() -- Handle insertion of a single index tuple.
 *
 * This routine is called by the public interface routines, hashbuild
 * and hashinsert.  By here, itup is completely filled in.
 *
 * 'sorted' must only be passed as 'true' when inserts are done in hashkey
 * order.
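 *
 * (As of this writing, only the sorted index-build path in hashsort.c is
 * expected to pass 'sorted' as 'true'; ordinary inserts pass 'false'.)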
 */
void
_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
{
    Buffer      buf = InvalidBuffer;
    Buffer      bucket_buf;
    Buffer      metabuf;
    HashMetaPage metap;
    HashMetaPage usedmetap = NULL;
    Page        metapage;
    Page        page;
    HashPageOpaque pageopaque;
    Size        itemsz;
    bool        do_expand;
    uint32      hashkey;
    Bucket      bucket;
    OffsetNumber itup_off;

    /*
     * Get the hash key for the item (it's stored in the index tuple itself).
     */
    hashkey = _hash_get_indextuple_hashkey(itup);

    /* compute item size too */
    itemsz = IndexTupleSize(itup);
    itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
                                 * need to be consistent */

restart_insert:

    /*
     * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
     * examine pd_pagesize_version, but that can't change so we can examine
     * it without a lock.
     */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
    metapage = BufferGetPage(metabuf);

    /*
     * Check whether the item can fit on a hash page at all.  (Eventually, we
     * ought to try to apply TOAST methods if not.)  Note that at this point,
     * itemsz doesn't include the ItemId.
     *
     * XXX this is useless code if we are only storing hash keys.
     */
    if (itemsz > HashMaxItemSize(metapage))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %zu exceeds hash maximum %zu",
                        itemsz, HashMaxItemSize(metapage)),
                 errhint("Values larger than a buffer page cannot be indexed.")));

    /* Lock the primary bucket page for the target bucket. */
    buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
                                          &usedmetap);
    Assert(usedmetap != NULL);

    CheckForSerializableConflictIn(rel, NULL, BufferGetBlockNumber(buf));

    /* remember the primary bucket buffer to release the pin on it at end. */
    bucket_buf = buf;

    page = BufferGetPage(buf);
    pageopaque = HashPageGetOpaque(page);
    bucket = pageopaque->hasho_bucket;

    /*
     * If this bucket is in the process of being split, try to finish the
     * split before inserting, because that might create room for the
     * insertion to proceed without allocating an additional overflow page.
     * It's only interesting to finish the split if we're trying to insert
     * into the bucket from which we're removing tuples (the "old" bucket),
     * not if we're trying to insert into the bucket into which tuples are
     * being moved (the "new" bucket).
     */
    if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
    {
        /* release the lock on bucket buffer, before completing the split. */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        _hash_finish_split(rel, metabuf, buf, bucket,
                           usedmetap->hashm_maxbucket,
                           usedmetap->hashm_highmask,
                           usedmetap->hashm_lowmask);

        /* release the pin on old and meta buffer.  retry for insert. */
        _hash_dropbuf(rel, buf);
        _hash_dropbuf(rel, metabuf);
        goto restart_insert;
    }

    /* Do the insertion */
    while (PageGetFreeSpace(page) < itemsz)
    {
        BlockNumber nextblkno;

        /*
         * Check if the current page has any DEAD tuples.  If yes, delete
         * these tuples and see if we can get space for the new item to be
         * inserted before moving to the next page in the bucket chain.
         */
        if (H_HAS_DEAD_TUPLES(pageopaque))
        {
            if (IsBufferCleanupOK(buf))
            {
                _hash_vacuum_one_page(rel, heapRel, metabuf, buf);

                if (PageGetFreeSpace(page) >= itemsz)
                    break;      /* OK, now we have enough space */
            }
        }

        /*
         * no space on this page; check for an overflow page
         */
        nextblkno = pageopaque->hasho_nextblkno;

        if (BlockNumberIsValid(nextblkno))
        {
            /*
             * ovfl page exists; go get it.  if it doesn't have room, we'll
             * find out next pass through the loop test above.  we always
             * release both the lock and pin if this is an overflow page,
             * but only the lock if this is the primary bucket page, since
             * the pin on the primary bucket must be retained throughout the
             * scan.
             */
            if (buf != bucket_buf)
                _hash_relbuf(rel, buf);
            else
                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            page = BufferGetPage(buf);
        }
        else
        {
            /*
             * we're at the end of the bucket chain and we haven't found a
             * page with enough room.  allocate a new overflow page.
             */

            /* release our write lock without modifying buffer */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);

            /* chain to a new overflow page */
            buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf));
            page = BufferGetPage(buf);

            /* should fit now, given test above */
            Assert(PageGetFreeSpace(page) >= itemsz);
        }
        pageopaque = HashPageGetOpaque(page);
        Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_OVERFLOW_PAGE);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    /*
     * Write-lock the metapage so we can increment the tuple count.  After
     * incrementing it, check to see if it's time for a split.
     */
    LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

    /* Do the update.  No ereport(ERROR) until changes are logged */
    START_CRIT_SECTION();

    /* found page with enough space, so add the item here */
    itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
    MarkBufferDirty(buf);

    /* metapage operations */
    metap = HashPageGetMeta(metapage);
    metap->hashm_ntuples += 1;

    /* Make sure this stays in sync with _hash_expandtable() */
    do_expand = metap->hashm_ntuples >
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
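
    /*
     * Illustrative example: with hashm_ffactor = 75 and hashm_maxbucket = 3
     * (i.e. four buckets), the expression above requests a split once the
     * index grows past 75 * 4 = 300 tuples.
     */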

    MarkBufferDirty(metabuf);

    /* XLOG stuff */
    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfHashInsert);

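        /*
         * Buffer 1 is the metapage (whose tuple count changed) and buffer 0
         * is the page receiving the tuple, with the tuple bytes attached as
         * buffer data; hash_xlog_insert() redoes both during replay.
         */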
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, (char *) itup, IndexTupleSize(itup));

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();

    /* drop lock on metapage, but keep pin */
    LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);

    /*
     * Release the modified page, and make sure to release the pin on the
     * primary page.
     */
    _hash_relbuf(rel, buf);
    if (buf != bucket_buf)
        _hash_dropbuf(rel, bucket_buf);

    /* Attempt to split if a split is needed */
    if (do_expand)
        _hash_expandtable(rel, metabuf);

    /* Finally drop our pin on the metapage */
    _hash_dropbuf(rel, metabuf);
}

/*
 * _hash_pgaddtup() -- add a tuple to a particular page in the index.
 *
 * This routine adds the tuple to the page as requested; it does not write
 * out the page.  It is an error to call this function without a pin and
 * write lock on the target buffer.
 *
 * Returns the offset number at which the tuple was inserted.  This function
 * is responsible for preserving the condition that tuples in a hash index
 * page are sorted by hashkey value; however, if the caller is certain that
 * the hashkey for the tuple being added is >= the hashkeys of all existing
 * tuples on the page, then the 'appendtup' flag may be passed as true.  This
 * saves us from having to binary search for the correct location to insert
 * the tuple.
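 *
 * For example, a sorted index build may pass 'appendtup' as true: each
 * incoming tuple's hashkey is known to be >= those already on the page,
 * so the tuple can simply be placed after the current max offset.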
 */
OffsetNumber
_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
               bool appendtup)
{
    OffsetNumber itup_off;
    Page        page;

    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);

    /*
     * Find where to insert the tuple (preserving page's hashkey ordering).
     * If 'appendtup' is true then we just insert it at the end.
     */
    if (appendtup)
    {
        itup_off = PageGetMaxOffsetNumber(page) + 1;

#ifdef USE_ASSERT_CHECKING
        /* ensure this tuple's hashkey is >= the final existing tuple */
        if (PageGetMaxOffsetNumber(page) > 0)
        {
            IndexTuple  lasttup;
            ItemId      itemid;

            itemid = PageGetItemId(page, PageGetMaxOffsetNumber(page));
            lasttup = (IndexTuple) PageGetItem(page, itemid);

            Assert(_hash_get_indextuple_hashkey(lasttup) <=
                   _hash_get_indextuple_hashkey(itup));
        }
#endif
    }
    else
    {
        uint32      hashkey = _hash_get_indextuple_hashkey(itup);

        itup_off = _hash_binsearch(page, hashkey);
    }

    if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
        == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(rel));

    return itup_off;
}

/*
 * _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
 * index.
 *
 * This routine has the same requirements for locking and tuple ordering as
 * _hash_pgaddtup().
 *
 * Returns the offset number array at which the tuples were inserted.
 */
void
_hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
                    OffsetNumber *itup_offsets, uint16 nitups)
{
    OffsetNumber itup_off;
    Page        page;
    uint32      hashkey;
    int         i;

    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);

    for (i = 0; i < nitups; i++)
    {
        Size        itemsize;

        itemsize = IndexTupleSize(itups[i]);
        itemsize = MAXALIGN(itemsize);

        /* Find where to insert the tuple (preserving page's hashkey ordering) */
        hashkey = _hash_get_indextuple_hashkey(itups[i]);
        itup_off = _hash_binsearch(page, hashkey);

        itup_offsets[i] = itup_off;

        if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
            == InvalidOffsetNumber)
            elog(ERROR, "failed to add index item to \"%s\"",
                 RelationGetRelationName(rel));
    }
}

/*
 * _hash_vacuum_one_page - vacuum just one index page.
 *
 * Try to remove LP_DEAD items from the given page.  We must acquire a
 * cleanup lock on the page being modified before calling this function.
 */
static void
_hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf)
{
    OffsetNumber deletable[MaxOffsetNumber];
    int         ndeletable = 0;
    OffsetNumber offnum,
                maxoff;
    Page        page = BufferGetPage(buf);
    HashPageOpaque pageopaque;
    HashMetaPage metap;

    /* Scan each tuple in page to see if it is marked as LP_DEAD */
    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemId = PageGetItemId(page, offnum);

        if (ItemIdIsDead(itemId))
            deletable[ndeletable++] = offnum;
    }
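
    /*
     * Note: LP_DEAD bits are set opportunistically by index scans (see
     * _hash_kill_items()), so everything collected above is already known
     * dead; we only need a conflict horizon for hot-standby replay before
     * removing the items.
     */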

    if (ndeletable > 0)
    {
        TransactionId snapshotConflictHorizon;

        snapshotConflictHorizon =
            index_compute_xid_horizon_for_tuples(rel, hrel, buf,
                                                 deletable, ndeletable);

        /*
         * Write-lock the meta page so that we can decrement the tuple count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();

        PageIndexMultiDelete(page, deletable, ndeletable);

        /*
         * Mark the page as not containing any LP_DEAD items.  This is not
         * certainly true (there might be some that have recently been
         * marked, but weren't included in our target-item list), but it
         * will almost always be true and it doesn't seem worth an
         * additional page scan to check it.  Remember that
         * LH_PAGE_HAS_DEAD_TUPLES is only a hint anyway.
         */
        pageopaque = HashPageGetOpaque(page);
        pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;

        metap = HashPageGetMeta(BufferGetPage(metabuf));
        metap->hashm_ntuples -= ndeletable;

        MarkBufferDirty(buf);
        MarkBufferDirty(metabuf);

        /* XLOG stuff */
        if (RelationNeedsWAL(rel))
        {
            xl_hash_vacuum_one_page xlrec;
            XLogRecPtr  recptr;

            xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel);
            xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
            xlrec.ntuples = ndeletable;

            XLogBeginInsert();
            XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
            XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);

            /*
             * We need the target-offsets array whether or not we store the
             * whole buffer, to allow us to find the snapshotConflictHorizon
             * on a standby server.
             */
            XLogRegisterData((char *) deletable,
                             ndeletable * sizeof(OffsetNumber));

            XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

            recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_VACUUM_ONE_PAGE);

            PageSetLSN(BufferGetPage(buf), recptr);
            PageSetLSN(BufferGetPage(metabuf), recptr);
        }

        END_CRIT_SECTION();

        /*
         * Release the write lock on the meta page now that we have updated
         * the tuple count.
         */
        LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
    }
}