Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gininsert.c
4 : * insert routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gininsert.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/tableam.h"
20 : #include "access/xloginsert.h"
21 : #include "catalog/index.h"
22 : #include "miscadmin.h"
23 : #include "storage/bufmgr.h"
24 : #include "storage/indexfsm.h"
25 : #include "storage/predicate.h"
26 : #include "storage/smgr.h"
27 : #include "utils/memutils.h"
28 : #include "utils/rel.h"
29 :
30 : typedef struct
31 : {
32 : GinState ginstate;
33 : double indtuples;
34 : GinStatsData buildStats;
35 : MemoryContext tmpCtx;
36 : MemoryContext funcCtx;
37 : BuildAccumulator accum;
38 : } GinBuildState;
39 :
40 :
41 : /*
42 : * Adds array of item pointers to tuple's posting list, or
43 : * creates posting tree and tuple pointing to tree in case
44 : * of not enough space. Max size of tuple is defined in
45 : * GinFormTuple(). Returns a new, modified index tuple.
46 : * items[] must be in sorted order with no duplicates.
47 : */
48 : static IndexTuple
4475 tgl 49 CBC 16326 : addItemPointersToLeafTuple(GinState *ginstate,
50 : IndexTuple old,
51 : ItemPointerData *items, uint32 nitem,
52 : GinStatsData *buildStats, Buffer buffer)
53 : {
54 : OffsetNumber attnum;
55 : Datum key;
56 : GinNullCategory category;
57 : IndexTuple res;
58 : ItemPointerData *newItems,
59 : *oldItems;
60 : int oldNPosting,
61 : newNPosting;
62 : GinPostingList *compressedList;
63 :
64 16326 : Assert(!GinIsPostingTree(old));
65 :
66 16326 : attnum = gintuple_get_attrnum(ginstate, old);
67 16326 : key = gintuple_get_key(ginstate, old, &category);
68 :
69 : /* merge the old and new posting lists */
3364 heikki.linnakangas 70 16326 : oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
71 :
3303 72 16326 : newItems = ginMergeItemPointers(items, nitem,
73 : oldItems, oldNPosting,
74 : &newNPosting);
75 :
76 : /* Compress the posting list, and try to a build tuple with room for it */
3364 77 16326 : res = NULL;
78 16326 : compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize,
79 : NULL);
80 16326 : pfree(newItems);
81 16326 : if (compressedList)
82 : {
83 16326 : res = GinFormTuple(ginstate, attnum, key, category,
84 : (char *) compressedList,
85 16326 : SizeOfGinPostingList(compressedList),
86 : newNPosting,
87 : false);
88 16326 : pfree(compressedList);
89 : }
90 16326 : if (!res)
91 : {
92 : /* posting list would be too big, convert to posting tree */
93 : BlockNumber postingRoot;
94 :
95 : /*
96 : * Initialize posting tree with the old tuple's posting list. It's
97 : * surely small enough to fit on one posting-tree page, and should
98 : * already be in order with no duplicates.
99 : */
4475 tgl 100 8 : postingRoot = createPostingTree(ginstate->index,
101 : oldItems,
102 : oldNPosting,
103 : buildStats,
104 : buffer);
105 :
106 : /* Now insert the TIDs-to-be-added into the posting tree */
3427 heikki.linnakangas 107 8 : ginInsertItemPointers(ginstate->index, postingRoot,
108 : items, nitem,
109 : buildStats);
110 :
111 : /* And build a new posting-tree-only result tuple */
3364 112 8 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
4475 tgl 113 8 : GinSetPostingTree(res, postingRoot);
114 : }
3364 heikki.linnakangas 115 16326 : pfree(oldItems);
116 :
4475 tgl 117 16326 : return res;
118 : }
119 :
120 : /*
121 : * Build a fresh leaf tuple, either posting-list or posting-tree format
122 : * depending on whether the given items list will fit.
123 : * items[] must be in sorted order with no duplicates.
124 : *
125 : * This is basically the same logic as in addItemPointersToLeafTuple,
126 : * but working from slightly different input.
127 : */
128 : static IndexTuple
129 254592 : buildFreshLeafTuple(GinState *ginstate,
130 : OffsetNumber attnum, Datum key, GinNullCategory category,
131 : ItemPointerData *items, uint32 nitem,
132 : GinStatsData *buildStats, Buffer buffer)
133 : {
3364 heikki.linnakangas 134 254592 : IndexTuple res = NULL;
135 : GinPostingList *compressedList;
136 :
137 : /* try to build a posting list tuple with all the items */
138 254592 : compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
139 254592 : if (compressedList)
140 : {
141 254592 : res = GinFormTuple(ginstate, attnum, key, category,
142 : (char *) compressedList,
143 254592 : SizeOfGinPostingList(compressedList),
144 : nitem, false);
145 254592 : pfree(compressedList);
146 : }
4475 tgl 147 254592 : if (!res)
148 : {
149 : /* posting list would be too big, build posting tree */
150 : BlockNumber postingRoot;
151 :
152 : /*
153 : * Build posting-tree-only result tuple. We do this first so as to
154 : * fail quickly if the key is too big.
155 : */
3364 heikki.linnakangas 156 50 : res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
157 :
158 : /*
159 : * Initialize a new posting tree with the TIDs.
160 : */
3441 161 50 : postingRoot = createPostingTree(ginstate->index, items, nitem,
162 : buildStats, buffer);
163 :
164 : /* And save the root link in the result tuple */
4475 tgl 165 50 : GinSetPostingTree(res, postingRoot);
166 : }
167 :
6186 teodor 168 254592 : return res;
169 : }
170 :
171 : /*
172 : * Insert one or more heap TIDs associated with the given key value.
173 : * This will either add a single key entry, or enlarge a pre-existing entry.
174 : *
175 : * During an index build, buildStats is non-null and the counters
176 : * it contains should be incremented as needed.
177 : */
178 : void
4475 tgl 179 295620 : ginEntryInsert(GinState *ginstate,
180 : OffsetNumber attnum, Datum key, GinNullCategory category,
181 : ItemPointerData *items, uint32 nitem,
182 : GinStatsData *buildStats)
183 : {
184 : GinBtreeData btree;
185 : GinBtreeEntryInsertData insertdata;
186 : GinBtreeStack *stack;
187 : IndexTuple itup;
188 : Page page;
189 :
2062 peter_e 190 295620 : insertdata.isDelete = false;
191 :
4475 tgl 192 295620 : ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
1467 heikki.linnakangas 193 295620 : btree.isBuild = (buildStats != NULL);
194 :
1564 akorotkov 195 295620 : stack = ginFindLeafPage(&btree, false, false, NULL);
2545 kgrittn 196 295620 : page = BufferGetPage(stack->buffer);
197 :
6031 bruce 198 295620 : if (btree.findItem(&btree, stack))
199 : {
200 : /* found pre-existing entry */
6186 teodor 201 41026 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
202 :
6031 bruce 203 41026 : if (GinIsPostingTree(itup))
204 : {
205 : /* add entries to existing posting tree */
206 24696 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
207 :
208 : /* release all stack */
6186 teodor 209 24696 : LockBuffer(stack->buffer, GIN_UNLOCK);
6031 bruce 210 24696 : freeGinBtreeStack(stack);
211 :
212 : /* insert into posting tree */
3427 heikki.linnakangas 213 24696 : ginInsertItemPointers(ginstate->index, rootPostingTree,
214 : items, nitem,
215 : buildStats);
6186 teodor 216 24694 : return;
217 : }
218 :
1167 tmunro 219 16330 : CheckForSerializableConflictIn(ginstate->index, NULL,
220 : BufferGetBlockNumber(stack->buffer));
221 : /* modify an existing leaf entry */
4475 tgl 222 16326 : itup = addItemPointersToLeafTuple(ginstate, itup,
223 : items, nitem, buildStats, stack->buffer);
224 :
2062 peter_e 225 16326 : insertdata.isDelete = true;
226 : }
227 : else
228 : {
1167 tmunro 229 254594 : CheckForSerializableConflictIn(ginstate->index, NULL,
230 : BufferGetBlockNumber(stack->buffer));
231 : /* no match, so construct a new leaf entry */
4475 tgl 232 254592 : itup = buildFreshLeafTuple(ginstate, attnum, key, category,
233 : items, nitem, buildStats, stack->buffer);
234 :
235 : /*
236 : * nEntries counts leaf tuples, so increment it only when we make a
237 : * new one.
238 : */
1252 239 254592 : if (buildStats)
240 74570 : buildStats->nEntries++;
241 : }
242 :
243 : /* Insert the new or modified leaf tuple */
3420 heikki.linnakangas 244 270918 : insertdata.entry = itup;
245 270918 : ginInsertValue(&btree, stack, &insertdata, buildStats);
6031 bruce 246 270918 : pfree(itup);
247 : }
248 :
249 : /*
250 : * Extract index entries for a single indexable item, and add them to the
251 : * BuildAccumulator's state.
252 : *
253 : * This function is used only during initial index creation.
254 : */
255 : static void
4475 tgl 256 434612 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
257 : Datum value, bool isNull,
258 : ItemPointer heapptr)
259 : {
260 : Datum *entries;
261 : GinNullCategory *categories;
262 : int32 nentries;
263 : MemoryContext oldCtx;
264 :
6116 teodor 265 434612 : oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
4475 tgl 266 434612 : entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
267 : value, isNull,
268 : &nentries, &categories);
6116 teodor 269 434612 : MemoryContextSwitchTo(oldCtx);
270 :
4475 tgl 271 434612 : ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
272 : entries, categories, nentries);
273 :
274 434612 : buildstate->indtuples += nentries;
275 :
6116 teodor 276 434612 : MemoryContextReset(buildstate->funcCtx);
6186 277 434612 : }
278 :
279 : static void
1248 andres 280 434303 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
281 : bool *isnull, bool tupleIsAlive, void *state)
282 : {
6031 bruce 283 434303 : GinBuildState *buildstate = (GinBuildState *) state;
284 : MemoryContext oldCtx;
285 : int i;
286 :
6186 teodor 287 434303 : oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
288 :
5050 bruce 289 868915 : for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
4475 tgl 290 434612 : ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
1248 andres 291 434612 : values[i], isnull[i], tid);
292 :
293 : /* If we've maxed out our available memory, dump everything to the index */
2495 rhaas 294 434303 : if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)
295 : {
296 : ItemPointerData *list;
297 : Datum key;
298 : GinNullCategory category;
299 : uint32 nlist;
300 : OffsetNumber attnum;
301 :
4634 tgl 302 UBC 0 : ginBeginBAScan(&buildstate->accum);
4475 303 0 : while ((list = ginGetBAEntry(&buildstate->accum,
2118 304 0 : &attnum, &key, &category, &nlist)) != NULL)
305 : {
306 : /* there could be many entries, so be willing to abort here */
5441 307 0 : CHECK_FOR_INTERRUPTS();
4475 308 0 : ginEntryInsert(&buildstate->ginstate, attnum, key, category,
309 : list, nlist, &buildstate->buildStats);
310 : }
311 :
6186 teodor 312 0 : MemoryContextReset(buildstate->tmpCtx);
313 0 : ginInitBA(&buildstate->accum);
314 : }
315 :
6186 teodor 316 CBC 434303 : MemoryContextSwitchTo(oldCtx);
317 434303 : }
318 :
319 : IndexBuildResult *
2639 tgl 320 152 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
321 : {
322 : IndexBuildResult *result;
323 : double reltuples;
324 : GinBuildState buildstate;
325 : Buffer RootBuffer,
326 : MetaBuffer;
327 : ItemPointerData *list;
328 : Datum key;
329 : GinNullCategory category;
330 : uint32 nlist;
331 : MemoryContext oldCtx;
332 : OffsetNumber attnum;
333 :
6186 teodor 334 152 : if (RelationGetNumberOfBlocks(index) != 0)
6186 teodor 335 UBC 0 : elog(ERROR, "index \"%s\" already contains data",
336 : RelationGetRelationName(index));
337 :
6186 teodor 338 CBC 152 : initGinState(&buildstate.ginstate, index);
4557 tgl 339 152 : buildstate.indtuples = 0;
340 152 : memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
341 :
342 : /* initialize the meta page */
5129 343 152 : MetaBuffer = GinNewBuffer(index);
344 :
345 : /* initialize the root page */
346 152 : RootBuffer = GinNewBuffer(index);
347 :
6186 teodor 348 152 : START_CRIT_SECTION();
5129 tgl 349 152 : GinInitMetabuffer(MetaBuffer);
350 152 : MarkBufferDirty(MetaBuffer);
351 152 : GinInitBuffer(RootBuffer, GIN_LEAF);
352 152 : MarkBufferDirty(RootBuffer);
353 :
354 :
355 152 : UnlockReleaseBuffer(MetaBuffer);
356 152 : UnlockReleaseBuffer(RootBuffer);
6186 teodor 357 152 : END_CRIT_SECTION();
358 :
359 : /* count the root as first entry page */
4557 tgl 360 152 : buildstate.buildStats.nEntryPages++;
361 :
362 : /*
363 : * create a temporary memory context that is used to hold data not yet
364 : * dumped out to the index
365 : */
6186 teodor 366 152 : buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
367 : "Gin build temporary context",
368 : ALLOCSET_DEFAULT_SIZES);
369 :
370 : /*
371 : * create a temporary memory context that is used for calling
372 : * ginExtractEntries(), and can be reset after each tuple
373 : */
2933 tgl 374 152 : buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
375 : "Gin build temporary context for user-defined function",
376 : ALLOCSET_DEFAULT_SIZES);
377 :
6186 teodor 378 152 : buildstate.accum.ginstate = &buildstate.ginstate;
6031 bruce 379 152 : ginInitBA(&buildstate.accum);
380 :
381 : /*
382 : * Do the heap scan. We disallow sync scan here because dataPlaceToPage
383 : * prefers to receive tuples in TID order.
384 : */
1468 alvherre 385 152 : reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
386 : ginBuildCallback, (void *) &buildstate,
387 : NULL);
388 :
389 : /* dump remaining entries to the index */
6186 teodor 390 152 : oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
4634 tgl 391 152 : ginBeginBAScan(&buildstate.accum);
4475 392 74722 : while ((list = ginGetBAEntry(&buildstate.accum,
393 74722 : &attnum, &key, &category, &nlist)) != NULL)
394 : {
395 : /* there could be many entries, so be willing to abort here */
5441 396 74570 : CHECK_FOR_INTERRUPTS();
4475 397 74570 : ginEntryInsert(&buildstate.ginstate, attnum, key, category,
398 : list, nlist, &buildstate.buildStats);
399 : }
6186 teodor 400 152 : MemoryContextSwitchTo(oldCtx);
401 :
2933 tgl 402 152 : MemoryContextDelete(buildstate.funcCtx);
6186 teodor 403 152 : MemoryContextDelete(buildstate.tmpCtx);
404 :
405 : /*
406 : * Update metapage stats
407 : */
4557 tgl 408 152 : buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
1467 heikki.linnakangas 409 152 : ginUpdateStats(index, &buildstate.buildStats, true);
410 :
411 : /*
412 : * We didn't write WAL records as we built the index, so if WAL-logging is
413 : * required, write all pages to the WAL now.
414 : */
415 152 : if (RelationNeedsWAL(index))
416 : {
417 91 : log_newpage_range(index, MAIN_FORKNUM,
418 : 0, RelationGetNumberOfBlocks(index),
419 : true);
420 : }
421 :
422 : /*
423 : * Return statistics
424 : */
6178 tgl 425 152 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
426 :
427 152 : result->heap_tuples = reltuples;
428 152 : result->index_tuples = buildstate.indtuples;
429 :
2639 430 152 : return result;
431 : }
432 :
433 : /*
434 : * ginbuildempty() -- build an empty gin index in the initialization fork
435 : */
436 : void
437 3 : ginbuildempty(Relation index)
438 : {
439 : Buffer RootBuffer,
440 : MetaBuffer;
441 :
442 : /* An empty GIN index has two pages. */
4 andres 443 GNC 3 : MetaBuffer = ExtendBufferedRel(EB_REL(index), INIT_FORKNUM, NULL,
444 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
445 3 : RootBuffer = ExtendBufferedRel(EB_REL(index), INIT_FORKNUM, NULL,
446 : EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
4484 rhaas 447 ECB :
3951 448 : /* Initialize and xlog metabuffer and root buffer. */
4484 rhaas 449 CBC 3 : START_CRIT_SECTION();
450 3 : GinInitMetabuffer(MetaBuffer);
451 3 : MarkBufferDirty(MetaBuffer);
1984 tgl 452 3 : log_newpage_buffer(MetaBuffer, true);
4484 rhaas 453 3 : GinInitBuffer(RootBuffer, GIN_LEAF);
454 3 : MarkBufferDirty(RootBuffer);
3413 heikki.linnakangas 455 GIC 3 : log_newpage_buffer(RootBuffer, false);
4484 rhaas 456 3 : END_CRIT_SECTION();
4484 rhaas 457 ECB :
458 : /* Unlock and release the buffers. */
4484 rhaas 459 CBC 3 : UnlockReleaseBuffer(MetaBuffer);
4484 rhaas 460 GIC 3 : UnlockReleaseBuffer(RootBuffer);
461 3 : }
462 :
463 : /*
464 : * Insert index entries for a single indexable item during "normal"
465 : * (non-fast-update) insertion
6186 teodor 466 ECB : */
467 : static void
4475 tgl 468 GIC 16027 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
469 : Datum value, bool isNull,
470 : ItemPointer item)
471 : {
472 : Datum *entries;
473 : GinNullCategory *categories;
474 : int32 i,
6031 bruce 475 ECB : nentries;
476 :
4475 tgl 477 GIC 16027 : entries = ginExtractEntries(ginstate, attnum, value, isNull,
4475 tgl 478 ECB : &nentries, &categories);
6186 teodor 479 :
6031 bruce 480 GIC 54033 : for (i = 0; i < nentries; i++)
4475 tgl 481 CBC 38014 : ginEntryInsert(ginstate, attnum, entries[i], categories[i],
482 : item, 1, NULL);
6186 teodor 483 GIC 16019 : }
6186 teodor 484 ECB :
485 : bool
2639 tgl 486 GIC 147922 : gininsert(Relation index, Datum *values, bool *isnull,
487 : ItemPointer ht_ctid, Relation heapRel,
488 : IndexUniqueCheck checkUnique,
489 : bool indexUnchanged,
2250 tgl 490 ECB : IndexInfo *indexInfo)
491 : {
2250 tgl 492 GIC 147922 : GinState *ginstate = (GinState *) indexInfo->ii_AmCache;
493 : MemoryContext oldCtx;
494 : MemoryContext insertCtx;
495 : int i;
6186 teodor 496 ECB :
497 : /* Initialize GinState cache if first call in this statement */
2250 tgl 498 CBC 147922 : if (ginstate == NULL)
2250 tgl 499 ECB : {
2250 tgl 500 CBC 81 : oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
501 81 : ginstate = (GinState *) palloc(sizeof(GinState));
502 81 : initGinState(ginstate, index);
2250 tgl 503 GIC 81 : indexInfo->ii_AmCache = (void *) ginstate;
504 81 : MemoryContextSwitchTo(oldCtx);
2250 tgl 505 ECB : }
506 :
6186 teodor 507 GIC 147922 : insertCtx = AllocSetContextCreate(CurrentMemoryContext,
508 : "Gin insert temporary context",
2416 tgl 509 ECB : ALLOCSET_DEFAULT_SIZES);
510 :
6186 teodor 511 CBC 147922 : oldCtx = MemoryContextSwitchTo(insertCtx);
6186 teodor 512 ECB :
5050 bruce 513 GIC 147922 : if (GinGetUseFastUpdate(index))
5129 tgl 514 131892 : {
5050 bruce 515 ECB : GinTupleCollector collector;
516 :
5129 tgl 517 CBC 131895 : memset(&collector, 0, sizeof(GinTupleCollector));
4475 tgl 518 ECB :
2250 tgl 519 CBC 323829 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
520 191934 : ginHeapTupleFastCollect(ginstate, &collector,
4475 tgl 521 GIC 191934 : (OffsetNumber) (i + 1),
522 191934 : values[i], isnull[i],
4475 tgl 523 ECB : ht_ctid);
524 :
2250 tgl 525 GIC 131895 : ginHeapTupleFastInsert(ginstate, &collector);
526 : }
5129 tgl 527 ECB : else
528 : {
2250 tgl 529 CBC 32046 : for (i = 0; i < ginstate->origTupdesc->natts; i++)
2250 tgl 530 GIC 16027 : ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
4475 531 16027 : values[i], isnull[i],
532 : ht_ctid);
5129 tgl 533 ECB : }
6186 teodor 534 :
6186 teodor 535 GIC 147911 : MemoryContextSwitchTo(oldCtx);
6186 teodor 536 CBC 147911 : MemoryContextDelete(insertCtx);
537 :
2639 tgl 538 GIC 147911 : return false;
539 : }
|