Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginbulk.c
4 : * routines for fast build of inverted index
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginbulk.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include <limits.h>
18 :
19 : #include "access/gin_private.h"
20 : #include "utils/datum.h"
21 : #include "utils/memutils.h"
22 :
23 :
24 : #define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */
25 : #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
26 :
27 :
28 : /* Combiner function for rbtree.c */
29 : static void
1615 tgl 30 CBC 1000258 : ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg)
31 : {
4475 32 1000258 : GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
33 1000258 : const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
4790 bruce 34 1000258 : BuildAccumulator *accum = (BuildAccumulator *) arg;
35 :
36 : /*
37 : * Note this code assumes that newdata contains only one itempointer.
38 : */
4475 tgl 39 1000258 : if (eo->count >= eo->maxcount)
40 : {
2776 teodor 41 38538 : if (eo->maxcount > INT_MAX)
2776 teodor 42 UBC 0 : ereport(ERROR,
43 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
44 : errmsg("posting list is too long"),
45 : errhint("Reduce maintenance_work_mem.")));
46 :
4805 teodor 47 CBC 38538 : accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
4475 tgl 48 38538 : eo->maxcount *= 2;
49 38538 : eo->list = (ItemPointerData *)
2776 teodor 50 38538 : repalloc_huge(eo->list, sizeof(ItemPointerData) * eo->maxcount);
4805 51 38538 : accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
52 : }
53 :
54 : /* If item pointers are not ordered, they will need to be sorted later */
2062 peter_e 55 1000258 : if (eo->shouldSort == false)
56 : {
57 : int res;
58 :
4475 tgl 59 1000258 : res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
6031 bruce 60 1000258 : Assert(res != 0);
61 :
62 1000258 : if (res > 0)
2062 peter_e 63 UBC 0 : eo->shouldSort = true;
64 : }
65 :
4475 tgl 66 CBC 1000258 : eo->list[eo->count] = en->list[0];
67 1000258 : eo->count++;
4805 teodor 68 1000258 : }
69 :
70 : /* Comparator function for rbtree.c */
71 : static int
1615 tgl 72 12712326 : cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg)
73 : {
4475 74 12712326 : const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
75 12712326 : const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
4790 bruce 76 12712326 : BuildAccumulator *accum = (BuildAccumulator *) arg;
77 :
4475 tgl 78 25424652 : return ginCompareAttEntries(accum->ginstate,
79 12712326 : ea->attnum, ea->key, ea->category,
80 12712326 : eb->attnum, eb->key, eb->category);
81 : }
82 :
83 : /* Allocator function for rbtree.c */
84 : static RBTNode *
4634 85 257606 : ginAllocEntryAccumulator(void *arg)
86 : {
87 257606 : BuildAccumulator *accum = (BuildAccumulator *) arg;
88 : GinEntryAccumulator *ea;
89 :
90 : /*
91 : * Allocate memory by rather big chunks to decrease overhead. We have no
92 : * need to reclaim RBTNodes individually, so this costs nothing.
93 : */
4475 94 257606 : if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
95 : {
96 245 : accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY);
4634 97 245 : accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
4475 98 245 : accum->eas_used = 0;
99 : }
100 :
101 : /* Allocate new RBTNode from current chunk */
102 257606 : ea = accum->entryallocator + accum->eas_used;
103 257606 : accum->eas_used++;
104 :
1615 105 257606 : return (RBTNode *) ea;
106 : }
107 :
108 : void
4805 teodor 109 167 : ginInitBA(BuildAccumulator *accum)
110 : {
111 : /* accum->ginstate is intentionally not set here */
112 167 : accum->allocatedMemory = 0;
113 167 : accum->entryallocator = NULL;
4475 tgl 114 167 : accum->eas_used = 0;
1615 115 167 : accum->tree = rbt_create(sizeof(GinEntryAccumulator),
116 : cmpEntryAccumulator,
117 : ginCombineData,
118 : ginAllocEntryAccumulator,
119 : NULL, /* no freefunc needed */
120 : (void *) accum);
6186 teodor 121 167 : }
122 :
123 : /*
124 : * This is basically the same as datumCopy(), but extended to count
125 : * palloc'd space in accum->allocatedMemory.
126 : */
127 : static Datum
5385 tgl 128 257533 : getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
129 : {
130 : Form_pg_attribute att;
131 : Datum res;
132 :
2058 andres 133 257533 : att = TupleDescAttr(accum->ginstate->origTupdesc, attnum - 1);
5385 tgl 134 257533 : if (att->attbyval)
6111 135 248997 : res = value;
136 : else
137 : {
5385 138 8536 : res = datumCopy(value, false, att->attlen);
5397 139 8536 : accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res));
140 : }
6111 141 257533 : return res;
142 : }
143 :
144 : /*
145 : * Find/store one entry from indexed value.
146 : */
147 : static void
4475 148 1257864 : ginInsertBAEntry(BuildAccumulator *accum,
149 : ItemPointer heapptr, OffsetNumber attnum,
150 : Datum key, GinNullCategory category)
151 : {
152 : GinEntryAccumulator eatmp;
153 : GinEntryAccumulator *ea;
154 : bool isNew;
155 :
156 : /*
157 : * For the moment, fill only the fields of eatmp that will be looked at by
158 : * cmpEntryAccumulator or ginCombineData.
159 : */
160 1257864 : eatmp.attnum = attnum;
161 1257864 : eatmp.key = key;
162 1257864 : eatmp.category = category;
163 : /* temporarily set up single-entry itempointer list */
164 1257864 : eatmp.list = heapptr;
165 :
1615 166 1257864 : ea = (GinEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp,
167 : &isNew);
168 :
4634 169 1257864 : if (isNew)
170 : {
171 : /*
172 : * Finish initializing new tree entry, including making permanent
173 : * copies of the datum (if it's not null) and itempointer.
174 : */
4475 175 257606 : if (category == GIN_CAT_NORM_KEY)
176 257533 : ea->key = getDatumCopy(accum, attnum, key);
177 257606 : ea->maxcount = DEF_NPTR;
178 257606 : ea->count = 1;
2062 peter_e 179 257606 : ea->shouldSort = false;
4634 tgl 180 257606 : ea->list =
181 257606 : (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
182 257606 : ea->list[0] = *heapptr;
183 257606 : accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
184 : }
185 : else
186 : {
187 : /*
188 : * ginCombineData did everything needed.
189 : */
190 : }
6116 teodor 191 1257864 : }
192 :
193 : /*
194 : * Insert the entries for one heap pointer.
195 : *
196 : * Since the entries are being inserted into a balanced binary tree, you
197 : * might think that the order of insertion wouldn't be critical, but it turns
198 : * out that inserting the entries in sorted order results in a lot of
199 : * rebalancing operations and is slow. To prevent this, we attempt to insert
200 : * the nodes in an order that will produce a nearly-balanced tree if the input
201 : * is in fact sorted.
202 : *
203 : * We do this as follows. First, we imagine that we have an array whose size
204 : * is the smallest power of two greater than or equal to the actual array
205 : * size. Second, we insert the middle entry of our virtual array into the
206 : * tree; then, we insert the middles of each half of our virtual array, then
207 : * middles of quarters, etc.
208 : */
209 : void
4475 tgl 210 626505 : ginInsertBAEntries(BuildAccumulator *accum,
211 : ItemPointer heapptr, OffsetNumber attnum,
212 : Datum *entries, GinNullCategory *categories,
213 : int32 nentries)
214 : {
215 626505 : uint32 step = nentries;
216 :
217 626505 : if (nentries <= 0)
6116 teodor 218 UBC 0 : return;
219 :
5129 tgl 220 CBC 626505 : Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
221 :
222 : /*
223 : * step will contain largest power of 2 and <= nentries
224 : */
4790 bruce 225 626505 : step |= (step >> 1);
226 626505 : step |= (step >> 2);
227 626505 : step |= (step >> 4);
228 626505 : step |= (step >> 8);
4805 teodor 229 626505 : step |= (step >> 16);
230 626505 : step >>= 1;
4790 bruce 231 626505 : step++;
232 :
233 1522476 : while (step > 0)
234 : {
235 : int i;
236 :
4475 tgl 237 2153835 : for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
238 1257864 : ginInsertBAEntry(accum, heapptr, attnum,
239 1257864 : entries[i], categories[i]);
240 :
4790 bruce 241 895971 : step >>= 1; /* /2 */
242 : }
243 : }
244 :
245 : static int
6031 bruce 246 UBC 0 : qsortCompareItemPointers(const void *a, const void *b)
247 : {
4557 tgl 248 0 : int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b);
249 :
250 : /* Assert that there are no equal item pointers being sorted */
6031 bruce 251 0 : Assert(res != 0);
6186 teodor 252 0 : return res;
253 : }
254 :
255 : /* Prepare to read out the rbtree contents using ginGetBAEntry */
256 : void
4634 tgl 257 CBC 167 : ginBeginBAScan(BuildAccumulator *accum)
258 : {
1615 259 167 : rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk);
4634 260 167 : }
261 :
262 : /*
263 : * Get the next entry in sequence from the BuildAccumulator's rbtree.
264 : * This consists of a single key datum and a list (array) of one or more
265 : * heap TIDs in which that key is found. The list is guaranteed sorted.
266 : */
267 : ItemPointerData *
4475 268 257773 : ginGetBAEntry(BuildAccumulator *accum,
269 : OffsetNumber *attnum, Datum *key, GinNullCategory *category,
270 : uint32 *n)
271 : {
272 : GinEntryAccumulator *entry;
273 : ItemPointerData *list;
274 :
1615 275 257773 : entry = (GinEntryAccumulator *) rbt_iterate(&accum->tree_walk);
276 :
6031 bruce 277 257773 : if (entry == NULL)
4475 tgl 278 167 : return NULL; /* no more entries */
279 :
5385 280 257606 : *attnum = entry->attnum;
4475 281 257606 : *key = entry->key;
282 257606 : *category = entry->category;
6031 bruce 283 257606 : list = entry->list;
4475 tgl 284 257606 : *n = entry->count;
285 :
286 257606 : Assert(list != NULL && entry->count > 0);
287 :
288 257606 : if (entry->shouldSort && entry->count > 1)
4475 tgl 289 UBC 0 : qsort(list, entry->count, sizeof(ItemPointerData),
290 : qsortCompareItemPointers);
291 :
6186 teodor 292 CBC 257606 : return list;
293 : }
|