Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tuplesortvariants.c
4 : * Implementation of tuple sorting variants.
5 : *
6 : * This module handles the sorting of heap tuples, index tuples, or single
7 : * Datums. The implementation is based on the generalized tuple sorting
8 : * facility given in tuplesort.c. Support other kinds of sortable objects
9 : * could be easily added here, another module, or even an extension.
10 : *
11 : *
12 : * Copyright (c) 2022-2023, PostgreSQL Global Development Group
13 : *
14 : * IDENTIFICATION
15 : * src/backend/utils/sort/tuplesortvariants.c
16 : *
17 : *-------------------------------------------------------------------------
18 : */
19 :
20 : #include "postgres.h"
21 :
22 : #include "access/hash.h"
23 : #include "access/htup_details.h"
24 : #include "access/nbtree.h"
25 : #include "catalog/index.h"
26 : #include "executor/executor.h"
27 : #include "pg_trace.h"
28 : #include "utils/datum.h"
29 : #include "utils/lsyscache.h"
30 : #include "utils/guc.h"
31 : #include "utils/tuplesort.h"
32 :
33 :
34 : /* sort-type codes for sort__start probes */
35 : #define HEAP_SORT 0
36 : #define INDEX_SORT 1
37 : #define DATUM_SORT 2
38 : #define CLUSTER_SORT 3
39 :
40 : static void removeabbrev_heap(Tuplesortstate *state, SortTuple *stups,
41 : int count);
42 : static void removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups,
43 : int count);
44 : static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
45 : int count);
46 : static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
47 : int count);
48 : static int comparetup_heap(const SortTuple *a, const SortTuple *b,
49 : Tuplesortstate *state);
50 : static void writetup_heap(Tuplesortstate *state, LogicalTape *tape,
51 : SortTuple *stup);
52 : static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
53 : LogicalTape *tape, unsigned int len);
54 : static int comparetup_cluster(const SortTuple *a, const SortTuple *b,
55 : Tuplesortstate *state);
56 : static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape,
57 : SortTuple *stup);
58 : static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
59 : LogicalTape *tape, unsigned int tuplen);
60 : static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
61 : Tuplesortstate *state);
62 : static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
63 : Tuplesortstate *state);
64 : static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
65 : SortTuple *stup);
66 : static void readtup_index(Tuplesortstate *state, SortTuple *stup,
67 : LogicalTape *tape, unsigned int len);
68 : static int comparetup_datum(const SortTuple *a, const SortTuple *b,
69 : Tuplesortstate *state);
70 : static void writetup_datum(Tuplesortstate *state, LogicalTape *tape,
71 : SortTuple *stup);
72 : static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
73 : LogicalTape *tape, unsigned int len);
74 : static void freestate_cluster(Tuplesortstate *state);
75 :
76 : /*
77 : * Data struture pointed by "TuplesortPublic.arg" for the CLUSTER case. Set by
78 : * the tuplesort_begin_cluster.
79 : */
80 : typedef struct
81 : {
82 : TupleDesc tupDesc;
83 :
84 : IndexInfo *indexInfo; /* info about index being used for reference */
85 : EState *estate; /* for evaluating index expressions */
86 : } TuplesortClusterArg;
87 :
88 : /*
89 : * Data struture pointed by "TuplesortPublic.arg" for the IndexTuple case.
90 : * Set by tuplesort_begin_index_xxx and used only by the IndexTuple routines.
91 : */
92 : typedef struct
93 : {
94 : Relation heapRel; /* table the index is being built on */
95 : Relation indexRel; /* index being built */
96 : } TuplesortIndexArg;
97 :
98 : /*
99 : * Data struture pointed by "TuplesortPublic.arg" for the index_btree subcase.
100 : */
101 : typedef struct
102 : {
103 : TuplesortIndexArg index;
104 :
105 : bool enforceUnique; /* complain if we find duplicate tuples */
106 : bool uniqueNullsNotDistinct; /* unique constraint null treatment */
107 : } TuplesortIndexBTreeArg;
108 :
109 : /*
110 : * Data struture pointed by "TuplesortPublic.arg" for the index_hash subcase.
111 : */
112 : typedef struct
113 : {
114 : TuplesortIndexArg index;
115 :
116 : uint32 high_mask; /* masks for sortable part of hash code */
117 : uint32 low_mask;
118 : uint32 max_buckets;
119 : } TuplesortIndexHashArg;
120 :
121 : /*
122 : * Data struture pointed by "TuplesortPublic.arg" for the Datum case.
123 : * Set by tuplesort_begin_datum and used only by the DatumTuple routines.
124 : */
125 : typedef struct
126 : {
127 : /* the datatype oid of Datum's to be sorted */
128 : Oid datumType;
129 : /* we need typelen in order to know how to copy the Datums. */
130 : int datumTypeLen;
131 : } TuplesortDatumArg;
132 :
133 : Tuplesortstate *
256 akorotkov 134 GNC 39242 : tuplesort_begin_heap(TupleDesc tupDesc,
135 : int nkeys, AttrNumber *attNums,
136 : Oid *sortOperators, Oid *sortCollations,
137 : bool *nullsFirstFlags,
138 : int workMem, SortCoordinate coordinate, int sortopt)
139 : {
140 39242 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
141 : sortopt);
142 39242 : TuplesortPublic *base = TuplesortstateGetPublic(state);
143 : MemoryContext oldcontext;
144 : int i;
145 :
146 39242 : oldcontext = MemoryContextSwitchTo(base->maincontext);
147 :
163 peter 148 39242 : Assert(nkeys > 0);
149 :
150 : #ifdef TRACE_SORT
256 akorotkov 151 39242 : if (trace_sort)
256 akorotkov 152 UNC 0 : elog(LOG,
153 : "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
154 : nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
155 : #endif
156 :
256 akorotkov 157 GNC 39242 : base->nKeys = nkeys;
158 :
159 : TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
160 : false, /* no unique check */
161 : nkeys,
162 : workMem,
163 : sortopt & TUPLESORT_RANDOMACCESS,
164 : PARALLEL_SORT(coordinate));
165 :
166 39242 : base->removeabbrev = removeabbrev_heap;
167 39242 : base->comparetup = comparetup_heap;
168 39242 : base->writetup = writetup_heap;
169 39242 : base->readtup = readtup_heap;
170 39242 : base->haveDatum1 = true;
171 39242 : base->arg = tupDesc; /* assume we need not copy tupDesc */
172 :
173 : /* Prepare SortSupport data for each column */
174 39242 : base->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
175 :
176 89546 : for (i = 0; i < nkeys; i++)
177 : {
178 50313 : SortSupport sortKey = base->sortKeys + i;
179 :
163 peter 180 50313 : Assert(attNums[i] != 0);
181 50313 : Assert(sortOperators[i] != 0);
182 :
256 akorotkov 183 50313 : sortKey->ssup_cxt = CurrentMemoryContext;
184 50313 : sortKey->ssup_collation = sortCollations[i];
185 50313 : sortKey->ssup_nulls_first = nullsFirstFlags[i];
186 50313 : sortKey->ssup_attno = attNums[i];
187 : /* Convey if abbreviation optimization is applicable in principle */
188 50313 : sortKey->abbreviate = (i == 0 && base->haveDatum1);
189 :
190 50313 : PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
191 : }
192 :
193 : /*
194 : * The "onlyKey" optimization cannot be used with abbreviated keys, since
195 : * tie-breaker comparisons may be required. Typically, the optimization
196 : * is only of value to pass-by-value types anyway, whereas abbreviated
197 : * keys are typically only of value to pass-by-reference types.
198 : */
199 39233 : if (nkeys == 1 && !base->sortKeys->abbrev_converter)
200 10718 : base->onlyKey = base->sortKeys;
201 :
202 39233 : MemoryContextSwitchTo(oldcontext);
203 :
204 39233 : return state;
205 : }
206 :
207 : Tuplesortstate *
208 51 : tuplesort_begin_cluster(TupleDesc tupDesc,
209 : Relation indexRel,
210 : Relation heaprel,
211 : int workMem,
212 : SortCoordinate coordinate, int sortopt)
213 : {
214 51 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
215 : sortopt);
216 51 : TuplesortPublic *base = TuplesortstateGetPublic(state);
217 : BTScanInsert indexScanKey;
218 : MemoryContext oldcontext;
219 : TuplesortClusterArg *arg;
220 : int i;
221 :
222 51 : Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
223 :
224 51 : oldcontext = MemoryContextSwitchTo(base->maincontext);
225 51 : arg = (TuplesortClusterArg *) palloc0(sizeof(TuplesortClusterArg));
226 :
227 : #ifdef TRACE_SORT
228 51 : if (trace_sort)
256 akorotkov 229 UNC 0 : elog(LOG,
230 : "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
231 : RelationGetNumberOfAttributes(indexRel),
232 : workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
233 : #endif
234 :
256 akorotkov 235 GNC 51 : base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
236 :
237 : TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
238 : false, /* no unique check */
239 : base->nKeys,
240 : workMem,
241 : sortopt & TUPLESORT_RANDOMACCESS,
242 : PARALLEL_SORT(coordinate));
243 :
244 51 : base->removeabbrev = removeabbrev_cluster;
245 51 : base->comparetup = comparetup_cluster;
246 51 : base->writetup = writetup_cluster;
247 51 : base->readtup = readtup_cluster;
248 51 : base->freestate = freestate_cluster;
249 51 : base->arg = arg;
250 :
251 51 : arg->indexInfo = BuildIndexInfo(indexRel);
252 :
253 : /*
254 : * If we don't have a simple leading attribute, we don't currently
255 : * initialize datum1, so disable optimizations that require it.
256 : */
257 51 : if (arg->indexInfo->ii_IndexAttrNumbers[0] == 0)
258 9 : base->haveDatum1 = false;
259 : else
260 42 : base->haveDatum1 = true;
261 :
262 51 : arg->tupDesc = tupDesc; /* assume we need not copy tupDesc */
263 :
8 andres 264 51 : indexScanKey = _bt_mkscankey(indexRel, heaprel, NULL);
265 :
256 akorotkov 266 51 : if (arg->indexInfo->ii_Expressions != NULL)
267 : {
268 : TupleTableSlot *slot;
269 : ExprContext *econtext;
270 :
271 : /*
272 : * We will need to use FormIndexDatum to evaluate the index
273 : * expressions. To do that, we need an EState, as well as a
274 : * TupleTableSlot to put the table tuples into. The econtext's
275 : * scantuple has to point to that slot, too.
276 : */
277 9 : arg->estate = CreateExecutorState();
278 9 : slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
279 9 : econtext = GetPerTupleExprContext(arg->estate);
280 9 : econtext->ecxt_scantuple = slot;
281 : }
282 :
283 : /* Prepare SortSupport data for each column */
284 51 : base->sortKeys = (SortSupport) palloc0(base->nKeys *
285 : sizeof(SortSupportData));
286 :
287 114 : for (i = 0; i < base->nKeys; i++)
288 : {
289 63 : SortSupport sortKey = base->sortKeys + i;
290 63 : ScanKey scanKey = indexScanKey->scankeys + i;
291 : int16 strategy;
292 :
293 63 : sortKey->ssup_cxt = CurrentMemoryContext;
294 63 : sortKey->ssup_collation = scanKey->sk_collation;
295 63 : sortKey->ssup_nulls_first =
296 63 : (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
297 63 : sortKey->ssup_attno = scanKey->sk_attno;
298 : /* Convey if abbreviation optimization is applicable in principle */
299 63 : sortKey->abbreviate = (i == 0 && base->haveDatum1);
300 :
163 peter 301 63 : Assert(sortKey->ssup_attno != 0);
302 :
256 akorotkov 303 63 : strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
304 : BTGreaterStrategyNumber : BTLessStrategyNumber;
305 :
306 63 : PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
307 : }
308 :
309 51 : pfree(indexScanKey);
310 :
311 51 : MemoryContextSwitchTo(oldcontext);
312 :
313 51 : return state;
314 : }
315 :
316 : Tuplesortstate *
317 121105 : tuplesort_begin_index_btree(Relation heapRel,
318 : Relation indexRel,
319 : bool enforceUnique,
320 : bool uniqueNullsNotDistinct,
321 : int workMem,
322 : SortCoordinate coordinate,
323 : int sortopt)
324 : {
325 121105 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
326 : sortopt);
327 121105 : TuplesortPublic *base = TuplesortstateGetPublic(state);
328 : BTScanInsert indexScanKey;
329 : TuplesortIndexBTreeArg *arg;
330 : MemoryContext oldcontext;
331 : int i;
332 :
333 121105 : oldcontext = MemoryContextSwitchTo(base->maincontext);
334 121105 : arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
335 :
336 : #ifdef TRACE_SORT
337 121105 : if (trace_sort)
256 akorotkov 338 UNC 0 : elog(LOG,
339 : "begin index sort: unique = %c, workMem = %d, randomAccess = %c",
340 : enforceUnique ? 't' : 'f',
341 : workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
342 : #endif
343 :
256 akorotkov 344 GNC 121105 : base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
345 :
346 : TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
347 : enforceUnique,
348 : base->nKeys,
349 : workMem,
350 : sortopt & TUPLESORT_RANDOMACCESS,
351 : PARALLEL_SORT(coordinate));
352 :
353 121105 : base->removeabbrev = removeabbrev_index;
354 121105 : base->comparetup = comparetup_index_btree;
355 121105 : base->writetup = writetup_index;
356 121105 : base->readtup = readtup_index;
357 121105 : base->haveDatum1 = true;
358 121105 : base->arg = arg;
359 :
360 121105 : arg->index.heapRel = heapRel;
361 121105 : arg->index.indexRel = indexRel;
362 121105 : arg->enforceUnique = enforceUnique;
363 121105 : arg->uniqueNullsNotDistinct = uniqueNullsNotDistinct;
364 :
8 andres 365 121105 : indexScanKey = _bt_mkscankey(indexRel, heapRel, NULL);
366 :
367 : /* Prepare SortSupport data for each column */
256 akorotkov 368 121105 : base->sortKeys = (SortSupport) palloc0(base->nKeys *
369 : sizeof(SortSupportData));
370 :
371 328250 : for (i = 0; i < base->nKeys; i++)
372 : {
373 207145 : SortSupport sortKey = base->sortKeys + i;
374 207145 : ScanKey scanKey = indexScanKey->scankeys + i;
375 : int16 strategy;
376 :
377 207145 : sortKey->ssup_cxt = CurrentMemoryContext;
378 207145 : sortKey->ssup_collation = scanKey->sk_collation;
379 207145 : sortKey->ssup_nulls_first =
380 207145 : (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
381 207145 : sortKey->ssup_attno = scanKey->sk_attno;
382 : /* Convey if abbreviation optimization is applicable in principle */
383 207145 : sortKey->abbreviate = (i == 0 && base->haveDatum1);
384 :
163 peter 385 207145 : Assert(sortKey->ssup_attno != 0);
386 :
256 akorotkov 387 207145 : strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
388 : BTGreaterStrategyNumber : BTLessStrategyNumber;
389 :
390 207145 : PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
391 : }
392 :
393 121105 : pfree(indexScanKey);
394 :
395 121105 : MemoryContextSwitchTo(oldcontext);
396 :
397 121105 : return state;
398 : }
399 :
400 : Tuplesortstate *
401 4 : tuplesort_begin_index_hash(Relation heapRel,
402 : Relation indexRel,
403 : uint32 high_mask,
404 : uint32 low_mask,
405 : uint32 max_buckets,
406 : int workMem,
407 : SortCoordinate coordinate,
408 : int sortopt)
409 : {
410 4 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
411 : sortopt);
412 4 : TuplesortPublic *base = TuplesortstateGetPublic(state);
413 : MemoryContext oldcontext;
414 : TuplesortIndexHashArg *arg;
415 :
416 4 : oldcontext = MemoryContextSwitchTo(base->maincontext);
417 4 : arg = (TuplesortIndexHashArg *) palloc(sizeof(TuplesortIndexHashArg));
418 :
419 : #ifdef TRACE_SORT
420 4 : if (trace_sort)
256 akorotkov 421 UNC 0 : elog(LOG,
422 : "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
423 : "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
424 : high_mask,
425 : low_mask,
426 : max_buckets,
427 : workMem,
428 : sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
429 : #endif
430 :
256 akorotkov 431 GNC 4 : base->nKeys = 1; /* Only one sort column, the hash code */
432 :
433 4 : base->removeabbrev = removeabbrev_index;
434 4 : base->comparetup = comparetup_index_hash;
435 4 : base->writetup = writetup_index;
436 4 : base->readtup = readtup_index;
437 4 : base->haveDatum1 = true;
438 4 : base->arg = arg;
439 :
440 4 : arg->index.heapRel = heapRel;
441 4 : arg->index.indexRel = indexRel;
442 :
443 4 : arg->high_mask = high_mask;
444 4 : arg->low_mask = low_mask;
445 4 : arg->max_buckets = max_buckets;
446 :
447 4 : MemoryContextSwitchTo(oldcontext);
448 :
449 4 : return state;
450 : }
451 :
452 : Tuplesortstate *
453 69 : tuplesort_begin_index_gist(Relation heapRel,
454 : Relation indexRel,
455 : int workMem,
456 : SortCoordinate coordinate,
457 : int sortopt)
458 : {
459 69 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
460 : sortopt);
461 69 : TuplesortPublic *base = TuplesortstateGetPublic(state);
462 : MemoryContext oldcontext;
463 : TuplesortIndexBTreeArg *arg;
464 : int i;
465 :
466 69 : oldcontext = MemoryContextSwitchTo(base->maincontext);
467 69 : arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
468 :
469 : #ifdef TRACE_SORT
470 69 : if (trace_sort)
256 akorotkov 471 UNC 0 : elog(LOG,
472 : "begin index sort: workMem = %d, randomAccess = %c",
473 : workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
474 : #endif
475 :
256 akorotkov 476 GNC 69 : base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
477 :
478 69 : base->removeabbrev = removeabbrev_index;
479 69 : base->comparetup = comparetup_index_btree;
480 69 : base->writetup = writetup_index;
481 69 : base->readtup = readtup_index;
482 69 : base->haveDatum1 = true;
483 69 : base->arg = arg;
484 :
485 69 : arg->index.heapRel = heapRel;
486 69 : arg->index.indexRel = indexRel;
487 69 : arg->enforceUnique = false;
488 69 : arg->uniqueNullsNotDistinct = false;
489 :
490 : /* Prepare SortSupport data for each column */
491 69 : base->sortKeys = (SortSupport) palloc0(base->nKeys *
492 : sizeof(SortSupportData));
493 :
494 138 : for (i = 0; i < base->nKeys; i++)
495 : {
496 69 : SortSupport sortKey = base->sortKeys + i;
497 :
498 69 : sortKey->ssup_cxt = CurrentMemoryContext;
499 69 : sortKey->ssup_collation = indexRel->rd_indcollation[i];
500 69 : sortKey->ssup_nulls_first = false;
501 69 : sortKey->ssup_attno = i + 1;
502 : /* Convey if abbreviation optimization is applicable in principle */
503 69 : sortKey->abbreviate = (i == 0 && base->haveDatum1);
504 :
163 peter 505 69 : Assert(sortKey->ssup_attno != 0);
506 :
507 : /* Look for a sort support function */
256 akorotkov 508 69 : PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
509 : }
510 :
511 69 : MemoryContextSwitchTo(oldcontext);
512 :
513 69 : return state;
514 : }
515 :
516 : Tuplesortstate *
517 29346 : tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
518 : bool nullsFirstFlag, int workMem,
519 : SortCoordinate coordinate, int sortopt)
520 : {
521 29346 : Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
522 : sortopt);
523 29346 : TuplesortPublic *base = TuplesortstateGetPublic(state);
524 : TuplesortDatumArg *arg;
525 : MemoryContext oldcontext;
526 : int16 typlen;
527 : bool typbyval;
528 :
529 29346 : oldcontext = MemoryContextSwitchTo(base->maincontext);
530 29346 : arg = (TuplesortDatumArg *) palloc(sizeof(TuplesortDatumArg));
531 :
532 : #ifdef TRACE_SORT
533 29346 : if (trace_sort)
256 akorotkov 534 UNC 0 : elog(LOG,
535 : "begin datum sort: workMem = %d, randomAccess = %c",
536 : workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
537 : #endif
538 :
256 akorotkov 539 GNC 29346 : base->nKeys = 1; /* always a one-column sort */
540 :
541 : TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
542 : false, /* no unique check */
543 : 1,
544 : workMem,
545 : sortopt & TUPLESORT_RANDOMACCESS,
546 : PARALLEL_SORT(coordinate));
547 :
548 29346 : base->removeabbrev = removeabbrev_datum;
549 29346 : base->comparetup = comparetup_datum;
550 29346 : base->writetup = writetup_datum;
551 29346 : base->readtup = readtup_datum;
552 29346 : base->haveDatum1 = true;
553 29346 : base->arg = arg;
554 :
555 29346 : arg->datumType = datumType;
556 :
557 : /* lookup necessary attributes of the datum type */
558 29346 : get_typlenbyval(datumType, &typlen, &typbyval);
559 29346 : arg->datumTypeLen = typlen;
560 29346 : base->tuples = !typbyval;
561 :
562 : /* Prepare SortSupport data */
563 29346 : base->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
564 :
565 29346 : base->sortKeys->ssup_cxt = CurrentMemoryContext;
566 29346 : base->sortKeys->ssup_collation = sortCollation;
567 29346 : base->sortKeys->ssup_nulls_first = nullsFirstFlag;
568 :
569 : /*
570 : * Abbreviation is possible here only for by-reference types. In theory,
571 : * a pass-by-value datatype could have an abbreviated form that is cheaper
572 : * to compare. In a tuple sort, we could support that, because we can
573 : * always extract the original datum from the tuple as needed. Here, we
574 : * can't, because a datum sort only stores a single copy of the datum; the
575 : * "tuple" field of each SortTuple is NULL.
576 : */
577 29346 : base->sortKeys->abbreviate = !typbyval;
578 :
579 29346 : PrepareSortSupportFromOrderingOp(sortOperator, base->sortKeys);
580 :
581 : /*
582 : * The "onlyKey" optimization cannot be used with abbreviated keys, since
583 : * tie-breaker comparisons may be required. Typically, the optimization
584 : * is only of value to pass-by-value types anyway, whereas abbreviated
585 : * keys are typically only of value to pass-by-reference types.
586 : */
587 29346 : if (!base->sortKeys->abbrev_converter)
588 28786 : base->onlyKey = base->sortKeys;
589 :
590 29346 : MemoryContextSwitchTo(oldcontext);
591 :
592 29346 : return state;
593 : }
594 :
595 : /*
596 : * Accept one tuple while collecting input data for sort.
597 : *
598 : * Note that the input data is always copied; the caller need not save it.
599 : */
600 : void
601 5269913 : tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
602 : {
603 5269913 : TuplesortPublic *base = TuplesortstateGetPublic(state);
604 5269913 : MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
605 5269913 : TupleDesc tupDesc = (TupleDesc) base->arg;
606 : SortTuple stup;
607 : MinimalTuple tuple;
608 : HeapTupleData htup;
609 :
610 : /* copy the tuple into sort storage */
611 5269913 : tuple = ExecCopySlotMinimalTuple(slot);
612 5269913 : stup.tuple = (void *) tuple;
613 : /* set up first-column key value */
614 5269913 : htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
615 5269913 : htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
616 10539826 : stup.datum1 = heap_getattr(&htup,
617 5269913 : base->sortKeys[0].ssup_attno,
618 : tupDesc,
619 : &stup.isnull1);
620 :
621 5269913 : tuplesort_puttuple_common(state, &stup,
622 5941887 : base->sortKeys->abbrev_converter &&
623 671974 : !stup.isnull1);
624 :
625 5269913 : MemoryContextSwitchTo(oldcontext);
626 5269913 : }
627 :
628 : /*
629 : * Accept one tuple while collecting input data for sort.
630 : *
631 : * Note that the input data is always copied; the caller need not save it.
632 : */
633 : void
634 273643 : tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
635 : {
636 : SortTuple stup;
637 273643 : TuplesortPublic *base = TuplesortstateGetPublic(state);
638 273643 : MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
639 273643 : TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
640 :
641 : /* copy the tuple into sort storage */
642 273643 : tup = heap_copytuple(tup);
643 273643 : stup.tuple = (void *) tup;
644 :
645 : /*
646 : * set up first-column key value, and potentially abbreviate, if it's a
647 : * simple column
648 : */
649 273643 : if (base->haveDatum1)
650 : {
651 272836 : stup.datum1 = heap_getattr(tup,
652 272836 : arg->indexInfo->ii_IndexAttrNumbers[0],
653 : arg->tupDesc,
654 : &stup.isnull1);
655 : }
656 :
657 273643 : tuplesort_puttuple_common(state, &stup,
658 546479 : base->haveDatum1 &&
659 455251 : base->sortKeys->abbrev_converter &&
660 181608 : !stup.isnull1);
661 :
662 273643 : MemoryContextSwitchTo(oldcontext);
663 273643 : }
664 :
665 : /*
666 : * Collect one index tuple while collecting input data for sort, building
667 : * it from caller-supplied values.
668 : */
669 : void
670 11927280 : tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
671 : ItemPointer self, Datum *values,
672 : bool *isnull)
673 : {
674 : SortTuple stup;
675 : IndexTuple tuple;
676 11927280 : TuplesortPublic *base = TuplesortstateGetPublic(state);
677 11927280 : TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
678 :
679 11927280 : stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
680 : isnull, base->tuplecontext);
681 11927280 : tuple = ((IndexTuple) stup.tuple);
682 11927280 : tuple->t_tid = *self;
683 : /* set up first-column key value */
684 23854560 : stup.datum1 = index_getattr(tuple,
685 : 1,
686 11927280 : RelationGetDescr(arg->indexRel),
687 : &stup.isnull1);
688 :
689 11927280 : tuplesort_puttuple_common(state, &stup,
690 23794060 : base->sortKeys &&
691 12995226 : base->sortKeys->abbrev_converter &&
692 1067946 : !stup.isnull1);
693 11927280 : }
694 :
695 : /*
696 : * Accept one Datum while collecting input data for sort.
697 : *
698 : * If the Datum is pass-by-ref type, the value will be copied.
699 : */
700 : void
701 1764829 : tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
702 : {
703 1764829 : TuplesortPublic *base = TuplesortstateGetPublic(state);
704 1764829 : MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
705 1764829 : TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
706 : SortTuple stup;
707 :
708 : /*
709 : * Pass-by-value types or null values are just stored directly in
710 : * stup.datum1 (and stup.tuple is not used and set to NULL).
711 : *
712 : * Non-null pass-by-reference values need to be copied into memory we
713 : * control, and possibly abbreviated. The copied value is pointed to by
714 : * stup.tuple and is treated as the canonical copy (e.g. to return via
715 : * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
716 : * abbreviated value if abbreviation is happening, otherwise it's
717 : * identical to stup.tuple.
718 : */
719 :
720 1764829 : if (isNull || !base->tuples)
721 : {
722 : /*
723 : * Set datum1 to zeroed representation for NULLs (to be consistent,
724 : * and to support cheap inequality tests for NULL abbreviated keys).
725 : */
726 908642 : stup.datum1 = !isNull ? val : (Datum) 0;
727 908642 : stup.isnull1 = isNull;
728 908642 : stup.tuple = NULL; /* no separate storage */
729 : }
730 : else
731 : {
732 856187 : stup.isnull1 = false;
733 856187 : stup.datum1 = datumCopy(val, false, arg->datumTypeLen);
734 856187 : stup.tuple = DatumGetPointer(stup.datum1);
735 : }
736 :
737 1764829 : tuplesort_puttuple_common(state, &stup,
738 2621339 : base->tuples &&
739 1764829 : base->sortKeys->abbrev_converter && !isNull);
740 :
741 1764829 : MemoryContextSwitchTo(oldcontext);
742 1764829 : }
743 :
744 : /*
745 : * Fetch the next tuple in either forward or back direction.
746 : * If successful, put tuple in slot and return true; else, clear the slot
747 : * and return false.
748 : *
749 : * Caller may optionally be passed back abbreviated value (on true return
750 : * value) when abbreviation was used, which can be used to cheaply avoid
751 : * equality checks that might otherwise be required. Caller can safely make a
752 : * determination of "non-equal tuple" based on simple binary inequality. A
753 : * NULL value in leading attribute will set abbreviated value to zeroed
754 : * representation, which caller may rely on in abbreviated inequality check.
755 : *
756 : * If copy is true, the slot receives a tuple that's been copied into the
757 : * caller's memory context, so that it will stay valid regardless of future
758 : * manipulations of the tuplesort's state (up to and including deleting the
759 : * tuplesort). If copy is false, the slot will just receive a pointer to a
760 : * tuple held within the tuplesort, which is more efficient, but only safe for
761 : * callers that are prepared to have any subsequent manipulation of the
762 : * tuplesort's state invalidate slot contents.
763 : */
764 : bool
765 4450575 : tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
766 : TupleTableSlot *slot, Datum *abbrev)
767 : {
768 4450575 : TuplesortPublic *base = TuplesortstateGetPublic(state);
769 4450575 : MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
770 : SortTuple stup;
771 :
772 4450575 : if (!tuplesort_gettuple_common(state, forward, &stup))
773 39988 : stup.tuple = NULL;
774 :
775 4450575 : MemoryContextSwitchTo(oldcontext);
776 :
777 4450575 : if (stup.tuple)
778 : {
779 : /* Record abbreviated key for caller */
780 4410587 : if (base->sortKeys->abbrev_converter && abbrev)
256 akorotkov 781 UNC 0 : *abbrev = stup.datum1;
782 :
256 akorotkov 783 GNC 4410587 : if (copy)
784 2260 : stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple);
785 :
786 4410587 : ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy);
787 4410587 : return true;
788 : }
789 : else
790 : {
791 39988 : ExecClearTuple(slot);
792 39988 : return false;
793 : }
794 : }
795 :
796 : /*
797 : * Fetch the next tuple in either forward or back direction.
798 : * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
799 : * context, and must not be freed by caller. Caller may not rely on tuple
800 : * remaining valid after any further manipulation of tuplesort.
801 : */
802 : HeapTuple
803 273694 : tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
804 : {
805 273694 : TuplesortPublic *base = TuplesortstateGetPublic(state);
806 273694 : MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
807 : SortTuple stup;
808 :
809 273694 : if (!tuplesort_gettuple_common(state, forward, &stup))
810 51 : stup.tuple = NULL;
811 :
812 273694 : MemoryContextSwitchTo(oldcontext);
813 :
814 273694 : return stup.tuple;
815 : }
816 :
817 : /*
818 : * Fetch the next index tuple in either forward or back direction.
819 : * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
820 : * context, and must not be freed by caller. Caller may not rely on tuple
821 : * remaining valid after any further manipulation of tuplesort.
822 : */
823 : IndexTuple
824 11991342 : tuplesort_getindextuple(Tuplesortstate *state, bool forward)
825 : {
826 11991342 : TuplesortPublic *base = TuplesortstateGetPublic(state);
827 11991342 : MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
828 : SortTuple stup;
829 :
830 11991342 : if (!tuplesort_gettuple_common(state, forward, &stup))
831 64251 : stup.tuple = NULL;
832 :
833 11991342 : MemoryContextSwitchTo(oldcontext);
834 :
835 11991342 : return (IndexTuple) stup.tuple;
836 : }
837 :
838 : /*
839 : * Fetch the next Datum in either forward or back direction.
840 : * Returns false if no more datums.
841 : *
842 : * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
843 : * in caller's context, and is now owned by the caller (this differs from
844 : * similar routines for other types of tuplesorts).
845 : *
846 : * Caller may optionally be passed back abbreviated value (on true return
847 : * value) when abbreviation was used, which can be used to cheaply avoid
848 : * equality checks that might otherwise be required. Caller can safely make a
849 : * determination of "non-equal tuple" based on simple binary inequality. A
850 : * NULL value will have a zeroed abbreviated value representation, which caller
851 : * may rely on in abbreviated inequality check.
852 : *
853 : * For byref Datums, if copy is true, *val is set to a copy of the Datum
854 : * copied into the caller's memory context, so that it will stay valid
855 : * regardless of future manipulations of the tuplesort's state (up to and
856 : * including deleting the tuplesort). If copy is false, *val will just be
857 : * set to a pointer to the Datum held within the tuplesort, which is more
858 : * efficient, but only safe for callers that are prepared to have any
859 : * subsequent manipulation of the tuplesort's state invalidate slot contents.
860 : * For byval Datums, the value of the 'copy' parameter has no effect.
861 :
862 : */
863 : bool
163 drowley 864 1076861 : tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy,
865 : Datum *val, bool *isNull, Datum *abbrev)
866 : {
256 akorotkov 867 1076861 : TuplesortPublic *base = TuplesortstateGetPublic(state);
868 1076861 : MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
869 1076861 : TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
870 : SortTuple stup;
871 :
872 1076861 : if (!tuplesort_gettuple_common(state, forward, &stup))
873 : {
874 28776 : MemoryContextSwitchTo(oldcontext);
875 28776 : return false;
876 : }
877 :
878 : /* Ensure we copy into caller's memory context */
879 1048085 : MemoryContextSwitchTo(oldcontext);
880 :
881 : /* Record abbreviated key for caller */
882 1048085 : if (base->sortKeys->abbrev_converter && abbrev)
883 42512 : *abbrev = stup.datum1;
884 :
885 1048085 : if (stup.isnull1 || !base->tuples)
886 : {
887 491991 : *val = stup.datum1;
888 491991 : *isNull = stup.isnull1;
889 : }
890 : else
891 : {
892 : /* use stup.tuple because stup.datum1 may be an abbreviation */
163 drowley 893 556094 : if (copy)
894 30024 : *val = datumCopy(PointerGetDatum(stup.tuple), false,
895 : arg->datumTypeLen);
896 : else
897 526070 : *val = PointerGetDatum(stup.tuple);
256 akorotkov 898 556094 : *isNull = false;
899 : }
900 :
901 1048085 : return true;
902 : }
903 :
904 :
905 : /*
906 : * Routines specialized for HeapTuple (actually MinimalTuple) case
907 : */
908 :
909 : static void
910 17 : removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, int count)
911 : {
912 : int i;
913 17 : TuplesortPublic *base = TuplesortstateGetPublic(state);
914 :
915 65457 : for (i = 0; i < count; i++)
916 : {
917 : HeapTupleData htup;
918 :
919 65440 : htup.t_len = ((MinimalTuple) stups[i].tuple)->t_len +
920 : MINIMAL_TUPLE_OFFSET;
921 65440 : htup.t_data = (HeapTupleHeader) ((char *) stups[i].tuple -
922 : MINIMAL_TUPLE_OFFSET);
923 65440 : stups[i].datum1 = heap_getattr(&htup,
924 65440 : base->sortKeys[0].ssup_attno,
925 65440 : (TupleDesc) base->arg,
926 65440 : &stups[i].isnull1);
927 : }
928 17 : }
929 :
930 : static int
931 18740772 : comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
932 : {
933 18740772 : TuplesortPublic *base = TuplesortstateGetPublic(state);
934 18740772 : SortSupport sortKey = base->sortKeys;
935 : HeapTupleData ltup;
936 : HeapTupleData rtup;
937 : TupleDesc tupDesc;
938 : int nkey;
939 : int32 compare;
940 : AttrNumber attno;
941 : Datum datum1,
942 : datum2;
943 : bool isnull1,
944 : isnull2;
945 :
946 :
947 : /* Compare the leading sort key */
948 18740772 : compare = ApplySortComparator(a->datum1, a->isnull1,
949 18740772 : b->datum1, b->isnull1,
950 : sortKey);
951 18740772 : if (compare != 0)
952 7886704 : return compare;
953 :
954 : /* Compare additional sort keys */
955 10854068 : ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
956 10854068 : ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
957 10854068 : rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
958 10854068 : rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
959 10854068 : tupDesc = (TupleDesc) base->arg;
960 :
961 10854068 : if (sortKey->abbrev_converter)
962 : {
963 642940 : attno = sortKey->ssup_attno;
964 :
965 642940 : datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
966 642940 : datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
967 :
968 642940 : compare = ApplySortAbbrevFullComparator(datum1, isnull1,
969 : datum2, isnull2,
970 : sortKey);
971 642940 : if (compare != 0)
972 440327 : return compare;
973 : }
974 :
975 10413741 : sortKey++;
976 11126129 : for (nkey = 1; nkey < base->nKeys; nkey++, sortKey++)
977 : {
978 10361148 : attno = sortKey->ssup_attno;
979 :
980 10361148 : datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
981 10361148 : datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
982 :
983 10361148 : compare = ApplySortComparator(datum1, isnull1,
984 : datum2, isnull2,
985 : sortKey);
986 10361148 : if (compare != 0)
987 9648760 : return compare;
988 : }
989 :
990 764981 : return 0;
991 : }
992 :
993 : static void
994 532725 : writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
995 : {
996 532725 : TuplesortPublic *base = TuplesortstateGetPublic(state);
997 532725 : MinimalTuple tuple = (MinimalTuple) stup->tuple;
998 :
999 : /* the part of the MinimalTuple we'll write: */
1000 532725 : char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1001 532725 : unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
1002 :
1003 : /* total on-disk footprint: */
1004 532725 : unsigned int tuplen = tupbodylen + sizeof(int);
1005 :
100 peter 1006 532725 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
1007 532725 : LogicalTapeWrite(tape, tupbody, tupbodylen);
256 akorotkov 1008 532725 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
100 peter 1009 15000 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
256 akorotkov 1010 532725 : }
1011 :
1012 : static void
1013 481590 : readtup_heap(Tuplesortstate *state, SortTuple *stup,
1014 : LogicalTape *tape, unsigned int len)
1015 : {
1016 481590 : unsigned int tupbodylen = len - sizeof(int);
1017 481590 : unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
1018 481590 : MinimalTuple tuple = (MinimalTuple) tuplesort_readtup_alloc(state, tuplen);
1019 481590 : char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
1020 481590 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1021 : HeapTupleData htup;
1022 :
1023 : /* read in the tuple proper */
1024 481590 : tuple->t_len = tuplen;
1025 481590 : LogicalTapeReadExact(tape, tupbody, tupbodylen);
1026 481590 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
1027 23892 : LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
1028 481590 : stup->tuple = (void *) tuple;
1029 : /* set up first-column key value */
1030 481590 : htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
1031 481590 : htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
1032 963180 : stup->datum1 = heap_getattr(&htup,
1033 481590 : base->sortKeys[0].ssup_attno,
1034 481590 : (TupleDesc) base->arg,
1035 : &stup->isnull1);
1036 481590 : }
1037 :
1038 : /*
1039 : * Routines specialized for the CLUSTER case (HeapTuple data, with
1040 : * comparisons per a btree index definition)
1041 : */
1042 :
1043 : static void
1044 6 : removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, int count)
1045 : {
1046 : int i;
1047 6 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1048 6 : TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
1049 :
1050 61446 : for (i = 0; i < count; i++)
1051 : {
1052 : HeapTuple tup;
1053 :
1054 61440 : tup = (HeapTuple) stups[i].tuple;
1055 61440 : stups[i].datum1 = heap_getattr(tup,
1056 61440 : arg->indexInfo->ii_IndexAttrNumbers[0],
1057 : arg->tupDesc,
1058 61440 : &stups[i].isnull1);
1059 : }
1060 6 : }
1061 :
1062 : static int
1063 3209297 : comparetup_cluster(const SortTuple *a, const SortTuple *b,
1064 : Tuplesortstate *state)
1065 : {
1066 3209297 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1067 3209297 : TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
1068 3209297 : SortSupport sortKey = base->sortKeys;
1069 : HeapTuple ltup;
1070 : HeapTuple rtup;
1071 : TupleDesc tupDesc;
1072 : int nkey;
1073 : int32 compare;
1074 : Datum datum1,
1075 : datum2;
1076 : bool isnull1,
1077 : isnull2;
1078 :
1079 : /* Be prepared to compare additional sort keys */
1080 3209297 : ltup = (HeapTuple) a->tuple;
1081 3209297 : rtup = (HeapTuple) b->tuple;
1082 3209297 : tupDesc = arg->tupDesc;
1083 :
1084 : /* Compare the leading sort key, if it's simple */
1085 3209297 : if (base->haveDatum1)
1086 : {
1087 3203405 : compare = ApplySortComparator(a->datum1, a->isnull1,
1088 3203405 : b->datum1, b->isnull1,
1089 : sortKey);
1090 3203405 : if (compare != 0)
1091 2918796 : return compare;
1092 :
1093 284609 : if (sortKey->abbrev_converter)
1094 : {
1095 72302 : AttrNumber leading = arg->indexInfo->ii_IndexAttrNumbers[0];
1096 :
1097 72302 : datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
1098 72302 : datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
1099 :
1100 72302 : compare = ApplySortAbbrevFullComparator(datum1, isnull1,
1101 : datum2, isnull2,
1102 : sortKey);
1103 : }
1104 284609 : if (compare != 0 || base->nKeys == 1)
1105 73799 : return compare;
1106 : /* Compare additional columns the hard way */
1107 210810 : sortKey++;
1108 210810 : nkey = 1;
1109 : }
1110 : else
1111 : {
1112 : /* Must compare all keys the hard way */
1113 5892 : nkey = 0;
1114 : }
1115 :
1116 216702 : if (arg->indexInfo->ii_Expressions == NULL)
1117 : {
1118 : /* If not expression index, just compare the proper heap attrs */
1119 :
1120 294294 : for (; nkey < base->nKeys; nkey++, sortKey++)
1121 : {
1122 294294 : AttrNumber attno = arg->indexInfo->ii_IndexAttrNumbers[nkey];
1123 :
1124 294294 : datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
1125 294294 : datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
1126 :
1127 294294 : compare = ApplySortComparator(datum1, isnull1,
1128 : datum2, isnull2,
1129 : sortKey);
1130 294294 : if (compare != 0)
1131 210810 : return compare;
1132 : }
1133 : }
1134 : else
1135 : {
1136 : /*
1137 : * In the expression index case, compute the whole index tuple and
1138 : * then compare values. It would perhaps be faster to compute only as
1139 : * many columns as we need to compare, but that would require
1140 : * duplicating all the logic in FormIndexDatum.
1141 : */
1142 : Datum l_index_values[INDEX_MAX_KEYS];
1143 : bool l_index_isnull[INDEX_MAX_KEYS];
1144 : Datum r_index_values[INDEX_MAX_KEYS];
1145 : bool r_index_isnull[INDEX_MAX_KEYS];
1146 : TupleTableSlot *ecxt_scantuple;
1147 :
1148 : /* Reset context each time to prevent memory leakage */
1149 5892 : ResetPerTupleExprContext(arg->estate);
1150 :
1151 5892 : ecxt_scantuple = GetPerTupleExprContext(arg->estate)->ecxt_scantuple;
1152 :
1153 5892 : ExecStoreHeapTuple(ltup, ecxt_scantuple, false);
1154 5892 : FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
1155 : l_index_values, l_index_isnull);
1156 :
1157 5892 : ExecStoreHeapTuple(rtup, ecxt_scantuple, false);
1158 5892 : FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
1159 : r_index_values, r_index_isnull);
1160 :
1161 6354 : for (; nkey < base->nKeys; nkey++, sortKey++)
1162 : {
1163 6354 : compare = ApplySortComparator(l_index_values[nkey],
1164 6354 : l_index_isnull[nkey],
1165 : r_index_values[nkey],
1166 6354 : r_index_isnull[nkey],
1167 : sortKey);
1168 6354 : if (compare != 0)
1169 5892 : return compare;
1170 : }
1171 : }
1172 :
256 akorotkov 1173 UNC 0 : return 0;
1174 : }
1175 :
1176 : static void
256 akorotkov 1177 GNC 30000 : writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
1178 : {
1179 30000 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1180 30000 : HeapTuple tuple = (HeapTuple) stup->tuple;
1181 30000 : unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
1182 :
1183 : /* We need to store t_self, but not other fields of HeapTupleData */
1184 30000 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
1185 30000 : LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
1186 30000 : LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
1187 30000 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
256 akorotkov 1188 UNC 0 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
256 akorotkov 1189 GNC 30000 : }
1190 :
1191 : static void
1192 30000 : readtup_cluster(Tuplesortstate *state, SortTuple *stup,
1193 : LogicalTape *tape, unsigned int tuplen)
1194 : {
1195 30000 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1196 30000 : TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
1197 30000 : unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
1198 30000 : HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state,
1199 : t_len + HEAPTUPLESIZE);
1200 :
1201 : /* Reconstruct the HeapTupleData header */
1202 30000 : tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
1203 30000 : tuple->t_len = t_len;
1204 30000 : LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
1205 : /* We don't currently bother to reconstruct t_tableOid */
1206 30000 : tuple->t_tableOid = InvalidOid;
1207 : /* Read in the tuple body */
1208 30000 : LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len);
1209 30000 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
256 akorotkov 1210 UNC 0 : LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
256 akorotkov 1211 GNC 30000 : stup->tuple = (void *) tuple;
1212 : /* set up first-column key value, if it's a simple column */
1213 30000 : if (base->haveDatum1)
1214 30000 : stup->datum1 = heap_getattr(tuple,
1215 30000 : arg->indexInfo->ii_IndexAttrNumbers[0],
1216 : arg->tupDesc,
1217 : &stup->isnull1);
1218 30000 : }
1219 :
1220 : static void
1221 51 : freestate_cluster(Tuplesortstate *state)
1222 : {
1223 51 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1224 51 : TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
1225 :
1226 : /* Free any execution state created for CLUSTER case */
1227 51 : if (arg->estate != NULL)
1228 : {
1229 9 : ExprContext *econtext = GetPerTupleExprContext(arg->estate);
1230 :
1231 9 : ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
1232 9 : FreeExecutorState(arg->estate);
1233 : }
1234 51 : }
1235 :
1236 : /*
1237 : * Routines specialized for IndexTuple case
1238 : *
1239 : * The btree and hash cases require separate comparison functions, but the
1240 : * IndexTuple representation is the same so the copy/write/read support
1241 : * functions can be shared.
1242 : */
1243 :
1244 : static void
1245 36 : removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count)
1246 : {
1247 36 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1248 36 : TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
1249 : int i;
1250 :
1251 308516 : for (i = 0; i < count; i++)
1252 : {
1253 : IndexTuple tuple;
1254 :
1255 308480 : tuple = stups[i].tuple;
1256 308480 : stups[i].datum1 = index_getattr(tuple,
1257 : 1,
1258 308480 : RelationGetDescr(arg->indexRel),
1259 308480 : &stups[i].isnull1);
1260 : }
1261 36 : }
1262 :
1263 : static int
1264 91211690 : comparetup_index_btree(const SortTuple *a, const SortTuple *b,
1265 : Tuplesortstate *state)
1266 : {
1267 : /*
1268 : * This is similar to comparetup_heap(), but expects index tuples. There
1269 : * is also special handling for enforcing uniqueness, and special
1270 : * treatment for equal keys at the end.
1271 : */
1272 91211690 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1273 91211690 : TuplesortIndexBTreeArg *arg = (TuplesortIndexBTreeArg *) base->arg;
1274 91211690 : SortSupport sortKey = base->sortKeys;
1275 : IndexTuple tuple1;
1276 : IndexTuple tuple2;
1277 : int keysz;
1278 : TupleDesc tupDes;
1279 91211690 : bool equal_hasnull = false;
1280 : int nkey;
1281 : int32 compare;
1282 : Datum datum1,
1283 : datum2;
1284 : bool isnull1,
1285 : isnull2;
1286 :
1287 :
1288 : /* Compare the leading sort key */
1289 91211690 : compare = ApplySortComparator(a->datum1, a->isnull1,
1290 91211690 : b->datum1, b->isnull1,
1291 : sortKey);
1292 91211690 : if (compare != 0)
1293 76320409 : return compare;
1294 :
1295 : /* Compare additional sort keys */
1296 14891281 : tuple1 = (IndexTuple) a->tuple;
1297 14891281 : tuple2 = (IndexTuple) b->tuple;
1298 14891281 : keysz = base->nKeys;
1299 14891281 : tupDes = RelationGetDescr(arg->index.indexRel);
1300 :
1301 14891281 : if (sortKey->abbrev_converter)
1302 : {
1303 441787 : datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
1304 441787 : datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
1305 :
1306 441787 : compare = ApplySortAbbrevFullComparator(datum1, isnull1,
1307 : datum2, isnull2,
1308 : sortKey);
1309 441787 : if (compare != 0)
1310 405541 : return compare;
1311 : }
1312 :
1313 : /* they are equal, so we only need to examine one null flag */
1314 14485740 : if (a->isnull1)
1315 213 : equal_hasnull = true;
1316 :
1317 14485740 : sortKey++;
1318 17834722 : for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
1319 : {
1320 11906028 : datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
1321 11906028 : datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
1322 :
1323 11906028 : compare = ApplySortComparator(datum1, isnull1,
1324 : datum2, isnull2,
1325 : sortKey);
1326 11906028 : if (compare != 0)
1327 8557046 : return compare; /* done when we find unequal attributes */
1328 :
1329 : /* they are equal, so we only need to examine one null flag */
1330 3348982 : if (isnull1)
1331 2997 : equal_hasnull = true;
1332 : }
1333 :
1334 : /*
1335 : * If btree has asked us to enforce uniqueness, complain if two equal
1336 : * tuples are detected (unless there was at least one NULL field and NULLS
1337 : * NOT DISTINCT was not set).
1338 : *
1339 : * It is sufficient to make the test here, because if two tuples are equal
1340 : * they *must* get compared at some stage of the sort --- otherwise the
1341 : * sort algorithm wouldn't have checked whether one must appear before the
1342 : * other.
1343 : */
1344 5928694 : if (arg->enforceUnique && !(!arg->uniqueNullsNotDistinct && equal_hasnull))
1345 : {
1346 : Datum values[INDEX_MAX_KEYS];
1347 : bool isnull[INDEX_MAX_KEYS];
1348 : char *key_desc;
1349 :
1350 : /*
1351 : * Some rather brain-dead implementations of qsort (such as the one in
1352 : * QNX 4) will sometimes call the comparison routine to compare a
1353 : * value to itself, but we always use our own implementation, which
1354 : * does not.
1355 : */
1356 42 : Assert(tuple1 != tuple2);
1357 :
1358 42 : index_deform_tuple(tuple1, tupDes, values, isnull);
1359 :
1360 42 : key_desc = BuildIndexValueDescription(arg->index.indexRel, values, isnull);
1361 :
1362 42 : ereport(ERROR,
1363 : (errcode(ERRCODE_UNIQUE_VIOLATION),
1364 : errmsg("could not create unique index \"%s\"",
1365 : RelationGetRelationName(arg->index.indexRel)),
1366 : key_desc ? errdetail("Key %s is duplicated.", key_desc) :
1367 : errdetail("Duplicate keys exist."),
1368 : errtableconstraint(arg->index.heapRel,
1369 : RelationGetRelationName(arg->index.indexRel))));
1370 : }
1371 :
1372 : /*
1373 : * If key values are equal, we sort on ItemPointer. This is required for
1374 : * btree indexes, since heap TID is treated as an implicit last key
1375 : * attribute in order to ensure that all keys in the index are physically
1376 : * unique.
1377 : */
1378 : {
1379 5928652 : BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
1380 5928652 : BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
1381 :
1382 5928652 : if (blk1 != blk2)
1383 5550806 : return (blk1 < blk2) ? -1 : 1;
1384 : }
1385 : {
1386 377846 : OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
1387 377846 : OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
1388 :
1389 377846 : if (pos1 != pos2)
1390 377846 : return (pos1 < pos2) ? -1 : 1;
1391 : }
1392 :
1393 : /* ItemPointer values should never be equal */
256 akorotkov 1394 UNC 0 : Assert(false);
1395 :
1396 : return 0;
1397 : }
1398 :
1399 : static int
256 akorotkov 1400 GNC 913962 : comparetup_index_hash(const SortTuple *a, const SortTuple *b,
1401 : Tuplesortstate *state)
1402 : {
1403 : Bucket bucket1;
1404 : Bucket bucket2;
1405 : uint32 hash1;
1406 : uint32 hash2;
1407 : IndexTuple tuple1;
1408 : IndexTuple tuple2;
1409 913962 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1410 913962 : TuplesortIndexHashArg *arg = (TuplesortIndexHashArg *) base->arg;
1411 :
1412 : /*
1413 : * Fetch hash keys and mask off bits we don't want to sort by, so that the
1414 : * initial sort is just on the bucket number. We know that the first
1415 : * column of the index tuple is the hash key.
1416 : */
1417 913962 : Assert(!a->isnull1);
1418 913962 : bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
1419 : arg->max_buckets, arg->high_mask,
1420 : arg->low_mask);
1421 913962 : Assert(!b->isnull1);
1422 913962 : bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
1423 : arg->max_buckets, arg->high_mask,
1424 : arg->low_mask);
1425 913962 : if (bucket1 > bucket2)
1426 270487 : return 1;
1427 643475 : else if (bucket1 < bucket2)
1428 253917 : return -1;
1429 :
1430 : /*
1431 : * If bucket values are equal, sort by hash values. This allows us to
1432 : * insert directly onto bucket/overflow pages, where the index tuples are
1433 : * stored in hash order to allow fast binary search within each page.
1434 : */
255 tgl 1435 389558 : hash1 = DatumGetUInt32(a->datum1);
1436 389558 : hash2 = DatumGetUInt32(b->datum1);
1437 389558 : if (hash1 > hash2)
1438 97508 : return 1;
1439 292050 : else if (hash1 < hash2)
1440 87091 : return -1;
1441 :
1442 : /*
1443 : * If hash values are equal, we sort on ItemPointer. This does not affect
1444 : * validity of the finished index, but it may be useful to have index
1445 : * scans in physical order.
1446 : */
256 akorotkov 1447 204959 : tuple1 = (IndexTuple) a->tuple;
1448 204959 : tuple2 = (IndexTuple) b->tuple;
1449 :
1450 : {
1451 204959 : BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
1452 204959 : BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
1453 :
1454 204959 : if (blk1 != blk2)
1455 136543 : return (blk1 < blk2) ? -1 : 1;
1456 : }
1457 : {
1458 68416 : OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
1459 68416 : OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
1460 :
1461 68416 : if (pos1 != pos2)
1462 68416 : return (pos1 < pos2) ? -1 : 1;
1463 : }
1464 :
1465 : /* ItemPointer values should never be equal */
256 akorotkov 1466 UNC 0 : Assert(false);
1467 :
1468 : return 0;
1469 : }
1470 :
1471 : static void
256 akorotkov 1472 GNC 2374326 : writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
1473 : {
1474 2374326 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1475 2374326 : IndexTuple tuple = (IndexTuple) stup->tuple;
1476 : unsigned int tuplen;
1477 :
1478 2374326 : tuplen = IndexTupleSize(tuple) + sizeof(tuplen);
100 peter 1479 2374326 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
1480 2374326 : LogicalTapeWrite(tape, tuple, IndexTupleSize(tuple));
256 akorotkov 1481 2374326 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
100 peter 1482 UNC 0 : LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
256 akorotkov 1483 GNC 2374326 : }
1484 :
1485 : static void
1486 2374326 : readtup_index(Tuplesortstate *state, SortTuple *stup,
1487 : LogicalTape *tape, unsigned int len)
1488 : {
1489 2374326 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1490 2374326 : TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
1491 2374326 : unsigned int tuplen = len - sizeof(unsigned int);
1492 2374326 : IndexTuple tuple = (IndexTuple) tuplesort_readtup_alloc(state, tuplen);
1493 :
1494 2374326 : LogicalTapeReadExact(tape, tuple, tuplen);
1495 2374326 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
256 akorotkov 1496 UNC 0 : LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
256 akorotkov 1497 GNC 2374326 : stup->tuple = (void *) tuple;
1498 : /* set up first-column key value */
1499 4748652 : stup->datum1 = index_getattr(tuple,
1500 : 1,
1501 2374326 : RelationGetDescr(arg->indexRel),
1502 : &stup->isnull1);
1503 2374326 : }
1504 :
1505 : /*
1506 : * Routines specialized for DatumTuple case
1507 : */
1508 :
1509 : static void
1510 18 : removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count)
1511 : {
1512 : int i;
1513 :
1514 65618 : for (i = 0; i < count; i++)
1515 65600 : stups[i].datum1 = PointerGetDatum(stups[i].tuple);
1516 18 : }
1517 :
1518 : static int
1519 7100704 : comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
1520 : {
1521 7100704 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1522 : int compare;
1523 :
1524 7100704 : compare = ApplySortComparator(a->datum1, a->isnull1,
1525 7100704 : b->datum1, b->isnull1,
1526 : base->sortKeys);
1527 7100704 : if (compare != 0)
1528 6063791 : return compare;
1529 :
1530 : /* if we have abbreviations, then "tuple" has the original value */
1531 :
1532 1036913 : if (base->sortKeys->abbrev_converter)
1533 917557 : compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
1534 917557 : PointerGetDatum(b->tuple), b->isnull1,
1535 : base->sortKeys);
1536 :
1537 1036913 : return compare;
1538 : }
1539 :
1540 : static void
1541 1170585 : writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
1542 : {
1543 1170585 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1544 1170585 : TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
1545 : void *waddr;
1546 : unsigned int tuplen;
1547 : unsigned int writtenlen;
1548 :
1549 1170585 : if (stup->isnull1)
1550 : {
1551 81 : waddr = NULL;
1552 81 : tuplen = 0;
1553 : }
1554 1170504 : else if (!base->tuples)
1555 : {
1556 150066 : waddr = &stup->datum1;
1557 150066 : tuplen = sizeof(Datum);
1558 : }
1559 : else
1560 : {
1561 1020438 : waddr = stup->tuple;
1562 1020438 : tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, arg->datumTypeLen);
1563 1020438 : Assert(tuplen != 0);
1564 : }
1565 :
1566 1170585 : writtenlen = tuplen + sizeof(unsigned int);
1567 :
100 peter 1568 1170585 : LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen));
256 akorotkov 1569 1170585 : LogicalTapeWrite(tape, waddr, tuplen);
1570 1170585 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
100 peter 1571 360198 : LogicalTapeWrite(tape, &writtenlen, sizeof(writtenlen));
256 akorotkov 1572 1170585 : }
1573 :
1574 : static void
1575 1110591 : readtup_datum(Tuplesortstate *state, SortTuple *stup,
1576 : LogicalTape *tape, unsigned int len)
1577 : {
1578 1110591 : TuplesortPublic *base = TuplesortstateGetPublic(state);
1579 1110591 : unsigned int tuplen = len - sizeof(unsigned int);
1580 :
1581 1110591 : if (tuplen == 0)
1582 : {
1583 : /* it's NULL */
1584 93 : stup->datum1 = (Datum) 0;
1585 93 : stup->isnull1 = true;
1586 93 : stup->tuple = NULL;
1587 : }
1588 1110498 : else if (!base->tuples)
1589 : {
1590 150069 : Assert(tuplen == sizeof(Datum));
1591 150069 : LogicalTapeReadExact(tape, &stup->datum1, tuplen);
1592 150069 : stup->isnull1 = false;
1593 150069 : stup->tuple = NULL;
1594 : }
1595 : else
1596 : {
1597 960429 : void *raddr = tuplesort_readtup_alloc(state, tuplen);
1598 :
1599 960429 : LogicalTapeReadExact(tape, raddr, tuplen);
1600 960429 : stup->datum1 = PointerGetDatum(raddr);
1601 960429 : stup->isnull1 = false;
1602 960429 : stup->tuple = raddr;
1603 : }
1604 :
1605 1110591 : if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
1606 360222 : LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
1607 1110591 : }
|