Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSort.c
4 : * Routines to handle sorting of relations.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSort.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/parallel.h"
19 : #include "executor/execdebug.h"
20 : #include "executor/nodeSort.h"
21 : #include "miscadmin.h"
22 : #include "utils/tuplesort.h"
23 :
24 :
25 : /* ----------------------------------------------------------------
26 : * ExecSort
27 : *
28 : * Sorts tuples from the outer subtree of the node using tuplesort,
29 : * which saves the results in a temporary file or memory. After the
30 : * initial call, returns a tuple from the file with each call.
31 : *
32 : * There are two distinct ways that this sort can be performed:
33 : *
34 : * 1) When the result is a single column we perform a Datum sort.
35 : *
36 : * 2) When the result contains multiple columns we perform a tuple sort.
37 : *
38 : * We could do this by always performing a tuple sort, however sorting
39 : * Datums only can be significantly faster than sorting tuples,
40 : * especially when the Datums are of a pass-by-value type.
41 : *
42 : * Conditions:
43 : * -- none.
44 : *
45 : * Initial States:
46 : * -- the outer child is prepared to return the first tuple.
47 : * ----------------------------------------------------------------
48 : */
49 : static TupleTableSlot *
2092 andres 50 CBC 4896561 : ExecSort(PlanState *pstate)
51 : {
52 4896561 : SortState *node = castNode(SortState, pstate);
53 : EState *estate;
54 : ScanDirection dir;
55 : Tuplesortstate *tuplesortstate;
56 : TupleTableSlot *slot;
57 :
2084 58 4896561 : CHECK_FOR_INTERRUPTS();
59 :
60 : /*
61 : * get state info from node
62 : */
63 : SO1_printf("ExecSort: %s\n",
64 : "entering routine");
65 :
7430 tgl 66 4896561 : estate = node->ss.ps.state;
9345 bruce 67 4896561 : dir = estate->es_direction;
7430 tgl 68 4896561 : tuplesortstate = (Tuplesortstate *) node->tuplesortstate;
69 :
70 : /*
71 : * If first time through, read all tuples from outer plan and pass them to
72 : * tuplesort.c. Subsequent calls just fetch tuples from tuplesort.
73 : */
74 :
75 4896561 : if (!node->sort_Done)
76 : {
77 40814 : Sort *plannode = (Sort *) node->ss.ps.plan;
78 : PlanState *outerNode;
79 : TupleDesc tupDesc;
370 drowley 80 40814 : int tuplesortopts = TUPLESORT_NONE;
81 :
82 : SO1_printf("ExecSort: %s\n",
83 : "sorting subplan");
84 :
85 : /*
86 : * Want to scan subplan in the forward direction while creating the
87 : * sorted data.
88 : */
9345 bruce 89 40814 : estate->es_direction = ForwardScanDirection;
90 :
91 : /*
92 : * Initialize tuplesort module.
93 : */
94 : SO1_printf("ExecSort: %s\n",
95 : "calling tuplesort_begin");
96 :
7430 tgl 97 40814 : outerNode = outerPlanState(node);
7279 98 40814 : tupDesc = ExecGetResultType(outerNode);
99 :
370 drowley 100 40814 : if (node->randomAccess)
101 1836 : tuplesortopts |= TUPLESORT_RANDOMACCESS;
102 40814 : if (node->bounded)
103 726 : tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
104 :
626 105 40814 : if (node->datumSort)
106 2096 : tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
107 2096 : plannode->sortOperators[0],
108 2096 : plannode->collations[0],
109 2096 : plannode->nullsFirst[0],
110 : work_mem,
111 : NULL,
112 : tuplesortopts);
113 : else
114 38718 : tuplesortstate = tuplesort_begin_heap(tupDesc,
115 : plannode->numCols,
116 : plannode->sortColIdx,
117 : plannode->sortOperators,
118 : plannode->collations,
119 : plannode->nullsFirst,
120 : work_mem,
121 : NULL,
122 : tuplesortopts);
5819 tgl 123 40805 : if (node->bounded)
124 726 : tuplesort_set_bound(tuplesortstate, node->bound);
7430 125 40805 : node->tuplesortstate = (void *) tuplesortstate;
126 :
127 : /*
128 : * Scan the subplan and feed all the tuples to tuplesort using the
129 : * appropriate method based on the type of sort we're doing.
130 : */
626 drowley 131 40805 : if (node->datumSort)
132 : {
133 : for (;;)
134 : {
135 736857 : slot = ExecProcNode(outerNode);
136 :
137 736854 : if (TupIsNull(slot))
138 : break;
139 734761 : slot_getsomeattrs(slot, 1);
140 734761 : tuplesort_putdatum(tuplesortstate,
141 734761 : slot->tts_values[0],
142 734761 : slot->tts_isnull[0]);
143 : }
144 : }
145 : else
146 : {
147 : for (;;)
148 : {
149 4998680 : slot = ExecProcNode(outerNode);
150 :
151 4998680 : if (TupIsNull(slot))
152 : break;
153 4959971 : tuplesort_puttupleslot(tuplesortstate, slot);
154 : }
155 : }
156 :
157 : /*
158 : * Complete the sort.
159 : */
8575 tgl 160 40802 : tuplesort_performsort(tuplesortstate);
161 :
162 : /*
163 : * restore to user specified direction
164 : */
9345 bruce 165 40802 : estate->es_direction = dir;
166 :
167 : /*
168 : * finally set the sorted flag to true
169 : */
7430 tgl 170 40802 : node->sort_Done = true;
5819 171 40802 : node->bounded_Done = node->bounded;
172 40802 : node->bound_Done = node->bound;
2049 rhaas 173 40802 : if (node->shared_info && node->am_worker)
174 : {
175 : TuplesortInstrumentation *si;
176 :
177 48 : Assert(IsParallelWorker());
178 48 : Assert(ParallelWorkerNumber <= node->shared_info->num_workers);
179 48 : si = &node->shared_info->sinstrument[ParallelWorkerNumber];
180 48 : tuplesort_get_stats(tuplesortstate, si);
181 : }
182 : SO1_printf("ExecSort: %s\n", "sorting done");
183 : }
184 :
185 : SO1_printf("ExecSort: %s\n",
186 : "retrieving tuple from tuplesort");
187 :
626 drowley 188 4896549 : slot = node->ss.ps.ps_ResultTupleSlot;
189 :
190 : /*
191 : * Fetch the next sorted item from the appropriate tuplesort function. For
192 : * datum sorts we must manage the slot ourselves and leave it clear when
193 : * tuplesort_getdatum returns false to indicate there are no more datums.
194 : * For tuple sorts, tuplesort_gettupleslot manages the slot for us and
195 : * empties the slot when it runs out of tuples.
196 : */
197 4896549 : if (node->datumSort)
198 : {
199 591530 : ExecClearTuple(slot);
200 591530 : if (tuplesort_getdatum(tuplesortstate, ScanDirectionIsForward(dir),
201 : false, &(slot->tts_values[0]),
202 : &(slot->tts_isnull[0]), NULL))
626 drowley 203 GIC 589578 : ExecStoreVirtualTuple(slot);
626 drowley 204 ECB : }
205 : else
626 drowley 206 GIC 4305019 : (void) tuplesort_gettupleslot(tuplesortstate,
626 drowley 207 ECB : ScanDirectionIsForward(dir),
208 : false, slot, NULL);
209 :
6130 tgl 210 GIC 4896549 : return slot;
9770 scrappy 211 ECB : }
212 :
213 : /* ----------------------------------------------------------------
214 : * ExecInitSort
215 : *
216 : * Creates the run-time state information for the sort node
217 : * produced by the planner and initializes its outer subtree.
218 : * ----------------------------------------------------------------
219 : */
220 : SortState *
6249 tgl 221 GIC 26053 : ExecInitSort(Sort *node, EState *estate, int eflags)
9770 scrappy 222 ECB : {
223 : SortState *sortstate;
224 : TupleDesc outerTupDesc;
225 :
226 : SO1_printf("ExecInitSort: %s\n",
227 : "initializing sort node");
228 :
229 : /*
230 : * create state structure
231 : */
9345 bruce 232 GIC 26053 : sortstate = makeNode(SortState);
7430 tgl 233 CBC 26053 : sortstate->ss.ps.plan = (Plan *) node;
234 26053 : sortstate->ss.ps.state = estate;
2092 andres 235 26053 : sortstate->ss.ps.ExecProcNode = ExecSort;
7430 tgl 236 ECB :
237 : /*
238 : * We must have random access to the sort output to do backward scan or
239 : * mark/restore. We also prefer to materialize the sort output if we
240 : * might be called on to rewind and replay it many times.
241 : */
6249 tgl 242 GIC 26053 : sortstate->randomAccess = (eflags & (EXEC_FLAG_REWIND |
6249 tgl 243 ECB : EXEC_FLAG_BACKWARD |
6249 tgl 244 GIC 26053 : EXEC_FLAG_MARK)) != 0;
6249 tgl 245 ECB :
5819 tgl 246 GIC 26053 : sortstate->bounded = false;
8575 tgl 247 CBC 26053 : sortstate->sort_Done = false;
248 26053 : sortstate->tuplesortstate = NULL;
9345 bruce 249 ECB :
250 : /*
251 : * Miscellaneous initialization
252 : *
253 : * Sort nodes don't initialize their ExprContexts because they never call
254 : * ExecQual or ExecProject.
255 : */
256 :
257 : /*
258 : * initialize child nodes
259 : *
260 : * We shield the child node from the need to support REWIND, BACKWARD, or
261 : * MARK/RESTORE.
262 : */
6249 tgl 263 GIC 26053 : eflags &= ~(EXEC_FLAG_REWIND | EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK);
6249 tgl 264 ECB :
6249 tgl 265 GIC 26053 : outerPlanState(sortstate) = ExecInitNode(outerPlan(node), estate, eflags);
9345 bruce 266 ECB :
267 : /*
268 : * Initialize scan slot and type.
269 : */
1606 andres 270 GIC 26050 : ExecCreateScanSlotFromOuterPlan(estate, &sortstate->ss, &TTSOpsVirtual);
1878 andres 271 ECB :
272 : /*
273 : * Initialize return slot and type. No need to initialize projection info
274 : * because this node doesn't do projections.
275 : */
1606 andres 276 GIC 26050 : ExecInitResultTupleSlotTL(&sortstate->ss.ps, &TTSOpsMinimalTuple);
7430 tgl 277 CBC 26050 : sortstate->ss.ps.ps_ProjInfo = NULL;
9345 bruce 278 ECB :
192 drowley 279 GIC 26050 : outerTupDesc = ExecGetResultType(outerPlanState(sortstate));
192 drowley 280 ECB :
281 : /*
282 : * We perform a Datum sort when we're sorting just a single column,
283 : * otherwise we perform a tuple sort.
284 : */
163 drowley 285 GNC 26050 : if (outerTupDesc->natts == 1)
626 drowley 286 CBC 3400 : sortstate->datumSort = true;
626 drowley 287 ECB : else
626 drowley 288 GIC 22650 : sortstate->datumSort = false;
626 drowley 289 ECB :
290 : SO1_printf("ExecInitSort: %s\n",
291 : "sort node initialized");
292 :
7430 tgl 293 GIC 26050 : return sortstate;
9770 scrappy 294 ECB : }
295 :
296 : /* ----------------------------------------------------------------
297 : * ExecEndSort(node)
298 : * ----------------------------------------------------------------
299 : */
300 : void
7430 tgl 301 GIC 26011 : ExecEndSort(SortState *node)
9770 scrappy 302 ECB : {
303 : SO1_printf("ExecEndSort: %s\n",
304 : "shutting down sort node");
305 :
306 : /*
307 : * clean out the tuple table
308 : */
7430 tgl 309 GIC 26011 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
6251 tgl 310 ECB : /* must drop pointer to sort result tuple */
6251 tgl 311 GIC 26011 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
9345 bruce 312 ECB :
313 : /*
314 : * Release tuplesort resources
315 : */
7430 tgl 316 GIC 26011 : if (node->tuplesortstate != NULL)
7430 tgl 317 CBC 22830 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
318 26011 : node->tuplesortstate = NULL;
9345 bruce 319 ECB :
320 : /*
321 : * shut down the subplan
322 : */
7420 tgl 323 GIC 26011 : ExecEndNode(outerPlanState(node));
7420 tgl 324 ECB :
325 : SO1_printf("ExecEndSort: %s\n",
326 : "sort node shutdown");
9345 bruce 327 GIC 26011 : }
9770 scrappy 328 ECB :
329 : /* ----------------------------------------------------------------
330 : * ExecSortMarkPos
331 : *
332 : * Calls tuplesort to save the current position in the sorted file.
333 : * ----------------------------------------------------------------
334 : */
335 : void
7430 tgl 336 GIC 284485 : ExecSortMarkPos(SortState *node)
9770 scrappy 337 ECB : {
338 : /*
339 : * if we haven't sorted yet, just return
340 : */
7430 tgl 341 GIC 284485 : if (!node->sort_Done)
9345 bruce 342 LBC 0 : return;
9345 bruce 343 EUB :
7430 tgl 344 GIC 284485 : tuplesort_markpos((Tuplesortstate *) node->tuplesortstate);
9770 scrappy 345 ECB : }
346 :
347 : /* ----------------------------------------------------------------
348 : * ExecSortRestrPos
349 : *
350 : * Calls tuplesort to restore the last saved sort file position.
351 : * ----------------------------------------------------------------
352 : */
353 : void
7430 tgl 354 GIC 14909 : ExecSortRestrPos(SortState *node)
9770 scrappy 355 ECB : {
356 : /*
357 : * if we haven't sorted yet, just return.
358 : */
7430 tgl 359 GIC 14909 : if (!node->sort_Done)
9345 bruce 360 LBC 0 : return;
9345 bruce 361 EUB :
362 : /*
363 : * restore the scan to the previously marked position
364 : */
7430 tgl 365 GIC 14909 : tuplesort_restorepos((Tuplesortstate *) node->tuplesortstate);
9770 scrappy 366 ECB : }
367 :
368 : void
4654 tgl 369 GIC 18328 : ExecReScanSort(SortState *node)
9176 vadim4o 370 ECB : {
2878 bruce 371 GIC 18328 : PlanState *outerPlan = outerPlanState(node);
2897 rhaas 372 ECB :
373 : /*
374 : * If we haven't sorted yet, just return. If outerplan's chgParam is not
375 : * NULL then it will be re-scanned by ExecProcNode, else no reason to
376 : * re-scan it at all.
377 : */
7430 tgl 378 GIC 18328 : if (!node->sort_Done)
9176 vadim4o 379 CBC 366 : return;
9173 bruce 380 ECB :
381 : /* must drop pointer to sort result tuple */
7430 tgl 382 GIC 17962 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
9173 bruce 383 ECB :
384 : /*
385 : * If subnode is to be rescanned then we forget previous sort results; we
386 : * have to re-read the subplan and re-sort. Also must re-sort if the
387 : * bounded-sort parameters changed or we didn't select randomAccess.
388 : *
389 : * Otherwise we can just rewind and rescan the sorted output.
390 : */
2897 rhaas 391 GIC 17962 : if (outerPlan->chgParam != NULL ||
5819 tgl 392 CBC 224 : node->bounded != node->bounded_Done ||
393 224 : node->bound != node->bound_Done ||
6249 394 197 : !node->randomAccess)
8575 tgl 395 ECB : {
7430 tgl 396 GIC 17945 : node->sort_Done = false;
7430 tgl 397 CBC 17945 : tuplesort_end((Tuplesortstate *) node->tuplesortstate);
398 17945 : node->tuplesortstate = NULL;
6031 bruce 399 ECB :
400 : /*
401 : * if chgParam of subnode is not null then plan will be re-scanned by
402 : * first ExecProcNode.
403 : */
2897 rhaas 404 GIC 17945 : if (outerPlan->chgParam == NULL)
2897 rhaas 405 CBC 207 : ExecReScan(outerPlan);
8575 tgl 406 ECB : }
407 : else
7430 tgl 408 GIC 17 : tuplesort_rescan((Tuplesortstate *) node->tuplesortstate);
9176 vadim4o 409 ECB : }
410 :
411 : /* ----------------------------------------------------------------
412 : * Parallel Query Support
413 : * ----------------------------------------------------------------
414 : */
415 :
416 : /* ----------------------------------------------------------------
417 : * ExecSortEstimate
418 : *
419 : * Estimate space required to propagate sort statistics.
420 : * ----------------------------------------------------------------
421 : */
422 : void
2049 rhaas 423 GIC 70 : ExecSortEstimate(SortState *node, ParallelContext *pcxt)
2049 rhaas 424 ECB : {
425 : Size size;
426 :
427 : /* don't need this if not instrumenting or no workers */
2049 rhaas 428 GIC 70 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
2049 rhaas 429 CBC 64 : return;
2049 rhaas 430 ECB :
2049 rhaas 431 GIC 6 : size = mul_size(pcxt->nworkers, sizeof(TuplesortInstrumentation));
2049 rhaas 432 CBC 6 : size = add_size(size, offsetof(SharedSortInfo, sinstrument));
433 6 : shm_toc_estimate_chunk(&pcxt->estimator, size);
434 6 : shm_toc_estimate_keys(&pcxt->estimator, 1);
2049 rhaas 435 ECB : }
436 :
437 : /* ----------------------------------------------------------------
438 : * ExecSortInitializeDSM
439 : *
440 : * Initialize DSM space for sort statistics.
441 : * ----------------------------------------------------------------
442 : */
443 : void
2049 rhaas 444 GIC 70 : ExecSortInitializeDSM(SortState *node, ParallelContext *pcxt)
2049 rhaas 445 ECB : {
446 : Size size;
447 :
448 : /* don't need this if not instrumenting or no workers */
2049 rhaas 449 GIC 70 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
2049 rhaas 450 CBC 64 : return;
2049 rhaas 451 ECB :
2049 rhaas 452 GIC 6 : size = offsetof(SharedSortInfo, sinstrument)
2049 rhaas 453 CBC 6 : + pcxt->nworkers * sizeof(TuplesortInstrumentation);
454 6 : node->shared_info = shm_toc_allocate(pcxt->toc, size);
2049 rhaas 455 ECB : /* ensure any unfilled slots will contain zeroes */
2049 rhaas 456 GIC 6 : memset(node->shared_info, 0, size);
2049 rhaas 457 CBC 6 : node->shared_info->num_workers = pcxt->nworkers;
458 6 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id,
459 6 : node->shared_info);
2049 rhaas 460 ECB : }
461 :
462 : /* ----------------------------------------------------------------
463 : * ExecSortInitializeWorker
464 : *
465 : * Attach worker to DSM space for sort statistics.
466 : * ----------------------------------------------------------------
467 : */
468 : void
1970 andres 469 GIC 214 : ExecSortInitializeWorker(SortState *node, ParallelWorkerContext *pwcxt)
2049 rhaas 470 ECB : {
2049 rhaas 471 GIC 214 : node->shared_info =
1970 andres 472 CBC 214 : shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, true);
2049 rhaas 473 214 : node->am_worker = true;
474 214 : }
2049 rhaas 475 ECB :
476 : /* ----------------------------------------------------------------
477 : * ExecSortRetrieveInstrumentation
478 : *
479 : * Transfer sort statistics from DSM to private memory.
480 : * ----------------------------------------------------------------
481 : */
482 : void
2049 rhaas 483 GIC 6 : ExecSortRetrieveInstrumentation(SortState *node)
2049 rhaas 484 ECB : {
485 : Size size;
486 : SharedSortInfo *si;
487 :
2049 rhaas 488 GIC 6 : if (node->shared_info == NULL)
2049 rhaas 489 LBC 0 : return;
2049 rhaas 490 EUB :
2049 rhaas 491 GIC 6 : size = offsetof(SharedSortInfo, sinstrument)
2049 rhaas 492 CBC 6 : + node->shared_info->num_workers * sizeof(TuplesortInstrumentation);
493 6 : si = palloc(size);
494 6 : memcpy(si, node->shared_info, size);
495 6 : node->shared_info = si;
2049 rhaas 496 ECB : }
|