Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * snapmgr.c
4 : * PostgreSQL snapshot manager
5 : *
6 : * We keep track of snapshots in two ways: those "registered" by resowner.c,
7 : * and the "active snapshot" stack. All snapshots in either of them live in
8 : * persistent memory. When a snapshot is no longer in any of these lists
9 : * (tracked by separate refcounts on each snapshot), its memory can be freed.
10 : *
11 : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12 : * regd_count and list it in RegisteredSnapshots, but this reference is not
13 : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14 : * to track this snapshot reference, but that introduces logical circularity
15 : * and thus makes it impossible to clean up in a sane fashion. It's better to
16 : * handle this reference as an internally-tracked registration, so that this
17 : * module is entirely lower-level than ResourceOwners.
18 : *
19 : * Likewise, any snapshots that have been exported by pg_export_snapshot
20 : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21 : * tracked by any resource owner.
22 : *
23 : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24 : * is valid, but is not tracked by any resource owner.
25 : *
26 : * The same is true for historic snapshots used during logical decoding,
27 : * their lifetime is managed separately (as they live longer than one xact.c
28 : * transaction).
29 : *
30 : * These arrangements let us reset MyProc->xmin when there are no snapshots
31 : * referenced by this transaction, and advance it when the one with oldest
32 : * Xmin is no longer referenced. For simplicity, however, only registered
33 : * snapshots (not active snapshots) participate in tracking which one is oldest;
34 : * we don't try to change MyProc->xmin except when the active-snapshot
35 : * stack is empty.
36 : *
37 : *
38 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
39 : * Portions Copyright (c) 1994, Regents of the University of California
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/time/snapmgr.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/stat.h>
49 : #include <unistd.h>
50 :
51 : #include "access/subtrans.h"
52 : #include "access/transam.h"
53 : #include "access/xact.h"
54 : #include "access/xlog.h"
55 : #include "catalog/catalog.h"
56 : #include "datatype/timestamp.h"
57 : #include "lib/pairingheap.h"
58 : #include "miscadmin.h"
59 : #include "port/pg_lfind.h"
60 : #include "storage/predicate.h"
61 : #include "storage/proc.h"
62 : #include "storage/procarray.h"
63 : #include "storage/sinval.h"
64 : #include "storage/sinvaladt.h"
65 : #include "storage/spin.h"
66 : #include "utils/builtins.h"
67 : #include "utils/memutils.h"
68 : #include "utils/old_snapshot.h"
69 : #include "utils/rel.h"
70 : #include "utils/resowner_private.h"
71 : #include "utils/snapmgr.h"
72 : #include "utils/syscache.h"
73 : #include "utils/timestamp.h"
74 :
75 :
76 : /*
77 : * GUC parameters
78 : */
79 : int old_snapshot_threshold; /* number of minutes, -1 disables */
80 :
81 : volatile OldSnapshotControlData *oldSnapshotControl;
82 :
83 :
84 : /*
85 : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
86 : * mode, and to the latest one taken in a read-committed transaction.
87 : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
88 : * instant, even in transaction-snapshot mode. It should only be used for
89 : * special-purpose code (say, RI checking.) CatalogSnapshot points to an
90 : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
91 : * whenever a system catalog change occurs.
92 : *
93 : * These SnapshotData structs are static to simplify memory allocation
94 : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
95 : */
96 : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
97 : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
98 : SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
99 : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
100 : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
101 :
102 : /* Pointers to valid snapshots */
103 : static Snapshot CurrentSnapshot = NULL;
104 : static Snapshot SecondarySnapshot = NULL;
105 : static Snapshot CatalogSnapshot = NULL;
106 : static Snapshot HistoricSnapshot = NULL;
107 :
108 : /*
109 : * These are updated by GetSnapshotData. We initialize them this way
110 : * for the convenience of TransactionIdIsInProgress: even in bootstrap
111 : * mode, we don't want it to say that BootstrapTransactionId is in progress.
112 : */
113 : TransactionId TransactionXmin = FirstNormalTransactionId;
114 : TransactionId RecentXmin = FirstNormalTransactionId;
115 :
116 : /* (table, ctid) => (cmin, cmax) mapping during timetravel */
117 : static HTAB *tuplecid_data = NULL;
118 :
119 : /*
120 : * Elements of the active snapshot stack.
121 : *
122 : * Each element here accounts for exactly one active_count on SnapshotData.
123 : *
124 : * NB: the code assumes that elements in this list are in non-increasing
125 : * order of as_level; also, the list must be NULL-terminated.
126 : */
127 : typedef struct ActiveSnapshotElt
128 : {
129 : Snapshot as_snap;
130 : int as_level;
131 : struct ActiveSnapshotElt *as_next;
132 : } ActiveSnapshotElt;
133 :
134 : /* Top of the stack of active snapshots */
135 : static ActiveSnapshotElt *ActiveSnapshot = NULL;
136 :
137 : /* Bottom of the stack of active snapshots */
138 : static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
139 :
140 : /*
141 : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
142 : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
143 : */
144 : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
145 : void *arg);
146 :
147 : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
148 :
149 : /* first GetTransactionSnapshot call in a transaction? */
150 : bool FirstSnapshotSet = false;
151 :
152 : /*
153 : * Remember the serializable transaction snapshot, if any. We cannot trust
154 : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
155 : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
156 : */
157 : static Snapshot FirstXactSnapshot = NULL;
158 :
159 : /* Define pathname of exported-snapshot files */
160 : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
161 :
162 : /* Structure holding info about exported snapshot. */
163 : typedef struct ExportedSnapshot
164 : {
165 : char *snapfile;
166 : Snapshot snapshot;
167 : } ExportedSnapshot;
168 :
169 : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
170 : static List *exportedSnapshots = NIL;
171 :
172 : /* Prototypes for local functions */
173 : static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
174 : static Snapshot CopySnapshot(Snapshot snapshot);
175 : static void FreeSnapshot(Snapshot snapshot);
176 : static void SnapshotResetXmin(void);
177 :
178 : /*
179 : * Snapshot fields to be serialized.
180 : *
181 : * Only these fields need to be sent to the cooperating backend; the
182 : * remaining ones can (and must) be set by the receiver upon restore.
183 : */
184 : typedef struct SerializedSnapshotData
185 : {
186 : TransactionId xmin;
187 : TransactionId xmax;
188 : uint32 xcnt;
189 : int32 subxcnt;
190 : bool suboverflowed;
191 : bool takenDuringRecovery;
192 : CommandId curcid;
193 : TimestampTz whenTaken;
194 : XLogRecPtr lsn;
195 : } SerializedSnapshotData;
196 :
197 : Size
2557 kgrittn 198 GIC 4564 : SnapMgrShmemSize(void)
2557 kgrittn 199 ECB : {
200 : Size size;
201 :
2557 kgrittn 202 GIC 4564 : size = offsetof(OldSnapshotControlData, xid_by_minute);
2557 kgrittn 203 CBC 4564 : if (old_snapshot_threshold > 0)
2557 kgrittn 204 LBC 0 : size = add_size(size, mul_size(sizeof(TransactionId),
2536 kgrittn 205 UBC 0 : OLD_SNAPSHOT_TIME_MAP_ENTRIES));
2557 kgrittn 206 EUB :
2557 kgrittn 207 GIC 4564 : return size;
2557 kgrittn 208 ECB : }
209 :
210 : /*
211 : * Initialize for managing old snapshot detection.
212 : */
213 : void
2557 kgrittn 214 GIC 1826 : SnapMgrInit(void)
2557 kgrittn 215 ECB : {
216 : bool found;
217 :
218 : /*
219 : * Create or attach to the OldSnapshotControlData structure.
220 : */
2554 kgrittn 221 GIC 1826 : oldSnapshotControl = (volatile OldSnapshotControlData *)
2557 kgrittn 222 CBC 1826 : ShmemInitStruct("OldSnapshotControlData",
2557 kgrittn 223 ECB : SnapMgrShmemSize(), &found);
224 :
2557 kgrittn 225 GIC 1826 : if (!found)
2557 kgrittn 226 ECB : {
2557 kgrittn 227 GIC 1826 : SpinLockInit(&oldSnapshotControl->mutex_current);
2557 kgrittn 228 CBC 1826 : oldSnapshotControl->current_timestamp = 0;
229 1826 : SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
230 1826 : oldSnapshotControl->latest_xmin = InvalidTransactionId;
2529 231 1826 : oldSnapshotControl->next_map_update = 0;
2557 232 1826 : SpinLockInit(&oldSnapshotControl->mutex_threshold);
233 1826 : oldSnapshotControl->threshold_timestamp = 0;
234 1826 : oldSnapshotControl->threshold_xid = InvalidTransactionId;
235 1826 : oldSnapshotControl->head_offset = 0;
236 1826 : oldSnapshotControl->head_timestamp = 0;
237 1826 : oldSnapshotControl->count_used = 0;
2557 kgrittn 238 ECB : }
2557 kgrittn 239 GIC 1826 : }
2557 kgrittn 240 ECB :
241 : /*
242 : * GetTransactionSnapshot
243 : * Get the appropriate snapshot for a new query in a transaction.
244 : *
245 : * Note that the return value may point at static storage that will be modified
246 : * by future calls and by CommandCounterIncrement(). Callers should call
247 : * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
248 : * used very long.
249 : */
250 : Snapshot
5492 alvherre 251 GIC 1028842 : GetTransactionSnapshot(void)
5492 alvherre 252 ECB : {
253 : /*
254 : * Return historic snapshot if doing logical decoding. We'll never need a
255 : * non-historic transaction snapshot in this (sub-)transaction, so there's
256 : * no need to be careful to set one up for later calls to
257 : * GetTransactionSnapshot().
258 : */
3324 rhaas 259 GIC 1028842 : if (HistoricSnapshotActive())
3324 rhaas 260 ECB : {
3324 rhaas 261 UIC 0 : Assert(!FirstSnapshotSet);
3324 rhaas 262 UBC 0 : return HistoricSnapshot;
3324 rhaas 263 EUB : }
264 :
265 : /* First call in transaction? */
5445 alvherre 266 GIC 1028842 : if (!FirstSnapshotSet)
5492 alvherre 267 ECB : {
268 : /*
269 : * Don't allow catalog snapshot to be older than xact snapshot. Must
270 : * do this first to allow the empty-heap Assert to succeed.
271 : */
2336 tgl 272 GIC 463782 : InvalidateCatalogSnapshot();
2336 tgl 273 ECB :
3004 heikki.linnakangas 274 GIC 463782 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
4213 tgl 275 CBC 463782 : Assert(FirstXactSnapshot == NULL);
5248 alvherre 276 ECB :
2901 rhaas 277 GIC 463782 : if (IsInParallelMode())
2901 rhaas 278 LBC 0 : elog(ERROR,
2901 rhaas 279 EUB : "cannot take query snapshot during a parallel operation");
280 :
281 : /*
282 : * In transaction-snapshot mode, the first snapshot must live until
283 : * end of xact regardless of what the caller does with it, so we must
284 : * make a copy of it rather than returning CurrentSnapshotData
285 : * directly. Furthermore, if we're running in serializable mode,
286 : * predicate.c needs to wrap the snapshot fetch in its own processing.
287 : */
4593 mail 288 GIC 463782 : if (IsolationUsesXactSnapshot())
5445 alvherre 289 ECB : {
290 : /* First, create the snapshot in CurrentSnapshotData */
4444 heikki.linnakangas 291 GIC 2601 : if (IsolationIsSerializable())
4213 tgl 292 CBC 1640 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
4444 heikki.linnakangas 293 ECB : else
4444 heikki.linnakangas 294 GIC 961 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
4213 tgl 295 ECB : /* Make a saved copy */
4213 tgl 296 GIC 2601 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
4213 tgl 297 CBC 2601 : FirstXactSnapshot = CurrentSnapshot;
4213 tgl 298 ECB : /* Mark it as "registered" in FirstXactSnapshot */
4213 tgl 299 GIC 2601 : FirstXactSnapshot->regd_count++;
3004 heikki.linnakangas 300 CBC 2601 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
5445 alvherre 301 ECB : }
302 : else
4444 heikki.linnakangas 303 GIC 461181 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
5445 alvherre 304 ECB :
4444 heikki.linnakangas 305 GIC 463782 : FirstSnapshotSet = true;
5445 alvherre 306 CBC 463782 : return CurrentSnapshot;
5492 alvherre 307 ECB : }
308 :
4593 mail 309 GIC 565060 : if (IsolationUsesXactSnapshot())
5445 alvherre 310 CBC 88090 : return CurrentSnapshot;
5492 alvherre 311 ECB :
312 : /* Don't allow catalog snapshot to be older than xact snapshot. */
2336 tgl 313 GIC 476970 : InvalidateCatalogSnapshot();
3568 rhaas 314 ECB :
5445 alvherre 315 GIC 476970 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
5492 alvherre 316 ECB :
5445 alvherre 317 GIC 476970 : return CurrentSnapshot;
5492 alvherre 318 ECB : }
319 :
320 : /*
321 : * GetLatestSnapshot
322 : * Get a snapshot that is up-to-date as of the current instant,
323 : * even if we are executing in transaction-snapshot mode.
324 : */
325 : Snapshot
5492 alvherre 326 GIC 147495 : GetLatestSnapshot(void)
5492 alvherre 327 ECB : {
328 : /*
329 : * We might be able to relax this, but nothing that could otherwise work
330 : * needs it.
331 : */
2901 rhaas 332 GIC 147495 : if (IsInParallelMode())
2901 rhaas 333 LBC 0 : elog(ERROR,
2901 rhaas 334 EUB : "cannot update SecondarySnapshot during a parallel operation");
335 :
336 : /*
337 : * So far there are no cases requiring support for GetLatestSnapshot()
338 : * during logical decoding, but it wouldn't be hard to add if required.
339 : */
3324 rhaas 340 GIC 147495 : Assert(!HistoricSnapshotActive());
3324 rhaas 341 ECB :
342 : /* If first call in transaction, go ahead and set the xact snapshot */
5445 alvherre 343 GIC 147495 : if (!FirstSnapshotSet)
3934 tgl 344 CBC 50 : return GetTransactionSnapshot();
5492 alvherre 345 ECB :
5445 alvherre 346 GIC 147445 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
5492 alvherre 347 ECB :
5445 alvherre 348 GIC 147445 : return SecondarySnapshot;
5445 alvherre 349 ECB : }
350 :
351 : /*
352 : * GetOldestSnapshot
353 : *
354 : * Get the transaction's oldest known snapshot, as judged by the LSN.
355 : * Will return NULL if there are no active or registered snapshots.
356 : */
357 : Snapshot
2440 rhaas 358 GIC 51113 : GetOldestSnapshot(void)
2440 rhaas 359 ECB : {
2440 rhaas 360 GIC 51113 : Snapshot OldestRegisteredSnapshot = NULL;
2440 rhaas 361 CBC 51113 : XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
2440 rhaas 362 ECB :
2440 rhaas 363 GIC 51113 : if (!pairingheap_is_empty(&RegisteredSnapshots))
2440 rhaas 364 ECB : {
2440 rhaas 365 GIC 50978 : OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
2118 tgl 366 ECB : pairingheap_first(&RegisteredSnapshots));
2440 rhaas 367 GIC 50978 : RegisteredLSN = OldestRegisteredSnapshot->lsn;
2440 rhaas 368 ECB : }
369 :
2440 rhaas 370 GIC 51113 : if (OldestActiveSnapshot != NULL)
2436 tgl 371 ECB : {
2436 tgl 372 GIC 51110 : XLogRecPtr ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
2440 rhaas 373 ECB :
2436 tgl 374 GIC 51110 : if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
2436 tgl 375 CBC 51110 : return OldestActiveSnapshot->as_snap;
2436 tgl 376 ECB : }
377 :
2440 rhaas 378 GIC 3 : return OldestRegisteredSnapshot;
2440 rhaas 379 ECB : }
380 :
381 : /*
382 : * GetCatalogSnapshot
383 : * Get a snapshot that is sufficiently up-to-date for scan of the
384 : * system catalog with the specified OID.
385 : */
386 : Snapshot
3568 rhaas 387 GIC 8329744 : GetCatalogSnapshot(Oid relid)
3324 rhaas 388 ECB : {
389 : /*
390 : * Return historic snapshot while we're doing logical decoding, so we can
391 : * see the appropriate state of the catalog.
392 : *
393 : * This is the primary reason for needing to reset the system caches after
394 : * finishing decoding.
395 : */
3324 rhaas 396 GIC 8329744 : if (HistoricSnapshotActive())
3324 rhaas 397 CBC 11718 : return HistoricSnapshot;
3324 rhaas 398 ECB :
3324 rhaas 399 GIC 8318026 : return GetNonHistoricCatalogSnapshot(relid);
3324 rhaas 400 ECB : }
401 :
402 : /*
403 : * GetNonHistoricCatalogSnapshot
404 : * Get a snapshot that is sufficiently up-to-date for scan of the system
405 : * catalog with the specified OID, even while historic snapshots are set
406 : * up.
407 : */
408 : Snapshot
3324 rhaas 409 GIC 8319334 : GetNonHistoricCatalogSnapshot(Oid relid)
3568 rhaas 410 ECB : {
411 : /*
412 : * If the caller is trying to scan a relation that has no syscache, no
413 : * catcache invalidations will be sent when it is updated. For a few key
414 : * relations, snapshot invalidations are sent instead. If we're trying to
415 : * scan a relation for which neither catcache nor snapshot invalidations
416 : * are sent, we must refresh the snapshot every time.
417 : */
2336 tgl 418 GIC 8319334 : if (CatalogSnapshot &&
2336 tgl 419 CBC 6998380 : !RelationInvalidatesSnapshotsOnly(relid) &&
3568 rhaas 420 6271010 : !RelationHasSysCache(relid))
2336 tgl 421 189473 : InvalidateCatalogSnapshot();
3568 rhaas 422 ECB :
2336 tgl 423 GIC 8319334 : if (CatalogSnapshot == NULL)
3568 rhaas 424 ECB : {
425 : /* Get new snapshot. */
3568 rhaas 426 GIC 1510427 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
3568 rhaas 427 ECB :
428 : /*
429 : * Make sure the catalog snapshot will be accounted for in decisions
430 : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
431 : * that would result in making a physical copy, which is overkill; and
432 : * it would also create a dependency on some resource owner, which we
433 : * do not want for reasons explained at the head of this file. Instead
434 : * just shove the CatalogSnapshot into the pairing heap manually. This
435 : * has to be reversed in InvalidateCatalogSnapshot, of course.
436 : *
437 : * NB: it had better be impossible for this to throw error, since the
438 : * CatalogSnapshot pointer is already valid.
439 : */
2336 tgl 440 GIC 1510427 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
3568 rhaas 441 ECB : }
442 :
3568 rhaas 443 GIC 8319334 : return CatalogSnapshot;
3568 rhaas 444 ECB : }
445 :
446 : /*
447 : * InvalidateCatalogSnapshot
448 : * Mark the current catalog snapshot, if any, as invalid
449 : *
450 : * We could change this API to allow the caller to provide more fine-grained
451 : * invalidation details, so that a change to relation A wouldn't prevent us
452 : * from using our cached snapshot to scan relation B, but so far there's no
453 : * evidence that the CPU cycles we spent tracking such fine details would be
454 : * well-spent.
455 : */
456 : void
2794 andres 457 GIC 17422509 : InvalidateCatalogSnapshot(void)
3568 rhaas 458 ECB : {
2336 tgl 459 GIC 17422509 : if (CatalogSnapshot)
2336 tgl 460 ECB : {
2336 tgl 461 GIC 1510427 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
2336 tgl 462 CBC 1510427 : CatalogSnapshot = NULL;
463 1510427 : SnapshotResetXmin();
2336 tgl 464 ECB : }
2336 tgl 465 GIC 17422509 : }
2336 tgl 466 ECB :
467 : /*
468 : * InvalidateCatalogSnapshotConditionally
469 : * Drop catalog snapshot if it's the only one we have
470 : *
471 : * This is called when we are about to wait for client input, so we don't
472 : * want to continue holding the catalog snapshot if it might mean that the
473 : * global xmin horizon can't advance. However, if there are other snapshots
474 : * still active or registered, the catalog snapshot isn't likely to be the
475 : * oldest one, so we might as well keep it.
476 : */
477 : void
2336 tgl 478 GIC 498735 : InvalidateCatalogSnapshotConditionally(void)
2336 tgl 479 ECB : {
2336 tgl 480 GIC 498735 : if (CatalogSnapshot &&
2336 tgl 481 CBC 36715 : ActiveSnapshot == NULL &&
482 35917 : pairingheap_is_singular(&RegisteredSnapshots))
483 5027 : InvalidateCatalogSnapshot();
3568 rhaas 484 498735 : }
3568 rhaas 485 ECB :
486 : /*
487 : * SnapshotSetCommandId
488 : * Propagate CommandCounterIncrement into the static snapshots, if set
489 : */
490 : void
5445 alvherre 491 GIC 1206680 : SnapshotSetCommandId(CommandId curcid)
5445 alvherre 492 ECB : {
5445 alvherre 493 GIC 1206680 : if (!FirstSnapshotSet)
5445 alvherre 494 CBC 60589 : return;
5445 alvherre 495 ECB :
5445 alvherre 496 GIC 1146091 : if (CurrentSnapshot)
5445 alvherre 497 CBC 1146091 : CurrentSnapshot->curcid = curcid;
498 1146091 : if (SecondarySnapshot)
499 75962 : SecondarySnapshot->curcid = curcid;
2336 tgl 500 ECB : /* Should we do the same with CatalogSnapshot? */
501 : }
502 :
503 : /*
504 : * SetTransactionSnapshot
505 : * Set the transaction's snapshot from an imported MVCC snapshot.
506 : *
507 : * Note that this is very closely tied to GetTransactionSnapshot --- it
508 : * must take care of all the same considerations as the first-snapshot case
509 : * in GetTransactionSnapshot.
510 : */
511 : static void
2125 andres 512 GIC 1471 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
2125 andres 513 ECB : int sourcepid, PGPROC *sourceproc)
514 : {
515 : /* Caller should have checked this already */
4187 tgl 516 GIC 1471 : Assert(!FirstSnapshotSet);
4187 tgl 517 ECB :
518 : /* Better do this to ensure following Assert succeeds. */
2336 tgl 519 GIC 1471 : InvalidateCatalogSnapshot();
2336 tgl 520 ECB :
3004 heikki.linnakangas 521 GIC 1471 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
4187 tgl 522 CBC 1471 : Assert(FirstXactSnapshot == NULL);
3315 rhaas 523 1471 : Assert(!HistoricSnapshotActive());
4187 tgl 524 ECB :
525 : /*
526 : * Even though we are not going to use the snapshot it computes, we must
527 : * call GetSnapshotData, for two reasons: (1) to be sure that
528 : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
529 : * the state for GlobalVis*.
530 : */
4187 tgl 531 GIC 1471 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
4187 tgl 532 ECB :
533 : /*
534 : * Now copy appropriate fields from the source snapshot.
535 : */
4187 tgl 536 GIC 1471 : CurrentSnapshot->xmin = sourcesnap->xmin;
4187 tgl 537 CBC 1471 : CurrentSnapshot->xmax = sourcesnap->xmax;
538 1471 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
539 1471 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
402 540 1471 : if (sourcesnap->xcnt > 0)
541 247 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
542 247 : sourcesnap->xcnt * sizeof(TransactionId));
4187 543 1471 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
544 1471 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
402 545 1471 : if (sourcesnap->subxcnt > 0)
402 tgl 546 LBC 0 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
402 tgl 547 UBC 0 : sourcesnap->subxcnt * sizeof(TransactionId));
4187 tgl 548 GBC 1471 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
4187 tgl 549 CBC 1471 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
4187 tgl 550 ECB : /* NB: curcid should NOT be copied, it's a local matter */
551 :
965 andres 552 GIC 1471 : CurrentSnapshot->snapXactCompletionCount = 0;
965 andres 553 ECB :
554 : /*
555 : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
556 : * TransactionXmin. There is a race condition: to make sure we are not
557 : * causing the global xmin to go backwards, we have to test that the
558 : * source transaction is still running, and that has to be done
559 : * atomically. So let procarray.c do it.
560 : *
561 : * Note: in serializable mode, predicate.c will do this a second time. It
562 : * doesn't seem worth contorting the logic here to avoid two calls,
563 : * especially since it's not clear that predicate.c *must* do this.
564 : */
2901 rhaas 565 GIC 1471 : if (sourceproc != NULL)
2901 rhaas 566 ECB : {
2901 rhaas 567 GIC 1453 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
2901 rhaas 568 LBC 0 : ereport(ERROR,
2901 rhaas 569 EUB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
570 : errmsg("could not import the requested snapshot"),
571 : errdetail("The source transaction is not running anymore.")));
572 : }
2125 andres 573 GIC 18 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
4187 tgl 574 LBC 0 : ereport(ERROR,
4187 tgl 575 EUB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
576 : errmsg("could not import the requested snapshot"),
577 : errdetail("The source process with PID %d is not running anymore.",
578 : sourcepid)));
579 :
580 : /*
581 : * In transaction-snapshot mode, the first snapshot must live until end of
582 : * xact, so we must make a copy of it. Furthermore, if we're running in
583 : * serializable mode, predicate.c needs to do its own processing.
584 : */
4187 tgl 585 GIC 1471 : if (IsolationUsesXactSnapshot())
4187 tgl 586 ECB : {
4187 tgl 587 GIC 193 : if (IsolationIsSerializable())
2125 andres 588 CBC 13 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
2125 andres 589 ECB : sourcepid);
590 : /* Make a saved copy */
4187 tgl 591 GIC 193 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
4187 tgl 592 CBC 193 : FirstXactSnapshot = CurrentSnapshot;
4187 tgl 593 ECB : /* Mark it as "registered" in FirstXactSnapshot */
4187 tgl 594 GIC 193 : FirstXactSnapshot->regd_count++;
3004 heikki.linnakangas 595 CBC 193 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
4187 tgl 596 ECB : }
597 :
4187 tgl 598 GIC 1471 : FirstSnapshotSet = true;
4187 tgl 599 CBC 1471 : }
4187 tgl 600 ECB :
601 : /*
602 : * CopySnapshot
603 : * Copy the given snapshot.
604 : *
605 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
606 : * to 0. The returned snapshot has the copied flag set.
607 : */
608 : static Snapshot
5492 alvherre 609 GIC 9332950 : CopySnapshot(Snapshot snapshot)
5492 alvherre 610 ECB : {
611 : Snapshot newsnap;
612 : Size subxipoff;
613 : Size size;
614 :
5445 alvherre 615 GIC 9332950 : Assert(snapshot != InvalidSnapshot);
5445 alvherre 616 ECB :
617 : /* We allocate any XID arrays needed in the same palloc block. */
5492 alvherre 618 GIC 9332950 : size = subxipoff = sizeof(SnapshotData) +
5492 alvherre 619 CBC 9332950 : snapshot->xcnt * sizeof(TransactionId);
620 9332950 : if (snapshot->subxcnt > 0)
621 59854 : size += snapshot->subxcnt * sizeof(TransactionId);
5492 alvherre 622 ECB :
5445 alvherre 623 GIC 9332950 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
5492 alvherre 624 CBC 9332950 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
5492 alvherre 625 ECB :
5445 alvherre 626 GIC 9332950 : newsnap->regd_count = 0;
5445 alvherre 627 CBC 9332950 : newsnap->active_count = 0;
628 9332950 : newsnap->copied = true;
965 andres 629 9332950 : newsnap->snapXactCompletionCount = 0;
5445 alvherre 630 ECB :
631 : /* setup XID array */
5492 alvherre 632 GIC 9332950 : if (snapshot->xcnt > 0)
5492 alvherre 633 ECB : {
5492 alvherre 634 GIC 1144106 : newsnap->xip = (TransactionId *) (newsnap + 1);
5492 alvherre 635 CBC 1144106 : memcpy(newsnap->xip, snapshot->xip,
636 1144106 : snapshot->xcnt * sizeof(TransactionId));
5492 alvherre 637 ECB : }
638 : else
5492 alvherre 639 GIC 8188844 : newsnap->xip = NULL;
5492 alvherre 640 ECB :
641 : /*
642 : * Setup subXID array. Don't bother to copy it if it had overflowed,
643 : * though, because it's not used anywhere in that case. Except if it's a
644 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
645 : * well in that case, so we mustn't lose them.
646 : */
4859 simon 647 GIC 9332950 : if (snapshot->subxcnt > 0 &&
4859 simon 648 CBC 59854 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
5492 alvherre 649 ECB : {
5492 alvherre 650 GIC 59841 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
5492 alvherre 651 CBC 59841 : memcpy(newsnap->subxip, snapshot->subxip,
652 59841 : snapshot->subxcnt * sizeof(TransactionId));
5492 alvherre 653 ECB : }
654 : else
5492 alvherre 655 GIC 9273109 : newsnap->subxip = NULL;
5492 alvherre 656 ECB :
5492 alvherre 657 GIC 9332950 : return newsnap;
5492 alvherre 658 ECB : }
659 :
660 : /*
661 : * FreeSnapshot
662 : * Free the memory associated with a snapshot.
663 : */
664 : static void
5445 alvherre 665 GIC 9312517 : FreeSnapshot(Snapshot snapshot)
5445 alvherre 666 ECB : {
5445 alvherre 667 GIC 9312517 : Assert(snapshot->regd_count == 0);
5445 alvherre 668 CBC 9312517 : Assert(snapshot->active_count == 0);
5385 669 9312517 : Assert(snapshot->copied);
5445 alvherre 670 ECB :
5445 alvherre 671 GIC 9312517 : pfree(snapshot);
5445 alvherre 672 CBC 9312517 : }
5445 alvherre 673 ECB :
674 : /*
675 : * PushActiveSnapshot
676 : * Set the given snapshot as the current active snapshot
677 : *
678 : * If the passed snapshot is a statically-allocated one, or it is possibly
679 : * subject to a future command counter update, create a new long-lived copy
680 : * with active refcount=1. Otherwise, only increment the refcount.
681 : */
682 : void
201 pg 683 GNC 852969 : PushActiveSnapshot(Snapshot snapshot)
555 tgl 684 ECB : {
201 pg 685 GNC 852969 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
555 tgl 686 CBC 852969 : }
555 tgl 687 ECB :
688 : /*
689 : * PushActiveSnapshotWithLevel
690 : * Set the given snapshot as the current active snapshot
691 : *
692 : * Same as PushActiveSnapshot except that caller can specify the
693 : * transaction nesting level that "owns" the snapshot. This level
694 : * must not be deeper than the current top of the snapshot stack.
695 : */
696 : void
201 pg 697 GNC 1161869 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
5445 alvherre 698 ECB : {
699 : ActiveSnapshotElt *newactive;
700 :
201 pg 701 GNC 1161869 : Assert(snapshot != InvalidSnapshot);
555 tgl 702 CBC 1161869 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
5445 alvherre 703 ECB :
5445 alvherre 704 GIC 1161869 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
4932 alvherre 705 ECB :
706 : /*
707 : * Checking SecondarySnapshot is probably useless here, but it seems
708 : * better to be sure.
709 : */
201 pg 710 GNC 1161869 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
711 195075 : !snapshot->copied)
712 966794 : newactive->as_snap = CopySnapshot(snapshot);
4932 alvherre 713 ECB : else
201 pg 714 GNC 195075 : newactive->as_snap = snapshot;
715 :
5445 alvherre 716 CBC 1161869 : newactive->as_next = ActiveSnapshot;
555 tgl 717 GIC 1161869 : newactive->as_level = snap_level;
5445 alvherre 718 ECB :
5445 alvherre 719 CBC 1161869 : newactive->as_snap->active_count++;
720 :
721 1161869 : ActiveSnapshot = newactive;
2440 rhaas 722 GIC 1161869 : if (OldestActiveSnapshot == NULL)
2440 rhaas 723 CBC 950839 : OldestActiveSnapshot = ActiveSnapshot;
5445 alvherre 724 1161869 : }
5445 alvherre 725 ECB :
726 : /*
727 : * PushCopiedSnapshot
728 : * As above, except forcibly copy the presented snapshot.
729 : *
730 : * This should be used when the ActiveSnapshot has to be modifiable, for
731 : * example if the caller intends to call UpdateActiveSnapshotCommandId.
732 : * The new snapshot will be released when popped from the stack.
733 : */
734 : void
4423 tgl 735 GIC 62749 : PushCopiedSnapshot(Snapshot snapshot)
736 : {
4423 tgl 737 CBC 62749 : PushActiveSnapshot(CopySnapshot(snapshot));
4423 tgl 738 GIC 62749 : }
5445 alvherre 739 ECB :
4423 tgl 740 : /*
741 : * UpdateActiveSnapshotCommandId
742 : *
743 : * Update the current CID of the active snapshot. This can only be applied
744 : * to a snapshot that is not referenced elsewhere.
745 : */
746 : void
4423 tgl 747 GIC 44867 : UpdateActiveSnapshotCommandId(void)
748 : {
2878 bruce 749 ECB : CommandId save_curcid,
750 : curcid;
751 :
4423 tgl 752 GIC 44867 : Assert(ActiveSnapshot != NULL);
753 44867 : Assert(ActiveSnapshot->as_snap->active_count == 1);
4423 tgl 754 CBC 44867 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
5445 alvherre 755 ECB :
2901 rhaas 756 : /*
757 : * Don't allow modification of the active snapshot during parallel
758 : * operation. We share the snapshot to worker backends at the beginning
759 : * of parallel operation, so any change to the snapshot can lead to
760 : * inconsistencies. We have other defenses against
761 : * CommandCounterIncrement, but there are a few places that call this
762 : * directly, so we put an additional guard here.
763 : */
2901 rhaas 764 GIC 44867 : save_curcid = ActiveSnapshot->as_snap->curcid;
765 44867 : curcid = GetCurrentCommandId(false);
2901 rhaas 766 CBC 44867 : if (IsInParallelMode() && save_curcid != curcid)
2901 rhaas 767 LBC 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
2901 rhaas 768 CBC 44867 : ActiveSnapshot->as_snap->curcid = curcid;
5445 alvherre 769 GBC 44867 : }
5445 alvherre 770 ECB :
771 : /*
772 : * PopActiveSnapshot
773 : *
774 : * Remove the topmost snapshot from the active snapshot stack, decrementing the
775 : * reference count, and free it if this was the last reference.
776 : */
777 : void
5445 alvherre 778 GIC 1138069 : PopActiveSnapshot(void)
779 : {
5050 bruce 780 ECB : ActiveSnapshotElt *newstack;
781 :
5445 alvherre 782 GIC 1138069 : newstack = ActiveSnapshot->as_next;
783 :
5445 alvherre 784 CBC 1138069 : Assert(ActiveSnapshot->as_snap->active_count > 0);
785 :
786 1138069 : ActiveSnapshot->as_snap->active_count--;
787 :
788 1138069 : if (ActiveSnapshot->as_snap->active_count == 0 &&
5445 alvherre 789 GIC 1127424 : ActiveSnapshot->as_snap->regd_count == 0)
5445 alvherre 790 CBC 904777 : FreeSnapshot(ActiveSnapshot->as_snap);
5445 alvherre 791 ECB :
5445 alvherre 792 CBC 1138069 : pfree(ActiveSnapshot);
5445 alvherre 793 GIC 1138069 : ActiveSnapshot = newstack;
2440 rhaas 794 CBC 1138069 : if (ActiveSnapshot == NULL)
795 934155 : OldestActiveSnapshot = NULL;
5445 alvherre 796 ECB :
5445 alvherre 797 CBC 1138069 : SnapshotResetXmin();
5492 alvherre 798 GIC 1138069 : }
5492 alvherre 799 ECB :
800 : /*
801 : * GetActiveSnapshot
802 : * Return the topmost snapshot in the Active stack.
803 : */
804 : Snapshot
5445 alvherre 805 GIC 893513 : GetActiveSnapshot(void)
806 : {
5445 alvherre 807 CBC 893513 : Assert(ActiveSnapshot != NULL);
808 :
809 893513 : return ActiveSnapshot->as_snap;
810 : }
5445 alvherre 811 ECB :
812 : /*
813 : * ActiveSnapshotSet
814 : * Return whether there is at least one snapshot in the Active stack
815 : */
816 : bool
5445 alvherre 817 GIC 799107 : ActiveSnapshotSet(void)
818 : {
5445 alvherre 819 CBC 799107 : return ActiveSnapshot != NULL;
820 : }
5445 alvherre 821 ECB :
822 : /*
823 : * RegisterSnapshot
824 : * Register a snapshot as being in use by the current resource owner
825 : *
826 : * If InvalidSnapshot is passed, it is not registered.
827 : */
828 : Snapshot
5445 alvherre 829 GIC 9379752 : RegisterSnapshot(Snapshot snapshot)
830 : {
5239 alvherre 831 CBC 9379752 : if (snapshot == InvalidSnapshot)
5239 alvherre 832 GIC 533658 : return InvalidSnapshot;
5239 alvherre 833 ECB :
5239 alvherre 834 CBC 8846094 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
835 : }
5239 alvherre 836 ECB :
837 : /*
838 : * RegisterSnapshotOnOwner
839 : * As above, but use the specified resource owner
840 : */
841 : Snapshot
5239 alvherre 842 GIC 8846204 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
843 : {
5050 bruce 844 ECB : Snapshot snap;
845 :
5445 alvherre 846 GIC 8846204 : if (snapshot == InvalidSnapshot)
5445 alvherre 847 UIC 0 : return InvalidSnapshot;
5445 alvherre 848 ECB :
5445 alvherre 849 EUB : /* Static snapshot? Create a persistent copy */
5248 alvherre 850 GIC 8846204 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
851 :
5248 alvherre 852 ECB : /* and tell resowner.c about it */
5239 alvherre 853 GIC 8846204 : ResourceOwnerEnlargeSnapshots(owner);
5248 854 8846204 : snap->regd_count++;
5239 alvherre 855 CBC 8846204 : ResourceOwnerRememberSnapshot(owner, snap);
5445 alvherre 856 ECB :
3004 heikki.linnakangas 857 CBC 8846204 : if (snap->regd_count == 1)
3004 heikki.linnakangas 858 GIC 8566269 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
5445 alvherre 859 ECB :
5248 alvherre 860 CBC 8846204 : return snap;
861 : }
5445 alvherre 862 ECB :
863 : /*
864 : * UnregisterSnapshot
865 : *
866 : * Decrement the reference count of a snapshot, remove the corresponding
867 : * reference from CurrentResourceOwner, and free the snapshot if no more
868 : * references remain.
869 : */
870 : void
5445 alvherre 871 GIC 9339581 : UnregisterSnapshot(Snapshot snapshot)
872 : {
5239 alvherre 873 CBC 9339581 : if (snapshot == NULL)
5239 alvherre 874 GIC 509097 : return;
5239 alvherre 875 ECB :
5239 alvherre 876 CBC 8830484 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
877 : }
5239 alvherre 878 ECB :
879 : /*
880 : * UnregisterSnapshotFromOwner
881 : * As above, but use the specified resource owner
882 : */
883 : void
5239 alvherre 884 GIC 8846204 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
885 : {
5248 alvherre 886 CBC 8846204 : if (snapshot == NULL)
5445 alvherre 887 UIC 0 : return;
5445 alvherre 888 ECB :
5248 alvherre 889 GBC 8846204 : Assert(snapshot->regd_count > 0);
3004 heikki.linnakangas 890 GIC 8846204 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
5445 alvherre 891 ECB :
5239 alvherre 892 CBC 8846204 : ResourceOwnerForgetSnapshot(owner, snapshot);
893 :
3004 heikki.linnakangas 894 8846204 : snapshot->regd_count--;
3004 heikki.linnakangas 895 GIC 8846204 : if (snapshot->regd_count == 0)
3004 heikki.linnakangas 896 CBC 8566269 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
3004 heikki.linnakangas 897 ECB :
3004 heikki.linnakangas 898 CBC 8846204 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
899 : {
5248 alvherre 900 8404934 : FreeSnapshot(snapshot);
5248 alvherre 901 GIC 8404934 : SnapshotResetXmin();
5445 alvherre 902 ECB : }
903 : }
904 :
905 : /*
906 : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
907 : * by xmin, so that the snapshot with smallest xmin is at the top.
908 : */
909 : static int
3004 heikki.linnakangas 910 GIC 8863619 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
911 : {
3004 heikki.linnakangas 912 CBC 8863619 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
3004 heikki.linnakangas 913 GIC 8863619 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
3004 heikki.linnakangas 914 ECB :
3004 heikki.linnakangas 915 CBC 8863619 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
3004 heikki.linnakangas 916 GIC 47124 : return 1;
3004 heikki.linnakangas 917 CBC 8816495 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
918 7995 : return -1;
3004 heikki.linnakangas 919 ECB : else
3004 heikki.linnakangas 920 CBC 8808500 : return 0;
921 : }
3004 heikki.linnakangas 922 ECB :
923 : /*
924 : * SnapshotResetXmin
925 : *
926 : * If there are no more snapshots, we can reset our PGPROC->xmin to
927 : * InvalidTransactionId. Note we can do this without locking because we assume
928 : * that storing an Xid is atomic.
929 : *
930 : * Even if there are some remaining snapshots, we may be able to advance our
931 : * PGPROC->xmin to some degree. This typically happens when a portal is
932 : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
933 : * the active snapshot stack is empty; this allows us not to need to track
934 : * which active snapshot is oldest.
935 : *
936 : * Note: it's tempting to use GetOldestSnapshot() here so that we can include
937 : * active snapshots in the calculation. However, that compares by LSN not
938 : * xmin so it's not entirely clear that it's the same thing. Also, we'd be
939 : * critically dependent on the assumption that the bottommost active snapshot
940 : * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are
941 : * not actually critical, but this would be.)
942 : */
943 : static void
5445 alvherre 944 GIC 11078445 : SnapshotResetXmin(void)
945 : {
2878 bruce 946 ECB : Snapshot minSnapshot;
947 :
3004 heikki.linnakangas 948 GIC 11078445 : if (ActiveSnapshot != NULL)
949 8301199 : return;
3004 heikki.linnakangas 950 ECB :
3004 heikki.linnakangas 951 CBC 2777246 : if (pairingheap_is_empty(&RegisteredSnapshots))
952 : {
969 andres 953 938854 : MyProc->xmin = InvalidTransactionId;
3004 heikki.linnakangas 954 GIC 938854 : return;
3004 heikki.linnakangas 955 ECB : }
956 :
3004 heikki.linnakangas 957 GIC 1838392 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
958 : pairingheap_first(&RegisteredSnapshots));
3004 heikki.linnakangas 959 ECB :
969 andres 960 GIC 1838392 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
961 3038 : MyProc->xmin = minSnapshot->xmin;
5445 alvherre 962 ECB : }
963 :
964 : /*
965 : * AtSubCommit_Snapshot
966 : */
967 : void
5445 alvherre 968 GIC 4332 : AtSubCommit_Snapshot(int level)
969 : {
5050 bruce 970 ECB : ActiveSnapshotElt *active;
971 :
972 : /*
973 : * Relabel the active snapshots set in this subtransaction as though they
974 : * are owned by the parent subxact.
975 : */
5445 alvherre 976 GIC 4332 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
977 : {
5445 alvherre 978 CBC 3490 : if (active->as_level < level)
5445 alvherre 979 GIC 3490 : break;
5445 alvherre 980 LBC 0 : active->as_level = level - 1;
5445 alvherre 981 ECB : }
5445 alvherre 982 GBC 4332 : }
983 :
5445 alvherre 984 ECB : /*
985 : * AtSubAbort_Snapshot
986 : * Clean up snapshots after a subtransaction abort
987 : */
988 : void
5445 alvherre 989 GIC 4483 : AtSubAbort_Snapshot(int level)
990 : {
5445 alvherre 991 ECB : /* Forget the active snapshots set by this subtransaction */
5445 alvherre 992 GIC 11772 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
993 : {
5050 bruce 994 ECB : ActiveSnapshotElt *next;
995 :
5445 alvherre 996 GIC 2806 : next = ActiveSnapshot->as_next;
997 :
5445 alvherre 998 ECB : /*
999 : * Decrement the snapshot's active count. If it's still registered or
1000 : * marked as active by an outer subtransaction, we can't free it yet.
1001 : */
5445 alvherre 1002 GIC 2806 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
1003 2806 : ActiveSnapshot->as_snap->active_count -= 1;
5445 alvherre 1004 ECB :
5445 alvherre 1005 CBC 2806 : if (ActiveSnapshot->as_snap->active_count == 0 &&
5445 alvherre 1006 GIC 2806 : ActiveSnapshot->as_snap->regd_count == 0)
5445 alvherre 1007 CBC 2806 : FreeSnapshot(ActiveSnapshot->as_snap);
5445 alvherre 1008 ECB :
1009 : /* and free the stack element */
5445 alvherre 1010 GIC 2806 : pfree(ActiveSnapshot);
1011 :
5445 alvherre 1012 CBC 2806 : ActiveSnapshot = next;
2440 rhaas 1013 GIC 2806 : if (ActiveSnapshot == NULL)
2440 rhaas 1014 CBC 121 : OldestActiveSnapshot = NULL;
5445 alvherre 1015 ECB : }
1016 :
5445 alvherre 1017 GIC 4483 : SnapshotResetXmin();
1018 4483 : }
5445 alvherre 1019 ECB :
1020 : /*
1021 : * AtEOXact_Snapshot
1022 : * Snapshot manager's cleanup function for end of transaction
1023 : */
1024 : void
2194 simon 1025 GIC 486969 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1026 : {
4213 tgl 1027 ECB : /*
1028 : * In transaction-snapshot mode we must release our privately-managed
1029 : * reference to the transaction snapshot. We must remove it from
1030 : * RegisteredSnapshots to keep the check below happy. But we don't bother
1031 : * to do FreeSnapshot, for two reasons: the memory will go away with
1032 : * TopTransactionContext anyway, and if someone has left the snapshot
1033 : * stacked as active, we don't want the code below to be chasing through a
1034 : * dangling pointer.
1035 : */
4213 tgl 1036 GIC 486969 : if (FirstXactSnapshot != NULL)
1037 : {
4213 tgl 1038 CBC 2794 : Assert(FirstXactSnapshot->regd_count > 0);
3004 heikki.linnakangas 1039 GIC 2794 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
3004 heikki.linnakangas 1040 CBC 2794 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
4213 tgl 1041 ECB : }
4213 tgl 1042 CBC 486969 : FirstXactSnapshot = NULL;
1043 :
4187 tgl 1044 ECB : /*
1045 : * If we exported any snapshots, clean them up.
1046 : */
4187 tgl 1047 GIC 486969 : if (exportedSnapshots != NIL)
1048 : {
3004 heikki.linnakangas 1049 ECB : ListCell *lc;
1050 :
1051 : /*
1052 : * Get rid of the files. Unlink failure is only a WARNING because (1)
1053 : * it's too late to abort the transaction, and (2) leaving a leaked
1054 : * file around has little real consequence anyway.
1055 : *
1056 : * We also need to remove the snapshots from RegisteredSnapshots to
1057 : * prevent a warning below.
1058 : *
1059 : * As with the FirstXactSnapshot, we don't need to free resources of
1060 : * the snapshot itself as it will go away with the memory context.
1061 : */
3004 heikki.linnakangas 1062 GIC 18 : foreach(lc, exportedSnapshots)
1063 : {
2118 tgl 1064 CBC 9 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1065 :
2125 andres 1066 9 : if (unlink(esnap->snapfile))
2125 andres 1067 UIC 0 : elog(WARNING, "could not unlink file \"%s\": %m",
2125 andres 1068 ECB : esnap->snapfile);
2125 andres 1069 EUB :
2125 andres 1070 GIC 9 : pairingheap_remove(&RegisteredSnapshots,
1071 9 : &esnap->snapshot->ph_node);
3004 heikki.linnakangas 1072 ECB : }
4187 tgl 1073 :
4187 tgl 1074 GIC 9 : exportedSnapshots = NIL;
1075 : }
4187 tgl 1076 ECB :
1077 : /* Drop catalog snapshot if any */
2336 tgl 1078 GIC 486969 : InvalidateCatalogSnapshot();
1079 :
5445 alvherre 1080 ECB : /* On commit, complain about leftover snapshots */
5445 alvherre 1081 GIC 486969 : if (isCommit)
1082 : {
5050 bruce 1083 ECB : ActiveSnapshotElt *active;
1084 :
3004 heikki.linnakangas 1085 GIC 466803 : if (!pairingheap_is_empty(&RegisteredSnapshots))
3004 heikki.linnakangas 1086 UIC 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
5248 alvherre 1087 ECB :
5445 alvherre 1088 EUB : /* complain about unpopped active snapshots */
5445 alvherre 1089 GIC 466803 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
5277 alvherre 1090 UIC 0 : elog(WARNING, "snapshot %p still active", active);
5445 alvherre 1091 ECB : }
5445 alvherre 1092 EUB :
1093 : /*
1094 : * And reset our state. We don't need to free the memory explicitly --
1095 : * it'll go away with TopTransactionContext.
1096 : */
5445 alvherre 1097 GIC 486969 : ActiveSnapshot = NULL;
2440 rhaas 1098 486969 : OldestActiveSnapshot = NULL;
3004 heikki.linnakangas 1099 CBC 486969 : pairingheap_reset(&RegisteredSnapshots);
5445 alvherre 1100 ECB :
5445 alvherre 1101 CBC 486969 : CurrentSnapshot = NULL;
5445 alvherre 1102 GIC 486969 : SecondarySnapshot = NULL;
5445 alvherre 1103 ECB :
5445 alvherre 1104 CBC 486969 : FirstSnapshotSet = false;
1105 :
2194 simon 1106 ECB : /*
1107 : * During normal commit processing, we call ProcArrayEndTransaction() to
1108 : * reset the MyProc->xmin. That call happens prior to the call to
1109 : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1110 : */
2194 simon 1111 GIC 486969 : if (resetXmin)
1112 20532 : SnapshotResetXmin();
2194 simon 1113 ECB :
969 andres 1114 CBC 486969 : Assert(resetXmin || MyProc->xmin == 0);
5492 alvherre 1115 GIC 486969 : }
4187 tgl 1116 ECB :
1117 :
1118 : /*
1119 : * ExportSnapshot
1120 : * Export the snapshot to a file so that other backends can import it.
1121 : * Returns the token (the file name) that can be used to import this
1122 : * snapshot.
1123 : */
1124 : char *
4187 tgl 1125 GIC 9 : ExportSnapshot(Snapshot snapshot)
1126 : {
4187 tgl 1127 ECB : TransactionId topXid;
1128 : TransactionId *children;
1129 : ExportedSnapshot *esnap;
1130 : int nchildren;
1131 : int addTopXid;
1132 : StringInfoData buf;
1133 : FILE *f;
1134 : int i;
1135 : MemoryContext oldcxt;
1136 : char path[MAXPGPATH];
1137 : char pathtmp[MAXPGPATH];
1138 :
1139 : /*
1140 : * It's tempting to call RequireTransactionBlock here, since it's not very
1141 : * useful to export a snapshot that will disappear immediately afterwards.
1142 : * However, we haven't got enough information to do that, since we don't
1143 : * know if we're at top level or not. For example, we could be inside a
1144 : * plpgsql function that is going to fire off other transactions via
1145 : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1146 : * check.
1147 : *
1148 : * Also note that we don't make any restriction on the transaction's
1149 : * isolation level; however, importers must check the level if they are
1150 : * serializable.
1151 : */
1152 :
1153 : /*
1154 : * Get our transaction ID if there is one, to include in the snapshot.
1155 : */
2125 andres 1156 GIC 9 : topXid = GetTopTransactionIdIfAny();
1157 :
4187 tgl 1158 ECB : /*
1159 : * We cannot export a snapshot from a subtransaction because there's no
1160 : * easy way for importers to verify that the same subtransaction is still
1161 : * running.
1162 : */
4187 tgl 1163 GIC 9 : if (IsSubTransaction())
4187 tgl 1164 UIC 0 : ereport(ERROR,
4187 tgl 1165 ECB : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
4187 tgl 1166 EUB : errmsg("cannot export a snapshot from a subtransaction")));
1167 :
1168 : /*
1169 : * We do however allow previous committed subtransactions to exist.
1170 : * Importers of the snapshot must see them as still running, so get their
1171 : * XIDs to add them to the snapshot.
1172 : */
4187 tgl 1173 GIC 9 : nchildren = xactGetCommittedChildren(&children);
1174 :
2125 andres 1175 ECB : /*
1176 : * Generate file path for the snapshot. We start numbering of snapshots
1177 : * inside the transaction from 1.
1178 : */
2125 andres 1179 GIC 9 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
2118 tgl 1180 9 : MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1);
2125 andres 1181 ECB :
4187 tgl 1182 : /*
1183 : * Copy the snapshot into TopTransactionContext, add it to the
1184 : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1185 : * ensure that the snapshot's xmin is honored for the rest of the
1186 : * transaction.
1187 : */
4187 tgl 1188 GIC 9 : snapshot = CopySnapshot(snapshot);
1189 :
4187 tgl 1190 CBC 9 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
2125 andres 1191 GIC 9 : esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
2125 andres 1192 CBC 9 : esnap->snapfile = pstrdup(path);
1193 9 : esnap->snapshot = snapshot;
1194 9 : exportedSnapshots = lappend(exportedSnapshots, esnap);
4187 tgl 1195 9 : MemoryContextSwitchTo(oldcxt);
4187 tgl 1196 ECB :
4187 tgl 1197 CBC 9 : snapshot->regd_count++;
3004 heikki.linnakangas 1198 GIC 9 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
4187 tgl 1199 ECB :
1200 : /*
1201 : * Fill buf with a text serialization of the snapshot, plus identification
1202 : * data about this transaction. The format expected by ImportSnapshot is
1203 : * pretty rigid: each line must be fieldname:value.
1204 : */
4187 tgl 1205 GIC 9 : initStringInfo(&buf);
1206 :
2125 andres 1207 CBC 9 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid);
2125 andres 1208 GIC 9 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
4187 tgl 1209 CBC 9 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1210 9 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1211 9 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
4187 tgl 1212 ECB :
4187 tgl 1213 CBC 9 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
4187 tgl 1214 GIC 9 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
4187 tgl 1215 ECB :
1216 : /*
1217 : * We must include our own top transaction ID in the top-xid data, since
1218 : * by definition we will still be running when the importing transaction
1219 : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1220 : * the snapshot. (There must, therefore, be enough room to add it.)
1221 : *
1222 : * However, it could be that our topXid is after the xmax, in which case
1223 : * we shouldn't include it because xip[] members are expected to be before
1224 : * xmax. (We need not make the same check for subxip[] members, see
1225 : * snapshot.h.)
1226 : */
2125 andres 1227 GIC 9 : addTopXid = (TransactionIdIsValid(topXid) &&
2118 tgl 1228 9 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
4187 tgl 1229 CBC 9 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1230 9 : for (i = 0; i < snapshot->xcnt; i++)
4187 tgl 1231 LBC 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
4187 tgl 1232 CBC 9 : if (addTopXid)
4187 tgl 1233 UBC 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
4187 tgl 1234 ECB :
4187 tgl 1235 EUB : /*
1236 : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1237 : * we have to cope with possible overflow.
1238 : */
4187 tgl 1239 GIC 18 : if (snapshot->suboverflowed ||
1240 9 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
4187 tgl 1241 LBC 0 : appendStringInfoString(&buf, "sof:1\n");
4187 tgl 1242 ECB : else
4187 tgl 1243 EUB : {
4187 tgl 1244 GIC 9 : appendStringInfoString(&buf, "sof:0\n");
1245 9 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
4187 tgl 1246 CBC 9 : for (i = 0; i < snapshot->subxcnt; i++)
4187 tgl 1247 LBC 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
4187 tgl 1248 CBC 9 : for (i = 0; i < nchildren; i++)
4187 tgl 1249 UBC 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
4187 tgl 1250 ECB : }
4187 tgl 1251 GBC 9 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1252 :
4187 tgl 1253 ECB : /*
1254 : * Now write the text representation into a file. We first write to a
1255 : * ".tmp" filename, and rename to final filename if no error. This
1256 : * ensures that no other backend can read an incomplete file
1257 : * (ImportSnapshot won't allow it because of its valid-characters check).
1258 : */
2125 andres 1259 GIC 9 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
4187 tgl 1260 9 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
4187 tgl 1261 LBC 0 : ereport(ERROR,
4187 tgl 1262 ECB : (errcode_for_file_access(),
4187 tgl 1263 EUB : errmsg("could not create file \"%s\": %m", pathtmp)));
1264 :
4187 tgl 1265 GIC 9 : if (fwrite(buf.data, buf.len, 1, f) != 1)
4187 tgl 1266 UIC 0 : ereport(ERROR,
4187 tgl 1267 ECB : (errcode_for_file_access(),
4187 tgl 1268 EUB : errmsg("could not write to file \"%s\": %m", pathtmp)));
1269 :
1270 : /* no fsync() since file need not survive a system crash */
1271 :
4187 tgl 1272 GIC 9 : if (FreeFile(f))
4187 tgl 1273 UIC 0 : ereport(ERROR,
4187 tgl 1274 ECB : (errcode_for_file_access(),
4187 tgl 1275 EUB : errmsg("could not write to file \"%s\": %m", pathtmp)));
1276 :
1277 : /*
1278 : * Now that we have written everything into a .tmp file, rename the file
1279 : * to remove the .tmp suffix.
1280 : */
4187 tgl 1281 GIC 9 : if (rename(pathtmp, path) < 0)
4187 tgl 1282 UIC 0 : ereport(ERROR,
4187 tgl 1283 ECB : (errcode_for_file_access(),
4187 tgl 1284 EUB : errmsg("could not rename file \"%s\" to \"%s\": %m",
1285 : pathtmp, path)));
1286 :
1287 : /*
1288 : * The basename of the file is what we return from pg_export_snapshot().
1289 : * It's already in path in a textual format and we know that the path
1290 : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1291 : * and pstrdup it so as not to return the address of a local variable.
1292 : */
4187 tgl 1293 GIC 9 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1294 : }
4187 tgl 1295 ECB :
1296 : /*
1297 : * pg_export_snapshot
1298 : * SQL-callable wrapper for ExportSnapshot.
1299 : */
1300 : Datum
4187 tgl 1301 GIC 9 : pg_export_snapshot(PG_FUNCTION_ARGS)
1302 : {
4187 tgl 1303 ECB : char *snapshotName;
1304 :
4187 tgl 1305 GIC 9 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1306 9 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
4187 tgl 1307 ECB : }
1308 :
1309 :
1310 : /*
1311 : * Parsing subroutines for ImportSnapshot: parse a line with the given
1312 : * prefix followed by a value, and advance *s to the next line. The
1313 : * filename is provided for use in error messages.
1314 : */
1315 : static int
4187 tgl 1316 GIC 126 : parseIntFromText(const char *prefix, char **s, const char *filename)
1317 : {
4187 tgl 1318 CBC 126 : char *ptr = *s;
4187 tgl 1319 GIC 126 : int prefixlen = strlen(prefix);
4187 tgl 1320 ECB : int val;
1321 :
4187 tgl 1322 GIC 126 : if (strncmp(ptr, prefix, prefixlen) != 0)
4187 tgl 1323 UIC 0 : ereport(ERROR,
4187 tgl 1324 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1325 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1326 GIC 126 : ptr += prefixlen;
1327 126 : if (sscanf(ptr, "%d", &val) != 1)
4187 tgl 1328 LBC 0 : ereport(ERROR,
4187 tgl 1329 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1330 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1331 GIC 126 : ptr = strchr(ptr, '\n');
1332 126 : if (!ptr)
4187 tgl 1333 LBC 0 : ereport(ERROR,
4187 tgl 1334 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1335 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1336 GIC 126 : *s = ptr + 1;
1337 126 : return val;
4187 tgl 1338 ECB : }
1339 :
1340 : static TransactionId
4187 tgl 1341 GIC 54 : parseXidFromText(const char *prefix, char **s, const char *filename)
1342 : {
4187 tgl 1343 CBC 54 : char *ptr = *s;
4187 tgl 1344 GIC 54 : int prefixlen = strlen(prefix);
4187 tgl 1345 ECB : TransactionId val;
1346 :
4187 tgl 1347 GIC 54 : if (strncmp(ptr, prefix, prefixlen) != 0)
4187 tgl 1348 UIC 0 : ereport(ERROR,
4187 tgl 1349 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1350 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1351 GIC 54 : ptr += prefixlen;
1352 54 : if (sscanf(ptr, "%u", &val) != 1)
4187 tgl 1353 LBC 0 : ereport(ERROR,
4187 tgl 1354 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1355 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1356 GIC 54 : ptr = strchr(ptr, '\n');
1357 54 : if (!ptr)
4187 tgl 1358 LBC 0 : ereport(ERROR,
4187 tgl 1359 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1360 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
4187 tgl 1361 GIC 54 : *s = ptr + 1;
1362 54 : return val;
4187 tgl 1363 ECB : }
1364 :
1365 : static void
2125 andres 1366 GIC 18 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1367 : VirtualTransactionId *vxid)
2125 andres 1368 ECB : {
2125 andres 1369 GIC 18 : char *ptr = *s;
1370 18 : int prefixlen = strlen(prefix);
2125 andres 1371 ECB :
2125 andres 1372 CBC 18 : if (strncmp(ptr, prefix, prefixlen) != 0)
2125 andres 1373 UIC 0 : ereport(ERROR,
2125 andres 1374 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2125 andres 1375 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
2125 andres 1376 GIC 18 : ptr += prefixlen;
1377 18 : if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
2125 andres 1378 LBC 0 : ereport(ERROR,
2125 andres 1379 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2125 andres 1380 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
2125 andres 1381 GIC 18 : ptr = strchr(ptr, '\n');
1382 18 : if (!ptr)
2125 andres 1383 LBC 0 : ereport(ERROR,
2125 andres 1384 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
2125 andres 1385 EUB : errmsg("invalid snapshot data in file \"%s\"", filename)));
2125 andres 1386 GIC 18 : *s = ptr + 1;
1387 18 : }
2125 andres 1388 ECB :
4187 tgl 1389 : /*
1390 : * ImportSnapshot
1391 : * Import a previously exported snapshot. The argument should be a
1392 : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1393 : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1394 : */
1395 : void
4187 tgl 1396 GIC 18 : ImportSnapshot(const char *idstr)
1397 : {
4187 tgl 1398 ECB : char path[MAXPGPATH];
1399 : FILE *f;
1400 : struct stat stat_buf;
1401 : char *filebuf;
1402 : int xcnt;
1403 : int i;
1404 : VirtualTransactionId src_vxid;
1405 : int src_pid;
1406 : Oid src_dbid;
1407 : int src_isolevel;
1408 : bool src_readonly;
1409 : SnapshotData snapshot;
1410 :
1411 : /*
1412 : * Must be at top level of a fresh transaction. Note in particular that
1413 : * we check we haven't acquired an XID --- if we have, it's conceivable
1414 : * that the snapshot would show it as not running, making for very screwy
1415 : * behavior.
1416 : */
4187 tgl 1417 GIC 36 : if (FirstSnapshotSet ||
1418 36 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
4187 tgl 1419 CBC 18 : IsSubTransaction())
4187 tgl 1420 LBC 0 : ereport(ERROR,
4187 tgl 1421 ECB : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
2118 tgl 1422 EUB : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1423 :
1424 : /*
1425 : * If we are in read committed mode then the next query would execute with
1426 : * a new snapshot thus making this function call quite useless.
1427 : */
4187 tgl 1428 GIC 18 : if (!IsolationUsesXactSnapshot())
4187 tgl 1429 UIC 0 : ereport(ERROR,
4187 tgl 1430 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4187 tgl 1431 EUB : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1432 :
1433 : /*
1434 : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1435 : * this mainly to prevent reading arbitrary files.
1436 : */
4187 tgl 1437 GIC 18 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
4187 tgl 1438 UIC 0 : ereport(ERROR,
4187 tgl 1439 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3933 peter_e 1440 EUB : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1441 :
1442 : /* OK, read the file */
4187 tgl 1443 GIC 18 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1444 :
4187 tgl 1445 CBC 18 : f = AllocateFile(path, PG_BINARY_R);
4187 tgl 1446 GIC 18 : if (!f)
4187 tgl 1447 LBC 0 : ereport(ERROR,
4187 tgl 1448 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3933 peter_e 1449 EUB : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1450 :
1451 : /* get the size of the file so that we know how much memory we need */
4187 tgl 1452 GIC 18 : if (fstat(fileno(f), &stat_buf))
4187 tgl 1453 UIC 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
4187 tgl 1454 ECB :
4187 tgl 1455 EUB : /* and read the file into a palloc'd string */
4187 tgl 1456 GIC 18 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1457 18 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
4187 tgl 1458 LBC 0 : elog(ERROR, "could not read file \"%s\": %m", path);
4187 tgl 1459 ECB :
4187 tgl 1460 GBC 18 : filebuf[stat_buf.st_size] = '\0';
1461 :
4187 tgl 1462 CBC 18 : FreeFile(f);
1463 :
4187 tgl 1464 ECB : /*
1465 : * Construct a snapshot struct by parsing the file content.
1466 : */
4187 tgl 1467 GIC 18 : memset(&snapshot, 0, sizeof(snapshot));
1468 :
2125 andres 1469 CBC 18 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
2125 andres 1470 GIC 18 : src_pid = parseIntFromText("pid:", &filebuf, path);
4187 tgl 1471 ECB : /* we abuse parseXidFromText a bit here ... */
4187 tgl 1472 CBC 18 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
4187 tgl 1473 GIC 18 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
4187 tgl 1474 CBC 18 : src_readonly = parseIntFromText("ro:", &filebuf, path);
4187 tgl 1475 ECB :
1509 michael 1476 CBC 18 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1477 :
4187 tgl 1478 18 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
4187 tgl 1479 GIC 18 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
4187 tgl 1480 ECB :
4187 tgl 1481 CBC 18 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1482 :
4187 tgl 1483 ECB : /* sanity-check the xid count before palloc */
4187 tgl 1484 GIC 18 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
4187 tgl 1485 UIC 0 : ereport(ERROR,
4187 tgl 1486 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1487 EUB : errmsg("invalid snapshot data in file \"%s\"", path)));
1488 :
4187 tgl 1489 GIC 18 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1490 18 : for (i = 0; i < xcnt; i++)
4187 tgl 1491 LBC 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
4187 tgl 1492 ECB :
4187 tgl 1493 GBC 18 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1494 :
4187 tgl 1495 CBC 18 : if (!snapshot.suboverflowed)
1496 : {
1497 18 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1498 :
4187 tgl 1499 ECB : /* sanity-check the xid count before palloc */
4187 tgl 1500 GIC 18 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
4187 tgl 1501 UIC 0 : ereport(ERROR,
4187 tgl 1502 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1503 EUB : errmsg("invalid snapshot data in file \"%s\"", path)));
1504 :
4187 tgl 1505 GIC 18 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1506 18 : for (i = 0; i < xcnt; i++)
4187 tgl 1507 LBC 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
4187 tgl 1508 ECB : }
4187 tgl 1509 EUB : else
1510 : {
4187 tgl 1511 UIC 0 : snapshot.subxcnt = 0;
1512 0 : snapshot.subxip = NULL;
4187 tgl 1513 EUB : }
1514 :
4187 tgl 1515 GIC 18 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1516 :
4187 tgl 1517 ECB : /*
1518 : * Do some additional sanity checking, just to protect ourselves. We
1519 : * don't trouble to check the array elements, just the most critical
1520 : * fields.
1521 : */
2125 andres 1522 GIC 18 : if (!VirtualTransactionIdIsValid(src_vxid) ||
4187 tgl 1523 18 : !OidIsValid(src_dbid) ||
4187 tgl 1524 CBC 18 : !TransactionIdIsNormal(snapshot.xmin) ||
1525 18 : !TransactionIdIsNormal(snapshot.xmax))
4187 tgl 1526 LBC 0 : ereport(ERROR,
4187 tgl 1527 ECB : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
4187 tgl 1528 EUB : errmsg("invalid snapshot data in file \"%s\"", path)));
1529 :
1530 : /*
1531 : * If we're serializable, the source transaction must be too, otherwise
1532 : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1533 : * non-read-only transaction can't adopt a snapshot from a read-only
1534 : * transaction, as predicate.c handles the cases very differently.
1535 : */
4187 tgl 1536 GIC 18 : if (IsolationIsSerializable())
1537 : {
4187 tgl 1538 LBC 0 : if (src_isolevel != XACT_SERIALIZABLE)
4187 tgl 1539 UIC 0 : ereport(ERROR,
4187 tgl 1540 EUB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1541 : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
4187 tgl 1542 UIC 0 : if (src_readonly && !XactReadOnly)
1543 0 : ereport(ERROR,
4187 tgl 1544 EUB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1545 : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1546 : }
1547 :
1548 : /*
1549 : * We cannot import a snapshot that was taken in a different database,
1550 : * because vacuum calculates OldestXmin on a per-database basis; so the
1551 : * source transaction's xmin doesn't protect us from data loss. This
1552 : * restriction could be removed if the source transaction were to mark its
1553 : * xmin as being globally applicable. But that would require some
1554 : * additional syntax, since that has to be known when the snapshot is
1555 : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1556 : */
4187 tgl 1557 GIC 18 : if (src_dbid != MyDatabaseId)
4187 tgl 1558 UIC 0 : ereport(ERROR,
4187 tgl 1559 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2118 tgl 1560 EUB : errmsg("cannot import a snapshot from a different database")));
1561 :
1562 : /* OK, install the snapshot */
2125 andres 1563 GIC 18 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
4187 tgl 1564 18 : }
4187 tgl 1565 ECB :
1566 : /*
1567 : * XactHasExportedSnapshots
1568 : * Test whether current transaction has exported any snapshots.
1569 : */
1570 : bool
4187 tgl 1571 GIC 379 : XactHasExportedSnapshots(void)
1572 : {
4187 tgl 1573 CBC 379 : return (exportedSnapshots != NIL);
1574 : }
4187 tgl 1575 ECB :
1576 : /*
1577 : * DeleteAllExportedSnapshotFiles
1578 : * Clean up any files that have been left behind by a crashed backend
1579 : * that had exported snapshots before it died.
1580 : *
1581 : * This should be called during database startup or crash recovery.
1582 : */
1583 : void
4187 tgl 1584 GIC 151 : DeleteAllExportedSnapshotFiles(void)
1585 : {
2189 peter_e 1586 ECB : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1587 : DIR *s_dir;
1588 : struct dirent *s_de;
1589 :
1590 : /*
1591 : * Problems in reading the directory, or unlinking files, are reported at
1592 : * LOG level. Since we're running in the startup process, ERROR level
1593 : * would prevent database start, and it's not important enough for that.
1594 : */
1952 tgl 1595 GIC 151 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1596 :
1952 tgl 1597 CBC 453 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1598 : {
4187 1599 302 : if (strcmp(s_de->d_name, ".") == 0 ||
4187 tgl 1600 GIC 151 : strcmp(s_de->d_name, "..") == 0)
4187 tgl 1601 CBC 302 : continue;
4187 tgl 1602 ECB :
2189 peter_e 1603 LBC 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1604 :
1952 tgl 1605 UBC 0 : if (unlink(buf) != 0)
1952 tgl 1606 UIC 0 : ereport(LOG,
1952 tgl 1607 EUB : (errcode_for_file_access(),
1608 : errmsg("could not remove file \"%s\": %m", buf)));
1609 : }
1610 :
4187 tgl 1611 GIC 151 : FreeDir(s_dir);
1612 151 : }
3781 simon 1613 ECB :
1981 noah 1614 : /*
1615 : * ThereAreNoPriorRegisteredSnapshots
1616 : * Is the registered snapshot count less than or equal to one?
1617 : *
1618 : * Don't use this to settle important decisions. While zero registrations and
1619 : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1620 : * guarantees about the significance of one registered snapshot.
1621 : */
1622 : bool
3781 simon 1623 GIC 26 : ThereAreNoPriorRegisteredSnapshots(void)
1624 : {
3004 heikki.linnakangas 1625 CBC 26 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
3004 heikki.linnakangas 1626 UIC 0 : pairingheap_is_singular(&RegisteredSnapshots))
3781 simon 1627 CBC 26 : return true;
3781 simon 1628 EUB :
3781 simon 1629 LBC 0 : return false;
1630 : }
3324 rhaas 1631 EUB :
1632 : /*
1633 : * HaveRegisteredOrActiveSnapshots
1634 : * Is there any registered or active snapshot?
1635 : *
1636 : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1637 : * this function to return true. That allows this function to be used in
1638 : * checks enforcing a longer-lived snapshot.
1639 : */
1640 : bool
414 andres 1641 GIC 51268 : HaveRegisteredOrActiveSnapshot(void)
1642 : {
414 andres 1643 CBC 51268 : if (ActiveSnapshot != NULL)
414 andres 1644 GIC 51110 : return true;
414 andres 1645 ECB :
1646 : /*
1647 : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1648 : * removed at any time due to invalidation processing. If explicitly
1649 : * registered more than one snapshot has to be in RegisteredSnapshots.
1650 : */
358 tgl 1651 GIC 158 : if (CatalogSnapshot != NULL &&
1652 3 : pairingheap_is_singular(&RegisteredSnapshots))
414 andres 1653 LBC 0 : return false;
414 andres 1654 ECB :
358 tgl 1655 GBC 158 : return !pairingheap_is_empty(&RegisteredSnapshots);
1656 : }
414 andres 1657 ECB :
1658 :
1659 : /*
1660 : * Return a timestamp that is exactly on a minute boundary.
1661 : *
1662 : * If the argument is already aligned, return that value, otherwise move to
1663 : * the next minute boundary following the given time.
1664 : */
1665 : static TimestampTz
2236 tgl 1666 GIC 3560 : AlignTimestampToMinuteBoundary(TimestampTz ts)
1667 : {
2236 tgl 1668 CBC 3560 : TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
1669 :
2557 kgrittn 1670 3560 : return retval - (retval % USECS_PER_MINUTE);
1671 : }
2557 kgrittn 1672 ECB :
1673 : /*
1674 : * Get current timestamp for snapshots
1675 : *
1676 : * This is basically GetCurrentTimestamp(), but with a guarantee that
1677 : * the result never moves backward.
1678 : */
1679 : TimestampTz
2557 kgrittn 1680 GIC 6192 : GetSnapshotCurrentTimestamp(void)
1681 : {
2236 tgl 1682 CBC 6192 : TimestampTz now = GetCurrentTimestamp();
1683 :
2557 kgrittn 1684 ECB : /*
1685 : * Don't let time move backward; if it hasn't advanced, use the old value.
1686 : */
2557 kgrittn 1687 GIC 6192 : SpinLockAcquire(&oldSnapshotControl->mutex_current);
1688 6192 : if (now <= oldSnapshotControl->current_timestamp)
2557 kgrittn 1689 LBC 0 : now = oldSnapshotControl->current_timestamp;
2557 kgrittn 1690 ECB : else
2557 kgrittn 1691 GBC 6192 : oldSnapshotControl->current_timestamp = now;
2557 kgrittn 1692 GIC 6192 : SpinLockRelease(&oldSnapshotControl->mutex_current);
2557 kgrittn 1693 ECB :
2557 kgrittn 1694 CBC 6192 : return now;
1695 : }
2557 kgrittn 1696 ECB :
1697 : /*
1698 : * Get timestamp through which vacuum may have processed based on last stored
1699 : * value for threshold_timestamp.
1700 : *
1701 : * XXX: So far, we never trust that a 64-bit value can be read atomically; if
1702 : * that ever changes, we could get rid of the spinlock here.
1703 : */
1704 : TimestampTz
2557 kgrittn 1705 GIC 651 : GetOldSnapshotThresholdTimestamp(void)
1706 : {
2236 tgl 1707 ECB : TimestampTz threshold_timestamp;
1708 :
2557 kgrittn 1709 GIC 651 : SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
1710 651 : threshold_timestamp = oldSnapshotControl->threshold_timestamp;
2557 kgrittn 1711 CBC 651 : SpinLockRelease(&oldSnapshotControl->mutex_threshold);
2557 kgrittn 1712 ECB :
2557 kgrittn 1713 CBC 651 : return threshold_timestamp;
1714 : }
2557 kgrittn 1715 ECB :
1716 : void
2236 tgl 1717 GIC 3 : SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
1718 : {
2557 kgrittn 1719 CBC 3 : SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
970 andres 1720 GIC 3 : Assert(oldSnapshotControl->threshold_timestamp <= ts);
970 andres 1721 CBC 3 : Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
2557 kgrittn 1722 3 : oldSnapshotControl->threshold_timestamp = ts;
1723 3 : oldSnapshotControl->threshold_xid = xlimit;
1724 3 : SpinLockRelease(&oldSnapshotControl->mutex_threshold);
1725 3 : }
2557 kgrittn 1726 ECB :
970 andres 1727 : /*
1728 : * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
1729 : * currently are broken, and discussion of what to do about them is
1730 : * ongoing. See
1731 : * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
1732 : */
1733 : void
970 andres 1734 GIC 2627 : SnapshotTooOldMagicForTest(void)
1735 : {
970 andres 1736 CBC 2627 : TimestampTz ts = GetSnapshotCurrentTimestamp();
1737 :
1738 2627 : Assert(old_snapshot_threshold == 0);
1739 :
1740 2627 : ts -= 5 * USECS_PER_SEC;
1741 :
1742 2627 : SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
970 andres 1743 GIC 2627 : oldSnapshotControl->threshold_timestamp = ts;
970 andres 1744 CBC 2627 : SpinLockRelease(&oldSnapshotControl->mutex_threshold);
1745 2627 : }
970 andres 1746 ECB :
1747 : /*
1748 : * If there is a valid mapping for the timestamp, set *xlimitp to
1749 : * that. Returns whether there is such a mapping.
1750 : */
1751 : static bool
970 andres 1752 UIC 0 : GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
1753 : {
970 andres 1754 UBC 0 : bool in_mapping = false;
1755 :
1756 0 : Assert(ts == AlignTimestampToMinuteBoundary(ts));
1757 :
1758 0 : LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
1759 :
1760 0 : if (oldSnapshotControl->count_used > 0
970 andres 1761 UIC 0 : && ts >= oldSnapshotControl->head_timestamp)
970 andres 1762 EUB : {
1763 : int offset;
1764 :
970 andres 1765 UIC 0 : offset = ((ts - oldSnapshotControl->head_timestamp)
1766 0 : / USECS_PER_MINUTE);
970 andres 1767 UBC 0 : if (offset > oldSnapshotControl->count_used - 1)
1768 0 : offset = oldSnapshotControl->count_used - 1;
1769 0 : offset = (oldSnapshotControl->head_offset + offset)
1770 0 : % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
970 andres 1771 EUB :
970 andres 1772 UBC 0 : *xlimitp = oldSnapshotControl->xid_by_minute[offset];
1773 :
1774 0 : in_mapping = true;
1775 : }
970 andres 1776 EUB :
970 andres 1777 UIC 0 : LWLockRelease(OldSnapshotTimeMapLock);
1778 :
970 andres 1779 UBC 0 : return in_mapping;
1780 : }
970 andres 1781 EUB :
1782 : /*
1783 : * TransactionIdLimitedForOldSnapshots
1784 : *
1785 : * Apply old snapshot limit. This is intended to be called for page pruning
1786 : * and table vacuuming, to allow old_snapshot_threshold to override the normal
1787 : * global xmin value. Actual testing for snapshot too old will be based on
1788 : * whether a snapshot timestamp is prior to the threshold timestamp set in
1789 : * this function.
1790 : *
1791 : * If the limited horizon allows a cleanup action that otherwise would not be
1792 : * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
1793 : * be called before that cleanup action.
1794 : */
1795 : bool
2557 kgrittn 1796 GIC 24 : TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
1797 : Relation relation,
970 andres 1798 ECB : TransactionId *limit_xid,
1799 : TimestampTz *limit_ts)
1800 : {
1801 : TimestampTz ts;
970 andres 1802 GIC 24 : TransactionId xlimit = recentXmin;
1803 : TransactionId latest_xmin;
970 andres 1804 ECB : TimestampTz next_map_update_ts;
1805 : TransactionId threshold_timestamp;
1806 : TransactionId threshold_xid;
1807 :
970 andres 1808 GIC 24 : Assert(TransactionIdIsNormal(recentXmin));
1809 24 : Assert(OldSnapshotThresholdActive());
970 andres 1810 CBC 24 : Assert(limit_ts != NULL && limit_xid != NULL);
2529 kgrittn 1811 ECB :
799 noah 1812 : /*
1813 : * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
1814 : * can't prune early when skipping WAL.
1815 : */
799 noah 1816 GIC 24 : if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
970 andres 1817 19 : return false;
2557 kgrittn 1818 ECB :
970 andres 1819 CBC 5 : ts = GetSnapshotCurrentTimestamp();
1820 :
1821 5 : SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
970 andres 1822 GIC 5 : latest_xmin = oldSnapshotControl->latest_xmin;
970 andres 1823 CBC 5 : next_map_update_ts = oldSnapshotControl->next_map_update;
1824 5 : SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
970 andres 1825 ECB :
1826 : /*
1827 : * Zero threshold always overrides to latest xmin, if valid. Without some
1828 : * heuristic it will find its own snapshot too old on, for example, a
1829 : * simple UPDATE -- which would make it useless for most testing, but
1830 : * there is no principled way to ensure that it doesn't fail in this way.
1831 : * Use a five-second delay to try to get useful testing behavior, but this
1832 : * may need adjustment.
1833 : */
970 andres 1834 GIC 5 : if (old_snapshot_threshold == 0)
1835 : {
969 andres 1836 CBC 5 : if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
970 andres 1837 UIC 0 : && TransactionIdFollows(latest_xmin, xlimit))
970 andres 1838 LBC 0 : xlimit = latest_xmin;
2557 kgrittn 1839 EUB :
970 andres 1840 GBC 5 : ts -= 5 * USECS_PER_SEC;
1841 : }
970 andres 1842 ECB : else
1843 : {
2557 kgrittn 1844 UIC 0 : ts = AlignTimestampToMinuteBoundary(ts)
2495 rhaas 1845 0 : - (old_snapshot_threshold * USECS_PER_MINUTE);
2557 kgrittn 1846 EUB :
1847 : /* Check for fast exit without LW locking. */
2557 kgrittn 1848 UIC 0 : SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
970 andres 1849 0 : threshold_timestamp = oldSnapshotControl->threshold_timestamp;
970 andres 1850 UBC 0 : threshold_xid = oldSnapshotControl->threshold_xid;
2557 kgrittn 1851 0 : SpinLockRelease(&oldSnapshotControl->mutex_threshold);
2557 kgrittn 1852 EUB :
970 andres 1853 UBC 0 : if (ts == threshold_timestamp)
1854 : {
970 andres 1855 EUB : /*
1856 : * Current timestamp is in same bucket as the last limit that was
1857 : * applied. Reuse.
1858 : */
970 andres 1859 UIC 0 : xlimit = threshold_xid;
1860 : }
970 andres 1861 UBC 0 : else if (ts == next_map_update_ts)
1862 : {
970 andres 1863 EUB : /*
1864 : * FIXME: This branch is super iffy - but that should probably
1865 : * fixed separately.
1866 : */
970 andres 1867 UIC 0 : xlimit = latest_xmin;
1868 : }
970 andres 1869 UBC 0 : else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
1870 : {
2557 kgrittn 1871 EUB : }
1872 :
1873 : /*
1874 : * Failsafe protection against vacuuming work of active transaction.
1875 : *
1876 : * This is not an assertion because we avoid the spinlock for
1877 : * performance, leaving open the possibility that xlimit could advance
1878 : * and be more current; but it seems prudent to apply this limit. It
1879 : * might make pruning a tiny bit less aggressive than it could be, but
1880 : * protects against data loss bugs.
1881 : */
2557 kgrittn 1882 UIC 0 : if (TransactionIdIsNormal(latest_xmin)
1883 0 : && TransactionIdPrecedes(latest_xmin, xlimit))
2557 kgrittn 1884 UBC 0 : xlimit = latest_xmin;
970 andres 1885 EUB : }
2557 kgrittn 1886 :
970 andres 1887 GIC 10 : if (TransactionIdIsValid(xlimit) &&
1888 5 : TransactionIdFollowsOrEquals(xlimit, recentXmin))
970 andres 1889 ECB : {
970 andres 1890 CBC 5 : *limit_ts = ts;
970 andres 1891 GIC 5 : *limit_xid = xlimit;
970 andres 1892 ECB :
970 andres 1893 CBC 5 : return true;
1894 : }
2557 kgrittn 1895 ECB :
970 andres 1896 UIC 0 : return false;
1897 : }
2557 kgrittn 1898 EUB :
1899 : /*
1900 : * Take care of the circular buffer that maps time to xid.
1901 : */
1902 : void
2236 tgl 1903 GIC 3560 : MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
1904 : {
2236 tgl 1905 ECB : TimestampTz ts;
1906 : TransactionId latest_xmin;
1907 : TimestampTz update_ts;
2529 kgrittn 1908 GIC 3560 : bool map_update_required = false;
1909 :
2540 kgrittn 1910 ECB : /* Never call this function when old snapshot checking is disabled. */
2540 kgrittn 1911 GIC 3560 : Assert(old_snapshot_threshold >= 0);
1912 :
2529 kgrittn 1913 CBC 3560 : ts = AlignTimestampToMinuteBoundary(whenTaken);
1914 :
2529 kgrittn 1915 ECB : /*
1916 : * Keep track of the latest xmin seen by any process. Update mapping with
1917 : * a new value when we have crossed a bucket boundary.
1918 : */
2557 kgrittn 1919 GIC 3560 : SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
2529 1920 3560 : latest_xmin = oldSnapshotControl->latest_xmin;
2529 kgrittn 1921 CBC 3560 : update_ts = oldSnapshotControl->next_map_update;
1922 3560 : if (ts > update_ts)
2529 kgrittn 1923 ECB : {
2529 kgrittn 1924 CBC 2 : oldSnapshotControl->next_map_update = ts;
2529 kgrittn 1925 GIC 2 : map_update_required = true;
2529 kgrittn 1926 ECB : }
2529 kgrittn 1927 CBC 3560 : if (TransactionIdFollows(xmin, latest_xmin))
2557 kgrittn 1928 GIC 14 : oldSnapshotControl->latest_xmin = xmin;
2557 kgrittn 1929 CBC 3560 : SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
2557 kgrittn 1930 ECB :
2529 1931 : /* We only needed to update the most recent xmin value. */
2529 kgrittn 1932 GIC 3560 : if (!map_update_required)
1933 3558 : return;
2529 kgrittn 1934 ECB :
2557 1935 : /* No further tracking needed for 0 (used for testing). */
2557 kgrittn 1936 GIC 2 : if (old_snapshot_threshold == 0)
1937 2 : return;
2557 kgrittn 1938 ECB :
1939 : /*
1940 : * We don't want to do something stupid with unusual values, but we don't
1941 : * want to litter the log with warnings or break otherwise normal
1942 : * processing for this feature; so if something seems unreasonable, just
1943 : * log at DEBUG level and return without doing anything.
1944 : */
2557 kgrittn 1945 UIC 0 : if (whenTaken < 0)
1946 : {
2557 kgrittn 1947 UBC 0 : elog(DEBUG1,
1948 : "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
2557 kgrittn 1949 EUB : (long) whenTaken);
2557 kgrittn 1950 UIC 0 : return;
1951 : }
2557 kgrittn 1952 UBC 0 : if (!TransactionIdIsNormal(xmin))
1953 : {
1954 0 : elog(DEBUG1,
1955 : "MaintainOldSnapshotTimeMapping called with xmin = %lu",
2557 kgrittn 1956 EUB : (unsigned long) xmin);
2557 kgrittn 1957 UIC 0 : return;
1958 : }
2557 kgrittn 1959 EUB :
2557 kgrittn 1960 UIC 0 : LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
1961 :
2557 kgrittn 1962 UBC 0 : Assert(oldSnapshotControl->head_offset >= 0);
2536 kgrittn 1963 UIC 0 : Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
2557 kgrittn 1964 UBC 0 : Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
1965 0 : Assert(oldSnapshotControl->count_used >= 0);
2536 1966 0 : Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
2557 kgrittn 1967 EUB :
2557 kgrittn 1968 UBC 0 : if (oldSnapshotControl->count_used == 0)
1969 : {
2557 kgrittn 1970 EUB : /* set up first entry for empty mapping */
2557 kgrittn 1971 UIC 0 : oldSnapshotControl->head_offset = 0;
1972 0 : oldSnapshotControl->head_timestamp = ts;
2557 kgrittn 1973 UBC 0 : oldSnapshotControl->count_used = 1;
1974 0 : oldSnapshotControl->xid_by_minute[0] = xmin;
2557 kgrittn 1975 EUB : }
2557 kgrittn 1976 UBC 0 : else if (ts < oldSnapshotControl->head_timestamp)
1977 : {
2557 kgrittn 1978 EUB : /* old ts; log it at DEBUG */
2557 kgrittn 1979 UIC 0 : LWLockRelease(OldSnapshotTimeMapLock);
1980 0 : elog(DEBUG1,
2557 kgrittn 1981 EUB : "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
1982 : (long) whenTaken);
2557 kgrittn 1983 UIC 0 : return;
1984 : }
2557 kgrittn 1985 UBC 0 : else if (ts <= (oldSnapshotControl->head_timestamp +
2557 kgrittn 1986 UIC 0 : ((oldSnapshotControl->count_used - 1)
2557 kgrittn 1987 UBC 0 : * USECS_PER_MINUTE)))
2557 kgrittn 1988 EUB : {
1989 : /* existing mapping; advance xid if possible */
2495 rhaas 1990 UIC 0 : int bucket = (oldSnapshotControl->head_offset
1991 0 : + ((ts - oldSnapshotControl->head_timestamp)
2495 rhaas 1992 UBC 0 : / USECS_PER_MINUTE))
1993 0 : % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
2557 kgrittn 1994 EUB :
2557 kgrittn 1995 UBC 0 : if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
2557 kgrittn 1996 UIC 0 : oldSnapshotControl->xid_by_minute[bucket] = xmin;
2557 kgrittn 1997 EUB : }
1998 : else
1999 : {
2000 : /* We need a new bucket, but it might not be the very next one. */
2001 : int distance_to_new_tail;
2002 : int distance_to_current_tail;
2003 : int advance;
2004 :
2005 : /*
2006 : * Our goal is for the new "tail" of the mapping, that is, the entry
2007 : * which is newest and thus furthest from the "head" entry, to
2008 : * correspond to "ts". Since there's one entry per minute, the
2009 : * distance between the current head and the new tail is just the
2010 : * number of minutes of difference between ts and the current
2011 : * head_timestamp.
2012 : *
2013 : * The distance from the current head to the current tail is one less
2014 : * than the number of entries in the mapping, because the entry at the
2015 : * head_offset is for 0 minutes after head_timestamp.
2016 : *
2017 : * The difference between these two values is the number of minutes by
2018 : * which we need to advance the mapping, either adding new entries or
2019 : * rotating old ones out.
2020 : */
927 rhaas 2021 UIC 0 : distance_to_new_tail =
2022 0 : (ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
927 rhaas 2023 UBC 0 : distance_to_current_tail =
2024 0 : oldSnapshotControl->count_used - 1;
2025 0 : advance = distance_to_new_tail - distance_to_current_tail;
2026 0 : Assert(advance > 0);
2557 kgrittn 2027 EUB :
2536 kgrittn 2028 UBC 0 : if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
2029 : {
2557 kgrittn 2030 EUB : /* Advance is so far that all old data is junk; start over. */
2557 kgrittn 2031 UIC 0 : oldSnapshotControl->head_offset = 0;
2032 0 : oldSnapshotControl->count_used = 1;
2557 kgrittn 2033 UBC 0 : oldSnapshotControl->xid_by_minute[0] = xmin;
927 rhaas 2034 0 : oldSnapshotControl->head_timestamp = ts;
2557 kgrittn 2035 EUB : }
2036 : else
2037 : {
2038 : /* Store the new value in one or more buckets. */
2039 : int i;
2040 :
2557 kgrittn 2041 UIC 0 : for (i = 0; i < advance; i++)
2042 : {
2536 kgrittn 2043 UBC 0 : if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
2044 : {
2557 kgrittn 2045 EUB : /* Map full and new value replaces old head. */
2495 rhaas 2046 UIC 0 : int old_head = oldSnapshotControl->head_offset;
2047 :
2536 kgrittn 2048 UBC 0 : if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
2557 kgrittn 2049 UIC 0 : oldSnapshotControl->head_offset = 0;
2557 kgrittn 2050 EUB : else
2557 kgrittn 2051 UBC 0 : oldSnapshotControl->head_offset = old_head + 1;
2557 kgrittn 2052 UIC 0 : oldSnapshotControl->xid_by_minute[old_head] = xmin;
927 rhaas 2053 UBC 0 : oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
2557 kgrittn 2054 EUB : }
2055 : else
2056 : {
2057 : /* Extend map to unused entry. */
2495 rhaas 2058 UIC 0 : int new_tail = (oldSnapshotControl->head_offset
2059 0 : + oldSnapshotControl->count_used)
2495 rhaas 2060 UBC 0 : % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
2557 kgrittn 2061 EUB :
2557 kgrittn 2062 UBC 0 : oldSnapshotControl->count_used++;
2557 kgrittn 2063 UIC 0 : oldSnapshotControl->xid_by_minute[new_tail] = xmin;
2557 kgrittn 2064 EUB : }
2065 : }
2066 : }
2067 : }
2068 :
2557 kgrittn 2069 UIC 0 : LWLockRelease(OldSnapshotTimeMapLock);
2070 : }
2557 kgrittn 2071 EUB :
2072 :
2073 : /*
2074 : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
2075 : * access to behave just like it did at a certain point in the past.
2076 : *
2077 : * Needed for logical decoding.
2078 : */
2079 : void
3324 rhaas 2080 GIC 3786 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
2081 : {
3324 rhaas 2082 CBC 3786 : Assert(historic_snapshot != NULL);
2083 :
3324 rhaas 2084 ECB : /* setup the timetravel snapshot */
3324 rhaas 2085 GIC 3786 : HistoricSnapshot = historic_snapshot;
2086 :
3324 rhaas 2087 ECB : /* setup (cmin, cmax) lookup hash */
3324 rhaas 2088 GIC 3786 : tuplecid_data = tuplecids;
2089 3786 : }
3324 rhaas 2090 ECB :
2091 :
2092 : /*
2093 : * Make catalog snapshots behave normally again.
2094 : */
2095 : void
3324 rhaas 2096 GIC 3780 : TeardownHistoricSnapshot(bool is_error)
2097 : {
3324 rhaas 2098 CBC 3780 : HistoricSnapshot = NULL;
3324 rhaas 2099 GIC 3780 : tuplecid_data = NULL;
3324 rhaas 2100 CBC 3780 : }
3324 rhaas 2101 ECB :
2102 : bool
3324 rhaas 2103 GIC 11078439 : HistoricSnapshotActive(void)
2104 : {
3324 rhaas 2105 CBC 11078439 : return HistoricSnapshot != NULL;
2106 : }
3324 rhaas 2107 ECB :
2108 : HTAB *
3324 rhaas 2109 GIC 601 : HistoricSnapshotGetTupleCids(void)
2110 : {
3324 rhaas 2111 CBC 601 : Assert(HistoricSnapshotActive());
3324 rhaas 2112 GIC 601 : return tuplecid_data;
3324 rhaas 2113 ECB : }
2901 2114 :
2115 : /*
2116 : * EstimateSnapshotSpace
2117 : * Returns the size needed to store the given snapshot.
2118 : *
2119 : * We are exporting only required fields from the Snapshot, stored in
2120 : * SerializedSnapshotData.
2121 : */
2122 : Size
201 pg 2123 GNC 916 : EstimateSnapshotSpace(Snapshot snapshot)
2124 : {
2901 rhaas 2125 ECB : Size size;
2126 :
201 pg 2127 GNC 916 : Assert(snapshot != InvalidSnapshot);
2128 916 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
2901 rhaas 2129 ECB :
2130 : /* We allocate any XID arrays needed in the same palloc block. */
2901 rhaas 2131 GIC 916 : size = add_size(sizeof(SerializedSnapshotData),
201 pg 2132 GNC 916 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
2133 916 : if (snapshot->subxcnt > 0 &&
201 pg 2134 UNC 0 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
2901 rhaas 2135 LBC 0 : size = add_size(size,
201 pg 2136 UNC 0 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
2901 rhaas 2137 EUB :
2901 rhaas 2138 GBC 916 : return size;
2139 : }
2901 rhaas 2140 ECB :
2141 : /*
2142 : * SerializeSnapshot
2143 : * Dumps the serialized snapshot (extracted from given snapshot) onto the
2144 : * memory location at start_address.
2145 : */
2146 : void
2901 rhaas 2147 GIC 890 : SerializeSnapshot(Snapshot snapshot, char *start_address)
2148 : {
2229 noah 2149 ECB : SerializedSnapshotData serialized_snapshot;
2150 :
2901 rhaas 2151 GIC 890 : Assert(snapshot->subxcnt >= 0);
2152 :
2901 rhaas 2153 ECB : /* Copy all required fields */
2229 noah 2154 GIC 890 : serialized_snapshot.xmin = snapshot->xmin;
2155 890 : serialized_snapshot.xmax = snapshot->xmax;
2229 noah 2156 CBC 890 : serialized_snapshot.xcnt = snapshot->xcnt;
2157 890 : serialized_snapshot.subxcnt = snapshot->subxcnt;
2158 890 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
2159 890 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
2160 890 : serialized_snapshot.curcid = snapshot->curcid;
2161 890 : serialized_snapshot.whenTaken = snapshot->whenTaken;
2162 890 : serialized_snapshot.lsn = snapshot->lsn;
2901 rhaas 2163 ECB :
2115 simon 2164 : /*
2165 : * Ignore the SubXID array if it has overflowed, unless the snapshot was
2166 : * taken during recovery - in that case, top-level XIDs are in subxip as
2167 : * well, and we mustn't lose them.
2168 : */
2115 simon 2169 GIC 890 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
2115 simon 2170 UIC 0 : serialized_snapshot.subxcnt = 0;
2115 simon 2171 ECB :
2229 noah 2172 EUB : /* Copy struct to possibly-unaligned buffer */
2229 noah 2173 GIC 890 : memcpy(start_address,
2174 : &serialized_snapshot, sizeof(SerializedSnapshotData));
2901 rhaas 2175 ECB :
2176 : /* Copy XID array */
2901 rhaas 2177 GIC 890 : if (snapshot->xcnt > 0)
2229 noah 2178 302 : memcpy((TransactionId *) (start_address +
2229 noah 2179 ECB : sizeof(SerializedSnapshotData)),
2901 rhaas 2180 CBC 302 : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
2181 :
2901 rhaas 2182 ECB : /*
2183 : * Copy SubXID array. Don't bother to copy it if it had overflowed,
2184 : * though, because it's not used anywhere in that case. Except if it's a
2185 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
2186 : * well in that case, so we mustn't lose them.
2187 : */
2229 noah 2188 GIC 890 : if (serialized_snapshot.subxcnt > 0)
2189 : {
2878 bruce 2190 LBC 0 : Size subxipoff = sizeof(SerializedSnapshotData) +
2878 bruce 2191 UIC 0 : snapshot->xcnt * sizeof(TransactionId);
2901 rhaas 2192 EUB :
2229 noah 2193 UBC 0 : memcpy((TransactionId *) (start_address + subxipoff),
2901 rhaas 2194 UIC 0 : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
2901 rhaas 2195 EUB : }
2901 rhaas 2196 GBC 890 : }
2197 :
2901 rhaas 2198 ECB : /*
2199 : * RestoreSnapshot
2200 : * Restore a serialized snapshot from the specified address.
2201 : *
2202 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
2203 : * to 0. The returned snapshot has the copied flag set.
2204 : */
2205 : Snapshot
2901 rhaas 2206 GIC 3383 : RestoreSnapshot(char *start_address)
2207 : {
2229 noah 2208 ECB : SerializedSnapshotData serialized_snapshot;
2209 : Size size;
2210 : Snapshot snapshot;
2211 : TransactionId *serialized_xids;
2212 :
2229 noah 2213 GIC 3383 : memcpy(&serialized_snapshot, start_address,
2214 : sizeof(SerializedSnapshotData));
2901 rhaas 2215 CBC 3383 : serialized_xids = (TransactionId *)
2216 : (start_address + sizeof(SerializedSnapshotData));
2901 rhaas 2217 ECB :
2218 : /* We allocate any XID arrays needed in the same palloc block. */
2901 rhaas 2219 GIC 3383 : size = sizeof(SnapshotData)
2229 noah 2220 3383 : + serialized_snapshot.xcnt * sizeof(TransactionId)
2229 noah 2221 CBC 3383 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
2901 rhaas 2222 ECB :
2223 : /* Copy all required fields */
2901 rhaas 2224 GIC 3383 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1539 andres 2225 3383 : snapshot->snapshot_type = SNAPSHOT_MVCC;
2229 noah 2226 CBC 3383 : snapshot->xmin = serialized_snapshot.xmin;
2227 3383 : snapshot->xmax = serialized_snapshot.xmax;
2901 rhaas 2228 3383 : snapshot->xip = NULL;
2229 noah 2229 3383 : snapshot->xcnt = serialized_snapshot.xcnt;
2901 rhaas 2230 3383 : snapshot->subxip = NULL;
2229 noah 2231 3383 : snapshot->subxcnt = serialized_snapshot.subxcnt;
2232 3383 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
2233 3383 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
2234 3383 : snapshot->curcid = serialized_snapshot.curcid;
2235 3383 : snapshot->whenTaken = serialized_snapshot.whenTaken;
2236 3383 : snapshot->lsn = serialized_snapshot.lsn;
965 andres 2237 3383 : snapshot->snapXactCompletionCount = 0;
2901 rhaas 2238 ECB :
2239 : /* Copy XIDs, if present. */
2229 noah 2240 GIC 3383 : if (serialized_snapshot.xcnt > 0)
2241 : {
2901 rhaas 2242 CBC 805 : snapshot->xip = (TransactionId *) (snapshot + 1);
2901 rhaas 2243 GIC 805 : memcpy(snapshot->xip, serialized_xids,
2229 noah 2244 CBC 805 : serialized_snapshot.xcnt * sizeof(TransactionId));
2901 rhaas 2245 ECB : }
2246 :
2247 : /* Copy SubXIDs, if present. */
2229 noah 2248 GIC 3383 : if (serialized_snapshot.subxcnt > 0)
2249 : {
2473 rhaas 2250 LBC 0 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
2229 noah 2251 UIC 0 : serialized_snapshot.xcnt;
2229 noah 2252 UBC 0 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
2253 0 : serialized_snapshot.subxcnt * sizeof(TransactionId));
2901 rhaas 2254 EUB : }
2255 :
2256 : /* Set the copied flag so that the caller will set refcounts correctly. */
2901 rhaas 2257 GIC 3383 : snapshot->regd_count = 0;
2258 3383 : snapshot->active_count = 0;
2901 rhaas 2259 CBC 3383 : snapshot->copied = true;
2901 rhaas 2260 ECB :
2901 rhaas 2261 CBC 3383 : return snapshot;
2262 : }
2901 rhaas 2263 ECB :
2264 : /*
2265 : * Install a restored snapshot as the transaction snapshot.
2266 : *
2267 : * The second argument is of type void * so that snapmgr.h need not include
2268 : * the declaration for PGPROC.
2269 : */
2270 : void
1028 andres 2271 GIC 1453 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
2272 : {
1028 andres 2273 CBC 1453 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
2901 rhaas 2274 GIC 1453 : }
1539 andres 2275 ECB :
2276 : /*
2277 : * XidInMVCCSnapshot
2278 : * Is the given XID still-in-progress according to the snapshot?
2279 : *
2280 : * Note: GetSnapshotData never stores either top xid or subxids of our own
2281 : * backend into a snapshot, so these xids will not be reported as "running"
2282 : * by this function. This is OK for current uses, because we always check
2283 : * TransactionIdIsCurrentTransactionId first, except when it's known the
2284 : * XID could not be ours anyway.
2285 : */
2286 : bool
1539 andres 2287 GIC 193368061 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
2288 : {
2289 : /*
2290 : * Make a quick range check to eliminate most XIDs without looking at the
2291 : * xip arrays. Note that this is OK even if we convert a subxact XID to
2292 : * its parent below, because a subxact with XID < xmin has surely also got
2293 : * a parent with XID < xmin, while one with XID >= xmax must belong to a
2294 : * parent that was not yet committed at the time of this snapshot.
2295 : */
2296 :
2297 : /* Any xid < xmin is not in-progress */
1539 andres 2298 CBC 193368061 : if (TransactionIdPrecedes(xid, snapshot->xmin))
2299 190172414 : return false;
2300 : /* Any xid >= xmax is in-progress */
2301 3195647 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
2302 9341 : return true;
2303 :
2304 : /*
2305 : * Snapshot information is stored slightly differently in snapshots taken
2306 : * during recovery.
2307 : */
2308 3186306 : if (!snapshot->takenDuringRecovery)
2309 : {
2310 : /*
2311 : * If the snapshot contains full subxact data, the fastest way to
2312 : * check things is just to compare the given XID against both subxact
2313 : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
2314 : * use pg_subtrans to convert a subxact XID to its parent XID, but
2315 : * then we need only look at top-level XIDs not subxacts.
2316 : */
2317 3186303 : if (!snapshot->suboverflowed)
2318 : {
2319 : /* we have full data, so search subxip */
249 john.naylor 2320 GNC 3185953 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
2321 230 : return true;
2322 :
2323 : /* not there, fall through to search xip[] */
2324 : }
2325 : else
1539 andres 2326 ECB : {
2327 : /*
2328 : * Snapshot overflowed, so convert xid to top-level. This is safe
2329 : * because we eliminated too-old XIDs above.
2330 : */
1539 andres 2331 GIC 350 : xid = SubTransGetTopmostTransaction(xid);
2332 :
1539 andres 2333 ECB : /*
1539 andres 2334 EUB : * If xid was indeed a subxact, we might now have an xid < xmin,
2335 : * so recheck to avoid an array scan. No point in rechecking
2336 : * xmax.
1539 andres 2337 ECB : */
1539 andres 2338 CBC 350 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1539 andres 2339 UIC 0 : return false;
2340 : }
2341 :
249 john.naylor 2342 GNC 3186073 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
2343 5537 : return true;
2344 : }
2345 : else
1539 andres 2346 ECB : {
2347 : /*
2348 : * In recovery we store all xids in the subxip array because it is by
2349 : * far the bigger array, and we mostly don't know which xids are
1539 andres 2350 EUB : * top-level and which are subxacts. The xip array is empty.
2351 : *
2352 : * We start by searching subtrans, if we overflowed.
2353 : */
1539 andres 2354 GIC 3 : if (snapshot->suboverflowed)
2355 : {
2356 : /*
1539 andres 2357 EUB : * Snapshot overflowed, so convert xid to top-level. This is safe
2358 : * because we eliminated too-old XIDs above.
2359 : */
1539 andres 2360 UIC 0 : xid = SubTransGetTopmostTransaction(xid);
2361 :
2362 : /*
2363 : * If xid was indeed a subxact, we might now have an xid < xmin,
2364 : * so recheck to avoid an array scan. No point in rechecking
2365 : * xmax.
1539 andres 2366 ECB : */
1539 andres 2367 LBC 0 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1539 andres 2368 UIC 0 : return false;
2369 : }
1539 andres 2370 ECB :
2371 : /*
2372 : * We now have either a top-level xid higher than xmin or an
2373 : * indeterminate xid. We don't know whether it's top level or subxact
2374 : * but it doesn't matter. If it's present, the xid is visible.
2375 : */
249 john.naylor 2376 GNC 3 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
2377 2 : return true;
2378 : }
2379 :
1539 andres 2380 GIC 3180537 : return false;
2381 : }
|