Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frammishes. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
 14 : * wait until a variable changes value. The variable is not reset when the
 15 : * lock is acquired with LWLockAcquire, i.e. it keeps the value it was set
 16 : * to when the lock was last released, and it can be updated without
 17 : * releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
 20 : * appropriate value for a free lock. The meaning of the variable is up to
 21 : * the caller; the lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
 31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
 51 : * To release the lock we use an atomic decrement. If the
52 : * new value is zero (we get that atomically), we know we can/have to release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
 63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
 65 : *
 66 : * To mitigate those races we use a multi-phase locking attempt:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
 73 : * This protects us against the problem above: nobody can release the lock
 74 : * too quickly before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
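A minimal sketch of the Phase 1-4 protocol above, condensed from the logic of
LWLockAcquire()/LWLockAttemptLock() later in this file. It is illustrative
only and omits the spurious-wakeup and RELEASE_OK bookkeeping of the real
code:

    for (;;)
    {
        if (!LWLockAttemptLock(lock, mode))     /* Phase 1: atomic attempt */
            break;                              /* got the lock */

        LWLockQueueSelf(lock, mode);            /* Phase 2: enqueue ourselves */

        if (!LWLockAttemptLock(lock, mode))     /* Phase 3: try again */
        {
            LWLockDequeueSelf(lock);            /* got it after all: undo queueing */
            break;
        }

        PGSemaphoreLock(MyProc->sem);           /* Phase 4: sleep, then retry */
    }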
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "postmaster/postmaster.h"
84 : #include "replication/slot.h"
85 : #include "storage/ipc.h"
86 : #include "storage/predicate.h"
87 : #include "storage/proc.h"
88 : #include "storage/proclist.h"
89 : #include "storage/spin.h"
90 : #include "utils/memutils.h"
91 :
92 : #ifdef LWLOCK_STATS
93 : #include "utils/hsearch.h"
94 : #endif
95 :
96 :
97 : /* We use the ShmemLock spinlock to protect LWLockCounter */
98 : extern slock_t *ShmemLock;
99 :
100 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
101 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
102 : #define LW_FLAG_LOCKED ((uint32) 1 << 28)
103 :
104 : #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
105 : #define LW_VAL_SHARED 1
106 :
107 : #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
108 : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
109 : #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
110 :
111 : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
112 : "MAX_BACKENDS too big for lwlock.c");
113 :
114 : /*
115 : * There are three sorts of LWLock "tranches":
116 : *
117 : * 1. The individually-named locks defined in lwlocknames.h each have their
118 : * own tranche. The names of these tranches appear in IndividualLWLockNames[]
119 : * in lwlocknames.c.
120 : *
121 : * 2. There are some predefined tranches for built-in groups of locks.
122 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
123 : * appear in BuiltinTrancheNames[] below.
124 : *
125 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
126 : * or LWLockRegisterTranche. The names of these that are known in the current
127 : * process appear in LWLockTrancheNames[].
128 : *
129 : * All these names are user-visible as wait event names, so choose with care
130 : * ... and do not forget to update the documentation's list of wait events.
131 : */
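As an illustration of case 3, a named tranche would typically be set up like
this from an extension; the extension and hook function names here are
hypothetical:

    /* in the shmem_request_hook of a shared_preload_libraries module */
    static void
    my_ext_shmem_request(void)
    {
        RequestNamedLWLockTranche("MyExtLocks", 4);
    }

    /* later, e.g. in the shmem_startup_hook */
    static void
    my_ext_shmem_startup(void)
    {
        LWLockPadded *locks = GetNamedLWLockTranche("MyExtLocks");

        LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
        /* ... manipulate extension shared state ... */
        LWLockRelease(&locks[0].lock);
    }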
132 : extern const char *const IndividualLWLockNames[]; /* in lwlocknames.c */
133 :
134 : static const char *const BuiltinTrancheNames[] = {
135 : /* LWTRANCHE_XACT_BUFFER: */
136 : "XactBuffer",
137 : /* LWTRANCHE_COMMITTS_BUFFER: */
138 : "CommitTSBuffer",
139 : /* LWTRANCHE_SUBTRANS_BUFFER: */
140 : "SubtransBuffer",
141 : /* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */
142 : "MultiXactOffsetBuffer",
143 : /* LWTRANCHE_MULTIXACTMEMBER_BUFFER: */
144 : "MultiXactMemberBuffer",
145 : /* LWTRANCHE_NOTIFY_BUFFER: */
146 : "NotifyBuffer",
147 : /* LWTRANCHE_SERIAL_BUFFER: */
148 : "SerialBuffer",
149 : /* LWTRANCHE_WAL_INSERT: */
150 : "WALInsert",
151 : /* LWTRANCHE_BUFFER_CONTENT: */
152 : "BufferContent",
153 : /* LWTRANCHE_REPLICATION_ORIGIN_STATE: */
154 : "ReplicationOriginState",
155 : /* LWTRANCHE_REPLICATION_SLOT_IO: */
156 : "ReplicationSlotIO",
157 : /* LWTRANCHE_LOCK_FASTPATH: */
158 : "LockFastPath",
159 : /* LWTRANCHE_BUFFER_MAPPING: */
160 : "BufferMapping",
161 : /* LWTRANCHE_LOCK_MANAGER: */
162 : "LockManager",
163 : /* LWTRANCHE_PREDICATE_LOCK_MANAGER: */
164 : "PredicateLockManager",
165 : /* LWTRANCHE_PARALLEL_HASH_JOIN: */
166 : "ParallelHashJoin",
167 : /* LWTRANCHE_PARALLEL_QUERY_DSA: */
168 : "ParallelQueryDSA",
169 : /* LWTRANCHE_PER_SESSION_DSA: */
170 : "PerSessionDSA",
171 : /* LWTRANCHE_PER_SESSION_RECORD_TYPE: */
172 : "PerSessionRecordType",
173 : /* LWTRANCHE_PER_SESSION_RECORD_TYPMOD: */
174 : "PerSessionRecordTypmod",
175 : /* LWTRANCHE_SHARED_TUPLESTORE: */
176 : "SharedTupleStore",
177 : /* LWTRANCHE_SHARED_TIDBITMAP: */
178 : "SharedTidBitmap",
179 : /* LWTRANCHE_PARALLEL_APPEND: */
180 : "ParallelAppend",
181 : /* LWTRANCHE_PER_XACT_PREDICATE_LIST: */
182 : "PerXactPredicateList",
183 : /* LWTRANCHE_PGSTATS_DSA: */
184 : "PgStatsDSA",
185 : /* LWTRANCHE_PGSTATS_HASH: */
186 : "PgStatsHash",
187 : /* LWTRANCHE_PGSTATS_DATA: */
188 : "PgStatsData",
189 : /* LWTRANCHE_LAUNCHER_DSA: */
190 : "LogicalRepLauncherDSA",
191 : /* LWTRANCHE_LAUNCHER_HASH: */
192 : "LogicalRepLauncherHash",
193 : };
194 :
195 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
196 : LWTRANCHE_FIRST_USER_DEFINED - NUM_INDIVIDUAL_LWLOCKS,
197 : "missing entries in BuiltinTrancheNames[]");
198 :
199 : /*
200 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
201 : * stores the names of all dynamically-created tranches known to the current
202 : * process. Any unused entries in the array will contain NULL.
203 : */
204 : static const char **LWLockTrancheNames = NULL;
205 : static int LWLockTrancheNamesAllocated = 0;
206 :
207 : /*
208 : * This points to the main array of LWLocks in shared memory. Backends inherit
209 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
210 : * where we have special measures to pass it down).
211 : */
212 : LWLockPadded *MainLWLockArray = NULL;
213 :
214 : /*
215 : * We use this structure to keep track of locked LWLocks for release
216 : * during error recovery. Normally, only a few will be held at once, but
217 : * occasionally the number can be much higher; for example, the pg_buffercache
218 : * extension locks all buffer partitions simultaneously.
219 : */
220 : #define MAX_SIMUL_LWLOCKS 200
221 :
222 : /* struct representing the LWLocks we're holding */
223 : typedef struct LWLockHandle
224 : {
225 : LWLock *lock;
226 : LWLockMode mode;
227 : } LWLockHandle;
228 :
229 : static int num_held_lwlocks = 0;
230 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
231 :
232 : /* struct representing the LWLock tranche request for named tranche */
233 : typedef struct NamedLWLockTrancheRequest
234 : {
235 : char tranche_name[NAMEDATALEN];
236 : int num_lwlocks;
237 : } NamedLWLockTrancheRequest;
238 :
239 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
240 : static int NamedLWLockTrancheRequestsAllocated = 0;
241 :
242 : /*
243 : * NamedLWLockTrancheRequests is both the valid length of the request array,
244 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
245 : * This variable and NamedLWLockTrancheArray are non-static so that
246 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
247 : */
248 : int NamedLWLockTrancheRequests = 0;
249 :
250 : /* points to data in shared memory: */
251 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
252 :
253 : static void InitializeLWLocks(void);
254 : static inline void LWLockReportWaitStart(LWLock *lock);
255 : static inline void LWLockReportWaitEnd(void);
256 : static const char *GetLWTrancheName(uint16 trancheId);
257 :
258 : #define T_NAME(lock) \
259 : GetLWTrancheName((lock)->tranche)
260 :
261 : #ifdef LWLOCK_STATS
262 : typedef struct lwlock_stats_key
263 : {
264 : int tranche;
265 : void *instance;
266 : } lwlock_stats_key;
267 :
268 : typedef struct lwlock_stats
269 : {
270 : lwlock_stats_key key;
271 : int sh_acquire_count;
272 : int ex_acquire_count;
273 : int block_count;
274 : int dequeue_self_count;
275 : int spin_delay_count;
276 : } lwlock_stats;
277 :
278 : static HTAB *lwlock_stats_htab;
279 : static lwlock_stats lwlock_stats_dummy;
280 : #endif
281 :
282 : #ifdef LOCK_DEBUG
283 : bool Trace_lwlocks = false;
284 :
285 : inline static void
286 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
287 : {
288 : /* hide statement & context here, otherwise the log is just too verbose */
289 : if (Trace_lwlocks)
290 : {
291 : uint32 state = pg_atomic_read_u32(&lock->state);
292 :
293 : ereport(LOG,
294 : (errhidestmt(true),
295 : errhidecontext(true),
296 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
297 : MyProcPid,
298 : where, T_NAME(lock), lock,
299 : (state & LW_VAL_EXCLUSIVE) != 0,
300 : state & LW_SHARED_MASK,
301 : (state & LW_FLAG_HAS_WAITERS) != 0,
302 : pg_atomic_read_u32(&lock->nwaiters),
303 : (state & LW_FLAG_RELEASE_OK) != 0)));
304 : }
305 : }
306 :
307 : inline static void
308 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
309 : {
310 : /* hide statement & context here, otherwise the log is just too verbose */
311 : if (Trace_lwlocks)
312 : {
313 : ereport(LOG,
314 : (errhidestmt(true),
315 : errhidecontext(true),
316 : errmsg_internal("%s(%s %p): %s", where,
317 : T_NAME(lock), lock, msg)));
318 : }
319 : }
320 :
321 : #else /* not LOCK_DEBUG */
322 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
323 : #define LOG_LWDEBUG(a,b,c) ((void)0)
324 : #endif /* LOCK_DEBUG */
325 :
326 : #ifdef LWLOCK_STATS
327 :
328 : static void init_lwlock_stats(void);
329 : static void print_lwlock_stats(int code, Datum arg);
330 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
331 :
332 : static void
333 : init_lwlock_stats(void)
334 : {
335 : HASHCTL ctl;
336 : static MemoryContext lwlock_stats_cxt = NULL;
337 : static bool exit_registered = false;
338 :
339 : if (lwlock_stats_cxt != NULL)
340 : MemoryContextDelete(lwlock_stats_cxt);
341 :
342 : /*
343 : * The LWLock stats will be updated within a critical section, which
344 : * requires allocating new hash entries. Allocations within a critical
345 : * section are normally not allowed because running out of memory would
346 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
347 : * turned on in production, so that's an acceptable risk. The hash entries
348 : * are small, so the risk of running out of memory is minimal in practice.
349 : */
350 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
351 : "LWLock stats",
352 : ALLOCSET_DEFAULT_SIZES);
353 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
354 :
355 : ctl.keysize = sizeof(lwlock_stats_key);
356 : ctl.entrysize = sizeof(lwlock_stats);
357 : ctl.hcxt = lwlock_stats_cxt;
358 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
359 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
360 : if (!exit_registered)
361 : {
362 : on_shmem_exit(print_lwlock_stats, 0);
363 : exit_registered = true;
364 : }
365 : }
366 :
367 : static void
368 : print_lwlock_stats(int code, Datum arg)
369 : {
370 : HASH_SEQ_STATUS scan;
371 : lwlock_stats *lwstats;
372 :
373 : hash_seq_init(&scan, lwlock_stats_htab);
374 :
375 : /* Grab an LWLock to keep different backends from mixing reports */
376 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
377 :
378 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
379 : {
380 : fprintf(stderr,
381 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
382 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
383 : lwstats->key.instance, lwstats->sh_acquire_count,
384 : lwstats->ex_acquire_count, lwstats->block_count,
385 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
386 : }
387 :
388 : LWLockRelease(&MainLWLockArray[0].lock);
389 : }
390 :
391 : static lwlock_stats *
392 : get_lwlock_stats_entry(LWLock *lock)
393 : {
394 : lwlock_stats_key key;
395 : lwlock_stats *lwstats;
396 : bool found;
397 :
398 : /*
399 : * During shared memory initialization, the hash table doesn't exist yet.
400 : * Stats of that phase aren't very interesting, so just collect operations
401 : * on all locks in a single dummy entry.
402 : */
403 : if (lwlock_stats_htab == NULL)
404 : return &lwlock_stats_dummy;
405 :
406 : /* Fetch or create the entry. */
407 : MemSet(&key, 0, sizeof(key));
408 : key.tranche = lock->tranche;
409 : key.instance = lock;
410 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
411 : if (!found)
412 : {
413 : lwstats->sh_acquire_count = 0;
414 : lwstats->ex_acquire_count = 0;
415 : lwstats->block_count = 0;
416 : lwstats->dequeue_self_count = 0;
417 : lwstats->spin_delay_count = 0;
418 : }
419 : return lwstats;
420 : }
421 : #endif /* LWLOCK_STATS */
422 :
423 :
424 : /*
425 : * Compute number of LWLocks required by named tranches. These will be
426 : * allocated in the main array.
427 : */
428 : static int
1060 tgl 429 GIC 6390 : NumLWLocksForNamedTranches(void)
430 : {
2621 rhaas 431 6390 : int numLocks = 0;
432 : int i;
433 :
434 6402 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
435 12 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
2621 rhaas 436 ECB :
2621 rhaas 437 GIC 6390 : return numLocks;
2621 rhaas 438 ECB : }
439 :
440 : /*
441 : * Compute shmem space needed for LWLocks and named tranches.
7862 tgl 442 : */
443 : Size
7862 tgl 444 CBC 4564 : LWLockShmemSize(void)
445 : {
446 : Size size;
447 : int i;
2615 rhaas 448 GIC 4564 : int numLocks = NUM_FIXED_LWLOCKS;
449 :
450 : /* Calculate total number of locks needed in the main array. */
1060 tgl 451 CBC 4564 : numLocks += NumLWLocksForNamedTranches();
452 :
453 : /* Space for the LWLock array. */
6414 tgl 454 GIC 4564 : size = mul_size(numLocks, sizeof(LWLockPadded));
6441 tgl 455 ECB :
456 : /* Space for dynamic allocation counter, plus room for alignment. */
2615 rhaas 457 GIC 4564 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
7862 tgl 458 ECB :
459 : /* space for named tranches. */
2621 rhaas 460 GIC 4564 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
2621 rhaas 461 ECB :
462 : /* space for name of each tranche. */
2621 rhaas 463 GIC 4573 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
2621 rhaas 464 CBC 9 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
465 :
6441 tgl 466 GIC 4564 : return size;
7862 tgl 467 ECB : }
468 :
469 : /*
2614 rhaas 470 : * Allocate shmem space for the main LWLock array and all tranches and
1060 tgl 471 : * initialize it. We also register extension LWLock tranches here.
472 : */
7862 473 : void
7862 tgl 474 GIC 1826 : CreateLWLocks(void)
475 : {
3359 rhaas 476 1826 : if (!IsUnderPostmaster)
3359 rhaas 477 ECB : {
3359 rhaas 478 GIC 1826 : Size spaceLocks = LWLockShmemSize();
3359 rhaas 479 ECB : int *LWLockCounter;
480 : char *ptr;
481 :
482 : /* Allocate space */
3359 rhaas 483 GIC 1826 : ptr = (char *) ShmemAlloc(spaceLocks);
6414 tgl 484 ECB :
485 : /* Leave room for dynamic allocation of tranches */
2615 rhaas 486 GIC 1826 : ptr += sizeof(int);
6393 tgl 487 ECB :
488 : /* Ensure desired alignment of LWLock array */
3359 rhaas 489 GIC 1826 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
6414 tgl 490 ECB :
3359 rhaas 491 GIC 1826 : MainLWLockArray = (LWLockPadded *) ptr;
7862 tgl 492 ECB :
493 : /*
494 : * Initialize the dynamic-allocation counter for tranches, which is
495 : * stored just before the first LWLock.
496 : */
2615 rhaas 497 GIC 1826 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
2615 rhaas 498 CBC 1826 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
2621 rhaas 499 ECB :
500 : /* Initialize all LWLocks */
2614 rhaas 501 GIC 1826 : InitializeLWLocks();
2614 rhaas 502 ECB : }
503 :
504 : /* Register named extension LWLock tranches in the current process. */
1060 tgl 505 GIC 1829 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
1060 tgl 506 CBC 3 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
507 3 : NamedLWLockTrancheArray[i].trancheName);
2614 rhaas 508 1826 : }
2614 rhaas 509 ECB :
510 : /*
511 : * Initialize LWLocks that are fixed and those belonging to named tranches.
512 : */
513 : static void
2614 rhaas 514 GIC 1826 : InitializeLWLocks(void)
2614 rhaas 515 ECB : {
1060 tgl 516 GIC 1826 : int numNamedLocks = NumLWLocksForNamedTranches();
2614 rhaas 517 ECB : int id;
518 : int i;
519 : int j;
520 : LWLockPadded *lock;
521 :
522 : /* Initialize all individual LWLocks in main array */
2614 rhaas 523 GIC 89474 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
2305 rhaas 524 CBC 87648 : LWLockInitialize(&lock->lock, id);
2614 rhaas 525 ECB :
526 : /* Initialize buffer mapping LWLocks in main array */
866 michael 527 GIC 1826 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
2614 rhaas 528 CBC 235554 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
529 233728 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
2614 rhaas 530 ECB :
531 : /* Initialize lmgrs' LWLocks in main array */
866 michael 532 GIC 1826 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
2614 rhaas 533 CBC 31042 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
534 29216 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
2614 rhaas 535 ECB :
536 : /* Initialize predicate lmgrs' LWLocks in main array */
866 michael 537 GIC 1826 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
2614 rhaas 538 CBC 31042 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
539 29216 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
2614 rhaas 540 ECB :
541 : /*
542 : * Copy the info about any named tranches into shared memory (so that
543 : * other processes can see it), and initialize the requested LWLocks.
544 : */
2614 rhaas 545 GIC 1826 : if (NamedLWLockTrancheRequests > 0)
2614 rhaas 546 ECB : {
547 : char *trancheNames;
548 :
2614 rhaas 549 GIC 3 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
2614 rhaas 550 CBC 3 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
2614 rhaas 551 ECB :
2614 rhaas 552 GIC 3 : trancheNames = (char *) NamedLWLockTrancheArray +
2614 rhaas 553 CBC 3 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
554 3 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
2614 rhaas 555 ECB :
2614 rhaas 556 GIC 6 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
2621 rhaas 557 ECB : {
558 : NamedLWLockTrancheRequest *request;
559 : NamedLWLockTranche *tranche;
560 : char *name;
561 :
2614 rhaas 562 GIC 3 : request = &NamedLWLockTrancheRequestArray[i];
2614 rhaas 563 CBC 3 : tranche = &NamedLWLockTrancheArray[i];
2614 rhaas 564 ECB :
2614 rhaas 565 GIC 3 : name = trancheNames;
2614 rhaas 566 CBC 3 : trancheNames += strlen(request->tranche_name) + 1;
567 3 : strcpy(name, request->tranche_name);
568 3 : tranche->trancheId = LWLockNewTrancheId();
2305 569 3 : tranche->trancheName = name;
2614 rhaas 570 ECB :
2614 rhaas 571 GIC 6 : for (j = 0; j < request->num_lwlocks; j++, lock++)
2614 rhaas 572 CBC 3 : LWLockInitialize(&lock->lock, tranche->trancheId);
2621 rhaas 573 ECB : }
574 : }
2614 rhaas 575 GIC 1826 : }
2614 rhaas 576 ECB :
577 : /*
578 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
579 : */
580 : void
3205 heikki.linnakangas 581 GIC 11510 : InitLWLockAccess(void)
3205 heikki.linnakangas 582 ECB : {
583 : #ifdef LWLOCK_STATS
584 : init_lwlock_stats();
585 : #endif
3205 heikki.linnakangas 586 GIC 11510 : }
7862 tgl 587 ECB :
588 : /*
 589 : * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
 590 : * to the specified tranche.
 591 : *
 592 : * The caller retrieves the requested number of LWLocks starting from the
 593 : * base lock address returned by this function. This can be used for
 594 : * tranches that were requested via the RequestNamedLWLockTranche() API.
595 : */
596 : LWLockPadded *
2621 rhaas 597 GIC 3 : GetNamedLWLockTranche(const char *tranche_name)
2621 rhaas 598 ECB : {
599 : int lock_pos;
600 : int i;
601 :
602 : /*
603 : * Obtain the position of base address of LWLock belonging to requested
604 : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
605 : * in MainLWLockArray after fixed locks.
606 : */
2615 rhaas 607 GIC 3 : lock_pos = NUM_FIXED_LWLOCKS;
2621 rhaas 608 CBC 3 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
2621 rhaas 609 ECB : {
2621 rhaas 610 GIC 3 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
2621 rhaas 611 ECB : tranche_name) == 0)
2621 rhaas 612 GIC 3 : return &MainLWLockArray[lock_pos];
2621 rhaas 613 ECB :
2621 rhaas 614 UIC 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
2621 rhaas 615 EUB : }
616 :
1060 tgl 617 UIC 0 : elog(ERROR, "requested tranche is not registered");
2621 rhaas 618 EUB :
619 : /* just to keep compiler quiet */
620 : return NULL;
621 : }
622 :
623 : /*
624 : * Allocate a new tranche ID.
625 : */
626 : int
3359 rhaas 627 GIC 6 : LWLockNewTrancheId(void)
3359 rhaas 628 ECB : {
629 : int result;
630 : int *LWLockCounter;
631 :
2615 rhaas 632 GIC 6 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
3359 rhaas 633 CBC 6 : SpinLockAcquire(ShmemLock);
2615 634 6 : result = (*LWLockCounter)++;
3359 635 6 : SpinLockRelease(ShmemLock);
3359 rhaas 636 ECB :
3359 rhaas 637 GIC 6 : return result;
3359 rhaas 638 ECB : }
639 :
640 : /*
641 : * Register a dynamic tranche name in the lookup table of the current process.
642 : *
643 : * This routine will save a pointer to the tranche name passed as an argument,
644 : * so the name should be allocated in a backend-lifetime context
645 : * (shared memory, TopMemoryContext, static constant, or similar).
646 : *
647 : * The tranche name will be user-visible as a wait event name, so try to
648 : * use a name that fits the style for those.
649 : */
650 : void
1986 peter_e 651 GIC 8 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
3359 rhaas 652 ECB : {
653 : /* This should only be called for user-defined tranches. */
1060 tgl 654 GIC 8 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
1060 tgl 655 LBC 0 : return;
1060 tgl 656 EUB :
657 : /* Convert to array index. */
1060 tgl 658 GIC 8 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
3359 rhaas 659 ECB :
660 : /* If necessary, create or enlarge array. */
1060 tgl 661 GIC 8 : if (tranche_id >= LWLockTrancheNamesAllocated)
3359 rhaas 662 ECB : {
663 : int newalloc;
664 :
647 drowley 665 GIC 8 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
3359 rhaas 666 ECB :
1060 tgl 667 GIC 8 : if (LWLockTrancheNames == NULL)
1060 tgl 668 CBC 8 : LWLockTrancheNames = (const char **)
669 8 : MemoryContextAllocZero(TopMemoryContext,
1060 tgl 670 ECB : newalloc * sizeof(char *));
671 : else
148 peter 672 UNC 0 : LWLockTrancheNames =
673 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
1060 tgl 674 GIC 8 : LWLockTrancheNamesAllocated = newalloc;
675 : }
676 :
677 8 : LWLockTrancheNames[tranche_id] = tranche_name;
678 : }
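For locks that live outside the main array (e.g. in a DSM segment), the
dynamic path looks roughly like the following; the function and tranche names
are hypothetical:

    /* creating side: allocate an ID, initialize the lock, register the name */
    static void
    my_ext_create_lock(LWLock *lock_in_shmem)
    {
        int         tranche_id = LWLockNewTrancheId();

        LWLockInitialize(lock_in_shmem, tranche_id);
        LWLockRegisterTranche(tranche_id, "MyExtDSA");
    }

    /* attaching side: register the name again so wait events resolve to
     * "MyExtDSA" instead of the generic "extension" in this process too */
    static void
    my_ext_attach_lock(LWLock *lock_in_shmem, int tranche_id)
    {
        LWLockRegisterTranche(tranche_id, "MyExtDSA");
    }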
679 :
680 : /*
681 : * RequestNamedLWLockTranche
682 : * Request that extra LWLocks be allocated during postmaster
683 : * startup.
684 : *
685 : * This may only be called via the shmem_request_hook of a library that is
686 : * loaded into the postmaster via shared_preload_libraries. Calls from
687 : * elsewhere will fail.
688 : *
1059 tgl 689 ECB : * The tranche name will be user-visible as a wait event name, so try to
690 : * use a name that fits the style for those.
691 : */
692 : void
2621 rhaas 693 CBC 3 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
2621 rhaas 694 EUB : {
695 : NamedLWLockTrancheRequest *request;
2621 rhaas 696 ECB :
331 rhaas 697 GIC 3 : if (!process_shmem_requests_in_progress)
331 rhaas 698 LBC 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
2621 rhaas 699 ECB :
2621 rhaas 700 CBC 3 : if (NamedLWLockTrancheRequestArray == NULL)
701 : {
2621 rhaas 702 GIC 3 : NamedLWLockTrancheRequestsAllocated = 16;
703 3 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
704 3 : MemoryContextAlloc(TopMemoryContext,
2621 rhaas 705 ECB : NamedLWLockTrancheRequestsAllocated
706 : * sizeof(NamedLWLockTrancheRequest));
2621 rhaas 707 EUB : }
708 :
2621 rhaas 709 GBC 3 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
2621 rhaas 710 EUB : {
647 drowley 711 UIC 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
2621 rhaas 712 EUB :
2621 rhaas 713 UIC 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
714 0 : repalloc(NamedLWLockTrancheRequestArray,
2621 rhaas 715 ECB : i * sizeof(NamedLWLockTrancheRequest));
2621 rhaas 716 LBC 0 : NamedLWLockTrancheRequestsAllocated = i;
2621 rhaas 717 ECB : }
718 :
2621 rhaas 719 CBC 3 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
1060 tgl 720 3 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
1060 tgl 721 GIC 3 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
2621 rhaas 722 3 : request->num_lwlocks = num_lwlocks;
723 3 : NamedLWLockTrancheRequests++;
724 3 : }
725 :
3359 rhaas 726 ECB : /*
727 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
728 : */
729 : void
3359 rhaas 730 GIC 23373589 : LWLockInitialize(LWLock *lock, int tranche_id)
731 : {
3027 andres 732 CBC 23373589 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
3027 andres 733 ECB : #ifdef LOCK_DEBUG
734 : pg_atomic_init_u32(&lock->nwaiters, 0);
735 : #endif
3359 rhaas 736 GIC 23373589 : lock->tranche = tranche_id;
2428 737 23373589 : proclist_init(&lock->waiters);
3359 738 23373589 : }
739 :
740 : /*
741 : * Report start of wait event for light-weight locks.
742 : *
 743 : * This function is used by all the lightweight lock calls which
 2586 rhaas 744 ECB : * need to wait to acquire the lock. It distinguishes the wait
 745 : * event based on the tranche and lock id.
746 : */
747 : static inline void
2586 rhaas 748 GIC 15736 : LWLockReportWaitStart(LWLock *lock)
749 : {
2305 750 15736 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
2586 751 15736 : }
752 :
2586 rhaas 753 ECB : /*
754 : * Report end of wait event for light-weight locks.
755 : */
756 : static inline void
2569 andres 757 GIC 15736 : LWLockReportWaitEnd(void)
758 : {
2586 rhaas 759 15736 : pgstat_report_wait_end();
760 15736 : }
761 :
2586 rhaas 762 EUB : /*
763 : * Return the name of an LWLock tranche.
764 : */
1060 tgl 765 : static const char *
1060 tgl 766 UBC 0 : GetLWTrancheName(uint16 trancheId)
767 : {
768 : /* Individual LWLock? */
769 0 : if (trancheId < NUM_INDIVIDUAL_LWLOCKS)
1059 770 0 : return IndividualLWLockNames[trancheId];
771 :
772 : /* Built-in tranche? */
1060 tgl 773 UIC 0 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
774 0 : return BuiltinTrancheNames[trancheId - NUM_INDIVIDUAL_LWLOCKS];
775 :
776 : /*
1060 tgl 777 EUB : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
778 : * it's possible that the tranche has never been registered in the current
779 : * process, in which case give up and return "extension".
2586 rhaas 780 : */
1060 tgl 781 UBC 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
782 :
783 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
1060 tgl 784 UIC 0 : LWLockTrancheNames[trancheId] == NULL)
2586 rhaas 785 0 : return "extension";
786 :
1060 tgl 787 0 : return LWLockTrancheNames[trancheId];
788 : }
789 :
1060 tgl 790 EUB : /*
791 : * Return an identifier for an LWLock based on the wait class and event.
792 : */
793 : const char *
1060 tgl 794 UBC 0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
795 : {
1060 tgl 796 UIC 0 : Assert(classId == PG_WAIT_LWLOCK);
797 : /* The event IDs are just tranche numbers. */
798 0 : return GetLWTrancheName(eventId);
799 : }
800 :
801 : /*
802 : * Internal function that tries to atomically acquire the lwlock in the passed
803 : * in mode.
804 : *
805 : * This function will not block waiting for a lock to become free - that's the
 806 : * caller's job.
3027 andres 807 ECB : *
808 : * Returns true if the lock isn't free and we need to wait.
809 : */
810 : static bool
2878 bruce 811 CBC 295506755 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
812 : {
813 : uint32 old_state;
814 :
163 peter 815 GNC 295506755 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
816 :
2809 andres 817 ECB : /*
818 : * Read once outside the loop, later iterations will get the newer value
819 : * via compare & exchange.
820 : */
2809 andres 821 CBC 295506755 : old_state = pg_atomic_read_u32(&lock->state);
822 :
823 : /* loop until we've determined whether we could acquire the lock or not */
824 : while (true)
3027 825 46480 : {
826 : uint32 desired_state;
2878 bruce 827 ECB : bool lock_free;
828 :
2809 andres 829 CBC 295553235 : desired_state = old_state;
3027 andres 830 ECB :
3027 andres 831 CBC 295553235 : if (mode == LW_EXCLUSIVE)
832 : {
2809 andres 833 GIC 146118898 : lock_free = (old_state & LW_LOCK_MASK) == 0;
3027 834 146118898 : if (lock_free)
3027 andres 835 CBC 146076408 : desired_state += LW_VAL_EXCLUSIVE;
3027 andres 836 ECB : }
837 : else
838 : {
2809 andres 839 GIC 149434337 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
3027 840 149434337 : if (lock_free)
841 149427435 : desired_state += LW_VAL_SHARED;
842 : }
843 :
844 : /*
845 : * Attempt to swap in the state we are expecting. If we didn't see
846 : * lock to be free, that's just the old value. If we saw it as free,
847 : * we'll attempt to mark it acquired. The reason that we always swap
848 : * in the value is that this doubles as a memory barrier. We could try
849 : * to be smarter and only swap in values if we saw the lock as free,
 3027 andres 850 ECB : * but benchmarks haven't shown it to be beneficial so far.
851 : *
852 : * Retry if the value changed since we last looked at it.
853 : */
3027 andres 854 GIC 295553235 : if (pg_atomic_compare_exchange_u32(&lock->state,
855 : &old_state, desired_state))
856 : {
857 295506755 : if (lock_free)
858 : {
859 : /* Great! Got the lock. */
3027 andres 860 ECB : #ifdef LOCK_DEBUG
861 : if (mode == LW_EXCLUSIVE)
862 : lock->owner = MyProc;
863 : #endif
3027 andres 864 GIC 295463867 : return false;
865 : }
866 : else
2253 heikki.linnakangas 867 42888 : return true; /* somebody else has the lock */
868 : }
869 : }
870 : pg_unreachable();
871 : }
872 :
873 : /*
874 : * Lock the LWLock's wait list against concurrent activity.
875 : *
876 : * NB: even though the wait list is locked, non-conflicting lock operations
877 : * may still happen concurrently.
2555 andres 878 ECB : *
879 : * Time spent holding mutex should be short!
880 : */
881 : static void
2555 andres 882 GIC 23222383 : LWLockWaitListLock(LWLock *lock)
883 : {
884 : uint32 old_state;
885 : #ifdef LWLOCK_STATS
886 : lwlock_stats *lwstats;
887 : uint32 delays = 0;
888 :
889 : lwstats = get_lwlock_stats_entry(lock);
890 : #endif
2555 andres 891 ECB :
892 : while (true)
893 : {
894 : /* always try once to acquire lock directly */
2555 andres 895 GIC 23226988 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
896 23226988 : if (!(old_state & LW_FLAG_LOCKED))
897 23222383 : break; /* got lock */
898 :
2555 andres 899 ECB : /* and then spin without atomic operations until lock is released */
900 : {
2551 901 : SpinDelayStatus delayStatus;
902 :
2551 andres 903 CBC 4605 : init_local_spin_delay(&delayStatus);
2555 andres 904 ECB :
2555 andres 905 GIC 46227 : while (old_state & LW_FLAG_LOCKED)
906 : {
907 41622 : perform_spin_delay(&delayStatus);
908 41622 : old_state = pg_atomic_read_u32(&lock->state);
2555 andres 909 ECB : }
910 : #ifdef LWLOCK_STATS
911 : delays += delayStatus.delays;
912 : #endif
2555 andres 913 GIC 4605 : finish_spin_delay(&delayStatus);
914 : }
915 :
916 : /*
917 : * Retry. The lock might obviously already be re-acquired by the time
918 : * we're attempting to get it again.
919 : */
920 : }
2555 andres 921 ECB :
922 : #ifdef LWLOCK_STATS
923 : lwstats->spin_delay_count += delays;
924 : #endif
2555 andres 925 GIC 23222383 : }
926 :
927 : /*
928 : * Unlock the LWLock's wait list.
929 : *
2555 andres 930 ECB : * Note that it can be more efficient to manipulate flags and release the
931 : * locks in a single atomic operation.
932 : */
933 : static void
2555 andres 934 CBC 23193700 : LWLockWaitListUnlock(LWLock *lock)
935 : {
2118 tgl 936 ECB : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
2555 andres 937 :
2555 andres 938 GIC 23193700 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
939 :
940 23193700 : Assert(old_state & LW_FLAG_LOCKED);
941 23193700 : }
942 :
3027 andres 943 ECB : /*
944 : * Wakeup all the lockers that currently have a chance to acquire the lock.
945 : */
946 : static void
3027 andres 947 GIC 28683 : LWLockWakeup(LWLock *lock)
948 : {
949 : bool new_release_ok;
3027 andres 950 CBC 28683 : bool wokeup_somebody = false;
951 : proclist_head wakeup;
2428 rhaas 952 ECB : proclist_mutable_iter iter;
953 :
2428 rhaas 954 GIC 28683 : proclist_init(&wakeup);
3027 andres 955 ECB :
3027 andres 956 GIC 28683 : new_release_ok = true;
3027 andres 957 ECB :
958 : /* lock wait list while collecting backends to wake up */
2555 andres 959 CBC 28683 : LWLockWaitListLock(lock);
960 :
2428 rhaas 961 34848 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
3027 andres 962 ECB : {
2428 rhaas 963 GIC 17300 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
3027 andres 964 ECB :
3027 andres 965 CBC 17300 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
3027 andres 966 GIC 99 : continue;
3027 andres 967 ECB :
2428 rhaas 968 GIC 17201 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
969 17201 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
970 :
3027 andres 971 17201 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
972 : {
973 : /*
3027 andres 974 ECB : * Prevent additional wakeups until retryer gets to run. Backends
975 : * that are just waiting for the lock to become free don't retry
976 : * automatically.
977 : */
3027 andres 978 GIC 14131 : new_release_ok = false;
2878 bruce 979 ECB :
980 : /*
981 : * Don't wakeup (further) exclusive locks.
982 : */
3027 andres 983 GIC 14131 : wokeup_somebody = true;
984 : }
985 :
986 : /*
987 : * Signal that the process isn't on the wait list anymore. This allows
988 : * LWLockDequeueSelf() to remove itself of the waitlist with a
989 : * proclist_delete(), rather than having to check if it has been
990 : * removed from the list.
991 : */
140 andres 992 GNC 17201 : Assert(waiter->lwWaiting == LW_WS_WAITING);
993 17201 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
994 :
995 : /*
996 : * Once we've woken up an exclusive lock, there's no point in waking
3027 andres 997 ECB : * up anybody else.
998 : */
2878 bruce 999 GIC 17201 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
3027 andres 1000 11135 : break;
1001 : }
1002 :
2428 rhaas 1003 28683 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
3027 andres 1004 ECB :
2555 1005 : /* unset required flags, and release lock, in one fell swoop */
1006 : {
1007 : uint32 old_state;
1008 : uint32 desired_state;
1009 :
2555 andres 1010 GIC 28683 : old_state = pg_atomic_read_u32(&lock->state);
1011 : while (true)
1012 : {
1013 28795 : desired_state = old_state;
1014 :
2555 andres 1015 ECB : /* compute desired flags */
1016 :
2555 andres 1017 GIC 28795 : if (new_release_ok)
2555 andres 1018 CBC 14802 : desired_state |= LW_FLAG_RELEASE_OK;
1019 : else
2555 andres 1020 GIC 13993 : desired_state &= ~LW_FLAG_RELEASE_OK;
1021 :
2428 rhaas 1022 CBC 28795 : if (proclist_is_empty(&wakeup))
2555 andres 1023 13184 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1024 :
1025 28795 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1026 :
1027 28795 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
2555 andres 1028 ECB : desired_state))
2555 andres 1029 GIC 28683 : break;
2555 andres 1030 ECB : }
1031 : }
3027 1032 :
1033 : /* Awaken any waiters I removed from the queue. */
2428 rhaas 1034 CBC 45884 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1035 : {
2428 rhaas 1036 GIC 17201 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1037 :
1038 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
2428 rhaas 1039 CBC 17201 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1040 :
3027 andres 1041 ECB : /*
1042 : * Guarantee that lwWaiting being unset only becomes visible once the
 1043 : * unlink from the list has completed. Otherwise the target backend
 1044 : * could be woken up for some other reason and enqueue for a new lock - if
1045 : * that happens before the list unlink happens, the list would end up
1046 : * being corrupted.
1047 : *
1048 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1049 : * another lock.
1050 : */
3027 andres 1051 GIC 17201 : pg_write_barrier();
140 andres 1052 GNC 17201 : waiter->lwWaiting = LW_WS_NOT_WAITING;
2309 tgl 1053 GIC 17201 : PGSemaphoreUnlock(waiter->sem);
1054 : }
3027 andres 1055 28683 : }
3027 andres 1056 ECB :
1057 : /*
1058 : * Add ourselves to the end of the queue.
1059 : *
1060 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1061 : */
1062 : static void
3027 andres 1063 GIC 31544 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1064 : {
1065 : /*
1066 : * If we don't have a PGPROC structure, there's no way to wait. This
1067 : * should never occur, since MyProc should only be null during shared
3027 andres 1068 ECB : * memory initialization.
1069 : */
3027 andres 1070 GIC 31544 : if (MyProc == NULL)
3027 andres 1071 UIC 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1072 :
140 andres 1073 GNC 31544 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
3027 andres 1074 UIC 0 : elog(PANIC, "queueing for lock while waiting on another one");
3027 andres 1075 ECB :
2555 andres 1076 GBC 31544 : LWLockWaitListLock(lock);
1077 :
3027 andres 1078 ECB : /* setting the flag is protected by the spinlock */
3027 andres 1079 GBC 31544 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1080 :
140 andres 1081 GNC 31544 : MyProc->lwWaiting = LW_WS_WAITING;
3027 andres 1082 GIC 31544 : MyProc->lwWaitMode = mode;
1083 :
3027 andres 1084 ECB : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
3027 andres 1085 GIC 31544 : if (mode == LW_WAIT_UNTIL_FREE)
2428 rhaas 1086 CBC 3924 : proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
3027 andres 1087 ECB : else
2428 rhaas 1088 GIC 27620 : proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1089 :
3027 andres 1090 ECB : /* Can release the mutex now */
2555 andres 1091 CBC 31544 : LWLockWaitListUnlock(lock);
1092 :
3027 andres 1093 ECB : #ifdef LOCK_DEBUG
1094 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1095 : #endif
3027 andres 1096 CBC 31544 : }
1097 :
1098 : /*
1099 : * Remove ourselves from the waitlist.
1100 : *
3027 andres 1101 ECB : * This is used if we queued ourselves because we thought we needed to sleep
1102 : * but, after further checking, we discovered that we don't actually need to
1103 : * do so.
1104 : */
1105 : static void
3027 andres 1106 GIC 15808 : LWLockDequeueSelf(LWLock *lock)
1107 : {
1108 : bool on_waitlist;
1109 :
3027 andres 1110 ECB : #ifdef LWLOCK_STATS
1111 : lwlock_stats *lwstats;
1112 :
1113 : lwstats = get_lwlock_stats_entry(lock);
1114 :
1115 : lwstats->dequeue_self_count++;
1116 : #endif
1117 :
2555 andres 1118 GIC 15808 : LWLockWaitListLock(lock);
1119 :
1120 : /*
1121 : * Remove ourselves from the waitlist, unless we've already been
1122 : * removed. The removal happens with the wait list lock held, so there's
1123 : * no race in this check.
1124 : */
140 andres 1125 GNC 15808 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1126 15808 : if (on_waitlist)
1127 14250 : proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
3027 andres 1128 ECB :
2428 rhaas 1129 CBC 15808 : if (proclist_is_empty(&lock->waiters) &&
3027 andres 1130 GIC 15713 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
3027 andres 1131 ECB : {
3027 andres 1132 GIC 15710 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1133 : }
1134 :
2555 andres 1135 ECB : /* XXX: combine with fetch_and above? */
2555 andres 1136 GIC 15808 : LWLockWaitListUnlock(lock);
1137 :
3027 andres 1138 ECB : /* clear waiting state again, nice for debugging */
140 andres 1139 GNC 15808 : if (on_waitlist)
1140 14250 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1141 : else
3027 andres 1142 ECB : {
2878 bruce 1143 GIC 1558 : int extraWaits = 0;
1144 :
1145 : /*
1146 : * Somebody else dequeued us and has or will wake us up. Deal with the
1147 : * superfluous absorption of a wakeup.
1148 : */
1149 :
1150 : /*
1151 : * Reset RELEASE_OK flag if somebody woke us before we removed
1152 : * ourselves - they'll have set it to false.
3027 andres 1153 ECB : */
3027 andres 1154 GIC 1558 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1155 :
1156 : /*
1157 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1158 : * get reset at some inconvenient point later. Most of the time this
1159 : * will immediately return.
1160 : */
1161 : for (;;)
3027 andres 1162 ECB : {
2309 tgl 1163 CBC 1558 : PGSemaphoreLock(MyProc->sem);
140 andres 1164 GNC 1558 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
3027 andres 1165 GBC 1558 : break;
3027 andres 1166 UIC 0 : extraWaits++;
1167 : }
1168 :
1169 : /*
1170 : * Fix the process wait semaphore's count for any absorbed wakeups.
3027 andres 1171 ECB : */
3027 andres 1172 GBC 1558 : while (extraWaits-- > 0)
2309 tgl 1173 UIC 0 : PGSemaphoreUnlock(MyProc->sem);
1174 : }
1175 :
1176 : #ifdef LOCK_DEBUG
1177 : {
1178 : /* not waiting anymore */
1179 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1180 :
1181 : Assert(nwaiters < MAX_BACKENDS);
1182 : }
3027 andres 1183 ECB : #endif
3027 andres 1184 GIC 15808 : }
1185 :
1186 : /*
1187 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1188 : *
1189 : * If the lock is not available, sleep until it is. Returns true if the lock
1190 : * was available immediately, false if we had to sleep.
1191 : *
1192 : * Side effect: cancel/die interrupts are held off until lock release.
1193 : */
3306 heikki.linnakangas 1194 ECB : bool
2809 andres 1195 GIC 292824677 : LWLockAcquire(LWLock *lock, LWLockMode mode)
7862 tgl 1196 ECB : {
7607 JanWieck 1197 CBC 292824677 : PGPROC *proc = MyProc;
3306 heikki.linnakangas 1198 292824677 : bool result = true;
7762 tgl 1199 GIC 292824677 : int extraWaits = 0;
1200 : #ifdef LWLOCK_STATS
1201 : lwlock_stats *lwstats;
1202 :
1203 : lwstats = get_lwlock_stats_entry(lock);
1204 : #endif
7862 tgl 1205 ECB :
163 peter 1206 GNC 292824677 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1207 :
1208 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1209 :
1210 : #ifdef LWLOCK_STATS
1211 : /* Count lock acquisition attempts */
1212 : if (mode == LW_EXCLUSIVE)
1213 : lwstats->ex_acquire_count++;
1214 : else
1215 : lwstats->sh_acquire_count++;
1216 : #endif /* LWLOCK_STATS */
1217 :
1218 : /*
1219 : * We can't wait if we haven't got a PGPROC. This should only occur
1220 : * during bootstrap or shared memory initialization. Put an Assert here
1221 : * to catch unsafe coding practices.
7501 tgl 1222 ECB : */
7501 tgl 1223 GIC 292824677 : Assert(!(proc == NULL && IsUnderPostmaster));
1224 :
6575 tgl 1225 ECB : /* Ensure we will have room to remember the lock */
6575 tgl 1226 GBC 292824677 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
6575 tgl 1227 UIC 0 : elog(ERROR, "too many LWLocks taken");
1228 :
1229 : /*
1230 : * Lock out cancel/die interrupts until we exit the code section protected
1231 : * by the LWLock. This ensures that interrupts will not interfere with
1232 : * manipulations of data structures in shared memory.
7862 tgl 1233 ECB : */
7862 tgl 1234 GIC 292824677 : HOLD_INTERRUPTS();
1235 :
1236 : /*
1237 : * Loop here to try to acquire lock after each time we are signaled by
1238 : * LWLockRelease.
1239 : *
1240 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1241 : * lock, rather than retrying and possibly having to go back to sleep. But
1242 : * in practice that is no good because it means a process swap for every
1243 : * lock acquisition when two or more processes are contending for the same
1244 : * lock. Since LWLocks are normally used to protect not-very-long
1245 : * sections of computation, a process needs to be able to acquire and
1246 : * release the same lock many times during a single CPU time slice, even
1247 : * in the presence of contention. The efficiency of being able to do that
1248 : * outweighs the inefficiency of sometimes wasting a process dispatch
1249 : * cycle because the lock is not free when a released waiter finally gets
1250 : * to run. See pgsql-hackers archives for 29-Dec-01.
1251 : */
7762 tgl 1252 ECB : for (;;)
7771 bruce 1253 GIC 12665 : {
1254 : bool mustwait;
1255 :
1256 : /*
1257 : * Try to grab the lock the first time, we're not in the waitqueue
1258 : * yet/anymore.
3027 andres 1259 ECB : */
3027 andres 1260 GIC 292837342 : mustwait = LWLockAttemptLock(lock, mode);
7862 tgl 1261 ECB :
7762 tgl 1262 GIC 292837342 : if (!mustwait)
1263 : {
3027 andres 1264 ECB : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
7762 tgl 1265 GIC 292809722 : break; /* got the lock */
1266 : }
1267 :
1268 : /*
1269 : * Ok, at this point we couldn't grab the lock on the first try. We
1270 : * cannot simply queue ourselves to the end of the list and wait to be
1271 : * woken up because by now the lock could long have been released.
1272 : * Instead add us to the queue and try to grab the lock again. If we
1273 : * succeed we need to revert the queuing and be happy, otherwise we
1274 : * recheck the lock. If we still couldn't grab it, we know that the
1275 : * other locker will see our queue entries when releasing since they
1276 : * existed before we checked for the lock.
1277 : */
1278 :
3027 andres 1279 ECB : /* add to the queue */
3027 andres 1280 GIC 27620 : LWLockQueueSelf(lock, mode);
1281 :
3027 andres 1282 ECB : /* we're now guaranteed to be woken up if necessary */
3027 andres 1283 GIC 27620 : mustwait = LWLockAttemptLock(lock, mode);
1284 :
3027 andres 1285 ECB : /* ok, grabbed the lock the second time round, need to undo queueing */
3027 andres 1286 GIC 27620 : if (!mustwait)
1287 : {
1288 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
3027 andres 1289 ECB :
3027 andres 1290 CBC 14955 : LWLockDequeueSelf(lock);
3027 andres 1291 GIC 14955 : break;
1292 : }
1293 :
1294 : /*
1295 : * Wait until awakened.
1296 : *
1297 : * It is possible that we get awakened for a reason other than being
1298 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1299 : * we've gotten the LWLock, re-increment the sema by the number of
1300 : * additional signals received.
1301 : */
1302 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1303 :
1304 : #ifdef LWLOCK_STATS
1305 : lwstats->block_count++;
1306 : #endif
6197 tgl 1307 ECB :
2586 rhaas 1308 GIC 12665 : LWLockReportWaitStart(lock);
1309 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1310 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1311 :
1312 : for (;;)
7862 tgl 1313 ECB : {
2309 tgl 1314 CBC 12665 : PGSemaphoreLock(proc->sem);
140 andres 1315 GNC 12665 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
7862 tgl 1316 GBC 12665 : break;
7862 tgl 1317 UIC 0 : extraWaits++;
1318 : }
1319 :
3027 andres 1320 ECB : /* Retrying, allow LWLockRelease to release waiters again. */
3027 andres 1321 GIC 12665 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1322 :
1323 : #ifdef LOCK_DEBUG
1324 : {
1325 : /* not waiting anymore */
1326 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1327 :
1328 : Assert(nwaiters < MAX_BACKENDS);
1329 : }
1330 : #endif
1331 :
1332 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
706 peter 1333 ECB : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
2586 rhaas 1334 GIC 12665 : LWLockReportWaitEnd();
1335 :
1336 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1337 :
7762 tgl 1338 ECB : /* Now loop back and try to acquire lock again. */
3306 heikki.linnakangas 1339 GIC 12665 : result = false;
1340 : }
1341 :
1342 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1343 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1344 :
7862 tgl 1345 ECB : /* Add lock to list of locks held by this backend */
3027 andres 1346 CBC 292824677 : held_lwlocks[num_held_lwlocks].lock = lock;
3027 andres 1347 GIC 292824677 : held_lwlocks[num_held_lwlocks++].mode = mode;
1348 :
1349 : /*
1350 : * Fix the process wait semaphore's count for any absorbed wakeups.
7762 tgl 1351 ECB : */
7762 tgl 1352 GBC 292824677 : while (extraWaits-- > 0)
2309 tgl 1353 UIC 0 : PGSemaphoreUnlock(proc->sem);
3306 heikki.linnakangas 1354 ECB :
3306 heikki.linnakangas 1355 GIC 292824677 : return result;
1356 : }
1357 :
1358 : /*
1359 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1360 : *
1361 : * If the lock is not available, return false with no side-effects.
1362 : *
1363 : * If successful, cancel/die interrupts are held off until lock release.
1364 : */
7862 tgl 1365 ECB : bool
3121 rhaas 1366 GIC 2344925 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1367 : {
1368 : bool mustwait;
7862 tgl 1369 ECB :
163 peter 1370 GNC 2344925 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1371 :
1372 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1373 :
6575 tgl 1374 ECB : /* Ensure we will have room to remember the lock */
6575 tgl 1375 GBC 2344925 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
6575 tgl 1376 UIC 0 : elog(ERROR, "too many LWLocks taken");
1377 :
1378 : /*
1379 : * Lock out cancel/die interrupts until we exit the code section protected
1380 : * by the LWLock. This ensures that interrupts will not interfere with
1381 : * manipulations of data structures in shared memory.
7862 tgl 1382 ECB : */
7862 tgl 1383 GIC 2344925 : HOLD_INTERRUPTS();
1384 :
3027 andres 1385 ECB : /* Check for the lock */
3027 andres 1386 GIC 2344925 : mustwait = LWLockAttemptLock(lock, mode);
7862 tgl 1387 ECB :
7862 tgl 1388 GIC 2344925 : if (mustwait)
1389 : {
7862 tgl 1390 ECB : /* Failed to get lock, so release interrupt holdoff */
7862 tgl 1391 GIC 1112 : RESUME_INTERRUPTS();
1392 :
1393 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1394 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1395 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1396 : }
1397 : else
1398 : {
7862 tgl 1399 ECB : /* Add lock to list of locks held by this backend */
3027 andres 1400 CBC 2343813 : held_lwlocks[num_held_lwlocks].lock = lock;
3027 andres 1401 GIC 2343813 : held_lwlocks[num_held_lwlocks++].mode = mode;
1402 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1403 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
7862 tgl 1404 ECB : }
7862 tgl 1405 GIC 2344925 : return !mustwait;
1406 : }
1407 :
1408 : /*
1409 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1410 : *
1411 : * The semantics of this function are a bit funky. If the lock is currently
1412 : * free, it is acquired in the given mode, and the function returns true. If
1413 : * the lock isn't immediately free, the function waits until it is released
1414 : * and returns false, but does not acquire the lock.
1415 : *
1416 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1417 : * holding WALWriteLock, it can flush the commit records of many other
1418 : * backends as a side-effect. Those other backends need to wait until the
1419 : * flush finishes, but don't need to acquire the lock anymore. They can just
1420 : * wake up, observe that their records have already been flushed, and return.
1421 : */
4087 heikki.linnakangas 1422 ECB : bool
3121 rhaas 1423 GIC 296114 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
4087 heikki.linnakangas 1424 ECB : {
4087 heikki.linnakangas 1425 GIC 296114 : PGPROC *proc = MyProc;
4087 heikki.linnakangas 1426 ECB : bool mustwait;
4087 heikki.linnakangas 1427 GIC 296114 : int extraWaits = 0;
1428 : #ifdef LWLOCK_STATS
1429 : lwlock_stats *lwstats;
1430 :
1431 : lwstats = get_lwlock_stats_entry(lock);
1432 : #endif
4079 heikki.linnakangas 1433 ECB :
3027 andres 1434 GIC 296114 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1435 :
1436 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1437 :
4087 heikki.linnakangas 1438 ECB : /* Ensure we will have room to remember the lock */
4087 heikki.linnakangas 1439 GBC 296114 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
4087 heikki.linnakangas 1440 UIC 0 : elog(ERROR, "too many LWLocks taken");
1441 :
1442 : /*
1443 : * Lock out cancel/die interrupts until we exit the code section protected
1444 : * by the LWLock. This ensures that interrupts will not interfere with
1445 : * manipulations of data structures in shared memory.
4087 heikki.linnakangas 1446 ECB : */
4087 heikki.linnakangas 1447 GIC 296114 : HOLD_INTERRUPTS();
1448 :
1449 : /*
1450 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1451 : * protocol as LWLockAcquire(). Check its comments for details.
3027 andres 1452 ECB : */
3027 andres 1453 GIC 296114 : mustwait = LWLockAttemptLock(lock, mode);
4087 heikki.linnakangas 1454 ECB :
4087 heikki.linnakangas 1455 GIC 296114 : if (mustwait)
4087 heikki.linnakangas 1456 ECB : {
3027 andres 1457 GIC 754 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
4087 heikki.linnakangas 1458 ECB :
3027 andres 1459 GIC 754 : mustwait = LWLockAttemptLock(lock, mode);
4087 heikki.linnakangas 1460 ECB :
3027 andres 1461 GIC 754 : if (mustwait)
1462 : {
1463 : /*
1464 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1465 : * bogus wakeups.
1466 : */
1467 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1468 :
1469 : #ifdef LWLOCK_STATS
1470 : lwstats->block_count++;
1471 : #endif
2586 rhaas 1472 ECB :
2586 rhaas 1473 GIC 737 : LWLockReportWaitStart(lock);
1474 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1475 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1476 :
1477 : for (;;)
3027 andres 1478 ECB : {
2309 tgl 1479 CBC 737 : PGSemaphoreLock(proc->sem);
140 andres 1480 GNC 737 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
3027 andres 1481 GBC 737 : break;
3027 andres 1482 UIC 0 : extraWaits++;
1483 : }
1484 :
1485 : #ifdef LOCK_DEBUG
1486 : {
1487 : /* not waiting anymore */
1488 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1489 :
1490 : Assert(nwaiters < MAX_BACKENDS);
1491 : }
1492 : #endif
1493 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
706 peter 1494 ECB : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
2586 rhaas 1495 GIC 737 : LWLockReportWaitEnd();
1496 :
1497 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1498 : }
1499 : else
1500 : {
1501 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1502 :
1503 : /*
1504 : * Got the lock on the second attempt, so undo queueing. We need to
1505 : * treat this as having successfully acquired the lock; otherwise we'd
1506 : * not necessarily wake up people we've prevented from acquiring
1507 : * the lock.
2878 bruce 1508 ECB : */
3027 andres 1509 GIC 17 : LWLockDequeueSelf(lock);
1510 : }
1511 : }
1512 :
1513 : /*
1514 : * Fix the process wait semaphore's count for any absorbed wakeups.
4087 heikki.linnakangas 1515 ECB : */
4087 heikki.linnakangas 1516 GBC 296114 : while (extraWaits-- > 0)
2309 tgl 1517 UIC 0 : PGSemaphoreUnlock(proc->sem);
4087 heikki.linnakangas 1518 ECB :
4087 heikki.linnakangas 1519 GIC 296114 : if (mustwait)
1520 : {
4087 heikki.linnakangas 1521 ECB : /* Failed to get lock, so release interrupt holdoff */
4087 heikki.linnakangas 1522 GIC 737 : RESUME_INTERRUPTS();
1523 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1524 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1525 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1526 : }
1527 : else
1528 : {
1529 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
4087 heikki.linnakangas 1530 ECB : /* Add lock to list of locks held by this backend */
3027 andres 1531 CBC 295377 : held_lwlocks[num_held_lwlocks].lock = lock;
3027 andres 1532 GIC 295377 : held_lwlocks[num_held_lwlocks++].mode = mode;
1533 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1534 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1535 : }
4087 heikki.linnakangas 1536 ECB :
4087 heikki.linnakangas 1537 GIC 296114 : return !mustwait;
1538 : }
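/*
 * Illustrative sketch, not part of lwlock.c: the WALWriteLock-style pattern
 * described above.  Either we obtain the lock and perform the flush
 * ourselves, or LWLockAcquireOrWait() returns false once the current holder
 * releases the lock, at which point we recheck whether our target was
 * already flushed as a side effect.  demo_flushed_up_to and demo_do_flush()
 * are hypothetical stand-ins for the real WAL state and flush routine.
 */
#include "postgres.h"

#include "port/atomics.h"
#include "storage/lwlock.h"

extern pg_atomic_uint64 *demo_flushed_up_to;	/* hypothetical shared progress */
extern void demo_do_flush(uint64 upto);			/* hypothetical flush work */

static void
demo_flush_to(uint64 target)
{
	while (pg_atomic_read_u64(demo_flushed_up_to) < target)
	{
		if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
		{
			/* We hold the lock: do the flush for everyone up to target. */
			demo_do_flush(target);
			pg_atomic_write_u64(demo_flushed_up_to, target);
			LWLockRelease(WALWriteLock);
			return;
		}

		/*
		 * We waited without acquiring the lock; loop and recheck whether the
		 * previous holder already flushed past our target.
		 */
	}
}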
1539 :
1540 : /*
1541 : * Does the lwlock, in its current state, require the caller to wait for the
1542 : * variable's value to change?
1543 : *
1544 : * If we don't need to wait, and it's because the value of the variable has
1545 : * changed, store the current value in newval.
1546 : *
1547 : * *result is set to true if the lock was free, and false otherwise.
1548 : */
2809 andres 1549 ECB : static bool
2809 andres 1550 GIC 5904924 : LWLockConflictsWithVar(LWLock *lock,
1551 : uint64 *valptr, uint64 oldval, uint64 *newval,
1552 : bool *result)
1553 : {
1554 : bool mustwait;
1555 : uint64 value;
1556 :
1557 : /*
1558 : * Test first to see if the slot is free right now.
1559 : *
1560 : * XXX: the caller uses a spinlock before this, so we don't need a memory
1561 : * barrier here as far as the current usage is concerned. But that might
1562 : * not be safe in general.
2809 andres 1563 ECB : */
2809 andres 1564 GIC 5904924 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
2809 andres 1565 ECB :
2809 andres 1566 GIC 5904924 : if (!mustwait)
2809 andres 1567 ECB : {
2809 andres 1568 CBC 2958802 : *result = true;
2809 andres 1569 GIC 2958802 : return false;
1570 : }
2809 andres 1571 ECB :
2809 andres 1572 GIC 2946122 : *result = false;
1573 :
1574 : /*
1575 : * Read value using the lwlock's wait list lock, as we can't generally
1576 : * rely on atomic 64 bit reads/stores. TODO: On platforms with a way to
1577 : * do atomic 64 bit reads/writes the spinlock should be optimized away.
2809 andres 1578 ECB : */
2555 andres 1579 CBC 2946122 : LWLockWaitListLock(lock);
2809 1580 2946122 : value = *valptr;
2555 andres 1581 GIC 2946122 : LWLockWaitListUnlock(lock);
2809 andres 1582 ECB :
2809 andres 1583 GIC 2946122 : if (value != oldval)
2809 andres 1584 ECB : {
2809 andres 1585 CBC 2940618 : mustwait = false;
2809 andres 1586 GIC 2940618 : *newval = value;
1587 : }
1588 : else
2809 andres 1589 ECB : {
2809 andres 1590 GIC 5504 : mustwait = true;
1591 : }
2809 andres 1592 ECB :
2809 andres 1593 GIC 2946122 : return mustwait;
1594 : }
1595 :
1596 : /*
1597 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1598 : *
1599 : * If the lock is held and *valptr equals oldval, waits until the lock is
1600 : * either freed, or the lock holder updates *valptr by calling
1601 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1602 : * waiting), returns true. If the lock is still held, but *valptr no longer
1603 : * matches oldval, returns false and sets *newval to the current value in
1604 : * *valptr.
1605 : *
1606 : * Note: this function ignores shared lock holders; if the lock is held
1607 : * in shared mode, returns 'true'.
1608 : */
3306 heikki.linnakangas 1609 ECB : bool
3121 rhaas 1610 GIC 5899420 : LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
3306 heikki.linnakangas 1611 ECB : {
3306 heikki.linnakangas 1612 CBC 5899420 : PGPROC *proc = MyProc;
1613 5899420 : int extraWaits = 0;
3306 heikki.linnakangas 1614 GIC 5899420 : bool result = false;
1615 : #ifdef LWLOCK_STATS
1616 : lwlock_stats *lwstats;
1617 :
1618 : lwstats = get_lwlock_stats_entry(lock);
1619 : #endif
1620 :
1621 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1622 :
1623 : /*
1624 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1625 : * cleanup mechanism to remove us from the wait queue if we got
1626 : * interrupted.
3306 heikki.linnakangas 1627 ECB : */
3306 heikki.linnakangas 1628 GIC 5899420 : HOLD_INTERRUPTS();
1629 :
1630 : /*
1631 : * Loop here to check the lock's status after each time we are signaled.
1632 : */
3306 heikki.linnakangas 1633 ECB : for (;;)
3306 heikki.linnakangas 1634 GIC 2334 : {
1635 : bool mustwait;
3306 heikki.linnakangas 1636 ECB :
2809 andres 1637 GIC 5901754 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1638 : &result);
3306 heikki.linnakangas 1639 ECB :
3306 heikki.linnakangas 1640 CBC 5901754 : if (!mustwait)
3306 heikki.linnakangas 1641 GIC 5898584 : break; /* the lock was free or value didn't match */
1642 :
1643 : /*
1644 : * Add myself to the wait queue. Note that this is racy; somebody else
1645 : * could wake up before we're finished queuing. NB: We're using nearly
1646 : * the same twice-in-a-row lock acquisition protocol as
1647 : * LWLockAcquire(). Check its comments for details. The only
1648 : * difference is that we also have to check the variable's value when
1649 : * checking the state of the lock.
3027 andres 1650 ECB : */
3027 andres 1651 GIC 3170 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1652 :
1653 : /*
1654 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1655 : * lock is released.
3306 heikki.linnakangas 1656 ECB : */
3027 andres 1657 GIC 3170 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1658 :
1659 : /*
1660 : * We're now guaranteed to be woken up if necessary. Recheck the lock
1661 : * and variables state.
3099 heikki.linnakangas 1662 ECB : */
2809 andres 1663 GIC 3170 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1664 : &result);
1665 :
2809 andres 1666 ECB : /* Ok, no conflict after we queued ourselves. Undo queueing. */
3027 andres 1667 GIC 3170 : if (!mustwait)
1668 : {
1669 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
3099 heikki.linnakangas 1670 ECB :
3027 andres 1671 CBC 836 : LWLockDequeueSelf(lock);
3027 andres 1672 GIC 836 : break;
1673 : }
1674 :
1675 : /*
1676 : * Wait until awakened.
1677 : *
1678 : * It is possible that we get awakened for a reason other than being
1679 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1680 : * we've gotten the LWLock, re-increment the sema by the number of
1681 : * additional signals received.
1682 : */
1683 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1684 :
1685 : #ifdef LWLOCK_STATS
1686 : lwstats->block_count++;
1687 : #endif
3306 heikki.linnakangas 1688 ECB :
2586 rhaas 1689 GIC 2334 : LWLockReportWaitStart(lock);
1690 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1691 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1692 :
1693 : for (;;)
3306 heikki.linnakangas 1694 ECB : {
2309 tgl 1695 CBC 2334 : PGSemaphoreLock(proc->sem);
140 andres 1696 GNC 2334 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
3306 heikki.linnakangas 1697 GBC 2334 : break;
3306 heikki.linnakangas 1698 UIC 0 : extraWaits++;
1699 : }
1700 :
1701 : #ifdef LOCK_DEBUG
1702 : {
1703 : /* not waiting anymore */
1704 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1705 :
1706 : Assert(nwaiters < MAX_BACKENDS);
1707 : }
1708 : #endif
1709 :
1710 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
706 peter 1711 ECB : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
2586 rhaas 1712 GIC 2334 : LWLockReportWaitEnd();
1713 :
1714 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1715 :
1716 : /* Now loop back and check the status of the lock again. */
1717 : }
1718 :
1719 : /*
1720 : * Fix the process wait semaphore's count for any absorbed wakeups.
3306 heikki.linnakangas 1721 ECB : */
3306 heikki.linnakangas 1722 GBC 5899420 : while (extraWaits-- > 0)
2309 tgl 1723 UIC 0 : PGSemaphoreUnlock(proc->sem);
1724 :
1725 : /*
1726 : * Now okay to allow cancel/die interrupts.
3306 heikki.linnakangas 1727 ECB : */
3306 heikki.linnakangas 1728 GIC 5899420 : RESUME_INTERRUPTS();
3306 heikki.linnakangas 1729 ECB :
3306 heikki.linnakangas 1730 GIC 5899420 : return result;
1731 : }
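/*
 * Illustrative sketch, not part of lwlock.c: the waiter side of the
 * variable protocol documented above.  The caller blocks until the lock
 * holder either releases the lock or advances the shared progress variable
 * beyond the value last seen.  DemoProgressLock is hypothetical; in core
 * PostgreSQL this pattern is used by the WAL insertion locks.
 */
#include "postgres.h"

#include "storage/lwlock.h"

typedef struct DemoProgressLock
{
	LWLock		lock;			/* held exclusively while work is in progress */
	uint64		progress;		/* updated via LWLockUpdateVar() */
} DemoProgressLock;

static uint64
demo_wait_for_progress(DemoProgressLock *plock, uint64 target)
{
	uint64		seen = 0;

	while (seen < target)
	{
		uint64		newval;

		/*
		 * Returns true once the lock is free (the holder is done); returns
		 * false with the current value in newval if the holder is still busy
		 * but has moved the variable since we last looked.
		 */
		if (LWLockWaitForVar(&plock->lock, &plock->progress, seen, &newval))
			break;
		seen = newval;
	}
	return seen;
}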
1732 :
1733 :
1734 : /*
1735 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1736 : *
1737 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1738 : * LWLockWaitForVar(). Setting the value and waking up the processes happen
1739 : * atomically so that any process calling LWLockWaitForVar() on the same lock
1740 : * is guaranteed to see the new value, and act accordingly.
1741 : *
1742 : * The caller must be holding the lock in exclusive mode.
1743 : */
3306 heikki.linnakangas 1744 ECB : void
3121 rhaas 1745 GIC 767276 : LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
1746 : {
1747 : proclist_head wakeup;
1748 : proclist_mutable_iter iter;
1749 :
1750 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
3027 andres 1751 ECB :
2428 rhaas 1752 GIC 767276 : proclist_init(&wakeup);
3306 heikki.linnakangas 1753 ECB :
2555 andres 1754 GIC 767276 : LWLockWaitListLock(lock);
3306 heikki.linnakangas 1755 ECB :
3027 andres 1756 GIC 767276 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1757 :
3306 heikki.linnakangas 1758 ECB : /* Update the lock's value */
3121 rhaas 1759 GIC 767276 : *valptr = val;
1760 :
1761 : /*
1762 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1763 : * up. They are always in the front of the queue.
3306 heikki.linnakangas 1764 ECB : */
2428 rhaas 1765 GIC 767378 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
3306 heikki.linnakangas 1766 ECB : {
2428 rhaas 1767 GIC 135 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
3027 andres 1768 ECB :
3027 andres 1769 CBC 135 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
3027 andres 1770 GIC 33 : break;
3306 heikki.linnakangas 1771 ECB :
2428 rhaas 1772 CBC 102 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
2428 rhaas 1773 GIC 102 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1774 :
1775 : /* see LWLockWakeup() */
140 andres 1776 GNC 102 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1777 102 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1778 : }
3306 heikki.linnakangas 1779 ECB :
1780 : /* We are done updating shared state of the lock itself. */
2555 andres 1781 GIC 767276 : LWLockWaitListUnlock(lock);
1782 :
1783 : /*
3306 heikki.linnakangas 1784 ECB : * Awaken any waiters I removed from the queue.
1785 : */
2428 rhaas 1786 GIC 767378 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1787 : {
1788 102 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
2878 bruce 1789 ECB :
2428 rhaas 1790 GIC 102 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
3027 andres 1791 ECB : /* check comment in LWLockWakeup() about this barrier */
3033 andres 1792 GIC 102 : pg_write_barrier();
140 andres 1793 GNC 102 : waiter->lwWaiting = LW_WS_NOT_WAITING;
2309 tgl 1794 GIC 102 : PGSemaphoreUnlock(waiter->sem);
3306 heikki.linnakangas 1795 ECB : }
3306 heikki.linnakangas 1796 CBC 767276 : }
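/*
 * Illustrative sketch, not part of lwlock.c: the holder side of the same
 * protocol, using the hypothetical DemoProgressLock from the previous
 * sketch.  The exclusive holder publishes its progress with
 * LWLockUpdateVar() so that LWLockWaitForVar() callers can stop waiting
 * early, then resets the variable while releasing the lock with
 * LWLockReleaseClearVar().  demo_do_one_step() is a hypothetical unit of
 * work.
 */
#include "postgres.h"

#include "storage/lwlock.h"

extern void demo_do_one_step(uint64 step);	/* hypothetical */

static void
demo_run_with_progress(DemoProgressLock *plock, uint64 nsteps)
{
	LWLockAcquire(&plock->lock, LW_EXCLUSIVE);

	for (uint64 step = 1; step <= nsteps; step++)
	{
		demo_do_one_step(step);

		/* Publish progress and wake any LWLockWaitForVar() waiters. */
		LWLockUpdateVar(&plock->lock, &plock->progress, step);
	}

	/* Reset the variable to "idle" and release the lock atomically. */
	LWLockReleaseClearVar(&plock->lock, &plock->progress, 0);
}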
3306 heikki.linnakangas 1797 ECB :
1798 :
7862 tgl 1799 : /*
1800 : * LWLockRelease - release a previously acquired lock
1801 : */
1802 : void
3121 rhaas 1803 GIC 295463867 : LWLockRelease(LWLock *lock)
1804 : {
1805 : LWLockMode mode;
3027 andres 1806 ECB : uint32 oldstate;
1807 : bool check_waiters;
1808 : int i;
1809 :
1810 : /*
1811 : * Remove lock from list of locks held. Usually, but not always, it will
1812 : * be the latest-acquired lock; so search array backwards.
1813 : */
7836 bruce 1814 GIC 326718739 : for (i = num_held_lwlocks; --i >= 0;)
3027 andres 1815 326718739 : if (lock == held_lwlocks[i].lock)
7862 tgl 1816 295463867 : break;
2315 sfrost 1817 ECB :
7862 tgl 1818 CBC 295463867 : if (i < 0)
2305 rhaas 1819 LBC 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1820 :
2315 sfrost 1821 CBC 295463867 : mode = held_lwlocks[i].mode;
2315 sfrost 1822 EUB :
7862 tgl 1823 GIC 295463867 : num_held_lwlocks--;
7862 tgl 1824 CBC 326718739 : for (; i < num_held_lwlocks; i++)
7836 bruce 1825 GIC 31254872 : held_lwlocks[i] = held_lwlocks[i + 1];
7862 tgl 1826 ECB :
3027 andres 1827 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
7862 tgl 1828 :
1829 : /*
1830 : * Release my hold on lock, after that it can immediately be acquired by
1831 : * others, even if we still have to wakeup other waiters.
1832 : */
3027 andres 1833 GIC 295463867 : if (mode == LW_EXCLUSIVE)
1834 146066514 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1835 : else
3027 andres 1836 CBC 149397353 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
4087 heikki.linnakangas 1837 ECB :
1838 : /* nobody else can have that kind of lock */
3027 andres 1839 CBC 295463867 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1840 :
1841 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
706 peter 1842 ECB : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1843 :
1844 : /*
1845 : * We're still waiting for backends to get scheduled; don't wake them up
1846 : * again.
1847 : */
3027 andres 1848 GIC 295463867 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1849 64605 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1850 64605 : (oldstate & LW_LOCK_MASK) == 0)
3027 andres 1851 CBC 28683 : check_waiters = true;
3027 andres 1852 ECB : else
3027 andres 1853 CBC 295435184 : check_waiters = false;
6103 peter_e 1854 ECB :
1855 : /*
3027 andres 1856 : * As waking up waiters requires the spinlock to be acquired, only do so
1857 : * if necessary.
1858 : */
3027 andres 1859 GIC 295463867 : if (check_waiters)
1860 : {
1861 : /* XXX: remove before commit? */
3027 andres 1862 ECB : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
3027 andres 1863 GIC 28683 : LWLockWakeup(lock);
1864 : }
1865 :
7862 tgl 1866 ECB : /*
1867 : * Now okay to allow cancel/die interrupts.
1868 : */
7862 tgl 1869 GIC 295463867 : RESUME_INTERRUPTS();
1870 295463867 : }
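/*
 * Illustrative sketch, not part of lwlock.c: the ordinary
 * LWLockAcquire()/LWLockRelease() pairing around a shared structure --
 * shared mode for readers, exclusive mode for writers.  DemoCounter is the
 * hypothetical struct from the earlier sketch.
 */
static uint64
demo_read_counter(DemoCounter *counter)
{
	uint64		value;

	LWLockAcquire(&counter->lock, LW_SHARED);
	value = counter->value;
	LWLockRelease(&counter->lock);

	return value;
}

static void
demo_increment_counter(DemoCounter *counter)
{
	LWLockAcquire(&counter->lock, LW_EXCLUSIVE);
	counter->value++;
	LWLockRelease(&counter->lock);
}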
1871 :
2809 andres 1872 ECB : /*
1873 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1874 : */
1875 : void
2809 andres 1876 GIC 19432950 : LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
1877 : {
2555 1878 19432950 : LWLockWaitListLock(lock);
2809 andres 1879 ECB :
1880 : /*
1881 : * Set the variable's value before releasing the lock; that prevents a
1882 : * race condition wherein a new locker acquires the lock but hasn't yet
1883 : * set the variable's value.
1884 : */
2809 andres 1885 GIC 19432950 : *valptr = val;
2555 1886 19432950 : LWLockWaitListUnlock(lock);
1887 :
2809 andres 1888 CBC 19432950 : LWLockRelease(lock);
1889 19432950 : }
1890 :
7862 tgl 1891 ECB :
1892 : /*
1893 : * LWLockReleaseAll - release all currently-held locks
1894 : *
1895 : * Used to clean up after ereport(ERROR). An important difference between this
1896 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1897 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1898 : * has been set to an appropriate level earlier in error recovery. We could
1899 : * decrement it below zero if we allow it to drop for each released lock!
1900 : */
1901 : void
7862 tgl 1902 GIC 39722 : LWLockReleaseAll(void)
1903 : {
1904 39912 : while (num_held_lwlocks > 0)
7862 tgl 1905 ECB : {
7862 tgl 1906 GIC 190 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
7862 tgl 1907 ECB :
3027 andres 1908 GIC 190 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
7862 tgl 1909 ECB : }
7862 tgl 1910 GIC 39722 : }
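/*
 * Illustrative sketch, not part of lwlock.c: a simplified error-recovery
 * block of the kind found in background processes.  After a longjmp out of
 * ereport(ERROR) the process may still hold LWLocks, so the handler calls
 * LWLockReleaseAll(); interrupts are held off explicitly here because, as
 * noted above, LWLockReleaseAll() deliberately leaves InterruptHoldoffCount
 * alone.  The real handlers perform considerably more cleanup than this.
 */
#include "postgres.h"

#include "miscadmin.h"
#include "storage/lwlock.h"
#include "utils/memutils.h"

void
demo_worker_main(void)
{
	sigjmp_buf	local_sigjmp_buf;

	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
	{
		/* Prevent interrupts while cleaning up after an error. */
		HOLD_INTERRUPTS();

		EmitErrorReport();
		LWLockReleaseAll();

		MemoryContextSwitchTo(TopMemoryContext);
		FlushErrorState();

		/* Now we can allow interrupts again. */
		RESUME_INTERRUPTS();
	}

	/* Arrange for ereport(ERROR) to longjmp back to the block above. */
	PG_exception_stack = &local_sigjmp_buf;

	for (;;)
	{
		/* ... normal work that may ereport(ERROR) ... */
	}
}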
6876 tgl 1911 ECB :
1912 :
1913 : /*
1914 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1915 : *
1916 : * This is meant as debug support only.
1917 : */
1918 : bool
202 pg 1919 GNC 88951326 : LWLockHeldByMe(LWLock *lock)
1920 : {
1921 : int i;
6876 tgl 1922 ECB :
6876 tgl 1923 GIC 132652689 : for (i = 0; i < num_held_lwlocks; i++)
1924 : {
202 pg 1925 GNC 63048384 : if (held_lwlocks[i].lock == lock)
6876 tgl 1926 CBC 19347021 : return true;
1927 : }
1928 69604305 : return false;
6876 tgl 1929 ECB : }
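/*
 * Illustrative sketch, not part of lwlock.c: the typical use of
 * LWLockHeldByMe() -- an internal routine asserts its locking precondition
 * rather than acquiring the lock itself.  DemoCounter is the hypothetical
 * struct from the earlier sketches.
 */
static uint64
demo_peek_counter_locked(DemoCounter *counter)
{
	/* Caller must already hold the lock in some mode; we don't acquire it. */
	Assert(LWLockHeldByMe(&counter->lock));

	return counter->value;
}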
1930 :
272 tmunro 1931 : /*
1932 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1933 : *
1934 : * This is meant as debug support only.
1935 : */
1936 : bool
202 pg 1937 GNC 1130820 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1938 : {
1939 : char *held_lock_addr;
272 tmunro 1940 ECB : char *begin;
1941 : char *end;
1942 : int i;
1943 :
202 pg 1944 GNC 1130820 : begin = (char *) lock;
272 tmunro 1945 GIC 1130820 : end = begin + nlocks * stride;
1946 1131949 : for (i = 0; i < num_held_lwlocks; i++)
272 tmunro 1947 ECB : {
272 tmunro 1948 CBC 1129 : held_lock_addr = (char *) held_lwlocks[i].lock;
1949 1129 : if (held_lock_addr >= begin &&
272 tmunro 1950 UIC 0 : held_lock_addr < end &&
272 tmunro 1951 LBC 0 : (held_lock_addr - begin) % stride == 0)
1952 0 : return true;
272 tmunro 1953 EUB : }
272 tmunro 1954 GBC 1130820 : return false;
272 tmunro 1955 EUB : }
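/*
 * Illustrative sketch, not part of lwlock.c: LWLockAnyHeldByMe() scans an
 * array of locks laid out with a fixed stride, which matches the usual
 * LWLockPadded arrays in shared memory.  Here it backs an assertion that
 * the caller holds none of a hypothetical partition-lock array before
 * acquiring all of them in order.
 */
#include "postgres.h"

#include "storage/lwlock.h"

#define DEMO_NUM_PARTITIONS 16

extern LWLockPadded *demo_partition_locks;	/* hypothetical shared array */

static void
demo_lock_all_partitions(void)
{
	/* For deadlock safety we must not already hold any partition lock. */
	Assert(!LWLockAnyHeldByMe(&demo_partition_locks[0].lock,
							  DEMO_NUM_PARTITIONS, sizeof(LWLockPadded)));

	for (int i = 0; i < DEMO_NUM_PARTITIONS; i++)
		LWLockAcquire(&demo_partition_locks[i].lock, LW_EXCLUSIVE);
}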
1956 :
2407 simon 1957 ECB : /*
1958 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1959 : *
1960 : * This is meant as debug support only.
1961 : */
1962 : bool
202 pg 1963 GNC 28867933 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1964 : {
1965 : int i;
2407 simon 1966 ECB :
2407 simon 1967 GIC 31125733 : for (i = 0; i < num_held_lwlocks; i++)
1968 : {
202 pg 1969 GNC 31125733 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
2407 simon 1970 CBC 28867933 : return true;
1971 : }
2407 simon 1972 LBC 0 : return false;
2407 simon 1973 ECB : }
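/*
 * Illustrative sketch, not part of lwlock.c: LWLockHeldByMeInMode() is the
 * stricter assertion -- the routine requires not just the lock but the
 * exclusive mode, because it modifies the protected data.  DemoCounter is
 * the hypothetical struct from the earlier sketches.
 */
static void
demo_reset_counter_locked(DemoCounter *counter)
{
	/* Writers must hold the lock exclusively, not merely in shared mode. */
	Assert(LWLockHeldByMeInMode(&counter->lock, LW_EXCLUSIVE));

	counter->value = 0;
}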