Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * lwlock.c
4 : : * Lightweight lock manager
5 : : *
6 : : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : : * access to shared-memory data structures. Therefore, they offer both
8 : : * exclusive and shared lock modes (to support read/write and read-only
9 : : * access to a shared object). There are few other frammishes. User-level
10 : : * locking should be done with the full lock manager --- which depends on
11 : : * LWLocks to protect its shared state.
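: : *
: : * As an illustration only (a minimal sketch, not code from this file), the
: : * two basic modes are used like this, with "counter" standing for some
: : * hypothetical shared variable the lock protects:
: : *
: : *		LWLockAcquire(lock, LW_EXCLUSIVE);
: : *		counter++;
: : *		LWLockRelease(lock);
: : *
: : *		LWLockAcquire(lock, LW_SHARED);
: : *		value = counter;
: : *		LWLockRelease(lock);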
12 : : *
13 : : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : : * wait until a variable changes value. The variable is not reset when the
15 : : * lock is acquired with LWLockAcquire; it retains the value it was set to
16 : : * when the lock was last released, and can be updated
17 : : * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : : * waits for the variable to be updated, or until the lock is free. When
19 : : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : : * appropriate value for a free lock. The meaning of the variable is up to
21 : : * the caller, the lightweight lock code just assigns and compares it.
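: : *
: : * As a rough, illustrative sketch of the intended call pattern (variable
: : * types follow the prototypes in lwlock.h; "progress" and "seen" are
: : * hypothetical names), the WAL insertion locks use the mechanism in roughly
: : * this way:
: : *
: : *	holder:
: : *		LWLockAcquire(lock, LW_EXCLUSIVE);
: : *		LWLockUpdateVar(lock, &progress, newval);	(advertise progress)
: : *		...
: : *		LWLockReleaseClearVar(lock, &progress, 0);
: : *
: : *	waiter:
: : *		if (LWLockWaitForVar(lock, &progress, seen, &seen))
: : *			... lock was, or became, free ...
: : *		else
: : *			... lock still held, but progress moved past "seen" ...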
22 : : *
23 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24 : : * Portions Copyright (c) 1994, Regents of the University of California
25 : : *
26 : : * IDENTIFICATION
27 : : * src/backend/storage/lmgr/lwlock.c
28 : : *
29 : : * NOTES:
30 : : *
31 : : * This used to be a pretty straightforward reader-writer lock
32 : : * implementation, in which the internal state was protected by a
33 : : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : : * too high for workloads/locks that were taken in shared mode very
35 : : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : : * while trying to acquire a shared lock that was actually free.
37 : : *
38 : : * Thus a new implementation was devised that provides wait-free shared lock
39 : : * acquisition for locks that aren't exclusively locked.
40 : : *
41 : : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : : * the formerly separate shared and exclusive counters and to use atomic
43 : : * operations to acquire the lock. That's fairly easy to do for plain
44 : : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : : * in the OS.
46 : : *
47 : : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : : * variable. For an exclusive lock we swap in a sentinel value
49 : : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : : *
51 : : * To release the lock we use an atomic decrement. If the
52 : : * new value is zero (we get that atomically), we know we can/have to release
53 : : * waiters.
54 : : *
55 : : * Obviously it is important that the sentinel value for exclusive locks
56 : : * doesn't conflict with the maximum number of possible share lockers -
57 : : * luckily MAX_BACKENDS makes that easily possible.
58 : : *
59 : : *
60 : : * The attentive reader might have noticed that naively doing the above has a
61 : : * glaring race condition: We try to lock using the atomic operations and
62 : : * notice that we have to wait. Unfortunately by the time we have finished
63 : : * queuing, the former locker very well might have already finished its
64 : : * work. That's problematic because we're now stuck waiting inside the OS.
65 : : *
66 : : * To mitigate those races we use a multi-phase attempt at locking:
67 : : * Phase 1: Try to do it atomically, if we succeed, nice
68 : : * Phase 2: Add ourselves to the waitqueue of the lock
69 : : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : : * the queue
71 : : * Phase 4: Sleep till wake-up, goto Phase 1
72 : : *
73 : : * This protects us against the problem above: nobody can release too
74 : : * quickly before we're queued, since after Phase 2 we're already queued.
75 : : * -------------------------------------------------------------------------
76 : : */
77 : : #include "postgres.h"
78 : :
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "port/pg_bitutils.h"
83 : : #include "postmaster/postmaster.h"
84 : : #include "storage/proc.h"
85 : : #include "storage/proclist.h"
86 : : #include "storage/spin.h"
87 : : #include "utils/memutils.h"
88 : :
89 : : #ifdef LWLOCK_STATS
90 : : #include "utils/hsearch.h"
91 : : #endif
92 : :
93 : :
94 : : /* We use the ShmemLock spinlock to protect LWLockCounter */
95 : : extern slock_t *ShmemLock;
96 : :
97 : : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
98 : : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
99 : : #define LW_FLAG_LOCKED ((uint32) 1 << 28)
100 : :
101 : : #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
102 : : #define LW_VAL_SHARED 1
103 : :
104 : : #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
105 : : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
106 : : #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
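: :
: : /*
: :  * Illustrative summary of the state word implied by the definitions above
: :  * (the #defines are authoritative):
: :  *
: :  *	bit  30		LW_FLAG_HAS_WAITERS
: :  *	bit  29		LW_FLAG_RELEASE_OK
: :  *	bit  28		LW_FLAG_LOCKED (protects the wait list)
: :  *	bit  24		LW_VAL_EXCLUSIVE (an exclusive holder is present)
: :  *	bits 0-23	number of shared holders, each adding LW_VAL_SHARED == 1
: :  *	other bits	currently unused
: :  *
: :  * So, ignoring the flag bits, state == 0 means the lock is free,
: :  * state == LW_VAL_EXCLUSIVE means it is held exclusively, and e.g.
: :  * state == 3 means it is held in shared mode by three backends.
: :  */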
107 : :
108 : : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
109 : : "MAX_BACKENDS too big for lwlock.c");
110 : :
111 : : /*
112 : : * There are three sorts of LWLock "tranches":
113 : : *
114 : : * 1. The individually-named locks defined in lwlocknames.h each have their
115 : : * own tranche. We absorb the names of these tranches from there into
116 : : * BuiltinTrancheNames here.
117 : : *
118 : : * 2. There are some predefined tranches for built-in groups of locks.
119 : : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
120 : : * appear in BuiltinTrancheNames[] below.
121 : : *
122 : : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
123 : : * or LWLockRegisterTranche. The names of these that are known in the current
124 : : * process appear in LWLockTrancheNames[].
125 : : *
126 : : * All these names are user-visible as wait event names, so choose with care
127 : : * ... and do not forget to update the documentation's list of wait events.
128 : : */
129 : : static const char *const BuiltinTrancheNames[] = {
130 : : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname) "Lock",
131 : : #include "storage/lwlocklist.h"
132 : : #undef PG_LWLOCK
133 : : [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
134 : : [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
135 : : [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
136 : : [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
137 : : [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
138 : : [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
139 : : [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
140 : : [LWTRANCHE_WAL_INSERT] = "WALInsert",
141 : : [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
142 : : [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
143 : : [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
144 : : [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
145 : : [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
146 : : [LWTRANCHE_LOCK_MANAGER] = "LockManager",
147 : : [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
148 : : [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
149 : : [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
150 : : [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
151 : : [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
152 : : [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
153 : : [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
154 : : [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
155 : : [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
156 : : [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
157 : : [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
158 : : [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
159 : : [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
160 : : [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
161 : : [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
162 : : [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
163 : : [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
164 : : [LWTRANCHE_COMMITTS_SLRU] = "CommitTSSLRU",
165 : : [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
166 : : [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
167 : : [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
168 : : [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
169 : : [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
170 : : [LWTRANCHE_XACT_SLRU] = "XactSLRU",
171 : : [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
172 : : };
173 : :
174 : : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
175 : : LWTRANCHE_FIRST_USER_DEFINED,
176 : : "missing entries in BuiltinTrancheNames[]");
177 : :
178 : : /*
179 : : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
180 : : * stores the names of all dynamically-created tranches known to the current
181 : : * process. Any unused entries in the array will contain NULL.
182 : : */
183 : : static const char **LWLockTrancheNames = NULL;
184 : : static int LWLockTrancheNamesAllocated = 0;
185 : :
186 : : /*
187 : : * This points to the main array of LWLocks in shared memory. Backends inherit
188 : : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
189 : : * where we have special measures to pass it down).
190 : : */
191 : : LWLockPadded *MainLWLockArray = NULL;
192 : :
193 : : /*
194 : : * We use this structure to keep track of locked LWLocks for release
195 : : * during error recovery. Normally, only a few will be held at once, but
196 : : * occasionally the number can be much higher; for example, the pg_buffercache
197 : : * extension locks all buffer partitions simultaneously.
198 : : */
199 : : #define MAX_SIMUL_LWLOCKS 200
200 : :
201 : : /* struct representing the LWLocks we're holding */
202 : : typedef struct LWLockHandle
203 : : {
204 : : LWLock *lock;
205 : : LWLockMode mode;
206 : : } LWLockHandle;
207 : :
208 : : static int num_held_lwlocks = 0;
209 : : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
210 : :
211 : : /* struct representing the LWLock tranche request for named tranche */
212 : : typedef struct NamedLWLockTrancheRequest
213 : : {
214 : : char tranche_name[NAMEDATALEN];
215 : : int num_lwlocks;
216 : : } NamedLWLockTrancheRequest;
217 : :
218 : : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
219 : : static int NamedLWLockTrancheRequestsAllocated = 0;
220 : :
221 : : /*
222 : : * NamedLWLockTrancheRequests is both the valid length of the request array,
223 : : * and the length of the shared-memory NamedLWLockTrancheArray later on.
224 : : * This variable and NamedLWLockTrancheArray are non-static so that
225 : : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
226 : : */
227 : : int NamedLWLockTrancheRequests = 0;
228 : :
229 : : /* points to data in shared memory: */
230 : : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
231 : :
232 : : static void InitializeLWLocks(void);
233 : : static inline void LWLockReportWaitStart(LWLock *lock);
234 : : static inline void LWLockReportWaitEnd(void);
235 : : static const char *GetLWTrancheName(uint16 trancheId);
236 : :
237 : : #define T_NAME(lock) \
238 : : GetLWTrancheName((lock)->tranche)
239 : :
240 : : #ifdef LWLOCK_STATS
241 : : typedef struct lwlock_stats_key
242 : : {
243 : : int tranche;
244 : : void *instance;
245 : : } lwlock_stats_key;
246 : :
247 : : typedef struct lwlock_stats
248 : : {
249 : : lwlock_stats_key key;
250 : : int sh_acquire_count;
251 : : int ex_acquire_count;
252 : : int block_count;
253 : : int dequeue_self_count;
254 : : int spin_delay_count;
255 : : } lwlock_stats;
256 : :
257 : : static HTAB *lwlock_stats_htab;
258 : : static lwlock_stats lwlock_stats_dummy;
259 : : #endif
260 : :
261 : : #ifdef LOCK_DEBUG
262 : : bool Trace_lwlocks = false;
263 : :
264 : : inline static void
265 : : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
266 : : {
267 : : /* hide statement & context here, otherwise the log is just too verbose */
268 : : if (Trace_lwlocks)
269 : : {
270 : : uint32 state = pg_atomic_read_u32(&lock->state);
271 : :
272 : : ereport(LOG,
273 : : (errhidestmt(true),
274 : : errhidecontext(true),
275 : : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
276 : : MyProcPid,
277 : : where, T_NAME(lock), lock,
278 : : (state & LW_VAL_EXCLUSIVE) != 0,
279 : : state & LW_SHARED_MASK,
280 : : (state & LW_FLAG_HAS_WAITERS) != 0,
281 : : pg_atomic_read_u32(&lock->nwaiters),
282 : : (state & LW_FLAG_RELEASE_OK) != 0)));
283 : : }
284 : : }
285 : :
286 : : inline static void
287 : : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
288 : : {
289 : : /* hide statement & context here, otherwise the log is just too verbose */
290 : : if (Trace_lwlocks)
291 : : {
292 : : ereport(LOG,
293 : : (errhidestmt(true),
294 : : errhidecontext(true),
295 : : errmsg_internal("%s(%s %p): %s", where,
296 : : T_NAME(lock), lock, msg)));
297 : : }
298 : : }
299 : :
300 : : #else /* not LOCK_DEBUG */
301 : : #define PRINT_LWDEBUG(a,b,c) ((void)0)
302 : : #define LOG_LWDEBUG(a,b,c) ((void)0)
303 : : #endif /* LOCK_DEBUG */
304 : :
305 : : #ifdef LWLOCK_STATS
306 : :
307 : : static void init_lwlock_stats(void);
308 : : static void print_lwlock_stats(int code, Datum arg);
309 : : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
310 : :
311 : : static void
312 : : init_lwlock_stats(void)
313 : : {
314 : : HASHCTL ctl;
315 : : static MemoryContext lwlock_stats_cxt = NULL;
316 : : static bool exit_registered = false;
317 : :
318 : : if (lwlock_stats_cxt != NULL)
319 : : MemoryContextDelete(lwlock_stats_cxt);
320 : :
321 : : /*
322 : : * The LWLock stats will be updated within a critical section, which
323 : : * requires allocating new hash entries. Allocations within a critical
324 : : * section are normally not allowed because running out of memory would
325 : : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
326 : : * turned on in production, so that's an acceptable risk. The hash entries
327 : : * are small, so the risk of running out of memory is minimal in practice.
328 : : */
329 : : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
330 : : "LWLock stats",
331 : : ALLOCSET_DEFAULT_SIZES);
332 : : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
333 : :
334 : : ctl.keysize = sizeof(lwlock_stats_key);
335 : : ctl.entrysize = sizeof(lwlock_stats);
336 : : ctl.hcxt = lwlock_stats_cxt;
337 : : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
338 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
339 : : if (!exit_registered)
340 : : {
341 : : on_shmem_exit(print_lwlock_stats, 0);
342 : : exit_registered = true;
343 : : }
344 : : }
345 : :
346 : : static void
347 : : print_lwlock_stats(int code, Datum arg)
348 : : {
349 : : HASH_SEQ_STATUS scan;
350 : : lwlock_stats *lwstats;
351 : :
352 : : hash_seq_init(&scan, lwlock_stats_htab);
353 : :
354 : : /* Grab an LWLock to keep different backends from mixing reports */
355 : : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
356 : :
357 : : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
358 : : {
359 : : fprintf(stderr,
360 : : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
361 : : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
362 : : lwstats->key.instance, lwstats->sh_acquire_count,
363 : : lwstats->ex_acquire_count, lwstats->block_count,
364 : : lwstats->spin_delay_count, lwstats->dequeue_self_count);
365 : : }
366 : :
367 : : LWLockRelease(&MainLWLockArray[0].lock);
368 : : }
369 : :
370 : : static lwlock_stats *
371 : : get_lwlock_stats_entry(LWLock *lock)
372 : : {
373 : : lwlock_stats_key key;
374 : : lwlock_stats *lwstats;
375 : : bool found;
376 : :
377 : : /*
378 : : * During shared memory initialization, the hash table doesn't exist yet.
379 : : * Stats of that phase aren't very interesting, so just collect operations
380 : : * on all locks in a single dummy entry.
381 : : */
382 : : if (lwlock_stats_htab == NULL)
383 : : return &lwlock_stats_dummy;
384 : :
385 : : /* Fetch or create the entry. */
386 : : MemSet(&key, 0, sizeof(key));
387 : : key.tranche = lock->tranche;
388 : : key.instance = lock;
389 : : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
390 : : if (!found)
391 : : {
392 : : lwstats->sh_acquire_count = 0;
393 : : lwstats->ex_acquire_count = 0;
394 : : lwstats->block_count = 0;
395 : : lwstats->dequeue_self_count = 0;
396 : : lwstats->spin_delay_count = 0;
397 : : }
398 : : return lwstats;
399 : : }
400 : : #endif /* LWLOCK_STATS */
401 : :
402 : :
403 : : /*
404 : : * Compute number of LWLocks required by named tranches. These will be
405 : : * allocated in the main array.
406 : : */
407 : : static int
1431 tgl@sss.pgh.pa.us 408 :CBC 3475 : NumLWLocksForNamedTranches(void)
409 : : {
2992 rhaas@postgresql.org 410 : 3475 : int numLocks = 0;
411 : : int i;
412 : :
413 [ + + ]: 3499 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
414 : 24 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
415 : :
416 : 3475 : return numLocks;
417 : : }
418 : :
419 : : /*
420 : : * Compute shmem space needed for LWLocks and named tranches.
421 : : */
422 : : Size
8233 tgl@sss.pgh.pa.us 423 : 2577 : LWLockShmemSize(void)
424 : : {
425 : : Size size;
426 : : int i;
2986 rhaas@postgresql.org 427 : 2577 : int numLocks = NUM_FIXED_LWLOCKS;
428 : :
429 : : /* Calculate total number of locks needed in the main array. */
1431 tgl@sss.pgh.pa.us 430 : 2577 : numLocks += NumLWLocksForNamedTranches();
431 : :
432 : : /* Space for the LWLock array. */
6785 433 : 2577 : size = mul_size(numLocks, sizeof(LWLockPadded));
434 : :
435 : : /* Space for dynamic allocation counter, plus room for alignment. */
2986 rhaas@postgresql.org 436 : 2577 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
437 : :
438 : : /* space for named tranches. */
2992 439 : 2577 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
440 : :
441 : : /* space for name of each tranche. */
442 [ + + ]: 2595 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
443 : 18 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
444 : :
6812 tgl@sss.pgh.pa.us 445 : 2577 : return size;
446 : : }
447 : :
448 : : /*
449 : : * Allocate shmem space for the main LWLock array and all tranches and
450 : : * initialize it. We also register extension LWLock tranches here.
451 : : */
452 : : void
8233 453 : 898 : CreateLWLocks(void)
454 : : {
3730 rhaas@postgresql.org 455 [ + - ]: 898 : if (!IsUnderPostmaster)
456 : : {
457 : 898 : Size spaceLocks = LWLockShmemSize();
458 : : int *LWLockCounter;
459 : : char *ptr;
460 : :
461 : : /* Allocate space */
462 : 898 : ptr = (char *) ShmemAlloc(spaceLocks);
463 : :
464 : : /* Leave room for dynamic allocation of tranches */
2986 465 : 898 : ptr += sizeof(int);
466 : :
467 : : /* Ensure desired alignment of LWLock array */
3730 468 : 898 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
469 : :
470 : 898 : MainLWLockArray = (LWLockPadded *) ptr;
471 : :
472 : : /*
473 : : * Initialize the dynamic-allocation counter for tranches, which is
474 : : * stored just before the first LWLock.
475 : : */
2986 476 : 898 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
477 : 898 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
478 : :
479 : : /* Initialize all LWLocks */
2985 480 : 898 : InitializeLWLocks();
481 : : }
482 : :
483 : : /* Register named extension LWLock tranches in the current process. */
1431 tgl@sss.pgh.pa.us 484 [ + + ]: 904 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
485 : 6 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
486 : 6 : NamedLWLockTrancheArray[i].trancheName);
2985 rhaas@postgresql.org 487 : 898 : }
488 : :
489 : : /*
490 : : * Initialize LWLocks that are fixed and those belonging to named tranches.
491 : : */
492 : : static void
493 : 898 : InitializeLWLocks(void)
494 : : {
1431 tgl@sss.pgh.pa.us 495 : 898 : int numNamedLocks = NumLWLocksForNamedTranches();
496 : : int id;
497 : : int i;
498 : : int j;
499 : : LWLockPadded *lock;
500 : :
501 : : /* Initialize all individual LWLocks in main array */
2985 rhaas@postgresql.org 502 [ + + ]: 48492 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
2676 503 : 47594 : LWLockInitialize(&lock->lock, id);
504 : :
505 : : /* Initialize buffer mapping LWLocks in main array */
1237 michael@paquier.xyz 506 : 898 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
2985 rhaas@postgresql.org 507 [ + + ]: 115842 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
508 : 114944 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
509 : :
510 : : /* Initialize lmgrs' LWLocks in main array */
1237 michael@paquier.xyz 511 : 898 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
2985 rhaas@postgresql.org 512 [ + + ]: 15266 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
513 : 14368 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
514 : :
515 : : /* Initialize predicate lmgrs' LWLocks in main array */
1237 michael@paquier.xyz 516 : 898 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
2985 rhaas@postgresql.org 517 [ + + ]: 15266 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
518 : 14368 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
519 : :
520 : : /*
521 : : * Copy the info about any named tranches into shared memory (so that
522 : : * other processes can see it), and initialize the requested LWLocks.
523 : : */
524 [ + + ]: 898 : if (NamedLWLockTrancheRequests > 0)
525 : : {
526 : : char *trancheNames;
527 : :
528 : 6 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
529 : 6 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
530 : :
531 : 6 : trancheNames = (char *) NamedLWLockTrancheArray +
532 : 6 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
533 : 6 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
534 : :
535 [ + + ]: 12 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
536 : : {
537 : : NamedLWLockTrancheRequest *request;
538 : : NamedLWLockTranche *tranche;
539 : : char *name;
540 : :
541 : 6 : request = &NamedLWLockTrancheRequestArray[i];
542 : 6 : tranche = &NamedLWLockTrancheArray[i];
543 : :
544 : 6 : name = trancheNames;
545 : 6 : trancheNames += strlen(request->tranche_name) + 1;
546 : 6 : strcpy(name, request->tranche_name);
547 : 6 : tranche->trancheId = LWLockNewTrancheId();
2676 548 : 6 : tranche->trancheName = name;
549 : :
2985 550 [ + + ]: 12 : for (j = 0; j < request->num_lwlocks; j++, lock++)
551 : 6 : LWLockInitialize(&lock->lock, tranche->trancheId);
552 : : }
553 : : }
554 : 898 : }
555 : :
556 : : /*
557 : : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
558 : : */
559 : : void
3576 heikki.linnakangas@i 560 : 19578 : InitLWLockAccess(void)
561 : : {
562 : : #ifdef LWLOCK_STATS
563 : : init_lwlock_stats();
564 : : #endif
565 : 19578 : }
566 : :
567 : : /*
568 : : * GetNamedLWLockTranche - returns the base address of the LWLock array
569 : : * for the specified tranche.
570 : : *
571 : : * The caller should use the requested number of LWLocks starting from the
572 : : * base lock address returned by this function. This can be used for
573 : : * tranches that were requested via the RequestNamedLWLockTranche() API.
574 : : */
575 : : LWLockPadded *
2992 rhaas@postgresql.org 576 : 6 : GetNamedLWLockTranche(const char *tranche_name)
577 : : {
578 : : int lock_pos;
579 : : int i;
580 : :
581 : : /*
582 : : * Obtain the position of base address of LWLock belonging to requested
583 : : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
584 : : * in MainLWLockArray after fixed locks.
585 : : */
2986 586 : 6 : lock_pos = NUM_FIXED_LWLOCKS;
2992 587 [ + - ]: 6 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
588 : : {
589 [ + - ]: 6 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
590 : : tranche_name) == 0)
591 : 6 : return &MainLWLockArray[lock_pos];
592 : :
2992 rhaas@postgresql.org 593 :UBC 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
594 : : }
595 : :
1431 tgl@sss.pgh.pa.us 596 [ # # ]: 0 : elog(ERROR, "requested tranche is not registered");
597 : :
598 : : /* just to keep compiler quiet */
599 : : return NULL;
600 : : }
601 : :
602 : : /*
603 : : * Allocate a new tranche ID.
604 : : */
605 : : int
3730 rhaas@postgresql.org 606 :CBC 14 : LWLockNewTrancheId(void)
607 : : {
608 : : int result;
609 : : int *LWLockCounter;
610 : :
2986 611 : 14 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
3730 612 [ - + ]: 14 : SpinLockAcquire(ShmemLock);
2986 613 : 14 : result = (*LWLockCounter)++;
3730 614 : 14 : SpinLockRelease(ShmemLock);
615 : :
616 : 14 : return result;
617 : : }
618 : :
619 : : /*
620 : : * Register a dynamic tranche name in the lookup table of the current process.
621 : : *
622 : : * This routine will save a pointer to the tranche name passed as an argument,
623 : : * so the name should be allocated in a backend-lifetime context
624 : : * (shared memory, TopMemoryContext, static constant, or similar).
625 : : *
626 : : * The tranche name will be user-visible as a wait event name, so try to
627 : : * use a name that fits the style for those.
628 : : */
629 : : void
2357 peter_e@gmx.net 630 : 17 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
631 : : {
632 : : /* This should only be called for user-defined tranches. */
1431 tgl@sss.pgh.pa.us 633 [ - + ]: 17 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
1431 tgl@sss.pgh.pa.us 634 :UBC 0 : return;
635 : :
636 : : /* Convert to array index. */
1431 tgl@sss.pgh.pa.us 637 :CBC 17 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
638 : :
639 : : /* If necessary, create or enlarge array. */
640 [ + + ]: 17 : if (tranche_id >= LWLockTrancheNamesAllocated)
641 : : {
642 : : int newalloc;
643 : :
1018 drowley@postgresql.o 644 : 15 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
645 : :
1431 tgl@sss.pgh.pa.us 646 [ + - ]: 15 : if (LWLockTrancheNames == NULL)
647 : 15 : LWLockTrancheNames = (const char **)
648 : 15 : MemoryContextAllocZero(TopMemoryContext,
649 : : newalloc * sizeof(char *));
650 : : else
519 peter@eisentraut.org 651 :UBC 0 : LWLockTrancheNames =
652 : 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
1431 tgl@sss.pgh.pa.us 653 :CBC 15 : LWLockTrancheNamesAllocated = newalloc;
654 : : }
655 : :
656 : 17 : LWLockTrancheNames[tranche_id] = tranche_name;
657 : : }
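: :
: : /*
: :  * Illustrative only -- the usual calling pattern for a dynamically created
: :  * tranche (for example, LWLocks placed in a DSM segment); the tranche name
: :  * and variables here are hypothetical:
: :  *
: :  *		int		tranche_id = LWLockNewTrancheId();
: :  *		LWLock *lock = ... somewhere in shared memory ...;
: :  *
: :  *		LWLockRegisterTranche(tranche_id, "my_extension");
: :  *		LWLockInitialize(lock, tranche_id);
: :  *
: :  * Other processes that acquire the lock should also call
: :  * LWLockRegisterTranche() with the same ID; otherwise their waits are
: :  * reported under the generic "extension" wait event name.
: :  */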
658 : :
659 : : /*
660 : : * RequestNamedLWLockTranche
661 : : * Request that extra LWLocks be allocated during postmaster
662 : : * startup.
663 : : *
664 : : * This may only be called via the shmem_request_hook of a library that is
665 : : * loaded into the postmaster via shared_preload_libraries. Calls from
666 : : * elsewhere will fail.
667 : : *
668 : : * The tranche name will be user-visible as a wait event name, so try to
669 : : * use a name that fits the style for those.
670 : : */
671 : : void
2992 rhaas@postgresql.org 672 : 6 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
673 : : {
674 : : NamedLWLockTrancheRequest *request;
675 : :
702 676 [ - + ]: 6 : if (!process_shmem_requests_in_progress)
702 rhaas@postgresql.org 677 [ # # ]:UBC 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
678 : :
2992 rhaas@postgresql.org 679 [ + - ]:CBC 6 : if (NamedLWLockTrancheRequestArray == NULL)
680 : : {
681 : 6 : NamedLWLockTrancheRequestsAllocated = 16;
682 : 6 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
683 : 6 : MemoryContextAlloc(TopMemoryContext,
684 : : NamedLWLockTrancheRequestsAllocated
685 : : * sizeof(NamedLWLockTrancheRequest));
686 : : }
687 : :
688 [ - + ]: 6 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
689 : : {
1018 drowley@postgresql.o 690 :UBC 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
691 : :
2992 rhaas@postgresql.org 692 : 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
693 : 0 : repalloc(NamedLWLockTrancheRequestArray,
694 : : i * sizeof(NamedLWLockTrancheRequest));
695 : 0 : NamedLWLockTrancheRequestsAllocated = i;
696 : : }
697 : :
2992 rhaas@postgresql.org 698 :CBC 6 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
1431 tgl@sss.pgh.pa.us 699 [ - + ]: 6 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
700 : 6 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
2992 rhaas@postgresql.org 701 : 6 : request->num_lwlocks = num_lwlocks;
702 : 6 : NamedLWLockTrancheRequests++;
703 : 6 : }
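: :
: : /*
: :  * Illustrative only -- how an extension typically uses the named-tranche
: :  * machinery; "my_ext" and the hook function name are hypothetical:
: :  *
: :  *		static void
: :  *		my_ext_shmem_request(void)
: :  *		{
: :  *			RequestNamedLWLockTranche("my_ext", 4);
: :  *		}
: :  *
: :  * and later, for instance in the extension's shmem_startup_hook:
: :  *
: :  *		LWLockPadded *locks = GetNamedLWLockTranche("my_ext");
: :  *
: :  *		LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
: :  */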
704 : :
705 : : /*
706 : : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
707 : : */
708 : : void
3730 709 : 9945650 : LWLockInitialize(LWLock *lock, int tranche_id)
710 : : {
3398 andres@anarazel.de 711 : 9945650 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
712 : : #ifdef LOCK_DEBUG
713 : : pg_atomic_init_u32(&lock->nwaiters, 0);
714 : : #endif
3730 rhaas@postgresql.org 715 : 9945650 : lock->tranche = tranche_id;
2799 716 : 9945650 : proclist_init(&lock->waiters);
3730 717 : 9945650 : }
718 : :
719 : : /*
720 : : * Report start of wait event for light-weight locks.
721 : : *
722 : : * This function will be used by all the light-weight lock calls which
723 : : * needs to wait to acquire the lock. This function distinguishes wait
724 : : * event based on tranche and lock id.
725 : : */
726 : : static inline void
2957 727 : 318069 : LWLockReportWaitStart(LWLock *lock)
728 : : {
2676 729 : 318069 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
2957 730 : 318069 : }
731 : :
732 : : /*
733 : : * Report end of wait event for light-weight locks.
734 : : */
735 : : static inline void
2940 andres@anarazel.de 736 : 318069 : LWLockReportWaitEnd(void)
737 : : {
2957 rhaas@postgresql.org 738 : 318069 : pgstat_report_wait_end();
739 : 318069 : }
740 : :
741 : : /*
742 : : * Return the name of an LWLock tranche.
743 : : */
744 : : static const char *
1431 tgl@sss.pgh.pa.us 745 :UBC 0 : GetLWTrancheName(uint16 trancheId)
746 : : {
747 : : /* Built-in tranche or individual LWLock? */
748 [ # # ]: 0 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
81 alvherre@alvh.no-ip. 749 :UNC 0 : return BuiltinTrancheNames[trancheId];
750 : :
751 : : /*
752 : : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
753 : : * it's possible that the tranche has never been registered in the current
754 : : * process, in which case give up and return "extension".
755 : : */
1431 tgl@sss.pgh.pa.us 756 :UBC 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
757 : :
758 [ # # ]: 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
759 [ # # ]: 0 : LWLockTrancheNames[trancheId] == NULL)
2957 rhaas@postgresql.org 760 : 0 : return "extension";
761 : :
1431 tgl@sss.pgh.pa.us 762 : 0 : return LWLockTrancheNames[trancheId];
763 : : }
764 : :
765 : : /*
766 : : * Return an identifier for an LWLock based on the wait class and event.
767 : : */
768 : : const char *
769 : 0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
770 : : {
771 [ # # ]: 0 : Assert(classId == PG_WAIT_LWLOCK);
772 : : /* The event IDs are just tranche numbers. */
773 : 0 : return GetLWTrancheName(eventId);
774 : : }
775 : :
776 : : /*
777 : : * Internal function that tries to atomically acquire the lwlock in the passed
778 : : * in mode.
779 : : *
780 : : * This function will not block waiting for a lock to become free - that's the
781 : : * caller's job.
782 : : *
783 : : * Returns true if the lock isn't free and we need to wait.
784 : : */
785 : : static bool
3249 bruce@momjian.us 786 :CBC 310795135 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
787 : : {
788 : : uint32 old_state;
789 : :
534 peter@eisentraut.org 790 [ + + - + ]: 310795135 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
791 : :
792 : : /*
793 : : * Read once outside the loop, later iterations will get the newer value
794 : : * via compare & exchange.
795 : : */
3180 andres@anarazel.de 796 : 310795135 : old_state = pg_atomic_read_u32(&lock->state);
797 : :
798 : : /* loop until we've determined whether we could acquire the lock or not */
799 : : while (true)
3398 800 : 332669 : {
801 : : uint32 desired_state;
802 : : bool lock_free;
803 : :
3180 804 : 311127804 : desired_state = old_state;
805 : :
3398 806 [ + + ]: 311127804 : if (mode == LW_EXCLUSIVE)
807 : : {
3180 808 : 198437507 : lock_free = (old_state & LW_LOCK_MASK) == 0;
3398 809 [ + + ]: 198437507 : if (lock_free)
810 : 198054545 : desired_state += LW_VAL_EXCLUSIVE;
811 : : }
812 : : else
813 : : {
3180 814 : 112690297 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
3398 815 [ + + ]: 112690297 : if (lock_free)
816 : 112105580 : desired_state += LW_VAL_SHARED;
817 : : }
818 : :
819 : : /*
820 : : * Attempt to swap in the state we are expecting. If we didn't see
821 : : * lock to be free, that's just the old value. If we saw it as free,
822 : : * we'll attempt to mark it acquired. The reason that we always swap
823 : : * in the value is that this doubles as a memory barrier. We could try
824 : : * to be smarter and only swap in values if we saw the lock as free,
825 : : * but benchmarks haven't shown it to be beneficial so far.
826 : : *
827 : : * Retry if the value changed since we last looked at it.
828 : : */
829 [ + + ]: 311127804 : if (pg_atomic_compare_exchange_u32(&lock->state,
830 : : &old_state, desired_state))
831 : : {
832 [ + + ]: 310795135 : if (lock_free)
833 : : {
834 : : /* Great! Got the lock. */
835 : : #ifdef LOCK_DEBUG
836 : : if (mode == LW_EXCLUSIVE)
837 : : lock->owner = MyProc;
838 : : #endif
839 : 309921568 : return false;
840 : : }
841 : : else
2624 heikki.linnakangas@i 842 : 873567 : return true; /* somebody else has the lock */
843 : : }
844 : : }
845 : : pg_unreachable();
846 : : }
847 : :
848 : : /*
849 : : * Lock the LWLock's wait list against concurrent activity.
850 : : *
851 : : * NB: even though the wait list is locked, non-conflicting lock operations
852 : : * may still happen concurrently.
853 : : *
854 : : * Time spent holding mutex should be short!
855 : : */
856 : : static void
2926 andres@anarazel.de 857 : 2132313 : LWLockWaitListLock(LWLock *lock)
858 : : {
859 : : uint32 old_state;
860 : : #ifdef LWLOCK_STATS
861 : : lwlock_stats *lwstats;
862 : : uint32 delays = 0;
863 : :
864 : : lwstats = get_lwlock_stats_entry(lock);
865 : : #endif
866 : :
867 : : while (true)
868 : : {
869 : : /* always try once to acquire lock directly */
870 : 2407878 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
871 [ + + ]: 2407878 : if (!(old_state & LW_FLAG_LOCKED))
872 : 2132313 : break; /* got lock */
873 : :
874 : : /* and then spin without atomic operations until lock is released */
875 : : {
876 : : SpinDelayStatus delayStatus;
877 : :
2922 878 : 275565 : init_local_spin_delay(&delayStatus);
879 : :
2926 880 [ + + ]: 1177471 : while (old_state & LW_FLAG_LOCKED)
881 : : {
882 : 901906 : perform_spin_delay(&delayStatus);
883 : 901906 : old_state = pg_atomic_read_u32(&lock->state);
884 : : }
885 : : #ifdef LWLOCK_STATS
886 : : delays += delayStatus.delays;
887 : : #endif
888 : 275565 : finish_spin_delay(&delayStatus);
889 : : }
890 : :
891 : : /*
892 : : * Retry. The lock might obviously already be re-acquired by the time
893 : : * we're attempting to get it again.
894 : : */
895 : : }
896 : :
897 : : #ifdef LWLOCK_STATS
898 : : lwstats->spin_delay_count += delays;
899 : : #endif
900 : 2132313 : }
901 : :
902 : : /*
903 : : * Unlock the LWLock's wait list.
904 : : *
905 : : * Note that it can be more efficient to manipulate flags and release the
906 : : * locks in a single atomic operation.
907 : : */
908 : : static void
909 : 1500786 : LWLockWaitListUnlock(LWLock *lock)
910 : : {
911 : : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
912 : :
913 : 1500786 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
914 : :
915 [ - + ]: 1500786 : Assert(old_state & LW_FLAG_LOCKED);
916 : 1500786 : }
917 : :
918 : : /*
919 : : * Wakeup all the lockers that currently have a chance to acquire the lock.
920 : : */
921 : : static void
3398 922 : 631527 : LWLockWakeup(LWLock *lock)
923 : : {
924 : : bool new_release_ok;
925 : 631527 : bool wokeup_somebody = false;
926 : : proclist_head wakeup;
927 : : proclist_mutable_iter iter;
928 : :
2799 rhaas@postgresql.org 929 : 631527 : proclist_init(&wakeup);
930 : :
3398 andres@anarazel.de 931 : 631527 : new_release_ok = true;
932 : :
933 : : /* lock wait list while collecting backends to wake up */
2926 934 : 631527 : LWLockWaitListLock(lock);
935 : :
2799 rhaas@postgresql.org 936 [ + + + + : 948943 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
+ + ]
937 : : {
938 : 468387 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
939 : :
3398 andres@anarazel.de 940 [ + + + + ]: 468387 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
941 : 2131 : continue;
942 : :
2799 rhaas@postgresql.org 943 : 466256 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
944 : 466256 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
945 : :
3398 andres@anarazel.de 946 [ + + ]: 466256 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
947 : : {
948 : : /*
949 : : * Prevent additional wakeups until retryer gets to run. Backends
950 : : * that are just waiting for the lock to become free don't retry
951 : : * automatically.
952 : : */
953 : 463843 : new_release_ok = false;
954 : :
955 : : /*
956 : : * Don't wakeup (further) exclusive locks.
957 : : */
958 : 463843 : wokeup_somebody = true;
959 : : }
960 : :
961 : : /*
962 : : * Signal that the process isn't on the wait list anymore. This allows
963 : : * LWLockDequeueSelf() to remove itself from the waitlist with a
964 : : * proclist_delete(), rather than having to check if it has been
965 : : * removed from the list.
966 : : */
511 967 [ - + ]: 466256 : Assert(waiter->lwWaiting == LW_WS_WAITING);
968 : 466256 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
969 : :
970 : : /*
971 : : * Once we've woken up an exclusive lock, there's no point in waking
972 : : * up anybody else.
973 : : */
3249 bruce@momjian.us 974 [ + + ]: 466256 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
3398 andres@anarazel.de 975 : 150971 : break;
976 : : }
977 : :
2799 rhaas@postgresql.org 978 [ + + - + ]: 631527 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
979 : :
980 : : /* unset required flags, and release lock, in one fell swoop */
981 : : {
982 : : uint32 old_state;
983 : : uint32 desired_state;
984 : :
2926 andres@anarazel.de 985 : 631527 : old_state = pg_atomic_read_u32(&lock->state);
986 : : while (true)
987 : : {
988 : 667868 : desired_state = old_state;
989 : :
990 : : /* compute desired flags */
991 : :
992 [ + + ]: 667868 : if (new_release_ok)
993 : 214296 : desired_state |= LW_FLAG_RELEASE_OK;
994 : : else
995 : 453572 : desired_state &= ~LW_FLAG_RELEASE_OK;
996 : :
2799 rhaas@postgresql.org 997 [ + + ]: 667868 : if (proclist_is_empty(&wakeup))
2926 andres@anarazel.de 998 : 212321 : desired_state &= ~LW_FLAG_HAS_WAITERS;
999 : :
1000 : 667868 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1001 : :
1002 [ + + ]: 667868 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1003 : : desired_state))
1004 : 631527 : break;
1005 : : }
1006 : : }
1007 : :
1008 : : /* Awaken any waiters I removed from the queue. */
2799 rhaas@postgresql.org 1009 [ + + + + : 1097783 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1010 : : {
1011 : 466256 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1012 : :
1013 : : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1014 : 466256 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1015 : :
1016 : : /*
1017 : : * Guarantee that lwWaiting being unset only becomes visible once the
1018 : : * unlink from the link has completed. Otherwise the target backend
1019 : : * could be woken up for other reason and enqueue for a new lock - if
1020 : : * that happens before the list unlink happens, the list would end up
1021 : : * being corrupted.
1022 : : *
1023 : : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1024 : : * another lock.
1025 : : */
3398 andres@anarazel.de 1026 : 466256 : pg_write_barrier();
511 1027 : 466256 : waiter->lwWaiting = LW_WS_NOT_WAITING;
2680 tgl@sss.pgh.pa.us 1028 : 466256 : PGSemaphoreUnlock(waiter->sem);
1029 : : }
3398 andres@anarazel.de 1030 : 631527 : }
1031 : :
1032 : : /*
1033 : : * Add ourselves to the end of the queue.
1034 : : *
1035 : : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1036 : : */
1037 : : static void
1038 : 554554 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1039 : : {
1040 : : /*
1041 : : * If we don't have a PGPROC structure, there's no way to wait. This
1042 : : * should never occur, since MyProc should only be null during shared
1043 : : * memory initialization.
1044 : : */
1045 [ - + ]: 554554 : if (MyProc == NULL)
3398 andres@anarazel.de 1046 [ # # ]:UBC 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1047 : :
511 andres@anarazel.de 1048 [ - + ]:CBC 554554 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
3398 andres@anarazel.de 1049 [ # # ]:UBC 0 : elog(PANIC, "queueing for lock while waiting on another one");
1050 : :
2926 andres@anarazel.de 1051 :CBC 554554 : LWLockWaitListLock(lock);
1052 : :
1053 : : /* setting the flag is protected by the spinlock */
3398 1054 : 554554 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1055 : :
511 1056 : 554554 : MyProc->lwWaiting = LW_WS_WAITING;
3398 1057 : 554554 : MyProc->lwWaitMode = mode;
1058 : :
1059 : : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1060 [ + + ]: 554554 : if (mode == LW_WAIT_UNTIL_FREE)
52 heikki.linnakangas@i 1061 :GNC 3364 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1062 : : else
1063 : 551190 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1064 : :
1065 : : /* Can release the mutex now */
2926 andres@anarazel.de 1066 :CBC 554554 : LWLockWaitListUnlock(lock);
1067 : :
1068 : : #ifdef LOCK_DEBUG
1069 : : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1070 : : #endif
3398 1071 : 554554 : }
1072 : :
1073 : : /*
1074 : : * Remove ourselves from the waitlist.
1075 : : *
1076 : : * This is used if we queued ourselves because we thought we needed to sleep
1077 : : * but, after further checking, we discovered that we don't actually need to
1078 : : * do so.
1079 : : */
1080 : : static void
1081 : 236485 : LWLockDequeueSelf(LWLock *lock)
1082 : : {
1083 : : bool on_waitlist;
1084 : :
1085 : : #ifdef LWLOCK_STATS
1086 : : lwlock_stats *lwstats;
1087 : :
1088 : : lwstats = get_lwlock_stats_entry(lock);
1089 : :
1090 : : lwstats->dequeue_self_count++;
1091 : : #endif
1092 : :
2926 1093 : 236485 : LWLockWaitListLock(lock);
1094 : :
1095 : : /*
1096 : : * Remove ourselves from the waitlist, unless we've already been removed.
1097 : : * The removal happens with the wait list lock held, so there's no race in
1098 : : * this check.
1099 : : */
511 1100 : 236485 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1101 [ + + ]: 236485 : if (on_waitlist)
52 heikki.linnakangas@i 1102 :GNC 88223 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1103 : :
2799 rhaas@postgresql.org 1104 [ + + ]:CBC 236485 : if (proclist_is_empty(&lock->waiters) &&
3398 andres@anarazel.de 1105 [ + + ]: 235000 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1106 : : {
1107 : 234673 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1108 : : }
1109 : :
1110 : : /* XXX: combine with fetch_and above? */
2926 1111 : 236485 : LWLockWaitListUnlock(lock);
1112 : :
1113 : : /* clear waiting state again, nice for debugging */
511 1114 [ + + ]: 236485 : if (on_waitlist)
1115 : 88223 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1116 : : else
1117 : : {
3249 bruce@momjian.us 1118 : 148262 : int extraWaits = 0;
1119 : :
1120 : : /*
1121 : : * Somebody else dequeued us and has or will wake us up. Deal with the
1122 : : * superfluous absorption of a wakeup.
1123 : : */
1124 : :
1125 : : /*
1126 : : * Reset RELEASE_OK flag if somebody woke us before we removed
1127 : : * ourselves - they'll have set it to false.
1128 : : */
3398 andres@anarazel.de 1129 : 148262 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1130 : :
1131 : : /*
1132 : : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1133 : : * get reset at some inconvenient point later. Most of the time this
1134 : : * will immediately return.
1135 : : */
1136 : : for (;;)
1137 : : {
2680 tgl@sss.pgh.pa.us 1138 : 148262 : PGSemaphoreLock(MyProc->sem);
511 andres@anarazel.de 1139 [ + - ]: 148262 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
3398 1140 : 148262 : break;
3398 andres@anarazel.de 1141 :UBC 0 : extraWaits++;
1142 : : }
1143 : :
1144 : : /*
1145 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1146 : : */
3398 andres@anarazel.de 1147 [ - + ]:CBC 148262 : while (extraWaits-- > 0)
2680 tgl@sss.pgh.pa.us 1148 :UBC 0 : PGSemaphoreUnlock(MyProc->sem);
1149 : : }
1150 : :
1151 : : #ifdef LOCK_DEBUG
1152 : : {
1153 : : /* not waiting anymore */
1154 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1155 : :
1156 : : Assert(nwaiters < MAX_BACKENDS);
1157 : : }
1158 : : #endif
3398 andres@anarazel.de 1159 :CBC 236485 : }
1160 : :
1161 : : /*
1162 : : * LWLockAcquire - acquire a lightweight lock in the specified mode
1163 : : *
1164 : : * If the lock is not available, sleep until it is. Returns true if the lock
1165 : : * was available immediately, false if we had to sleep.
1166 : : *
1167 : : * Side effect: cancel/die interrupts are held off until lock release.
1168 : : */
1169 : : bool
3180 1170 : 307740745 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1171 : : {
7978 JanWieck@Yahoo.com 1172 : 307740745 : PGPROC *proc = MyProc;
3677 heikki.linnakangas@i 1173 : 307740745 : bool result = true;
8133 tgl@sss.pgh.pa.us 1174 : 307740745 : int extraWaits = 0;
1175 : : #ifdef LWLOCK_STATS
1176 : : lwlock_stats *lwstats;
1177 : :
1178 : : lwstats = get_lwlock_stats_entry(lock);
1179 : : #endif
1180 : :
534 peter@eisentraut.org 1181 [ + + - + ]: 307740745 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1182 : :
1183 : : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1184 : :
1185 : : #ifdef LWLOCK_STATS
1186 : : /* Count lock acquisition attempts */
1187 : : if (mode == LW_EXCLUSIVE)
1188 : : lwstats->ex_acquire_count++;
1189 : : else
1190 : : lwstats->sh_acquire_count++;
1191 : : #endif /* LWLOCK_STATS */
1192 : :
1193 : : /*
1194 : : * We can't wait if we haven't got a PGPROC. This should only occur
1195 : : * during bootstrap or shared memory initialization. Put an Assert here
1196 : : * to catch unsafe coding practices.
1197 : : */
7872 tgl@sss.pgh.pa.us 1198 [ + + - + ]: 307740745 : Assert(!(proc == NULL && IsUnderPostmaster));
1199 : :
1200 : : /* Ensure we will have room to remember the lock */
6946 1201 [ - + ]: 307740745 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
6946 tgl@sss.pgh.pa.us 1202 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1203 : :
1204 : : /*
1205 : : * Lock out cancel/die interrupts until we exit the code section protected
1206 : : * by the LWLock. This ensures that interrupts will not interfere with
1207 : : * manipulations of data structures in shared memory.
1208 : : */
8233 tgl@sss.pgh.pa.us 1209 :CBC 307740745 : HOLD_INTERRUPTS();
1210 : :
1211 : : /*
1212 : : * Loop here to try to acquire lock after each time we are signaled by
1213 : : * LWLockRelease.
1214 : : *
1215 : : * NOTE: it might seem better to have LWLockRelease actually grant us the
1216 : : * lock, rather than retrying and possibly having to go back to sleep. But
1217 : : * in practice that is no good because it means a process swap for every
1218 : : * lock acquisition when two or more processes are contending for the same
1219 : : * lock. Since LWLocks are normally used to protect not-very-long
1220 : : * sections of computation, a process needs to be able to acquire and
1221 : : * release the same lock many times during a single CPU time slice, even
1222 : : * in the presence of contention. The efficiency of being able to do that
1223 : : * outweighs the inefficiency of sometimes wasting a process dispatch
1224 : : * cycle because the lock is not free when a released waiter finally gets
1225 : : * to run. See pgsql-hackers archives for 29-Dec-01.
1226 : : */
1227 : : for (;;)
8142 bruce@momjian.us 1228 : 316163 : {
1229 : : bool mustwait;
1230 : :
1231 : : /*
1232 : : * Try to grab the lock the first time, we're not in the waitqueue
1233 : : * yet/anymore.
1234 : : */
3398 andres@anarazel.de 1235 : 308056908 : mustwait = LWLockAttemptLock(lock, mode);
1236 : :
8133 tgl@sss.pgh.pa.us 1237 [ + + ]: 308056907 : if (!mustwait)
1238 : : {
1239 : : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1240 : 307505717 : break; /* got the lock */
1241 : : }
1242 : :
1243 : : /*
1244 : : * Ok, at this point we couldn't grab the lock on the first try. We
1245 : : * cannot simply queue ourselves to the end of the list and wait to be
1246 : : * woken up because by now the lock could long have been released.
1247 : : * Instead add us to the queue and try to grab the lock again. If we
1248 : : * succeed we need to revert the queuing and be happy, otherwise we
1249 : : * recheck the lock. If we still couldn't grab it, we know that the
1250 : : * other locker will see our queue entries when releasing since they
1251 : : * existed before we checked for the lock.
1252 : : */
1253 : :
1254 : : /* add to the queue */
3398 andres@anarazel.de 1255 : 551190 : LWLockQueueSelf(lock, mode);
1256 : :
1257 : : /* we're now guaranteed to be woken up if necessary */
1258 : 551190 : mustwait = LWLockAttemptLock(lock, mode);
1259 : :
1260 : : /* ok, grabbed the lock the second time round, need to undo queueing */
1261 [ + + ]: 551190 : if (!mustwait)
1262 : : {
1263 : : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1264 : :
1265 : 235027 : LWLockDequeueSelf(lock);
1266 : 235027 : break;
1267 : : }
1268 : :
1269 : : /*
1270 : : * Wait until awakened.
1271 : : *
1272 : : * It is possible that we get awakened for a reason other than being
1273 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1274 : : * we've gotten the LWLock, re-increment the sema by the number of
1275 : : * additional signals received.
1276 : : */
1277 : : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1278 : :
1279 : : #ifdef LWLOCK_STATS
1280 : : lwstats->block_count++;
1281 : : #endif
1282 : :
2957 rhaas@postgresql.org 1283 : 316163 : LWLockReportWaitStart(lock);
1284 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1285 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1286 : :
1287 : : for (;;)
1288 : : {
2680 tgl@sss.pgh.pa.us 1289 : 316163 : PGSemaphoreLock(proc->sem);
511 andres@anarazel.de 1290 [ + - ]: 316163 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
8233 tgl@sss.pgh.pa.us 1291 : 316163 : break;
8233 tgl@sss.pgh.pa.us 1292 :UBC 0 : extraWaits++;
1293 : : }
1294 : :
1295 : : /* Retrying, allow LWLockRelease to release waiters again. */
3398 andres@anarazel.de 1296 :CBC 316163 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1297 : :
1298 : : #ifdef LOCK_DEBUG
1299 : : {
1300 : : /* not waiting anymore */
1301 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1302 : :
1303 : : Assert(nwaiters < MAX_BACKENDS);
1304 : : }
1305 : : #endif
1306 : :
1307 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1308 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
2957 rhaas@postgresql.org 1309 : 316163 : LWLockReportWaitEnd();
1310 : :
1311 : : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1312 : :
1313 : : /* Now loop back and try to acquire lock again. */
3677 heikki.linnakangas@i 1314 : 316163 : result = false;
1315 : : }
1316 : :
1317 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1318 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1319 : :
1320 : : /* Add lock to list of locks held by this backend */
3398 andres@anarazel.de 1321 : 307740744 : held_lwlocks[num_held_lwlocks].lock = lock;
1322 : 307740744 : held_lwlocks[num_held_lwlocks++].mode = mode;
1323 : :
1324 : : /*
1325 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1326 : : */
8133 tgl@sss.pgh.pa.us 1327 [ - + ]: 307740744 : while (extraWaits-- > 0)
2680 tgl@sss.pgh.pa.us 1328 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1329 : :
3677 heikki.linnakangas@i 1330 :CBC 307740744 : return result;
1331 : : }
1332 : :
1333 : : /*
1334 : : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1335 : : *
1336 : : * If the lock is not available, return false with no side-effects.
1337 : : *
1338 : : * If successful, cancel/die interrupts are held off until lock release.
1339 : : */
1340 : : bool
3492 rhaas@postgresql.org 1341 : 2072954 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1342 : : {
1343 : : bool mustwait;
1344 : :
534 peter@eisentraut.org 1345 [ + + - + ]: 2072954 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1346 : :
1347 : : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1348 : :
1349 : : /* Ensure we will have room to remember the lock */
6946 tgl@sss.pgh.pa.us 1350 [ - + ]: 2072954 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
6946 tgl@sss.pgh.pa.us 1351 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1352 : :
1353 : : /*
1354 : : * Lock out cancel/die interrupts until we exit the code section protected
1355 : : * by the LWLock. This ensures that interrupts will not interfere with
1356 : : * manipulations of data structures in shared memory.
1357 : : */
8233 tgl@sss.pgh.pa.us 1358 :CBC 2072954 : HOLD_INTERRUPTS();
1359 : :
1360 : : /* Check for the lock */
3398 andres@anarazel.de 1361 : 2072954 : mustwait = LWLockAttemptLock(lock, mode);
1362 : :
8233 tgl@sss.pgh.pa.us 1363 [ + + ]: 2072954 : if (mustwait)
1364 : : {
1365 : : /* Failed to get lock, so release interrupt holdoff */
1366 [ - + ]: 4131 : RESUME_INTERRUPTS();
1367 : :
1368 : : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1369 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1370 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1371 : : }
1372 : : else
1373 : : {
1374 : : /* Add lock to list of locks held by this backend */
3398 andres@anarazel.de 1375 : 2068823 : held_lwlocks[num_held_lwlocks].lock = lock;
1376 : 2068823 : held_lwlocks[num_held_lwlocks++].mode = mode;
1377 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1378 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1379 : : }
8233 tgl@sss.pgh.pa.us 1380 : 2072954 : return !mustwait;
1381 : : }
1382 : :
1383 : : /*
1384 : : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1385 : : *
1386 : : * The semantics of this function are a bit funky. If the lock is currently
1387 : : * free, it is acquired in the given mode, and the function returns true. If
1388 : : * the lock isn't immediately free, the function waits until it is released
1389 : : * and returns false, but does not acquire the lock.
1390 : : *
1391 : : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1392 : : * holding WALWriteLock, it can flush the commit records of many other
1393 : : * backends as a side-effect. Those other backends need to wait until the
1394 : : * flush finishes, but don't need to acquire the lock anymore. They can just
1395 : : * wake up, observe that their records have already been flushed, and return.
1396 : : */
1397 : : bool
3492 rhaas@postgresql.org 1398 : 113009 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1399 : : {
4458 heikki.linnakangas@i 1400 : 113009 : PGPROC *proc = MyProc;
1401 : : bool mustwait;
1402 : 113009 : int extraWaits = 0;
1403 : : #ifdef LWLOCK_STATS
1404 : : lwlock_stats *lwstats;
1405 : :
1406 : : lwstats = get_lwlock_stats_entry(lock);
1407 : : #endif
1408 : :
3398 andres@anarazel.de 1409 [ + - - + ]: 113009 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1410 : :
1411 : : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1412 : :
1413 : : /* Ensure we will have room to remember the lock */
4458 heikki.linnakangas@i 1414 [ - + ]: 113009 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
4458 heikki.linnakangas@i 1415 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1416 : :
1417 : : /*
1418 : : * Lock out cancel/die interrupts until we exit the code section protected
1419 : : * by the LWLock. This ensures that interrupts will not interfere with
1420 : : * manipulations of data structures in shared memory.
1421 : : */
4458 heikki.linnakangas@i 1422 :CBC 113009 : HOLD_INTERRUPTS();
1423 : :
1424 : : /*
1425 : : * NB: We're using nearly the same twice-in-a-row lock acquisition
1426 : : * protocol as LWLockAcquire(). Check its comments for details.
1427 : : */
3398 andres@anarazel.de 1428 : 113009 : mustwait = LWLockAttemptLock(lock, mode);
1429 : :
4458 heikki.linnakangas@i 1430 [ + + ]: 113009 : if (mustwait)
1431 : : {
3398 andres@anarazel.de 1432 : 1075 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1433 : :
1434 : 1075 : mustwait = LWLockAttemptLock(lock, mode);
1435 : :
1436 [ + + ]: 1075 : if (mustwait)
1437 : : {
1438 : : /*
1439 : : * Wait until awakened. Like in LWLockAcquire, be prepared for
1440 : : * bogus wakeups.
1441 : : */
1442 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1443 : :
1444 : : #ifdef LWLOCK_STATS
1445 : : lwstats->block_count++;
1446 : : #endif
1447 : :
2957 rhaas@postgresql.org 1448 : 1008 : LWLockReportWaitStart(lock);
1449 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1450 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1451 : :
1452 : : for (;;)
1453 : : {
2680 tgl@sss.pgh.pa.us 1454 : 1008 : PGSemaphoreLock(proc->sem);
511 andres@anarazel.de 1455 [ + - ]: 1008 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
3398 1456 : 1008 : break;
3398 andres@anarazel.de 1457 :UBC 0 : extraWaits++;
1458 : : }
1459 : :
1460 : : #ifdef LOCK_DEBUG
1461 : : {
1462 : : /* not waiting anymore */
1463 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1464 : :
1465 : : Assert(nwaiters < MAX_BACKENDS);
1466 : : }
1467 : : #endif
1468 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1469 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
2957 rhaas@postgresql.org 1470 :CBC 1008 : LWLockReportWaitEnd();
1471 : :
1472 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1473 : : }
1474 : : else
1475 : : {
1476 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1477 : :
1478 : : /*
1479 : : * Got lock in the second attempt, undo queueing. We need to treat
1480 : : * this as having successfully acquired the lock, otherwise we'd
1481 : : * not necessarily wake up people we've prevented from acquiring
1482 : : * the lock.
1483 : : */
3398 andres@anarazel.de 1484 : 67 : LWLockDequeueSelf(lock);
1485 : : }
1486 : : }
1487 : :
1488 : : /*
1489 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1490 : : */
4458 heikki.linnakangas@i 1491 [ - + ]: 113009 : while (extraWaits-- > 0)
2680 tgl@sss.pgh.pa.us 1492 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1493 : :
4458 heikki.linnakangas@i 1494 [ + + ]:CBC 113009 : if (mustwait)
1495 : : {
1496 : : /* Failed to get lock, so release interrupt holdoff */
1497 [ - + ]: 1008 : RESUME_INTERRUPTS();
1498 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1499 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1500 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1501 : : }
1502 : : else
1503 : : {
1504 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1505 : : /* Add lock to list of locks held by this backend */
3398 andres@anarazel.de 1506 : 112001 : held_lwlocks[num_held_lwlocks].lock = lock;
1507 : 112001 : held_lwlocks[num_held_lwlocks++].mode = mode;
1508 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1509 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1510 : : }
1511 : :
4458 heikki.linnakangas@i 1512 : 113009 : return !mustwait;
1513 : : }
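
The WALWriteLock pattern in the comment above can be sketched roughly as follows, with hypothetical names (flush_lock, flushed_upto, flush_up_to) standing in for the real WAL machinery; it assumes the same headers as the earlier sketch.

/*
 * Hypothetical sketch of the WAL-flush style usage described above.
 */
static void
flush_up_to(LWLock *flush_lock, pg_atomic_uint64 *flushed_upto, uint64 target)
{
    for (;;)
    {
        /* Someone may already have flushed past our target. */
        if (pg_atomic_read_u64(flushed_upto) >= target)
            return;

        if (LWLockAcquireOrWait(flush_lock, LW_EXCLUSIVE))
        {
            /* Lock was free and is now held: do the flush ourselves. */
            /* ... write and fsync up to 'target' ... */
            pg_atomic_write_u64(flushed_upto, target);
            LWLockRelease(flush_lock);
            return;
        }

        /* Lock was busy: we only waited for its release; loop and re-check. */
    }
}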
1514 : :
1515 : : /*
1516 : : * Does the lwlock in its current state need to wait for the variable value to
1517 : : * change?
1518 : : *
1519 : : * If we don't need to wait, and it's because the value of the variable has
1520 : : * changed, store the current value in newval.
1521 : : *
1522 : : * *result is set to true if the lock was free, and false otherwise.
1523 : : */
1524 : : static bool
264 michael@paquier.xyz 1525 :GNC 1347939 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1526 : : uint64 *newval, bool *result)
1527 : : {
1528 : : bool mustwait;
1529 : : uint64 value;
1530 : :
1531 : : /*
1532 : : * Test first to see if the slot is free right now.
1533 : : *
1534 : : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1535 : : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1536 : : * this, so we don't need a memory barrier here as far as the current
1537 : : * usage is concerned. But that might not be safe in general.
1538 : : */
3180 andres@anarazel.de 1539 :CBC 1347939 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1540 : :
1541 [ + + ]: 1347939 : if (!mustwait)
1542 : : {
1543 : 969308 : *result = true;
1544 : 969308 : return false;
1545 : : }
1546 : :
1547 : 378631 : *result = false;
1548 : :
1549 : : /*
1550 : : * Reading this value atomically is safe even on platforms where uint64
1551 : : * cannot be read without observing a torn value.
1552 : : */
264 michael@paquier.xyz 1553 :GNC 378631 : value = pg_atomic_read_u64(valptr);
1554 : :
3180 andres@anarazel.de 1555 [ + + ]:CBC 378631 : if (value != oldval)
1556 : : {
1557 : 375444 : mustwait = false;
1558 : 375444 : *newval = value;
1559 : : }
1560 : : else
1561 : : {
1562 : 3187 : mustwait = true;
1563 : : }
1564 : :
1565 : 378631 : return mustwait;
1566 : : }
1567 : :
1568 : : /*
1569 : : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1570 : : *
1571 : : * If the lock is held and *valptr equals oldval, waits until the lock is
1572 : : * either freed, or the lock holder updates *valptr by calling
1573 : : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1574 : : * waiting), returns true. If the lock is still held, but *valptr no longer
1575 : : * matches oldval, returns false and sets *newval to the current value in
1576 : : * *valptr.
1577 : : *
1578 : : * Note: this function ignores shared lock holders; if the lock is held
1579 : : * in shared mode, returns 'true'.
1580 : : *
1581 : : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1582 : : * hence the caller of this function may want to rely on an explicit barrier or
1583 : : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1584 : : */
1585 : : bool
264 michael@paquier.xyz 1586 :GNC 1344752 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1587 : : uint64 *newval)
1588 : : {
3677 heikki.linnakangas@i 1589 :CBC 1344752 : PGPROC *proc = MyProc;
1590 : 1344752 : int extraWaits = 0;
1591 : 1344752 : bool result = false;
1592 : : #ifdef LWLOCK_STATS
1593 : : lwlock_stats *lwstats;
1594 : :
1595 : : lwstats = get_lwlock_stats_entry(lock);
1596 : : #endif
1597 : :
1598 : : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1599 : :
1600 : : /*
1601 : : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1602 : : * cleanup mechanism to remove us from the wait queue if we got
1603 : : * interrupted.
1604 : : */
1605 : 1344752 : HOLD_INTERRUPTS();
1606 : :
1607 : : /*
1608 : : * Loop here to check the lock's status after each time we are signaled.
1609 : : */
1610 : : for (;;)
1611 : 898 : {
1612 : : bool mustwait;
1613 : :
3180 andres@anarazel.de 1614 : 1345650 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1615 : : &result);
1616 : :
3677 heikki.linnakangas@i 1617 [ + + ]: 1345650 : if (!mustwait)
1618 : 1343361 : break; /* the lock was free or value didn't match */
1619 : :
1620 : : /*
1621 : : * Add myself to wait queue. Note that this is racy, somebody else
1622 : : * could wake up before we're finished queuing. NB: We're using nearly
1623 : : * the same twice-in-a-row lock acquisition protocol as
1624 : : * LWLockAcquire(). Check its comments for details. The only
1625 : : * difference is that we also have to check the variable's value when
1626 : : * checking the state of the lock.
1627 : : */
3398 andres@anarazel.de 1628 : 2289 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1629 : :
1630 : : /*
1631 : : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1632 : : * lock is released.
1633 : : */
1634 : 2289 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1635 : :
1636 : : /*
1637 : : * We're now guaranteed to be woken up if necessary. Recheck the lock
1638 : : * and the variable's state.
1639 : : */
3180 1640 : 2289 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1641 : : &result);
1642 : :
1643 : : /* Ok, no conflict after we queued ourselves. Undo queueing. */
3398 1644 [ + + ]: 2289 : if (!mustwait)
1645 : : {
1646 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1647 : :
1648 : 1391 : LWLockDequeueSelf(lock);
1649 : 1391 : break;
1650 : : }
1651 : :
1652 : : /*
1653 : : * Wait until awakened.
1654 : : *
1655 : : * It is possible that we get awakened for a reason other than being
1656 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1657 : : * we've gotten the LWLock, re-increment the sema by the number of
1658 : : * additional signals received.
1659 : : */
1660 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1661 : :
1662 : : #ifdef LWLOCK_STATS
1663 : : lwstats->block_count++;
1664 : : #endif
1665 : :
2957 rhaas@postgresql.org 1666 : 898 : LWLockReportWaitStart(lock);
1667 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1668 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1669 : :
1670 : : for (;;)
1671 : : {
2680 tgl@sss.pgh.pa.us 1672 : 898 : PGSemaphoreLock(proc->sem);
511 andres@anarazel.de 1673 [ + - ]: 898 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
3677 heikki.linnakangas@i 1674 : 898 : break;
3677 heikki.linnakangas@i 1675 :UBC 0 : extraWaits++;
1676 : : }
1677 : :
1678 : : #ifdef LOCK_DEBUG
1679 : : {
1680 : : /* not waiting anymore */
1681 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1682 : :
1683 : : Assert(nwaiters < MAX_BACKENDS);
1684 : : }
1685 : : #endif
1686 : :
1687 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1688 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
2957 rhaas@postgresql.org 1689 :CBC 898 : LWLockReportWaitEnd();
1690 : :
1691 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1692 : :
1693 : : /* Now loop back and check the status of the lock again. */
1694 : : }
1695 : :
1696 : : /*
1697 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1698 : : */
3677 heikki.linnakangas@i 1699 [ - + ]: 1344752 : while (extraWaits-- > 0)
2680 tgl@sss.pgh.pa.us 1700 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1701 : :
1702 : : /*
1703 : : * Now okay to allow cancel/die interrupts.
1704 : : */
3677 heikki.linnakangas@i 1705 [ - + ]:CBC 1344752 : RESUME_INTERRUPTS();
1706 : :
1707 : 1344752 : return result;
1708 : : }
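
A minimal sketch of the waiter side of this protocol, under the assumption that 'progress' is the pg_atomic_uint64 conventionally associated with 'lock'; the names are hypothetical and the memory-ordering caveat above is glossed over.

/*
 * Hypothetical waiter: wait until 'progress' advances past 'target', or
 * until the lock is released.
 */
static void
wait_for_progress(LWLock *lock, pg_atomic_uint64 *progress, uint64 target)
{
    uint64      seen = pg_atomic_read_u64(progress);

    while (seen < target)
    {
        uint64      newval;

        if (LWLockWaitForVar(lock, progress, seen, &newval))
            break;              /* lock is free; the holder is done */
        seen = newval;          /* still held, but the value moved; re-check */
    }
}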
1709 : :
1710 : :
1711 : : /*
1712 : : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1713 : : *
1714 : : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1715 : : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1716 : : * waiting processes so that any process calling LWLockWaitForVar() on the same
1717 : : * lock is guaranteed to see the new value, and act accordingly.
1718 : : *
1719 : : * The caller must be holding the lock in exclusive mode.
1720 : : */
1721 : : void
264 michael@paquier.xyz 1722 :GNC 709747 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1723 : : {
1724 : : proclist_head wakeup;
1725 : : proclist_mutable_iter iter;
1726 : :
1727 : : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1728 : :
1729 : : /*
1730 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1731 : : * that the variable is updated before waking up waiters.
1732 : : */
1733 : 709747 : pg_atomic_exchange_u64(valptr, val);
1734 : :
2799 rhaas@postgresql.org 1735 :CBC 709747 : proclist_init(&wakeup);
1736 : :
2926 andres@anarazel.de 1737 : 709747 : LWLockWaitListLock(lock);
1738 : :
3398 1739 [ - + ]: 709747 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1740 : :
1741 : : /*
1742 : : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1743 : : * up. They are always in the front of the queue.
1744 : : * up. They are always at the front of the queue.
2799 rhaas@postgresql.org 1745 [ + + + + ]: 709821 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1746 : : {
1747 : 167 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1748 : :
3398 andres@anarazel.de 1749 [ + + ]: 167 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1750 : 93 : break;
1751 : :
2799 rhaas@postgresql.org 1752 : 74 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1753 : 74 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1754 : :
1755 : : /* see LWLockWakeup() */
511 andres@anarazel.de 1756 [ - + ]: 74 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1757 [ + + ]: 74 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1758 : : }
1759 : :
1760 : : /* We are done updating shared state of the lock itself. */
2926 1761 : 709747 : LWLockWaitListUnlock(lock);
1762 : :
1763 : : /*
1764 : : * Awaken any waiters I removed from the queue.
1765 : : */
2799 rhaas@postgresql.org 1766 [ + + + + : 709821 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1767 : : {
1768 : 74 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1769 : :
1770 : 74 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1771 : : /* check comment in LWLockWakeup() about this barrier */
3404 andres@anarazel.de 1772 : 74 : pg_write_barrier();
511 1773 : 74 : waiter->lwWaiting = LW_WS_NOT_WAITING;
2680 tgl@sss.pgh.pa.us 1774 : 74 : PGSemaphoreUnlock(waiter->sem);
1775 : : }
3677 heikki.linnakangas@i 1776 : 709747 : }
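
The holder side is symmetrical; the hypothetical sketch below advances the variable with LWLockUpdateVar() between stages of work, so concurrent LWLockWaitForVar() callers see each new value, and resets it to a "free lock" value with LWLockReleaseClearVar() at the end.

/*
 * Hypothetical holder: advance the shared variable while holding the lock
 * exclusively, then reset it when releasing.
 */
static void
do_staged_work(LWLock *lock, pg_atomic_uint64 *progress)
{
    LWLockAcquire(lock, LW_EXCLUSIVE);

    /* ... first stage of work ... */
    LWLockUpdateVar(lock, progress, 1);     /* wakes LW_WAIT_UNTIL_FREE waiters */

    /* ... second stage of work ... */
    LWLockUpdateVar(lock, progress, 2);

    /* Reset the variable to its "free lock" value and release. */
    LWLockReleaseClearVar(lock, progress, 0);
}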
1777 : :
1778 : :
1779 : : /*
1780 : : * LWLockRelease - release a previously acquired lock
1781 : : */
1782 : : void
3492 rhaas@postgresql.org 1783 : 309921568 : LWLockRelease(LWLock *lock)
1784 : : {
1785 : : LWLockMode mode;
1786 : : uint32 oldstate;
1787 : : bool check_waiters;
1788 : : int i;
1789 : :
1790 : : /*
1791 : : * Remove lock from list of locks held. Usually, but not always, it will
1792 : : * be the latest-acquired lock; so search array backwards.
1793 : : */
8207 bruce@momjian.us 1794 [ + - ]: 343612643 : for (i = num_held_lwlocks; --i >= 0;)
3398 andres@anarazel.de 1795 [ + + ]: 343612643 : if (lock == held_lwlocks[i].lock)
8233 tgl@sss.pgh.pa.us 1796 : 309921568 : break;
1797 : :
1798 [ - + ]: 309921568 : if (i < 0)
2676 rhaas@postgresql.org 1799 [ # # ]:UBC 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1800 : :
2686 sfrost@snowman.net 1801 :CBC 309921568 : mode = held_lwlocks[i].mode;
1802 : :
8233 tgl@sss.pgh.pa.us 1803 : 309921568 : num_held_lwlocks--;
1804 [ + + ]: 343612643 : for (; i < num_held_lwlocks; i++)
8207 bruce@momjian.us 1805 : 33691075 : held_lwlocks[i] = held_lwlocks[i + 1];
1806 : :
1807 : : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1808 : :
1809 : : /*
1810 : : * Release my hold on lock, after that it can immediately be acquired by
1811 : : * others, even if we still have to wakeup other waiters.
1812 : : */
3398 andres@anarazel.de 1813 [ + + ]: 309921568 : if (mode == LW_EXCLUSIVE)
1814 : 197946118 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1815 : : else
1816 : 111975450 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1817 : :
1818 : : /* nobody else can have that kind of lock */
1819 [ - + ]: 309921568 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1820 : :
1821 : : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1822 : : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1823 : :
1824 : : /*
1825 : : * We're still waiting for backends to get scheduled; don't wake them up
1826 : : * again.
1827 : : */
1828 [ + + ]: 309921568 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1829 : 654582 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1830 [ + + ]: 654582 : (oldstate & LW_LOCK_MASK) == 0)
1831 : 631527 : check_waiters = true;
1832 : : else
1833 : 309290041 : check_waiters = false;
1834 : :
1835 : : /*
1836 : : * As waking up waiters requires the spinlock to be acquired, only do so
1837 : : * if necessary.
1838 : : */
1839 [ + + ]: 309921568 : if (check_waiters)
1840 : : {
1841 : : /* XXX: remove before commit? */
1842 : : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1843 : 631527 : LWLockWakeup(lock);
1844 : : }
1845 : :
1846 : : /*
1847 : : * Now okay to allow cancel/die interrupts.
1848 : : */
8233 tgl@sss.pgh.pa.us 1849 [ - + ]: 309921568 : RESUME_INTERRUPTS();
1850 : 309921568 : }
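
To illustrate the note above that the released lock is usually, but not always, the most recently acquired one, this hypothetical sketch releases two shared locks in acquisition order; both orders are legal, since LWLockRelease() searches held_lwlocks[] backwards for the lock being released.

/* Hypothetical: locks need not be released in reverse acquisition order. */
static void
non_lifo_release(LWLock *lock_a, LWLock *lock_b)
{
    LWLockAcquire(lock_a, LW_SHARED);
    LWLockAcquire(lock_b, LW_SHARED);

    /* ... read the shared structures both locks protect ... */

    LWLockRelease(lock_a);      /* not the latest-acquired lock */
    LWLockRelease(lock_b);
}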
1851 : :
1852 : : /*
1853 : : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1854 : : */
1855 : : void
264 michael@paquier.xyz 1856 :GNC 13642158 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1857 : : {
1858 : : /*
1859 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1860 : : * that the variable is updated before releasing the lock.
1861 : : */
1862 : 13642158 : pg_atomic_exchange_u64(valptr, val);
1863 : :
3180 andres@anarazel.de 1864 :CBC 13642158 : LWLockRelease(lock);
1865 : 13642158 : }
1866 : :
1867 : :
1868 : : /*
1869 : : * LWLockReleaseAll - release all currently-held locks
1870 : : *
1871 : : * Used to clean up after ereport(ERROR). An important difference between this
1872 : : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1873 : : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1874 : : * has been set to an appropriate level earlier in error recovery. We could
1875 : : * decrement it below zero if we allow it to drop for each released lock!
1876 : : */
1877 : : void
8233 tgl@sss.pgh.pa.us 1878 : 47730 : LWLockReleaseAll(void)
1879 : : {
1880 [ + + ]: 47929 : while (num_held_lwlocks > 0)
1881 : : {
1882 : 199 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1883 : :
3398 andres@anarazel.de 1884 : 199 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1885 : : }
8233 tgl@sss.pgh.pa.us 1886 : 47730 : }
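
A hypothetical sketch of the kind of error-recovery caller this is written for; the surrounding cleanup is illustrative only.

/*
 * Hypothetical error-recovery path: InterruptHoldoffCount has already been
 * set by error recovery, so just drop every held LWLock.
 */
static void
cleanup_after_error(void)
{
    LWLockReleaseAll();

    /* ... other hypothetical cleanup ... */
}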
1887 : :
1888 : :
1889 : : /*
1890 : : * LWLockHeldByMe - test whether my process holds a lock in any mode
1891 : : *
1892 : : * This is meant as debug support only.
1893 : : */
1894 : : bool
573 pg@bowt.ie 1895 : 76781841 : LWLockHeldByMe(LWLock *lock)
1896 : : {
1897 : : int i;
1898 : :
7247 tgl@sss.pgh.pa.us 1899 [ + + ]: 121380061 : for (i = 0; i < num_held_lwlocks; i++)
1900 : : {
573 pg@bowt.ie 1901 [ + + ]: 70901641 : if (held_lwlocks[i].lock == lock)
7247 tgl@sss.pgh.pa.us 1902 : 26303421 : return true;
1903 : : }
1904 : 50478420 : return false;
1905 : : }
1906 : :
1907 : : /*
1908 : : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1909 : : *
1910 : : * This is meant as debug support only.
1911 : : */
1912 : : bool
573 pg@bowt.ie 1913 : 1151204 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1914 : : {
1915 : : char *held_lock_addr;
1916 : : char *begin;
1917 : : char *end;
1918 : : int i;
1919 : :
1920 : 1151204 : begin = (char *) lock;
643 tmunro@postgresql.or 1921 : 1151204 : end = begin + nlocks * stride;
1922 [ + + ]: 1152561 : for (i = 0; i < num_held_lwlocks; i++)
1923 : : {
1924 : 1357 : held_lock_addr = (char *) held_lwlocks[i].lock;
1925 [ - + - - ]: 1357 : if (held_lock_addr >= begin &&
643 tmunro@postgresql.or 1926 :UBC 0 : held_lock_addr < end &&
1927 [ # # ]: 0 : (held_lock_addr - begin) % stride == 0)
1928 : 0 : return true;
1929 : : }
643 tmunro@postgresql.or 1930 :CBC 1151204 : return false;
1931 : : }
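
A hypothetical sketch of the array form: with an array of structs that each embed an LWLock, the stride argument is the struct size, so a single call covers every lock in the array.

/* Hypothetical array of lockable slots. */
typedef struct MySlot
{
    LWLock      lock;
    int         payload;
} MySlot;

static void
assert_no_slot_lock_held(MySlot *slots, int nslots)
{
    /* true if any slots[i].lock is held by this backend */
    Assert(!LWLockAnyHeldByMe(&slots[0].lock, nslots, sizeof(MySlot)));
}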
1932 : :
1933 : : /*
1934 : : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1935 : : *
1936 : : * This is meant as debug support only.
1937 : : */
1938 : : bool
573 pg@bowt.ie 1939 : 62430695 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1940 : : {
1941 : : int i;
1942 : :
2778 simon@2ndQuadrant.co 1943 [ + - ]: 82451447 : for (i = 0; i < num_held_lwlocks; i++)
1944 : : {
573 pg@bowt.ie 1945 [ + + + - ]: 82451447 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
2778 simon@2ndQuadrant.co 1946 : 62430695 : return true;
1947 : : }
2778 simon@2ndQuadrant.co 1948 :UBC 0 : return false;
1949 : : }
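
These held-by-me checks are intended for assertions; a hypothetical sketch of the usual pattern follows, where a routine that modifies shared state asserts that its caller already holds the protecting lock exclusively.

/* Hypothetical: callers must already hold 'lock' exclusively. */
static void
update_shared_counter(LWLock *lock, pg_atomic_uint64 *counter)
{
    Assert(LWLockHeldByMeInMode(lock, LW_EXCLUSIVE));

    pg_atomic_fetch_add_u64(counter, 1);
}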