TLA Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_shmem.c
4 : * Storage of stats entries in shared memory
5 : *
6 : * Copyright (c) 2001-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/activity/pgstat_shmem.c
10 : * -------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "pgstat.h"
16 : #include "storage/shmem.h"
17 : #include "utils/memutils.h"
18 : #include "utils/pgstat_internal.h"
19 :
20 :
21 : #define PGSTAT_ENTRY_REF_HASH_SIZE 128
22 :
23 : /*
 * Hash table entry for finding the PgStat_EntryRef for a key. This is the
 * element type of the pgstat_entry_ref_hash simplehash table instantiated
 * in this file.
 */
24 : typedef struct PgStat_EntryRefHashEntry
25 : {
26 : PgStat_HashKey key; /* hash key */
27 : char status; /* for simplehash use */
28 : PgStat_EntryRef *entry_ref; /* reference stored for this key */
29 : } PgStat_EntryRefHashEntry;
30 :
31 :
32 : /*
 * Instantiate a simplehash table, with function prefix
 * "pgstat_entry_ref_hash", for references to shared statistics entries.
 * Elements are PgStat_EntryRefHashEntry structs keyed by PgStat_HashKey;
 * hashing and equality are delegated to pgstat_hash_hash_key() and
 * pgstat_cmp_hash_key(), applied to the whole key struct.
 */
33 : #define SH_PREFIX pgstat_entry_ref_hash
34 : #define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry
35 : #define SH_KEY_TYPE PgStat_HashKey
36 : #define SH_KEY key
37 : #define SH_HASH_KEY(tb, key) \
38 : pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
39 : #define SH_EQUAL(tb, a, b) \
40 : pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
41 : #define SH_SCOPE static inline
42 : #define SH_DEFINE
43 : #define SH_DECLARE
44 : #include "lib/simplehash.h"
45 :
46 :
/* Forward declarations of file-local helpers that are used before their definition. */
47 : static void pgstat_drop_database_and_contents(Oid dboid);
48 :
49 : static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
50 :
51 : static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
52 : static bool pgstat_need_entry_refs_gc(void);
53 : static void pgstat_gc_entry_refs(void);
54 : static void pgstat_release_all_entry_refs(bool discard_pending);
/*
 * Callback for pgstat_release_matching_entry_refs(): returns true if the
 * given cached reference should be released. "data" is the match_data value
 * handed to pgstat_release_matching_entry_refs().
 */
55 : typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
56 : static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
57 :
58 : static void pgstat_setup_memcxt(void);
59 :
60 :
61 : /*
 * Parameters for the shared hash. The same parameters are passed to
 * dshash_create() in StatsShmemInit() and to dshash_attach() in
 * pgstat_attach_shmem(). Entries are PgStatShared_HashEntry structs keyed by
 * PgStat_HashKey.
 */
62 : static const dshash_parameters dsh_params = {
63 : sizeof(PgStat_HashKey),
64 : sizeof(PgStatShared_HashEntry),
65 : pgstat_cmp_hash_key,
66 : pgstat_hash_hash_key,
67 : LWTRANCHE_PGSTATS_HASH
68 : };
69 :
70 :
71 : /*
72 : * Backend local references to shared stats entries. If there are pending
73 : * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
74 : * list.
75 : *
76 : * When a stats entry is dropped each backend needs to release its reference
77 : * to it before the memory can be released. To trigger that
78 : * pgStatLocal.shmem->gc_request_count is incremented - which each backend
79 : * compares to their copy of pgStatSharedRefAge on a regular basis.
80 : */
81 : static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
82 : static int pgStatSharedRefAge = 0; /* cache age of pgStatShmLookupCache */
83 :
84 : /*
 * Memory contexts for the local reference bookkeeping. Kept separate to make
 * it easier to track / attribute memory usage:
 *
 * - pgStatSharedRefContext holds the individual PgStat_EntryRef structs
 * - pgStatEntryRefHashContext holds the pgStatEntryRefHash table itself
 *
 * Both are created on first use by pgstat_setup_memcxt().
 */
89 : static MemoryContext pgStatSharedRefContext = NULL;
90 : static MemoryContext pgStatEntryRefHashContext = NULL;
91 :
92 :
93 : /* ------------------------------------------------------------
94 : * Public functions called from postmaster follow
95 : * ------------------------------------------------------------
96 : */
97 :
98 : /*
99 : * The size of the shared memory allocation for stats stored in the shared
100 : * stats hash table. This allocation will be done as part of the main shared
101 : * memory, rather than dynamic shared memory, allowing it to be initialized in
102 : * postmaster.
103 : */
104 : static Size
105 CBC 10042 : pgstat_dsa_init_size(void)
106 : {
107 : Size sz;
108 :
109 : /*
110 : * The dshash header / initial buckets array needs to fit into "plain"
111 : * shared memory, but it's beneficial to not need dsm segments
112 : * immediately. A size of 256kB seems works well and is not
113 : * disproportional compared to other constant sized shared memory
114 : * allocations. NB: To avoid DSMs further, the user can configure
115 : * min_dynamic_shared_memory.
116 : */
117 10042 : sz = 256 * 1024;
118 10042 : Assert(dsa_minimum_size() <= sz);
119 10042 : return MAXALIGN(sz);
120 : }
121 :
122 : /*
123 : * Compute shared memory space needed for cumulative statistics
124 : */
125 : Size
126 4564 : StatsShmemSize(void)
127 : {
128 : Size sz;
129 :
130 4564 : sz = MAXALIGN(sizeof(PgStat_ShmemControl));
131 4564 : sz = add_size(sz, pgstat_dsa_init_size());
132 :
133 4564 : return sz;
134 : }
135 :
136 : /*
137 : * Initialize cumulative statistics system during startup
138 : */
139 : void
140 1826 : StatsShmemInit(void)
141 : {
142 : bool found;
143 : Size sz;
144 :
145 1826 : sz = StatsShmemSize();
146 1826 : pgStatLocal.shmem = (PgStat_ShmemControl *)
147 1826 : ShmemInitStruct("Shared Memory Stats", sz, &found);
148 :
149 1826 : if (!IsUnderPostmaster)
150 : {
151 : dsa_area *dsa;
152 : dshash_table *dsh;
153 1826 : PgStat_ShmemControl *ctl = pgStatLocal.shmem;
154 1826 : char *p = (char *) ctl;
155 :
156 1826 : Assert(!found);
157 :
158 : /* the allocation of pgStatLocal.shmem itself */
159 1826 : p += MAXALIGN(sizeof(PgStat_ShmemControl));
160 :
161 : /*
162 : * Create a small dsa allocation in plain shared memory. This is
163 : * required because postmaster cannot use dsm segments. It also
164 : * provides a small efficiency win.
165 : */
166 1826 : ctl->raw_dsa_area = p;
167 1826 : p += MAXALIGN(pgstat_dsa_init_size());
168 1826 : dsa = dsa_create_in_place(ctl->raw_dsa_area,
169 : pgstat_dsa_init_size(),
170 : LWTRANCHE_PGSTATS_DSA, 0);
171 1826 : dsa_pin(dsa);
172 :
173 : /*
174 : * To ensure dshash is created in "plain" shared memory, temporarily
175 : * limit size of dsa to the initial size of the dsa.
176 : */
177 1826 : dsa_set_size_limit(dsa, pgstat_dsa_init_size());
178 :
179 : /*
180 : * With the limit in place, create the dshash table. XXX: It'd be nice
181 : * if there were dshash_create_in_place().
182 : */
183 1826 : dsh = dshash_create(dsa, &dsh_params, 0);
184 1826 : ctl->hash_handle = dshash_get_hash_table_handle(dsh);
185 :
186 : /* lift limit set above */
187 1826 : dsa_set_size_limit(dsa, -1);
188 :
189 : /*
190 : * Postmaster will never access these again, thus free the local
191 : * dsa/dshash references.
192 : */
193 1826 : dshash_detach(dsh);
194 1826 : dsa_detach(dsa);
195 :
196 1826 : pg_atomic_init_u64(&ctl->gc_request_count, 1);
197 :
198 :
199 : /* initialize fixed-numbered stats */
200 1826 : LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA);
201 1826 : LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA);
202 1826 : LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
203 1826 : LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
204 1826 : LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
205 :
206 GNC 27390 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
207 25564 : LWLockInitialize(&ctl->io.locks[i],
208 : LWTRANCHE_PGSTATS_DATA);
209 : }
210 ECB : else
211 : {
212 UIC 0 : Assert(found);
213 : }
214 GIC 1826 : }
215 :
216 EUB : void
217 GIC 13301 : pgstat_attach_shmem(void)
218 ECB : {
219 : MemoryContext oldcontext;
220 :
221 CBC 13301 : Assert(pgStatLocal.dsa == NULL);
222 :
223 : /* stats shared memory persists for the backend lifetime */
224 GIC 13301 : oldcontext = MemoryContextSwitchTo(TopMemoryContext);
225 ECB :
226 GIC 13301 : pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area,
227 : NULL);
228 CBC 13301 : dsa_pin_mapping(pgStatLocal.dsa);
229 :
230 26602 : pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
231 GIC 13301 : pgStatLocal.shmem->hash_handle, 0);
232 ECB :
233 GIC 13301 : MemoryContextSwitchTo(oldcontext);
234 CBC 13301 : }
235 ECB :
236 : void
237 CBC 13301 : pgstat_detach_shmem(void)
238 ECB : {
239 GIC 13301 : Assert(pgStatLocal.dsa);
240 :
241 ECB : /* we shouldn't leave references to shared stats */
242 GIC 13301 : pgstat_release_all_entry_refs(false);
243 ECB :
244 GIC 13301 : dshash_detach(pgStatLocal.shared_hash);
245 13301 : pgStatLocal.shared_hash = NULL;
246 ECB :
247 GIC 13301 : dsa_detach(pgStatLocal.dsa);
248 CBC 13301 : pgStatLocal.dsa = NULL;
249 13301 : }
250 :
251 ECB :
252 : /* ------------------------------------------------------------
253 : * Maintenance of shared memory stats entries
254 : * ------------------------------------------------------------
255 : */
256 :
257 : PgStatShared_Common *
258 GIC 301229 : pgstat_init_entry(PgStat_Kind kind,
259 : PgStatShared_HashEntry *shhashent)
260 : {
261 : /* Create new stats entry. */
262 ECB : dsa_pointer chunk;
263 : PgStatShared_Common *shheader;
264 :
265 : /*
266 : * Initialize refcount to 1, marking it as valid / not dropped. The entry
267 : * can't be freed before the initialization because it can't be found as
268 : * long as we hold the dshash partition lock. Caller needs to increase
269 : * further if a longer lived reference is needed.
270 : */
271 GIC 301229 : pg_atomic_init_u32(&shhashent->refcount, 1);
272 301229 : shhashent->dropped = false;
273 :
274 301229 : chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size);
275 CBC 301229 : shheader = dsa_get_address(pgStatLocal.dsa, chunk);
276 301229 : shheader->magic = 0xdeadbeef;
277 :
278 ECB : /* Link the new entry from the hash entry. */
279 CBC 301229 : shhashent->body = chunk;
280 ECB :
281 GIC 301229 : LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA);
282 :
283 CBC 301229 : return shheader;
284 : }
285 ECB :
286 : static PgStatShared_Common *
287 CBC 27 : pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent)
288 : {
289 : PgStatShared_Common *shheader;
290 :
291 27 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
292 :
293 : /* mark as not dropped anymore */
294 GIC 27 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
295 CBC 27 : shhashent->dropped = false;
296 :
297 : /* reinitialize content */
298 27 : Assert(shheader->magic == 0xdeadbeef);
299 27 : memset(pgstat_get_entry_data(kind, shheader), 0,
300 : pgstat_get_entry_len(kind));
301 :
302 27 : return shheader;
303 ECB : }
304 :
305 : static void
306 CBC 1793136 : pgstat_setup_shared_refs(void)
307 : {
308 GIC 1793136 : if (likely(pgStatEntryRefHash != NULL))
309 1780693 : return;
310 ECB :
311 GIC 12443 : pgStatEntryRefHash =
312 CBC 12443 : pgstat_entry_ref_hash_create(pgStatEntryRefHashContext,
313 ECB : PGSTAT_ENTRY_REF_HASH_SIZE, NULL);
314 GIC 12443 : pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
315 CBC 12443 : Assert(pgStatSharedRefAge != 0);
316 ECB : }
317 :
318 : /*
319 : * Helper function for pgstat_get_entry_ref().
320 : */
321 : static void
322 GIC 585284 : pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref,
323 : PgStatShared_HashEntry *shhashent,
324 : PgStatShared_Common *shheader)
325 : {
326 CBC 585284 : Assert(shheader->magic == 0xdeadbeef);
327 GIC 585284 : Assert(pg_atomic_read_u32(&shhashent->refcount) > 0);
328 :
329 585284 : pg_atomic_fetch_add_u32(&shhashent->refcount, 1);
330 ECB :
331 CBC 585284 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
332 :
333 585284 : entry_ref->shared_stats = shheader;
334 GIC 585284 : entry_ref->shared_entry = shhashent;
335 CBC 585284 : }
336 :
337 ECB : /*
338 : * Helper function for pgstat_get_entry_ref().
339 : */
340 : static bool
341 GIC 1793136 : pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p)
342 : {
343 : bool found;
344 : PgStat_EntryRefHashEntry *cache_entry;
345 ECB :
346 : /*
347 : * We immediately insert a cache entry, because it avoids 1) multiple
348 : * hashtable lookups in case of a cache miss 2) having to deal with
349 : * out-of-memory errors after incrementing PgStatShared_Common->refcount.
350 : */
351 :
352 GIC 1793136 : cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found);
353 :
354 1793136 : if (!found || !cache_entry->entry_ref)
355 763581 : {
356 ECB : PgStat_EntryRef *entry_ref;
357 :
358 CBC 763581 : cache_entry->entry_ref = entry_ref =
359 763581 : MemoryContextAlloc(pgStatSharedRefContext,
360 : sizeof(PgStat_EntryRef));
361 GIC 763581 : entry_ref->shared_stats = NULL;
362 CBC 763581 : entry_ref->shared_entry = NULL;
363 763581 : entry_ref->pending = NULL;
364 :
365 763581 : found = false;
366 ECB : }
367 CBC 1029555 : else if (cache_entry->entry_ref->shared_stats == NULL)
368 : {
369 LBC 0 : Assert(cache_entry->entry_ref->pending == NULL);
370 UIC 0 : found = false;
371 ECB : }
372 : else
373 EUB : {
374 : PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY;
375 :
376 GIC 1029555 : entry_ref = cache_entry->entry_ref;
377 1029555 : Assert(entry_ref->shared_entry != NULL);
378 1029555 : Assert(entry_ref->shared_stats != NULL);
379 :
380 CBC 1029555 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
381 ECB : /* should have at least our reference */
382 CBC 1029555 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0);
383 : }
384 ECB :
385 GIC 1793136 : *entry_ref_p = cache_entry->entry_ref;
386 CBC 1793136 : return found;
387 : }
388 :
389 ECB : /*
390 : * Get a shared stats reference. If create is true, the shared stats object is
391 : * created if it does not exist.
392 : *
393 : * When create is true, and created_entry is non-NULL, it'll be set to true
394 : * if the entry is newly created, false otherwise.
395 : */
396 : PgStat_EntryRef *
397 GIC 1793136 : pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
398 : bool *created_entry)
399 : {
400 1793136 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
401 ECB : PgStatShared_HashEntry *shhashent;
402 GIC 1793136 : PgStatShared_Common *shheader = NULL;
403 : PgStat_EntryRef *entry_ref;
404 ECB :
405 : /*
406 : * passing in created_entry only makes sense if we possibly could create
407 : * entry.
408 : */
409 GNC 1793136 : Assert(create || created_entry == NULL);
410 GIC 1793136 : pgstat_assert_is_up();
411 1793136 : Assert(pgStatLocal.shared_hash != NULL);
412 1793136 : Assert(!pgStatLocal.shmem->is_shutdown);
413 ECB :
414 CBC 1793136 : pgstat_setup_memcxt();
415 1793136 : pgstat_setup_shared_refs();
416 ECB :
417 GIC 1793136 : if (created_entry != NULL)
418 CBC 104 : *created_entry = false;
419 ECB :
420 : /*
421 : * Check if other backends dropped stats that could not be deleted because
422 : * somebody held references to it. If so, check this backend's references.
423 : * This is not expected to happen often. The location of the check is a
424 : * bit random, but this is a relatively frequently called path, so better
425 : * than most.
426 : */
427 GIC 1793136 : if (pgstat_need_entry_refs_gc())
428 5537 : pgstat_gc_entry_refs();
429 :
430 : /*
431 ECB : * First check the lookup cache hashtable in local memory. If we find a
432 : * match here we can avoid taking locks / causing contention.
433 : */
434 GIC 1793136 : if (pgstat_get_entry_ref_cached(key, &entry_ref))
435 1029555 : return entry_ref;
436 :
437 763581 : Assert(entry_ref != NULL);
438 ECB :
439 : /*
440 : * Do a lookup in the hash table first - it's quite likely that the entry
441 : * already exists, and that way we only need a shared lock.
442 : */
443 GIC 763581 : shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
444 :
445 763581 : if (create && !shhashent)
446 : {
447 ECB : bool shfound;
448 :
449 : /*
450 : * It's possible that somebody created the entry since the above
451 : * lookup. If so, fall through to the same path as if we'd have if it
452 : * already had been created before the dshash_find() calls.
453 : */
454 GIC 147176 : shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
455 147176 : if (!shfound)
456 : {
457 147176 : shheader = pgstat_init_entry(kind, shhashent);
458 CBC 147176 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
459 ECB :
460 GIC 147176 : if (created_entry != NULL)
461 CBC 45 : *created_entry = true;
462 ECB :
463 GIC 147176 : return entry_ref;
464 ECB : }
465 : }
466 :
467 CBC 616405 : if (!shhashent)
468 : {
469 : /*
470 : * If we're not creating, delete the reference again. In all
471 ECB : * likelihood it's just a stats lookup - no point wasting memory for a
472 : * shared ref to nothing...
473 : */
474 GIC 178260 : pgstat_release_entry_ref(key, entry_ref, false);
475 :
476 178260 : return NULL;
477 : }
478 ECB : else
479 : {
480 : /*
481 : * Can get here either because dshash_find() found a match, or if
482 : * dshash_find_or_insert() found a concurrently inserted entry.
483 : */
484 :
485 GIC 438145 : if (shhashent->dropped && create)
486 : {
487 : /*
488 : * There are legitimate cases where the old stats entry might not
489 ECB : * yet have been dropped by the time it's reused. The most obvious
490 : * case are replication slot stats, where a new slot can be
491 : * created with the same index just after dropping. But oid
492 : * wraparound can lead to other cases as well. We just reset the
493 : * stats to their plain state.
494 : */
495 GIC 27 : shheader = pgstat_reinit_entry(kind, shhashent);
496 27 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
497 :
498 27 : if (created_entry != NULL)
499 LBC 0 : *created_entry = true;
500 ECB :
501 GIC 27 : return entry_ref;
502 ECB : }
503 GBC 438118 : else if (shhashent->dropped)
504 : {
505 CBC 37 : dshash_release_lock(pgStatLocal.shared_hash, shhashent);
506 GIC 37 : pgstat_release_entry_ref(key, entry_ref, false);
507 ECB :
508 GIC 37 : return NULL;
509 ECB : }
510 : else
511 : {
512 CBC 438081 : shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body);
513 GIC 438081 : pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
514 :
515 438081 : return entry_ref;
516 ECB : }
517 : }
518 : }
519 :
520 : static void
521 GIC 763581 : pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref,
522 : bool discard_pending)
523 : {
524 763581 : if (entry_ref && entry_ref->pending)
525 ECB : {
526 GIC 27724 : if (discard_pending)
527 27724 : pgstat_delete_pending_entry(entry_ref);
528 ECB : else
529 UIC 0 : elog(ERROR, "releasing ref with pending data");
530 ECB : }
531 :
532 GIC 763581 : if (entry_ref && entry_ref->shared_stats)
533 EUB : {
534 GIC 585284 : Assert(entry_ref->shared_stats->magic == 0xdeadbeef);
535 585284 : Assert(entry_ref->pending == NULL);
536 ECB :
537 : /*
538 : * This can't race with another backend looking up the stats entry and
539 : * increasing the refcount because it is not "legal" to create
540 : * additional references to dropped entries.
541 : */
542 GIC 585284 : if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1)
543 : {
544 : PgStatShared_HashEntry *shent;
545 :
546 ECB : /*
547 : * We're the last referrer to this entry, try to drop the shared
548 : * entry.
549 : */
550 :
551 : /* only dropped entries can reach a 0 refcount */
552 GIC 4591 : Assert(entry_ref->shared_entry->dropped);
553 :
554 4591 : shent = dshash_find(pgStatLocal.shared_hash,
555 4591 : &entry_ref->shared_entry->key,
556 ECB : true);
557 GIC 4591 : if (!shent)
558 LBC 0 : elog(ERROR, "could not find just referenced shared stats entry");
559 ECB :
560 GIC 4591 : Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0);
561 CBC 4591 : Assert(entry_ref->shared_entry == shent);
562 EUB :
563 GIC 4591 : pgstat_free_entry(shent, NULL);
564 ECB : }
565 : }
566 :
567 CBC 763581 : if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key))
568 UIC 0 : elog(ERROR, "entry ref vanished before deletion");
569 :
570 GIC 763581 : if (entry_ref)
571 CBC 763581 : pfree(entry_ref);
572 GBC 763581 : }
573 :
574 ECB : bool
575 CBC 816459 : pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait)
576 ECB : {
577 GIC 816459 : LWLock *lock = &entry_ref->shared_stats->lock;
578 :
579 CBC 816459 : if (nowait)
580 GIC 318911 : return LWLockConditionalAcquire(lock, LW_EXCLUSIVE);
581 ECB :
582 GIC 497548 : LWLockAcquire(lock, LW_EXCLUSIVE);
583 CBC 497548 : return true;
584 ECB : }
585 :
586 : /*
587 : * Separate from pgstat_lock_entry() as most callers will need to lock
588 : * exclusively.
589 : */
590 : bool
591 GIC 6008 : pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait)
592 : {
593 6008 : LWLock *lock = &entry_ref->shared_stats->lock;
594 :
595 CBC 6008 : if (nowait)
596 UIC 0 : return LWLockConditionalAcquire(lock, LW_SHARED);
597 ECB :
598 GIC 6008 : LWLockAcquire(lock, LW_SHARED);
599 CBC 6008 : return true;
600 EUB : }
601 :
602 ECB : void
603 CBC 822462 : pgstat_unlock_entry(PgStat_EntryRef *entry_ref)
604 : {
605 GIC 822462 : LWLockRelease(&entry_ref->shared_stats->lock);
606 822462 : }
607 ECB :
608 : /*
609 : * Helper function to fetch and lock shared stats.
610 : */
611 : PgStat_EntryRef *
612 GIC 66559 : pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
613 : bool nowait)
614 : {
615 : PgStat_EntryRef *entry_ref;
616 ECB :
617 : /* find shared table stats entry corresponding to the local entry */
618 GIC 66559 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL);
619 :
620 : /* lock the shared entry to protect the content, skip if failed */
621 66559 : if (!pgstat_lock_entry(entry_ref, nowait))
622 LBC 0 : return NULL;
623 :
624 GIC 66559 : return entry_ref;
625 ECB : }
626 EUB :
627 : void
628 CBC 1768 : pgstat_request_entry_refs_gc(void)
629 : {
630 GIC 1768 : pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1);
631 1768 : }
632 ECB :
633 : static bool
634 CBC 1793136 : pgstat_need_entry_refs_gc(void)
635 ECB : {
636 : uint64 curage;
637 :
638 CBC 1793136 : if (!pgStatEntryRefHash)
639 UIC 0 : return false;
640 :
641 : /* should have been initialized when creating pgStatEntryRefHash */
642 CBC 1793136 : Assert(pgStatSharedRefAge != 0);
643 EUB :
644 GIC 1793136 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
645 :
646 CBC 1793136 : return pgStatSharedRefAge != curage;
647 : }
648 ECB :
649 : static void
650 CBC 5537 : pgstat_gc_entry_refs(void)
651 : {
652 : pgstat_entry_ref_hash_iterator i;
653 : PgStat_EntryRefHashEntry *ent;
654 ECB : uint64 curage;
655 :
656 GIC 5537 : curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count);
657 5537 : Assert(curage != 0);
658 :
659 : /*
660 ECB : * Some entries have been dropped. Invalidate cache pointer to them.
661 : */
662 GIC 5537 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
663 387516 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL)
664 : {
665 381979 : PgStat_EntryRef *entry_ref = ent->entry_ref;
666 ECB :
667 CBC 381979 : Assert(!entry_ref->shared_stats ||
668 : entry_ref->shared_stats->magic == 0xdeadbeef);
669 ECB :
670 GIC 381979 : if (!entry_ref->shared_entry->dropped)
671 CBC 285652 : continue;
672 :
673 : /* cannot gc shared ref that has pending data */
674 96327 : if (entry_ref->pending != NULL)
675 91124 : continue;
676 :
677 GIC 5203 : pgstat_release_entry_ref(ent->key, entry_ref, false);
678 ECB : }
679 :
680 GIC 5537 : pgStatSharedRefAge = curage;
681 CBC 5537 : }
682 :
683 : static void
684 12458 : pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match,
685 ECB : Datum match_data)
686 : {
687 : pgstat_entry_ref_hash_iterator i;
688 : PgStat_EntryRefHashEntry *ent;
689 :
690 GIC 12458 : if (pgStatEntryRefHash == NULL)
691 5 : return;
692 :
693 12453 : pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i);
694 ECB :
695 CBC 564802 : while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i))
696 GIC 564802 : != NULL)
697 ECB : {
698 GIC 552349 : Assert(ent->entry_ref != NULL);
699 ECB :
700 CBC 552349 : if (match && !match(ent, match_data))
701 GIC 297 : continue;
702 ECB :
703 GIC 552052 : pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending);
704 ECB : }
705 : }
706 :
707 : /*
708 : * Release all local references to shared stats entries.
709 : *
710 : * When a process exits it cannot do so while still holding references onto
711 : * stats entries, otherwise the shared stats entries could never be freed.
712 : */
713 : static void
714 GIC 13301 : pgstat_release_all_entry_refs(bool discard_pending)
715 : {
716 13301 : if (pgStatEntryRefHash == NULL)
717 858 : return;
718 ECB :
719 GIC 12443 : pgstat_release_matching_entry_refs(discard_pending, NULL, 0);
720 CBC 12443 : Assert(pgStatEntryRefHash->members == 0);
721 12443 : pgstat_entry_ref_hash_destroy(pgStatEntryRefHash);
722 GIC 12443 : pgStatEntryRefHash = NULL;
723 ECB : }
724 :
725 : static bool
726 CBC 297 : match_db(PgStat_EntryRefHashEntry *ent, Datum match_data)
727 : {
728 GIC 297 : Oid dboid = DatumGetObjectId(match_data);
729 :
730 CBC 297 : return ent->key.dboid == dboid;
731 : }
732 ECB :
733 : static void
734 CBC 15 : pgstat_release_db_entry_refs(Oid dboid)
735 : {
736 GIC 15 : pgstat_release_matching_entry_refs( /* discard pending = */ true,
737 : match_db,
738 ECB : ObjectIdGetDatum(dboid));
739 GIC 15 : }
740 ECB :
741 :
742 : /* ------------------------------------------------------------
743 : * Dropping and resetting of stats entries
744 : * ------------------------------------------------------------
745 : */
746 :
747 : static void
748 GIC 29158 : pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat)
749 : {
750 : dsa_pointer pdsa;
751 :
752 ECB : /*
753 : * Fetch dsa pointer before deleting entry - that way we can free the
754 : * memory after releasing the lock.
755 : */
756 GIC 29158 : pdsa = shent->body;
757 :
758 29158 : if (!hstat)
759 28097 : dshash_delete_entry(pgStatLocal.shared_hash, shent);
760 ECB : else
761 GIC 1061 : dshash_delete_current(hstat);
762 ECB :
763 CBC 29158 : dsa_free(pgStatLocal.dsa, pdsa);
764 GIC 29158 : }
765 ECB :
766 : /*
767 : * Helper for both pgstat_drop_database_and_contents() and
768 : * pgstat_drop_entry(). If hstat is non-null delete the shared entry using
769 : * dshash_delete_current(), otherwise use dshash_delete_entry(). In either
770 : * case the entry needs to be already locked.
771 : */
772 : static bool
773 GIC 29187 : pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
774 : dshash_seq_status *hstat)
775 : {
776 29187 : Assert(shent->body != InvalidDsaPointer);
777 ECB :
778 : /* should already have released local reference */
779 GIC 29187 : if (pgStatEntryRefHash)
780 CBC 28945 : Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key));
781 :
782 : /*
783 ECB : * Signal that the entry is dropped - this will eventually cause other
784 : * backends to release their references.
785 : */
786 GIC 29187 : if (shent->dropped)
787 UIC 0 : elog(ERROR, "can only drop stats once");
788 GIC 29187 : shent->dropped = true;
789 :
790 ECB : /* release refcount marking entry as not dropped */
791 GBC 29187 : if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0)
792 ECB : {
793 GIC 24567 : pgstat_free_entry(shent, hstat);
794 24567 : return true;
795 ECB : }
796 : else
797 : {
798 CBC 4620 : if (!hstat)
799 GIC 4620 : dshash_release_lock(pgStatLocal.shared_hash, shent);
800 4620 : return false;
801 : }
802 ECB : }
803 :
804 : /*
805 : * Drop stats for the database and all the objects inside that database.
806 : */
807 : static void
808 GIC 15 : pgstat_drop_database_and_contents(Oid dboid)
809 : {
810 : dshash_seq_status hstat;
811 : PgStatShared_HashEntry *p;
812 CBC 15 : uint64 not_freed_count = 0;
813 :
814 GIC 15 : Assert(OidIsValid(dboid));
815 :
816 CBC 15 : Assert(pgStatLocal.shared_hash != NULL);
817 :
818 ECB : /*
819 : * This backend might very well be the only backend holding a reference to
820 : * about-to-be-dropped entries. Ensure that we're not preventing it from
821 : * being cleaned up till later.
822 : *
823 : * Doing this separately from the dshash iteration below avoids having to
824 : * do so while holding a partition lock on the shared hashtable.
825 : */
826 GIC 15 : pgstat_release_db_entry_refs(dboid);
827 :
828 : /* some of the dshash entries are to be removed, take exclusive lock. */
829 15 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
830 CBC 5015 : while ((p = dshash_seq_next(&hstat)) != NULL)
831 : {
832 GIC 5000 : if (p->dropped)
833 CBC 2 : continue;
834 ECB :
835 GIC 4998 : if (p->key.dboid != dboid)
836 CBC 3993 : continue;
837 ECB :
838 GIC 1005 : if (!pgstat_drop_entry_internal(p, &hstat))
839 ECB : {
840 : /*
841 : * Even statistics for a dropped database might currently be
842 : * accessed (consider e.g. database stats for pg_stat_database).
843 : */
844 UIC 0 : not_freed_count++;
845 : }
846 : }
847 GIC 15 : dshash_seq_term(&hstat);
848 EUB :
849 : /*
850 : * If some of the stats data could not be freed, signal the reference
851 ECB : * holders to run garbage collection of their cached pgStatShmLookupCache.
852 : */
853 GIC 15 : if (not_freed_count > 0)
854 UIC 0 : pgstat_request_entry_refs_gc();
855 GIC 15 : }
856 :
857 ECB : bool
858 GBC 41848 : pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
859 ECB : {
860 GIC 41848 : PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
861 : PgStatShared_HashEntry *shent;
862 CBC 41848 : bool freed = true;
863 :
864 ECB : /* delete local reference */
865 GIC 41848 : if (pgStatEntryRefHash)
866 ECB : {
867 : PgStat_EntryRefHashEntry *lohashent =
868 GIC 34191 : pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
869 ECB :
870 GIC 34191 : if (lohashent)
871 28029 : pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
872 ECB : true);
873 : }
874 :
875 : /* mark entry in shared hashtable as deleted, drop if possible */
876 GIC 41848 : shent = dshash_find(pgStatLocal.shared_hash, &key, true);
877 41848 : if (shent)
878 : {
879 28126 : freed = pgstat_drop_entry_internal(shent, NULL);
880 ECB :
881 : /*
882 : * Database stats contain other stats. Drop those as well when
883 : * dropping the database. XXX: Perhaps this should be done in a
884 : * slightly more principled way? But not obvious what that'd look
885 : * like, and so far this is the only case...
886 : */
887 GIC 28126 : if (key.kind == PGSTAT_KIND_DATABASE)
888 15 : pgstat_drop_database_and_contents(key.dboid);
889 : }
890 :
891 CBC 41848 : return freed;
892 ECB : }
893 :
894 : void
895 CBC 438 : pgstat_drop_all_entries(void)
896 : {
897 : dshash_seq_status hstat;
898 : PgStatShared_HashEntry *ps;
899 438 : uint64 not_freed_count = 0;
900 :
901 GIC 438 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
902 494 : while ((ps = dshash_seq_next(&hstat)) != NULL)
903 ECB : {
904 GIC 56 : if (ps->dropped)
905 LBC 0 : continue;
906 ECB :
907 GIC 56 : if (!pgstat_drop_entry_internal(ps, &hstat))
908 LBC 0 : not_freed_count++;
909 EUB : }
910 GIC 438 : dshash_seq_term(&hstat);
911 ECB :
912 GBC 438 : if (not_freed_count > 0)
913 UIC 0 : pgstat_request_entry_refs_gc();
914 CBC 438 : }
915 :
916 ECB : static void
917 GBC 7896 : shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
918 ECB : TimestampTz ts)
919 : {
920 GIC 7896 : const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
921 ECB :
922 GIC 7896 : memset(pgstat_get_entry_data(kind, header), 0,
923 : pgstat_get_entry_len(kind));
924 ECB :
925 GIC 7896 : if (kind_info->reset_timestamp_cb)
926 CBC 140 : kind_info->reset_timestamp_cb(header, ts);
927 GIC 7896 : }
928 :
929 ECB : /*
930 : * Reset one variable-numbered stats entry.
931 : */
932 : void
933 GIC 123 : pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
934 : {
935 : PgStat_EntryRef *entry_ref;
936 :
937 CBC 123 : Assert(!pgstat_get_kind_info(kind)->fixed_amount);
938 :
939 GIC 123 : entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
940 123 : if (!entry_ref || entry_ref->shared_entry->dropped)
941 CBC 1 : return;
942 :
943 122 : (void) pgstat_lock_entry(entry_ref, false);
944 122 : shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
945 122 : pgstat_unlock_entry(entry_ref);
946 : }
947 ECB :
948 : /*
949 : * Scan through the shared hashtable of stats, resetting statistics if
950 : * approved by the provided do_reset() function.
951 : */
952 : void
953 GIC 17 : pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
954 : Datum match_data, TimestampTz ts)
955 : {
956 : dshash_seq_status hstat;
957 ECB : PgStatShared_HashEntry *p;
958 :
959 : /* dshash entry is not modified, take shared lock */
960 GIC 17 : dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
961 11707 : while ((p = dshash_seq_next(&hstat)) != NULL)
962 : {
963 : PgStatShared_Common *header;
964 ECB :
965 CBC 11690 : if (p->dropped)
966 GIC 1 : continue;
967 :
968 11689 : if (!do_reset(p, match_data))
969 CBC 3915 : continue;
970 ECB :
971 GIC 7774 : header = dsa_get_address(pgStatLocal.dsa, p->body);
972 ECB :
973 CBC 7774 : LWLockAcquire(&header->lock, LW_EXCLUSIVE);
974 :
975 7774 : shared_stat_reset_contents(p->key.kind, header, ts);
976 :
977 7774 : LWLockRelease(&header->lock);
978 : }
979 17 : dshash_seq_term(&hstat);
980 GIC 17 : }
981 ECB :
982 : static bool
983 CBC 1451 : match_kind(PgStatShared_HashEntry *p, Datum match_data)
984 ECB : {
985 GIC 1451 : return p->key.kind == DatumGetInt32(match_data);
986 : }
987 ECB :
988 : void
989 CBC 4 : pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts)
990 : {
991 GIC 4 : pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts);
992 4 : }
993 ECB :
994 : static void
995 CBC 1793136 : pgstat_setup_memcxt(void)
996 ECB : {
997 GIC 1793136 : if (unlikely(!pgStatSharedRefContext))
998 12443 : pgStatSharedRefContext =
999 CBC 12443 : AllocSetContextCreate(TopMemoryContext,
1000 : "PgStat Shared Ref",
1001 ECB : ALLOCSET_SMALL_SIZES);
1002 CBC 1793136 : if (unlikely(!pgStatEntryRefHashContext))
1003 12443 : pgStatEntryRefHashContext =
1004 GIC 12443 : AllocSetContextCreate(TopMemoryContext,
1005 : "PgStat Shared Ref Hash",
1006 ECB : ALLOCSET_SMALL_SIZES);
1007 CBC 1793136 : }
|