Age Owner TLA Line data Source code
1 : /*--------------------------------------------------------------------
2 : * bgworker.c
3 : * POSTGRES pluggable background workers implementation
4 : *
5 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
6 : *
7 : * IDENTIFICATION
8 : * src/backend/postmaster/bgworker.c
9 : *
10 : *-------------------------------------------------------------------------
11 : */
12 :
13 : #include "postgres.h"
14 :
15 : #include "access/parallel.h"
16 : #include "libpq/pqsignal.h"
17 : #include "miscadmin.h"
18 : #include "pgstat.h"
19 : #include "port/atomics.h"
20 : #include "postmaster/bgworker_internals.h"
21 : #include "postmaster/interrupt.h"
22 : #include "postmaster/postmaster.h"
23 : #include "replication/logicallauncher.h"
24 : #include "replication/logicalworker.h"
25 : #include "storage/dsm.h"
26 : #include "storage/ipc.h"
27 : #include "storage/latch.h"
28 : #include "storage/lwlock.h"
29 : #include "storage/pg_shmem.h"
30 : #include "storage/pmsignal.h"
31 : #include "storage/proc.h"
32 : #include "storage/procsignal.h"
33 : #include "storage/shmem.h"
34 : #include "tcop/tcopprot.h"
35 : #include "utils/ascii.h"
36 : #include "utils/ps_status.h"
37 : #include "utils/timeout.h"
38 :
39 : /*
40 : * The postmaster's list of registered background workers, in private memory. Entries are RegisteredBgWorker structs chained through their rw_lnode member.
41 : */
42 : slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
43 :
44 : /*
45 : * BackgroundWorkerSlots exist in shared memory and can be accessed (via
46 : * the BackgroundWorkerArray) by both the postmaster and by regular backends.
47 : * However, the postmaster cannot take locks, even spinlocks, because this
48 : * might allow it to crash or become wedged if shared memory gets corrupted.
49 : * Such an outcome is intolerable. Therefore, we need a lockless protocol
50 : * for coordinating access to this data.
51 : *
52 : * The 'in_use' flag is used to hand off responsibility for the slot between
53 : * the postmaster and the rest of the system. When 'in_use' is false,
54 : * the postmaster will ignore the slot entirely, except for the 'in_use' flag
55 : * itself, which it may read. In this state, regular backends may modify the
56 : * slot. Once a backend sets 'in_use' to true, the slot becomes the
57 : * responsibility of the postmaster. Regular backends may no longer modify it,
58 : * but the postmaster may examine it. Thus, a backend initializing a slot
59 : * must fully initialize the slot - and insert a write memory barrier - before
60 : * marking it as in use.
61 : *
62 : * As an exception, however, even when the slot is in use, regular backends
63 : * may set the 'terminate' flag for a slot, telling the postmaster not
64 : * to restart it. Once the background worker is no longer running, the slot
65 : * will be released for reuse.
66 : *
67 : * In addition to coordinating with the postmaster, backends modifying this
68 : * data structure must coordinate with each other. Since they can take locks,
69 : * this is straightforward: any backend wishing to manipulate a slot must
70 : * take BackgroundWorkerLock in exclusive mode. Backends wishing to read
71 : * data that might get concurrently modified by other backends should take
72 : * this lock in shared mode. No matter what, backends reading this data
73 : * structure must be able to tolerate concurrent modifications by the
74 : * postmaster.
75 : */
76 : typedef struct BackgroundWorkerSlot
77 : {
78 : bool in_use; /* true = slot is the postmaster's responsibility; false = backends may fill it */
79 : bool terminate; /* set by a backend to tell the postmaster not to restart the worker */
80 : pid_t pid; /* InvalidPid = not started yet; 0 = dead */
81 : uint64 generation; /* incremented when slot is recycled */
82 : BackgroundWorker worker; /* registration data of the worker occupying this slot */
83 : } BackgroundWorkerSlot;
84 :
85 : /*
86 : * In order to limit the total number of parallel workers (according to
87 : * max_parallel_workers GUC), we maintain the number of active parallel
88 : * workers. Since the postmaster cannot take locks, two variables are used for
89 : * this purpose: the number of registered parallel workers (modified by the
90 : * backends, protected by BackgroundWorkerLock) and the number of terminated
91 : * parallel workers (modified only by the postmaster, lockless). The active
92 : * number of parallel workers is the number of registered workers minus the
93 : * terminated ones. These counters can of course overflow, but it's not
94 : * important here since the subtraction will still give the right number.
95 : */
96 : typedef struct BackgroundWorkerArray
97 : {
98 : int total_slots; /* number of entries in slot[]; set to max_worker_processes at shmem init */
99 : uint32 parallel_register_count; /* parallel workers registered; modified by backends under BackgroundWorkerLock */
100 : uint32 parallel_terminate_count; /* parallel workers terminated; modified only by the postmaster, lockless */
101 : BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER]; /* one entry per possible worker */
102 : } BackgroundWorkerArray;
103 :
104 : struct BackgroundWorkerHandle
105 : {
106 : int slot; /* NOTE(review): presumably an index into BackgroundWorkerData->slot[] -- confirm against the handle's users */
107 : uint64 generation; /* NOTE(review): presumably matched against the slot's generation to detect a recycled slot -- confirm */
108 : };
109 :
110 : static BackgroundWorkerArray *BackgroundWorkerData; /* shared-memory array; assigned in BackgroundWorkerShmemInit() */
111 :
112 : /*
113 : * List of internal background worker entry points. We need this for
114 : * reasons explained in LookupBackgroundWorkerFunction(), below. Each entry pairs an entry point's name (fn_name) with its address (fn_addr).
115 : */
116 : static const struct
117 : {
118 : const char *fn_name; /* function name as a string */
119 : bgworker_main_type fn_addr; /* address of that function */
120 : } InternalBGWorkers[] =
121 :
122 : {
123 : {
124 : "ParallelWorkerMain", ParallelWorkerMain
125 : },
126 : {
127 : "ApplyLauncherMain", ApplyLauncherMain
128 : },
129 : {
130 : "ApplyWorkerMain", ApplyWorkerMain
131 : },
132 : {
133 : "ParallelApplyWorkerMain", ParallelApplyWorkerMain
134 : }
135 : };
136 :
137 : /* Prototypes for private (file-local) functions. */
138 : static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);
139 :
140 :
141 : /*
142 : * Calculate shared memory needed for the background worker array: the part of BackgroundWorkerArray that precedes slot[], plus max_worker_processes slots.
143 : */
144 : Size
3554 rhaas 145 GIC 4564 : BackgroundWorkerShmemSize(void)
146 : {
147 : Size size;
3554 rhaas 148 ECB :
149 : /* Array of workers is variably sized: start with the fixed header, then add one BackgroundWorkerSlot per worker process. */
3554 rhaas 150 GIC 4564 : size = offsetof(BackgroundWorkerArray, slot);
151 4564 : size = add_size(size, mul_size(max_worker_processes,
152 : sizeof(BackgroundWorkerSlot)));
3554 rhaas 153 ECB :
3554 rhaas 154 CBC 4564 : return size;
155 : }
156 :
3554 rhaas 157 ECB : /*
158 : * Initialize shared memory for background workers. When not running under the postmaster (!IsUnderPostmaster), the array is filled from BackgroundWorkerList and the remaining slots are marked free; otherwise we only assert that the struct was already found.
159 : */
160 : void
3554 rhaas 161 GIC 1826 : BackgroundWorkerShmemInit(void)
162 : {
163 : bool found;
3554 rhaas 164 ECB :
3554 rhaas 165 GIC 1826 : BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
166 : BackgroundWorkerShmemSize(),
167 : &found);
3554 rhaas 168 CBC 1826 : if (!IsUnderPostmaster)
169 : {
170 : slist_iter siter;
171 1826 : int slotno = 0;
172 :
3554 rhaas 173 GIC 1826 : BackgroundWorkerData->total_slots = max_worker_processes;
2319 rhaas 174 CBC 1826 : BackgroundWorkerData->parallel_register_count = 0;
2319 rhaas 175 GIC 1826 : BackgroundWorkerData->parallel_terminate_count = 0;
3554 rhaas 176 ECB :
177 : /*
3260 bruce 178 : * Copy contents of worker list into shared memory. Record the shared
179 : * memory slot assigned to each worker. This ensures a 1-to-1
180 : * correspondence between the postmaster's private list and the array
181 : * in shared memory.
182 : */
3554 rhaas 183 GIC 2427 : slist_foreach(siter, &BackgroundWorkerList)
184 : {
185 601 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
3554 rhaas 186 ECB : RegisteredBgWorker *rw;
187 :
3554 rhaas 188 CBC 601 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3554 rhaas 189 GIC 601 : Assert(slotno < max_worker_processes);
190 601 : slot->in_use = true;
3460 rhaas 191 CBC 601 : slot->terminate = false;
3511 192 601 : slot->pid = InvalidPid; /* not started yet */
194 193 601 : slot->generation = 0;
3554 194 601 : rw->rw_shmem_slot = slotno;
3260 bruce 195 601 : rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */
3554 rhaas 196 601 : memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
197 601 : ++slotno;
3554 rhaas 198 ECB : }
199 :
200 : /*
201 : * Mark any remaining slots as not in use.
202 : */
3554 rhaas 203 GIC 15833 : while (slotno < max_worker_processes)
204 : {
205 14007 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
3554 rhaas 206 ECB :
3554 rhaas 207 GIC 14007 : slot->in_use = false;
3554 rhaas 208 CBC 14007 : ++slotno;
209 : }
3554 rhaas 210 ECB : }
211 : else
3554 rhaas 212 UIC 0 : Assert(found);
3554 rhaas 213 GIC 1826 : }
214 :
3538 rhaas 215 EUB : /*
3538 rhaas 216 ECB : * Search the postmaster's backend-private list of RegisteredBgWorker objects
217 : * for the one that maps to the given slot number. Returns NULL if no list entry has rw_shmem_slot equal to slotno.
218 : */
219 : static RegisteredBgWorker *
3554 rhaas 220 GIC 3282 : FindRegisteredWorkerBySlotNumber(int slotno)
221 : {
222 : slist_iter siter;
3554 rhaas 223 ECB :
3554 rhaas 224 GIC 8182 : slist_foreach(siter, &BackgroundWorkerList)
225 : {
226 : RegisteredBgWorker *rw;
3554 rhaas 227 ECB :
3554 rhaas 228 GIC 6551 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
229 6551 : if (rw->rw_shmem_slot == slotno)
230 1651 : return rw;
3554 rhaas 231 ECB : }
232 :
3554 rhaas 233 CBC 1631 : return NULL;
234 : }
235 :
3554 rhaas 236 ECB : /*
237 : * Notice changes to shared memory made by other backends.
238 : * Accept new worker requests only if allow_new_workers is true. When it is false, newly found slots are marked for termination and released instead of being added to the registered workers list.
239 : *
240 : * This code runs in the postmaster, so we must be very careful not to assume
241 : * that shared memory contents are sane. Otherwise, a rogue backend could
242 : * take out the postmaster.
243 : */
244 : void
836 tgl 245 GIC 953 : BackgroundWorkerStateChange(bool allow_new_workers)
246 : {
247 : int slotno;
3554 rhaas 248 ECB :
249 : /*
250 : * The total number of slots stored in shared memory should match our
251 : * notion of max_worker_processes. If it does not, something is very
252 : * wrong. Further down, we always refer to this value as
253 : * max_worker_processes, in case shared memory gets corrupted while we're
254 : * looping.
255 : */
3554 rhaas 256 GIC 953 : if (max_worker_processes != BackgroundWorkerData->total_slots)
257 : {
856 peter 258 UIC 0 : ereport(LOG,
856 peter 259 ECB : (errmsg("inconsistent background worker state (max_worker_processes=%d, total_slots=%d)",
260 : max_worker_processes,
856 peter 261 EUB : BackgroundWorkerData->total_slots)));
3554 rhaas 262 UIC 0 : return;
263 : }
264 :
3554 rhaas 265 EUB : /*
266 : * Iterate through slots, looking for newly-registered workers or workers
267 : * who must die.
268 : */
3554 rhaas 269 GIC 8577 : for (slotno = 0; slotno < max_worker_processes; ++slotno)
270 : {
271 7624 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
3554 rhaas 272 ECB : RegisteredBgWorker *rw;
273 :
3554 rhaas 274 CBC 7624 : if (!slot->in_use)
3554 rhaas 275 GIC 4342 : continue;
276 :
3554 rhaas 277 ECB : /*
278 : * Make sure we don't see the in_use flag before the updated slot
279 : * contents.
280 : */
3554 rhaas 281 GIC 3282 : pg_read_barrier();
282 :
283 : /* See whether we already know about this worker. */
3554 rhaas 284 CBC 3282 : rw = FindRegisteredWorkerBySlotNumber(slotno);
3554 rhaas 285 GIC 3282 : if (rw != NULL)
286 : {
3460 rhaas 287 ECB : /*
288 : * In general, the worker data can't change after it's initially
289 : * registered. However, someone can set the terminate flag.
290 : */
3460 rhaas 291 GIC 1651 : if (slot->terminate && !rw->rw_terminate)
292 : {
293 3 : rw->rw_terminate = true;
3460 rhaas 294 CBC 3 : if (rw->rw_pid != 0)
3460 rhaas 295 GIC 3 : kill(rw->rw_pid, SIGTERM);
2943 rhaas 296 ECB : else
297 : {
298 : /* Report never-started, now-terminated worker as dead. */
2943 rhaas 299 UIC 0 : ReportBackgroundWorkerPID(rw);
300 : }
301 : }
3554 rhaas 302 GBC 1651 : continue;
303 : }
304 :
836 tgl 305 ECB : /*
306 : * If we aren't allowing new workers, then immediately mark it for
307 : * termination; the next stanza will take care of cleaning it up.
308 : * Doing this ensures that any process waiting for the worker will get
309 : * awoken, even though the worker will never be allowed to run.
310 : */
836 tgl 311 GIC 1631 : if (!allow_new_workers)
836 tgl 312 UIC 0 : slot->terminate = true;
313 :
2943 rhaas 314 ECB : /*
2878 bruce 315 EUB : * If the worker is marked for termination, we don't need to add it to
316 : * the registered workers list; we can just free the slot. However, if
317 : * bgw_notify_pid is set, the process that registered the worker may
318 : * need to know that we've processed the terminate request, so be sure
319 : * to signal it.
320 : */
3460 rhaas 321 GIC 1631 : if (slot->terminate)
3460 rhaas 322 UIC 0 : {
323 : int notify_pid;
2943 rhaas 324 ECB :
2943 rhaas 325 EUB : /*
326 : * We need a memory barrier here to make sure that the load of
327 : * bgw_notify_pid and the update of parallel_terminate_count
328 : * complete before the store to in_use.
329 : */
2943 rhaas 330 UIC 0 : notify_pid = slot->worker.bgw_notify_pid;
2319 331 0 : if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
333 332 0 : BackgroundWorkerData->parallel_terminate_count++;
2943 rhaas 333 UBC 0 : slot->pid = 0; /* 0 = dead, per BackgroundWorkerSlot */
694 tgl 334 EUB :
694 tgl 335 UBC 0 : pg_memory_barrier();
3460 rhaas 336 0 : slot->in_use = false;
337 :
2943 338 0 : if (notify_pid != 0)
340 339 0 : kill(notify_pid, SIGUSR1);
340 :
3460 341 0 : continue;
3460 rhaas 342 EUB : }
343 :
3554 344 : /*
345 : * Copy the registration data into the registered workers list. The entry is later released with free() in ForgetBackgroundWorker().
346 : */
3554 rhaas 347 GIC 1631 : rw = malloc(sizeof(RegisteredBgWorker));
348 1631 : if (rw == NULL)
349 : {
3554 rhaas 350 LBC 0 : ereport(LOG,
3554 rhaas 351 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
352 : errmsg("out of memory")));
3554 rhaas 353 UBC 0 : return;
354 : }
355 :
3554 rhaas 356 EUB : /*
357 : * Copy strings in a paranoid way. If shared memory is corrupted, the
358 : * source data might not even be NUL-terminated.
359 : */
3554 rhaas 360 GIC 1631 : ascii_safe_strlcpy(rw->rw_worker.bgw_name,
361 1631 : slot->worker.bgw_name, BGW_MAXLEN);
2047 peter_e 362 1631 : ascii_safe_strlcpy(rw->rw_worker.bgw_type,
2047 peter_e 363 CBC 1631 : slot->worker.bgw_type, BGW_MAXLEN);
3554 rhaas 364 1631 : ascii_safe_strlcpy(rw->rw_worker.bgw_library_name,
366 365 1631 : slot->worker.bgw_library_name, BGW_MAXLEN);
367 366 1631 : ascii_safe_strlcpy(rw->rw_worker.bgw_function_name,
368 367 1631 : slot->worker.bgw_function_name, BGW_MAXLEN);
3554 rhaas 368 ECB :
369 : /*
3511 370 : * Copy various fixed-size fields.
371 : *
372 : * flags, start_time, and restart_time are examined by the postmaster,
373 : * but nothing too bad will happen if they are corrupted. The
374 : * remaining fields will only be examined by the child process. It
375 : * might crash, but we won't.
376 : */
3554 rhaas 377 GIC 1631 : rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
378 1631 : rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
379 1631 : rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time;
3554 rhaas 380 CBC 1631 : rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
2712 381 1631 : memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);
3554 rhaas 382 ECB :
3511 383 : /*
3260 bruce 384 : * Copy the PID to be notified about state changes, but only if the
385 : * postmaster knows about a backend with that PID. It isn't an error
386 : * if the postmaster doesn't know about the PID, because the backend
387 : * that requested the worker could have died (or been killed) just
388 : * after doing so. Nonetheless, at least until we get some experience
389 : * with how this plays out in the wild, log a message at a relatively
390 : * high debug level.
391 : */
3511 rhaas 392 GIC 1631 : rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
393 1631 : if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
394 : {
177 peter 395 UNC 0 : elog(DEBUG1, "worker notification PID %d is not valid",
396 : (int) rw->rw_worker.bgw_notify_pid);
3511 rhaas 397 UIC 0 : rw->rw_worker.bgw_notify_pid = 0;
3511 rhaas 398 EUB : }
399 :
3554 400 : /* Initialize postmaster bookkeeping. */
3554 rhaas 401 GIC 1631 : rw->rw_backend = NULL;
402 1631 : rw->rw_pid = 0;
403 1631 : rw->rw_child_slot = 0;
3554 rhaas 404 CBC 1631 : rw->rw_crashed_at = 0;
405 1631 : rw->rw_shmem_slot = slotno;
3460 406 1631 : rw->rw_terminate = false;
3554 rhaas 407 ECB :
408 : /* Log it! */
2844 rhaas 409 CBC 1631 : ereport(DEBUG1,
410 : (errmsg_internal("registering background worker \"%s\"",
411 : rw->rw_worker.bgw_name)));
3554 rhaas 412 ECB :
3554 rhaas 413 GIC 1631 : slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
414 : }
415 : }
3554 rhaas 416 ECB :
417 : /*
418 : * Forget about a background worker that's no longer needed: mark its shared-memory slot as not in use, unlink it from BackgroundWorkerList and free its RegisteredBgWorker.
419 : *
420 : * The worker must be identified by passing an slist_mutable_iter that
421 : * points to it. This convention allows deletion of workers during
422 : * searches of the worker list, and saves having to search the list again.
423 : *
424 : * Caller is responsible for notifying bgw_notify_pid, if appropriate.
425 : *
426 : * This function must be invoked only in the postmaster.
427 : */
428 : void
3546 tgl 429 GIC 1630 : ForgetBackgroundWorker(slist_mutable_iter *cur)
430 : {
431 : RegisteredBgWorker *rw;
3554 rhaas 432 ECB : BackgroundWorkerSlot *slot;
433 :
3546 tgl 434 GIC 1630 : rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
435 :
3554 rhaas 436 1630 : Assert(rw->rw_shmem_slot < max_worker_processes);
3554 rhaas 437 CBC 1630 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
694 tgl 438 GIC 1630 : Assert(slot->in_use);
694 tgl 439 ECB :
440 : /*
441 : * We need a memory barrier here to make sure that the update of
442 : * parallel_terminate_count completes before the store to in_use.
443 : */
2319 rhaas 444 GIC 1630 : if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
445 1298 : BackgroundWorkerData->parallel_terminate_count++;
446 :
694 tgl 447 CBC 1630 : pg_memory_barrier();
3554 rhaas 448 1630 : slot->in_use = false;
449 :
2844 450 1630 : ereport(DEBUG1,
781 peter 451 ECB : (errmsg_internal("unregistering background worker \"%s\"",
452 : rw->rw_worker.bgw_name)));
3554 rhaas 453 :
3546 tgl 454 GIC 1630 : slist_delete_current(cur);
3554 rhaas 455 1630 : free(rw); /* rw must not be used by the caller after this point */
456 1630 : }
3554 rhaas 457 ECB :
3511 458 : /*
459 : * Report the PID of a newly-launched background worker in shared memory. If the worker's bgw_notify_pid is nonzero, that process is also sent SIGUSR1.
460 : *
461 : * This function should only be called from the postmaster.
462 : */
463 : void
3511 rhaas 464 GIC 2153 : ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
465 : {
466 : BackgroundWorkerSlot *slot;
3511 rhaas 467 ECB :
3511 rhaas 468 GIC 2153 : Assert(rw->rw_shmem_slot < max_worker_processes);
469 2153 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
470 2153 : slot->pid = rw->rw_pid;
3511 rhaas 471 ECB :
3511 rhaas 472 CBC 2153 : if (rw->rw_worker.bgw_notify_pid != 0)
473 1631 : kill(rw->rw_worker.bgw_notify_pid, SIGUSR1);
3511 rhaas 474 GIC 2153 : }
3511 rhaas 475 ECB :
2228 476 : /*
477 : * Report that the PID of a background worker is now zero because a
478 : * previously-running background worker has exited.
479 : *
480 : * This function should only be called from the postmaster.
481 : */
482 : void
2228 rhaas 483 GIC 1943 : ReportBackgroundWorkerExit(slist_mutable_iter *cur)
484 : {
485 : RegisteredBgWorker *rw;
2228 rhaas 486 ECB : BackgroundWorkerSlot *slot;
487 : int notify_pid;
488 :
2228 rhaas 489 GIC 1943 : rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
490 :
491 1943 : Assert(rw->rw_shmem_slot < max_worker_processes);
2228 rhaas 492 CBC 1943 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
2228 rhaas 493 GIC 1943 : slot->pid = rw->rw_pid;
2225 rhaas 494 CBC 1943 : notify_pid = rw->rw_worker.bgw_notify_pid; /* saved now: ForgetBackgroundWorker() below frees rw */
2228 rhaas 495 ECB :
496 : /*
497 : * If this worker is slated for deregistration, do that before notifying
498 : * the process which started it. Otherwise, if that process tries to
499 : * reuse the slot immediately, it might not be available yet. In theory
500 : * that could happen anyway if the process checks slot->pid at just the
501 : * wrong moment, but this makes the window narrower.
502 : */
2228 rhaas 503 GIC 1943 : if (rw->rw_terminate ||
504 449 : rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
505 1624 : ForgetBackgroundWorker(cur);
2228 rhaas 506 ECB :
2225 rhaas 507 CBC 1943 : if (notify_pid != 0)
508 1605 : kill(notify_pid, SIGUSR1);
2228 rhaas 509 GIC 1943 : }
2228 rhaas 510 ECB :
3511 511 : /*
512 : * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend, by zeroing bgw_notify_pid in every registered worker whose notify PID equals pid.
513 : *
514 : * This function should only be called from the postmaster.
515 : */
516 : void
3511 rhaas 517 GIC 182 : BackgroundWorkerStopNotifications(pid_t pid)
518 : {
519 : slist_iter siter;
3511 rhaas 520 ECB :
3511 rhaas 521 GIC 568 : slist_foreach(siter, &BackgroundWorkerList)
522 : {
523 : RegisteredBgWorker *rw;
3511 rhaas 524 ECB :
3511 rhaas 525 GIC 386 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
526 386 : if (rw->rw_worker.bgw_notify_pid == pid)
527 15 : rw->rw_worker.bgw_notify_pid = 0;
3511 rhaas 528 ECB : }
3511 rhaas 529 CBC 182 : }
3511 rhaas 530 ECB :
531 : /*
836 tgl 532 : * Cancel any not-yet-started worker requests that have waiting processes.
533 : *
534 : * This is called during a normal ("smart" or "fast") database shutdown.
535 : * After this point, no new background workers will be started, so anything
536 : * that might be waiting for them needs to be kicked off its wait. We do
537 : * that by canceling the bgworker registration entirely, which is perhaps
538 : * overkill, but since we're shutting down it does not matter whether the
539 : * registration record sticks around.
540 : *
541 : * This function should only be called from the postmaster.
542 : */
543 : void
836 tgl 544 GIC 352 : ForgetUnstartedBackgroundWorkers(void)
545 : {
546 : slist_mutable_iter iter;
836 tgl 547 ECB :
836 tgl 548 GIC 736 : slist_foreach_modify(iter, &BackgroundWorkerList)
549 : {
550 : RegisteredBgWorker *rw;
836 tgl 551 ECB : BackgroundWorkerSlot *slot;
552 :
836 tgl 553 GIC 384 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
554 384 : Assert(rw->rw_shmem_slot < max_worker_processes);
555 384 : slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
836 tgl 556 ECB :
557 : /* If it's not yet started, and there's someone waiting ... */
836 tgl 558 CBC 384 : if (slot->pid == InvalidPid &&
836 tgl 559 GIC 33 : rw->rw_worker.bgw_notify_pid != 0)
560 : {
836 tgl 561 ECB : /* ... then zap it, and notify the waiter (the PID is saved first because ForgetBackgroundWorker() frees rw) */
836 tgl 562 LBC 0 : int notify_pid = rw->rw_worker.bgw_notify_pid;
563 :
836 tgl 564 UIC 0 : ForgetBackgroundWorker(&iter);
836 tgl 565 UBC 0 : if (notify_pid != 0) /* always true here: the enclosing condition already required a nonzero bgw_notify_pid */
836 tgl 566 UIC 0 : kill(notify_pid, SIGUSR1);
836 tgl 567 EUB : }
568 : }
836 tgl 569 GBC 352 : }
570 :
571 : /*
3259 rhaas 572 ECB : * Reset background worker crash state (rw_crashed_at) for every registered worker.
573 : *
574 : * We assume that, after a crash-and-restart cycle, background workers without
575 : * the never-restart flag should be restarted immediately, instead of waiting
576 : * for bgw_restart_time to elapse. On the other hand, workers with that flag
577 : * should be forgotten immediately, since we won't ever restart them.
578 : *
579 : * This function should only be called from the postmaster.
580 : */
581 : void
3259 rhaas 582 GIC 4 : ResetBackgroundWorkerCrashTimes(void)
583 : {
584 : slist_mutable_iter iter;
3259 rhaas 585 ECB :
3259 rhaas 586 GIC 8 : slist_foreach_modify(iter, &BackgroundWorkerList)
587 : {
588 : RegisteredBgWorker *rw;
3259 rhaas 589 ECB :
3259 rhaas 590 GIC 4 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
591 :
2189 592 4 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
2189 rhaas 593 ECB : {
594 : /*
1378 michael 595 : * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after
596 : * the crash, so forget about them. (If we wait until after the
597 : * crash to forget about them, and they are parallel workers,
598 : * parallel_terminate_count will get incremented after we've
599 : * already zeroed parallel_register_count, which would be bad.)
600 : */
2189 rhaas 601 UIC 0 : ForgetBackgroundWorker(&iter);
602 : }
603 : else
2189 rhaas 604 EUB : {
605 : /*
606 : * The accounting which we do via parallel_register_count and
607 : * parallel_terminate_count would get messed up if a worker marked
608 : * parallel could survive a crash and restart cycle. All such
609 : * workers should be marked BGW_NEVER_RESTART, and thus control
610 : * should never reach this branch.
611 : */
2189 rhaas 612 GIC 4 : Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0);
613 :
614 : /*
2189 rhaas 615 ECB : * Allow this worker to be restarted immediately after we finish
616 : * resetting.
617 : */
2929 rhaas 618 GIC 4 : rw->rw_crashed_at = 0;
619 :
620 : /*
836 tgl 621 ECB : * If there was anyone waiting for it, they're history.
622 : */
836 tgl 623 GIC 4 : rw->rw_worker.bgw_notify_pid = 0;
624 : }
625 : }
3259 rhaas 626 CBC 4 : }
627 :
628 : #ifdef EXEC_BACKEND
3554 rhaas 629 ECB : /*
630 : * In EXEC_BACKEND mode, workers use this to retrieve their details from
631 : * shared memory. The result points to a function-static copy of the slot's worker data, so a later call overwrites it.
632 : */
633 : BackgroundWorker *
634 : BackgroundWorkerEntry(int slotno)
635 : {
636 : static BackgroundWorker myEntry;
637 : BackgroundWorkerSlot *slot;
638 :
639 : Assert(slotno < BackgroundWorkerData->total_slots);
640 : slot = &BackgroundWorkerData->slot[slotno];
641 : Assert(slot->in_use);
642 :
643 : /* must copy this in case we don't intend to retain shmem access */
644 : memcpy(&myEntry, &slot->worker, sizeof myEntry);
645 : return &myEntry;
646 : }
647 : #endif
648 :
649 : /*
650 : * Complain about the BackgroundWorker definition using error level elevel.
651 : * Return true if it looks ok, false if not (unless elevel >= ERROR, in
652 : * which case we won't return at all in the not-OK case). As a side effect, an empty bgw_type is filled in from bgw_name when all checks pass.
653 : */
654 : static bool
3554 rhaas 655 GIC 2194 : SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
656 : {
657 : /* sanity check for flags */
604 andres 658 ECB :
659 : /*
660 : * We used to support workers not connected to shared memory, but don't
661 : * anymore. Thus this is a required flag now. We're not removing the flag
662 : * for compatibility reasons and because the flag still provides some
663 : * signal when reading code.
664 : */
604 andres 665 GIC 2194 : if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
666 : {
604 andres 667 UIC 0 : ereport(elevel,
604 andres 668 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
669 : errmsg("background worker \"%s\": background workers without shared memory access are not supported",
604 andres 670 EUB : worker->bgw_name)));
604 andres 671 UIC 0 : return false;
672 : }
673 :
604 andres 674 GBC 2194 : if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
675 : {
3554 rhaas 676 GIC 2185 : if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
3554 rhaas 677 ECB : {
3554 rhaas 678 UIC 0 : ereport(elevel,
3554 rhaas 679 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
680 : errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
3554 rhaas 681 EUB : worker->bgw_name)));
3554 rhaas 682 UIC 0 : return false;
683 : }
684 :
3554 rhaas 685 EUB : /* XXX other checks? */
686 : }
687 :
3554 rhaas 688 GIC 2194 : if ((worker->bgw_restart_time < 0 &&
689 1594 : worker->bgw_restart_time != BGW_NEVER_RESTART) ||
690 2194 : (worker->bgw_restart_time > USECS_PER_DAY / 1000))
3554 rhaas 691 ECB : {
3554 rhaas 692 LBC 0 : ereport(elevel,
3554 rhaas 693 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
694 : errmsg("background worker \"%s\": invalid restart interval",
3554 rhaas 695 EUB : worker->bgw_name)));
3554 rhaas 696 UIC 0 : return false;
697 : }
698 :
2189 rhaas 699 EUB : /*
700 : * Parallel workers may not be configured for restart, because the
701 : * parallel_register_count/parallel_terminate_count accounting can't
702 : * handle parallel workers lasting through a crash-and-restart cycle.
703 : */
2189 rhaas 704 GIC 2194 : if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
705 600 : (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
706 : {
2189 rhaas 707 LBC 0 : ereport(elevel,
2189 rhaas 708 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
709 : errmsg("background worker \"%s\": parallel workers may not be configured for restart",
2189 rhaas 710 EUB : worker->bgw_name)));
2189 rhaas 711 UIC 0 : return false;
712 : }
713 :
2047 peter_e 714 EUB : /*
715 : * If bgw_type is not filled in, use bgw_name. NOTE(review): the unbounded strcpy assumes both fields have the same capacity (both are copied with BGW_MAXLEN elsewhere in this file) -- confirm against the struct definition.
716 : */
2047 peter_e 717 GIC 2194 : if (strcmp(worker->bgw_type, "") == 0)
2047 peter_e 718 UIC 0 : strcpy(worker->bgw_type, worker->bgw_name);
719 :
3554 rhaas 720 CBC 2194 : return true;
3554 rhaas 721 EUB : }
722 :
3523 rhaas 723 ECB : /*
724 : * Standard SIGTERM handler for background workers: applies the BlockSig signal mask, then raises a FATAL error naming the worker's bgw_type.
725 : */
726 : static void
3523 rhaas 727 UIC 0 : bgworker_die(SIGNAL_ARGS)
728 : {
65 tmunro 729 UNC 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
3523 rhaas 730 EUB :
3523 rhaas 731 UIC 0 : ereport(FATAL,
3523 rhaas 732 EUB : (errcode(ERRCODE_ADMIN_SHUTDOWN),
733 : errmsg("terminating background worker \"%s\" due to administrator command",
2047 peter_e 734 : MyBgworkerEntry->bgw_type)));
735 : }
736 :
737 : /*
738 : * Start a new background worker
739 : *
740 : * This is the main entry point for a background worker, to be called from
741 : * postmaster. It does not return: every path ends in proc_exit() (or a FATAL error when MyBgworkerEntry is NULL).
742 : */
743 : void
3523 rhaas 744 GIC 1948 : StartBackgroundWorker(void)
745 : {
746 : sigjmp_buf local_sigjmp_buf;
3523 rhaas 747 CBC 1948 : BackgroundWorker *worker = MyBgworkerEntry;
748 : bgworker_main_type entrypt;
749 :
750 1948 : if (worker == NULL)
3523 rhaas 751 UIC 0 : elog(FATAL, "unable to find bgworker entry");
752 :
3523 rhaas 753 CBC 1948 : IsBackgroundWorker = true;
3523 rhaas 754 EUB :
1124 peter 755 GIC 1948 : MyBackendType = B_BG_WORKER;
1124 peter 756 CBC 1948 : init_ps_display(worker->bgw_name);
757 :
3523 rhaas 758 1948 : SetProcessingMode(InitProcessing);
3523 rhaas 759 ECB :
760 : /* Apply PostAuthDelay */
3523 rhaas 761 CBC 1948 : if (PostAuthDelay > 0)
3523 rhaas 762 UIC 0 : pg_usleep(PostAuthDelay * 1000000L);
763 :
3523 rhaas 764 ECB : /*
3523 rhaas 765 EUB : * Set up signal handlers.
766 : */
3523 rhaas 767 GIC 1948 : if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
768 : {
769 : /*
3523 rhaas 770 ECB : * SIGINT is used to signal canceling the current action
771 : */
3523 rhaas 772 GIC 1939 : pqsignal(SIGINT, StatementCancelHandler);
773 1939 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
774 1939 : pqsignal(SIGFPE, FloatExceptionHandler);
3523 rhaas 775 ECB :
776 : /* XXX Any other handlers needed here? */
777 : }
778 : else
779 : {
3523 rhaas 780 GIC 9 : pqsignal(SIGINT, SIG_IGN);
769 tmunro 781 9 : pqsignal(SIGUSR1, SIG_IGN);
3523 rhaas 782 9 : pqsignal(SIGFPE, SIG_IGN);
3523 rhaas 783 ECB : }
3523 rhaas 784 CBC 1948 : pqsignal(SIGTERM, bgworker_die);
935 tgl 785 ECB : /* SIGQUIT handler was already set up by InitPostmasterChild */
3523 rhaas 786 GIC 1948 : pqsignal(SIGHUP, SIG_IGN);
3523 rhaas 787 ECB :
3260 bruce 788 GIC 1948 : InitializeTimeouts(); /* establishes SIGALRM handler */
3523 rhaas 789 ECB :
3523 rhaas 790 GIC 1948 : pqsignal(SIGPIPE, SIG_IGN);
3523 rhaas 791 CBC 1948 : pqsignal(SIGUSR2, SIG_IGN);
3523 rhaas 792 GIC 1948 : pqsignal(SIGCHLD, SIG_DFL);
3523 rhaas 793 ECB :
794 : /*
795 : * If an exception is encountered, processing resumes here.
796 : *
797 : * We just need to clean up, report the error, and go away.
798 : */
3523 rhaas 799 GIC 1948 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
800 : {
801 : /* Since not using PG_TRY, must reset error stack by hand */
3523 rhaas 802 CBC 64 : error_context_stack = NULL;
803 :
804 : /* Prevent interrupts while cleaning up */
805 64 : HOLD_INTERRUPTS();
806 :
807 : /*
948 tgl 808 ECB : * sigsetjmp will have blocked all signals, but we may need to accept
809 : * signals while communicating with our parallel leader. Once we've
810 : * done HOLD_INTERRUPTS() it should be safe to unblock signals.
811 : */
948 tgl 812 GIC 64 : BackgroundWorkerUnblockSignals();
813 :
814 : /* Report the error to the parallel leader and the server log */
3523 rhaas 815 CBC 64 : EmitErrorReport();
816 :
817 : /*
3523 rhaas 818 ECB : * Do we need more cleanup here? For shmem-connected bgworkers, we
819 : * will call InitProcess below, which will install ProcKill as exit
820 : * callback. That will take care of releasing locks, etc.
821 : */
822 :
823 : /* and go away */
3523 rhaas 824 GIC 64 : proc_exit(1);
825 : }
826 :
3523 rhaas 827 ECB : /* We can now handle ereport(ERROR) */
3523 rhaas 828 GIC 1948 : PG_exception_stack = &local_sigjmp_buf;
829 :
830 : /*
604 andres 831 ECB : * Create a per-backend PGPROC struct in shared memory, except in the
832 : * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
833 : * this before we can use LWLocks (and in the EXEC_BACKEND case we already
834 : * had to do some stuff with LWLocks).
835 : */
836 : #ifndef EXEC_BACKEND
604 andres 837 GIC 1948 : InitProcess();
838 : #endif
839 :
604 andres 840 ECB : /*
841 : * Early initialization.
842 : */
604 andres 843 GIC 1948 : BaseInit();
844 :
845 : /*
2186 tgl 846 ECB : * Look up the entry point function, loading its library if necessary.
847 : */
2186 tgl 848 GIC 3896 : entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name,
849 1948 : worker->bgw_function_name);
850 :
3523 rhaas 851 ECB : /*
852 : * Note that in normal processes, we would call InitPostgres here. For a
853 : * worker, however, we don't know what database to connect to, yet; so we
854 : * need to wait until the user code does it via
855 : * BackgroundWorkerInitializeConnection().
856 : */
857 :
858 : /*
859 : * Now invoke the user-defined worker code
860 : */
3523 rhaas 861 GIC 1948 : entrypt(worker->bgw_main_arg);
862 :
863 : /* ... and if it returns, we're done: exit with status 0 (the error path above exits with status 1) */
3523 rhaas 864 CBC 1298 : proc_exit(0);
865 : }
866 :
3554 rhaas 867 ECB : /*
868 : * Register a new static background worker.
869 : *
870 : * This can only be called directly from postmaster or in the _PG_init
871 : * function of a module library that's loaded by shared_preload_libraries;
872 : * otherwise it will have no effect.
873 : */
874 : void
3554 rhaas 875 GIC 602 : RegisterBackgroundWorker(BackgroundWorker *worker)
876 : {
877 : RegisteredBgWorker *rw;
3554 rhaas 878 ECB : static int numworkers = 0;
879 :
3554 rhaas 880 GIC 602 : if (!IsUnderPostmaster)
2844 881 602 : ereport(DEBUG1,
882 : (errmsg_internal("registering background worker \"%s\"", worker->bgw_name)));
3554 rhaas 883 ECB :
2200 rhaas 884 CBC 602 : if (!process_shared_preload_libraries_in_progress &&
2186 tgl 885 GIC 598 : strcmp(worker->bgw_library_name, "postgres") != 0)
886 : {
3554 rhaas 887 LBC 0 : if (!IsUnderPostmaster)
888 0 : ereport(LOG,
889 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3554 rhaas 890 EUB : errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
891 : worker->bgw_name)));
3554 rhaas 892 UIC 0 : return;
893 : }
894 :
3554 rhaas 895 GBC 602 : if (!SanityCheckBackgroundWorker(worker, LOG))
3554 rhaas 896 UIC 0 : return;
897 :
3511 rhaas 898 CBC 602 : if (worker->bgw_notify_pid != 0)
3511 rhaas 899 EUB : {
3511 rhaas 900 UIC 0 : ereport(LOG,
3511 rhaas 901 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
902 : errmsg("background worker \"%s\": only dynamic background workers can request notification",
3260 bruce 903 EUB : worker->bgw_name)));
3511 rhaas 904 UIC 0 : return;
905 : }
906 :
3554 rhaas 907 EUB : /*
908 : * Enforce maximum number of workers. Note this is overly restrictive: we
909 : * could allow more non-shmem-connected workers, because these don't count
910 : * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem
911 : * important to relax this restriction.
912 : */
3554 rhaas 913 GIC 602 : if (++numworkers > max_worker_processes)
914 : {
3554 rhaas 915 UIC 0 : ereport(LOG,
3554 rhaas 916 ECB : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
917 : errmsg("too many background workers"),
3554 rhaas 918 EUB : errdetail_plural("Up to %d background worker can be registered with the current settings.",
919 : "Up to %d background workers can be registered with the current settings.",
920 : max_worker_processes,
921 : max_worker_processes),
922 : errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
3554 rhaas 923 UIC 0 : return;
924 : }
925 :
3554 rhaas 926 EUB : /*
927 : * Copy the registration data into the registered workers list.
928 : */
3554 rhaas 929 GIC 602 : rw = malloc(sizeof(RegisteredBgWorker));
930 602 : if (rw == NULL)
931 : {
3554 rhaas 932 LBC 0 : ereport(LOG,
3554 rhaas 933 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
934 : errmsg("out of memory")));
3554 rhaas 935 UBC 0 : return;
936 : }
937 :
3554 rhaas 938 GBC 602 : rw->rw_worker = *worker;
3554 rhaas 939 GIC 602 : rw->rw_backend = NULL;
940 602 : rw->rw_pid = 0;
3554 rhaas 941 CBC 602 : rw->rw_child_slot = 0;
942 602 : rw->rw_crashed_at = 0;
3460 943 602 : rw->rw_terminate = false;
3554 rhaas 944 ECB :
3554 rhaas 945 CBC 602 : slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
3554 rhaas 946 ECB : }
947 :
948 : /*
949 : * Register a new background worker from a regular backend.
950 : *
951 : * Returns true on success and false on failure. Failure typically indicates
952 : * that no background worker slots are currently available.
953 : *
954 : * If handle != NULL, we'll set *handle to a pointer that can subsequently
955 : * be used as an argument to GetBackgroundWorkerPid(). The caller can
956 : * free this pointer using pfree(), if desired.
957 : */
958 : bool
3511 rhaas 959 GIC 1592 : RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
960 : BackgroundWorkerHandle **handle)
961 : {
3260 bruce 962 ECB : int slotno;
3260 bruce 963 GIC 1592 : bool success = false;
964 : bool parallel;
965 1592 : uint64 generation = 0;
3554 rhaas 966 ECB :
967 : /*
3260 bruce 968 : * We can't register dynamic background workers from the postmaster. If
969 : * this is a standalone backend, we're the only process and can't start
970 : * any more. In a multi-process environment, it might be theoretically
971 : * possible, but we don't currently support it due to locking
972 : * considerations; see comments on the BackgroundWorkerSlot data
973 : * structure.
974 : */
3554 rhaas 975 GIC 1592 : if (!IsUnderPostmaster)
3554 rhaas 976 UIC 0 : return false;
977 :
3554 rhaas 978 CBC 1592 : if (!SanityCheckBackgroundWorker(worker, ERROR))
3554 rhaas 979 UBC 0 : return false;
980 :
2319 rhaas 981 CBC 1592 : parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
2319 rhaas 982 EUB :
3554 rhaas 983 GIC 1592 : LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
3554 rhaas 984 ECB :
985 : /*
2319 986 : * If this is a parallel worker, check whether there are already too many
987 : * parallel workers; if so, don't register another one. Our view of
988 : * parallel_terminate_count may be slightly stale, but that doesn't really
989 : * matter: we would have gotten the same result if we'd arrived here
990 : * slightly earlier anyway. There's no help for it, either, since the
991 : * postmaster must not take locks; a memory barrier wouldn't guarantee
992 : * anything useful.
993 : */
2319 rhaas 994 GIC 1592 : if (parallel && (BackgroundWorkerData->parallel_register_count -
995 1309 : BackgroundWorkerData->parallel_terminate_count) >=
996 : max_parallel_workers)
2319 rhaas 997 ECB : {
2189 rhaas 998 CBC 9 : Assert(BackgroundWorkerData->parallel_register_count -
999 : BackgroundWorkerData->parallel_terminate_count <=
1000 : MAX_PARALLEL_WORKER_LIMIT);
2319 1001 9 : LWLockRelease(BackgroundWorkerLock);
2319 rhaas 1002 GIC 9 : return false;
1003 : }
2319 rhaas 1004 ECB :
3554 1005 : /*
1006 : * Look for an unused slot. If we find one, grab it.
1007 : */
3554 rhaas 1008 GIC 4985 : for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
1009 : {
1010 4983 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
3554 rhaas 1011 ECB :
3554 rhaas 1012 GIC 4983 : if (!slot->in_use)
3554 rhaas 1013 ECB : {
3554 rhaas 1014 GIC 1581 : memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
2118 tgl 1015 CBC 1581 : slot->pid = InvalidPid; /* indicates not started yet */
3511 rhaas 1016 GIC 1581 : slot->generation++;
3460 rhaas 1017 CBC 1581 : slot->terminate = false;
3511 1018 1581 : generation = slot->generation;
2319 1019 1581 : if (parallel)
1020 1298 : BackgroundWorkerData->parallel_register_count++;
3554 rhaas 1021 ECB :
1022 : /*
3260 bruce 1023 : * Make sure postmaster doesn't see the slot as in use before it
1024 : * sees the new contents.
1025 : */
3554 rhaas 1026 GIC 1581 : pg_write_barrier();
1027 :
1028 1581 : slot->in_use = true;
3554 rhaas 1029 CBC 1581 : success = true;
3554 rhaas 1030 GIC 1581 : break;
3554 rhaas 1031 ECB : }
1032 : }
1033 :
3554 rhaas 1034 GIC 1583 : LWLockRelease(BackgroundWorkerLock);
1035 :
1036 : /* If we found a slot, tell the postmaster to notice the change. */
3554 rhaas 1037 CBC 1583 : if (success)
3554 rhaas 1038 GIC 1581 : SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1039 :
3511 rhaas 1040 ECB : /*
1041 : * If we found a slot and the user has provided a handle, initialize it.
1042 : */
3511 rhaas 1043 GIC 1583 : if (success && handle)
1044 : {
1045 1581 : *handle = palloc(sizeof(BackgroundWorkerHandle));
3511 rhaas 1046 CBC 1581 : (*handle)->slot = slotno;
3511 rhaas 1047 GIC 1581 : (*handle)->generation = generation;
3511 rhaas 1048 ECB : }
1049 :
3554 rhaas 1050 CBC 1583 : return success;
1051 : }
1052 :
3511 rhaas 1053 ECB : /*
1054 : * Get the PID of a dynamically-registered background worker.
1055 : *
1056 : * If the worker is determined to be running, the return value will be
1057 : * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1058 : * postmaster has not yet attempted to start the worker, the return value will
1059 : * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED.
1060 : *
1061 : * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1062 : * (because it is configured for automatic restart and exited non-zero),
1063 : * or that the worker is permanently stopped (because it exited with exit
1064 : * code 0, or was not configured for automatic restart), or even that the
1065 : * worker was unregistered without ever starting (either because startup
1066 : * failed and the worker is not configured for automatic restart, or because
1067 : * TerminateBackgroundWorker was used before the worker was successfully
1068 : * started).
1069 : */
1070 : BgwHandleStatus
3511 rhaas 1071 GIC 6691597 : GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
1072 : {
1073 : BackgroundWorkerSlot *slot;
3260 bruce 1074 ECB : pid_t pid;
1075 :
3511 rhaas 1076 GIC 6691597 : Assert(handle->slot < max_worker_processes);
1077 6691597 : slot = &BackgroundWorkerData->slot[handle->slot];
1078 :
3511 rhaas 1079 ECB : /*
3260 bruce 1080 : * We could probably arrange to synchronize access to data using memory
1081 : * barriers only, but for now, let's just keep it simple and grab the
1082 : * lock. It seems unlikely that there will be enough traffic here to
1083 : * result in meaningful contention.
1084 : */
3511 rhaas 1085 GIC 6691597 : LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1086 :
1087 : /*
3511 rhaas 1088 ECB : * The generation number can't be concurrently changed while we hold the
1089 : * lock. The pid, which is updated by the postmaster, can change at any
1090 : * time, but we assume such changes are atomic. So the value we read
1091 : * won't be garbage, but it might be out of date by the time the caller
1092 : * examines it (but that's unavoidable anyway).
1093 : *
1094 : * The in_use flag could be in the process of changing from true to false,
1095 : * but if it is already false then it can't change further.
1096 : */
1950 rhaas 1097 GIC 6691597 : if (handle->generation != slot->generation || !slot->in_use)
3511 1098 1299 : pid = 0;
1099 : else
3511 rhaas 1100 CBC 6690298 : pid = slot->pid;
3511 rhaas 1101 ECB :
1102 : /* All done. */
3511 rhaas 1103 CBC 6691597 : LWLockRelease(BackgroundWorkerLock);
1104 :
3511 rhaas 1105 GIC 6691597 : if (pid == 0)
3511 rhaas 1106 CBC 1299 : return BGWH_STOPPED;
3511 rhaas 1107 GIC 6690298 : else if (pid == InvalidPid)
3511 rhaas 1108 CBC 200647 : return BGWH_NOT_YET_STARTED;
1109 6489651 : *pidp = pid;
1110 6489651 : return BGWH_STARTED;
3511 rhaas 1111 ECB : }
1112 :
1113 : /*
1114 : * Wait for a background worker to start up.
1115 : *
1116 : * This is like GetBackgroundWorkerPid(), except that if the worker has not
1117 : * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1118 : * returned. However, if the postmaster has died, we give up and return
1119 : * BGWH_POSTMASTER_DIED, since it that case we know that startup will not
1120 : * take place.
1121 : *
1122 : * The caller *must* have set our PID as the worker's bgw_notify_pid,
1123 : * else we will not be awoken promptly when the worker's state changes.
1124 : */
1125 : BgwHandleStatus
3511 rhaas 1126 GIC 1 : WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
1127 : {
1128 : BgwHandleStatus status;
3260 bruce 1129 ECB : int rc;
1130 :
1131 : for (;;)
3511 rhaas 1132 UIC 0 : {
1133 : pid_t pid;
1134 :
2739 rhaas 1135 GBC 1 : CHECK_FOR_INTERRUPTS();
1136 :
2739 rhaas 1137 GIC 1 : status = GetBackgroundWorkerPid(handle, &pid);
2739 rhaas 1138 CBC 1 : if (status == BGWH_STARTED)
2739 rhaas 1139 GIC 1 : *pidp = pid;
2739 rhaas 1140 CBC 1 : if (status != BGWH_NOT_YET_STARTED)
1141 1 : break;
3511 rhaas 1142 ECB :
2739 rhaas 1143 LBC 0 : rc = WaitLatch(MyLatch,
2378 rhaas 1144 ECB : WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1145 : WAIT_EVENT_BGWORKER_STARTUP);
3511 rhaas 1146 EUB :
2739 rhaas 1147 UIC 0 : if (rc & WL_POSTMASTER_DEATH)
1148 : {
1149 0 : status = BGWH_POSTMASTER_DIED;
2739 rhaas 1150 UBC 0 : break;
1151 : }
2739 rhaas 1152 EUB :
2739 rhaas 1153 UBC 0 : ResetLatch(MyLatch);
1154 : }
1155 :
2901 rhaas 1156 GBC 1 : return status;
1157 : }
1158 :
2901 rhaas 1159 ECB : /*
1160 : * Wait for a background worker to stop.
1161 : *
1162 : * If the worker hasn't yet started, or is running, we wait for it to stop
1163 : * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1164 : * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1165 : * notifies us when a worker's state changes.
1166 : *
1167 : * The caller *must* have set our PID as the worker's bgw_notify_pid,
1168 : * else we will not be awoken promptly when the worker's state changes.
1169 : */
1170 : BgwHandleStatus
2901 rhaas 1171 GIC 1299 : WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
1172 : {
1173 : BgwHandleStatus status;
2901 rhaas 1174 ECB : int rc;
1175 :
1176 : for (;;)
2901 rhaas 1177 GIC 1639 : {
1178 : pid_t pid;
1179 :
2739 rhaas 1180 CBC 2938 : CHECK_FOR_INTERRUPTS();
1181 :
2739 rhaas 1182 GIC 2938 : status = GetBackgroundWorkerPid(handle, &pid);
2739 rhaas 1183 CBC 2938 : if (status == BGWH_STOPPED)
2439 tgl 1184 GIC 1299 : break;
2901 rhaas 1185 ECB :
2133 andres 1186 CBC 1639 : rc = WaitLatch(MyLatch,
2378 rhaas 1187 ECB : WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1188 : WAIT_EVENT_BGWORKER_SHUTDOWN);
2901 1189 :
2739 rhaas 1190 GIC 1639 : if (rc & WL_POSTMASTER_DEATH)
1191 : {
2439 tgl 1192 UIC 0 : status = BGWH_POSTMASTER_DIED;
2439 tgl 1193 LBC 0 : break;
1194 : }
2901 rhaas 1195 EUB :
2133 andres 1196 GBC 1639 : ResetLatch(MyLatch);
1197 : }
1198 :
3511 rhaas 1199 CBC 1299 : return status;
1200 : }
1201 :
3460 rhaas 1202 ECB : /*
1203 : * Instruct the postmaster to terminate a background worker.
1204 : *
1205 : * Note that it's safe to do this without regard to whether the worker is
1206 : * still running, or even if the worker may already have exited and been
1207 : * unregistered.
1208 : */
1209 : void
3460 rhaas 1210 GIC 3 : TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
1211 : {
1212 : BackgroundWorkerSlot *slot;
3260 bruce 1213 CBC 3 : bool signal_postmaster = false;
1214 :
3460 rhaas 1215 GIC 3 : Assert(handle->slot < max_worker_processes);
3460 rhaas 1216 CBC 3 : slot = &BackgroundWorkerData->slot[handle->slot];
1217 :
3460 rhaas 1218 ECB : /* Set terminate flag in shared memory, unless slot has been reused. */
3460 rhaas 1219 CBC 3 : LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
3460 rhaas 1220 GIC 3 : if (handle->generation == slot->generation)
1221 : {
3460 rhaas 1222 CBC 3 : slot->terminate = true;
1223 3 : signal_postmaster = true;
1224 : }
1225 3 : LWLockRelease(BackgroundWorkerLock);
3460 rhaas 1226 ECB :
1227 : /* Make sure the postmaster notices the change to shared memory. */
3460 rhaas 1228 CBC 3 : if (signal_postmaster)
3460 rhaas 1229 GIC 3 : SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1230 3 : }
2200 rhaas 1231 ECB :
1232 : /*
2186 tgl 1233 : * Look up (and possibly load) a bgworker entry point function.
1234 : *
1235 : * For functions contained in the core code, we use library name "postgres"
1236 : * and consult the InternalBGWorkers array. External functions are
1237 : * looked up, and loaded if necessary, using load_external_function().
1238 : *
1239 : * The point of this is to pass function names as strings across process
1240 : * boundaries. We can't pass actual function addresses because of the
1241 : * possibility that the function has been loaded at a different address
1242 : * in a different process. This is obviously a hazard for functions in
1243 : * loadable libraries, but it can happen even for functions in the core code
1244 : * on platforms using EXEC_BACKEND (e.g., Windows).
1245 : *
1246 : * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1247 : * in favor of applying load_external_function() for core functions too;
1248 : * but that raises portability issues that are not worth addressing now.
1249 : */
1250 : static bgworker_main_type
2186 tgl 1251 GIC 1948 : LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1252 : {
1253 : /*
2186 tgl 1254 ECB : * If the function is to be loaded from postgres itself, search the
1255 : * InternalBGWorkers array.
1256 : */
2186 tgl 1257 GIC 1948 : if (strcmp(libraryname, "postgres") == 0)
1258 : {
1259 : int i;
2200 rhaas 1260 ECB :
2186 tgl 1261 GIC 2897 : for (i = 0; i < lengthof(InternalBGWorkers); i++)
1262 : {
1263 2897 : if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
2186 tgl 1264 CBC 1935 : return InternalBGWorkers[i].fn_addr;
1265 : }
2200 rhaas 1266 ECB :
2186 tgl 1267 : /* We can only reach this by programming error. */
2186 tgl 1268 UIC 0 : elog(ERROR, "internal function \"%s\" not found", funcname);
1269 : }
1270 :
2186 tgl 1271 EUB : /* Otherwise load from external library. */
2186 tgl 1272 GIC 13 : return (bgworker_main_type)
1273 13 : load_external_function(libraryname, funcname, true, NULL);
1274 : }
2047 peter_e 1275 ECB :
1276 : /*
1277 : * Given a PID, get the bgw_type of the background worker. Returns NULL if
1278 : * not a valid background worker.
1279 : *
1280 : * The return value is in static memory belonging to this function, so it has
1281 : * to be used before calling this function again. This is so that the caller
1282 : * doesn't have to worry about the background worker locking protocol.
1283 : */
1284 : const char *
2047 peter_e 1285 GIC 580 : GetBackgroundWorkerTypeByPid(pid_t pid)
1286 : {
1287 : int slotno;
2047 peter_e 1288 CBC 580 : bool found = false;
1289 : static char result[BGW_MAXLEN];
1290 :
1291 580 : LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1292 :
2047 peter_e 1293 GIC 627 : for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
2047 peter_e 1294 ECB : {
2047 peter_e 1295 GIC 627 : BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
2047 peter_e 1296 ECB :
2047 peter_e 1297 GIC 627 : if (slot->pid > 0 && slot->pid == pid)
2047 peter_e 1298 ECB : {
2047 peter_e 1299 GIC 580 : strcpy(result, slot->worker.bgw_type);
2047 peter_e 1300 CBC 580 : found = true;
2047 peter_e 1301 GIC 580 : break;
2047 peter_e 1302 ECB : }
1303 : }
1304 :
2047 peter_e 1305 GIC 580 : LWLockRelease(BackgroundWorkerLock);
1306 :
1307 580 : if (!found)
2047 peter_e 1308 LBC 0 : return NULL;
1309 :
2047 peter_e 1310 CBC 580 : return result;
2047 peter_e 1311 EUB : }
|