Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pmsignal.c
4 : * routines for signaling between the postmaster and its child processes
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/pmsignal.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <signal.h>
18 : #include <unistd.h>
19 :
20 : #ifdef HAVE_SYS_PRCTL_H
21 : #include <sys/prctl.h>
22 : #endif
23 :
24 : #include "miscadmin.h"
25 : #include "postmaster/postmaster.h"
26 : #include "replication/walsender.h"
27 : #include "storage/pmsignal.h"
28 : #include "storage/shmem.h"
29 : #include "utils/memutils.h"
30 :
31 :
32 : /*
33 : * The postmaster is signaled by its children by sending SIGUSR1. The
34 : * specific reason is communicated via flags in shared memory. We keep
35 : * a boolean flag for each possible "reason", so that different reasons
36 : * can be signaled by different backends at the same time. (However,
37 : * if the same reason is signaled more than once simultaneously, the
38 : * postmaster will observe it only once.)
39 : *
40 : * The flags are actually declared as "volatile sig_atomic_t" for maximum
41 : * portability. This should ensure that loads and stores of the flag
42 : * values are atomic, allowing us to dispense with any explicit locking.
43 : *
44 : * In addition to the per-reason flags, we store a set of per-child-process
45 : * flags that are currently used only for detecting whether a backend has
46 : * exited without performing proper shutdown. The per-child-process flags
47 : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
48 : * available for assignment. An ASSIGNED slot is associated with a postmaster
49 : * child process, but either the process has not touched shared memory yet,
50 : * or it has successfully cleaned up after itself. An ACTIVE slot means the
51 : * process is actively using shared memory. The slots are assigned to
52 : * child processes arbitrarily, and postmaster.c is responsible for tracking
53 : * which one goes with which PID.
54 : *
55 : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
56 : * but carries the extra information that the child is a WAL sender.
57 : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
58 : * start streaming the WAL (and they never go back to ACTIVE after that).
59 : *
60 : * We also have a shared-memory field that is used for communication in
61 : * the opposite direction, from postmaster to children: it tells why the
62 : * postmaster has broadcast SIGQUIT signals, if indeed it has done so.
63 : */
64 :
65 : #define PM_CHILD_UNUSED 0 /* these values must fit in sig_atomic_t */
66 : #define PM_CHILD_ASSIGNED 1
67 : #define PM_CHILD_ACTIVE 2
68 : #define PM_CHILD_WALSENDER 3
69 :
70 : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
71 : struct PMSignalData
72 : {
73 : /* per-reason flags for signaling the postmaster */
74 : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
75 : /* global flags for signals from postmaster to children */
76 : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */
77 : /* per-child-process flags */
78 : int num_child_flags; /* # of entries in PMChildFlags[] */
79 : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
80 : };
81 :
82 : /* PMSignalState pointer is valid in both postmaster and child processes */
83 : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
84 :
85 : /*
86 : * These static variables are valid only in the postmaster. We keep a
87 : * duplicative private array so that we can trust its state even if some
88 : * failing child has clobbered the PMSignalData struct in shared memory.
89 : */
90 : static int num_child_inuse; /* # of entries in PMChildInUse[] */
91 : static int next_child_inuse; /* next slot to try to assign */
92 : static bool *PMChildInUse; /* true if i'th flag slot is assigned */
93 :
/*
 * Support for being told by signal that the postmaster has died.
 */
#ifdef USE_POSTMASTER_DEATH_SIGNAL

/* Set by the handler below; cleared and re-checked by PostmasterIsAliveInternal */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * Handler for the postmaster-death signal: it only records that the
 * postmaster may be gone.
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}

/*
 * Pick the signal to use for postmaster-death notification.  Which signals
 * exist depends on the OS, and SIGUSR1 and SIGUSR2 are already taken for
 * other purposes, so some other signal is needed.
 *
 * For now we assume a usable signal can always be found; that seems a
 * reasonable assumption for any platform modern enough to offer a
 * parent-death signaling mechanism.
 */
#ifdef SIGINFO
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#elif defined(SIGPWR)
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#else
#error "cannot find a signal to use for postmaster death"
#endif

#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
123 :
124 : /*
125 : * PMSignalShmemSize
126 : * Compute space needed for pmsignal.c's shared memory
127 : */
128 : Size
5087 tgl 129 6390 : PMSignalShmemSize(void)
130 : {
131 : Size size;
132 :
133 6390 : size = offsetof(PMSignalData, PMChildFlags);
134 6390 : size = add_size(size, mul_size(MaxLivePostmasterChildren(),
135 : sizeof(sig_atomic_t)));
136 :
137 6390 : return size;
138 : }
139 :
140 : /*
141 : * PMSignalShmemInit - initialize during shared-memory creation
142 : */
143 : void
144 1826 : PMSignalShmemInit(void)
145 : {
146 : bool found;
147 :
148 1826 : PMSignalState = (PMSignalData *)
149 1826 : ShmemInitStruct("PMSignalState", PMSignalShmemSize(), &found);
150 :
7050 bruce 151 1826 : if (!found)
152 : {
153 : /* initialize all flags to zeroes */
1476 peter 154 202333 : MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
180 tgl 155 1826 : num_child_inuse = MaxLivePostmasterChildren();
156 1826 : PMSignalState->num_child_flags = num_child_inuse;
157 :
158 : /*
159 : * Also allocate postmaster's private PMChildInUse[] array. We
160 : * might've already done that in a previous shared-memory creation
161 : * cycle, in which case free the old array to avoid a leak. (Do it
162 : * like this to support the possibility that MaxLivePostmasterChildren
163 : * changed.) In a standalone backend, we do not need this.
164 : */
165 1826 : if (PostmasterContext != NULL)
166 : {
167 597 : if (PMChildInUse)
168 4 : pfree(PMChildInUse);
169 597 : PMChildInUse = (bool *)
170 597 : MemoryContextAllocZero(PostmasterContext,
171 : num_child_inuse * sizeof(bool));
172 : }
173 1826 : next_child_inuse = 0;
174 : }
7826 175 1826 : }
176 :
177 : /*
178 : * SendPostmasterSignal - signal the postmaster from a child process
179 : */
180 : void
181 2769 : SendPostmasterSignal(PMSignalReason reason)
182 : {
183 : /* If called in a standalone backend, do nothing */
184 2769 : if (!IsUnderPostmaster)
7826 tgl 185 UBC 0 : return;
186 : /* Atomically set the proper flag */
5087 tgl 187 CBC 2769 : PMSignalState->PMSignalFlags[reason] = true;
188 : /* Send signal to postmaster */
7000 neilc 189 2769 : kill(PostmasterPid, SIGUSR1);
190 : }
191 :
192 : /*
193 : * CheckPostmasterSignal - check to see if a particular reason has been
194 : * signaled, and clear the signal flag. Should be called by postmaster
195 : * after receiving SIGUSR1.
196 : */
197 : bool
7826 tgl 198 16541 : CheckPostmasterSignal(PMSignalReason reason)
199 : {
200 : /* Careful here --- don't clear flag if we haven't seen it set */
5087 201 16541 : if (PMSignalState->PMSignalFlags[reason])
202 : {
203 2359 : PMSignalState->PMSignalFlags[reason] = false;
7826 204 2359 : return true;
205 : }
206 14182 : return false;
207 : }
208 :
209 : /*
210 : * SetQuitSignalReason - broadcast the reason for a system shutdown.
211 : * Should be called by postmaster before sending SIGQUIT to children.
212 : *
213 : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
214 : * as a part of rebuilding shared memory; the postmaster need not do it
215 : * explicitly.
216 : */
217 : void
836 218 245 : SetQuitSignalReason(QuitSignalReason reason)
219 : {
220 245 : PMSignalState->sigquit_reason = reason;
221 245 : }
222 :
223 : /*
224 : * GetQuitSignalReason - obtain the reason for a system shutdown.
225 : * Called by child processes when they receive SIGQUIT.
226 : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
227 : */
228 : QuitSignalReason
836 tgl 229 UBC 0 : GetQuitSignalReason(void)
230 : {
231 : /* This is called in signal handlers, so be extra paranoid. */
232 0 : if (!IsUnderPostmaster || PMSignalState == NULL)
233 0 : return PMQUIT_NOT_SENT;
234 0 : return PMSignalState->sigquit_reason;
235 : }
236 :
237 :
238 : /*
239 : * AssignPostmasterChildSlot - select an unused slot for a new postmaster
240 : * child process, and set its state to ASSIGNED. Returns a slot number
241 : * (one to N).
242 : *
243 : * Only the postmaster is allowed to execute this routine, so we need no
244 : * special locking.
245 : */
246 : int
5087 tgl 247 CBC 10980 : AssignPostmasterChildSlot(void)
248 : {
180 249 10980 : int slot = next_child_inuse;
250 : int n;
251 :
252 : /*
253 : * Scan for a free slot. Notice that we trust nothing about the contents
254 : * of PMSignalState, but use only postmaster-local data for this decision.
255 : * We track the last slot assigned so as not to waste time repeatedly
256 : * rescanning low-numbered slots.
257 : */
258 11034 : for (n = num_child_inuse; n > 0; n--)
259 : {
5087 260 11034 : if (--slot < 0)
180 261 603 : slot = num_child_inuse - 1;
262 11034 : if (!PMChildInUse[slot])
263 : {
264 10980 : PMChildInUse[slot] = true;
5087 265 10980 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
180 266 10980 : next_child_inuse = slot;
5087 267 10980 : return slot + 1;
268 : }
269 : }
270 :
271 : /* Out of slots ... should never happen, else postmaster.c messed up */
5087 tgl 272 UBC 0 : elog(FATAL, "no free slots in PMChildFlags array");
273 : return 0; /* keep compiler quiet */
274 : }
275 :
276 : /*
277 : * ReleasePostmasterChildSlot - release a slot after death of a postmaster
278 : * child process. This must be called in the postmaster process.
279 : *
280 : * Returns true if the slot had been in ASSIGNED state (the expected case),
281 : * false otherwise (implying that the child failed to clean itself up).
282 : */
283 : bool
5087 tgl 284 CBC 10980 : ReleasePostmasterChildSlot(int slot)
285 : {
286 : bool result;
287 :
180 288 10980 : Assert(slot > 0 && slot <= num_child_inuse);
5087 289 10980 : slot--;
290 :
291 : /*
292 : * Note: the slot state might already be unused, because the logic in
293 : * postmaster.c is such that this might get called twice when a child
294 : * crashes. So we don't try to Assert anything about the state.
295 : */
296 10980 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
297 10980 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
180 298 10980 : PMChildInUse[slot] = false;
5087 299 10980 : return result;
300 : }
301 :
302 : /*
303 : * IsPostmasterChildWalSender - check if given slot is in use by a
304 : * walsender process. This is called only by the postmaster.
305 : */
306 : bool
4832 heikki.linnakangas 307 1926 : IsPostmasterChildWalSender(int slot)
308 : {
180 tgl 309 1926 : Assert(slot > 0 && slot <= num_child_inuse);
4832 heikki.linnakangas 310 1926 : slot--;
311 :
312 1926 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
313 69 : return true;
314 : else
315 1857 : return false;
316 : }
317 :
318 : /*
319 : * MarkPostmasterChildActive - mark a postmaster child as about to begin
320 : * actively using shared memory. This is called in the child process.
321 : */
322 : void
5087 tgl 323 10583 : MarkPostmasterChildActive(void)
324 : {
325 10583 : int slot = MyPMChildSlot;
326 :
327 10583 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
328 10583 : slot--;
329 10583 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
4467 heikki.linnakangas 330 10583 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
331 10583 : }
332 :
333 : /*
334 : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
335 : * process. This is called in the child process, sometime after marking the
336 : * child as active.
337 : */
338 : void
339 831 : MarkPostmasterChildWalSender(void)
340 : {
341 831 : int slot = MyPMChildSlot;
342 :
343 831 : Assert(am_walsender);
344 :
345 831 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
346 831 : slot--;
347 831 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
348 831 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
4832 349 831 : }
350 :
351 : /*
352 : * MarkPostmasterChildInactive - mark a postmaster child as done using
353 : * shared memory. This is called in the child process.
354 : */
355 : void
5087 tgl 356 10583 : MarkPostmasterChildInactive(void)
357 : {
358 10583 : int slot = MyPMChildSlot;
359 :
360 10583 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
361 10583 : slot--;
4832 heikki.linnakangas 362 10583 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
363 : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
5087 tgl 364 10583 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
365 10583 : }
366 :
367 :
368 : /*
369 : * PostmasterIsAliveInternal - check whether postmaster process is still alive
370 : *
371 : * This is the slow path of PostmasterIsAlive(), where the caller has already
372 : * checked 'postmaster_possibly_dead'. (On platforms that don't support
373 : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
374 : */
375 : bool
1733 tmunro 376 173 : PostmasterIsAliveInternal(void)
377 : {
378 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
379 : /*
380 : * Reset the flag before checking, so that we don't miss a signal if
381 : * postmaster dies right after the check. If postmaster was indeed dead,
382 : * we'll re-arm it before returning to caller.
383 : */
384 173 : postmaster_possibly_dead = false;
385 : #endif
386 :
387 : #ifndef WIN32
388 : {
389 : char c;
390 : ssize_t rc;
391 :
392 173 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
393 :
394 : /*
395 : * In the usual case, the postmaster is still alive, and there is no
396 : * data in the pipe.
397 : */
398 173 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
4492 rhaas 399 158 : return true;
400 : else
401 : {
402 : /*
403 : * Postmaster is dead, or something went wrong with the read()
404 : * call.
405 : */
406 :
407 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
1733 tmunro 408 15 : postmaster_possibly_dead = true;
409 : #endif
410 :
411 15 : if (rc < 0)
1733 tmunro 412 UBC 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
1733 tmunro 413 CBC 15 : else if (rc > 0)
1733 tmunro 414 UBC 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
415 :
1733 tmunro 416 CBC 15 : return false;
417 : }
418 : }
419 :
420 : #else /* WIN32 */
421 : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
422 : return true;
423 : else
424 : {
425 : #ifdef USE_POSTMASTER_DEATH_SIGNAL
426 : postmaster_possibly_dead = true;
427 : #endif
428 : return false;
429 : }
430 : #endif /* WIN32 */
431 : }
432 :
/*
 * PostmasterDeathSignalInit - ask to be signaled on postmaster death, where
 * the platform supports that
 *
 * When USE_POSTMASTER_DEATH_SIGNAL is not defined this does nothing.
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask the OS to deliver the signal when our parent exits. */
#if defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might already have been gone before the request took
	 * effect, in which case no signal will come.  Force the slow check on
	 * the first call to cover that possibility.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|