Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pmsignal.c
4 : : * routines for signaling between the postmaster and its child processes
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/ipc/pmsignal.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <signal.h>
18 : : #include <unistd.h>
19 : :
20 : : #ifdef HAVE_SYS_PRCTL_H
21 : : #include <sys/prctl.h>
22 : : #endif
23 : :
24 : : #include "miscadmin.h"
25 : : #include "postmaster/postmaster.h"
26 : : #include "replication/walsender.h"
27 : : #include "storage/pmsignal.h"
28 : : #include "storage/shmem.h"
29 : : #include "utils/memutils.h"
30 : :
31 : :
32 : : /*
33 : : * The postmaster is signaled by its children by sending SIGUSR1. The
34 : : * specific reason is communicated via flags in shared memory. We keep
35 : : * a boolean flag for each possible "reason", so that different reasons
36 : : * can be signaled by different backends at the same time. (However,
37 : : * if the same reason is signaled more than once simultaneously, the
38 : : * postmaster will observe it only once.)
39 : : *
40 : : * The flags are actually declared as "volatile sig_atomic_t" for maximum
41 : : * portability. This should ensure that loads and stores of the flag
42 : : * values are atomic, allowing us to dispense with any explicit locking.
43 : : *
44 : : * In addition to the per-reason flags, we store a set of per-child-process
45 : : * flags that are currently used only for detecting whether a backend has
46 : : * exited without performing proper shutdown. The per-child-process flags
47 : : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
48 : : * available for assignment. An ASSIGNED slot is associated with a postmaster
49 : : * child process, but either the process has not touched shared memory yet,
50 : : * or it has successfully cleaned up after itself. An ACTIVE slot means the
51 : : * process is actively using shared memory. The slots are assigned to
52 : : * child processes at random, and postmaster.c is responsible for tracking
53 : : * which one goes with which PID.
54 : : *
55 : : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
56 : : * but carries the extra information that the child is a WAL sender.
57 : : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
58 : : * start streaming the WAL (and they never go back to ACTIVE after that).
59 : : *
60 : : * We also have a shared-memory field that is used for communication in
61 : : * the opposite direction, from postmaster to children: it tells why the
62 : : * postmaster has broadcast SIGQUIT signals, if indeed it has done so.
63 : : */
64 : :
/*
 * Possible states of one entry in PMSignalData.PMChildFlags[].
 * These values must fit in sig_atomic_t.
 */
#define PM_CHILD_UNUSED		0	/* slot is available for assignment */
#define PM_CHILD_ASSIGNED	1	/* slot belongs to a child that is not (or
								 * no longer) using shared memory */
#define PM_CHILD_ACTIVE		2	/* child is actively using shared memory */
#define PM_CHILD_WALSENDER	3	/* like ACTIVE, and the child is a WAL
								 * sender */
69 : :
70 : : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
71 : : struct PMSignalData
72 : : {
73 : : /* per-reason flags for signaling the postmaster */
74 : : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
75 : : /* global flags for signals from postmaster to children */
76 : : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */
77 : : /* per-child-process flags */
78 : : int num_child_flags; /* # of entries in PMChildFlags[] */
79 : : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
80 : : };
81 : :
82 : : /* PMSignalState pointer is valid in both postmaster and child processes */
83 : : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
84 : :
/*
 * Postmaster-private bookkeeping; these variables are valid only in the
 * postmaster.  The array duplicates the slot-assignment information so that
 * the postmaster can trust its state even if some failing child has
 * clobbered the PMSignalData struct in shared memory.
 */
static int	num_child_inuse;	/* # of entries in PMChildInUse[] */
static int	next_child_inuse;	/* next slot to try to assign */
static bool *PMChildInUse;		/* true if i'th flag slot is assigned */
93 : :
94 : : /*
95 : : * Signal handler to be notified if postmaster dies.
96 : : */
97 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
/* Raised by postmaster_death_handler(); see PostmasterIsAliveInternal(). */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * postmaster_death_handler - handler for the parent-death signal
 *
 * Does nothing but raise the flag; the actual liveness check is done by
 * PostmasterIsAliveInternal().
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}
105 : :
/*
 * Choose the signal that announces postmaster death.  Which signals exist
 * depends on the OS; SIGUSR1 and SIGUSR2 are already used for other things,
 * so a different one is needed.
 *
 * Currently, we assume that we can always find a signal to use.  That seems
 * like a reasonable assumption for all platforms that are modern enough to
 * have a parent-death signaling mechanism.
 */
#ifdef SIGINFO
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#elif defined(SIGPWR)
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#else
#error "cannot find a signal to use for postmaster death"
#endif
121 : :
122 : : #endif /* USE_POSTMASTER_DEATH_SIGNAL */
123 : :
124 : : /*
125 : : * PMSignalShmemSize
126 : : * Compute space needed for pmsignal.c's shared memory
127 : : */
128 : : Size
5458 tgl@sss.pgh.pa.us 129 : 3475 : PMSignalShmemSize(void)
130 : : {
131 : : Size size;
132 : :
133 : 3475 : size = offsetof(PMSignalData, PMChildFlags);
134 : 3475 : size = add_size(size, mul_size(MaxLivePostmasterChildren(),
135 : : sizeof(sig_atomic_t)));
136 : :
137 : 3475 : return size;
138 : : }
139 : :
140 : : /*
141 : : * PMSignalShmemInit - initialize during shared-memory creation
142 : : */
143 : : void
144 : 898 : PMSignalShmemInit(void)
145 : : {
146 : : bool found;
147 : :
148 : 898 : PMSignalState = (PMSignalData *)
149 : 898 : ShmemInitStruct("PMSignalState", PMSignalShmemSize(), &found);
150 : :
7421 bruce@momjian.us 151 [ + - ]: 898 : if (!found)
152 : : {
153 : : /* initialize all flags to zeroes */
1847 peter@eisentraut.org 154 [ + - + - : 79681 : MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
+ - + + +
+ ]
551 tgl@sss.pgh.pa.us 155 : 898 : num_child_inuse = MaxLivePostmasterChildren();
156 : 898 : PMSignalState->num_child_flags = num_child_inuse;
157 : :
158 : : /*
159 : : * Also allocate postmaster's private PMChildInUse[] array. We
160 : : * might've already done that in a previous shared-memory creation
161 : : * cycle, in which case free the old array to avoid a leak. (Do it
162 : : * like this to support the possibility that MaxLivePostmasterChildren
163 : : * changed.) In a standalone backend, we do not need this.
164 : : */
165 [ + + ]: 898 : if (PostmasterContext != NULL)
166 : : {
167 [ + + ]: 733 : if (PMChildInUse)
168 : 5 : pfree(PMChildInUse);
169 : 733 : PMChildInUse = (bool *)
170 : 733 : MemoryContextAllocZero(PostmasterContext,
171 : : num_child_inuse * sizeof(bool));
172 : : }
173 : 898 : next_child_inuse = 0;
174 : : }
8197 175 : 898 : }
176 : :
177 : : /*
178 : : * SendPostmasterSignal - signal the postmaster from a child process
179 : : */
180 : : void
181 : 93133 : SendPostmasterSignal(PMSignalReason reason)
182 : : {
183 : : /* If called in a standalone backend, do nothing */
184 [ - + ]: 93133 : if (!IsUnderPostmaster)
8197 tgl@sss.pgh.pa.us 185 :UBC 0 : return;
186 : : /* Atomically set the proper flag */
5458 tgl@sss.pgh.pa.us 187 :CBC 93133 : PMSignalState->PMSignalFlags[reason] = true;
188 : : /* Send signal to postmaster */
7371 neilc@samurai.com 189 : 93133 : kill(PostmasterPid, SIGUSR1);
190 : : }
191 : :
192 : : /*
193 : : * CheckPostmasterSignal - check to see if a particular reason has been
194 : : * signaled, and clear the signal flag. Should be called by postmaster
195 : : * after receiving SIGUSR1.
196 : : */
197 : : bool
8197 tgl@sss.pgh.pa.us 198 : 642628 : CheckPostmasterSignal(PMSignalReason reason)
199 : : {
200 : : /* Careful here --- don't clear flag if we haven't seen it set */
5458 201 [ + + ]: 642628 : if (PMSignalState->PMSignalFlags[reason])
202 : : {
203 : 91872 : PMSignalState->PMSignalFlags[reason] = false;
8197 204 : 91872 : return true;
205 : : }
206 : 550756 : return false;
207 : : }
208 : :
209 : : /*
210 : : * SetQuitSignalReason - broadcast the reason for a system shutdown.
211 : : * Should be called by postmaster before sending SIGQUIT to children.
212 : : *
213 : : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
214 : : * as a part of rebuilding shared memory; the postmaster need not do it
215 : : * explicitly.
216 : : */
217 : : void
1207 218 : 307 : SetQuitSignalReason(QuitSignalReason reason)
219 : : {
220 : 307 : PMSignalState->sigquit_reason = reason;
221 : 307 : }
222 : :
223 : : /*
224 : : * GetQuitSignalReason - obtain the reason for a system shutdown.
225 : : * Called by child processes when they receive SIGQUIT.
226 : : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
227 : : */
228 : : QuitSignalReason
229 : 9 : GetQuitSignalReason(void)
230 : : {
231 : : /* This is called in signal handlers, so be extra paranoid. */
232 [ + - - + ]: 9 : if (!IsUnderPostmaster || PMSignalState == NULL)
1207 tgl@sss.pgh.pa.us 233 :UBC 0 : return PMQUIT_NOT_SENT;
1207 tgl@sss.pgh.pa.us 234 :CBC 9 : return PMSignalState->sigquit_reason;
235 : : }
236 : :
237 : :
238 : : /*
239 : : * AssignPostmasterChildSlot - select an unused slot for a new postmaster
240 : : * child process, and set its state to ASSIGNED. Returns a slot number
241 : : * (one to N).
242 : : *
243 : : * Only the postmaster is allowed to execute this routine, so we need no
244 : : * special locking.
245 : : */
246 : : int
5458 247 : 14625 : AssignPostmasterChildSlot(void)
248 : : {
551 249 : 14625 : int slot = next_child_inuse;
250 : : int n;
251 : :
252 : : /*
253 : : * Scan for a free slot. Notice that we trust nothing about the contents
254 : : * of PMSignalState, but use only postmaster-local data for this decision.
255 : : * We track the last slot assigned so as not to waste time repeatedly
256 : : * rescanning low-numbered slots.
257 : : */
258 [ + - ]: 14690 : for (n = num_child_inuse; n > 0; n--)
259 : : {
5458 260 [ + + ]: 14690 : if (--slot < 0)
551 261 : 744 : slot = num_child_inuse - 1;
262 [ + + ]: 14690 : if (!PMChildInUse[slot])
263 : : {
264 : 14625 : PMChildInUse[slot] = true;
5458 265 : 14625 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
551 266 : 14625 : next_child_inuse = slot;
5458 267 : 14625 : return slot + 1;
268 : : }
269 : : }
270 : :
271 : : /* Out of slots ... should never happen, else postmaster.c messed up */
5458 tgl@sss.pgh.pa.us 272 [ # # ]:UBC 0 : elog(FATAL, "no free slots in PMChildFlags array");
273 : : return 0; /* keep compiler quiet */
274 : : }
275 : :
276 : : /*
277 : : * ReleasePostmasterChildSlot - release a slot after death of a postmaster
278 : : * child process. This must be called in the postmaster process.
279 : : *
280 : : * Returns true if the slot had been in ASSIGNED state (the expected case),
281 : : * false otherwise (implying that the child failed to clean itself up).
282 : : */
283 : : bool
5458 tgl@sss.pgh.pa.us 284 :CBC 14625 : ReleasePostmasterChildSlot(int slot)
285 : : {
286 : : bool result;
287 : :
551 288 [ + - - + ]: 14625 : Assert(slot > 0 && slot <= num_child_inuse);
5458 289 : 14625 : slot--;
290 : :
291 : : /*
292 : : * Note: the slot state might already be unused, because the logic in
293 : : * postmaster.c is such that this might get called twice when a child
294 : : * crashes. So we don't try to Assert anything about the state.
295 : : */
296 : 14625 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
297 : 14625 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
551 298 : 14625 : PMChildInUse[slot] = false;
5458 299 : 14625 : return result;
300 : : }
301 : :
302 : : /*
303 : : * IsPostmasterChildWalSender - check if given slot is in use by a
304 : : * walsender process. This is called only by the postmaster.
305 : : */
306 : : bool
5203 heikki.linnakangas@i 307 : 1462 : IsPostmasterChildWalSender(int slot)
308 : : {
551 tgl@sss.pgh.pa.us 309 [ + - - + ]: 1462 : Assert(slot > 0 && slot <= num_child_inuse);
5203 heikki.linnakangas@i 310 : 1462 : slot--;
311 : :
312 [ + + ]: 1462 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
313 : 82 : return true;
314 : : else
315 : 1380 : return false;
316 : : }
317 : :
318 : : /*
319 : : * MarkPostmasterChildActive - mark a postmaster child as about to begin
320 : : * actively using shared memory. This is called in the child process.
321 : : */
322 : : void
5458 tgl@sss.pgh.pa.us 323 : 14392 : MarkPostmasterChildActive(void)
324 : : {
325 : 14392 : int slot = MyPMChildSlot;
326 : :
327 [ + - - + ]: 14392 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
328 : 14392 : slot--;
329 [ - + ]: 14392 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
4838 heikki.linnakangas@i 330 : 14392 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
331 : 14392 : }
332 : :
333 : : /*
334 : : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
335 : : * process. This is called in the child process, sometime after marking the
336 : : * child as active.
337 : : */
338 : : void
339 : 1029 : MarkPostmasterChildWalSender(void)
340 : : {
341 : 1029 : int slot = MyPMChildSlot;
342 : :
343 [ - + ]: 1029 : Assert(am_walsender);
344 : :
345 [ + - - + ]: 1029 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
346 : 1029 : slot--;
347 [ - + ]: 1029 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
348 : 1029 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
5203 349 : 1029 : }
350 : :
351 : : /*
352 : : * MarkPostmasterChildInactive - mark a postmaster child as done using
353 : : * shared memory. This is called in the child process.
354 : : */
355 : : void
5458 tgl@sss.pgh.pa.us 356 : 14057 : MarkPostmasterChildInactive(void)
357 : : {
358 : 14057 : int slot = MyPMChildSlot;
359 : :
360 [ + - - + ]: 14057 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
361 : 14057 : slot--;
5203 heikki.linnakangas@i 362 [ + + - + ]: 14057 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
363 : : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
5458 tgl@sss.pgh.pa.us 364 : 14057 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
365 : 14057 : }
366 : :
367 : :
368 : : /*
369 : : * PostmasterIsAliveInternal - check whether postmaster process is still alive
370 : : *
371 : : * This is the slow path of PostmasterIsAlive(), where the caller has already
372 : : * checked 'postmaster_possibly_dead'. (On platforms that don't support
373 : : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
374 : : */
375 : : bool
2104 tmunro@postgresql.or 376 : 383 : PostmasterIsAliveInternal(void)
377 : : {
378 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
379 : : /*
380 : : * Reset the flag before checking, so that we don't miss a signal if
381 : : * postmaster dies right after the check. If postmaster was indeed dead,
382 : : * we'll re-arm it before returning to caller.
383 : : */
384 : 383 : postmaster_possibly_dead = false;
385 : : #endif
386 : :
387 : : #ifndef WIN32
388 : : {
389 : : char c;
390 : : ssize_t rc;
391 : :
392 : 383 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
393 : :
394 : : /*
395 : : * In the usual case, the postmaster is still alive, and there is no
396 : : * data in the pipe.
397 : : */
398 [ + + - + : 383 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
- - ]
4863 rhaas@postgresql.org 399 : 368 : return true;
400 : : else
401 : : {
402 : : /*
403 : : * Postmaster is dead, or something went wrong with the read()
404 : : * call.
405 : : */
406 : :
407 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
2104 tmunro@postgresql.or 408 : 15 : postmaster_possibly_dead = true;
409 : : #endif
410 : :
411 [ - + ]: 15 : if (rc < 0)
2104 tmunro@postgresql.or 412 [ # # ]:UBC 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
2104 tmunro@postgresql.or 413 [ - + ]:CBC 15 : else if (rc > 0)
2104 tmunro@postgresql.or 414 [ # # ]:UBC 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
415 : :
2104 tmunro@postgresql.or 416 :CBC 15 : return false;
417 : : }
418 : : }
419 : :
420 : : #else /* WIN32 */
421 : : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
422 : : return true;
423 : : else
424 : : {
425 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
426 : : postmaster_possibly_dead = true;
427 : : #endif
428 : : return false;
429 : : }
430 : : #endif /* WIN32 */
431 : : }
432 : :
/*
 * PostmasterDeathSignalInit - request signal on postmaster death if possible
 *
 * When USE_POSTMASTER_DEATH_SIGNAL is defined, installs
 * postmaster_death_handler() for POSTMASTER_DEATH_SIGNAL and asks the
 * kernel to deliver that signal when the parent exits.  Otherwise this
 * function does nothing.
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask for the signal, using whichever mechanism is available. */
#if defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might have been gone already before the request took
	 * effect.  Start with the flag raised, so that the first check is done
	 * the slow way.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|