Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * postmaster.c
4 : : * This program acts as a clearing house for requests to the
5 : : * POSTGRES system. Frontend programs connect to the Postmaster,
6 : : * and postmaster forks a new backend process to handle the
7 : : * connection.
8 : : *
9 : : * The postmaster also manages system-wide operations such as
10 : : * startup and shutdown. The postmaster itself doesn't do those
11 : : * operations, mind you --- it just forks off a subprocess to do them
12 : : * at the right times. It also takes care of resetting the system
13 : : * if a backend crashes.
14 : : *
15 : : * The postmaster process creates the shared memory and semaphore
16 : : * pools during startup, but as a rule does not touch them itself.
17 : : * In particular, it is not a member of the PGPROC array of backends
18 : : * and so it cannot participate in lock-manager operations. Keeping
19 : : * the postmaster away from shared memory operations makes it simpler
20 : : * and more reliable. The postmaster is almost always able to recover
21 : : * from crashes of individual backends by resetting shared memory;
22 : : * if it did much with shared memory then it would be prone to crashing
23 : : * along with the backends.
24 : : *
25 : : * When a request message is received, we now fork() immediately.
26 : : * The child process performs authentication of the request, and
27 : : * then becomes a backend if successful. This allows the auth code
28 : : * to be written in a simple single-threaded style (as opposed to the
29 : : * crufty "poor man's multitasking" code that used to be needed).
30 : : * More importantly, it ensures that blockages in non-multithreaded
31 : : * libraries like SSL or PAM cannot cause denial of service to other
32 : : * clients.
33 : : *
34 : : *
35 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
36 : : * Portions Copyright (c) 1994, Regents of the University of California
37 : : *
38 : : *
39 : : * IDENTIFICATION
40 : : * src/backend/postmaster/postmaster.c
41 : : *
42 : : * NOTES
43 : : *
44 : : * Initialization:
45 : : * The Postmaster sets up shared memory data structures
46 : : * for the backends.
47 : : *
48 : : * Synchronization:
49 : : * The Postmaster shares memory with the backends but should avoid
50 : : * touching shared memory, so as not to become stuck if a crashing
51 : : * backend screws up locks or shared memory. Likewise, the Postmaster
52 : : * should never block on messages from frontend clients.
53 : : *
54 : : * Garbage Collection:
55 : : * The Postmaster cleans up after backends if they have an emergency
56 : : * exit and/or core dump.
57 : : *
58 : : * Error Reporting:
59 : : * Use write_stderr() only for reporting "interactive" errors
60 : : * (essentially, bogus arguments on the command line). Once the
61 : : * postmaster is launched, use ereport().
62 : : *
63 : : *-------------------------------------------------------------------------
64 : : */
65 : :
66 : : #include "postgres.h"
67 : :
68 : : #include <unistd.h>
69 : : #include <signal.h>
70 : : #include <time.h>
71 : : #include <sys/wait.h>
72 : : #include <ctype.h>
73 : : #include <sys/stat.h>
74 : : #include <sys/socket.h>
75 : : #include <fcntl.h>
76 : : #include <sys/param.h>
77 : : #include <netdb.h>
78 : : #include <limits.h>
79 : :
80 : : #ifdef USE_BONJOUR
81 : : #include <dns_sd.h>
82 : : #endif
83 : :
84 : : #ifdef USE_SYSTEMD
85 : : #include <systemd/sd-daemon.h>
86 : : #endif
87 : :
88 : : #ifdef HAVE_PTHREAD_IS_THREADED_NP
89 : : #include <pthread.h>
90 : : #endif
91 : :
92 : : #include "access/xlog.h"
93 : : #include "access/xlogrecovery.h"
94 : : #include "common/file_perm.h"
95 : : #include "common/file_utils.h"
96 : : #include "common/ip.h"
97 : : #include "common/pg_prng.h"
98 : : #include "lib/ilist.h"
99 : : #include "libpq/libpq.h"
100 : : #include "libpq/pqsignal.h"
101 : : #include "pg_getopt.h"
102 : : #include "pgstat.h"
103 : : #include "port/pg_bswap.h"
104 : : #include "postmaster/autovacuum.h"
105 : : #include "postmaster/auxprocess.h"
106 : : #include "postmaster/bgworker_internals.h"
107 : : #include "postmaster/pgarch.h"
108 : : #include "postmaster/postmaster.h"
109 : : #include "postmaster/syslogger.h"
110 : : #include "postmaster/walsummarizer.h"
111 : : #include "replication/logicallauncher.h"
112 : : #include "replication/slotsync.h"
113 : : #include "replication/walsender.h"
114 : : #include "storage/fd.h"
115 : : #include "storage/ipc.h"
116 : : #include "storage/pmsignal.h"
117 : : #include "storage/proc.h"
118 : : #include "tcop/backend_startup.h"
119 : : #include "tcop/tcopprot.h"
120 : : #include "utils/datetime.h"
121 : : #include "utils/memutils.h"
122 : : #include "utils/pidfile.h"
123 : : #include "utils/timestamp.h"
124 : : #include "utils/varlena.h"
125 : :
126 : : #ifdef EXEC_BACKEND
127 : : #include "storage/pg_shmem.h"
128 : : #endif
129 : :
130 : :
131 : : /*
132 : : * Possible types of a backend. Beyond being the possible bkend_type values in
133 : : * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
134 : : * and CountChildren().
135 : : */
136 : : #define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
137 : : #define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
138 : : #define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
139 : : #define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
140 : : #define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
141 : :
142 : : /*
143 : : * List of active backends (or child processes anyway; we don't actually
144 : : * know whether a given child has become a backend or is still in the
145 : : * authorization phase). This is used mainly to keep track of how many
146 : : * children we have and send them appropriate signals when necessary.
147 : : *
148 : : * As shown in the above set of backend types, this list includes not only
149 : : * "normal" client sessions, but also autovacuum workers, walsenders, and
150 : : * background workers. (Note that at the time of launch, walsenders are
151 : : * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
152 : : * upon noticing they've changed their PMChildFlags entry. Hence that check
153 : : * must be done before any operation that needs to distinguish walsenders
154 : : * from normal backends.)
155 : : *
156 : : * Also, "dead_end" children are in it: these are children launched just for
157 : : * the purpose of sending a friendly rejection message to a would-be client.
158 : : * We must track them because they are attached to shared memory, but we know
159 : : * they will never become live backends. dead_end children are not assigned a
160 : : * PMChildSlot. dead_end children have bkend_type NORMAL.
161 : : *
162 : : * "Special" children such as the startup, bgwriter, autovacuum launcher, and
163 : : * slot sync worker tasks are not in this list. They are tracked via StartupPID
164 : : * and other pid_t variables below. (Thus, there can't be more than one of any
165 : : * given "special" child process type. We use BackendList entries for any
166 : : * child process there can be more than one of.)
167 : : */
168 : : typedef struct bkend /* one tracked child process; entries are linked into BackendList */
169 : : {
170 : : pid_t pid; /* process id of this child */
171 : : int32 cancel_key; /* cancel key for this backend */
172 : : int child_slot; /* PMChildSlot for this backend, if any; dead_end children are not assigned one */
173 : : int bkend_type; /* child process flavor: one of the BACKEND_TYPE_* values above */
174 : : bool dead_end; /* true if launched only to send a rejection message and quit */
175 : : bool bgworker_notify; /* gets bgworker start/stop notifications */
176 : : dlist_node elem; /* list link in BackendList */
177 : : } Backend;
178 : :
179 : : static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
180 : :
181 : : #ifdef EXEC_BACKEND
182 : : Backend *ShmemBackendArray;
183 : : #endif
184 : :
185 : : BackgroundWorker *MyBgworkerEntry = NULL;
186 : :
187 : :
188 : :
189 : : /* The port number we are listening for connections on */
190 : : int PostPortNumber = DEF_PGPORT;
191 : :
192 : : /* The directory names for Unix socket(s) */
193 : : char *Unix_socket_directories;
194 : :
195 : : /* The TCP listen address(es) */
196 : : char *ListenAddresses;
197 : :
198 : : /*
199 : : * SuperuserReservedConnections is the number of backends reserved for
200 : : * superuser use, and ReservedConnections is the number of backends reserved
201 : : * for use by roles with privileges of the pg_use_reserved_connections
202 : : * predefined role. These are taken out of the pool of MaxConnections backend
203 : : * slots, so the number of backend slots available for roles that are neither
204 : : * superuser nor have privileges of pg_use_reserved_connections is
205 : : * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
206 : : *
207 : : * If the number of remaining slots is less than or equal to
208 : : * SuperuserReservedConnections, only superusers can make new connections. If
209 : : * the number of remaining slots is greater than SuperuserReservedConnections
210 : : * but less than or equal to
211 : : * (SuperuserReservedConnections + ReservedConnections), only superusers and
212 : : * roles with privileges of pg_use_reserved_connections can make new
213 : : * connections. Note that pre-existing superuser and
214 : : * pg_use_reserved_connections connections don't count against the limits.
215 : : */
216 : : int SuperuserReservedConnections;
217 : : int ReservedConnections;
218 : :
219 : : /* The socket(s) we're listening to. */
220 : : #define MAXLISTEN 64
221 : : static int NumListenSockets = 0;
222 : : static pgsocket *ListenSockets = NULL;
223 : :
224 : : /* still more option variables */
225 : : bool EnableSSL = false;
226 : :
227 : : int PreAuthDelay = 0;
228 : : int AuthenticationTimeout = 60;
229 : :
230 : : bool log_hostname; /* for ps display and logging */
231 : : bool Log_connections = false;
232 : :
233 : : bool enable_bonjour = false;
234 : : char *bonjour_name;
235 : : bool restart_after_crash = true;
236 : : bool remove_temp_files_after_crash = true;
237 : : bool send_abort_for_crash = false;
238 : : bool send_abort_for_kill = false;
239 : :
240 : : /* PIDs of special child processes; 0 when not running */
241 : : static pid_t StartupPID = 0,
242 : : BgWriterPID = 0,
243 : : CheckpointerPID = 0,
244 : : WalWriterPID = 0,
245 : : WalReceiverPID = 0,
246 : : WalSummarizerPID = 0,
247 : : AutoVacPID = 0,
248 : : PgArchPID = 0,
249 : : SysLoggerPID = 0,
250 : : SlotSyncWorkerPID = 0;
251 : :
252 : : /* Startup process's status */
253 : : typedef enum
254 : : {
255 : : STARTUP_NOT_RUNNING,
256 : : STARTUP_RUNNING,
257 : : STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
258 : : STARTUP_CRASHED,
259 : : } StartupStatusEnum;
260 : :
261 : : static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
262 : :
263 : : /* Startup/shutdown state */
264 : : #define NoShutdown 0
265 : : #define SmartShutdown 1
266 : : #define FastShutdown 2
267 : : #define ImmediateShutdown 3
268 : :
269 : : static int Shutdown = NoShutdown;
270 : :
271 : : static bool FatalError = false; /* T if recovering from backend crash */
272 : :
273 : : /*
274 : : * We use a simple state machine to control startup, shutdown, and
275 : : * crash recovery (which is rather like shutdown followed by startup).
276 : : *
277 : : * After doing all the postmaster initialization work, we enter PM_STARTUP
278 : : * state and the startup process is launched. The startup process begins by
279 : : * reading the control file and other preliminary initialization steps.
280 : : * In a normal startup, or after crash recovery, the startup process exits
281 : : * with exit code 0 and we switch to PM_RUN state. However, archive recovery
282 : : * is handled specially since it takes much longer and we would like to support
283 : : * hot standby during archive recovery.
284 : : *
285 : : * When the startup process is ready to start archive recovery, it signals the
286 : : * postmaster, and we switch to PM_RECOVERY state. The background writer and
287 : : * checkpointer are launched, while the startup process continues applying WAL.
288 : : * If Hot Standby is enabled, then, after reaching a consistent point in WAL
289 : : * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
290 : : * state and begin accepting connections to perform read-only queries. When
291 : : * archive recovery is finished, the startup process exits with exit code 0
292 : : * and we switch to PM_RUN state.
293 : : *
294 : : * Normal child backends can only be launched when we are in PM_RUN or
295 : : * PM_HOT_STANDBY state. (connsAllowed can also restrict launching.)
296 : : * In other states we handle connection requests by launching "dead_end"
297 : : * child processes, which will simply send the client an error message and
298 : : * quit. (We track these in the BackendList so that we can know when they
299 : : * are all gone; this is important because they're still connected to shared
300 : : * memory, and would interfere with an attempt to destroy the shmem segment,
301 : : * possibly leading to SHMALL failure when we try to make a new one.)
302 : : * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
303 : : * to drain out of the system, and therefore stop accepting connection
304 : : * requests at all until the last existing child has quit (which hopefully
305 : : * will not be very long).
306 : : *
307 : : * Notice that this state variable does not distinguish *why* we entered
308 : : * states later than PM_RUN --- Shutdown and FatalError must be consulted
309 : : * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
310 : : * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
311 : : * states when trying to recover from a crash). It can be true in PM_STARTUP
312 : : * state, because we don't clear it until we've successfully started WAL redo.
313 : : */
314 : : typedef enum /* states of the postmaster state machine described above */
315 : : {
316 : : PM_INIT, /* postmaster starting; initial value of pmState */
317 : : PM_STARTUP, /* startup process launched; waiting for it */
318 : : PM_RECOVERY, /* in archive recovery mode */
319 : : PM_HOT_STANDBY, /* in hot standby mode; accepting read-only queries */
320 : : PM_RUN, /* normal "database is alive" state */
321 : : PM_STOP_BACKENDS, /* need to stop remaining backends */
322 : : PM_WAIT_BACKENDS, /* waiting for live backends to exit */
323 : : PM_SHUTDOWN, /* waiting for checkpointer to do the shutdown
324 : : * checkpoint */
325 : : PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
326 : : * finish */
327 : : PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
328 : : PM_NO_CHILDREN, /* all important children have exited */
329 : : } PMState;
330 : :
331 : : static PMState pmState = PM_INIT;
332 : :
333 : : /*
334 : : * While performing a "smart shutdown", we restrict new connections but stay
335 : : * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
336 : : * connsAllowed is a sub-state indicator showing the active restriction.
337 : : * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
338 : : */
339 : : static bool connsAllowed = true;
340 : :
341 : : /* Start time of SIGKILL timeout during immediate shutdown or child crash */
342 : : /* Zero means timeout is not running */
343 : : static time_t AbortStartTime = 0;
344 : :
345 : : /* Length of said timeout */
346 : : #define SIGKILL_CHILDREN_AFTER_SECS 5
347 : :
348 : : static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
349 : :
350 : : bool ClientAuthInProgress = false; /* T during new-client
351 : : * authentication */
352 : :
353 : : bool redirection_done = false; /* stderr redirected for syslogger? */
354 : :
355 : : /* received START_AUTOVAC_LAUNCHER signal */
356 : : static bool start_autovac_launcher = false;
357 : :
358 : : /* the launcher needs to be signaled to communicate some condition */
359 : : static bool avlauncher_needs_signal = false;
360 : :
361 : : /* received START_WALRECEIVER signal */
362 : : static bool WalReceiverRequested = false;
363 : :
364 : : /* set when there's a worker that needs to be started up */
365 : : static bool StartWorkerNeeded = true;
366 : : static bool HaveCrashedWorker = false;
367 : :
368 : : /* set when signals arrive */
369 : : static volatile sig_atomic_t pending_pm_pmsignal;
370 : : static volatile sig_atomic_t pending_pm_child_exit;
371 : : static volatile sig_atomic_t pending_pm_reload_request;
372 : : static volatile sig_atomic_t pending_pm_shutdown_request;
373 : : static volatile sig_atomic_t pending_pm_fast_shutdown_request;
374 : : static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
375 : :
376 : : /* event multiplexing object */
377 : : static WaitEventSet *pm_wait_set;
378 : :
379 : : #ifdef USE_SSL
380 : : /* Set when and if SSL has been initialized properly */
381 : : bool LoadedSSL = false;
382 : : #endif
383 : :
384 : : #ifdef USE_BONJOUR
385 : : static DNSServiceRef bonjour_sdref = NULL;
386 : : #endif
387 : :
388 : : /*
389 : : * postmaster.c - function prototypes
390 : : */
391 : : static void CloseServerPorts(int status, Datum arg);
392 : : static void unlink_external_pid_file(int status, Datum arg);
393 : : static void getInstallationPaths(const char *argv0);
394 : : static void checkControlFile(void);
395 : : static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
396 : : static void handle_pm_child_exit_signal(SIGNAL_ARGS);
397 : : static void handle_pm_reload_request_signal(SIGNAL_ARGS);
398 : : static void handle_pm_shutdown_request_signal(SIGNAL_ARGS);
399 : : static void process_pm_pmsignal(void);
400 : : static void process_pm_child_exit(void);
401 : : static void process_pm_reload_request(void);
402 : : static void process_pm_shutdown_request(void);
403 : : static void dummy_handler(SIGNAL_ARGS);
404 : : static void CleanupBackend(int pid, int exitstatus);
405 : : static bool CleanupBackgroundWorker(int pid, int exitstatus);
406 : : static void HandleChildCrash(int pid, int exitstatus, const char *procname);
407 : : static void LogChildExit(int lev, const char *procname,
408 : : int pid, int exitstatus);
409 : : static void PostmasterStateMachine(void);
410 : :
411 : : static void ExitPostmaster(int status) pg_attribute_noreturn();
412 : : static int ServerLoop(void);
413 : : static int BackendStartup(ClientSocket *client_sock);
414 : : static void report_fork_failure_to_client(ClientSocket *client_sock, int errnum);
415 : : static CAC_state canAcceptConnections(int backend_type);
416 : : static bool RandomCancelKey(int32 *cancel_key);
417 : : static void signal_child(pid_t pid, int signal);
418 : : static void sigquit_child(pid_t pid);
419 : : static bool SignalSomeChildren(int signal, int target);
420 : : static void TerminateChildren(int signal);
421 : :
422 : : #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
423 : :
424 : : static int CountChildren(int target);
425 : : static bool assign_backendlist_entry(RegisteredBgWorker *rw);
426 : : static void maybe_start_bgworkers(void);
427 : : static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
428 : : static pid_t StartChildProcess(BackendType type);
429 : : static void StartAutovacuumWorker(void);
430 : : static void MaybeStartWalReceiver(void);
431 : : static void MaybeStartWalSummarizer(void);
432 : : static void InitPostmasterDeathWatchHandle(void);
433 : : static void MaybeStartSlotSyncWorker(void);
434 : :
435 : : /*
436 : : * Is the archiver allowed to start up in the current postmaster state?
437 : : *
438 : : * If WAL archiving is always enabled, we are allowed to start the archiver
439 : : * even during recovery.
440 : : */
441 : : #define PgArchStartupAllowed() \
442 : : (((XLogArchivingActive() && pmState == PM_RUN) || \
443 : : (XLogArchivingAlways() && \
444 : : (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
445 : : PgArchCanRestart())
446 : :
447 : : #ifdef EXEC_BACKEND
448 : :
449 : : #ifdef WIN32
450 : : #define WNOHANG 0 /* ignored, so any integer value will do */
451 : :
452 : : static pid_t waitpid(pid_t pid, int *exitstatus, int options);
453 : : static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
454 : :
455 : : static HANDLE win32ChildQueue;
456 : :
457 : : typedef struct
458 : : {
459 : : HANDLE waitHandle;
460 : : HANDLE procHandle;
461 : : DWORD procId;
462 : : } win32_deadchild_waitinfo;
463 : : #endif /* WIN32 */
464 : :
465 : : static void ShmemBackendArrayAdd(Backend *bn);
466 : : static void ShmemBackendArrayRemove(Backend *bn);
467 : : #endif /* EXEC_BACKEND */
468 : :
469 : : /* Macros to check the exit status "st" of a child process; note that the _1 and _3 forms evaluate st twice */
470 : : #define EXIT_STATUS_0(st) ((st) == 0) /* whole status word is zero */
471 : : #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) /* exited normally with code 1 */
472 : : #define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) /* exited normally with code 3 */
473 : :
474 : : #ifndef WIN32
475 : : /*
476 : : * File descriptors for pipe used to monitor if postmaster is alive.
477 : : * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
478 : : */
479 : : int postmaster_alive_fds[2] = {-1, -1};
480 : : #else
481 : : /* Process handle of postmaster used for the same purpose on Windows */
482 : : HANDLE PostmasterHandle;
483 : : #endif
484 : :
485 : : /*
486 : : * Postmaster main entry point
487 : : */
488 : : void
10141 scrappy@hub.org 489 :CBC 736 : PostmasterMain(int argc, char *argv[])
490 : : {
491 : : int opt;
492 : : int status;
7128 tgl@sss.pgh.pa.us 493 : 736 : char *userDoption = NULL;
4840 494 : 736 : bool listen_addr_saved = false;
3718 peter_e@gmx.net 495 : 736 : char *output_config_variable = NULL;
496 : :
2004 tmunro@postgresql.or 497 : 736 : InitProcessGlobals();
498 : :
499 : 736 : PostmasterPid = MyProcPid;
500 : :
7627 tgl@sss.pgh.pa.us 501 : 736 : IsPostmasterEnvironment = true;
502 : :
503 : : /*
504 : : * Start our win32 signal implementation
505 : : */
506 : : #ifdef WIN32
507 : : pgwin32_signal_initialize();
508 : : #endif
509 : :
510 : : /*
511 : : * We should not be creating any files or directories before we check the
512 : : * data directory (see checkDataDir()), but just in case set the umask to
513 : : * the most restrictive (owner-only) permissions.
514 : : *
515 : : * checkDataDir() will reset the umask based on the data directory
516 : : * permissions.
517 : : */
2199 sfrost@snowman.net 518 : 736 : umask(PG_MODE_MASK_OWNER);
519 : :
520 : : /*
521 : : * By default, palloc() requests in the postmaster will be allocated in
522 : : * the PostmasterContext, which is space that can be recycled by backends.
523 : : * Allocated data that needs to be available to backends should be
524 : : * allocated in TopMemoryContext.
525 : : */
8691 tgl@sss.pgh.pa.us 526 : 736 : PostmasterContext = AllocSetContextCreate(TopMemoryContext,
527 : : "Postmaster",
528 : : ALLOCSET_DEFAULT_SIZES);
529 : 736 : MemoryContextSwitchTo(PostmasterContext);
530 : :
531 : : /* Initialize paths to installation files */
5461 532 : 736 : getInstallationPaths(argv[0]);
533 : :
534 : : /*
535 : : * Set up signal handlers for the postmaster process.
536 : : *
537 : : * CAUTION: when changing this list, check for side-effects on the signal
538 : : * handling setup of child processes. See tcop/postgres.c,
539 : : * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
540 : : * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
541 : : * postmaster/bgworker.c and postmaster/checkpointer.c.
542 : : */
3662 543 : 736 : pqinitmask();
436 tmunro@postgresql.or 544 : 736 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
545 : :
458 546 : 736 : pqsignal(SIGHUP, handle_pm_reload_request_signal);
547 : 736 : pqsignal(SIGINT, handle_pm_shutdown_request_signal);
548 : 736 : pqsignal(SIGQUIT, handle_pm_shutdown_request_signal);
549 : 736 : pqsignal(SIGTERM, handle_pm_shutdown_request_signal);
550 : 736 : pqsignal(SIGALRM, SIG_IGN); /* ignored */
551 : 736 : pqsignal(SIGPIPE, SIG_IGN); /* ignored */
552 : 736 : pqsignal(SIGUSR1, handle_pm_pmsignal_signal);
553 : 736 : pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
554 : 736 : pqsignal(SIGCHLD, handle_pm_child_exit_signal);
555 : :
556 : : /* This may configure SIGURG, depending on platform. */
557 : 736 : InitializeLatchSupport();
558 : 736 : InitProcessLocalLatch();
559 : :
560 : : /*
561 : : * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
562 : : * ignore those signals in a postmaster environment, so that there is no
563 : : * risk of a child process freezing up due to writing to stderr. But for
564 : : * a standalone backend, their default handling is reasonable. Hence, all
565 : : * child processes should just allow the inherited settings to stand.
566 : : */
567 : : #ifdef SIGTTIN
568 : 736 : pqsignal(SIGTTIN, SIG_IGN); /* ignored */
569 : : #endif
570 : : #ifdef SIGTTOU
571 : 736 : pqsignal(SIGTTOU, SIG_IGN); /* ignored */
572 : : #endif
573 : :
574 : : /* ignore SIGXFSZ, so that ulimit violations work like disk full */
575 : : #ifdef SIGXFSZ
576 : 736 : pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
577 : : #endif
578 : :
579 : : /* Begin accepting signals. */
436 580 : 736 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
581 : :
582 : : /*
583 : : * Options setup
584 : : */
8003 tgl@sss.pgh.pa.us 585 : 736 : InitializeGUCOptions();
586 : :
8719 peter_e@gmx.net 587 : 736 : opterr = 1;
588 : :
589 : : /*
590 : : * Parse command-line options. CAUTION: keep this in sync with
591 : : * tcop/postgres.c (the option sets should not conflict) and with the
592 : : * common help() function in main/main.c.
593 : : */
489 peter@eisentraut.org 594 [ + + ]: 2548 : while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
595 : : {
9716 bruce@momjian.us 596 [ - + + + : 1812 : switch (opt)
+ - - - +
- - - - +
- - - - +
- - - - -
- - ]
597 : : {
9715 bruce@momjian.us 598 :UBC 0 : case 'B':
8086 peter_e@gmx.net 599 : 0 : SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
9715 bruce@momjian.us 600 : 0 : break;
601 : :
4738 bruce@momjian.us 602 :CBC 22 : case 'b':
603 : : /* Undocumented flag used for binary upgrades */
604 : 22 : IsBinaryUpgrade = true;
605 : 22 : break;
606 : :
4574 607 : 2 : case 'C':
4202 608 : 2 : output_config_variable = strdup(optarg);
4574 609 : 2 : break;
610 : :
489 peter@eisentraut.org 611 : 850 : case 'c':
612 : : case '-':
613 : : {
614 : : char *name,
615 : : *value;
616 : :
617 : 850 : ParseLongOption(optarg, &name, &value);
618 [ - + ]: 850 : if (!value)
619 : : {
489 peter@eisentraut.org 620 [ # # ]:UBC 0 : if (opt == '-')
621 [ # # ]: 0 : ereport(ERROR,
622 : : (errcode(ERRCODE_SYNTAX_ERROR),
623 : : errmsg("--%s requires a value",
624 : : optarg)));
625 : : else
626 [ # # ]: 0 : ereport(ERROR,
627 : : (errcode(ERRCODE_SYNTAX_ERROR),
628 : : errmsg("-c %s requires a value",
629 : : optarg)));
630 : : }
631 : :
489 peter@eisentraut.org 632 :CBC 850 : SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
633 : 850 : pfree(name);
634 : 850 : pfree(value);
635 : 850 : break;
636 : : }
637 : :
9715 bruce@momjian.us 638 : 736 : case 'D':
4202 639 : 736 : userDoption = strdup(optarg);
9715 640 : 736 : break;
641 : :
9715 bruce@momjian.us 642 :UBC 0 : case 'd':
7091 tgl@sss.pgh.pa.us 643 : 0 : set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
644 : 0 : break;
645 : :
6674 peter_e@gmx.net 646 : 0 : case 'E':
647 : 0 : SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
648 : 0 : break;
649 : :
650 : 0 : case 'e':
651 : 0 : SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
652 : 0 : break;
653 : :
8717 bruce@momjian.us 654 :CBC 85 : case 'F':
8086 peter_e@gmx.net 655 : 85 : SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
8717 bruce@momjian.us 656 : 85 : break;
657 : :
6674 peter_e@gmx.net 658 :UBC 0 : case 'f':
659 [ # # ]: 0 : if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
660 : : {
661 : 0 : write_stderr("%s: invalid argument for option -f: \"%s\"\n",
662 : : progname, optarg);
663 : 0 : ExitPostmaster(1);
664 : : }
665 : 0 : break;
666 : :
8553 bruce@momjian.us 667 : 0 : case 'h':
7327 tgl@sss.pgh.pa.us 668 : 0 : SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
8553 bruce@momjian.us 669 : 0 : break;
670 : :
9652 671 : 0 : case 'i':
7327 tgl@sss.pgh.pa.us 672 : 0 : SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
8955 bruce@momjian.us 673 : 0 : break;
674 : :
6674 peter_e@gmx.net 675 : 0 : case 'j':
676 : : /* only used by interactive backend */
677 : 0 : break;
678 : :
8553 bruce@momjian.us 679 :CBC 85 : case 'k':
4265 tgl@sss.pgh.pa.us 680 : 85 : SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
8553 bruce@momjian.us 681 : 85 : break;
682 : :
8955 bruce@momjian.us 683 :UBC 0 : case 'l':
8086 peter_e@gmx.net 684 : 0 : SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
9655 bruce@momjian.us 685 : 0 : break;
686 : :
9186 tgl@sss.pgh.pa.us 687 : 0 : case 'N':
8086 peter_e@gmx.net 688 : 0 : SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
9186 tgl@sss.pgh.pa.us 689 : 0 : break;
690 : :
6674 peter_e@gmx.net 691 : 0 : case 'O':
692 : 0 : SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
693 : 0 : break;
694 : :
695 : 0 : case 'P':
696 : 0 : SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
697 : 0 : break;
698 : :
9715 bruce@momjian.us 699 :CBC 32 : case 'p':
8086 peter_e@gmx.net 700 : 32 : SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
9715 bruce@momjian.us 701 : 32 : break;
702 : :
6674 peter_e@gmx.net 703 :UBC 0 : case 'r':
704 : : /* only used by single-user backend */
705 : 0 : break;
706 : :
707 : 0 : case 'S':
708 : 0 : SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
9715 bruce@momjian.us 709 : 0 : break;
710 : :
711 : 0 : case 's':
6310 tgl@sss.pgh.pa.us 712 : 0 : SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
6674 peter_e@gmx.net 713 : 0 : break;
714 : :
715 : 0 : case 'T':
716 : :
717 : : /*
718 : : * This option used to be defined as sending SIGSTOP after a
719 : : * backend crash, but sending SIGABRT seems more useful.
720 : : */
510 tgl@sss.pgh.pa.us 721 : 0 : SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
9715 bruce@momjian.us 722 : 0 : break;
723 : :
6674 peter_e@gmx.net 724 : 0 : case 't':
725 : : {
6402 bruce@momjian.us 726 : 0 : const char *tmp = get_stats_option_name(optarg);
727 : :
728 [ # # ]: 0 : if (tmp)
729 : : {
730 : 0 : SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
731 : : }
732 : : else
733 : : {
734 : 0 : write_stderr("%s: invalid argument for option -t: \"%s\"\n",
735 : : progname, optarg);
736 : 0 : ExitPostmaster(1);
737 : : }
738 : 0 : break;
739 : : }
740 : :
6674 peter_e@gmx.net 741 : 0 : case 'W':
742 : 0 : SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
743 : 0 : break;
744 : :
9715 bruce@momjian.us 745 : 0 : default:
7207 746 : 0 : write_stderr("Try \"%s --help\" for more information.\n",
747 : : progname);
8537 tgl@sss.pgh.pa.us 748 : 0 : ExitPostmaster(1);
749 : : }
750 : : }
751 : :
752 : : /*
753 : : * Postmaster accepts no non-option switch arguments.
754 : : */
8086 peter_e@gmx.net 755 [ - + ]:CBC 736 : if (optind < argc)
756 : : {
7207 bruce@momjian.us 757 :UBC 0 : write_stderr("%s: invalid argument: \"%s\"\n",
758 : 0 : progname, argv[optind]);
759 : 0 : write_stderr("Try \"%s --help\" for more information.\n",
760 : : progname);
8086 peter_e@gmx.net 761 : 0 : ExitPostmaster(1);
762 : : }
763 : :
764 : : /*
765 : : * Locate the proper configuration files and data directory, and read
766 : : * postgresql.conf for the first time.
767 : : */
7128 tgl@sss.pgh.pa.us 768 [ - + ]:CBC 736 : if (!SelectConfigFiles(userDoption, progname))
7128 tgl@sss.pgh.pa.us 769 :UBC 0 : ExitPostmaster(2);
770 : :
4574 bruce@momjian.us 771 [ + + ]:CBC 735 : if (output_config_variable != NULL)
772 : : {
773 : : /*
774 : : * If this is a runtime-computed GUC, it hasn't yet been initialized,
775 : : * and the present value is not useful. However, this is a convenient
776 : : * place to print the value for most GUCs because it is safe to run
777 : : * postmaster startup to this point even if the server is already
778 : : * running. For the handful of runtime-computed GUCs that we cannot
779 : : * provide meaningful values for yet, we wait until later in
780 : : * postmaster startup to print the value. We won't be able to use -C
781 : : * on running servers for those GUCs, but using this option now would
782 : : * lead to incorrect results for them.
783 : : */
941 michael@paquier.xyz 784 : 2 : int flags = GetConfigOptionFlags(output_config_variable, true);
785 : :
786 [ + + ]: 2 : if ((flags & GUC_RUNTIME_COMPUTED) == 0)
787 : : {
788 : : /*
789 : : * "-C guc" was specified, so print GUC's value and exit. No
790 : : * extra permission check is needed because the user is reading
791 : : * inside the data dir.
792 : : */
793 : 1 : const char *config_val = GetConfigOption(output_config_variable,
794 : : false, false);
795 : :
796 [ + - ]: 1 : puts(config_val ? config_val : "");
797 : 1 : ExitPostmaster(0);
798 : : }
799 : :
800 : : /*
801 : : * A runtime-computed GUC will be printed later on. As we initialize
802 : : * a server startup sequence, silence any log messages that may show
803 : : * up in the output generated. FATAL and more severe messages are
804 : : * useful to show, even if one would only expect at least PANIC. LOG
805 : : * entries are hidden.
806 : : */
676 tgl@sss.pgh.pa.us 807 : 1 : SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
808 : : PGC_S_OVERRIDE);
809 : : }
810 : :
811 : : /* Verify that DataDir looks reasonable */
7128 812 : 734 : checkDataDir();
813 : :
814 : : /* Check that pg_control exists */
2199 sfrost@snowman.net 815 : 734 : checkControlFile();
816 : :
817 : : /* And switch working directory into it */
6859 tgl@sss.pgh.pa.us 818 : 734 : ChangeToDataDir();
819 : :
820 : : /*
821 : : * Check for invalid combinations of GUC settings.
822 : : */
450 rhaas@postgresql.org 823 [ - + ]: 734 : if (SuperuserReservedConnections + ReservedConnections >= MaxConnections)
824 : : {
450 rhaas@postgresql.org 825 :UBC 0 : write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
826 : : progname,
827 : : SuperuserReservedConnections, ReservedConnections,
828 : : MaxConnections);
4265 magnus@hagander.net 829 : 0 : ExitPostmaster(1);
830 : : }
3257 heikki.linnakangas@i 831 [ + + - + ]:CBC 734 : if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
5100 heikki.linnakangas@i 832 [ # # ]:UBC 0 : ereport(ERROR,
833 : : (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
5100 heikki.linnakangas@i 834 [ + + - + ]:CBC 734 : if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
5100 heikki.linnakangas@i 835 [ # # ]:UBC 0 : ereport(ERROR,
836 : : (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
116 rhaas@postgresql.org 837 [ + + - + ]:GNC 734 : if (summarize_wal && wal_level == WAL_LEVEL_MINIMAL)
116 rhaas@postgresql.org 838 [ # # ]:UNC 0 : ereport(ERROR,
839 : : (errmsg("WAL cannot be summarized when wal_level is \"minimal\"")));
840 : :
841 : : /*
842 : : * Other one-time internal sanity checks can go here, if they are fast.
843 : : * (Put any slow processing further down, after postmaster.pid creation.)
844 : : */
7759 tgl@sss.pgh.pa.us 845 [ - + ]:CBC 734 : if (!CheckDateTokenTables())
846 : : {
7207 bruce@momjian.us 847 :UBC 0 : write_stderr("%s: invalid datetoken tables, please fix\n", progname);
7759 tgl@sss.pgh.pa.us 848 : 0 : ExitPostmaster(1);
849 : : }
850 : :
851 : : /*
852 : : * Now that we are done processing the postmaster arguments, reset
853 : : * getopt(3) library so that it will work correctly in subprocesses.
854 : : */
8213 tgl@sss.pgh.pa.us 855 :CBC 734 : optind = 1;
856 : : #ifdef HAVE_INT_OPTRESET
857 : : optreset = 1; /* some systems need this too */
858 : : #endif
859 : :
860 : : /* For debugging: display postmaster environment */
861 : : {
862 : : extern char **environ;
863 : : char **p;
864 : :
7551 865 [ - + ]: 734 : ereport(DEBUG3,
866 : : (errmsg_internal("%s: PostmasterMain: initial environment dump:",
867 : : progname)));
868 [ - + ]: 734 : ereport(DEBUG3,
869 : : (errmsg_internal("-----------------------------------------")));
8554 870 [ + + ]: 70740 : for (p = environ; *p; ++p)
7551 871 [ - + ]: 70006 : ereport(DEBUG3,
872 : : (errmsg_internal("\t%s", *p)));
873 [ - + ]: 734 : ereport(DEBUG3,
874 : : (errmsg_internal("-----------------------------------------")));
875 : : }
876 : :
877 : : /*
878 : : * Create lockfile for data directory.
879 : : *
880 : : * We want to do this before we try to grab the input sockets, because the
881 : : * data directory interlock is more reliable than the socket-file
882 : : * interlock (thanks to whoever decided to put socket files in /tmp :-().
883 : : * For the same reason, it's best to grab the TCP socket(s) before the
884 : : * Unix socket(s).
885 : : *
886 : : * Also note that this internally sets up the on_proc_exit function that
887 : : * is responsible for removing both data directory and socket lockfiles;
888 : : * so it must happen before opening sockets so that at exit, the socket
889 : : * lockfiles go away after CloseServerPorts runs.
890 : : */
6859 891 : 734 : CreateDataDirLockFile(true);
892 : :
893 : : /*
894 : : * Read the control file (for error checking and config info).
895 : : *
896 : : * Since we verify the control file's CRC, this has a useful side effect
897 : : * on machines where we need a run-time test for CRC support instructions.
898 : : * The postmaster will do the test once at startup, and then its child
899 : : * processes will inherit the correct function pointer and not need to
900 : : * repeat the test.
901 : : */
2401 andres@anarazel.de 902 : 733 : LocalProcessControlFile(false);
903 : :
904 : : /*
905 : : * Register the apply launcher. It's probably a good idea to call this
906 : : * before any modules had a chance to take the background worker slots.
907 : : */
2642 peter_e@gmx.net 908 : 733 : ApplyLauncherRegister();
909 : :
910 : : /*
911 : : * process any libraries that should be preloaded at postmaster start
912 : : */
6452 tgl@sss.pgh.pa.us 913 : 733 : process_shared_preload_libraries();
914 : :
915 : : /*
916 : : * Initialize SSL library, if specified.
917 : : */
918 : : #ifdef USE_SSL
1481 andrew@dunslane.net 919 [ + + ]: 733 : if (EnableSSL)
920 : : {
921 : 26 : (void) secure_initialize(true);
922 : 23 : LoadedSSL = true;
923 : : }
924 : : #endif
925 : :
926 : : /*
927 : : * Now that loadable modules have had their chance to alter any GUCs,
928 : : * calculate MaxBackends.
929 : : */
4120 alvherre@alvh.no-ip. 930 : 730 : InitializeMaxBackends();
931 : :
932 : : /*
933 : : * Give preloaded libraries a chance to request additional shared memory.
934 : : */
702 rhaas@postgresql.org 935 : 730 : process_shmem_requests();
936 : :
937 : : /*
938 : : * Now that loadable modules have had their chance to request additional
939 : : * shared memory, determine the value of any runtime-computed GUCs that
940 : : * depend on the amount of shared memory required.
941 : : */
949 michael@paquier.xyz 942 : 730 : InitializeShmemGUCs();
943 : :
944 : : /*
945 : : * Now that modules have been loaded, we can process any custom resource
946 : : * managers specified in the wal_consistency_checking GUC.
947 : : */
739 jdavis@postgresql.or 948 : 730 : InitializeWalConsistencyChecking();
949 : :
950 : : /*
951 : : * If -C was specified with a runtime-computed GUC, we held off printing
952 : : * the value earlier, as the GUC was not yet initialized. We handle -C
953 : : * for most GUCs before we lock the data directory so that the option may
954 : : * be used on a running server. However, a handful of GUCs are runtime-
955 : : * computed and do not have meaningful values until after locking the data
956 : : * directory, and we cannot safely calculate their values earlier on a
957 : : * running server. At this point, such GUCs should be properly
958 : : * initialized, and we haven't yet set up shared memory, so this is a good
959 : : * time to handle the -C option for these special GUCs.
960 : : */
941 michael@paquier.xyz 961 [ + + ]: 730 : if (output_config_variable != NULL)
962 : : {
963 : 1 : const char *config_val = GetConfigOption(output_config_variable,
964 : : false, false);
965 : :
966 [ + - ]: 1 : puts(config_val ? config_val : "");
967 : 1 : ExitPostmaster(0);
968 : : }
969 : :
970 : : /*
971 : : * Set up shared memory and semaphores.
972 : : *
973 : : * Note: if using SysV shmem and/or semas, each postmaster startup will
974 : : * normally choose the same IPC keys. This helps ensure that we will
975 : : * clean up dead IPC objects if the postmaster crashes and is restarted.
976 : : */
638 tgl@sss.pgh.pa.us 977 : 729 : CreateSharedMemoryAndSemaphores();
978 : :
979 : : /*
980 : : * Estimate number of openable files. This must happen after setting up
981 : : * semaphores, because on some platforms semaphores count as open files.
982 : : */
1677 983 : 728 : set_max_safe_fds();
984 : :
985 : : /*
986 : : * Set reference point for stack-depth checking.
987 : : */
787 988 : 728 : (void) set_stack_base();
989 : :
990 : : /*
991 : : * Initialize pipe (or process handle on Windows) that allows children to
992 : : * wake up from sleep on postmaster death.
993 : : */
1677 994 : 728 : InitPostmasterDeathWatchHandle();
995 : :
996 : : #ifdef WIN32
997 : :
998 : : /*
999 : : * Initialize I/O completion port used to deliver list of dead children.
1000 : : */
1001 : : win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1002 : : if (win32ChildQueue == NULL)
1003 : : ereport(FATAL,
1004 : : (errmsg("could not create I/O completion port for child queue")));
1005 : : #endif
1006 : :
1007 : : #ifdef EXEC_BACKEND
1008 : : /* Write out nondefault GUC settings for child processes to use */
1009 : : write_nondefault_variables(PGC_POSTMASTER);
1010 : :
1011 : : /*
1012 : : * Clean out the temp directory used to transmit parameters to child
1013 : : * processes (see internal_forkexec). We must do this before launching
1014 : : * any child processes, else we have a race condition: we could remove a
1015 : : * parameter file before the child can read it. It should be safe to do
1016 : : * so now, because we verified earlier that there are no conflicting
1017 : : * Postgres processes in this data directory.
1018 : : */
1019 : : RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false);
1020 : : #endif
1021 : :
1022 : : /*
1023 : : * Forcibly remove the files signaling a standby promotion request.
1024 : : * Otherwise, the existence of those files triggers a promotion too early,
1025 : : * whether a user wants that or not.
1026 : : *
1027 : : * This removal of files is usually unnecessary because they can exist
1028 : : * only during a few moments during a standby promotion. However there is
1029 : : * a race condition: if pg_ctl promote is executed and creates the files
1030 : : * during a promotion, the files can stay around even after the server is
1031 : : * brought up to be the primary. Then, if a new standby starts by using
1032 : : * the backup taken from the new primary, the files can exist at server
1033 : : * startup and must be removed in order to avoid an unexpected promotion.
1034 : : *
1035 : : * Note that promotion signal files need to be removed before the startup
1036 : : * process is invoked. Because, after that, they can be used by
1037 : : * postmaster's SIGUSR1 signal handler.
1038 : : */
1039 : 728 : RemovePromoteSignalFiles();
1040 : :
1041 : : /* Do the same for logrotate signal file */
1042 : 728 : RemoveLogrotateSignalFiles();
1043 : :
1044 : : /* Remove any outdated file holding the current log filenames. */
1045 [ + - - + ]: 728 : if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1677 tgl@sss.pgh.pa.us 1046 [ # # ]:UBC 0 : ereport(LOG,
1047 : : (errcode_for_file_access(),
1048 : : errmsg("could not remove file \"%s\": %m",
1049 : : LOG_METAINFO_DATAFILE)));
1050 : :
1051 : : /*
1052 : : * If enabled, start up syslogger collection subprocess
1053 : : */
1677 tgl@sss.pgh.pa.us 1054 :CBC 728 : SysLoggerPID = SysLogger_Start();
1055 : :
1056 : : /*
1057 : : * Reset whereToSendOutput from DestDebug (its starting state) to
1058 : : * DestNone. This stops ereport from sending log messages to stderr unless
1059 : : * Log_destination permits. We don't do this until the postmaster is
1060 : : * fully launched, since startup failures may as well be reported to
1061 : : * stderr.
1062 : : *
1063 : : * If we are in fact disabling logging to stderr, first emit a log message
1064 : : * saying so, to provide a breadcrumb trail for users who may not remember
1065 : : * that their logging is configured to go somewhere else.
1066 : : */
1067 [ - + ]: 728 : if (!(Log_destination & LOG_DESTINATION_STDERR))
1677 tgl@sss.pgh.pa.us 1068 [ # # ]:UBC 0 : ereport(LOG,
1069 : : (errmsg("ending log output to stderr"),
1070 : : errhint("Future log output will go to log destination \"%s\".",
1071 : : Log_destination_string)));
1072 : :
1677 tgl@sss.pgh.pa.us 1073 :CBC 728 : whereToSendOutput = DestNone;
1074 : :
1075 : : /*
1076 : : * Report server startup in log. While we could emit this much earlier,
1077 : : * it seems best to do so after starting the log collector, if we intend
1078 : : * to use one.
1079 : : */
1901 peter@eisentraut.org 1080 [ + - ]: 728 : ereport(LOG,
1081 : : (errmsg("starting %s", PG_VERSION_STR)));
1082 : :
1083 : : /*
1084 : : * Establish input sockets.
1085 : : *
1086 : : * First set up an on_proc_exit function that's charged with closing the
1087 : : * sockets again at postmaster shutdown.
1088 : : */
192 heikki.linnakangas@i 1089 :GNC 728 : ListenSockets = palloc(MAXLISTEN * sizeof(pgsocket));
1090 : 728 : on_proc_exit(CloseServerPorts, 0);
1091 : :
7327 tgl@sss.pgh.pa.us 1092 [ + - ]:CBC 728 : if (ListenAddresses)
1093 : : {
1094 : : char *rawstring;
1095 : : List *elemlist;
1096 : : ListCell *l;
6863 peter_e@gmx.net 1097 : 728 : int success = 0;
1098 : :
1099 : : /* Need a modifiable copy of ListenAddresses */
7189 tgl@sss.pgh.pa.us 1100 : 728 : rawstring = pstrdup(ListenAddresses);
1101 : :
1102 : : /* Parse string into list of hostnames */
1614 1103 [ - + ]: 728 : if (!SplitGUCList(rawstring, ',', &elemlist))
1104 : : {
1105 : : /* syntax error in list */
7189 tgl@sss.pgh.pa.us 1106 [ # # ]:UBC 0 : ereport(FATAL,
1107 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1108 : : errmsg("invalid list syntax in parameter \"%s\"",
1109 : : "listen_addresses")));
1110 : : }
1111 : :
7189 tgl@sss.pgh.pa.us 1112 [ + + + + :CBC 764 : foreach(l, elemlist)
+ + ]
1113 : : {
7168 bruce@momjian.us 1114 : 36 : char *curhost = (char *) lfirst(l);
1115 : :
7262 1116 [ - + ]: 36 : if (strcmp(curhost, "*") == 0)
33 heikki.linnakangas@i 1117 :UNC 0 : status = ListenServerPort(AF_UNSPEC, NULL,
7327 tgl@sss.pgh.pa.us 1118 :UBC 0 : (unsigned short) PostPortNumber,
1119 : : NULL,
1120 : : ListenSockets,
1121 : : &NumListenSockets,
1122 : : MAXLISTEN);
1123 : : else
33 heikki.linnakangas@i 1124 :GNC 36 : status = ListenServerPort(AF_UNSPEC, curhost,
7571 tgl@sss.pgh.pa.us 1125 :CBC 36 : (unsigned short) PostPortNumber,
1126 : : NULL,
1127 : : ListenSockets,
1128 : : &NumListenSockets,
1129 : : MAXLISTEN);
1130 : :
6863 peter_e@gmx.net 1131 [ + - ]: 36 : if (status == STATUS_OK)
1132 : : {
1133 : 36 : success++;
1134 : : /* record the first successful host addr in lockfile */
4840 tgl@sss.pgh.pa.us 1135 [ + - ]: 36 : if (!listen_addr_saved)
1136 : : {
1137 : 36 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1138 : 36 : listen_addr_saved = true;
1139 : : }
1140 : : }
1141 : : else
7327 tgl@sss.pgh.pa.us 1142 [ # # ]:UBC 0 : ereport(WARNING,
1143 : : (errmsg("could not create listen socket for \"%s\"",
1144 : : curhost)));
1145 : : }
1146 : :
4265 tgl@sss.pgh.pa.us 1147 [ + + - + ]:CBC 728 : if (!success && elemlist != NIL)
6863 peter_e@gmx.net 1148 [ # # ]:UBC 0 : ereport(FATAL,
1149 : : (errmsg("could not create any TCP/IP sockets")));
1150 : :
7189 tgl@sss.pgh.pa.us 1151 :CBC 728 : list_free(elemlist);
1152 : 728 : pfree(rawstring);
1153 : : }
1154 : :
1155 : : #ifdef USE_BONJOUR
1156 : : /* Register for Bonjour only if we opened TCP socket(s) */
1157 : : if (enable_bonjour && NumListenSockets > 0)
1158 : : {
1159 : : DNSServiceErrorType err;
1160 : :
1161 : : /*
1162 : : * We pass 0 for interface_index, which will result in registering on
1163 : : * all "applicable" interfaces. It's not entirely clear from the
1164 : : * DNS-SD docs whether this would be appropriate if we have bound to
1165 : : * just a subset of the available network interfaces.
1166 : : */
1167 : : err = DNSServiceRegister(&bonjour_sdref,
1168 : : 0,
1169 : : 0,
1170 : : bonjour_name,
1171 : : "_postgresql._tcp.",
1172 : : NULL,
1173 : : NULL,
1174 : : pg_hton16(PostPortNumber),
1175 : : 0,
1176 : : NULL,
1177 : : NULL,
1178 : : NULL);
1179 : : if (err != kDNSServiceErr_NoError)
1180 : : ereport(LOG,
1181 : : (errmsg("DNSServiceRegister() failed: error code %ld",
1182 : : (long) err)));
1183 : :
1184 : : /*
1185 : : * We don't bother to read the mDNS daemon's reply, and we expect that
1186 : : * it will automatically terminate our registration when the socket is
1187 : : * closed at postmaster termination. So there's nothing more to be
1188 : : * done here. However, the bonjour_sdref is kept around so that
1189 : : * forked children can close their copies of the socket.
1190 : : */
1191 : : }
1192 : : #endif
1193 : :
4265 1194 [ + - ]: 728 : if (Unix_socket_directories)
1195 : : {
1196 : : char *rawstring;
1197 : : List *elemlist;
1198 : : ListCell *l;
1199 : 728 : int success = 0;
1200 : :
1201 : : /* Need a modifiable copy of Unix_socket_directories */
1202 : 728 : rawstring = pstrdup(Unix_socket_directories);
1203 : :
1204 : : /* Parse string into list of directories */
1205 [ - + ]: 728 : if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1206 : : {
1207 : : /* syntax error in list */
4265 tgl@sss.pgh.pa.us 1208 [ # # ]:UBC 0 : ereport(FATAL,
1209 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1210 : : errmsg("invalid list syntax in parameter \"%s\"",
1211 : : "unix_socket_directories")));
1212 : : }
1213 : :
4265 tgl@sss.pgh.pa.us 1214 [ + + + + :CBC 1453 : foreach(l, elemlist)
+ + ]
1215 : : {
1216 : 725 : char *socketdir = (char *) lfirst(l);
1217 : :
33 heikki.linnakangas@i 1218 :GNC 725 : status = ListenServerPort(AF_UNIX, NULL,
4265 tgl@sss.pgh.pa.us 1219 :CBC 725 : (unsigned short) PostPortNumber,
1220 : : socketdir,
1221 : : ListenSockets,
1222 : : &NumListenSockets,
1223 : : MAXLISTEN);
1224 : :
1225 [ + - ]: 725 : if (status == STATUS_OK)
1226 : : {
1227 : 725 : success++;
1228 : : /* record the first successful Unix socket in lockfile */
1229 [ + - ]: 725 : if (success == 1)
1230 : 725 : AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1231 : : }
1232 : : else
4265 tgl@sss.pgh.pa.us 1233 [ # # ]:UBC 0 : ereport(WARNING,
1234 : : (errmsg("could not create Unix-domain socket in directory \"%s\"",
1235 : : socketdir)));
1236 : : }
1237 : :
4265 tgl@sss.pgh.pa.us 1238 [ + + - + ]:CBC 728 : if (!success && elemlist != NIL)
4265 tgl@sss.pgh.pa.us 1239 [ # # ]:UBC 0 : ereport(FATAL,
1240 : : (errmsg("could not create any Unix-domain sockets")));
1241 : :
4265 tgl@sss.pgh.pa.us 1242 :CBC 728 : list_free_deep(elemlist);
1243 : 728 : pfree(rawstring);
1244 : : }
1245 : :
1246 : : /*
1247 : : * check that we have some socket to listen on
1248 : : */
192 heikki.linnakangas@i 1249 [ - + ]:GNC 728 : if (NumListenSockets == 0)
7327 tgl@sss.pgh.pa.us 1250 [ # # ]:UBC 0 : ereport(FATAL,
1251 : : (errmsg("no socket created for listening")));
1252 : :
1253 : : /*
1254 : : * If no valid TCP ports, write an empty line for listen address,
1255 : : * indicating the Unix socket must be used. Note that this line is not
1256 : : * added to the lock file until there is a socket backing it.
1257 : : */
4840 tgl@sss.pgh.pa.us 1258 [ + + ]:CBC 728 : if (!listen_addr_saved)
1259 : 692 : AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1260 : :
1261 : : /*
1262 : : * Record postmaster options. We delay this till now to avoid recording
1263 : : * bogus options (eg, unusable port number).
1264 : : */
7276 bruce@momjian.us 1265 [ - + ]: 728 : if (!CreateOptsFile(argc, argv, my_exec_path))
8537 tgl@sss.pgh.pa.us 1266 :UBC 0 : ExitPostmaster(1);
1267 : :
1268 : : /*
1269 : : * Write the external PID file if requested
1270 : : */
7127 tgl@sss.pgh.pa.us 1271 [ - + ]:CBC 728 : if (external_pid_file)
1272 : : {
7127 tgl@sss.pgh.pa.us 1273 :UBC 0 : FILE *fpidfile = fopen(external_pid_file, "w");
1274 : :
7128 1275 [ # # ]: 0 : if (fpidfile)
1276 : : {
1277 : 0 : fprintf(fpidfile, "%d\n", MyProcPid);
1278 : 0 : fclose(fpidfile);
1279 : :
1280 : : /* Make PID file world readable */
4683 peter_e@gmx.net 1281 [ # # ]: 0 : if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
33 michael@paquier.xyz 1282 :UNC 0 : write_stderr("%s: could not change permissions of external PID file \"%s\": %m\n",
1283 : : progname, external_pid_file);
1284 : : }
1285 : : else
1286 : 0 : write_stderr("%s: could not write external PID file \"%s\": %m\n",
1287 : : progname, external_pid_file);
1288 : :
4255 peter_e@gmx.net 1289 :UBC 0 : on_proc_exit(unlink_external_pid_file, 0);
1290 : : }
1291 : :
1292 : : /*
1293 : : * Remove old temporary files. At this point there can be no other
1294 : : * Postgres processes running in this directory, so this should be safe.
1295 : : */
3379 andres@anarazel.de 1296 :CBC 728 : RemovePgTempFiles();
1297 : :
1298 : : /*
1299 : : * Initialize the autovacuum subsystem (again, no process start yet)
1300 : : */
6473 tgl@sss.pgh.pa.us 1301 : 728 : autovac_init();
1302 : :
1303 : : /*
1304 : : * Load configuration files for client authentication.
1305 : : */
5347 1306 [ - + ]: 728 : if (!load_hba())
1307 : : {
1308 : : /*
1309 : : * It makes no sense to continue if we fail to load the HBA file,
1310 : : * since there is no way to connect to the database in this case.
1311 : : */
5347 tgl@sss.pgh.pa.us 1312 [ # # ]:UBC 0 : ereport(FATAL,
1313 : : /* translator: %s is a configuration file */
1314 : : (errmsg("could not load %s", HbaFileName)));
1315 : : }
4223 heikki.linnakangas@i 1316 :CBC 728 : if (!load_ident())
1317 : : {
1318 : : /*
1319 : : * We can start up without the IDENT file, although it means that you
1320 : : * cannot log in using any of the authentication methods that need a
1321 : : * user name mapping. load_ident() already logged the details of error
1322 : : * to the log.
1323 : : */
1324 : : }
1325 : :
1326 : : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1327 : :
1328 : : /*
1329 : : * On macOS, libintl replaces setlocale() with a version that calls
1330 : : * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1331 : : * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1332 : : * the process multithreaded. The postmaster calls sigprocmask() and
1333 : : * calls fork() without an immediate exec(), both of which have undefined
1334 : : * behavior in a multithreaded program. A multithreaded postmaster is the
1335 : : * normal case on Windows, which offers neither fork() nor sigprocmask().
1336 : : */
1337 : : if (pthread_is_threaded_np() != 0)
1338 : : ereport(FATAL,
1339 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1340 : : errmsg("postmaster became multithreaded during startup"),
1341 : : errhint("Set the LC_ALL environment variable to a valid locale.")));
1342 : : #endif
1343 : :
1344 : : /*
1345 : : * Remember postmaster startup time
1346 : : */
6864 tgl@sss.pgh.pa.us 1347 : 728 : PgStartTime = GetCurrentTimestamp();
1348 : :
1349 : : /*
1350 : : * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1351 : : * see what's happening.
1352 : : */
2482 1353 : 728 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1354 : :
1355 : : /* Start bgwriter and checkpointer so they can help with recovery */
986 tmunro@postgresql.or 1356 [ + - ]: 728 : if (CheckpointerPID == 0)
41 heikki.linnakangas@i 1357 :GNC 728 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
986 tmunro@postgresql.or 1358 [ + - ]:CBC 728 : if (BgWriterPID == 0)
41 heikki.linnakangas@i 1359 :GNC 728 : BgWriterPID = StartChildProcess(B_BG_WRITER);
1360 : :
1361 : : /*
1362 : : * We're ready to rock and roll...
1363 : : */
1364 : 728 : StartupPID = StartChildProcess(B_STARTUP);
6093 tgl@sss.pgh.pa.us 1365 [ - + ]:CBC 728 : Assert(StartupPID != 0);
3202 1366 : 728 : StartupStatus = STARTUP_RUNNING;
6093 1367 : 728 : pmState = PM_STARTUP;
1368 : :
1369 : : /* Some workers may be scheduled to start now */
2545 1370 : 728 : maybe_start_bgworkers();
1371 : :
9716 bruce@momjian.us 1372 : 728 : status = ServerLoop();
1373 : :
1374 : : /*
1375 : : * ServerLoop probably shouldn't ever return, but if it does, close down.
1376 : : */
9716 bruce@momjian.us 1377 :UBC 0 : ExitPostmaster(status != STATUS_OK);
1378 : :
1379 : : abort(); /* not reached */
1380 : : }
1381 : :
1382 : :
1383 : : /*
1384 : : * on_proc_exit callback to close server's listen sockets
1385 : : */
1386 : : static void
3178 tgl@sss.pgh.pa.us 1387 :CBC 728 : CloseServerPorts(int status, Datum arg)
1388 : : {
1389 : : int i;
1390 : :
1391 : : /*
1392 : : * First, explicitly close all the socket FDs. We used to just let this
1393 : : * happen implicitly at postmaster exit, but it's better to close them
1394 : : * before we remove the postmaster.pid lockfile; otherwise there's a race
1395 : : * condition if a new postmaster wants to re-use the TCP port number.
1396 : : */
192 heikki.linnakangas@i 1397 [ + + ]:GNC 1489 : for (i = 0; i < NumListenSockets; i++)
1398 : : {
33 1399 [ - + ]: 761 : if (closesocket(ListenSockets[i]) != 0)
33 heikki.linnakangas@i 1400 [ # # ]:UNC 0 : elog(LOG, "could not close listen socket: %m");
1401 : : }
192 heikki.linnakangas@i 1402 :GNC 728 : NumListenSockets = 0;
1403 : :
1404 : : /*
1405 : : * Next, remove any filesystem entries for Unix sockets. To avoid race
1406 : : * conditions against incoming postmasters, this must happen after closing
1407 : : * the sockets and before removing lock files.
1408 : : */
3178 tgl@sss.pgh.pa.us 1409 :CBC 728 : RemoveSocketFiles();
1410 : :
1411 : : /*
1412 : : * We don't do anything about socket lock files here; those will be
1413 : : * removed in a later on_proc_exit callback.
1414 : : */
1415 : 728 : }
1416 : :
1417 : : /*
1418 : : * on_proc_exit callback to delete external_pid_file
1419 : : */
1420 : : static void
4255 peter_e@gmx.net 1421 :UBC 0 : unlink_external_pid_file(int status, Datum arg)
1422 : : {
1423 [ # # ]: 0 : if (external_pid_file)
1424 : 0 : unlink(external_pid_file);
1425 : 0 : }
1426 : :
1427 : :
1428 : : /*
1429 : : * Compute and check the directory paths to files that are part of the
1430 : : * installation (as deduced from the postgres executable's own location)
1431 : : */
1432 : : static void
5461 tgl@sss.pgh.pa.us 1433 :CBC 736 : getInstallationPaths(const char *argv0)
1434 : : {
1435 : : DIR *pdir;
1436 : :
1437 : : /* Locate the postgres executable itself */
1438 [ - + ]: 736 : if (find_my_exec(argv0, my_exec_path) < 0)
1227 peter@eisentraut.org 1439 [ # # ]:UBC 0 : ereport(FATAL,
1440 : : (errmsg("%s: could not locate my own executable path", argv0)));
1441 : :
1442 : : #ifdef EXEC_BACKEND
1443 : : /* Locate executable backend before we change working directory */
1444 : : if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1445 : : postgres_exec_path) < 0)
1446 : : ereport(FATAL,
1447 : : (errmsg("%s: could not locate matching postgres executable",
1448 : : argv0)));
1449 : : #endif
1450 : :
1451 : : /*
1452 : : * Locate the pkglib directory --- this has to be set early in case we try
1453 : : * to load any modules from it in response to postgresql.conf entries.
1454 : : */
5461 tgl@sss.pgh.pa.us 1455 :CBC 736 : get_pkglib_path(my_exec_path, pkglib_path);
1456 : :
1457 : : /*
1458 : : * Verify that there's a readable directory there; otherwise the Postgres
1459 : : * installation is incomplete or corrupt. (A typical cause of this
1460 : : * failure is that the postgres executable has been moved or hardlinked to
1461 : : * some directory that's not a sibling of the installation lib/
1462 : : * directory.)
1463 : : */
1464 : 736 : pdir = AllocateDir(pkglib_path);
1465 [ - + ]: 736 : if (pdir == NULL)
5461 tgl@sss.pgh.pa.us 1466 [ # # ]:UBC 0 : ereport(ERROR,
1467 : : (errcode_for_file_access(),
1468 : : errmsg("could not open directory \"%s\": %m",
1469 : : pkglib_path),
1470 : : errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1471 : : my_exec_path)));
5461 tgl@sss.pgh.pa.us 1472 :CBC 736 : FreeDir(pdir);
1473 : :
1474 : : /*
1475 : : * It's not worth checking the share/ directory. If the lib/ directory is
1476 : : * there, then share/ probably is too.
1477 : : */
1478 : 736 : }
1479 : :
1480 : : /*
1481 : : * Check that pg_control exists in the correct location in the data directory.
1482 : : *
1483 : : * No attempt is made to validate the contents of pg_control here. This is
1484 : : * just a sanity check to see if we are looking at a real data directory.
1485 : : */
1486 : : static void
2199 sfrost@snowman.net 1487 : 734 : checkControlFile(void)
1488 : : {
1489 : : char path[MAXPGPATH];
1490 : : FILE *fp;
1491 : :
7128 tgl@sss.pgh.pa.us 1492 : 734 : snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1493 : :
7261 1494 : 734 : fp = AllocateFile(path, PG_BINARY_R);
1495 [ - + ]: 734 : if (fp == NULL)
1496 : : {
7207 bruce@momjian.us 1497 :UBC 0 : write_stderr("%s: could not find the database system\n"
1498 : : "Expected to find it in the directory \"%s\",\n"
1499 : : "but could not open file \"%s\": %m\n",
1500 : : progname, DataDir, path);
7261 tgl@sss.pgh.pa.us 1501 : 0 : ExitPostmaster(2);
1502 : : }
7261 tgl@sss.pgh.pa.us 1503 :CBC 734 : FreeFile(fp);
1504 : 734 : }
1505 : :
1506 : : /*
1507 : : * Determine how long should we let ServerLoop sleep, in milliseconds.
1508 : : *
1509 : : * In normal conditions we wait at most one minute, to ensure that the other
1510 : : * background tasks handled by ServerLoop get done even when no requests are
1511 : : * arriving. However, if there are background workers waiting to be started,
1512 : : * we don't actually sleep so that they are quickly serviced. Other exception
1513 : : * cases are as shown in the code.
1514 : : */
1515 : : static int
458 tmunro@postgresql.or 1516 : 123609 : DetermineSleepTime(void)
1517 : : {
4147 alvherre@alvh.no-ip. 1518 : 123609 : TimestampTz next_wakeup = 0;
1519 : :
1520 : : /*
1521 : : * Normal case: either there are no background workers at all, or we're in
1522 : : * a shutdown sequence (during which we ignore bgworkers altogether).
1523 : : */
1524 [ + + ]: 123609 : if (Shutdown > NoShutdown ||
1525 [ + - + - ]: 119518 : (!StartWorkerNeeded && !HaveCrashedWorker))
1526 : : {
3222 tgl@sss.pgh.pa.us 1527 [ + + ]: 123609 : if (AbortStartTime != 0)
1528 : : {
1529 : : int seconds;
1530 : :
1531 : : /* time left to abort; clamp to 0 in case it already expired */
458 tmunro@postgresql.or 1532 : 1683 : seconds = SIGKILL_CHILDREN_AFTER_SECS -
3222 tgl@sss.pgh.pa.us 1533 : 1683 : (time(NULL) - AbortStartTime);
1534 : :
458 tmunro@postgresql.or 1535 : 1683 : return Max(seconds * 1000, 0);
1536 : : }
1537 : : else
1538 : 121926 : return 60 * 1000;
1539 : : }
1540 : :
4147 alvherre@alvh.no-ip. 1541 [ # # ]:UBC 0 : if (StartWorkerNeeded)
458 tmunro@postgresql.or 1542 : 0 : return 0;
1543 : :
4147 alvherre@alvh.no-ip. 1544 [ # # ]: 0 : if (HaveCrashedWorker)
1545 : : {
1546 : : slist_mutable_iter siter;
1547 : :
1548 : : /*
1549 : : * When there are crashed bgworkers, we sleep just long enough that
1550 : : * they are restarted when they request to be. Scan the list to
1551 : : * determine the minimum of all wakeup times according to most recent
1552 : : * crash time and requested restart interval.
1553 : : */
3925 rhaas@postgresql.org 1554 [ # # # # : 0 : slist_foreach_modify(siter, &BackgroundWorkerList)
# # ]
1555 : : {
1556 : : RegisteredBgWorker *rw;
1557 : : TimestampTz this_wakeup;
1558 : :
4147 alvherre@alvh.no-ip. 1559 : 0 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1560 : :
1561 [ # # ]: 0 : if (rw->rw_crashed_at == 0)
1562 : 0 : continue;
1563 : :
3831 rhaas@postgresql.org 1564 [ # # ]: 0 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1565 [ # # ]: 0 : || rw->rw_terminate)
1566 : : {
3917 tgl@sss.pgh.pa.us 1567 : 0 : ForgetBackgroundWorker(&siter);
4147 alvherre@alvh.no-ip. 1568 : 0 : continue;
1569 : : }
1570 : :
1571 : 0 : this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1572 : : 1000L * rw->rw_worker.bgw_restart_time);
1573 [ # # # # ]: 0 : if (next_wakeup == 0 || this_wakeup < next_wakeup)
1574 : 0 : next_wakeup = this_wakeup;
1575 : : }
1576 : : }
1577 : :
1578 [ # # ]: 0 : if (next_wakeup != 0)
1579 : : {
1580 : : int ms;
1581 : :
1582 : : /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
444 tgl@sss.pgh.pa.us 1583 : 0 : ms = (int) TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
1584 : : next_wakeup);
1585 : 0 : return Min(60 * 1000, ms);
1586 : : }
1587 : :
458 tmunro@postgresql.or 1588 : 0 : return 60 * 1000;
1589 : : }
1590 : :
1591 : : /*
1592 : : * Activate or deactivate notifications of server socket events. Since we
1593 : : * don't currently have a way to remove events from an existing WaitEventSet,
1594 : : * we'll just destroy and recreate the whole thing. This is called during
1595 : : * shutdown so we can wait for backends to exit without accepting new
1596 : : * connections, and during crash reinitialization when we need to start
1597 : : * listening for new connections again. The WaitEventSet will be freed in fork
1598 : : * children by ClosePostmasterPorts().
1599 : : */
1600 : : static void
458 tmunro@postgresql.or 1601 :CBC 1486 : ConfigurePostmasterWaitSet(bool accept_connections)
1602 : : {
1603 [ + + ]: 1486 : if (pm_wait_set)
1604 : 758 : FreeWaitEventSet(pm_wait_set);
1605 : 1486 : pm_wait_set = NULL;
1606 : :
143 heikki.linnakangas@i 1607 [ + + ]:GNC 2219 : pm_wait_set = CreateWaitEventSet(NULL,
192 1608 : 733 : accept_connections ? (1 + NumListenSockets) : 1);
458 tmunro@postgresql.or 1609 :CBC 1486 : AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
1610 : : NULL);
1611 : :
1612 [ + + ]: 1486 : if (accept_connections)
1613 : : {
192 heikki.linnakangas@i 1614 [ + + ]:GNC 1499 : for (int i = 0; i < NumListenSockets; i++)
1615 : 766 : AddWaitEventToSet(pm_wait_set, WL_SOCKET_ACCEPT, ListenSockets[i],
1616 : : NULL, NULL);
1617 : : }
458 tmunro@postgresql.or 1618 :CBC 1486 : }
1619 : :
1620 : : /*
1621 : : * Main idle loop of postmaster
1622 : : */
1623 : : static int
10054 scrappy@hub.org 1624 : 728 : ServerLoop(void)
1625 : : {
1626 : : time_t last_lockfile_recheck_time,
1627 : : last_touch_time;
1628 : : WaitEvent events[MAXLISTEN];
1629 : : int nevents;
1630 : :
458 tmunro@postgresql.or 1631 : 728 : ConfigurePostmasterWaitSet(true);
3113 tgl@sss.pgh.pa.us 1632 : 728 : last_lockfile_recheck_time = last_touch_time = time(NULL);
1633 : :
1634 : : for (;;)
9716 bruce@momjian.us 1635 : 122881 : {
1636 : : time_t now;
1637 : :
458 tmunro@postgresql.or 1638 : 123609 : nevents = WaitEventSetWait(pm_wait_set,
1639 : 123609 : DetermineSleepTime(),
1640 : : events,
1641 : : lengthof(events),
1642 : : 0 /* postmaster posts no wait_events */ );
1643 : :
1644 : : /*
1645 : : * Latch set by signal handler, or new connection pending on any of
1646 : : * our sockets? If the latter, fork a child process to deal with it.
1647 : : */
1648 [ + + ]: 246488 : for (int i = 0; i < nevents; i++)
1649 : : {
1650 [ + + ]: 123607 : if (events[i].events & WL_LATCH_SET)
1651 : 112021 : ResetLatch(MyLatch);
1652 : :
1653 : : /*
1654 : : * The following requests are handled unconditionally, even if we
1655 : : * didn't see WL_LATCH_SET. This gives high priority to shutdown
1656 : : * and reload requests where the latch happens to appear later in
1657 : : * events[] or will be reported by a later call to
1658 : : * WaitEventSetWait().
1659 : : */
445 1660 [ + + ]: 123607 : if (pending_pm_shutdown_request)
1661 : 725 : process_pm_shutdown_request();
1662 [ + + ]: 123607 : if (pending_pm_reload_request)
1663 : 127 : process_pm_reload_request();
1664 [ + + ]: 123607 : if (pending_pm_child_exit)
1665 : 19485 : process_pm_child_exit();
1666 [ + + ]: 122879 : if (pending_pm_pmsignal)
1667 : 91804 : process_pm_pmsignal();
1668 : :
1669 [ + + ]: 122879 : if (events[i].events & WL_SOCKET_ACCEPT)
1670 : : {
1671 : : ClientSocket s;
1672 : :
33 heikki.linnakangas@i 1673 [ + - ]:GNC 11586 : if (AcceptConnection(events[i].fd, &s) == STATUS_OK)
1674 : 11586 : BackendStartup(&s);
1675 : :
1676 : : /* We no longer need the open socket in this process */
1677 [ + - ]: 11586 : if (s.sock != PGINVALID_SOCKET)
1678 : : {
1679 [ - + ]: 11586 : if (closesocket(s.sock) != 0)
33 heikki.linnakangas@i 1680 [ # # ]:UNC 0 : elog(LOG, "could not close client socket: %m");
1681 : : }
1682 : : }
1683 : : }
1684 : :
1685 : : /* If we have lost the log collector, try to start a new one */
6083 andrew@dunslane.net 1686 [ + + - + ]:CBC 122881 : if (SysLoggerPID == 0 && Logging_collector)
7192 tgl@sss.pgh.pa.us 1687 :UBC 0 : SysLoggerPID = SysLogger_Start();
1688 : :
1689 : : /*
1690 : : * If no background writer process is running, and we are not in a
1691 : : * state that prevents it, start one. It doesn't matter if this
1692 : : * fails, we'll just try again later. Likewise for the checkpointer.
1693 : : */
4548 simon@2ndQuadrant.co 1694 [ + + + + ]:CBC 122881 : if (pmState == PM_RUN || pmState == PM_RECOVERY ||
986 tmunro@postgresql.or 1695 [ + + + + ]: 6038 : pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
1696 : : {
4548 simon@2ndQuadrant.co 1697 [ + + ]: 118771 : if (CheckpointerPID == 0)
41 heikki.linnakangas@i 1698 :GNC 5 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
4335 simon@2ndQuadrant.co 1699 [ + + ]:CBC 118771 : if (BgWriterPID == 0)
41 heikki.linnakangas@i 1700 :GNC 5 : BgWriterPID = StartChildProcess(B_BG_WRITER);
1701 : : }
1702 : :
1703 : : /*
1704 : : * Likewise, if we have lost the walwriter process, try to start a new
1705 : : * one. But this is needed only in normal operation (else we cannot
1706 : : * be writing any new WAL).
1707 : : */
6093 tgl@sss.pgh.pa.us 1708 [ + + - + ]:CBC 122881 : if (WalWriterPID == 0 && pmState == PM_RUN)
41 heikki.linnakangas@i 1709 :UNC 0 : WalWriterPID = StartChildProcess(B_WAL_WRITER);
1710 : :
1711 : : /*
1712 : : * If we have lost the autovacuum launcher, try to start a new one. We
1713 : : * don't want autovacuum to run in binary upgrade mode because
1714 : : * autovacuum might update relfrozenxid for empty tables before the
1715 : : * physical files are put in place.
1716 : : */
4738 bruce@momjian.us 1717 [ + + + + :CBC 130590 : if (!IsBinaryUpgrade && AutoVacPID == 0 &&
+ + ]
6132 tgl@sss.pgh.pa.us 1718 [ + + ]: 12318 : (AutoVacuumingActive() || start_autovac_launcher) &&
6093 1719 [ + + ]: 4409 : pmState == PM_RUN)
1720 : : {
27 heikki.linnakangas@i 1721 :GNC 1309 : AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER);
6370 tgl@sss.pgh.pa.us 1722 [ + - ]:GBC 1309 : if (AutoVacPID != 0)
5995 bruce@momjian.us 1723 : 1309 : start_autovac_launcher = false; /* signal processed */
1724 : : }
1725 : :
1726 : : /* If we have lost the archiver, try to start a new one. */
3229 fujii@postgresql.org 1727 [ + + + + :CBC 122881 : if (PgArchPID == 0 && PgArchStartupAllowed())
- + + + +
- + + - +
- + - - -
- - - ]
41 heikki.linnakangas@i 1728 :UNC 0 : PgArchPID = StartChildProcess(B_ARCHIVER);
1729 : :
1730 : : /* If we need to start a slot sync worker, try to do that now */
52 akapila@postgresql.o 1731 :GNC 122881 : MaybeStartSlotSyncWorker();
1732 : :
1733 : : /* If we need to signal the autovacuum launcher, do so now */
5347 alvherre@alvh.no-ip. 1734 [ - + ]:CBC 122881 : if (avlauncher_needs_signal)
1735 : : {
5347 alvherre@alvh.no-ip. 1736 :UBC 0 : avlauncher_needs_signal = false;
1737 [ # # ]: 0 : if (AutoVacPID != 0)
5340 tgl@sss.pgh.pa.us 1738 : 0 : kill(AutoVacPID, SIGUSR2);
1739 : : }
1740 : :
1741 : : /* If we need to start a WAL receiver, try to do that now */
2484 tgl@sss.pgh.pa.us 1742 [ + + ]:CBC 122881 : if (WalReceiverRequested)
1743 : 163 : MaybeStartWalReceiver();
1744 : :
1745 : : /* If we need to start a WAL summarizer, try to do that now */
116 rhaas@postgresql.org 1746 :GNC 122881 : MaybeStartWalSummarizer();
1747 : :
1748 : : /* Get other worker processes running, if needed */
4147 alvherre@alvh.no-ip. 1749 [ + - + + ]:CBC 122881 : if (StartWorkerNeeded || HaveCrashedWorker)
2545 tgl@sss.pgh.pa.us 1750 : 3203 : maybe_start_bgworkers();
1751 : :
1752 : : #ifdef HAVE_PTHREAD_IS_THREADED_NP
1753 : :
1754 : : /*
1755 : : * With assertions enabled, check regularly for appearance of
1756 : : * additional threads. All builds check at start and exit.
1757 : : */
1758 : : Assert(pthread_is_threaded_np() == 0);
1759 : : #endif
1760 : :
1761 : : /*
1762 : : * Lastly, check to see if it's time to do some things that we don't
1763 : : * want to do every single time through the loop, because they're a
1764 : : * bit expensive. Note that there's up to a minute of slop in when
1765 : : * these tasks will be performed, since DetermineSleepTime() will let
1766 : : * us sleep at most that long; except for SIGKILL timeout which has
1767 : : * special-case logic there.
1768 : : */
3110 1769 : 122881 : now = time(NULL);
1770 : :
1771 : : /*
1772 : : * If we already sent SIGQUIT to children and they are slow to shut
1773 : : * down, it's time to send them SIGKILL (or SIGABRT if requested).
1774 : : * This doesn't happen normally, but under certain conditions backends
1775 : : * can get stuck while shutting down. This is a last measure to get
1776 : : * them unwedged.
1777 : : *
1778 : : * Note we also do this during recovery from a process crash.
1779 : : */
510 1780 [ + + + + ]: 122881 : if ((Shutdown >= ImmediateShutdown || FatalError) &&
3222 1781 [ + + ]: 1691 : AbortStartTime != 0 &&
1782 [ - + ]: 1683 : (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1783 : : {
1784 : : /* We were gentle with them before. Not anymore */
1311 tgl@sss.pgh.pa.us 1785 [ # # # # ]:UBC 0 : ereport(LOG,
1786 : : /* translator: %s is SIGKILL or SIGABRT */
1787 : : (errmsg("issuing %s to recalcitrant children",
1788 : : send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
510 1789 [ # # ]: 0 : TerminateChildren(send_abort_for_kill ? SIGABRT : SIGKILL);
1790 : : /* reset flag so we don't SIGKILL again */
3844 alvherre@alvh.no-ip. 1791 : 0 : AbortStartTime = 0;
1792 : : }
1793 : :
1794 : : /*
1795 : : * Once a minute, verify that postmaster.pid hasn't been removed or
1796 : : * overwritten. If it has, we force a shutdown. This avoids having
1797 : : * postmasters and child processes hanging around after their database
1798 : : * is gone, and maybe causing problems if a new database cluster is
1799 : : * created in the same place. It also provides some protection
1800 : : * against a DBA foolishly removing postmaster.pid and manually
1801 : : * starting a new postmaster. Data corruption is likely to ensue from
1802 : : * that anyway, but we can minimize the damage by aborting ASAP.
1803 : : */
3113 tgl@sss.pgh.pa.us 1804 [ + + ]:CBC 122881 : if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1805 : : {
1806 [ - + ]: 12 : if (!RecheckDataDirLockFile())
1807 : : {
3113 tgl@sss.pgh.pa.us 1808 [ # # ]:UBC 0 : ereport(LOG,
1809 : : (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1810 : 0 : kill(MyProcPid, SIGQUIT);
1811 : : }
3113 tgl@sss.pgh.pa.us 1812 :CBC 12 : last_lockfile_recheck_time = now;
1813 : : }
1814 : :
1815 : : /*
1816 : : * Touch Unix socket and lock files every 58 minutes, to ensure that
1817 : : * they are not removed by overzealous /tmp-cleaning tasks. We assume
1818 : : * no one runs cleaners with cutoff times of less than an hour ...
1819 : : */
1820 [ - + ]: 122881 : if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1821 : : {
3113 tgl@sss.pgh.pa.us 1822 :UBC 0 : TouchSocketFiles();
1823 : 0 : TouchSocketLockFiles();
1824 : 0 : last_touch_time = now;
1825 : : }
1826 : : }
1827 : : }
1828 : :
1829 : : /*
1830 : : * The client has sent a cancel request packet, not a normal
1831 : : * start-a-new-connection packet. Perform the necessary processing.
1832 : : * Nothing is sent back to the client.
1833 : : */
1834 : : void
27 heikki.linnakangas@i 1835 :GNC 10 : processCancelRequest(int backendPID, int32 cancelAuthCode)
1836 : : {
1837 : : Backend *bp;
1838 : :
1839 : : #ifndef EXEC_BACKEND
1840 : : dlist_iter iter;
1841 : : #else
1842 : : int i;
1843 : : #endif
1844 : :
1845 : : /*
1846 : : * See if we have a matching backend. In the EXEC_BACKEND case, we can no
1847 : : * longer access the postmaster's own backend list, and must rely on the
1848 : : * duplicate array in shared memory.
1849 : : */
1850 : : #ifndef EXEC_BACKEND
4198 alvherre@alvh.no-ip. 1851 [ + - + - ]:CBC 18 : dlist_foreach(iter, &BackendList)
1852 : : {
1853 : 18 : bp = dlist_container(Backend, elem, iter.cur);
1854 : : #else
1855 : : for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
1856 : : {
1857 : : bp = (Backend *) &ShmemBackendArray[i];
1858 : : #endif
9411 scrappy@hub.org 1859 [ + + ]: 18 : if (bp->pid == backendPID)
1860 : : {
1861 [ + - ]: 10 : if (bp->cancel_key == cancelAuthCode)
1862 : : {
1863 : : /* Found a match; signal that backend to cancel current op */
7551 tgl@sss.pgh.pa.us 1864 [ - + ]: 10 : ereport(DEBUG2,
1865 : : (errmsg_internal("processing cancel request: sending SIGINT to process %d",
1866 : : backendPID)));
6354 1867 : 10 : signal_child(bp->pid, SIGINT);
1868 : : }
1869 : : else
1870 : : /* Right PID, wrong key: no way, Jose */
6005 neilc@samurai.com 1871 [ # # ]:UBC 0 : ereport(LOG,
1872 : : (errmsg("wrong key in cancel request for process %d",
1873 : : backendPID)));
8334 peter_e@gmx.net 1874 :CBC 10 : return;
1875 : : }
1876 : : #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
1877 : : }
1878 : : #else
1879 : : }
1880 : : #endif
1881 : :
1882 : : /* No matching backend */
6005 neilc@samurai.com 1883 [ # # ]:UBC 0 : ereport(LOG,
1884 : : (errmsg("PID %d in cancel request did not match any process",
1885 : : backendPID)));
1886 : : }
1887 : :
1888 : : /*
1889 : : * canAcceptConnections --- check to see if database state allows connections
1890 : : * of the specified type. backend_type can be BACKEND_TYPE_NORMAL,
1891 : : * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER. (Note that we don't yet
1892 : : * know whether a NORMAL connection might turn into a walsender.)
1893 : : */
1894 : : static CAC_state
1651 tgl@sss.pgh.pa.us 1895 :CBC 14748 : canAcceptConnections(int backend_type)
1896 : : {
4900 1897 : 14748 : CAC_state result = CAC_OK;
1898 : :
1899 : : /*
1900 : : * Can't start backends when in startup/shutdown/inconsistent recovery
1901 : : * state. We treat autovac workers the same as user backends for this
1902 : : * purpose. However, bgworkers are excluded from this test; we expect
1903 : : * bgworker_should_start_now() decided whether the DB state allows them.
1904 : : */
1339 1905 [ + + + + : 14748 : if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
+ - ]
1906 : : backend_type != BACKEND_TYPE_BGWORKER)
1907 : : {
1908 [ + + ]: 123 : if (Shutdown > NoShutdown)
5995 bruce@momjian.us 1909 : 14 : return CAC_SHUTDOWN; /* shutdown is pending */
1116 fujii@postgresql.org 1910 [ + + + + ]: 109 : else if (!FatalError && pmState == PM_STARTUP)
4753 bruce@momjian.us 1911 : 104 : return CAC_STARTUP; /* normal startup */
1116 fujii@postgresql.org 1912 [ + + + - ]: 5 : else if (!FatalError && pmState == PM_RECOVERY)
1913 : 1 : return CAC_NOTCONSISTENT; /* not yet at consistent recovery
1914 : : * state */
1915 : : else
4900 tgl@sss.pgh.pa.us 1916 : 4 : return CAC_RECOVERY; /* else must be crash recovery */
1917 : : }
1918 : :
1919 : : /*
1920 : : * "Smart shutdown" restrictions are applied only to normal connections,
1921 : : * not to autovac workers or bgworkers.
1922 : : */
739 sfrost@snowman.net 1923 [ - + - - ]: 14625 : if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
739 sfrost@snowman.net 1924 :UBC 0 : return CAC_SHUTDOWN; /* shutdown is pending */
1925 : :
1926 : : /*
1927 : : * Don't start too many children.
1928 : : *
1929 : : * We allow more connections here than we can have backends because some
1930 : : * might still be authenticating; they might fail auth, or some existing
1931 : : * backend might exit before the auth cycle is completed. The exact
1932 : : * MaxBackends limit is enforced when a new backend tries to join the
1933 : : * shared-inval backend array.
1934 : : *
1935 : : * The limit here must match the sizes of the per-child-process arrays;
1936 : : * see comments for MaxLivePostmasterChildren().
1937 : : */
5203 heikki.linnakangas@i 1938 [ - + ]:CBC 14625 : if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
4900 tgl@sss.pgh.pa.us 1939 :UBC 0 : result = CAC_TOOMANY;
1940 : :
4900 tgl@sss.pgh.pa.us 1941 :CBC 14625 : return result;
1942 : : }
1943 : :
1944 : : /*
1945 : : * ClosePostmasterPorts -- close all the postmaster's open sockets
1946 : : *
1947 : : * This is called during child process startup to release file descriptors
1948 : : * that are not needed by that child process. The postmaster still has
1949 : : * them open, of course.
1950 : : *
1951 : : * Note: we pass am_syslogger as a boolean because we don't want to set
1952 : : * the global variable yet when this is called.
1953 : : */
1954 : : void
7192 1955 : 19693 : ClosePostmasterPorts(bool am_syslogger)
1956 : : {
1957 : : /* Release resources held by the postmaster's WaitEventSet. */
458 tmunro@postgresql.or 1958 [ + + ]: 19693 : if (pm_wait_set)
1959 : : {
1960 : 17499 : FreeWaitEventSetAfterFork(pm_wait_set);
1961 : 17499 : pm_wait_set = NULL;
1962 : : }
1963 : :
1964 : : #ifndef WIN32
1965 : :
1966 : : /*
1967 : : * Close the write end of postmaster death watch pipe. It's important to
1968 : : * do this as early as possible, so that if postmaster dies, others won't
1969 : : * think that it's still running because we're holding the pipe open.
1970 : : */
1744 peter@eisentraut.org 1971 [ - + ]: 19693 : if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0)
4664 heikki.linnakangas@i 1972 [ # # ]:UBC 0 : ereport(FATAL,
1973 : : (errcode_for_file_access(),
1974 : : errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
4664 heikki.linnakangas@i 1975 :CBC 19693 : postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
1976 : : /* Notify fd.c that we released one pipe FD. */
1511 tgl@sss.pgh.pa.us 1977 : 19693 : ReleaseExternalFD();
1978 : : #endif
1979 : :
1980 : : /*
1981 : : * Close the postmaster's listen sockets. These aren't tracked by fd.c,
1982 : : * so we don't call ReleaseExternalFD() here.
1983 : : *
1984 : : * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
1985 : : * EXEC_BACKEND mode.
1986 : : */
1987 : : #ifndef EXEC_BACKEND
191 heikki.linnakangas@i 1988 [ + + ]:GNC 19693 : if (ListenSockets)
1989 : : {
1990 [ + + ]: 40207 : for (int i = 0; i < NumListenSockets; i++)
1991 : : {
33 1992 [ - + ]: 20515 : if (closesocket(ListenSockets[i]) != 0)
33 heikki.linnakangas@i 1993 [ # # ]:UNC 0 : elog(LOG, "could not close listen socket: %m");
1994 : : }
191 heikki.linnakangas@i 1995 :GNC 19692 : pfree(ListenSockets);
1996 : : }
192 1997 : 19693 : NumListenSockets = 0;
1998 : 19693 : ListenSockets = NULL;
1999 : : #endif
2000 : :
2001 : : /*
2002 : : * If using syslogger, close the read side of the pipe. We don't bother
2003 : : * tracking this in fd.c, either.
2004 : : */
7192 tgl@sss.pgh.pa.us 2005 [ + + ]:CBC 19693 : if (!am_syslogger)
2006 : : {
2007 : : #ifndef WIN32
2008 [ + + ]: 19692 : if (syslogPipe[0] >= 0)
2009 : 14 : close(syslogPipe[0]);
2010 : 19692 : syslogPipe[0] = -1;
2011 : : #else
2012 : : if (syslogPipe[0])
2013 : : CloseHandle(syslogPipe[0]);
2014 : : syslogPipe[0] = 0;
2015 : : #endif
2016 : : }
2017 : :
2018 : : #ifdef USE_BONJOUR
2019 : : /* If using Bonjour, close the connection to the mDNS daemon */
2020 : : if (bonjour_sdref)
2021 : : close(DNSServiceRefSockFD(bonjour_sdref));
2022 : : #endif
8466 2023 : 19693 : }
2024 : :
2025 : :
2026 : : /*
2027 : : * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2028 : : *
2029 : : * Called early in the postmaster and every backend.
2030 : : */
2031 : : void
2004 tmunro@postgresql.or 2032 : 20621 : InitProcessGlobals(void)
2033 : : {
2034 : 20621 : MyProcPid = getpid();
2035 : 20621 : MyStartTimestamp = GetCurrentTimestamp();
2036 : 20621 : MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2037 : :
2038 : : /*
2039 : : * Set a different global seed in every process. We want something
2040 : : * unpredictable, so if possible, use high-quality random bits for the
2041 : : * seed. Otherwise, fall back to a seed based on timestamp and PID.
2042 : : */
868 tgl@sss.pgh.pa.us 2043 [ + - - + ]: 20621 : if (unlikely(!pg_prng_strong_seed(&pg_global_prng_state)))
2044 : : {
2045 : : uint64 rseed;
2046 : :
2047 : : /*
2048 : : * Since PIDs and timestamps tend to change more frequently in their
2049 : : * least significant bits, shift the timestamp left to allow a larger
2050 : : * total number of seeds in a given time period. Since that would
2051 : : * leave only 20 bits of the timestamp that cycle every ~1 second,
2052 : : * also mix in some higher bits.
2053 : : */
1933 tgl@sss.pgh.pa.us 2054 :UBC 0 : rseed = ((uint64) MyProcPid) ^
1977 tmunro@postgresql.or 2055 : 0 : ((uint64) MyStartTimestamp << 12) ^
1933 tgl@sss.pgh.pa.us 2056 : 0 : ((uint64) MyStartTimestamp >> 20);
2057 : :
868 2058 : 0 : pg_prng_seed(&pg_global_prng_state, rseed);
2059 : : }
2060 : :
2061 : : /*
2062 : : * Also make sure that we've set a good seed for random(3). Use of that
2063 : : * is deprecated in core Postgres, but extensions might use it.
2064 : : */
2065 : : #ifndef WIN32
868 tgl@sss.pgh.pa.us 2066 :CBC 20621 : srandom(pg_prng_uint32(&pg_global_prng_state));
2067 : : #endif
2004 tmunro@postgresql.or 2068 : 20621 : }
2069 : :
2070 : : /*
2071 : : * Child processes use SIGUSR1 to notify us of 'pmsignals'. pg_ctl uses
2072 : : * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2073 : : */
2074 : : static void
458 2075 : 91987 : handle_pm_pmsignal_signal(SIGNAL_ARGS)
2076 : : {
2077 : 91987 : pending_pm_pmsignal = true;
2078 : 91987 : SetLatch(MyLatch);
2079 : 91987 : }
2080 : :
2081 : : /*
2082 : : * pg_ctl uses SIGHUP to request a reload of the configuration files.
2083 : : */
2084 : : static void
2085 : 127 : handle_pm_reload_request_signal(SIGNAL_ARGS)
2086 : : {
2087 : 127 : pending_pm_reload_request = true;
2088 : 127 : SetLatch(MyLatch);
2089 : 127 : }
2090 : :
2091 : : /*
2092 : : * Re-read config files, and tell children to do same.
2093 : : */
2094 : : static void
2095 : 127 : process_pm_reload_request(void)
2096 : : {
2097 : 127 : pending_pm_reload_request = false;
2098 : :
2099 [ + + ]: 127 : ereport(DEBUG2,
2100 : : (errmsg_internal("postmaster received reload request signal")));
2101 : :
8340 tgl@sss.pgh.pa.us 2102 [ + - ]: 127 : if (Shutdown <= SmartShutdown)
2103 : : {
7572 2104 [ + - ]: 127 : ereport(LOG,
2105 : : (errmsg("received SIGHUP, reloading configuration files")));
8197 2106 : 127 : ProcessConfigFile(PGC_SIGHUP);
7647 bruce@momjian.us 2107 : 127 : SignalChildren(SIGHUP);
5520 heikki.linnakangas@i 2108 [ + + ]: 127 : if (StartupPID != 0)
2109 : 27 : signal_child(StartupPID, SIGHUP);
7260 tgl@sss.pgh.pa.us 2110 [ + - ]: 127 : if (BgWriterPID != 0)
6354 2111 : 127 : signal_child(BgWriterPID, SIGHUP);
4548 simon@2ndQuadrant.co 2112 [ + - ]: 127 : if (CheckpointerPID != 0)
2113 : 127 : signal_child(CheckpointerPID, SIGHUP);
6109 tgl@sss.pgh.pa.us 2114 [ + + ]: 127 : if (WalWriterPID != 0)
2115 : 100 : signal_child(WalWriterPID, SIGHUP);
5203 heikki.linnakangas@i 2116 [ + + ]: 127 : if (WalReceiverPID != 0)
2117 : 24 : signal_child(WalReceiverPID, SIGHUP);
116 rhaas@postgresql.org 2118 [ - + ]:GNC 127 : if (WalSummarizerPID != 0)
116 rhaas@postgresql.org 2119 :UNC 0 : signal_child(WalSummarizerPID, SIGHUP);
6849 tgl@sss.pgh.pa.us 2120 [ + + ]:CBC 127 : if (AutoVacPID != 0)
6354 2121 : 91 : signal_child(AutoVacPID, SIGHUP);
7209 2122 [ + + ]: 127 : if (PgArchPID != 0)
6354 2123 : 4 : signal_child(PgArchPID, SIGHUP);
7192 2124 [ - + ]: 127 : if (SysLoggerPID != 0)
6354 tgl@sss.pgh.pa.us 2125 :UBC 0 : signal_child(SysLoggerPID, SIGHUP);
52 akapila@postgresql.o 2126 [ + + ]:GNC 127 : if (SlotSyncWorkerPID != 0)
2127 : 1 : signal_child(SlotSyncWorkerPID, SIGHUP);
2128 : :
2129 : : /* Reload authentication config files too */
5690 magnus@hagander.net 2130 [ - + ]:CBC 127 : if (!load_hba())
2659 tgl@sss.pgh.pa.us 2131 [ # # ]:UBC 0 : ereport(LOG,
2132 : : /* translator: %s is a configuration file */
2133 : : (errmsg("%s was not reloaded", HbaFileName)));
2134 : :
4223 heikki.linnakangas@i 2135 [ - + ]:CBC 127 : if (!load_ident())
2659 tgl@sss.pgh.pa.us 2136 [ # # ]:UBC 0 : ereport(LOG,
2137 : : (errmsg("%s was not reloaded", IdentFileName)));
2138 : :
2139 : : #ifdef USE_SSL
2140 : : /* Reload SSL configuration as well */
2659 tgl@sss.pgh.pa.us 2141 [ + + ]:CBC 127 : if (EnableSSL)
2142 : : {
2659 tgl@sss.pgh.pa.us 2143 [ + - ]:GBC 2 : if (secure_initialize(false) == 0)
2144 : 2 : LoadedSSL = true;
2145 : : else
2659 tgl@sss.pgh.pa.us 2146 [ # # ]:UBC 0 : ereport(LOG,
2147 : : (errmsg("SSL configuration was not reloaded")));
2148 : : }
2149 : : else
2150 : : {
2659 tgl@sss.pgh.pa.us 2151 :CBC 125 : secure_destroy();
2152 : 125 : LoadedSSL = false;
2153 : : }
2154 : : #endif
2155 : :
2156 : : #ifdef EXEC_BACKEND
2157 : : /* Update the starting-point file for future children */
2158 : : write_nondefault_variables(PGC_SIGHUP);
2159 : : #endif
2160 : : }
458 tmunro@postgresql.or 2161 : 127 : }
2162 : :
2163 : : /*
2164 : : * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2165 : : * shutdown.
2166 : : */
2167 : : static void
2168 : 725 : handle_pm_shutdown_request_signal(SIGNAL_ARGS)
2169 : : {
2170 [ + + + - ]: 725 : switch (postgres_signal_arg)
2171 : : {
2172 : 17 : case SIGTERM:
2173 : : /* smart is implied if the other two flags aren't set */
2174 : 17 : pending_pm_shutdown_request = true;
2175 : 17 : break;
2176 : 409 : case SIGINT:
2177 : 409 : pending_pm_fast_shutdown_request = true;
2178 : 409 : pending_pm_shutdown_request = true;
2179 : 409 : break;
2180 : 299 : case SIGQUIT:
2181 : 299 : pending_pm_immediate_shutdown_request = true;
2182 : 299 : pending_pm_shutdown_request = true;
2183 : 299 : break;
2184 : : }
2185 : 725 : SetLatch(MyLatch);
8719 peter_e@gmx.net 2186 : 725 : }
2187 : :
2188 : : /*
2189 : : * Process shutdown request.
2190 : : */
2191 : : static void
458 tmunro@postgresql.or 2192 : 725 : process_pm_shutdown_request(void)
2193 : : {
2194 : : int mode;
2195 : :
7551 tgl@sss.pgh.pa.us 2196 [ + + ]: 725 : ereport(DEBUG2,
2197 : : (errmsg_internal("postmaster received shutdown request signal")));
2198 : :
458 tmunro@postgresql.or 2199 : 725 : pending_pm_shutdown_request = false;
2200 : :
2201 : : /*
2202 : : * If more than one shutdown request signal arrived since the last server
2203 : : * loop, take the one that is the most immediate. That matches the
2204 : : * priority that would apply if we processed them one by one in any order.
2205 : : */
2206 [ + + ]: 725 : if (pending_pm_immediate_shutdown_request)
2207 : : {
2208 : 299 : pending_pm_immediate_shutdown_request = false;
2209 : 299 : pending_pm_fast_shutdown_request = false;
2210 : 299 : mode = ImmediateShutdown;
2211 : : }
2212 [ + + ]: 426 : else if (pending_pm_fast_shutdown_request)
2213 : : {
2214 : 409 : pending_pm_fast_shutdown_request = false;
2215 : 409 : mode = FastShutdown;
2216 : : }
2217 : : else
2218 : 17 : mode = SmartShutdown;
2219 : :
2220 [ + + + - ]: 725 : switch (mode)
2221 : : {
2222 : 17 : case SmartShutdown:
2223 : :
2224 : : /*
2225 : : * Smart Shutdown:
2226 : : *
2227 : : * Wait for children to end their work, then shut down.
2228 : : */
8957 vadim4o@yahoo.com 2229 [ - + ]: 17 : if (Shutdown >= SmartShutdown)
8197 tgl@sss.pgh.pa.us 2230 :UBC 0 : break;
8957 vadim4o@yahoo.com 2231 :CBC 17 : Shutdown = SmartShutdown;
7572 tgl@sss.pgh.pa.us 2232 [ + - ]: 17 : ereport(LOG,
2233 : : (errmsg("received smart shutdown request")));
2234 : :
2235 : : /* Report status */
2482 2236 : 17 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2237 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 2238 : 17 : sd_notify(0, "STOPPING=1");
2239 : : #endif
2240 : :
2241 : : /*
2242 : : * If we reached normal running, we go straight to waiting for
2243 : : * client backends to exit. If already in PM_STOP_BACKENDS or a
2244 : : * later state, do not change it.
2245 : : */
739 sfrost@snowman.net 2246 [ - + - - ]: 17 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2247 : 17 : connsAllowed = false;
1339 tgl@sss.pgh.pa.us 2248 [ # # # # ]:UBC 0 : else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2249 : : {
2250 : : /* There should be no clients, so proceed to stop children */
2251 : 0 : pmState = PM_STOP_BACKENDS;
2252 : : }
2253 : :
2254 : : /*
2255 : : * Now wait for online backup mode to end and backends to exit. If
2256 : : * that is already the case, PostmasterStateMachine will take the
2257 : : * next step.
2258 : : */
6093 tgl@sss.pgh.pa.us 2259 :CBC 17 : PostmasterStateMachine();
8197 2260 : 17 : break;
2261 : :
458 tmunro@postgresql.or 2262 : 409 : case FastShutdown:
2263 : :
2264 : : /*
2265 : : * Fast Shutdown:
2266 : : *
2267 : : * Abort all children with SIGTERM (rollback active transactions
2268 : : * and exit) and shut down when they are gone.
2269 : : */
8957 vadim4o@yahoo.com 2270 [ - + ]: 409 : if (Shutdown >= FastShutdown)
8197 tgl@sss.pgh.pa.us 2271 :UBC 0 : break;
7368 tgl@sss.pgh.pa.us 2272 :CBC 409 : Shutdown = FastShutdown;
7572 2273 [ + - ]: 409 : ereport(LOG,
2274 : : (errmsg("received fast shutdown request")));
2275 : :
2276 : : /* Report status */
2482 2277 : 409 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2278 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 2279 : 409 : sd_notify(0, "STOPPING=1");
2280 : : #endif
2281 : :
2055 michael@paquier.xyz 2282 [ + - - + ]: 409 : if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2283 : : {
2284 : : /* Just shut down background processes silently */
1339 tgl@sss.pgh.pa.us 2285 :UBC 0 : pmState = PM_STOP_BACKENDS;
2286 : : }
5043 rhaas@postgresql.org 2287 [ + + ]:CBC 409 : else if (pmState == PM_RUN ||
2288 [ + - ]: 47 : pmState == PM_HOT_STANDBY)
2289 : : {
2290 : : /* Report that we're about to zap live client sessions */
6093 tgl@sss.pgh.pa.us 2291 [ + - ]: 409 : ereport(LOG,
2292 : : (errmsg("aborting any active transactions")));
1339 2293 : 409 : pmState = PM_STOP_BACKENDS;
2294 : : }
2295 : :
2296 : : /*
2297 : : * PostmasterStateMachine will issue any necessary signals, or
2298 : : * take the next step if no child processes need to be killed.
2299 : : */
6093 2300 : 409 : PostmasterStateMachine();
8197 2301 : 409 : break;
2302 : :
458 tmunro@postgresql.or 2303 : 299 : case ImmediateShutdown:
2304 : :
2305 : : /*
2306 : : * Immediate Shutdown:
2307 : : *
2308 : : * abort all children with SIGQUIT, wait for them to exit,
2309 : : * terminate remaining ones with SIGKILL, then exit without
2310 : : * attempt to properly shut down the data base system.
2311 : : */
3943 alvherre@alvh.no-ip. 2312 [ - + ]: 299 : if (Shutdown >= ImmediateShutdown)
3943 alvherre@alvh.no-ip. 2313 :UBC 0 : break;
3943 alvherre@alvh.no-ip. 2314 :CBC 299 : Shutdown = ImmediateShutdown;
7572 tgl@sss.pgh.pa.us 2315 [ + - ]: 299 : ereport(LOG,
2316 : : (errmsg("received immediate shutdown request")));
2317 : :
2318 : : /* Report status */
2482 2319 : 299 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2320 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 2321 : 299 : sd_notify(0, "STOPPING=1");
2322 : : #endif
2323 : :
2324 : : /* tell children to shut down ASAP */
2325 : : /* (note we don't apply send_abort_for_crash here) */
1207 tgl@sss.pgh.pa.us 2326 : 299 : SetQuitSignalReason(PMQUIT_FOR_STOP);
3943 alvherre@alvh.no-ip. 2327 : 299 : TerminateChildren(SIGQUIT);
2328 : 299 : pmState = PM_WAIT_BACKENDS;
2329 : :
2330 : : /* set stopwatch for them to die */
2331 : 299 : AbortStartTime = time(NULL);
2332 : :
2333 : : /*
2334 : : * Now wait for backends to exit. If there are none,
2335 : : * PostmasterStateMachine will take the next step.
2336 : : */
2337 : 299 : PostmasterStateMachine();
8957 vadim4o@yahoo.com 2338 : 299 : break;
2339 : : }
458 tmunro@postgresql.or 2340 : 725 : }
2341 : :
2342 : : static void
2343 : 19543 : handle_pm_child_exit_signal(SIGNAL_ARGS)
2344 : : {
2345 : 19543 : pending_pm_child_exit = true;
2346 : 19543 : SetLatch(MyLatch);
10141 scrappy@hub.org 2347 : 19543 : }
2348 : :
2349 : : /*
2350 : : * Cleanup after a child process dies.
2351 : : */
2352 : : static void
458 tmunro@postgresql.or 2353 : 19485 : process_pm_child_exit(void)
2354 : : {
2355 : : int pid; /* process id of dead child process */
2356 : : int exitstatus; /* its exit status */
2357 : :
2358 : 19485 : pending_pm_child_exit = false;
2359 : :
7551 tgl@sss.pgh.pa.us 2360 [ - + ]: 19485 : ereport(DEBUG4,
2361 : : (errmsg_internal("reaping dead processes")));
2362 : :
4301 2363 [ + + ]: 39318 : while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2364 : : {
2365 : : /*
2366 : : * Check if this child was a startup process.
2367 : : */
6093 2368 [ + + ]: 19833 : if (pid == StartupPID)
2369 : : {
7260 2370 : 733 : StartupPID = 0;
2371 : :
2372 : : /*
2373 : : * Startup process exited in response to a shutdown request (or it
2374 : : * completed normally regardless of the shutdown request).
2375 : : */
4162 2376 [ + + ]: 733 : if (Shutdown > NoShutdown &&
2377 [ + - + - : 91 : (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
+ + ]
2378 : : {
3202 2379 : 47 : StartupStatus = STARTUP_NOT_RUNNING;
4162 2380 : 47 : pmState = PM_WAIT_BACKENDS;
2381 : : /* PostmasterStateMachine logic does the rest */
3428 simon@2ndQuadrant.co 2382 : 47 : continue;
2383 : : }
2384 : :
2385 [ + - - + ]: 686 : if (EXIT_STATUS_3(exitstatus))
2386 : : {
3428 simon@2ndQuadrant.co 2387 [ # # ]:UBC 0 : ereport(LOG,
2388 : : (errmsg("shutdown at recovery target")));
3202 tgl@sss.pgh.pa.us 2389 : 0 : StartupStatus = STARTUP_NOT_RUNNING;
1339 2390 : 0 : Shutdown = Max(Shutdown, SmartShutdown);
3428 simon@2ndQuadrant.co 2391 : 0 : TerminateChildren(SIGTERM);
2392 : 0 : pmState = PM_WAIT_BACKENDS;
2393 : : /* PostmasterStateMachine logic does the rest */
4162 tgl@sss.pgh.pa.us 2394 : 0 : continue;
2395 : : }
2396 : :
2397 : : /*
2398 : : * Unexpected exit of startup process (including FATAL exit)
2399 : : * during PM_STARTUP is treated as catastrophic. There are no
2400 : : * other processes running yet, so we can just exit.
2401 : : */
1693 tgl@sss.pgh.pa.us 2402 [ + + ]:CBC 686 : if (pmState == PM_STARTUP &&
2403 [ + - ]: 501 : StartupStatus != STARTUP_SIGNALED &&
2404 [ - + ]: 501 : !EXIT_STATUS_0(exitstatus))
2405 : : {
6991 bruce@momjian.us 2406 :UBC 0 : LogChildExit(LOG, _("startup process"),
2407 : : pid, exitstatus);
7572 tgl@sss.pgh.pa.us 2408 [ # # ]: 0 : ereport(LOG,
2409 : : (errmsg("aborting startup due to startup process failure")));
8537 2410 : 0 : ExitPostmaster(1);
2411 : : }
2412 : :
2413 : : /*
2414 : : * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2415 : : * the startup process is catastrophic, so kill other children,
2416 : : * and set StartupStatus so we don't try to reinitialize after
2417 : : * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2418 : : * then we previously sent the startup process a SIGQUIT; so
2419 : : * that's probably the reason it died, and we do want to try to
2420 : : * restart in that case.
2421 : : *
2422 : : * This stanza also handles the case where we sent a SIGQUIT
2423 : : * during PM_STARTUP due to some dead_end child crashing: in that
2424 : : * situation, if the startup process dies on the SIGQUIT, we need
2425 : : * to transition to PM_WAIT_BACKENDS state which will allow
2426 : : * PostmasterStateMachine to restart the startup process. (On the
2427 : : * other hand, the startup process might complete normally, if we
2428 : : * were too late with the SIGQUIT. In that case we'll fall
2429 : : * through and commence normal operations.)
2430 : : */
5534 heikki.linnakangas@i 2431 [ + + ]:CBC 686 : if (!EXIT_STATUS_0(exitstatus))
2432 : : {
3202 tgl@sss.pgh.pa.us 2433 [ + + ]: 47 : if (StartupStatus == STARTUP_SIGNALED)
2434 : : {
2435 : 44 : StartupStatus = STARTUP_NOT_RUNNING;
1693 2436 [ - + ]: 44 : if (pmState == PM_STARTUP)
1693 tgl@sss.pgh.pa.us 2437 :UBC 0 : pmState = PM_WAIT_BACKENDS;
2438 : : }
2439 : : else
3202 tgl@sss.pgh.pa.us 2440 :CBC 3 : StartupStatus = STARTUP_CRASHED;
5534 heikki.linnakangas@i 2441 : 47 : HandleChildCrash(pid, exitstatus,
2442 : 47 : _("startup process"));
6093 tgl@sss.pgh.pa.us 2443 : 47 : continue;
2444 : : }
2445 : :
2446 : : /*
2447 : : * Startup succeeded, commence normal operations
2448 : : */
3202 2449 : 639 : StartupStatus = STARTUP_NOT_RUNNING;
5529 heikki.linnakangas@i 2450 : 639 : FatalError = false;
1693 tgl@sss.pgh.pa.us 2451 : 639 : AbortStartTime = 0;
5072 rhaas@postgresql.org 2452 : 639 : ReachedNormalRunning = true;
5529 heikki.linnakangas@i 2453 : 639 : pmState = PM_RUN;
739 sfrost@snowman.net 2454 : 639 : connsAllowed = true;
2455 : :
2456 : : /*
2457 : : * Crank up the background tasks, if we didn't do that already
2458 : : * when we entered consistent recovery state. It doesn't matter
2459 : : * if this fails, we'll just try again later.
2460 : : */
4548 simon@2ndQuadrant.co 2461 [ - + ]: 639 : if (CheckpointerPID == 0)
41 heikki.linnakangas@i 2462 :UNC 0 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
4335 simon@2ndQuadrant.co 2463 [ - + ]:CBC 639 : if (BgWriterPID == 0)
41 heikki.linnakangas@i 2464 :UNC 0 : BgWriterPID = StartChildProcess(B_BG_WRITER);
4356 tgl@sss.pgh.pa.us 2465 [ + - ]:CBC 639 : if (WalWriterPID == 0)
41 heikki.linnakangas@i 2466 :GNC 639 : WalWriterPID = StartChildProcess(B_WAL_WRITER);
116 rhaas@postgresql.org 2467 : 639 : MaybeStartWalSummarizer();
2468 : :
2469 : : /*
2470 : : * Likewise, start other special children as needed. In a restart
2471 : : * situation, some of them may be alive already.
2472 : : */
4738 bruce@momjian.us 2473 [ + + + + :CBC 639 : if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
+ - ]
27 heikki.linnakangas@i 2474 :GNC 558 : AutoVacPID = StartChildProcess(B_AUTOVAC_LAUNCHER);
3229 fujii@postgresql.org 2475 [ + + - + :CBC 639 : if (PgArchStartupAllowed() && PgArchPID == 0)
+ + - + -
+ - - - +
- - - - +
- + + ]
41 heikki.linnakangas@i 2476 :GNC 42 : PgArchPID = StartChildProcess(B_ARCHIVER);
52 akapila@postgresql.o 2477 : 639 : MaybeStartSlotSyncWorker();
2478 : :
2479 : : /* workers may be scheduled to start now */
2545 tgl@sss.pgh.pa.us 2480 :CBC 639 : maybe_start_bgworkers();
2481 : :
2482 : : /* at this point we are really open for business */
5529 heikki.linnakangas@i 2483 [ + - ]: 639 : ereport(LOG,
2484 : : (errmsg("database system is ready to accept connections")));
2485 : :
2486 : : /* Report status */
2482 tgl@sss.pgh.pa.us 2487 : 639 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
2488 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 2489 : 639 : sd_notify(0, "READY=1");
2490 : : #endif
2491 : :
5527 heikki.linnakangas@i 2492 : 639 : continue;
2493 : : }
2494 : :
2495 : : /*
2496 : : * Was it the bgwriter? Normal exit can be ignored; we'll start a new
2497 : : * one at the next iteration of the postmaster's main loop, if
2498 : : * necessary. Any other exit condition is treated as a crash.
2499 : : */
6093 tgl@sss.pgh.pa.us 2500 [ + + ]: 19100 : if (pid == BgWriterPID)
2501 : : {
7244 2502 : 733 : BgWriterPID = 0;
4548 simon@2ndQuadrant.co 2503 [ + + ]: 733 : if (!EXIT_STATUS_0(exitstatus))
2504 : 307 : HandleChildCrash(pid, exitstatus,
2505 : 307 : _("background writer process"));
2506 : 733 : continue;
2507 : : }
2508 : :
2509 : : /*
2510 : : * Was it the checkpointer?
2511 : : */
2512 [ + + ]: 18367 : if (pid == CheckpointerPID)
2513 : : {
2514 : 733 : CheckpointerPID = 0;
6093 tgl@sss.pgh.pa.us 2515 [ + + + - ]: 733 : if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
2516 : : {
2517 : : /*
2518 : : * OK, we saw normal exit of the checkpointer after it's been
2519 : : * told to shut down. We expect that it wrote a shutdown
2520 : : * checkpoint. (If for some reason it didn't, recovery will
2521 : : * occur on next postmaster start.)
2522 : : *
2523 : : * At this point we should have no normal backend children
2524 : : * left (else we'd not be in PM_SHUTDOWN state) but we might
2525 : : * have dead_end children to wait for.
2526 : : *
2527 : : * If we have an archiver subprocess, tell it to do a last
2528 : : * archive cycle and quit. Likewise, if we have walsender
2529 : : * processes, tell them to send any remaining WAL and quit.
2530 : : */
2531 [ - + ]: 426 : Assert(Shutdown > NoShutdown);
2532 : :
2533 : : /* Waken archiver for the last time */
5938 2534 [ + + ]: 426 : if (PgArchPID != 0)
2535 : 12 : signal_child(PgArchPID, SIGUSR2);
2536 : :
2537 : : /*
2538 : : * Waken walsenders for the last time. No regular backends
2539 : : * should be around anymore.
2540 : : */
2505 andres@anarazel.de 2541 : 426 : SignalChildren(SIGUSR2);
2542 : :
5203 heikki.linnakangas@i 2543 : 426 : pmState = PM_SHUTDOWN_2;
2544 : : }
2545 : : else
2546 : : {
2547 : : /*
2548 : : * Any unexpected exit of the checkpointer (including FATAL
2549 : : * exit) is treated as a crash.
2550 : : */
6093 tgl@sss.pgh.pa.us 2551 : 307 : HandleChildCrash(pid, exitstatus,
4548 simon@2ndQuadrant.co 2552 : 307 : _("checkpointer process"));
2553 : : }
2554 : :
7260 tgl@sss.pgh.pa.us 2555 : 733 : continue;
2556 : : }
2557 : :
2558 : : /*
2559 : : * Was it the wal writer? Normal exit can be ignored; we'll start a
2560 : : * new one at the next iteration of the postmaster's main loop, if
2561 : : * necessary. Any other exit condition is treated as a crash.
2562 : : */
6093 2563 [ + + ]: 17634 : if (pid == WalWriterPID)
2564 : : {
6109 2565 : 639 : WalWriterPID = 0;
2566 [ + + ]: 639 : if (!EXIT_STATUS_0(exitstatus))
2567 : 260 : HandleChildCrash(pid, exitstatus,
6002 peter_e@gmx.net 2568 : 260 : _("WAL writer process"));
6109 tgl@sss.pgh.pa.us 2569 : 639 : continue;
2570 : : }
2571 : :
2572 : : /*
2573 : : * Was it the wal receiver? If exit status is zero (normal) or one
2574 : : * (FATAL exit), we assume everything is all right just like normal
2575 : : * backends. (If we need a new wal receiver, we'll start one at the
2576 : : * next iteration of the postmaster's main loop.)
2577 : : */
5203 heikki.linnakangas@i 2578 [ + + ]: 16995 : if (pid == WalReceiverPID)
2579 : : {
2580 : 319 : WalReceiverPID = 0;
2581 [ + - + - : 319 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+ + ]
2582 : 14 : HandleChildCrash(pid, exitstatus,
2583 : 14 : _("WAL receiver process"));
2584 : 319 : continue;
2585 : : }
2586 : :
2587 : : /*
2588 : : * Was it the wal summarizer? Normal exit can be ignored; we'll start
2589 : : * a new one at the next iteration of the postmaster's main loop, if
2590 : : * necessary. Any other exit condition is treated as a crash.
2591 : : */
116 rhaas@postgresql.org 2592 [ + + ]:GNC 16676 : if (pid == WalSummarizerPID)
2593 : : {
2594 : 12 : WalSummarizerPID = 0;
2595 [ + - ]: 12 : if (!EXIT_STATUS_0(exitstatus))
2596 : 12 : HandleChildCrash(pid, exitstatus,
2597 : 12 : _("WAL summarizer process"));
2598 : 12 : continue;
2599 : : }
2600 : :
2601 : : /*
2602 : : * Was it the autovacuum launcher? Normal exit can be ignored; we'll
2603 : : * start a new one at the next iteration of the postmaster's main
2604 : : * loop, if necessary. Any other exit condition is treated as a
2605 : : * crash.
2606 : : */
6093 tgl@sss.pgh.pa.us 2607 [ + + ]:CBC 16664 : if (pid == AutoVacPID)
2608 : : {
6849 2609 : 1867 : AutoVacPID = 0;
6268 alvherre@alvh.no-ip. 2610 [ + + ]: 1867 : if (!EXIT_STATUS_0(exitstatus))
6849 tgl@sss.pgh.pa.us 2611 : 223 : HandleChildCrash(pid, exitstatus,
6268 alvherre@alvh.no-ip. 2612 : 223 : _("autovacuum launcher process"));
6849 tgl@sss.pgh.pa.us 2613 : 1867 : continue;
2614 : : }
2615 : :
2616 : : /*
2617 : : * Was it the archiver? If exit status is zero (normal) or one (FATAL
2618 : : * exit), we assume everything is all right just like normal backends
2619 : : * and just try to restart a new one so that we immediately retry
2620 : : * archiving remaining files. (If fail, we'll try again in future
2621 : : * cycles of the postmaster's main loop.) Unless we were waiting for
2622 : : * it to shut down; don't restart it in that case, and
2623 : : * PostmasterStateMachine() will advance to the next shutdown step.
2624 : : */
6093 2625 [ + + ]: 14797 : if (pid == PgArchPID)
2626 : : {
7209 2627 : 45 : PgArchPID = 0;
1126 fujii@postgresql.org 2628 [ + + + - : 45 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+ - ]
2629 : 33 : HandleChildCrash(pid, exitstatus,
2630 : 33 : _("archiver process"));
3229 2631 [ + - - + : 45 : if (PgArchStartupAllowed())
+ - + - +
- - + + +
+ - - + -
- ]
41 heikki.linnakangas@i 2632 :UNC 0 : PgArchPID = StartChildProcess(B_ARCHIVER);
7209 tgl@sss.pgh.pa.us 2633 :CBC 45 : continue;
2634 : : }
2635 : :
2636 : : /* Was it the system logger? If so, try to start a new one */
6093 2637 [ - + ]: 14752 : if (pid == SysLoggerPID)
2638 : : {
7192 tgl@sss.pgh.pa.us 2639 :UBC 0 : SysLoggerPID = 0;
2640 : : /* for safety's sake, launch new logger *first* */
2641 : 0 : SysLoggerPID = SysLogger_Start();
6354 2642 [ # # ]: 0 : if (!EXIT_STATUS_0(exitstatus))
6991 bruce@momjian.us 2643 : 0 : LogChildExit(LOG, _("system logger process"),
2644 : : pid, exitstatus);
7192 tgl@sss.pgh.pa.us 2645 : 0 : continue;
2646 : : }
2647 : :
2648 : : /*
2649 : : * Was it the slot sync worker? Normal exit or FATAL exit can be
2650 : : * ignored (FATAL can be caused by libpqwalreceiver on receiving
2651 : : * shutdown request by the startup process during promotion); we'll
2652 : : * start a new one at the next iteration of the postmaster's main
2653 : : * loop, if necessary. Any other exit condition is treated as a crash.
2654 : : */
52 akapila@postgresql.o 2655 [ + + ]:GNC 14752 : if (pid == SlotSyncWorkerPID)
2656 : : {
2657 : 4 : SlotSyncWorkerPID = 0;
2658 [ + + + - : 4 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
- + ]
52 akapila@postgresql.o 2659 :UNC 0 : HandleChildCrash(pid, exitstatus,
2660 : 0 : _("slot sync worker process"));
52 akapila@postgresql.o 2661 :GNC 4 : continue;
2662 : : }
2663 : :
2664 : : /* Was it one of our background workers? */
4147 alvherre@alvh.no-ip. 2665 [ + + ]:CBC 14748 : if (CleanupBackgroundWorker(pid, exitstatus))
2666 : : {
2667 : : /* have it be restarted */
2668 : 2412 : HaveCrashedWorker = true;
2669 : 2412 : continue;
2670 : : }
2671 : :
2672 : : /*
2673 : : * Else do standard backend child cleanup.
2674 : : */
7193 tgl@sss.pgh.pa.us 2675 : 12336 : CleanupBackend(pid, exitstatus);
2676 : : } /* loop over pending child-death reports */
2677 : :
2678 : : /*
2679 : : * After cleaning out the SIGCHLD queue, see if we have any state changes
2680 : : * or actions to make.
2681 : : */
6093 2682 : 19485 : PostmasterStateMachine();
10141 scrappy@hub.org 2683 : 18757 : }
2684 : :
2685 : : /*
2686 : : * Scan the bgworkers list and see if the given PID (which has just stopped
2687 : : * or crashed) is in it. Handle its shutdown if so, and return true. If not a
2688 : : * bgworker, return false.
2689 : : *
2690 : : * This is heavily based on CleanupBackend. One important difference is that
2691 : : * we don't know yet that the dying process is a bgworker, so we must be silent
2692 : : * until we're sure it is.
2693 : : */
2694 : : static bool
4147 alvherre@alvh.no-ip. 2695 : 14748 : CleanupBackgroundWorker(int pid,
2696 : : int exitstatus) /* child's exit status */
2697 : : {
2698 : : char namebuf[MAXPGPATH];
2699 : : slist_mutable_iter iter;
2700 : :
2599 rhaas@postgresql.org 2701 [ + + + + : 29666 : slist_foreach_modify(iter, &BackgroundWorkerList)
+ + ]
2702 : : {
2703 : : RegisteredBgWorker *rw;
2704 : :
4147 alvherre@alvh.no-ip. 2705 : 17330 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
2706 : :
2707 [ + + ]: 17330 : if (rw->rw_pid != pid)
2708 : 14918 : continue;
2709 : :
2710 : : #ifdef WIN32
2711 : : /* see CleanupBackend */
2712 : : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
2713 : : exitstatus = 0;
2714 : : #endif
2715 : :
2418 peter_e@gmx.net 2716 : 2412 : snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
2717 : 2412 : rw->rw_worker.bgw_type);
2718 : :
2719 : :
4147 alvherre@alvh.no-ip. 2720 [ + + ]: 2412 : if (!EXIT_STATUS_0(exitstatus))
2721 : : {
2722 : : /* Record timestamp, so we know when to restart the worker. */
2723 : 883 : rw->rw_crashed_at = GetCurrentTimestamp();
2724 : : }
2725 : : else
2726 : : {
2727 : : /* Zero exit status means terminate */
2728 : 1529 : rw->rw_crashed_at = 0;
3630 rhaas@postgresql.org 2729 : 1529 : rw->rw_terminate = true;
2730 : : }
2731 : :
2732 : : /*
2733 : : * Additionally, just like a backend, any exit status other than 0 or
2734 : : * 1 is considered a crash and causes a system-wide restart.
2735 : : */
975 andres@anarazel.de 2736 [ + + + - : 2412 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+ + ]
2737 : : {
2738 : 281 : HandleChildCrash(pid, exitstatus, namebuf);
2739 : 281 : return true;
2740 : : }
2741 : :
2742 : : /*
2743 : : * We must release the postmaster child slot. If the worker failed to
2744 : : * do so, it did not clean up after itself, requiring a crash-restart
2745 : : * cycle.
2746 : : */
2747 [ - + ]: 2131 : if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
2748 : : {
3630 rhaas@postgresql.org 2749 :UBC 0 : HandleChildCrash(pid, exitstatus, namebuf);
2750 : 0 : return true;
2751 : : }
2752 : :
2753 : : /* Get it out of the BackendList and clear out remaining data */
3148 rhaas@postgresql.org 2754 :CBC 2131 : dlist_delete(&rw->rw_backend->elem);
2755 : : #ifdef EXEC_BACKEND
2756 : : ShmemBackendArrayRemove(rw->rw_backend);
2757 : : #endif
2758 : :
2759 : : /*
2760 : : * It's possible that this background worker started some OTHER
2761 : : * background worker and asked to be notified when that worker started
2762 : : * or stopped. If so, cancel any notifications destined for the
2763 : : * now-dead backend.
2764 : : */
2765 [ + + ]: 2131 : if (rw->rw_backend->bgworker_notify)
2766 : 145 : BackgroundWorkerStopNotifications(rw->rw_pid);
188 heikki.linnakangas@i 2767 :GNC 2131 : pfree(rw->rw_backend);
3148 rhaas@postgresql.org 2768 :CBC 2131 : rw->rw_backend = NULL;
4147 alvherre@alvh.no-ip. 2769 : 2131 : rw->rw_pid = 0;
2770 : 2131 : rw->rw_child_slot = 0;
2489 tgl@sss.pgh.pa.us 2771 : 2131 : ReportBackgroundWorkerExit(&iter); /* report child death */
2772 : :
3215 rhaas@postgresql.org 2773 [ + + ]: 2131 : LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
2774 : : namebuf, pid, exitstatus);
2775 : :
4147 alvherre@alvh.no-ip. 2776 : 2131 : return true;
2777 : : }
2778 : :
2779 : 12336 : return false;
2780 : : }
2781 : :
2782 : : /*
2783 : : * CleanupBackend -- cleanup after terminated backend.
2784 : : *
2785 : : * Remove all local state associated with backend.
2786 : : *
2787 : : * If you change this, see also CleanupBackgroundWorker.
2788 : : */
2789 : : static void
7193 tgl@sss.pgh.pa.us 2790 : 12336 : CleanupBackend(int pid,
2791 : : int exitstatus) /* child's exit status. */
2792 : : {
2793 : : dlist_mutable_iter iter;
2794 : :
6991 bruce@momjian.us 2795 : 12336 : LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
2796 : :
2797 : : /*
2798 : : * If a backend dies in an ugly way then we must signal all other backends
2799 : : * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
2800 : : * assume everything is all right and proceed to remove the backend from
2801 : : * the active backend list.
2802 : : */
2803 : :
2804 : : #ifdef WIN32
2805 : :
2806 : : /*
2807 : : * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
2808 : : * since that sometimes happens under load when the process fails to start
2809 : : * properly (long before it starts using shared memory). Microsoft reports
2810 : : * it is related to mutex failure:
2811 : : * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
2812 : : */
2813 : : if (exitstatus == ERROR_WAIT_NO_CHILDREN)
2814 : : {
2815 : : LogChildExit(LOG, _("server process"), pid, exitstatus);
2816 : : exitstatus = 0;
2817 : : }
2818 : : #endif
2819 : :
6354 tgl@sss.pgh.pa.us 2820 [ + + + + : 12336 : if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+ + ]
2821 : : {
6991 bruce@momjian.us 2822 : 284 : HandleChildCrash(pid, exitstatus, _("server process"));
7260 tgl@sss.pgh.pa.us 2823 : 284 : return;
2824 : : }
2825 : :
4198 alvherre@alvh.no-ip. 2826 [ + - + - ]: 32220 : dlist_foreach_modify(iter, &BackendList)
2827 : : {
2828 : 32220 : Backend *bp = dlist_container(Backend, elem, iter.cur);
2829 : :
7260 tgl@sss.pgh.pa.us 2830 [ + + ]: 32220 : if (bp->pid == pid)
2831 : : {
6093 2832 [ + + ]: 12052 : if (!bp->dead_end)
2833 : : {
5458 2834 [ - + ]: 11929 : if (!ReleasePostmasterChildSlot(bp->child_slot))
2835 : : {
2836 : : /*
2837 : : * Uh-oh, the child failed to clean itself up. Treat as a
2838 : : * crash after all.
2839 : : */
5458 tgl@sss.pgh.pa.us 2840 :UBC 0 : HandleChildCrash(pid, exitstatus, _("server process"));
2841 : 0 : return;
2842 : : }
2843 : : #ifdef EXEC_BACKEND
2844 : : ShmemBackendArrayRemove(bp);
2845 : : #endif
2846 : : }
3882 rhaas@postgresql.org 2847 [ + + ]:CBC 12052 : if (bp->bgworker_notify)
2848 : : {
2849 : : /*
2850 : : * This backend may have been slated to receive SIGUSR1 when
2851 : : * some background worker started or stopped. Cancel those
2852 : : * notifications, as we don't want to signal PIDs that are not
2853 : : * PostgreSQL backends. This gets skipped in the (probably
2854 : : * very common) case where the backend has never requested any
2855 : : * such notifications.
2856 : : */
2857 : 67 : BackgroundWorkerStopNotifications(bp->pid);
2858 : : }
4196 tgl@sss.pgh.pa.us 2859 : 12052 : dlist_delete(iter.cur);
188 heikki.linnakangas@i 2860 :GNC 12052 : pfree(bp);
7260 tgl@sss.pgh.pa.us 2861 :CBC 12052 : break;
2862 : : }
2863 : : }
2864 : : }
2865 : :
2866 : : /*
2867 : : * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
2868 : : * walwriter, autovacuum, archiver, slot sync worker, or background worker.
2869 : : *
2870 : : * The objectives here are to clean up our local state about the child
2871 : : * process, and to signal all other remaining children to quickdie.
2872 : : */
2873 : : static void
7193 2874 : 1768 : HandleChildCrash(int pid, int exitstatus, const char *procname)
2875 : : {
2876 : : dlist_mutable_iter iter;
2877 : : slist_iter siter;
2878 : : Backend *bp;
2879 : : bool take_action;
2880 : :
2881 : : /*
2882 : : * We only log messages and send signals if this is the first process
2883 : : * crash and we're not doing an immediate shutdown; otherwise, we're only
2884 : : * here to update postmaster's idea of live processes. If we have already
2885 : : * signaled children, nonzero exit status is to be expected, so don't
2886 : : * clutter log.
2887 : : */
3943 alvherre@alvh.no-ip. 2888 [ + + + + ]: 1768 : take_action = !FatalError && Shutdown != ImmediateShutdown;
2889 : :
2890 [ + + ]: 1768 : if (take_action)
2891 : : {
7193 tgl@sss.pgh.pa.us 2892 : 8 : LogChildExit(LOG, procname, pid, exitstatus);
7572 2893 [ + - ]: 8 : ereport(LOG,
2894 : : (errmsg("terminating any other active server processes")));
1207 2895 : 8 : SetQuitSignalReason(PMQUIT_FOR_CRASH);
2896 : : }
2897 : :
2898 : : /* Process background workers. */
4147 alvherre@alvh.no-ip. 2899 [ + + ]: 3643 : slist_foreach(siter, &BackgroundWorkerList)
2900 : : {
2901 : : RegisteredBgWorker *rw;
2902 : :
2903 : 1875 : rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
2904 [ + + ]: 1875 : if (rw->rw_pid == 0)
3973 bruce@momjian.us 2905 : 758 : continue; /* not running */
4147 alvherre@alvh.no-ip. 2906 [ + + ]: 1117 : if (rw->rw_pid == pid)
2907 : : {
2908 : : /*
2909 : : * Found entry for freshly-dead worker, so remove it.
2910 : : */
2911 : 281 : (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3148 rhaas@postgresql.org 2912 : 281 : dlist_delete(&rw->rw_backend->elem);
2913 : : #ifdef EXEC_BACKEND
2914 : : ShmemBackendArrayRemove(rw->rw_backend);
2915 : : #endif
188 heikki.linnakangas@i 2916 :GNC 281 : pfree(rw->rw_backend);
3148 rhaas@postgresql.org 2917 :CBC 281 : rw->rw_backend = NULL;
4147 alvherre@alvh.no-ip. 2918 : 281 : rw->rw_pid = 0;
2919 : 281 : rw->rw_child_slot = 0;
2920 : : /* don't reset crashed_at */
2921 : : /* don't report child stop, either */
2922 : : /* Keep looping so we can signal remaining workers */
2923 : : }
2924 : : else
2925 : : {
2926 : : /*
2927 : : * This worker is still alive. Unless we did so already, tell it
2928 : : * to commit hara-kiri.
2929 : : */
3943 2930 [ + + ]: 836 : if (take_action)
510 tgl@sss.pgh.pa.us 2931 : 5 : sigquit_child(rw->rw_pid);
2932 : : }
2933 : : }
2934 : :
2935 : : /* Process regular backends */
4198 alvherre@alvh.no-ip. 2936 [ + - + + ]: 3161 : dlist_foreach_modify(iter, &BackendList)
2937 : : {
2938 : 1393 : bp = dlist_container(Backend, elem, iter.cur);
2939 : :
7260 tgl@sss.pgh.pa.us 2940 [ + + ]: 1393 : if (bp->pid == pid)
2941 : : {
2942 : : /*
2943 : : * Found entry for freshly-dead backend, so remove it.
2944 : : */
6093 2945 [ + - ]: 284 : if (!bp->dead_end)
2946 : : {
5458 2947 : 284 : (void) ReleasePostmasterChildSlot(bp->child_slot);
2948 : : #ifdef EXEC_BACKEND
2949 : : ShmemBackendArrayRemove(bp);
2950 : : #endif
2951 : : }
4196 2952 : 284 : dlist_delete(iter.cur);
188 heikki.linnakangas@i 2953 :GNC 284 : pfree(bp);
2954 : : /* Keep looping so we can signal remaining backends */
2955 : : }
2956 : : else
2957 : : {
2958 : : /*
2959 : : * This backend is still alive. Unless we did so already, tell it
2960 : : * to commit hara-kiri.
2961 : : *
2962 : : * We could exclude dead_end children here, but at least when
2963 : : * sending SIGABRT it seems better to include them.
2964 : : *
2965 : : * Background workers were already processed above; ignore them
2966 : : * here.
2967 : : */
4147 alvherre@alvh.no-ip. 2968 [ + + ]:CBC 1109 : if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
2969 : 836 : continue;
2970 : :
3943 2971 [ + + ]: 273 : if (take_action)
510 tgl@sss.pgh.pa.us 2972 : 6 : sigquit_child(bp->pid);
2973 : : }
2974 : : }
2975 : :
2976 : : /* Take care of the startup process too */
5534 heikki.linnakangas@i 2977 [ - + ]: 1768 : if (pid == StartupPID)
2978 : : {
5534 heikki.linnakangas@i 2979 :UBC 0 : StartupPID = 0;
2980 : : /* Caller adjusts StartupStatus, so don't touch it here */
2981 : : }
3943 alvherre@alvh.no-ip. 2982 [ + + - + ]:CBC 1768 : else if (StartupPID != 0 && take_action)
2983 : : {
510 tgl@sss.pgh.pa.us 2984 :UBC 0 : sigquit_child(StartupPID);
3202 2985 : 0 : StartupStatus = STARTUP_SIGNALED;
2986 : : }
2987 : :
2988 : : /* Take care of the bgwriter too */
7260 tgl@sss.pgh.pa.us 2989 [ - + ]:CBC 1768 : if (pid == BgWriterPID)
7452 JanWieck@Yahoo.com 2990 :UBC 0 : BgWriterPID = 0;
3943 alvherre@alvh.no-ip. 2991 [ + + + + ]:CBC 1768 : else if (BgWriterPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 2992 : 8 : sigquit_child(BgWriterPID);
2993 : :
2994 : : /* Take care of the checkpointer too */
4548 simon@2ndQuadrant.co 2995 [ - + ]: 1768 : if (pid == CheckpointerPID)
4548 simon@2ndQuadrant.co 2996 :UBC 0 : CheckpointerPID = 0;
3943 alvherre@alvh.no-ip. 2997 [ + + + + ]:CBC 1768 : else if (CheckpointerPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 2998 : 8 : sigquit_child(CheckpointerPID);
2999 : :
3000 : : /* Take care of the walwriter too */
6109 3001 [ - + ]: 1768 : if (pid == WalWriterPID)
6109 tgl@sss.pgh.pa.us 3002 :UBC 0 : WalWriterPID = 0;
3943 alvherre@alvh.no-ip. 3003 [ + + + + ]:CBC 1768 : else if (WalWriterPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 3004 : 5 : sigquit_child(WalWriterPID);
3005 : :
3006 : : /* Take care of the walreceiver too */
5203 heikki.linnakangas@i 3007 [ - + ]: 1768 : if (pid == WalReceiverPID)
5203 heikki.linnakangas@i 3008 :UBC 0 : WalReceiverPID = 0;
3943 alvherre@alvh.no-ip. 3009 [ + + - + ]:CBC 1768 : else if (WalReceiverPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 3010 :UBC 0 : sigquit_child(WalReceiverPID);
3011 : :
3012 : : /* Take care of the walsummarizer too */
116 rhaas@postgresql.org 3013 [ - + ]:GNC 1768 : if (pid == WalSummarizerPID)
116 rhaas@postgresql.org 3014 :UNC 0 : WalSummarizerPID = 0;
116 rhaas@postgresql.org 3015 [ + + - + ]:GNC 1768 : else if (WalSummarizerPID != 0 && take_action)
116 rhaas@postgresql.org 3016 :UNC 0 : sigquit_child(WalSummarizerPID);
3017 : :
3018 : : /* Take care of the autovacuum launcher too */
6849 tgl@sss.pgh.pa.us 3019 [ - + ]:CBC 1768 : if (pid == AutoVacPID)
6849 tgl@sss.pgh.pa.us 3020 :UBC 0 : AutoVacPID = 0;
3943 alvherre@alvh.no-ip. 3021 [ + + + + ]:CBC 1768 : else if (AutoVacPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 3022 : 5 : sigquit_child(AutoVacPID);
3023 : :
3024 : : /* Take care of the archiver too */
1126 fujii@postgresql.org 3025 [ - + ]: 1768 : if (pid == PgArchPID)
1126 fujii@postgresql.org 3026 :UBC 0 : PgArchPID = 0;
1126 fujii@postgresql.org 3027 [ + + - + ]:CBC 1768 : else if (PgArchPID != 0 && take_action)
510 tgl@sss.pgh.pa.us 3028 :UBC 0 : sigquit_child(PgArchPID);
3029 : :
3030 : : /* Take care of the slot sync worker too */
52 akapila@postgresql.o 3031 [ - + ]:GNC 1768 : if (pid == SlotSyncWorkerPID)
52 akapila@postgresql.o 3032 :UNC 0 : SlotSyncWorkerPID = 0;
52 akapila@postgresql.o 3033 [ - + - - ]:GNC 1768 : else if (SlotSyncWorkerPID != 0 && take_action)
52 akapila@postgresql.o 3034 :UNC 0 : sigquit_child(SlotSyncWorkerPID);
3035 : :
3036 : : /* We do NOT restart the syslogger */
3037 : :
3943 alvherre@alvh.no-ip. 3038 [ + + ]:CBC 1768 : if (Shutdown != ImmediateShutdown)
3039 : 45 : FatalError = true;
3040 : :
3041 : : /* We now transit into a state of waiting for children to die */
5534 heikki.linnakangas@i 3042 [ + + ]: 1768 : if (pmState == PM_RECOVERY ||
5083 rhaas@postgresql.org 3043 [ + + ]: 1766 : pmState == PM_HOT_STANDBY ||
5534 heikki.linnakangas@i 3044 [ + + ]: 1765 : pmState == PM_RUN ||
1339 tgl@sss.pgh.pa.us 3045 [ + - ]: 1760 : pmState == PM_STOP_BACKENDS ||
5832 3046 [ - + ]: 1760 : pmState == PM_SHUTDOWN)
6093 3047 : 8 : pmState = PM_WAIT_BACKENDS;
3048 : :
3049 : : /*
3050 : : * .. and if this doesn't happen quickly enough, now the clock is ticking
3051 : : * for us to kill them without mercy.
3052 : : */
3943 alvherre@alvh.no-ip. 3053 [ + + ]: 1768 : if (AbortStartTime == 0)
3054 : 8 : AbortStartTime = time(NULL);
10141 scrappy@hub.org 3055 : 1768 : }
3056 : :
/*
 * LogChildExit -- report how a child process ended.
 *
 * lev			ereport level to log at
 * procname		noun phrase naming the kind of child ("server process", ...)
 * pid			process ID of the child
 * exitstatus	wait status of the child, decoded here with the WIF* macros
 *
 * Unless EXIT_STATUS_0() holds for the status, we also ask
 * pgstat_get_crashed_backend_activity() what the process was running and,
 * if it gives us something, attach that as an errdetail.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary; it is chosen to match the default
	 * track_activity_query_size.
	 */
	char		querybuf[1024];
	const char *running;

	running = EXIT_STATUS_0(exitstatus) ? NULL :
		pgstat_get_crashed_backend_activity(pid, querybuf, sizeof(querybuf));

	/* Case 1: the child called exit() (or returned from main). */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 2: neither a normal exit nor a signal; just print the raw value. */
	if (!WIFSIGNALED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with unrecognized status %d",
						procname, pid, exitstatus),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* Case 3: the child was killed by a signal (an exception on Windows). */
#if defined(WIN32)
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by exception 0x%X",
					procname, pid, WTERMSIG(exitstatus)),
			 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#else
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) was terminated by signal %d: %s",
					procname, pid, WTERMSIG(exitstatus),
					pg_strsignal(WTERMSIG(exitstatus))),
			 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
}
3118 : :
3119 : : /*
3120 : : * Advance the postmaster's state machine and take actions as appropriate
3121 : : *
3122 : : * This is common code for process_pm_shutdown_request(),
3123 : : * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3124 : : * that might mean we need to change state.
3125 : : */
3126 : : static void
6093 3127 : 21246 : PostmasterStateMachine(void)
3128 : : {
3129 : : /* If we're doing a smart shutdown, try to advance that state. */
1339 3130 [ + + + + ]: 21246 : if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3131 : : {
739 sfrost@snowman.net 3132 [ + + ]: 16378 : if (!connsAllowed)
3133 : : {
3134 : : /*
3135 : : * This state ends when we have no normal client backends running.
3136 : : * Then we're ready to stop other children.
3137 : : */
1339 tgl@sss.pgh.pa.us 3138 [ + + ]: 37 : if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3139 : 17 : pmState = PM_STOP_BACKENDS;
3140 : : }
3141 : : }
3142 : :
3143 : : /*
3144 : : * If we're ready to do so, signal child processes to shut down. (This
3145 : : * isn't a persistent state, but treating it as a distinct pmState allows
3146 : : * us to share this code across multiple shutdown code paths.)
3147 : : */
3148 [ + + ]: 21246 : if (pmState == PM_STOP_BACKENDS)
3149 : : {
3150 : : /*
3151 : : * Forget any pending requests for background workers, since we're no
3152 : : * longer willing to launch any new workers. (If additional requests
3153 : : * arrive, BackgroundWorkerStateChange will reject them.)
3154 : : */
1207 3155 : 426 : ForgetUnstartedBackgroundWorkers();
3156 : :
3157 : : /* Signal all backend children except walsenders */
1339 3158 : 426 : SignalSomeChildren(SIGTERM,
3159 : : BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
3160 : : /* and the autovac launcher too */
3161 [ + + ]: 426 : if (AutoVacPID != 0)
3162 : 335 : signal_child(AutoVacPID, SIGTERM);
3163 : : /* and the bgwriter too */
3164 [ + - ]: 426 : if (BgWriterPID != 0)
3165 : 426 : signal_child(BgWriterPID, SIGTERM);
3166 : : /* and the walwriter too */
3167 [ + + ]: 426 : if (WalWriterPID != 0)
3168 : 379 : signal_child(WalWriterPID, SIGTERM);
3169 : : /* If we're in recovery, also stop startup and walreceiver procs */
3170 [ + + ]: 426 : if (StartupPID != 0)
3171 : 47 : signal_child(StartupPID, SIGTERM);
3172 [ + + ]: 426 : if (WalReceiverPID != 0)
3173 : 40 : signal_child(WalReceiverPID, SIGTERM);
116 rhaas@postgresql.org 3174 [ - + ]:GNC 426 : if (WalSummarizerPID != 0)
116 rhaas@postgresql.org 3175 :UNC 0 : signal_child(WalSummarizerPID, SIGTERM);
52 akapila@postgresql.o 3176 [ + + ]:GNC 426 : if (SlotSyncWorkerPID != 0)
3177 : 2 : signal_child(SlotSyncWorkerPID, SIGTERM);
3178 : : /* checkpointer, archiver, stats, and syslogger may continue for now */
3179 : :
3180 : : /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
1339 tgl@sss.pgh.pa.us 3181 :CBC 426 : pmState = PM_WAIT_BACKENDS;
3182 : : }
3183 : :
3184 : : /*
3185 : : * If we are in a state-machine state that implies waiting for backends to
3186 : : * exit, see if they're all gone, and change state if so.
3187 : : */
6093 3188 [ + + ]: 21246 : if (pmState == PM_WAIT_BACKENDS)
3189 : : {
3190 : : /*
3191 : : * PM_WAIT_BACKENDS state ends when we have no regular backends
3192 : : * (including autovac workers), no bgworkers (including unconnected
3193 : : * ones), and no walwriter, autovac launcher, bgwriter or slot sync
3194 : : * worker. If we are doing crash recovery or an immediate shutdown
3195 : : * then we expect the checkpointer to exit as well, otherwise not. The
3196 : : * stats and syslogger processes are disregarded since they are not
3197 : : * connected to shared memory; we also disregard dead_end children
3198 : : * here. Walsenders and archiver are also disregarded, they will be
3199 : : * terminated later after writing the checkpoint record.
3200 : : */
1339 3201 [ + + ]: 4337 : if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
6093 3202 [ + + ]: 2093 : StartupPID == 0 &&
5203 heikki.linnakangas@i 3203 [ + + ]: 1897 : WalReceiverPID == 0 &&
116 rhaas@postgresql.org 3204 [ + + ]:GNC 1858 : WalSummarizerPID == 0 &&
4548 simon@2ndQuadrant.co 3205 [ + + ]:CBC 1849 : BgWriterPID == 0 &&
3902 fujii@postgresql.org 3206 [ + + ]: 1282 : (CheckpointerPID == 0 ||
3207 [ + + + + ]: 834 : (!FatalError && Shutdown < ImmediateShutdown)) &&
6093 tgl@sss.pgh.pa.us 3208 [ + + ]: 1108 : WalWriterPID == 0 &&
52 akapila@postgresql.o 3209 [ + + ]:GNC 876 : AutoVacPID == 0 &&
3210 [ + - ]: 733 : SlotSyncWorkerPID == 0)
3211 : : {
3943 alvherre@alvh.no-ip. 3212 [ + + + + ]:CBC 733 : if (Shutdown >= ImmediateShutdown || FatalError)
3213 : : {
3214 : : /*
3215 : : * Start waiting for dead_end children to die. This state
3216 : : * change causes ServerLoop to stop creating new ones.
3217 : : */
6093 tgl@sss.pgh.pa.us 3218 : 307 : pmState = PM_WAIT_DEAD_END;
3219 : :
3220 : : /*
3221 : : * We already SIGQUIT'd the archiver and stats processes, if
3222 : : * any, when we started immediate shutdown or entered
3223 : : * FatalError state.
3224 : : */
3225 : : }
3226 : : else
3227 : : {
3228 : : /*
3229 : : * If we get here, we are proceeding with normal shutdown. All
3230 : : * the regular children are gone, and it's time to tell the
3231 : : * checkpointer to do a shutdown checkpoint.
3232 : : */
3233 [ - + ]: 426 : Assert(Shutdown > NoShutdown);
3234 : : /* Start the checkpointer if not running */
4548 simon@2ndQuadrant.co 3235 [ - + ]: 426 : if (CheckpointerPID == 0)
41 heikki.linnakangas@i 3236 :UNC 0 : CheckpointerPID = StartChildProcess(B_CHECKPOINTER);
3237 : : /* And tell it to shut down */
4548 simon@2ndQuadrant.co 3238 [ + - ]:CBC 426 : if (CheckpointerPID != 0)
3239 : : {
3240 : 426 : signal_child(CheckpointerPID, SIGUSR2);
6093 tgl@sss.pgh.pa.us 3241 : 426 : pmState = PM_SHUTDOWN;
3242 : : }
3243 : : else
3244 : : {
3245 : : /*
3246 : : * If we failed to fork a checkpointer, just shut down.
3247 : : * Any required cleanup will happen at next restart. We
3248 : : * set FatalError so that an "abnormal shutdown" message
3249 : : * gets logged when we exit.
3250 : : *
3251 : : * We don't consult send_abort_for_crash here, as it's
3252 : : * unlikely that dumping cores would illuminate the reason
3253 : : * for checkpointer fork failure.
3254 : : */
6093 tgl@sss.pgh.pa.us 3255 :UBC 0 : FatalError = true;
3256 : 0 : pmState = PM_WAIT_DEAD_END;
3257 : :
3258 : : /* Kill the walsenders and archiver too */
4868 alvherre@alvh.no-ip. 3259 : 0 : SignalChildren(SIGQUIT);
5938 tgl@sss.pgh.pa.us 3260 [ # # ]: 0 : if (PgArchPID != 0)
3261 : 0 : signal_child(PgArchPID, SIGQUIT);
3262 : : }
3263 : : }
3264 : : }
3265 : : }
3266 : :
5203 heikki.linnakangas@i 3267 [ + + ]:CBC 21246 : if (pmState == PM_SHUTDOWN_2)
3268 : : {
3269 : : /*
3270 : : * PM_SHUTDOWN_2 state ends when there's no other children than
3271 : : * dead_end children left. There shouldn't be any regular backends
3272 : : * left by now anyway; what we're really waiting for is walsenders and
3273 : : * archiver.
3274 : : */
1817 tgl@sss.pgh.pa.us 3275 [ + + + + ]: 461 : if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3276 : : {
5203 heikki.linnakangas@i 3277 : 426 : pmState = PM_WAIT_DEAD_END;
3278 : : }
3279 : : }
3280 : :
6093 tgl@sss.pgh.pa.us 3281 [ + + ]: 21246 : if (pmState == PM_WAIT_DEAD_END)
3282 : : {
3283 : : /* Don't allow any new socket connection events. */
458 tmunro@postgresql.or 3284 : 753 : ConfigurePostmasterWaitSet(false);
3285 : :
3286 : : /*
3287 : : * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3288 : : * (ie, no dead_end children remain), and the archiver is gone too.
3289 : : *
3290 : : * The reason we wait for those two is to protect them against a new
3291 : : * postmaster starting conflicting subprocesses; this isn't an
3292 : : * ironclad protection, but it at least helps in the
3293 : : * shutdown-and-immediately-restart scenario. Note that they have
3294 : : * already been sent appropriate shutdown signals, either during a
3295 : : * normal state transition leading up to PM_WAIT_DEAD_END, or during
3296 : : * FatalError processing.
3297 : : */
739 andres@anarazel.de 3298 [ + + + + ]: 753 : if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3299 : : {
3300 : : /* These other guys should be dead already */
6093 tgl@sss.pgh.pa.us 3301 [ - + ]: 733 : Assert(StartupPID == 0);
5203 heikki.linnakangas@i 3302 [ - + ]: 733 : Assert(WalReceiverPID == 0);
116 rhaas@postgresql.org 3303 [ - + ]:GNC 733 : Assert(WalSummarizerPID == 0);
6093 tgl@sss.pgh.pa.us 3304 [ - + ]:CBC 733 : Assert(BgWriterPID == 0);
4548 simon@2ndQuadrant.co 3305 [ - + ]: 733 : Assert(CheckpointerPID == 0);
6093 tgl@sss.pgh.pa.us 3306 [ - + ]: 733 : Assert(WalWriterPID == 0);
3307 [ - + ]: 733 : Assert(AutoVacPID == 0);
52 akapila@postgresql.o 3308 [ - + ]:GNC 733 : Assert(SlotSyncWorkerPID == 0);
3309 : : /* syslogger is not considered here */
6093 tgl@sss.pgh.pa.us 3310 :CBC 733 : pmState = PM_NO_CHILDREN;
3311 : : }
3312 : : }
3313 : :
3314 : : /*
3315 : : * If we've been told to shut down, we exit as soon as there are no
3316 : : * remaining children. If there was a crash, cleanup will occur at the
3317 : : * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3318 : : * crash before exiting, but that seems unwise if we are quitting because
3319 : : * we got SIGTERM from init --- there may well not be time for recovery
3320 : : * before init decides to SIGKILL us.)
3321 : : *
3322 : : * Note that the syslogger continues to run. It will exit when it sees
3323 : : * EOF on its input pipe, which happens when there are no more upstream
3324 : : * processes.
3325 : : */
3326 [ + + + + ]: 21246 : if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3327 : : {
3328 [ - + ]: 725 : if (FatalError)
3329 : : {
6093 tgl@sss.pgh.pa.us 3330 [ # # ]:UBC 0 : ereport(LOG, (errmsg("abnormal database system shutdown")));
3331 : 0 : ExitPostmaster(1);
3332 : : }
3333 : : else
3334 : : {
3335 : : /*
3336 : : * Normal exit from the postmaster is here. We don't need to log
3337 : : * anything here, since the UnlinkLockFiles proc_exit callback
3338 : : * will do so, and that should be the last user-visible action.
3339 : : */
6093 tgl@sss.pgh.pa.us 3340 :CBC 725 : ExitPostmaster(0);
3341 : : }
3342 : : }
3343 : :
3344 : : /*
3345 : : * If the startup process failed, or the user does not want an automatic
3346 : : * restart after backend crashes, wait for all non-syslogger children to
3347 : : * exit, and then exit postmaster. We don't try to reinitialize when the
3348 : : * startup process fails, because more than likely it will just fail again
3349 : : * and we will keep trying forever.
3350 : : */
1057 3351 [ + + ]: 20521 : if (pmState == PM_NO_CHILDREN)
3352 : : {
3353 [ + + ]: 8 : if (StartupStatus == STARTUP_CRASHED)
3354 : : {
3355 [ + - ]: 3 : ereport(LOG,
3356 : : (errmsg("shutting down due to startup process failure")));
3357 : 3 : ExitPostmaster(1);
3358 : : }
3359 [ - + ]: 5 : if (!restart_after_crash)
3360 : : {
1057 tgl@sss.pgh.pa.us 3361 [ # # ]:UBC 0 : ereport(LOG,
3362 : : (errmsg("shutting down because restart_after_crash is off")));
3363 : 0 : ExitPostmaster(1);
3364 : : }
3365 : : }
3366 : :
3367 : : /*
3368 : : * If we need to recover from a crash, wait for all non-syslogger children
3369 : : * to exit, then reset shmem and start the startup process.
3370 : : */
6093 tgl@sss.pgh.pa.us 3371 [ + + + + ]:CBC 20518 : if (FatalError && pmState == PM_NO_CHILDREN)
3372 : : {
3373 [ + - ]: 5 : ereport(LOG,
3374 : : (errmsg("all server processes terminated; reinitializing")));
3375 : :
3376 : : /* remove leftover temporary files after a crash */
1123 tomas.vondra@postgre 3377 [ + + ]: 5 : if (remove_temp_files_after_crash)
3378 : 4 : RemovePgTempFiles();
3379 : :
3380 : : /* allow background workers to immediately restart */
3630 rhaas@postgresql.org 3381 : 5 : ResetBackgroundWorkerCrashTimes();
3382 : :
5579 tgl@sss.pgh.pa.us 3383 : 5 : shmem_exit(1);
3384 : :
3385 : : /* re-read control file into local memory */
2401 andres@anarazel.de 3386 : 5 : LocalProcessControlFile(true);
3387 : :
3388 : : /* re-create shared memory and semaphores */
638 tgl@sss.pgh.pa.us 3389 : 5 : CreateSharedMemoryAndSemaphores();
3390 : :
41 heikki.linnakangas@i 3391 :GNC 5 : StartupPID = StartChildProcess(B_STARTUP);
6093 tgl@sss.pgh.pa.us 3392 [ - + ]:CBC 5 : Assert(StartupPID != 0);
3202 3393 : 5 : StartupStatus = STARTUP_RUNNING;
6093 3394 : 5 : pmState = PM_STARTUP;
3395 : : /* crash recovery started, reset SIGKILL flag */
3844 alvherre@alvh.no-ip. 3396 : 5 : AbortStartTime = 0;
3397 : :
3398 : : /* start accepting server socket connection events again */
458 tmunro@postgresql.or 3399 : 5 : ConfigurePostmasterWaitSet(true);
3400 : : }
6093 tgl@sss.pgh.pa.us 3401 : 20518 : }
3402 : :
3403 : :
3404 : : /*
3405 : : * Send a signal to a postmaster child process
3406 : : *
3407 : : * On systems that have setsid(), each child process sets itself up as a
3408 : : * process group leader. For signals that are generally interpreted in the
3409 : : * appropriate fashion, we signal the entire process group not just the
3410 : : * direct child process. This allows us to, for example, SIGQUIT a blocked
3411 : : * archive_recovery script, or SIGINT a script being run by a backend via
3412 : : * system().
3413 : : *
3414 : : * There is a race condition for recently-forked children: they might not
3415 : : * have executed setsid() yet. So we signal the child directly as well as
3416 : : * the group. We assume such a child will handle the signal before trying
3417 : : * to spawn any grandchild processes. We also assume that signaling the
3418 : : * child twice will not cause any problems.
3419 : : */
3420 : : static void
6354 3421 : 5170 : signal_child(pid_t pid, int signal)
3422 : : {
3423 [ - + ]: 5170 : if (kill(pid, signal) < 0)
6354 tgl@sss.pgh.pa.us 3424 [ # # ]:UBC 0 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3425 : : #ifdef HAVE_SETSID
6354 tgl@sss.pgh.pa.us 3426 [ + + ]:CBC 5170 : switch (signal)
3427 : : {
3428 : 3805 : case SIGINT:
3429 : : case SIGTERM:
3430 : : case SIGQUIT:
3431 : : case SIGKILL:
3432 : : case SIGABRT:
3433 [ + + ]: 3805 : if (kill(-pid, signal) < 0)
3434 [ - + ]: 1 : elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3435 : 3805 : break;
3436 : 1365 : default:
3437 : 1365 : break;
3438 : : }
3439 : : #endif
3440 : 5170 : }
3441 : :
3442 : : /*
3443 : : * Convenience function for killing a child process after a crash of some
3444 : : * other child process. We log the action at a higher level than we would
3445 : : * otherwise do, and we apply send_abort_for_crash to decide which signal
3446 : : * to send. Normally it's SIGQUIT -- and most other comments in this file
3447 : : * are written on the assumption that it is -- but developers might prefer
3448 : : * to use SIGABRT to collect per-child core dumps.
3449 : : */
3450 : : static void
510 3451 : 37 : sigquit_child(pid_t pid)
3452 : : {
3453 [ - + - - ]: 37 : ereport(DEBUG2,
3454 : : (errmsg_internal("sending %s to process %d",
3455 : : (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
3456 : : (int) pid)));
3457 [ - + ]: 37 : signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT));
3458 : 37 : }
3459 : :
3460 : : /*
3461 : : * Send a signal to the targeted children (but NOT special children;
3462 : : * dead_end children are never signaled, either).
3463 : : */
3464 : : static bool
5203 heikki.linnakangas@i 3465 : 1278 : SignalSomeChildren(int signal, int target)
3466 : : {
3467 : : dlist_iter iter;
3468 : 1278 : bool signaled = false;
3469 : :
4198 alvherre@alvh.no-ip. 3470 [ + - + + ]: 3054 : dlist_foreach(iter, &BackendList)
3471 : : {
3472 : 1776 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3473 : :
6093 tgl@sss.pgh.pa.us 3474 [ + + ]: 1776 : if (bp->dead_end)
3475 : 3 : continue;
3476 : :
3477 : : /*
3478 : : * Since target == BACKEND_TYPE_ALL is the most common case, we test
3479 : : * it first and avoid touching shared memory for every child.
3480 : : */
4832 rhaas@postgresql.org 3481 [ + + ]: 1773 : if (target != BACKEND_TYPE_ALL)
3482 : : {
3483 : : /*
3484 : : * Assign bkend_type for any recently announced WAL Sender
3485 : : * processes.
3486 : : */
4147 alvherre@alvh.no-ip. 3487 [ + + + + ]: 1262 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3488 : 435 : IsPostmasterChildWalSender(bp->child_slot))
3489 : 33 : bp->bkend_type = BACKEND_TYPE_WALSND;
3490 : :
3491 [ + + ]: 827 : if (!(target & bp->bkend_type))
4832 rhaas@postgresql.org 3492 : 33 : continue;
3493 : : }
3494 : :
7260 tgl@sss.pgh.pa.us 3495 [ - + ]: 1740 : ereport(DEBUG4,
3496 : : (errmsg_internal("sending signal %d to process %d",
3497 : : signal, (int) bp->pid)));
6354 3498 : 1740 : signal_child(bp->pid, signal);
5203 heikki.linnakangas@i 3499 : 1740 : signaled = true;
3500 : : }
3501 : 1278 : return signaled;
3502 : : }
3503 : :
3504 : : /*
3505 : : * Send a termination signal to children. This considers all of our children
3506 : : * processes, except syslogger and dead_end backends.
3507 : : */
3508 : : static void
3943 alvherre@alvh.no-ip. 3509 : 299 : TerminateChildren(int signal)
3510 : : {
3511 : 299 : SignalChildren(signal);
3512 [ + + ]: 299 : if (StartupPID != 0)
3513 : : {
3514 : 44 : signal_child(StartupPID, signal);
510 tgl@sss.pgh.pa.us 3515 [ - + - - : 44 : if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
- - ]
3202 3516 : 44 : StartupStatus = STARTUP_SIGNALED;
3517 : : }
3943 alvherre@alvh.no-ip. 3518 [ + - ]: 299 : if (BgWriterPID != 0)
3519 : 299 : signal_child(BgWriterPID, signal);
3520 [ + - ]: 299 : if (CheckpointerPID != 0)
3521 : 299 : signal_child(CheckpointerPID, signal);
3522 [ + + ]: 299 : if (WalWriterPID != 0)
3523 : 255 : signal_child(WalWriterPID, signal);
3524 [ + + ]: 299 : if (WalReceiverPID != 0)
3525 : 15 : signal_child(WalReceiverPID, signal);
116 rhaas@postgresql.org 3526 [ + + ]:GNC 299 : if (WalSummarizerPID != 0)
3527 : 12 : signal_child(WalSummarizerPID, signal);
3943 alvherre@alvh.no-ip. 3528 [ + + ]:CBC 299 : if (AutoVacPID != 0)
3529 : 218 : signal_child(AutoVacPID, signal);
3530 [ + + ]: 299 : if (PgArchPID != 0)
3531 : 33 : signal_child(PgArchPID, signal);
52 akapila@postgresql.o 3532 [ - + ]:GNC 299 : if (SlotSyncWorkerPID != 0)
52 akapila@postgresql.o 3533 :UNC 0 : signal_child(SlotSyncWorkerPID, signal);
3943 alvherre@alvh.no-ip. 3534 :CBC 299 : }
3535 : :
3536 : : /*
3537 : : * BackendStartup -- start backend process
3538 : : *
3539 : : * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
3540 : : *
3541 : : * Note: if you change this code, also consider StartAutovacuumWorker.
3542 : : */
3543 : : static int
33 heikki.linnakangas@i 3544 :GNC 11586 : BackendStartup(ClientSocket *client_sock)
3545 : : {
3546 : : Backend *bn; /* for backend cleanup */
3547 : : pid_t pid;
3548 : : BackendStartupData startup_data;
3549 : :
3550 : : /*
3551 : : * Create backend data structure. Better before the fork() so we can
3552 : : * handle failure cleanly.
3553 : : */
188 3554 : 11586 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
8211 tgl@sss.pgh.pa.us 3555 [ - + ]:CBC 11586 : if (!bn)
3556 : : {
7572 tgl@sss.pgh.pa.us 3557 [ # # ]:UBC 0 : ereport(LOG,
3558 : : (errcode(ERRCODE_OUT_OF_MEMORY),
3559 : : errmsg("out of memory")));
8211 3560 : 0 : return STATUS_ERROR;
3561 : : }
3562 : :
3563 : : /*
3564 : : * Compute the cancel key that will be assigned to this backend. The
3565 : : * backend will have its own copy in the forked-off process' value of
3566 : : * MyCancelKey, so that it can transmit the key to the frontend.
3567 : : */
2687 heikki.linnakangas@i 3568 [ - + ]:CBC 11586 : if (!RandomCancelKey(&MyCancelKey))
3569 : : {
188 heikki.linnakangas@i 3570 :UNC 0 : pfree(bn);
2687 heikki.linnakangas@i 3571 [ # # ]:UBC 0 : ereport(LOG,
3572 : : (errcode(ERRCODE_INTERNAL_ERROR),
3573 : : errmsg("could not generate random cancel key")));
3574 : 0 : return STATUS_ERROR;
3575 : : }
3576 : :
3577 : : /* Pass down canAcceptConnections state */
27 heikki.linnakangas@i 3578 :GNC 11586 : startup_data.canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
3579 : 11586 : bn->dead_end = (startup_data.canAcceptConnections != CAC_OK);
3580 : 11586 : bn->cancel_key = MyCancelKey;
3581 : :
3582 : : /*
3583 : : * Unless it's a dead_end child, assign it a child slot number
3584 : : */
5458 tgl@sss.pgh.pa.us 3585 [ + + ]:CBC 11586 : if (!bn->dead_end)
3586 : 11463 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
3587 : : else
3588 : 123 : bn->child_slot = 0;
3589 : :
3590 : : /* Hasn't asked to be notified about any bgworkers yet */
3882 rhaas@postgresql.org 3591 : 11586 : bn->bgworker_notify = false;
3592 : :
27 heikki.linnakangas@i 3593 :GNC 11586 : pid = postmaster_child_launch(B_BACKEND,
3594 : : (char *) &startup_data, sizeof(startup_data),
3595 : : client_sock);
9716 bruce@momjian.us 3596 [ - + ]:CBC 11586 : if (pid < 0)
3597 : : {
3598 : : /* in parent, fork failed */
8134 tgl@sss.pgh.pa.us 3599 :UBC 0 : int save_errno = errno;
3600 : :
5347 3601 [ # # ]: 0 : if (!bn->dead_end)
3602 : 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
188 heikki.linnakangas@i 3603 :UNC 0 : pfree(bn);
7572 tgl@sss.pgh.pa.us 3604 :UBC 0 : errno = save_errno;
3605 [ # # ]: 0 : ereport(LOG,
3606 : : (errmsg("could not fork new process for connection: %m")));
33 heikki.linnakangas@i 3607 :UNC 0 : report_fork_failure_to_client(client_sock, save_errno);
9357 bruce@momjian.us 3608 :UBC 0 : return STATUS_ERROR;
3609 : : }
3610 : :
3611 : : /* in parent, successful fork */
7551 tgl@sss.pgh.pa.us 3612 [ + + ]:CBC 11586 : ereport(DEBUG2,
3613 : : (errmsg_internal("forked new backend, pid=%d socket=%d",
3614 : : (int) pid, (int) client_sock->sock)));
3615 : :
3616 : : /*
3617 : : * Everything's been successful, it's safe to add this backend to our list
3618 : : * of backends.
3619 : : */
9716 bruce@momjian.us 3620 : 11586 : bn->pid = pid;
2489 tgl@sss.pgh.pa.us 3621 : 11586 : bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4198 alvherre@alvh.no-ip. 3622 : 11586 : dlist_push_head(&BackendList, &bn->elem);
3623 : :
3624 : : #ifdef EXEC_BACKEND
3625 : : if (!bn->dead_end)
3626 : : ShmemBackendArrayAdd(bn);
3627 : : #endif
3628 : :
9357 bruce@momjian.us 3629 : 11586 : return STATUS_OK;
3630 : : }
3631 : :
3632 : : /*
3633 : : * Try to report backend fork() failure to client before we close the
3634 : : * connection. Since we do not care to risk blocking the postmaster on
3635 : : * this connection, we set the connection to non-blocking and try only once.
3636 : : *
3637 : : * This is grungy special-purpose code; we cannot use backend libpq since
3638 : : * it's not up and running.
3639 : : */
3640 : : static void
33 heikki.linnakangas@i 3641 :UNC 0 : report_fork_failure_to_client(ClientSocket *client_sock, int errnum)
3642 : : {
3643 : : char buffer[1000];
3644 : : int rc;
3645 : :
3646 : : /* Format the error message packet (always V2 protocol) */
8134 tgl@sss.pgh.pa.us 3647 :UBC 0 : snprintf(buffer, sizeof(buffer), "E%s%s\n",
3648 : : _("could not fork new process for connection: "),
3649 : : strerror(errnum));
3650 : :
3651 : : /* Set port to non-blocking. Don't do send() if this fails */
33 heikki.linnakangas@i 3652 [ # # ]:UNC 0 : if (!pg_set_noblock(client_sock->sock))
8134 tgl@sss.pgh.pa.us 3653 :UBC 0 : return;
3654 : :
3655 : : /* We'll retry after EINTR, but ignore all other failures */
3656 : : do
3657 : : {
33 heikki.linnakangas@i 3658 :UNC 0 : rc = send(client_sock->sock, buffer, strlen(buffer) + 1, 0);
6482 tgl@sss.pgh.pa.us 3659 [ # # # # ]:UBC 0 : } while (rc < 0 && errno == EINTR);
3660 : : }
3661 : :
/*
 * ExitPostmaster -- common exit path of the postmaster
 *
 * Do NOT call exit() directly --- always go through here!  'status' is
 * passed on to proc_exit() unchanged.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * Nothing is known that would make a postmaster multithreaded after
	 * startup, but check again in case there are causes we do not know
	 * about.  The report is only LOG level, since an unclean shutdown this
	 * late would usually look much the same as a clean one.
	 */
	if (pthread_is_threaded_np())
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
#endif

	/*
	 * Long-standing open question kept with this function: should shared
	 * memory be cleaned up and all backends be killed when the postmaster
	 * dies?  An old note here (vadim, 05-10-1999) says they MUST be; this
	 * function itself does neither and simply calls proc_exit().
	 */
	proc_exit(status);
}
3696 : :
3697 : : /*
3698 : : * Handle pmsignal conditions representing requests from backends,
3699 : : * and check for promote and logrotate requests from pg_ctl.
3700 : : */
3701 : : static void
458 tmunro@postgresql.or 3702 : 91804 : process_pm_pmsignal(void)
3703 : : {
3704 : 91804 : pending_pm_pmsignal = false;
3705 : :
3706 [ + + ]: 91804 : ereport(DEBUG2,
3707 : : (errmsg_internal("postmaster received pmsignal signal")));
3708 : :
3709 : : /*
3710 : : * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
3711 : : * unexpected states. If the startup process quickly starts up, completes
3712 : : * recovery, exits, we might process the death of the startup process
3713 : : * first. We don't want to go back to recovery in that case.
3714 : : */
5529 heikki.linnakangas@i 3715 [ + + ]: 91804 : if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
4162 tgl@sss.pgh.pa.us 3716 [ + - + - ]: 232 : pmState == PM_STARTUP && Shutdown == NoShutdown)
3717 : : {
3718 : : /* WAL redo has started. We're out of reinitialization. */
5529 heikki.linnakangas@i 3719 : 232 : FatalError = false;
1693 tgl@sss.pgh.pa.us 3720 : 232 : AbortStartTime = 0;
3721 : :
3722 : : /*
3723 : : * Start the archiver if we're responsible for (re-)archiving received
3724 : : * files.
3725 : : */
3257 heikki.linnakangas@i 3726 [ - + ]: 232 : Assert(PgArchPID == 0);
3229 fujii@postgresql.org 3727 [ + + - + : 232 : if (XLogArchivingAlways())
+ + ]
41 heikki.linnakangas@i 3728 :GNC 3 : PgArchPID = StartChildProcess(B_ARCHIVER);
3729 : :
3730 : : /*
3731 : : * If we aren't planning to enter hot standby mode later, treat
3732 : : * RECOVERY_STARTED as meaning we're out of startup, and report status
3733 : : * accordingly.
3734 : : */
3071 peter_e@gmx.net 3735 [ + + ]:CBC 232 : if (!EnableHotStandby)
3736 : : {
2482 tgl@sss.pgh.pa.us 3737 : 2 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
3738 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 3739 : 2 : sd_notify(0, "READY=1");
3740 : : #endif
3741 : : }
3742 : :
5529 heikki.linnakangas@i 3743 : 232 : pmState = PM_RECOVERY;
3744 : : }
3745 : :
5083 rhaas@postgresql.org 3746 [ + + ]: 91804 : if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
4162 tgl@sss.pgh.pa.us 3747 [ + - + - ]: 138 : pmState == PM_RECOVERY && Shutdown == NoShutdown)
3748 : : {
5230 simon@2ndQuadrant.co 3749 [ + - ]: 138 : ereport(LOG,
3750 : : (errmsg("database system is ready to accept read-only connections")));
3751 : :
3752 : : /* Report status */
2482 tgl@sss.pgh.pa.us 3753 : 138 : AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
3754 : : #ifdef USE_SYSTEMD
3071 peter_e@gmx.net 3755 : 138 : sd_notify(0, "READY=1");
3756 : : #endif
3757 : :
5083 rhaas@postgresql.org 3758 : 138 : pmState = PM_HOT_STANDBY;
739 sfrost@snowman.net 3759 : 138 : connsAllowed = true;
3760 : :
3761 : : /* Some workers may be scheduled to start now */
3480 rhaas@postgresql.org 3762 : 138 : StartWorkerNeeded = true;
3763 : : }
3764 : :
3765 : : /* Process background worker state changes. */
1207 tgl@sss.pgh.pa.us 3766 [ + + ]: 91804 : if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
3767 : : {
3768 : : /* Accept new worker requests only if not stopping. */
3769 : 1028 : BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS);
3770 : 1028 : StartWorkerNeeded = true;
3771 : : }
3772 : :
3480 rhaas@postgresql.org 3773 [ + + - + ]: 91804 : if (StartWorkerNeeded || HaveCrashedWorker)
2545 tgl@sss.pgh.pa.us 3774 : 1166 : maybe_start_bgworkers();
3775 : :
3776 : : /* Tell syslogger to rotate logfile if requested */
2052 akorotkov@postgresql 3777 [ + + ]: 91804 : if (SysLoggerPID != 0)
3778 : : {
3779 [ + - ]: 1 : if (CheckLogrotateSignal())
3780 : : {
3781 : 1 : signal_child(SysLoggerPID, SIGUSR1);
3782 : 1 : RemoveLogrotateSignalFiles();
3783 : : }
2052 akorotkov@postgresql 3784 [ # # ]:UBC 0 : else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
3785 : : {
3786 : 0 : signal_child(SysLoggerPID, SIGUSR1);
3787 : : }
3788 : : }
3789 : :
4162 tgl@sss.pgh.pa.us 3790 [ + + ]:CBC 91804 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
1339 tgl@sss.pgh.pa.us 3791 [ + - + - ]:GBC 88367 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
3792 : : {
3793 : : /*
3794 : : * Start one iteration of the autovacuum daemon, even if autovacuuming
3795 : : * is nominally not enabled. This is so we can have an active defense
3796 : : * against transaction ID wraparound. We set a flag for the main loop
3797 : : * to do it rather than trying to do it here --- this is because the
3798 : : * autovac process itself may send the signal, and we want to handle
3799 : : * that by launching another iteration as soon as the current one
3800 : : * completes.
3801 : : */
6268 alvherre@alvh.no-ip. 3802 : 88367 : start_autovac_launcher = true;
3803 : : }
3804 : :
4162 tgl@sss.pgh.pa.us 3805 [ + + ]:CBC 91804 : if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
1339 3806 [ + - + - ]: 750 : Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
3807 : : {
3808 : : /* The autovacuum launcher wants us to start a worker process. */
6268 alvherre@alvh.no-ip. 3809 : 750 : StartAutovacuumWorker();
3810 : : }
3811 : :
2484 tgl@sss.pgh.pa.us 3812 [ + + ]: 91804 : if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
3813 : : {
3814 : : /* Startup Process wants us to start the walreceiver process. */
3815 : : /* Start immediately if possible, else remember request for later. */
3816 : 321 : WalReceiverRequested = true;
3817 : 321 : MaybeStartWalReceiver();
3818 : : }
3819 : :
3820 : : /*
3821 : : * Try to advance postmaster's state machine, if a child requests it.
3822 : : *
3823 : : * Be careful about the order of this action relative to this function's
3824 : : * other actions. Generally, this should be after other actions, in case
3825 : : * they have effects PostmasterStateMachine would need to know about.
3826 : : * However, we should do it before the CheckPromoteSignal step, which
3827 : : * cannot have any (immediate) effect on the state machine, but does
3828 : : * depend on what state we're in now.
3829 : : */
1817 3830 [ + + ]: 91804 : if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
3831 : : {
4760 rhaas@postgresql.org 3832 : 1036 : PostmasterStateMachine();
3833 : : }
3834 : :
1817 tgl@sss.pgh.pa.us 3835 [ + + ]: 91804 : if (StartupPID != 0 &&
4807 rhaas@postgresql.org 3836 [ + + + + ]: 701 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
1339 tgl@sss.pgh.pa.us 3837 [ + - + + ]: 1209 : pmState == PM_HOT_STANDBY) &&
1817 3838 : 701 : CheckPromoteSignal())
3839 : : {
3840 : : /*
3841 : : * Tell startup process to finish recovery.
3842 : : *
3843 : : * Leave the promote signal file in place and let the Startup process
3844 : : * do the unlink.
3845 : : */
4807 rhaas@postgresql.org 3846 : 39 : signal_child(StartupPID, SIGUSR2);
3847 : : }
8433 tgl@sss.pgh.pa.us 3848 : 91804 : }
3849 : :
/*
 * dummy_handler -- signal handler that deliberately does nothing
 *
 * Installed for signals that the postmaster itself has no use for, but that
 * backends do act on.  Setting such signals to SIG_IGN in the postmaster
 * could cause a newly started backend to lose a signal that arrives before
 * it has reconfigured its own signal processing (see the notes in
 * tcop/postgres.c), so a no-op handler is used instead.
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
3863 : :
3864 : : /*
3865 : : * Generate a random cancel key.
3866 : : */
3867 : : static bool
2687 heikki.linnakangas@i 3868 :CBC 14748 : RandomCancelKey(int32 *cancel_key)
3869 : : {
1930 michael@paquier.xyz 3870 : 14748 : return pg_strong_random(cancel_key, sizeof(int32));
3871 : : }
3872 : :
3873 : : /*
3874 : : * Count up number of child processes of specified types (dead_end children
3875 : : * are always excluded).
3876 : : */
3877 : : static int
5203 heikki.linnakangas@i 3878 : 19447 : CountChildren(int target)
3879 : : {
3880 : : dlist_iter iter;
9206 tgl@sss.pgh.pa.us 3881 : 19447 : int cnt = 0;
3882 : :
4198 alvherre@alvh.no-ip. 3883 [ + - + + ]: 73111 : dlist_foreach(iter, &BackendList)
3884 : : {
3885 : 53664 : Backend *bp = dlist_container(Backend, elem, iter.cur);
3886 : :
5203 heikki.linnakangas@i 3887 [ + + ]: 53664 : if (bp->dead_end)
3888 : 109 : continue;
3889 : :
3890 : : /*
3891 : : * Since target == BACKEND_TYPE_ALL is the most common case, we test
3892 : : * it first and avoid touching shared memory for every child.
3893 : : */
4832 rhaas@postgresql.org 3894 [ + + ]: 53555 : if (target != BACKEND_TYPE_ALL)
3895 : : {
3896 : : /*
3897 : : * Assign bkend_type for any recently announced WAL Sender
3898 : : * processes.
3899 : : */
4147 alvherre@alvh.no-ip. 3900 [ + + + + ]: 4693 : if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3901 : 1027 : IsPostmasterChildWalSender(bp->child_slot))
3902 : 49 : bp->bkend_type = BACKEND_TYPE_WALSND;
3903 : :
3904 [ + + ]: 3666 : if (!(target & bp->bkend_type))
4832 rhaas@postgresql.org 3905 : 344 : continue;
3906 : : }
3907 : :
5203 heikki.linnakangas@i 3908 : 53211 : cnt++;
3909 : : }
9206 tgl@sss.pgh.pa.us 3910 : 19447 : return cnt;
3911 : : }
3912 : :
3913 : :
3914 : : /*
3915 : : * StartChildProcess -- start an auxiliary process for the postmaster
3916 : : *
3917 : : * "type" determines what kind of child will be started. All child types
3918 : : * initially go to AuxiliaryProcessMain, which will handle common setup.
3919 : : *
3920 : : * Return value of StartChildProcess is subprocess' PID, or 0 if failed
3921 : : * to start subprocess.
3922 : : */
3923 : : static pid_t
41 heikki.linnakangas@i 3924 :GNC 5835 : StartChildProcess(BackendType type)
3925 : : {
3926 : : pid_t pid;
3927 : :
27 3928 : 5835 : pid = postmaster_child_launch(type, NULL, 0, NULL);
8957 vadim4o@yahoo.com 3929 [ - + ]:CBC 5835 : if (pid < 0)
3930 : : {
3931 : : /* in parent, fork failed */
27 heikki.linnakangas@i 3932 [ # # ]:UNC 0 : ereport(LOG,
3933 : : (errmsg("could not fork \"%s\" process: %m", PostmasterChildName(type))));
3934 : :
3935 : : /*
3936 : : * fork failure is fatal during startup, but there's no need to choke
3937 : : * immediately if starting other child types fails.
3938 : : */
41 3939 [ # # ]: 0 : if (type == B_STARTUP)
7260 tgl@sss.pgh.pa.us 3940 :UBC 0 : ExitPostmaster(1);
3941 : 0 : return 0;
3942 : : }
3943 : :
3944 : : /*
3945 : : * in parent, successful fork
3946 : : */
8432 tgl@sss.pgh.pa.us 3947 :CBC 5835 : return pid;
3948 : : }
3949 : :
3950 : : /*
3951 : : * StartAutovacuumWorker
3952 : : * Start an autovac worker process.
3953 : : *
3954 : : * This function is here because it enters the resulting PID into the
3955 : : * postmaster's private backends list.
3956 : : *
3957 : : * NB -- this code very roughly matches BackendStartup.
3958 : : */
3959 : : static void
6268 alvherre@alvh.no-ip. 3960 : 750 : StartAutovacuumWorker(void)
3961 : : {
3962 : : Backend *bn;
3963 : :
3964 : : /*
3965 : : * If not in condition to run a process, don't try, but handle it like a
3966 : : * fork failure. This does not normally happen, since the signal is only
3967 : : * supposed to be sent by autovacuum launcher when it's OK to do it, but
3968 : : * we have to check to avoid race-condition problems during DB state
3969 : : * changes.
3970 : : */
1651 tgl@sss.pgh.pa.us 3971 [ + - ]: 750 : if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK)
3972 : : {
3973 : : /*
3974 : : * Compute the cancel key that will be assigned to this session. We
3975 : : * probably don't need cancel keys for autovac workers, but we'd
3976 : : * better have something random in the field to prevent unfriendly
3977 : : * people from sending cancels to them.
3978 : : */
2687 heikki.linnakangas@i 3979 [ - + ]: 750 : if (!RandomCancelKey(&MyCancelKey))
3980 : : {
2687 heikki.linnakangas@i 3981 [ # # ]:UBC 0 : ereport(LOG,
3982 : : (errcode(ERRCODE_INTERNAL_ERROR),
3983 : : errmsg("could not generate random cancel key")));
3984 : 0 : return;
3985 : : }
3986 : :
188 heikki.linnakangas@i 3987 :GNC 750 : bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
6093 tgl@sss.pgh.pa.us 3988 [ + - ]:CBC 750 : if (bn)
3989 : : {
2735 heikki.linnakangas@i 3990 : 750 : bn->cancel_key = MyCancelKey;
3991 : :
3992 : : /* Autovac workers are not dead_end and need a child slot */
3993 : 750 : bn->dead_end = false;
3994 : 750 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
3995 : 750 : bn->bgworker_notify = false;
3996 : :
27 heikki.linnakangas@i 3997 :GNC 750 : bn->pid = StartChildProcess(B_AUTOVAC_WORKER);
2735 heikki.linnakangas@i 3998 [ + - ]:CBC 750 : if (bn->pid > 0)
3999 : : {
4000 : 750 : bn->bkend_type = BACKEND_TYPE_AUTOVAC;
4001 : 750 : dlist_push_head(&BackendList, &bn->elem);
4002 : : #ifdef EXEC_BACKEND
4003 : : ShmemBackendArrayAdd(bn);
4004 : : #endif
4005 : : /* all OK */
4006 : 750 : return;
4007 : : }
4008 : :
4009 : : /*
4010 : : * fork failed, fall through to report -- actual error message was
4011 : : * logged by StartChildProcess
4012 : : */
2735 heikki.linnakangas@i 4013 :UBC 0 : (void) ReleasePostmasterChildSlot(bn->child_slot);
188 heikki.linnakangas@i 4014 :UNC 0 : pfree(bn);
4015 : : }
4016 : : else
6093 tgl@sss.pgh.pa.us 4017 [ # # ]:UBC 0 : ereport(LOG,
4018 : : (errcode(ERRCODE_OUT_OF_MEMORY),
4019 : : errmsg("out of memory")));
4020 : : }
4021 : :
4022 : : /*
4023 : : * Report the failure to the launcher, if it's running. (If it's not, we
4024 : : * might not even be connected to shared memory, so don't try to call
4025 : : * AutoVacWorkerFailed.) Note that we also need to signal it so that it
4026 : : * responds to the condition, but we don't do that here, instead waiting
4027 : : * for ServerLoop to do it. This way we avoid a ping-pong signaling in
4028 : : * quick succession between the autovac launcher and postmaster in case
4029 : : * things get ugly.
4030 : : */
6138 alvherre@alvh.no-ip. 4031 [ # # ]: 0 : if (AutoVacPID != 0)
4032 : : {
6093 tgl@sss.pgh.pa.us 4033 : 0 : AutoVacWorkerFailed();
5347 alvherre@alvh.no-ip. 4034 : 0 : avlauncher_needs_signal = true;
4035 : : }
4036 : : }
4037 : :
4038 : : /*
4039 : : * MaybeStartWalReceiver
4040 : : * Start the WAL receiver process, if not running and our state allows.
4041 : : *
4042 : : * Note: if WalReceiverPID is already nonzero, it might seem that we should
4043 : : * clear WalReceiverRequested. However, there's a race condition if the
4044 : : * walreceiver terminates and the startup process immediately requests a new
4045 : : * one: it's quite possible to get the signal for the request before reaping
4046 : : * the dead walreceiver process. Better to risk launching an extra
4047 : : * walreceiver than to miss launching one we need. (The walreceiver code
4048 : : * has logic to recognize that it should go away if not needed.)
4049 : : */
4050 : : static void
2484 tgl@sss.pgh.pa.us 4051 :CBC 484 : MaybeStartWalReceiver(void)
4052 : : {
4053 [ + + ]: 484 : if (WalReceiverPID == 0 &&
4054 [ + + + + ]: 322 : (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
1339 4055 [ + + ]: 320 : pmState == PM_HOT_STANDBY) &&
4056 [ + - ]: 319 : Shutdown <= SmartShutdown)
4057 : : {
41 heikki.linnakangas@i 4058 :GNC 319 : WalReceiverPID = StartChildProcess(B_WAL_RECEIVER);
1817 tgl@sss.pgh.pa.us 4059 [ + - ]:CBC 319 : if (WalReceiverPID != 0)
4060 : 319 : WalReceiverRequested = false;
4061 : : /* else leave the flag set, so we'll try again later */
4062 : : }
2484 4063 : 484 : }
4064 : :
4065 : : /*
4066 : : * MaybeStartWalSummarizer
4067 : : * Start the WAL summarizer process, if not running and our state allows.
4068 : : */
4069 : : static void
116 rhaas@postgresql.org 4070 :GNC 123520 : MaybeStartWalSummarizer(void)
4071 : : {
4072 [ + + + + ]: 123520 : if (summarize_wal && WalSummarizerPID == 0 &&
4073 [ + + + + ]: 62 : (pmState == PM_RUN || pmState == PM_HOT_STANDBY) &&
4074 [ + - ]: 12 : Shutdown <= SmartShutdown)
41 heikki.linnakangas@i 4075 : 12 : WalSummarizerPID = StartChildProcess(B_WAL_SUMMARIZER);
116 rhaas@postgresql.org 4076 : 123520 : }
4077 : :
4078 : :
4079 : : /*
4080 : : * MaybeStartSlotSyncWorker
4081 : : * Start the slot sync worker, if not running and our state allows.
4082 : : *
4083 : : * We allow to start the slot sync worker when we are on a hot standby,
4084 : : * fast or immediate shutdown is not in progress, slot sync parameters
4085 : : * are configured correctly, and it is the first time of worker's launch,
4086 : : * or enough time has passed since the worker was launched last.
4087 : : */
4088 : : static void
52 akapila@postgresql.o 4089 : 123520 : MaybeStartSlotSyncWorker(void)
4090 : : {
4091 [ + + + + ]: 123520 : if (SlotSyncWorkerPID == 0 && pmState == PM_HOT_STANDBY &&
4092 [ + - + + : 1764 : Shutdown <= SmartShutdown && sync_replication_slots &&
+ + ]
4093 [ + + ]: 11 : ValidateSlotSyncParams(LOG) && SlotSyncWorkerCanRestart())
27 heikki.linnakangas@i 4094 : 4 : SlotSyncWorkerPID = StartChildProcess(B_SLOTSYNC_WORKER);
52 akapila@postgresql.o 4095 : 123520 : }
4096 : :
4097 : : /*
4098 : : * Create the opts file
4099 : : */
4100 : : static bool
7276 bruce@momjian.us 4101 :CBC 728 : CreateOptsFile(int argc, char *argv[], char *fullprogname)
4102 : : {
4103 : : FILE *fp;
4104 : : int i;
4105 : :
4106 : : #define OPTS_FILE "postmaster.opts"
4107 : :
6859 tgl@sss.pgh.pa.us 4108 [ - + ]: 728 : if ((fp = fopen(OPTS_FILE, "w")) == NULL)
4109 : : {
1227 peter@eisentraut.org 4110 [ # # ]:UBC 0 : ereport(LOG,
4111 : : (errcode_for_file_access(),
4112 : : errmsg("could not create file \"%s\": %m", OPTS_FILE)));
8680 peter_e@gmx.net 4113 : 0 : return false;
4114 : : }
4115 : :
8680 peter_e@gmx.net 4116 :CBC 728 : fprintf(fp, "%s", fullprogname);
4117 [ + + ]: 3612 : for (i = 1; i < argc; i++)
5771 bruce@momjian.us 4118 : 2884 : fprintf(fp, " \"%s\"", argv[i]);
8680 peter_e@gmx.net 4119 : 728 : fputs("\n", fp);
4120 : :
7384 tgl@sss.pgh.pa.us 4121 [ - + ]: 728 : if (fclose(fp))
4122 : : {
1227 peter@eisentraut.org 4123 [ # # ]:UBC 0 : ereport(LOG,
4124 : : (errcode_for_file_access(),
4125 : : errmsg("could not write file \"%s\": %m", OPTS_FILE)));
8680 peter_e@gmx.net 4126 : 0 : return false;
4127 : : }
4128 : :
8680 peter_e@gmx.net 4129 :CBC 728 : return true;
4130 : : }
4131 : :
4132 : :
4133 : : /*
4134 : : * MaxLivePostmasterChildren
4135 : : *
4136 : : * This reports the number of entries needed in per-child-process arrays
4137 : : * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
4138 : : * These arrays include regular backends, autovac workers, walsenders
4139 : : * and background workers, but not special children nor dead_end children.
4140 : : * This allows the arrays to have a fixed maximum size, to wit the same
4141 : : * too-many-children limit enforced by canAcceptConnections(). The exact value
4142 : : * isn't too critical as long as it's more than MaxBackends.
4143 : : */
4144 : : int
5458 tgl@sss.pgh.pa.us 4145 : 18998 : MaxLivePostmasterChildren(void)
4146 : : {
4147 alvherre@alvh.no-ip. 4147 : 37996 : return 2 * (MaxConnections + autovacuum_max_workers + 1 +
1888 michael@paquier.xyz 4148 : 18998 : max_wal_senders + max_worker_processes);
4149 : : }
4150 : :
4151 : : /*
4152 : : * Connect background worker to a database.
4153 : : */
4154 : : void
2201 magnus@hagander.net 4155 : 621 : BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
4156 : : {
4147 alvherre@alvh.no-ip. 4157 : 621 : BackgroundWorker *worker = MyBgworkerEntry;
186 michael@paquier.xyz 4158 :GNC 621 : bits32 init_flags = 0; /* never honor session_preload_libraries */
4159 : :
4160 : : /* ignore datallowconn? */
4161 [ - + ]: 621 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
186 michael@paquier.xyz 4162 :UNC 0 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
4163 : : /* ignore rolcanlogin? */
185 michael@paquier.xyz 4164 [ - + ]:GNC 621 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
185 michael@paquier.xyz 4165 :UNC 0 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
4166 : :
4167 : : /* XXX is this the right errcode? */
4147 alvherre@alvh.no-ip. 4168 [ - + ]:CBC 621 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
4147 alvherre@alvh.no-ip. 4169 [ # # ]:UBC 0 : ereport(FATAL,
4170 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4171 : : errmsg("database connection requirement not indicated during registration")));
4172 : :
629 tgl@sss.pgh.pa.us 4173 :CBC 621 : InitPostgres(dbname, InvalidOid, /* database to connect to */
4174 : : username, InvalidOid, /* role to connect as */
4175 : : init_flags,
629 tgl@sss.pgh.pa.us 4176 :ECB (556) : NULL); /* no out_dbname */
4177 : :
4178 : : /* it had better not gotten out of "init" mode yet */
3359 rhaas@postgresql.org 4179 [ - + ]:CBC 621 : if (!IsInitProcessingMode())
3359 rhaas@postgresql.org 4180 [ # # ]:UBC 0 : ereport(ERROR,
4181 : : (errmsg("invalid processing mode in background worker")));
3359 rhaas@postgresql.org 4182 :CBC 621 : SetProcessingMode(NormalProcessing);
4183 : 621 : }
4184 : :
4185 : : /*
4186 : : * Connect background worker to a database using OIDs.
4187 : : */
4188 : : void
2201 magnus@hagander.net 4189 : 1781 : BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
4190 : : {
3359 rhaas@postgresql.org 4191 : 1781 : BackgroundWorker *worker = MyBgworkerEntry;
186 michael@paquier.xyz 4192 :GNC 1781 : bits32 init_flags = 0; /* never honor session_preload_libraries */
4193 : :
4194 : : /* ignore datallowconn? */
4195 [ + + ]: 1781 : if (flags & BGWORKER_BYPASS_ALLOWCONN)
4196 : 1 : init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
4197 : : /* ignore rolcanlogin? */
185 4198 [ + + ]: 1781 : if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
4199 : 1 : init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
4200 : :
4201 : : /* XXX is this the right errcode? */
3359 rhaas@postgresql.org 4202 [ - + ]:CBC 1781 : if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
3359 rhaas@postgresql.org 4203 [ # # ]:UBC 0 : ereport(FATAL,
4204 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
4205 : : errmsg("database connection requirement not indicated during registration")));
4206 : :
629 tgl@sss.pgh.pa.us 4207 :CBC 1781 : InitPostgres(NULL, dboid, /* database to connect to */
4208 : : NULL, useroid, /* role to connect as */
4209 : : init_flags,
629 tgl@sss.pgh.pa.us 4210 :ECB (1643) : NULL); /* no out_dbname */
4211 : :
4212 : : /* it had better not gotten out of "init" mode yet */
4147 alvherre@alvh.no-ip. 4213 [ - + ]:CBC 1774 : if (!IsInitProcessingMode())
4147 alvherre@alvh.no-ip. 4214 [ # # ]:UBC 0 : ereport(ERROR,
4215 : : (errmsg("invalid processing mode in background worker")));
4147 alvherre@alvh.no-ip. 4216 :CBC 1774 : SetProcessingMode(NormalProcessing);
4217 : 1774 : }
4218 : :
4219 : : /*
4220 : : * Block/unblock signals in a background worker
4221 : : */
4222 : : void
4147 alvherre@alvh.no-ip. 4223 :UBC 0 : BackgroundWorkerBlockSignals(void)
4224 : : {
436 tmunro@postgresql.or 4225 : 0 : sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4147 alvherre@alvh.no-ip. 4226 : 0 : }
4227 : :
4228 : : void
4147 alvherre@alvh.no-ip. 4229 :CBC 2569 : BackgroundWorkerUnblockSignals(void)
4230 : : {
436 tmunro@postgresql.or 4231 : 2569 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
4147 alvherre@alvh.no-ip. 4232 : 2569 : }
4233 : :
4234 : : /*
4235 : : * Start a new bgworker.
4236 : : * Starting time conditions must have been checked already.
4237 : : *
4238 : : * Returns true on success, false on failure.
4239 : : * In either case, update the RegisteredBgWorker's state appropriately.
4240 : : *
4241 : : * This code is heavily based on autovacuum.c, q.v.
4242 : : */
4243 : : static bool
3894 rhaas@postgresql.org 4244 : 2412 : do_start_bgworker(RegisteredBgWorker *rw)
4245 : : {
4246 : : pid_t worker_pid;
4247 : :
2547 tgl@sss.pgh.pa.us 4248 [ - + ]: 2412 : Assert(rw->rw_pid == 0);
4249 : :
4250 : : /*
4251 : : * Allocate and assign the Backend element. Note we must do this before
4252 : : * forking, so that we can handle failures (out of memory or child-process
4253 : : * slots) cleanly.
4254 : : *
4255 : : * Treat failure as though the worker had crashed. That way, the
4256 : : * postmaster will wait a bit before attempting to start it again; if we
4257 : : * tried again right away, most likely we'd find ourselves hitting the
4258 : : * same resource-exhaustion condition.
4259 : : */
4260 [ - + ]: 2412 : if (!assign_backendlist_entry(rw))
4261 : : {
2547 tgl@sss.pgh.pa.us 4262 :UBC 0 : rw->rw_crashed_at = GetCurrentTimestamp();
4263 : 0 : return false;
4264 : : }
4265 : :
3215 rhaas@postgresql.org 4266 [ + + ]:CBC 2412 : ereport(DEBUG1,
4267 : : (errmsg_internal("starting background worker process \"%s\"",
4268 : : rw->rw_worker.bgw_name)));
4269 : :
27 heikki.linnakangas@i 4270 :GNC 2412 : worker_pid = postmaster_child_launch(B_BG_WORKER, (char *) &rw->rw_worker, sizeof(BackgroundWorker), NULL);
4271 [ - + ]: 2412 : if (worker_pid == -1)
4272 : : {
4273 : : /* in postmaster, fork failed ... */
27 heikki.linnakangas@i 4274 [ # # ]:UNC 0 : ereport(LOG,
4275 : : (errmsg("could not fork background worker process: %m")));
4276 : : /* undo what assign_backendlist_entry did */
4277 : 0 : ReleasePostmasterChildSlot(rw->rw_child_slot);
4278 : 0 : rw->rw_child_slot = 0;
4279 : 0 : pfree(rw->rw_backend);
4280 : 0 : rw->rw_backend = NULL;
4281 : : /* mark entry as crashed, so we'll try again later */
4282 : 0 : rw->rw_crashed_at = GetCurrentTimestamp();
4283 : 0 : return false;
4284 : : }
4285 : :
4286 : : /* in postmaster, fork successful ... */
27 heikki.linnakangas@i 4287 :GNC 2412 : rw->rw_pid = worker_pid;
4288 : 2412 : rw->rw_backend->pid = rw->rw_pid;
4289 : 2412 : ReportBackgroundWorkerPID(rw);
4290 : : /* add new worker to lists of backends */
4291 : 2412 : dlist_push_head(&BackendList, &rw->rw_backend->elem);
4292 : : #ifdef EXEC_BACKEND
4293 : : ShmemBackendArrayAdd(rw->rw_backend);
4294 : : #endif
4295 : 2412 : return true;
4296 : : }
4297 : :
4298 : : /*
4299 : : * Does the current postmaster state require starting a worker with the
4300 : : * specified start_time?
4301 : : */
4302 : : static bool
4147 alvherre@alvh.no-ip. 4303 :CBC 3259 : bgworker_should_start_now(BgWorkerStartTime start_time)
4304 : : {
4305 [ - + + + : 3259 : switch (pmState)
- ]
4306 : : {
4147 alvherre@alvh.no-ip. 4307 :UBC 0 : case PM_NO_CHILDREN:
4308 : : case PM_WAIT_DEAD_END:
4309 : : case PM_SHUTDOWN_2:
4310 : : case PM_SHUTDOWN:
4311 : : case PM_WAIT_BACKENDS:
4312 : : case PM_STOP_BACKENDS:
4313 : 0 : break;
4314 : :
4147 alvherre@alvh.no-ip. 4315 :CBC 2412 : case PM_RUN:
4316 [ + + ]: 2412 : if (start_time == BgWorkerStart_RecoveryFinished)
4317 : 1080 : return true;
4318 : : /* fall through */
4319 : :
4320 : : case PM_HOT_STANDBY:
4321 [ + + ]: 1470 : if (start_time == BgWorkerStart_ConsistentState)
4322 : 1332 : return true;
4323 : : /* fall through */
4324 : :
4325 : : case PM_RECOVERY:
4326 : : case PM_STARTUP:
4327 : : case PM_INIT:
4328 [ - + ]: 847 : if (start_time == BgWorkerStart_PostmasterStart)
4147 alvherre@alvh.no-ip. 4329 :UBC 0 : return true;
4330 : : /* fall through */
4331 : : }
4332 : :
4147 alvherre@alvh.no-ip. 4333 :CBC 847 : return false;
4334 : : }
4335 : :
4336 : : /*
4337 : : * Allocate the Backend struct for a connected background worker, but don't
4338 : : * add it to the list of backends just yet.
4339 : : *
4340 : : * On failure, return false without changing any worker state.
4341 : : *
4342 : : * Some info from the Backend is copied into the passed rw.
4343 : : */
4344 : : static bool
4345 : 2412 : assign_backendlist_entry(RegisteredBgWorker *rw)
4346 : : {
4347 : : Backend *bn;
4348 : :
4349 : : /*
4350 : : * Check that database state allows another connection. Currently the
4351 : : * only possible failure is CAC_TOOMANY, so we just log an error message
4352 : : * based on that rather than checking the error code precisely.
4353 : : */
1651 tgl@sss.pgh.pa.us 4354 [ - + ]: 2412 : if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK)
4355 : : {
1651 tgl@sss.pgh.pa.us 4356 [ # # ]:UBC 0 : ereport(LOG,
4357 : : (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
4358 : : errmsg("no slot available for new background worker process")));
4359 : 0 : return false;
4360 : : }
4361 : :
4362 : : /*
4363 : : * Compute the cancel key that will be assigned to this session. We
4364 : : * probably don't need cancel keys for background workers, but we'd better
4365 : : * have something random in the field to prevent unfriendly people from
4366 : : * sending cancels to them.
4367 : : */
2687 heikki.linnakangas@i 4368 [ - + ]:CBC 2412 : if (!RandomCancelKey(&MyCancelKey))
4369 : : {
2687 heikki.linnakangas@i 4370 [ # # ]:UBC 0 : ereport(LOG,
4371 : : (errcode(ERRCODE_INTERNAL_ERROR),
4372 : : errmsg("could not generate random cancel key")));
4373 : 0 : return false;
4374 : : }
4375 : :
188 heikki.linnakangas@i 4376 :GNC 2412 : bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
4147 alvherre@alvh.no-ip. 4377 [ - + ]:CBC 2412 : if (bn == NULL)
4378 : : {
4147 alvherre@alvh.no-ip. 4379 [ # # ]:UBC 0 : ereport(LOG,
4380 : : (errcode(ERRCODE_OUT_OF_MEMORY),
4381 : : errmsg("out of memory")));
4382 : 0 : return false;
4383 : : }
4384 : :
4147 alvherre@alvh.no-ip. 4385 :CBC 2412 : bn->cancel_key = MyCancelKey;
4386 : 2412 : bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4387 : 2412 : bn->bkend_type = BACKEND_TYPE_BGWORKER;
4388 : 2412 : bn->dead_end = false;
3882 rhaas@postgresql.org 4389 : 2412 : bn->bgworker_notify = false;
4390 : :
4147 alvherre@alvh.no-ip. 4391 : 2412 : rw->rw_backend = bn;
4392 : 2412 : rw->rw_child_slot = bn->child_slot;
4393 : :
4394 : 2412 : return true;
4395 : : }
4396 : :
4397 : : /*
4398 : : * If the time is right, start background worker(s).
4399 : : *
4400 : : * As a side effect, the bgworker control variables are set or reset
4401 : : * depending on whether more workers may need to be started.
4402 : : *
4403 : : * We limit the number of workers started per call, to avoid consuming the
4404 : : * postmaster's attention for too long when many such requests are pending.
4405 : : * As long as StartWorkerNeeded is true, ServerLoop will not block and will
4406 : : * call this function again after dealing with any other issues.
4407 : : */
4408 : : static void
2545 tgl@sss.pgh.pa.us 4409 : 5736 : maybe_start_bgworkers(void)
4410 : : {
4411 : : #define MAX_BGWORKERS_TO_LAUNCH 100
4412 : 5736 : int num_launched = 0;
4147 alvherre@alvh.no-ip. 4413 : 5736 : TimestampTz now = 0;
4414 : : slist_mutable_iter iter;
4415 : :
4416 : : /*
4417 : : * During crash recovery, we have no need to be called until the state
4418 : : * transition out of recovery.
4419 : : */
4420 [ + + ]: 5736 : if (FatalError)
4421 : : {
4422 : 5 : StartWorkerNeeded = false;
4423 : 5 : HaveCrashedWorker = false;
2547 tgl@sss.pgh.pa.us 4424 : 5 : return;
4425 : : }
4426 : :
4427 : : /* Don't need to be called again unless we find a reason for it below */
4428 : 5731 : StartWorkerNeeded = false;
4147 alvherre@alvh.no-ip. 4429 : 5731 : HaveCrashedWorker = false;
4430 : :
3925 rhaas@postgresql.org 4431 [ + + + + : 15681 : slist_foreach_modify(iter, &BackgroundWorkerList)
+ + ]
4432 : : {
4433 : : RegisteredBgWorker *rw;
4434 : :
4147 alvherre@alvh.no-ip. 4435 : 9950 : rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
4436 : :
4437 : : /* ignore if already running */
4438 [ + + ]: 9950 : if (rw->rw_pid != 0)
4439 : 5131 : continue;
4440 : :
4441 : : /* if marked for death, clean up and remove from list */
3831 rhaas@postgresql.org 4442 [ - + ]: 4819 : if (rw->rw_terminate)
4443 : : {
3831 rhaas@postgresql.org 4444 :UBC 0 : ForgetBackgroundWorker(&iter);
4445 : 0 : continue;
4446 : : }
4447 : :
4448 : : /*
4449 : : * If this worker has crashed previously, maybe it needs to be
4450 : : * restarted (unless on registration it specified it doesn't want to
4451 : : * be restarted at all). Check how long ago did a crash last happen.
4452 : : * If the last crash is too recent, don't start it right away; let it
4453 : : * be restarted once enough time has passed.
4454 : : */
4147 alvherre@alvh.no-ip. 4455 [ + + ]:CBC 4819 : if (rw->rw_crashed_at != 0)
4456 : : {
4457 [ + + ]: 1560 : if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
3925 rhaas@postgresql.org 4458 : 18 : {
4459 : : int notify_pid;
4460 : :
2321 4461 : 18 : notify_pid = rw->rw_worker.bgw_notify_pid;
4462 : :
3917 tgl@sss.pgh.pa.us 4463 : 18 : ForgetBackgroundWorker(&iter);
4464 : :
4465 : : /* Report worker is gone now. */
2321 rhaas@postgresql.org 4466 [ + + ]: 18 : if (notify_pid != 0)
4467 : 12 : kill(notify_pid, SIGUSR1);
4468 : :
4147 alvherre@alvh.no-ip. 4469 : 18 : continue;
4470 : : }
4471 : :
4472 : : /* read system time only when needed */
4473 [ + - ]: 1542 : if (now == 0)
4474 : 1542 : now = GetCurrentTimestamp();
4475 : :
4476 [ + - ]: 1542 : if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
2489 tgl@sss.pgh.pa.us 4477 : 1542 : rw->rw_worker.bgw_restart_time * 1000))
4478 : : {
4479 : : /* Set flag to remember that we have workers to start later */
4147 alvherre@alvh.no-ip. 4480 : 1542 : HaveCrashedWorker = true;
4481 : 1542 : continue;
4482 : : }
4483 : : }
4484 : :
4485 [ + + ]: 3259 : if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
4486 : : {
4487 : : /* reset crash time before trying to start worker */
4488 : 2412 : rw->rw_crashed_at = 0;
4489 : :
4490 : : /*
4491 : : * Try to start the worker.
4492 : : *
4493 : : * On failure, give up processing workers for now, but set
4494 : : * StartWorkerNeeded so we'll come back here on the next iteration
4495 : : * of ServerLoop to try again. (We don't want to wait, because
4496 : : * there might be additional ready-to-run workers.) We could set
4497 : : * HaveCrashedWorker as well, since this worker is now marked
4498 : : * crashed, but there's no need because the next run of this
4499 : : * function will do that.
4500 : : */
2547 tgl@sss.pgh.pa.us 4501 [ - + ]: 2412 : if (!do_start_bgworker(rw))
4502 : : {
2547 tgl@sss.pgh.pa.us 4503 :UBC 0 : StartWorkerNeeded = true;
3148 rhaas@postgresql.org 4504 : 0 : return;
4505 : : }
4506 : :
4507 : : /*
4508 : : * If we've launched as many workers as allowed, quit, but have
4509 : : * ServerLoop call us again to look for additional ready-to-run
4510 : : * workers. There might not be any, but we'll find out the next
4511 : : * time we run.
4512 : : */
2545 tgl@sss.pgh.pa.us 4513 [ - + ]:CBC 2412 : if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
4514 : : {
2545 tgl@sss.pgh.pa.us 4515 :UBC 0 : StartWorkerNeeded = true;
4516 : 0 : return;
4517 : : }
4518 : : }
4519 : : }
4520 : : }
4521 : :
4522 : : /*
4523 : : * When a backend asks to be notified about worker state changes, we
4524 : : * set a flag in its backend entry. The background worker machinery needs
4525 : : * to know when such backends exit.
4526 : : */
4527 : : bool
3882 rhaas@postgresql.org 4528 :CBC 1792 : PostmasterMarkPIDForWorkerNotify(int pid)
4529 : : {
4530 : : dlist_iter iter;
4531 : : Backend *bp;
4532 : :
4533 [ + - + - ]: 3761 : dlist_foreach(iter, &BackendList)
4534 : : {
4535 : 3761 : bp = dlist_container(Backend, elem, iter.cur);
4536 [ + + ]: 3761 : if (bp->pid == pid)
4537 : : {
4538 : 1792 : bp->bgworker_notify = true;
4539 : 1792 : return true;
4540 : : }
4541 : : }
3882 rhaas@postgresql.org 4542 :UBC 0 : return false;
4543 : : }
4544 : :
4545 : : #ifdef EXEC_BACKEND
4546 : :
4547 : : Size
4548 : : ShmemBackendArraySize(void)
4549 : : {
4550 : : return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
4551 : : }
4552 : :
4553 : : void
4554 : : ShmemBackendArrayAllocation(void)
4555 : : {
4556 : : Size size = ShmemBackendArraySize();
4557 : :
4558 : : ShmemBackendArray = (Backend *) ShmemAlloc(size);
4559 : : /* Mark all slots as empty */
4560 : : memset(ShmemBackendArray, 0, size);
4561 : : }
4562 : :
4563 : : static void
4564 : : ShmemBackendArrayAdd(Backend *bn)
4565 : : {
4566 : : /* The array slot corresponding to my PMChildSlot should be free */
4567 : : int i = bn->child_slot - 1;
4568 : :
4569 : : Assert(ShmemBackendArray[i].pid == 0);
4570 : : ShmemBackendArray[i] = *bn;
4571 : : }
4572 : :
4573 : : static void
4574 : : ShmemBackendArrayRemove(Backend *bn)
4575 : : {
4576 : : int i = bn->child_slot - 1;
4577 : :
4578 : : Assert(ShmemBackendArray[i].pid == bn->pid);
4579 : : /* Mark the slot as empty */
4580 : : ShmemBackendArray[i].pid = 0;
4581 : : }
4582 : : #endif /* EXEC_BACKEND */
4583 : :
4584 : :
4585 : : #ifdef WIN32
4586 : :
4587 : : /*
4588 : : * Subset implementation of waitpid() for Windows. We assume pid is -1
4589 : : * (that is, check all child processes) and options is WNOHANG (don't wait).
4590 : : */
4591 : : static pid_t
4592 : : waitpid(pid_t pid, int *exitstatus, int options)
4593 : : {
4594 : : win32_deadchild_waitinfo *childinfo;
4595 : : DWORD exitcode;
4596 : : DWORD dwd;
4597 : : ULONG_PTR key;
4598 : : OVERLAPPED *ovl;
4599 : :
4600 : : /* Try to consume one win32_deadchild_waitinfo from the queue. */
4601 : : if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
4602 : : {
4603 : : errno = EAGAIN;
4604 : : return -1;
4605 : : }
4606 : :
4607 : : childinfo = (win32_deadchild_waitinfo *) key;
4608 : : pid = childinfo->procId;
4609 : :
4610 : : /*
4611 : : * Remove handle from wait - required even though it's set to wait only
4612 : : * once
4613 : : */
4614 : : UnregisterWaitEx(childinfo->waitHandle, NULL);
4615 : :
4616 : : if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
4617 : : {
4618 : : /*
4619 : : * Should never happen. Inform user and set a fixed exitcode.
4620 : : */
4621 : : write_stderr("could not read exit code for process\n");
4622 : : exitcode = 255;
4623 : : }
4624 : : *exitstatus = exitcode;
4625 : :
4626 : : /*
4627 : : * Close the process handle. Only after this point can the PID can be
4628 : : * recycled by the kernel.
4629 : : */
4630 : : CloseHandle(childinfo->procHandle);
4631 : :
4632 : : /*
4633 : : * Free struct that was allocated before the call to
4634 : : * RegisterWaitForSingleObject()
4635 : : */
4636 : : pfree(childinfo);
4637 : :
4638 : : return pid;
4639 : : }
4640 : :
4641 : : /*
4642 : : * Note! Code below executes on a thread pool! All operations must
4643 : : * be thread safe! Note that elog() and friends must *not* be used.
4644 : : */
4645 : : static void WINAPI
4646 : : pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
4647 : : {
4648 : : /* Should never happen, since we use INFINITE as timeout value. */
4649 : : if (TimerOrWaitFired)
4650 : : return;
4651 : :
4652 : : /*
4653 : : * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If
4654 : : * that fails, we leak the object, but we also leak a whole process and
4655 : : * get into an unrecoverable state, so there's not much point in worrying
4656 : : * about that. We'd like to panic, but we can't use that infrastructure
4657 : : * from this thread.
4658 : : */
4659 : : if (!PostQueuedCompletionStatus(win32ChildQueue,
4660 : : 0,
4661 : : (ULONG_PTR) lpParameter,
4662 : : NULL))
4663 : : write_stderr("could not post child completion status\n");
4664 : :
4665 : : /* Queue SIGCHLD signal. */
4666 : : pg_queue_signal(SIGCHLD);
4667 : : }
4668 : :
4669 : : /*
4670 : : * Queue a waiter to signal when this child dies. The wait will be handled
4671 : : * automatically by an operating system thread pool. The memory and the
4672 : : * process handle will be freed by a later call to waitpid().
4673 : : */
4674 : : void
4675 : : pgwin32_register_deadchild_callback(HANDLE procHandle, DWORD procId)
4676 : : {
4677 : : win32_deadchild_waitinfo *childinfo;
4678 : :
4679 : : childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4680 : : childinfo->procHandle = procHandle;
4681 : : childinfo->procId = procId;
4682 : :
4683 : : if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4684 : : procHandle,
4685 : : pgwin32_deadchild_callback,
4686 : : childinfo,
4687 : : INFINITE,
4688 : : WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4689 : : ereport(FATAL,
4690 : : (errmsg_internal("could not register process for wait: error code %lu",
4691 : : GetLastError())));
4692 : : }
4693 : :
4694 : : #endif /* WIN32 */
4695 : :
4696 : : /*
4697 : : * Initialize one and only handle for monitoring postmaster death.
4698 : : *
4699 : : * Called once in the postmaster, so that child processes can subsequently
4700 : : * monitor if their parent is dead.
4701 : : */
4702 : : static void
4664 heikki.linnakangas@i 4703 :CBC 728 : InitPostmasterDeathWatchHandle(void)
4704 : : {
4705 : : #ifndef WIN32
4706 : :
4707 : : /*
4708 : : * Create a pipe. Postmaster holds the write end of the pipe open
4709 : : * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
4710 : : * the read file descriptor to select() to wake up in case postmaster
4711 : : * dies, or check for postmaster death with a (read() == 0). Children must
4712 : : * close the write end as soon as possible after forking, because EOF
4713 : : * won't be signaled in the read end until all processes have closed the
4714 : : * write fd. That is taken care of in ClosePostmasterPorts().
4715 : : */
4716 [ - + ]: 728 : Assert(MyProcPid == PostmasterPid);
2550 tgl@sss.pgh.pa.us 4717 [ - + ]: 728 : if (pipe(postmaster_alive_fds) < 0)
4664 heikki.linnakangas@i 4718 [ # # ]:UBC 0 : ereport(FATAL,
4719 : : (errcode_for_file_access(),
4720 : : errmsg_internal("could not create pipe to monitor postmaster death: %m")));
4721 : :
4722 : : /* Notify fd.c that we've eaten two FDs for the pipe. */
1511 tgl@sss.pgh.pa.us 4723 :CBC 728 : ReserveExternalFD();
4724 : 728 : ReserveExternalFD();
4725 : :
4726 : : /*
4727 : : * Set O_NONBLOCK to allow testing for the fd's presence with a read()
4728 : : * call.
4729 : : */
2550 4730 [ - + ]: 728 : if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
4664 heikki.linnakangas@i 4731 [ # # ]:UBC 0 : ereport(FATAL,
4732 : : (errcode_for_socket_access(),
4733 : : errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
4734 : : #else
4735 : :
4736 : : /*
4737 : : * On Windows, we use a process handle for the same purpose.
4738 : : */
4739 : : if (DuplicateHandle(GetCurrentProcess(),
4740 : : GetCurrentProcess(),
4741 : : GetCurrentProcess(),
4742 : : &PostmasterHandle,
4743 : : 0,
4744 : : TRUE,
4745 : : DUPLICATE_SAME_ACCESS) == 0)
4746 : : ereport(FATAL,
4747 : : (errmsg_internal("could not duplicate postmaster handle: error code %lu",
4748 : : GetLastError())));
4749 : : #endif /* WIN32 */
4664 heikki.linnakangas@i 4750 :CBC 728 : }
|