/*-------------------------------------------------------------------------
 *
 * latch.c
 *    Routines for inter-process latches
 *
 * The poll() implementation uses the so-called self-pipe trick to overcome the
 * race condition involved with poll() and setting a global flag in the signal
 * handler. When a latch is set and the current process is waiting for it, the
 * signal handler wakes up the poll() in WaitLatch by writing a byte to a pipe.
 * A signal by itself doesn't interrupt poll() on all platforms, and even on
 * platforms where it does, a signal that arrives just before the poll() call
 * does not prevent poll() from entering sleep. An incoming byte on a pipe
 * however reliably interrupts the sleep, and causes poll() to return
 * immediately even if the signal arrives before poll() begins.
 *
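 * As an illustrative sketch (not the literal code below), the handler side
 * of the trick is just an async-signal-safe write of one byte:
 *
 *		static void
 *		latch_sigurg_handler(SIGNAL_ARGS)
 *		{
 *			if (waiting)
 *				sendSelfPipeByte();		-- a retrying write(selfpipe_writefd)
 *		}
 *
 * while the waiting side always includes selfpipe_readfd in the poll() fd
 * set, so a byte written at any moment, even before poll() is entered,
 * leaves the descriptor readable and terminates the sleep.
 *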
 * The epoll() implementation overcomes the race with a different technique: it
 * keeps SIGURG blocked and consumes from a signalfd() descriptor instead. We
 * don't need to register a signal handler or create our own self-pipe. We
 * assume that any system that has Linux epoll() also has Linux signalfd().
 *
 * The kqueue() implementation waits for SIGURG with EVFILT_SIGNAL.
 *
 * The Windows implementation uses Windows events that are inherited by all
 * postmaster child processes. There's no need for the self-pipe trick there.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/storage/ipc/latch.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <unistd.h>
#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif
#ifdef HAVE_SYS_EVENT_H
#include <sys/event.h>
#endif
#ifdef HAVE_SYS_SIGNALFD_H
#include <sys/signalfd.h>
#endif
#ifdef HAVE_POLL_H
#include <poll.h>
#endif

#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "utils/memutils.h"

/*
 * Select the fd readiness primitive to use. Normally the "most modern"
 * primitive supported by the OS will be used, but for testing it can be
 * useful to specify the primitive manually. If desired, just add a
 * define somewhere before this block.
 */
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
    defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_WIN32)
/* don't overwrite manual choice */
#elif defined(HAVE_SYS_EPOLL_H)
#define WAIT_USE_EPOLL
#elif defined(HAVE_KQUEUE)
#define WAIT_USE_KQUEUE
#elif defined(HAVE_POLL)
#define WAIT_USE_POLL
#elif WIN32
#define WAIT_USE_WIN32
#else
#error "no wait set implementation available"
#endif
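
/*
 * For example, to exercise the poll() implementation on a system that would
 * normally autodetect epoll, one could add the following above the selection
 * block (a testing-only sketch, not something this file defines):
 *
 *		#define WAIT_USE_POLL
 */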

/*
 * By default, we use a self-pipe with poll() and a signalfd with epoll(), if
 * available. We avoid signalfd on illumos for now based on problem reports.
 * For testing, the choice can also be specified manually.
 */
#if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
/* don't overwrite manual choice */
#elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H) && \
    !defined(__illumos__)
#define WAIT_USE_SIGNALFD
#else
#define WAIT_USE_SELF_PIPE
#endif
#endif

/* typedef in latch.h */
struct WaitEventSet
{
    int         nevents;        /* number of registered events */
    int         nevents_space;  /* maximum number of events in this set */

    /*
     * Array, of nevents_space length, storing the definition of events this
     * set is waiting for.
     */
    WaitEvent  *events;

    /*
     * If WL_LATCH_SET is specified in any wait event, latch is a pointer to
     * said latch, and latch_pos the offset in the ->events array. This is
     * useful because we check the state of the latch before performing
     * syscalls related to waiting.
     */
    Latch      *latch;
    int         latch_pos;

    /*
     * WL_EXIT_ON_PM_DEATH is converted to WL_POSTMASTER_DEATH, but this flag
     * is set so that we'll exit immediately if postmaster death is detected,
     * instead of returning.
     */
    bool        exit_on_postmaster_death;

#if defined(WAIT_USE_EPOLL)
    int         epoll_fd;
    /* epoll_wait returns events in a user-provided array; allocate once */
    struct epoll_event *epoll_ret_events;
#elif defined(WAIT_USE_KQUEUE)
    int         kqueue_fd;
    /* kevent returns events in a user-provided array; allocate once */
    struct kevent *kqueue_ret_events;
    bool        report_postmaster_not_running;
#elif defined(WAIT_USE_POLL)
    /* poll expects the events to wait for on every poll() call; prepare once */
    struct pollfd *pollfds;
#elif defined(WAIT_USE_WIN32)

    /*
     * Array of Windows events. The first element always contains
     * pgwin32_signal_event, so the remaining elements are offset by one (i.e.
     * event->pos + 1).
     */
    HANDLE     *handles;
#endif
};

/* A common WaitEventSet used to implement WaitLatch() */
static WaitEventSet *LatchWaitSet;

/* The position of the latch in LatchWaitSet. */
#define LatchWaitSetLatchPos 0

#ifndef WIN32
/* Are we currently in WaitLatch? The signal handler would like to know. */
static volatile sig_atomic_t waiting = false;
#endif

#ifdef WAIT_USE_SIGNALFD
/* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
static int  signal_fd = -1;
#endif

#ifdef WAIT_USE_SELF_PIPE
/* Read and write ends of the self-pipe */
static int  selfpipe_readfd = -1;
static int  selfpipe_writefd = -1;

/* Process owning the self-pipe --- needed for checking purposes */
static int  selfpipe_owner_pid = 0;

/* Private function prototypes */
static void latch_sigurg_handler(SIGNAL_ARGS);
static void sendSelfPipeByte(void);
#endif

#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
static void drain(void);
#endif

#if defined(WAIT_USE_EPOLL)
static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
#elif defined(WAIT_USE_KQUEUE)
static void WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events);
#elif defined(WAIT_USE_POLL)
static void WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event);
#elif defined(WAIT_USE_WIN32)
static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
#endif

static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
                                        WaitEvent *occurred_events, int nevents);

/*
 * Initialize the process-local latch infrastructure.
 *
 * This must be called once during startup of any process that can wait on
 * latches, before it issues any InitLatch() or OwnLatch() calls.
 */
void
InitializeLatchSupport(void)
{
#if defined(WAIT_USE_SELF_PIPE)
    int         pipefd[2];

    if (IsUnderPostmaster)
    {
        /*
         * We might have inherited connections to a self-pipe created by the
         * postmaster. It's critical that child processes create their own
         * self-pipes, of course, and we really want them to close the
         * inherited FDs for safety's sake.
         */
        if (selfpipe_owner_pid != 0)
        {
            /* Assert we go through here but once in a child process */
            Assert(selfpipe_owner_pid != MyProcPid);
            /* Release postmaster's pipe FDs; ignore any error */
            (void) close(selfpipe_readfd);
            (void) close(selfpipe_writefd);
            /* Clean up, just for safety's sake; we'll set these below */
            selfpipe_readfd = selfpipe_writefd = -1;
            selfpipe_owner_pid = 0;
            /* Keep fd.c's accounting straight */
            ReleaseExternalFD();
            ReleaseExternalFD();
        }
        else
        {
            /*
             * Postmaster didn't create a self-pipe ... or else we're in an
             * EXEC_BACKEND build, in which case it doesn't matter since the
             * postmaster's pipe FDs were closed by the action of FD_CLOEXEC.
             * fd.c won't have state to clean up, either.
             */
            Assert(selfpipe_readfd == -1);
        }
    }
    else
    {
        /* In postmaster or standalone backend, assert we do this but once */
        Assert(selfpipe_readfd == -1);
        Assert(selfpipe_owner_pid == 0);
    }

    /*
     * Set up the self-pipe that allows a signal handler to wake up the
     * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
     * that SetLatch won't block if the event has already been set many times
     * filling the kernel buffer. Make the read-end non-blocking too, so that
     * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
     * Also, make both FDs close-on-exec, since we surely do not want any
     * child processes messing with them.
     */
    if (pipe(pipefd) < 0)
        elog(FATAL, "pipe() failed: %m");
    if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
        elog(FATAL, "fcntl(F_SETFL) failed on read-end of self-pipe: %m");
    if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
        elog(FATAL, "fcntl(F_SETFL) failed on write-end of self-pipe: %m");
    if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1)
        elog(FATAL, "fcntl(F_SETFD) failed on read-end of self-pipe: %m");
    if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1)
        elog(FATAL, "fcntl(F_SETFD) failed on write-end of self-pipe: %m");

    selfpipe_readfd = pipefd[0];
    selfpipe_writefd = pipefd[1];
    selfpipe_owner_pid = MyProcPid;

    /* Tell fd.c about these two long-lived FDs */
    ReserveExternalFD();
    ReserveExternalFD();

    pqsignal(SIGURG, latch_sigurg_handler);
#endif

#ifdef WAIT_USE_SIGNALFD
    sigset_t    signalfd_mask;

    if (IsUnderPostmaster)
    {
        /*
         * It would probably be safe to re-use the inherited signalfd since
         * signalfds only see the current process's pending signals, but it
         * seems less surprising to close it and create our own.
         */
        if (signal_fd != -1)
        {
            /* Release postmaster's signal FD; ignore any error */
            (void) close(signal_fd);
            signal_fd = -1;
            ReleaseExternalFD();
        }
    }

    /* Block SIGURG, because we'll receive it through a signalfd. */
    sigaddset(&UnBlockSig, SIGURG);

    /* Set up the signalfd to receive SIGURG notifications. */
    sigemptyset(&signalfd_mask);
    sigaddset(&signalfd_mask, SIGURG);
    signal_fd = signalfd(-1, &signalfd_mask, SFD_NONBLOCK | SFD_CLOEXEC);
    if (signal_fd < 0)
        elog(FATAL, "signalfd() failed");
    ReserveExternalFD();
#endif

#ifdef WAIT_USE_KQUEUE
    /* Ignore SIGURG, because we'll receive it via kqueue. */
    pqsignal(SIGURG, SIG_IGN);
#endif
}

void
InitializeLatchWaitSet(void)
{
    int         latch_pos PG_USED_FOR_ASSERTS_ONLY;

    Assert(LatchWaitSet == NULL);

    /* Set up the WaitEventSet used by WaitLatch(). */
    LatchWaitSet = CreateWaitEventSet(TopMemoryContext, 2);
    latch_pos = AddWaitEventToSet(LatchWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
                                  MyLatch, NULL);
    if (IsUnderPostmaster)
        AddWaitEventToSet(LatchWaitSet, WL_EXIT_ON_PM_DEATH,
                          PGINVALID_SOCKET, NULL, NULL);

    Assert(latch_pos == LatchWaitSetLatchPos);
}

void
ShutdownLatchSupport(void)
{
#if defined(WAIT_USE_POLL)
    pqsignal(SIGURG, SIG_IGN);
#endif

    if (LatchWaitSet)
    {
        FreeWaitEventSet(LatchWaitSet);
        LatchWaitSet = NULL;
    }

#if defined(WAIT_USE_SELF_PIPE)
    close(selfpipe_readfd);
    close(selfpipe_writefd);
    selfpipe_readfd = -1;
    selfpipe_writefd = -1;
    selfpipe_owner_pid = InvalidPid;
#endif

#if defined(WAIT_USE_SIGNALFD)
    close(signal_fd);
    signal_fd = -1;
#endif
}

/*
 * Initialize a process-local latch.
 */
void
InitLatch(Latch *latch)
{
    latch->is_set = false;
    latch->maybe_sleeping = false;
    latch->owner_pid = MyProcPid;
    latch->is_shared = false;

#if defined(WAIT_USE_SELF_PIPE)
    /* Assert InitializeLatchSupport has been called in this process */
    Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
#elif defined(WAIT_USE_SIGNALFD)
    /* Assert InitializeLatchSupport has been called in this process */
    Assert(signal_fd >= 0);
#elif defined(WAIT_USE_WIN32)
    latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (latch->event == NULL)
        elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
#endif                          /* WIN32 */
}

/*
 * Initialize a shared latch that can be set from other processes. The latch
 * is initially owned by no-one; use OwnLatch to associate it with the
 * current process.
 *
 * InitSharedLatch needs to be called in postmaster before forking child
 * processes, usually right after allocating the shared memory block
 * containing the latch with ShmemInitStruct. (The Unix implementation
 * doesn't actually require that, but the Windows one does.) Because of
 * this restriction, we have no concurrency issues to worry about here.
 *
 * Note that other handles created in this module are never marked as
 * inheritable. Thus we do not need to worry about cleaning up child
 * process references to postmaster-private latches or WaitEventSets.
 */
void
InitSharedLatch(Latch *latch)
{
#ifdef WIN32
    SECURITY_ATTRIBUTES sa;

    /*
     * Set up security attributes to specify that the events are inherited.
     */
    ZeroMemory(&sa, sizeof(sa));
    sa.nLength = sizeof(sa);
    sa.bInheritHandle = TRUE;

    latch->event = CreateEvent(&sa, TRUE, FALSE, NULL);
    if (latch->event == NULL)
        elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
#endif

    latch->is_set = false;
    latch->maybe_sleeping = false;
    latch->owner_pid = 0;
    latch->is_shared = true;
}
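
/*
 * An illustrative lifecycle sketch (not code from this file; "shared" stands
 * for a hypothetical shared-memory struct containing a Latch):
 *
 *		In postmaster, after ShmemInitStruct():
 *			InitSharedLatch(&shared->latch);
 *
 *		In the child process that will sleep on it:
 *			OwnLatch(&shared->latch);
 *			... ResetLatch()/WaitLatch() loop ...
 *			DisownLatch(&shared->latch);
 *
 * Any process may then call SetLatch(&shared->latch) to wake the owner.
 */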

/*
 * Associate a shared latch with the current process, allowing it to
 * wait on the latch.
 *
 * Although there is a sanity check for latch-already-owned, we don't do
 * any sort of locking here, meaning that we could fail to detect the error
 * if two processes try to own the same latch at about the same time. If
 * there is any risk of that, caller must provide an interlock to prevent it.
 */
void
OwnLatch(Latch *latch)
{
    int         owner_pid;

    /* Sanity checks */
    Assert(latch->is_shared);

#if defined(WAIT_USE_SELF_PIPE)
    /* Assert InitializeLatchSupport has been called in this process */
    Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
#elif defined(WAIT_USE_SIGNALFD)
    /* Assert InitializeLatchSupport has been called in this process */
    Assert(signal_fd >= 0);
#endif

    owner_pid = latch->owner_pid;
    if (owner_pid != 0)
        elog(PANIC, "latch already owned by PID %d", owner_pid);

    latch->owner_pid = MyProcPid;
}

/*
 * Disown a shared latch currently owned by the current process.
 */
void
DisownLatch(Latch *latch)
{
    Assert(latch->is_shared);
    Assert(latch->owner_pid == MyProcPid);

    latch->owner_pid = 0;
}

/*
 * Wait for a given latch to be set, or for postmaster death, or until timeout
 * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events
 * to wait for. If the latch is already set (and WL_LATCH_SET is given), the
 * function returns immediately.
 *
 * The "timeout" is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
 * is given. Although it is declared as "long", we don't actually support
 * timeouts longer than INT_MAX milliseconds. Note that some extra overhead
 * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
 *
 * The latch must be owned by the current process, i.e. it must be a
 * process-local latch initialized with InitLatch, or a shared latch
 * associated with the current process by calling OwnLatch.
 *
 * Returns bit mask indicating which condition(s) caused the wake-up. Note
 * that if multiple wake-up conditions are true, there is no guarantee that
 * we return all of them in one call, but we will return at least one.
 */
int
WaitLatch(Latch *latch, int wakeEvents, long timeout,
          uint32 wait_event_info)
{
    WaitEvent   event;

    /* Postmaster-managed callers must handle postmaster death somehow. */
    Assert(!IsUnderPostmaster ||
           (wakeEvents & WL_EXIT_ON_PM_DEATH) ||
           (wakeEvents & WL_POSTMASTER_DEATH));

    /*
     * Some callers may have a latch other than MyLatch, or no latch at all,
     * or want to handle postmaster death differently. It's cheap to assign
     * those, so just do it every time.
     */
    if (!(wakeEvents & WL_LATCH_SET))
        latch = NULL;
    ModifyWaitEvent(LatchWaitSet, LatchWaitSetLatchPos, WL_LATCH_SET, latch);
    LatchWaitSet->exit_on_postmaster_death =
        ((wakeEvents & WL_EXIT_ON_PM_DEATH) != 0);

    if (WaitEventSetWait(LatchWaitSet,
                         (wakeEvents & WL_TIMEOUT) ? timeout : -1,
                         &event, 1,
                         wait_event_info) == 0)
        return WL_TIMEOUT;
    else
        return event.events;
}
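
/*
 * For illustration, the canonical caller loop looks like this (a sketch:
 * "work to do" and DoStuff() stand for the caller's own condition check and
 * work, and PG_WAIT_EXTENSION is just one possible wait_event_info value):
 *
 *		for (;;)
 *		{
 *			ResetLatch(MyLatch);
 *			if (work to do)
 *				DoStuff();
 *			(void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
 *							 -1, PG_WAIT_EXTENSION);
 *		}
 *
 * Resetting before checking the condition ensures that a SetLatch arriving
 * between the check and the wait still terminates the next WaitLatch call.
 */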

/*
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
 * When waiting on a socket, EOF and error conditions always cause the socket
 * to be reported as readable/writable/connected, so that the caller can deal
 * with the condition.
 *
 * wakeEvents must include either WL_EXIT_ON_PM_DEATH for automatic exit
 * if the postmaster dies or WL_POSTMASTER_DEATH for a flag set in the
 * return value if the postmaster dies. The latter is useful for rare cases
 * where some behavior other than immediate exit is needed.
 *
 * NB: These days this is just a wrapper around the WaitEventSet API. When
 * using a latch very frequently, consider creating a longer-lived
 * WaitEventSet instead; that's more efficient.
 */
int
WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock,
                  long timeout, uint32 wait_event_info)
{
    int         ret = 0;
    int         rc;
    WaitEvent   event;
    WaitEventSet *set = CreateWaitEventSet(CurrentMemoryContext, 3);

    if (wakeEvents & WL_TIMEOUT)
        Assert(timeout >= 0);
    else
        timeout = -1;

    if (wakeEvents & WL_LATCH_SET)
        AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET,
                          latch, NULL);

    /* Postmaster-managed callers must handle postmaster death somehow. */
    Assert(!IsUnderPostmaster ||
           (wakeEvents & WL_EXIT_ON_PM_DEATH) ||
           (wakeEvents & WL_POSTMASTER_DEATH));

    if ((wakeEvents & WL_POSTMASTER_DEATH) && IsUnderPostmaster)
        AddWaitEventToSet(set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET,
                          NULL, NULL);

    if ((wakeEvents & WL_EXIT_ON_PM_DEATH) && IsUnderPostmaster)
        AddWaitEventToSet(set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
                          NULL, NULL);

    if (wakeEvents & WL_SOCKET_MASK)
    {
        int         ev;

        ev = wakeEvents & WL_SOCKET_MASK;
        AddWaitEventToSet(set, ev, sock, NULL, NULL);
    }

    rc = WaitEventSetWait(set, timeout, &event, 1, wait_event_info);

    if (rc == 0)
        ret |= WL_TIMEOUT;
    else
    {
        ret |= event.events & (WL_LATCH_SET |
                               WL_POSTMASTER_DEATH |
                               WL_SOCKET_MASK);
    }

    FreeWaitEventSet(set);

    return ret;
}
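
/*
 * As the NB above says, a frequently-run wait loop should build its
 * WaitEventSet once and reuse it. An illustrative sketch (not code from this
 * file; "sock" and the work are placeholders):
 *
 *		WaitEvent	event;
 *		WaitEventSet *set = CreateWaitEventSet(CurrentMemoryContext, 3);
 *
 *		AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
 *		AddWaitEventToSet(set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
 *						  NULL, NULL);
 *		AddWaitEventToSet(set, WL_SOCKET_READABLE, sock, NULL, NULL);
 *
 *		for (;;)
 *		{
 *			ResetLatch(MyLatch);
 *			... handle readable socket and/or latch-setting work ...
 *			(void) WaitEventSetWait(set, -1, &event, 1, PG_WAIT_EXTENSION);
 *		}
 *
 * This avoids re-creating the underlying kernel object (the epoll or kqueue
 * descriptor) and re-registering the events on every wait.
 */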

/*
 * Sets a latch and wakes up anyone waiting on it.
 *
 * This is cheap if the latch is already set, otherwise not so much.
 *
 * NB: when calling this in a signal handler, be sure to save and restore
 * errno around it.  (That's standard practice in most signal handlers, of
 * course, but we used to omit it in handlers that only set a flag.)
 *
 * NB: this function is called from critical sections and signal handlers so
 * throwing an error is not a good idea.
 */
void
SetLatch(Latch *latch)
{
#ifndef WIN32
    pid_t       owner_pid;
#else
    HANDLE      handle;
#endif

    /*
     * The memory barrier has to be placed here to ensure that any flag
     * variables possibly changed by this process have been flushed to main
     * memory, before we check/set is_set.
     */
    pg_memory_barrier();

    /* Quick exit if already set */
    if (latch->is_set)
        return;

    latch->is_set = true;

    pg_memory_barrier();
    if (!latch->maybe_sleeping)
        return;

#ifndef WIN32

    /*
     * See if anyone's waiting for the latch. It can be the current process if
     * we're in a signal handler. We use the self-pipe or send SIGURG to
     * ourselves to wake up WaitEventSetWaitBlock() without races in that
     * case. If it's another process, send a signal.
     *
     * Fetch owner_pid only once, in case the latch is concurrently getting
     * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
     * guaranteed to be true! In practice, the effective range of pid_t fits
     * in a 32 bit integer, and so should be atomic. In the worst case, we
     * might end up signaling the wrong process. Even then, you're very
     * unlucky if a process with that bogus pid exists and belongs to
     * Postgres; and PG database processes should handle excess SIGURG
     * interrupts without a problem anyhow.
     *
     * Another sort of race condition that's possible here is for a new
     * process to own the latch immediately after we look, so we don't signal
     * it. This is okay so long as all callers of ResetLatch/WaitLatch follow
     * the standard coding convention of waiting at the bottom of their loops,
     * not the top, so that they'll correctly process latch-setting events
     * that happen before they enter the loop.
     */
    owner_pid = latch->owner_pid;
    if (owner_pid == 0)
        return;
    else if (owner_pid == MyProcPid)
    {
#if defined(WAIT_USE_SELF_PIPE)
        if (waiting)
            sendSelfPipeByte();
#else
        if (waiting)
            kill(MyProcPid, SIGURG);
#endif
    }
    else
        kill(owner_pid, SIGURG);

#else

    /*
     * See if anyone's waiting for the latch. It can be the current process if
     * we're in a signal handler.
     *
     * Use a local variable here just in case somebody changes the event field
     * concurrently (which really should not happen).
     */
    handle = latch->event;
    if (handle)
    {
        SetEvent(handle);

        /*
         * Note that we silently ignore any errors. We might be in a signal
         * handler or other critical path where it's not safe to call elog().
         */
    }
#endif
}

/*
 * Clear the latch. Calling WaitLatch after this will sleep, unless
 * the latch is set again before the WaitLatch call.
 */
void
ResetLatch(Latch *latch)
{
    /* Only the owner should reset the latch */
    Assert(latch->owner_pid == MyProcPid);
    Assert(latch->maybe_sleeping == false);

    latch->is_set = false;

    /*
     * Ensure that the write to is_set gets flushed to main memory before we
     * examine any flag variables. Otherwise a concurrent SetLatch might
     * falsely conclude that it needn't signal us, even though we have missed
     * seeing some flag updates that SetLatch was supposed to inform us of.
     */
    pg_memory_barrier();
}
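
/*
 * To see why the barriers in SetLatch and ResetLatch matter, consider this
 * schematic interleaving (a sketch; "flag" is whatever shared state the
 * setter updates before waking us):
 *
 *		Setter                              Waiter
 *		------                              ------
 *		flag = true                         ResetLatch(): is_set = false
 *		barrier                             barrier
 *		SetLatch(): if (!is_set)            if (!flag)
 *			set is_set and signal               WaitLatch()
 *
 * Since each side writes one variable and then reads the other with a full
 * barrier in between, at least one side must see the other's write: either
 * the waiter sees flag set and skips the wait, or the setter sees is_set
 * already cleared and delivers a wakeup.
 */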

/*
 * Create a WaitEventSet with space for nevents different events to wait for.
 *
 * These events can then be efficiently waited upon together, using
 * WaitEventSetWait().
 */
WaitEventSet *
CreateWaitEventSet(MemoryContext context, int nevents)
{
    WaitEventSet *set;
    char       *data;
    Size        sz = 0;

    /*
     * Use MAXALIGN size/alignment to guarantee that later uses of memory are
     * aligned correctly. E.g. epoll_event might need 8 byte alignment on some
     * platforms, but earlier allocations like WaitEventSet and WaitEvent
     * might not be sized to guarantee that when purely using sizeof().
     */
    sz += MAXALIGN(sizeof(WaitEventSet));
    sz += MAXALIGN(sizeof(WaitEvent) * nevents);

#if defined(WAIT_USE_EPOLL)
    sz += MAXALIGN(sizeof(struct epoll_event) * nevents);
#elif defined(WAIT_USE_KQUEUE)
    sz += MAXALIGN(sizeof(struct kevent) * nevents);
#elif defined(WAIT_USE_POLL)
    sz += MAXALIGN(sizeof(struct pollfd) * nevents);
#elif defined(WAIT_USE_WIN32)
    /* need space for the pgwin32_signal_event */
    sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
#endif

    data = (char *) MemoryContextAllocZero(context, sz);

    set = (WaitEventSet *) data;
    data += MAXALIGN(sizeof(WaitEventSet));

    set->events = (WaitEvent *) data;
    data += MAXALIGN(sizeof(WaitEvent) * nevents);

#if defined(WAIT_USE_EPOLL)
    set->epoll_ret_events = (struct epoll_event *) data;
    data += MAXALIGN(sizeof(struct epoll_event) * nevents);
#elif defined(WAIT_USE_KQUEUE)
    set->kqueue_ret_events = (struct kevent *) data;
    data += MAXALIGN(sizeof(struct kevent) * nevents);
#elif defined(WAIT_USE_POLL)
    set->pollfds = (struct pollfd *) data;
    data += MAXALIGN(sizeof(struct pollfd) * nevents);
#elif defined(WAIT_USE_WIN32)
    set->handles = (HANDLE) data;
    data += MAXALIGN(sizeof(HANDLE) * nevents);
#endif

    set->latch = NULL;
    set->nevents_space = nevents;
    set->exit_on_postmaster_death = false;

#if defined(WAIT_USE_EPOLL)
    if (!AcquireExternalFD())
    {
        /* treat this as though epoll_create1 itself returned EMFILE */
        elog(ERROR, "epoll_create1 failed: %m");
    }
    set->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if (set->epoll_fd < 0)
    {
        ReleaseExternalFD();
        elog(ERROR, "epoll_create1 failed: %m");
    }
#elif defined(WAIT_USE_KQUEUE)
    if (!AcquireExternalFD())
    {
        /* treat this as though kqueue itself returned EMFILE */
        elog(ERROR, "kqueue failed: %m");
    }
    set->kqueue_fd = kqueue();
    if (set->kqueue_fd < 0)
    {
        ReleaseExternalFD();
        elog(ERROR, "kqueue failed: %m");
    }
    if (fcntl(set->kqueue_fd, F_SETFD, FD_CLOEXEC) == -1)
    {
        int         save_errno = errno;

        close(set->kqueue_fd);
        ReleaseExternalFD();
        errno = save_errno;
        elog(ERROR, "fcntl(F_SETFD) failed on kqueue descriptor: %m");
    }
    set->report_postmaster_not_running = false;
#elif defined(WAIT_USE_WIN32)

    /*
     * To handle signals while waiting, we need to add a win32 specific event.
     * We accounted for the additional event at the top of this routine. See
     * port/win32/signal.c for more details.
     *
     * Note: pgwin32_signal_event should be first to ensure that it will be
     * reported when multiple events are set. We want to guarantee that
     * pending signals are serviced.
     */
    set->handles[0] = pgwin32_signal_event;
    StaticAssertStmt(WSA_INVALID_EVENT == NULL, "");
#endif

    return set;
}

/*
 * Free a previously created WaitEventSet.
 *
 * Note: preferably, this shouldn't have to free any resources that could be
 * inherited across an exec(). If it did, we'd likely leak those resources in
 * many scenarios. For the epoll case, we ensure that by setting EPOLL_CLOEXEC
 * when the FD is created. For the Windows case, we assume that the handles
 * involved are non-inheritable.
 */
void
FreeWaitEventSet(WaitEventSet *set)
{
#if defined(WAIT_USE_EPOLL)
    close(set->epoll_fd);
    ReleaseExternalFD();
#elif defined(WAIT_USE_KQUEUE)
    close(set->kqueue_fd);
    ReleaseExternalFD();
#elif defined(WAIT_USE_WIN32)
    WaitEvent  *cur_event;

    for (cur_event = set->events;
         cur_event < (set->events + set->nevents);
         cur_event++)
    {
        if (cur_event->events & WL_LATCH_SET)
        {
            /* uses the latch's HANDLE */
        }
        else if (cur_event->events & WL_POSTMASTER_DEATH)
        {
            /* uses PostmasterHandle */
        }
        else
        {
            /* Clean up the event object we created for the socket */
            WSAEventSelect(cur_event->fd, NULL, 0);
            WSACloseEvent(set->handles[cur_event->pos + 1]);
        }
    }
#endif

    pfree(set);
}

/*
 * Free a previously created WaitEventSet in a child process after a fork().
 */
void
FreeWaitEventSetAfterFork(WaitEventSet *set)
{
#if defined(WAIT_USE_EPOLL)
    close(set->epoll_fd);
    ReleaseExternalFD();
#elif defined(WAIT_USE_KQUEUE)
    /* kqueues are not normally inherited by child processes */
    ReleaseExternalFD();
#endif

    pfree(set);
}

/* ---
 * Add an event to the set. Possible events are:
 * - WL_LATCH_SET: Wait for the latch to be set
 * - WL_POSTMASTER_DEATH: Wait for postmaster to die
 * - WL_SOCKET_READABLE: Wait for socket to become readable,
 *	 can be combined in one event with other WL_SOCKET_* events
 * - WL_SOCKET_WRITEABLE: Wait for socket to become writeable,
 *	 can be combined with other WL_SOCKET_* events
 * - WL_SOCKET_CONNECTED: Wait for socket connection to be established,
 *	 can be combined with other WL_SOCKET_* events (on non-Windows
 *	 platforms, this is the same as WL_SOCKET_WRITEABLE)
 * - WL_SOCKET_ACCEPT: Wait for new connection to a server socket,
 *	 can be combined with other WL_SOCKET_* events (on non-Windows
 *	 platforms, this is the same as WL_SOCKET_READABLE)
 * - WL_SOCKET_CLOSED: Wait for socket to be closed by remote peer.
 * - WL_EXIT_ON_PM_DEATH: Exit immediately if the postmaster dies
 *
 * Returns the offset in WaitEventSet->events (starting from 0), which can be
 * used to modify previously added wait events using ModifyWaitEvent().
 *
 * In the WL_LATCH_SET case the latch must be owned by the current process,
 * i.e. it must be a process-local latch initialized with InitLatch, or a
 * shared latch associated with the current process by calling OwnLatch.
 *
 * In the WL_SOCKET_READABLE/WRITEABLE/CONNECTED/ACCEPT cases, EOF and error
 * conditions cause the socket to be reported as readable/writable/connected,
 * so that the caller can deal with the condition.
 *
 * The user_data pointer specified here will be set for the events returned
 * by WaitEventSetWait(), making it easy to associate additional data with
 * events.
 */
int
AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
                  void *user_data)
{
    WaitEvent  *event;

    /* not enough space */
    Assert(set->nevents < set->nevents_space);

    if (events == WL_EXIT_ON_PM_DEATH)
    {
        events = WL_POSTMASTER_DEATH;
        set->exit_on_postmaster_death = true;
    }

    if (latch)
    {
        if (latch->owner_pid != MyProcPid)
            elog(ERROR, "cannot wait on a latch owned by another process");
        if (set->latch)
            elog(ERROR, "cannot wait on more than one latch");
        if ((events & WL_LATCH_SET) != WL_LATCH_SET)
            elog(ERROR, "latch events only support being set");
    }
    else
    {
        if (events & WL_LATCH_SET)
            elog(ERROR, "cannot wait on latch without a specified latch");
    }

    /* waiting for socket readiness without a socket indicates a bug */
    if (fd == PGINVALID_SOCKET && (events & WL_SOCKET_MASK))
        elog(ERROR, "cannot wait on socket event without a socket");

    event = &set->events[set->nevents];
    event->pos = set->nevents++;
    event->fd = fd;
    event->events = events;
    event->user_data = user_data;
#ifdef WIN32
    event->reset = false;
#endif

    if (events == WL_LATCH_SET)
    {
        set->latch = latch;
        set->latch_pos = event->pos;
#if defined(WAIT_USE_SELF_PIPE)
        event->fd = selfpipe_readfd;
#elif defined(WAIT_USE_SIGNALFD)
        event->fd = signal_fd;
#else
        event->fd = PGINVALID_SOCKET;
#ifdef WAIT_USE_EPOLL
        return event->pos;
#endif
#endif
    }
    else if (events == WL_POSTMASTER_DEATH)
    {
#ifndef WIN32
        event->fd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
#endif
    }

    /* perform wait primitive specific initialization, if needed */
#if defined(WAIT_USE_EPOLL)
    WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
#elif defined(WAIT_USE_KQUEUE)
    WaitEventAdjustKqueue(set, event, 0);
#elif defined(WAIT_USE_POLL)
    WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_WIN32)
    WaitEventAdjustWin32(set, event);
#endif

    return event->pos;
}

/*
 * Change the event mask and, in the WL_LATCH_SET case, the latch associated
 * with the WaitEvent. The latch may be changed to NULL to disable the latch
 * temporarily, and then set back to a latch later.
 *
 * 'pos' is the id returned by AddWaitEventToSet.
 */
void
ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
{
    WaitEvent  *event;
#if defined(WAIT_USE_KQUEUE)
    int         old_events;
#endif

    Assert(pos < set->nevents);

    event = &set->events[pos];
#if defined(WAIT_USE_KQUEUE)
    old_events = event->events;
#endif

    /*
     * If neither the event mask nor the associated latch changes, return
     * early. That's an important optimization for some sockets, where
     * ModifyWaitEvent is frequently used to switch from waiting for reads to
     * waiting on writes.
     */
    if (events == event->events &&
        (!(event->events & WL_LATCH_SET) || set->latch == latch))
        return;

    if (event->events & WL_LATCH_SET &&
        events != event->events)
    {
        elog(ERROR, "cannot modify latch event");
    }

    if (event->events & WL_POSTMASTER_DEATH)
    {
        elog(ERROR, "cannot modify postmaster death event");
    }

    /* FIXME: validate event mask */
    event->events = events;

    if (events == WL_LATCH_SET)
    {
        if (latch && latch->owner_pid != MyProcPid)
            elog(ERROR, "cannot wait on a latch owned by another process");
        set->latch = latch;

        /*
         * On Unix, we don't need to modify the kernel object because the
         * underlying pipe (if there is one) is the same for all latches so we
         * can return immediately. On Windows, we need to update our array of
         * handles, but we leave the old one in place and tolerate spurious
         * wakeups if the latch is disabled.
         */
#if defined(WAIT_USE_WIN32)
        if (!latch)
            return;
#else
        return;
#endif
    }

#if defined(WAIT_USE_EPOLL)
    WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
#elif defined(WAIT_USE_KQUEUE)
    WaitEventAdjustKqueue(set, event, old_events);
#elif defined(WAIT_USE_POLL)
    WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_WIN32)
    WaitEventAdjustWin32(set, event);
#endif
}
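
/*
 * A typical use, per the optimization note above, is flipping a socket event
 * between read and write waits without rebuilding the set (a sketch;
 * "sock_pos" is the position AddWaitEventToSet returned for the socket):
 *
 *		ModifyWaitEvent(set, sock_pos, WL_SOCKET_WRITEABLE, NULL);
 *		... wait, flush pending output ...
 *		ModifyWaitEvent(set, sock_pos, WL_SOCKET_READABLE, NULL);
 */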

#if defined(WAIT_USE_EPOLL)
/*
 * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL
 */
static void
WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
{
    struct epoll_event epoll_ev;
    int         rc;

    /* pointer to our event, returned by epoll_wait */
    epoll_ev.data.ptr = event;
    /* always wait for errors */
    epoll_ev.events = EPOLLERR | EPOLLHUP;

    /* prepare pollfd entry once */
    if (event->events == WL_LATCH_SET)
    {
        Assert(set->latch != NULL);
        epoll_ev.events |= EPOLLIN;
    }
    else if (event->events == WL_POSTMASTER_DEATH)
    {
        epoll_ev.events |= EPOLLIN;
    }
    else
    {
        Assert(event->fd != PGINVALID_SOCKET);
        Assert(event->events & (WL_SOCKET_READABLE |
                                WL_SOCKET_WRITEABLE |
                                WL_SOCKET_CLOSED));

        if (event->events & WL_SOCKET_READABLE)
            epoll_ev.events |= EPOLLIN;
        if (event->events & WL_SOCKET_WRITEABLE)
            epoll_ev.events |= EPOLLOUT;
        if (event->events & WL_SOCKET_CLOSED)
            epoll_ev.events |= EPOLLRDHUP;
    }

    /*
     * Even though unused, we also pass epoll_ev as the data argument if
     * EPOLL_CTL_DEL is passed as action. There used to be an epoll bug
     * requiring that, and actually it makes the code simpler...
     */
    rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);

    if (rc < 0)
        ereport(ERROR,
                (errcode_for_socket_access(),
                 errmsg("%s() failed: %m",
                        "epoll_ctl")));
}
#endif

#if defined(WAIT_USE_POLL)
static void
WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event)
{
    struct pollfd *pollfd = &set->pollfds[event->pos];

    pollfd->revents = 0;
    pollfd->fd = event->fd;

    /* prepare pollfd entry once */
    if (event->events == WL_LATCH_SET)
    {
        Assert(set->latch != NULL);
        pollfd->events = POLLIN;
    }
    else if (event->events == WL_POSTMASTER_DEATH)
    {
        pollfd->events = POLLIN;
    }
    else
    {
        Assert(event->events & (WL_SOCKET_READABLE |
                                WL_SOCKET_WRITEABLE |
                                WL_SOCKET_CLOSED));
        pollfd->events = 0;
        if (event->events & WL_SOCKET_READABLE)
            pollfd->events |= POLLIN;
        if (event->events & WL_SOCKET_WRITEABLE)
            pollfd->events |= POLLOUT;
#ifdef POLLRDHUP
        if (event->events & WL_SOCKET_CLOSED)
            pollfd->events |= POLLRDHUP;
#endif
    }

    Assert(event->fd != PGINVALID_SOCKET);
}
#endif

#if defined(WAIT_USE_KQUEUE)

/*
 * On most BSD family systems, the udata member of struct kevent is of type
 * void *, so we could directly convert to/from WaitEvent *. Unfortunately,
 * NetBSD has it as intptr_t, so here we wallpaper over that difference with
 * an lvalue cast.
 */
#define AccessWaitEvent(k_ev) (*((WaitEvent **)(&(k_ev)->udata)))

static inline void
WaitEventAdjustKqueueAdd(struct kevent *k_ev, int filter, int action,
                         WaitEvent *event)
{
    k_ev->ident = event->fd;
    k_ev->filter = filter;
    k_ev->flags = action;
    k_ev->fflags = 0;
    k_ev->data = 0;
    AccessWaitEvent(k_ev) = event;
}

static inline void
WaitEventAdjustKqueueAddPostmaster(struct kevent *k_ev, WaitEvent *event)
{
    /* For now postmaster death can only be added, not removed. */
    k_ev->ident = PostmasterPid;
    k_ev->filter = EVFILT_PROC;
    k_ev->flags = EV_ADD;
    k_ev->fflags = NOTE_EXIT;
    k_ev->data = 0;
    AccessWaitEvent(k_ev) = event;
}

static inline void
WaitEventAdjustKqueueAddLatch(struct kevent *k_ev, WaitEvent *event)
{
    /* For now latch can only be added, not removed. */
    k_ev->ident = SIGURG;
    k_ev->filter = EVFILT_SIGNAL;
    k_ev->flags = EV_ADD;
    k_ev->fflags = 0;
    k_ev->data = 0;
    AccessWaitEvent(k_ev) = event;
}

/*
 * old_events is the previous event mask, used to compute what has changed.
 */
static void
WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
{
    int         rc;
    struct kevent k_ev[2];
    int         count = 0;
    bool        new_filt_read = false;
    bool        old_filt_read = false;
    bool        new_filt_write = false;
    bool        old_filt_write = false;

    if (old_events == event->events)
        return;

    Assert(event->events != WL_LATCH_SET || set->latch != NULL);
    Assert(event->events == WL_LATCH_SET ||
           event->events == WL_POSTMASTER_DEATH ||
           (event->events & (WL_SOCKET_READABLE |
                             WL_SOCKET_WRITEABLE |
                             WL_SOCKET_CLOSED)));

    if (event->events == WL_POSTMASTER_DEATH)
    {
        /*
         * Unlike all the other implementations, we detect postmaster death
         * using process notification instead of waiting on the postmaster
         * alive pipe.
         */
        WaitEventAdjustKqueueAddPostmaster(&k_ev[count++], event);
    }
    else if (event->events == WL_LATCH_SET)
    {
        /* We detect latch wakeup using a signal event. */
        WaitEventAdjustKqueueAddLatch(&k_ev[count++], event);
    }
    else
    {
        /*
         * We need to compute the adds and deletes required to get from the
         * old event mask to the new event mask, since kevent treats readable
         * and writable as separate events.
         */
        if (old_events & (WL_SOCKET_READABLE | WL_SOCKET_CLOSED))
            old_filt_read = true;
        if (event->events & (WL_SOCKET_READABLE | WL_SOCKET_CLOSED))
            new_filt_read = true;
        if (old_events & WL_SOCKET_WRITEABLE)
            old_filt_write = true;
        if (event->events & WL_SOCKET_WRITEABLE)
            new_filt_write = true;
        if (old_filt_read && !new_filt_read)
            WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_READ, EV_DELETE,
                                     event);
        else if (!old_filt_read && new_filt_read)
            WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_READ, EV_ADD,
                                     event);
        if (old_filt_write && !new_filt_write)
            WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_WRITE, EV_DELETE,
                                     event);
        else if (!old_filt_write && new_filt_write)
            WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_WRITE, EV_ADD,
                                     event);
    }

    /* For WL_SOCKET_READ -> WL_SOCKET_CLOSED, no change needed. */
    if (count == 0)
        return;

    Assert(count <= 2);

    rc = kevent(set->kqueue_fd, &k_ev[0], count, NULL, 0, NULL);

    /*
     * When adding the postmaster's pid, we have to consider that it might
     * already have exited and perhaps even been replaced by another process
     * with the same pid. If so, we have to defer reporting this as an event
     * until the next call to WaitEventSetWaitBlock().
     */

    if (rc < 0)
    {
        if (event->events == WL_POSTMASTER_DEATH &&
            (errno == ESRCH || errno == EACCES))
            set->report_postmaster_not_running = true;
        else
            ereport(ERROR,
                    (errcode_for_socket_access(),
                     errmsg("%s() failed: %m",
                            "kevent")));
    }
    else if (event->events == WL_POSTMASTER_DEATH &&
             PostmasterPid != getppid() &&
             !PostmasterIsAlive())
    {
        /*
         * The extra PostmasterIsAliveInternal() check prevents false alarms
         * on systems that give a different value for getppid() while being
         * traced by a debugger.
         */
        set->report_postmaster_not_running = true;
    }
}

#endif

#if defined(WAIT_USE_WIN32)
static void
WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
{
    HANDLE     *handle = &set->handles[event->pos + 1];

    if (event->events == WL_LATCH_SET)
    {
        Assert(set->latch != NULL);
        *handle = set->latch->event;
    }
    else if (event->events == WL_POSTMASTER_DEATH)
    {
        *handle = PostmasterHandle;
    }
    else
    {
        int         flags = FD_CLOSE;   /* always check for errors/EOF */

        if (event->events & WL_SOCKET_READABLE)
            flags |= FD_READ;
        if (event->events & WL_SOCKET_WRITEABLE)
            flags |= FD_WRITE;
        if (event->events & WL_SOCKET_CONNECTED)
            flags |= FD_CONNECT;
        if (event->events & WL_SOCKET_ACCEPT)
            flags |= FD_ACCEPT;

        if (*handle == WSA_INVALID_EVENT)
        {
            *handle = WSACreateEvent();
            if (*handle == WSA_INVALID_EVENT)
                elog(ERROR, "failed to create event for socket: error code %d",
                     WSAGetLastError());
        }
        if (WSAEventSelect(event->fd, *handle, flags) != 0)
            elog(ERROR, "failed to set up event for socket: error code %d",
                 WSAGetLastError());

        Assert(event->fd != PGINVALID_SOCKET);
    }
}
#endif

/*
 * Wait for events added to the set to happen, or until the timeout is
 * reached. At most nevents occurred events are returned.
 *
 * If timeout = -1, block until an event occurs; if 0, check sockets for
 * readiness, but don't block; if > 0, block for at most timeout milliseconds.
 *
 * Returns the number of events that occurred, or 0 if the timeout was
 * reached.
 *
 * Returned events will have the fd, pos, user_data fields set to the
 * values associated with the registered event.
 */
1382 : int
2575 andres 1383 GIC 616882 : WaitEventSetWait(WaitEventSet *set, long timeout,
1384 : WaitEvent *occurred_events, int nevents,
1385 : uint32 wait_event_info)
1386 : {
1387 616882 : int returned_events = 0;
1388 : instr_time start_time;
1389 : instr_time cur_time;
1390 616882 : long cur_timeout = -1;
1391 :
1392 616882 : Assert(nevents > 0);
1393 :
1394 : /*
1395 : * Initialize timeout if requested. We must record the current time so
1396 : * that we can determine the remaining timeout if interrupted.
1397 : */
1398 616882 : if (timeout >= 0)
1399 : {
1400 208710 : INSTR_TIME_SET_CURRENT(start_time);
1401 208710 : Assert(timeout >= 0 && timeout <= INT_MAX);
1402 208710 : cur_timeout = timeout;
1403 : }
1404 : else
79 andres 1405 GNC 408172 : INSTR_TIME_SET_ZERO(start_time);
1406 :
2378 rhaas 1407 GIC 616882 : pgstat_report_wait_start(wait_event_info);
1408 :
1409 : #ifndef WIN32
2575 andres 1410 616882 : waiting = true;
1411 : #else
1412 : /* Ensure that signals are serviced even if latch is already set */
1413 : pgwin32_dispatch_queued_signals();
1414 : #endif
1415 1194546 : while (returned_events == 0)
1416 : {
1417 : int rc;
1418 :
1419 : /*
1420 : * Check if the latch is set already. If so, leave the loop
1421 : * immediately, avoid blocking again. We don't attempt to report any
1422 : * other events that might also be satisfied.
2575 andres 1423 ECB : *
1424 : * If someone sets the latch between this and the
1425 : * WaitEventSetWaitBlock() below, the setter will write a byte to the
1426 : * pipe (or signal us and the signal handler will do that), and the
1427 : * readiness routine will return immediately.
1428 : *
1429 : * On unix, If there's a pending byte in the self pipe, we'll notice
1430 : * whenever blocking. Only clearing the pipe in that case avoids
1431 : * having to drain it every time WaitLatchOrSocket() is used. Should
1432 : * the pipe-buffer fill up we're still ok, because the pipe is in
1433 : * nonblocking mode. It's unlikely for that to happen, because the
1434 : * self pipe isn't filled unless we're blocking (waiting = true), or
1435 : * from inside a signal handler in latch_sigurg_handler().
1436 : *
1437 : * On windows, we'll also notice if there's a pending event for the
1438 : * latch when blocking, but there's no danger of anything filling up,
1439 : * as "Setting an event that is already set has no effect.".
1440 : *
1441 : * Note: we assume that the kernel calls involved in latch management
1442 : * will provide adequate synchronization on machines with weak memory
1443 : * ordering, so that we cannot miss seeing is_set if a notification
1444 : * has already been queued.
1445 : */
769 tmunro 1446 GIC 663329 : if (set->latch && !set->latch->is_set)
769 tmunro 1447 ECB : {
1448 : /* about to sleep on a latch */
769 tmunro 1449 GIC 598578 : set->latch->maybe_sleeping = true;
769 tmunro 1450 CBC 598578 : pg_memory_barrier();
1451 : /* and recheck */
1452 : }
1453 :
2575 andres 1454 GIC 663329 : if (set->latch && set->latch->is_set)
2575 andres 1455 ECB : {
2575 andres 1456 GIC 64461 : occurred_events->fd = PGINVALID_SOCKET;
1457 64461 : occurred_events->pos = set->latch_pos;
1458 64461 : occurred_events->user_data =
1459 64461 : set->events[set->latch_pos].user_data;
1460 64461 : occurred_events->events = WL_LATCH_SET;
1461 64461 : occurred_events++;
1462 64461 : returned_events++;
1463 :
1464 : /* could have been set above */
769 tmunro 1465 64461 : set->latch->maybe_sleeping = false;
1466 :
2575 andres 1467 64461 : break;
1468 : }
1469 :
1470 : /*
1471 : * Wait for events using the readiness primitive chosen at the top of
1472 : * this file. If -1 is returned, a timeout has occurred, if 0 we have
1473 : * to retry, everything >= 1 is the number of returned events.
1474 : */
1475 598868 : rc = WaitEventSetWaitBlock(set, cur_timeout,
1476 : occurred_events, nevents);
1477 :
769 tmunro 1478 598853 : if (set->latch)
1479 : {
1480 598546 : Assert(set->latch->maybe_sleeping);
1481 598546 : set->latch->maybe_sleeping = false;
1482 : }
1483 :
2575 andres 1484 598853 : if (rc == -1)
1485 21189 : break; /* timeout occurred */
2575 andres 1486 ECB : else
2575 andres 1487 GIC 577664 : returned_events = rc;
1488 :
2575 andres 1489 ECB : /* If we're not done, update cur_timeout for next iteration */
2575 andres 1490 CBC 577664 : if (returned_events == 0 && timeout >= 0)
1491 : {
2575 andres 1492 GIC 41122 : INSTR_TIME_SET_CURRENT(cur_time);
1493 41122 : INSTR_TIME_SUBTRACT(cur_time, start_time);
2575 andres 1494 CBC 41122 : cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
2575 andres 1495 GIC 41122 : if (cur_timeout <= 0)
2575 andres 1496 LBC 0 : break;
2575 andres 1497 ECB : }
1498 : }
1499 : #ifndef WIN32
2575 andres 1500 CBC 616867 : waiting = false;
2575 andres 1501 ECB : #endif
1502 :
2378 rhaas 1503 GIC 616867 : pgstat_report_wait_end();
1504 :
2575 andres 1505 CBC 616867 : return returned_events;
1506 : }
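/*
 * Editor's note: a usage sketch, not PostgreSQL source. It shows the
 * canonical client-side loop implied by the wait/return conventions above:
 * wait, reset the latch, then re-check the condition before sleeping again.
 * work_is_available() and WAIT_EVENT_EXAMPLE are hypothetical stand-ins.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
static void
wait_loop_sketch(void)
{
    for (;;)
    {
        if (work_is_available())    /* hypothetical wakeup condition */
            break;
        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                         1000L /* ms */ , WAIT_EVENT_EXAMPLE);
        ResetLatch(MyLatch);        /* clear before re-checking condition */
        CHECK_FOR_INTERRUPTS();
    }
}
#endif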
2575 andres 1507 ECB :
1508 :
1509 : #if defined(WAIT_USE_EPOLL)
1510 :
1511 : /*
1512 : * Wait using linux's epoll_wait(2).
1513 : *
 1514 : * This is the preferred wait method, as several readiness notifications
 1515 : * are delivered at once, without having to iterate through all of
 1516 : * set->events. The returned epoll_event structs contain a pointer to our
 1517 : * events, making the association easy (see the sketch after this function).
1518 : */
1519 : static inline int
2575 andres 1520 CBC 598868 : WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
2575 andres 1521 ECB : WaitEvent *occurred_events, int nevents)
1522 : {
2575 andres 1523 GIC 598868 : int returned_events = 0;
2575 andres 1524 ECB : int rc;
1525 : WaitEvent *cur_event;
1526 : struct epoll_event *cur_epoll_event;
1527 :
1528 : /* Sleep */
2575 andres 1529 GIC 598868 : rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
86 tmunro 1530 CBC 598868 : Min(nevents, set->nevents_space), cur_timeout);
1531 :
2575 andres 1532 ECB : /* Check return code */
2575 andres 1533 CBC 598868 : if (rc < 0)
2575 andres 1534 ECB : {
1535 : /* EINTR is okay, otherwise complain */
2575 andres 1536 GBC 24189 : if (errno != EINTR)
1537 : {
2575 andres 1538 UIC 0 : waiting = false;
1539 0 : ereport(ERROR,
2575 andres 1540 ECB : (errcode_for_socket_access(),
1541 : errmsg("%s() failed: %m",
1542 : "epoll_wait")));
1543 : }
2575 andres 1544 GIC 24189 : return 0;
2575 andres 1545 ECB : }
2575 andres 1546 GIC 574679 : else if (rc == 0)
1547 : {
1548 : /* timeout exceeded */
1549 21189 : return -1;
1550 : }
1551 :
1552 : /*
 1553 : * At least one event occurred; iterate over the returned epoll events
 1554 : * until they're either all processed, or we've returned all the events
1555 : * the caller desired.
1556 : */
1557 553490 : for (cur_epoll_event = set->epoll_ret_events;
1558 1107028 : cur_epoll_event < (set->epoll_ret_events + rc) &&
1559 : returned_events < nevents;
2575 andres 1560 CBC 553538 : cur_epoll_event++)
1561 : {
1562 : /* epoll's data pointer is set to the associated WaitEvent */
1563 553553 : cur_event = (WaitEvent *) cur_epoll_event->data.ptr;
1564 :
2575 andres 1565 GIC 553553 : occurred_events->pos = cur_event->pos;
1566 553553 : occurred_events->user_data = cur_event->user_data;
1567 553553 : occurred_events->events = 0;
1568 :
2575 andres 1569 CBC 553553 : if (cur_event->events == WL_LATCH_SET &&
1570 319069 : cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1571 : {
1572 : /* Drain the signalfd. */
769 tmunro 1573 319069 : drain();
1574 :
928 tmunro 1575 GIC 319069 : if (set->latch && set->latch->is_set)
2575 andres 1576 ECB : {
2575 andres 1577 GIC 296805 : occurred_events->fd = PGINVALID_SOCKET;
2575 andres 1578 GBC 296805 : occurred_events->events = WL_LATCH_SET;
1579 296805 : occurred_events++;
2575 andres 1580 GIC 296805 : returned_events++;
1581 : }
1582 : }
1583 234484 : else if (cur_event->events == WL_POSTMASTER_DEATH &&
2575 andres 1584 CBC 15 : cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1585 : {
2575 andres 1586 ECB : /*
1587 : * We expect an EPOLLHUP when the remote end is closed, but
1588 : * because we don't expect the pipe to become readable or to have
1589 : * any errors either, treat those cases as postmaster death, too.
1590 : *
1591 : * Be paranoid about a spurious event signaling the postmaster as
1592 : * being dead. There have been reports about that happening with
1593 : * older primitives (select(2) to be specific), and a spurious
1594 : * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1595 : * cost much.
1596 : */
1733 tmunro 1597 CBC 15 : if (!PostmasterIsAliveInternal())
2575 andres 1598 ECB : {
1598 tmunro 1599 GIC 15 : if (set->exit_on_postmaster_death)
1598 tmunro 1600 CBC 15 : proc_exit(1);
2575 andres 1601 UIC 0 : occurred_events->fd = PGINVALID_SOCKET;
1602 0 : occurred_events->events = WL_POSTMASTER_DEATH;
2575 andres 1603 LBC 0 : occurred_events++;
2575 andres 1604 UIC 0 : returned_events++;
2575 andres 1605 ECB : }
1606 : }
419 tmunro 1607 CBC 234469 : else if (cur_event->events & (WL_SOCKET_READABLE |
1608 : WL_SOCKET_WRITEABLE |
419 tmunro 1609 ECB : WL_SOCKET_CLOSED))
2575 andres 1610 : {
2575 andres 1611 GIC 234469 : Assert(cur_event->fd != PGINVALID_SOCKET);
1612 :
2575 andres 1613 CBC 234469 : if ((cur_event->events & WL_SOCKET_READABLE) &&
2575 andres 1614 GIC 221570 : (cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
2575 andres 1615 ECB : {
1616 : /* data available in socket, or EOF */
2575 andres 1617 CBC 217770 : occurred_events->events |= WL_SOCKET_READABLE;
2575 andres 1618 ECB : }
1619 :
2575 andres 1620 CBC 234469 : if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
2575 andres 1621 GIC 17015 : (cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
1622 : {
2575 andres 1623 ECB : /* writable, or EOF */
2575 andres 1624 CBC 16980 : occurred_events->events |= WL_SOCKET_WRITEABLE;
1625 : }
1626 :
419 tmunro 1627 GIC 234469 : if ((cur_event->events & WL_SOCKET_CLOSED) &&
419 tmunro 1628 UIC 0 : (cur_epoll_event->events & (EPOLLRDHUP | EPOLLERR | EPOLLHUP)))
1629 : {
1630 : /* remote peer shut down, or error */
1631 0 : occurred_events->events |= WL_SOCKET_CLOSED;
1632 : }
1633 :
1159 tmunro 1634 GIC 234469 : if (occurred_events->events != 0)
1635 : {
1636 234469 : occurred_events->fd = cur_event->fd;
1159 tmunro 1637 CBC 234469 : occurred_events++;
1159 tmunro 1638 GIC 234469 : returned_events++;
1159 tmunro 1639 ECB : }
1640 : }
1159 tmunro 1641 EUB : }
1642 :
1159 tmunro 1643 GBC 553475 : return returned_events;
1159 tmunro 1644 EUB : }
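/*
 * Editor's note: a self-contained sketch, not PostgreSQL code. It shows the
 * data.ptr association technique used above: stash a pointer to our own
 * bookkeeping struct when registering, and get it back directly from each
 * readiness notification, with no scan of the registered-event array.
 * struct my_event and the fixed array size are illustrative only.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <stdio.h>
#include <sys/epoll.h>

struct my_event
{
    int         fd;
    const char *name;
};

static void
epoll_assoc_sketch(struct my_event *ev)
{
    struct epoll_event reg = {0};
    struct epoll_event got[4];
    int         epfd = epoll_create1(EPOLL_CLOEXEC);
    int         n;

    reg.events = EPOLLIN;
    reg.data.ptr = ev;          /* stash our bookkeeping struct */
    epoll_ctl(epfd, EPOLL_CTL_ADD, ev->fd, &reg);

    n = epoll_wait(epfd, got, 4, -1);
    for (int i = 0; i < n; i++)
    {
        /* data.ptr comes straight back; no lookup needed */
        struct my_event *hit = (struct my_event *) got[i].data.ptr;

        printf("%s is ready\n", hit->name);
    }
}
#endif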
1645 :
1646 : #elif defined(WAIT_USE_KQUEUE)
1159 tmunro 1647 ECB :
1648 : /*
1649 : * Wait using kevent(2) on BSD-family systems and macOS.
1650 : *
 1651 : * For now this mirrors the epoll code, but in the future it could modify
 1652 : * the fd set in the same kevent() call it uses for waiting, instead of
 1653 : * doing that with separate system calls. (A sketch follows the function.)
1654 : */
1655 : static int
1656 : WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1657 : WaitEvent *occurred_events, int nevents)
1658 : {
1659 : int returned_events = 0;
1660 : int rc;
1661 : WaitEvent *cur_event;
1662 : struct kevent *cur_kqueue_event;
1663 : struct timespec timeout;
1664 : struct timespec *timeout_p;
1665 :
1666 : if (cur_timeout < 0)
1667 : timeout_p = NULL;
1159 tmunro 1668 EUB : else
1669 : {
1670 : timeout.tv_sec = cur_timeout / 1000;
1671 : timeout.tv_nsec = (cur_timeout % 1000) * 1000000;
1672 : timeout_p = &timeout;
1673 : }
1159 tmunro 1674 ECB :
1675 : /*
906 1676 : * Report postmaster events discovered by WaitEventAdjustKqueue() or an
1677 : * earlier call to WaitEventSetWait().
1678 : */
1679 : if (unlikely(set->report_postmaster_not_running))
1680 : {
1681 : if (set->exit_on_postmaster_death)
1682 : proc_exit(1);
1159 1683 : occurred_events->fd = PGINVALID_SOCKET;
1684 : occurred_events->events = WL_POSTMASTER_DEATH;
1685 : return 1;
1686 : }
1687 :
1688 : /* Sleep */
1689 : rc = kevent(set->kqueue_fd, NULL, 0,
1690 : set->kqueue_ret_events,
1691 : Min(nevents, set->nevents_space),
1692 : timeout_p);
1693 :
1694 : /* Check return code */
1695 : if (rc < 0)
1696 : {
1697 : /* EINTR is okay, otherwise complain */
1698 : if (errno != EINTR)
1699 : {
1700 : waiting = false;
1701 : ereport(ERROR,
1702 : (errcode_for_socket_access(),
1703 : errmsg("%s() failed: %m",
1704 : "kevent")));
1705 : }
1706 : return 0;
1707 : }
1708 : else if (rc == 0)
1709 : {
1710 : /* timeout exceeded */
1711 : return -1;
1712 : }
1713 :
1714 : /*
 1715 : * At least one event occurred; iterate over the returned kqueue events
 1716 : * until they're either all processed, or we've returned all the events
1717 : * the caller desired.
1718 : */
1719 : for (cur_kqueue_event = set->kqueue_ret_events;
1720 : cur_kqueue_event < (set->kqueue_ret_events + rc) &&
1721 : returned_events < nevents;
1722 : cur_kqueue_event++)
1723 : {
1724 : /* kevent's udata points to the associated WaitEvent */
1725 : cur_event = AccessWaitEvent(cur_kqueue_event);
1726 :
1727 : occurred_events->pos = cur_event->pos;
1728 : occurred_events->user_data = cur_event->user_data;
1729 : occurred_events->events = 0;
1730 :
1731 : if (cur_event->events == WL_LATCH_SET &&
1732 : cur_kqueue_event->filter == EVFILT_SIGNAL)
1733 : {
1734 : if (set->latch && set->latch->is_set)
1735 : {
1736 : occurred_events->fd = PGINVALID_SOCKET;
1737 : occurred_events->events = WL_LATCH_SET;
1738 : occurred_events++;
1739 : returned_events++;
1740 : }
1741 : }
1742 : else if (cur_event->events == WL_POSTMASTER_DEATH &&
1743 : cur_kqueue_event->filter == EVFILT_PROC &&
1744 : (cur_kqueue_event->fflags & NOTE_EXIT) != 0)
1745 : {
1746 : /*
1747 : * The kernel will tell this kqueue object only once about the
 1748 : * exit of the postmaster, so remember that for next time, giving us
 1749 : * level-triggered semantics.
1750 : */
1751 : set->report_postmaster_not_running = true;
1752 :
1753 : if (set->exit_on_postmaster_death)
1754 : proc_exit(1);
1755 : occurred_events->fd = PGINVALID_SOCKET;
1756 : occurred_events->events = WL_POSTMASTER_DEATH;
1757 : occurred_events++;
1758 : returned_events++;
1759 : }
1760 : else if (cur_event->events & (WL_SOCKET_READABLE |
1761 : WL_SOCKET_WRITEABLE |
1762 : WL_SOCKET_CLOSED))
1763 : {
1764 : Assert(cur_event->fd >= 0);
1765 :
1766 : if ((cur_event->events & WL_SOCKET_READABLE) &&
1767 : (cur_kqueue_event->filter == EVFILT_READ))
1768 : {
1769 : /* readable, or EOF */
1770 : occurred_events->events |= WL_SOCKET_READABLE;
1771 : }
1772 :
1773 : if ((cur_event->events & WL_SOCKET_CLOSED) &&
1774 : (cur_kqueue_event->filter == EVFILT_READ) &&
1775 : (cur_kqueue_event->flags & EV_EOF))
1776 : {
1777 : /* the remote peer has shut down */
1778 : occurred_events->events |= WL_SOCKET_CLOSED;
1779 : }
1780 :
1781 : if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1782 : (cur_kqueue_event->filter == EVFILT_WRITE))
1783 : {
1784 : /* writable, or EOF */
1785 : occurred_events->events |= WL_SOCKET_WRITEABLE;
1786 : }
1787 :
1788 : if (occurred_events->events != 0)
1789 : {
1790 : occurred_events->fd = cur_event->fd;
1791 : occurred_events++;
1792 : returned_events++;
1793 : }
1794 : }
1795 : }
1796 :
1797 : return returned_events;
1798 : }
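/*
 * Editor's note: a self-contained sketch, not PostgreSQL code. It shows the
 * EVFILT_SIGNAL mechanism this implementation relies on for latch wakeups:
 * register interest in SIGURG once, then each later kevent() wait returns
 * when the signal has been delivered.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <signal.h>
#include <sys/event.h>

static void
kqueue_signal_sketch(void)
{
    struct kevent kev;
    int         kq = kqueue();

    /*
     * EVFILT_SIGNAL counts signal deliveries even when the signal's normal
     * disposition would discard them; here we register interest in SIGURG.
     */
    EV_SET(&kev, SIGURG, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
    (void) kevent(kq, &kev, 1, NULL, 0, NULL);  /* register only */

    /* Block until SIGURG has been delivered at least once. */
    (void) kevent(kq, NULL, 0, &kev, 1, NULL);
}
#endif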
1799 :
1800 : #elif defined(WAIT_USE_POLL)
1801 :
1802 : /*
1803 : * Wait using poll(2).
1804 : *
 1805 : * This allows receiving readiness notifications for several events at once,
 1806 : * but requires iterating through all of set->pollfds. (A sketch follows.)
1807 : */
1808 : static inline int
1809 : WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1810 : WaitEvent *occurred_events, int nevents)
1811 : {
1812 : int returned_events = 0;
1813 : int rc;
1814 : WaitEvent *cur_event;
1815 : struct pollfd *cur_pollfd;
1816 :
1817 : /* Sleep */
1818 : rc = poll(set->pollfds, set->nevents, (int) cur_timeout);
1819 :
1820 : /* Check return code */
1821 : if (rc < 0)
1822 : {
1823 : /* EINTR is okay, otherwise complain */
1824 : if (errno != EINTR)
1825 : {
1826 : waiting = false;
1827 : ereport(ERROR,
1828 : (errcode_for_socket_access(),
1829 : errmsg("%s() failed: %m",
1830 : "poll")));
1831 : }
1832 : return 0;
1833 : }
1834 : else if (rc == 0)
1835 : {
1836 : /* timeout exceeded */
1837 : return -1;
1838 : }
1839 :
1840 : for (cur_event = set->events, cur_pollfd = set->pollfds;
1841 : cur_event < (set->events + set->nevents) &&
1842 : returned_events < nevents;
1843 : cur_event++, cur_pollfd++)
1844 : {
1845 : /* no activity on this FD, skip */
1846 : if (cur_pollfd->revents == 0)
1847 : continue;
1848 :
1849 : occurred_events->pos = cur_event->pos;
1850 : occurred_events->user_data = cur_event->user_data;
1851 : occurred_events->events = 0;
1852 :
1853 : if (cur_event->events == WL_LATCH_SET &&
1854 : (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1855 : {
1856 : /* There's data in the self-pipe, clear it. */
1857 : drain();
1858 :
1859 : if (set->latch && set->latch->is_set)
1860 : {
1861 : occurred_events->fd = PGINVALID_SOCKET;
1862 : occurred_events->events = WL_LATCH_SET;
1863 : occurred_events++;
1864 : returned_events++;
1865 : }
1866 : }
1867 : else if (cur_event->events == WL_POSTMASTER_DEATH &&
1868 : (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
1869 : {
1870 : /*
 1871 : * We expect a POLLHUP when the remote end is closed, but because
1872 : * we don't expect the pipe to become readable or to have any
1873 : * errors either, treat those cases as postmaster death, too.
1874 : *
1875 : * Be paranoid about a spurious event signaling the postmaster as
1876 : * being dead. There have been reports about that happening with
1877 : * older primitives (select(2) to be specific), and a spurious
1878 : * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1879 : * cost much.
1880 : */
1881 : if (!PostmasterIsAliveInternal())
1882 : {
1883 : if (set->exit_on_postmaster_death)
1884 : proc_exit(1);
1885 : occurred_events->fd = PGINVALID_SOCKET;
1886 : occurred_events->events = WL_POSTMASTER_DEATH;
1887 : occurred_events++;
1888 : returned_events++;
1889 : }
1890 : }
1891 : else if (cur_event->events & (WL_SOCKET_READABLE |
1892 : WL_SOCKET_WRITEABLE |
1893 : WL_SOCKET_CLOSED))
1894 : {
1895 : int errflags = POLLHUP | POLLERR | POLLNVAL;
1896 :
1897 : Assert(cur_event->fd >= PGINVALID_SOCKET);
1898 :
1899 : if ((cur_event->events & WL_SOCKET_READABLE) &&
1900 : (cur_pollfd->revents & (POLLIN | errflags)))
1901 : {
1902 : /* data available in socket, or EOF */
1903 : occurred_events->events |= WL_SOCKET_READABLE;
1904 : }
1905 :
1906 : if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1907 : (cur_pollfd->revents & (POLLOUT | errflags)))
1908 : {
1909 : /* writeable, or EOF */
1910 : occurred_events->events |= WL_SOCKET_WRITEABLE;
1911 : }
1912 :
1913 : #ifdef POLLRDHUP
1914 : if ((cur_event->events & WL_SOCKET_CLOSED) &&
1915 : (cur_pollfd->revents & (POLLRDHUP | errflags)))
1916 : {
1917 : /* remote peer closed, or error */
1918 : occurred_events->events |= WL_SOCKET_CLOSED;
1919 : }
1920 : #endif
1921 :
1922 : if (occurred_events->events != 0)
1923 : {
1924 : occurred_events->fd = cur_event->fd;
1925 : occurred_events++;
1926 : returned_events++;
1927 : }
1928 : }
1929 : }
1930 : return returned_events;
1931 : }
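/*
 * Editor's note: a self-contained sketch, not PostgreSQL code. It shows the
 * self-pipe pattern as it meets poll(): the pipe's read end sits in the
 * pollfd array alongside ordinary sockets, so one byte written from a
 * signal handler makes poll() return like any other readiness event.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <poll.h>
#include <unistd.h>

static void
poll_selfpipe_sketch(int selfpipe_read_fd, int sock_fd)
{
    struct pollfd pfds[2];

    pfds[0].fd = selfpipe_read_fd;
    pfds[0].events = POLLIN;            /* latch wakeup channel */
    pfds[1].fd = sock_fd;
    pfds[1].events = POLLIN | POLLOUT;  /* ordinary socket interest */

    if (poll(pfds, 2, -1) > 0 && (pfds[0].revents & POLLIN))
    {
        char        buf[64];

        /* consume the wakeup byte(s) so the next poll() can block */
        (void) read(selfpipe_read_fd, buf, sizeof(buf));
    }
}
#endif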
1932 :
1933 : #elif defined(WAIT_USE_WIN32)
1934 :
1935 : /*
1936 : * Wait using Windows' WaitForMultipleObjects().
1937 : *
1938 : * Unfortunately this will only ever return a single readiness notification at
1939 : * a time. Note that while the official documentation for
1940 : * WaitForMultipleObjects is ambiguous about multiple events being "consumed"
1941 : * with a single bWaitAll = FALSE call,
1942 : * https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273 confirms
1943 : * that only one event is "consumed".
1944 : */
1945 : static inline int
1946 : WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1947 : WaitEvent *occurred_events, int nevents)
1948 : {
1949 : int returned_events = 0;
1950 : DWORD rc;
1951 : WaitEvent *cur_event;
1952 :
1953 : /* Reset any wait events that need it */
1954 : for (cur_event = set->events;
1955 : cur_event < (set->events + set->nevents);
1956 : cur_event++)
1957 : {
1958 : if (cur_event->reset)
1959 : {
1960 : WaitEventAdjustWin32(set, cur_event);
1961 : cur_event->reset = false;
1962 : }
1963 :
1964 : /*
1965 : * Windows does not guarantee to log an FD_WRITE network event
1966 : * indicating that more data can be sent unless the previous send()
1967 : * failed with WSAEWOULDBLOCK. While our caller might well have made
1968 : * such a call, we cannot assume that here. Therefore, if waiting for
1969 : * write-ready, force the issue by doing a dummy send(). If the dummy
1970 : * send() succeeds, assume that the socket is in fact write-ready, and
1971 : * return immediately. Also, if it fails with something other than
1972 : * WSAEWOULDBLOCK, return a write-ready indication to let our caller
1973 : * deal with the error condition.
1974 : */
1975 : if (cur_event->events & WL_SOCKET_WRITEABLE)
1976 : {
1977 : char c;
1978 : WSABUF buf;
1979 : DWORD sent;
1980 : int r;
1981 :
1982 : buf.buf = &c;
1983 : buf.len = 0;
1984 :
1985 : r = WSASend(cur_event->fd, &buf, 1, &sent, 0, NULL, NULL);
1986 : if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
1987 : {
1988 : occurred_events->pos = cur_event->pos;
1989 : occurred_events->user_data = cur_event->user_data;
1990 : occurred_events->events = WL_SOCKET_WRITEABLE;
1991 : occurred_events->fd = cur_event->fd;
1992 : return 1;
1993 : }
1994 : }
1995 : }
1996 :
1997 : /*
1998 : * Sleep.
1999 : *
 2000 : * Need to wait for ->nevents + 1, because the signal handle is in [0].
2001 : */
2002 : rc = WaitForMultipleObjects(set->nevents + 1, set->handles, FALSE,
2003 : cur_timeout);
2004 :
2005 : /* Check return code */
2006 : if (rc == WAIT_FAILED)
2007 : elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
2008 : GetLastError());
2009 : else if (rc == WAIT_TIMEOUT)
2010 : {
2011 : /* timeout exceeded */
2012 : return -1;
2013 : }
2014 :
2015 : if (rc == WAIT_OBJECT_0)
2016 : {
2017 : /* Service newly-arrived signals */
2018 : pgwin32_dispatch_queued_signals();
2019 : return 0; /* retry */
2020 : }
2021 :
2022 : /*
 2023 : * Because the always-present pgwin32_signal_event occupies slot [0],
 2024 : * the handle offset minus one directly corresponds to a wait event.
2025 : */
2026 : cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];
2027 :
2028 : occurred_events->pos = cur_event->pos;
2029 : occurred_events->user_data = cur_event->user_data;
2030 : occurred_events->events = 0;
2031 :
2032 : if (cur_event->events == WL_LATCH_SET)
2033 : {
2034 : /*
2035 : * We cannot use set->latch->event to reset the fired event if we
2036 : * aren't waiting on this latch now.
2037 : */
2038 : if (!ResetEvent(set->handles[cur_event->pos + 1]))
2039 : elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());
2040 :
2041 : if (set->latch && set->latch->is_set)
2042 : {
2043 : occurred_events->fd = PGINVALID_SOCKET;
2044 : occurred_events->events = WL_LATCH_SET;
2045 : occurred_events++;
2046 : returned_events++;
2047 : }
2048 : }
2049 : else if (cur_event->events == WL_POSTMASTER_DEATH)
2050 : {
2051 : /*
2052 : * Postmaster apparently died. Since the consequences of falsely
2053 : * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we take
2054 : * the trouble to positively verify this with PostmasterIsAlive(),
2055 : * even though there is no known reason to think that the event could
2056 : * be falsely set on Windows.
2057 : */
2058 : if (!PostmasterIsAliveInternal())
2059 : {
2060 : if (set->exit_on_postmaster_death)
2061 : proc_exit(1);
2062 : occurred_events->fd = PGINVALID_SOCKET;
2063 : occurred_events->events = WL_POSTMASTER_DEATH;
2064 : occurred_events++;
2065 : returned_events++;
2066 : }
2067 : }
2068 : else if (cur_event->events & WL_SOCKET_MASK)
2069 : {
2070 : WSANETWORKEVENTS resEvents;
2071 : HANDLE handle = set->handles[cur_event->pos + 1];
2072 :
2073 : Assert(cur_event->fd);
2074 :
2075 : occurred_events->fd = cur_event->fd;
2076 :
2077 : ZeroMemory(&resEvents, sizeof(resEvents));
2078 : if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
2079 : elog(ERROR, "failed to enumerate network events: error code %d",
2080 : WSAGetLastError());
2081 : if ((cur_event->events & WL_SOCKET_READABLE) &&
2082 : (resEvents.lNetworkEvents & FD_READ))
2083 : {
2084 : /* data available in socket */
2085 : occurred_events->events |= WL_SOCKET_READABLE;
2086 :
2087 : /*------
2088 : * WaitForMultipleObjects doesn't guarantee that a read event will
2089 : * be returned if the latch is set at the same time. Even if it
 2090 : * did, the caller might drop that event expecting it to reoccur
 2091 : * on the next call. So, we must force the event to be reset if this
 2092 : * WaitEventSet is used again in order to avoid an indefinite
 2093 : * hang. Refer to https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
2094 : * for the behavior of socket events.
2095 : *------
2096 : */
2097 : cur_event->reset = true;
2098 : }
2099 : if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
2100 : (resEvents.lNetworkEvents & FD_WRITE))
2101 : {
2102 : /* writeable */
2103 : occurred_events->events |= WL_SOCKET_WRITEABLE;
2104 : }
2105 : if ((cur_event->events & WL_SOCKET_CONNECTED) &&
2106 : (resEvents.lNetworkEvents & FD_CONNECT))
2107 : {
2108 : /* connected */
2109 : occurred_events->events |= WL_SOCKET_CONNECTED;
2110 : }
2111 : if ((cur_event->events & WL_SOCKET_ACCEPT) &&
2112 : (resEvents.lNetworkEvents & FD_ACCEPT))
2113 : {
2114 : /* incoming connection could be accepted */
2115 : occurred_events->events |= WL_SOCKET_ACCEPT;
2116 : }
2117 : if (resEvents.lNetworkEvents & FD_CLOSE)
2118 : {
2119 : /* EOF/error, so signal all caller-requested socket flags */
2120 : occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
2121 : }
2122 :
2123 : if (occurred_events->events != 0)
2124 : {
2125 : occurred_events++;
2126 : returned_events++;
2127 : }
2128 : }
2129 :
2130 : return returned_events;
2131 : }
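/*
 * Editor's note: a minimal sketch, not PostgreSQL code. It isolates the
 * dummy-send probe explained above: a zero-length WSASend() either succeeds,
 * meaning the socket can accept data now, or fails with WSAEWOULDBLOCK,
 * meaning a later FD_WRITE event can be trusted; any other failure is also
 * reported as write-ready so the caller sees the real error on its send().
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <winsock2.h>
#include <stdbool.h>

static bool
socket_probably_writable(SOCKET s)
{
    char        c;
    WSABUF      buf;
    DWORD       sent;

    buf.buf = &c;
    buf.len = 0;                /* zero-length probe: sends no data */

    if (WSASend(s, &buf, 1, &sent, 0, NULL, NULL) == 0)
        return true;            /* probe accepted: socket is writable */

    /* WSAEWOULDBLOCK means "not yet"; anything else is an error worth
     * surfacing to the caller as write-ready. */
    return WSAGetLastError() != WSAEWOULDBLOCK;
}
#endif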
2132 : #endif
2133 :
2134 : /*
2135 : * Return whether the current build options can report WL_SOCKET_CLOSED.
2136 : */
2137 : bool
419 tmunro 2138 GIC 1857 : WaitEventSetCanReportClosed(void)
2139 : {
2140 : #if (defined(WAIT_USE_POLL) && defined(POLLRDHUP)) || \
2141 : defined(WAIT_USE_EPOLL) || \
2142 : defined(WAIT_USE_KQUEUE)
2143 1857 : return true;
2144 : #else
2145 : return false;
2146 : #endif
2147 : }
2148 :
2149 : /*
2150 : * Get the number of wait events registered in a given WaitEventSet.
2151 : */
2152 : int
739 efujita 2153 259 : GetNumRegisteredWaitEvents(WaitEventSet *set)
2154 : {
2155 259 : return set->nevents;
2156 : }
2157 :
2158 : #if defined(WAIT_USE_SELF_PIPE)
2159 :
2160 : /*
2161 : * SetLatch uses SIGURG to wake up the process waiting on the latch.
2162 : *
2163 : * Wake up WaitLatch, if we're waiting.
2164 : */
2165 : static void
2166 : latch_sigurg_handler(SIGNAL_ARGS)
2167 : {
2168 : int save_errno = errno;
2169 :
2170 : if (waiting)
2171 : sendSelfPipeByte();
2172 :
2173 : errno = save_errno;
2174 : }
2175 :
2176 : /* Send one byte to the self-pipe, to wake up WaitLatch */
2177 : static void
2178 : sendSelfPipeByte(void)
2179 : {
2180 : int rc;
2181 : char dummy = 0;
2182 :
2183 : retry:
4593 heikki.linnakangas 2184 ECB : rc = write(selfpipe_writefd, &dummy, 1);
2185 : if (rc < 0)
2186 : {
2187 : /* If interrupted by signal, just retry */
2188 : if (errno == EINTR)
2189 : goto retry;
2190 :
2191 : /*
2192 : * If the pipe is full, we don't need to retry, the data that's there
2193 : * already is enough to wake up WaitLatch.
2194 : */
2195 : if (errno == EAGAIN || errno == EWOULDBLOCK)
2196 : return;
2197 :
2198 : /*
4382 bruce 2199 : * Oops, the write() failed for some other reason. We might be in a
 2200 : * signal handler, so it's not safe to elog(). We have no choice but
 2201 : * to silently ignore the error.
2202 : */
2203 : return;
2204 : }
2205 : }
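/*
 * Editor's note: a sketch with hypothetical names, not the actual init
 * code. It shows the self-pipe setup the comments above assume: both ends
 * are made non-blocking, so the handler's write() can never hang on a full
 * pipe and drain()'s read() can never hang on an empty one. Error handling
 * and close-on-exec flags are elided.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <fcntl.h>
#include <unistd.h>

static void
init_self_pipe_sketch(int *read_fd, int *write_fd)
{
    int         pipefd[2];

    if (pipe(pipefd) < 0)
        return;                                   /* error handling elided */
    (void) fcntl(pipefd[0], F_SETFL, O_NONBLOCK); /* read end */
    (void) fcntl(pipefd[1], F_SETFL, O_NONBLOCK); /* write end */
    *read_fd = pipefd[0];
    *write_fd = pipefd[1];
}
#endif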
2206 :
2207 : #endif
2208 :
2209 : #if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
2210 :
2211 : /*
2212 : * Read all available data from self-pipe or signalfd.
2213 : *
 2214 : * Note: this is only called when waiting = true. If it fails and doesn't
 2215 : * return (i.e., it raises an ERROR), it must reset that flag first (though
 2216 : * ideally, this will never happen).
2217 : */
2218 : static void
769 tmunro 2219 GIC 319069 : drain(void)
2220 : {
2221 : char buf[1024];
2222 : int rc;
2223 : int fd;
2224 :
2225 : #ifdef WAIT_USE_SELF_PIPE
2226 : fd = selfpipe_readfd;
2227 : #else
2228 319069 : fd = signal_fd;
2229 : #endif
2230 :
2231 : for (;;)
2232 : {
2233 319069 : rc = read(fd, buf, sizeof(buf));
4593 heikki.linnakangas 2234 319069 : if (rc < 0)
2235 : {
4593 heikki.linnakangas 2236 UIC 0 : if (errno == EAGAIN || errno == EWOULDBLOCK)
2237 : break; /* the descriptor is empty */
2238 0 : else if (errno == EINTR)
4382 bruce 2239 0 : continue; /* retry */
2240 : else
2241 : {
4260 tgl 2242 0 : waiting = false;
2243 : #ifdef WAIT_USE_SELF_PIPE
2244 : elog(ERROR, "read() on self-pipe failed: %m");
2245 : #else
769 tmunro 2246 0 : elog(ERROR, "read() on signalfd failed: %m");
2247 : #endif
2248 : }
2249 : }
4593 heikki.linnakangas 2250 GIC 319069 : else if (rc == 0)
2251 : {
4260 tgl 2252 UIC 0 : waiting = false;
2253 : #ifdef WAIT_USE_SELF_PIPE
2254 : elog(ERROR, "unexpected EOF on self-pipe");
2255 : #else
769 tmunro 2256 0 : elog(ERROR, "unexpected EOF on signalfd");
2257 : #endif
2258 : }
4260 tgl 2259 GIC 319069 : else if (rc < sizeof(buf))
2260 : {
2261 : /* we successfully drained the pipe; no need to read() again */
2262 319069 : break;
2263 : }
2264 : /* else buffer wasn't big enough, so read again */
4593 heikki.linnakangas 2265 ECB : }
4593 heikki.linnakangas 2266 GIC 319069 : }
2267 :
2268 : #endif
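/*
 * Editor's note: a self-contained sketch, not PostgreSQL code. It shows the
 * signalfd setup that drain() services in the WAIT_USE_SIGNALFD case above:
 * SIGURG is blocked as an asynchronous signal and instead consumed as
 * readable records from a non-blocking descriptor.
 */
#ifdef EDITOR_ILLUSTRATION_ONLY
#include <signal.h>
#include <sys/signalfd.h>

static int
open_sigurg_fd_sketch(void)
{
    sigset_t    mask;

    sigemptyset(&mask);
    sigaddset(&mask, SIGURG);
    sigprocmask(SIG_BLOCK, &mask, NULL);    /* stop async delivery */

    /* each pending SIGURG now appears as a readable siginfo record */
    return signalfd(-1, &mask, SFD_NONBLOCK | SFD_CLOEXEC);
}
#endif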