Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * latch.c
4 : : * Routines for inter-process latches
5 : : *
6 : : * The poll() implementation uses the so-called self-pipe trick to overcome the
7 : : * race condition involved with poll() and setting a global flag in the signal
8 : : * handler. When a latch is set and the current process is waiting for it, the
9 : : * signal handler wakes up the poll() in WaitLatch by writing a byte to a pipe.
10 : : * A signal by itself doesn't interrupt poll() on all platforms, and even on
11 : : * platforms where it does, a signal that arrives just before the poll() call
12 : : * does not prevent poll() from entering sleep. An incoming byte on a pipe
13 : : * however reliably interrupts the sleep, and causes poll() to return
14 : : * immediately even if the signal arrives before poll() begins.
15 : : *
16 : : * The epoll() implementation overcomes the race with a different technique: it
17 : : * keeps SIGURG blocked and consumes from a signalfd() descriptor instead. We
18 : : * don't need to register a signal handler or create our own self-pipe. We
19 : : * assume that any system that has Linux epoll() also has Linux signalfd().
20 : : *
21 : : * The kqueue() implementation waits for SIGURG with EVFILT_SIGNAL.
22 : : *
23 : : * The Windows implementation uses Windows events that are inherited by all
24 : : * postmaster child processes. There's no need for the self-pipe trick there.
25 : : *
26 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
27 : : * Portions Copyright (c) 1994, Regents of the University of California
28 : : *
29 : : * IDENTIFICATION
30 : : * src/backend/storage/ipc/latch.c
31 : : *
32 : : *-------------------------------------------------------------------------
33 : : */
34 : : #include "postgres.h"
35 : :
36 : : #include <fcntl.h>
37 : : #include <limits.h>
38 : : #include <signal.h>
39 : : #include <unistd.h>
40 : : #ifdef HAVE_SYS_EPOLL_H
41 : : #include <sys/epoll.h>
42 : : #endif
43 : : #ifdef HAVE_SYS_EVENT_H
44 : : #include <sys/event.h>
45 : : #endif
46 : : #ifdef HAVE_SYS_SIGNALFD_H
47 : : #include <sys/signalfd.h>
48 : : #endif
49 : : #ifdef HAVE_POLL_H
50 : : #include <poll.h>
51 : : #endif
52 : :
53 : : #include "libpq/pqsignal.h"
54 : : #include "miscadmin.h"
55 : : #include "pgstat.h"
56 : : #include "port/atomics.h"
57 : : #include "portability/instr_time.h"
58 : : #include "postmaster/postmaster.h"
59 : : #include "storage/fd.h"
60 : : #include "storage/ipc.h"
61 : : #include "storage/latch.h"
62 : : #include "storage/pmsignal.h"
63 : : #include "utils/memutils.h"
64 : : #include "utils/resowner.h"
65 : :
66 : : /*
67 : : * Select the fd readiness primitive to use. Normally the "most modern"
68 : : * primitive supported by the OS will be used, but for testing it can be
69 : : * useful to manually specify the used primitive. If desired, just add a
70 : : * define somewhere before this block.
71 : : */
72 : : #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
73 : : defined(WAIT_USE_KQUEUE) || defined(WAIT_USE_WIN32)
74 : : /* don't overwrite manual choice */
75 : : #elif defined(HAVE_SYS_EPOLL_H)
76 : : #define WAIT_USE_EPOLL
77 : : #elif defined(HAVE_KQUEUE)
78 : : #define WAIT_USE_KQUEUE
79 : : #elif defined(HAVE_POLL)
80 : : #define WAIT_USE_POLL
81 : : #elif WIN32
82 : : #define WAIT_USE_WIN32
83 : : #else
84 : : #error "no wait set implementation available"
85 : : #endif
86 : :
87 : : /*
88 : : * By default, we use a self-pipe with poll() and a signalfd with epoll(), if
89 : : * available. For testing the choice can also be manually specified.
90 : : */
91 : : #if defined(WAIT_USE_POLL) || defined(WAIT_USE_EPOLL)
92 : : #if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
93 : : /* don't overwrite manual choice */
94 : : #elif defined(WAIT_USE_EPOLL) && defined(HAVE_SYS_SIGNALFD_H)
95 : : #define WAIT_USE_SIGNALFD
96 : : #else
97 : : #define WAIT_USE_SELF_PIPE
98 : : #endif
99 : : #endif
100 : :
/* typedef in latch.h */
struct WaitEventSet
{
	/* ResourceOwner tracking this set, or NULL for session lifetime */
	ResourceOwner owner;

	int			nevents;		/* number of registered events */
	int			nevents_space;	/* maximum number of events in this set */

	/*
	 * Array, of nevents_space length, storing the definition of events this
	 * set is waiting for.
	 */
	WaitEvent  *events;

	/*
	 * If WL_LATCH_SET is specified in any wait event, latch is a pointer to
	 * said latch, and latch_pos the offset in the ->events array. This is
	 * useful because we check the state of the latch before performing
	 * syscalls related to waiting.
	 */
	Latch	   *latch;
	int			latch_pos;

	/*
	 * WL_EXIT_ON_PM_DEATH is converted to WL_POSTMASTER_DEATH, but this flag
	 * is set so that we'll exit immediately if postmaster death is detected,
	 * instead of returning.
	 */
	bool		exit_on_postmaster_death;

#if defined(WAIT_USE_EPOLL)
	int			epoll_fd;
	/* epoll_wait returns events in a user provided arrays, allocate once */
	struct epoll_event *epoll_ret_events;
#elif defined(WAIT_USE_KQUEUE)
	int			kqueue_fd;
	/* kevent returns events in a user provided arrays, allocate once */
	struct kevent *kqueue_ret_events;
	bool		report_postmaster_not_running;
#elif defined(WAIT_USE_POLL)
	/* poll expects events to be waited on every poll() call, prepare once */
	struct pollfd *pollfds;
#elif defined(WAIT_USE_WIN32)

	/*
	 * Array of windows events. The first element always contains
	 * pgwin32_signal_event, so the remaining elements are offset by one (i.e.
	 * event->pos + 1).
	 */
	HANDLE	   *handles;
#endif
};
153 : :
/* A common WaitEventSet used to implement WaitLatch() */
static WaitEventSet *LatchWaitSet;

/* The position of the latch in LatchWaitSet. */
#define LatchWaitSetLatchPos 0

#ifndef WIN32
/* Are we currently in WaitLatch? The signal handler would like to know. */
static volatile sig_atomic_t waiting = false;
#endif

#ifdef WAIT_USE_SIGNALFD
/* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
static int	signal_fd = -1;
#endif

#ifdef WAIT_USE_SELF_PIPE
/* Read and write ends of the self-pipe */
static int	selfpipe_readfd = -1;
static int	selfpipe_writefd = -1;

/* Process owning the self-pipe --- needed for checking purposes */
static int	selfpipe_owner_pid = 0;

/* Private function prototypes */
static void latch_sigurg_handler(SIGNAL_ARGS);
static void sendSelfPipeByte(void);
#endif

#if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
static void drain(void);
#endif

/* Per-implementation routine to (re)register one event with the kernel */
#if defined(WAIT_USE_EPOLL)
static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action);
#elif defined(WAIT_USE_KQUEUE)
static void WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events);
#elif defined(WAIT_USE_POLL)
static void WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event);
#elif defined(WAIT_USE_WIN32)
static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
#endif

static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
										WaitEvent *occurred_events, int nevents);

/* ResourceOwner support to hold WaitEventSets */
static void ResOwnerReleaseWaitEventSet(Datum res);

static const ResourceOwnerDesc wait_event_set_resowner_desc =
{
	.name = "WaitEventSet",
	.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
	.release_priority = RELEASE_PRIO_WAITEVENTSETS,
	.ReleaseResource = ResOwnerReleaseWaitEventSet,
	.DebugPrint = NULL
};
211 : :
/* Convenience wrappers over ResourceOwnerRemember/Forget */
static inline void
ResourceOwnerRememberWaitEventSet(ResourceOwner owner, WaitEventSet *set)
{
	ResourceOwnerRemember(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
}
static inline void
ResourceOwnerForgetWaitEventSet(ResourceOwner owner, WaitEventSet *set)
{
	ResourceOwnerForget(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
}
223 : :
224 : :
/*
 * Initialize the process-local latch infrastructure.
 *
 * This must be called once during startup of any process that can wait on
 * latches, before it issues any InitLatch() or OwnLatch() calls.
 *
 * Depending on the wakeup mechanism chosen at compile time, this either
 * creates a self-pipe and installs a SIGURG handler, creates a signalfd
 * for SIGURG (leaving the signal blocked), or arranges for SIGURG to be
 * consumed via kqueue.
 */
void
InitializeLatchSupport(void)
{
#if defined(WAIT_USE_SELF_PIPE)
	int			pipefd[2];

	if (IsUnderPostmaster)
	{
		/*
		 * We might have inherited connections to a self-pipe created by the
		 * postmaster.  It's critical that child processes create their own
		 * self-pipes, of course, and we really want them to close the
		 * inherited FDs for safety's sake.
		 */
		if (selfpipe_owner_pid != 0)
		{
			/* Assert we go through here but once in a child process */
			Assert(selfpipe_owner_pid != MyProcPid);
			/* Release postmaster's pipe FDs; ignore any error */
			(void) close(selfpipe_readfd);
			(void) close(selfpipe_writefd);
			/* Clean up, just for safety's sake; we'll set these below */
			selfpipe_readfd = selfpipe_writefd = -1;
			selfpipe_owner_pid = 0;
			/* Keep fd.c's accounting straight (one call per closed FD) */
			ReleaseExternalFD();
			ReleaseExternalFD();
		}
		else
		{
			/*
			 * Postmaster didn't create a self-pipe ... or else we're in an
			 * EXEC_BACKEND build, in which case it doesn't matter since the
			 * postmaster's pipe FDs were closed by the action of FD_CLOEXEC.
			 * fd.c won't have state to clean up, either.
			 */
			Assert(selfpipe_readfd == -1);
		}
	}
	else
	{
		/* In postmaster or standalone backend, assert we do this but once */
		Assert(selfpipe_readfd == -1);
		Assert(selfpipe_owner_pid == 0);
	}

	/*
	 * Set up the self-pipe that allows a signal handler to wake up the
	 * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
	 * that SetLatch won't block if the event has already been set many times
	 * filling the kernel buffer. Make the read-end non-blocking too, so that
	 * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
	 * Also, make both FDs close-on-exec, since we surely do not want any
	 * child processes messing with them.
	 */
	if (pipe(pipefd) < 0)
		elog(FATAL, "pipe() failed: %m");
	if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
		elog(FATAL, "fcntl(F_SETFL) failed on read-end of self-pipe: %m");
	if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
		elog(FATAL, "fcntl(F_SETFL) failed on write-end of self-pipe: %m");
	if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1)
		elog(FATAL, "fcntl(F_SETFD) failed on read-end of self-pipe: %m");
	if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1)
		elog(FATAL, "fcntl(F_SETFD) failed on write-end of self-pipe: %m");

	selfpipe_readfd = pipefd[0];
	selfpipe_writefd = pipefd[1];
	selfpipe_owner_pid = MyProcPid;

	/* Tell fd.c about these two long-lived FDs */
	ReserveExternalFD();
	ReserveExternalFD();

	pqsignal(SIGURG, latch_sigurg_handler);
#endif

#ifdef WAIT_USE_SIGNALFD
	sigset_t	signalfd_mask;

	if (IsUnderPostmaster)
	{
		/*
		 * It would probably be safe to re-use the inherited signalfd since
		 * signalfds only see the current process's pending signals, but it
		 * seems less surprising to close it and create our own.
		 */
		if (signal_fd != -1)
		{
			/* Release postmaster's signal FD; ignore any error */
			(void) close(signal_fd);
			signal_fd = -1;
			ReleaseExternalFD();
		}
	}

	/* Block SIGURG, because we'll receive it through a signalfd. */
	sigaddset(&UnBlockSig, SIGURG);

	/* Set up the signalfd to receive SIGURG notifications. */
	sigemptyset(&signalfd_mask);
	sigaddset(&signalfd_mask, SIGURG);
	signal_fd = signalfd(-1, &signalfd_mask, SFD_NONBLOCK | SFD_CLOEXEC);
	if (signal_fd < 0)
		elog(FATAL, "signalfd() failed");
	ReserveExternalFD();
#endif

#ifdef WAIT_USE_KQUEUE
	/* Ignore SIGURG, because we'll receive it via kqueue. */
	pqsignal(SIGURG, SIG_IGN);
#endif
}
344 : :
/*
 * Create the process-lifetime LatchWaitSet used by WaitLatch().
 *
 * Must be called once per process, after InitializeLatchSupport().  The
 * latch slot is filled with MyLatch for now; WaitLatch() re-points it on
 * every call via ModifyWaitEvent().
 */
void
InitializeLatchWaitSet(void)
{
	int			latch_pos PG_USED_FOR_ASSERTS_ONLY;

	Assert(LatchWaitSet == NULL);

	/* Set up the WaitEventSet used by WaitLatch(). */
	LatchWaitSet = CreateWaitEventSet(NULL, 2);
	latch_pos = AddWaitEventToSet(LatchWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
								  MyLatch, NULL);
	if (IsUnderPostmaster)
		AddWaitEventToSet(LatchWaitSet, WL_EXIT_ON_PM_DEATH,
						  PGINVALID_SOCKET, NULL, NULL);

	/* WaitLatch() hard-codes this position; make sure it holds */
	Assert(latch_pos == LatchWaitSetLatchPos);
}
362 : :
363 : : void
1140 tmunro@postgresql.or 364 :UBC 0 : ShutdownLatchSupport(void)
365 : : {
366 : : #if defined(WAIT_USE_POLL)
367 : : pqsignal(SIGURG, SIG_IGN);
368 : : #endif
369 : :
370 [ # # ]: 0 : if (LatchWaitSet)
371 : : {
372 : 0 : FreeWaitEventSet(LatchWaitSet);
373 : 0 : LatchWaitSet = NULL;
374 : : }
375 : :
376 : : #if defined(WAIT_USE_SELF_PIPE)
377 : : close(selfpipe_readfd);
378 : : close(selfpipe_writefd);
379 : : selfpipe_readfd = -1;
380 : : selfpipe_writefd = -1;
381 : : selfpipe_owner_pid = InvalidPid;
382 : : #endif
383 : :
384 : : #if defined(WAIT_USE_SIGNALFD)
385 : 0 : close(signal_fd);
386 : 0 : signal_fd = -1;
387 : : #endif
388 : 0 : }
389 : :
/*
 * Initialize a process-local latch.
 *
 * The latch is owned by the current process and is not shareable; use
 * InitSharedLatch()/OwnLatch() for latches other processes may set.
 */
void
InitLatch(Latch *latch)
{
	latch->is_set = false;
	latch->maybe_sleeping = false;
	latch->owner_pid = MyProcPid;
	latch->is_shared = false;

#if defined(WAIT_USE_SELF_PIPE)
	/* Assert InitializeLatchSupport has been called in this process */
	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
#elif defined(WAIT_USE_SIGNALFD)
	/* Assert InitializeLatchSupport has been called in this process */
	Assert(signal_fd >= 0);
#elif defined(WAIT_USE_WIN32)
	/* Windows: each latch gets its own manual-reset event object */
	latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
	if (latch->event == NULL)
		elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
#endif							/* WIN32 */
}
413 : :
/*
 * Initialize a shared latch that can be set from other processes. The latch
 * is initially owned by no-one; use OwnLatch to associate it with the
 * current process.
 *
 * InitSharedLatch needs to be called in postmaster before forking child
 * processes, usually right after allocating the shared memory block
 * containing the latch with ShmemInitStruct. (The Unix implementation
 * doesn't actually require that, but the Windows one does.) Because of
 * this restriction, we have no concurrency issues to worry about here.
 *
 * Note that other handles created in this module are never marked as
 * inheritable. Thus we do not need to worry about cleaning up child
 * process references to postmaster-private latches or WaitEventSets.
 */
void
InitSharedLatch(Latch *latch)
{
#ifdef WIN32
	SECURITY_ATTRIBUTES sa;

	/*
	 * Set up security attributes to specify that the events are inherited.
	 */
	ZeroMemory(&sa, sizeof(sa));
	sa.nLength = sizeof(sa);
	sa.bInheritHandle = TRUE;

	latch->event = CreateEvent(&sa, TRUE, FALSE, NULL);
	if (latch->event == NULL)
		elog(ERROR, "CreateEvent failed: error code %lu", GetLastError());
#endif

	latch->is_set = false;
	latch->maybe_sleeping = false;
	latch->owner_pid = 0;		/* 0 means "not owned by anyone yet" */
	latch->is_shared = true;
}
452 : :
/*
 * Associate a shared latch with the current process, allowing it to
 * wait on the latch.
 *
 * Although there is a sanity check for latch-already-owned, we don't do
 * any sort of locking here, meaning that we could fail to detect the error
 * if two processes try to own the same latch at about the same time.  If
 * there is any risk of that, caller must provide an interlock to prevent it.
 */
void
OwnLatch(Latch *latch)
{
	int			owner_pid;

	/* Sanity checks */
	Assert(latch->is_shared);

#if defined(WAIT_USE_SELF_PIPE)
	/* Assert InitializeLatchSupport has been called in this process */
	Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
#elif defined(WAIT_USE_SIGNALFD)
	/* Assert InitializeLatchSupport has been called in this process */
	Assert(signal_fd >= 0);
#endif

	/* Read owner_pid once; see note above about the lack of locking */
	owner_pid = latch->owner_pid;
	if (owner_pid != 0)
		elog(PANIC, "latch already owned by PID %d", owner_pid);

	latch->owner_pid = MyProcPid;
}
484 : :
/*
 * Disown a shared latch currently owned by the current process.
 *
 * The latch reverts to the unowned state (owner_pid == 0) and can be
 * re-owned later via OwnLatch().
 */
void
DisownLatch(Latch *latch)
{
	Assert(latch->is_shared);
	Assert(latch->owner_pid == MyProcPid);

	latch->owner_pid = 0;
}
496 : :
/*
 * Wait for a given latch to be set, or for postmaster death, or until timeout
 * is exceeded. 'wakeEvents' is a bitmask that specifies which of those events
 * to wait for. If the latch is already set (and WL_LATCH_SET is given), the
 * function returns immediately.
 *
 * The "timeout" is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
 * is given.  Although it is declared as "long", we don't actually support
 * timeouts longer than INT_MAX milliseconds.  Note that some extra overhead
 * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
 *
 * The latch must be owned by the current process, ie. it must be a
 * process-local latch initialized with InitLatch, or a shared latch
 * associated with the current process by calling OwnLatch.
 *
 * Returns bit mask indicating which condition(s) caused the wake-up. Note
 * that if multiple wake-up conditions are true, there is no guarantee that
 * we return all of them in one call, but we will return at least one.
 */
int
WaitLatch(Latch *latch, int wakeEvents, long timeout,
		  uint32 wait_event_info)
{
	WaitEvent	event;

	/* Postmaster-managed callers must handle postmaster death somehow. */
	Assert(!IsUnderPostmaster ||
		   (wakeEvents & WL_EXIT_ON_PM_DEATH) ||
		   (wakeEvents & WL_POSTMASTER_DEATH));

	/*
	 * Some callers may have a latch other than MyLatch, or no latch at all,
	 * or want to handle postmaster death differently.  It's cheap to assign
	 * those, so just do it every time.
	 */
	if (!(wakeEvents & WL_LATCH_SET))
		latch = NULL;
	ModifyWaitEvent(LatchWaitSet, LatchWaitSetLatchPos, WL_LATCH_SET, latch);
	LatchWaitSet->exit_on_postmaster_death =
		((wakeEvents & WL_EXIT_ON_PM_DEATH) != 0);

	/* A return of 0 from WaitEventSetWait means the timeout expired */
	if (WaitEventSetWait(LatchWaitSet,
						 (wakeEvents & WL_TIMEOUT) ? timeout : -1,
						 &event, 1,
						 wait_event_info) == 0)
		return WL_TIMEOUT;
	else
		return event.events;
}
546 : :
/*
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
 * When waiting on a socket, EOF and error conditions always cause the socket
 * to be reported as readable/writable/connected, so that the caller can deal
 * with the condition.
 *
 * wakeEvents must include either WL_EXIT_ON_PM_DEATH for automatic exit
 * if the postmaster dies or WL_POSTMASTER_DEATH for a flag set in the
 * return value if the postmaster dies.  The latter is useful for rare cases
 * where some behavior other than immediate exit is needed.
 *
 * NB: These days this is just a wrapper around the WaitEventSet API. When
 * using a latch very frequently, consider creating a longer living
 * WaitEventSet instead; that's more efficient.
 */
int
WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock,
				  long timeout, uint32 wait_event_info)
{
	int			ret = 0;
	int			rc;
	WaitEvent	event;
	/* Up to 3 events: latch, postmaster death, socket */
	WaitEventSet *set = CreateWaitEventSet(CurrentResourceOwner, 3);

	if (wakeEvents & WL_TIMEOUT)
		Assert(timeout >= 0);
	else
		timeout = -1;

	if (wakeEvents & WL_LATCH_SET)
		AddWaitEventToSet(set, WL_LATCH_SET, PGINVALID_SOCKET,
						  latch, NULL);

	/* Postmaster-managed callers must handle postmaster death somehow. */
	Assert(!IsUnderPostmaster ||
		   (wakeEvents & WL_EXIT_ON_PM_DEATH) ||
		   (wakeEvents & WL_POSTMASTER_DEATH));

	if ((wakeEvents & WL_POSTMASTER_DEATH) && IsUnderPostmaster)
		AddWaitEventToSet(set, WL_POSTMASTER_DEATH, PGINVALID_SOCKET,
						  NULL, NULL);

	if ((wakeEvents & WL_EXIT_ON_PM_DEATH) && IsUnderPostmaster)
		AddWaitEventToSet(set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
						  NULL, NULL);

	if (wakeEvents & WL_SOCKET_MASK)
	{
		int			ev;

		ev = wakeEvents & WL_SOCKET_MASK;
		AddWaitEventToSet(set, ev, sock, NULL, NULL);
	}

	rc = WaitEventSetWait(set, timeout, &event, 1, wait_event_info);

	if (rc == 0)
		ret |= WL_TIMEOUT;
	else
	{
		/* Report only the conditions the caller could have asked about */
		ret |= event.events & (WL_LATCH_SET |
							   WL_POSTMASTER_DEATH |
							   WL_SOCKET_MASK);
	}

	FreeWaitEventSet(set);

	return ret;
}
618 : :
619 : : /*
620 : : * Sets a latch and wakes up anyone waiting on it.
621 : : *
622 : : * This is cheap if the latch is already set, otherwise not so much.
623 : : *
624 : : * NB: when calling this in a signal handler, be sure to save and restore
625 : : * errno around it. (That's standard practice in most signal handlers, of
626 : : * course, but we used to omit it in handlers that only set a flag.)
627 : : *
628 : : * NB: this function is called from critical sections and signal handlers so
629 : : * throwing an error is not a good idea.
630 : : */
631 : : void
1875 peter@eisentraut.org 632 : 750668 : SetLatch(Latch *latch)
633 : : {
634 : : #ifndef WIN32
635 : : pid_t owner_pid;
636 : : #else
637 : : HANDLE handle;
638 : : #endif
639 : :
640 : : /*
641 : : * The memory barrier has to be placed here to ensure that any flag
642 : : * variables possibly changed by this process have been flushed to main
643 : : * memory, before we check/set is_set.
644 : : */
3379 andres@anarazel.de 645 : 750668 : pg_memory_barrier();
646 : :
647 : : /* Quick exit if already set */
4964 heikki.linnakangas@i 648 [ + + ]: 750668 : if (latch->is_set)
649 : 155996 : return;
650 : :
651 : 594672 : latch->is_set = true;
652 : :
1140 tmunro@postgresql.or 653 : 594672 : pg_memory_barrier();
654 [ + + ]: 594672 : if (!latch->maybe_sleeping)
655 : 77217 : return;
656 : :
657 : : #ifndef WIN32
658 : :
659 : : /*
660 : : * See if anyone's waiting for the latch. It can be the current process if
661 : : * we're in a signal handler. We use the self-pipe or SIGURG to ourselves
662 : : * to wake up WaitEventSetWaitBlock() without races in that case. If it's
663 : : * another process, send a signal.
664 : : *
665 : : * Fetch owner_pid only once, in case the latch is concurrently getting
666 : : * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
667 : : * guaranteed to be true! In practice, the effective range of pid_t fits
668 : : * in a 32 bit integer, and so should be atomic. In the worst case, we
669 : : * might end up signaling the wrong process. Even then, you're very
670 : : * unlucky if a process with that bogus pid exists and belongs to
671 : : * Postgres; and PG database processes should handle excess SIGUSR1
672 : : * interrupts without a problem anyhow.
673 : : *
674 : : * Another sort of race condition that's possible here is for a new
675 : : * process to own the latch immediately after we look, so we don't signal
676 : : * it. This is okay so long as all callers of ResetLatch/WaitLatch follow
677 : : * the standard coding convention of waiting at the bottom of their loops,
678 : : * not the top, so that they'll correctly process latch-setting events
679 : : * that happen before they enter the loop.
680 : : */
4964 heikki.linnakangas@i 681 : 517455 : owner_pid = latch->owner_pid;
682 [ - + ]: 517455 : if (owner_pid == 0)
4964 heikki.linnakangas@i 683 :UBC 0 : return;
4964 heikki.linnakangas@i 684 [ + + ]:CBC 517455 : else if (owner_pid == MyProcPid)
685 : : {
686 : : #if defined(WAIT_USE_SELF_PIPE)
687 : : if (waiting)
688 : : sendSelfPipeByte();
689 : : #else
1140 tmunro@postgresql.or 690 [ + - ]: 118879 : if (waiting)
691 : 118879 : kill(MyProcPid, SIGURG);
692 : : #endif
693 : : }
694 : : else
695 : 398576 : kill(owner_pid, SIGURG);
696 : :
697 : : #else
698 : :
699 : : /*
700 : : * See if anyone's waiting for the latch. It can be the current process if
701 : : * we're in a signal handler.
702 : : *
703 : : * Use a local variable here just in case somebody changes the event field
704 : : * concurrently (which really should not happen).
705 : : */
706 : : handle = latch->event;
707 : : if (handle)
708 : : {
709 : : SetEvent(handle);
710 : :
711 : : /*
712 : : * Note that we silently ignore any errors. We might be in a signal
713 : : * handler or other critical path where it's not safe to call elog().
714 : : */
715 : : }
716 : : #endif
717 : : }
718 : :
/*
 * Clear the latch. Calling WaitLatch after this will sleep, unless
 * the latch is set again before the WaitLatch call.
 */
void
ResetLatch(Latch *latch)
{
	/* Only the owner should reset the latch */
	Assert(latch->owner_pid == MyProcPid);
	Assert(latch->maybe_sleeping == false);

	latch->is_set = false;

	/*
	 * Ensure that the write to is_set gets flushed to main memory before we
	 * examine any flag variables.  Otherwise a concurrent SetLatch might
	 * falsely conclude that it needn't signal us, even though we have missed
	 * seeing some flag updates that SetLatch was supposed to inform us of.
	 */
	pg_memory_barrier();
}
740 : :
741 : : /*
742 : : * Create a WaitEventSet with space for nevents different events to wait for.
743 : : *
744 : : * These events can then be efficiently waited upon together, using
745 : : * WaitEventSetWait().
746 : : *
747 : : * The WaitEventSet is tracked by the given 'resowner'. Use NULL for session
748 : : * lifetime.
749 : : */
750 : : WaitEventSet *
143 heikki.linnakangas@i 751 :GNC 151922 : CreateWaitEventSet(ResourceOwner resowner, int nevents)
752 : : {
753 : : WaitEventSet *set;
754 : : char *data;
2946 andres@anarazel.de 755 :CBC 151922 : Size sz = 0;
756 : :
757 : : /*
758 : : * Use MAXALIGN size/alignment to guarantee that later uses of memory are
759 : : * aligned correctly. E.g. epoll_event might need 8 byte alignment on some
760 : : * platforms, but earlier allocations like WaitEventSet and WaitEvent
761 : : * might not be sized to guarantee that when purely using sizeof().
762 : : */
2873 stark@mit.edu 763 : 151922 : sz += MAXALIGN(sizeof(WaitEventSet));
764 : 151922 : sz += MAXALIGN(sizeof(WaitEvent) * nevents);
765 : :
766 : : #if defined(WAIT_USE_EPOLL)
767 : 151922 : sz += MAXALIGN(sizeof(struct epoll_event) * nevents);
768 : : #elif defined(WAIT_USE_KQUEUE)
769 : : sz += MAXALIGN(sizeof(struct kevent) * nevents);
770 : : #elif defined(WAIT_USE_POLL)
771 : : sz += MAXALIGN(sizeof(struct pollfd) * nevents);
772 : : #elif defined(WAIT_USE_WIN32)
773 : : /* need space for the pgwin32_signal_event */
774 : : sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
775 : : #endif
776 : :
 : : /*
 : : * Enlarge the resource owner up front, so that remembering the set
 : : * further down cannot fail after we've allocated and created kernel
 : : * objects.
 : : */
143 heikki.linnakangas@i 777 [ + + ]:GNC 151922 : if (resowner != NULL)
778 : 88451 : ResourceOwnerEnlarge(resowner);
779 : :
 : : /* One allocation in TopMemoryContext holds the set plus all its arrays. */
780 : 151922 : data = (char *) MemoryContextAllocZero(TopMemoryContext, sz);
781 : :
2946 andres@anarazel.de 782 :CBC 151922 : set = (WaitEventSet *) data;
2873 stark@mit.edu 783 : 151922 : data += MAXALIGN(sizeof(WaitEventSet));
784 : :
2946 andres@anarazel.de 785 : 151922 : set->events = (WaitEvent *) data;
2873 stark@mit.edu 786 : 151922 : data += MAXALIGN(sizeof(WaitEvent) * nevents);
787 : :
788 : : #if defined(WAIT_USE_EPOLL)
2946 andres@anarazel.de 789 : 151922 : set->epoll_ret_events = (struct epoll_event *) data;
2873 stark@mit.edu 790 : 151922 : data += MAXALIGN(sizeof(struct epoll_event) * nevents);
791 : : #elif defined(WAIT_USE_KQUEUE)
792 : : set->kqueue_ret_events = (struct kevent *) data;
793 : : data += MAXALIGN(sizeof(struct kevent) * nevents);
794 : : #elif defined(WAIT_USE_POLL)
795 : : set->pollfds = (struct pollfd *) data;
796 : : data += MAXALIGN(sizeof(struct pollfd) * nevents);
797 : : #elif defined(WAIT_USE_WIN32)
798 : : set->handles = (HANDLE) data;
799 : : data += MAXALIGN(sizeof(HANDLE) * nevents);
800 : : #endif
801 : :
2946 andres@anarazel.de 802 : 151922 : set->latch = NULL;
803 : 151922 : set->nevents_space = nevents;
1969 tmunro@postgresql.or 804 : 151922 : set->exit_on_postmaster_death = false;
805 : :
143 heikki.linnakangas@i 806 [ + + ]:GNC 151922 : if (resowner != NULL)
807 : : {
808 : 88451 : ResourceOwnerRememberWaitEventSet(resowner, set);
809 : 88451 : set->owner = resowner;
810 : : }
811 : :
812 : : #if defined(WAIT_USE_EPOLL)
 : : /* Reserve an external file-descriptor slot before creating the kernel fd. */
1511 tgl@sss.pgh.pa.us 813 [ - + ]:CBC 151922 : if (!AcquireExternalFD())
814 : : {
815 : : /* treat this as though epoll_create1 itself returned EMFILE */
1511 tgl@sss.pgh.pa.us 816 [ # # ]:UBC 0 : elog(ERROR, "epoll_create1 failed: %m");
817 : : }
2544 tgl@sss.pgh.pa.us 818 :CBC 151922 : set->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
2946 andres@anarazel.de 819 [ - + ]: 151922 : if (set->epoll_fd < 0)
820 : : {
1511 tgl@sss.pgh.pa.us 821 :UBC 0 : ReleaseExternalFD();
2544 822 [ # # ]: 0 : elog(ERROR, "epoll_create1 failed: %m");
823 : : }
824 : : #elif defined(WAIT_USE_KQUEUE)
825 : : if (!AcquireExternalFD())
826 : : {
827 : : /* treat this as though kqueue itself returned EMFILE */
828 : : elog(ERROR, "kqueue failed: %m");
829 : : }
830 : : set->kqueue_fd = kqueue();
831 : : if (set->kqueue_fd < 0)
832 : : {
833 : : ReleaseExternalFD();
834 : : elog(ERROR, "kqueue failed: %m");
835 : : }
836 : : if (fcntl(set->kqueue_fd, F_SETFD, FD_CLOEXEC) == -1)
837 : : {
838 : : int save_errno = errno;
839 : :
840 : : close(set->kqueue_fd);
841 : : ReleaseExternalFD();
842 : : errno = save_errno;
843 : : elog(ERROR, "fcntl(F_SETFD) failed on kqueue descriptor: %m");
844 : : }
845 : : set->report_postmaster_not_running = false;
846 : : #elif defined(WAIT_USE_WIN32)
847 : :
848 : : /*
849 : : * To handle signals while waiting, we need to add a win32 specific event.
850 : : * We accounted for the additional event at the top of this routine. See
851 : : * port/win32/signal.c for more details.
852 : : *
853 : : * Note: pgwin32_signal_event should be first to ensure that it will be
854 : : * reported when multiple events are set. We want to guarantee that
855 : : * pending signals are serviced.
856 : : */
857 : : set->handles[0] = pgwin32_signal_event;
858 : : StaticAssertStmt(WSA_INVALID_EVENT == NULL, "");
859 : : #endif
860 : :
2946 andres@anarazel.de 861 :CBC 151922 : return set;
862 : : }
863 : :
864 : : /*
865 : : * Free a previously created WaitEventSet.
866 : : *
867 : : * Note: preferably, this shouldn't have to free any resources that could be
868 : : * inherited across an exec(). If it did, we'd likely leak those resources in
869 : : * many scenarios. For the epoll case, we ensure that by setting EPOLL_CLOEXEC
870 : : * when the FD is created. For the Windows case, we assume that the handles
871 : : * involved are non-inheritable.
872 : : */
873 : : void
874 : 119853 : FreeWaitEventSet(WaitEventSet *set)
875 : : {
 : : /* Detach from the owning ResourceOwner so it no longer tracks this set. */
143 heikki.linnakangas@i 876 [ + + ]:GNC 119853 : if (set->owner)
877 : : {
878 : 88441 : ResourceOwnerForgetWaitEventSet(set->owner, set);
879 : 88441 : set->owner = NULL;
880 : : }
881 : :
882 : : #if defined(WAIT_USE_EPOLL)
 : : /* Close the kernel object and give back the external-FD slot. */
2946 andres@anarazel.de 883 :CBC 119853 : close(set->epoll_fd);
1511 tgl@sss.pgh.pa.us 884 : 119853 : ReleaseExternalFD();
885 : : #elif defined(WAIT_USE_KQUEUE)
886 : : close(set->kqueue_fd);
887 : : ReleaseExternalFD();
888 : : #elif defined(WAIT_USE_WIN32)
889 : : for (WaitEvent *cur_event = set->events;
890 : : cur_event < (set->events + set->nevents);
891 : : cur_event++)
892 : : {
893 : : if (cur_event->events & WL_LATCH_SET)
894 : : {
895 : : /* uses the latch's HANDLE */
896 : : }
897 : : else if (cur_event->events & WL_POSTMASTER_DEATH)
898 : : {
899 : : /* uses PostmasterHandle */
900 : : }
901 : : else
902 : : {
903 : : /* Clean up the event object we created for the socket */
904 : : WSAEventSelect(cur_event->fd, NULL, 0);
905 : : WSACloseEvent(set->handles[cur_event->pos + 1]);
906 : : }
907 : : }
908 : : #endif
909 : :
2946 andres@anarazel.de 910 : 119853 : pfree(set);
911 : 119853 : }
912 : :
913 : : /*
914 : : * Free a previously created WaitEventSet in a child process after a fork().
915 : : */
916 : : void
478 tmunro@postgresql.or 917 : 17503 : FreeWaitEventSetAfterFork(WaitEventSet *set)
918 : : {
919 : : #if defined(WAIT_USE_EPOLL)
 : : /* The epoll fd was inherited across fork(), so close the child's copy. */
920 : 17503 : close(set->epoll_fd);
921 : 17503 : ReleaseExternalFD();
922 : : #elif defined(WAIT_USE_KQUEUE)
923 : : /* kqueues are not normally inherited by child processes */
924 : : ReleaseExternalFD();
925 : : #endif
926 : :
927 : 17503 : pfree(set);
928 : 17503 : }
929 : :
930 : : /* ---
931 : : * Add an event to the set. Possible events are:
932 : : * - WL_LATCH_SET: Wait for the latch to be set
933 : : * - WL_POSTMASTER_DEATH: Wait for postmaster to die
934 : : * - WL_SOCKET_READABLE: Wait for socket to become readable,
935 : : * can be combined in one event with other WL_SOCKET_* events
936 : : * - WL_SOCKET_WRITEABLE: Wait for socket to become writeable,
937 : : * can be combined with other WL_SOCKET_* events
938 : : * - WL_SOCKET_CONNECTED: Wait for socket connection to be established,
939 : : * can be combined with other WL_SOCKET_* events (on non-Windows
940 : : * platforms, this is the same as WL_SOCKET_WRITEABLE)
941 : : * - WL_SOCKET_ACCEPT: Wait for new connection to a server socket,
942 : : * can be combined with other WL_SOCKET_* events (on non-Windows
943 : : * platforms, this is the same as WL_SOCKET_READABLE)
944 : : * - WL_SOCKET_CLOSED: Wait for socket to be closed by remote peer.
945 : : * - WL_EXIT_ON_PM_DEATH: Exit immediately if the postmaster dies
946 : : *
947 : : * Returns the offset in WaitEventSet->events (starting from 0), which can be
948 : : * used to modify previously added wait events using ModifyWaitEvent().
949 : : *
950 : : * In the WL_LATCH_SET case the latch must be owned by the current process,
951 : : * i.e. it must be a process-local latch initialized with InitLatch, or a
952 : : * shared latch associated with the current process by calling OwnLatch.
953 : : *
954 : : * In the WL_SOCKET_READABLE/WRITEABLE/CONNECTED/ACCEPT cases, EOF and error
955 : : * conditions cause the socket to be reported as readable/writable/connected,
956 : : * so that the caller can deal with the condition.
957 : : *
958 : : * The user_data pointer specified here will be set for the events returned
959 : : * by WaitEventSetWait(), allowing to easily associate additional data with
960 : : * events.
961 : : */
962 : : int
2946 andres@anarazel.de 963 : 432720 : AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
964 : : void *user_data)
965 : : {
966 : : WaitEvent *event;
967 : :
968 : : /* not enough space */
969 [ - + ]: 432720 : Assert(set->nevents < set->nevents_space);
970 : :
 : : /*
 : : * WL_EXIT_ON_PM_DEATH is implemented as WL_POSTMASTER_DEATH plus a flag
 : : * telling the wait code to proc_exit() when death is detected.
 : : */
1969 tmunro@postgresql.or 971 [ + + ]: 432720 : if (events == WL_EXIT_ON_PM_DEATH)
972 : : {
973 : 138807 : events = WL_POSTMASTER_DEATH;
974 : 138807 : set->exit_on_postmaster_death = true;
975 : : }
976 : :
 : : /* Sanity-check the latch/event-mask combination requested by the caller. */
2946 andres@anarazel.de 977 [ + + ]: 432720 : if (latch)
978 : : {
979 [ - + ]: 151061 : if (latch->owner_pid != MyProcPid)
2946 andres@anarazel.de 980 [ # # ]:UBC 0 : elog(ERROR, "cannot wait on a latch owned by another process");
2946 andres@anarazel.de 981 [ - + ]:CBC 151061 : if (set->latch)
2946 andres@anarazel.de 982 [ # # ]:UBC 0 : elog(ERROR, "cannot wait on more than one latch");
2946 andres@anarazel.de 983 [ - + ]:CBC 151061 : if ((events & WL_LATCH_SET) != WL_LATCH_SET)
2873 stark@mit.edu 984 [ # # ]:UBC 0 : elog(ERROR, "latch events only support being set");
985 : : }
986 : : else
987 : : {
2946 andres@anarazel.de 988 [ - + ]:CBC 281659 : if (events & WL_LATCH_SET)
2946 andres@anarazel.de 989 [ # # ]:UBC 0 : elog(ERROR, "cannot wait on latch without a specified latch");
990 : : }
991 : :
992 : : /* waiting for socket readiness without a socket indicates a bug */
2434 tgl@sss.pgh.pa.us 993 [ + + - + ]:CBC 432720 : if (fd == PGINVALID_SOCKET && (events & WL_SOCKET_MASK))
2946 andres@anarazel.de 994 [ # # ]:UBC 0 : elog(ERROR, "cannot wait on socket event without a socket");
995 : :
2946 andres@anarazel.de 996 :CBC 432720 : event = &set->events[set->nevents];
997 : 432720 : event->pos = set->nevents++;
998 : 432720 : event->fd = fd;
999 : 432720 : event->events = events;
1000 : 432720 : event->user_data = user_data;
1001 : : #ifdef WIN32
1002 : : event->reset = false;
1003 : : #endif
1004 : :
 : : /*
 : : * For latch and postmaster-death events the fd waited on is internal
 : : * (signalfd/self-pipe resp. postmaster-alive pipe), not caller-supplied.
 : : */
1005 [ + + ]: 432720 : if (events == WL_LATCH_SET)
1006 : : {
1007 : 151061 : set->latch = latch;
1008 : 151061 : set->latch_pos = event->pos;
1009 : : #if defined(WAIT_USE_SELF_PIPE)
1010 : : event->fd = selfpipe_readfd;
1011 : : #elif defined(WAIT_USE_SIGNALFD)
1140 tmunro@postgresql.or 1012 : 151061 : event->fd = signal_fd;
1013 : : #else
1014 : : event->fd = PGINVALID_SOCKET;
1015 : : #ifdef WAIT_USE_EPOLL
1016 : : return event->pos;
1017 : : #endif
1018 : : #endif
1019 : : }
2946 andres@anarazel.de 1020 [ + + ]: 281659 : else if (events == WL_POSTMASTER_DEATH)
1021 : : {
1022 : : #ifndef WIN32
1023 : 150243 : event->fd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
1024 : : #endif
1025 : : }
1026 : :
1027 : : /* perform wait primitive specific initialization, if needed */
1028 : : #if defined(WAIT_USE_EPOLL)
1029 : 432720 : WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
1030 : : #elif defined(WAIT_USE_KQUEUE)
1031 : : WaitEventAdjustKqueue(set, event, 0);
1032 : : #elif defined(WAIT_USE_POLL)
1033 : : WaitEventAdjustPoll(set, event);
1034 : : #elif defined(WAIT_USE_WIN32)
1035 : : WaitEventAdjustWin32(set, event);
1036 : : #endif
1037 : :
1038 : 432720 : return event->pos;
1039 : : }
1040 : :
1041 : : /*
1042 : : * Change the event mask and, in the WL_LATCH_SET case, the latch associated
1043 : : * with the WaitEvent. The latch may be changed to NULL to disable the latch
1044 : : * temporarily, and then set back to a latch later.
1045 : : *
1046 : : * 'pos' is the id returned by AddWaitEventToSet.
1047 : : */
1048 : : void
1049 : 774874 : ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
1050 : : {
1051 : : WaitEvent *event;
1052 : : #if defined(WAIT_USE_KQUEUE)
1053 : : int old_events;
1054 : : #endif
1055 : :
1056 [ - + ]: 774874 : Assert(pos < set->nevents);
1057 : :
1058 : 774874 : event = &set->events[pos];
1059 : : #if defined(WAIT_USE_KQUEUE)
 : : /* kqueue needs the previous mask to compute filter adds/deletes */
1060 : : old_events = event->events;
1061 : : #endif
1062 : :
1063 : : /*
1064 : : * If neither the event mask nor the associated latch changes, return
1065 : : * early. That's an important optimization for some sockets, where
1066 : : * ModifyWaitEvent is frequently used to switch from waiting for reads to
1067 : : * waiting on writes.
1068 : : */
1069 [ + + ]: 774874 : if (events == event->events &&
1070 [ + + + + ]: 763192 : (!(event->events & WL_LATCH_SET) || set->latch == latch))
1071 : 735664 : return;
1072 : :
1073 [ + + ]: 39210 : if (event->events & WL_LATCH_SET &&
1074 [ - + ]: 27528 : events != event->events)
1075 : : {
2946 andres@anarazel.de 1076 [ # # ]:UBC 0 : elog(ERROR, "cannot modify latch event");
1077 : : }
1078 : :
2946 andres@anarazel.de 1079 [ - + ]:CBC 39210 : if (event->events & WL_POSTMASTER_DEATH)
1080 : : {
2946 andres@anarazel.de 1081 [ # # ]:UBC 0 : elog(ERROR, "cannot modify postmaster death event");
1082 : : }
1083 : :
1084 : : /* FIXME: validate event mask */
2946 andres@anarazel.de 1085 :CBC 39210 : event->events = events;
1086 : :
1087 [ + + ]: 39210 : if (events == WL_LATCH_SET)
1088 : : {
1299 tmunro@postgresql.or 1089 [ + + - + ]: 27528 : if (latch && latch->owner_pid != MyProcPid)
1299 tmunro@postgresql.or 1090 [ # # ]:UBC 0 : elog(ERROR, "cannot wait on a latch owned by another process");
2946 andres@anarazel.de 1091 :CBC 27528 : set->latch = latch;
1092 : :
1093 : : /*
1094 : : * On Unix, we don't need to modify the kernel object because the
1095 : : * underlying pipe (if there is one) is the same for all latches so we
1096 : : * can return immediately. On Windows, we need to update our array of
1097 : : * handles, but we leave the old one in place and tolerate spurious
1098 : : * wakeups if the latch is disabled.
1099 : : */
1100 : : #if defined(WAIT_USE_WIN32)
1101 : : if (!latch)
1102 : : return;
1103 : : #else
1299 tmunro@postgresql.or 1104 : 27528 : return;
1105 : : #endif
1106 : : }
1107 : :
 : : /* Propagate the new mask down to the underlying wait primitive. */
1108 : : #if defined(WAIT_USE_EPOLL)
2946 andres@anarazel.de 1109 : 11682 : WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
1110 : : #elif defined(WAIT_USE_KQUEUE)
1111 : : WaitEventAdjustKqueue(set, event, old_events);
1112 : : #elif defined(WAIT_USE_POLL)
1113 : : WaitEventAdjustPoll(set, event);
1114 : : #elif defined(WAIT_USE_WIN32)
1115 : : WaitEventAdjustWin32(set, event);
1116 : : #endif
1117 : : }
1118 : :
1119 : : #if defined(WAIT_USE_EPOLL)
1120 : : /*
1121 : : * action can be one of EPOLL_CTL_ADD | EPOLL_CTL_MOD | EPOLL_CTL_DEL
1122 : : */
1123 : : static void
2946 andres@anarazel.de 1124 : 444402 : WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
1125 : : {
1126 : : struct epoll_event epoll_ev;
1127 : : int rc;
1128 : :
1129 : : /* pointer to our event, returned by epoll_wait */
1130 : 444402 : epoll_ev.data.ptr = event;
1131 : : /* always wait for errors */
1132 : 444402 : epoll_ev.events = EPOLLERR | EPOLLHUP;
1133 : :
1134 : : /* prepare epoll_event entry once */
1135 [ + + ]: 444402 : if (event->events == WL_LATCH_SET)
1136 : : {
1137 [ - + ]: 151061 : Assert(set->latch != NULL);
 : : /* latch wakeups arrive as readability of the signalfd/self-pipe fd */
1138 : 151061 : epoll_ev.events |= EPOLLIN;
1139 : : }
1140 [ + + ]: 293341 : else if (event->events == WL_POSTMASTER_DEATH)
1141 : : {
1142 : 150243 : epoll_ev.events |= EPOLLIN;
1143 : : }
1144 : : else
1145 : : {
1146 [ - + ]: 143098 : Assert(event->fd != PGINVALID_SOCKET);
790 tmunro@postgresql.or 1147 [ - + ]: 143098 : Assert(event->events & (WL_SOCKET_READABLE |
1148 : : WL_SOCKET_WRITEABLE |
1149 : : WL_SOCKET_CLOSED));
1150 : :
2946 andres@anarazel.de 1151 [ + + ]: 143098 : if (event->events & WL_SOCKET_READABLE)
1152 : 130178 : epoll_ev.events |= EPOLLIN;
1153 [ + + ]: 143098 : if (event->events & WL_SOCKET_WRITEABLE)
1154 : 13252 : epoll_ev.events |= EPOLLOUT;
790 tmunro@postgresql.or 1155 [ - + ]: 143098 : if (event->events & WL_SOCKET_CLOSED)
790 tmunro@postgresql.or 1156 :UBC 0 : epoll_ev.events |= EPOLLRDHUP;
1157 : : }
1158 : :
1159 : : /*
1160 : : * Even though unused, we also pass epoll_ev as the data argument if
1161 : : * EPOLL_CTL_DEL is passed as action. There used to be an epoll bug
1162 : : * requiring that, and actually it makes the code simpler...
1163 : : */
2946 andres@anarazel.de 1164 :CBC 444402 : rc = epoll_ctl(set->epoll_fd, action, event->fd, &epoll_ev);
1165 : :
1166 [ - + ]: 444402 : if (rc < 0)
2946 andres@anarazel.de 1167 [ # # ]:UBC 0 : ereport(ERROR,
1168 : : (errcode_for_socket_access(),
1169 : : errmsg("%s() failed: %m",
1170 : : "epoll_ctl")));
2946 andres@anarazel.de 1171 :CBC 444402 : }
1172 : : #endif
1173 : :
1174 : : #if defined(WAIT_USE_POLL)
 : : /*
 : : * (Re)build the pollfd array entry for 'event' to match event->events.
 : : */
1175 : : static void
1176 : : WaitEventAdjustPoll(WaitEventSet *set, WaitEvent *event)
1177 : : {
1178 : : struct pollfd *pollfd = &set->pollfds[event->pos];
1179 : :
1180 : : pollfd->revents = 0;
1181 : : pollfd->fd = event->fd;
1182 : :
1183 : : /* prepare pollfd entry once */
1184 : : if (event->events == WL_LATCH_SET)
1185 : : {
1186 : : Assert(set->latch != NULL);
1187 : : pollfd->events = POLLIN;
1188 : : }
1189 : : else if (event->events == WL_POSTMASTER_DEATH)
1190 : : {
1191 : : pollfd->events = POLLIN;
1192 : : }
1193 : : else
1194 : : {
1195 : : Assert(event->events & (WL_SOCKET_READABLE |
1196 : : WL_SOCKET_WRITEABLE |
1197 : : WL_SOCKET_CLOSED));
1198 : : pollfd->events = 0;
1199 : : if (event->events & WL_SOCKET_READABLE)
1200 : : pollfd->events |= POLLIN;
1201 : : if (event->events & WL_SOCKET_WRITEABLE)
1202 : : pollfd->events |= POLLOUT;
1203 : : #ifdef POLLRDHUP
 : : /* POLLRDHUP is a nonstandard (Linux) extension, hence the guard */
1204 : : if (event->events & WL_SOCKET_CLOSED)
1205 : : pollfd->events |= POLLRDHUP;
1206 : : #endif
1207 : : }
1208 : :
1209 : : Assert(event->fd != PGINVALID_SOCKET);
1210 : : }
1211 : : #endif
1212 : :
1213 : : #if defined(WAIT_USE_KQUEUE)
1214 : :
1215 : : /*
1216 : : * On most BSD family systems, the udata member of struct kevent is of type
1217 : : * void *, so we could directly convert to/from WaitEvent *. Unfortunately,
1218 : : * NetBSD has it as intptr_t, so here we wallpaper over that difference with
1219 : : * an lvalue cast.
1220 : : */
1221 : : #define AccessWaitEvent(k_ev) (*((WaitEvent **)(&(k_ev)->udata)))
1222 : :
 : : /*
 : : * Fill in a kevent applying 'action' (e.g. EV_ADD or EV_DELETE) of the
 : : * given filter to event->fd.
 : : */
1223 : : static inline void
1224 : : WaitEventAdjustKqueueAdd(struct kevent *k_ev, int filter, int action,
1225 : : WaitEvent *event)
1226 : : {
1227 : : k_ev->ident = event->fd;
1228 : : k_ev->filter = filter;
1229 : : k_ev->flags = action;
1230 : : k_ev->fflags = 0;
1231 : : k_ev->data = 0;
1232 : : AccessWaitEvent(k_ev) = event;
1233 : : }
1234 : :
 : : /*
 : : * Fill in a kevent that fires when the postmaster's PID exits
 : : * (EVFILT_PROC with NOTE_EXIT).
 : : */
1235 : : static inline void
1236 : : WaitEventAdjustKqueueAddPostmaster(struct kevent *k_ev, WaitEvent *event)
1237 : : {
1238 : : /* For now postmaster death can only be added, not removed. */
1239 : : k_ev->ident = PostmasterPid;
1240 : : k_ev->filter = EVFILT_PROC;
1241 : : k_ev->flags = EV_ADD;
1242 : : k_ev->fflags = NOTE_EXIT;
1243 : : k_ev->data = 0;
1244 : : AccessWaitEvent(k_ev) = event;
1245 : : }
1246 : :
 : : /*
 : : * Fill in a kevent that fires on SIGURG, the signal this implementation
 : : * uses for latch wakeups (see file header comment).
 : : */
1247 : : static inline void
1248 : : WaitEventAdjustKqueueAddLatch(struct kevent *k_ev, WaitEvent *event)
1249 : : {
1250 : : /* For now latch can only be added, not removed. */
1251 : : k_ev->ident = SIGURG;
1252 : : k_ev->filter = EVFILT_SIGNAL;
1253 : : k_ev->flags = EV_ADD;
1254 : : k_ev->fflags = 0;
1255 : : k_ev->data = 0;
1256 : : AccessWaitEvent(k_ev) = event;
1257 : : }
1258 : :
1259 : : /*
1260 : : * old_events is the previous event mask, used to compute what has changed.
1261 : : */
1262 : : static void
1263 : : WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
1264 : : {
1265 : : int rc;
1266 : : struct kevent k_ev[2];
1267 : : int count = 0;
1268 : : bool new_filt_read = false;
1269 : : bool old_filt_read = false;
1270 : : bool new_filt_write = false;
1271 : : bool old_filt_write = false;
1272 : :
 : : /* Nothing to tell the kernel if the mask hasn't changed. */
1273 : : if (old_events == event->events)
1274 : : return;
1275 : :
1276 : : Assert(event->events != WL_LATCH_SET || set->latch != NULL);
1277 : : Assert(event->events == WL_LATCH_SET ||
1278 : : event->events == WL_POSTMASTER_DEATH ||
1279 : : (event->events & (WL_SOCKET_READABLE |
1280 : : WL_SOCKET_WRITEABLE |
1281 : : WL_SOCKET_CLOSED)));
1282 : :
1283 : : if (event->events == WL_POSTMASTER_DEATH)
1284 : : {
1285 : : /*
1286 : : * Unlike all the other implementations, we detect postmaster death
1287 : : * using process notification instead of waiting on the postmaster
1288 : : * alive pipe.
1289 : : */
1290 : : WaitEventAdjustKqueueAddPostmaster(&k_ev[count++], event);
1291 : : }
1292 : : else if (event->events == WL_LATCH_SET)
1293 : : {
1294 : : /* We detect latch wakeup using a signal event. */
1295 : : WaitEventAdjustKqueueAddLatch(&k_ev[count++], event);
1296 : : }
1297 : : else
1298 : : {
1299 : : /*
1300 : : * We need to compute the adds and deletes required to get from the
1301 : : * old event mask to the new event mask, since kevent treats readable
1302 : : * and writable as separate events.
1303 : : */
1304 : : if (old_events & (WL_SOCKET_READABLE | WL_SOCKET_CLOSED))
1305 : : old_filt_read = true;
1306 : : if (event->events & (WL_SOCKET_READABLE | WL_SOCKET_CLOSED))
1307 : : new_filt_read = true;
1308 : : if (old_events & WL_SOCKET_WRITEABLE)
1309 : : old_filt_write = true;
1310 : : if (event->events & WL_SOCKET_WRITEABLE)
1311 : : new_filt_write = true;
1312 : : if (old_filt_read && !new_filt_read)
1313 : : WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_READ, EV_DELETE,
1314 : : event);
1315 : : else if (!old_filt_read && new_filt_read)
1316 : : WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_READ, EV_ADD,
1317 : : event);
1318 : : if (old_filt_write && !new_filt_write)
1319 : : WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_WRITE, EV_DELETE,
1320 : : event);
1321 : : else if (!old_filt_write && new_filt_write)
1322 : : WaitEventAdjustKqueueAdd(&k_ev[count++], EVFILT_WRITE, EV_ADD,
1323 : : event);
1324 : : }
1325 : :
1326 : : /* For WL_SOCKET_READ -> WL_SOCKET_CLOSED, no change needed. */
1327 : : if (count == 0)
1328 : : return;
1329 : :
1330 : : Assert(count <= 2);
1331 : :
 : : /* Apply the changelist only; we're not harvesting events here. */
1332 : : rc = kevent(set->kqueue_fd, &k_ev[0], count, NULL, 0, NULL);
1333 : :
1334 : : /*
1335 : : * When adding the postmaster's pid, we have to consider that it might
1336 : : * already have exited and perhaps even been replaced by another process
1337 : : * with the same pid. If so, we have to defer reporting this as an event
1338 : : * until the next call to WaitEventSetWaitBlock().
1339 : : */
1340 : :
1341 : : if (rc < 0)
1342 : : {
1343 : : if (event->events == WL_POSTMASTER_DEATH &&
1344 : : (errno == ESRCH || errno == EACCES))
1345 : : set->report_postmaster_not_running = true;
1346 : : else
1347 : : ereport(ERROR,
1348 : : (errcode_for_socket_access(),
1349 : : errmsg("%s() failed: %m",
1350 : : "kevent")));
1351 : : }
1352 : : else if (event->events == WL_POSTMASTER_DEATH &&
1353 : : PostmasterPid != getppid() &&
1354 : : !PostmasterIsAlive())
1355 : : {
1356 : : /*
1357 : : * The extra PostmasterIsAliveInternal() check prevents false alarms
1358 : : * on systems that give a different value for getppid() while being
1359 : : * traced by a debugger.
1360 : : */
1361 : : set->report_postmaster_not_running = true;
1362 : : }
1363 : : }
1364 : :
1365 : : #endif
1366 : :
1367 : : #if defined(WAIT_USE_WIN32)
 : : /*
 : : * Update the handle-array slot for 'event'; slot 0 is reserved for
 : : * pgwin32_signal_event, hence the +1 offset.
 : : */
1368 : : static void
1369 : : WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
1370 : : {
1371 : : HANDLE *handle = &set->handles[event->pos + 1];
1372 : :
1373 : : if (event->events == WL_LATCH_SET)
1374 : : {
1375 : : Assert(set->latch != NULL);
1376 : : *handle = set->latch->event;
1377 : : }
1378 : : else if (event->events == WL_POSTMASTER_DEATH)
1379 : : {
1380 : : *handle = PostmasterHandle;
1381 : : }
1382 : : else
1383 : : {
1384 : : int flags = FD_CLOSE; /* always check for errors/EOF */
1385 : :
1386 : : if (event->events & WL_SOCKET_READABLE)
1387 : : flags |= FD_READ;
1388 : : if (event->events & WL_SOCKET_WRITEABLE)
1389 : : flags |= FD_WRITE;
1390 : : if (event->events & WL_SOCKET_CONNECTED)
1391 : : flags |= FD_CONNECT;
1392 : : if (event->events & WL_SOCKET_ACCEPT)
1393 : : flags |= FD_ACCEPT;
1394 : :
 : : /* Create the WSA event object lazily, on first use of this slot. */
1395 : : if (*handle == WSA_INVALID_EVENT)
1396 : : {
1397 : : *handle = WSACreateEvent();
1398 : : if (*handle == WSA_INVALID_EVENT)
1399 : : elog(ERROR, "failed to create event for socket: error code %d",
1400 : : WSAGetLastError());
1401 : : }
1402 : : if (WSAEventSelect(event->fd, *handle, flags) != 0)
1403 : : elog(ERROR, "failed to set up event for socket: error code %d",
1404 : : WSAGetLastError());
1405 : :
1406 : : Assert(event->fd != PGINVALID_SOCKET);
1407 : : }
1408 : : }
1409 : : #endif
1410 : :
1411 : : /*
1412 : : * Wait for events added to the set to happen, or until the timeout is
1413 : : * reached. At most nevents occurred events are returned.
1414 : : *
1415 : : * If timeout = -1, block until an event occurs; if 0, check sockets for
1416 : : * readiness, but don't block; if > 0, block for at most timeout milliseconds.
1417 : : *
1418 : : * Returns the number of events occurred, or 0 if the timeout was reached.
1419 : : *
1420 : : * Returned events will have the fd, pos, user_data fields set to the
1421 : : * values associated with the registered event.
1422 : : */
1423 : : int
1424 : 995224 : WaitEventSetWait(WaitEventSet *set, long timeout,
1425 : : WaitEvent *occurred_events, int nevents,
1426 : : uint32 wait_event_info)
1427 : : {
1428 : 995224 : int returned_events = 0;
1429 : : instr_time start_time;
1430 : : instr_time cur_time;
1431 : 995224 : long cur_timeout = -1;
1432 : :
1433 [ - + ]: 995224 : Assert(nevents > 0);
1434 : :
1435 : : /*
1436 : : * Initialize timeout if requested. We must record the current time so
1437 : : * that we can determine the remaining timeout if interrupted.
1438 : : */
1439 [ + + ]: 995224 : if (timeout >= 0)
1440 : : {
1441 : 343844 : INSTR_TIME_SET_CURRENT(start_time);
1442 [ + - - + ]: 343844 : Assert(timeout >= 0 && timeout <= INT_MAX);
1443 : 343844 : cur_timeout = timeout;
1444 : : }
1445 : : else
450 1446 : 651380 : INSTR_TIME_SET_ZERO(start_time);
1447 : :
2749 rhaas@postgresql.org 1448 : 995224 : pgstat_report_wait_start(wait_event_info);
1449 : :
1450 : : #ifndef WIN32
 : : /* Advertise that we may be blocked; see latch_sigurg_handler() comments below. */
2946 andres@anarazel.de 1451 : 995224 : waiting = true;
1452 : : #else
1453 : : /* Ensure that signals are serviced even if latch is already set */
1454 : : pgwin32_dispatch_queued_signals();
1455 : : #endif
1456 [ + + ]: 2025956 : while (returned_events == 0)
1457 : : {
1458 : : int rc;
1459 : :
1460 : : /*
1461 : : * Check if the latch is set already. If so, leave the loop
1462 : : * immediately, avoid blocking again. We don't attempt to report any
1463 : : * other events that might also be satisfied.
1464 : : *
1465 : : * If someone sets the latch between this and the
1466 : : * WaitEventSetWaitBlock() below, the setter will write a byte to the
1467 : : * pipe (or signal us and the signal handler will do that), and the
1468 : : * readiness routine will return immediately.
1469 : : *
1470 : : * On unix, if there's a pending byte in the self pipe, we'll notice
1471 : : * whenever blocking. Only clearing the pipe in that case avoids
1472 : : * having to drain it every time WaitLatchOrSocket() is used. Should
1473 : : * the pipe-buffer fill up we're still ok, because the pipe is in
1474 : : * nonblocking mode. It's unlikely for that to happen, because the
1475 : : * self pipe isn't filled unless we're blocking (waiting = true), or
1476 : : * from inside a signal handler in latch_sigurg_handler().
1477 : : *
1478 : : * On windows, we'll also notice if there's a pending event for the
1479 : : * latch when blocking, but there's no danger of anything filling up,
1480 : : * as "Setting an event that is already set has no effect.".
1481 : : *
1482 : : * Note: we assume that the kernel calls involved in latch management
1483 : : * will provide adequate synchronization on machines with weak memory
1484 : : * ordering, so that we cannot miss seeing is_set if a notification
1485 : : * has already been queued.
1486 : : */
1140 tmunro@postgresql.or 1487 [ + + + + ]: 1230119 : if (set->latch && !set->latch->is_set)
1488 : : {
1489 : : /* about to sleep on a latch */
1490 : 1062669 : set->latch->maybe_sleeping = true;
 : : /* barrier orders the maybe_sleeping store before the is_set recheck */
1491 : 1062669 : pg_memory_barrier();
1492 : : /* and recheck */
1493 : : }
1494 : :
2946 andres@anarazel.de 1495 [ + + + + ]: 1230119 : if (set->latch && set->latch->is_set)
1496 : : {
1497 : 166653 : occurred_events->fd = PGINVALID_SOCKET;
1498 : 166653 : occurred_events->pos = set->latch_pos;
1499 : 166653 : occurred_events->user_data =
1500 : 166653 : set->events[set->latch_pos].user_data;
1501 : 166653 : occurred_events->events = WL_LATCH_SET;
1502 : 166653 : occurred_events++;
1503 : 166653 : returned_events++;
1504 : :
1505 : : /* could have been set above */
1140 tmunro@postgresql.or 1506 : 166653 : set->latch->maybe_sleeping = false;
1507 : :
2946 andres@anarazel.de 1508 : 166653 : break;
1509 : : }
1510 : :
1511 : : /*
1512 : : * Wait for events using the readiness primitive chosen at the top of
1513 : : * this file. If -1 is returned, a timeout has occurred, if 0 we have
1514 : : * to retry, everything >= 1 is the number of returned events.
1515 : : */
1516 : 1063466 : rc = WaitEventSetWaitBlock(set, cur_timeout,
1517 : : occurred_events, nevents);
1518 : :
1140 tmunro@postgresql.or 1519 [ + + ]: 1061931 : if (set->latch)
1520 : : {
1521 [ - + ]: 1061071 : Assert(set->latch->maybe_sleeping);
1522 : 1061071 : set->latch->maybe_sleeping = false;
1523 : : }
1524 : :
2946 andres@anarazel.de 1525 [ + + ]: 1061931 : if (rc == -1)
1526 : 31197 : break; /* timeout occurred */
1527 : : else
1528 : 1030734 : returned_events = rc;
1529 : :
1530 : : /* If we're not done, update cur_timeout for next iteration */
1531 [ + + + + ]: 1030734 : if (returned_events == 0 && timeout >= 0)
1532 : : {
1533 : 231002 : INSTR_TIME_SET_CURRENT(cur_time);
1534 : 231002 : INSTR_TIME_SUBTRACT(cur_time, start_time);
1535 : 231002 : cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
1536 [ + + ]: 231002 : if (cur_timeout <= 0)
1537 : 2 : break;
1538 : : }
1539 : : }
1540 : : #ifndef WIN32
1541 : 993689 : waiting = false;
1542 : : #endif
1543 : :
2749 rhaas@postgresql.org 1544 : 993689 : pgstat_report_wait_end();
1545 : :
2946 andres@anarazel.de 1546 : 993689 : return returned_events;
1547 : : }
1548 : :
1549 : :
1550 : : #if defined(WAIT_USE_EPOLL)
1551 : :
1552 : : /*
1553 : : * Wait using linux's epoll_wait(2).
1554 : : *
1555 : : * This is the preferable wait method, as several readiness notifications are
1556 : : * delivered, without having to iterate through all of set->events. The return
1557 : : * epoll_event struct contain a pointer to our events, making association
1558 : : * easy.
1559 : : */
1560 : : static inline int
1561 : 1063466 : WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1562 : : WaitEvent *occurred_events, int nevents)
1563 : : {
1564 : 1063466 : int returned_events = 0;
1565 : : int rc;
1566 : : WaitEvent *cur_event;
1567 : : struct epoll_event *cur_epoll_event;
1568 : :
1569 : : /* Sleep */
1570 : 1063466 : rc = epoll_wait(set->epoll_fd, set->epoll_ret_events,
457 tmunro@postgresql.or 1571 : 1063466 : Min(nevents, set->nevents_space), cur_timeout);
1572 : :
1573 : : /* Check return code */
2946 andres@anarazel.de 1574 [ + + ]: 1061946 : if (rc < 0)
1575 : : {
1576 : : /* EINTR is okay, otherwise complain */
1577 [ - + ]: 118730 : if (errno != EINTR)
1578 : : {
2946 andres@anarazel.de 1579 :UBC 0 : waiting = false;
1580 [ # # ]: 0 : ereport(ERROR,
1581 : : (errcode_for_socket_access(),
1582 : : errmsg("%s() failed: %m",
1583 : : "epoll_wait")));
1584 : : }
2946 andres@anarazel.de 1585 :CBC 118730 : return 0;
1586 : : }
1587 [ + + ]: 943216 : else if (rc == 0)
1588 : : {
1589 : : /* timeout exceeded */
1590 : 31197 : return -1;
1591 : : }
1592 : :
1593 : : /*
1594 : : * At least one event occurred, iterate over the returned epoll events
1595 : : * until they're either all processed, or we've returned all the events
1596 : : * the caller desired.
1597 : : */
1598 : 912019 : for (cur_epoll_event = set->epoll_ret_events;
1599 [ + + + - ]: 1824052 : cur_epoll_event < (set->epoll_ret_events + rc) &&
1600 : : returned_events < nevents;
1601 : 912033 : cur_epoll_event++)
1602 : : {
1603 : : /* epoll's data pointer is set to the associated WaitEvent */
1604 : 912048 : cur_event = (WaitEvent *) cur_epoll_event->data.ptr;
1605 : :
1606 : 912048 : occurred_events->pos = cur_event->pos;
1607 : 912048 : occurred_events->user_data = cur_event->user_data;
1608 : 912048 : occurred_events->events = 0;
1609 : :
1610 [ + + ]: 912048 : if (cur_event->events == WL_LATCH_SET &&
1611 [ + - ]: 514436 : cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1612 : : {
1613 : : /* Drain the signalfd. */
1140 tmunro@postgresql.or 1614 : 514436 : drain();
1615 : :
1299 1616 [ + - + + ]: 514436 : if (set->latch && set->latch->is_set)
1617 : : {
2946 andres@anarazel.de 1618 : 398240 : occurred_events->fd = PGINVALID_SOCKET;
1619 : 398240 : occurred_events->events = WL_LATCH_SET;
1620 : 398240 : occurred_events++;
1621 : 398240 : returned_events++;
1622 : : }
1623 : : }
1624 [ + + ]: 397612 : else if (cur_event->events == WL_POSTMASTER_DEATH &&
1625 [ + - ]: 15 : cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP))
1626 : : {
1627 : : /*
1628 : : * We expect an EPOLLHUP when the remote end is closed, but
1629 : : * because we don't expect the pipe to become readable or to have
1630 : : * any errors either, treat those cases as postmaster death, too.
1631 : : *
1632 : : * Be paranoid about a spurious event signaling the postmaster as
1633 : : * being dead. There have been reports about that happening with
1634 : : * older primitives (select(2) to be specific), and a spurious
1635 : : * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
1636 : : * cost much.
1637 : : */
2104 tmunro@postgresql.or 1638 [ + - ]: 15 : if (!PostmasterIsAliveInternal())
1639 : : {
1969 1640 [ + - ]: 15 : if (set->exit_on_postmaster_death)
1641 : 15 : proc_exit(1);
2946 andres@anarazel.de 1642 :UBC 0 : occurred_events->fd = PGINVALID_SOCKET;
1643 : 0 : occurred_events->events = WL_POSTMASTER_DEATH;
1644 : 0 : occurred_events++;
1645 : 0 : returned_events++;
1646 : : }
1647 : : }
790 tmunro@postgresql.or 1648 [ + - ]:CBC 397597 : else if (cur_event->events & (WL_SOCKET_READABLE |
1649 : : WL_SOCKET_WRITEABLE |
1650 : : WL_SOCKET_CLOSED))
1651 : : {
2946 andres@anarazel.de 1652 [ - + ]: 397597 : Assert(cur_event->fd != PGINVALID_SOCKET);
1653 : :
1654 [ + + ]: 397597 : if ((cur_event->events & WL_SOCKET_READABLE) &&
1655 [ + + ]: 389906 : (cur_epoll_event->events & (EPOLLIN | EPOLLERR | EPOLLHUP)))
1656 : : {
1657 : : /* data available in socket, or EOF */
1658 : 388057 : occurred_events->events |= WL_SOCKET_READABLE;
1659 : : }
1660 : :
1661 [ + + ]: 397597 : if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
1662 [ + + ]: 9933 : (cur_epoll_event->events & (EPOLLOUT | EPOLLERR | EPOLLHUP)))
1663 : : {
1664 : : /* writable, or EOF */
1665 : 9872 : occurred_events->events |= WL_SOCKET_WRITEABLE;
1666 : : }
1667 : :
790 tmunro@postgresql.or 1668 [ - + ]: 397597 : if ((cur_event->events & WL_SOCKET_CLOSED) &&
790 tmunro@postgresql.or 1669 [ # # ]:UBC 0 : (cur_epoll_event->events & (EPOLLRDHUP | EPOLLERR | EPOLLHUP)))
1670 : : {
1671 : : /* remote peer shut down, or error */
1672 : 0 : occurred_events->events |= WL_SOCKET_CLOSED;
1673 : : }
1674 : :
1530 tmunro@postgresql.or 1675 [ + - ]:CBC 397597 : if (occurred_events->events != 0)
1676 : : {
1677 : 397597 : occurred_events->fd = cur_event->fd;
1678 : 397597 : occurred_events++;
1679 : 397597 : returned_events++;
1680 : : }
1681 : : }
1682 : : }
1683 : :
1684 : 912004 : return returned_events;
1685 : : }
1686 : :
1687 : : #elif defined(WAIT_USE_KQUEUE)
1688 : :
/*
 * Wait using kevent(2) on BSD-family systems and macOS.
 *
 * For now this mirrors the epoll code, but in future it could modify the fd
 * set in the same call to kevent as it uses for waiting instead of doing that
 * with separate system calls.
 *
 * Returns the number of events stored into occurred_events, 0 if the wait
 * was interrupted by a signal (caller should retry), or -1 if the timeout
 * expired.
 */
static int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct kevent *cur_kqueue_event;
	struct timespec timeout;
	struct timespec *timeout_p;

	/* A negative timeout means wait forever: pass NULL to kevent(). */
	if (cur_timeout < 0)
		timeout_p = NULL;
	else
	{
		/* Convert milliseconds to the timespec form kevent() expects. */
		timeout.tv_sec = cur_timeout / 1000;
		timeout.tv_nsec = (cur_timeout % 1000) * 1000000;
		timeout_p = &timeout;
	}

	/*
	 * Report postmaster events discovered by WaitEventAdjustKqueue() or an
	 * earlier call to WaitEventSetWait().
	 */
	if (unlikely(set->report_postmaster_not_running))
	{
		if (set->exit_on_postmaster_death)
			proc_exit(1);
		occurred_events->fd = PGINVALID_SOCKET;
		occurred_events->events = WL_POSTMASTER_DEATH;
		return 1;
	}

	/* Sleep */
	rc = kevent(set->kqueue_fd, NULL, 0,
				set->kqueue_ret_events,
				Min(nevents, set->nevents_space),
				timeout_p);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("%s() failed: %m",
							"kevent")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * At least one event occurred, iterate over the returned kqueue events
	 * until they're either all processed, or we've returned all the events
	 * the caller desired.
	 */
	for (cur_kqueue_event = set->kqueue_ret_events;
		 cur_kqueue_event < (set->kqueue_ret_events + rc) &&
		 returned_events < nevents;
		 cur_kqueue_event++)
	{
		/* kevent's udata points to the associated WaitEvent */
		cur_event = AccessWaitEvent(cur_kqueue_event);

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			cur_kqueue_event->filter == EVFILT_SIGNAL)
		{
			/* Report the latch only if it is in fact still set. */
			if (set->latch && set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 cur_kqueue_event->filter == EVFILT_PROC &&
				 (cur_kqueue_event->fflags & NOTE_EXIT) != 0)
		{
			/*
			 * The kernel will tell this kqueue object only once about the
			 * exit of the postmaster, so let's remember that for next time so
			 * that we provide level-triggered semantics.
			 */
			set->report_postmaster_not_running = true;

			if (set->exit_on_postmaster_death)
				proc_exit(1);
			occurred_events->fd = PGINVALID_SOCKET;
			occurred_events->events = WL_POSTMASTER_DEATH;
			occurred_events++;
			returned_events++;
		}
		else if (cur_event->events & (WL_SOCKET_READABLE |
									  WL_SOCKET_WRITEABLE |
									  WL_SOCKET_CLOSED))
		{
			Assert(cur_event->fd >= 0);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_kqueue_event->filter == EVFILT_READ))
			{
				/* readable, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_CLOSED) &&
				(cur_kqueue_event->filter == EVFILT_READ) &&
				(cur_kqueue_event->flags & EV_EOF))
			{
				/* the remote peer has shut down */
				occurred_events->events |= WL_SOCKET_CLOSED;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_kqueue_event->filter == EVFILT_WRITE))
			{
				/* writable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			/* Only report the socket if some requested condition was met. */
			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}

	return returned_events;
}
1840 : :
1841 : : #elif defined(WAIT_USE_POLL)
1842 : :
/*
 * Wait using poll(2).
 *
 * This allows to receive readiness notifications for several events at once,
 * but requires iterating through all of set->pollfds.
 *
 * Returns the number of events stored into occurred_events, 0 if the wait
 * was interrupted by a signal (caller should retry), or -1 if the timeout
 * expired.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	struct pollfd *cur_pollfd;

	/* Sleep */
	rc = poll(set->pollfds, set->nevents, (int) cur_timeout);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("%s() failed: %m",
							"poll")));
		}
		return 0;
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/* set->pollfds[] parallels set->events[], so walk them in lockstep. */
	for (cur_event = set->events, cur_pollfd = set->pollfds;
		 cur_event < (set->events + set->nevents) &&
		 returned_events < nevents;
		 cur_event++, cur_pollfd++)
	{
		/* no activity on this FD, skip */
		if (cur_pollfd->revents == 0)
			continue;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			(cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/* There's data in the self-pipe, clear it. */
			drain();

			/* Report the latch only if it is in fact still set. */
			if (set->latch && set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 (cur_pollfd->revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
		{
			/*
			 * We expect an POLLHUP when the remote end is closed, but because
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 *
			 * Be paranoid about a spurious event signaling the postmaster as
			 * being dead.  There have been reports about that happening with
			 * older primitives (select(2) to be specific), and a spurious
			 * WL_POSTMASTER_DEATH event would be painful.  Re-checking
			 * doesn't cost much.
			 */
			if (!PostmasterIsAliveInternal())
			{
				if (set->exit_on_postmaster_death)
					proc_exit(1);
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE |
									  WL_SOCKET_WRITEABLE |
									  WL_SOCKET_CLOSED))
		{
			int			errflags = POLLHUP | POLLERR | POLLNVAL;

			Assert(cur_event->fd >= PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(cur_pollfd->revents & (POLLIN | errflags)))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(cur_pollfd->revents & (POLLOUT | errflags)))
			{
				/* writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

#ifdef POLLRDHUP
			if ((cur_event->events & WL_SOCKET_CLOSED) &&
				(cur_pollfd->revents & (POLLRDHUP | errflags)))
			{
				/* remote peer closed, or error */
				occurred_events->events |= WL_SOCKET_CLOSED;
			}
#endif

			/* Only report the socket if some requested condition was met. */
			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
1973 : :
1974 : : #elif defined(WAIT_USE_WIN32)
1975 : :
/*
 * Wait using Windows' WaitForMultipleObjects().  Each call only "consumes" one
 * event, so we keep calling until we've filled up our output buffer to match
 * the behavior of the other implementations.
 *
 * https://blogs.msdn.microsoft.com/oldnewthing/20150409-00/?p=44273
 *
 * Returns the number of events stored into occurred_events, 0 if only
 * queued signals were serviced (caller should retry), or -1 if the timeout
 * expired.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	DWORD		rc;
	WaitEvent  *cur_event;

	/* Reset any wait events that need it */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents);
		 cur_event++)
	{
		if (cur_event->reset)
		{
			WaitEventAdjustWin32(set, cur_event);
			cur_event->reset = false;
		}

		/*
		 * Windows does not guarantee to log an FD_WRITE network event
		 * indicating that more data can be sent unless the previous send()
		 * failed with WSAEWOULDBLOCK.  While our caller might well have made
		 * such a call, we cannot assume that here.  Therefore, if waiting for
		 * write-ready, force the issue by doing a dummy send().  If the dummy
		 * send() succeeds, assume that the socket is in fact write-ready, and
		 * return immediately.  Also, if it fails with something other than
		 * WSAEWOULDBLOCK, return a write-ready indication to let our caller
		 * deal with the error condition.
		 */
		if (cur_event->events & WL_SOCKET_WRITEABLE)
		{
			char		c;
			WSABUF		buf;
			DWORD		sent;
			int			r;

			/* A zero-length send probes writability without sending data. */
			buf.buf = &c;
			buf.len = 0;

			r = WSASend(cur_event->fd, &buf, 1, &sent, 0, NULL, NULL);
			if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
			{
				occurred_events->pos = cur_event->pos;
				occurred_events->user_data = cur_event->user_data;
				occurred_events->events = WL_SOCKET_WRITEABLE;
				occurred_events->fd = cur_event->fd;
				return 1;
			}
		}
	}

	/*
	 * Sleep.
	 *
	 * Need to wait for ->nevents + 1, because signal handle is in [0].
	 */
	rc = WaitForMultipleObjects(set->nevents + 1, set->handles, FALSE,
								cur_timeout);

	/* Check return code */
	if (rc == WAIT_FAILED)
		elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
			 GetLastError());
	else if (rc == WAIT_TIMEOUT)
	{
		/* timeout exceeded */
		return -1;
	}

	if (rc == WAIT_OBJECT_0)
	{
		/* Service newly-arrived signals */
		pgwin32_dispatch_queued_signals();
		return 0;				/* retry */
	}

	/*
	 * With an offset of one, due to the always present pgwin32_signal_event,
	 * the handle offset directly corresponds to a wait event.
	 */
	cur_event = (WaitEvent *) &set->events[rc - WAIT_OBJECT_0 - 1];

	for (;;)
	{
		int			next_pos;
		int			count;

		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET)
		{
			/*
			 * We cannot use set->latch->event to reset the fired event if we
			 * aren't waiting on this latch now.
			 */
			if (!ResetEvent(set->handles[cur_event->pos + 1]))
				elog(ERROR, "ResetEvent failed: error code %lu", GetLastError());

			/* Report the latch only if it is in fact still set. */
			if (set->latch && set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH)
		{
			/*
			 * Postmaster apparently died.  Since the consequences of falsely
			 * returning WL_POSTMASTER_DEATH could be pretty unpleasant, we
			 * take the trouble to positively verify this with
			 * PostmasterIsAlive(), even though there is no known reason to
			 * think that the event could be falsely set on Windows.
			 */
			if (!PostmasterIsAliveInternal())
			{
				if (set->exit_on_postmaster_death)
					proc_exit(1);
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & WL_SOCKET_MASK)
		{
			WSANETWORKEVENTS resEvents;
			HANDLE		handle = set->handles[cur_event->pos + 1];

			Assert(cur_event->fd);

			occurred_events->fd = cur_event->fd;

			ZeroMemory(&resEvents, sizeof(resEvents));
			if (WSAEnumNetworkEvents(cur_event->fd, handle, &resEvents) != 0)
				elog(ERROR, "failed to enumerate network events: error code %d",
					 WSAGetLastError());
			if ((cur_event->events & WL_SOCKET_READABLE) &&
				(resEvents.lNetworkEvents & FD_READ))
			{
				/* data available in socket */
				occurred_events->events |= WL_SOCKET_READABLE;

				/*------
				 * WaitForMultipleObjects doesn't guarantee that a read event
				 * will be returned if the latch is set at the same time.  Even
				 * if it did, the caller might drop that event expecting it to
				 * reoccur on next call.  So, we must force the event to be
				 * reset if this WaitEventSet is used again in order to avoid
				 * an indefinite hang.
				 *
				 * Refer
				 * https://msdn.microsoft.com/en-us/library/windows/desktop/ms741576(v=vs.85).aspx
				 * for the behavior of socket events.
				 *------
				 */
				cur_event->reset = true;
			}
			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				(resEvents.lNetworkEvents & FD_WRITE))
			{
				/* writeable */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}
			if ((cur_event->events & WL_SOCKET_CONNECTED) &&
				(resEvents.lNetworkEvents & FD_CONNECT))
			{
				/* connected */
				occurred_events->events |= WL_SOCKET_CONNECTED;
			}
			if ((cur_event->events & WL_SOCKET_ACCEPT) &&
				(resEvents.lNetworkEvents & FD_ACCEPT))
			{
				/* incoming connection could be accepted */
				occurred_events->events |= WL_SOCKET_ACCEPT;
			}
			if (resEvents.lNetworkEvents & FD_CLOSE)
			{
				/* EOF/error, so signal all caller-requested socket flags */
				occurred_events->events |= (cur_event->events & WL_SOCKET_MASK);
			}

			if (occurred_events->events != 0)
			{
				occurred_events++;
				returned_events++;
			}
		}

		/* Is the output buffer full? */
		if (returned_events == nevents)
			break;

		/* Have we run out of possible events? */
		next_pos = cur_event->pos + 1;
		if (next_pos == set->nevents)
			break;

		/*
		 * Poll the rest of the event handles in the array starting at
		 * next_pos being careful to skip over the initial signal handle too.
		 * This time we use a zero timeout.
		 */
		count = set->nevents - next_pos;
		rc = WaitForMultipleObjects(count,
									set->handles + 1 + next_pos,
									false,
									0);

		/*
		 * We don't distinguish between errors and WAIT_TIMEOUT here because
		 * we already have events to report.
		 */
		if (rc < WAIT_OBJECT_0 || rc >= WAIT_OBJECT_0 + count)
			break;

		/* We have another event to decode. */
		cur_event = &set->events[next_pos + (rc - WAIT_OBJECT_0)];
	}

	return returned_events;
}
2209 : : #endif
2210 : :
/*
 * Return whether the current build options can report WL_SOCKET_CLOSED.
 *
 * Detecting remote-end closure requires kernel support: POLLRDHUP for
 * poll(), EPOLLRDHUP for epoll(), EV_EOF for kqueue().  The answer
 * therefore depends on which wait primitive this build uses.
 */
bool
WaitEventSetCanReportClosed(void)
{
#if (defined(WAIT_USE_POLL) && defined(POLLRDHUP)) || \
	defined(WAIT_USE_EPOLL) || \
	defined(WAIT_USE_KQUEUE)
	return true;
#else
	return false;
#endif
}
2225 : :
/*
 * Get the number of wait events registered in a given WaitEventSet.
 *
 * (On Windows, the always-present signal event kept in handles[0] is not
 * included in this count; see WaitEventSetWaitBlock.)
 */
int
GetNumRegisteredWaitEvents(WaitEventSet *set)
{
	return set->nevents;
}
2234 : :
2235 : : #if defined(WAIT_USE_SELF_PIPE)
2236 : :
/*
 * SetLatch uses SIGURG to wake up the process waiting on the latch.
 *
 * Wake up WaitLatch, if we're waiting.  (If we aren't, there's nothing to
 * do: the self-pipe byte exists only to interrupt a sleeping poll().)
 *
 * Note: this runs in signal-handler context, so it must restrict itself to
 * async-signal-safe operations; write() in sendSelfPipeByte() qualifies.
 */
static void
latch_sigurg_handler(SIGNAL_ARGS)
{
	if (waiting)
		sendSelfPipeByte();
}
2248 : :
2249 : : /* Send one byte to the self-pipe, to wake up WaitLatch */
2250 : : static void
2251 : : sendSelfPipeByte(void)
2252 : : {
2253 : : int rc;
2254 : : char dummy = 0;
2255 : :
2256 : : retry:
2257 : : rc = write(selfpipe_writefd, &dummy, 1);
2258 : : if (rc < 0)
2259 : : {
2260 : : /* If interrupted by signal, just retry */
2261 : : if (errno == EINTR)
2262 : : goto retry;
2263 : :
2264 : : /*
2265 : : * If the pipe is full, we don't need to retry, the data that's there
2266 : : * already is enough to wake up WaitLatch.
2267 : : */
2268 : : if (errno == EAGAIN || errno == EWOULDBLOCK)
2269 : : return;
2270 : :
2271 : : /*
2272 : : * Oops, the write() failed for some other reason. We might be in a
2273 : : * signal handler, so it's not safe to elog(). We have no choice but
2274 : : * silently ignore the error.
2275 : : */
2276 : : return;
2277 : : }
2278 : : }
2279 : :
2280 : : #endif
2281 : :
2282 : : #if defined(WAIT_USE_SELF_PIPE) || defined(WAIT_USE_SIGNALFD)
2283 : :
2284 : : /*
2285 : : * Read all available data from self-pipe or signalfd.
2286 : : *
2287 : : * Note: this is only called when waiting = true. If it fails and doesn't
2288 : : * return, it must reset that flag first (though ideally, this will never
2289 : : * happen).
2290 : : */
2291 : : static void
1140 tmunro@postgresql.or 2292 : 514436 : drain(void)
2293 : : {
2294 : : char buf[1024];
2295 : : int rc;
2296 : : int fd;
2297 : :
2298 : : #ifdef WAIT_USE_SELF_PIPE
2299 : : fd = selfpipe_readfd;
2300 : : #else
2301 : 514436 : fd = signal_fd;
2302 : : #endif
2303 : :
2304 : : for (;;)
2305 : : {
1140 tmunro@postgresql.or 2306 :UBC 0 : rc = read(fd, buf, sizeof(buf));
4964 heikki.linnakangas@i 2307 [ - + ]:CBC 514436 : if (rc < 0)
2308 : : {
4964 heikki.linnakangas@i 2309 [ # # # # ]:UBC 0 : if (errno == EAGAIN || errno == EWOULDBLOCK)
2310 : : break; /* the descriptor is empty */
2311 [ # # ]: 0 : else if (errno == EINTR)
4753 bruce@momjian.us 2312 : 0 : continue; /* retry */
2313 : : else
2314 : : {
4631 tgl@sss.pgh.pa.us 2315 : 0 : waiting = false;
2316 : : #ifdef WAIT_USE_SELF_PIPE
2317 : : elog(ERROR, "read() on self-pipe failed: %m");
2318 : : #else
1140 tmunro@postgresql.or 2319 [ # # ]: 0 : elog(ERROR, "read() on signalfd failed: %m");
2320 : : #endif
2321 : : }
2322 : : }
4964 heikki.linnakangas@i 2323 [ - + ]:CBC 514436 : else if (rc == 0)
2324 : : {
4631 tgl@sss.pgh.pa.us 2325 :UBC 0 : waiting = false;
2326 : : #ifdef WAIT_USE_SELF_PIPE
2327 : : elog(ERROR, "unexpected EOF on self-pipe");
2328 : : #else
1140 tmunro@postgresql.or 2329 [ # # ]: 0 : elog(ERROR, "unexpected EOF on signalfd");
2330 : : #endif
2331 : : }
4631 tgl@sss.pgh.pa.us 2332 [ + - ]:CBC 514436 : else if (rc < sizeof(buf))
2333 : : {
2334 : : /* we successfully drained the pipe; no need to read() again */
2335 : 514436 : break;
2336 : : }
2337 : : /* else buffer wasn't big enough, so read again */
2338 : : }
4964 heikki.linnakangas@i 2339 : 514436 : }
2340 : :
2341 : : #endif
2342 : :
2343 : : static void
143 heikki.linnakangas@i 2344 :GNC 1 : ResOwnerReleaseWaitEventSet(Datum res)
2345 : : {
2346 : 1 : WaitEventSet *set = (WaitEventSet *) DatumGetPointer(res);
2347 : :
2348 [ - + ]: 1 : Assert(set->owner != NULL);
2349 : 1 : set->owner = NULL;
2350 : 1 : FreeWaitEventSet(set);
2351 : 1 : }
|