Age Owner TLA Line data Source code
1 : /* -------------------------------------------------------------------------
2 : *
3 : * pgstat_io.c
4 : * Implementation of IO statistics.
5 : *
6 : * This file contains the implementation of IO statistics. It is kept separate
7 : * from pgstat.c to enforce the line between the statistics access / storage
8 : * implementation and the details about individual types of statistics.
9 : *
10 : * Copyright (c) 2021-2023, PostgreSQL Global Development Group
11 : *
12 : * IDENTIFICATION
13 : * src/backend/utils/activity/pgstat_io.c
14 : * -------------------------------------------------------------------------
15 : */
16 :
17 : #include "postgres.h"
18 :
19 : #include "executor/instrument.h"
20 : #include "storage/bufmgr.h"
21 : #include "utils/pgstat_internal.h"
22 :
23 :
24 : typedef struct PgStat_PendingIO
25 : {
26 : PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
27 : instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
28 : } PgStat_PendingIO;
29 :
30 :
31 : static PgStat_PendingIO PendingIOStats;
32 : bool have_iostats = false;
33 :
34 :
35 : /*
36 : * Check that stats have not been counted for any combination of IOObject,
37 : * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
38 : * stats are tracked for this combination and IO times are non-zero, counts
39 : * should be non-zero.
40 : *
41 : * The passed-in PgStat_BktypeIO must contain stats from the BackendType
42 : * specified by the second parameter. Caller is responsible for locking the
43 : * passed-in PgStat_BktypeIO, if needed.
44 : */
45 : bool
60 andres 46 GNC 86715 : pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
47 : BackendType bktype)
48 : {
41 tgl 49 260145 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
50 : {
51 867150 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
52 : {
53 5549760 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
54 : {
55 : /* we do track it */
2 andres 56 4856040 : if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
57 : {
58 : /* ensure that if IO times are non-zero, counts are > 0 */
59 2056211 : if (backend_io->times[io_object][io_context][io_op] != 0 &&
60 223 : backend_io->counts[io_object][io_context][io_op] <= 0)
2 andres 61 UNC 0 : return false;
62 :
60 andres 63 GNC 2056211 : continue;
64 : }
65 :
66 : /* we don't track it, and it is not 0 */
2 67 2799829 : if (backend_io->counts[io_object][io_context][io_op] != 0)
60 andres 68 UNC 0 : return false;
69 : }
70 : }
71 : }
72 :
60 andres 73 GNC 86715 : return true;
74 : }
75 :
76 : void
77 70047217 : pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
78 : {
4 79 70047217 : pgstat_count_io_op_n(io_object, io_context, io_op, 1);
80 70047217 : }
81 :
82 : void
83 72427699 : pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt)
84 : {
40 tgl 85 72427699 : Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
86 72427699 : Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
87 72427699 : Assert((unsigned int) io_op < IOOP_NUM_TYPES);
60 andres 88 72427699 : Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
89 :
2 90 72427699 : PendingIOStats.counts[io_object][io_context][io_op] += cnt;
91 :
60 92 72427699 : have_iostats = true;
93 72427699 : }
94 :
95 : instr_time
2 96 2380495 : pgstat_prepare_io_time(void)
97 : {
98 : instr_time io_start;
99 :
100 2380495 : if (track_io_timing)
101 2 : INSTR_TIME_SET_CURRENT(io_start);
102 : else
103 2380493 : INSTR_TIME_SET_ZERO(io_start);
104 :
105 2380495 : return io_start;
106 : }
107 :
108 : /*
109 : * Like pgstat_count_io_op_n() except it also accumulates time.
110 : */
111 : void
112 2380482 : pgstat_count_io_op_time(IOObject io_obj, IOContext io_context, IOOp io_op,
113 : instr_time start_time, uint32 cnt)
114 : {
115 2380482 : if (track_io_timing)
116 : {
117 : instr_time io_time;
118 :
119 2 : INSTR_TIME_SET_CURRENT(io_time);
120 2 : INSTR_TIME_SUBTRACT(io_time, start_time);
121 :
122 2 : if (io_op == IOOP_WRITE)
123 : {
2 andres 124 UNC 0 : pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
125 0 : if (io_obj == IOOBJECT_RELATION)
126 0 : INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
127 : }
2 andres 128 GNC 2 : else if (io_op == IOOP_READ)
129 : {
130 2 : pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
131 2 : if (io_obj == IOOBJECT_RELATION)
132 2 : INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
133 : }
134 :
135 2 : INSTR_TIME_ADD(PendingIOStats.pending_times[io_obj][io_context][io_op],
136 : io_time);
137 : }
138 :
139 2380482 : pgstat_count_io_op_n(io_obj, io_context, io_op, cnt);
140 2380482 : }
141 :
142 : PgStat_IO *
60 143 56 : pgstat_fetch_stat_io(void)
144 : {
145 56 : pgstat_snapshot_fixed(PGSTAT_KIND_IO);
146 :
147 56 : return &pgStatLocal.snapshot.io;
148 : }
149 :
150 : /*
151 : * Flush out locally pending IO statistics
152 : *
153 : * If no stats have been recorded, this function returns false.
154 : *
155 : * If nowait is true, this function returns true if the lock could not be
156 : * acquired. Otherwise, return false.
157 : */
158 : bool
159 117100 : pgstat_flush_io(bool nowait)
160 : {
161 : LWLock *bktype_lock;
162 : PgStat_BktypeIO *bktype_shstats;
163 :
164 117100 : if (!have_iostats)
165 31163 : return false;
166 :
167 85937 : bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType];
168 85937 : bktype_shstats =
169 85937 : &pgStatLocal.shmem->io.stats.stats[MyBackendType];
170 :
171 85937 : if (!nowait)
172 74985 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
173 10952 : else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE))
174 6 : return true;
175 :
41 tgl 176 257793 : for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
177 : {
178 859310 : for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
179 : {
180 5499584 : for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
181 : {
182 : instr_time time;
183 :
2 andres 184 4812136 : bktype_shstats->counts[io_object][io_context][io_op] +=
185 4812136 : PendingIOStats.counts[io_object][io_context][io_op];
186 :
187 4812136 : time = PendingIOStats.pending_times[io_object][io_context][io_op];
188 :
189 4812136 : bktype_shstats->times[io_object][io_context][io_op] +=
190 4812136 : INSTR_TIME_GET_MICROSEC(time);
191 : }
192 : }
193 : }
194 :
60 195 85931 : Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType));
196 :
197 85931 : LWLockRelease(bktype_lock);
198 :
199 85931 : memset(&PendingIOStats, 0, sizeof(PendingIOStats));
200 :
201 85931 : have_iostats = false;
202 :
203 85931 : return false;
204 : }
205 :
206 : const char *
207 4032 : pgstat_get_io_context_name(IOContext io_context)
208 : {
209 4032 : switch (io_context)
210 : {
211 1008 : case IOCONTEXT_BULKREAD:
212 1008 : return "bulkread";
213 1008 : case IOCONTEXT_BULKWRITE:
214 1008 : return "bulkwrite";
215 1008 : case IOCONTEXT_NORMAL:
216 1008 : return "normal";
217 1008 : case IOCONTEXT_VACUUM:
218 1008 : return "vacuum";
219 : }
220 :
60 andres 221 UNC 0 : elog(ERROR, "unrecognized IOContext value: %d", io_context);
222 : pg_unreachable();
223 : }
224 :
225 : const char *
60 andres 226 GNC 1008 : pgstat_get_io_object_name(IOObject io_object)
227 : {
228 1008 : switch (io_object)
229 : {
230 504 : case IOOBJECT_RELATION:
231 504 : return "relation";
232 504 : case IOOBJECT_TEMP_RELATION:
233 504 : return "temp relation";
234 : }
235 :
60 andres 236 UNC 0 : elog(ERROR, "unrecognized IOObject value: %d", io_object);
237 : pg_unreachable();
238 : }
239 :
240 : void
60 andres 241 GNC 441 : pgstat_io_reset_all_cb(TimestampTz ts)
242 : {
243 6615 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
244 : {
245 6174 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
246 6174 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
247 :
248 6174 : LWLockAcquire(bktype_lock, LW_EXCLUSIVE);
249 :
250 : /*
251 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
252 : * the reset timestamp as well.
253 : */
254 6174 : if (i == 0)
255 441 : pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts;
256 :
257 6174 : memset(bktype_shstats, 0, sizeof(*bktype_shstats));
258 6174 : LWLockRelease(bktype_lock);
259 : }
260 441 : }
261 :
262 : void
263 1048 : pgstat_io_snapshot_cb(void)
264 : {
265 15720 : for (int i = 0; i < BACKEND_NUM_TYPES; i++)
266 : {
267 14672 : LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i];
268 14672 : PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i];
269 14672 : PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i];
270 :
271 14672 : LWLockAcquire(bktype_lock, LW_SHARED);
272 :
273 : /*
274 : * Use the lock in the first BackendType's PgStat_BktypeIO to protect
275 : * the reset timestamp as well.
276 : */
277 14672 : if (i == 0)
278 1048 : pgStatLocal.snapshot.io.stat_reset_timestamp =
279 1048 : pgStatLocal.shmem->io.stats.stat_reset_timestamp;
280 :
281 : /* using struct assignment due to better type safety */
282 14672 : *bktype_snap = *bktype_shstats;
283 14672 : LWLockRelease(bktype_lock);
284 : }
285 1048 : }
286 :
287 : /*
288 : * IO statistics are not collected for all BackendTypes.
289 : *
290 : * The following BackendTypes do not participate in the cumulative stats
291 : * subsystem or do not perform IO on which we currently track:
292 : * - Syslogger because it is not connected to shared memory
293 : * - Archiver because most relevant archiving IO is delegated to a
294 : * specialized command or module
295 : * - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now
296 : *
297 : * Function returns true if BackendType participates in the cumulative stats
298 : * subsystem for IO and false if it does not.
299 : *
300 : * When adding a new BackendType, also consider adding relevant restrictions to
301 : * pgstat_tracks_io_object() and pgstat_tracks_io_op().
302 : */
303 : bool
304 77300315 : pgstat_tracks_io_bktype(BackendType bktype)
305 : {
306 : /*
307 : * List every type so that new backend types trigger a warning about
308 : * needing to adjust this switch.
309 : */
310 77300315 : switch (bktype)
311 : {
312 15960 : case B_INVALID:
313 : case B_ARCHIVER:
314 : case B_LOGGER:
315 : case B_WAL_RECEIVER:
316 : case B_WAL_WRITER:
317 15960 : return false;
318 :
319 77284355 : case B_AUTOVAC_LAUNCHER:
320 : case B_AUTOVAC_WORKER:
321 : case B_BACKEND:
322 : case B_BG_WORKER:
323 : case B_BG_WRITER:
324 : case B_CHECKPOINTER:
325 : case B_STANDALONE_BACKEND:
326 : case B_STARTUP:
327 : case B_WAL_SENDER:
328 77284355 : return true;
329 : }
330 :
60 andres 331 UNC 0 : return false;
332 : }
333 :
334 : /*
335 : * Some BackendTypes do not perform IO on certain IOObjects or in certain
336 : * IOContexts. Some IOObjects are never operated on in some IOContexts. Check
337 : * that the given BackendType is expected to do IO in the given IOContext and
338 : * on the given IOObject and that the given IOObject is expected to be operated
339 : * on in the given IOContext.
340 : */
341 : bool
60 andres 342 GNC 77299531 : pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
343 : IOContext io_context)
344 : {
345 : bool no_temp_rel;
346 :
347 : /*
348 : * Some BackendTypes should never track IO statistics.
349 : */
350 77299531 : if (!pgstat_tracks_io_bktype(bktype))
351 15680 : return false;
352 :
353 : /*
354 : * Currently, IO on temporary relations can only occur in the
355 : * IOCONTEXT_NORMAL IOContext.
356 : */
357 77283851 : if (io_context != IOCONTEXT_NORMAL &&
358 : io_object == IOOBJECT_TEMP_RELATION)
359 1816647 : return false;
360 :
361 : /*
362 : * In core Postgres, only regular backends and WAL Sender processes
363 : * executing queries will use local buffers and operate on temporary
364 : * relations. Parallel workers will not use local buffers (see
365 : * InitLocalBuffers()); however, extensions leveraging background workers
366 : * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for
367 : * BackendType B_BG_WORKER.
368 : */
369 75453089 : no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
370 75176627 : bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
371 150920293 : bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
372 :
373 75467204 : if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
374 : io_object == IOOBJECT_TEMP_RELATION)
375 416864 : return false;
376 :
377 : /*
378 : * Some BackendTypes do not currently perform any IO in certain
379 : * IOContexts, and, while it may not be inherently incorrect for them to
380 : * do so, excluding those rows from the view makes the view easier to use.
381 : */
382 75050340 : if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) &&
383 236296 : (io_context == IOCONTEXT_BULKREAD ||
384 216213 : io_context == IOCONTEXT_BULKWRITE ||
385 : io_context == IOCONTEXT_VACUUM))
386 60249 : return false;
387 :
388 74990091 : if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM)
389 2590 : return false;
390 :
391 74987501 : if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) &&
392 : io_context == IOCONTEXT_BULKWRITE)
393 4781 : return false;
394 :
395 74982720 : return true;
396 : }
397 :
398 : /*
399 : * Some BackendTypes will never do certain IOOps and some IOOps should not
400 : * occur in certain IOContexts or on certain IOObjects. Check that the given
401 : * IOOp is valid for the given BackendType in the given IOContext and on the
402 : * given IOObject. Note that there are currently no cases of an IOOp being
403 : * invalid for a particular BackendType only within a certain IOContext and/or
404 : * only on a certain IOObject.
405 : */
406 : bool
407 77295499 : pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
408 : IOContext io_context, IOOp io_op)
409 : {
410 : bool strategy_io_context;
411 :
412 : /* if (io_context, io_object) will never collect stats, we're done */
413 77295499 : if (!pgstat_tracks_io_object(bktype, io_object, io_context))
414 2314459 : return false;
415 :
416 : /*
417 : * Some BackendTypes will not do certain IOOps.
418 : */
419 74981040 : if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
10 420 193053 : (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
60 421 8895 : return false;
422 :
423 74972145 : if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
424 193356 : bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
425 3801 : return false;
426 :
427 : /*
428 : * Some IOOps are not valid in certain IOContexts and some IOOps are only
429 : * valid in certain contexts.
430 : */
431 74968344 : if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND)
432 83556 : return false;
433 :
434 73449906 : strategy_io_context = io_context == IOCONTEXT_BULKREAD ||
435 148334694 : io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM;
436 :
437 : /*
438 : * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use.
439 : */
440 74884788 : if (!strategy_io_context && io_op == IOOP_REUSE)
441 114038 : return false;
442 :
443 : /*
444 : * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
445 : * counted in the IOCONTEXT_NORMAL IOContext. See comment in
446 : * register_dirty_segment() for more details.
447 : */
448 74770750 : if (strategy_io_context && io_op == IOOP_FSYNC)
449 250725 : return false;
450 :
451 : /*
452 : * Temporary tables are not logged and thus do not require fsync'ing.
453 : */
454 74520025 : if (io_context == IOCONTEXT_NORMAL &&
455 1422076 : io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC)
456 27099 : return false;
457 :
458 74492926 : return true;
459 : }
|