Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * autoprewarm.c
4 : * Periodically dump information about the blocks present in
5 : * shared_buffers, and reload them on server restart.
6 : *
7 : * Due to locking considerations, we can't actually begin prewarming
8 : * until the server reaches a consistent state. We need the catalogs
9 : * to be consistent so that we can figure out which relation to lock,
10 : * and we need to lock the relations so that we don't try to prewarm
11 : * pages from a relation that is in the process of being dropped.
12 : *
13 : * While prewarming, autoprewarm will use two workers. There's a
14 : * leader worker that reads and sorts the list of blocks to be
15 : * prewarmed and then launches a per-database worker for each
16 : * relevant database in turn. The former keeps running after the
17 : * initial prewarm is complete to update the dump file periodically.
18 : *
19 : * Copyright (c) 2016-2023, PostgreSQL Global Development Group
20 : *
21 : * IDENTIFICATION
22 : * contrib/pg_prewarm/autoprewarm.c
23 : *
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include <unistd.h>
30 :
31 : #include "access/relation.h"
32 : #include "access/xact.h"
33 : #include "catalog/pg_class.h"
34 : #include "catalog/pg_type.h"
35 : #include "miscadmin.h"
36 : #include "pgstat.h"
37 : #include "postmaster/bgworker.h"
38 : #include "postmaster/interrupt.h"
39 : #include "storage/buf_internals.h"
40 : #include "storage/dsm.h"
41 : #include "storage/fd.h"
42 : #include "storage/ipc.h"
43 : #include "storage/latch.h"
44 : #include "storage/lwlock.h"
45 : #include "storage/proc.h"
46 : #include "storage/procsignal.h"
47 : #include "storage/shmem.h"
48 : #include "storage/smgr.h"
49 : #include "tcop/tcopprot.h"
50 : #include "utils/acl.h"
51 : #include "utils/datetime.h"
52 : #include "utils/guc.h"
53 : #include "utils/memutils.h"
54 : #include "utils/rel.h"
55 : #include "utils/relfilenumbermap.h"
56 : #include "utils/resowner.h"
57 :
/*
 * Name of the block dump file.  It is opened by this relative name; the
 * transient file written during a dump is this name with ".tmp" appended.
 */
#define AUTOPREWARM_FILE "autoprewarm.blocks"
59 :
60 : /* Metadata for each block we dump. */
61 : typedef struct BlockInfoRecord
62 : {
63 : Oid database;
64 : Oid tablespace;
65 : RelFileNumber filenumber;
66 : ForkNumber forknum;
67 : BlockNumber blocknum;
68 : } BlockInfoRecord;
69 :
70 : /* Shared state information for autoprewarm bgworker. */
71 : typedef struct AutoPrewarmSharedState
72 : {
73 : LWLock lock; /* mutual exclusion */
74 : pid_t bgworker_pid; /* for main bgworker */
75 : pid_t pid_using_dumpfile; /* for autoprewarm or block dump */
76 :
77 : /* Following items are for communication with per-database worker */
78 : dsm_handle block_info_handle;
79 : Oid database;
80 : int prewarm_start_idx;
81 : int prewarm_stop_idx;
82 : int prewarmed_blocks;
83 : } AutoPrewarmSharedState;
84 :
85 : PGDLLEXPORT void autoprewarm_main(Datum main_arg);
86 : PGDLLEXPORT void autoprewarm_database_main(Datum main_arg);
2057 rhaas 87 ECB :
2057 rhaas 88 CBC 2 : PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
2057 rhaas 89 GIC 3 : PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
90 :
91 : static void apw_load_buffers(void);
92 : static int apw_dump_now(bool is_bgworker, bool dump_unlogged);
93 : static void apw_start_leader_worker(void);
94 : static void apw_start_database_worker(void);
95 : static bool apw_init_shmem(void);
96 : static void apw_detach_shmem(int code, Datum arg);
97 : static int apw_compare_blockinfo(const void *p, const void *q);
98 : static void autoprewarm_shmem_request(void);
99 : static shmem_request_hook_type prev_shmem_request_hook = NULL;
100 :
101 : /* Pointer to shared-memory state. */
102 : static AutoPrewarmSharedState *apw_state = NULL;
103 :
104 : /* GUC variables. */
105 : static bool autoprewarm = true; /* start worker? */
106 : static int autoprewarm_interval = 300; /* dump interval */
107 :
108 : /*
109 : * Module load callback.
110 : */
2057 rhaas 111 ECB : void
2057 rhaas 112 GIC 5 : _PG_init(void)
2057 rhaas 113 ECB : {
2057 rhaas 114 GIC 5 : DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
115 : "Sets the interval between dumps of shared buffers",
116 : "If set to zero, time-based dumping is disabled.",
117 : &autoprewarm_interval,
118 : 300,
119 : 0, INT_MAX / 1000,
120 : PGC_SIGHUP,
121 : GUC_UNIT_S,
122 : NULL,
123 : NULL,
124 : NULL);
2057 rhaas 125 ECB :
2057 rhaas 126 CBC 5 : if (!process_shared_preload_libraries_in_progress)
2057 rhaas 127 GIC 3 : return;
128 :
2057 rhaas 129 ECB : /* can't define PGC_POSTMASTER variable after startup */
2057 rhaas 130 GIC 2 : DefineCustomBoolVariable("pg_prewarm.autoprewarm",
131 : "Starts the autoprewarm worker.",
132 : NULL,
133 : &autoprewarm,
134 : true,
135 : PGC_POSTMASTER,
136 : 0,
137 : NULL,
138 : NULL,
139 : NULL);
2057 rhaas 140 ECB :
412 tgl 141 GIC 2 : MarkGUCPrefixReserved("pg_prewarm");
2057 rhaas 142 ECB :
331 rhaas 143 CBC 2 : prev_shmem_request_hook = shmem_request_hook;
331 rhaas 144 GIC 2 : shmem_request_hook = autoprewarm_shmem_request;
145 :
2057 rhaas 146 ECB : /* Register autoprewarm worker, if enabled. */
2057 rhaas 147 CBC 2 : if (autoprewarm)
1029 andres 148 GIC 2 : apw_start_leader_worker();
149 : }
150 :
151 : /*
152 : * Requests any additional shared memory required for autoprewarm.
153 : */
331 rhaas 154 ECB : static void
331 rhaas 155 GIC 2 : autoprewarm_shmem_request(void)
331 rhaas 156 ECB : {
331 rhaas 157 GBC 2 : if (prev_shmem_request_hook)
331 rhaas 158 UIC 0 : prev_shmem_request_hook();
331 rhaas 159 ECB :
331 rhaas 160 CBC 2 : RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
331 rhaas 161 GIC 2 : }
162 :
163 : /*
164 : * Main entry point for the leader autoprewarm process. Per-database workers
165 : * have a separate entry point.
166 : */
2057 rhaas 167 ECB : void
2057 rhaas 168 GIC 2 : autoprewarm_main(Datum main_arg)
2057 rhaas 169 ECB : {
2057 rhaas 170 CBC 2 : bool first_time = true;
838 tgl 171 2 : bool final_dump_allowed = true;
2057 rhaas 172 GIC 2 : TimestampTz last_dump_time = 0;
173 :
2057 rhaas 174 ECB : /* Establish signal handlers; once that's done, unblock signals. */
883 fujii 175 CBC 2 : pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
176 2 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
2057 rhaas 177 2 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
2057 rhaas 178 GIC 2 : BackgroundWorkerUnblockSignals();
179 :
2057 rhaas 180 ECB : /* Create (if necessary) and attach to our shared memory area. */
2057 rhaas 181 GBC 2 : if (apw_init_shmem())
2057 rhaas 182 UIC 0 : first_time = false;
183 :
2057 rhaas 184 ECB : /* Set on-detach hook so that our PID will be cleared on exit. */
2057 rhaas 185 GIC 2 : on_shmem_exit(apw_detach_shmem, 0);
186 :
187 : /*
188 : * Store our PID in the shared memory area --- unless there's already
189 : * another worker running, in which case just exit.
2057 rhaas 190 ECB : */
2057 rhaas 191 CBC 2 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
2057 rhaas 192 GIC 2 : if (apw_state->bgworker_pid != InvalidPid)
2057 rhaas 193 EUB : {
2057 rhaas 194 UBC 0 : LWLockRelease(&apw_state->lock);
2057 rhaas 195 UIC 0 : ereport(LOG,
196 : (errmsg("autoprewarm worker is already running under PID %d",
197 : (int) apw_state->bgworker_pid)));
198 0 : return;
2057 rhaas 199 ECB : }
2057 rhaas 200 CBC 2 : apw_state->bgworker_pid = MyProcPid;
2057 rhaas 201 GIC 2 : LWLockRelease(&apw_state->lock);
202 :
203 : /*
204 : * Preload buffers from the dump file only if we just created the shared
205 : * memory region. Otherwise, it's either already been done or shouldn't
206 : * be done - e.g. because the old dump file has been overwritten since the
207 : * server was started.
208 : *
209 : * There's not much point in performing a dump immediately after we finish
210 : * preloading; so, if we do end up preloading, consider the last dump time
211 : * to be equal to the current time.
212 : *
213 : * If apw_load_buffers() is terminated early by a shutdown request,
214 : * prevent dumping out our state below the loop, because we'd effectively
215 : * just truncate the saved state to however much we'd managed to preload.
2057 rhaas 216 ECB : */
2057 rhaas 217 GIC 2 : if (first_time)
2057 rhaas 218 ECB : {
2057 rhaas 219 CBC 2 : apw_load_buffers();
838 tgl 220 2 : final_dump_allowed = !ShutdownRequestPending;
2057 rhaas 221 GIC 2 : last_dump_time = GetCurrentTimestamp();
222 : }
223 :
2057 rhaas 224 ECB : /* Periodically dump buffers until terminated. */
883 fujii 225 GIC 5 : while (!ShutdownRequestPending)
226 : {
2057 rhaas 227 ECB : /* In case of a SIGHUP, just reload the configuration. */
883 fujii 228 GIC 3 : if (ConfigReloadPending)
2057 rhaas 229 EUB : {
883 fujii 230 UBC 0 : ConfigReloadPending = false;
2057 rhaas 231 UIC 0 : ProcessConfigFile(PGC_SIGHUP);
232 : }
2057 rhaas 233 ECB :
2057 rhaas 234 GIC 3 : if (autoprewarm_interval <= 0)
235 : {
2057 rhaas 236 ECB : /* We're only dumping at shutdown, so just wait forever. */
883 fujii 237 GIC 3 : (void) WaitLatch(MyLatch,
238 : WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
239 : -1L,
240 : PG_WAIT_EXTENSION);
241 : }
242 : else
243 : {
244 : TimestampTz next_dump_time;
245 : long delay_in_ms;
246 :
2057 rhaas 247 EUB : /* Compute the next dump time. */
2057 rhaas 248 UBC 0 : next_dump_time =
2057 rhaas 249 UIC 0 : TimestampTzPlusMilliseconds(last_dump_time,
250 : autoprewarm_interval * 1000);
880 tgl 251 EUB : delay_in_ms =
880 tgl 252 UIC 0 : TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
253 : next_dump_time);
254 :
2057 rhaas 255 EUB : /* Perform a dump if it's time. */
2057 rhaas 256 UIC 0 : if (delay_in_ms <= 0)
2057 rhaas 257 EUB : {
2057 rhaas 258 UBC 0 : last_dump_time = GetCurrentTimestamp();
259 0 : apw_dump_now(true, false);
2057 rhaas 260 UIC 0 : continue;
261 : }
262 :
2057 rhaas 263 EUB : /* Sleep until the next dump time. */
883 fujii 264 UIC 0 : (void) WaitLatch(MyLatch,
265 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
266 : delay_in_ms,
267 : PG_WAIT_EXTENSION);
268 : }
269 :
1598 tmunro 270 ECB : /* Reset the latch, loop. */
883 fujii 271 GIC 3 : ResetLatch(MyLatch);
272 : }
273 :
274 : /*
275 : * Dump one last time. We assume this is probably the result of a system
276 : * shutdown, although it's possible that we've merely been terminated.
2057 rhaas 277 ECB : */
838 tgl 278 CBC 2 : if (final_dump_allowed)
838 tgl 279 GIC 2 : apw_dump_now(true, true);
280 : }
281 :
282 : /*
283 : * Read the dump file and launch per-database workers one at a time to
284 : * prewarm the buffers found there.
285 : */
2057 rhaas 286 ECB : static void
2057 rhaas 287 GIC 2 : apw_load_buffers(void)
2057 rhaas 288 ECB : {
2057 rhaas 289 GIC 2 : FILE *file = NULL;
290 : int num_elements,
291 : i;
292 : BlockInfoRecord *blkinfo;
293 : dsm_segment *seg;
294 :
295 : /*
296 : * Skip the prewarm if the dump file is in use; otherwise, prevent any
297 : * other process from writing it while we're using it.
2057 rhaas 298 ECB : */
2057 rhaas 299 CBC 2 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
300 2 : if (apw_state->pid_using_dumpfile == InvalidPid)
2057 rhaas 301 GIC 2 : apw_state->pid_using_dumpfile = MyProcPid;
302 : else
2057 rhaas 303 EUB : {
2057 rhaas 304 UBC 0 : LWLockRelease(&apw_state->lock);
2057 rhaas 305 UIC 0 : ereport(LOG,
306 : (errmsg("skipping prewarm because block dump file is being written by PID %d",
307 : (int) apw_state->pid_using_dumpfile)));
2057 rhaas 308 GIC 1 : return;
2057 rhaas 309 ECB : }
2057 rhaas 310 GIC 2 : LWLockRelease(&apw_state->lock);
311 :
312 : /*
313 : * Open the block dump file. Exit quietly if it doesn't exist, but report
314 : * any other error.
2057 rhaas 315 ECB : */
2057 rhaas 316 CBC 2 : file = AllocateFile(AUTOPREWARM_FILE, "r");
2057 rhaas 317 GIC 2 : if (!file)
2057 rhaas 318 ECB : {
2057 rhaas 319 GIC 1 : if (errno == ENOENT)
2057 rhaas 320 ECB : {
2057 rhaas 321 CBC 1 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
322 1 : apw_state->pid_using_dumpfile = InvalidPid;
323 1 : LWLockRelease(&apw_state->lock);
2057 rhaas 324 GIC 1 : return; /* No file to load. */
2057 rhaas 325 EUB : }
2057 rhaas 326 UIC 0 : ereport(ERROR,
327 : (errcode_for_file_access(),
328 : errmsg("could not read file \"%s\": %m",
329 : AUTOPREWARM_FILE)));
330 : }
331 :
2057 rhaas 332 ECB : /* First line of the file is a record count. */
1802 tgl 333 GBC 1 : if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
2057 rhaas 334 UIC 0 : ereport(ERROR,
335 : (errcode_for_file_access(),
336 : errmsg("could not read from file \"%s\": %m",
337 : AUTOPREWARM_FILE)));
338 :
2057 rhaas 339 ECB : /* Allocate a dynamic shared memory segment to store the record data. */
2057 rhaas 340 CBC 1 : seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
2057 rhaas 341 GIC 1 : blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
342 :
2057 rhaas 343 ECB : /* Read records, one per line. */
2057 rhaas 344 GIC 177 : for (i = 0; i < num_elements; i++)
345 : {
346 : unsigned forknum;
2057 rhaas 347 ECB :
193 rhaas 348 CBC 176 : if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
277 rhaas 349 GNC 176 : &blkinfo[i].tablespace, &blkinfo[i].filenumber,
2057 rhaas 350 GBC 176 : &forknum, &blkinfo[i].blocknum) != 5)
2057 rhaas 351 UIC 0 : ereport(ERROR,
352 : (errmsg("autoprewarm block dump file is corrupted at line %d",
2057 rhaas 353 ECB : i + 1)));
2057 rhaas 354 GIC 176 : blkinfo[i].forknum = forknum;
355 : }
2057 rhaas 356 ECB :
2057 rhaas 357 GIC 1 : FreeFile(file);
358 :
2057 rhaas 359 ECB : /* Sort the blocks to be loaded. */
2057 rhaas 360 GIC 1 : pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
361 : apw_compare_blockinfo);
362 :
2057 rhaas 363 ECB : /* Populate shared memory state. */
2057 rhaas 364 CBC 1 : apw_state->block_info_handle = dsm_segment_handle(seg);
365 1 : apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
2057 rhaas 366 GIC 1 : apw_state->prewarmed_blocks = 0;
367 :
2057 rhaas 368 ECB : /* Get the info position of the first block of the next database. */
2057 rhaas 369 GIC 2 : while (apw_state->prewarm_start_idx < num_elements)
2057 rhaas 370 ECB : {
1802 tgl 371 CBC 1 : int j = apw_state->prewarm_start_idx;
1802 tgl 372 GIC 1 : Oid current_db = blkinfo[j].database;
373 :
374 : /*
375 : * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
376 : * not belong to this database.
2057 rhaas 377 ECB : */
1802 tgl 378 CBC 1 : j++;
1802 tgl 379 GIC 176 : while (j < num_elements)
2057 rhaas 380 ECB : {
1802 tgl 381 GIC 175 : if (current_db != blkinfo[j].database)
382 : {
383 : /*
384 : * Combine BlockInfoRecords for global objects with those of
385 : * the database.
2057 rhaas 386 ECB : */
2057 rhaas 387 GBC 1 : if (current_db != InvalidOid)
2057 rhaas 388 LBC 0 : break;
1802 tgl 389 GIC 1 : current_db = blkinfo[j].database;
390 : }
2057 rhaas 391 ECB :
1802 tgl 392 GIC 175 : j++;
393 : }
394 :
395 : /*
396 : * If we reach this point with current_db == InvalidOid, then only
397 : * BlockInfoRecords belonging to global objects exist. We can't
398 : * prewarm without a database connection, so just bail out.
2057 rhaas 399 ECB : */
2057 rhaas 400 GBC 1 : if (current_db == InvalidOid)
2057 rhaas 401 UIC 0 : break;
402 :
2057 rhaas 403 ECB : /* Configure stop point and database for next per-database worker. */
1802 tgl 404 CBC 1 : apw_state->prewarm_stop_idx = j;
2057 rhaas 405 1 : apw_state->database = current_db;
2057 rhaas 406 GIC 1 : Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
407 :
2057 rhaas 408 ECB : /* If we've run out of free buffers, don't launch another worker. */
2057 rhaas 409 GBC 1 : if (!have_free_buffer())
2057 rhaas 410 UIC 0 : break;
411 :
412 : /*
413 : * Likewise, don't launch if we've already been told to shut down.
414 : * (The launch would fail anyway, but we might as well skip it.)
838 tgl 415 ECB : */
838 tgl 416 GBC 1 : if (ShutdownRequestPending)
838 tgl 417 UIC 0 : break;
418 :
419 : /*
420 : * Start a per-database worker to load blocks for this database; this
421 : * function will return once the per-database worker exits.
2057 rhaas 422 ECB : */
2057 rhaas 423 GIC 1 : apw_start_database_worker();
424 :
2057 rhaas 425 ECB : /* Prepare for next database. */
2057 rhaas 426 GIC 1 : apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
427 : }
428 :
2057 rhaas 429 ECB : /* Clean up. */
2057 rhaas 430 CBC 1 : dsm_detach(seg);
431 1 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
432 1 : apw_state->block_info_handle = DSM_HANDLE_INVALID;
433 1 : apw_state->pid_using_dumpfile = InvalidPid;
2057 rhaas 434 GIC 1 : LWLockRelease(&apw_state->lock);
435 :
838 tgl 436 ECB : /* Report our success, if we were able to finish. */
838 tgl 437 CBC 1 : if (!ShutdownRequestPending)
838 tgl 438 GIC 1 : ereport(LOG,
439 : (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
440 : apw_state->prewarmed_blocks, num_elements)));
441 : }
442 :
443 : /*
444 : * Prewarm all blocks for one database (and possibly also global objects, if
445 : * those got grouped with this database).
446 : */
2057 rhaas 447 ECB : void
2057 rhaas 448 GIC 1 : autoprewarm_database_main(Datum main_arg)
449 : {
450 : int pos;
2057 rhaas 451 ECB : BlockInfoRecord *block_info;
2057 rhaas 452 CBC 1 : Relation rel = NULL;
453 1 : BlockNumber nblocks = 0;
2057 rhaas 454 GIC 1 : BlockInfoRecord *old_blk = NULL;
455 : dsm_segment *seg;
456 :
2057 rhaas 457 ECB : /* Establish signal handlers; once that's done, unblock signals. */
2057 rhaas 458 CBC 1 : pqsignal(SIGTERM, die);
2057 rhaas 459 GIC 1 : BackgroundWorkerUnblockSignals();
460 :
2057 rhaas 461 ECB : /* Connect to correct database and get block information. */
2057 rhaas 462 CBC 1 : apw_init_shmem();
463 1 : seg = dsm_attach(apw_state->block_info_handle);
2057 rhaas 464 GBC 1 : if (seg == NULL)
2057 rhaas 465 UIC 0 : ereport(ERROR,
466 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2057 rhaas 467 ECB : errmsg("could not map dynamic shared memory segment")));
1830 magnus 468 CBC 1 : BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
2057 rhaas 469 1 : block_info = (BlockInfoRecord *) dsm_segment_address(seg);
2057 rhaas 470 GIC 1 : pos = apw_state->prewarm_start_idx;
471 :
472 : /*
473 : * Loop until we run out of blocks to prewarm or until we run out of free
474 : * buffers.
2057 rhaas 475 ECB : */
2057 rhaas 476 GIC 177 : while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
2057 rhaas 477 ECB : {
2057 rhaas 478 GIC 176 : BlockInfoRecord *blk = &block_info[pos++];
479 : Buffer buf;
2057 rhaas 480 ECB :
2057 rhaas 481 GIC 176 : CHECK_FOR_INTERRUPTS();
482 :
483 : /*
484 : * Quit if we've reached records for another database. If previous
485 : * blocks are of some global objects, then continue pre-warming.
2057 rhaas 486 ECB : */
2057 rhaas 487 CBC 176 : if (old_blk != NULL && old_blk->database != blk->database &&
2057 rhaas 488 GBC 1 : old_blk->database != 0)
2057 rhaas 489 UIC 0 : break;
490 :
491 : /*
492 : * As soon as we encounter a block of a new relation, close the old
493 : * relation. Note that rel will be NULL if try_relation_open failed
494 : * previously; in that case, there is nothing to close.
2057 rhaas 495 ECB : */
277 rhaas 496 GNC 176 : if (old_blk != NULL && old_blk->filenumber != blk->filenumber &&
497 : rel != NULL)
2057 rhaas 498 ECB : {
2057 rhaas 499 CBC 45 : relation_close(rel, AccessShareLock);
500 45 : rel = NULL;
2057 rhaas 501 GIC 45 : CommitTransactionCommand();
502 : }
503 :
504 : /*
505 : * Try to open each new relation, but only once, when we first
506 : * encounter it. If it's been dropped, skip the associated blocks.
2057 rhaas 507 ECB : */
277 rhaas 508 GNC 176 : if (old_blk == NULL || old_blk->filenumber != blk->filenumber)
509 : {
510 : Oid reloid;
2057 rhaas 511 ECB :
2057 rhaas 512 CBC 46 : Assert(rel == NULL);
513 46 : StartTransactionCommand();
277 rhaas 514 GNC 46 : reloid = RelidByRelfilenumber(blk->tablespace, blk->filenumber);
2057 rhaas 515 CBC 46 : if (OidIsValid(reloid))
2057 rhaas 516 GIC 46 : rel = try_relation_open(reloid, AccessShareLock);
2057 rhaas 517 ECB :
2057 rhaas 518 GBC 46 : if (!rel)
2057 rhaas 519 UIC 0 : CommitTransactionCommand();
2057 rhaas 520 ECB : }
2057 rhaas 521 GIC 176 : if (!rel)
2057 rhaas 522 EUB : {
2057 rhaas 523 UBC 0 : old_blk = blk;
2057 rhaas 524 UIC 0 : continue;
525 : }
526 :
2057 rhaas 527 ECB : /* Once per fork, check for fork existence and size. */
2057 rhaas 528 CBC 176 : if (old_blk == NULL ||
277 rhaas 529 GNC 175 : old_blk->filenumber != blk->filenumber ||
2057 rhaas 530 GIC 130 : old_blk->forknum != blk->forknum)
531 : {
532 : /*
533 : * smgrexists is not safe for illegal forknum, hence check whether
534 : * the passed forknum is valid before using it in smgrexists.
2057 rhaas 535 ECB : */
2057 rhaas 536 CBC 60 : if (blk->forknum > InvalidForkNumber &&
537 120 : blk->forknum <= MAX_FORKNUM &&
636 tgl 538 60 : smgrexists(RelationGetSmgr(rel), blk->forknum))
2057 rhaas 539 GIC 60 : nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
2057 rhaas 540 EUB : else
2057 rhaas 541 UIC 0 : nblocks = 0;
542 : }
543 :
2057 rhaas 544 ECB : /* Check whether blocknum is valid and within fork file size. */
2057 rhaas 545 GIC 176 : if (blk->blocknum >= nblocks)
546 : {
2057 rhaas 547 EUB : /* Move to next forknum. */
2057 rhaas 548 UBC 0 : old_blk = blk;
2057 rhaas 549 UIC 0 : continue;
550 : }
551 :
2057 rhaas 552 ECB : /* Prewarm buffer. */
2057 rhaas 553 GIC 176 : buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
2057 rhaas 554 ECB : NULL);
2057 rhaas 555 GIC 176 : if (BufferIsValid(buf))
2057 rhaas 556 ECB : {
2057 rhaas 557 CBC 176 : apw_state->prewarmed_blocks++;
2057 rhaas 558 GIC 176 : ReleaseBuffer(buf);
559 : }
2057 rhaas 560 ECB :
2057 rhaas 561 GIC 176 : old_blk = blk;
562 : }
2057 rhaas 563 ECB :
2057 rhaas 564 GIC 1 : dsm_detach(seg);
565 :
2057 rhaas 566 ECB : /* Release lock on previous relation. */
2057 rhaas 567 GIC 1 : if (rel)
2057 rhaas 568 ECB : {
2057 rhaas 569 CBC 1 : relation_close(rel, AccessShareLock);
2057 rhaas 570 GIC 1 : CommitTransactionCommand();
2057 rhaas 571 ECB : }
2057 rhaas 572 GIC 1 : }
573 :
574 : /*
575 : * Dump information on blocks in shared buffers. We use a text format here
576 : * so that it's easy to understand and even change the file contents if
577 : * necessary.
578 : * Returns the number of blocks dumped.
579 : */
1802 tgl 580 ECB : static int
2057 rhaas 581 GIC 3 : apw_dump_now(bool is_bgworker, bool dump_unlogged)
582 : {
583 : int num_blocks;
584 : int i;
585 : int ret;
586 : BlockInfoRecord *block_info_array;
587 : BufferDesc *bufHdr;
588 : FILE *file;
589 : char transient_dump_file_path[MAXPGPATH];
590 : pid_t pid;
2057 rhaas 591 ECB :
2057 rhaas 592 CBC 3 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
593 3 : pid = apw_state->pid_using_dumpfile;
594 3 : if (apw_state->pid_using_dumpfile == InvalidPid)
595 3 : apw_state->pid_using_dumpfile = MyProcPid;
2057 rhaas 596 GIC 3 : LWLockRelease(&apw_state->lock);
2057 rhaas 597 ECB :
2057 rhaas 598 GIC 3 : if (pid != InvalidPid)
2057 rhaas 599 EUB : {
2057 rhaas 600 UBC 0 : if (!is_bgworker)
2057 rhaas 601 UIC 0 : ereport(ERROR,
602 : (errmsg("could not perform block dump because dump file is being used by PID %d",
603 : (int) apw_state->pid_using_dumpfile)));
2057 rhaas 604 EUB :
2057 rhaas 605 UIC 0 : ereport(LOG,
606 : (errmsg("skipping block dump because it is already being performed by PID %d",
607 : (int) apw_state->pid_using_dumpfile)));
608 0 : return 0;
609 : }
610 :
2057 rhaas 611 ECB : block_info_array =
2057 rhaas 612 GIC 3 : (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
2057 rhaas 613 ECB :
2057 rhaas 614 GIC 49155 : for (num_blocks = 0, i = 0; i < NBuffers; i++)
615 : {
616 : uint32 buf_state;
2057 rhaas 617 ECB :
2057 rhaas 618 GIC 49152 : CHECK_FOR_INTERRUPTS();
2057 rhaas 619 ECB :
2057 rhaas 620 GIC 49152 : bufHdr = GetBufferDescriptor(i);
621 :
2057 rhaas 622 ECB : /* Lock each buffer header before inspecting. */
2057 rhaas 623 GIC 49152 : buf_state = LockBufHdr(bufHdr);
624 :
625 : /*
626 : * Unlogged tables will be automatically truncated after a crash or
627 : * unclean shutdown. In such cases we need not prewarm them. Dump them
628 : * only if requested by caller.
2057 rhaas 629 ECB : */
2057 rhaas 630 CBC 49152 : if (buf_state & BM_TAG_VALID &&
2057 rhaas 631 GIC 528 : ((buf_state & BM_PERMANENT) || dump_unlogged))
2057 rhaas 632 ECB : {
228 rhaas 633 GNC 528 : block_info_array[num_blocks].database = bufHdr->tag.dbOid;
634 528 : block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid;
635 1056 : block_info_array[num_blocks].filenumber =
636 528 : BufTagGetRelNumber(&bufHdr->tag);
637 1056 : block_info_array[num_blocks].forknum =
638 528 : BufTagGetForkNum(&bufHdr->tag);
2057 rhaas 639 CBC 528 : block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
640 528 : ++num_blocks;
2057 rhaas 641 ECB : }
642 :
2057 rhaas 643 GIC 49152 : UnlockBufHdr(bufHdr, buf_state);
2057 rhaas 644 ECB : }
645 :
2057 rhaas 646 GIC 3 : snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
2057 rhaas 647 CBC 3 : file = AllocateFile(transient_dump_file_path, "w");
648 3 : if (!file)
2057 rhaas 649 LBC 0 : ereport(ERROR,
2057 rhaas 650 EUB : (errcode_for_file_access(),
651 : errmsg("could not open file \"%s\": %m",
652 : transient_dump_file_path)));
653 :
1802 tgl 654 GIC 3 : ret = fprintf(file, "<<%d>>\n", num_blocks);
2057 rhaas 655 CBC 3 : if (ret < 0)
2057 rhaas 656 ECB : {
2057 rhaas 657 UIC 0 : int save_errno = errno;
2057 rhaas 658 EUB :
2057 rhaas 659 UIC 0 : FreeFile(file);
2057 rhaas 660 UBC 0 : unlink(transient_dump_file_path);
661 0 : errno = save_errno;
662 0 : ereport(ERROR,
2057 rhaas 663 EUB : (errcode_for_file_access(),
664 : errmsg("could not write to file \"%s\": %m",
665 : transient_dump_file_path)));
666 : }
667 :
2057 rhaas 668 GIC 531 : for (i = 0; i < num_blocks; i++)
2057 rhaas 669 ECB : {
2057 rhaas 670 GIC 528 : CHECK_FOR_INTERRUPTS();
2057 rhaas 671 ECB :
193 rhaas 672 GIC 528 : ret = fprintf(file, "%u,%u,%u,%u,%u\n",
2057 rhaas 673 CBC 528 : block_info_array[i].database,
674 528 : block_info_array[i].tablespace,
277 rhaas 675 GNC 528 : block_info_array[i].filenumber,
2057 rhaas 676 CBC 528 : (uint32) block_info_array[i].forknum,
677 528 : block_info_array[i].blocknum);
678 528 : if (ret < 0)
2057 rhaas 679 ECB : {
2057 rhaas 680 UIC 0 : int save_errno = errno;
2057 rhaas 681 EUB :
2057 rhaas 682 UIC 0 : FreeFile(file);
2057 rhaas 683 UBC 0 : unlink(transient_dump_file_path);
684 0 : errno = save_errno;
685 0 : ereport(ERROR,
2057 rhaas 686 EUB : (errcode_for_file_access(),
687 : errmsg("could not write to file \"%s\": %m",
688 : transient_dump_file_path)));
689 : }
690 : }
691 :
2057 rhaas 692 GIC 3 : pfree(block_info_array);
2057 rhaas 693 ECB :
694 : /*
695 : * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
696 : * permanent.
697 : */
2057 rhaas 698 GIC 3 : ret = FreeFile(file);
2057 rhaas 699 CBC 3 : if (ret != 0)
2057 rhaas 700 ECB : {
2057 rhaas 701 UIC 0 : int save_errno = errno;
2057 rhaas 702 EUB :
2057 rhaas 703 UIC 0 : unlink(transient_dump_file_path);
2057 rhaas 704 UBC 0 : errno = save_errno;
705 0 : ereport(ERROR,
2057 rhaas 706 EUB : (errcode_for_file_access(),
707 : errmsg("could not close file \"%s\": %m",
708 : transient_dump_file_path)));
709 : }
710 :
2057 rhaas 711 GIC 3 : (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
2057 rhaas 712 CBC 3 : apw_state->pid_using_dumpfile = InvalidPid;
2057 rhaas 713 ECB :
2057 rhaas 714 GIC 3 : ereport(DEBUG1,
781 peter 715 ECB : (errmsg_internal("wrote block details for %d blocks", num_blocks)));
2057 rhaas 716 GIC 3 : return num_blocks;
2057 rhaas 717 ECB : }
718 :
719 : /*
720 : * SQL-callable function to launch autoprewarm.
721 : */
722 : Datum
2057 rhaas 723 UIC 0 : autoprewarm_start_worker(PG_FUNCTION_ARGS)
2057 rhaas 724 EUB : {
725 : pid_t pid;
726 :
2057 rhaas 727 UIC 0 : if (!autoprewarm)
2057 rhaas 728 UBC 0 : ereport(ERROR,
2057 rhaas 729 EUB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
730 : errmsg("autoprewarm is disabled")));
731 :
2057 rhaas 732 UIC 0 : apw_init_shmem();
2057 rhaas 733 UBC 0 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
734 0 : pid = apw_state->bgworker_pid;
735 0 : LWLockRelease(&apw_state->lock);
2057 rhaas 736 EUB :
2057 rhaas 737 UIC 0 : if (pid != InvalidPid)
2057 rhaas 738 UBC 0 : ereport(ERROR,
2057 rhaas 739 EUB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
740 : errmsg("autoprewarm worker is already running under PID %d",
741 : (int) pid)));
742 :
1029 andres 743 UIC 0 : apw_start_leader_worker();
2057 rhaas 744 EUB :
2057 rhaas 745 UIC 0 : PG_RETURN_VOID();
2057 rhaas 746 EUB : }
747 :
748 : /*
749 : * SQL-callable function to perform an immediate block dump.
750 : *
751 : * Note: this is declared to return int8, as insurance against some
752 : * very distant day when we might make NBuffers wider than int.
753 : */
754 : Datum
2057 rhaas 755 GIC 1 : autoprewarm_dump_now(PG_FUNCTION_ARGS)
2057 rhaas 756 ECB : {
757 : int num_blocks;
758 :
2057 rhaas 759 GIC 1 : apw_init_shmem();
2057 rhaas 760 ECB :
2057 rhaas 761 GIC 1 : PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
2057 rhaas 762 ECB : {
2057 rhaas 763 GIC 1 : num_blocks = apw_dump_now(false, true);
2057 rhaas 764 ECB : }
2057 rhaas 765 GIC 1 : PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
2057 rhaas 766 ECB :
1802 tgl 767 GIC 1 : PG_RETURN_INT64((int64) num_blocks);
2057 rhaas 768 ECB : }
769 :
770 : /*
771 : * Allocate and initialize autoprewarm related shared memory, if not already
772 : * done, and set up backend-local pointer to that state. Returns true if an
773 : * existing shared memory segment was found.
774 : */
775 : static bool
2057 rhaas 776 GIC 4 : apw_init_shmem(void)
2057 rhaas 777 ECB : {
778 : bool found;
779 :
2057 rhaas 780 GIC 4 : LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
2057 rhaas 781 CBC 4 : apw_state = ShmemInitStruct("autoprewarm",
2057 rhaas 782 ECB : sizeof(AutoPrewarmSharedState),
783 : &found);
2057 rhaas 784 GIC 4 : if (!found)
2057 rhaas 785 ECB : {
786 : /* First time through ... */
2057 rhaas 787 GIC 2 : LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
2057 rhaas 788 CBC 2 : apw_state->bgworker_pid = InvalidPid;
789 2 : apw_state->pid_using_dumpfile = InvalidPid;
2057 rhaas 790 ECB : }
2057 rhaas 791 GIC 4 : LWLockRelease(AddinShmemInitLock);
2057 rhaas 792 ECB :
1894 rhaas 793 GIC 4 : LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
1894 rhaas 794 ECB :
2057 rhaas 795 GIC 4 : return found;
2057 rhaas 796 ECB : }
797 :
798 : /*
799 : * Clear our PID from autoprewarm shared state.
800 : */
801 : static void
2057 rhaas 802 GIC 2 : apw_detach_shmem(int code, Datum arg)
2057 rhaas 803 ECB : {
2057 rhaas 804 GIC 2 : LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
2057 rhaas 805 CBC 2 : if (apw_state->pid_using_dumpfile == MyProcPid)
2057 rhaas 806 LBC 0 : apw_state->pid_using_dumpfile = InvalidPid;
2057 rhaas 807 GBC 2 : if (apw_state->bgworker_pid == MyProcPid)
2057 rhaas 808 CBC 2 : apw_state->bgworker_pid = InvalidPid;
809 2 : LWLockRelease(&apw_state->lock);
810 2 : }
2057 rhaas 811 ECB :
812 : /*
813 : * Start autoprewarm leader worker process.
814 : */
815 : static void
1029 andres 816 GIC 2 : apw_start_leader_worker(void)
2057 rhaas 817 ECB : {
818 : BackgroundWorker worker;
819 : BackgroundWorkerHandle *handle;
820 : BgwHandleStatus status;
821 : pid_t pid;
822 :
267 peter 823 GIC 138 : MemSet(&worker, 0, sizeof(BackgroundWorker));
2057 rhaas 824 CBC 2 : worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
825 2 : worker.bgw_start_time = BgWorkerStart_ConsistentState;
826 2 : strcpy(worker.bgw_library_name, "pg_prewarm");
827 2 : strcpy(worker.bgw_function_name, "autoprewarm_main");
1029 andres 828 2 : strcpy(worker.bgw_name, "autoprewarm leader");
829 2 : strcpy(worker.bgw_type, "autoprewarm leader");
2057 rhaas 830 ECB :
2057 rhaas 831 GIC 2 : if (process_shared_preload_libraries_in_progress)
2057 rhaas 832 ECB : {
2057 rhaas 833 GIC 2 : RegisterBackgroundWorker(&worker);
2057 rhaas 834 CBC 2 : return;
2057 rhaas 835 ECB : }
836 :
837 : /* must set notify PID to wait for startup */
2057 rhaas 838 UIC 0 : worker.bgw_notify_pid = MyProcPid;
2057 rhaas 839 EUB :
2057 rhaas 840 UIC 0 : if (!RegisterDynamicBackgroundWorker(&worker, &handle))
2057 rhaas 841 UBC 0 : ereport(ERROR,
2057 rhaas 842 EUB : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
843 : errmsg("could not register background process"),
844 : errhint("You may need to increase max_worker_processes.")));
845 :
2057 rhaas 846 UIC 0 : status = WaitForBackgroundWorkerStartup(handle, &pid);
2057 rhaas 847 UBC 0 : if (status != BGWH_STARTED)
848 0 : ereport(ERROR,
2057 rhaas 849 EUB : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
850 : errmsg("could not start background process"),
851 : errhint("More details may be available in the server log.")));
852 : }
853 :
854 : /*
855 : * Start autoprewarm per-database worker process.
856 : */
857 : static void
2057 rhaas 858 GIC 1 : apw_start_database_worker(void)
2057 rhaas 859 ECB : {
860 : BackgroundWorker worker;
861 : BackgroundWorkerHandle *handle;
862 :
267 peter 863 GIC 69 : MemSet(&worker, 0, sizeof(BackgroundWorker));
2057 rhaas 864 CBC 1 : worker.bgw_flags =
2057 rhaas 865 ECB : BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
2057 rhaas 866 GIC 1 : worker.bgw_start_time = BgWorkerStart_ConsistentState;
1483 rhaas 867 CBC 1 : worker.bgw_restart_time = BGW_NEVER_RESTART;
2057 868 1 : strcpy(worker.bgw_library_name, "pg_prewarm");
869 1 : strcpy(worker.bgw_function_name, "autoprewarm_database_main");
2047 peter_e 870 1 : strcpy(worker.bgw_name, "autoprewarm worker");
871 1 : strcpy(worker.bgw_type, "autoprewarm worker");
2057 rhaas 872 ECB :
873 : /* must set notify PID to wait for shutdown */
2057 rhaas 874 GIC 1 : worker.bgw_notify_pid = MyProcPid;
2057 rhaas 875 ECB :
2057 rhaas 876 GIC 1 : if (!RegisterDynamicBackgroundWorker(&worker, &handle))
2057 rhaas 877 LBC 0 : ereport(ERROR,
2057 rhaas 878 EUB : (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
879 : errmsg("registering dynamic bgworker autoprewarm failed"),
880 : errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
881 :
882 : /*
883 : * Ignore return value; if it fails, postmaster has died, but we have
884 : * checks for that elsewhere.
885 : */
2057 rhaas 886 GIC 1 : WaitForBackgroundWorkerShutdown(handle);
2057 rhaas 887 CBC 1 : }
2057 rhaas 888 ECB :
/*
 * Compare one member of the records pointed to by "a" and "b", which must
 * be in scope at the point of use.  If the members differ, return -1 or 1
 * from the enclosing function; if they are equal, fall through.
 */
#define cmp_member_elem(fld)	\
do { \
	if (a->fld != b->fld) \
		return (a->fld < b->fld) ? -1 : 1; \
} while(0)
897 :
898 : /*
899 : * apw_compare_blockinfo
900 : *
901 : * We depend on all records for a particular database being consecutive
902 : * in the dump file; each per-database worker will preload blocks until
903 : * it sees a block for some other database. Sorting by tablespace,
904 : * filenumber, forknum, and blocknum isn't critical for correctness, but
905 : * helps us get a sequential I/O pattern.
906 : */
907 : static int
2057 rhaas 908 GIC 1444 : apw_compare_blockinfo(const void *p, const void *q)
2057 rhaas 909 ECB : {
1802 tgl 910 GIC 1444 : const BlockInfoRecord *a = (const BlockInfoRecord *) p;
1802 tgl 911 CBC 1444 : const BlockInfoRecord *b = (const BlockInfoRecord *) q;
2057 rhaas 912 ECB :
2057 rhaas 913 GIC 1444 : cmp_member_elem(database);
2057 rhaas 914 CBC 1375 : cmp_member_elem(tablespace);
277 rhaas 915 GNC 1375 : cmp_member_elem(filenumber);
2057 rhaas 916 CBC 417 : cmp_member_elem(forknum);
917 363 : cmp_member_elem(blocknum);
2057 rhaas 918 ECB :
2057 rhaas 919 UIC 0 : return 0;
2057 rhaas 920 EUB : }
|