Age Owner TLA Line data Source code
1 : /*
2 : * file.c
3 : *
4 : * file system operations
5 : *
6 : * Copyright (c) 2010-2023, PostgreSQL Global Development Group
7 : * src/bin/pg_upgrade/file.c
8 : */
9 :
10 : #include "postgres_fe.h"
11 :
12 : #include <sys/stat.h>
13 : #include <fcntl.h>
14 : #ifdef HAVE_COPYFILE_H
15 : #include <copyfile.h>
16 : #endif
17 : #ifdef __linux__
18 : #include <sys/ioctl.h>
19 : #include <linux/fs.h>
20 : #endif
21 :
22 : #include "access/visibilitymapdefs.h"
23 : #include "common/file_perm.h"
24 : #include "pg_upgrade.h"
25 : #include "storage/bufpage.h"
26 : #include "storage/checksum.h"
27 : #include "storage/checksum_impl.h"
28 :
29 :
30 : /*
31 : * cloneFile()
32 : *
33 : * Clones/reflinks a relation file from src to dst.
34 : *
35 : * schemaName/relName are relation's SQL name (used for error messages only).
36 : */
37 : void
1614 peter_e 38 UBC 0 : cloneFile(const char *src, const char *dst,
39 : const char *schemaName, const char *relName)
40 : {
41 : #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
42 : if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
43 : pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
44 : schemaName, relName, src, dst, strerror(errno));
45 : #elif defined(__linux__) && defined(FICLONE)
46 : int src_fd;
47 : int dest_fd;
48 :
49 0 : if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
271 tgl 50 UNC 0 : pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s",
1614 peter_e 51 UBC 0 : schemaName, relName, src, strerror(errno));
52 :
53 0 : if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
54 : pg_file_create_mode)) < 0)
271 tgl 55 UNC 0 : pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s",
1614 peter_e 56 UBC 0 : schemaName, relName, dst, strerror(errno));
57 :
58 0 : if (ioctl(dest_fd, FICLONE, src_fd) < 0)
59 : {
251 michael 60 0 : int save_errno = errno;
61 :
1614 peter_e 62 0 : unlink(dst);
63 :
271 tgl 64 UNC 0 : pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
251 michael 65 EUB : schemaName, relName, src, dst, strerror(save_errno));
66 : }
67 :
1614 peter_e 68 UIC 0 : close(src_fd);
1614 peter_e 69 UBC 0 : close(dest_fd);
1614 peter_e 70 EUB : #endif
1614 peter_e 71 UIC 0 : }
1614 peter_e 72 EUB :
73 :
74 : /*
75 : * copyFile()
76 : *
77 : * Copies a relation file from src to dst.
78 : * schemaName/relName are relation's SQL name (used for error messages only).
79 : */
80 : void
2382 tgl 81 GIC 1561 : copyFile(const char *src, const char *dst,
2382 tgl 82 ECB : const char *schemaName, const char *relName)
83 : {
84 : #ifndef WIN32
85 : int src_fd;
86 : int dest_fd;
87 : char *buffer;
88 :
2382 tgl 89 GIC 1561 : if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
271 tgl 90 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
2382 tgl 91 UBC 0 : schemaName, relName, src, strerror(errno));
4715 bruce 92 EUB :
2382 tgl 93 GIC 1561 : if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1828 sfrost 94 ECB : pg_file_create_mode)) < 0)
271 tgl 95 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
2382 tgl 96 UBC 0 : schemaName, relName, dst, strerror(errno));
3429 peter_e 97 EUB :
98 : /* copy in fairly large chunks for best efficiency */
99 : #define COPY_BUF_SIZE (50 * BLCKSZ)
100 :
3788 bruce 101 GIC 1561 : buffer = (char *) pg_malloc(COPY_BUF_SIZE);
4715 bruce 102 ECB :
103 : /* perform data copying i.e read src source, write to destination */
104 : while (true)
4715 bruce 105 GIC 1293 : {
4715 bruce 106 CBC 2854 : ssize_t nbytes = read(src_fd, buffer, COPY_BUF_SIZE);
4715 bruce 107 ECB :
4715 bruce 108 GIC 2854 : if (nbytes < 0)
271 tgl 109 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s",
2382 tgl 110 UBC 0 : schemaName, relName, src, strerror(errno));
4715 bruce 111 EUB :
4715 bruce 112 GIC 2854 : if (nbytes == 0)
4715 bruce 113 CBC 1561 : break;
4715 bruce 114 ECB :
4715 bruce 115 GIC 1293 : errno = 0;
4715 bruce 116 CBC 1293 : if (write(dest_fd, buffer, nbytes) != nbytes)
4715 bruce 117 ECB : {
118 : /* if write didn't set errno, assume problem is no disk space */
3429 peter_e 119 UIC 0 : if (errno == 0)
3429 peter_e 120 UBC 0 : errno = ENOSPC;
271 tgl 121 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s",
2382 tgl 122 UBC 0 : schemaName, relName, dst, strerror(errno));
4715 bruce 123 EUB : }
124 : }
125 :
3788 bruce 126 GIC 1561 : pg_free(buffer);
2382 tgl 127 CBC 1561 : close(src_fd);
128 1561 : close(dest_fd);
4715 bruce 129 ECB :
130 : #else /* WIN32 */
131 :
132 : if (CopyFile(src, dst, true) == 0)
133 : {
134 : _dosmaperr(GetLastError());
135 : pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
136 : schemaName, relName, src, dst, strerror(errno));
137 : }
138 :
139 : #endif /* WIN32 */
2382 tgl 140 GIC 1561 : }
4715 bruce 141 ECB :
142 :
/*
 * linkFile()
 *
 * Create dst as a hard link to the relation file src.
 *
 * schemaName/relName are the relation's SQL name; they are used only to
 * build the error message.  Failure is reported through pg_fatal().
 */
void
linkFile(const char *src, const char *dst,
		 const char *schemaName, const char *relName)
{
	/* nothing more to do when the link was created */
	if (link(src, dst) >= 0)
		return;

	pg_fatal("error while creating link for relation \"%s.%s\" (\"%s\" to \"%s\"): %s",
			 schemaName, relName, src, dst, strerror(errno));
}
4715 bruce 157 EUB :
158 :
159 : /*
160 : * rewriteVisibilityMap()
161 : *
162 : * Transform a visibility map file, copying from src to dst.
163 : * schemaName/relName are relation's SQL name (used for error messages only).
164 : *
165 : * In versions of PostgreSQL prior to catversion 201603011, PostgreSQL's
166 : * visibility map included one bit per heap page; it now includes two.
167 : * When upgrading a cluster from before that time to a current PostgreSQL
168 : * version, we could refuse to copy visibility maps from the old cluster
169 : * to the new cluster; the next VACUUM would recreate them, but at the
170 : * price of scanning the entire table. So, instead, we rewrite the old
171 : * visibility maps in the new format. That way, the all-visible bits
172 : * remain set for the pages for which they were set previously. The
173 : * all-frozen bits are never set by this conversion; we leave that to VACUUM.
174 : */
175 : void
2382 tgl 176 UIC 0 : rewriteVisibilityMap(const char *fromfile, const char *tofile,
2382 tgl 177 EUB : const char *schemaName, const char *relName)
178 : {
179 : int src_fd;
180 : int dst_fd;
181 : PGIOAlignedBlock buffer;
182 : PGIOAlignedBlock new_vmbuf;
2498 rhaas 183 UIC 0 : ssize_t totalBytesRead = 0;
2585 rhaas 184 EUB : ssize_t src_filesize;
185 : int rewriteVmBytesPerPage;
2585 rhaas 186 UIC 0 : BlockNumber new_blkno = 0;
2585 rhaas 187 EUB : struct stat statbuf;
188 :
189 : /* Compute number of old-format bytes per new page */
2585 rhaas 190 UIC 0 : rewriteVmBytesPerPage = (BLCKSZ - SizeOfPageHeaderData) / 2;
2585 rhaas 191 EUB :
2382 tgl 192 UIC 0 : if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
271 tgl 193 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
2382 tgl 194 UBC 0 : schemaName, relName, fromfile, strerror(errno));
2585 rhaas 195 EUB :
2585 rhaas 196 UIC 0 : if (fstat(src_fd, &statbuf) != 0)
271 tgl 197 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s",
2382 tgl 198 UBC 0 : schemaName, relName, fromfile, strerror(errno));
2585 rhaas 199 EUB :
2382 tgl 200 UIC 0 : if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1828 sfrost 201 EUB : pg_file_create_mode)) < 0)
271 tgl 202 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
2382 tgl 203 UBC 0 : schemaName, relName, tofile, strerror(errno));
2585 rhaas 204 EUB :
205 : /* Save old file size */
2585 rhaas 206 UIC 0 : src_filesize = statbuf.st_size;
2585 rhaas 207 EUB :
208 : /*
209 : * Turn each visibility map page into 2 pages one by one. Each new page
210 : * has the same page header as the old one. If the last section of the
211 : * last page is empty, we skip it, mostly to avoid turning one-page
212 : * visibility maps for small relations into two pages needlessly.
213 : */
2498 rhaas 214 UIC 0 : while (totalBytesRead < src_filesize)
2585 rhaas 215 EUB : {
216 : ssize_t bytesRead;
217 : char *old_cur;
218 : char *old_break;
219 : char *old_blkend;
220 : PageHeaderData pageheader;
221 : bool old_lastblk;
222 :
1681 tgl 223 UIC 0 : if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
2498 rhaas 224 EUB : {
2382 tgl 225 UIC 0 : if (bytesRead < 0)
271 tgl 226 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s",
2382 tgl 227 UBC 0 : schemaName, relName, fromfile, strerror(errno));
2382 tgl 228 EUB : else
271 tgl 229 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
2382 tgl 230 EUB : schemaName, relName, fromfile);
231 : }
232 :
2498 rhaas 233 UIC 0 : totalBytesRead += BLCKSZ;
2498 rhaas 234 UBC 0 : old_lastblk = (totalBytesRead == src_filesize);
2585 rhaas 235 EUB :
236 : /* Save the page header data */
1681 tgl 237 UIC 0 : memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
2585 rhaas 238 EUB :
239 : /*
240 : * These old_* variables point to old visibility map page. old_cur
241 : * points to current position on old page. old_blkend points to end of
242 : * old block. old_break is the end+1 position on the old page for the
243 : * data that will be transferred to the current new page.
244 : */
1681 tgl 245 UIC 0 : old_cur = buffer.data + SizeOfPageHeaderData;
1681 tgl 246 UBC 0 : old_blkend = buffer.data + bytesRead;
2585 rhaas 247 0 : old_break = old_cur + rewriteVmBytesPerPage;
2585 rhaas 248 EUB :
2382 tgl 249 UIC 0 : while (old_break <= old_blkend)
2585 rhaas 250 EUB : {
251 : char *new_cur;
2585 rhaas 252 UIC 0 : bool empty = true;
2585 rhaas 253 EUB : bool old_lastpart;
254 :
255 : /* First, copy old page header to new page */
1681 tgl 256 UIC 0 : memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
2585 rhaas 257 EUB :
258 : /* Rewriting the last part of the last old page? */
2382 tgl 259 UIC 0 : old_lastpart = old_lastblk && (old_break == old_blkend);
2585 rhaas 260 EUB :
1681 tgl 261 UIC 0 : new_cur = new_vmbuf.data + SizeOfPageHeaderData;
2585 rhaas 262 EUB :
263 : /* Process old page bytes one by one, and turn it into new page. */
2382 tgl 264 UIC 0 : while (old_cur < old_break)
2585 rhaas 265 EUB : {
2382 tgl 266 UIC 0 : uint8 byte = *(uint8 *) old_cur;
2585 rhaas 267 UBC 0 : uint16 new_vmbits = 0;
2585 rhaas 268 EUB : int i;
269 :
270 : /* Generate new format bits while keeping old information */
2585 rhaas 271 UIC 0 : for (i = 0; i < BITS_PER_BYTE; i++)
2585 rhaas 272 EUB : {
2382 tgl 273 UIC 0 : if (byte & (1 << i))
2585 rhaas 274 EUB : {
2585 rhaas 275 UIC 0 : empty = false;
2382 tgl 276 UBC 0 : new_vmbits |=
277 0 : VISIBILITYMAP_ALL_VISIBLE << (BITS_PER_HEAPBLOCK * i);
2585 rhaas 278 EUB : }
279 : }
280 :
281 : /* Copy new visibility map bytes to new-format page */
2382 tgl 282 UIC 0 : new_cur[0] = (char) (new_vmbits & 0xFF);
2382 tgl 283 UBC 0 : new_cur[1] = (char) (new_vmbits >> 8);
2585 rhaas 284 EUB :
2382 tgl 285 UIC 0 : old_cur++;
2585 rhaas 286 UBC 0 : new_cur += BITS_PER_HEAPBLOCK;
2585 rhaas 287 EUB : }
288 :
289 : /* If the last part of the last page is empty, skip writing it */
2585 rhaas 290 UIC 0 : if (old_lastpart && empty)
2585 rhaas 291 UBC 0 : break;
2585 rhaas 292 EUB :
293 : /* Set new checksum for visibility map page, if enabled */
2382 tgl 294 UIC 0 : if (new_cluster.controldata.data_checksum_version != 0)
1681 tgl 295 UBC 0 : ((PageHeader) new_vmbuf.data)->pd_checksum =
296 0 : pg_checksum_page(new_vmbuf.data, new_blkno);
2585 rhaas 297 EUB :
2382 tgl 298 UIC 0 : errno = 0;
1681 tgl 299 UBC 0 : if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
2585 rhaas 300 EUB : {
301 : /* if write didn't set errno, assume problem is no disk space */
2382 tgl 302 UIC 0 : if (errno == 0)
2382 tgl 303 UBC 0 : errno = ENOSPC;
271 tgl 304 UNC 0 : pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s",
2382 tgl 305 UBC 0 : schemaName, relName, tofile, strerror(errno));
2585 rhaas 306 EUB : }
307 :
308 : /* Advance for next new page */
2585 rhaas 309 UIC 0 : old_break += rewriteVmBytesPerPage;
2585 rhaas 310 UBC 0 : new_blkno++;
2585 rhaas 311 EUB : }
312 : }
313 :
314 : /* Clean up */
2585 rhaas 315 UIC 0 : close(dst_fd);
2585 rhaas 316 UBC 0 : close(src_fd);
317 0 : }
2585 rhaas 318 EUB :
319 : void
1614 peter_e 320 UIC 0 : check_file_clone(void)
1614 peter_e 321 EUB : {
322 : char existing_file[MAXPGPATH];
323 : char new_link_file[MAXPGPATH];
324 :
1614 peter_e 325 UIC 0 : snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
1614 peter_e 326 UBC 0 : snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
327 0 : unlink(new_link_file); /* might fail */
1614 peter_e 328 EUB :
329 : #if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
330 : if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
331 : pg_fatal("could not clone file between old and new data directories: %s",
332 : strerror(errno));
333 : #elif defined(__linux__) && defined(FICLONE)
334 : {
335 : int src_fd;
336 : int dest_fd;
337 :
1614 peter_e 338 UIC 0 : if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
271 tgl 339 UNC 0 : pg_fatal("could not open file \"%s\": %s",
1614 peter_e 340 UBC 0 : existing_file, strerror(errno));
1614 peter_e 341 EUB :
1614 peter_e 342 UIC 0 : if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1614 peter_e 343 EUB : pg_file_create_mode)) < 0)
271 tgl 344 UNC 0 : pg_fatal("could not create file \"%s\": %s",
1614 peter_e 345 UBC 0 : new_link_file, strerror(errno));
1614 peter_e 346 EUB :
1614 peter_e 347 UIC 0 : if (ioctl(dest_fd, FICLONE, src_fd) < 0)
271 tgl 348 UNC 0 : pg_fatal("could not clone file between old and new data directories: %s",
1614 peter_e 349 UBC 0 : strerror(errno));
1614 peter_e 350 EUB :
1614 peter_e 351 UIC 0 : close(src_fd);
1614 peter_e 352 UBC 0 : close(dest_fd);
1614 peter_e 353 EUB : }
354 : #else
355 : pg_fatal("file cloning not supported on this platform");
356 : #endif
357 :
1614 peter_e 358 UIC 0 : unlink(new_link_file);
1614 peter_e 359 UBC 0 : }
1614 peter_e 360 EUB :
361 : void
4555 bruce 362 UIC 0 : check_hard_link(void)
4715 bruce 363 EUB : {
364 : char existing_file[MAXPGPATH];
365 : char new_link_file[MAXPGPATH];
366 :
4555 bruce 367 UIC 0 : snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
4555 bruce 368 UBC 0 : snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.linktest", new_cluster.pgdata);
4715 369 0 : unlink(new_link_file); /* might fail */
4715 bruce 370 EUB :
1131 peter 371 UIC 0 : if (link(existing_file, new_link_file) < 0)
2382 tgl 372 UBC 0 : pg_fatal("could not create hard link between old and new data directories: %s\n"
373 : "In link mode the old and new data directories must be on the same file system.",
2382 tgl 374 UIC 0 : strerror(errno));
2382 tgl 375 EUB :
4715 bruce 376 UIC 0 : unlink(new_link_file);
4715 bruce 377 UBC 0 : }
|