Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * compress_io.c
4 : * Routines for archivers to write an uncompressed or compressed data
5 : * stream.
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * This file includes two APIs for dealing with compressed data. The first
11 : * provides more flexibility, using callbacks to read/write data from the
12 : * underlying stream. The second API is a wrapper around fopen and
13 : * friends, providing an interface similar to those, but abstracts away
14 : * the possible compression. The second API is designed so that the
15 : * resulting files can be easily manipulated with an external compression
16 : * utility program.
17 : *
18 : * Compressor API
19 : * --------------
20 : *
21 : * The interface for writing to an archive consists of three functions:
22 : * AllocateCompressor, writeData, and EndCompressor. First you call
23 : * AllocateCompressor, then write all the data by calling writeData as many
24 : * times as needed, and finally EndCompressor. writeData will call the
25 : * WriteFunc that was provided to AllocateCompressor for each chunk of
26 : * compressed data.
27 : *
28 : * The interface for reading an archive consists of three similar functions:
29 : * AllocateCompressor, readData, and EndCompressor. First you call
30 : * AllocateCompressor, then read all the data by calling readData to read the
31 : * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32 : * returns the compressed data one chunk at a time. Then readData decompresses
33 : * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34 : * to signal EOF. The interface is the same for compressed and uncompressed
35 : * streams.
36 : *
37 : * Compressed stream API
38 : * ----------------------
39 : *
40 : * The compressed stream API provides a set of function pointers for
41 : * opening, reading, writing, and finally closing files. The implemented
42 : * function pointers are documented in the corresponding header file and are
43 : * common for all streams. It allows the caller to use the same functions for
44 : * both compressed and uncompressed streams.
45 : *
46 : * The interface consists of three functions, InitCompressFileHandle,
47 : * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48 : * compression is known, then start by calling InitCompressFileHandle,
49 : * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50 : * the function pointers as required for the read/write operations. Finally
51 : * call EndCompressFileHandle to end the stream.
52 : *
53 : * InitDiscoverCompressFileHandle tries to infer the compression by the
54 : * filename suffix. If the suffix is not yet known then it tries to simply
55 : * open the file and if it fails, it tries to open the same file with
56 : * compressed suffixes (.gz, .lz4 and .zst, in this order).
57 : *
58 : * IDENTIFICATION
59 : * src/bin/pg_dump/compress_io.c
60 : *
61 : *-------------------------------------------------------------------------
62 : */
63 : #include "postgres_fe.h"
64 :
65 : #include <sys/stat.h>
66 : #include <unistd.h>
67 :
68 : #include "compress_gzip.h"
69 : #include "compress_io.h"
70 : #include "compress_lz4.h"
71 : #include "compress_none.h"
72 : #include "compress_zstd.h"
73 : #include "pg_backup_utils.h"
74 :
75 : /*----------------------
76 : * Generic functions
77 : *----------------------
78 : */
79 :
80 : /*
81 : * Checks whether support for a compression algorithm is implemented in
82 : * pg_dump/restore.
83 : *
84 : * On success returns NULL, otherwise returns a malloc'ed string which can be
85 : * used by the caller in an error message.
86 : */
87 : char *
45 tomas.vondra 88 GNC 236 : supports_compression(const pg_compress_specification compression_spec)
89 : {
90 236 : const pg_compress_algorithm algorithm = compression_spec.algorithm;
91 236 : bool supported = false;
92 :
93 236 : if (algorithm == PG_COMPRESSION_NONE)
94 176 : supported = true;
95 : #ifdef HAVE_LIBZ
96 236 : if (algorithm == PG_COMPRESSION_GZIP)
97 42 : supported = true;
98 : #endif
99 : #ifdef USE_LZ4
100 236 : if (algorithm == PG_COMPRESSION_LZ4)
101 9 : supported = true;
102 : #endif
103 : #ifdef USE_ZSTD
4 104 236 : if (algorithm == PG_COMPRESSION_ZSTD)
105 9 : supported = true;
106 : #endif
107 :
45 108 236 : if (!supported)
45 tomas.vondra 109 UNC 0 : return psprintf("this build does not support compression with %s",
110 : get_compress_algorithm_name(algorithm));
111 :
45 tomas.vondra 112 GNC 236 : return NULL;
113 : }
114 :
115 : /*----------------------
116 : * Compressor API
117 : *----------------------
118 : */
119 :
45 tomas.vondra 120 ECB : /*
121 : * Allocate a new compressor.
122 : */
123 : CompressorState *
128 michael 124 GNC 214 : AllocateCompressor(const pg_compress_specification compression_spec,
125 : ReadFunc readF, WriteFunc writeF)
126 : {
127 : CompressorState *cs;
4511 heikki.linnakangas 128 ECB :
3841 tgl 129 CBC 214 : cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
45 tomas.vondra 130 GNC 214 : cs->readF = readF;
4511 heikki.linnakangas 131 CBC 214 : cs->writeF = writeF;
4511 heikki.linnakangas 132 ECB :
128 michael 133 GNC 214 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
45 tomas.vondra 134 UNC 0 : InitCompressorNone(cs, compression_spec);
45 tomas.vondra 135 GNC 214 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
136 110 : InitCompressorGzip(cs, compression_spec);
137 104 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
138 52 : InitCompressorLZ4(cs, compression_spec);
4 139 52 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
140 52 : InitCompressorZstd(cs, compression_spec);
141 :
45 tomas.vondra 142 CBC 214 : return cs;
143 : }
4511 heikki.linnakangas 144 ECB :
145 : /*
146 : * Terminate compression library context and flush its buffers.
147 : */
148 : void
4511 heikki.linnakangas 149 CBC 214 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
4511 heikki.linnakangas 150 ECB : {
45 tomas.vondra 151 GNC 214 : cs->end(AH, cs);
152 214 : pg_free(cs);
4511 heikki.linnakangas 153 CBC 214 : }
154 :
155 : /*----------------------
156 : * Compressed stream API
157 : *----------------------
158 : */
159 :
160 : /*
161 : * Private routines
162 : */
/*
 * Test whether 'filename' ends with 'suffix'.
 *
 * Returns 1 if it does and 0 otherwise.  An empty suffix matches every
 * filename.  Both arguments must be valid NUL-terminated strings.
 *
 * The lengths are held in size_t, matching strlen()'s return type, so that
 * neither the comparison nor the offset computation can be affected by
 * narrowing to int.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* a name shorter than the suffix cannot possibly end with it */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
176 :
/*
 * Release 'p' with free() while leaving errno exactly as it was on entry,
 * so that cleanup code can run without disturbing a pending error code.
 */
static void
free_keep_errno(void *p)
{
	const int	saved = errno;

	free(p);
	errno = saved;
}
186 :
187 : /*
188 : * Public interface
189 : */
190 :
191 : /*
192 : * Initialize a compress file handle for the specified compression algorithm.
193 : */
194 : CompressFileHandle *
45 tomas.vondra 195 610 : InitCompressFileHandle(const pg_compress_specification compression_spec)
196 : {
197 : CompressFileHandle *CFH;
198 :
199 610 : CFH = pg_malloc0(sizeof(CompressFileHandle));
200 :
201 610 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
202 293 : InitCompressFileHandleNone(CFH, compression_spec);
203 317 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
204 209 : InitCompressFileHandleGzip(CFH, compression_spec);
205 108 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
206 54 : InitCompressFileHandleLZ4(CFH, compression_spec);
4 207 54 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
208 54 : InitCompressFileHandleZstd(CFH, compression_spec);
209 :
45 210 610 : return CFH;
211 : }
212 :
/*
 * Probe for "<path>.<ext>" on disk.
 *
 * The buffer currently referenced by *fname is freed (errno is preserved
 * across that free) and replaced by a freshly allocated string holding the
 * candidate name, whether or not the file exists.  Returns true when
 * access() reports that the candidate exists.
 */
static bool
check_compressed_file(const char *path, char **fname, char *ext)
{
	char	   *candidate;

	/* discard the previous name before building the next one */
	free_keep_errno(*fname);

	candidate = psprintf("%s.%s", path, ext);
	*fname = candidate;

	if (access(candidate, F_OK) != 0)
		return false;

	return true;
}
226 :
227 : /*
228 : * Open a file for reading. 'path' is the file to open, and 'mode' should
229 : * be either "r" or "rb".
230 : *
231 : * If the file at 'path' contains the suffix of a supported compression method,
232 : * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
233 : * throughout. Otherwise the compression will be inferred by iteratively trying
234 : * to open the file at 'path', first as is, then by appending known compression
235 : * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
236 : * "foo.{gz,lz4,zst}", trying in that order.
237 : *
238 : * On failure, return NULL with an error code in errno.
239 : */
240 : CompressFileHandle *
45 241 174 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
242 : {
243 174 : CompressFileHandle *CFH = NULL;
244 : struct stat st;
245 : char *fname;
246 174 : pg_compress_specification compression_spec = {0};
247 :
248 174 : compression_spec.algorithm = PG_COMPRESSION_NONE;
249 :
250 174 : Assert(strcmp(mode, PG_BINARY_R) == 0);
251 :
252 174 : fname = strdup(path);
253 :
254 174 : if (hasSuffix(fname, ".gz"))
45 tomas.vondra 255 UNC 0 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
4 tomas.vondra 256 GNC 174 : else if (hasSuffix(fname, ".lz4"))
4 tomas.vondra 257 UNC 0 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
4 tomas.vondra 258 GNC 174 : else if (hasSuffix(fname, ".zst"))
4 tomas.vondra 259 UNC 0 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
260 : else
261 : {
4 tomas.vondra 262 GNC 174 : if (stat(path, &st) == 0)
45 263 15 : compression_spec.algorithm = PG_COMPRESSION_NONE;
4 264 159 : else if (check_compressed_file(path, &fname, "gz"))
265 105 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
266 54 : else if (check_compressed_file(path, &fname, "lz4"))
267 27 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
268 27 : else if (check_compressed_file(path, &fname, "zst"))
269 27 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
270 : }
271 :
45 272 174 : CFH = InitCompressFileHandle(compression_spec);
17 273 174 : if (!CFH->open_func(fname, -1, mode, CFH))
274 : {
45 tomas.vondra 275 UNC 0 : free_keep_errno(CFH);
276 0 : CFH = NULL;
277 : }
45 tomas.vondra 278 GNC 174 : free_keep_errno(fname);
279 :
280 174 : return CFH;
281 : }
282 :
283 : /*
284 : * Close an open file handle and release its memory.
285 : *
286 : * On failure, returns false and sets errno appropriately.
287 : */
288 : bool
289 594 : EndCompressFileHandle(CompressFileHandle *CFH)
290 : {
17 291 594 : bool ret = false;
292 :
45 293 594 : if (CFH->private_data)
294 594 : ret = CFH->close_func(CFH);
295 :
296 594 : free_keep_errno(CFH);
297 :
298 594 : return ret;
299 : }
|