Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tzparser.c
4 : * Functions for parsing timezone offset files
5 : *
6 : * Note: this code is invoked from the check_hook for the GUC variable
7 : * timezone_abbreviations. Therefore, it should report problems using
8 : * GUC_check_errmsg() and related functions, and try to avoid throwing
9 : * elog(ERROR). This is not completely bulletproof at present --- in
10 : * particular out-of-memory will throw an error. Could probably fix with
11 : * PG_TRY if necessary.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/backend/utils/misc/tzparser.c
19 : *
20 : *-------------------------------------------------------------------------
21 : */
22 :
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
32 :
33 :
/* Delimiter set passed to strtok() when splitting a line into fields. */
34 : #define WHITESPACE " \t\n\r"
35 :
/*
 * Prototypes for this file's static helpers.  ParseTzFile() drives the other
 * three for each data line: splitTzLine(), then validateTzEntry(), then
 * addToArray().
 */
36 : static bool validateTzEntry(tzEntry *tzentry);
37 : static bool splitTzLine(const char *filename, int lineno,
38 : char *line, tzEntry *tzentry);
39 : static int addToArray(tzEntry **base, int *arraysize, int n,
40 : tzEntry *entry, bool override);
41 : static int ParseTzFile(const char *filename, int depth,
42 : tzEntry **base, int *arraysize, int n);
43 :
44 :
45 : /*
46 : * Apply additional validation checks to a tzEntry
47 : *
48 : * Returns true if OK, else false
49 : */
50 : static bool
6102 tgl 51 CBC 1289113 : validateTzEntry(tzEntry *tzentry)
52 : {
53 : unsigned char *p;
54 :
55 : /*
56 : * Check restrictions imposed by datetktbl storage format (see datetime.c)
57 : */
58 1289113 : if (strlen(tzentry->abbrev) > TOKMAXLEN)
59 : {
4385 tgl 60 UBC 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
61 : tzentry->abbrev, TOKMAXLEN,
62 : tzentry->filename, tzentry->lineno);
6102 63 0 : return false;
64 : }
65 :
66 : /*
67 : * Sanity-check the offset: shouldn't exceed 14 hours
68 : */
6031 bruce 69 CBC 1289113 : if (tzentry->offset > 14 * 60 * 60 ||
70 1289113 : tzentry->offset < -14 * 60 * 60)
71 : {
4385 tgl 72 UBC 0 : GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
73 : tzentry->offset,
74 : tzentry->filename, tzentry->lineno);
6102 75 0 : return false;
76 : }
77 :
78 : /*
79 : * Convert abbrev to lowercase (must match datetime.c's conversion)
80 : */
6102 tgl 81 CBC 5978583 : for (p = (unsigned char *) tzentry->abbrev; *p; p++)
82 4689470 : *p = pg_tolower(*p);
83 :
84 1289113 : return true;
85 : }
86 :
87 : /*
88 : * Attempt to parse the line as a timezone abbrev spec
89 : *
90 : * Valid formats are:
91 : * name zone
92 : * name offset dst
93 : *
94 : * Returns true if OK, else false; data is stored in *tzentry
95 : */
96 : static bool
97 1289113 : splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
98 : {
99 : char *abbrev;
100 : char *offset;
101 : char *offset_endptr;
102 : char *remain;
103 : char *is_dst;
104 :
105 1289113 : tzentry->lineno = lineno;
106 1289113 : tzentry->filename = filename;
107 :
108 1289113 : abbrev = strtok(line, WHITESPACE);
109 1289113 : if (!abbrev)
110 : {
4385 tgl 111 UBC 0 : GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
112 : filename, lineno);
6102 113 0 : return false;
114 : }
3097 tgl 115 CBC 1289113 : tzentry->abbrev = pstrdup(abbrev);
116 :
6102 117 1289113 : offset = strtok(NULL, WHITESPACE);
118 1289113 : if (!offset)
119 : {
4385 tgl 120 UBC 0 : GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d",
121 : filename, lineno);
6102 122 0 : return false;
123 : }
124 :
125 : /* We assume zone names don't begin with a digit or sign */
3097 tgl 126 CBC 1289113 : if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-')
127 : {
128 953686 : tzentry->zone = NULL;
129 953686 : tzentry->offset = strtol(offset, &offset_endptr, 10);
130 953686 : if (offset_endptr == offset || *offset_endptr != '\0')
131 : {
3097 tgl 132 UBC 0 : GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
133 : filename, lineno);
134 0 : return false;
135 : }
136 :
3097 tgl 137 CBC 953686 : is_dst = strtok(NULL, WHITESPACE);
138 953686 : if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
139 : {
140 315696 : tzentry->is_dst = true;
141 315696 : remain = strtok(NULL, WHITESPACE);
142 : }
143 : else
144 : {
145 : /* there was no 'D' dst specifier */
146 637990 : tzentry->is_dst = false;
147 637990 : remain = is_dst;
148 : }
149 : }
150 : else
151 : {
152 : /*
153 : * Assume entry is a zone name. We do not try to validate it by
154 : * looking up the zone, because that would force loading of a lot of
155 : * zones that probably will never be used in the current session.
156 : */
157 335427 : tzentry->zone = pstrdup(offset);
158 335427 : tzentry->offset = 0;
6102 159 335427 : tzentry->is_dst = false;
3097 160 335427 : remain = strtok(NULL, WHITESPACE);
161 : }
162 :
6031 bruce 163 1289113 : if (!remain) /* no more non-whitespace chars */
6102 tgl 164 UBC 0 : return true;
165 :
6102 tgl 166 CBC 1289113 : if (remain[0] != '#') /* must be a comment */
167 : {
4385 tgl 168 UBC 0 : GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d",
169 : filename, lineno);
6102 170 0 : return false;
171 : }
6102 tgl 172 CBC 1289113 : return true;
173 : }
174 :
175 : /*
176 : * Insert entry into sorted array
177 : *
178 : * *base: base address of array (changeable if must enlarge array)
179 : * *arraysize: allocated length of array (changeable if must enlarge array)
180 : * n: current number of valid elements in array
181 : * entry: new data to insert
182 : * override: true if OK to override
183 : *
184 : * Returns the new array length (new value for n), or -1 if error
185 : */
186 : static int
187 1289113 : addToArray(tzEntry **base, int *arraysize, int n,
188 : tzEntry *entry, bool override)
189 : {
190 : tzEntry *arrayptr;
191 : int low;
192 : int high;
193 :
194 : /*
195 : * Search the array for a duplicate; as a useful side effect, the array is
196 : * maintained in sorted order. We use strcmp() to ensure we match the
197 : * sort order datetime.c expects.
198 : */
199 1289113 : arrayptr = *base;
200 1289113 : low = 0;
6031 bruce 201 1289113 : high = n - 1;
6102 tgl 202 9431577 : while (low <= high)
203 : {
6031 bruce 204 8142479 : int mid = (low + high) >> 1;
205 8142479 : tzEntry *midptr = arrayptr + mid;
206 : int cmp;
207 :
6102 tgl 208 8142479 : cmp = strcmp(entry->abbrev, midptr->abbrev);
209 8142479 : if (cmp < 0)
210 3249104 : high = mid - 1;
211 4893375 : else if (cmp > 0)
212 4893360 : low = mid + 1;
213 : else
214 : {
215 : /*
216 : * Found a duplicate entry; complain unless it's the same.
217 : */
3097 218 15 : if ((midptr->zone == NULL && entry->zone == NULL &&
219 12 : midptr->offset == entry->offset &&
3097 tgl 220 UBC 0 : midptr->is_dst == entry->is_dst) ||
3097 tgl 221 CBC 15 : (midptr->zone != NULL && entry->zone != NULL &&
3097 tgl 222 UBC 0 : strcmp(midptr->zone, entry->zone) == 0))
223 : {
224 : /* return unchanged array */
6102 225 0 : return n;
226 : }
6102 tgl 227 CBC 15 : if (override)
228 : {
229 : /* same abbrev but something is different, override */
3097 230 15 : midptr->zone = entry->zone;
6102 231 15 : midptr->offset = entry->offset;
232 15 : midptr->is_dst = entry->is_dst;
233 15 : return n;
234 : }
235 : /* same abbrev but something is different, complain */
4385 tgl 236 UBC 0 : GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined",
237 : entry->abbrev);
238 0 : GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
239 : midptr->filename, midptr->lineno,
240 : entry->filename, entry->lineno);
6102 241 0 : return -1;
242 : }
243 : }
244 :
245 : /*
246 : * No match, insert at position "low".
247 : */
6102 tgl 248 CBC 1289098 : if (n >= *arraysize)
249 : {
250 6577 : *arraysize *= 2;
251 6577 : *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
252 : }
253 :
254 1289098 : arrayptr = *base + low;
255 :
256 1289098 : memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
257 :
258 1289098 : memcpy(arrayptr, entry, sizeof(tzEntry));
259 :
6031 bruce 260 1289098 : return n + 1;
261 : }
262 :
263 : /*
264 : * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
265 : *
266 : * filename: user-specified file name (does not include path)
267 : * depth: current recursion depth
268 : * *base: array for results (changeable if must enlarge array)
269 : * *arraysize: allocated length of array (changeable if must enlarge array)
270 : * n: current number of valid elements in array
271 : *
272 : * Returns the new array length (new value for n), or -1 if error
273 : */
274 : static int
6102 tgl 275 6583 : ParseTzFile(const char *filename, int depth,
276 : tzEntry **base, int *arraysize, int n)
277 : {
278 : char share_path[MAXPGPATH];
279 : char file_path[MAXPGPATH];
280 : FILE *tzFile;
281 : char tzbuf[1024];
282 : char *line;
283 : tzEntry tzentry;
6031 bruce 284 6583 : int lineno = 0;
285 6583 : bool override = false;
286 : const char *p;
287 :
288 : /*
289 : * We enforce that the filename is all alpha characters. This may be
290 : * overly restrictive, but we don't want to allow access to anything
291 : * outside the timezonesets directory, so for instance '/' *must* be
292 : * rejected.
293 : */
6102 tgl 294 52664 : for (p = filename; *p; p++)
295 : {
296 46081 : if (!isalpha((unsigned char) *p))
297 : {
298 : /* at level 0, just use guc.c's regular "invalid value" message */
6102 tgl 299 UBC 0 : if (depth > 0)
4385 300 0 : GUC_check_errmsg("invalid time zone file name \"%s\"",
301 : filename);
6102 302 0 : return -1;
303 : }
304 : }
305 :
306 : /*
307 : * The maximal recursion depth is a pretty arbitrary setting. It is hard
308 : * to imagine that someone needs more than 3 levels so stick with this
309 : * conservative setting until someone complains.
310 : */
6102 tgl 311 CBC 6583 : if (depth > 3)
312 : {
4385 tgl 313 UBC 0 : GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"",
314 : filename);
6102 315 0 : return -1;
316 : }
317 :
6102 tgl 318 CBC 6583 : get_share_path(my_exec_path, share_path);
319 6583 : snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
320 : share_path, filename);
321 6583 : tzFile = AllocateFile(file_path, "r");
322 6583 : if (!tzFile)
323 : {
324 : /*
325 : * Check to see if the problem is not the filename but the directory.
326 : * This is worth troubling over because if the installation share/
327 : * directory is missing or unreadable, this is likely to be the first
328 : * place we notice a problem during postmaster startup.
329 : */
5090 tgl 330 UBC 0 : int save_errno = errno;
331 : DIR *tzdir;
332 :
333 0 : snprintf(file_path, sizeof(file_path), "%s/timezonesets",
334 : share_path);
335 0 : tzdir = AllocateDir(file_path);
336 0 : if (tzdir == NULL)
337 : {
4385 338 0 : GUC_check_errmsg("could not open directory \"%s\": %m",
339 : file_path);
340 0 : GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
341 : my_exec_path);
5090 342 0 : return -1;
343 : }
344 0 : FreeDir(tzdir);
345 0 : errno = save_errno;
346 :
347 : /*
348 : * otherwise, if file doesn't exist and it's level 0, guc.c's
349 : * complaint is enough
350 : */
6102 351 0 : if (errno != ENOENT || depth > 0)
4385 352 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
353 : filename);
354 :
6102 355 0 : return -1;
356 : }
357 :
6102 tgl 358 CBC 4163385 : while (!feof(tzFile))
359 : {
360 4163385 : lineno++;
361 4163385 : if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
362 : {
363 6583 : if (ferror(tzFile))
364 : {
4385 tgl 365 UBC 0 : GUC_check_errmsg("could not read time zone file \"%s\": %m",
366 : filename);
313 367 0 : n = -1;
368 0 : break;
369 : }
370 : /* else we're at EOF after all */
6102 tgl 371 CBC 6583 : break;
372 : }
6031 bruce 373 4156802 : if (strlen(tzbuf) == sizeof(tzbuf) - 1)
374 : {
375 : /* the line is too long for tzbuf */
4385 tgl 376 UBC 0 : GUC_check_errmsg("line is too long in time zone file \"%s\", line %d",
377 : filename, lineno);
313 378 0 : n = -1;
379 0 : break;
380 : }
381 :
382 : /* skip over whitespace */
6102 tgl 383 CBC 4156802 : line = tzbuf;
384 44105584 : while (*line && isspace((unsigned char) *line))
385 39948782 : line++;
386 :
6031 bruce 387 4156802 : if (*line == '\0') /* empty line */
6102 tgl 388 144727 : continue;
6031 bruce 389 4012075 : if (*line == '#') /* comment line */
6102 tgl 390 2722950 : continue;
391 :
392 1289125 : if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
393 6 : {
394 : /* pstrdup so we can use filename in result data structure */
6031 bruce 395 6 : char *includeFile = pstrdup(line + strlen("@INCLUDE"));
396 :
6102 tgl 397 6 : includeFile = strtok(includeFile, WHITESPACE);
398 6 : if (!includeFile || !*includeFile)
399 : {
4385 tgl 400 UBC 0 : GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
401 : filename, lineno);
313 402 0 : n = -1;
403 0 : break;
404 : }
6102 tgl 405 CBC 6 : n = ParseTzFile(includeFile, depth + 1,
406 : base, arraysize, n);
407 6 : if (n < 0)
313 tgl 408 UBC 0 : break;
6102 tgl 409 CBC 6 : continue;
410 : }
411 :
412 1289119 : if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
413 : {
414 6 : override = true;
415 6 : continue;
416 : }
417 :
418 1289113 : if (!splitTzLine(filename, lineno, line, &tzentry))
419 : {
313 tgl 420 UBC 0 : n = -1;
421 0 : break;
422 : }
6102 tgl 423 CBC 1289113 : if (!validateTzEntry(&tzentry))
424 : {
313 tgl 425 UBC 0 : n = -1;
426 0 : break;
427 : }
6102 tgl 428 CBC 1289113 : n = addToArray(base, arraysize, n, &tzentry, override);
429 1289113 : if (n < 0)
313 tgl 430 UBC 0 : break;
431 : }
432 :
6102 tgl 433 CBC 6583 : FreeFile(tzFile);
434 :
435 6583 : return n;
436 : }
437 :
438 : /*
439 : * load_tzoffsets --- read and parse the specified timezone offset file
440 : *
441 : * On success, return a filled-in TimeZoneAbbrevTable, which must have been
442 : * guc_malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg
443 : * and friends to give details of the problem.
444 : */
445 : TimeZoneAbbrevTable *
4385 446 6577 : load_tzoffsets(const char *filename)
447 : {
448 6577 : TimeZoneAbbrevTable *result = NULL;
449 : MemoryContext tmpContext;
450 : MemoryContext oldContext;
451 : tzEntry *array;
452 : int arraysize;
453 : int n;
454 :
455 : /*
456 : * Create a temp memory context to work in. This makes it easy to clean
457 : * up afterwards.
458 : */
6102 459 6577 : tmpContext = AllocSetContextCreate(CurrentMemoryContext,
460 : "TZParserMemory",
461 : ALLOCSET_SMALL_SIZES);
462 6577 : oldContext = MemoryContextSwitchTo(tmpContext);
463 :
464 : /* Initialize array at a reasonable size */
465 6577 : arraysize = 128;
466 6577 : array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
467 :
468 : /* Parse the file(s) */
469 6577 : n = ParseTzFile(filename, 0, &array, &arraysize, 0);
470 :
471 : /* If no errors so far, let datetime.c allocate memory & convert format */
4385 472 6577 : if (n >= 0)
473 : {
3097 474 6577 : result = ConvertTimeZoneAbbrevs(array, n);
4385 475 6577 : if (!result)
4385 tgl 476 UBC 0 : GUC_check_errmsg("out of memory");
477 : }
478 :
479 : /* Clean up */
6102 tgl 480 CBC 6577 : MemoryContextSwitchTo(oldContext);
481 6577 : MemoryContextDelete(tmpContext);
482 :
4385 483 6577 : return result;
484 : }
|