Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * regc_pg_locale.c
4 : * ctype functions adapted to work on pg_wchar (a/k/a chr),
5 : * and functions to cache the results of wholesale ctype probing.
6 : *
7 : * This file is #included by regcomp.c; it's not meant to compile standalone.
8 : *
9 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994, Regents of the University of California
11 : *
12 : * IDENTIFICATION
13 : * src/backend/regex/regc_pg_locale.c
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 :
18 : #include "catalog/pg_collation.h"
19 : #include "utils/pg_locale.h"
20 :
21 : /*
22 : * To provide as much functionality as possible on a variety of platforms,
23 : * without going so far as to implement everything from scratch, we use
24 : * several implementation strategies depending on the situation:
25 : *
26 : * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
27 : * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
28 : * collations don't give a fig about multibyte characters.
29 : *
30 : * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
31 : *
32 : * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
33 : * This assumes that every platform uses Unicode codepoints directly
34 : * as the wchar_t representation of Unicode. On some platforms
35 : * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
36 : *
37 : * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
38 : * values up to 255, and punt for values above that. This is 100% correct
39 : * only in single-byte encodings such as LATINn. However, non-Unicode
40 : * multibyte encodings are mostly Far Eastern character sets for which the
41 : * properties being tested here aren't very relevant for higher code values
42 : * anyway. The difficulty with using the <wctype.h> functions with
43 : * non-Unicode multibyte encodings is that we can have no certainty that
44 : * the platform's wchar_t representation matches what we do in pg_wchar
45 : * conversions.
46 : *
47 : * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
48 : * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
49 : * functions, under exactly the same cases as #2.
50 : *
51 : * There is one notable difference between cases 2 and 3: in the "default"
52 : * collation we force ASCII letters to follow ASCII upcase/downcase rules,
53 : * while in a non-default collation we just let the library functions do what
54 : * they will. The case where this matters is treatment of I/i in Turkish,
55 : * and the behavior is meant to match the upper()/lower() SQL functions.
56 : *
57 : * We store the active collation setting in static variables. In principle
58 : * it could be passed down to here via the regex library's "struct vars" data
59 : * structure; but that would require somewhat invasive changes in the regex
60 : * library, and right now there's no real benefit to be gained from that.
61 : *
62 : * NB: the coding here assumes pg_wchar is an unsigned type.
63 : */
64 :
/*
 * Which family of character-classification functions the pg_wc_xxx
 * routines in this file dispatch to.  The value is chosen once per regex
 * operation by pg_set_regex_collation().
 */
typedef enum
{
	/* C locale (encoding independent) */
	PG_REGEX_LOCALE_C,
	/* Use <wctype.h> functions */
	PG_REGEX_LOCALE_WIDE,
	/* Use <ctype.h> functions */
	PG_REGEX_LOCALE_1BYTE,
	/* Use locale_t <wctype.h> functions */
	PG_REGEX_LOCALE_WIDE_L,
	/* Use locale_t <ctype.h> functions */
	PG_REGEX_LOCALE_1BYTE_L,
	/* Use ICU uchar.h functions */
	PG_REGEX_LOCALE_ICU
} PG_Locale_Strategy;
74 :
/*
 * Active collation state, written by pg_set_regex_collation() and read by
 * the pg_wc_xxx functions and the ctype cache below.
 */
/* which function family the pg_wc_xxx routines dispatch to */
static PG_Locale_Strategy pg_regex_strategy;
/* locale object for the collation; 0 in the C case (other paths test it for null too) */
static pg_locale_t pg_regex_locale;
/* collation OID recorded for the current operation; also the ctype cache key */
static Oid pg_regex_collation;
78 :
/*
 * Hard-wired character properties for C locale
 *
 * Each PG_ISxxx flag occupies one bit, so a table entry can carry any
 * combination of them; PG_ISALNUM is simply "digit or alpha".
 */
#define PG_ISDIGIT	0x01
#define PG_ISALPHA	0x02
#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
#define PG_ISUPPER	0x04
#define PG_ISLOWER	0x08
#define PG_ISGRAPH	0x10
#define PG_ISPRINT	0x20
#define PG_ISPUNCT	0x40
#define PG_ISSPACE	0x80

/* Shorthand for the recurring combinations; only used to build the table */
#define PG_CP_PUNCT	(PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT)
#define PG_CP_DIGIT	(PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT)
#define PG_CP_UPPER	(PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT)
#define PG_CP_LOWER	(PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT)

/* Indexed by ASCII code 0..127 */
static const unsigned char pg_char_properties[128] = {
	/* 0x00-0x08: NUL through ^H */
	0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x09-0x0D: ^I through ^M */
	PG_ISSPACE, PG_ISSPACE, PG_ISSPACE, PG_ISSPACE, PG_ISSPACE,
	/* 0x0E-0x16: ^N through ^V */
	0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x17-0x1F: ^W through ^_ */
	0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20: space */
	PG_ISPRINT | PG_ISSPACE,
	/* 0x21-0x25: exclamation mark through percent sign */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x26-0x2A: ampersand through asterisk */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x2B-0x2F: plus sign through slash */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x30-0x34: 0 1 2 3 4 */
	PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT,
	/* 0x35-0x39: 5 6 7 8 9 */
	PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT, PG_CP_DIGIT,
	/* 0x3A-0x3D: colon through equals sign */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x3E-0x40: greater-than through at sign */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x41-0x45: A B C D E */
	PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER,
	/* 0x46-0x4A: F G H I J */
	PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER,
	/* 0x4B-0x4F: K L M N O */
	PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER,
	/* 0x50-0x54: P Q R S T */
	PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER,
	/* 0x55-0x59: U V W X Y */
	PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER, PG_CP_UPPER,
	/* 0x5A: Z */
	PG_CP_UPPER,
	/* 0x5B-0x5D: left bracket, backslash, right bracket */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x5E-0x60: caret, underscore, backquote */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x61-0x65: a b c d e */
	PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER,
	/* 0x66-0x6A: f g h i j */
	PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER,
	/* 0x6B-0x6F: k l m n o */
	PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER,
	/* 0x70-0x74: p q r s t */
	PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER,
	/* 0x75-0x79: u v w x y */
	PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER, PG_CP_LOWER,
	/* 0x7A: z */
	PG_CP_LOWER,
	/* 0x7B-0x7E: left brace, vertical bar, right brace, tilde */
	PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT, PG_CP_PUNCT,
	/* 0x7F: DEL */
	0
};

#undef PG_CP_PUNCT
#undef PG_CP_DIGIT
#undef PG_CP_UPPER
#undef PG_CP_LOWER
222 :
223 :
224 : /*
225 : * pg_set_regex_collation: set collation for these functions to obey
226 : *
227 : * This is called when beginning compilation or execution of a regexp.
228 : * Since there's no need for reentrancy of regexp operations, it's okay
229 : * to store the results in static variables.
230 : */
231 : void
4382 tgl 232 CBC 912287 : pg_set_regex_collation(Oid collation)
233 : {
444 peter 234 912287 : if (!OidIsValid(collation))
235 : {
236 : /*
237 : * This typically means that the parser could not resolve a conflict
238 : * of implicit collations, so report it that way.
239 : */
444 peter 240 UBC 0 : ereport(ERROR,
241 : (errcode(ERRCODE_INDETERMINATE_COLLATION),
242 : errmsg("could not determine which collation to use for regular expression"),
243 : errhint("Use the COLLATE clause to set the collation explicitly.")));
244 : }
245 :
4382 tgl 246 CBC 912287 : if (lc_ctype_is_c(collation))
247 : {
248 : /* C/POSIX collations use this path regardless of database encoding */
249 62841 : pg_regex_strategy = PG_REGEX_LOCALE_C;
250 62841 : pg_regex_locale = 0;
4067 251 62841 : pg_regex_collation = C_COLLATION_OID;
252 : }
253 : else
254 : {
255 : /*
256 : * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; the
257 : * case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not have to
258 : * be considered below.
259 : */
444 peter 260 849446 : pg_regex_locale = pg_newlocale_from_collation(collation);
261 :
45 jdavis 262 GNC 849446 : if (!pg_locale_deterministic(pg_regex_locale))
1479 peter 263 CBC 12 : ereport(ERROR,
264 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
265 : errmsg("nondeterministic collations are not supported for regular expressions")));
266 :
267 : #ifdef USE_ICU
2208 peter_e 268 849434 : if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
269 848999 : pg_regex_strategy = PG_REGEX_LOCALE_ICU;
270 : else
271 : #endif
4382 tgl 272 435 : if (GetDatabaseEncoding() == PG_UTF8)
273 : {
274 433 : if (pg_regex_locale)
275 429 : pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
276 : else
277 4 : pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
278 : }
279 : else
280 : {
281 2 : if (pg_regex_locale)
4382 tgl 282 UBC 0 : pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
283 : else
4382 tgl 284 CBC 2 : pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
285 : }
286 :
4067 287 849434 : pg_regex_collation = collation;
288 : }
4382 289 912275 : }
290 :
291 : static int
292 74885 : pg_wc_isdigit(pg_wchar c)
293 : {
294 74885 : switch (pg_regex_strategy)
295 : {
296 943 : case PG_REGEX_LOCALE_C:
297 1886 : return (c <= (pg_wchar) 127 &&
298 943 : (pg_char_properties[c] & PG_ISDIGIT));
4382 tgl 299 UBC 0 : case PG_REGEX_LOCALE_WIDE:
300 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
301 0 : return iswdigit((wint_t) c);
302 : /* FALL THRU */
303 : case PG_REGEX_LOCALE_1BYTE:
304 0 : return (c <= (pg_wchar) UCHAR_MAX &&
305 0 : isdigit((unsigned char) c));
4382 tgl 306 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
307 : #ifdef HAVE_LOCALE_T
308 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 309 6144 : return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
310 : #endif
311 : /* FALL THRU */
312 : case PG_REGEX_LOCALE_1BYTE_L:
313 : #ifdef HAVE_LOCALE_T
4382 tgl 314 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 315 0 : isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
316 : #endif
317 : break;
2208 peter_e 318 CBC 67798 : case PG_REGEX_LOCALE_ICU:
319 : #ifdef USE_ICU
320 67798 : return u_isdigit(c);
321 : #endif
322 : break;
323 : }
4382 tgl 324 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
325 : }
326 :
327 : static int
4382 tgl 328 CBC 14475 : pg_wc_isalpha(pg_wchar c)
329 : {
330 14475 : switch (pg_regex_strategy)
331 : {
4382 tgl 332 UBC 0 : case PG_REGEX_LOCALE_C:
333 0 : return (c <= (pg_wchar) 127 &&
334 0 : (pg_char_properties[c] & PG_ISALPHA));
335 0 : case PG_REGEX_LOCALE_WIDE:
336 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
337 0 : return iswalpha((wint_t) c);
338 : /* FALL THRU */
339 : case PG_REGEX_LOCALE_1BYTE:
340 0 : return (c <= (pg_wchar) UCHAR_MAX &&
341 0 : isalpha((unsigned char) c));
4382 tgl 342 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
343 : #ifdef HAVE_LOCALE_T
344 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 345 6144 : return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
346 : #endif
347 : /* FALL THRU */
348 : case PG_REGEX_LOCALE_1BYTE_L:
349 : #ifdef HAVE_LOCALE_T
4382 tgl 350 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 351 0 : isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
352 : #endif
353 : break;
2208 peter_e 354 CBC 8331 : case PG_REGEX_LOCALE_ICU:
355 : #ifdef USE_ICU
356 8331 : return u_isalpha(c);
357 : #endif
358 : break;
359 : }
4382 tgl 360 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
361 : }
362 :
363 : static int
4382 tgl 364 CBC 34506 : pg_wc_isalnum(pg_wchar c)
365 : {
366 34506 : switch (pg_regex_strategy)
367 : {
368 821 : case PG_REGEX_LOCALE_C:
369 1642 : return (c <= (pg_wchar) 127 &&
370 821 : (pg_char_properties[c] & PG_ISALNUM));
4382 tgl 371 UBC 0 : case PG_REGEX_LOCALE_WIDE:
372 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
373 0 : return iswalnum((wint_t) c);
374 : /* FALL THRU */
375 : case PG_REGEX_LOCALE_1BYTE:
376 0 : return (c <= (pg_wchar) UCHAR_MAX &&
377 0 : isalnum((unsigned char) c));
4382 tgl 378 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
379 : #ifdef HAVE_LOCALE_T
380 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 381 6144 : return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
382 : #endif
383 : /* FALL THRU */
384 : case PG_REGEX_LOCALE_1BYTE_L:
385 : #ifdef HAVE_LOCALE_T
4382 tgl 386 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 387 0 : isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
388 : #endif
389 : break;
2208 peter_e 390 CBC 27541 : case PG_REGEX_LOCALE_ICU:
391 : #ifdef USE_ICU
392 27541 : return u_isalnum(c);
393 : #endif
394 : break;
395 : }
4382 tgl 396 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
397 : }
398 :
399 : static int
773 tgl 400 CBC 16769 : pg_wc_isword(pg_wchar c)
401 : {
402 : /* We define word characters as alnum class plus underscore */
403 16769 : if (c == CHR('_'))
404 11 : return 1;
405 16758 : return pg_wc_isalnum(c);
406 : }
407 :
408 : static int
4382 409 14344 : pg_wc_isupper(pg_wchar c)
410 : {
411 14344 : switch (pg_regex_strategy)
412 : {
4382 tgl 413 UBC 0 : case PG_REGEX_LOCALE_C:
414 0 : return (c <= (pg_wchar) 127 &&
415 0 : (pg_char_properties[c] & PG_ISUPPER));
416 0 : case PG_REGEX_LOCALE_WIDE:
417 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
418 0 : return iswupper((wint_t) c);
419 : /* FALL THRU */
420 : case PG_REGEX_LOCALE_1BYTE:
421 0 : return (c <= (pg_wchar) UCHAR_MAX &&
422 0 : isupper((unsigned char) c));
4382 tgl 423 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
424 : #ifdef HAVE_LOCALE_T
425 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 426 6144 : return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
427 : #endif
428 : /* FALL THRU */
429 : case PG_REGEX_LOCALE_1BYTE_L:
430 : #ifdef HAVE_LOCALE_T
4382 tgl 431 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 432 0 : isupper_l((unsigned char) c, pg_regex_locale->info.lt));
433 : #endif
434 : break;
2208 peter_e 435 CBC 8200 : case PG_REGEX_LOCALE_ICU:
436 : #ifdef USE_ICU
437 8200 : return u_isupper(c);
438 : #endif
439 : break;
440 : }
4382 tgl 441 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
442 : }
443 :
444 : static int
4382 tgl 445 CBC 14339 : pg_wc_islower(pg_wchar c)
446 : {
447 14339 : switch (pg_regex_strategy)
448 : {
4382 tgl 449 UBC 0 : case PG_REGEX_LOCALE_C:
450 0 : return (c <= (pg_wchar) 127 &&
451 0 : (pg_char_properties[c] & PG_ISLOWER));
452 0 : case PG_REGEX_LOCALE_WIDE:
453 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
454 0 : return iswlower((wint_t) c);
455 : /* FALL THRU */
456 : case PG_REGEX_LOCALE_1BYTE:
457 0 : return (c <= (pg_wchar) UCHAR_MAX &&
458 0 : islower((unsigned char) c));
4382 tgl 459 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
460 : #ifdef HAVE_LOCALE_T
461 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 462 6144 : return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
463 : #endif
464 : /* FALL THRU */
465 : case PG_REGEX_LOCALE_1BYTE_L:
466 : #ifdef HAVE_LOCALE_T
4382 tgl 467 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 468 0 : islower_l((unsigned char) c, pg_regex_locale->info.lt));
469 : #endif
470 : break;
2208 peter_e 471 CBC 8195 : case PG_REGEX_LOCALE_ICU:
472 : #ifdef USE_ICU
473 8195 : return u_islower(c);
474 : #endif
475 : break;
476 : }
4382 tgl 477 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
478 : }
479 :
480 : static int
4382 tgl 481 CBC 14339 : pg_wc_isgraph(pg_wchar c)
482 : {
483 14339 : switch (pg_regex_strategy)
484 : {
4382 tgl 485 UBC 0 : case PG_REGEX_LOCALE_C:
486 0 : return (c <= (pg_wchar) 127 &&
487 0 : (pg_char_properties[c] & PG_ISGRAPH));
488 0 : case PG_REGEX_LOCALE_WIDE:
489 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
490 0 : return iswgraph((wint_t) c);
491 : /* FALL THRU */
492 : case PG_REGEX_LOCALE_1BYTE:
493 0 : return (c <= (pg_wchar) UCHAR_MAX &&
494 0 : isgraph((unsigned char) c));
4382 tgl 495 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
496 : #ifdef HAVE_LOCALE_T
497 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 498 6144 : return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
499 : #endif
500 : /* FALL THRU */
501 : case PG_REGEX_LOCALE_1BYTE_L:
502 : #ifdef HAVE_LOCALE_T
4382 tgl 503 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 504 0 : isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
505 : #endif
506 : break;
2208 peter_e 507 CBC 8195 : case PG_REGEX_LOCALE_ICU:
508 : #ifdef USE_ICU
509 8195 : return u_isgraph(c);
510 : #endif
511 : break;
512 : }
4382 tgl 513 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
514 : }
515 :
516 : static int
4382 tgl 517 CBC 14339 : pg_wc_isprint(pg_wchar c)
518 : {
519 14339 : switch (pg_regex_strategy)
520 : {
4382 tgl 521 UBC 0 : case PG_REGEX_LOCALE_C:
522 0 : return (c <= (pg_wchar) 127 &&
523 0 : (pg_char_properties[c] & PG_ISPRINT));
524 0 : case PG_REGEX_LOCALE_WIDE:
525 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
526 0 : return iswprint((wint_t) c);
527 : /* FALL THRU */
528 : case PG_REGEX_LOCALE_1BYTE:
529 0 : return (c <= (pg_wchar) UCHAR_MAX &&
530 0 : isprint((unsigned char) c));
4382 tgl 531 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
532 : #ifdef HAVE_LOCALE_T
533 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 534 6144 : return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
535 : #endif
536 : /* FALL THRU */
537 : case PG_REGEX_LOCALE_1BYTE_L:
538 : #ifdef HAVE_LOCALE_T
4382 tgl 539 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 540 0 : isprint_l((unsigned char) c, pg_regex_locale->info.lt));
541 : #endif
542 : break;
2208 peter_e 543 CBC 8195 : case PG_REGEX_LOCALE_ICU:
544 : #ifdef USE_ICU
545 8195 : return u_isprint(c);
546 : #endif
547 : break;
548 : }
4382 tgl 549 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
550 : }
551 :
552 : static int
4382 tgl 553 CBC 14339 : pg_wc_ispunct(pg_wchar c)
554 : {
555 14339 : switch (pg_regex_strategy)
556 : {
4382 tgl 557 UBC 0 : case PG_REGEX_LOCALE_C:
558 0 : return (c <= (pg_wchar) 127 &&
559 0 : (pg_char_properties[c] & PG_ISPUNCT));
560 0 : case PG_REGEX_LOCALE_WIDE:
561 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
562 0 : return iswpunct((wint_t) c);
563 : /* FALL THRU */
564 : case PG_REGEX_LOCALE_1BYTE:
565 0 : return (c <= (pg_wchar) UCHAR_MAX &&
566 0 : ispunct((unsigned char) c));
4382 tgl 567 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
568 : #ifdef HAVE_LOCALE_T
569 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 570 6144 : return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
571 : #endif
572 : /* FALL THRU */
573 : case PG_REGEX_LOCALE_1BYTE_L:
574 : #ifdef HAVE_LOCALE_T
4382 tgl 575 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 576 0 : ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
577 : #endif
578 : break;
2208 peter_e 579 CBC 8195 : case PG_REGEX_LOCALE_ICU:
580 : #ifdef USE_ICU
581 8195 : return u_ispunct(c);
582 : #endif
583 : break;
584 : }
4382 tgl 585 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
586 : }
587 :
588 : static int
4382 tgl 589 CBC 44337 : pg_wc_isspace(pg_wchar c)
590 : {
591 44337 : switch (pg_regex_strategy)
592 : {
4382 tgl 593 UBC 0 : case PG_REGEX_LOCALE_C:
594 0 : return (c <= (pg_wchar) 127 &&
595 0 : (pg_char_properties[c] & PG_ISSPACE));
596 0 : case PG_REGEX_LOCALE_WIDE:
597 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
598 0 : return iswspace((wint_t) c);
599 : /* FALL THRU */
600 : case PG_REGEX_LOCALE_1BYTE:
601 0 : return (c <= (pg_wchar) UCHAR_MAX &&
602 0 : isspace((unsigned char) c));
4382 tgl 603 CBC 6144 : case PG_REGEX_LOCALE_WIDE_L:
604 : #ifdef HAVE_LOCALE_T
605 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 606 6144 : return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
607 : #endif
608 : /* FALL THRU */
609 : case PG_REGEX_LOCALE_1BYTE_L:
610 : #ifdef HAVE_LOCALE_T
4382 tgl 611 UBC 0 : return (c <= (pg_wchar) UCHAR_MAX &&
2208 peter_e 612 0 : isspace_l((unsigned char) c, pg_regex_locale->info.lt));
613 : #endif
614 : break;
2208 peter_e 615 CBC 38193 : case PG_REGEX_LOCALE_ICU:
616 : #ifdef USE_ICU
617 38193 : return u_isspace(c);
618 : #endif
619 : break;
620 : }
4382 tgl 621 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
622 : }
623 :
624 : static pg_wchar
4382 tgl 625 CBC 5258 : pg_wc_toupper(pg_wchar c)
626 : {
627 5258 : switch (pg_regex_strategy)
628 : {
629 537 : case PG_REGEX_LOCALE_C:
630 537 : if (c <= (pg_wchar) 127)
631 537 : return pg_ascii_toupper((unsigned char) c);
4382 tgl 632 UBC 0 : return c;
633 0 : case PG_REGEX_LOCALE_WIDE:
634 : /* force C behavior for ASCII characters, per comments above */
635 0 : if (c <= (pg_wchar) 127)
636 0 : return pg_ascii_toupper((unsigned char) c);
637 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
638 0 : return towupper((wint_t) c);
639 : /* FALL THRU */
640 : case PG_REGEX_LOCALE_1BYTE:
641 : /* force C behavior for ASCII characters, per comments above */
642 0 : if (c <= (pg_wchar) 127)
643 0 : return pg_ascii_toupper((unsigned char) c);
644 0 : if (c <= (pg_wchar) UCHAR_MAX)
645 0 : return toupper((unsigned char) c);
646 0 : return c;
4382 tgl 647 CBC 54 : case PG_REGEX_LOCALE_WIDE_L:
648 : #ifdef HAVE_LOCALE_T
649 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 650 54 : return towupper_l((wint_t) c, pg_regex_locale->info.lt);
651 : #endif
652 : /* FALL THRU */
653 : case PG_REGEX_LOCALE_1BYTE_L:
654 : #ifdef HAVE_LOCALE_T
4382 tgl 655 UBC 0 : if (c <= (pg_wchar) UCHAR_MAX)
2208 peter_e 656 0 : return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
657 : #endif
4382 tgl 658 0 : return c;
2208 peter_e 659 CBC 4667 : case PG_REGEX_LOCALE_ICU:
660 : #ifdef USE_ICU
661 4667 : return u_toupper(c);
662 : #endif
663 : break;
664 : }
4382 tgl 665 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
666 : }
667 :
668 : static pg_wchar
4382 tgl 669 CBC 5260 : pg_wc_tolower(pg_wchar c)
670 : {
671 5260 : switch (pg_regex_strategy)
672 : {
673 537 : case PG_REGEX_LOCALE_C:
674 537 : if (c <= (pg_wchar) 127)
675 537 : return pg_ascii_tolower((unsigned char) c);
4382 tgl 676 UBC 0 : return c;
677 0 : case PG_REGEX_LOCALE_WIDE:
678 : /* force C behavior for ASCII characters, per comments above */
679 0 : if (c <= (pg_wchar) 127)
680 0 : return pg_ascii_tolower((unsigned char) c);
681 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
682 0 : return towlower((wint_t) c);
683 : /* FALL THRU */
684 : case PG_REGEX_LOCALE_1BYTE:
685 : /* force C behavior for ASCII characters, per comments above */
686 0 : if (c <= (pg_wchar) 127)
687 0 : return pg_ascii_tolower((unsigned char) c);
688 0 : if (c <= (pg_wchar) UCHAR_MAX)
689 0 : return tolower((unsigned char) c);
690 0 : return c;
4382 tgl 691 CBC 54 : case PG_REGEX_LOCALE_WIDE_L:
692 : #ifdef HAVE_LOCALE_T
693 : if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
2208 peter_e 694 54 : return towlower_l((wint_t) c, pg_regex_locale->info.lt);
695 : #endif
696 : /* FALL THRU */
697 : case PG_REGEX_LOCALE_1BYTE_L:
698 : #ifdef HAVE_LOCALE_T
4382 tgl 699 UBC 0 : if (c <= (pg_wchar) UCHAR_MAX)
2208 peter_e 700 0 : return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
701 : #endif
4382 tgl 702 0 : return c;
2208 peter_e 703 CBC 4669 : case PG_REGEX_LOCALE_ICU:
704 : #ifdef USE_ICU
705 4669 : return u_tolower(c);
706 : #endif
707 : break;
708 : }
4382 tgl 709 UBC 0 : return 0; /* can't get here, but keep compiler quiet */
710 : }
711 :
712 :
713 : /*
714 : * These functions cache the results of probing libc's ctype behavior for
715 : * all character codes of interest in a given encoding/collation. The
716 : * result is provided as a "struct cvec", but notice that the representation
717 : * is a touch different from a cvec created by regc_cvec.c: we allocate the
718 : * chrs[] and ranges[] arrays separately from the struct so that we can
719 : * realloc them larger at need. This is okay since the cvecs made here
720 : * should never be freed by freecvec().
721 : *
722 : * We use malloc not palloc since we mustn't lose control on out-of-memory;
723 : * the main regex code expects us to return a failure indication instead.
724 : */
725 :
/* Signature shared by the pg_wc_isxxx probe functions: nonzero means "member" */
typedef int (*pg_wc_probefunc) (pg_wchar c);

/*
 * One cached probe result: the chrs for which probefunc returned nonzero
 * under the given collation.  Entries are looked up by the
 * (probefunc, collation) pair and chained through "next".
 */
typedef struct pg_ctype_cache
{
	pg_wc_probefunc probefunc; /* pg_wc_isalpha or a sibling */
	Oid collation; /* collation this entry is for */
	struct cvec cv; /* cache entry contents */
	struct pg_ctype_cache *next; /* chain link */
} pg_ctype_cache;

/* Head of the cache chain; new entries are pushed on the front */
static pg_ctype_cache *pg_ctype_cache_list = NULL;
737 :
738 : /*
739 : * Add a chr or range to pcc->cv; return false if run out of memory
740 : */
741 : static bool
4067 tgl 742 CBC 5617 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
743 : {
744 : chr *newchrs;
745 :
746 5617 : if (nchrs > 1)
747 : {
748 1731 : if (pcc->cv.nranges >= pcc->cv.rangespace)
749 : {
4067 tgl 750 UBC 0 : pcc->cv.rangespace *= 2;
751 0 : newchrs = (chr *) realloc(pcc->cv.ranges,
752 0 : pcc->cv.rangespace * sizeof(chr) * 2);
753 0 : if (newchrs == NULL)
754 0 : return false;
755 0 : pcc->cv.ranges = newchrs;
756 : }
4067 tgl 757 CBC 1731 : pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
758 1731 : pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
759 1731 : pcc->cv.nranges++;
760 : }
761 : else
762 : {
763 3886 : assert(nchrs == 1);
764 3886 : if (pcc->cv.nchrs >= pcc->cv.chrspace)
765 : {
766 14 : pcc->cv.chrspace *= 2;
767 14 : newchrs = (chr *) realloc(pcc->cv.chrs,
768 14 : pcc->cv.chrspace * sizeof(chr));
769 14 : if (newchrs == NULL)
4067 tgl 770 UBC 0 : return false;
4067 tgl 771 CBC 14 : pcc->cv.chrs = newchrs;
772 : }
773 3886 : pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
774 : }
775 5617 : return true;
776 : }
777 :
778 : /*
779 : * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
780 : * chrs satisfying the probe function. The active collation is the one
781 : * previously set by pg_set_regex_collation. Return NULL if out of memory.
782 : *
783 : * Note that the result must not be freed or modified by caller.
784 : */
785 : static struct cvec *
2407 786 363 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
787 : {
788 : pg_ctype_cache *pcc;
789 : pg_wchar max_chr;
790 : pg_wchar cur_chr;
791 : int nmatches;
792 : chr *newchrs;
793 :
794 : /*
795 : * Do we already have the answer cached?
796 : */
4067 797 919 : for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
798 : {
799 794 : if (pcc->probefunc == probefunc &&
800 262 : pcc->collation == pg_regex_collation)
801 238 : return &pcc->cv;
802 : }
803 :
804 : /*
805 : * Nope, so initialize some workspace ...
806 : */
807 125 : pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
808 125 : if (pcc == NULL)
4067 tgl 809 UBC 0 : return NULL;
4067 tgl 810 CBC 125 : pcc->probefunc = probefunc;
811 125 : pcc->collation = pg_regex_collation;
812 125 : pcc->cv.nchrs = 0;
813 125 : pcc->cv.chrspace = 128;
814 125 : pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
815 125 : pcc->cv.nranges = 0;
816 125 : pcc->cv.rangespace = 64;
817 125 : pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
818 125 : if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
4067 tgl 819 UBC 0 : goto out_of_memory;
2407 tgl 820 CBC 125 : pcc->cv.cclasscode = cclasscode;
821 :
822 : /*
823 : * Decide how many character codes we ought to look through. In general
824 : * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
825 : * runtime using the "high colormap" mechanism. However, in C locale
826 : * there's no need to go further than 127, and if we only have a 1-byte
827 : * <ctype.h> API there's no need to go further than that can handle.
828 : *
829 : * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
830 : * output cvec as not having any locale-dependent behavior, since there
831 : * will be no need to do any run-time locale checks. (The #if's here
832 : * would always be true for production values of MAX_SIMPLE_CHR, but it's
833 : * useful to allow it to be small for testing purposes.)
834 : */
4067 835 125 : switch (pg_regex_strategy)
836 : {
837 10 : case PG_REGEX_LOCALE_C:
838 : #if MAX_SIMPLE_CHR >= 127
839 10 : max_chr = (pg_wchar) 127;
2407 840 10 : pcc->cv.cclasscode = -1;
841 : #else
842 : max_chr = (pg_wchar) MAX_SIMPLE_CHR;
843 : #endif
4067 844 10 : break;
845 27 : case PG_REGEX_LOCALE_WIDE:
846 : case PG_REGEX_LOCALE_WIDE_L:
2407 847 27 : max_chr = (pg_wchar) MAX_SIMPLE_CHR;
4067 848 27 : break;
4067 tgl 849 UBC 0 : case PG_REGEX_LOCALE_1BYTE:
850 : case PG_REGEX_LOCALE_1BYTE_L:
851 : #if MAX_SIMPLE_CHR >= UCHAR_MAX
852 0 : max_chr = (pg_wchar) UCHAR_MAX;
2407 853 0 : pcc->cv.cclasscode = -1;
854 : #else
855 : max_chr = (pg_wchar) MAX_SIMPLE_CHR;
856 : #endif
4067 857 0 : break;
2208 peter_e 858 CBC 88 : case PG_REGEX_LOCALE_ICU:
859 88 : max_chr = (pg_wchar) MAX_SIMPLE_CHR;
860 88 : break;
4067 tgl 861 UBC 0 : default:
862 0 : max_chr = 0; /* can't get here, but keep compiler quiet */
863 0 : break;
864 : }
865 :
866 : /*
867 : * And scan 'em ...
868 : */
4067 tgl 869 CBC 125 : nmatches = 0; /* number of consecutive matches */
870 :
871 236925 : for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
872 : {
873 236800 : if ((*probefunc) (cur_chr))
874 71695 : nmatches++;
875 165105 : else if (nmatches > 0)
876 : {
877 5600 : if (!store_match(pcc, cur_chr - nmatches, nmatches))
4067 tgl 878 UBC 0 : goto out_of_memory;
4067 tgl 879 CBC 5600 : nmatches = 0;
880 : }
881 : }
882 :
883 125 : if (nmatches > 0)
884 17 : if (!store_match(pcc, cur_chr - nmatches, nmatches))
4067 tgl 885 UBC 0 : goto out_of_memory;
886 :
887 : /*
888 : * We might have allocated more memory than needed, if so free it
889 : */
4067 tgl 890 CBC 125 : if (pcc->cv.nchrs == 0)
891 : {
892 43 : free(pcc->cv.chrs);
893 43 : pcc->cv.chrs = NULL;
894 43 : pcc->cv.chrspace = 0;
895 : }
896 82 : else if (pcc->cv.nchrs < pcc->cv.chrspace)
897 : {
898 82 : newchrs = (chr *) realloc(pcc->cv.chrs,
899 82 : pcc->cv.nchrs * sizeof(chr));
900 82 : if (newchrs == NULL)
4067 tgl 901 UBC 0 : goto out_of_memory;
4067 tgl 902 CBC 82 : pcc->cv.chrs = newchrs;
903 82 : pcc->cv.chrspace = pcc->cv.nchrs;
904 : }
905 125 : if (pcc->cv.nranges == 0)
906 : {
4067 tgl 907 UBC 0 : free(pcc->cv.ranges);
908 0 : pcc->cv.ranges = NULL;
909 0 : pcc->cv.rangespace = 0;
910 : }
4067 tgl 911 CBC 125 : else if (pcc->cv.nranges < pcc->cv.rangespace)
912 : {
913 125 : newchrs = (chr *) realloc(pcc->cv.ranges,
914 125 : pcc->cv.nranges * sizeof(chr) * 2);
915 125 : if (newchrs == NULL)
4067 tgl 916 UBC 0 : goto out_of_memory;
4067 tgl 917 CBC 125 : pcc->cv.ranges = newchrs;
918 125 : pcc->cv.rangespace = pcc->cv.nranges;
919 : }
920 :
921 : /*
922 : * Success, link it into cache chain
923 : */
924 125 : pcc->next = pg_ctype_cache_list;
925 125 : pg_ctype_cache_list = pcc;
926 :
927 125 : return &pcc->cv;
928 :
929 : /*
930 : * Failure, clean up
931 : */
4067 tgl 932 UBC 0 : out_of_memory:
297 peter 933 UNC 0 : free(pcc->cv.chrs);
934 0 : free(pcc->cv.ranges);
4067 tgl 935 UBC 0 : free(pcc);
936 :
4067 tgl 937 UIC 0 : return NULL;
938 : }
|