Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * like.c
4 : * like expression handling code.
5 : *
6 : * NOTES
7 : * A big hack of the regexp.c code!! Contributed by
8 : * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9 : *
10 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/utils/adt/like.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include <ctype.h>
21 :
22 : #include "catalog/pg_collation.h"
23 : #include "mb/pg_wchar.h"
24 : #include "miscadmin.h"
25 : #include "utils/builtins.h"
26 : #include "utils/pg_locale.h"
27 : #include "varatt.h"
28 :
29 :
/*
 * Result codes returned by the MatchText family of functions declared below.
 * The callers in this file only ever compare the result against LIKE_TRUE;
 * both other values are treated as "no match" here.
 * NOTE(review): the precise distinction between LIKE_FALSE and LIKE_ABORT is
 * made by the matcher in like_match.c -- confirm there.
 */
30 : #define LIKE_TRUE 1
31 : #define LIKE_FALSE 0
32 : #define LIKE_ABORT (-1)
33 :
34 :
/*
 * Forward declarations.  The SB_, MB_, UTF8_ and SB_I variants are not
 * written out in this file: each name is supplied through the MatchText /
 * do_like_escape macros immediately before an #include of like_match.c.
 * All four matchers share one signature: text pointer and length, pattern
 * pointer and length, a locale object, and a flag saying the locale is C.
 */

/* Single-byte characters. */
35 : static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
36 : pg_locale_t locale, bool locale_is_c);
37 : static text *SB_do_like_escape(text *pat, text *esc);
38 :
/* Multibyte characters, stepping with pg_mblen(). */
39 : static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
40 : pg_locale_t locale, bool locale_is_c);
41 : static text *MB_do_like_escape(text *pat, text *esc);
42 :
/* UTF8, using a cheaper character-advance macro; no escape converter. */
43 : static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
44 : pg_locale_t locale, bool locale_is_c);
45 :
/* Single-byte, case-insensitive (bytes are folded with SB_lower_char). */
46 : static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
47 : pg_locale_t locale, bool locale_is_c);
48 :
/* Dispatchers that choose one of the matchers above at run time. */
49 : static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
50 : static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
51 :
/*--------------------
 * wchareq
 *		Support routine for MatchText: test two multibyte characters for
 *		equality.
 *
 * p1 and p2 each point at the first byte of a character.  The result is 1
 * when both characters have the same byte length (as reported by pg_mblen)
 * and all of their bytes agree; otherwise it is 0.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			remaining;

	/* Cheap rejection: characters with different lead bytes cannot match. */
	if (*p1 != *p2)
		return 0;

	remaining = pg_mblen(p1);
	if (remaining != pg_mblen(p2))
		return 0;

	/* Lengths agree, so a byte-by-byte comparison settles the question. */
	for (; remaining != 0; remaining--)
	{
		if (*p1++ != *p2++)
			return 0;
	}

	return 1;
}
78 :
79 : /*
80 : * Formerly we had a routine iwchareq() here that tried to do case-insensitive
81 : * comparison of multibyte characters. It did not work at all, however,
82 : * because it relied on tolower() which has a single-byte API ... and
83 : * towlower() wouldn't be much better since we have no suitably cheap way
84 : * of getting a single character transformed to the system's wchar_t format.
85 : * So now, we just downcase the strings using lower() and apply regular LIKE
86 : * comparison. This should be revisited when we install better locale support.
87 : */
88 :
89 : /*
90 : * We do handle case-insensitive matching for single-byte encodings using
91 : * fold-on-the-fly processing, however.
92 : */
93 : static char
4383 tgl 94 UIC 0 : SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
4383 tgl 95 EUB : {
4383 tgl 96 UIC 0 : if (locale_is_c)
4383 tgl 97 UBC 0 : return pg_ascii_tolower(c);
4383 tgl 98 EUB : #ifdef HAVE_LOCALE_T
4383 tgl 99 UIC 0 : else if (locale)
2208 peter_e 100 UBC 0 : return tolower_l(c, locale->info.lt);
4383 tgl 101 EUB : #endif
102 : else
4383 tgl 103 UIC 0 : return pg_tolower(c);
4383 tgl 104 EUB : }
105 :
106 :
/*
 * like_match.c is used as a code template: it is #included four times below,
 * each time after defining a different set of macros.  MatchText (and, where
 * defined, do_like_escape) give the names of the functions each inclusion
 * produces; they correspond to the static prototypes near the top of this
 * file.
 * NOTE(review): the macros are redefined below without any #undef in this
 * file, so like_match.c presumably #undefs them at its end and supplies
 * defaults for the ones a given instantiation leaves out (e.g. CHAREQ in the
 * last two) -- confirm against like_match.c.
 */

/* Advance p by exactly one byte and shrink the remaining length to match. */
107 : #define NextByte(p, plen) ((p)++, (plen)--)
108 :
109 : /*
 * Set up to compile like_match.c for multibyte characters.
 *
 * CHAREQ compares whole characters via wchareq(); NextChar steps over
 * pg_mblen(p) bytes; CopyAdvChar copies pg_mblen(src) bytes from src to dst,
 * advancing both pointers and reducing srclen by the same amount.
 */
110 : #define CHAREQ(p1, p2) wchareq((p1), (p2))
111 : #define NextChar(p, plen) \
112 : do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
113 : #define CopyAdvChar(dst, src, srclen) \
114 : do { int __l = pg_mblen(src); \
115 : (srclen) -= __l; \
116 : while (__l-- > 0) \
117 : *(dst)++ = *(src)++; \
118 : } while (0)
119 :
120 : #define MatchText MB_MatchText
121 : #define do_like_escape MB_do_like_escape
122 :
123 : #include "like_match.c"
124 :
125 : /*
 * Set up to compile like_match.c for single-byte characters.
 *
 * Every character is one byte, so comparison, advancing and copying all
 * operate on a single byte.
 */
126 : #define CHAREQ(p1, p2) (*(p1) == *(p2))
127 : #define NextChar(p, plen) NextByte((p), (plen))
128 : #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
129 :
130 : #define MatchText SB_MatchText
131 : #define do_like_escape SB_do_like_escape
132 :
133 : #include "like_match.c"
134 :
135 : /*
 * Set up to compile like_match.c for single-byte case-insensitive matches.
 *
 * MATCH_LOWER folds a byte with SB_lower_char().  Its expansion refers to
 * "locale" and "locale_is_c" by name, so those identifiers must be in scope
 * wherever the template uses it; they match the parameter names in the
 * SB_IMatchText prototype.  No do_like_escape is defined for this variant.
 */
136 : #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
137 : #define NextChar(p, plen) NextByte((p), (plen))
138 : #define MatchText SB_IMatchText
139 :
140 : #include "like_match.c"
141 :
142 : /*
 * Set up to compile like_match.c for UTF8 encoding, using fast NextChar.
 *
 * This NextChar avoids calling pg_mblen(): it always steps over one byte and
 * then keeps stepping while input remains and the next byte has the bit
 * pattern 10xxxxxx (a UTF-8 continuation byte).
 */
143 :
144 : #define NextChar(p, plen) \
145 : do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
146 : #define MatchText UTF8_MatchText
147 :
148 : #include "like_match.c"
149 :
150 : /* Generic for all cases not requiring inline case-folding */
151 : static inline int
1479 peter 152 GIC 1411360 : GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
5790 andrew 153 ECB : {
444 peter 154 GIC 1411360 : if (collation && !lc_ctype_is_c(collation))
1479 peter 155 ECB : {
1418 tgl 156 GIC 89009 : pg_locale_t locale = pg_newlocale_from_collation(collation);
1479 peter 157 ECB :
45 jdavis 158 GNC 89009 : if (!pg_locale_deterministic(locale))
1479 peter 159 CBC 12 : ereport(ERROR,
1479 peter 160 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
161 : errmsg("nondeterministic collations are not supported for LIKE")));
162 : }
163 :
5790 andrew 164 GIC 1411348 : if (pg_database_encoding_max_length() == 1)
4383 tgl 165 CBC 21384 : return SB_MatchText(s, slen, p, plen, 0, true);
5790 andrew 166 1389964 : else if (GetDatabaseEncoding() == PG_UTF8)
4383 tgl 167 1389964 : return UTF8_MatchText(s, slen, p, plen, 0, true);
5790 andrew 168 ECB : else
4383 tgl 169 UIC 0 : return MB_MatchText(s, slen, p, plen, 0, true);
5790 andrew 170 EUB : }
171 :
172 : static inline int
4443 peter_e 173 GIC 45760 : Generic_Text_IC_like(text *str, text *pat, Oid collation)
5790 andrew 174 ECB : {
175 : char *s,
176 : *p;
177 : int slen,
178 : plen;
2208 peter_e 179 GIC 45760 : pg_locale_t locale = 0;
2208 peter_e 180 CBC 45760 : bool locale_is_c = false;
2208 peter_e 181 ECB :
444 peter 182 GIC 45760 : if (!OidIsValid(collation))
444 peter 183 ECB : {
184 : /*
185 : * This typically means that the parser could not resolve a conflict
186 : * of implicit collations, so report it that way.
187 : */
444 peter 188 UIC 0 : ereport(ERROR,
444 peter 189 EUB : (errcode(ERRCODE_INDETERMINATE_COLLATION),
190 : errmsg("could not determine which collation to use for ILIKE"),
191 : errhint("Use the COLLATE clause to set the collation explicitly.")));
192 : }
193 :
2208 peter_e 194 GIC 45760 : if (lc_ctype_is_c(collation))
2208 peter_e 195 CBC 11455 : locale_is_c = true;
444 peter 196 ECB : else
2208 peter_e 197 GIC 34305 : locale = pg_newlocale_from_collation(collation);
1479 peter 198 ECB :
45 jdavis 199 GNC 45760 : if (!pg_locale_deterministic(locale))
444 peter 200 CBC 6 : ereport(ERROR,
444 peter 201 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
202 : errmsg("nondeterministic collations are not supported for ILIKE")));
203 :
204 : /*
205 : * For efficiency reasons, in the single byte case we don't call lower()
206 : * on the pattern and text, but instead call SB_lower_char on each
207 : * character. In the multi-byte case we don't have much choice :-(. Also,
208 : * ICU does not support single-character case folding, so we go the long
209 : * way.
210 : */
211 :
2208 peter_e 212 GIC 45754 : if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
5678 andrew 213 ECB : {
2219 noah 214 GIC 45754 : pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
2219 noah 215 ECB : PointerGetDatum(pat)));
2219 noah 216 GIC 45754 : p = VARDATA_ANY(pat);
2219 noah 217 CBC 45754 : plen = VARSIZE_ANY_EXHDR(pat);
218 45754 : str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
2219 noah 219 ECB : PointerGetDatum(str)));
2219 noah 220 GIC 45754 : s = VARDATA_ANY(str);
2219 noah 221 CBC 45754 : slen = VARSIZE_ANY_EXHDR(str);
5678 andrew 222 45754 : if (GetDatabaseEncoding() == PG_UTF8)
4383 tgl 223 45754 : return UTF8_MatchText(s, slen, p, plen, 0, true);
5678 andrew 224 ECB : else
4383 tgl 225 UIC 0 : return MB_MatchText(s, slen, p, plen, 0, true);
5678 andrew 226 EUB : }
227 : else
228 : {
5678 andrew 229 UIC 0 : p = VARDATA_ANY(pat);
5678 andrew 230 UBC 0 : plen = VARSIZE_ANY_EXHDR(pat);
231 0 : s = VARDATA_ANY(str);
232 0 : slen = VARSIZE_ANY_EXHDR(str);
4383 tgl 233 0 : return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
5678 andrew 234 EUB : }
235 : }
236 :
237 : /*
238 : * interface routines called by the function manager
239 : */
240 :
241 : Datum
8241 tgl 242 GIC 81542 : namelike(PG_FUNCTION_ARGS)
8281 lockhart 243 ECB : {
8278 lockhart 244 GIC 81542 : Name str = PG_GETARG_NAME(0);
5679 tgl 245 CBC 81542 : text *pat = PG_GETARG_TEXT_PP(1);
8241 tgl 246 ECB : bool result;
247 : char *s,
248 : *p;
249 : int slen,
250 : plen;
251 :
8278 lockhart 252 GIC 81542 : s = NameStr(*str);
8278 lockhart 253 CBC 81542 : slen = strlen(s);
5679 tgl 254 81542 : p = VARDATA_ANY(pat);
255 81542 : plen = VARSIZE_ANY_EXHDR(pat);
8278 lockhart 256 ECB :
1479 peter 257 GIC 81542 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
8278 lockhart 258 ECB :
8278 lockhart 259 GIC 81542 : PG_RETURN_BOOL(result);
8281 lockhart 260 ECB : }
261 :
262 : Datum
8241 tgl 263 GIC 2649 : namenlike(PG_FUNCTION_ARGS)
8281 lockhart 264 ECB : {
8278 lockhart 265 GIC 2649 : Name str = PG_GETARG_NAME(0);
5679 tgl 266 CBC 2649 : text *pat = PG_GETARG_TEXT_PP(1);
8241 tgl 267 ECB : bool result;
268 : char *s,
269 : *p;
270 : int slen,
271 : plen;
272 :
8278 lockhart 273 GIC 2649 : s = NameStr(*str);
8278 lockhart 274 CBC 2649 : slen = strlen(s);
5679 tgl 275 2649 : p = VARDATA_ANY(pat);
276 2649 : plen = VARSIZE_ANY_EXHDR(pat);
8278 lockhart 277 ECB :
1479 peter 278 GIC 2649 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
8278 lockhart 279 ECB :
8278 lockhart 280 GIC 2649 : PG_RETURN_BOOL(result);
9770 scrappy 281 ECB : }
282 :
283 : Datum
8281 lockhart 284 GIC 1171131 : textlike(PG_FUNCTION_ARGS)
8281 lockhart 285 ECB : {
5679 tgl 286 GIC 1171131 : text *str = PG_GETARG_TEXT_PP(0);
5679 tgl 287 CBC 1171131 : text *pat = PG_GETARG_TEXT_PP(1);
8278 lockhart 288 ECB : bool result;
289 : char *s,
290 : *p;
291 : int slen,
292 : plen;
293 :
5679 tgl 294 GIC 1171131 : s = VARDATA_ANY(str);
5679 tgl 295 CBC 1171131 : slen = VARSIZE_ANY_EXHDR(str);
296 1171131 : p = VARDATA_ANY(pat);
297 1171131 : plen = VARSIZE_ANY_EXHDR(pat);
8278 lockhart 298 ECB :
1479 peter 299 GIC 1171131 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
8278 lockhart 300 ECB :
8278 lockhart 301 GIC 1171119 : PG_RETURN_BOOL(result);
8281 lockhart 302 ECB : }
303 :
304 : Datum
8241 tgl 305 GIC 156038 : textnlike(PG_FUNCTION_ARGS)
8281 lockhart 306 ECB : {
5679 tgl 307 GIC 156038 : text *str = PG_GETARG_TEXT_PP(0);
5679 tgl 308 CBC 156038 : text *pat = PG_GETARG_TEXT_PP(1);
8278 lockhart 309 ECB : bool result;
310 : char *s,
311 : *p;
312 : int slen,
313 : plen;
314 :
5679 tgl 315 GIC 156038 : s = VARDATA_ANY(str);
5679 tgl 316 CBC 156038 : slen = VARSIZE_ANY_EXHDR(str);
317 156038 : p = VARDATA_ANY(pat);
318 156038 : plen = VARSIZE_ANY_EXHDR(pat);
8278 lockhart 319 ECB :
1479 peter 320 GIC 156038 : result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
8278 lockhart 321 ECB :
8278 lockhart 322 GIC 156038 : PG_RETURN_BOOL(result);
8281 lockhart 323 ECB : }
324 :
325 : Datum
7877 bruce 326 GIC 6 : bytealike(PG_FUNCTION_ARGS)
7877 bruce 327 ECB : {
5679 tgl 328 GIC 6 : bytea *str = PG_GETARG_BYTEA_PP(0);
5679 tgl 329 CBC 6 : bytea *pat = PG_GETARG_BYTEA_PP(1);
7877 bruce 330 ECB : bool result;
331 : char *s,
332 : *p;
333 : int slen,
334 : plen;
335 :
5679 tgl 336 GIC 6 : s = VARDATA_ANY(str);
5679 tgl 337 CBC 6 : slen = VARSIZE_ANY_EXHDR(str);
338 6 : p = VARDATA_ANY(pat);
339 6 : plen = VARSIZE_ANY_EXHDR(pat);
7877 bruce 340 ECB :
4383 tgl 341 GIC 6 : result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
7877 bruce 342 ECB :
7877 bruce 343 GIC 6 : PG_RETURN_BOOL(result);
7877 bruce 344 ECB : }
345 :
346 : Datum
7877 bruce 347 GIC 6 : byteanlike(PG_FUNCTION_ARGS)
7877 bruce 348 ECB : {
5679 tgl 349 GIC 6 : bytea *str = PG_GETARG_BYTEA_PP(0);
5679 tgl 350 CBC 6 : bytea *pat = PG_GETARG_BYTEA_PP(1);
7877 bruce 351 ECB : bool result;
352 : char *s,
353 : *p;
354 : int slen,
355 : plen;
356 :
5679 tgl 357 GIC 6 : s = VARDATA_ANY(str);
5679 tgl 358 CBC 6 : slen = VARSIZE_ANY_EXHDR(str);
359 6 : p = VARDATA_ANY(pat);
360 6 : plen = VARSIZE_ANY_EXHDR(pat);
7877 bruce 361 ECB :
4383 tgl 362 GIC 6 : result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
7877 bruce 363 ECB :
7877 bruce 364 GIC 6 : PG_RETURN_BOOL(result);
7877 bruce 365 ECB : }
366 :
367 : /*
368 : * Case-insensitive versions
369 : */
370 :
371 : Datum
8241 tgl 372 GIC 11448 : nameiclike(PG_FUNCTION_ARGS)
9770 scrappy 373 ECB : {
8278 lockhart 374 GIC 11448 : Name str = PG_GETARG_NAME(0);
5679 tgl 375 CBC 11448 : text *pat = PG_GETARG_TEXT_PP(1);
8278 lockhart 376 ECB : bool result;
377 : text *strtext;
378 :
2219 noah 379 GIC 11448 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
2219 noah 380 ECB : NameGetDatum(str)));
4443 peter_e 381 GIC 11448 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
8278 lockhart 382 ECB :
8278 lockhart 383 GIC 11448 : PG_RETURN_BOOL(result);
9770 scrappy 384 ECB : }
385 :
386 : Datum
8241 tgl 387 GIC 3 : nameicnlike(PG_FUNCTION_ARGS)
8281 lockhart 388 ECB : {
8278 lockhart 389 GIC 3 : Name str = PG_GETARG_NAME(0);
5679 tgl 390 CBC 3 : text *pat = PG_GETARG_TEXT_PP(1);
8278 lockhart 391 ECB : bool result;
392 : text *strtext;
393 :
2219 noah 394 GIC 3 : strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
2219 noah 395 ECB : NameGetDatum(str)));
4443 peter_e 396 GIC 3 : result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
8278 lockhart 397 ECB :
8278 lockhart 398 GIC 3 : PG_RETURN_BOOL(result);
8281 lockhart 399 ECB : }
400 :
401 : Datum
8241 tgl 402 GIC 34281 : texticlike(PG_FUNCTION_ARGS)
9770 scrappy 403 ECB : {
5679 tgl 404 GIC 34281 : text *str = PG_GETARG_TEXT_PP(0);
5679 tgl 405 CBC 34281 : text *pat = PG_GETARG_TEXT_PP(1);
8241 tgl 406 ECB : bool result;
407 :
4443 peter_e 408 GIC 34281 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
8278 lockhart 409 ECB :
8278 lockhart 410 GIC 34275 : PG_RETURN_BOOL(result);
9770 scrappy 411 ECB : }
412 :
413 : Datum
8241 tgl 414 GIC 28 : texticnlike(PG_FUNCTION_ARGS)
8281 lockhart 415 ECB : {
5679 tgl 416 GIC 28 : text *str = PG_GETARG_TEXT_PP(0);
5679 tgl 417 CBC 28 : text *pat = PG_GETARG_TEXT_PP(1);
8241 tgl 418 ECB : bool result;
419 :
4443 peter_e 420 GIC 28 : result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
8278 lockhart 421 ECB :
8278 lockhart 422 GIC 28 : PG_RETURN_BOOL(result);
8281 lockhart 423 ECB : }
424 :
425 : /*
426 : * like_escape() --- given a pattern and an ESCAPE string,
427 : * convert the pattern to use Postgres' standard backslash escape convention.
428 : */
429 : Datum
8241 tgl 430 GIC 106 : like_escape(PG_FUNCTION_ARGS)
8281 lockhart 431 ECB : {
5679 tgl 432 GIC 106 : text *pat = PG_GETARG_TEXT_PP(0);
5679 tgl 433 CBC 106 : text *esc = PG_GETARG_TEXT_PP(1);
8241 tgl 434 ECB : text *result;
435 :
7857 ishii 436 GIC 106 : if (pg_database_encoding_max_length() == 1)
5790 andrew 437 LBC 0 : result = SB_do_like_escape(pat, esc);
8241 tgl 438 EUB : else
7836 bruce 439 GIC 106 : result = MB_do_like_escape(pat, esc);
8278 lockhart 440 ECB :
8241 tgl 441 GIC 106 : PG_RETURN_TEXT_P(result);
9770 scrappy 442 ECB : }
443 :
444 : /*
445 : * like_escape_bytea() --- given a pattern and an ESCAPE string,
446 : * convert the pattern to use Postgres' standard backslash escape convention.
447 : */
448 : Datum
7877 bruce 449 GIC 6 : like_escape_bytea(PG_FUNCTION_ARGS)
7877 bruce 450 ECB : {
5679 tgl 451 GIC 6 : bytea *pat = PG_GETARG_BYTEA_PP(0);
5679 tgl 452 CBC 6 : bytea *esc = PG_GETARG_BYTEA_PP(1);
5624 bruce 453 6 : bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
7877 bruce 454 ECB :
5624 bruce 455 GIC 6 : PG_RETURN_BYTEA_P((bytea *) result);
7877 bruce 456 ECB : }
|