Age Owner TLA Line data Source code
1 : /*
2 : * contrib/pg_trgm/trgm_gin.c
3 : */
4 : #include "postgres.h"
5 :
6 : #include "access/gin.h"
7 : #include "access/stratnum.h"
8 : #include "fmgr.h"
9 : #include "trgm.h"
10 : #include "varatt.h"
11 :
/*
 * Function-manager (V1 convention) info records for the SQL-callable GIN
 * support functions defined in this file; one record per function.
 */
5870 teodor 12 UIC 0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
4451 tgl 13 GBC 4 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
4451 tgl 14 CBC 4 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
5870 teodor 15 4 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
2820 16 4 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
5870 teodor 17 ECB :
18 : /*
19 : * This function can only be called if a pre-9.1 version of the GIN operator
20 : * class definition is present in the catalogs (probably as a consequence
21 : * of upgrade-in-place). Cope.
22 : */
23 : Datum
5870 teodor 24 UIC 0 : gin_extract_trgm(PG_FUNCTION_ARGS)
4451 tgl 25 EUB : {
4434 tgl 26 UIC 0 : if (PG_NARGS() == 3)
4434 tgl 27 UBC 0 : return gin_extract_value_trgm(fcinfo);
28 0 : if (PG_NARGS() == 7)
29 0 : return gin_extract_query_trgm(fcinfo);
30 0 : elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
4451 tgl 31 EUB : PG_RETURN_NULL();
32 : }
33 :
34 : Datum
4451 tgl 35 GIC 2404 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
5870 teodor 36 ECB : {
2219 noah 37 GIC 2404 : text *val = (text *) PG_GETARG_TEXT_PP(0);
5624 bruce 38 CBC 2404 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 2404 : Datum *entries = NULL;
5624 bruce 40 ECB : TRGM *trg;
41 : int32 trglen;
42 :
5870 teodor 43 GIC 2404 : *nentries = 0;
5624 bruce 44 ECB :
2219 noah 45 GIC 2404 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
5870 teodor 46 CBC 2404 : trglen = ARRNELEM(trg);
5624 bruce 47 ECB :
5870 teodor 48 GIC 2404 : if (trglen > 0)
5870 teodor 49 ECB : {
50 : trgm *ptr;
51 : int32 i;
52 :
4473 tgl 53 GIC 2404 : *nentries = trglen;
5870 teodor 54 CBC 2404 : entries = (Datum *) palloc(sizeof(Datum) * trglen);
5870 teodor 55 ECB :
5870 teodor 56 GIC 2404 : ptr = GETARR(trg);
4473 tgl 57 CBC 35631 : for (i = 0; i < trglen; i++)
5870 teodor 58 ECB : {
4382 bruce 59 GIC 33227 : int32 item = trgm2int(ptr);
5624 bruce 60 ECB :
4473 tgl 61 GIC 33227 : entries[i] = Int32GetDatum(item);
5870 teodor 62 CBC 33227 : ptr++;
5870 teodor 63 ECB : }
64 : }
65 :
5870 teodor 66 GIC 2404 : PG_RETURN_POINTER(entries);
5870 teodor 67 ECB : }
68 :
69 : Datum
4451 tgl 70 GIC 172 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
4451 tgl 71 ECB : {
2219 noah 72 GIC 172 : text *val = (text *) PG_GETARG_TEXT_PP(0);
4451 tgl 73 CBC 172 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 172 : StrategyNumber strategy = PG_GETARG_UINT16(2);
4382 bruce 75 ECB :
76 : /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
3652 tgl 77 GIC 172 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
3652 tgl 78 ECB :
79 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
4382 bruce 80 GIC 172 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
4451 tgl 81 CBC 172 : Datum *entries = NULL;
4451 tgl 82 ECB : TRGM *trg;
83 : int32 trglen;
84 : trgm *ptr;
85 : TrgmPackedGraph *graph;
86 : int32 i;
87 :
4451 tgl 88 GIC 172 : switch (strategy)
4451 tgl 89 ECB : {
4451 tgl 90 GIC 80 : case SimilarityStrategyNumber:
2580 teodor 91 ECB : case WordSimilarityStrategyNumber:
92 : case StrictWordSimilarityStrategyNumber:
93 : case EqualStrategyNumber:
2219 noah 94 GIC 80 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
4451 tgl 95 CBC 80 : break;
96 48 : case ILikeStrategyNumber:
4451 tgl 97 ECB : #ifndef IGNORECASE
98 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
99 : #endif
100 : /* FALL THRU */
101 : case LikeStrategyNumber:
102 :
103 : /*
104 : * For wildcard search we extract all the trigrams that every
105 : * potentially-matching string must include.
106 : */
2219 noah 107 GIC 48 : trg = generate_wildcard_trgm(VARDATA_ANY(val),
2219 noah 108 CBC 48 : VARSIZE_ANY_EXHDR(val));
4451 tgl 109 48 : break;
3652 110 44 : case RegExpICaseStrategyNumber:
3652 tgl 111 ECB : #ifndef IGNORECASE
112 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
113 : #endif
114 : /* FALL THRU */
115 : case RegExpStrategyNumber:
3651 tgl 116 GIC 44 : trg = createTrgmNFA(val, PG_GET_COLLATION(),
3651 tgl 117 ECB : &graph, CurrentMemoryContext);
3652 tgl 118 GIC 44 : if (trg && ARRNELEM(trg) > 0)
3652 tgl 119 ECB : {
120 : /*
121 : * Successful regex processing: store NFA-like graph as
122 : * extra_data. GIN API requires an array of nentries
123 : * Pointers, but we just put the same value in each element.
124 : */
3652 tgl 125 GIC 34 : trglen = ARRNELEM(trg);
3652 tgl 126 CBC 34 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
127 848 : for (i = 0; i < trglen; i++)
128 814 : (*extra_data)[i] = (Pointer) graph;
3652 tgl 129 ECB : }
130 : else
131 : {
132 : /* No result: have to do full index scan. */
3652 tgl 133 GIC 10 : *nentries = 0;
3652 tgl 134 CBC 10 : *searchMode = GIN_SEARCH_MODE_ALL;
135 10 : PG_RETURN_POINTER(entries);
3652 tgl 136 ECB : }
3652 tgl 137 GIC 34 : break;
4451 tgl 138 LBC 0 : default:
4451 tgl 139 UBC 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
4382 bruce 140 EUB : trg = NULL; /* keep compiler quiet */
141 : break;
142 : }
143 :
4451 tgl 144 GIC 162 : trglen = ARRNELEM(trg);
4451 tgl 145 CBC 162 : *nentries = trglen;
4451 tgl 146 ECB :
4451 tgl 147 GIC 162 : if (trglen > 0)
4451 tgl 148 ECB : {
4451 tgl 149 GIC 138 : entries = (Datum *) palloc(sizeof(Datum) * trglen);
4451 tgl 150 CBC 138 : ptr = GETARR(trg);
151 1684 : for (i = 0; i < trglen; i++)
4451 tgl 152 ECB : {
4382 bruce 153 GIC 1546 : int32 item = trgm2int(ptr);
4451 tgl 154 ECB :
4451 tgl 155 GIC 1546 : entries[i] = Int32GetDatum(item);
4451 tgl 156 CBC 1546 : ptr++;
4451 tgl 157 ECB : }
158 : }
159 :
160 : /*
161 : * If no trigram was extracted then we have to scan all the index.
162 : */
4451 tgl 163 GIC 162 : if (trglen == 0)
4451 tgl 164 CBC 24 : *searchMode = GIN_SEARCH_MODE_ALL;
4451 tgl 165 ECB :
4451 tgl 166 GIC 162 : PG_RETURN_POINTER(entries);
4451 tgl 167 ECB : }
168 :
169 : Datum
5870 teodor 170 GIC 8 : gin_trgm_consistent(PG_FUNCTION_ARGS)
5870 teodor 171 ECB : {
5624 bruce 172 GIC 8 : bool *check = (bool *) PG_GETARG_POINTER(0);
4451 tgl 173 CBC 8 : StrategyNumber strategy = PG_GETARG_UINT16(1);
4382 bruce 174 ECB :
175 : /* text *query = PG_GETARG_TEXT_PP(2); */
4473 tgl 176 GIC 8 : int32 nkeys = PG_GETARG_INT32(3);
3652 tgl 177 CBC 8 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
5128 178 8 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
4451 tgl 179 ECB : bool res;
180 : int32 i,
181 : ntrue;
182 : double nlimit;
183 :
184 : /* All cases served by this function are inexact */
5473 tgl 185 GIC 8 : *recheck = true;
5473 tgl 186 ECB :
4451 tgl 187 GIC 8 : switch (strategy)
4473 tgl 188 ECB : {
4451 tgl 189 UIC 0 : case SimilarityStrategyNumber:
2580 teodor 190 EUB : case WordSimilarityStrategyNumber:
191 : case StrictWordSimilarityStrategyNumber:
1845 teodor 192 UIC 0 : nlimit = index_strategy_get_limit(strategy);
2580 teodor 193 EUB :
194 : /* Count the matches */
4451 tgl 195 UIC 0 : ntrue = 0;
4451 tgl 196 UBC 0 : for (i = 0; i < nkeys; i++)
4451 tgl 197 EUB : {
4451 tgl 198 UIC 0 : if (check[i])
4451 tgl 199 UBC 0 : ntrue++;
4451 tgl 200 EUB : }
201 :
202 : /*--------------------
203 : * If DIVUNION is defined then similarity formula is:
204 : * c / (len1 + len2 - c)
205 : * where c is number of common trigrams and it stands as ntrue in
206 : * this code. Here we don't know value of len2 but we can assume
207 : * that c (ntrue) is a lower bound of len2, so upper bound of
208 : * similarity is:
209 : * c / (len1 + c - c) => c / len1
210 : * If DIVUNION is not defined then similarity formula is:
211 : * c / max(len1, len2)
212 : * And again, c (ntrue) is a lower bound of len2, but c <= len1
213 : * just by definition and, consequently, upper bound of
214 : * similarity is just c / len1.
215 : * So, independently on DIVUNION the upper bound formula is the same.
216 : */
2580 teodor 217 UIC 0 : res = (nkeys == 0) ? false :
2580 teodor 218 UBC 0 : (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
4451 tgl 219 0 : break;
4451 tgl 220 GBC 8 : case ILikeStrategyNumber:
4451 tgl 221 ECB : #ifndef IGNORECASE
222 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
223 : #endif
224 : /* FALL THRU */
225 : case LikeStrategyNumber:
226 : case EqualStrategyNumber:
227 : /* Check if all extracted trigrams are presented. */
4451 tgl 228 GIC 8 : res = true;
4451 tgl 229 CBC 16 : for (i = 0; i < nkeys; i++)
4451 tgl 230 ECB : {
4451 tgl 231 GIC 8 : if (!check[i])
4451 tgl 232 ECB : {
4451 tgl 233 UIC 0 : res = false;
4451 tgl 234 UBC 0 : break;
4451 tgl 235 EUB : }
236 : }
4451 tgl 237 GIC 8 : break;
3652 tgl 238 LBC 0 : case RegExpICaseStrategyNumber:
3652 tgl 239 EUB : #ifndef IGNORECASE
240 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
241 : #endif
242 : /* FALL THRU */
243 : case RegExpStrategyNumber:
3652 tgl 244 UIC 0 : if (nkeys < 1)
3652 tgl 245 EUB : {
246 : /* Regex processing gave no result: do full index scan */
3652 tgl 247 UIC 0 : res = true;
3652 tgl 248 EUB : }
249 : else
3652 tgl 250 UIC 0 : res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
3652 tgl 251 EUB : check);
3652 tgl 252 UIC 0 : break;
4451 tgl 253 UBC 0 : default:
254 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
4451 tgl 255 EUB : res = false; /* keep compiler quiet */
256 : break;
257 : }
258 :
5870 teodor 259 GIC 8 : PG_RETURN_BOOL(res);
5870 teodor 260 ECB : }
261 :
262 : /*
263 : * In all cases, GIN_TRUE is at least as favorable to inclusion as
264 : * GIN_MAYBE. If no better option is available, simply treat
265 : * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
266 : * consistent function.
267 : */
268 : Datum
2820 teodor 269 GIC 14358 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
2820 teodor 270 ECB : {
2495 rhaas 271 GIC 14358 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
2820 teodor 272 CBC 14358 : StrategyNumber strategy = PG_GETARG_UINT16(1);
2820 teodor 273 ECB :
274 : /* text *query = PG_GETARG_TEXT_PP(2); */
2820 teodor 275 GIC 14358 : int32 nkeys = PG_GETARG_INT32(3);
2820 teodor 276 CBC 14358 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
2495 rhaas 277 14358 : GinTernaryValue res = GIN_MAYBE;
2820 teodor 278 ECB : int32 i,
279 : ntrue;
280 : bool *boolcheck;
281 : double nlimit;
282 :
2820 teodor 283 GIC 14358 : switch (strategy)
2820 teodor 284 ECB : {
2820 teodor 285 GIC 8191 : case SimilarityStrategyNumber:
2580 teodor 286 ECB : case WordSimilarityStrategyNumber:
287 : case StrictWordSimilarityStrategyNumber:
1845 teodor 288 GIC 8191 : nlimit = index_strategy_get_limit(strategy);
2580 teodor 289 ECB :
290 : /* Count the matches */
2820 teodor 291 GIC 8191 : ntrue = 0;
2820 teodor 292 CBC 86909 : for (i = 0; i < nkeys; i++)
2820 teodor 293 ECB : {
2820 teodor 294 GIC 78718 : if (check[i] != GIN_FALSE)
2820 teodor 295 CBC 33172 : ntrue++;
2820 teodor 296 ECB : }
297 :
298 : /*
299 : * See comment in gin_trgm_consistent() about * upper bound
300 : * formula
301 : */
2580 teodor 302 GIC 16382 : res = (nkeys == 0)
2580 teodor 303 CBC 8191 : ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
2495 rhaas 304 ECB : ? GIN_MAYBE : GIN_FALSE);
2820 teodor 305 GIC 8191 : break;
2820 teodor 306 CBC 4046 : case ILikeStrategyNumber:
2820 teodor 307 ECB : #ifndef IGNORECASE
308 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
309 : #endif
310 : /* FALL THRU */
311 : case LikeStrategyNumber:
312 : case EqualStrategyNumber:
313 : /* Check if all extracted trigrams are presented. */
2820 teodor 314 GIC 4046 : res = GIN_MAYBE;
2820 teodor 315 CBC 8182 : for (i = 0; i < nkeys; i++)
2820 teodor 316 ECB : {
2820 teodor 317 GIC 4156 : if (check[i] == GIN_FALSE)
2820 teodor 318 ECB : {
2820 teodor 319 GIC 20 : res = GIN_FALSE;
2820 teodor 320 CBC 20 : break;
2820 teodor 321 ECB : }
322 : }
2820 teodor 323 GIC 4046 : break;
2820 teodor 324 CBC 2121 : case RegExpICaseStrategyNumber:
2820 teodor 325 ECB : #ifndef IGNORECASE
326 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
327 : #endif
328 : /* FALL THRU */
329 : case RegExpStrategyNumber:
2820 teodor 330 GIC 2121 : if (nkeys < 1)
2820 teodor 331 ECB : {
332 : /* Regex processing gave no result: do full index scan */
2820 teodor 333 GIC 736 : res = GIN_MAYBE;
2820 teodor 334 ECB : }
335 : else
336 : {
337 : /*
338 : * As trigramsMatchGraph implements a monotonic boolean
339 : * function, promoting all GIN_MAYBE keys to GIN_TRUE will
340 : * give a conservative result.
341 : */
2820 teodor 342 GIC 1385 : boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
2820 teodor 343 CBC 319289 : for (i = 0; i < nkeys; i++)
344 317904 : boolcheck[i] = (check[i] != GIN_FALSE);
345 1385 : if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
2820 teodor 346 ECB : boolcheck))
2820 teodor 347 GIC 6 : res = GIN_FALSE;
2820 teodor 348 CBC 1385 : pfree(boolcheck);
2820 teodor 349 ECB : }
2820 teodor 350 GIC 2121 : break;
2820 teodor 351 LBC 0 : default:
2820 teodor 352 UBC 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
2495 rhaas 353 EUB : res = GIN_FALSE; /* keep compiler quiet */
354 : break;
355 : }
356 :
357 : /* All cases served by this function are inexact */
2820 teodor 358 GIC 14358 : Assert(res != GIN_TRUE);
2820 teodor 359 CBC 14358 : PG_RETURN_GIN_TERNARY_VALUE(res);
2820 teodor 360 ECB : }
|