Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsginidx.c
4 : * GIN support functions for tsvector_ops
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsginidx.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/gin.h"
17 : #include "access/stratnum.h"
18 : #include "miscadmin.h"
19 : #include "tsearch/ts_type.h"
20 : #include "tsearch/ts_utils.h"
21 : #include "utils/builtins.h"
22 : #include "varatt.h"
23 :
24 :
25 : Datum
5441 tgl 26 GIC 904978 : gin_cmp_tslexeme(PG_FUNCTION_ARGS)
5441 tgl 27 ECB : {
5050 bruce 28 GIC 904978 : text *a = PG_GETARG_TEXT_PP(0);
5050 bruce 29 CBC 904978 : text *b = PG_GETARG_TEXT_PP(1);
5050 bruce 30 ECB : int cmp;
31 :
4473 tgl 32 GIC 1809956 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
5050 bruce 33 CBC 1809956 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
5050 bruce 34 ECB : false);
35 :
5050 bruce 36 GIC 904978 : PG_FREE_IF_COPY(a, 0);
5050 bruce 37 CBC 904978 : PG_FREE_IF_COPY(b, 1);
38 904978 : PG_RETURN_INT32(cmp);
5441 tgl 39 ECB : }
40 :
41 : Datum
5441 tgl 42 GIC 222 : gin_cmp_prefix(PG_FUNCTION_ARGS)
5441 tgl 43 ECB : {
5050 bruce 44 GIC 222 : text *a = PG_GETARG_TEXT_PP(0);
5050 bruce 45 CBC 222 : text *b = PG_GETARG_TEXT_PP(1);
5050 bruce 46 ECB :
47 : #ifdef NOT_USED
48 : StrategyNumber strategy = PG_GETARG_UINT16(2);
49 : Pointer extra_data = PG_GETARG_POINTER(3);
50 : #endif
51 : int cmp;
52 :
4473 tgl 53 GIC 444 : cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
5050 bruce 54 CBC 444 : VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
5050 bruce 55 ECB : true);
56 :
5050 bruce 57 GIC 222 : if (cmp < 0)
5050 bruce 58 CBC 6 : cmp = 1; /* prevent continue scan */
5441 tgl 59 ECB :
5050 bruce 60 GIC 222 : PG_FREE_IF_COPY(a, 0);
5050 bruce 61 CBC 222 : PG_FREE_IF_COPY(b, 1);
62 222 : PG_RETURN_INT32(cmp);
5441 tgl 63 ECB : }
64 :
65 : Datum
5710 tgl 66 GIC 1548 : gin_extract_tsvector(PG_FUNCTION_ARGS)
5710 tgl 67 ECB : {
5710 tgl 68 GIC 1548 : TSVector vector = PG_GETARG_TSVECTOR(0);
5693 teodor 69 CBC 1548 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
5710 tgl 70 1548 : Datum *entries = NULL;
5710 tgl 71 ECB :
5689 teodor 72 GIC 1548 : *nentries = vector->size;
5710 tgl 73 CBC 1548 : if (vector->size > 0)
5710 tgl 74 ECB : {
75 : int i;
5710 tgl 76 GIC 1521 : WordEntry *we = ARRPTR(vector);
5710 tgl 77 ECB :
5710 tgl 78 GIC 1521 : entries = (Datum *) palloc(sizeof(Datum) * vector->size);
5710 tgl 79 ECB :
5710 tgl 80 GIC 87987 : for (i = 0; i < vector->size; i++)
5710 tgl 81 ECB : {
82 : text *txt;
83 :
5493 tgl 84 GIC 86466 : txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
5710 tgl 85 CBC 86466 : entries[i] = PointerGetDatum(txt);
5710 tgl 86 ECB :
5710 tgl 87 GIC 86466 : we++;
5710 tgl 88 ECB : }
89 : }
90 :
5710 tgl 91 GIC 1548 : PG_FREE_IF_COPY(vector, 0);
5710 tgl 92 CBC 1548 : PG_RETURN_POINTER(entries);
5710 tgl 93 ECB : }
94 :
95 : Datum
5611 tgl 96 GIC 225 : gin_extract_tsquery(PG_FUNCTION_ARGS)
5710 tgl 97 ECB : {
5710 tgl 98 GIC 225 : TSQuery query = PG_GETARG_TSQUERY(0);
5693 teodor 99 CBC 225 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
4382 bruce 100 ECB :
101 : /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
5050 bruce 102 GIC 225 : bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
5050 bruce 103 CBC 225 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
4382 bruce 104 ECB :
105 : /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
4473 tgl 106 GIC 225 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
5710 tgl 107 CBC 225 : Datum *entries = NULL;
5710 tgl 108 ECB :
5710 tgl 109 GIC 225 : *nentries = 0;
5710 tgl 110 ECB :
5710 tgl 111 GIC 225 : if (query->size > 0)
5710 tgl 112 ECB : {
4473 tgl 113 GIC 225 : QueryItem *item = GETQUERY(query);
3940 peter_e 114 ECB : int32 i,
115 : j;
116 : bool *partialmatch;
117 : int *map_item_operand;
118 :
119 : /*
120 : * If the query doesn't have any required positive matches (for
121 : * instance, it's something like '! foo'), we have to do a full index
122 : * scan.
123 : */
4473 tgl 124 GIC 225 : if (tsquery_requires_match(item))
4473 tgl 125 CBC 165 : *searchMode = GIN_SEARCH_MODE_DEFAULT;
4473 tgl 126 ECB : else
4473 tgl 127 GIC 60 : *searchMode = GIN_SEARCH_MODE_ALL;
5710 tgl 128 ECB :
129 : /* count number of VAL items */
4473 tgl 130 GIC 225 : j = 0;
5710 tgl 131 CBC 852 : for (i = 0; i < query->size; i++)
4473 tgl 132 ECB : {
5693 teodor 133 GIC 627 : if (item[i].type == QI_VAL)
4473 tgl 134 CBC 384 : j++;
4473 tgl 135 ECB : }
4473 tgl 136 GIC 225 : *nentries = j;
5710 tgl 137 ECB :
4473 tgl 138 GIC 225 : entries = (Datum *) palloc(sizeof(Datum) * j);
4473 tgl 139 CBC 225 : partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
5710 tgl 140 ECB :
141 : /*
142 : * Make map to convert item's number to corresponding operand's (the
143 : * same, entry's) number. Entry's number is used in check array in
144 : * consistent method. We use the same map for each entry.
145 : */
4473 tgl 146 GIC 225 : *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
4473 tgl 147 CBC 225 : map_item_operand = (int *) palloc0(sizeof(int) * query->size);
5128 tgl 148 ECB :
149 : /* Now rescan the VAL items and fill in the arrays */
4473 tgl 150 GIC 225 : j = 0;
5710 tgl 151 CBC 852 : for (i = 0; i < query->size; i++)
4473 tgl 152 ECB : {
5693 teodor 153 GIC 627 : if (item[i].type == QI_VAL)
5710 tgl 154 ECB : {
5015 peter_e 155 GIC 384 : QueryOperand *val = &item[i].qoperand;
4473 tgl 156 ECB : text *txt;
157 :
5493 tgl 158 GIC 384 : txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
5493 tgl 159 CBC 384 : val->length);
4473 160 384 : entries[j] = PointerGetDatum(txt);
161 384 : partialmatch[j] = val->prefix;
5050 bruce 162 384 : (*extra_data)[j] = (Pointer) map_item_operand;
5128 tgl 163 384 : map_item_operand[i] = j;
4473 164 384 : j++;
5710 tgl 165 ECB : }
166 : }
167 : }
168 :
5710 tgl 169 GIC 225 : PG_FREE_IF_COPY(query, 0);
5710 tgl 170 ECB :
5710 tgl 171 GIC 225 : PG_RETURN_POINTER(entries);
5710 tgl 172 ECB : }
173 :
174 : typedef struct
175 : {
176 : QueryItem *first_item;
177 : GinTernaryValue *check;
178 : int *map_item_operand;
179 : } GinChkVal;
180 :
181 : /*
182 : * TS_execute callback for matching a tsquery operand to GIN index data
183 : */
184 : static TSTernaryValue
989 tgl 185 GIC 24183 : checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
5710 tgl 186 ECB : {
989 tgl 187 GIC 24183 : GinChkVal *gcv = (GinChkVal *) checkval;
5128 tgl 188 ECB : int j;
189 : GinTernaryValue result;
190 :
191 : /* convert item's number to corresponding entry's (operand's) number */
5050 bruce 192 GIC 24183 : j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
5128 tgl 193 ECB :
194 : /* determine presence of current entry in indexed value */
782 tgl 195 GIC 24183 : result = gcv->check[j];
782 tgl 196 ECB :
197 : /*
198 : * If any val requiring a weight is used or caller needs position
199 : * information then we must recheck, so replace TRUE with MAYBE.
200 : */
782 tgl 201 GIC 24183 : if (result == GIN_TRUE)
3315 heikki.linnakangas 202 ECB : {
989 tgl 203 GIC 7827 : if (val->weight != 0 || data != NULL)
782 tgl 204 CBC 3249 : result = GIN_MAYBE;
3315 heikki.linnakangas 205 ECB : }
206 :
207 : /*
208 : * We rely on GinTernaryValue and TSTernaryValue using equivalent value
209 : * assignments. We could use a switch statement to map the values if that
210 : * ever stops being true, but it seems unlikely to happen.
211 : */
782 tgl 212 GIC 24183 : return (TSTernaryValue) result;
3315 heikki.linnakangas 213 ECB : }
214 :
215 : Datum
5611 tgl 216 GIC 12 : gin_tsquery_consistent(PG_FUNCTION_ARGS)
5710 tgl 217 ECB : {
5710 tgl 218 GIC 12 : bool *check = (bool *) PG_GETARG_POINTER(0);
5050 bruce 219 ECB :
220 : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
5710 tgl 221 GIC 12 : TSQuery query = PG_GETARG_TSQUERY(2);
5050 bruce 222 ECB :
223 : /* int32 nkeys = PG_GETARG_INT32(3); */
5050 bruce 224 GIC 12 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
5128 tgl 225 CBC 12 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
2062 peter_e 226 12 : bool res = false;
5710 tgl 227 ECB :
228 : /* Initially assume query doesn't require recheck */
5473 tgl 229 GIC 12 : *recheck = false;
5473 tgl 230 ECB :
5710 tgl 231 GIC 12 : if (query->size > 0)
5710 tgl 232 ECB : {
233 : GinChkVal gcv;
234 :
235 : /*
236 : * check-parameter array has one entry for each value (operand) in the
237 : * query.
238 : */
2305 tgl 239 GIC 12 : gcv.first_item = GETQUERY(query);
1844 peter_e 240 CBC 12 : gcv.check = (GinTernaryValue *) check;
5050 bruce 241 GIC 12 : gcv.map_item_operand = (int *) (extra_data[0]);
5710 tgl 242 ECB :
782 tgl 243 GIC 12 : switch (TS_execute_ternary(GETQUERY(query),
244 : &gcv,
245 : TS_EXEC_PHRASE_NO_POS,
246 : checkcondition_gin))
782 tgl 247 EUB : {
782 tgl 248 UBC 0 : case TS_NO:
249 0 : res = false;
782 tgl 250 LBC 0 : break;
782 tgl 251 CBC 12 : case TS_YES:
252 12 : res = true;
782 tgl 253 GBC 12 : break;
782 tgl 254 UBC 0 : case TS_MAYBE:
255 0 : res = true;
256 0 : *recheck = true;
782 tgl 257 UIC 0 : break;
258 : }
259 : }
5710 tgl 260 ECB :
5710 tgl 261 GIC 12 : PG_RETURN_BOOL(res);
262 : }
263 :
3315 heikki.linnakangas 264 ECB : Datum
3315 heikki.linnakangas 265 GIC 18459 : gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
3315 heikki.linnakangas 266 ECB : {
3296 heikki.linnakangas 267 GIC 18459 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
268 :
3315 heikki.linnakangas 269 ECB : /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
3315 heikki.linnakangas 270 GIC 18459 : TSQuery query = PG_GETARG_TSQUERY(2);
271 :
3315 heikki.linnakangas 272 ECB : /* int32 nkeys = PG_GETARG_INT32(3); */
3315 heikki.linnakangas 273 CBC 18459 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
3296 heikki.linnakangas 274 GIC 18459 : GinTernaryValue res = GIN_FALSE;
3301 heikki.linnakangas 275 ECB :
3315 heikki.linnakangas 276 GIC 18459 : if (query->size > 0)
277 : {
278 : GinChkVal gcv;
279 :
280 : /*
281 : * check-parameter array has one entry for each value (operand) in the
282 : * query.
3315 heikki.linnakangas 283 ECB : */
2305 tgl 284 CBC 18459 : gcv.first_item = GETQUERY(query);
3315 heikki.linnakangas 285 18459 : gcv.check = check;
3315 heikki.linnakangas 286 GIC 18459 : gcv.map_item_operand = (int *) (extra_data[0]);
3315 heikki.linnakangas 287 ECB :
782 tgl 288 GIC 18459 : res = TS_execute_ternary(GETQUERY(query),
289 : &gcv,
290 : TS_EXEC_PHRASE_NO_POS,
291 : checkcondition_gin);
292 : }
3315 heikki.linnakangas 293 ECB :
3296 heikki.linnakangas 294 GIC 18459 : PG_RETURN_GIN_TERNARY_VALUE(res);
295 : }
296 :
297 : /*
298 : * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
299 : * but we still need a pg_proc entry with two args to support reloading
300 : * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
301 : * function should go away eventually. (Note: you might say "hey, but the
302 : * code above is only *using* two args, so let's just declare it that way".
303 : * If you try that you'll find the opr_sanity regression test complains.)
304 : */
4435 tgl 305 EUB : Datum
4435 tgl 306 UIC 0 : gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
4435 tgl 307 EUB : {
4435 tgl 308 UBC 0 : if (PG_NARGS() < 3) /* should not happen */
309 0 : elog(ERROR, "gin_extract_tsvector requires three arguments");
4435 tgl 310 UIC 0 : return gin_extract_tsvector(fcinfo);
311 : }
312 :
313 : /*
314 : * Likewise, we need a stub version of gin_extract_tsquery declared with
315 : * only five arguments.
316 : */
4435 tgl 317 EUB : Datum
4435 tgl 318 UIC 0 : gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
4435 tgl 319 EUB : {
4435 tgl 320 UBC 0 : if (PG_NARGS() < 7) /* should not happen */
321 0 : elog(ERROR, "gin_extract_tsquery requires seven arguments");
4435 tgl 322 UIC 0 : return gin_extract_tsquery(fcinfo);
323 : }
324 :
325 : /*
326 : * Likewise, we need a stub version of gin_tsquery_consistent declared with
327 : * only six arguments.
328 : */
4435 tgl 329 EUB : Datum
4435 tgl 330 UIC 0 : gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
4435 tgl 331 EUB : {
4435 tgl 332 UBC 0 : if (PG_NARGS() < 8) /* should not happen */
333 0 : elog(ERROR, "gin_tsquery_consistent requires eight arguments");
4435 tgl 334 UIC 0 : return gin_tsquery_consistent(fcinfo);
335 : }
336 :
337 : /*
338 : * Likewise, a stub version of gin_extract_tsquery declared with argument
339 : * types that are no longer considered appropriate.
340 : */
2594 tgl 341 EUB : Datum
2594 tgl 342 UIC 0 : gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
2594 tgl 343 EUB : {
2594 tgl 344 UIC 0 : return gin_extract_tsquery(fcinfo);
345 : }
346 :
347 : /*
348 : * Likewise, a stub version of gin_tsquery_consistent declared with argument
349 : * types that are no longer considered appropriate.
350 : */
2594 tgl 351 EUB : Datum
2594 tgl 352 UIC 0 : gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
2594 tgl 353 EUB : {
2594 tgl 354 UIC 0 : return gin_tsquery_consistent(fcinfo);
355 : }
|