Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * _int_selfuncs.c
4 : * Functions for selectivity estimation of intarray operators
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * contrib/intarray/_int_selfuncs.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "_int.h"
18 : #include "access/htup_details.h"
19 : #include "catalog/pg_operator.h"
20 : #include "catalog/pg_statistic.h"
21 : #include "catalog/pg_type.h"
22 : #include "miscadmin.h"
23 : #include "utils/builtins.h"
24 : #include "utils/lsyscache.h"
25 : #include "utils/selfuncs.h"
26 : #include "utils/syscache.h"
27 :
2819 heikki.linnakangas 28 CBC 2 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
29 2 : PG_FUNCTION_INFO_V1(_int_contains_sel);
30 2 : PG_FUNCTION_INFO_V1(_int_contained_sel);
31 1 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
32 1 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
33 1 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
34 2 : PG_FUNCTION_INFO_V1(_int_matchsel);
35 :
36 :
37 : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
38 : int nmcelems, float4 minfreq);
39 : static int compare_val_int4(const void *a, const void *b);
40 :
41 : /*
42 : * Wrappers around the default array selectivity estimation functions.
43 : *
44 : * The default array selectivity operators for the @>, && and @< operators
45 : * work fine for integer arrays. However, if we tried to just use arraycontsel
46 : * and arraycontjoinsel directly as the cost estimator functions for our
47 : * operators, they would not work as intended, because they look at the
48 : * operator's OID. Our operators behave exactly like the built-in anyarray
49 : * versions, but we must tell the cost estimator functions which built-in
50 : * operators they correspond to. These wrappers just replace the operator
51 : * OID with the corresponding built-in operator's OID, and call the built-in
52 : * function.
53 : */
54 :
55 : Datum
56 6 : _int_overlap_sel(PG_FUNCTION_ARGS)
57 : {
58 6 : PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
59 : PG_GETARG_DATUM(0),
60 : ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
61 : PG_GETARG_DATUM(2),
62 : PG_GETARG_DATUM(3)));
63 : }
64 :
65 : Datum
66 24 : _int_contains_sel(PG_FUNCTION_ARGS)
67 : {
68 24 : PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
69 : PG_GETARG_DATUM(0),
70 : ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
71 : PG_GETARG_DATUM(2),
72 : PG_GETARG_DATUM(3)));
73 : }
74 :
75 : Datum
76 6 : _int_contained_sel(PG_FUNCTION_ARGS)
77 : {
78 6 : PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
79 : PG_GETARG_DATUM(0),
80 : ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
81 : PG_GETARG_DATUM(2),
82 : PG_GETARG_DATUM(3)));
83 : }
84 :
85 : Datum
2819 heikki.linnakangas 86 UBC 0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
87 : {
88 0 : PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
89 : PG_GETARG_DATUM(0),
90 : ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
91 : PG_GETARG_DATUM(2),
92 : PG_GETARG_DATUM(3),
93 : PG_GETARG_DATUM(4)));
94 : }
95 :
96 : Datum
97 0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
98 : {
99 0 : PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
100 : PG_GETARG_DATUM(0),
101 : ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
102 : PG_GETARG_DATUM(2),
103 : PG_GETARG_DATUM(3),
104 : PG_GETARG_DATUM(4)));
105 : }
106 :
107 : Datum
108 0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
109 : {
110 0 : PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
111 : PG_GETARG_DATUM(0),
112 : ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
113 : PG_GETARG_DATUM(2),
114 : PG_GETARG_DATUM(3),
115 : PG_GETARG_DATUM(4)));
116 : }
117 :
118 :
119 : /*
120 : * _int_matchsel -- restriction selectivity function for intarray @@ query_int
121 : */
122 : Datum
2819 heikki.linnakangas 123 CBC 36 : _int_matchsel(PG_FUNCTION_ARGS)
124 : {
125 36 : PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
126 :
127 36 : List *args = (List *) PG_GETARG_POINTER(2);
128 36 : int varRelid = PG_GETARG_INT32(3);
129 : VariableStatData vardata;
130 : Node *other;
131 : bool varonleft;
132 : Selectivity selec;
133 : QUERYTYPE *query;
134 36 : Datum *mcelems = NULL;
135 36 : float4 *mcefreqs = NULL;
136 36 : int nmcelems = 0;
137 36 : float4 minfreq = 0.0;
138 36 : float4 nullfrac = 0.0;
139 : AttStatsSlot sslot;
140 :
141 : /*
142 : * If expression is not "variable @@ something" or "something @@ variable"
143 : * then punt and return a default estimate.
144 : */
145 36 : if (!get_restriction_variable(root, args, varRelid,
146 : &vardata, &other, &varonleft))
2819 heikki.linnakangas 147 UBC 0 : PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
148 :
149 : /*
150 : * Variable should be int[]. We don't support cases where variable is
151 : * query_int.
152 : */
2819 heikki.linnakangas 153 CBC 36 : if (vardata.vartype != INT4ARRAYOID)
2819 heikki.linnakangas 154 UBC 0 : PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
155 :
156 : /*
157 : * Can't do anything useful if the something is not a constant, either.
158 : */
2819 heikki.linnakangas 159 CBC 36 : if (!IsA(other, Const))
160 : {
2819 heikki.linnakangas 161 UBC 0 : ReleaseVariableStats(vardata);
162 0 : PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
163 : }
164 :
165 : /*
166 : * The "@@" operator is strict, so we can cope with NULL right away.
167 : */
2819 heikki.linnakangas 168 CBC 36 : if (((Const *) other)->constisnull)
169 : {
2819 heikki.linnakangas 170 UBC 0 : ReleaseVariableStats(vardata);
171 0 : PG_RETURN_FLOAT8(0.0);
172 : }
173 :
174 : /* The caller made sure the const is a query, so get it now */
2819 heikki.linnakangas 175 CBC 36 : query = DatumGetQueryTypeP(((Const *) other)->constvalue);
176 :
177 : /* Empty query matches nothing */
178 36 : if (query->size == 0)
179 : {
2819 heikki.linnakangas 180 UBC 0 : ReleaseVariableStats(vardata);
181 0 : return (Selectivity) 0.0;
182 : }
183 :
184 : /*
185 : * Get the statistics for the intarray column.
186 : *
187 : * We're interested in the Most-Common-Elements list, and the NULL
188 : * fraction.
189 : */
2819 heikki.linnakangas 190 CBC 36 : if (HeapTupleIsValid(vardata.statsTuple))
191 : {
192 : Form_pg_statistic stats;
193 :
194 36 : stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
195 36 : nullfrac = stats->stanullfrac;
196 :
197 : /*
198 : * For an int4 array, the default array type analyze function will
199 : * collect a Most Common Elements list, which is an array of int4s.
200 : */
2157 tgl 201 36 : if (get_attstatsslot(&sslot, vardata.statsTuple,
202 : STATISTIC_KIND_MCELEM, InvalidOid,
203 : ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
204 : {
205 36 : Assert(sslot.valuetype == INT4OID);
206 :
207 : /*
208 : * There should be three more Numbers than Values, because the
209 : * last three (for intarray) cells are taken for minimal, maximal
210 : * and nulls frequency. Punt if not.
211 : */
212 36 : if (sslot.nnumbers == sslot.nvalues + 3)
213 : {
214 : /* Grab the lowest frequency. */
215 36 : minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
216 :
217 36 : mcelems = sslot.values;
218 36 : mcefreqs = sslot.numbers;
219 36 : nmcelems = sslot.nvalues;
220 : }
221 : }
222 : }
223 : else
2157 tgl 224 UBC 0 : memset(&sslot, 0, sizeof(sslot));
225 :
226 : /* Process the logical expression in the query, using the stats */
2819 heikki.linnakangas 227 CBC 36 : selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
228 : mcelems, mcefreqs, nmcelems, minfreq);
229 :
230 : /* MCE stats count only non-null rows, so adjust for null rows. */
231 36 : selec *= (1.0 - nullfrac);
232 :
2157 tgl 233 36 : free_attstatsslot(&sslot);
2819 heikki.linnakangas 234 36 : ReleaseVariableStats(vardata);
235 :
236 36 : CLAMP_PROBABILITY(selec);
237 :
238 36 : PG_RETURN_FLOAT8((float8) selec);
239 : }
240 :
241 : /*
242 : * Estimate selectivity of single intquery operator
243 : */
244 : static Selectivity
245 150 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
246 : int nmcelems, float4 minfreq)
247 : {
248 : Selectivity selec;
249 :
250 : /* since this function recurses, it could be driven to stack overflow */
251 150 : check_stack_depth();
252 :
253 150 : if (item->type == VAL)
254 : {
255 : Datum *searchres;
256 :
257 84 : if (mcelems == NULL)
2819 heikki.linnakangas 258 UBC 0 : return (Selectivity) DEFAULT_EQ_SEL;
259 :
2819 heikki.linnakangas 260 CBC 84 : searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
261 : sizeof(Datum), compare_val_int4);
262 84 : if (searchres)
263 : {
264 : /*
265 : * The element is in MCELEM. Return precise selectivity (or at
266 : * least as precise as ANALYZE could find out).
267 : */
268 84 : selec = mcefreqs[searchres - mcelems];
269 : }
270 : else
271 : {
272 : /*
273 : * The element is not in MCELEM. Punt, but assume that the
274 : * selectivity cannot be more than minfreq / 2.
275 : */
2819 heikki.linnakangas 276 UBC 0 : selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
277 : }
278 : }
2819 heikki.linnakangas 279 CBC 66 : else if (item->type == OPR)
280 : {
281 : /* Current query node is an operator */
282 : Selectivity s1,
283 : s2;
284 :
285 66 : s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
286 : minfreq);
287 66 : switch (item->val)
288 : {
289 18 : case (int32) '!':
290 18 : selec = 1.0 - s1;
291 18 : break;
292 :
293 30 : case (int32) '&':
294 30 : s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
295 : nmcelems, minfreq);
296 30 : selec = s1 * s2;
297 30 : break;
298 :
299 18 : case (int32) '|':
300 18 : s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
301 : nmcelems, minfreq);
302 18 : selec = s1 + s2 - s1 * s2;
303 18 : break;
304 :
2819 heikki.linnakangas 305 UBC 0 : default:
306 0 : elog(ERROR, "unrecognized operator: %d", item->val);
307 : selec = 0; /* keep compiler quiet */
308 : break;
309 : }
310 : }
311 : else
312 : {
313 0 : elog(ERROR, "unrecognized int query item type: %u", item->type);
314 : selec = 0; /* keep compiler quiet */
315 : }
316 :
317 : /* Clamp intermediate results to stay sane despite roundoff error */
2819 heikki.linnakangas 318 CBC 150 : CLAMP_PROBABILITY(selec);
319 :
320 150 : return selec;
321 : }
322 :
323 : /*
324 : * Comparison function for binary search in mcelem array.
325 : */
326 : static int
327 600 : compare_val_int4(const void *a, const void *b)
328 : {
329 600 : int32 key = *(int32 *) a;
330 600 : const Datum *t = (const Datum *) b;
331 :
332 600 : return key - DatumGetInt32(*t);
333 : }
|