LCOV - differential code coverage report
Current view: top level - contrib/intarray - _int_selfuncs.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 78.0 % 91 71 20 71
Current Date: 2023-04-08 15:15:32 Functions: 81.2 % 16 13 3 13
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * _int_selfuncs.c
       4                 :  *    Functions for selectivity estimation of intarray operators
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    contrib/intarray/_int_selfuncs.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "_int.h"
      18                 : #include "access/htup_details.h"
      19                 : #include "catalog/pg_operator.h"
      20                 : #include "catalog/pg_statistic.h"
      21                 : #include "catalog/pg_type.h"
      22                 : #include "miscadmin.h"
      23                 : #include "utils/builtins.h"
      24                 : #include "utils/lsyscache.h"
      25                 : #include "utils/selfuncs.h"
      26                 : #include "utils/syscache.h"
      27                 : 
      28 CBC           2 : PG_FUNCTION_INFO_V1(_int_overlap_sel);
      29               2 : PG_FUNCTION_INFO_V1(_int_contains_sel);
      30               2 : PG_FUNCTION_INFO_V1(_int_contained_sel);
      31               1 : PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
      32               1 : PG_FUNCTION_INFO_V1(_int_contains_joinsel);
      33               1 : PG_FUNCTION_INFO_V1(_int_contained_joinsel);
      34               2 : PG_FUNCTION_INFO_V1(_int_matchsel);
      35                 : 
      36                 : 
      37                 : static Selectivity int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
      38                 :                                        int nmcelems, float4 minfreq);
      39                 : static int  compare_val_int4(const void *a, const void *b);
      40                 : 
      41                 : /*
      42                 :  * Wrappers around the default array selectivity estimation functions.
      43                 :  *
      44                 :  * The default array selectivity operators for the @>, && and @< operators
      45                 :  * work fine for integer arrays. However, if we tried to just use arraycontsel
      46                 :  * and arraycontjoinsel directly as the cost estimator functions for our
      47                 :  * operators, they would not work as intended, because they look at the
      48                 :  * operator's OID. Our operators behave exactly like the built-in anyarray
      49                 :  * versions, but we must tell the cost estimator functions which built-in
      50                 :  * operators they correspond to. These wrappers just replace the operator
      51                 :  * OID with the corresponding built-in operator's OID, and call the built-in
      52                 :  * function.
      53                 :  */
      54                 : 
      55                 : Datum
      56               6 : _int_overlap_sel(PG_FUNCTION_ARGS)
      57                 : {
      58               6 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      59                 :                                         PG_GETARG_DATUM(0),
      60                 :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      61                 :                                         PG_GETARG_DATUM(2),
      62                 :                                         PG_GETARG_DATUM(3)));
      63                 : }
      64                 : 
      65                 : Datum
      66              24 : _int_contains_sel(PG_FUNCTION_ARGS)
      67                 : {
      68              24 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      69                 :                                         PG_GETARG_DATUM(0),
      70                 :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
      71                 :                                         PG_GETARG_DATUM(2),
      72                 :                                         PG_GETARG_DATUM(3)));
      73                 : }
      74                 : 
      75                 : Datum
      76               6 : _int_contained_sel(PG_FUNCTION_ARGS)
      77                 : {
      78               6 :     PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
      79                 :                                         PG_GETARG_DATUM(0),
      80                 :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
      81                 :                                         PG_GETARG_DATUM(2),
      82                 :                                         PG_GETARG_DATUM(3)));
      83                 : }
      84                 : 
      85                 : Datum
      86 UBC           0 : _int_overlap_joinsel(PG_FUNCTION_ARGS)
      87                 : {
      88               0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
      89                 :                                         PG_GETARG_DATUM(0),
      90                 :                                         ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
      91                 :                                         PG_GETARG_DATUM(2),
      92                 :                                         PG_GETARG_DATUM(3),
      93                 :                                         PG_GETARG_DATUM(4)));
      94                 : }
      95                 : 
      96                 : Datum
      97               0 : _int_contains_joinsel(PG_FUNCTION_ARGS)
      98                 : {
      99               0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     100                 :                                         PG_GETARG_DATUM(0),
     101                 :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
     102                 :                                         PG_GETARG_DATUM(2),
     103                 :                                         PG_GETARG_DATUM(3),
     104                 :                                         PG_GETARG_DATUM(4)));
     105                 : }
     106                 : 
     107                 : Datum
     108               0 : _int_contained_joinsel(PG_FUNCTION_ARGS)
     109                 : {
     110               0 :     PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
     111                 :                                         PG_GETARG_DATUM(0),
     112                 :                                         ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
     113                 :                                         PG_GETARG_DATUM(2),
     114                 :                                         PG_GETARG_DATUM(3),
     115                 :                                         PG_GETARG_DATUM(4)));
     116                 : }
     117                 : 
     118                 : 
     119                 : /*
     120                 :  * _int_matchsel -- restriction selectivity function for intarray @@ query_int
     121                 :  */
     122                 : Datum
     123 CBC          36 : _int_matchsel(PG_FUNCTION_ARGS)
     124                 : {
     125              36 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     126                 : 
     127              36 :     List       *args = (List *) PG_GETARG_POINTER(2);
     128              36 :     int         varRelid = PG_GETARG_INT32(3);
     129                 :     VariableStatData vardata;
     130                 :     Node       *other;
     131                 :     bool        varonleft;
     132                 :     Selectivity selec;
     133                 :     QUERYTYPE  *query;
     134              36 :     Datum      *mcelems = NULL;
     135              36 :     float4     *mcefreqs = NULL;
     136              36 :     int         nmcelems = 0;
     137              36 :     float4      minfreq = 0.0;
     138              36 :     float4      nullfrac = 0.0;
     139                 :     AttStatsSlot sslot;
     140                 : 
     141                 :     /*
     142                 :      * If expression is not "variable @@ something" or "something @@ variable"
     143                 :      * then punt and return a default estimate.
     144                 :      */
     145              36 :     if (!get_restriction_variable(root, args, varRelid,
     146                 :                                   &vardata, &other, &varonleft))
     147 UBC           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     148                 : 
     149                 :     /*
     150                 :      * Variable should be int[]. We don't support cases where variable is
     151                 :      * query_int.
     152                 :      */
     153 CBC          36 :     if (vardata.vartype != INT4ARRAYOID)
     154 UBC           0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     155                 : 
     156                 :     /*
     157                 :      * Can't do anything useful if the something is not a constant, either.
     158                 :      */
     159 CBC          36 :     if (!IsA(other, Const))
     160                 :     {
     161 UBC           0 :         ReleaseVariableStats(vardata);
     162               0 :         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
     163                 :     }
     164                 : 
     165                 :     /*
     166                 :      * The "@@" operator is strict, so we can cope with NULL right away.
     167                 :      */
     168 CBC          36 :     if (((Const *) other)->constisnull)
     169                 :     {
     170 UBC           0 :         ReleaseVariableStats(vardata);
     171               0 :         PG_RETURN_FLOAT8(0.0);
     172                 :     }
     173                 : 
     174                 :     /* The caller made sure the const is a query, so get it now */
     175 CBC          36 :     query = DatumGetQueryTypeP(((Const *) other)->constvalue);
     176                 : 
     177                 :     /* Empty query matches nothing */
     178              36 :     if (query->size == 0)
     179                 :     {
     180 UBC           0 :         ReleaseVariableStats(vardata);
     181               0 :         return (Selectivity) 0.0;
     182                 :     }
     183                 : 
     184                 :     /*
     185                 :      * Get the statistics for the intarray column.
     186                 :      *
     187                 :      * We're interested in the Most-Common-Elements list, and the NULL
     188                 :      * fraction.
     189                 :      */
     190 CBC          36 :     if (HeapTupleIsValid(vardata.statsTuple))
     191                 :     {
     192                 :         Form_pg_statistic stats;
     193                 : 
     194              36 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     195              36 :         nullfrac = stats->stanullfrac;
     196                 : 
     197                 :         /*
     198                 :          * For an int4 array, the default array type analyze function will
     199                 :          * collect a Most Common Elements list, which is an array of int4s.
     200                 :          */
     201              36 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
     202                 :                              STATISTIC_KIND_MCELEM, InvalidOid,
     203                 :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
     204                 :         {
     205              36 :             Assert(sslot.valuetype == INT4OID);
     206                 : 
     207                 :             /*
     208                 :              * There should be three more Numbers than Values, because the
     209                 :              * last three (for intarray) cells are taken for minimal, maximal
     210                 :              * and nulls frequency. Punt if not.
     211                 :              */
     212              36 :             if (sslot.nnumbers == sslot.nvalues + 3)
     213                 :             {
     214                 :                 /* Grab the lowest frequency. */
     215              36 :                 minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
     216                 : 
     217              36 :                 mcelems = sslot.values;
     218              36 :                 mcefreqs = sslot.numbers;
     219              36 :                 nmcelems = sslot.nvalues;
     220                 :             }
     221                 :         }
     222                 :     }
     223                 :     else
     224 UBC           0 :         memset(&sslot, 0, sizeof(sslot));
     225                 : 
     226                 :     /* Process the logical expression in the query, using the stats */
     227 CBC          36 :     selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
     228                 :                                 mcelems, mcefreqs, nmcelems, minfreq);
     229                 : 
     230                 :     /* MCE stats count only non-null rows, so adjust for null rows. */
     231              36 :     selec *= (1.0 - nullfrac);
     232                 : 
     233              36 :     free_attstatsslot(&sslot);
     234              36 :     ReleaseVariableStats(vardata);
     235                 : 
     236              36 :     CLAMP_PROBABILITY(selec);
     237                 : 
     238              36 :     PG_RETURN_FLOAT8((float8) selec);
     239                 : }
     240                 : 
     241                 : /*
     242                 :  * Estimate selectivity of single intquery operator
     243                 :  */
     244                 : static Selectivity
     245             150 : int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
     246                 :                     int nmcelems, float4 minfreq)
     247                 : {
     248                 :     Selectivity selec;
     249                 : 
     250                 :     /* since this function recurses, it could be driven to stack overflow */
     251             150 :     check_stack_depth();
     252                 : 
     253             150 :     if (item->type == VAL)
     254                 :     {
     255                 :         Datum      *searchres;
     256                 : 
     257              84 :         if (mcelems == NULL)
     258 UBC           0 :             return (Selectivity) DEFAULT_EQ_SEL;
     259                 : 
     260 CBC          84 :         searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
     261                 :                                       sizeof(Datum), compare_val_int4);
     262              84 :         if (searchres)
     263                 :         {
     264                 :             /*
     265                 :              * The element is in MCELEM.  Return precise selectivity (or at
     266                 :              * least as precise as ANALYZE could find out).
     267                 :              */
     268              84 :             selec = mcefreqs[searchres - mcelems];
     269                 :         }
     270                 :         else
     271                 :         {
     272                 :             /*
     273                 :              * The element is not in MCELEM.  Punt, but assume that the
     274                 :              * selectivity cannot be more than minfreq / 2.
     275                 :              */
     276 UBC           0 :             selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
     277                 :         }
     278                 :     }
     279 CBC          66 :     else if (item->type == OPR)
     280                 :     {
     281                 :         /* Current query node is an operator */
     282                 :         Selectivity s1,
     283                 :                     s2;
     284                 : 
     285              66 :         s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
     286                 :                                  minfreq);
     287              66 :         switch (item->val)
     288                 :         {
     289              18 :             case (int32) '!':
     290              18 :                 selec = 1.0 - s1;
     291              18 :                 break;
     292                 : 
     293              30 :             case (int32) '&':
     294              30 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     295                 :                                          nmcelems, minfreq);
     296              30 :                 selec = s1 * s2;
     297              30 :                 break;
     298                 : 
     299              18 :             case (int32) '|':
     300              18 :                 s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
     301                 :                                          nmcelems, minfreq);
     302              18 :                 selec = s1 + s2 - s1 * s2;
     303              18 :                 break;
     304                 : 
     305 UBC           0 :             default:
     306               0 :                 elog(ERROR, "unrecognized operator: %d", item->val);
     307                 :                 selec = 0;      /* keep compiler quiet */
     308                 :                 break;
     309                 :         }
     310                 :     }
     311                 :     else
     312                 :     {
     313               0 :         elog(ERROR, "unrecognized int query item type: %u", item->type);
     314                 :         selec = 0;              /* keep compiler quiet */
     315                 :     }
     316                 : 
     317                 :     /* Clamp intermediate results to stay sane despite roundoff error */
     318 CBC         150 :     CLAMP_PROBABILITY(selec);
     319                 : 
     320             150 :     return selec;
     321                 : }
     322                 : 
     323                 : /*
     324                 :  * Comparison function for binary search in mcelem array.
     325                 :  */
     326                 : static int
     327             600 : compare_val_int4(const void *a, const void *b)
     328                 : {
     329             600 :     int32       key = *(int32 *) a;
     330             600 :     const Datum *t = (const Datum *) b;
     331                 : 
     332             600 :     return key - DatumGetInt32(*t);
     333                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a