LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - multirangetypes_selfuncs.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 82.9 % 368 305 14 28 21 3 181 1 120 39 170 1
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 13 13 11 1 1 11
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * multirangetypes_selfuncs.c
       4                 :  *    Functions for selectivity estimation of multirange operators
       5                 :  *
       6                 :  * Estimates are based on histograms of lower and upper bounds, and the
       7                 :  * fraction of empty multiranges.
       8                 :  *
       9                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10                 :  * Portions Copyright (c) 1994, Regents of the University of California
      11                 :  *
      12                 :  *
      13                 :  * IDENTIFICATION
      14                 :  *    src/backend/utils/adt/multirangetypes_selfuncs.c
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include <math.h>
      21                 : 
      22                 : #include "access/htup_details.h"
      23                 : #include "catalog/pg_operator.h"
      24                 : #include "catalog/pg_statistic.h"
      25                 : #include "catalog/pg_type.h"
      26                 : #include "utils/float.h"
      27                 : #include "utils/fmgrprotos.h"
      28                 : #include "utils/lsyscache.h"
      29                 : #include "utils/rangetypes.h"
      30                 : #include "utils/multirangetypes.h"
      31                 : #include "utils/selfuncs.h"
      32                 : #include "utils/typcache.h"
      33                 : 
      34                 : static double calc_multirangesel(TypeCacheEntry *typcache,
      35                 :                                  VariableStatData *vardata,
      36                 :                                  const MultirangeType *constval, Oid operator);
      37                 : static double default_multirange_selectivity(Oid operator);
      38                 : static double default_multirange_selectivity(Oid operator);
      39                 : static double calc_hist_selectivity(TypeCacheEntry *typcache,
      40                 :                                     VariableStatData *vardata,
      41                 :                                     const MultirangeType *constval,
      42                 :                                     Oid operator);
      43                 : static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
      44                 :                                            const RangeBound *constbound,
      45                 :                                            const RangeBound *hist,
      46                 :                                            int hist_nvalues, bool equal);
      47                 : static int  rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
      48                 :                            const RangeBound *hist, int hist_length, bool equal);
      49                 : static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
      50                 :                            const RangeBound *hist1, const RangeBound *hist2);
      51                 : static float8 get_len_position(double value, double hist1, double hist2);
      52                 : static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
      53                 :                            const RangeBound *bound2);
      54                 : static int  length_hist_bsearch(Datum *length_hist_values,
      55                 :                                 int length_hist_nvalues, double value,
      56                 :                                 bool equal);
      57                 : static double calc_length_hist_frac(Datum *length_hist_values,
      58                 :                                     int length_hist_nvalues, double length1,
      59                 :                                     double length2, bool equal);
      60                 : static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
      61                 :                                               const RangeBound *lower,
      62                 :                                               RangeBound *upper,
      63                 :                                               const RangeBound *hist_lower,
      64                 :                                               int hist_nvalues,
      65                 :                                               Datum *length_hist_values,
      66                 :                                               int length_hist_nvalues);
      67                 : static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
      68                 :                                              const RangeBound *lower,
      69                 :                                              const RangeBound *upper,
      70                 :                                              const RangeBound *hist_lower,
      71                 :                                              int hist_nvalues,
      72                 :                                              Datum *length_hist_values,
      73                 :                                              int length_hist_nvalues);
      74                 : 
      75                 : /*
      76                 :  * Returns a default selectivity estimate for given operator, when we don't
      77                 :  * have statistics or cannot use them for some reason.
      78                 :  */
      79                 : static double
      80 CBC          96 : default_multirange_selectivity(Oid operator)
      81                 : {
      82              96 :     switch (operator)
      83                 :     {
      84               9 :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
      85                 :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
      86                 :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
      87               9 :             return 0.01;
      88                 : 
      89              27 :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
      90                 :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
      91                 :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
      92                 :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
      93                 :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
      94                 :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
      95              27 :             return 0.005;
      96                 : 
      97 UBC           0 :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
      98                 :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
      99                 : 
     100                 :             /*
     101                 :              * "multirange @> elem" is more or less identical to a scalar
     102                 :              * inequality "A >= b AND A <= c".
     103                 :              */
     104               0 :             return DEFAULT_MULTIRANGE_INEQ_SEL;
     105                 : 
     106 CBC          60 :         case OID_MULTIRANGE_LESS_OP:
     107                 :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     108                 :         case OID_MULTIRANGE_GREATER_OP:
     109                 :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     110                 :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     111                 :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     112                 :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     113                 :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     114                 :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     115                 :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     116                 :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     117                 :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     118                 :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     119                 :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     120                 :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     121                 :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     122                 :             /* these are similar to regular scalar inequalities */
     123              60 :             return DEFAULT_INEQ_SEL;
     124                 : 
     125 UBC           0 :         default:
     126                 : 
     127                 :             /*
     128                 :              * all multirange operators should be handled above, but just in
     129                 :              * case
     130                 :              */
     131               0 :             return 0.01;
     132                 :     }
     133                 : }
     134                 : 
     135                 : /*
     136                 :  * multirangesel -- restriction selectivity for multirange operators
     137                 :  */
     138                 : Datum
     139 CBC         333 : multirangesel(PG_FUNCTION_ARGS)
     140                 : {
     141             333 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     142             333 :     Oid         operator = PG_GETARG_OID(1);
     143             333 :     List       *args = (List *) PG_GETARG_POINTER(2);
     144             333 :     int         varRelid = PG_GETARG_INT32(3);
     145                 :     VariableStatData vardata;
     146                 :     Node       *other;
     147                 :     bool        varonleft;
     148                 :     Selectivity selec;
     149             333 :     TypeCacheEntry *typcache = NULL;
     150             333 :     MultirangeType *constmultirange = NULL;
     151             333 :     RangeType  *constrange = NULL;
     152                 : 
     153                 :     /*
     154                 :      * If expression is not (variable op something) or (something op
     155                 :      * variable), then punt and return a default estimate.
     156                 :      */
     157             333 :     if (!get_restriction_variable(root, args, varRelid,
     158                 :                                   &vardata, &other, &varonleft))
     159 UBC           0 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     160                 : 
     161                 :     /*
     162                 :      * Can't do anything useful if the something is not a constant, either.
     163                 :      */
     164 CBC         333 :     if (!IsA(other, Const))
     165                 :     {
     166 UBC           0 :         ReleaseVariableStats(vardata);
     167               0 :         PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     168                 :     }
     169                 : 
     170                 :     /*
     171                 :      * All the multirange operators are strict, so we can cope with a NULL
     172                 :      * constant right away.
     173                 :      */
     174 CBC         333 :     if (((Const *) other)->constisnull)
     175                 :     {
     176 UBC           0 :         ReleaseVariableStats(vardata);
     177               0 :         PG_RETURN_FLOAT8(0.0);
     178                 :     }
     179                 : 
     180                 :     /*
     181                 :      * If var is on the right, commute the operator, so that we can assume the
     182                 :      * var is on the left in what follows.
     183                 :      */
     184 CBC         333 :     if (!varonleft)
     185                 :     {
     186                 :         /* we have other Op var, commute to make var Op other */
     187              12 :         operator = get_commutator(operator);
     188              12 :         if (!operator)
     189                 :         {
     190                 :             /* Use default selectivity (should we raise an error instead?) */
     191 UBC           0 :             ReleaseVariableStats(vardata);
     192               0 :             PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
     193                 :         }
     194                 :     }
     195                 : 
     196                 :     /*
     197                 :      * OK, there's a Var and a Const we're dealing with here.  We need the
     198                 :      * Const to be of same multirange type as the column, else we can't do
     199                 :      * anything useful. (Such cases will likely fail at runtime, but here we'd
     200                 :      * rather just return a default estimate.)
     201                 :      *
     202                 :      * If the operator is "multirange @> element", the constant should be of
     203                 :      * the element type of the multirange column. Convert it to a multirange
     204                 :      * that includes only that single point, so that we don't need special
     205                 :      * handling for that in what follows.
     206                 :      */
     207 CBC         333 :     if (operator == OID_MULTIRANGE_CONTAINS_ELEM_OP)
     208                 :     {
     209              15 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     210                 : 
     211              15 :         if (((Const *) other)->consttype == typcache->rngtype->rngelemtype->type_id)
     212                 :         {
     213                 :             RangeBound  lower,
     214                 :                         upper;
     215                 : 
     216              15 :             lower.inclusive = true;
     217              15 :             lower.val = ((Const *) other)->constvalue;
     218              15 :             lower.infinite = false;
     219              15 :             lower.lower = true;
     220              15 :             upper.inclusive = true;
     221              15 :             upper.val = ((Const *) other)->constvalue;
     222              15 :             upper.infinite = false;
     223              15 :             upper.lower = false;
     224 GNC          15 :             constrange = range_serialize(typcache->rngtype, &lower, &upper,
     225                 :                                          false, NULL);
     226 GIC          15 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     227 ECB             :                                               1, &constrange);
     228                 :         }
     229                 :     }
     230 GIC         318 :     else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     231 CBC         285 :              operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     232             267 :              operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP ||
     233             255 :              operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP ||
     234             243 :              operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP ||
     235             231 :              operator == OID_MULTIRANGE_LEFT_RANGE_OP ||
     236 ECB             :              operator == OID_MULTIRANGE_RIGHT_RANGE_OP)
     237                 :     {
     238                 :         /*
     239                 :          * Promote a range in "multirange OP range" just like we do an element
     240                 :          * in "multirange OP element".
     241                 :          */
     242 GIC          99 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     243 CBC          99 :         if (((Const *) other)->consttype == typcache->rngtype->type_id)
     244 ECB             :         {
     245 GIC          99 :             constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
     246 CBC          99 :             constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
     247 ECB             :                                               1, &constrange);
     248                 :         }
     249                 :     }
     250 GIC         219 :     else if (operator == OID_RANGE_OVERLAPS_MULTIRANGE_OP ||
     251 CBC         201 :              operator == OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP ||
     252             192 :              operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP ||
     253             183 :              operator == OID_RANGE_LEFT_MULTIRANGE_OP ||
     254             174 :              operator == OID_RANGE_RIGHT_MULTIRANGE_OP ||
     255             156 :              operator == OID_RANGE_CONTAINS_MULTIRANGE_OP ||
     256             156 :              operator == OID_MULTIRANGE_ELEM_CONTAINED_OP ||
     257 ECB             :              operator == OID_MULTIRANGE_RANGE_CONTAINED_OP)
     258                 :     {
     259                 :         /*
     260                 :          * Here, the Var is the elem/range, not the multirange.  For now we
     261                 :          * just punt and return the default estimate.  In future we could
     262                 :          * disassemble the multirange constant to do something more
     263                 :          * intelligent.
     264                 :          */
     265                 :     }
     266 GIC         147 :     else if (((Const *) other)->consttype == vardata.vartype)
     267 ECB             :     {
     268                 :         /* Both sides are the same multirange type */
     269 GIC         147 :         typcache = multirange_get_typcache(fcinfo, vardata.vartype);
     270 ECB             : 
     271 GIC         147 :         constmultirange = DatumGetMultirangeTypeP(((Const *) other)->constvalue);
     272 ECB             :     }
     273                 : 
     274                 :     /*
     275                 :      * If we got a valid constant on one side of the operator, proceed to
     276                 :      * estimate using statistics. Otherwise punt and return a default constant
     277                 :      * estimate.  Note that calc_multirangesel need not handle
     278                 :      * OID_MULTIRANGE_*_CONTAINED_OP.
     279                 :      */
     280 GIC         333 :     if (constmultirange)
     281 CBC         261 :         selec = calc_multirangesel(typcache, &vardata, constmultirange, operator);
     282 ECB             :     else
     283 GIC          72 :         selec = default_multirange_selectivity(operator);
     284 ECB             : 
     285 GIC         333 :     ReleaseVariableStats(vardata);
     286 ECB             : 
     287 GIC         333 :     CLAMP_PROBABILITY(selec);
     288 ECB             : 
     289 GIC         333 :     PG_RETURN_FLOAT8((float8) selec);
     290 ECB             : }
     291                 : 
     292                 : static double
     293 GIC         261 : calc_multirangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
     294 ECB             :                    const MultirangeType *constval, Oid operator)
     295                 : {
     296                 :     double      hist_selec;
     297                 :     double      selec;
     298                 :     float4      empty_frac,
     299                 :                 null_frac;
     300                 : 
     301                 :     /*
     302                 :      * First look up the fraction of NULLs and empty multiranges from
     303                 :      * pg_statistic.
     304                 :      */
     305 GIC         261 :     if (HeapTupleIsValid(vardata->statsTuple))
     306 ECB             :     {
     307                 :         Form_pg_statistic stats;
     308                 :         AttStatsSlot sslot;
     309                 : 
     310 GIC         225 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     311 CBC         225 :         null_frac = stats->stanullfrac;
     312 ECB             : 
     313                 :         /* Try to get fraction of empty multiranges */
     314 GIC         225 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     315 ECB             :                              STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     316                 :                              InvalidOid,
     317                 :                              ATTSTATSSLOT_NUMBERS))
     318                 :         {
     319 GIC         225 :             if (sslot.nnumbers != 1)
     320 LBC           0 :                 elog(ERROR, "invalid empty fraction statistic");  /* shouldn't happen */
     321 GBC         225 :             empty_frac = sslot.numbers[0];
     322 CBC         225 :             free_attstatsslot(&sslot);
     323 ECB             :         }
     324                 :         else
     325                 :         {
     326                 :             /* No empty fraction statistic. Assume no empty ranges. */
     327 UIC           0 :             empty_frac = 0.0;
     328 EUB             :         }
     329                 :     }
     330                 :     else
     331                 :     {
     332                 :         /*
     333                 :          * No stats are available. Follow through the calculations below
     334                 :          * anyway, assuming no NULLs and no empty multiranges. This still
     335                 :          * allows us to give a better-than-nothing estimate based on whether
     336                 :          * the constant is an empty multirange or not.
     337                 :          */
     338 GIC          36 :         null_frac = 0.0;
     339 CBC          36 :         empty_frac = 0.0;
     340 ECB             :     }
     341                 : 
     342 GIC         261 :     if (MultirangeIsEmpty(constval))
     343 ECB             :     {
     344                 :         /*
     345                 :          * An empty multirange matches all multiranges, all empty multiranges,
     346                 :          * or nothing, depending on the operator
     347                 :          */
     348 GIC         111 :         switch (operator)
     349 ECB             :         {
     350                 :                 /* these return false if either argument is empty */
     351 GIC          63 :             case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     352 ECB             :             case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     353                 :             case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     354                 :             case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     355                 :             case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     356                 :             case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     357                 :             case OID_MULTIRANGE_LEFT_RANGE_OP:
     358                 :             case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     359                 :             case OID_MULTIRANGE_RIGHT_RANGE_OP:
     360                 :             case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     361                 :                 /* nothing is less than an empty multirange */
     362                 :             case OID_MULTIRANGE_LESS_OP:
     363 GIC          63 :                 selec = 0.0;
     364 CBC          63 :                 break;
     365 ECB             : 
     366                 :                 /*
     367                 :                  * only empty multiranges can be contained by an empty
     368                 :                  * multirange
     369                 :                  */
     370 GIC          15 :             case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     371 ECB             :             case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     372                 :                 /* only empty ranges are <= an empty multirange */
     373                 :             case OID_MULTIRANGE_LESS_EQUAL_OP:
     374 GIC          15 :                 selec = empty_frac;
     375 CBC          15 :                 break;
     376 ECB             : 
     377                 :                 /* everything contains an empty multirange */
     378 GIC          30 :             case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     379 ECB             :             case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     380                 :                 /* everything is >= an empty multirange */
     381                 :             case OID_MULTIRANGE_GREATER_EQUAL_OP:
     382 GIC          30 :                 selec = 1.0;
     383 CBC          30 :                 break;
     384 ECB             : 
     385                 :                 /* all non-empty multiranges are > an empty multirange */
     386 GIC           3 :             case OID_MULTIRANGE_GREATER_OP:
     387 CBC           3 :                 selec = 1.0 - empty_frac;
     388               3 :                 break;
     389 ECB             : 
     390                 :                 /* an element cannot be empty */
     391 UIC           0 :             case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     392 EUB             : 
     393                 :                 /* filtered out by multirangesel() */
     394                 :             case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     395                 :             case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     396                 :             case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     397                 :             case OID_RANGE_LEFT_MULTIRANGE_OP:
     398                 :             case OID_RANGE_RIGHT_MULTIRANGE_OP:
     399                 :             case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     400                 :             case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     401                 :             case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     402                 : 
     403                 :             default:
     404 UIC           0 :                 elog(ERROR, "unexpected operator %u", operator);
     405 EUB             :                 selec = 0.0;    /* keep compiler quiet */
     406                 :                 break;
     407                 :         }
     408                 :     }
     409                 :     else
     410                 :     {
     411                 :         /*
     412                 :          * Calculate selectivity using bound histograms. If that fails for
     413                 :          * some reason, e.g no histogram in pg_statistic, use the default
     414                 :          * constant estimate for the fraction of non-empty values. This is
     415                 :          * still somewhat better than just returning the default estimate,
     416                 :          * because this still takes into account the fraction of empty and
     417                 :          * NULL tuples, if we had statistics for them.
     418                 :          */
     419 GIC         150 :         hist_selec = calc_hist_selectivity(typcache, vardata, constval,
     420 ECB             :                                            operator);
     421 GIC         150 :         if (hist_selec < 0.0)
     422 CBC          24 :             hist_selec = default_multirange_selectivity(operator);
     423 ECB             : 
     424                 :         /*
     425                 :          * Now merge the results for the empty multiranges and histogram
     426                 :          * calculations, realizing that the histogram covers only the
     427                 :          * non-null, non-empty values.
     428                 :          */
     429 GIC         150 :         if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
     430 ECB             :             operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     431                 :         {
     432                 :             /* empty is contained by anything non-empty */
     433 GIC          12 :             selec = (1.0 - empty_frac) * hist_selec + empty_frac;
     434 ECB             :         }
     435                 :         else
     436                 :         {
     437                 :             /* with any other operator, empty Op non-empty matches nothing */
     438 GIC         138 :             selec = (1.0 - empty_frac) * hist_selec;
     439 ECB             :         }
     440                 :     }
     441                 : 
     442                 :     /* all multirange operators are strict */
     443 GIC         261 :     selec *= (1.0 - null_frac);
     444 ECB             : 
     445                 :     /* result should be in range, but make sure... */
     446 GIC         261 :     CLAMP_PROBABILITY(selec);
     447 ECB             : 
     448 GIC         261 :     return selec;
     449 ECB             : }
     450                 : 
     451                 : /*
     452                 :  * Calculate multirange operator selectivity using histograms of multirange bounds.
     453                 :  *
     454                 :  * This estimate is for the portion of values that are not empty and not
     455                 :  * NULL.
     456                 :  */
     457                 : static double
     458 GIC         150 : calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
     459 ECB             :                       const MultirangeType *constval, Oid operator)
     460                 : {
     461 GIC         150 :     TypeCacheEntry *rng_typcache = typcache->rngtype;
     462 ECB             :     AttStatsSlot hslot;
     463                 :     AttStatsSlot lslot;
     464                 :     int         nhist;
     465                 :     RangeBound *hist_lower;
     466                 :     RangeBound *hist_upper;
     467                 :     int         i;
     468                 :     RangeBound  const_lower;
     469                 :     RangeBound  const_upper;
     470                 :     RangeBound  tmp;
     471                 :     double      hist_selec;
     472                 : 
     473                 :     /* Can't use the histogram with insecure multirange support functions */
     474 GIC         150 :     if (!statistic_proc_security_check(vardata,
     475 ECB             :                                        rng_typcache->rng_cmp_proc_finfo.fn_oid))
     476 UIC           0 :         return -1;
     477 GBC         150 :     if (OidIsValid(rng_typcache->rng_subdiff_finfo.fn_oid) &&
     478 CBC         150 :         !statistic_proc_security_check(vardata,
     479 ECB             :                                        rng_typcache->rng_subdiff_finfo.fn_oid))
     480 UIC           0 :         return -1;
     481 EUB             : 
     482                 :     /* Try to get histogram of ranges */
     483 GIC         276 :     if (!(HeapTupleIsValid(vardata->statsTuple) &&
     484 CBC         126 :           get_attstatsslot(&hslot, vardata->statsTuple,
     485 ECB             :                            STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
     486                 :                            ATTSTATSSLOT_VALUES)))
     487 GIC          24 :         return -1.0;
     488 ECB             : 
     489                 :     /* check that it's a histogram, not just a dummy entry */
     490 GIC         126 :     if (hslot.nvalues < 2)
     491 ECB             :     {
     492 UIC           0 :         free_attstatsslot(&hslot);
     493 UBC           0 :         return -1.0;
     494 EUB             :     }
     495                 : 
     496                 :     /*
     497                 :      * Convert histogram of ranges into histograms of its lower and upper
     498                 :      * bounds.
     499                 :      */
     500 GIC         126 :     nhist = hslot.nvalues;
     501 CBC         126 :     hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     502             126 :     hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     503            9468 :     for (i = 0; i < nhist; i++)
     504 ECB             :     {
     505                 :         bool        empty;
     506                 : 
     507 GIC        9342 :         range_deserialize(rng_typcache, DatumGetRangeTypeP(hslot.values[i]),
     508 CBC        9342 :                           &hist_lower[i], &hist_upper[i], &empty);
     509 ECB             :         /* The histogram should not contain any empty ranges */
     510 GIC        9342 :         if (empty)
     511 LBC           0 :             elog(ERROR, "bounds histogram contains an empty range");
     512 EUB             :     }
     513                 : 
     514                 :     /* @> and @< also need a histogram of range lengths */
     515 GIC         126 :     if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
     516 CBC         102 :         operator == OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP ||
     517             102 :         operator == OID_MULTIRANGE_RANGE_CONTAINED_OP ||
     518 ECB             :         operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
     519                 :     {
     520 GIC          60 :         if (!(HeapTupleIsValid(vardata->statsTuple) &&
     521 CBC          30 :               get_attstatsslot(&lslot, vardata->statsTuple,
     522 ECB             :                                STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     523                 :                                InvalidOid,
     524                 :                                ATTSTATSSLOT_VALUES)))
     525                 :         {
     526 UIC           0 :             free_attstatsslot(&hslot);
     527 UBC           0 :             return -1.0;
     528 EUB             :         }
     529                 : 
     530                 :         /* check that it's a histogram, not just a dummy entry */
     531 GIC          30 :         if (lslot.nvalues < 2)
     532 ECB             :         {
     533 UIC           0 :             free_attstatsslot(&lslot);
     534 UBC           0 :             free_attstatsslot(&hslot);
     535               0 :             return -1.0;
     536 EUB             :         }
     537                 :     }
     538                 :     else
     539 GIC          96 :         memset(&lslot, 0, sizeof(lslot));
     540 ECB             : 
     541                 :     /* Extract the bounds of the constant value. */
     542 GIC         126 :     Assert(constval->rangeCount > 0);
     543 CBC         126 :     multirange_get_bounds(rng_typcache, constval, 0,
     544 ECB             :                           &const_lower, &tmp);
     545 GIC         126 :     multirange_get_bounds(rng_typcache, constval, constval->rangeCount - 1,
     546 ECB             :                           &tmp, &const_upper);
     547                 : 
     548                 :     /*
     549                 :      * Calculate selectivity comparing the lower or upper bound of the
     550                 :      * constant with the histogram of lower or upper bounds.
     551                 :      */
     552 GIC         126 :     switch (operator)
     553 ECB             :     {
     554 UIC           0 :         case OID_MULTIRANGE_LESS_OP:
     555 EUB             : 
     556                 :             /*
     557                 :              * The regular b-tree comparison operators (<, <=, >, >=) compare
     558                 :              * the lower bounds first, and the upper bounds for values with
     559                 :              * equal lower bounds. Estimate that by comparing the lower bounds
     560                 :              * only. This gives a fairly accurate estimate assuming there
     561                 :              * aren't many rows with a lower bound equal to the constant's
     562                 :              * lower bound.
     563                 :              */
     564                 :             hist_selec =
     565 UIC           0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     566 EUB             :                                              hist_lower, nhist, false);
     567 UIC           0 :             break;
     568 EUB             : 
     569 UIC           0 :         case OID_MULTIRANGE_LESS_EQUAL_OP:
     570 EUB             :             hist_selec =
     571 UIC           0 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     572 EUB             :                                              hist_lower, nhist, true);
     573 UIC           0 :             break;
     574 EUB             : 
     575 UIC           0 :         case OID_MULTIRANGE_GREATER_OP:
     576 UBC           0 :             hist_selec =
     577               0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     578 EUB             :                                                  hist_lower, nhist, false);
     579 UIC           0 :             break;
     580 EUB             : 
     581 UIC           0 :         case OID_MULTIRANGE_GREATER_EQUAL_OP:
     582 UBC           0 :             hist_selec =
     583               0 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     584 EUB             :                                                  hist_lower, nhist, true);
     585 UIC           0 :             break;
     586 EUB             : 
     587 GIC          12 :         case OID_MULTIRANGE_LEFT_RANGE_OP:
     588 ECB             :         case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
     589                 :             /* var << const when upper(var) < lower(const) */
     590                 :             hist_selec =
     591 GIC          12 :                 calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     592 ECB             :                                              hist_upper, nhist, false);
     593 GIC          12 :             break;
     594 ECB             : 
     595 GIC          12 :         case OID_MULTIRANGE_RIGHT_RANGE_OP:
     596 ECB             :         case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
     597                 :             /* var >> const when lower(var) > upper(const) */
     598 GIC          12 :             hist_selec =
     599 CBC          12 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     600 ECB             :                                                  hist_lower, nhist, true);
     601 GIC          12 :             break;
     602 ECB             : 
     603 GIC          12 :         case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
     604 ECB             :         case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     605                 :             /* compare lower bounds */
     606 GIC          12 :             hist_selec =
     607 CBC          12 :                 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     608 ECB             :                                                  hist_lower, nhist, false);
     609 GIC          12 :             break;
     610 ECB             : 
     611 GIC          12 :         case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
     612 ECB             :         case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     613                 :             /* compare upper bounds */
     614                 :             hist_selec =
     615 GIC          12 :                 calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     616 ECB             :                                              hist_upper, nhist, true);
     617 GIC          12 :             break;
     618 ECB             : 
     619 GIC          42 :         case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
     620 ECB             :         case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
     621                 :         case OID_MULTIRANGE_CONTAINS_ELEM_OP:
     622                 : 
     623                 :             /*
     624                 :              * A && B <=> NOT (A << B OR A >> B).
     625                 :              *
     626                 :              * Since A << B and A >> B are mutually exclusive events we can
     627                 :              * sum their probabilities to find probability of (A << B OR A >>
     628                 :              * B).
     629                 :              *
     630                 :              * "multirange @> elem" is equivalent to "multirange &&
     631                 :              * {[elem,elem]}". The caller already constructed the singular
     632                 :              * range from the element constant, so just treat it the same as
     633                 :              * &&.
     634                 :              */
     635                 :             hist_selec =
     636 GIC          42 :                 calc_hist_selectivity_scalar(rng_typcache,
     637 ECB             :                                              &const_lower, hist_upper,
     638                 :                                              nhist, false);
     639 GIC          42 :             hist_selec +=
     640 CBC          42 :                 (1.0 - calc_hist_selectivity_scalar(rng_typcache,
     641 ECB             :                                                     &const_upper, hist_lower,
     642                 :                                                     nhist, true));
     643 GIC          42 :             hist_selec = 1.0 - hist_selec;
     644 CBC          42 :             break;
     645 ECB             : 
     646 GIC          24 :         case OID_MULTIRANGE_CONTAINS_RANGE_OP:
     647 ECB             :         case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
     648                 :             hist_selec =
     649 GIC          24 :                 calc_hist_selectivity_contains(rng_typcache, &const_lower,
     650 ECB             :                                                &const_upper, hist_lower, nhist,
     651                 :                                                lslot.values, lslot.nvalues);
     652 GIC          24 :             break;
     653 ECB             : 
     654 GIC          12 :         case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
     655 ECB             :         case OID_RANGE_MULTIRANGE_CONTAINED_OP:
     656 GIC          12 :             if (const_lower.infinite)
     657 ECB             :             {
     658                 :                 /*
     659                 :                  * Lower bound no longer matters. Just estimate the fraction
     660                 :                  * with an upper bound <= const upper bound
     661                 :                  */
     662                 :                 hist_selec =
     663 UIC           0 :                     calc_hist_selectivity_scalar(rng_typcache, &const_upper,
     664 EUB             :                                                  hist_upper, nhist, true);
     665                 :             }
     666 GIC          12 :             else if (const_upper.infinite)
     667 ECB             :             {
     668 UIC           0 :                 hist_selec =
     669 UBC           0 :                     1.0 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
     670 EUB             :                                                        hist_lower, nhist, false);
     671                 :             }
     672                 :             else
     673                 :             {
     674                 :                 hist_selec =
     675 GIC          12 :                     calc_hist_selectivity_contained(rng_typcache, &const_lower,
     676 ECB             :                                                     &const_upper, hist_lower, nhist,
     677                 :                                                     lslot.values, lslot.nvalues);
     678                 :             }
     679 GIC          12 :             break;
     680 ECB             : 
     681                 :             /* filtered out by multirangesel() */
     682 UIC           0 :         case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
     683 EUB             :         case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
     684                 :         case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
     685                 :         case OID_RANGE_LEFT_MULTIRANGE_OP:
     686                 :         case OID_RANGE_RIGHT_MULTIRANGE_OP:
     687                 :         case OID_RANGE_CONTAINS_MULTIRANGE_OP:
     688                 :         case OID_MULTIRANGE_ELEM_CONTAINED_OP:
     689                 :         case OID_MULTIRANGE_RANGE_CONTAINED_OP:
     690                 : 
     691                 :         default:
     692 UIC           0 :             elog(ERROR, "unknown multirange operator %u", operator);
     693 EUB             :             hist_selec = -1.0;  /* keep compiler quiet */
     694                 :             break;
     695                 :     }
     696                 : 
     697 GIC         126 :     free_attstatsslot(&lslot);
     698 CBC         126 :     free_attstatsslot(&hslot);
     699 ECB             : 
     700 GIC         126 :     return hist_selec;
     701 ECB             : }
     702                 : 
     703                 : 
     704                 : /*
     705                 :  * Look up the fraction of values less than (or equal, if 'equal' argument
     706                 :  * is true) a given const in a histogram of range bounds.
     707                 :  */
     708                 : static double
     709 GIC         132 : calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
     710 ECB             :                              const RangeBound *hist, int hist_nvalues, bool equal)
     711                 : {
     712                 :     Selectivity selec;
     713                 :     int         index;
     714                 : 
     715                 :     /*
     716                 :      * Find the histogram bin the given constant falls into. Estimate
     717                 :      * selectivity as the number of preceding whole bins.
     718                 :      */
     719 GIC         132 :     index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
     720 CBC         132 :     selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
     721 ECB             : 
     722                 :     /* Adjust using linear interpolation within the bin */
     723 GIC         132 :     if (index >= 0 && index < hist_nvalues - 1)
     724 CBC         180 :         selec += get_position(typcache, constbound, &hist[index],
     725              90 :                               &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
     726 ECB             : 
     727 GIC         132 :     return selec;
     728 ECB             : }
     729                 : 
     730                 : /*
     731                 :  * Binary search on an array of range bounds. Returns greatest index of range
     732                 :  * bound in array which is less(less or equal) than given range bound. If all
     733                 :  * range bounds in array are greater or equal(greater) than given range bound,
     734                 :  * return -1. When "equal" flag is set conditions in brackets are used.
     735                 :  *
     736                 :  * This function is used in scalar operator selectivity estimation. Another
     737                 :  * goal of this function is to find a histogram bin where to stop
     738                 :  * interpolation of portion of bounds which are less than or equal to given bound.
     739                 :  */
     740                 : static int
     741 GIC         168 : rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
     742 ECB             :                int hist_length, bool equal)
     743                 : {
     744 GIC         168 :     int         lower = -1,
     745 CBC         168 :                 upper = hist_length - 1,
     746 ECB             :                 cmp,
     747                 :                 middle;
     748                 : 
     749 GIC        1068 :     while (lower < upper)
     750 ECB             :     {
     751 GIC         900 :         middle = (lower + upper + 1) / 2;
     752 CBC         900 :         cmp = range_cmp_bounds(typcache, &hist[middle], value);
     753 ECB             : 
     754 GIC         900 :         if (cmp < 0 || (equal && cmp == 0))
     755 CBC         372 :             lower = middle;
     756 ECB             :         else
     757 GIC         528 :             upper = middle - 1;
     758 ECB             :     }
     759 GIC         168 :     return lower;
     760 ECB             : }
     761                 : 
     762                 : 
     763                 : /*
     764                 :  * Binary search on length histogram. Returns greatest index of range length in
     765                 :  * histogram which is less than (less than or equal) the given length value. If
     766                 :  * all lengths in the histogram are greater than (greater than or equal) the
     767                 :  * given length, returns -1.
     768                 :  */
     769                 : static int
     770 GIC         180 : length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
     771 ECB             :                     double value, bool equal)
     772                 : {
     773 GIC         180 :     int         lower = -1,
     774 CBC         180 :                 upper = length_hist_nvalues - 1,
     775 ECB             :                 middle;
     776                 : 
     777 GIC         936 :     while (lower < upper)
     778 ECB             :     {
     779                 :         double      middleval;
     780                 : 
     781 GIC         756 :         middle = (lower + upper + 1) / 2;
     782 ECB             : 
     783 GIC         756 :         middleval = DatumGetFloat8(length_hist_values[middle]);
     784 CBC         756 :         if (middleval < value || (equal && middleval <= value))
     785             372 :             lower = middle;
     786 ECB             :         else
     787 GIC         384 :             upper = middle - 1;
     788 ECB             :     }
     789 GIC         180 :     return lower;
     790 ECB             : }
     791                 : 
     792                 : /*
     793                 :  * Get relative position of value in histogram bin in [0,1] range.
     794                 :  */
     795                 : static float8
     796 GIC         138 : get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
     797 ECB             :              const RangeBound *hist2)
     798                 : {
     799 GIC         138 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     800 ECB             :     float8      position;
     801                 : 
     802 GIC         138 :     if (!hist1->infinite && !hist2->infinite)
     803 ECB             :     {
     804                 :         float8      bin_width;
     805                 : 
     806                 :         /*
     807                 :          * Both bounds are finite. Assuming the subtype's comparison function
     808                 :          * works sanely, the value must be finite, too, because it lies
     809                 :          * somewhere between the bounds.  If it doesn't, arbitrarily return
     810                 :          * 0.5.
     811                 :          */
     812 GIC         102 :         if (value->infinite)
     813 LBC           0 :             return 0.5;
     814 EUB             : 
     815                 :         /* Can't interpolate without subdiff function */
     816 GIC         102 :         if (!has_subdiff)
     817 LBC           0 :             return 0.5;
     818 EUB             : 
     819                 :         /* Calculate relative position using subdiff function. */
     820 GIC         102 :         bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     821 ECB             :                                                      typcache->rng_collation,
     822 GIC         102 :                                                      hist2->val,
     823 CBC         102 :                                                      hist1->val));
     824             102 :         if (isnan(bin_width) || bin_width <= 0.0)
     825 LBC           0 :             return 0.5;         /* punt for NaN or zero-width bin */
     826 EUB             : 
     827 GIC         102 :         position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     828 ECB             :                                                     typcache->rng_collation,
     829 GIC         102 :                                                     value->val,
     830 CBC         102 :                                                     hist1->val))
     831 ECB             :             / bin_width;
     832                 : 
     833 GIC         102 :         if (isnan(position))
     834 LBC           0 :             return 0.5;         /* punt for NaN from subdiff, Inf/Inf, etc */
     835 EUB             : 
     836                 :         /* Relative position must be in [0,1] range */
     837 GIC         102 :         position = Max(position, 0.0);
     838 CBC         102 :         position = Min(position, 1.0);
     839             102 :         return position;
     840 ECB             :     }
     841 GIC          36 :     else if (hist1->infinite && !hist2->infinite)
     842 ECB             :     {
     843                 :         /*
     844                 :          * Lower bin boundary is -infinite, upper is finite. If the value is
     845                 :          * -infinite, return 0.0 to indicate it's equal to the lower bound.
     846                 :          * Otherwise return 1.0 to indicate it's infinitely far from the lower
     847                 :          * bound.
     848                 :          */
     849 GIC          30 :         return ((value->infinite && value->lower) ? 0.0 : 1.0);
     850 ECB             :     }
     851 GIC           6 :     else if (!hist1->infinite && hist2->infinite)
     852 ECB             :     {
     853                 :         /* same as above, but in reverse */
     854 GIC           6 :         return ((value->infinite && !value->lower) ? 1.0 : 0.0);
     855 ECB             :     }
     856                 :     else
     857                 :     {
     858                 :         /*
     859                 :          * If both bin boundaries are infinite, they should be equal to each
     860                 :          * other, and the value should also be infinite and equal to both
     861                 :          * bounds. (But don't Assert that, to avoid crashing if a user creates
     862                 :          * a datatype with a broken comparison function).
     863                 :          *
     864                 :          * Assume the value to lie in the middle of the infinite bounds.
     865                 :          */
     866 UIC           0 :         return 0.5;
     867 EUB             :     }
     868                 : }
     869                 : 
     870                 : 
     871                 : /*
     872                 :  * Get relative position of value in a length histogram bin in [0,1] range.
     873                 :  */
     874                 : static double
     875 GIC         180 : get_len_position(double value, double hist1, double hist2)
     876 ECB             : {
     877 GIC         180 :     if (!isinf(hist1) && !isinf(hist2))
     878 ECB             :     {
     879                 :         /*
     880                 :          * Both bounds are finite. The value should be finite too, because it
     881                 :          * lies somewhere between the bounds. If it doesn't, just return
     882                 :          * something.
     883                 :          */
     884 GIC          30 :         if (isinf(value))
     885 LBC           0 :             return 0.5;
     886 EUB             : 
     887 GIC          30 :         return 1.0 - (hist2 - value) / (hist2 - hist1);
     888 ECB             :     }
     889 GIC         150 :     else if (isinf(hist1) && !isinf(hist2))
     890 ECB             :     {
     891                 :         /*
     892                 :          * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
     893                 :          * indicate the value is infinitely far from the lower bound.
     894                 :          */
     895 UIC           0 :         return 1.0;
     896 EUB             :     }
     897 GIC         150 :     else if (isinf(hist1) && isinf(hist2))
     898 ECB             :     {
     899                 :         /* same as above, but in reverse */
     900 UIC           0 :         return 0.0;
     901 EUB             :     }
     902                 :     else
     903                 :     {
     904                 :         /*
     905                 :          * If both bin boundaries are infinite, they should be equal to each
     906                 :          * other, and the value should also be infinite and equal to both
     907                 :          * bounds. (But don't Assert that, to avoid crashing unnecessarily if
     908                 :          * the caller messes up)
     909                 :          *
     910                 :          * Assume the value to lie in the middle of the infinite bounds.
     911                 :          */
     912 GIC         150 :         return 0.5;
     913 ECB             :     }
     914                 : }
     915                 : 
     916                 : /*
     917                 :  * Measure distance between two range bounds.
     918                 :  */
     919                 : static float8
     920 GIC         204 : get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
     921 ECB             : {
     922 GIC         204 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     923 ECB             : 
     924 GIC         204 :     if (!bound1->infinite && !bound2->infinite)
     925 ECB             :     {
     926                 :         /*
     927                 :          * Neither bound is infinite, use subdiff function or return default
     928                 :          * value of 1.0 if no subdiff is available.
     929                 :          */
     930 GIC         120 :         if (has_subdiff)
     931 ECB             :         {
     932                 :             float8      res;
     933                 : 
     934 GIC         120 :             res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     935 ECB             :                                                    typcache->rng_collation,
     936 GIC         120 :                                                    bound2->val,
     937 CBC         120 :                                                    bound1->val));
     938 ECB             :             /* Reject possible NaN result, also negative result */
     939 GIC         120 :             if (isnan(res) || res < 0.0)
     940 LBC           0 :                 return 1.0;
     941 EUB             :             else
     942 GIC         120 :                 return res;
     943 ECB             :         }
     944                 :         else
     945 UIC           0 :             return 1.0;
     946 EUB             :     }
     947 GIC          84 :     else if (bound1->infinite && bound2->infinite)
     948 ECB             :     {
     949                 :         /* Both bounds are infinite */
     950 UIC           0 :         if (bound1->lower == bound2->lower)
     951 UBC           0 :             return 0.0;
     952 EUB             :         else
     953 UIC           0 :             return get_float8_infinity();
     954 EUB             :     }
     955                 :     else
     956                 :     {
     957                 :         /* One bound is infinite, the other is not */
     958 GIC          84 :         return get_float8_infinity();
     959 ECB             :     }
     960                 : }
     961                 : 
     962                 : /*
     963                 :  * Calculate the average of function P(x), in the interval [length1, length2],
     964                 :  * where P(x) is the fraction of tuples with length < x (or length <= x if
     965                 :  * 'equal' is true).
     966                 :  */
     967                 : static double
     968 GIC         180 : calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
     969 ECB             :                       double length1, double length2, bool equal)
     970                 : {
     971                 :     double      frac;
     972                 :     double      A,
     973                 :                 B,
     974                 :                 PA,
     975                 :                 PB;
     976                 :     double      pos;
     977                 :     int         i;
     978                 :     double      area;
     979                 : 
     980 GIC         180 :     Assert(length2 >= length1);
     981 ECB             : 
     982 GIC         180 :     if (length2 < 0.0)
     983 LBC           0 :         return 0.0;             /* shouldn't happen, but doesn't hurt to check */
     984 EUB             : 
     985                 :     /* All lengths in the table are <= infinite. */
     986 GIC         180 :     if (isinf(length2) && equal)
     987 LBC           0 :         return 1.0;
     988 EUB             : 
     989                 :     /*----------
     990                 :      * The average of a function between A and B can be calculated by the
     991                 :      * formula:
     992                 :      *
     993                 :      *          B
     994                 :      *    1     /
     995                 :      * -------  | P(x)dx
     996                 :      *  B - A   /
     997                 :      *          A
     998                 :      *
     999                 :      * The geometrical interpretation of the integral is the area under the
    1000                 :      * graph of P(x). P(x) is defined by the length histogram. We calculate
    1001                 :      * the area in a piecewise fashion, iterating through the length histogram
    1002                 :      * bins. Each bin is a trapezoid:
    1003                 :      *
    1004                 :      *       P(x2)
    1005                 :      *        /|
    1006                 :      *       / |
    1007                 :      * P(x1)/  |
    1008                 :      *     |   |
    1009                 :      *     |   |
    1010                 :      *  ---+---+--
    1011                 :      *     x1  x2
    1012                 :      *
    1013                 :      * where x1 and x2 are the boundaries of the current histogram, and P(x1)
    1014                 :      * and P(x1) are the cumulative fraction of tuples at the boundaries.
    1015                 :      *
    1016                 :      * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
    1017                 :      *
    1018                 :      * The first bin contains the lower bound passed by the caller, so we
    1019                 :      * use linear interpolation between the previous and next histogram bin
    1020                 :      * boundary to calculate P(x1). Likewise for the last bin: we use linear
    1021                 :      * interpolation to calculate P(x2). For the bins in between, x1 and x2
    1022                 :      * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
    1023                 :      * P(x1) =    (bin index) / (number of bins)
    1024                 :      * P(x2) = (bin index + 1 / (number of bins)
    1025                 :      */
    1026                 : 
    1027                 :     /* First bin, the one that contains lower bound */
    1028 GIC         180 :     i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
    1029 CBC         180 :     if (i >= length_hist_nvalues - 1)
    1030              24 :         return 1.0;
    1031 ECB             : 
    1032 GIC         156 :     if (i < 0)
    1033 ECB             :     {
    1034 GIC          42 :         i = 0;
    1035 CBC          42 :         pos = 0.0;
    1036 ECB             :     }
    1037                 :     else
    1038                 :     {
    1039                 :         /* interpolate length1's position in the bin */
    1040 GIC         114 :         pos = get_len_position(length1,
    1041 CBC         114 :                                DatumGetFloat8(length_hist_values[i]),
    1042             114 :                                DatumGetFloat8(length_hist_values[i + 1]));
    1043 ECB             :     }
    1044 GIC         156 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1045 CBC         156 :     B = length1;
    1046 ECB             : 
    1047                 :     /*
    1048                 :      * In the degenerate case that length1 == length2, simply return
    1049                 :      * P(length1). This is not merely an optimization: if length1 == length2,
    1050                 :      * we'd divide by zero later on.
    1051                 :      */
    1052 GIC         156 :     if (length2 == length1)
    1053 CBC          72 :         return PB;
    1054 ECB             : 
    1055                 :     /*
    1056                 :      * Loop through all the bins, until we hit the last bin, the one that
    1057                 :      * contains the upper bound. (if lower and upper bounds are in the same
    1058                 :      * bin, this falls out immediately)
    1059                 :      */
    1060 GIC          84 :     area = 0.0;
    1061 CBC        1584 :     for (; i < length_hist_nvalues - 1; i++)
    1062 ECB             :     {
    1063 GIC        1584 :         double      bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
    1064 ECB             : 
    1065                 :         /* check if we've reached the last bin */
    1066 GIC        1584 :         if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
    1067 ECB             :             break;
    1068                 : 
    1069                 :         /* the upper bound of previous bin is the lower bound of this bin */
    1070 GIC        1500 :         A = B;
    1071 CBC        1500 :         PA = PB;
    1072 ECB             : 
    1073 GIC        1500 :         B = bin_upper;
    1074 CBC        1500 :         PB = (double) i / (double) (length_hist_nvalues - 1);
    1075 ECB             : 
    1076                 :         /*
    1077                 :          * Add the area of this trapezoid to the total. The point of the
    1078                 :          * if-check is to avoid NaN, in the corner case that PA == PB == 0,
    1079                 :          * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
    1080                 :          * 0) is zero, regardless of the width (B - A).
    1081                 :          */
    1082 GIC        1500 :         if (PA > 0 || PB > 0)
    1083 CBC        1476 :             area += 0.5 * (PB + PA) * (B - A);
    1084 ECB             :     }
    1085                 : 
    1086                 :     /* Last bin */
    1087 GIC          84 :     A = B;
    1088 CBC          84 :     PA = PB;
    1089 ECB             : 
    1090 GIC          84 :     B = length2;                /* last bin ends at the query upper bound */
    1091 CBC          84 :     if (i >= length_hist_nvalues - 1)
    1092 LBC           0 :         pos = 0.0;
    1093 EUB             :     else
    1094                 :     {
    1095 GIC          84 :         if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
    1096 CBC          18 :             pos = 0.0;
    1097 ECB             :         else
    1098 GIC          66 :             pos = get_len_position(length2,
    1099 CBC          66 :                                    DatumGetFloat8(length_hist_values[i]),
    1100              66 :                                    DatumGetFloat8(length_hist_values[i + 1]));
    1101 ECB             :     }
    1102 GIC          84 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
    1103 ECB             : 
    1104 GIC          84 :     if (PA > 0 || PB > 0)
    1105 CBC          66 :         area += 0.5 * (PB + PA) * (B - A);
    1106 ECB             : 
    1107                 :     /*
    1108                 :      * Ok, we have calculated the area, ie. the integral. Divide by width to
    1109                 :      * get the requested average.
    1110                 :      *
    1111                 :      * Avoid NaN arising from infinite / infinite. This happens at least if
    1112                 :      * length2 is infinite. It's not clear what the correct value would be in
    1113                 :      * that case, so 0.5 seems as good as any value.
    1114                 :      */
    1115 GIC          84 :     if (isinf(area) && isinf(length2))
    1116 CBC          24 :         frac = 0.5;
    1117 ECB             :     else
    1118 GIC          60 :         frac = area / (length2 - length1);
    1119 ECB             : 
    1120 GIC          84 :     return frac;
    1121 ECB             : }
    1122                 : 
    1123                 : /*
    1124                 :  * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
    1125                 :  * of multiranges that fall within the constant lower and upper bounds. This uses
    1126                 :  * the histograms of range lower bounds and range lengths, on the assumption
    1127                 :  * that the range lengths are independent of the lower bounds.
    1128                 :  *
    1129                 :  * The caller has already checked that constant lower and upper bounds are
    1130                 :  * finite.
    1131                 :  */
    1132                 : static double
    1133 GIC          12 : calc_hist_selectivity_contained(TypeCacheEntry *typcache,
    1134 ECB             :                                 const RangeBound *lower, RangeBound *upper,
    1135                 :                                 const RangeBound *hist_lower, int hist_nvalues,
    1136                 :                                 Datum *length_hist_values, int length_hist_nvalues)
    1137                 : {
    1138                 :     int         i,
    1139                 :                 upper_index;
    1140                 :     float8      prev_dist;
    1141                 :     double      bin_width;
    1142                 :     double      upper_bin_width;
    1143                 :     double      sum_frac;
    1144                 : 
    1145                 :     /*
    1146                 :      * Begin by finding the bin containing the upper bound, in the lower bound
    1147                 :      * histogram. Any range with a lower bound > constant upper bound can't
    1148                 :      * match, ie. there are no matches in bins greater than upper_index.
    1149                 :      */
    1150 GIC          12 :     upper->inclusive = !upper->inclusive;
    1151 CBC          12 :     upper->lower = true;
    1152              12 :     upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
    1153 ECB             :                                  false);
    1154                 : 
    1155                 :     /*
    1156                 :      * If the upper bound value is below the histogram's lower limit, there
    1157                 :      * are no matches.
    1158                 :      */
    1159 GIC          12 :     if (upper_index < 0)
    1160 LBC           0 :         return 0.0;
    1161 EUB             : 
    1162                 :     /*
    1163                 :      * If the upper bound value is at or beyond the histogram's upper limit,
    1164                 :      * start our loop at the last actual bin, as though the upper bound were
    1165                 :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1166                 :      * (This corresponds to assuming that the data population above the
    1167                 :      * histogram's upper limit is empty, exactly like what we just assumed for
    1168                 :      * the lower limit.)
    1169                 :      */
    1170 GIC          12 :     upper_index = Min(upper_index, hist_nvalues - 2);
    1171 ECB             : 
    1172                 :     /*
    1173                 :      * Calculate upper_bin_width, ie. the fraction of the (upper_index,
    1174                 :      * upper_index + 1) bin which is greater than upper bound of query range
    1175                 :      * using linear interpolation of subdiff function.
    1176                 :      */
    1177 GIC          12 :     upper_bin_width = get_position(typcache, upper,
    1178 CBC          12 :                                    &hist_lower[upper_index],
    1179              12 :                                    &hist_lower[upper_index + 1]);
    1180 ECB             : 
    1181                 :     /*
    1182                 :      * In the loop, dist and prev_dist are the distance of the "current" bin's
    1183                 :      * lower and upper bounds from the constant upper bound.
    1184                 :      *
    1185                 :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1186                 :      * meaning a full width bin, but can be less in the corner cases: start
    1187                 :      * and end of the loop. We start with bin_width = upper_bin_width, because
    1188                 :      * we begin at the bin containing the upper bound.
    1189                 :      */
    1190 GIC          12 :     prev_dist = 0.0;
    1191 CBC          12 :     bin_width = upper_bin_width;
    1192 ECB             : 
    1193 GIC          12 :     sum_frac = 0.0;
    1194 CBC          60 :     for (i = upper_index; i >= 0; i--)
    1195 ECB             :     {
    1196                 :         double      dist;
    1197                 :         double      length_hist_frac;
    1198 GIC          60 :         bool        final_bin = false;
    1199 ECB             : 
    1200                 :         /*
    1201                 :          * dist -- distance from upper bound of query range to lower bound of
    1202                 :          * the current bin in the lower bound histogram. Or to the lower bound
    1203                 :          * of the constant range, if this is the final bin, containing the
    1204                 :          * constant lower bound.
    1205                 :          */
    1206 GIC          60 :         if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
    1207 ECB             :         {
    1208 GIC          12 :             dist = get_distance(typcache, lower, upper);
    1209 ECB             : 
    1210                 :             /*
    1211                 :              * Subtract from bin_width the portion of this bin that we want to
    1212                 :              * ignore.
    1213                 :              */
    1214 GIC          24 :             bin_width -= get_position(typcache, lower, &hist_lower[i],
    1215 CBC          12 :                                       &hist_lower[i + 1]);
    1216              12 :             if (bin_width < 0.0)
    1217 LBC           0 :                 bin_width = 0.0;
    1218 GBC          12 :             final_bin = true;
    1219 ECB             :         }
    1220                 :         else
    1221 GIC          48 :             dist = get_distance(typcache, &hist_lower[i], upper);
    1222 ECB             : 
    1223                 :         /*
    1224                 :          * Estimate the fraction of tuples in this bin that are narrow enough
    1225                 :          * to not exceed the distance to the upper bound of the query range.
    1226                 :          */
    1227 GIC          60 :         length_hist_frac = calc_length_hist_frac(length_hist_values,
    1228 ECB             :                                                  length_hist_nvalues,
    1229                 :                                                  prev_dist, dist, true);
    1230                 : 
    1231                 :         /*
    1232                 :          * Add the fraction of tuples in this bin, with a suitable length, to
    1233                 :          * the total.
    1234                 :          */
    1235 GIC          60 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1236 ECB             : 
    1237 GIC          60 :         if (final_bin)
    1238 CBC          12 :             break;
    1239 ECB             : 
    1240 GIC          48 :         bin_width = 1.0;
    1241 CBC          48 :         prev_dist = dist;
    1242 ECB             :     }
    1243                 : 
    1244 GIC          12 :     return sum_frac;
    1245 ECB             : }
    1246                 : 
    1247                 : /*
    1248                 :  * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
    1249                 :  * of multiranges that contain the constant lower and upper bounds. This uses
    1250                 :  * the histograms of range lower bounds and range lengths, on the assumption
    1251                 :  * that the range lengths are independent of the lower bounds.
    1252                 :  */
    1253                 : static double
    1254 GIC          24 : calc_hist_selectivity_contains(TypeCacheEntry *typcache,
    1255 ECB             :                                const RangeBound *lower, const RangeBound *upper,
    1256                 :                                const RangeBound *hist_lower, int hist_nvalues,
    1257                 :                                Datum *length_hist_values, int length_hist_nvalues)
    1258                 : {
    1259                 :     int         i,
    1260                 :                 lower_index;
    1261                 :     double      bin_width,
    1262                 :                 lower_bin_width;
    1263                 :     double      sum_frac;
    1264                 :     float8      prev_dist;
    1265                 : 
    1266                 :     /* Find the bin containing the lower bound of query range. */
    1267 GIC          24 :     lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
    1268 ECB             :                                  true);
    1269                 : 
    1270                 :     /*
    1271                 :      * If the lower bound value is below the histogram's lower limit, there
    1272                 :      * are no matches.
    1273                 :      */
    1274 GIC          24 :     if (lower_index < 0)
    1275 LBC           0 :         return 0.0;
    1276 EUB             : 
    1277                 :     /*
    1278                 :      * If the lower bound value is at or beyond the histogram's upper limit,
    1279                 :      * start our loop at the last actual bin, as though the upper bound were
    1280                 :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1281                 :      * (This corresponds to assuming that the data population above the
    1282                 :      * histogram's upper limit is empty, exactly like what we just assumed for
    1283                 :      * the lower limit.)
    1284                 :      */
    1285 GIC          24 :     lower_index = Min(lower_index, hist_nvalues - 2);
    1286 ECB             : 
    1287                 :     /*
    1288                 :      * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
    1289                 :      * lower_index + 1) bin which is greater than lower bound of query range
    1290                 :      * using linear interpolation of subdiff function.
    1291                 :      */
    1292 GIC          24 :     lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
    1293 CBC          24 :                                    &hist_lower[lower_index + 1]);
    1294 ECB             : 
    1295                 :     /*
    1296                 :      * Loop through all the lower bound bins, smaller than the query lower
    1297                 :      * bound. In the loop, dist and prev_dist are the distance of the
    1298                 :      * "current" bin's lower and upper bounds from the constant upper bound.
    1299                 :      * We begin from query lower bound, and walk backwards, so the first bin's
    1300                 :      * upper bound is the query lower bound, and its distance to the query
    1301                 :      * upper bound is the length of the query range.
    1302                 :      *
    1303                 :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1304                 :      * meaning a full width bin, except for the first bin, which is only
    1305                 :      * counted up to the constant lower bound.
    1306                 :      */
    1307 GIC          24 :     prev_dist = get_distance(typcache, lower, upper);
    1308 CBC          24 :     sum_frac = 0.0;
    1309              24 :     bin_width = lower_bin_width;
    1310             144 :     for (i = lower_index; i >= 0; i--)
    1311 ECB             :     {
    1312                 :         float8      dist;
    1313                 :         double      length_hist_frac;
    1314                 : 
    1315                 :         /*
    1316                 :          * dist -- distance from upper bound of query range to current value
    1317                 :          * of lower bound histogram or lower bound of query range (if we've
    1318                 :          * reach it).
    1319                 :          */
    1320 GIC         120 :         dist = get_distance(typcache, &hist_lower[i], upper);
    1321 ECB             : 
    1322                 :         /*
    1323                 :          * Get average fraction of length histogram which covers intervals
    1324                 :          * longer than (or equal to) distance to upper bound of query range.
    1325                 :          */
    1326 GIC         120 :         length_hist_frac =
    1327 CBC         120 :             1.0 - calc_length_hist_frac(length_hist_values,
    1328 ECB             :                                         length_hist_nvalues,
    1329                 :                                         prev_dist, dist, false);
    1330                 : 
    1331 GIC         120 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1332 ECB             : 
    1333 GIC         120 :         bin_width = 1.0;
    1334 CBC         120 :         prev_dist = dist;
    1335 ECB             :     }
    1336                 : 
    1337 GIC          24 :     return sum_frac;
    1338 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a