LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - rangetypes_selfuncs.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 80.5 % 344 277 67 1 276 1
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 13 13 1 12
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * rangetypes_selfuncs.c
       4                 :  *    Functions for selectivity estimation of range operators
       5                 :  *
       6                 :  * Estimates are based on histograms of lower and upper bounds, and the
       7                 :  * fraction of empty ranges.
       8                 :  *
       9                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10                 :  * Portions Copyright (c) 1994, Regents of the University of California
      11                 :  *
      12                 :  *
      13                 :  * IDENTIFICATION
      14                 :  *    src/backend/utils/adt/rangetypes_selfuncs.c
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include <math.h>
      21                 : 
      22                 : #include "access/htup_details.h"
      23                 : #include "catalog/pg_operator.h"
      24                 : #include "catalog/pg_statistic.h"
      25                 : #include "catalog/pg_type.h"
      26                 : #include "utils/float.h"
      27                 : #include "utils/fmgrprotos.h"
      28                 : #include "utils/lsyscache.h"
      29                 : #include "utils/rangetypes.h"
      30                 : #include "utils/selfuncs.h"
      31                 : #include "utils/typcache.h"
      32                 : 
      33                 : static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
      34                 :                             const RangeType *constval, Oid operator);
      35                 : static double default_range_selectivity(Oid operator);
      36                 : static double calc_hist_selectivity(TypeCacheEntry *typcache,
      37                 :                                     VariableStatData *vardata, const RangeType *constval,
      38                 :                                     Oid operator);
      39                 : static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
      40                 :                                            const RangeBound *constbound,
      41                 :                                            const RangeBound *hist, int hist_nvalues,
      42                 :                                            bool equal);
      43                 : static int  rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
      44                 :                            const RangeBound *hist, int hist_length, bool equal);
      45                 : static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
      46                 :                            const RangeBound *hist1, const RangeBound *hist2);
      47                 : static float8 get_len_position(double value, double hist1, double hist2);
      48                 : static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
      49                 :                            const RangeBound *bound2);
      50                 : static int  length_hist_bsearch(Datum *length_hist_values,
      51                 :                                 int length_hist_nvalues, double value, bool equal);
      52                 : static double calc_length_hist_frac(Datum *length_hist_values,
      53                 :                                     int length_hist_nvalues, double length1, double length2, bool equal);
      54                 : static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
      55                 :                                               const RangeBound *lower, RangeBound *upper,
      56                 :                                               const RangeBound *hist_lower, int hist_nvalues,
      57                 :                                               Datum *length_hist_values, int length_hist_nvalues);
      58                 : static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
      59                 :                                              const RangeBound *lower, const RangeBound *upper,
      60                 :                                              const RangeBound *hist_lower, int hist_nvalues,
      61                 :                                              Datum *length_hist_values, int length_hist_nvalues);
      62                 : 
      63                 : /*
      64                 :  * Returns a default selectivity estimate for given operator, when we don't
      65                 :  * have statistics or cannot use them for some reason.
      66                 :  */
      67                 : static double
      68 CBC         260 : default_range_selectivity(Oid operator)
      69                 : {
      70             260 :     switch (operator)
      71                 :     {
      72              23 :         case OID_RANGE_OVERLAP_OP:
      73              23 :             return 0.01;
      74                 : 
      75              54 :         case OID_RANGE_CONTAINS_OP:
      76                 :         case OID_RANGE_CONTAINED_OP:
      77              54 :             return 0.005;
      78                 : 
      79              33 :         case OID_RANGE_CONTAINS_ELEM_OP:
      80                 :         case OID_RANGE_ELEM_CONTAINED_OP:
      81                 : 
      82                 :             /*
      83                 :              * "range @> elem" is more or less identical to a scalar
      84                 :              * inequality "A >= b AND A <= c".
      85                 :              */
      86              33 :             return DEFAULT_RANGE_INEQ_SEL;
      87                 : 
      88             150 :         case OID_RANGE_LESS_OP:
      89                 :         case OID_RANGE_LESS_EQUAL_OP:
      90                 :         case OID_RANGE_GREATER_OP:
      91                 :         case OID_RANGE_GREATER_EQUAL_OP:
      92                 :         case OID_RANGE_LEFT_OP:
      93                 :         case OID_RANGE_RIGHT_OP:
      94                 :         case OID_RANGE_OVERLAPS_LEFT_OP:
      95                 :         case OID_RANGE_OVERLAPS_RIGHT_OP:
      96                 :             /* these are similar to regular scalar inequalities */
      97             150 :             return DEFAULT_INEQ_SEL;
      98                 : 
      99 UBC           0 :         default:
     100                 :             /* all range operators should be handled above, but just in case */
     101               0 :             return 0.01;
     102                 :     }
     103                 : }
     104                 : 
     105                 : /*
     106                 :  * rangesel -- restriction selectivity for range operators
     107                 :  */
     108                 : Datum
     109 CBC         425 : rangesel(PG_FUNCTION_ARGS)
     110                 : {
     111             425 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     112             425 :     Oid         operator = PG_GETARG_OID(1);
     113             425 :     List       *args = (List *) PG_GETARG_POINTER(2);
     114             425 :     int         varRelid = PG_GETARG_INT32(3);
     115                 :     VariableStatData vardata;
     116                 :     Node       *other;
     117                 :     bool        varonleft;
     118                 :     Selectivity selec;
     119             425 :     TypeCacheEntry *typcache = NULL;
     120             425 :     RangeType  *constrange = NULL;
     121                 : 
     122                 :     /*
     123                 :      * If expression is not (variable op something) or (something op
     124                 :      * variable), then punt and return a default estimate.
     125                 :      */
     126             425 :     if (!get_restriction_variable(root, args, varRelid,
     127                 :                                   &vardata, &other, &varonleft))
     128 UBC           0 :         PG_RETURN_FLOAT8(default_range_selectivity(operator));
     129                 : 
     130                 :     /*
     131                 :      * Can't do anything useful if the something is not a constant, either.
     132                 :      */
     133 CBC         425 :     if (!IsA(other, Const))
     134                 :     {
     135 UBC           0 :         ReleaseVariableStats(vardata);
     136               0 :         PG_RETURN_FLOAT8(default_range_selectivity(operator));
     137                 :     }
     138                 : 
     139                 :     /*
     140                 :      * All the range operators are strict, so we can cope with a NULL constant
     141                 :      * right away.
     142                 :      */
     143 CBC         425 :     if (((Const *) other)->constisnull)
     144                 :     {
     145 UBC           0 :         ReleaseVariableStats(vardata);
     146               0 :         PG_RETURN_FLOAT8(0.0);
     147                 :     }
     148                 : 
     149                 :     /*
     150                 :      * If var is on the right, commute the operator, so that we can assume the
     151                 :      * var is on the left in what follows.
     152                 :      */
     153 CBC         425 :     if (!varonleft)
     154                 :     {
     155                 :         /* we have other Op var, commute to make var Op other */
     156               8 :         operator = get_commutator(operator);
     157               8 :         if (!operator)
     158                 :         {
     159                 :             /* Use default selectivity (should we raise an error instead?) */
     160 UBC           0 :             ReleaseVariableStats(vardata);
     161               0 :             PG_RETURN_FLOAT8(default_range_selectivity(operator));
     162                 :         }
     163                 :     }
     164                 : 
     165                 :     /*
     166                 :      * OK, there's a Var and a Const we're dealing with here.  We need the
     167                 :      * Const to be of same range type as the column, else we can't do anything
     168                 :      * useful. (Such cases will likely fail at runtime, but here we'd rather
     169                 :      * just return a default estimate.)
     170                 :      *
     171                 :      * If the operator is "range @> element", the constant should be of the
     172                 :      * element type of the range column. Convert it to a range that includes
     173                 :      * only that single point, so that we don't need special handling for that
     174                 :      * in what follows.
     175                 :      */
     176 CBC         425 :     if (operator == OID_RANGE_CONTAINS_ELEM_OP)
     177                 :     {
     178              39 :         typcache = range_get_typcache(fcinfo, vardata.vartype);
     179                 : 
     180              39 :         if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
     181                 :         {
     182                 :             RangeBound  lower,
     183                 :                         upper;
     184                 : 
     185              39 :             lower.inclusive = true;
     186              39 :             lower.val = ((Const *) other)->constvalue;
     187              39 :             lower.infinite = false;
     188              39 :             lower.lower = true;
     189              39 :             upper.inclusive = true;
     190              39 :             upper.val = ((Const *) other)->constvalue;
     191              39 :             upper.infinite = false;
     192              39 :             upper.lower = false;
     193 GNC          39 :             constrange = range_serialize(typcache, &lower, &upper, false, NULL);
     194                 :         }
     195                 :     }
     196 CBC         386 :     else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
     197                 :     {
     198                 :         /*
     199                 :          * Here, the Var is the elem, not the range.  For now we just punt and
     200                 :          * return the default estimate.  In future we could disassemble the
     201                 :          * range constant and apply scalarineqsel ...
     202                 :          */
     203                 :     }
     204             383 :     else if (((Const *) other)->consttype == vardata.vartype)
     205                 :     {
     206                 :         /* Both sides are the same range type */
     207             383 :         typcache = range_get_typcache(fcinfo, vardata.vartype);
     208                 : 
     209             383 :         constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
     210                 :     }
     211                 : 
     212                 :     /*
     213                 :      * If we got a valid constant on one side of the operator, proceed to
     214                 :      * estimate using statistics. Otherwise punt and return a default constant
     215                 :      * estimate.  Note that calc_rangesel need not handle
     216                 :      * OID_RANGE_ELEM_CONTAINED_OP.
     217                 :      */
     218             425 :     if (constrange)
     219             422 :         selec = calc_rangesel(typcache, &vardata, constrange, operator);
     220                 :     else
     221               3 :         selec = default_range_selectivity(operator);
     222                 : 
     223             425 :     ReleaseVariableStats(vardata);
     224                 : 
     225             425 :     CLAMP_PROBABILITY(selec);
     226                 : 
     227             425 :     PG_RETURN_FLOAT8((float8) selec);
     228                 : }
     229                 : 
     230                 : static double
     231             422 : calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
     232                 :               const RangeType *constval, Oid operator)
     233                 : {
     234                 :     double      hist_selec;
     235                 :     double      selec;
     236                 :     float4      empty_frac,
     237                 :                 null_frac;
     238                 : 
     239                 :     /*
     240                 :      * First look up the fraction of NULLs and empty ranges from pg_statistic.
     241                 :      */
     242             422 :     if (HeapTupleIsValid(vardata->statsTuple))
     243                 :     {
     244                 :         Form_pg_statistic stats;
     245                 :         AttStatsSlot sslot;
     246                 : 
     247              81 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
     248              81 :         null_frac = stats->stanullfrac;
     249                 : 
     250                 :         /* Try to get fraction of empty ranges */
     251              81 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
     252                 :                              STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     253                 :                              InvalidOid,
     254                 :                              ATTSTATSSLOT_NUMBERS))
     255                 :         {
     256              81 :             if (sslot.nnumbers != 1)
     257 UBC           0 :                 elog(ERROR, "invalid empty fraction statistic");  /* shouldn't happen */
     258 CBC          81 :             empty_frac = sslot.numbers[0];
     259              81 :             free_attstatsslot(&sslot);
     260                 :         }
     261                 :         else
     262                 :         {
     263                 :             /* No empty fraction statistic. Assume no empty ranges. */
     264 UBC           0 :             empty_frac = 0.0;
     265                 :         }
     266                 :     }
     267                 :     else
     268                 :     {
     269                 :         /*
     270                 :          * No stats are available. Follow through the calculations below
     271                 :          * anyway, assuming no NULLs and no empty ranges. This still allows us
     272                 :          * to give a better-than-nothing estimate based on whether the
     273                 :          * constant is an empty range or not.
     274                 :          */
     275 CBC         341 :         null_frac = 0.0;
     276             341 :         empty_frac = 0.0;
     277                 :     }
     278                 : 
     279             422 :     if (RangeIsEmpty(constval))
     280                 :     {
     281                 :         /*
     282                 :          * An empty range matches all ranges, all empty ranges, or nothing,
     283                 :          * depending on the operator
     284                 :          */
     285              93 :         switch (operator)
     286                 :         {
     287                 :                 /* these return false if either argument is empty */
     288               6 :             case OID_RANGE_OVERLAP_OP:
     289                 :             case OID_RANGE_OVERLAPS_LEFT_OP:
     290                 :             case OID_RANGE_OVERLAPS_RIGHT_OP:
     291                 :             case OID_RANGE_LEFT_OP:
     292                 :             case OID_RANGE_RIGHT_OP:
     293                 :                 /* nothing is less than an empty range */
     294                 :             case OID_RANGE_LESS_OP:
     295               6 :                 selec = 0.0;
     296               6 :                 break;
     297                 : 
     298                 :                 /* only empty ranges can be contained by an empty range */
     299              27 :             case OID_RANGE_CONTAINED_OP:
     300                 :                 /* only empty ranges are <= an empty range */
     301                 :             case OID_RANGE_LESS_EQUAL_OP:
     302              27 :                 selec = empty_frac;
     303              27 :                 break;
     304                 : 
     305                 :                 /* everything contains an empty range */
     306              45 :             case OID_RANGE_CONTAINS_OP:
     307                 :                 /* everything is >= an empty range */
     308                 :             case OID_RANGE_GREATER_EQUAL_OP:
     309              45 :                 selec = 1.0;
     310              45 :                 break;
     311                 : 
     312                 :                 /* all non-empty ranges are > an empty range */
     313              15 :             case OID_RANGE_GREATER_OP:
     314              15 :                 selec = 1.0 - empty_frac;
     315              15 :                 break;
     316                 : 
     317                 :                 /* an element cannot be empty */
     318 UBC           0 :             case OID_RANGE_CONTAINS_ELEM_OP:
     319                 :             default:
     320               0 :                 elog(ERROR, "unexpected operator %u", operator);
     321                 :                 selec = 0.0;    /* keep compiler quiet */
     322                 :                 break;
     323                 :         }
     324                 :     }
     325                 :     else
     326                 :     {
     327                 :         /*
     328                 :          * Calculate selectivity using bound histograms. If that fails for
     329                 :          * some reason, e.g no histogram in pg_statistic, use the default
     330                 :          * constant estimate for the fraction of non-empty values. This is
     331                 :          * still somewhat better than just returning the default estimate,
     332                 :          * because this still takes into account the fraction of empty and
     333                 :          * NULL tuples, if we had statistics for them.
     334                 :          */
     335 CBC         329 :         hist_selec = calc_hist_selectivity(typcache, vardata, constval,
     336                 :                                            operator);
     337             329 :         if (hist_selec < 0.0)
     338             257 :             hist_selec = default_range_selectivity(operator);
     339                 : 
     340                 :         /*
     341                 :          * Now merge the results for the empty ranges and histogram
     342                 :          * calculations, realizing that the histogram covers only the
     343                 :          * non-null, non-empty values.
     344                 :          */
     345             329 :         if (operator == OID_RANGE_CONTAINED_OP)
     346                 :         {
     347                 :             /* empty is contained by anything non-empty */
     348              36 :             selec = (1.0 - empty_frac) * hist_selec + empty_frac;
     349                 :         }
     350                 :         else
     351                 :         {
     352                 :             /* with any other operator, empty Op non-empty matches nothing */
     353             293 :             selec = (1.0 - empty_frac) * hist_selec;
     354                 :         }
     355                 :     }
     356                 : 
     357                 :     /* all range operators are strict */
     358             422 :     selec *= (1.0 - null_frac);
     359                 : 
     360                 :     /* result should be in range, but make sure... */
     361             422 :     CLAMP_PROBABILITY(selec);
     362                 : 
     363             422 :     return selec;
     364                 : }
     365                 : 
     366                 : /*
     367                 :  * Calculate range operator selectivity using histograms of range bounds.
     368                 :  *
     369                 :  * This estimate is for the portion of values that are not empty and not
     370                 :  * NULL.
     371                 :  */
     372                 : static double
     373             329 : calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
     374                 :                       const RangeType *constval, Oid operator)
     375                 : {
     376                 :     AttStatsSlot hslot;
     377                 :     AttStatsSlot lslot;
     378                 :     int         nhist;
     379                 :     RangeBound *hist_lower;
     380                 :     RangeBound *hist_upper;
     381                 :     int         i;
     382                 :     RangeBound  const_lower;
     383                 :     RangeBound  const_upper;
     384                 :     bool        empty;
     385                 :     double      hist_selec;
     386                 : 
     387                 :     /* Can't use the histogram with insecure range support functions */
     388             329 :     if (!statistic_proc_security_check(vardata,
     389                 :                                        typcache->rng_cmp_proc_finfo.fn_oid))
     390 UBC           0 :         return -1;
     391 CBC         329 :     if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
     392             323 :         !statistic_proc_security_check(vardata,
     393                 :                                        typcache->rng_subdiff_finfo.fn_oid))
     394 UBC           0 :         return -1;
     395                 : 
     396                 :     /* Try to get histogram of ranges */
     397 CBC         401 :     if (!(HeapTupleIsValid(vardata->statsTuple) &&
     398              72 :           get_attstatsslot(&hslot, vardata->statsTuple,
     399                 :                            STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
     400                 :                            ATTSTATSSLOT_VALUES)))
     401             257 :         return -1.0;
     402                 : 
     403                 :     /* check that it's a histogram, not just a dummy entry */
     404              72 :     if (hslot.nvalues < 2)
     405                 :     {
     406 UBC           0 :         free_attstatsslot(&hslot);
     407               0 :         return -1.0;
     408                 :     }
     409                 : 
     410                 :     /*
     411                 :      * Convert histogram of ranges into histograms of its lower and upper
     412                 :      * bounds.
     413                 :      */
     414 CBC          72 :     nhist = hslot.nvalues;
     415              72 :     hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     416              72 :     hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
     417            7344 :     for (i = 0; i < nhist; i++)
     418                 :     {
     419            7272 :         range_deserialize(typcache, DatumGetRangeTypeP(hslot.values[i]),
     420            7272 :                           &hist_lower[i], &hist_upper[i], &empty);
     421                 :         /* The histogram should not contain any empty ranges */
     422            7272 :         if (empty)
     423 UBC           0 :             elog(ERROR, "bounds histogram contains an empty range");
     424                 :     }
     425                 : 
     426                 :     /* @> and @< also need a histogram of range lengths */
     427 CBC          72 :     if (operator == OID_RANGE_CONTAINS_OP ||
     428                 :         operator == OID_RANGE_CONTAINED_OP)
     429                 :     {
     430              36 :         if (!(HeapTupleIsValid(vardata->statsTuple) &&
     431              18 :               get_attstatsslot(&lslot, vardata->statsTuple,
     432                 :                                STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
     433                 :                                InvalidOid,
     434                 :                                ATTSTATSSLOT_VALUES)))
     435                 :         {
     436 UBC           0 :             free_attstatsslot(&hslot);
     437               0 :             return -1.0;
     438                 :         }
     439                 : 
     440                 :         /* check that it's a histogram, not just a dummy entry */
     441 CBC          18 :         if (lslot.nvalues < 2)
     442                 :         {
     443 UBC           0 :             free_attstatsslot(&lslot);
     444               0 :             free_attstatsslot(&hslot);
     445               0 :             return -1.0;
     446                 :         }
     447                 :     }
     448                 :     else
     449 CBC          54 :         memset(&lslot, 0, sizeof(lslot));
     450                 : 
     451                 :     /* Extract the bounds of the constant value. */
     452              72 :     range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
     453              72 :     Assert(!empty);
     454                 : 
     455                 :     /*
     456                 :      * Calculate selectivity comparing the lower or upper bound of the
     457                 :      * constant with the histogram of lower or upper bounds.
     458                 :      */
     459              72 :     switch (operator)
     460                 :     {
     461 UBC           0 :         case OID_RANGE_LESS_OP:
     462                 : 
     463                 :             /*
     464                 :              * The regular b-tree comparison operators (<, <=, >, >=) compare
     465                 :              * the lower bounds first, and the upper bounds for values with
     466                 :              * equal lower bounds. Estimate that by comparing the lower bounds
     467                 :              * only. This gives a fairly accurate estimate assuming there
     468                 :              * aren't many rows with a lower bound equal to the constant's
     469                 :              * lower bound.
     470                 :              */
     471                 :             hist_selec =
     472               0 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     473                 :                                              hist_lower, nhist, false);
     474               0 :             break;
     475                 : 
     476               0 :         case OID_RANGE_LESS_EQUAL_OP:
     477                 :             hist_selec =
     478               0 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     479                 :                                              hist_lower, nhist, true);
     480               0 :             break;
     481                 : 
     482               0 :         case OID_RANGE_GREATER_OP:
     483               0 :             hist_selec =
     484               0 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     485                 :                                                  hist_lower, nhist, false);
     486               0 :             break;
     487                 : 
     488               0 :         case OID_RANGE_GREATER_EQUAL_OP:
     489               0 :             hist_selec =
     490               0 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     491                 :                                                  hist_lower, nhist, true);
     492               0 :             break;
     493                 : 
     494 CBC           9 :         case OID_RANGE_LEFT_OP:
     495                 :             /* var << const when upper(var) < lower(const) */
     496                 :             hist_selec =
     497               9 :                 calc_hist_selectivity_scalar(typcache, &const_lower,
     498                 :                                              hist_upper, nhist, false);
     499               9 :             break;
     500                 : 
     501               9 :         case OID_RANGE_RIGHT_OP:
     502                 :             /* var >> const when lower(var) > upper(const) */
     503               9 :             hist_selec =
     504               9 :                 1 - calc_hist_selectivity_scalar(typcache, &const_upper,
     505                 :                                                  hist_lower, nhist, true);
     506               9 :             break;
     507                 : 
     508               9 :         case OID_RANGE_OVERLAPS_RIGHT_OP:
     509                 :             /* compare lower bounds */
     510               9 :             hist_selec =
     511               9 :                 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
     512                 :                                                  hist_lower, nhist, false);
     513               9 :             break;
     514                 : 
     515               9 :         case OID_RANGE_OVERLAPS_LEFT_OP:
     516                 :             /* compare upper bounds */
     517                 :             hist_selec =
     518               9 :                 calc_hist_selectivity_scalar(typcache, &const_upper,
     519                 :                                              hist_upper, nhist, true);
     520               9 :             break;
     521                 : 
     522              18 :         case OID_RANGE_OVERLAP_OP:
     523                 :         case OID_RANGE_CONTAINS_ELEM_OP:
     524                 : 
     525                 :             /*
     526                 :              * A && B <=> NOT (A << B OR A >> B).
     527                 :              *
     528                 :              * Since A << B and A >> B are mutually exclusive events we can
     529                 :              * sum their probabilities to find probability of (A << B OR A >>
     530                 :              * B).
     531                 :              *
     532                 :              * "range @> elem" is equivalent to "range && [elem,elem]". The
     533                 :              * caller already constructed the singular range from the element
     534                 :              * constant, so just treat it the same as &&.
     535                 :              */
     536                 :             hist_selec =
     537              18 :                 calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
     538                 :                                              nhist, false);
     539              18 :             hist_selec +=
     540              18 :                 (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
     541                 :                                                     nhist, true));
     542              18 :             hist_selec = 1.0 - hist_selec;
     543              18 :             break;
     544                 : 
     545               9 :         case OID_RANGE_CONTAINS_OP:
     546                 :             hist_selec =
     547               9 :                 calc_hist_selectivity_contains(typcache, &const_lower,
     548                 :                                                &const_upper, hist_lower, nhist,
     549                 :                                                lslot.values, lslot.nvalues);
     550               9 :             break;
     551                 : 
     552               9 :         case OID_RANGE_CONTAINED_OP:
     553               9 :             if (const_lower.infinite)
     554                 :             {
     555                 :                 /*
     556                 :                  * Lower bound no longer matters. Just estimate the fraction
     557                 :                  * with an upper bound <= const upper bound
     558                 :                  */
     559                 :                 hist_selec =
     560 UBC           0 :                     calc_hist_selectivity_scalar(typcache, &const_upper,
     561                 :                                                  hist_upper, nhist, true);
     562                 :             }
     563 CBC           9 :             else if (const_upper.infinite)
     564                 :             {
     565 UBC           0 :                 hist_selec =
     566               0 :                     1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
     567                 :                                                        hist_lower, nhist, false);
     568                 :             }
     569                 :             else
     570                 :             {
     571                 :                 hist_selec =
     572 CBC           9 :                     calc_hist_selectivity_contained(typcache, &const_lower,
     573                 :                                                     &const_upper, hist_lower, nhist,
     574                 :                                                     lslot.values, lslot.nvalues);
     575                 :             }
     576               9 :             break;
     577                 : 
     578 UBC           0 :         default:
     579               0 :             elog(ERROR, "unknown range operator %u", operator);
     580                 :             hist_selec = -1.0;  /* keep compiler quiet */
     581                 :             break;
     582                 :     }
     583                 : 
     584 CBC          72 :     free_attstatsslot(&lslot);
     585              72 :     free_attstatsslot(&hslot);
     586                 : 
     587              72 :     return hist_selec;
     588                 : }
     589                 : 
     590                 : 
     591                 : /*
     592                 :  * Look up the fraction of values less than (or equal, if 'equal' argument
     593                 :  * is true) a given const in a histogram of range bounds.
     594                 :  */
     595                 : static double
     596              72 : calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
     597                 :                              const RangeBound *hist, int hist_nvalues, bool equal)
     598                 : {
     599                 :     Selectivity selec;
     600                 :     int         index;
     601                 : 
     602                 :     /*
     603                 :      * Find the histogram bin the given constant falls into. Estimate
     604                 :      * selectivity as the number of preceding whole bins.
     605                 :      */
     606              72 :     index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
     607              72 :     selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
     608                 : 
     609                 :     /* Adjust using linear interpolation within the bin */
     610              72 :     if (index >= 0 && index < hist_nvalues - 1)
     611             108 :         selec += get_position(typcache, constbound, &hist[index],
     612              54 :                               &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
     613                 : 
     614              72 :     return selec;
     615                 : }
     616                 : 
     617                 : /*
     618                 :  * Binary search on an array of range bounds. Returns greatest index of range
     619                 :  * bound in array which is less(less or equal) than given range bound. If all
     620                 :  * range bounds in array are greater or equal(greater) than given range bound,
     621                 :  * return -1. When "equal" flag is set conditions in brackets are used.
     622                 :  *
     623                 :  * This function is used in scalar operator selectivity estimation. Another
     624                 :  * goal of this function is to find a histogram bin where to stop
     625                 :  * interpolation of portion of bounds which are less than or equal to given bound.
     626                 :  */
     627                 : static int
     628              90 : rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
     629                 :                int hist_length, bool equal)
     630                 : {
     631              90 :     int         lower = -1,
     632              90 :                 upper = hist_length - 1,
     633                 :                 cmp,
     634                 :                 middle;
     635                 : 
     636             684 :     while (lower < upper)
     637                 :     {
     638             594 :         middle = (lower + upper + 1) / 2;
     639             594 :         cmp = range_cmp_bounds(typcache, &hist[middle], value);
     640                 : 
     641             594 :         if (cmp < 0 || (equal && cmp == 0))
     642             171 :             lower = middle;
     643                 :         else
     644             423 :             upper = middle - 1;
     645                 :     }
     646              90 :     return lower;
     647                 : }
     648                 : 
     649                 : 
     650                 : /*
     651                 :  * Binary search on length histogram. Returns greatest index of range length in
     652                 :  * histogram which is less than (less than or equal) the given length value. If
     653                 :  * all lengths in the histogram are greater than (greater than or equal) the
     654                 :  * given length, returns -1.
     655                 :  */
     656                 : static int
     657              54 : length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
     658                 :                     double value, bool equal)
     659                 : {
     660              54 :     int         lower = -1,
     661              54 :                 upper = length_hist_nvalues - 1,
     662                 :                 middle;
     663                 : 
     664             414 :     while (lower < upper)
     665                 :     {
     666                 :         double      middleval;
     667                 : 
     668             360 :         middle = (lower + upper + 1) / 2;
     669                 : 
     670             360 :         middleval = DatumGetFloat8(length_hist_values[middle]);
     671             360 :         if (middleval < value || (equal && middleval <= value))
     672             135 :             lower = middle;
     673                 :         else
     674             225 :             upper = middle - 1;
     675                 :     }
     676              54 :     return lower;
     677                 : }
     678                 : 
     679                 : /*
     680                 :  * Get relative position of value in histogram bin in [0,1] range.
     681                 :  */
     682                 : static float8
     683              81 : get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
     684                 :              const RangeBound *hist2)
     685                 : {
     686              81 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     687                 :     float8      position;
     688                 : 
     689              81 :     if (!hist1->infinite && !hist2->infinite)
     690                 :     {
     691                 :         float8      bin_width;
     692                 : 
     693                 :         /*
     694                 :          * Both bounds are finite. Assuming the subtype's comparison function
     695                 :          * works sanely, the value must be finite, too, because it lies
     696                 :          * somewhere between the bounds.  If it doesn't, arbitrarily return
     697                 :          * 0.5.
     698                 :          */
     699              81 :         if (value->infinite)
     700 UBC           0 :             return 0.5;
     701                 : 
     702                 :         /* Can't interpolate without subdiff function */
     703 CBC          81 :         if (!has_subdiff)
     704 UBC           0 :             return 0.5;
     705                 : 
     706                 :         /* Calculate relative position using subdiff function. */
     707 CBC          81 :         bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     708                 :                                                      typcache->rng_collation,
     709              81 :                                                      hist2->val,
     710              81 :                                                      hist1->val));
     711              81 :         if (isnan(bin_width) || bin_width <= 0.0)
     712 UBC           0 :             return 0.5;         /* punt for NaN or zero-width bin */
     713                 : 
     714 CBC          81 :         position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     715                 :                                                     typcache->rng_collation,
     716              81 :                                                     value->val,
     717              81 :                                                     hist1->val))
     718                 :             / bin_width;
     719                 : 
     720              81 :         if (isnan(position))
     721 UBC           0 :             return 0.5;         /* punt for NaN from subdiff, Inf/Inf, etc */
     722                 : 
     723                 :         /* Relative position must be in [0,1] range */
     724 CBC          81 :         position = Max(position, 0.0);
     725              81 :         position = Min(position, 1.0);
     726              81 :         return position;
     727                 :     }
     728 UBC           0 :     else if (hist1->infinite && !hist2->infinite)
     729                 :     {
     730                 :         /*
     731                 :          * Lower bin boundary is -infinite, upper is finite. If the value is
     732                 :          * -infinite, return 0.0 to indicate it's equal to the lower bound.
     733                 :          * Otherwise return 1.0 to indicate it's infinitely far from the lower
     734                 :          * bound.
     735                 :          */
     736               0 :         return ((value->infinite && value->lower) ? 0.0 : 1.0);
     737                 :     }
     738               0 :     else if (!hist1->infinite && hist2->infinite)
     739                 :     {
     740                 :         /* same as above, but in reverse */
     741               0 :         return ((value->infinite && !value->lower) ? 1.0 : 0.0);
     742                 :     }
     743                 :     else
     744                 :     {
     745                 :         /*
     746                 :          * If both bin boundaries are infinite, they should be equal to each
     747                 :          * other, and the value should also be infinite and equal to both
     748                 :          * bounds. (But don't Assert that, to avoid crashing if a user creates
     749                 :          * a datatype with a broken comparison function).
     750                 :          *
     751                 :          * Assume the value to lie in the middle of the infinite bounds.
     752                 :          */
     753               0 :         return 0.5;
     754                 :     }
     755                 : }
     756                 : 
     757                 : 
     758                 : /*
     759                 :  * Get relative position of value in a length histogram bin in [0,1] range.
     760                 :  */
     761                 : static double
     762 CBC          81 : get_len_position(double value, double hist1, double hist2)
     763                 : {
     764              81 :     if (!isinf(hist1) && !isinf(hist2))
     765                 :     {
     766                 :         /*
     767                 :          * Both bounds are finite. The value should be finite too, because it
     768                 :          * lies somewhere between the bounds. If it doesn't, just return
     769                 :          * something.
     770                 :          */
     771              63 :         if (isinf(value))
     772 UBC           0 :             return 0.5;
     773                 : 
     774 CBC          63 :         return 1.0 - (hist2 - value) / (hist2 - hist1);
     775                 :     }
     776              18 :     else if (isinf(hist1) && !isinf(hist2))
     777                 :     {
     778                 :         /*
     779                 :          * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
     780                 :          * indicate the value is infinitely far from the lower bound.
     781                 :          */
     782 UBC           0 :         return 1.0;
     783                 :     }
     784 CBC          18 :     else if (isinf(hist1) && isinf(hist2))
     785                 :     {
     786                 :         /* same as above, but in reverse */
     787 UBC           0 :         return 0.0;
     788                 :     }
     789                 :     else
     790                 :     {
     791                 :         /*
     792                 :          * If both bin boundaries are infinite, they should be equal to each
     793                 :          * other, and the value should also be infinite and equal to both
     794                 :          * bounds. (But don't Assert that, to avoid crashing unnecessarily if
     795                 :          * the caller messes up)
     796                 :          *
     797                 :          * Assume the value to lie in the middle of the infinite bounds.
     798                 :          */
     799 CBC          18 :         return 0.5;
     800                 :     }
     801                 : }
     802                 : 
     803                 : /*
     804                 :  * Measure distance between two range bounds.
     805                 :  */
     806                 : static float8
     807              63 : get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
     808                 : {
     809              63 :     bool        has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     810                 : 
     811              63 :     if (!bound1->infinite && !bound2->infinite)
     812                 :     {
     813                 :         /*
     814                 :          * Neither bound is infinite, use subdiff function or return default
     815                 :          * value of 1.0 if no subdiff is available.
     816                 :          */
     817              45 :         if (has_subdiff)
     818                 :         {
     819                 :             float8      res;
     820                 : 
     821              45 :             res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
     822                 :                                                    typcache->rng_collation,
     823              45 :                                                    bound2->val,
     824              45 :                                                    bound1->val));
     825                 :             /* Reject possible NaN result, also negative result */
     826              45 :             if (isnan(res) || res < 0.0)
     827 UBC           0 :                 return 1.0;
     828                 :             else
     829 CBC          45 :                 return res;
     830                 :         }
     831                 :         else
     832 UBC           0 :             return 1.0;
     833                 :     }
     834 CBC          18 :     else if (bound1->infinite && bound2->infinite)
     835                 :     {
     836                 :         /* Both bounds are infinite */
     837 UBC           0 :         if (bound1->lower == bound2->lower)
     838               0 :             return 0.0;
     839                 :         else
     840               0 :             return get_float8_infinity();
     841                 :     }
     842                 :     else
     843                 :     {
     844                 :         /* One bound is infinite, the other is not */
     845 CBC          18 :         return get_float8_infinity();
     846                 :     }
     847                 : }
     848                 : 
     849                 : /*
     850                 :  * Calculate the average of function P(x), in the interval [length1, length2],
     851                 :  * where P(x) is the fraction of tuples with length < x (or length <= x if
     852                 :  * 'equal' is true).
     853                 :  */
     854                 : static double
     855              54 : calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
     856                 :                       double length1, double length2, bool equal)
     857                 : {
     858                 :     double      frac;
     859                 :     double      A,
     860                 :                 B,
     861                 :                 PA,
     862                 :                 PB;
     863                 :     double      pos;
     864                 :     int         i;
     865                 :     double      area;
     866                 : 
     867              54 :     Assert(length2 >= length1);
     868                 : 
     869              54 :     if (length2 < 0.0)
     870 UBC           0 :         return 0.0;             /* shouldn't happen, but doesn't hurt to check */
     871                 : 
     872                 :     /* All lengths in the table are <= infinite. */
     873 CBC          54 :     if (isinf(length2) && equal)
     874 UBC           0 :         return 1.0;
     875                 : 
     876                 :     /*----------
     877                 :      * The average of a function between A and B can be calculated by the
     878                 :      * formula:
     879                 :      *
     880                 :      *          B
     881                 :      *    1     /
     882                 :      * -------  | P(x)dx
     883                 :      *  B - A   /
     884                 :      *          A
     885                 :      *
     886                 :      * The geometrical interpretation of the integral is the area under the
     887                 :      * graph of P(x). P(x) is defined by the length histogram. We calculate
     888                 :      * the area in a piecewise fashion, iterating through the length histogram
     889                 :      * bins. Each bin is a trapezoid:
     890                 :      *
     891                 :      *       P(x2)
     892                 :      *        /|
     893                 :      *       / |
     894                 :      * P(x1)/  |
     895                 :      *     |   |
     896                 :      *     |   |
     897                 :      *  ---+---+--
     898                 :      *     x1  x2
     899                 :      *
     900                 :      * where x1 and x2 are the boundaries of the current histogram, and P(x1)
     901                 :      * and P(x1) are the cumulative fraction of tuples at the boundaries.
     902                 :      *
     903                 :      * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
     904                 :      *
     905                 :      * The first bin contains the lower bound passed by the caller, so we
     906                 :      * use linear interpolation between the previous and next histogram bin
     907                 :      * boundary to calculate P(x1). Likewise for the last bin: we use linear
     908                 :      * interpolation to calculate P(x2). For the bins in between, x1 and x2
     909                 :      * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
     910                 :      * P(x1) =    (bin index) / (number of bins)
     911                 :      * P(x2) = (bin index + 1 / (number of bins)
     912                 :      */
     913                 : 
     914                 :     /* First bin, the one that contains lower bound */
     915 CBC          54 :     i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
     916              54 :     if (i >= length_hist_nvalues - 1)
     917 UBC           0 :         return 1.0;
     918                 : 
     919 CBC          54 :     if (i < 0)
     920                 :     {
     921              18 :         i = 0;
     922              18 :         pos = 0.0;
     923                 :     }
     924                 :     else
     925                 :     {
     926                 :         /* interpolate length1's position in the bin */
     927              36 :         pos = get_len_position(length1,
     928              36 :                                DatumGetFloat8(length_hist_values[i]),
     929              36 :                                DatumGetFloat8(length_hist_values[i + 1]));
     930                 :     }
     931              54 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
     932              54 :     B = length1;
     933                 : 
     934                 :     /*
     935                 :      * In the degenerate case that length1 == length2, simply return
     936                 :      * P(length1). This is not merely an optimization: if length1 == length2,
     937                 :      * we'd divide by zero later on.
     938                 :      */
     939              54 :     if (length2 == length1)
     940               9 :         return PB;
     941                 : 
     942                 :     /*
     943                 :      * Loop through all the bins, until we hit the last bin, the one that
     944                 :      * contains the upper bound. (if lower and upper bounds are in the same
     945                 :      * bin, this falls out immediately)
     946                 :      */
     947              45 :     area = 0.0;
     948            1593 :     for (; i < length_hist_nvalues - 1; i++)
     949                 :     {
     950            1593 :         double      bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
     951                 : 
     952                 :         /* check if we've reached the last bin */
     953            1593 :         if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
     954                 :             break;
     955                 : 
     956                 :         /* the upper bound of previous bin is the lower bound of this bin */
     957            1548 :         A = B;
     958            1548 :         PA = PB;
     959                 : 
     960            1548 :         B = bin_upper;
     961            1548 :         PB = (double) i / (double) (length_hist_nvalues - 1);
     962                 : 
     963                 :         /*
     964                 :          * Add the area of this trapezoid to the total. The point of the
     965                 :          * if-check is to avoid NaN, in the corner case that PA == PB == 0,
     966                 :          * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
     967                 :          * 0) is zero, regardless of the width (B - A).
     968                 :          */
     969            1548 :         if (PA > 0 || PB > 0)
     970            1530 :             area += 0.5 * (PB + PA) * (B - A);
     971                 :     }
     972                 : 
     973                 :     /* Last bin */
     974              45 :     A = B;
     975              45 :     PA = PB;
     976                 : 
     977              45 :     B = length2;                /* last bin ends at the query upper bound */
     978              45 :     if (i >= length_hist_nvalues - 1)
     979 UBC           0 :         pos = 0.0;
     980                 :     else
     981                 :     {
     982 CBC          45 :         if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
     983 UBC           0 :             pos = 0.0;
     984                 :         else
     985 CBC          45 :             pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1]));
     986                 :     }
     987              45 :     PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
     988                 : 
     989              45 :     if (PA > 0 || PB > 0)
     990              45 :         area += 0.5 * (PB + PA) * (B - A);
     991                 : 
     992                 :     /*
     993                 :      * Ok, we have calculated the area, ie. the integral. Divide by width to
     994                 :      * get the requested average.
     995                 :      *
     996                 :      * Avoid NaN arising from infinite / infinite. This happens at least if
     997                 :      * length2 is infinite. It's not clear what the correct value would be in
     998                 :      * that case, so 0.5 seems as good as any value.
     999                 :      */
    1000              45 :     if (isinf(area) && isinf(length2))
    1001               9 :         frac = 0.5;
    1002                 :     else
    1003              36 :         frac = area / (length2 - length1);
    1004                 : 
    1005              45 :     return frac;
    1006                 : }
    1007                 : 
    1008                 : /*
    1009                 :  * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
    1010                 :  * of ranges that fall within the constant lower and upper bounds. This uses
    1011                 :  * the histograms of range lower bounds and range lengths, on the assumption
    1012                 :  * that the range lengths are independent of the lower bounds.
    1013                 :  *
    1014                 :  * The caller has already checked that constant lower and upper bounds are
    1015                 :  * finite.
    1016                 :  */
    1017                 : static double
    1018               9 : calc_hist_selectivity_contained(TypeCacheEntry *typcache,
    1019                 :                                 const RangeBound *lower, RangeBound *upper,
    1020                 :                                 const RangeBound *hist_lower, int hist_nvalues,
    1021                 :                                 Datum *length_hist_values, int length_hist_nvalues)
    1022                 : {
    1023                 :     int         i,
    1024                 :                 upper_index;
    1025                 :     float8      prev_dist;
    1026                 :     double      bin_width;
    1027                 :     double      upper_bin_width;
    1028                 :     double      sum_frac;
    1029                 : 
    1030                 :     /*
    1031                 :      * Begin by finding the bin containing the upper bound, in the lower bound
    1032                 :      * histogram. Any range with a lower bound > constant upper bound can't
    1033                 :      * match, ie. there are no matches in bins greater than upper_index.
    1034                 :      */
    1035               9 :     upper->inclusive = !upper->inclusive;
    1036               9 :     upper->lower = true;
    1037               9 :     upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
    1038                 :                                  false);
    1039                 : 
    1040                 :     /*
    1041                 :      * If the upper bound value is below the histogram's lower limit, there
    1042                 :      * are no matches.
    1043                 :      */
    1044               9 :     if (upper_index < 0)
    1045 UBC           0 :         return 0.0;
    1046                 : 
    1047                 :     /*
    1048                 :      * If the upper bound value is at or beyond the histogram's upper limit,
    1049                 :      * start our loop at the last actual bin, as though the upper bound were
    1050                 :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1051                 :      * (This corresponds to assuming that the data population above the
    1052                 :      * histogram's upper limit is empty, exactly like what we just assumed for
    1053                 :      * the lower limit.)
    1054                 :      */
    1055 CBC           9 :     upper_index = Min(upper_index, hist_nvalues - 2);
    1056                 : 
    1057                 :     /*
    1058                 :      * Calculate upper_bin_width, ie. the fraction of the (upper_index,
    1059                 :      * upper_index + 1) bin which is greater than upper bound of query range
    1060                 :      * using linear interpolation of subdiff function.
    1061                 :      */
    1062               9 :     upper_bin_width = get_position(typcache, upper,
    1063               9 :                                    &hist_lower[upper_index],
    1064               9 :                                    &hist_lower[upper_index + 1]);
    1065                 : 
    1066                 :     /*
    1067                 :      * In the loop, dist and prev_dist are the distance of the "current" bin's
    1068                 :      * lower and upper bounds from the constant upper bound.
    1069                 :      *
    1070                 :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1071                 :      * meaning a full width bin, but can be less in the corner cases: start
    1072                 :      * and end of the loop. We start with bin_width = upper_bin_width, because
    1073                 :      * we begin at the bin containing the upper bound.
    1074                 :      */
    1075               9 :     prev_dist = 0.0;
    1076               9 :     bin_width = upper_bin_width;
    1077                 : 
    1078               9 :     sum_frac = 0.0;
    1079              27 :     for (i = upper_index; i >= 0; i--)
    1080                 :     {
    1081                 :         double      dist;
    1082                 :         double      length_hist_frac;
    1083              27 :         bool        final_bin = false;
    1084                 : 
    1085                 :         /*
    1086                 :          * dist -- distance from upper bound of query range to lower bound of
    1087                 :          * the current bin in the lower bound histogram. Or to the lower bound
    1088                 :          * of the constant range, if this is the final bin, containing the
    1089                 :          * constant lower bound.
    1090                 :          */
    1091              27 :         if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
    1092                 :         {
    1093               9 :             dist = get_distance(typcache, lower, upper);
    1094                 : 
    1095                 :             /*
    1096                 :              * Subtract from bin_width the portion of this bin that we want to
    1097                 :              * ignore.
    1098                 :              */
    1099              18 :             bin_width -= get_position(typcache, lower, &hist_lower[i],
    1100               9 :                                       &hist_lower[i + 1]);
    1101               9 :             if (bin_width < 0.0)
    1102 UBC           0 :                 bin_width = 0.0;
    1103 CBC           9 :             final_bin = true;
    1104                 :         }
    1105                 :         else
    1106              18 :             dist = get_distance(typcache, &hist_lower[i], upper);
    1107                 : 
    1108                 :         /*
    1109                 :          * Estimate the fraction of tuples in this bin that are narrow enough
    1110                 :          * to not exceed the distance to the upper bound of the query range.
    1111                 :          */
    1112              27 :         length_hist_frac = calc_length_hist_frac(length_hist_values,
    1113                 :                                                  length_hist_nvalues,
    1114                 :                                                  prev_dist, dist, true);
    1115                 : 
    1116                 :         /*
    1117                 :          * Add the fraction of tuples in this bin, with a suitable length, to
    1118                 :          * the total.
    1119                 :          */
    1120              27 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1121                 : 
    1122              27 :         if (final_bin)
    1123               9 :             break;
    1124                 : 
    1125              18 :         bin_width = 1.0;
    1126              18 :         prev_dist = dist;
    1127                 :     }
    1128                 : 
    1129               9 :     return sum_frac;
    1130                 : }
    1131                 : 
    1132                 : /*
    1133                 :  * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
    1134                 :  * of ranges that contain the constant lower and upper bounds. This uses
    1135                 :  * the histograms of range lower bounds and range lengths, on the assumption
    1136                 :  * that the range lengths are independent of the lower bounds.
    1137                 :  */
    1138                 : static double
    1139               9 : calc_hist_selectivity_contains(TypeCacheEntry *typcache,
    1140                 :                                const RangeBound *lower, const RangeBound *upper,
    1141                 :                                const RangeBound *hist_lower, int hist_nvalues,
    1142                 :                                Datum *length_hist_values, int length_hist_nvalues)
    1143                 : {
    1144                 :     int         i,
    1145                 :                 lower_index;
    1146                 :     double      bin_width,
    1147                 :                 lower_bin_width;
    1148                 :     double      sum_frac;
    1149                 :     float8      prev_dist;
    1150                 : 
    1151                 :     /* Find the bin containing the lower bound of query range. */
    1152               9 :     lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
    1153                 :                                  true);
    1154                 : 
    1155                 :     /*
    1156                 :      * If the lower bound value is below the histogram's lower limit, there
    1157                 :      * are no matches.
    1158                 :      */
    1159               9 :     if (lower_index < 0)
    1160 UBC           0 :         return 0.0;
    1161                 : 
    1162                 :     /*
    1163                 :      * If the lower bound value is at or beyond the histogram's upper limit,
    1164                 :      * start our loop at the last actual bin, as though the upper bound were
    1165                 :      * within that bin; get_position will clamp its result to 1.0 anyway.
    1166                 :      * (This corresponds to assuming that the data population above the
    1167                 :      * histogram's upper limit is empty, exactly like what we just assumed for
    1168                 :      * the lower limit.)
    1169                 :      */
    1170 CBC           9 :     lower_index = Min(lower_index, hist_nvalues - 2);
    1171                 : 
    1172                 :     /*
    1173                 :      * Calculate lower_bin_width, ie. the fraction of the of (lower_index,
    1174                 :      * lower_index + 1) bin which is greater than lower bound of query range
    1175                 :      * using linear interpolation of subdiff function.
    1176                 :      */
    1177               9 :     lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
    1178               9 :                                    &hist_lower[lower_index + 1]);
    1179                 : 
    1180                 :     /*
    1181                 :      * Loop through all the lower bound bins, smaller than the query lower
    1182                 :      * bound. In the loop, dist and prev_dist are the distance of the
    1183                 :      * "current" bin's lower and upper bounds from the constant upper bound.
    1184                 :      * We begin from query lower bound, and walk backwards, so the first bin's
    1185                 :      * upper bound is the query lower bound, and its distance to the query
    1186                 :      * upper bound is the length of the query range.
    1187                 :      *
    1188                 :      * bin_width represents the width of the current bin. Normally it is 1.0,
    1189                 :      * meaning a full width bin, except for the first bin, which is only
    1190                 :      * counted up to the constant lower bound.
    1191                 :      */
    1192               9 :     prev_dist = get_distance(typcache, lower, upper);
    1193               9 :     sum_frac = 0.0;
    1194               9 :     bin_width = lower_bin_width;
    1195              36 :     for (i = lower_index; i >= 0; i--)
    1196                 :     {
    1197                 :         float8      dist;
    1198                 :         double      length_hist_frac;
    1199                 : 
    1200                 :         /*
    1201                 :          * dist -- distance from upper bound of query range to current value
    1202                 :          * of lower bound histogram or lower bound of query range (if we've
    1203                 :          * reach it).
    1204                 :          */
    1205              27 :         dist = get_distance(typcache, &hist_lower[i], upper);
    1206                 : 
    1207                 :         /*
    1208                 :          * Get average fraction of length histogram which covers intervals
    1209                 :          * longer than (or equal to) distance to upper bound of query range.
    1210                 :          */
    1211              27 :         length_hist_frac =
    1212              27 :             1.0 - calc_length_hist_frac(length_hist_values,
    1213                 :                                         length_hist_nvalues,
    1214                 :                                         prev_dist, dist, false);
    1215                 : 
    1216              27 :         sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
    1217                 : 
    1218              27 :         bin_width = 1.0;
    1219              27 :         prev_dist = dist;
    1220                 :     }
    1221                 : 
    1222               9 :     return sum_frac;
    1223                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a