LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - network_selfuncs.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 4.4 % 270 12 258 12
Current Date: 2023-04-08 15:15:32 Functions: 7.1 % 14 1 13 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * network_selfuncs.c
       4                 :  *    Functions for selectivity estimation of inet/cidr operators
       5                 :  *
       6                 :  * This module provides estimators for the subnet inclusion and overlap
       7                 :  * operators.  Estimates are based on null fraction, most common values,
       8                 :  * and histogram of inet/cidr columns.
       9                 :  *
      10                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      11                 :  * Portions Copyright (c) 1994, Regents of the University of California
      12                 :  *
      13                 :  *
      14                 :  * IDENTIFICATION
      15                 :  *    src/backend/utils/adt/network_selfuncs.c
      16                 :  *
      17                 :  *-------------------------------------------------------------------------
      18                 :  */
      19                 : #include "postgres.h"
      20                 : 
      21                 : #include <math.h>
      22                 : 
      23                 : #include "access/htup_details.h"
      24                 : #include "catalog/pg_operator.h"
      25                 : #include "catalog/pg_statistic.h"
      26                 : #include "utils/builtins.h"
      27                 : #include "utils/inet.h"
      28                 : #include "utils/lsyscache.h"
      29                 : #include "utils/selfuncs.h"
      30                 : 
      31                 : 
      32                 : /* Default selectivity for the inet overlap operator */
      33                 : #define DEFAULT_OVERLAP_SEL 0.01
      34                 : 
      35                 : /* Default selectivity for the various inclusion operators */
      36                 : #define DEFAULT_INCLUSION_SEL 0.005
      37                 : 
      38                 : /* Default selectivity for specified operator */
      39                 : #define DEFAULT_SEL(operator) \
      40                 :     ((operator) == OID_INET_OVERLAP_OP ? \
      41                 :      DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
      42                 : 
      43                 : /* Maximum number of items to consider in join selectivity calculations */
      44                 : #define MAX_CONSIDERED_ELEMS 1024
      45                 : 
      46                 : static Selectivity networkjoinsel_inner(Oid operator,
      47                 :                                         VariableStatData *vardata1, VariableStatData *vardata2);
      48                 : static Selectivity networkjoinsel_semi(Oid operator,
      49                 :                                        VariableStatData *vardata1, VariableStatData *vardata2);
      50                 : static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
      51                 : static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
      52                 :                                        Datum constvalue, int opr_codenum);
      53                 : static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
      54                 :                                      float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
      55                 :                                      float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
      56                 : static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers,
      57                 :                                      int mcv_nvalues, Datum *hist_values, int hist_nvalues,
      58                 :                                      int opr_codenum);
      59                 : static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values,
      60                 :                                                 int hist1_nvalues,
      61                 :                                                 Datum *hist2_values, int hist2_nvalues,
      62                 :                                                 int opr_codenum);
      63                 : static Selectivity inet_semi_join_sel(Datum lhs_value,
      64                 :                                       bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
      65                 :                                       bool hist_exists, Datum *hist_values, int hist_nvalues,
      66                 :                                       double hist_weight,
      67                 :                                       FmgrInfo *proc, int opr_codenum);
      68                 : static int  inet_opr_codenum(Oid operator);
      69                 : static int  inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
      70                 : static int  inet_masklen_inclusion_cmp(inet *left, inet *right,
      71                 :                                        int opr_codenum);
      72                 : static int  inet_hist_match_divider(inet *boundary, inet *query,
      73                 :                                     int opr_codenum);
      74                 : 
      75                 : /*
      76                 :  * Selectivity estimation for the subnet inclusion/overlap operators
      77                 :  */
      78                 : Datum
      79 CBC         450 : networksel(PG_FUNCTION_ARGS)
      80                 : {
      81             450 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
      82             450 :     Oid         operator = PG_GETARG_OID(1);
      83             450 :     List       *args = (List *) PG_GETARG_POINTER(2);
      84             450 :     int         varRelid = PG_GETARG_INT32(3);
      85                 :     VariableStatData vardata;
      86                 :     Node       *other;
      87                 :     bool        varonleft;
      88                 :     Selectivity selec,
      89                 :                 mcv_selec,
      90                 :                 non_mcv_selec;
      91                 :     Datum       constvalue;
      92                 :     Form_pg_statistic stats;
      93                 :     AttStatsSlot hslot;
      94                 :     double      sumcommon,
      95                 :                 nullfrac;
      96                 :     FmgrInfo    proc;
      97                 : 
      98                 :     /*
      99                 :      * If expression is not (variable op something) or (something op
     100                 :      * variable), then punt and return a default estimate.
     101                 :      */
     102             450 :     if (!get_restriction_variable(root, args, varRelid,
     103                 :                                   &vardata, &other, &varonleft))
     104 UBC           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     105                 : 
     106                 :     /*
     107                 :      * Can't do anything useful if the something is not a constant, either.
     108                 :      */
     109 CBC         450 :     if (!IsA(other, Const))
     110                 :     {
     111 UBC           0 :         ReleaseVariableStats(vardata);
     112               0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     113                 :     }
     114                 : 
     115                 :     /* All of the operators handled here are strict. */
     116 CBC         450 :     if (((Const *) other)->constisnull)
     117                 :     {
     118 UBC           0 :         ReleaseVariableStats(vardata);
     119               0 :         PG_RETURN_FLOAT8(0.0);
     120                 :     }
     121 CBC         450 :     constvalue = ((Const *) other)->constvalue;
     122                 : 
     123                 :     /* Otherwise, we need stats in order to produce a non-default estimate. */
     124             450 :     if (!HeapTupleIsValid(vardata.statsTuple))
     125                 :     {
     126             450 :         ReleaseVariableStats(vardata);
     127             450 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
     128                 :     }
     129                 : 
     130 UBC           0 :     stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     131               0 :     nullfrac = stats->stanullfrac;
     132                 : 
     133                 :     /*
     134                 :      * If we have most-common-values info, add up the fractions of the MCV
     135                 :      * entries that satisfy MCV OP CONST.  These fractions contribute directly
     136                 :      * to the result selectivity.  Also add up the total fraction represented
     137                 :      * by MCV entries.
     138                 :      */
     139               0 :     fmgr_info(get_opcode(operator), &proc);
     140               0 :     mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid,
     141                 :                                 constvalue, varonleft,
     142                 :                                 &sumcommon);
     143                 : 
     144                 :     /*
     145                 :      * If we have a histogram, use it to estimate the proportion of the
     146                 :      * non-MCV population that satisfies the clause.  If we don't, apply the
     147                 :      * default selectivity to that population.
     148                 :      */
     149               0 :     if (get_attstatsslot(&hslot, vardata.statsTuple,
     150                 :                          STATISTIC_KIND_HISTOGRAM, InvalidOid,
     151                 :                          ATTSTATSSLOT_VALUES))
     152                 :     {
     153               0 :         int         opr_codenum = inet_opr_codenum(operator);
     154                 : 
     155                 :         /* Commute if needed, so we can consider histogram to be on the left */
     156               0 :         if (!varonleft)
     157               0 :             opr_codenum = -opr_codenum;
     158               0 :         non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
     159                 :                                             constvalue, opr_codenum);
     160                 : 
     161               0 :         free_attstatsslot(&hslot);
     162                 :     }
     163                 :     else
     164               0 :         non_mcv_selec = DEFAULT_SEL(operator);
     165                 : 
     166                 :     /* Combine selectivities for MCV and non-MCV populations */
     167               0 :     selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
     168                 : 
     169                 :     /* Result should be in range, but make sure... */
     170               0 :     CLAMP_PROBABILITY(selec);
     171                 : 
     172               0 :     ReleaseVariableStats(vardata);
     173                 : 
     174               0 :     PG_RETURN_FLOAT8(selec);
     175                 : }
     176                 : 
     177                 : /*
     178                 :  * Join selectivity estimation for the subnet inclusion/overlap operators
     179                 :  *
     180                 :  * This function has the same structure as eqjoinsel() in selfuncs.c.
     181                 :  *
     182                 :  * Throughout networkjoinsel and its subroutines, we have a performance issue
     183                 :  * in that the amount of work to be done is O(N^2) in the length of the MCV
     184                 :  * and histogram arrays.  To keep the runtime from getting out of hand when
     185                 :  * large statistics targets have been set, we arbitrarily limit the number of
     186                 :  * values considered to 1024 (MAX_CONSIDERED_ELEMS).  For the MCV arrays, this
     187                 :  * is easy: just consider at most the first N elements.  (Since the MCVs are
     188                 :  * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
     189                 :  * For the histogram arrays, we decimate; that is consider only every k'th
     190                 :  * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
     191                 :  * elements are considered.  This should still give us a good random sample of
     192                 :  * the non-MCV population.  Decimation is done on-the-fly in the loops that
     193                 :  * iterate over the histogram arrays.
     194                 :  */
     195                 : Datum
     196               0 : networkjoinsel(PG_FUNCTION_ARGS)
     197                 : {
     198               0 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     199               0 :     Oid         operator = PG_GETARG_OID(1);
     200               0 :     List       *args = (List *) PG_GETARG_POINTER(2);
     201                 : #ifdef NOT_USED
     202                 :     JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
     203                 : #endif
     204               0 :     SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
     205                 :     double      selec;
     206                 :     VariableStatData vardata1;
     207                 :     VariableStatData vardata2;
     208                 :     bool        join_is_reversed;
     209                 : 
     210               0 :     get_join_variables(root, args, sjinfo,
     211                 :                        &vardata1, &vardata2, &join_is_reversed);
     212                 : 
     213               0 :     switch (sjinfo->jointype)
     214                 :     {
     215               0 :         case JOIN_INNER:
     216                 :         case JOIN_LEFT:
     217                 :         case JOIN_FULL:
     218                 : 
     219                 :             /*
     220                 :              * Selectivity for left/full join is not exactly the same as inner
     221                 :              * join, but we neglect the difference, as eqjoinsel does.
     222                 :              */
     223               0 :             selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
     224               0 :             break;
     225               0 :         case JOIN_SEMI:
     226                 :         case JOIN_ANTI:
     227                 :             /* Here, it's important that we pass the outer var on the left. */
     228               0 :             if (!join_is_reversed)
     229               0 :                 selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
     230                 :             else
     231               0 :                 selec = networkjoinsel_semi(get_commutator(operator),
     232                 :                                             &vardata2, &vardata1);
     233               0 :             break;
     234               0 :         default:
     235                 :             /* other values not expected here */
     236               0 :             elog(ERROR, "unrecognized join type: %d",
     237                 :                  (int) sjinfo->jointype);
     238                 :             selec = 0;          /* keep compiler quiet */
     239                 :             break;
     240                 :     }
     241                 : 
     242               0 :     ReleaseVariableStats(vardata1);
     243               0 :     ReleaseVariableStats(vardata2);
     244                 : 
     245               0 :     CLAMP_PROBABILITY(selec);
     246                 : 
     247               0 :     PG_RETURN_FLOAT8((float8) selec);
     248                 : }
     249                 : 
     250                 : /*
     251                 :  * Inner join selectivity estimation for subnet inclusion/overlap operators
     252                 :  *
     253                 :  * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
     254                 :  * selectivity for join using the subnet inclusion operators.  Unlike the
     255                 :  * join selectivity function for the equality operator, eqjoinsel_inner(),
     256                 :  * one to one matching of the values is not enough.  Network inclusion
     257                 :  * operators are likely to match many to many, so we must check all pairs.
     258                 :  * (Note: it might be possible to exploit understanding of the histogram's
     259                 :  * btree ordering to reduce the work needed, but we don't currently try.)
     260                 :  * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
     261                 :  */
     262                 : static Selectivity
     263               0 : networkjoinsel_inner(Oid operator,
     264                 :                      VariableStatData *vardata1, VariableStatData *vardata2)
     265                 : {
     266                 :     Form_pg_statistic stats;
     267               0 :     double      nullfrac1 = 0.0,
     268               0 :                 nullfrac2 = 0.0;
     269               0 :     Selectivity selec = 0.0,
     270               0 :                 sumcommon1 = 0.0,
     271               0 :                 sumcommon2 = 0.0;
     272               0 :     bool        mcv1_exists = false,
     273               0 :                 mcv2_exists = false,
     274               0 :                 hist1_exists = false,
     275               0 :                 hist2_exists = false;
     276                 :     int         opr_codenum;
     277               0 :     int         mcv1_length = 0,
     278               0 :                 mcv2_length = 0;
     279                 :     AttStatsSlot mcv1_slot;
     280                 :     AttStatsSlot mcv2_slot;
     281                 :     AttStatsSlot hist1_slot;
     282                 :     AttStatsSlot hist2_slot;
     283                 : 
     284               0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     285                 :     {
     286               0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     287               0 :         nullfrac1 = stats->stanullfrac;
     288                 : 
     289               0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     290                 :                                        STATISTIC_KIND_MCV, InvalidOid,
     291                 :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     292               0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     293                 :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     294                 :                                         ATTSTATSSLOT_VALUES);
     295                 :         /* Arbitrarily limit number of MCVs considered */
     296               0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     297               0 :         if (mcv1_exists)
     298               0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     299                 :     }
     300                 :     else
     301                 :     {
     302               0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     303               0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     304                 :     }
     305                 : 
     306               0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     307                 :     {
     308               0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     309               0 :         nullfrac2 = stats->stanullfrac;
     310                 : 
     311               0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     312                 :                                        STATISTIC_KIND_MCV, InvalidOid,
     313                 :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     314               0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     315                 :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     316                 :                                         ATTSTATSSLOT_VALUES);
     317                 :         /* Arbitrarily limit number of MCVs considered */
     318               0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     319               0 :         if (mcv2_exists)
     320               0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     321                 :     }
     322                 :     else
     323                 :     {
     324               0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     325               0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     326                 :     }
     327                 : 
     328               0 :     opr_codenum = inet_opr_codenum(operator);
     329                 : 
     330                 :     /*
     331                 :      * Calculate selectivity for MCV vs MCV matches.
     332                 :      */
     333               0 :     if (mcv1_exists && mcv2_exists)
     334               0 :         selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
     335                 :                                    mcv1_length,
     336                 :                                    mcv2_slot.values, mcv2_slot.numbers,
     337                 :                                    mcv2_length,
     338                 :                                    operator);
     339                 : 
     340                 :     /*
     341                 :      * Add in selectivities for MCV vs histogram matches, scaling according to
     342                 :      * the fractions of the populations represented by the histograms. Note
     343                 :      * that the second case needs to commute the operator.
     344                 :      */
     345               0 :     if (mcv1_exists && hist2_exists)
     346               0 :         selec += (1.0 - nullfrac2 - sumcommon2) *
     347               0 :             inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
     348                 :                               hist2_slot.values, hist2_slot.nvalues,
     349                 :                               opr_codenum);
     350               0 :     if (mcv2_exists && hist1_exists)
     351               0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     352               0 :             inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
     353                 :                               hist1_slot.values, hist1_slot.nvalues,
     354                 :                               -opr_codenum);
     355                 : 
     356                 :     /*
     357                 :      * Add in selectivity for histogram vs histogram matches, again scaling
     358                 :      * appropriately.
     359                 :      */
     360               0 :     if (hist1_exists && hist2_exists)
     361               0 :         selec += (1.0 - nullfrac1 - sumcommon1) *
     362               0 :             (1.0 - nullfrac2 - sumcommon2) *
     363               0 :             inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
     364                 :                                          hist2_slot.values, hist2_slot.nvalues,
     365                 :                                          opr_codenum);
     366                 : 
     367                 :     /*
     368                 :      * If useful statistics are not available then use the default estimate.
     369                 :      * We can apply null fractions if known, though.
     370                 :      */
     371               0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     372               0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     373                 : 
     374                 :     /* Release stats. */
     375               0 :     free_attstatsslot(&mcv1_slot);
     376               0 :     free_attstatsslot(&mcv2_slot);
     377               0 :     free_attstatsslot(&hist1_slot);
     378               0 :     free_attstatsslot(&hist2_slot);
     379                 : 
     380               0 :     return selec;
     381                 : }
     382                 : 
     383                 : /*
     384                 :  * Semi join selectivity estimation for subnet inclusion/overlap operators
     385                 :  *
     386                 :  * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
     387                 :  * histogram selectivity for semi/anti join cases.
     388                 :  */
     389                 : static Selectivity
     390               0 : networkjoinsel_semi(Oid operator,
     391                 :                     VariableStatData *vardata1, VariableStatData *vardata2)
     392                 : {
     393                 :     Form_pg_statistic stats;
     394               0 :     Selectivity selec = 0.0,
     395               0 :                 sumcommon1 = 0.0,
     396               0 :                 sumcommon2 = 0.0;
     397               0 :     double      nullfrac1 = 0.0,
     398               0 :                 nullfrac2 = 0.0,
     399               0 :                 hist2_weight = 0.0;
     400               0 :     bool        mcv1_exists = false,
     401               0 :                 mcv2_exists = false,
     402               0 :                 hist1_exists = false,
     403               0 :                 hist2_exists = false;
     404                 :     int         opr_codenum;
     405                 :     FmgrInfo    proc;
     406                 :     int         i,
     407               0 :                 mcv1_length = 0,
     408               0 :                 mcv2_length = 0;
     409                 :     AttStatsSlot mcv1_slot;
     410                 :     AttStatsSlot mcv2_slot;
     411                 :     AttStatsSlot hist1_slot;
     412                 :     AttStatsSlot hist2_slot;
     413                 : 
     414               0 :     if (HeapTupleIsValid(vardata1->statsTuple))
     415                 :     {
     416               0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
     417               0 :         nullfrac1 = stats->stanullfrac;
     418                 : 
     419               0 :         mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
     420                 :                                        STATISTIC_KIND_MCV, InvalidOid,
     421                 :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     422               0 :         hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
     423                 :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     424                 :                                         ATTSTATSSLOT_VALUES);
     425                 :         /* Arbitrarily limit number of MCVs considered */
     426               0 :         mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
     427               0 :         if (mcv1_exists)
     428               0 :             sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
     429                 :     }
     430                 :     else
     431                 :     {
     432               0 :         memset(&mcv1_slot, 0, sizeof(mcv1_slot));
     433               0 :         memset(&hist1_slot, 0, sizeof(hist1_slot));
     434                 :     }
     435                 : 
     436               0 :     if (HeapTupleIsValid(vardata2->statsTuple))
     437                 :     {
     438               0 :         stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
     439               0 :         nullfrac2 = stats->stanullfrac;
     440                 : 
     441               0 :         mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
     442                 :                                        STATISTIC_KIND_MCV, InvalidOid,
     443                 :                                        ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
     444               0 :         hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
     445                 :                                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
     446                 :                                         ATTSTATSSLOT_VALUES);
     447                 :         /* Arbitrarily limit number of MCVs considered */
     448               0 :         mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
     449               0 :         if (mcv2_exists)
     450               0 :             sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
     451                 :     }
     452                 :     else
     453                 :     {
     454               0 :         memset(&mcv2_slot, 0, sizeof(mcv2_slot));
     455               0 :         memset(&hist2_slot, 0, sizeof(hist2_slot));
     456                 :     }
     457                 : 
     458               0 :     opr_codenum = inet_opr_codenum(operator);
     459               0 :     fmgr_info(get_opcode(operator), &proc);
     460                 : 
     461                 :     /* Estimate number of input rows represented by RHS histogram. */
     462               0 :     if (hist2_exists && vardata2->rel)
     463               0 :         hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
     464                 : 
     465                 :     /*
     466                 :      * Consider each element of the LHS MCV list, matching it to whatever RHS
     467                 :      * stats we have.  Scale according to the known frequency of the MCV.
     468                 :      */
     469               0 :     if (mcv1_exists && (mcv2_exists || hist2_exists))
     470                 :     {
     471               0 :         for (i = 0; i < mcv1_length; i++)
     472                 :         {
     473               0 :             selec += mcv1_slot.numbers[i] *
     474               0 :                 inet_semi_join_sel(mcv1_slot.values[i],
     475                 :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     476                 :                                    hist2_exists,
     477                 :                                    hist2_slot.values, hist2_slot.nvalues,
     478                 :                                    hist2_weight,
     479                 :                                    &proc, opr_codenum);
     480                 :         }
     481                 :     }
     482                 : 
     483                 :     /*
     484                 :      * Consider each element of the LHS histogram, except for the first and
     485                 :      * last elements, which we exclude on the grounds that they're outliers
     486                 :      * and thus not very representative.  Scale on the assumption that each
     487                 :      * such histogram element represents an equal share of the LHS histogram
     488                 :      * population (which is a bit bogus, because the members of its bucket may
     489                 :      * not all act the same with respect to the join clause, but it's hard to
     490                 :      * do better).
     491                 :      *
     492                 :      * If there are too many histogram elements, decimate to limit runtime.
     493                 :      */
     494               0 :     if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
     495                 :     {
     496               0 :         double      hist_selec_sum = 0.0;
     497                 :         int         k,
     498                 :                     n;
     499                 : 
     500               0 :         k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     501                 : 
     502               0 :         n = 0;
     503               0 :         for (i = 1; i < hist1_slot.nvalues - 1; i += k)
     504                 :         {
     505               0 :             hist_selec_sum +=
     506               0 :                 inet_semi_join_sel(hist1_slot.values[i],
     507                 :                                    mcv2_exists, mcv2_slot.values, mcv2_length,
     508                 :                                    hist2_exists,
     509                 :                                    hist2_slot.values, hist2_slot.nvalues,
     510                 :                                    hist2_weight,
     511                 :                                    &proc, opr_codenum);
     512               0 :             n++;
     513                 :         }
     514                 : 
     515               0 :         selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
     516                 :     }
     517                 : 
     518                 :     /*
     519                 :      * If useful statistics are not available then use the default estimate.
     520                 :      * We can apply null fractions if known, though.
     521                 :      */
     522               0 :     if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
     523               0 :         selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
     524                 : 
     525                 :     /* Release stats. */
     526               0 :     free_attstatsslot(&mcv1_slot);
     527               0 :     free_attstatsslot(&mcv2_slot);
     528               0 :     free_attstatsslot(&hist1_slot);
     529               0 :     free_attstatsslot(&hist2_slot);
     530                 : 
     531               0 :     return selec;
     532                 : }
     533                 : 
     534                 : /*
     535                 :  * Compute the fraction of a relation's population that is represented
     536                 :  * by the MCV list.
     537                 :  */
     538                 : static Selectivity
     539               0 : mcv_population(float4 *mcv_numbers, int mcv_nvalues)
     540                 : {
     541               0 :     Selectivity sumcommon = 0.0;
     542                 :     int         i;
     543                 : 
     544               0 :     for (i = 0; i < mcv_nvalues; i++)
     545                 :     {
     546               0 :         sumcommon += mcv_numbers[i];
     547                 :     }
     548                 : 
     549               0 :     return sumcommon;
     550                 : }
     551                 : 
     552                 : /*
     553                 :  * Inet histogram vs single value selectivity estimation
     554                 :  *
     555                 :  * Estimate the fraction of the histogram population that satisfies
     556                 :  * "value OPR CONST".  (The result needs to be scaled to reflect the
     557                 :  * proportion of the total population represented by the histogram.)
     558                 :  *
     559                 :  * The histogram is originally for the inet btree comparison operators.
     560                 :  * Only the common bits of the network part and the length of the network part
     561                 :  * (masklen) are interesting for the subnet inclusion operators.  Fortunately,
     562                 :  * btree comparison treats the network part as the major sort key.  Even so,
     563                 :  * the length of the network part would not really be significant in the
     564                 :  * histogram.  This would lead to big mistakes for data sets with uneven
     565                 :  * masklen distribution.  To reduce this problem, comparisons with the left
     566                 :  * and the right sides of the buckets are used together.
     567                 :  *
     568                 :  * Histogram bucket matches are calculated in two forms.  If the constant
     569                 :  * matches both bucket endpoints the bucket is considered as fully matched.
     570                 :  * The second form is to match the bucket partially; we recognize this when
     571                 :  * the constant matches just one endpoint, or the two endpoints fall on
     572                 :  * opposite sides of the constant.  (Note that when the constant matches an
     573                 :  * interior histogram element, it gets credit for partial matches to the
     574                 :  * buckets on both sides, while a match to a histogram endpoint gets credit
     575                 :  * for only one partial match.  This is desirable.)
     576                 :  *
     577                 :  * The divider in the partial bucket match is imagined as the distance
     578                 :  * between the decisive bits and the common bits of the addresses.  It will
     579                 :  * be used as a power of two as it is the natural scale for the IP network
     580                 :  * inclusion.  This partial bucket match divider calculation is an empirical
     581                 :  * formula and subject to change with more experiment.
     582                 :  *
     583                 :  * For a partial match, we try to calculate dividers for both of the
     584                 :  * boundaries.  If the address family of a boundary value does not match the
     585                 :  * constant or comparison of the length of the network parts is not correct
     586                 :  * for the operator, the divider for that boundary will not be taken into
     587                 :  * account.  If both of the dividers are valid, the greater one will be used
     588                 :  * to minimize the mistake in buckets that have disparate masklens.  This
     589                 :  * calculation is unfair when dividers can be calculated for both of the
     590                 :  * boundaries but they are far from each other; but it is not a common
     591                 :  * situation as the boundaries are expected to share most of their significant
     592                 :  * bits of their masklens.  The mistake would be greater, if we would use the
     593                 :  * minimum instead of the maximum, and we don't know a sensible way to combine
     594                 :  * them.
     595                 :  *
     596                 :  * For partial match in buckets that have different address families on the
     597                 :  * left and right sides, only the boundary with the same address family is
     598                 :  * taken into consideration.  This can cause more mistakes for these buckets
     599                 :  * if the masklens of their boundaries are also disparate.  But this can only
     600                 :  * happen in one bucket, since only two address families exist.  It seems a
     601                 :  * better option than not considering these buckets at all.
     602                 :  */
     603                 : static Selectivity
     604               0 : inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
     605                 :                     int opr_codenum)
     606                 : {
     607               0 :     Selectivity match = 0.0;
     608                 :     inet       *query,
     609                 :                *left,
     610                 :                *right;
     611                 :     int         i,
     612                 :                 k,
     613                 :                 n;
     614                 :     int         left_order,
     615                 :                 right_order,
     616                 :                 left_divider,
     617                 :                 right_divider;
     618                 : 
     619                 :     /* guard against zero-divide below */
     620               0 :     if (nvalues <= 1)
     621               0 :         return 0.0;
     622                 : 
     623                 :     /* if there are too many histogram elements, decimate to limit runtime */
     624               0 :     k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
     625                 : 
     626               0 :     query = DatumGetInetPP(constvalue);
     627                 : 
     628                 :     /* "left" is the left boundary value of the current bucket ... */
     629               0 :     left = DatumGetInetPP(values[0]);
     630               0 :     left_order = inet_inclusion_cmp(left, query, opr_codenum);
     631                 : 
     632               0 :     n = 0;
     633               0 :     for (i = k; i < nvalues; i += k)
     634                 :     {
     635                 :         /* ... and "right" is the right boundary value */
     636               0 :         right = DatumGetInetPP(values[i]);
     637               0 :         right_order = inet_inclusion_cmp(right, query, opr_codenum);
     638                 : 
     639               0 :         if (left_order == 0 && right_order == 0)
     640                 :         {
     641                 :             /* The whole bucket matches, since both endpoints do. */
     642               0 :             match += 1.0;
     643                 :         }
     644               0 :         else if ((left_order <= 0 && right_order >= 0) ||
     645               0 :                  (left_order >= 0 && right_order <= 0))
     646                 :         {
     647                 :             /* Partial bucket match. */
     648               0 :             left_divider = inet_hist_match_divider(left, query, opr_codenum);
     649               0 :             right_divider = inet_hist_match_divider(right, query, opr_codenum);
     650                 : 
     651               0 :             if (left_divider >= 0 || right_divider >= 0)
     652               0 :                 match += 1.0 / pow(2.0, Max(left_divider, right_divider));
     653                 :         }
     654                 : 
     655                 :         /* Shift the variables. */
     656               0 :         left = right;
     657               0 :         left_order = right_order;
     658                 : 
     659                 :         /* Count the number of buckets considered. */
     660               0 :         n++;
     661                 :     }
     662                 : 
     663               0 :     return match / n;
     664                 : }
     665                 : 
     666                 : /*
     667                 :  * Inet MCV vs MCV join selectivity estimation
     668                 :  *
     669                 :  * We simply add up the fractions of the populations that satisfy the clause.
     670                 :  * The result is exact and does not need to be scaled further.
     671                 :  */
     672                 : static Selectivity
     673               0 : inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
     674                 :                   Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
     675                 :                   Oid operator)
     676                 : {
     677               0 :     Selectivity selec = 0.0;
     678                 :     FmgrInfo    proc;
     679                 :     int         i,
     680                 :                 j;
     681                 : 
     682               0 :     fmgr_info(get_opcode(operator), &proc);
     683                 : 
     684               0 :     for (i = 0; i < mcv1_nvalues; i++)
     685                 :     {
     686               0 :         for (j = 0; j < mcv2_nvalues; j++)
     687               0 :             if (DatumGetBool(FunctionCall2(&proc,
     688                 :                                            mcv1_values[i],
     689                 :                                            mcv2_values[j])))
     690               0 :                 selec += mcv1_numbers[i] * mcv2_numbers[j];
     691                 :     }
     692               0 :     return selec;
     693                 : }
     694                 : 
     695                 : /*
     696                 :  * Inet MCV vs histogram join selectivity estimation
     697                 :  *
     698                 :  * For each MCV on the lefthand side, estimate the fraction of the righthand's
     699                 :  * histogram population that satisfies the join clause, and add those up,
     700                 :  * scaling by the MCV's frequency.  The result still needs to be scaled
     701                 :  * according to the fraction of the righthand's population represented by
     702                 :  * the histogram.
     703                 :  */
     704                 : static Selectivity
     705               0 : inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
     706                 :                   Datum *hist_values, int hist_nvalues,
     707                 :                   int opr_codenum)
     708                 : {
     709               0 :     Selectivity selec = 0.0;
     710                 :     int         i;
     711                 : 
     712                 :     /*
     713                 :      * We'll call inet_hist_value_selec with the histogram on the left, so we
     714                 :      * must commute the operator.
     715                 :      */
     716               0 :     opr_codenum = -opr_codenum;
     717                 : 
     718               0 :     for (i = 0; i < mcv_nvalues; i++)
     719                 :     {
     720               0 :         selec += mcv_numbers[i] *
     721               0 :             inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
     722                 :                                 opr_codenum);
     723                 :     }
     724               0 :     return selec;
     725                 : }
     726                 : 
     727                 : /*
     728                 :  * Inet histogram vs histogram join selectivity estimation
     729                 :  *
     730                 :  * Here, we take all values listed in the second histogram (except for the
     731                 :  * first and last elements, which are excluded on the grounds of possibly
     732                 :  * not being very representative) and treat them as a uniform sample of
     733                 :  * the non-MCV population for that relation.  For each one, we apply
     734                 :  * inet_hist_value_selec to see what fraction of the first histogram
     735                 :  * it matches.
     736                 :  *
     737                 :  * We could alternatively do this the other way around using the operator's
     738                 :  * commutator.  XXX would it be worthwhile to do it both ways and take the
     739                 :  * average?  That would at least avoid non-commutative estimation results.
     740                 :  */
     741                 : static Selectivity
     742               0 : inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues,
     743                 :                              Datum *hist2_values, int hist2_nvalues,
     744                 :                              int opr_codenum)
     745                 : {
     746               0 :     double      match = 0.0;
     747                 :     int         i,
     748                 :                 k,
     749                 :                 n;
     750                 : 
     751               0 :     if (hist2_nvalues <= 2)
     752               0 :         return 0.0;             /* no interior histogram elements */
     753                 : 
     754                 :     /* if there are too many histogram elements, decimate to limit runtime */
     755               0 :     k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
     756                 : 
     757               0 :     n = 0;
     758               0 :     for (i = 1; i < hist2_nvalues - 1; i += k)
     759                 :     {
     760               0 :         match += inet_hist_value_sel(hist1_values, hist1_nvalues,
     761               0 :                                      hist2_values[i], opr_codenum);
     762               0 :         n++;
     763                 :     }
     764                 : 
     765               0 :     return match / n;
     766                 : }
     767                 : 
     768                 : /*
     769                 :  * Inet semi join selectivity estimation for one value
     770                 :  *
     771                 :  * The function calculates the probability that there is at least one row
     772                 :  * in the RHS table that satisfies the "lhs_value op column" condition.
     773                 :  * It is used in semi join estimation to check a sample from the left hand
     774                 :  * side table.
     775                 :  *
     776                 :  * The MCV and histogram from the right hand side table should be provided as
     777                 :  * arguments with the lhs_value from the left hand side table for the join.
     778                 :  * hist_weight is the total number of rows represented by the histogram.
     779                 :  * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
     780                 :  * list, and another 10% are NULLs, hist_weight would be 800.
     781                 :  *
     782                 :  * First, the lhs_value will be matched to the most common values.  If it
     783                 :  * matches any of them, 1.0 will be returned, because then there is surely
     784                 :  * a match.
     785                 :  *
     786                 :  * Otherwise, the histogram will be used to estimate the number of rows in
     787                 :  * the second table that match the condition.  If the estimate is greater
     788                 :  * than 1.0, 1.0 will be returned, because it means there is a greater chance
     789                 :  * that the lhs_value will match more than one row in the table.  If it is
     790                 :  * between 0.0 and 1.0, it will be returned as the probability.
     791                 :  */
     792                 : static Selectivity
     793               0 : inet_semi_join_sel(Datum lhs_value,
     794                 :                    bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
     795                 :                    bool hist_exists, Datum *hist_values, int hist_nvalues,
     796                 :                    double hist_weight,
     797                 :                    FmgrInfo *proc, int opr_codenum)
     798                 : {
     799               0 :     if (mcv_exists)
     800                 :     {
     801                 :         int         i;
     802                 : 
     803               0 :         for (i = 0; i < mcv_nvalues; i++)
     804                 :         {
     805               0 :             if (DatumGetBool(FunctionCall2(proc,
     806                 :                                            lhs_value,
     807                 :                                            mcv_values[i])))
     808               0 :                 return 1.0;
     809                 :         }
     810                 :     }
     811                 : 
     812               0 :     if (hist_exists && hist_weight > 0)
     813                 :     {
     814                 :         Selectivity hist_selec;
     815                 : 
     816                 :         /* Commute operator, since we're passing lhs_value on the right */
     817               0 :         hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
     818                 :                                          lhs_value, -opr_codenum);
     819                 : 
     820               0 :         if (hist_selec > 0)
     821               0 :             return Min(1.0, hist_weight * hist_selec);
     822                 :     }
     823                 : 
     824               0 :     return 0.0;
     825                 : }
     826                 : 
     827                 : /*
     828                 :  * Assign useful code numbers for the subnet inclusion/overlap operators
     829                 :  *
     830                 :  * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
     831                 :  * on the exact codes assigned here; but many other places in this file
     832                 :  * know that they can negate a code to obtain the code for the commutator
     833                 :  * operator.
     834                 :  */
     835                 : static int
     836               0 : inet_opr_codenum(Oid operator)
     837                 : {
     838               0 :     switch (operator)
     839                 :     {
     840               0 :         case OID_INET_SUP_OP:
     841               0 :             return -2;
     842               0 :         case OID_INET_SUPEQ_OP:
     843               0 :             return -1;
     844               0 :         case OID_INET_OVERLAP_OP:
     845               0 :             return 0;
     846               0 :         case OID_INET_SUBEQ_OP:
     847               0 :             return 1;
     848               0 :         case OID_INET_SUB_OP:
     849               0 :             return 2;
     850               0 :         default:
     851               0 :             elog(ERROR, "unrecognized operator %u for inet selectivity",
     852                 :                  operator);
     853                 :     }
     854                 :     return 0;                   /* unreached, but keep compiler quiet */
     855                 : }
     856                 : 
     857                 : /*
     858                 :  * Comparison function for the subnet inclusion/overlap operators
     859                 :  *
     860                 :  * If the comparison is okay for the specified inclusion operator, the return
     861                 :  * value will be 0.  Otherwise the return value will be less than or greater
     862                 :  * than 0 as appropriate for the operator.
     863                 :  *
     864                 :  * Comparison is compatible with the basic comparison function for the inet
     865                 :  * type.  See network_cmp_internal() in network.c for the original.  Basic
     866                 :  * comparison operators are implemented with the network_cmp_internal()
     867                 :  * function.  It is possible to implement the subnet inclusion operators with
     868                 :  * this function.
     869                 :  *
     870                 :  * Comparison is first on the common bits of the network part, then on the
     871                 :  * length of the network part (masklen) as in the network_cmp_internal()
     872                 :  * function.  Only the first part is in this function.  The second part is
     873                 :  * separated to another function for reusability.  The difference between the
     874                 :  * second part and the original network_cmp_internal() is that the inclusion
     875                 :  * operator is considered while comparing the lengths of the network parts.
     876                 :  * See the inet_masklen_inclusion_cmp() function below.
     877                 :  */
     878                 : static int
     879               0 : inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     880                 : {
     881               0 :     if (ip_family(left) == ip_family(right))
     882                 :     {
     883                 :         int         order;
     884                 : 
     885               0 :         order = bitncmp(ip_addr(left), ip_addr(right),
     886               0 :                         Min(ip_bits(left), ip_bits(right)));
     887               0 :         if (order != 0)
     888               0 :             return order;
     889                 : 
     890               0 :         return inet_masklen_inclusion_cmp(left, right, opr_codenum);
     891                 :     }
     892                 : 
     893               0 :     return ip_family(left) - ip_family(right);
     894                 : }
     895                 : 
     896                 : /*
     897                 :  * Masklen comparison function for the subnet inclusion/overlap operators
     898                 :  *
     899                 :  * Compares the lengths of the network parts of the inputs.  If the comparison
     900                 :  * is okay for the specified inclusion operator, the return value will be 0.
     901                 :  * Otherwise the return value will be less than or greater than 0 as
     902                 :  * appropriate for the operator.
     903                 :  */
     904                 : static int
     905               0 : inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
     906                 : {
     907                 :     int         order;
     908                 : 
     909               0 :     order = (int) ip_bits(left) - (int) ip_bits(right);
     910                 : 
     911                 :     /*
     912                 :      * Return 0 if the operator would accept this combination of masklens.
     913                 :      * Note that opr_codenum zero (overlaps) will accept all cases.
     914                 :      */
     915               0 :     if ((order > 0 && opr_codenum >= 0) ||
     916               0 :         (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
     917               0 :         (order < 0 && opr_codenum <= 0))
     918               0 :         return 0;
     919                 : 
     920                 :     /*
     921                 :      * Otherwise, return a negative value for sup/supeq (notionally, the RHS
     922                 :      * needs to have a larger masklen than it has, which would make it sort
     923                 :      * later), or a positive value for sub/subeq (vice versa).
     924                 :      */
     925               0 :     return opr_codenum;
     926                 : }
     927                 : 
     928                 : /*
     929                 :  * Inet histogram partial match divider calculation
     930                 :  *
     931                 :  * First the families and the lengths of the network parts are compared using
     932                 :  * the subnet inclusion operator.  If those are acceptable for the operator,
     933                 :  * the divider will be calculated using the masklens and the common bits of
     934                 :  * the addresses.  -1 will be returned if it cannot be calculated.
     935                 :  *
     936                 :  * See commentary for inet_hist_value_sel() for some rationale for this.
     937                 :  */
     938                 : static int
     939               0 : inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
     940                 : {
     941               0 :     if (ip_family(boundary) == ip_family(query) &&
     942               0 :         inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
     943                 :     {
     944                 :         int         min_bits,
     945                 :                     decisive_bits;
     946                 : 
     947               0 :         min_bits = Min(ip_bits(boundary), ip_bits(query));
     948                 : 
     949                 :         /*
     950                 :          * Set decisive_bits to the masklen of the one that should contain the
     951                 :          * other according to the operator.
     952                 :          */
     953               0 :         if (opr_codenum < 0)
     954               0 :             decisive_bits = ip_bits(boundary);
     955               0 :         else if (opr_codenum > 0)
     956               0 :             decisive_bits = ip_bits(query);
     957                 :         else
     958               0 :             decisive_bits = min_bits;
     959                 : 
     960                 :         /*
     961                 :          * Now return the number of non-common decisive bits.  (This will be
     962                 :          * zero if the boundary and query in fact match, else positive.)
     963                 :          */
     964               0 :         if (min_bits > 0)
     965               0 :             return decisive_bits - bitncommon(ip_addr(boundary),
     966               0 :                                               ip_addr(query),
     967                 :                                               min_bits);
     968               0 :         return decisive_bits;
     969                 :     }
     970                 : 
     971               0 :     return -1;
     972                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a