LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - array_selfuncs.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 80.3 % 319 256 63 256
Current Date: 2023-04-08 17:13:01 Functions: 84.6 % 13 11 2 11
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 80.3 % 319 256 63 256
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 84.6 % 13 11 2 11

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * array_selfuncs.c
                                  4                 :  *    Functions for selectivity estimation of array operators
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/utils/adt/array_selfuncs.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include <math.h>
                                 18                 : 
                                 19                 : #include "access/htup_details.h"
                                 20                 : #include "catalog/pg_collation.h"
                                 21                 : #include "catalog/pg_operator.h"
                                 22                 : #include "catalog/pg_statistic.h"
                                 23                 : #include "utils/array.h"
                                 24                 : #include "utils/builtins.h"
                                 25                 : #include "utils/lsyscache.h"
                                 26                 : #include "utils/selfuncs.h"
                                 27                 : #include "utils/typcache.h"
                                 28                 : 
                                 29                 : 
                                 30                 : /* Default selectivity constant for "@>" and "<@" operators */
                                 31                 : #define DEFAULT_CONTAIN_SEL 0.005
                                 32                 : 
                                 33                 : /* Default selectivity constant for "&&" operator */
                                 34                 : #define DEFAULT_OVERLAP_SEL 0.01
                                 35                 : 
                                 36                 : /* Default selectivity for given operator */
                                 37                 : #define DEFAULT_SEL(operator) \
                                 38                 :     ((operator) == OID_ARRAY_OVERLAP_OP ? \
                                 39                 :         DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL)
                                 40                 : 
                                 41                 : static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval,
                                 42                 :                                      Oid elemtype, Oid operator);
                                 43                 : static Selectivity mcelem_array_selec(ArrayType *array,
                                 44                 :                                       TypeCacheEntry *typentry,
                                 45                 :                                       Datum *mcelem, int nmcelem,
                                 46                 :                                       float4 *numbers, int nnumbers,
                                 47                 :                                       float4 *hist, int nhist,
                                 48                 :                                       Oid operator);
                                 49                 : static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
                                 50                 :                                                       float4 *numbers, int nnumbers,
                                 51                 :                                                       Datum *array_data, int nitems,
                                 52                 :                                                       Oid operator, TypeCacheEntry *typentry);
                                 53                 : static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
                                 54                 :                                                 float4 *numbers, int nnumbers,
                                 55                 :                                                 Datum *array_data, int nitems,
                                 56                 :                                                 float4 *hist, int nhist,
                                 57                 :                                                 Oid operator, TypeCacheEntry *typentry);
                                 58                 : static float *calc_hist(const float4 *hist, int nhist, int n);
                                 59                 : static float *calc_distr(const float *p, int n, int m, float rest);
                                 60                 : static int  floor_log2(uint32 n);
                                 61                 : static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value,
                                 62                 :                              int *index, TypeCacheEntry *typentry);
                                 63                 : static int  element_compare(const void *key1, const void *key2, void *arg);
                                 64                 : static int  float_compare_desc(const void *key1, const void *key2);
                                 65                 : 
                                 66                 : 
                                 67                 : /*
                                 68                 :  * scalararraysel_containment
                                 69                 :  *      Estimate selectivity of ScalarArrayOpExpr via array containment.
                                 70                 :  *
                                 71                 :  * If we have const =/<> ANY/ALL (array_var) then we can estimate the
                                 72                 :  * selectivity as though this were an array containment operator,
                                 73                 :  * array_var op ARRAY[const].
                                 74                 :  *
                                 75                 :  * scalararraysel() has already verified that the ScalarArrayOpExpr's operator
                                 76                 :  * is the array element type's default equality or inequality operator, and
                                 77                 :  * has aggressively simplified both inputs to constants.
                                 78                 :  *
                                 79                 :  * Returns selectivity (0..1), or -1 if we fail to estimate selectivity.
                                 80                 :  */
                                 81                 : Selectivity
 4054 tgl                        82 CBC        7644 : scalararraysel_containment(PlannerInfo *root,
                                 83                 :                            Node *leftop, Node *rightop,
                                 84                 :                            Oid elemtype, bool isEquality, bool useOr,
                                 85                 :                            int varRelid)
                                 86                 : {
                                 87                 :     Selectivity selec;
                                 88                 :     VariableStatData vardata;
                                 89                 :     Datum       constval;
                                 90                 :     TypeCacheEntry *typentry;
                                 91                 :     FmgrInfo   *cmpfunc;
                                 92                 : 
                                 93                 :     /*
                                 94                 :      * rightop must be a variable, else punt.
                                 95                 :      */
                                 96            7644 :     examine_variable(root, rightop, varRelid, &vardata);
                                 97            7644 :     if (!vardata.rel)
                                 98                 :     {
                                 99            7468 :         ReleaseVariableStats(vardata);
                                100            7468 :         return -1.0;
                                101                 :     }
                                102                 : 
                                103                 :     /*
                                104                 :      * leftop must be a constant, else punt.
                                105                 :      */
                                106             176 :     if (!IsA(leftop, Const))
                                107                 :     {
                                108             115 :         ReleaseVariableStats(vardata);
                                109             115 :         return -1.0;
                                110                 :     }
                                111              61 :     if (((Const *) leftop)->constisnull)
                                112                 :     {
                                113                 :         /* qual can't succeed if null on left */
 4054 tgl                       114 UBC           0 :         ReleaseVariableStats(vardata);
                                115               0 :         return (Selectivity) 0.0;
                                116                 :     }
 4054 tgl                       117 CBC          61 :     constval = ((Const *) leftop)->constvalue;
                                118                 : 
                                119                 :     /* Get element type's default comparison function */
                                120              61 :     typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
                                121              61 :     if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
                                122                 :     {
 4054 tgl                       123 UBC           0 :         ReleaseVariableStats(vardata);
                                124               0 :         return -1.0;
                                125                 :     }
 4054 tgl                       126 CBC          61 :     cmpfunc = &typentry->cmp_proc_finfo;
                                127                 : 
                                128                 :     /*
                                129                 :      * If the operator is <>, swap ANY/ALL, then invert the result later.
                                130                 :      */
                                131              61 :     if (!isEquality)
                                132              43 :         useOr = !useOr;
                                133                 : 
                                134                 :     /* Get array element stats for var, if available */
 2165 peter_e                   135              67 :     if (HeapTupleIsValid(vardata.statsTuple) &&
                                136               6 :         statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
 4054 tgl                       137               6 :     {
                                138                 :         Form_pg_statistic stats;
                                139                 :         AttStatsSlot sslot;
                                140                 :         AttStatsSlot hslot;
                                141                 : 
                                142               6 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
                                143                 : 
                                144                 :         /* MCELEM will be an array of same type as element */
 2157                           145               6 :         if (get_attstatsslot(&sslot, vardata.statsTuple,
                                146                 :                              STATISTIC_KIND_MCELEM, InvalidOid,
                                147                 :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
                                148                 :         {
                                149                 :             /* For ALL case, also get histogram of distinct-element counts */
 4054                           150               6 :             if (useOr ||
 2157 tgl                       151 UBC           0 :                 !get_attstatsslot(&hslot, vardata.statsTuple,
                                152                 :                                   STATISTIC_KIND_DECHIST, InvalidOid,
                                153                 :                                   ATTSTATSSLOT_NUMBERS))
 2157 tgl                       154 CBC           6 :                 memset(&hslot, 0, sizeof(hslot));
                                155                 : 
                                156                 :             /*
                                157                 :              * For = ANY, estimate as var @> ARRAY[const].
                                158                 :              *
                                159                 :              * For = ALL, estimate as var <@ ARRAY[const].
                                160                 :              */
 4054                           161               6 :             if (useOr)
 2157                           162               6 :                 selec = mcelem_array_contain_overlap_selec(sslot.values,
                                163                 :                                                            sslot.nvalues,
                                164                 :                                                            sslot.numbers,
                                165                 :                                                            sslot.nnumbers,
                                166                 :                                                            &constval, 1,
                                167                 :                                                            OID_ARRAY_CONTAINS_OP,
                                168                 :                                                            typentry);
                                169                 :             else
 2157 tgl                       170 UBC           0 :                 selec = mcelem_array_contained_selec(sslot.values,
                                171                 :                                                      sslot.nvalues,
                                172                 :                                                      sslot.numbers,
                                173                 :                                                      sslot.nnumbers,
                                174                 :                                                      &constval, 1,
                                175                 :                                                      hslot.numbers,
                                176                 :                                                      hslot.nnumbers,
                                177                 :                                                      OID_ARRAY_CONTAINED_OP,
                                178                 :                                                      typentry);
                                179                 : 
 2157 tgl                       180 CBC           6 :             free_attstatsslot(&hslot);
                                181               6 :             free_attstatsslot(&sslot);
                                182                 :         }
                                183                 :         else
                                184                 :         {
                                185                 :             /* No most-common-elements info, so do without */
 4054 tgl                       186 UBC           0 :             if (useOr)
                                187               0 :                 selec = mcelem_array_contain_overlap_selec(NULL, 0,
                                188                 :                                                            NULL, 0,
                                189                 :                                                            &constval, 1,
                                190                 :                                                            OID_ARRAY_CONTAINS_OP,
                                191                 :                                                            typentry);
                                192                 :             else
                                193               0 :                 selec = mcelem_array_contained_selec(NULL, 0,
                                194                 :                                                      NULL, 0,
                                195                 :                                                      &constval, 1,
                                196                 :                                                      NULL, 0,
                                197                 :                                                      OID_ARRAY_CONTAINED_OP,
                                198                 :                                                      typentry);
                                199                 :         }
                                200                 : 
                                201                 :         /*
                                202                 :          * MCE stats count only non-null rows, so adjust for null rows.
                                203                 :          */
 4054 tgl                       204 CBC           6 :         selec *= (1.0 - stats->stanullfrac);
                                205                 :     }
                                206                 :     else
                                207                 :     {
                                208                 :         /* No stats at all, so do without */
                                209              55 :         if (useOr)
                                210              55 :             selec = mcelem_array_contain_overlap_selec(NULL, 0,
                                211                 :                                                        NULL, 0,
                                212                 :                                                        &constval, 1,
                                213                 :                                                        OID_ARRAY_CONTAINS_OP,
                                214                 :                                                        typentry);
                                215                 :         else
 4054 tgl                       216 UBC           0 :             selec = mcelem_array_contained_selec(NULL, 0,
                                217                 :                                                  NULL, 0,
                                218                 :                                                  &constval, 1,
                                219                 :                                                  NULL, 0,
                                220                 :                                                  OID_ARRAY_CONTAINED_OP,
                                221                 :                                                  typentry);
                                222                 :         /* we assume no nulls here, so no stanullfrac correction */
                                223                 :     }
                                224                 : 
 4054 tgl                       225 CBC          61 :     ReleaseVariableStats(vardata);
                                226                 : 
                                227                 :     /*
                                228                 :      * If the operator is <>, invert the results.
                                229                 :      */
                                230              61 :     if (!isEquality)
                                231              43 :         selec = 1.0 - selec;
                                232                 : 
                                233              61 :     CLAMP_PROBABILITY(selec);
                                234                 : 
                                235              61 :     return selec;
                                236                 : }
                                237                 : 
                                238                 : /*
                                239                 :  * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
                                240                 :  */
                                241                 : Datum
                                242             478 : arraycontsel(PG_FUNCTION_ARGS)
                                243                 : {
                                244             478 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
                                245             478 :     Oid         operator = PG_GETARG_OID(1);
                                246             478 :     List       *args = (List *) PG_GETARG_POINTER(2);
                                247             478 :     int         varRelid = PG_GETARG_INT32(3);
                                248                 :     VariableStatData vardata;
                                249                 :     Node       *other;
                                250                 :     bool        varonleft;
                                251                 :     Selectivity selec;
                                252                 :     Oid         element_typeid;
                                253                 : 
                                254                 :     /*
                                255                 :      * If expression is not (variable op something) or (something op
                                256                 :      * variable), then punt and return a default estimate.
                                257                 :      */
                                258             478 :     if (!get_restriction_variable(root, args, varRelid,
                                259                 :                                   &vardata, &other, &varonleft))
 4054 tgl                       260 UBC           0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
                                261                 : 
                                262                 :     /*
                                263                 :      * Can't do anything useful if the something is not a constant, either.
                                264                 :      */
 4054 tgl                       265 CBC         478 :     if (!IsA(other, Const))
                                266                 :     {
 4054 tgl                       267 UBC           0 :         ReleaseVariableStats(vardata);
                                268               0 :         PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
                                269                 :     }
                                270                 : 
                                271                 :     /*
                                272                 :      * The "&&", "@>" and "<@" operators are strict, so we can cope with a
                                273                 :      * NULL constant right away.
                                274                 :      */
 4054 tgl                       275 CBC         478 :     if (((Const *) other)->constisnull)
                                276                 :     {
 4054 tgl                       277 UBC           0 :         ReleaseVariableStats(vardata);
                                278               0 :         PG_RETURN_FLOAT8(0.0);
                                279                 :     }
                                280                 : 
                                281                 :     /*
                                282                 :      * If var is on the right, commute the operator, so that we can assume the
                                283                 :      * var is on the left in what follows.
                                284                 :      */
 4054 tgl                       285 CBC         478 :     if (!varonleft)
                                286                 :     {
                                287              12 :         if (operator == OID_ARRAY_CONTAINS_OP)
 4054 tgl                       288 UBC           0 :             operator = OID_ARRAY_CONTAINED_OP;
 4054 tgl                       289 CBC          12 :         else if (operator == OID_ARRAY_CONTAINED_OP)
                                290              12 :             operator = OID_ARRAY_CONTAINS_OP;
                                291                 :     }
                                292                 : 
                                293                 :     /*
                                294                 :      * OK, there's a Var and a Const we're dealing with here.  We need the
                                295                 :      * Const to be an array with same element type as column, else we can't do
                                296                 :      * anything useful.  (Such cases will likely fail at runtime, but here
                                297                 :      * we'd rather just return a default estimate.)
                                298                 :      */
                                299             478 :     element_typeid = get_base_element_type(((Const *) other)->consttype);
                                300             956 :     if (element_typeid != InvalidOid &&
                                301             478 :         element_typeid == get_base_element_type(vardata.vartype))
                                302                 :     {
                                303             478 :         selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
                                304                 :                                   element_typeid, operator);
                                305                 :     }
                                306                 :     else
                                307                 :     {
 4054 tgl                       308 UBC           0 :         selec = DEFAULT_SEL(operator);
                                309                 :     }
                                310                 : 
 4054 tgl                       311 CBC         478 :     ReleaseVariableStats(vardata);
                                312                 : 
                                313             478 :     CLAMP_PROBABILITY(selec);
                                314                 : 
                                315             478 :     PG_RETURN_FLOAT8((float8) selec);
                                316                 : }
                                317                 : 
                                318                 : /*
                                319                 :  * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
                                320                 :  */
                                321                 : Datum
 4054 tgl                       322 UBC           0 : arraycontjoinsel(PG_FUNCTION_ARGS)
                                323                 : {
                                324                 :     /* For the moment this is just a stub */
                                325               0 :     Oid         operator = PG_GETARG_OID(1);
                                326                 : 
                                327               0 :     PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
                                328                 : }
                                329                 : 
                                330                 : /*
                                331                 :  * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const"
                                332                 :  * or "arraycolumn <@ const" based on the statistics
                                333                 :  *
                                334                 :  * This function is mainly responsible for extracting the pg_statistic data
                                335                 :  * to be used; we then pass the problem on to mcelem_array_selec().
                                336                 :  */
                                337                 : static Selectivity
 4054 tgl                       338 CBC         478 : calc_arraycontsel(VariableStatData *vardata, Datum constval,
                                339                 :                   Oid elemtype, Oid operator)
                                340                 : {
                                341                 :     Selectivity selec;
                                342                 :     TypeCacheEntry *typentry;
                                343                 :     FmgrInfo   *cmpfunc;
                                344                 :     ArrayType  *array;
                                345                 : 
                                346                 :     /* Get element type's default comparison function */
                                347             478 :     typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
                                348             478 :     if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
 4054 tgl                       349 UBC           0 :         return DEFAULT_SEL(operator);
 4054 tgl                       350 CBC         478 :     cmpfunc = &typentry->cmp_proc_finfo;
                                351                 : 
                                352                 :     /*
                                353                 :      * The caller made sure the const is an array with same element type, so
                                354                 :      * get it now
                                355                 :      */
                                356             478 :     array = DatumGetArrayTypeP(constval);
                                357                 : 
 2165 peter_e                   358             712 :     if (HeapTupleIsValid(vardata->statsTuple) &&
                                359             234 :         statistic_proc_security_check(vardata, cmpfunc->fn_oid))
 4054 tgl                       360             234 :     {
                                361                 :         Form_pg_statistic stats;
                                362                 :         AttStatsSlot sslot;
                                363                 :         AttStatsSlot hslot;
                                364                 : 
                                365             234 :         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
                                366                 : 
                                367                 :         /* MCELEM will be an array of same type as column */
 2157                           368             234 :         if (get_attstatsslot(&sslot, vardata->statsTuple,
                                369                 :                              STATISTIC_KIND_MCELEM, InvalidOid,
                                370                 :                              ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
                                371                 :         {
                                372                 :             /*
                                373                 :              * For "array <@ const" case we also need histogram of distinct
                                374                 :              * element counts.
                                375                 :              */
 4054                           376             234 :             if (operator != OID_ARRAY_CONTAINED_OP ||
 2157                           377              36 :                 !get_attstatsslot(&hslot, vardata->statsTuple,
                                378                 :                                   STATISTIC_KIND_DECHIST, InvalidOid,
                                379                 :                                   ATTSTATSSLOT_NUMBERS))
                                380             198 :                 memset(&hslot, 0, sizeof(hslot));
                                381                 : 
                                382                 :             /* Use the most-common-elements slot for the array Var. */
 4054                           383             234 :             selec = mcelem_array_selec(array, typentry,
                                384                 :                                        sslot.values, sslot.nvalues,
                                385                 :                                        sslot.numbers, sslot.nnumbers,
                                386                 :                                        hslot.numbers, hslot.nnumbers,
                                387                 :                                        operator);
                                388                 : 
 2157                           389             234 :             free_attstatsslot(&hslot);
                                390             234 :             free_attstatsslot(&sslot);
                                391                 :         }
                                392                 :         else
                                393                 :         {
                                394                 :             /* No most-common-elements info, so do without */
 4054 tgl                       395 UBC           0 :             selec = mcelem_array_selec(array, typentry,
                                396                 :                                        NULL, 0, NULL, 0, NULL, 0,
                                397                 :                                        operator);
                                398                 :         }
                                399                 : 
                                400                 :         /*
                                401                 :          * MCE stats count only non-null rows, so adjust for null rows.
                                402                 :          */
 4054 tgl                       403 CBC         234 :         selec *= (1.0 - stats->stanullfrac);
                                404                 :     }
                                405                 :     else
                                406                 :     {
                                407                 :         /* No stats at all, so do without */
                                408             244 :         selec = mcelem_array_selec(array, typentry,
                                409                 :                                    NULL, 0, NULL, 0, NULL, 0,
                                410                 :                                    operator);
                                411                 :         /* we assume no nulls here, so no stanullfrac correction */
                                412                 :     }
                                413                 : 
                                414                 :     /* If constant was toasted, release the copy we made */
                                415             478 :     if (PointerGetDatum(array) != constval)
 4054 tgl                       416 UBC           0 :         pfree(array);
                                417                 : 
 4054 tgl                       418 CBC         478 :     return selec;
                                419                 : }
                                420                 : 
                                421                 : /*
                                422                 :  * Array selectivity estimation based on most common elements statistics
                                423                 :  *
                                424                 :  * This function just deconstructs and sorts the array constant's contents,
                                425                 :  * and then passes the problem on to mcelem_array_contain_overlap_selec or
                                426                 :  * mcelem_array_contained_selec depending on the operator.
                                427                 :  */
                                428                 : static Selectivity
                                429             478 : mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry,
                                430                 :                    Datum *mcelem, int nmcelem,
                                431                 :                    float4 *numbers, int nnumbers,
                                432                 :                    float4 *hist, int nhist,
                                433                 :                    Oid operator)
                                434                 : {
                                435                 :     Selectivity selec;
                                436                 :     int         num_elems;
                                437                 :     Datum      *elem_values;
                                438                 :     bool       *elem_nulls;
                                439                 :     bool        null_present;
                                440                 :     int         nonnull_nitems;
                                441                 :     int         i;
                                442                 : 
                                443                 :     /*
                                444                 :      * Prepare constant array data for sorting.  Sorting lets us find unique
                                445                 :      * elements and efficiently merge with the MCELEM array.
                                446                 :      */
                                447             478 :     deconstruct_array(array,
                                448                 :                       typentry->type_id,
                                449             478 :                       typentry->typlen,
                                450             478 :                       typentry->typbyval,
                                451             478 :                       typentry->typalign,
                                452                 :                       &elem_values, &elem_nulls, &num_elems);
                                453                 : 
                                454                 :     /* Collapse out any null elements */
                                455             478 :     nonnull_nitems = 0;
                                456             478 :     null_present = false;
                                457             932 :     for (i = 0; i < num_elems; i++)
                                458                 :     {
                                459             454 :         if (elem_nulls[i])
                                460              18 :             null_present = true;
                                461                 :         else
                                462             436 :             elem_values[nonnull_nitems++] = elem_values[i];
                                463                 :     }
                                464                 : 
                                465                 :     /*
                                466                 :      * Query "column @> '{anything, null}'" matches nothing.  For the other
                                467                 :      * two operators, presence of a null in the constant can be ignored.
                                468                 :      */
                                469             478 :     if (null_present && operator == OID_ARRAY_CONTAINS_OP)
                                470                 :     {
                                471               6 :         pfree(elem_values);
                                472               6 :         pfree(elem_nulls);
                                473               6 :         return (Selectivity) 0.0;
                                474                 :     }
                                475                 : 
                                476                 :     /* Sort extracted elements using their default comparison function. */
                                477             472 :     qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
                                478                 :               element_compare, typentry);
                                479                 : 
                                480                 :     /* Separate cases according to operator */
                                481             472 :     if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
                                482             436 :         selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
                                483                 :                                                    numbers, nnumbers,
                                484                 :                                                    elem_values, nonnull_nitems,
                                485                 :                                                    operator, typentry);
                                486              36 :     else if (operator == OID_ARRAY_CONTAINED_OP)
                                487              36 :         selec = mcelem_array_contained_selec(mcelem, nmcelem,
                                488                 :                                              numbers, nnumbers,
                                489                 :                                              elem_values, nonnull_nitems,
                                490                 :                                              hist, nhist,
                                491                 :                                              operator, typentry);
                                492                 :     else
                                493                 :     {
 4054 tgl                       494 UBC           0 :         elog(ERROR, "arraycontsel called for unrecognized operator %u",
                                495                 :              operator);
                                496                 :         selec = 0.0;            /* keep compiler quiet */
                                497                 :     }
                                498                 : 
 4054 tgl                       499 CBC         472 :     pfree(elem_values);
                                500             472 :     pfree(elem_nulls);
                                501             472 :     return selec;
                                502                 : }
                                503                 : 
                                504                 : /*
                                505                 :  * Estimate selectivity of "column @> const" and "column && const" based on
                                506                 :  * most common element statistics.  This estimation assumes element
                                507                 :  * occurrences are independent.
                                508                 :  *
                                509                 :  * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
                                510                 :  * the array column's MCELEM statistics slot, or are NULL/0 if stats are
                                511                 :  * not available.  array_data (of length nitems) is the constant's elements.
                                512                 :  *
                                513                 :  * Both the mcelem and array_data arrays are assumed presorted according
                                514                 :  * to the element type's cmpfunc.  Null elements are not present.
                                515                 :  *
                                516                 :  * TODO: this estimate probably could be improved by using the distinct
                                517                 :  * elements count histogram.  For example, excepting the special case of
                                518                 :  * "column @> '{}'", we can multiply the calculated selectivity by the
                                519                 :  * fraction of nonempty arrays in the column.
                                520                 :  */
                                521                 : static Selectivity
                                522             497 : mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
                                523                 :                                    float4 *numbers, int nnumbers,
                                524                 :                                    Datum *array_data, int nitems,
                                525                 :                                    Oid operator, TypeCacheEntry *typentry)
                                526                 : {
                                527                 :     Selectivity selec,
                                528                 :                 elem_selec;
                                529                 :     int         mcelem_index,
                                530                 :                 i;
                                531                 :     bool        use_bsearch;
                                532                 :     float4      minfreq;
                                533                 : 
                                534                 :     /*
                                535                 :      * There should be three more Numbers than Values, because the last three
                                536                 :      * cells should hold minimal and maximal frequency among the non-null
                                537                 :      * elements, and then the frequency of null elements.  Ignore the Numbers
                                538                 :      * if not right.
                                539                 :      */
                                540             497 :     if (nnumbers != nmcelem + 3)
                                541                 :     {
                                542             299 :         numbers = NULL;
                                543             299 :         nnumbers = 0;
                                544                 :     }
                                545                 : 
                                546             497 :     if (numbers)
                                547                 :     {
                                548                 :         /* Grab the lowest observed frequency */
                                549             198 :         minfreq = numbers[nmcelem];
                                550                 :     }
                                551                 :     else
                                552                 :     {
                                553                 :         /* Without statistics make some default assumptions */
 3836 andrew                    554             299 :         minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
                                555                 :     }
                                556                 : 
                                557                 :     /* Decide whether it is faster to use binary search or not. */
 4054 tgl                       558             497 :     if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
                                559             398 :         use_bsearch = true;
                                560                 :     else
                                561              99 :         use_bsearch = false;
                                562                 : 
                                563             497 :     if (operator == OID_ARRAY_CONTAINS_OP)
                                564                 :     {
                                565                 :         /*
                                566                 :          * Initial selectivity for "column @> const" query is 1.0, and it will
                                567                 :          * be decreased with each element of constant array.
                                568                 :          */
                                569             425 :         selec = 1.0;
                                570                 :     }
                                571                 :     else
                                572                 :     {
                                573                 :         /*
                                574                 :          * Initial selectivity for "column && const" query is 0.0, and it will
                                575                 :          * be increased with each element of constant array.
                                576                 :          */
                                577              72 :         selec = 0.0;
                                578                 :     }
                                579                 : 
                                580                 :     /* Scan mcelem and array in parallel. */
                                581             497 :     mcelem_index = 0;
                                582             916 :     for (i = 0; i < nitems; i++)
                                583                 :     {
                                584             419 :         bool        match = false;
                                585                 : 
                                586                 :         /* Ignore any duplicates in the array data. */
                                587             479 :         if (i > 0 &&
 1577                           588              60 :             element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
 4054 tgl                       589 UBC           0 :             continue;
                                590                 : 
                                591                 :         /* Find the smallest MCELEM >= this array item. */
 4054 tgl                       592 CBC         419 :         if (use_bsearch)
                                593                 :         {
                                594             419 :             match = find_next_mcelem(mcelem, nmcelem, array_data[i],
                                595                 :                                      &mcelem_index, typentry);
                                596                 :         }
                                597                 :         else
                                598                 :         {
 4054 tgl                       599 UBC           0 :             while (mcelem_index < nmcelem)
                                600                 :             {
                                601               0 :                 int         cmp = element_compare(&mcelem[mcelem_index],
                                602               0 :                                                   &array_data[i],
                                603                 :                                                   typentry);
                                604                 : 
                                605               0 :                 if (cmp < 0)
                                606               0 :                     mcelem_index++;
                                607                 :                 else
                                608                 :                 {
                                609               0 :                     if (cmp == 0)
 3955 bruce                     610               0 :                         match = true;   /* mcelem is found */
 4054 tgl                       611               0 :                     break;
                                612                 :                 }
                                613                 :             }
                                614                 :         }
                                615                 : 
 4054 tgl                       616 CBC         419 :         if (match && numbers)
                                617                 :         {
                                618                 :             /* MCELEM matches the array item; use its frequency. */
                                619             204 :             elem_selec = numbers[mcelem_index];
                                620             204 :             mcelem_index++;
                                621                 :         }
                                622                 :         else
                                623                 :         {
                                624                 :             /*
                                625                 :              * The element is not in MCELEM.  Punt, but assume that the
                                626                 :              * selectivity cannot be more than minfreq / 2.
                                627                 :              */
                                628             215 :             elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
                                629                 :         }
                                630                 : 
                                631                 :         /*
                                632                 :          * Update overall selectivity using the current element's selectivity
                                633                 :          * and an assumption of element occurrence independence.
                                634                 :          */
                                635             419 :         if (operator == OID_ARRAY_CONTAINS_OP)
                                636             347 :             selec *= elem_selec;
                                637                 :         else
                                638              72 :             selec = selec + elem_selec - selec * elem_selec;
                                639                 : 
                                640                 :         /* Clamp intermediate results to stay sane despite roundoff error */
                                641             419 :         CLAMP_PROBABILITY(selec);
                                642                 :     }
                                643                 : 
                                644             497 :     return selec;
                                645                 : }
                                646                 : 
                                647                 : /*
                                648                 :  * Estimate selectivity of "column <@ const" based on most common element
                                649                 :  * statistics.
                                650                 :  *
                                651                 :  * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
                                652                 :  * the array column's MCELEM statistics slot, or are NULL/0 if stats are
                                653                 :  * not available.  array_data (of length nitems) is the constant's elements.
                                654                 :  * hist (of length nhist) is from the array column's DECHIST statistics slot,
                                655                 :  * or is NULL/0 if those stats are not available.
                                656                 :  *
                                657                 :  * Both the mcelem and array_data arrays are assumed presorted according
                                658                 :  * to the element type's cmpfunc.  Null elements are not present.
                                659                 :  *
                                660                 :  * Independent element occurrence would imply a particular distribution of
                                661                 :  * distinct element counts among matching rows.  Real data usually falsifies
                                662                 :  * that assumption.  For example, in a set of 11-element integer arrays having
                                663                 :  * elements in the range [0..10], element occurrences are typically not
                                664                 :  * independent.  If they were, a sufficiently-large set would include all
                                665                 :  * distinct element counts 0 through 11.  We correct for this using the
                                666                 :  * histogram of distinct element counts.
                                667                 :  *
                                668                 :  * In the "column @> const" and "column && const" cases, we usually have a
                                669                 :  * "const" with low number of elements (otherwise we have selectivity close
                                670                 :  * to 0 or 1 respectively).  That's why the effect of dependence related
                                671                 :  * to distinct element count distribution is negligible there.  In the
                                672                 :  * "column <@ const" case, number of elements is usually high (otherwise we
                                673                 :  * have selectivity close to 0).  That's why we should do a correction with
                                674                 :  * the array distinct element count distribution here.
                                675                 :  *
                                676                 :  * Using the histogram of distinct element counts produces a different
                                677                 :  * distribution law than independent occurrences of elements.  This
                                678                 :  * distribution law can be described as follows:
                                679                 :  *
                                680                 :  * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 *
                                681                 :  *    (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m]
                                682                 :  *
                                683                 :  * where:
                                684                 :  * o1, o2, ..., on - occurrences of elements 1, 2, ..., n
                                685                 :  *      (1 - occurrence, 0 - no occurrence) in row
                                686                 :  * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n
                                687                 :  *      (scalar values in [0..1]) according to collected statistics
                                688                 :  * m = o1 + o2 + ... + on = total number of distinct elements in row
                                689                 :  * hist[m] - histogram data for occurrence of m elements.
                                690                 :  * ind[m] - probability of m occurrences from n events assuming their
                                691                 :  *    probabilities to be equal to frequencies of array elements.
                                692                 :  *
                                693                 :  * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) *
                                694                 :  * ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m
                                695                 :  */
                                696                 : static Selectivity
                                697              36 : mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
                                698                 :                              float4 *numbers, int nnumbers,
                                699                 :                              Datum *array_data, int nitems,
                                700                 :                              float4 *hist, int nhist,
                                701                 :                              Oid operator, TypeCacheEntry *typentry)
                                702                 : {
                                703                 :     int         mcelem_index,
                                704                 :                 i,
                                705              36 :                 unique_nitems = 0;
                                706                 :     float       selec,
                                707                 :                 minfreq,
                                708                 :                 nullelem_freq;
                                709                 :     float      *dist,
                                710                 :                *mcelem_dist,
                                711                 :                *hist_part;
                                712                 :     float       avg_count,
                                713                 :                 mult,
                                714                 :                 rest;
                                715                 :     float      *elem_selec;
                                716                 : 
                                717                 :     /*
                                718                 :      * There should be three more Numbers than Values in the MCELEM slot,
                                719                 :      * because the last three cells should hold minimal and maximal frequency
                                720                 :      * among the non-null elements, and then the frequency of null elements.
                                721                 :      * Punt if not right, because we can't do much without the element freqs.
                                722                 :      */
                                723              36 :     if (numbers == NULL || nnumbers != nmcelem + 3)
 4054 tgl                       724 UBC           0 :         return DEFAULT_CONTAIN_SEL;
                                725                 : 
                                726                 :     /* Can't do much without a count histogram, either */
 4053 tgl                       727 CBC          36 :     if (hist == NULL || nhist < 3)
 4053 tgl                       728 UBC           0 :         return DEFAULT_CONTAIN_SEL;
                                729                 : 
                                730                 :     /*
                                731                 :      * Grab some of the summary statistics that compute_array_stats() stores:
                                732                 :      * lowest frequency, frequency of null elements, and average distinct
                                733                 :      * element count.
                                734                 :      */
 4054 tgl                       735 CBC          36 :     minfreq = numbers[nmcelem];
                                736              36 :     nullelem_freq = numbers[nmcelem + 2];
 4053                           737              36 :     avg_count = hist[nhist - 1];
                                738                 : 
                                739                 :     /*
                                740                 :      * "rest" will be the sum of the frequencies of all elements not
                                741                 :      * represented in MCELEM.  The average distinct element count is the sum
                                742                 :      * of the frequencies of *all* elements.  Begin with that; we will proceed
                                743                 :      * to subtract the MCELEM frequencies.
                                744                 :      */
 4054                           745              36 :     rest = avg_count;
                                746                 : 
                                747                 :     /*
                                748                 :      * mult is a multiplier representing estimate of probability that each
                                749                 :      * mcelem that is not present in constant doesn't occur.
                                750                 :      */
                                751              36 :     mult = 1.0f;
                                752                 : 
                                753                 :     /*
                                754                 :      * elem_selec is array of estimated frequencies for elements in the
                                755                 :      * constant.
                                756                 :      */
                                757              36 :     elem_selec = (float *) palloc(sizeof(float) * nitems);
                                758                 : 
                                759                 :     /* Scan mcelem and array in parallel. */
                                760              36 :     mcelem_index = 0;
                                761             114 :     for (i = 0; i < nitems; i++)
                                762                 :     {
                                763              78 :         bool        match = false;
                                764                 : 
                                765                 :         /* Ignore any duplicates in the array data. */
                                766             138 :         if (i > 0 &&
 1577                           767              60 :             element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
 4054 tgl                       768 UBC           0 :             continue;
                                769                 : 
                                770                 :         /*
                                771                 :          * Iterate over MCELEM until we find an entry greater than or equal to
                                772                 :          * this element of the constant.  Update "rest" and "mult" for mcelem
                                773                 :          * entries skipped over.
                                774                 :          */
 4054 tgl                       775 CBC        2064 :         while (mcelem_index < nmcelem)
                                776                 :         {
                                777            2064 :             int         cmp = element_compare(&mcelem[mcelem_index],
                                778            2064 :                                               &array_data[i],
                                779                 :                                               typentry);
                                780                 : 
                                781            2064 :             if (cmp < 0)
                                782                 :             {
                                783            1986 :                 mult *= (1.0f - numbers[mcelem_index]);
                                784            1986 :                 rest -= numbers[mcelem_index];
                                785            1986 :                 mcelem_index++;
                                786                 :             }
                                787                 :             else
                                788                 :             {
                                789              78 :                 if (cmp == 0)
 2118                           790              78 :                     match = true;   /* mcelem is found */
 4054                           791              78 :                 break;
                                792                 :             }
                                793                 :         }
                                794                 : 
                                795              78 :         if (match)
                                796                 :         {
                                797                 :             /* MCELEM matches the array item. */
                                798              78 :             elem_selec[unique_nitems] = numbers[mcelem_index];
                                799                 :             /* "rest" is decremented for all mcelems, matched or not */
                                800              78 :             rest -= numbers[mcelem_index];
                                801              78 :             mcelem_index++;
                                802                 :         }
                                803                 :         else
                                804                 :         {
                                805                 :             /*
                                806                 :              * The element is not in MCELEM.  Punt, but assume that the
                                807                 :              * selectivity cannot be more than minfreq / 2.
                                808                 :              */
 4054 tgl                       809 UBC           0 :             elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
                                810                 :                                             minfreq / 2);
                                811                 :         }
                                812                 : 
 4054 tgl                       813 CBC          78 :         unique_nitems++;
                                814                 :     }
                                815                 : 
                                816                 :     /*
                                817                 :      * If we handled all constant elements without exhausting the MCELEM
                                818                 :      * array, finish walking it to complete calculation of "rest" and "mult".
                                819                 :      */
                                820            3834 :     while (mcelem_index < nmcelem)
                                821                 :     {
                                822            3798 :         mult *= (1.0f - numbers[mcelem_index]);
                                823            3798 :         rest -= numbers[mcelem_index];
                                824            3798 :         mcelem_index++;
                                825                 :     }
                                826                 : 
                                827                 :     /*
                                828                 :      * The presence of many distinct rare elements materially decreases
                                829                 :      * selectivity.  Use the Poisson distribution to estimate the probability
                                830                 :      * of a column value having zero occurrences of such elements.  See above
                                831                 :      * for the definition of "rest".
                                832                 :      */
                                833              36 :     mult *= exp(-rest);
                                834                 : 
                                835                 :     /*----------
                                836                 :      * Using the distinct element count histogram requires
                                837                 :      *      O(unique_nitems * (nmcelem + unique_nitems))
                                838                 :      * operations.  Beyond a certain computational cost threshold, it's
                                839                 :      * reasonable to sacrifice accuracy for decreased planning time.  We limit
                                840                 :      * the number of operations to EFFORT * nmcelem; since nmcelem is limited
                                841                 :      * by the column's statistics target, the work done is user-controllable.
                                842                 :      *
                                843                 :      * If the number of operations would be too large, we can reduce it
                                844                 :      * without losing all accuracy by reducing unique_nitems and considering
                                845                 :      * only the most-common elements of the constant array.  To make the
                                846                 :      * results exactly match what we would have gotten with only those
                                847                 :      * elements to start with, we'd have to remove any discarded elements'
                                848                 :      * frequencies from "mult", but since this is only an approximation
                                849                 :      * anyway, we don't bother with that.  Therefore it's sufficient to qsort
                                850                 :      * elem_selec[] and take the largest elements.  (They will no longer match
                                851                 :      * up with the elements of array_data[], but we don't care.)
                                852                 :      *----------
                                853                 :      */
                                854                 : #define EFFORT 100
                                855                 : 
 4053                           856              36 :     if ((nmcelem + unique_nitems) > 0 &&
                                857              36 :         unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
                                858                 :     {
                                859                 :         /*
                                860                 :          * Use the quadratic formula to solve for largest allowable N.  We
                                861                 :          * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
                                862                 :          */
 3955 bruce                     863 UBC           0 :         double      b = (double) nmcelem;
                                864                 :         int         n;
                                865                 : 
 4053 tgl                       866               0 :         n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
                                867                 : 
                                868                 :         /* Sort, then take just the first n elements */
                                869               0 :         qsort(elem_selec, unique_nitems, sizeof(float),
                                870                 :               float_compare_desc);
                                871               0 :         unique_nitems = n;
                                872                 :     }
                                873                 : 
                                874                 :     /*
                                875                 :      * Calculate probabilities of each distinct element count for both mcelems
                                876                 :      * and constant elements.  At this point, assume independent element
                                877                 :      * occurrence.
                                878                 :      */
 4053 tgl                       879 CBC          36 :     dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
                                880              36 :     mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
                                881                 : 
                                882                 :     /* ignore hist[nhist-1], which is the average not a histogram member */
                                883              36 :     hist_part = calc_hist(hist, nhist - 1, unique_nitems);
                                884                 : 
                                885              36 :     selec = 0.0f;
                                886             150 :     for (i = 0; i <= unique_nitems; i++)
                                887                 :     {
                                888                 :         /*
                                889                 :          * mult * dist[i] / mcelem_dist[i] gives us probability of qual
                                890                 :          * matching from assumption of independent element occurrence with the
                                891                 :          * condition that distinct element count = i.
                                892                 :          */
                                893             114 :         if (mcelem_dist[i] > 0)
                                894             114 :             selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
                                895                 :     }
                                896                 : 
                                897              36 :     pfree(dist);
                                898              36 :     pfree(mcelem_dist);
                                899              36 :     pfree(hist_part);
 4054                           900              36 :     pfree(elem_selec);
                                901                 : 
                                902                 :     /* Take into account occurrence of NULL element. */
                                903              36 :     selec *= (1.0f - nullelem_freq);
                                904                 : 
                                905              36 :     CLAMP_PROBABILITY(selec);
                                906                 : 
                                907              36 :     return selec;
                                908                 : }
                                909                 : 
                                910                 : /*
                                911                 :  * Calculate the first n distinct element count probabilities from a
                                912                 :  * histogram of distinct element counts.
                                913                 :  *
                                914                 :  * Returns a palloc'd array of n+1 entries, with array[k] being the
                                915                 :  * probability of element count k, k in [0..n].
                                916                 :  *
                                917                 :  * We assume that a histogram box with bounds a and b gives 1 / ((b - a + 1) *
                                918                 :  * (nhist - 1)) probability to each value in (a,b) and an additional half of
                                919                 :  * that to a and b themselves.
                                920                 :  */
                                921                 : static float *
                                922              36 : calc_hist(const float4 *hist, int nhist, int n)
                                923                 : {
                                924                 :     float      *hist_part;
                                925                 :     int         k,
                                926              36 :                 i = 0;
                                927              36 :     float       prev_interval = 0,
                                928                 :                 next_interval;
                                929                 :     float       frac;
                                930                 : 
                                931              36 :     hist_part = (float *) palloc((n + 1) * sizeof(float));
                                932                 : 
                                933                 :     /*
                                934                 :      * frac is a probability contribution for each interval between histogram
                                935                 :      * values.  We have nhist - 1 intervals, so contribution of each one will
                                936                 :      * be 1 / (nhist - 1).
                                937                 :      */
                                938              36 :     frac = 1.0f / ((float) (nhist - 1));
                                939                 : 
                                940             150 :     for (k = 0; k <= n; k++)
                                941                 :     {
                                942             114 :         int         count = 0;
                                943                 : 
                                944                 :         /*
                                945                 :          * Count the histogram boundaries equal to k.  (Although the histogram
                                946                 :          * should theoretically contain only exact integers, entries are
                                947                 :          * floats so there could be roundoff error in large values.  Treat any
                                948                 :          * fractional value as equal to the next larger k.)
                                949                 :          */
                                950            1008 :         while (i < nhist && hist[i] <= k)
                                951                 :         {
                                952             894 :             count++;
                                953             894 :             i++;
                                954                 :         }
                                955                 : 
                                956             114 :         if (count > 0)
                                957                 :         {
                                958                 :             /* k is an exact bound for at least one histogram box. */
                                959                 :             float       val;
                                960                 : 
                                961                 :             /* Find length between current histogram value and the next one */
                                962             108 :             if (i < nhist)
                                963             108 :                 next_interval = hist[i] - hist[i - 1];
                                964                 :             else
 4054 tgl                       965 UBC           0 :                 next_interval = 0;
                                966                 : 
                                967                 :             /*
                                968                 :              * count - 1 histogram boxes contain k exclusively.  They
                                969                 :              * contribute a total of (count - 1) * frac probability.  Also
                                970                 :              * factor in the partial histogram boxes on either side.
                                971                 :              */
 4054 tgl                       972 CBC         108 :             val = (float) (count - 1);
                                973             108 :             if (next_interval > 0)
                                974             108 :                 val += 0.5f / next_interval;
                                975             108 :             if (prev_interval > 0)
                                976              72 :                 val += 0.5f / prev_interval;
                                977             108 :             hist_part[k] = frac * val;
                                978                 : 
                                979             108 :             prev_interval = next_interval;
                                980                 :         }
                                981                 :         else
                                982                 :         {
                                983                 :             /* k does not appear as an exact histogram bound. */
                                984               6 :             if (prev_interval > 0)
                                985               6 :                 hist_part[k] = frac / prev_interval;
                                986                 :             else
 4054 tgl                       987 UBC           0 :                 hist_part[k] = 0.0f;
                                988                 :         }
                                989                 :     }
                                990                 : 
 4054 tgl                       991 CBC          36 :     return hist_part;
                                992                 : }
                                993                 : 
                                994                 : /*
                                995                 :  * Consider n independent events with probabilities p[].  This function
                                996                 :  * calculates probabilities of exact k of events occurrence for k in [0..m].
                                997                 :  * Returns a palloc'd array of size m+1.
                                998                 :  *
                                999                 :  * "rest" is the sum of the probabilities of all low-probability events not
                               1000                 :  * included in p.
                               1001                 :  *
                               1002                 :  * Imagine matrix M of size (n + 1) x (m + 1).  Element M[i,j] denotes the
                               1003                 :  * probability that exactly j of first i events occur.  Obviously M[0,0] = 1.
                               1004                 :  * For any constant j, each increment of i increases the probability iff the
                               1005                 :  * event occurs.  So, by the law of total probability:
                               1006                 :  *  M[i,j] = M[i - 1, j] * (1 - p[i]) + M[i - 1, j - 1] * p[i]
                               1007                 :  *      for i > 0, j > 0.
                               1008                 :  *  M[i,0] = M[i - 1, 0] * (1 - p[i]) for i > 0.
                               1009                 :  */
                               1010                 : static float *
                               1011              72 : calc_distr(const float *p, int n, int m, float rest)
                               1012                 : {
                               1013                 :     float      *row,
                               1014                 :                *prev_row,
                               1015                 :                *tmp;
                               1016                 :     int         i,
                               1017                 :                 j;
                               1018                 : 
                               1019                 :     /*
                               1020                 :      * Since we return only the last row of the matrix and need only the
                               1021                 :      * current and previous row for calculations, allocate two rows.
                               1022                 :      */
                               1023              72 :     row = (float *) palloc((m + 1) * sizeof(float));
                               1024              72 :     prev_row = (float *) palloc((m + 1) * sizeof(float));
                               1025                 : 
                               1026                 :     /* M[0,0] = 1 */
                               1027              72 :     row[0] = 1.0f;
                               1028            6012 :     for (i = 1; i <= n; i++)
                               1029                 :     {
                               1030            5940 :         float       t = p[i - 1];
                               1031                 : 
                               1032                 :         /* Swap rows */
                               1033            5940 :         tmp = row;
                               1034            5940 :         row = prev_row;
                               1035            5940 :         prev_row = tmp;
                               1036                 : 
                               1037                 :         /* Calculate next row */
                               1038           26574 :         for (j = 0; j <= i && j <= m; j++)
                               1039                 :         {
                               1040           20634 :             float       val = 0.0f;
                               1041                 : 
                               1042           20634 :             if (j < i)
                               1043           20478 :                 val += prev_row[j] * (1.0f - t);
                               1044           20634 :             if (j > 0)
                               1045           14694 :                 val += prev_row[j - 1] * t;
                               1046           20634 :             row[j] = val;
                               1047                 :         }
                               1048                 :     }
                               1049                 : 
                               1050                 :     /*
                               1051                 :      * The presence of many distinct rare (not in "p") elements materially
                               1052                 :      * decreases selectivity.  Model their collective occurrence with the
                               1053                 :      * Poisson distribution.
                               1054                 :      */
                               1055              72 :     if (rest > DEFAULT_CONTAIN_SEL)
                               1056                 :     {
                               1057                 :         float       t;
                               1058                 : 
                               1059                 :         /* Swap rows */
 4054 tgl                      1060 UBC           0 :         tmp = row;
                               1061               0 :         row = prev_row;
                               1062               0 :         prev_row = tmp;
                               1063                 : 
                               1064               0 :         for (i = 0; i <= m; i++)
                               1065               0 :             row[i] = 0.0f;
                               1066                 : 
                               1067                 :         /* Value of Poisson distribution for 0 occurrences */
                               1068               0 :         t = exp(-rest);
                               1069                 : 
                               1070                 :         /*
                               1071                 :          * Calculate convolution of previously computed distribution and the
                               1072                 :          * Poisson distribution.
                               1073                 :          */
                               1074               0 :         for (i = 0; i <= m; i++)
                               1075                 :         {
                               1076               0 :             for (j = 0; j <= m - i; j++)
                               1077               0 :                 row[j + i] += prev_row[j] * t;
                               1078                 : 
                               1079                 :             /* Get Poisson distribution value for (i + 1) occurrences */
                               1080               0 :             t *= rest / (float) (i + 1);
                               1081                 :         }
                               1082                 :     }
                               1083                 : 
 4054 tgl                      1084 CBC          72 :     pfree(prev_row);
                               1085              72 :     return row;
                               1086                 : }
                               1087                 : 
                               1088                 : /* Fast function for floor value of 2 based logarithm calculation. */
                               1089                 : static int
                               1090             497 : floor_log2(uint32 n)
                               1091                 : {
                               1092             497 :     int         logval = 0;
                               1093                 : 
                               1094             497 :     if (n == 0)
                               1095             299 :         return -1;
                               1096             198 :     if (n >= (1 << 16))
                               1097                 :     {
 4054 tgl                      1098 UBC           0 :         n >>= 16;
                               1099               0 :         logval += 16;
                               1100                 :     }
 4054 tgl                      1101 CBC         198 :     if (n >= (1 << 8))
                               1102                 :     {
                               1103              60 :         n >>= 8;
                               1104              60 :         logval += 8;
                               1105                 :     }
                               1106             198 :     if (n >= (1 << 4))
                               1107                 :     {
                               1108             138 :         n >>= 4;
                               1109             138 :         logval += 4;
                               1110                 :     }
                               1111             198 :     if (n >= (1 << 2))
                               1112                 :     {
                               1113             132 :         n >>= 2;
                               1114             132 :         logval += 2;
                               1115                 :     }
                               1116             198 :     if (n >= (1 << 1))
                               1117                 :     {
                               1118              93 :         logval += 1;
                               1119                 :     }
                               1120             198 :     return logval;
                               1121                 : }
                               1122                 : 
                               1123                 : /*
                               1124                 :  * find_next_mcelem binary-searches a most common elements array, starting
                               1125                 :  * from *index, for the first member >= value.  It saves the position of the
                               1126                 :  * match into *index and returns true if it's an exact match.  (Note: we
                               1127                 :  * assume the mcelem elements are distinct so there can't be more than one
                               1128                 :  * exact match.)
                               1129                 :  */
                               1130                 : static bool
                               1131             419 : find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index,
                               1132                 :                  TypeCacheEntry *typentry)
                               1133                 : {
                               1134             419 :     int         l = *index,
                               1135             419 :                 r = nmcelem - 1,
                               1136                 :                 i,
                               1137                 :                 res;
                               1138                 : 
                               1139            1661 :     while (l <= r)
                               1140                 :     {
                               1141            1446 :         i = (l + r) / 2;
 1577                          1142            1446 :         res = element_compare(&mcelem[i], &value, typentry);
 4054                          1143            1446 :         if (res == 0)
                               1144                 :         {
                               1145             204 :             *index = i;
                               1146             204 :             return true;
                               1147                 :         }
                               1148            1242 :         else if (res < 0)
                               1149             441 :             l = i + 1;
                               1150                 :         else
                               1151             801 :             r = i - 1;
                               1152                 :     }
                               1153             215 :     *index = l;
                               1154             215 :     return false;
                               1155                 : }
                               1156                 : 
                               1157                 : /*
                               1158                 :  * Comparison function for elements.
                               1159                 :  *
                               1160                 :  * We use the element type's default btree opclass, and its default collation
                               1161                 :  * if the type is collation-sensitive.
                               1162                 :  *
                               1163                 :  * XXX consider using SortSupport infrastructure
                               1164                 :  */
                               1165                 : static int
                               1166            3804 : element_compare(const void *key1, const void *key2, void *arg)
                               1167                 : {
                               1168            3804 :     Datum       d1 = *((const Datum *) key1);
                               1169            3804 :     Datum       d2 = *((const Datum *) key2);
 1577                          1170            3804 :     TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
                               1171            3804 :     FmgrInfo   *cmpfunc = &typentry->cmp_proc_finfo;
                               1172                 :     Datum       c;
                               1173                 : 
                               1174            3804 :     c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
 4054                          1175            3804 :     return DatumGetInt32(c);
                               1176                 : }
                               1177                 : 
                               1178                 : /*
                               1179                 :  * Comparison function for sorting floats into descending order.
                               1180                 :  */
                               1181                 : static int
 4054 tgl                      1182 UBC           0 : float_compare_desc(const void *key1, const void *key2)
                               1183                 : {
                               1184               0 :     float       d1 = *((const float *) key1);
                               1185               0 :     float       d2 = *((const float *) key2);
                               1186                 : 
                               1187               0 :     if (d1 > d2)
                               1188               0 :         return -1;
                               1189               0 :     else if (d1 < d2)
                               1190               0 :         return 1;
                               1191                 :     else
                               1192               0 :         return 0;
                               1193                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a