LCOV - differential code coverage report
Current view: top level - src/backend/nodes - queryjumblefuncs.c (source / functions) Coverage Total Hit UNC GNC
Current: Differential Code Coverage HEAD vs 15 Lines: 87.1 % 170 148 22 148
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 9 9 9
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * queryjumblefuncs.c
       4                 :  *   Query normalization and fingerprinting.
       5                 :  *
       6                 :  * Normalization is a process whereby similar queries, typically differing only
       7                 :  * in their constants (though the exact rules are somewhat more subtle than
       8                 :  * that) are recognized as equivalent, and are tracked as a single entry.  This
       9                 :  * is particularly useful for non-prepared queries.
      10                 :  *
      11                 :  * Normalization is implemented by fingerprinting queries, selectively
      12                 :  * serializing those fields of each query tree's nodes that are judged to be
      13                 :  * essential to the query.  This is referred to as a query jumble.  This is
      14                 :  * distinct from a regular serialization in that various extraneous
      15                 :  * information is ignored as irrelevant or not essential to the query, such
      16                 :  * as the collations of Vars and, most notably, the values of constants.
      17                 :  *
      18                 :  * This jumble is acquired at the end of parse analysis of each query, and
      19                 :  * a 64-bit hash of it is stored into the query's Query.queryId field.
      20                 :  * The server then copies this value around, making it available in plan
      21                 :  * tree(s) generated from the query.  The executor can then use this value
      22                 :  * to blame query costs on the proper queryId.
      23                 :  *
      24                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      25                 :  * Portions Copyright (c) 1994, Regents of the University of California
      26                 :  *
      27                 :  *
      28                 :  * IDENTIFICATION
      29                 :  *    src/backend/nodes/queryjumblefuncs.c
      30                 :  *
      31                 :  *-------------------------------------------------------------------------
      32                 :  */
      33                 : #include "postgres.h"
      34                 : 
      35                 : #include "common/hashfn.h"
      36                 : #include "miscadmin.h"
      37                 : #include "nodes/queryjumble.h"
      38                 : #include "parser/scansup.h"
      39                 : 
      40                 : #define JUMBLE_SIZE             1024    /* query serialization buffer size */
      41                 : 
      42                 : /* GUC parameters */
      43                 : int         compute_query_id = COMPUTE_QUERY_ID_AUTO;
      44                 : 
      45                 : /* True when compute_query_id is ON, or AUTO and a module requests them */
      46                 : bool        query_id_enabled = false;
      47                 : 
      48                 : static void AppendJumble(JumbleState *jstate,
      49                 :                          const unsigned char *item, Size size);
      50                 : static void RecordConstLocation(JumbleState *jstate, int location);
      51                 : static void _jumbleNode(JumbleState *jstate, Node *node);
      52                 : static void _jumbleA_Const(JumbleState *jstate, Node *node);
      53                 : static void _jumbleList(JumbleState *jstate, Node *node);
      54                 : static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
      55                 : 
      56                 : /*
      57                 :  * Given a possibly multi-statement source string, confine our attention to the
      58                 :  * relevant part of the string.
      59                 :  */
      60                 : const char *
      61 GNC       68943 : CleanQuerytext(const char *query, int *location, int *len)
      62                 : {
      63           68943 :     int         query_location = *location;
      64           68943 :     int         query_len = *len;
      65                 : 
      66                 :     /* First apply starting offset, unless it's -1 (unknown). */
      67           68943 :     if (query_location >= 0)
      68                 :     {
      69           68759 :         Assert(query_location <= strlen(query));
      70           68759 :         query += query_location;
      71                 :         /* Length of 0 (or -1) means "rest of string" */
      72           68759 :         if (query_len <= 0)
      73            3926 :             query_len = strlen(query);
      74                 :         else
      75           64833 :             Assert(query_len <= strlen(query));
      76                 :     }
      77                 :     else
      78                 :     {
      79                 :         /* If query location is unknown, distrust query_len as well */
      80             184 :         query_location = 0;
      81             184 :         query_len = strlen(query);
      82                 :     }
      83                 : 
      84                 :     /*
      85                 :      * Discard leading and trailing whitespace, too.  Use scanner_isspace()
      86                 :      * not libc's isspace(), because we want to match the lexer's behavior.
      87                 :      */
      88           69341 :     while (query_len > 0 && scanner_isspace(query[0]))
      89             398 :         query++, query_location++, query_len--;
      90           69415 :     while (query_len > 0 && scanner_isspace(query[query_len - 1]))
      91             472 :         query_len--;
      92                 : 
      93           68943 :     *location = query_location;
      94           68943 :     *len = query_len;
      95                 : 
      96           68943 :     return query;
      97                 : }
      98                 : 
      99                 : JumbleState *
     100           60772 : JumbleQuery(Query *query, const char *querytext)
     101                 : {
     102           60772 :     JumbleState *jstate = NULL;
     103                 : 
     104           60772 :     Assert(IsQueryIdEnabled());
     105                 : 
     106           60772 :     jstate = (JumbleState *) palloc(sizeof(JumbleState));
     107                 : 
     108                 :     /* Set up workspace for query jumbling */
     109           60772 :     jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
     110           60772 :     jstate->jumble_len = 0;
     111           60772 :     jstate->clocations_buf_size = 32;
     112           60772 :     jstate->clocations = (LocationLen *)
     113           60772 :         palloc(jstate->clocations_buf_size * sizeof(LocationLen));
     114           60772 :     jstate->clocations_count = 0;
     115           60772 :     jstate->highest_extern_param_id = 0;
     116                 : 
     117                 :     /* Compute query ID and mark the Query node with it */
     118           60772 :     _jumbleNode(jstate, (Node *) query);
     119           60772 :     query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
     120           60772 :                                                       jstate->jumble_len,
     121                 :                                                       0));
     122                 : 
     123                 :     /*
     124                 :      * If we are unlucky enough to get a hash of zero, use 1 instead for
     125                 :      * normal statements and 2 for utility queries.
     126                 :      */
     127           60772 :     if (query->queryId == UINT64CONST(0))
     128                 :     {
     129 UNC           0 :         if (query->utilityStmt)
     130               0 :             query->queryId = UINT64CONST(2);
     131                 :         else
     132               0 :             query->queryId = UINT64CONST(1);
     133                 :     }
     134                 : 
     135 GNC       60772 :     return jstate;
     136                 : }
     137                 : 
     138                 : /*
     139                 :  * Enables query identifier computation.
     140                 :  *
     141                 :  * Third-party plugins can use this function to inform core that they require
     142                 :  * a query identifier to be computed.
     143                 :  */
     144                 : void
     145               3 : EnableQueryId(void)
     146                 : {
     147               3 :     if (compute_query_id != COMPUTE_QUERY_ID_OFF)
     148               3 :         query_id_enabled = true;
     149               3 : }
     150                 : 
     151                 : /*
     152                 :  * AppendJumble: Append a value that is substantive in a given query to
     153                 :  * the current jumble.
     154                 :  */
     155                 : static void
     156         2819372 : AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
     157                 : {
     158         2819372 :     unsigned char *jumble = jstate->jumble;
     159         2819372 :     Size        jumble_len = jstate->jumble_len;
     160                 : 
     161                 :     /*
     162                 :      * Whenever the jumble buffer is full, we hash the current contents and
     163                 :      * reset the buffer to contain just that hash value, thus relying on the
     164                 :      * hash to summarize everything so far.
     165                 :      */
     166         5639012 :     while (size > 0)
     167                 :     {
     168                 :         Size        part_size;
     169                 : 
     170         2819640 :         if (jumble_len >= JUMBLE_SIZE)
     171                 :         {
     172                 :             uint64      start_hash;
     173                 : 
     174             801 :             start_hash = DatumGetUInt64(hash_any_extended(jumble,
     175                 :                                                           JUMBLE_SIZE, 0));
     176             801 :             memcpy(jumble, &start_hash, sizeof(start_hash));
     177             801 :             jumble_len = sizeof(start_hash);
     178                 :         }
     179         2819640 :         part_size = Min(size, JUMBLE_SIZE - jumble_len);
     180         2819640 :         memcpy(jumble + jumble_len, item, part_size);
     181         2819640 :         jumble_len += part_size;
     182         2819640 :         item += part_size;
     183         2819640 :         size -= part_size;
     184                 :     }
     185         2819372 :     jstate->jumble_len = jumble_len;
     186         2819372 : }
     187                 : 
     188                 : /*
     189                 :  * Record location of constant within query string of query tree
     190                 :  * that is currently being walked.
     191                 :  */
     192                 : static void
     193           86704 : RecordConstLocation(JumbleState *jstate, int location)
     194                 : {
     195                 :     /* -1 indicates unknown or undefined location */
     196           86704 :     if (location >= 0)
     197                 :     {
     198                 :         /* enlarge array if needed */
     199           82985 :         if (jstate->clocations_count >= jstate->clocations_buf_size)
     200                 :         {
     201              54 :             jstate->clocations_buf_size *= 2;
     202              54 :             jstate->clocations = (LocationLen *)
     203              54 :                 repalloc(jstate->clocations,
     204              54 :                          jstate->clocations_buf_size *
     205                 :                          sizeof(LocationLen));
     206                 :         }
     207           82985 :         jstate->clocations[jstate->clocations_count].location = location;
     208                 :         /* initialize lengths to -1 to simplify third-party module usage */
     209           82985 :         jstate->clocations[jstate->clocations_count].length = -1;
     210           82985 :         jstate->clocations_count++;
     211                 :     }
     212           86704 : }
     213                 : 
     214                 : #define JUMBLE_NODE(item) \
     215                 :     _jumbleNode(jstate, (Node *) expr->item)
     216                 : #define JUMBLE_LOCATION(location) \
     217                 :     RecordConstLocation(jstate, expr->location)
     218                 : #define JUMBLE_FIELD(item) \
     219                 :     AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
     220                 : #define JUMBLE_FIELD_SINGLE(item) \
     221                 :     AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
     222                 : #define JUMBLE_STRING(str) \
     223                 : do { \
     224                 :     if (expr->str) \
     225                 :         AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
     226                 : } while(0)
     227                 : 
     228                 : #include "queryjumblefuncs.funcs.c"
     229                 : 
     230                 : static void
     231         2547341 : _jumbleNode(JumbleState *jstate, Node *node)
     232                 : {
     233         2547341 :     Node       *expr = node;
     234                 : 
     235         2547341 :     if (expr == NULL)
     236         1422976 :         return;
     237                 : 
     238                 :     /* Guard against stack overflow due to overly complex expressions */
     239         1124365 :     check_stack_depth();
     240                 : 
     241                 :     /*
     242                 :      * We always emit the node's NodeTag, then any additional fields that are
     243                 :      * considered significant, and then we recurse to any child nodes.
     244                 :      */
     245         1124365 :     JUMBLE_FIELD(type);
     246                 : 
     247         1124365 :     switch (nodeTag(expr))
     248                 :     {
     249                 : #include "queryjumblefuncs.switch.c"
     250                 : 
     251          278335 :         case T_List:
     252                 :         case T_IntList:
     253                 :         case T_OidList:
     254                 :         case T_XidList:
     255          278335 :             _jumbleList(jstate, expr);
     256          278335 :             break;
     257                 : 
     258 UNC           0 :         default:
     259                 :             /* Only a warning, since we can stumble along anyway */
     260               0 :             elog(WARNING, "unrecognized node type: %d",
     261                 :                  (int) nodeTag(expr));
     262               0 :             break;
     263                 :     }
     264                 : 
     265                 :     /* Special cases to handle outside the automated code */
     266 GNC     1124365 :     switch (nodeTag(expr))
     267                 :     {
     268            5083 :         case T_Param:
     269                 :             {
     270            5083 :                 Param      *p = (Param *) node;
     271                 : 
     272                 :                 /*
     273                 :                  * Update the highest Param id seen, in order to start
     274                 :                  * normalization correctly.
     275                 :                  */
     276            5083 :                 if (p->paramkind == PARAM_EXTERN &&
     277            4773 :                     p->paramid > jstate->highest_extern_param_id)
     278            4010 :                     jstate->highest_extern_param_id = p->paramid;
     279                 :             }
     280            5083 :             break;
     281         1119282 :         default:
     282         1119282 :             break;
     283                 :     }
     284                 : }
     285                 : 
     286                 : static void
     287          278335 : _jumbleList(JumbleState *jstate, Node *node)
     288                 : {
     289          278335 :     List       *expr = (List *) node;
     290                 :     ListCell   *l;
     291                 : 
     292          278335 :     switch (expr->type)
     293                 :     {
     294          277949 :         case T_List:
     295          760339 :             foreach(l, expr)
     296          482390 :                 _jumbleNode(jstate, lfirst(l));
     297          277949 :             break;
     298             386 :         case T_IntList:
     299             865 :             foreach(l, expr)
     300             479 :                 JUMBLE_FIELD_SINGLE(lfirst_int(l));
     301             386 :             break;
     302 UNC           0 :         case T_OidList:
     303               0 :             foreach(l, expr)
     304               0 :                 JUMBLE_FIELD_SINGLE(lfirst_oid(l));
     305               0 :             break;
     306               0 :         case T_XidList:
     307               0 :             foreach(l, expr)
     308               0 :                 JUMBLE_FIELD_SINGLE(lfirst_xid(l));
     309               0 :             break;
     310               0 :         default:
     311               0 :             elog(ERROR, "unrecognized list node type: %d",
     312                 :                  (int) expr->type);
     313                 :             return;
     314                 :     }
     315                 : }
     316                 : 
     317                 : static void
     318 GNC        7275 : _jumbleA_Const(JumbleState *jstate, Node *node)
     319                 : {
     320            7275 :     A_Const    *expr = (A_Const *) node;
     321                 : 
     322            7275 :     JUMBLE_FIELD(isnull);
     323            7275 :     if (!expr->isnull)
     324                 :     {
     325            7202 :         JUMBLE_FIELD(val.node.type);
     326            7202 :         switch (nodeTag(&expr->val))
     327                 :         {
     328            3524 :             case T_Integer:
     329            3524 :                 JUMBLE_FIELD(val.ival.ival);
     330            3524 :                 break;
     331              42 :             case T_Float:
     332              42 :                 JUMBLE_STRING(val.fval.fval);
     333              42 :                 break;
     334              96 :             case T_Boolean:
     335              96 :                 JUMBLE_FIELD(val.boolval.boolval);
     336              96 :                 break;
     337            3538 :             case T_String:
     338            3538 :                 JUMBLE_STRING(val.sval.sval);
     339            3538 :                 break;
     340               2 :             case T_BitString:
     341               2 :                 JUMBLE_STRING(val.bsval.bsval);
     342               2 :                 break;
     343 UNC           0 :             default:
     344               0 :                 elog(ERROR, "unrecognized node type: %d",
     345                 :                      (int) nodeTag(&expr->val));
     346                 :                 break;
     347                 :         }
     348                 :     }
     349 GNC        7275 : }
     350                 : 
     351                 : static void
     352           52382 : _jumbleRangeTblEntry(JumbleState *jstate, Node *node)
     353                 : {
     354           52382 :     RangeTblEntry *expr = (RangeTblEntry *) node;
     355                 : 
     356           52382 :     JUMBLE_FIELD(rtekind);
     357           52382 :     switch (expr->rtekind)
     358                 :     {
     359           37437 :         case RTE_RELATION:
     360           37437 :             JUMBLE_FIELD(relid);
     361           37437 :             JUMBLE_NODE(tablesample);
     362           37437 :             JUMBLE_FIELD(inh);
     363           37437 :             break;
     364            4577 :         case RTE_SUBQUERY:
     365            4577 :             JUMBLE_NODE(subquery);
     366            4577 :             break;
     367            5717 :         case RTE_JOIN:
     368            5717 :             JUMBLE_FIELD(jointype);
     369            5717 :             break;
     370            2915 :         case RTE_FUNCTION:
     371            2915 :             JUMBLE_NODE(functions);
     372            2915 :             break;
     373              37 :         case RTE_TABLEFUNC:
     374              37 :             JUMBLE_NODE(tablefunc);
     375              37 :             break;
     376            1175 :         case RTE_VALUES:
     377            1175 :             JUMBLE_NODE(values_lists);
     378            1175 :             break;
     379             449 :         case RTE_CTE:
     380                 : 
     381                 :             /*
     382                 :              * Depending on the CTE name here isn't ideal, but it's the only
     383                 :              * info we have to identify the referenced WITH item.
     384                 :              */
     385             449 :             JUMBLE_STRING(ctename);
     386             449 :             JUMBLE_FIELD(ctelevelsup);
     387             449 :             break;
     388              75 :         case RTE_NAMEDTUPLESTORE:
     389              75 :             JUMBLE_STRING(enrname);
     390              75 :             break;
     391 UNC           0 :         case RTE_RESULT:
     392               0 :             break;
     393               0 :         default:
     394               0 :             elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
     395                 :             break;
     396                 :     }
     397 GNC       52382 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a