LCOV - differential code coverage report
Current view: top level - contrib/pg_stat_statements - pg_stat_statements.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 69.0 % 909 627 3 15 141 123 25 206 5 391 129 215 5 6
Current Date: 2023-04-08 15:15:32 Functions: 82.2 % 45 37 5 3 23 2 12 5 22 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * pg_stat_statements.c
       4                 :  *      Track statement planning and execution times as well as resource
       5                 :  *      usage across a whole database cluster.
       6                 :  *
       7                 :  * Execution costs are totaled for each distinct source query, and kept in
       8                 :  * a shared hashtable.  (We track only as many distinct queries as will fit
       9                 :  * in the designated amount of shared memory.)
      10                 :  *
      11                 :  * Starting in Postgres 9.2, this module normalized query entries.  As of
      12                 :  * Postgres 14, the normalization is done by the core if compute_query_id is
      13                 :  * enabled, or optionally by third-party modules.
      14                 :  *
      15                 :  * To facilitate presenting entries to users, we create "representative" query
      16                 :  * strings in which constants are replaced with parameter symbols ($n), to
      17                 :  * make it clearer what a normalized entry can represent.  To save on shared
      18                 :  * memory, and to avoid having to truncate oversized query strings, we store
      19                 :  * these strings in a temporary external query-texts file.  Offsets into this
      20                 :  * file are kept in shared memory.
      21                 :  *
      22                 :  * Note about locking issues: to create or delete an entry in the shared
      23                 :  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
      24                 :  * in an entry except the counters requires the same.  To look up an entry,
      25                 :  * one must hold the lock shared.  To read or update the counters within
      26                 :  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
      27                 :  * disappear!) and also take the entry's mutex spinlock.
      28                 :  * The shared state variable pgss->extent (the next free spot in the external
      29                 :  * query-text file) should be accessed only while holding either the
      30                 :  * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
      31                 :  * allow reserving file space while holding only shared lock on pgss->lock.
      32                 :  * Rewriting the entire external query-text file, eg for garbage collection,
      33                 :  * requires holding pgss->lock exclusively; this allows individual entries
      34                 :  * in the file to be read or written while holding only shared lock.
      35                 :  *
      36                 :  *
      37                 :  * Copyright (c) 2008-2023, PostgreSQL Global Development Group
      38                 :  *
      39                 :  * IDENTIFICATION
      40                 :  *    contrib/pg_stat_statements/pg_stat_statements.c
      41                 :  *
      42                 :  *-------------------------------------------------------------------------
      43                 :  */
      44                 : #include "postgres.h"
      45                 : 
      46                 : #include <math.h>
      47                 : #include <sys/stat.h>
      48                 : #include <unistd.h>
      49                 : 
      50                 : #include "access/parallel.h"
      51                 : #include "catalog/pg_authid.h"
      52                 : #include "common/hashfn.h"
      53                 : #include "executor/instrument.h"
      54                 : #include "funcapi.h"
      55                 : #include "jit/jit.h"
      56                 : #include "mb/pg_wchar.h"
      57                 : #include "miscadmin.h"
      58                 : #include "nodes/queryjumble.h"
      59                 : #include "optimizer/planner.h"
      60                 : #include "parser/analyze.h"
      61                 : #include "parser/parsetree.h"
      62                 : #include "parser/scanner.h"
      63                 : #include "parser/scansup.h"
      64                 : #include "pgstat.h"
      65                 : #include "storage/fd.h"
      66                 : #include "storage/ipc.h"
      67                 : #include "storage/lwlock.h"
      68                 : #include "storage/shmem.h"
      69                 : #include "storage/spin.h"
      70                 : #include "tcop/utility.h"
      71                 : #include "utils/acl.h"
      72                 : #include "utils/builtins.h"
      73                 : #include "utils/memutils.h"
      74                 : #include "utils/timestamp.h"
      75                 : 
      76 CBC           4 : PG_MODULE_MAGIC;
      77                 : 
      78                 : /* Location of permanent stats file (valid when database is shut down) */
      79                 : #define PGSS_DUMP_FILE  PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
      80                 : 
      81                 : /*
      82                 :  * Location of external query text file.
      83                 :  */
      84                 : #define PGSS_TEXT_FILE  PG_STAT_TMP_DIR "/pgss_query_texts.stat"
      85                 : 
      86                 : /* Magic number identifying the stats file format */
      87                 : static const uint32 PGSS_FILE_HEADER = 0x20220408;
      88                 : 
      89                 : /* PostgreSQL major version number; a change invalidates all entries */
      90                 : static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
      91                 : 
      92                 : /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
      93                 : #define USAGE_EXEC(duration)    (1.0)
      94                 : #define USAGE_INIT              (1.0)   /* including initial planning */
      95                 : #define ASSUMED_MEDIAN_INIT     (10.0)  /* initial assumed median usage */
      96                 : #define ASSUMED_LENGTH_INIT     1024    /* initial assumed mean query length */
      97                 : #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
      98                 : #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
      99                 : #define USAGE_DEALLOC_PERCENT   5   /* free this % of entries at once */
     100                 : #define IS_STICKY(c)    ((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
     101                 : 
     102                 : /*
     103                 :  * Utility statements that pgss_ProcessUtility and pgss_post_parse_analyze
     104                 :  * ignore.  The macro below is true for all other statement types.
     105                 :  */
     106                 : #define PGSS_HANDLED_UTILITY(n)     (!IsA(n, ExecuteStmt) && \
     107                 :                                     !IsA(n, PrepareStmt) && \
     108                 :                                     !IsA(n, DeallocateStmt))
     109                 : 
     110                 : /*
     111                 :  * Extension version number, for supporting older extension versions' objects
     112                 :  */
     113                 : typedef enum pgssVersion
     114                 : {
     115                 :     PGSS_V1_0 = 0,
     116                 :     PGSS_V1_1,
     117                 :     PGSS_V1_2,
     118                 :     PGSS_V1_3,
     119                 :     PGSS_V1_8,
     120                 :     PGSS_V1_9,
     121                 :     PGSS_V1_10
     122                 : } pgssVersion;
     123                 : 
     124                 : typedef enum pgssStoreKind
     125                 : {
     126                 :     PGSS_INVALID = -1,
     127                 : 
     128                 :     /*
     129                 :      * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
     130                 :      * reference the underlying values in the arrays in the Counters struct,
     131                 :      * and this order is required in pg_stat_statements_internal().
     132                 :      */
     133                 :     PGSS_PLAN = 0,
     134                 :     PGSS_EXEC,
     135                 : 
     136                 :     PGSS_NUMKIND                /* Must be last value of this enum */
     137                 : } pgssStoreKind;
     138                 : 
     139                 : /*
     140                 :  * Hashtable key that defines the identity of a hashtable entry.  We separate
     141                 :  * queries by user and by database even if they are otherwise identical.
     142                 :  *
     143                 :  * If you add a new key to this struct, make sure to teach pgss_store() to
     144                 :  * zero the padding bytes.  Otherwise, things will break, because pgss_hash is
     145                 :  * created using HASH_BLOBS, and thus tag_hash is used to hash this.
     146                 :  *
     147                 :  */
     148                 : typedef struct pgssHashKey
     149                 : {
     150                 :     Oid         userid;         /* user OID */
     151                 :     Oid         dbid;           /* database OID */
     152                 :     uint64      queryid;        /* query identifier */
     153                 :     bool        toplevel;       /* query executed at top level */
     154                 : } pgssHashKey;
     155                 : 
     156                 : /*
     157                 :  * The actual stats counters kept within pgssEntry.
     158                 :  */
     159                 : typedef struct Counters
     160                 : {
     161                 :     int64       calls[PGSS_NUMKIND];    /* # of times planned/executed */
     162                 :     double      total_time[PGSS_NUMKIND];   /* total planning/execution time,
     163                 :                                              * in msec */
     164                 :     double      min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
     165                 :                                          * msec */
     166                 :     double      max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
     167                 :                                          * msec */
     168                 :     double      mean_time[PGSS_NUMKIND];    /* mean planning/execution time in
     169                 :                                              * msec */
     170                 :     double      sum_var_time[PGSS_NUMKIND]; /* sum of variances in
     171                 :                                              * planning/execution time in msec */
     172                 :     int64       rows;           /* total # of retrieved or affected rows */
     173                 :     int64       shared_blks_hit;    /* # of shared buffer hits */
     174                 :     int64       shared_blks_read;   /* # of shared disk blocks read */
     175                 :     int64       shared_blks_dirtied;    /* # of shared disk blocks dirtied */
     176                 :     int64       shared_blks_written;    /* # of shared disk blocks written */
     177                 :     int64       local_blks_hit; /* # of local buffer hits */
     178                 :     int64       local_blks_read;    /* # of local disk blocks read */
     179                 :     int64       local_blks_dirtied; /* # of local disk blocks dirtied */
     180                 :     int64       local_blks_written; /* # of local disk blocks written */
     181                 :     int64       temp_blks_read; /* # of temp blocks read */
     182                 :     int64       temp_blks_written;  /* # of temp blocks written */
     183                 :     double      blk_read_time;  /* time spent reading blocks, in msec */
     184                 :     double      blk_write_time; /* time spent writing blocks, in msec */
     185                 :     double      temp_blk_read_time; /* time spent reading temp blocks, in msec */
     186                 :     double      temp_blk_write_time;    /* time spent writing temp blocks, in
     187                 :                                          * msec */
     188                 :     double      usage;          /* usage factor */
     189                 :     int64       wal_records;    /* # of WAL records generated */
     190                 :     int64       wal_fpi;        /* # of WAL full page images generated */
     191                 :     uint64      wal_bytes;      /* total amount of WAL generated in bytes */
     192                 :     int64       jit_functions;  /* total number of JIT functions emitted */
     193                 :     double      jit_generation_time;    /* total time to generate jit code */
     194                 :     int64       jit_inlining_count; /* number of times inlining time has been
     195                 :                                      * > 0 */
     196                 :     double      jit_inlining_time;  /* total time to inline jit code */
     197                 :     int64       jit_optimization_count; /* number of times optimization time
     198                 :                                          * has been > 0 */
     199                 :     double      jit_optimization_time;  /* total time to optimize jit code */
     200                 :     int64       jit_emission_count; /* number of times emission time has been
     201                 :                                      * > 0 */
     202                 :     double      jit_emission_time;  /* total time to emit jit code */
     203                 : } Counters;
     204                 : 
     205                 : /*
     206                 :  * Global statistics for pg_stat_statements
     207                 :  */
     208                 : typedef struct pgssGlobalStats
     209                 : {
     210                 :     int64       dealloc;        /* # of times entries were deallocated */
     211                 :     TimestampTz stats_reset;    /* timestamp with all stats reset */
     212                 : } pgssGlobalStats;
     213                 : 
     214                 : /*
     215                 :  * Statistics per statement
     216                 :  *
     217                 :  * Note: in event of a failure in garbage collection of the query text file,
     218                 :  * we reset query_offset to zero and query_len to -1.  This will be seen as
     219                 :  * an invalid state by qtext_fetch().
     220                 :  */
     221                 : typedef struct pgssEntry
     222                 : {
     223                 :     pgssHashKey key;            /* hash key of entry - MUST BE FIRST */
     224                 :     Counters    counters;       /* the statistics for this query */
     225                 :     Size        query_offset;   /* query text offset in external file */
     226                 :     int         query_len;      /* # of valid bytes in query string, or -1 */
     227                 :     int         encoding;       /* query text encoding */
     228                 :     slock_t     mutex;          /* protects the counters only */
     229                 : } pgssEntry;
     230                 : 
     231                 : /*
     232                 :  * Global shared state
     233                 :  */
     234                 : typedef struct pgssSharedState
     235                 : {
     236                 :     LWLock     *lock;           /* protects hashtable search/modification */
     237                 :     double      cur_median_usage;   /* current median usage in hashtable */
     238                 :     Size        mean_query_len; /* current mean entry text length */
     239                 :     slock_t     mutex;          /* protects following fields only: */
     240                 :     Size        extent;         /* current extent of query file */
     241                 :     int         n_writers;      /* number of active writers to query file */
     242                 :     int         gc_count;       /* query file garbage collection cycle count */
     243                 :     pgssGlobalStats stats;      /* global statistics for pgss */
     244                 : } pgssSharedState;
     245                 : 
     246                 : /*---- Local variables ----*/
     247                 : 
     248                 : /* Current nesting depth of ExecutorRun+ProcessUtility calls */
     249                 : static int  exec_nested_level = 0;
     250                 : 
     251                 : /* Current nesting depth of planner calls */
     252                 : static int  plan_nested_level = 0;
     253                 : 
     254                 : /* Saved hook values in case of unload */
     255                 : static shmem_request_hook_type prev_shmem_request_hook = NULL;
     256                 : static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
     257                 : static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
     258                 : static planner_hook_type prev_planner_hook = NULL;
     259                 : static ExecutorStart_hook_type prev_ExecutorStart = NULL;
     260                 : static ExecutorRun_hook_type prev_ExecutorRun = NULL;
     261                 : static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
     262                 : static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
     263                 : static ProcessUtility_hook_type prev_ProcessUtility = NULL;
     264                 : 
     265                 : /* Links to shared memory state */
     266                 : static pgssSharedState *pgss = NULL;
     267                 : static HTAB *pgss_hash = NULL;
     268                 : 
     269                 : /*---- GUC variables ----*/
     270                 : 
     271                 : typedef enum
     272                 : {
     273                 :     PGSS_TRACK_NONE,            /* track no statements */
     274                 :     PGSS_TRACK_TOP,             /* only top level statements */
     275                 :     PGSS_TRACK_ALL              /* all statements, including nested ones */
     276                 : }           PGSSTrackLevel;
     277                 : 
     278                 : static const struct config_enum_entry track_options[] =
     279                 : {
     280                 :     {"none", PGSS_TRACK_NONE, false},
     281                 :     {"top", PGSS_TRACK_TOP, false},
     282                 :     {"all", PGSS_TRACK_ALL, false},
     283                 :     {NULL, 0, false}
     284                 : };
     285                 : 
     286                 : static int  pgss_max = 5000;    /* max # statements to track */
     287                 : static int  pgss_track = PGSS_TRACK_TOP;    /* tracking level */
     288                 : static bool pgss_track_utility = true;  /* whether to track utility commands */
     289                 : static bool pgss_track_planning = false;    /* whether to track planning
     290                 :                                              * duration */
     291                 : static bool pgss_save = true;   /* whether to save stats across shutdown */
     292                 : 
     293                 : 
     294                 : #define pgss_enabled(level) \
     295                 :     (!IsParallelWorker() && \
     296                 :     (pgss_track == PGSS_TRACK_ALL || \
     297                 :     (pgss_track == PGSS_TRACK_TOP && (level) == 0)))
     298                 : 
     299                 : #define record_gc_qtexts() \
     300                 :     do { \
     301                 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
     302                 :         SpinLockAcquire(&s->mutex); \
     303                 :         s->gc_count++; \
     304                 :         SpinLockRelease(&s->mutex); \
     305                 :     } while(0)
     306                 : 
     307                 : /*---- Function declarations ----*/
     308                 : 
     309               5 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
     310 GBC          12 : PG_FUNCTION_INFO_V1(pg_stat_statements_reset_1_7);
     311 LBC           0 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
     312 CBC           5 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_3);
     313               4 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_8);
     314               5 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_9);
     315 GBC          14 : PG_FUNCTION_INFO_V1(pg_stat_statements_1_10);
     316 LBC           0 : PG_FUNCTION_INFO_V1(pg_stat_statements);
     317 GIC           5 : PG_FUNCTION_INFO_V1(pg_stat_statements_info);
     318                 : 
     319                 : static void pgss_shmem_request(void);
     320                 : static void pgss_shmem_startup(void);
     321                 : static void pgss_shmem_shutdown(int code, Datum arg);
     322                 : static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
     323                 :                                     JumbleState *jstate);
     324                 : static PlannedStmt *pgss_planner(Query *parse,
     325                 :                                  const char *query_string,
     326                 :                                  int cursorOptions,
     327                 :                                  ParamListInfo boundParams);
     328                 : static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
     329                 : static void pgss_ExecutorRun(QueryDesc *queryDesc,
     330                 :                              ScanDirection direction,
     331                 :                              uint64 count, bool execute_once);
     332                 : static void pgss_ExecutorFinish(QueryDesc *queryDesc);
     333                 : static void pgss_ExecutorEnd(QueryDesc *queryDesc);
     334                 : static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
     335                 :                                 bool readOnlyTree,
     336                 :                                 ProcessUtilityContext context, ParamListInfo params,
     337                 :                                 QueryEnvironment *queryEnv,
     338                 :                                 DestReceiver *dest, QueryCompletion *qc);
     339                 : static void pgss_store(const char *query, uint64 queryId,
     340                 :                        int query_location, int query_len,
     341                 :                        pgssStoreKind kind,
     342                 :                        double total_time, uint64 rows,
     343                 :                        const BufferUsage *bufusage,
     344                 :                        const WalUsage *walusage,
     345                 :                        const struct JitInstrumentation *jitusage,
     346                 :                        JumbleState *jstate);
     347                 : static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
     348                 :                                         pgssVersion api_version,
     349                 :                                         bool showtext);
     350                 : static Size pgss_memsize(void);
     351                 : static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
     352                 :                               int encoding, bool sticky);
     353                 : static void entry_dealloc(void);
     354                 : static bool qtext_store(const char *query, int query_len,
     355                 :                         Size *query_offset, int *gc_count);
     356                 : static char *qtext_load_file(Size *buffer_size);
     357                 : static char *qtext_fetch(Size query_offset, int query_len,
     358                 :                          char *buffer, Size buffer_size);
     359                 : static bool need_gc_qtexts(void);
     360                 : static void gc_qtexts(void);
     361                 : static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
     362                 : static char *generate_normalized_query(JumbleState *jstate, const char *query,
     363                 :                                        int query_loc, int *query_len_p);
     364                 : static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
     365                 :                                      int query_loc);
     366                 : static int  comp_location(const void *a, const void *b);
     367                 : 
     368                 : 
     369                 : /*
     370                 :  * Module load callback
     371                 :  */
     372 ECB             : void
     373 GIC           4 : _PG_init(void)
     374                 : {
     375                 :     /*
     376                 :      * In order to create our shared memory area, we have to be loaded via
     377                 :      * shared_preload_libraries.  If not, fall out without hooking into any of
     378                 :      * the main system.  (We don't throw error here because it seems useful to
     379                 :      * allow the pg_stat_statements functions to be created even when the
     380                 :      * module isn't active.  The functions must protect themselves against
     381                 :      * being called then, however.)
     382 ECB             :      */
     383 CBC           4 :     if (!process_shared_preload_libraries_in_progress)
     384 GIC           1 :         return;
     385                 : 
     386                 :     /*
     387                 :      * Inform the postmaster that we want to enable query_id calculation if
     388                 :      * compute_query_id is set to auto.
     389 ECB             :      */
     390 GIC           3 :     EnableQueryId();
     391                 : 
     392                 :     /*
     393                 :      * Define (or redefine) custom GUC variables.
     394 ECB             :      */
     395 GIC           3 :     DefineCustomIntVariable("pg_stat_statements.max",
     396                 :                             "Sets the maximum number of statements tracked by pg_stat_statements.",
     397                 :                             NULL,
     398                 :                             &pgss_max,
     399                 :                             5000,
     400                 :                             100,
     401                 :                             INT_MAX / 2,
     402                 :                             PGC_POSTMASTER,
     403                 :                             0,
     404                 :                             NULL,
     405                 :                             NULL,
     406                 :                             NULL);
     407 ECB             : 
     408 GIC           3 :     DefineCustomEnumVariable("pg_stat_statements.track",
     409                 :                              "Selects which statements are tracked by pg_stat_statements.",
     410                 :                              NULL,
     411                 :                              &pgss_track,
     412                 :                              PGSS_TRACK_TOP,
     413                 :                              track_options,
     414                 :                              PGC_SUSET,
     415                 :                              0,
     416                 :                              NULL,
     417                 :                              NULL,
     418                 :                              NULL);
     419 ECB             : 
     420 GIC           3 :     DefineCustomBoolVariable("pg_stat_statements.track_utility",
     421                 :                              "Selects whether utility commands are tracked by pg_stat_statements.",
     422                 :                              NULL,
     423                 :                              &pgss_track_utility,
     424                 :                              true,
     425                 :                              PGC_SUSET,
     426                 :                              0,
     427                 :                              NULL,
     428                 :                              NULL,
     429                 :                              NULL);
     430 ECB             : 
     431 GIC           3 :     DefineCustomBoolVariable("pg_stat_statements.track_planning",
     432                 :                              "Selects whether planning duration is tracked by pg_stat_statements.",
     433                 :                              NULL,
     434                 :                              &pgss_track_planning,
     435                 :                              false,
     436                 :                              PGC_SUSET,
     437                 :                              0,
     438                 :                              NULL,
     439                 :                              NULL,
     440                 :                              NULL);
     441 ECB             : 
     442 GIC           3 :     DefineCustomBoolVariable("pg_stat_statements.save",
     443                 :                              "Save pg_stat_statements statistics across server shutdowns.",
     444                 :                              NULL,
     445                 :                              &pgss_save,
     446                 :                              true,
     447                 :                              PGC_SIGHUP,
     448                 :                              0,
     449                 :                              NULL,
     450                 :                              NULL,
     451                 :                              NULL);
     452 ECB             : 
     453 GIC           3 :     MarkGUCPrefixReserved("pg_stat_statements");
     454                 : 
     455                 :     /*
     456                 :      * Install hooks.
     457 ECB             :      */
     458 CBC           3 :     prev_shmem_request_hook = shmem_request_hook;
     459               3 :     shmem_request_hook = pgss_shmem_request;
     460               3 :     prev_shmem_startup_hook = shmem_startup_hook;
     461               3 :     shmem_startup_hook = pgss_shmem_startup;
     462               3 :     prev_post_parse_analyze_hook = post_parse_analyze_hook;
     463               3 :     post_parse_analyze_hook = pgss_post_parse_analyze;
     464               3 :     prev_planner_hook = planner_hook;
     465               3 :     planner_hook = pgss_planner;
     466               3 :     prev_ExecutorStart = ExecutorStart_hook;
     467               3 :     ExecutorStart_hook = pgss_ExecutorStart;
     468               3 :     prev_ExecutorRun = ExecutorRun_hook;
     469               3 :     ExecutorRun_hook = pgss_ExecutorRun;
     470               3 :     prev_ExecutorFinish = ExecutorFinish_hook;
     471               3 :     ExecutorFinish_hook = pgss_ExecutorFinish;
     472               3 :     prev_ExecutorEnd = ExecutorEnd_hook;
     473               3 :     ExecutorEnd_hook = pgss_ExecutorEnd;
     474               3 :     prev_ProcessUtility = ProcessUtility_hook;
     475 GIC           3 :     ProcessUtility_hook = pgss_ProcessUtility;
     476                 : }
     477                 : 
     478                 : /*
     479                 :  * shmem_request hook: request additional shared resources.  We'll allocate or
     480                 :  * attach to the shared resources in pgss_shmem_startup().
     481                 :  */
     482 ECB             : static void
     483 GIC           3 : pgss_shmem_request(void)
     484 ECB             : {
     485 GBC           3 :     if (prev_shmem_request_hook)
     486 UIC           0 :         prev_shmem_request_hook();
     487 ECB             : 
     488 CBC           3 :     RequestAddinShmemSpace(pgss_memsize());
     489               3 :     RequestNamedLWLockTranche("pg_stat_statements", 1);
     490 GIC           3 : }
     491                 : 
     492                 : /*
     493                 :  * shmem_startup hook: allocate or attach to shared memory,
     494                 :  * then load any pre-existing statistics from file.
     495                 :  * Also create and load the query-texts file, which is expected to exist
     496                 :  * (even if empty) while the module is enabled.
     497                 :  */
     498 ECB             : static void
     499 GIC           3 : pgss_shmem_startup(void)
     500                 : {
     501                 :     bool        found;
     502 ECB             :     HASHCTL     info;
     503 CBC           3 :     FILE       *file = NULL;
     504 GIC           3 :     FILE       *qfile = NULL;
     505                 :     uint32      header;
     506                 :     int32       num;
     507                 :     int32       pgver;
     508                 :     int32       i;
     509 ECB             :     int         buffer_size;
     510 GIC           3 :     char       *buffer = NULL;
     511 ECB             : 
     512 GBC           3 :     if (prev_shmem_startup_hook)
     513 UIC           0 :         prev_shmem_startup_hook();
     514                 : 
     515 ECB             :     /* reset in case this is a restart within the postmaster */
     516 CBC           3 :     pgss = NULL;
     517 GIC           3 :     pgss_hash = NULL;
     518                 : 
     519                 :     /*
     520                 :      * Create or attach to the shared memory state, including hash table
     521 ECB             :      */
     522 GIC           3 :     LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
     523 ECB             : 
     524 GIC           3 :     pgss = ShmemInitStruct("pg_stat_statements",
     525                 :                            sizeof(pgssSharedState),
     526                 :                            &found);
     527 ECB             : 
     528 GIC           3 :     if (!found)
     529                 :     {
     530 ECB             :         /* First time through ... */
     531 CBC           3 :         pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
     532               3 :         pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
     533               3 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
     534               3 :         SpinLockInit(&pgss->mutex);
     535               3 :         pgss->extent = 0;
     536               3 :         pgss->n_writers = 0;
     537               3 :         pgss->gc_count = 0;
     538               3 :         pgss->stats.dealloc = 0;
     539 GIC           3 :         pgss->stats.stats_reset = GetCurrentTimestamp();
     540                 :     }
     541 ECB             : 
     542 CBC           3 :     info.keysize = sizeof(pgssHashKey);
     543               3 :     info.entrysize = sizeof(pgssEntry);
     544 GIC           3 :     pgss_hash = ShmemInitHash("pg_stat_statements hash",
     545                 :                               pgss_max, pgss_max,
     546                 :                               &info,
     547                 :                               HASH_ELEM | HASH_BLOBS);
     548 ECB             : 
     549 GIC           3 :     LWLockRelease(AddinShmemInitLock);
     550                 : 
     551                 :     /*
     552                 :      * If we're in the postmaster (or a standalone backend...), set up a shmem
     553                 :      * exit hook to dump the statistics to disk.
     554 ECB             :      */
     555 CBC           3 :     if (!IsUnderPostmaster)
     556 GIC           3 :         on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
     557                 : 
     558                 :     /*
     559                 :      * Done if some other process already completed our initialization.
     560 ECB             :      */
     561 CBC           3 :     if (found)
     562 GIC           3 :         return;
     563                 : 
     564                 :     /*
     565                 :      * Note: we don't bother with locks here, because there should be no other
     566                 :      * processes running when this code is reached.
     567                 :      */
     568                 : 
     569 ECB             :     /* Unlink query text file possibly left over from crash */
     570 GIC           3 :     unlink(PGSS_TEXT_FILE);
     571                 : 
     572 ECB             :     /* Allocate new query text temp file */
     573 CBC           3 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
     574 GBC           3 :     if (qfile == NULL)
     575 UIC           0 :         goto write_error;
     576                 : 
     577                 :     /*
     578                 :      * If we were told not to load old statistics, we're done.  (Note we do
     579                 :      * not try to unlink any old dump file in this case.  This seems a bit
     580                 :      * questionable but it's the historical behavior.)
     581 ECB             :      */
     582 GIC           3 :     if (!pgss_save)
     583 EUB             :     {
     584 UBC           0 :         FreeFile(qfile);
     585 UIC           0 :         return;
     586                 :     }
     587                 : 
     588                 :     /*
     589                 :      * Attempt to load old statistics from the dump file.
     590 ECB             :      */
     591 CBC           3 :     file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
     592 GIC           3 :     if (file == NULL)
     593 ECB             :     {
     594 GBC           3 :         if (errno != ENOENT)
     595 UIC           0 :             goto read_error;
     596 ECB             :         /* No existing persisted stats file, so we're done */
     597 CBC           3 :         FreeFile(qfile);
     598 GIC           3 :         return;
     599                 :     }
     600 EUB             : 
     601 UBC           0 :     buffer_size = 2048;
     602 UIC           0 :     buffer = (char *) palloc(buffer_size);
     603 EUB             : 
     604 UBC           0 :     if (fread(&header, sizeof(uint32), 1, file) != 1 ||
     605               0 :         fread(&pgver, sizeof(uint32), 1, file) != 1 ||
     606               0 :         fread(&num, sizeof(int32), 1, file) != 1)
     607 UIC           0 :         goto read_error;
     608 EUB             : 
     609 UBC           0 :     if (header != PGSS_FILE_HEADER ||
     610               0 :         pgver != PGSS_PG_MAJOR_VERSION)
     611 UIC           0 :         goto data_error;
     612 EUB             : 
     613 UIC           0 :     for (i = 0; i < num; i++)
     614                 :     {
     615                 :         pgssEntry   temp;
     616                 :         pgssEntry  *entry;
     617                 :         Size        query_offset;
     618 EUB             : 
     619 UBC           0 :         if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
     620 UIC           0 :             goto read_error;
     621                 : 
     622 EUB             :         /* Encoding is the only field we can easily sanity-check */
     623 UBC           0 :         if (!PG_VALID_BE_ENCODING(temp.encoding))
     624 UIC           0 :             goto data_error;
     625                 : 
     626 EUB             :         /* Resize buffer as needed */
     627 UIC           0 :         if (temp.query_len >= buffer_size)
     628 EUB             :         {
     629 UBC           0 :             buffer_size = Max(buffer_size * 2, temp.query_len + 1);
     630 UIC           0 :             buffer = repalloc(buffer, buffer_size);
     631                 :         }
     632 EUB             : 
     633 UBC           0 :         if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
     634 UIC           0 :             goto read_error;
     635                 : 
     636 EUB             :         /* Should have a trailing null, but let's make sure */
     637 UIC           0 :         buffer[temp.query_len] = '\0';
     638                 : 
     639 EUB             :         /* Skip loading "sticky" entries */
     640 UBC           0 :         if (IS_STICKY(temp.counters))
     641 UIC           0 :             continue;
     642                 : 
     643 EUB             :         /* Store the query text */
     644 UBC           0 :         query_offset = pgss->extent;
     645               0 :         if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
     646               0 :             goto write_error;
     647 UIC           0 :         pgss->extent += temp.query_len + 1;
     648                 : 
     649 EUB             :         /* make the hashtable entry (discards old entries if too many) */
     650 UIC           0 :         entry = entry_alloc(&temp.key, query_offset, temp.query_len,
     651                 :                             temp.encoding,
     652                 :                             false);
     653                 : 
     654 EUB             :         /* copy in the actual stats */
     655 UIC           0 :         entry->counters = temp.counters;
     656                 :     }
     657                 : 
     658 EUB             :     /* Read global statistics for pg_stat_statements */
     659 UBC           0 :     if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     660 UIC           0 :         goto read_error;
     661 EUB             : 
     662 UBC           0 :     pfree(buffer);
     663               0 :     FreeFile(file);
     664 UIC           0 :     FreeFile(qfile);
     665                 : 
     666                 :     /*
     667                 :      * Remove the persisted stats file so it's not included in
     668                 :      * backups/replication standbys, etc.  A new file will be written on next
     669                 :      * shutdown.
     670                 :      *
     671                 :      * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
     672                 :      * because we remove that file on startup; it acts inversely to
     673                 :      * PGSS_DUMP_FILE, in that it is only supposed to be around when the
     674                 :      * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
     675                 :      * when the server is not running.  Leaving the file creates no danger of
     676                 :      * a newly restored database having a spurious record of execution costs,
     677                 :      * which is what we're really concerned about here.
     678 EUB             :      */
     679 UIC           0 :     unlink(PGSS_DUMP_FILE);
     680 EUB             : 
     681 UIC           0 :     return;
     682 EUB             : 
     683 UBC           0 : read_error:
     684 UIC           0 :     ereport(LOG,
     685                 :             (errcode_for_file_access(),
     686                 :              errmsg("could not read file \"%s\": %m",
     687 EUB             :                     PGSS_DUMP_FILE)));
     688 UBC           0 :     goto fail;
     689               0 : data_error:
     690 UIC           0 :     ereport(LOG,
     691                 :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     692                 :              errmsg("ignoring invalid data in file \"%s\"",
     693 EUB             :                     PGSS_DUMP_FILE)));
     694 UBC           0 :     goto fail;
     695               0 : write_error:
     696 UIC           0 :     ereport(LOG,
     697                 :             (errcode_for_file_access(),
     698                 :              errmsg("could not write file \"%s\": %m",
     699 EUB             :                     PGSS_TEXT_FILE)));
     700 UBC           0 : fail:
     701               0 :     if (buffer)
     702               0 :         pfree(buffer);
     703               0 :     if (file)
     704               0 :         FreeFile(file);
     705               0 :     if (qfile)
     706 UIC           0 :         FreeFile(qfile);
     707 EUB             :     /* If possible, throw away the bogus file; ignore any error */
     708 UIC           0 :     unlink(PGSS_DUMP_FILE);
     709                 : 
     710                 :     /*
     711                 :      * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
     712                 :      * server is running with pg_stat_statements enabled
     713                 :      */
     714                 : }
     715                 : 
     716                 : /*
     717                 :  * shmem_shutdown hook: Dump statistics into file.
     718                 :  *
     719                 :  * Note: we don't bother with acquiring lock, because there should be no
     720                 :  * other processes running when this is called.
     721                 :  */
     722 ECB             : static void
     723 GIC           3 : pgss_shmem_shutdown(int code, Datum arg)
     724                 : {
     725 ECB             :     FILE       *file;
     726 CBC           3 :     char       *qbuffer = NULL;
     727 GIC           3 :     Size        qbuffer_size = 0;
     728                 :     HASH_SEQ_STATUS hash_seq;
     729                 :     int32       num_entries;
     730                 :     pgssEntry  *entry;
     731                 : 
     732 ECB             :     /* Don't try to dump during a crash. */
     733 CBC           3 :     if (code)
     734 GIC           3 :         return;
     735                 : 
     736 ECB             :     /* Safety check ... shouldn't get here unless shmem is set up. */
     737 GBC           3 :     if (!pgss || !pgss_hash)
     738 UIC           0 :         return;
     739                 : 
     740 ECB             :     /* Don't dump if told not to. */
     741 GBC           3 :     if (!pgss_save)
     742 UIC           0 :         return;
     743 ECB             : 
     744 CBC           3 :     file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
     745 GBC           3 :     if (file == NULL)
     746 UIC           0 :         goto error;
     747 ECB             : 
     748 GBC           3 :     if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
     749 LBC           0 :         goto error;
     750 GBC           3 :     if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
     751 LBC           0 :         goto error;
     752 CBC           3 :     num_entries = hash_get_num_entries(pgss_hash);
     753 GBC           3 :     if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
     754 UIC           0 :         goto error;
     755 ECB             : 
     756 CBC           3 :     qbuffer = qtext_load_file(&qbuffer_size);
     757 GBC           3 :     if (qbuffer == NULL)
     758 UIC           0 :         goto error;
     759                 : 
     760                 :     /*
     761                 :      * When serializing to disk, we store query texts immediately after their
     762                 :      * entry data.  Any orphaned query texts are thereby excluded.
     763 ECB             :      */
     764 CBC           3 :     hash_seq_init(&hash_seq, pgss_hash);
     765 GIC       24493 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
     766 ECB             :     {
     767 CBC       24490 :         int         len = entry->query_len;
     768 GIC       24490 :         char       *qstr = qtext_fetch(entry->query_offset, len,
     769                 :                                        qbuffer, qbuffer_size);
     770 ECB             : 
     771 GBC       24490 :         if (qstr == NULL)
     772 UIC           0 :             continue;           /* Ignore any entries with bogus texts */
     773 ECB             : 
     774 CBC       24490 :         if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
     775 GIC       24490 :             fwrite(qstr, 1, len + 1, file) != len + 1)
     776                 :         {
     777 EUB             :             /* note: we assume hash_seq_term won't change errno */
     778 UBC           0 :             hash_seq_term(&hash_seq);
     779 UIC           0 :             goto error;
     780                 :         }
     781                 :     }
     782                 : 
     783 ECB             :     /* Dump global statistics for pg_stat_statements */
     784 GBC           3 :     if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
     785 UIC           0 :         goto error;
     786 ECB             : 
     787 CBC           3 :     free(qbuffer);
     788 GIC           3 :     qbuffer = NULL;
     789 ECB             : 
     790 GIC           3 :     if (FreeFile(file))
     791 EUB             :     {
     792 UBC           0 :         file = NULL;
     793 UIC           0 :         goto error;
     794                 :     }
     795                 : 
     796                 :     /*
     797                 :      * Rename file into place, so we atomically replace any old one.
     798 ECB             :      */
     799 GIC           3 :     (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
     800                 : 
     801 ECB             :     /* Unlink query-texts file; it's not needed while shutdown */
     802 GIC           3 :     unlink(PGSS_TEXT_FILE);
     803 ECB             : 
     804 GIC           3 :     return;
     805 EUB             : 
     806 UBC           0 : error:
     807 UIC           0 :     ereport(LOG,
     808                 :             (errcode_for_file_access(),
     809                 :              errmsg("could not write file \"%s\": %m",
     810 EUB             :                     PGSS_DUMP_FILE ".tmp")));
     811 UNC           0 :     free(qbuffer);
     812 UBC           0 :     if (file)
     813               0 :         FreeFile(file);
     814 UIC           0 :     unlink(PGSS_DUMP_FILE ".tmp");
     815               0 :     unlink(PGSS_TEXT_FILE);
     816                 : }
     817                 : 
     818                 : /*
     819                 :  * Post-parse-analysis hook: mark query with a queryId
     820 ECB             :  */
     821                 : static void
     822 CBC       60765 : pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
     823 EUB             : {
     824 GIC       60765 :     if (prev_post_parse_analyze_hook)
     825 UIC           0 :         prev_post_parse_analyze_hook(pstate, query, jstate);
     826 ECB             : 
     827                 :     /* Safety check... */
     828 GIC       60765 :     if (!pgss || !pgss_hash || !pgss_enabled(exec_nested_level))
     829           11440 :         return;
     830                 : 
     831                 :     /*
     832                 :      * Clear queryId for prepared statements related utility, as those will
     833                 :      * inherit from the underlying statement's one (except DEALLOCATE which is
     834 ECB             :      * entirely untracked).
     835                 :      */
     836 CBC       49325 :     if (query->utilityStmt)
     837                 :     {
     838           23019 :         if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt))
     839                 :         {
     840            1481 :             query->queryId = UINT64CONST(0);
     841 GNC        1481 :             return;
     842                 :         }
     843                 :     }
     844                 : 
     845                 :     /*
     846                 :      * If query jumbling were able to identify any ignorable constants, we
     847                 :      * immediately create a hash table entry for the query, so that we can
     848                 :      * record the normalized form of the query string.  If there were no such
     849                 :      * constants, the normalized string would be the same as the query text
     850                 :      * anyway, so there's no need for an early entry.
     851                 :      */
     852 CBC       47844 :     if (jstate && jstate->clocations_count > 0)
     853           23708 :         pgss_store(pstate->p_sourcetext,
     854                 :                    query->queryId,
     855                 :                    query->stmt_location,
     856                 :                    query->stmt_len,
     857                 :                    PGSS_INVALID,
     858                 :                    0,
     859                 :                    0,
     860                 :                    NULL,
     861                 :                    NULL,
     862                 :                    NULL,
     863                 :                    jstate);
     864                 : }
     865                 : 
     866                 : /*
     867                 :  * Planner hook: forward to regular planner, but measure planning time
     868                 :  * if needed.
     869                 :  */
     870                 : static PlannedStmt *
     871           37043 : pgss_planner(Query *parse,
     872                 :              const char *query_string,
     873                 :              int cursorOptions,
     874                 :              ParamListInfo boundParams)
     875                 : {
     876                 :     PlannedStmt *result;
     877                 : 
     878                 :     /*
     879                 :      * We can't process the query if no query_string is provided, as
     880                 :      * pgss_store needs it.  We also ignore query without queryid, as it would
     881                 :      * be treated as a utility statement, which may not be the case.
     882                 :      *
     883                 :      * Note that planner_hook can be called from the planner itself, so we
     884                 :      * have a specific nesting level for the planner.  However, utility
     885                 :      * commands containing optimizable statements can also call the planner,
     886                 :      * same for regular DML (for instance for underlying foreign key queries).
     887                 :      * So testing the planner nesting level only is not enough to detect real
     888                 :      * top level planner call.
     889                 :      */
     890           37043 :     if (pgss_enabled(plan_nested_level + exec_nested_level)
     891           26463 :         && pgss_track_planning && query_string
     892              59 :         && parse->queryId != UINT64CONST(0))
     893              58 :     {
     894                 :         instr_time  start;
     895                 :         instr_time  duration;
     896                 :         BufferUsage bufusage_start,
     897                 :                     bufusage;
     898                 :         WalUsage    walusage_start,
     899                 :                     walusage;
     900                 : 
     901                 :         /* We need to track buffer usage as the planner can access them. */
     902              58 :         bufusage_start = pgBufferUsage;
     903                 : 
     904                 :         /*
     905                 :          * Similarly the planner could write some WAL records in some cases
     906                 :          * (e.g. setting a hint bit with those being WAL-logged)
     907                 :          */
     908              58 :         walusage_start = pgWalUsage;
     909              58 :         INSTR_TIME_SET_CURRENT(start);
     910                 : 
     911              58 :         plan_nested_level++;
     912              58 :         PG_TRY();
     913                 :         {
     914              58 :             if (prev_planner_hook)
     915 UBC           0 :                 result = prev_planner_hook(parse, query_string, cursorOptions,
     916                 :                                            boundParams);
     917                 :             else
     918 CBC          58 :                 result = standard_planner(parse, query_string, cursorOptions,
     919                 :                                           boundParams);
     920                 :         }
     921 UBC           0 :         PG_FINALLY();
     922                 :         {
     923 CBC          58 :             plan_nested_level--;
     924                 :         }
     925              58 :         PG_END_TRY();
     926                 : 
     927              58 :         INSTR_TIME_SET_CURRENT(duration);
     928              58 :         INSTR_TIME_SUBTRACT(duration, start);
     929                 : 
     930                 :         /* calc differences of buffer counters. */
     931              58 :         memset(&bufusage, 0, sizeof(BufferUsage));
     932              58 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
     933                 : 
     934                 :         /* calc differences of WAL counters. */
     935              58 :         memset(&walusage, 0, sizeof(WalUsage));
     936              58 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     937                 : 
     938              58 :         pgss_store(query_string,
     939                 :                    parse->queryId,
     940                 :                    parse->stmt_location,
     941                 :                    parse->stmt_len,
     942                 :                    PGSS_PLAN,
     943              58 :                    INSTR_TIME_GET_MILLISEC(duration),
     944                 :                    0,
     945                 :                    &bufusage,
     946                 :                    &walusage,
     947                 :                    NULL,
     948                 :                    NULL);
     949                 :     }
     950                 :     else
     951                 :     {
     952           36985 :         if (prev_planner_hook)
     953 UBC           0 :             result = prev_planner_hook(parse, query_string, cursorOptions,
     954                 :                                        boundParams);
     955                 :         else
     956 CBC       36985 :             result = standard_planner(parse, query_string, cursorOptions,
     957                 :                                       boundParams);
     958                 :     }
     959                 : 
     960           36532 :     return result;
     961                 : }
     962                 : 
     963                 : /*
     964                 :  * ExecutorStart hook: start up tracking if needed
     965                 :  */
     966                 : static void
     967           43793 : pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
     968                 : {
     969           43793 :     if (prev_ExecutorStart)
     970 UBC           0 :         prev_ExecutorStart(queryDesc, eflags);
     971                 :     else
     972 CBC       43793 :         standard_ExecutorStart(queryDesc, eflags);
     973                 : 
     974                 :     /*
     975                 :      * If query has queryId zero, don't track it.  This prevents double
     976                 :      * counting of optimizable statements that are directly contained in
     977                 :      * utility statements.
     978                 :      */
     979           43530 :     if (pgss_enabled(exec_nested_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
     980                 :     {
     981                 :         /*
     982                 :          * Set up to track total elapsed time in ExecutorRun.  Make sure the
     983                 :          * space is allocated in the per-query context so it will go away at
     984                 :          * ExecutorEnd.
     985                 :          */
     986           26765 :         if (queryDesc->totaltime == NULL)
     987                 :         {
     988                 :             MemoryContext oldcxt;
     989                 : 
     990           26765 :             oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
     991           26765 :             queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
     992           26765 :             MemoryContextSwitchTo(oldcxt);
     993                 :         }
     994                 :     }
     995           43530 : }
     996                 : 
     997                 : /*
     998                 :  * ExecutorRun hook: all we need do is track nesting depth
     999                 :  */
    1000                 : static void
    1001           42792 : pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
    1002                 :                  bool execute_once)
    1003                 : {
    1004           42792 :     exec_nested_level++;
    1005           42792 :     PG_TRY();
    1006                 :     {
    1007           42792 :         if (prev_ExecutorRun)
    1008 UBC           0 :             prev_ExecutorRun(queryDesc, direction, count, execute_once);
    1009                 :         else
    1010 CBC       42792 :             standard_ExecutorRun(queryDesc, direction, count, execute_once);
    1011                 :     }
    1012            3113 :     PG_FINALLY();
    1013                 :     {
    1014           42792 :         exec_nested_level--;
    1015                 :     }
    1016           42792 :     PG_END_TRY();
    1017           39679 : }
    1018                 : 
    1019                 : /*
    1020                 :  * ExecutorFinish hook: all we need do is track nesting depth
    1021                 :  */
    1022                 : static void
    1023           38050 : pgss_ExecutorFinish(QueryDesc *queryDesc)
    1024                 : {
    1025           38050 :     exec_nested_level++;
    1026           38050 :     PG_TRY();
    1027                 :     {
    1028           38050 :         if (prev_ExecutorFinish)
    1029 UBC           0 :             prev_ExecutorFinish(queryDesc);
    1030                 :         else
    1031 CBC       38050 :             standard_ExecutorFinish(queryDesc);
    1032                 :     }
    1033             131 :     PG_FINALLY();
    1034                 :     {
    1035           38050 :         exec_nested_level--;
    1036                 :     }
    1037           38050 :     PG_END_TRY();
    1038           37919 : }
    1039                 : 
    1040                 : /*
    1041                 :  * ExecutorEnd hook: store results if needed
    1042                 :  */
    1043                 : static void
    1044           40246 : pgss_ExecutorEnd(QueryDesc *queryDesc)
    1045                 : {
    1046           40246 :     uint64      queryId = queryDesc->plannedstmt->queryId;
    1047                 : 
    1048           40246 :     if (queryId != UINT64CONST(0) && queryDesc->totaltime &&
    1049           25681 :         pgss_enabled(exec_nested_level))
    1050                 :     {
    1051                 :         /*
    1052                 :          * Make sure stats accumulation is done.  (Note: it's okay if several
    1053                 :          * levels of hook all do this.)
    1054                 :          */
    1055           25681 :         InstrEndLoop(queryDesc->totaltime);
    1056                 : 
    1057           25583 :         pgss_store(queryDesc->sourceText,
    1058                 :                    queryId,
    1059           25681 :                    queryDesc->plannedstmt->stmt_location,
    1060           25681 :                    queryDesc->plannedstmt->stmt_len,
    1061                 :                    PGSS_EXEC,
    1062           25681 :                    queryDesc->totaltime->total * 1000.0,  /* convert to msec */
    1063 GNC       25681 :                    queryDesc->estate->es_total_processed,
    1064 CBC       25681 :                    &queryDesc->totaltime->bufusage,
    1065           25681 :                    &queryDesc->totaltime->walusage,
    1066           25681 :                    queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
    1067                 :                    NULL);
    1068                 :     }
    1069                 : 
    1070           40246 :     if (prev_ExecutorEnd)
    1071 UBC           0 :         prev_ExecutorEnd(queryDesc);
    1072                 :     else
    1073 CBC       40246 :         standard_ExecutorEnd(queryDesc);
    1074           40246 : }
    1075                 : 
    1076                 : /*
    1077                 :  * ProcessUtility hook
    1078                 :  */
    1079                 : static void
    1080           27689 : pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
    1081                 :                     bool readOnlyTree,
    1082                 :                     ProcessUtilityContext context,
    1083                 :                     ParamListInfo params, QueryEnvironment *queryEnv,
    1084                 :                     DestReceiver *dest, QueryCompletion *qc)
    1085                 : {
    1086           27689 :     Node       *parsetree = pstmt->utilityStmt;
    1087           27689 :     uint64      saved_queryId = pstmt->queryId;
    1088           27689 :     int         saved_stmt_location = pstmt->stmt_location;
    1089           27689 :     int         saved_stmt_len = pstmt->stmt_len;
    1090                 : 
    1091                 :     /*
    1092                 :      * Force utility statements to get queryId zero.  We do this even in cases
    1093                 :      * where the statement contains an optimizable statement for which a
    1094                 :      * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
    1095                 :      * cases, runtime control will first go through ProcessUtility and then
    1096                 :      * the executor, and we don't want the executor hooks to do anything,
    1097                 :      * since we are already measuring the statement's costs at the utility
    1098                 :      * level.
    1099                 :      *
    1100                 :      * Note that this is only done if pg_stat_statements is enabled and
    1101                 :      * configured to track utility statements, to allow for the unlikely
    1102                 :      * case that the user has configured another extension to handle
    1103                 :      * utility statements only.
    1104                 :      */
    1105           27689 :     if (pgss_enabled(exec_nested_level) && pgss_track_utility)
    1106           22979 :         pstmt->queryId = UINT64CONST(0);
    1107                 : 
    1108                 :     /*
    1109                 :      * If it's an EXECUTE statement, we don't track it and don't increment the
    1110                 :      * nesting level.  This allows the cycles to be charged to the underlying
    1111                 :      * PREPARE instead (by the Executor hooks), which is much more useful.
    1112                 :      *
    1113                 :      * We also don't track execution of PREPARE.  If we did, we would get one
    1114                 :      * hash table entry for the PREPARE (with hash calculated from the query
    1115                 :      * string), and then a different one with the same query string (but hash
    1116                 :      * calculated from the query tree) would be used to accumulate costs of
    1117                 :      * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
    1118                 :      * cases where planning time is not included at all.
    1119                 :      *
    1120                 :      * Likewise, we don't track execution of DEALLOCATE.
    1121                 :      */
    1122           27689 :     if (pgss_track_utility && pgss_enabled(exec_nested_level) &&
    1123           22979 :         PGSS_HANDLED_UTILITY(parsetree))
    1124           19496 :     {
    1125                 :         instr_time  start;
    1126                 :         instr_time  duration;
    1127                 :         uint64      rows;
    1128                 :         BufferUsage bufusage_start,
    1129                 :                     bufusage;
    1130                 :         WalUsage    walusage_start,
    1131                 :                     walusage;
    1132                 : 
    1133           21499 :         bufusage_start = pgBufferUsage;
    1134           21499 :         walusage_start = pgWalUsage;
    1135           21499 :         INSTR_TIME_SET_CURRENT(start);
    1136                 : 
    1137           21499 :         exec_nested_level++;
    1138           21499 :         PG_TRY();
    1139                 :         {
    1140           21499 :             if (prev_ProcessUtility)
    1141 UBC           0 :                 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1142                 :                                     context, params, queryEnv,
    1143                 :                                     dest, qc);
    1144                 :             else
    1145 CBC       21499 :                 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1146                 :                                         context, params, queryEnv,
    1147                 :                                         dest, qc);
    1148                 :         }
    1149            2003 :         PG_FINALLY();
    1150                 :         {
    1151           21499 :             exec_nested_level--;
    1152                 :         }
    1153           21499 :         PG_END_TRY();
    1154                 : 
    1155                 :         /*
    1156                 :          * CAUTION: do not access the *pstmt data structure again below here.
    1157                 :          * If it was a ROLLBACK or similar, that data structure may have been
    1158                 :          * freed.  We must copy everything we still need into local variables,
    1159                 :          * which we did above.
    1160                 :          *
    1161                 :          * For the same reason, we can't risk restoring pstmt->queryId to its
    1162                 :          * former value, which'd otherwise be a good idea.
    1163                 :          */
    1164                 : 
    1165           19496 :         INSTR_TIME_SET_CURRENT(duration);
    1166           19496 :         INSTR_TIME_SUBTRACT(duration, start);
    1167                 : 
    1168                 :         /*
    1169                 :          * Track the total number of rows retrieved or affected by the utility
    1170                 :          * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
    1171                 :          * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
    1172                 :          */
    1173           19496 :         rows = (qc && (qc->commandTag == CMDTAG_COPY ||
    1174           18217 :                        qc->commandTag == CMDTAG_FETCH ||
    1175           17960 :                        qc->commandTag == CMDTAG_SELECT ||
    1176           17788 :                        qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
    1177           38992 :             qc->nprocessed : 0;
    1178                 : 
    1179                 :         /* calc differences of buffer counters. */
    1180           19496 :         memset(&bufusage, 0, sizeof(BufferUsage));
    1181           19496 :         BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
    1182                 : 
    1183                 :         /* calc differences of WAL counters. */
    1184           19496 :         memset(&walusage, 0, sizeof(WalUsage));
    1185           19496 :         WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
    1186                 : 
    1187           19496 :         pgss_store(queryString,
    1188                 :                    saved_queryId,
    1189                 :                    saved_stmt_location,
    1190                 :                    saved_stmt_len,
    1191                 :                    PGSS_EXEC,
    1192           19496 :                    INSTR_TIME_GET_MILLISEC(duration),
    1193                 :                    rows,
    1194                 :                    &bufusage,
    1195                 :                    &walusage,
    1196                 :                    NULL,
    1197                 :                    NULL);
    1198                 :     }
    1199                 :     else
    1200                 :     {
    1201            6190 :         if (prev_ProcessUtility)
    1202 UBC           0 :             prev_ProcessUtility(pstmt, queryString, readOnlyTree,
    1203                 :                                 context, params, queryEnv,
    1204                 :                                 dest, qc);
    1205                 :         else
    1206 CBC        6190 :             standard_ProcessUtility(pstmt, queryString, readOnlyTree,
    1207                 :                                     context, params, queryEnv,
    1208                 :                                     dest, qc);
    1209                 :     }
    1210           25591 : }
    1211                 : 
    1212                 : /*
    1213                 :  * Store some statistics for a statement.
    1214                 :  *
    1215                 :  * If jstate is not NULL then we're trying to create an entry for which
    1216                 :  * we have no statistics as yet; we just want to record the normalized
    1217                 :  * query string.  total_time, rows, bufusage, walusage and jitusage are
    1218                 :  * ignored in this case.
    1219                 :  *
    1220                 :  * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
    1221                 :  * for the arrays in the Counters field.
    1222                 :  */
    1223                 : static void
    1224           68943 : pgss_store(const char *query, uint64 queryId,
    1225                 :            int query_location, int query_len,
    1226                 :            pgssStoreKind kind,
    1227                 :            double total_time, uint64 rows,
    1228                 :            const BufferUsage *bufusage,
    1229                 :            const WalUsage *walusage,
    1230                 :            const struct JitInstrumentation *jitusage,
    1231                 :            JumbleState *jstate)
    1232                 : {
    1233                 :     pgssHashKey key;
    1234                 :     pgssEntry  *entry;
    1235           68943 :     char       *norm_query = NULL;
    1236           68943 :     int         encoding = GetDatabaseEncoding();
    1237                 : 
    1238           68943 :     Assert(query != NULL);
    1239                 : 
    1240                 :     /* Safety check... */
    1241           68943 :     if (!pgss || !pgss_hash)
    1242 UBC           0 :         return;
    1243                 : 
    1244                 :     /*
    1245                 :      * Nothing to do if compute_query_id isn't enabled and no other module
    1246                 :      * computed a query identifier.
    1247                 :      */
    1248 CBC       68943 :     if (queryId == UINT64CONST(0))
    1249 UBC           0 :         return;
    1250                 : 
    1251                 :     /*
    1252                 :      * Confine our attention to the relevant part of the string, if the query
    1253                 :      * is a portion of a multi-statement source string, and update query
    1254                 :      * location and length if needed.
    1255                 :      */
    1256 CBC       68943 :     query = CleanQuerytext(query, &query_location, &query_len);
    1257                 : 
    1258                 :     /* Set up key for hashtable search */
    1259                 : 
    1260                 :     /* zero the whole key first so that any padding bytes are cleared */
    1261           68943 :     memset(&key, 0, sizeof(pgssHashKey));
    1262                 : 
    1263           68943 :     key.userid = GetUserId();
    1264           68943 :     key.dbid = MyDatabaseId;
    1265           68943 :     key.queryid = queryId;
    1266           68943 :     key.toplevel = (exec_nested_level == 0);
    1267                 : 
    1268                 :     /* Lookup the hash table entry with shared lock. */
    1269           68943 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1270                 : 
    1271           68943 :     entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
    1272                 : 
    1273                 :     /* Create new entry, if not present */
    1274           68943 :     if (!entry)
    1275                 :     {
    1276                 :         Size        query_offset;
    1277                 :         int         gc_count;
    1278                 :         bool        stored;
    1279                 :         bool        do_gc;
    1280                 : 
    1281                 :         /*
    1282                 :          * Create a new, normalized query string if caller asked.  We don't
    1283                 :          * need to hold the lock while doing this work.  (Note: in any case,
    1284                 :          * it's possible that someone else creates a duplicate hashtable entry
    1285                 :          * in the interval where we don't hold the lock below.  That case is
    1286                 :          * handled by entry_alloc.)
    1287                 :          */
    1288           24795 :         if (jstate)
    1289                 :         {
    1290            8841 :             LWLockRelease(pgss->lock);
    1291            8841 :             norm_query = generate_normalized_query(jstate, query,
    1292                 :                                                    query_location,
    1293                 :                                                    &query_len);
    1294            8841 :             LWLockAcquire(pgss->lock, LW_SHARED);
    1295                 :         }
    1296                 : 
    1297                 :         /* Append new query text to file with only shared lock held */
    1298           24795 :         stored = qtext_store(norm_query ? norm_query : query, query_len,
    1299                 :                              &query_offset, &gc_count);
    1300                 : 
    1301                 :         /*
    1302                 :          * Determine whether we need to garbage collect external query texts
    1303                 :          * while the shared lock is still held.  This micro-optimization
    1304                 :          * avoids taking the time to decide this while holding exclusive lock.
    1305                 :          */
    1306           24795 :         do_gc = need_gc_qtexts();
    1307                 : 
    1308                 :         /* Need exclusive lock to make a new hashtable entry - promote */
    1309           24795 :         LWLockRelease(pgss->lock);
    1310           24795 :         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    1311                 : 
    1312                 :         /*
    1313                 :          * A garbage collection may have occurred while we weren't holding the
    1314                 :          * lock.  In the unlikely event that this happens, the query text we
    1315                 :          * stored above will have been garbage collected, so write it again.
    1316                 :          * This should be infrequent enough that doing it while holding
    1317                 :          * exclusive lock isn't a performance problem.
    1318                 :          */
    1319           24795 :         if (!stored || pgss->gc_count != gc_count)
    1320 UBC           0 :             stored = qtext_store(norm_query ? norm_query : query, query_len,
    1321                 :                                  &query_offset, NULL);
    1322                 : 
    1323                 :         /* If we failed to write to the text file, give up */
    1324 CBC       24795 :         if (!stored)
    1325 UBC           0 :             goto done;
    1326                 : 
    1327                 :         /* OK to create a new hashtable entry */
    1328 CBC       24795 :         entry = entry_alloc(&key, query_offset, query_len, encoding,
    1329                 :                             jstate != NULL);
    1330                 : 
    1331                 :         /* If needed, perform garbage collection while exclusive lock held */
    1332           24795 :         if (do_gc)
    1333 UBC           0 :             gc_qtexts();
    1334                 :     }
    1335                 : 
    1336                 :     /* Increment the counts, except when jstate is not NULL */
    1337 CBC       68943 :     if (!jstate)
    1338                 :     {
    1339                 :         /*
    1340                 :          * Grab the spinlock while updating the counters (see comment about
    1341                 :          * locking rules at the head of the file)
    1342                 :          */
    1343           45235 :         volatile pgssEntry *e = (volatile pgssEntry *) entry;
    1344                 : 
    1345           45235 :         Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
    1346                 : 
    1347           45235 :         SpinLockAcquire(&e->mutex);
    1348                 : 
    1349                 :         /* "Unstick" entry if it was previously sticky */
    1350           45235 :         if (IS_STICKY(e->counters))
    1351           24232 :             e->counters.usage = USAGE_INIT;
    1352                 : 
    1353           45235 :         e->counters.calls[kind] += 1;
    1354           45235 :         e->counters.total_time[kind] += total_time;
    1355                 : 
    1356           45235 :         if (e->counters.calls[kind] == 1)
    1357                 :         {
    1358           24274 :             e->counters.min_time[kind] = total_time;
    1359           24274 :             e->counters.max_time[kind] = total_time;
    1360           24274 :             e->counters.mean_time[kind] = total_time;
    1361                 :         }
    1362                 :         else
    1363                 :         {
    1364                 :             /*
    1365                 :              * Welford's method for accurately computing variance. See
    1366                 :              * <http://www.johndcook.com/blog/standard_deviation/>
    1367                 :              */
    1368           20961 :             double      old_mean = e->counters.mean_time[kind];
    1369                 : 
    1370           20961 :             e->counters.mean_time[kind] +=
    1371           20961 :                 (total_time - old_mean) / e->counters.calls[kind];
    1372           20961 :             e->counters.sum_var_time[kind] +=
    1373           20961 :                 (total_time - old_mean) * (total_time - e->counters.mean_time[kind]);
    1374                 : 
    1375                 :             /* calculate min and max time */
    1376           20961 :             if (e->counters.min_time[kind] > total_time)
    1377            5572 :                 e->counters.min_time[kind] = total_time;
    1378           20961 :             if (e->counters.max_time[kind] < total_time)
    1379            2417 :                 e->counters.max_time[kind] = total_time;
    1380                 :         }
    1381           45235 :         e->counters.rows += rows;
    1382           45235 :         e->counters.shared_blks_hit += bufusage->shared_blks_hit;
    1383           45235 :         e->counters.shared_blks_read += bufusage->shared_blks_read;
    1384           45235 :         e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
    1385           45235 :         e->counters.shared_blks_written += bufusage->shared_blks_written;
    1386           45235 :         e->counters.local_blks_hit += bufusage->local_blks_hit;
    1387           45235 :         e->counters.local_blks_read += bufusage->local_blks_read;
    1388           45235 :         e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
    1389           45235 :         e->counters.local_blks_written += bufusage->local_blks_written;
    1390           45235 :         e->counters.temp_blks_read += bufusage->temp_blks_read;
    1391           45235 :         e->counters.temp_blks_written += bufusage->temp_blks_written;
    1392           45235 :         e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
    1393           45235 :         e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
    1394           45235 :         e->counters.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time);
    1395           45235 :         e->counters.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time);
    1396           45235 :         e->counters.usage += USAGE_EXEC(total_time);
    1397           45235 :         e->counters.wal_records += walusage->wal_records;
    1398           45235 :         e->counters.wal_fpi += walusage->wal_fpi;
    1399           45235 :         e->counters.wal_bytes += walusage->wal_bytes;
    1400           45235 :         if (jitusage)
    1401                 :         {
    1402              98 :             e->counters.jit_functions += jitusage->created_functions;
    1403              98 :             e->counters.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter);
    1404                 : 
    1405              98 :             if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter))
    1406              68 :                 e->counters.jit_inlining_count++;
    1407              98 :             e->counters.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter);
    1408                 : 
    1409              98 :             if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter))
    1410              96 :                 e->counters.jit_optimization_count++;
    1411              98 :             e->counters.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter);
    1412                 : 
    1413              98 :             if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter))
    1414              96 :                 e->counters.jit_emission_count++;
    1415              98 :             e->counters.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter);
    1416                 :         }
    1417                 : 
    1418           45235 :         SpinLockRelease(&e->mutex);
    1419                 :     }
    1420                 : 
    1421           23708 : done:
    1422           68943 :     LWLockRelease(pgss->lock);
    1423                 : 
    1424                 :     /* We postpone this clean-up until we're out of the lock */
    1425           68943 :     if (norm_query)
    1426            8841 :         pfree(norm_query);
    1427                 : }
    1428                 : 
    1429                 : /*
    1430                 :  * Reset statement statistics corresponding to userid, dbid, and queryid.
    1431                 :  */
    1432                 : Datum
    1433              40 : pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
    1434                 : {
    1435                 :     Oid         userid;
    1436                 :     Oid         dbid;
    1437                 :     uint64      queryid;
    1438                 : 
    1439              40 :     userid = PG_GETARG_OID(0);
    1440              40 :     dbid = PG_GETARG_OID(1);
    1441              40 :     queryid = (uint64) PG_GETARG_INT64(2);
    1442                 : 
    1443              40 :     entry_reset(userid, dbid, queryid);
    1444                 : 
    1445              40 :     PG_RETURN_VOID();
    1446                 : }
    1447                 : 
    1448                 : /*
    1449                 :  * Reset statement statistics.
    1450                 :  */
    1451                 : Datum
    1452               1 : pg_stat_statements_reset(PG_FUNCTION_ARGS)
    1453                 : {
    1454               1 :     entry_reset(0, 0, 0);
    1455                 : 
    1456               1 :     PG_RETURN_VOID();
    1457                 : }
    1458                 : 
    1459                 : /* Number of output arguments (columns) for various API versions */
    1460                 : #define PG_STAT_STATEMENTS_COLS_V1_0    14
    1461                 : #define PG_STAT_STATEMENTS_COLS_V1_1    18
    1462                 : #define PG_STAT_STATEMENTS_COLS_V1_2    19
    1463                 : #define PG_STAT_STATEMENTS_COLS_V1_3    23
    1464                 : #define PG_STAT_STATEMENTS_COLS_V1_8    32
    1465                 : #define PG_STAT_STATEMENTS_COLS_V1_9    33
    1466                 : #define PG_STAT_STATEMENTS_COLS_V1_10   43
    1467                 : #define PG_STAT_STATEMENTS_COLS         43  /* maximum of above */
    1468                 : 
    1469                 : /*
    1470                 :  * Retrieve statement statistics.
    1471                 :  *
    1472                 :  * The SQL API of this function has changed multiple times, and will likely
    1473                 :  * do so again in future.  To support the case where a newer version of this
    1474                 :  * loadable module is being used with an old SQL declaration of the function,
    1475                 :  * we continue to support the older API versions.  For 1.2 and later, the
    1476                 :  * expected API version is identified by embedding it in the C name of the
    1477                 :  * function.  Unfortunately we weren't bright enough to do that for 1.1.
    1478                 :  */
    1479                 : Datum
    1480              41 : pg_stat_statements_1_10(PG_FUNCTION_ARGS)
    1481                 : {
    1482              41 :     bool        showtext = PG_GETARG_BOOL(0);
    1483                 : 
    1484              41 :     pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
    1485                 : 
    1486              41 :     return (Datum) 0;
    1487                 : }
    1488                 : 
    1489                 : Datum
    1490               1 : pg_stat_statements_1_9(PG_FUNCTION_ARGS)
    1491                 : {
    1492               1 :     bool        showtext = PG_GETARG_BOOL(0);
    1493                 : 
    1494               1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
    1495                 : 
    1496               1 :     return (Datum) 0;
    1497                 : }
    1498                 : 
    1499                 : Datum
    1500 UBC           0 : pg_stat_statements_1_8(PG_FUNCTION_ARGS)
    1501                 : {
    1502               0 :     bool        showtext = PG_GETARG_BOOL(0);
    1503                 : 
    1504               0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
    1505                 : 
    1506               0 :     return (Datum) 0;
    1507                 : }
    1508                 : 
    1509                 : Datum
    1510 CBC           1 : pg_stat_statements_1_3(PG_FUNCTION_ARGS)
    1511                 : {
    1512               1 :     bool        showtext = PG_GETARG_BOOL(0);
    1513                 : 
    1514               1 :     pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
    1515                 : 
    1516               1 :     return (Datum) 0;
    1517                 : }
    1518                 : 
    1519                 : Datum
    1520 UBC           0 : pg_stat_statements_1_2(PG_FUNCTION_ARGS)
    1521                 : {
    1522               0 :     bool        showtext = PG_GETARG_BOOL(0);
    1523                 : 
    1524               0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
    1525                 : 
    1526               0 :     return (Datum) 0;
    1527                 : }
    1528                 : 
    1529                 : /*
    1530                 :  * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
    1531                 :  * This can be removed someday, perhaps.
    1532                 :  */
    1533                 : Datum
    1534               0 : pg_stat_statements(PG_FUNCTION_ARGS)
    1535                 : {
    1536                 :     /* If it's really API 1.1, we'll figure that out below */
    1537               0 :     pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
    1538                 : 
    1539               0 :     return (Datum) 0;
    1540                 : }
    1541                 : 
    1542                 : /* Common code for all versions of pg_stat_statements() */
    1543                 : static void
    1544 CBC          43 : pg_stat_statements_internal(FunctionCallInfo fcinfo,
    1545                 :                             pgssVersion api_version,
    1546                 :                             bool showtext)
    1547                 : {
    1548              43 :     ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    1549              43 :     Oid         userid = GetUserId();
    1550              43 :     bool        is_allowed_role = false;
    1551              43 :     char       *qbuffer = NULL;
    1552              43 :     Size        qbuffer_size = 0;
    1553              43 :     Size        extent = 0;
    1554              43 :     int         gc_count = 0;
    1555                 :     HASH_SEQ_STATUS hash_seq;
    1556                 :     pgssEntry  *entry;
    1557                 : 
    1558                 :     /*
    1559                 :      * Superusers or roles with the privileges of pg_read_all_stats members
    1560                 :      * are allowed
    1561                 :      */
    1562              43 :     is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
    1563                 : 
    1564                 :     /* hash table must exist already */
    1565              43 :     if (!pgss || !pgss_hash)
    1566 UBC           0 :         ereport(ERROR,
    1567                 :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1568                 :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    1569                 : 
    1570 CBC          43 :     InitMaterializedSRF(fcinfo, 0);
    1571                 : 
    1572                 :     /*
    1573                 :      * Check we have the expected number of output arguments.  Aside from
    1574                 :      * being a good safety check, we need a kluge here to detect API version
    1575                 :      * 1.1, which was wedged into the code in an ill-considered way.
    1576                 :      */
    1577              43 :     switch (rsinfo->setDesc->natts)
    1578                 :     {
    1579 UBC           0 :         case PG_STAT_STATEMENTS_COLS_V1_0:
    1580               0 :             if (api_version != PGSS_V1_0)
    1581               0 :                 elog(ERROR, "incorrect number of output arguments");
    1582               0 :             break;
    1583               0 :         case PG_STAT_STATEMENTS_COLS_V1_1:
    1584                 :             /* pg_stat_statements() should have told us 1.0 */
    1585               0 :             if (api_version != PGSS_V1_0)
    1586               0 :                 elog(ERROR, "incorrect number of output arguments");
    1587               0 :             api_version = PGSS_V1_1;
    1588               0 :             break;
    1589               0 :         case PG_STAT_STATEMENTS_COLS_V1_2:
    1590               0 :             if (api_version != PGSS_V1_2)
    1591               0 :                 elog(ERROR, "incorrect number of output arguments");
    1592               0 :             break;
    1593 CBC           1 :         case PG_STAT_STATEMENTS_COLS_V1_3:
    1594               1 :             if (api_version != PGSS_V1_3)
    1595 UBC           0 :                 elog(ERROR, "incorrect number of output arguments");
    1596 CBC           1 :             break;
    1597 UBC           0 :         case PG_STAT_STATEMENTS_COLS_V1_8:
    1598               0 :             if (api_version != PGSS_V1_8)
    1599               0 :                 elog(ERROR, "incorrect number of output arguments");
    1600               0 :             break;
    1601 CBC           1 :         case PG_STAT_STATEMENTS_COLS_V1_9:
    1602               1 :             if (api_version != PGSS_V1_9)
    1603 UBC           0 :                 elog(ERROR, "incorrect number of output arguments");
    1604 CBC           1 :             break;
    1605              41 :         case PG_STAT_STATEMENTS_COLS_V1_10:
    1606              41 :             if (api_version != PGSS_V1_10)
    1607 UBC           0 :                 elog(ERROR, "incorrect number of output arguments");
    1608 CBC          41 :             break;
    1609 UBC           0 :         default:
    1610               0 :             elog(ERROR, "incorrect number of output arguments");
    1611                 :     }
    1612                 : 
    1613                 :     /*
    1614                 :      * We'd like to load the query text file (if needed) while not holding any
    1615                 :      * lock on pgss->lock.  In the worst case we'll have to do this again
    1616                 :      * after we have the lock, but it's unlikely enough to make this a win
    1617                 :      * despite occasional duplicated work.  We need to reload if anybody
    1618                 :      * writes to the file (either a retail qtext_store(), or a garbage
    1619                 :      * collection) between this point and where we've gotten shared lock.  If
    1620                 :      * a qtext_store is actually in progress when we look, we might as well
    1621                 :      * skip the speculative load entirely.
    1622                 :      */
    1623 CBC          43 :     if (showtext)
    1624                 :     {
    1625                 :         int         n_writers;
    1626                 : 
    1627                 :         /* Take the mutex so we can examine variables */
    1628                 :         {
    1629              43 :             volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    1630                 : 
    1631              43 :             SpinLockAcquire(&s->mutex);
    1632              43 :             extent = s->extent;
    1633              43 :             n_writers = s->n_writers;
    1634              43 :             gc_count = s->gc_count;
    1635              43 :             SpinLockRelease(&s->mutex);
    1636                 :         }
    1637                 : 
    1638                 :         /* No point in loading file now if there are active writers */
    1639              43 :         if (n_writers == 0)
    1640              43 :             qbuffer = qtext_load_file(&qbuffer_size);
    1641                 :     }
    1642                 : 
    1643                 :     /*
    1644                 :      * Get shared lock, load or reload the query text file if we must, and
    1645                 :      * iterate over the hashtable entries.
    1646                 :      *
    1647                 :      * With a large hash table, we might be holding the lock rather longer
    1648                 :      * than one could wish.  However, this only blocks creation of new hash
    1649                 :      * table entries, and the larger the hash table the less likely that is to
    1650                 :      * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
    1651                 :      * we need to partition the hash table to limit the time spent holding any
    1652                 :      * one lock.
    1653                 :      */
    1654              43 :     LWLockAcquire(pgss->lock, LW_SHARED);
    1655                 : 
    1656              43 :     if (showtext)
    1657                 :     {
    1658                 :         /*
    1659                 :          * Here it is safe to examine extent and gc_count without taking the
    1660                 :          * mutex.  Note that although other processes might change
    1661                 :          * pgss->extent just after we look at it, the strings they then write
    1662                 :          * into the file cannot yet be referenced in the hashtable, so we
    1663                 :          * don't care whether we see them or not.
    1664                 :          *
    1665                 :          * If qtext_load_file fails, we just press on; we'll return NULL for
    1666                 :          * every query text.
    1667                 :          */
    1668              43 :         if (qbuffer == NULL ||
    1669              43 :             pgss->extent != extent ||
    1670              43 :             pgss->gc_count != gc_count)
    1671                 :         {
    1672 UNC           0 :             free(qbuffer);
    1673 UIC           0 :             qbuffer = qtext_load_file(&qbuffer_size);
    1674                 :         }
    1675                 :     }
    1676 ECB             : 
    1677 CBC          43 :     hash_seq_init(&hash_seq, pgss_hash);
    1678 GIC       24097 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    1679                 :     {
    1680                 :         Datum       values[PG_STAT_STATEMENTS_COLS];
    1681 ECB             :         bool        nulls[PG_STAT_STATEMENTS_COLS];
    1682 GIC       24054 :         int         i = 0;
    1683                 :         Counters    tmp;
    1684 ECB             :         double      stddev;
    1685 GIC       24054 :         int64       queryid = entry->key.queryid;
    1686 ECB             : 
    1687 CBC       24054 :         memset(values, 0, sizeof(values));
    1688 GIC       24054 :         memset(nulls, 0, sizeof(nulls));
    1689 ECB             : 
    1690 CBC       24054 :         values[i++] = ObjectIdGetDatum(entry->key.userid);
    1691           24054 :         values[i++] = ObjectIdGetDatum(entry->key.dbid);
    1692           24054 :         if (api_version >= PGSS_V1_9)
    1693 GIC       24044 :             values[i++] = BoolGetDatum(entry->key.toplevel);
    1694 ECB             : 
    1695 GIC       24054 :         if (is_allowed_role || entry->key.userid == userid)
    1696 ECB             :         {
    1697 CBC       24054 :             if (api_version >= PGSS_V1_2)
    1698 GIC       24054 :                 values[i++] = Int64GetDatumFast(queryid);
    1699 ECB             : 
    1700 GIC       24054 :             if (showtext)
    1701 ECB             :             {
    1702 GIC       24054 :                 char       *qstr = qtext_fetch(entry->query_offset,
    1703                 :                                                entry->query_len,
    1704                 :                                                qbuffer,
    1705                 :                                                qbuffer_size);
    1706 ECB             : 
    1707 GIC       24054 :                 if (qstr)
    1708                 :                 {
    1709                 :                     char       *enc;
    1710 ECB             : 
    1711 GIC       24054 :                     enc = pg_any_to_server(qstr,
    1712                 :                                            entry->query_len,
    1713                 :                                            entry->encoding);
    1714 ECB             : 
    1715 GIC       24054 :                     values[i++] = CStringGetTextDatum(enc);
    1716 ECB             : 
    1717 GBC       24054 :                     if (enc != qstr)
    1718 UIC           0 :                         pfree(enc);
    1719                 :                 }
    1720                 :                 else
    1721                 :                 {
    1722 EUB             :                     /* Just return a null if we fail to find the text */
    1723 UIC           0 :                     nulls[i++] = true;
    1724                 :                 }
    1725                 :             }
    1726                 :             else
    1727                 :             {
    1728 EUB             :                 /* Query text not requested */
    1729 UIC           0 :                 nulls[i++] = true;
    1730                 :             }
    1731                 :         }
    1732                 :         else
    1733                 :         {
    1734 EUB             :             /* Don't show queryid */
    1735 UBC           0 :             if (api_version >= PGSS_V1_2)
    1736 UIC           0 :                 nulls[i++] = true;
    1737                 : 
    1738                 :             /*
    1739                 :              * Don't show query text, but hint as to the reason for not doing
    1740                 :              * so if it was requested
    1741 EUB             :              */
    1742 UBC           0 :             if (showtext)
    1743 UIC           0 :                 values[i++] = CStringGetTextDatum("<insufficient privilege>");
    1744 EUB             :             else
    1745 UIC           0 :                 nulls[i++] = true;
    1746                 :         }
    1747                 : 
    1748                 :         /* copy counters to a local variable to keep locking time short */
    1749 ECB             :         {
    1750 GIC       24054 :             volatile pgssEntry *e = (volatile pgssEntry *) entry;
    1751 ECB             : 
    1752 CBC       24054 :             SpinLockAcquire(&e->mutex);
    1753           24054 :             tmp = e->counters;
    1754 GIC       24054 :             SpinLockRelease(&e->mutex);
    1755                 :         }
    1756                 : 
    1757 ECB             :         /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
    1758 CBC       24054 :         if (IS_STICKY(tmp))
    1759 GIC         571 :             continue;
    1760                 : 
    1761 ECB             :         /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
    1762 GIC       70449 :         for (int kind = 0; kind < PGSS_NUMKIND; kind++)
    1763 ECB             :         {
    1764 GIC       46966 :             if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
    1765 ECB             :             {
    1766 CBC       46957 :                 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
    1767 GIC       46957 :                 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
    1768                 :             }
    1769 ECB             : 
    1770 GIC       46966 :             if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
    1771                 :                 api_version >= PGSS_V1_8)
    1772 ECB             :             {
    1773 CBC       46957 :                 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
    1774           46957 :                 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
    1775 GIC       46957 :                 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
    1776                 : 
    1777                 :                 /*
    1778                 :                  * Note we are calculating the population variance here, not
    1779                 :                  * the sample variance, as we have data for the whole
    1780                 :                  * population, so Bessel's correction is not used, and we
    1781                 :                  * don't divide by tmp.calls - 1.
    1782 ECB             :                  */
    1783 CBC       46957 :                 if (tmp.calls[kind] > 1)
    1784 GIC        4161 :                     stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
    1785 ECB             :                 else
    1786 CBC       42796 :                     stddev = 0.0;
    1787 GIC       46957 :                 values[i++] = Float8GetDatumFast(stddev);
    1788                 :             }
    1789 ECB             :         }
    1790 CBC       23483 :         values[i++] = Int64GetDatumFast(tmp.rows);
    1791           23483 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
    1792           23483 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
    1793           23483 :         if (api_version >= PGSS_V1_1)
    1794           23483 :             values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
    1795           23483 :         values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
    1796           23483 :         values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
    1797           23483 :         values[i++] = Int64GetDatumFast(tmp.local_blks_read);
    1798           23483 :         if (api_version >= PGSS_V1_1)
    1799           23483 :             values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
    1800           23483 :         values[i++] = Int64GetDatumFast(tmp.local_blks_written);
    1801           23483 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
    1802           23483 :         values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
    1803 GIC       23483 :         if (api_version >= PGSS_V1_1)
    1804 ECB             :         {
    1805 CBC       23483 :             values[i++] = Float8GetDatumFast(tmp.blk_read_time);
    1806 GIC       23483 :             values[i++] = Float8GetDatumFast(tmp.blk_write_time);
    1807 ECB             :         }
    1808 GIC       23483 :         if (api_version >= PGSS_V1_10)
    1809 ECB             :         {
    1810 CBC       23462 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_read_time);
    1811 GIC       23462 :             values[i++] = Float8GetDatumFast(tmp.temp_blk_write_time);
    1812 ECB             :         }
    1813 GIC       23483 :         if (api_version >= PGSS_V1_8)
    1814                 :         {
    1815                 :             char        buf[256];
    1816                 :             Datum       wal_bytes;
    1817 ECB             : 
    1818 CBC       23474 :             values[i++] = Int64GetDatumFast(tmp.wal_records);
    1819 GIC       23474 :             values[i++] = Int64GetDatumFast(tmp.wal_fpi);
    1820 ECB             : 
    1821 GIC       23474 :             snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
    1822                 : 
    1823 ECB             :             /* Convert to numeric. */
    1824 GIC       23474 :             wal_bytes = DirectFunctionCall3(numeric_in,
    1825                 :                                             CStringGetDatum(buf),
    1826                 :                                             ObjectIdGetDatum(0),
    1827 ECB             :                                             Int32GetDatum(-1));
    1828 GIC       23474 :             values[i++] = wal_bytes;
    1829 ECB             :         }
    1830 GIC       23483 :         if (api_version >= PGSS_V1_10)
    1831 ECB             :         {
    1832 CBC       23462 :             values[i++] = Int64GetDatumFast(tmp.jit_functions);
    1833           23462 :             values[i++] = Float8GetDatumFast(tmp.jit_generation_time);
    1834           23462 :             values[i++] = Int64GetDatumFast(tmp.jit_inlining_count);
    1835           23462 :             values[i++] = Float8GetDatumFast(tmp.jit_inlining_time);
    1836           23462 :             values[i++] = Int64GetDatumFast(tmp.jit_optimization_count);
    1837           23462 :             values[i++] = Float8GetDatumFast(tmp.jit_optimization_time);
    1838           23462 :             values[i++] = Int64GetDatumFast(tmp.jit_emission_count);
    1839 GIC       23462 :             values[i++] = Float8GetDatumFast(tmp.jit_emission_time);
    1840                 :         }
    1841 ECB             : 
    1842 GIC       23483 :         Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
    1843                 :                      api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
    1844                 :                      api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
    1845                 :                      api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
    1846                 :                      api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
    1847                 :                      api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
    1848                 :                      api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
    1849                 :                      -1 /* fail if you forget to update this assert */ ));
    1850 ECB             : 
    1851 GIC       23483 :         tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    1852                 :     }
    1853 ECB             : 
    1854 GIC          43 :     LWLockRelease(pgss->lock);
    1855 ECB             : 
    1856 GNC          43 :     free(qbuffer);
    1857 GIC          43 : }
    1858                 : 
    1859                 : /* Number of output arguments (columns) for pg_stat_statements_info */
    1860                 : #define PG_STAT_STATEMENTS_INFO_COLS    2
    1861                 : 
    1862                 : /*
    1863                 :  * Return statistics of pg_stat_statements.
    1864 ECB             :  */
    1865                 : Datum
    1866 GIC           1 : pg_stat_statements_info(PG_FUNCTION_ARGS)
    1867                 : {
    1868 ECB             :     pgssGlobalStats stats;
    1869                 :     TupleDesc   tupdesc;
    1870 GNC           1 :     Datum       values[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    1871               1 :     bool        nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
    1872 EUB             : 
    1873 GIC           1 :     if (!pgss || !pgss_hash)
    1874 UIC           0 :         ereport(ERROR,
    1875                 :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1876                 :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    1877 ECB             : 
    1878 EUB             :     /* Build a tuple descriptor for our result type */
    1879 GIC           1 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1880 UIC           0 :         elog(ERROR, "return type must be a row type");
    1881                 : 
    1882 ECB             :     /* Read global statistics for pg_stat_statements */
    1883                 :     {
    1884 GIC           1 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    1885                 : 
    1886 CBC           1 :         SpinLockAcquire(&s->mutex);
    1887               1 :         stats = s->stats;
    1888 GIC           1 :         SpinLockRelease(&s->mutex);
    1889 ECB             :     }
    1890                 : 
    1891 GIC           1 :     values[0] = Int64GetDatum(stats.dealloc);
    1892               1 :     values[1] = TimestampTzGetDatum(stats.stats_reset);
    1893                 : 
    1894               1 :     PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
    1895                 : }
    1896 ECB             : 
    1897                 : /*
    1898                 :  * Estimate shared memory space needed.
    1899                 :  */
    1900                 : static Size
    1901 CBC           3 : pgss_memsize(void)
    1902                 : {
    1903 ECB             :     Size        size;
    1904                 : 
    1905 GIC           3 :     size = MAXALIGN(sizeof(pgssSharedState));
    1906               3 :     size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
    1907                 : 
    1908               3 :     return size;
    1909                 : }
    1910                 : 
    1911                 : /*
    1912                 :  * Allocate a new hashtable entry.
    1913                 :  * caller must hold an exclusive lock on pgss->lock
    1914                 :  *
    1915                 :  * "query" need not be null-terminated; we rely on query_len instead
    1916                 :  *
    1917                 :  * If "sticky" is true, make the new entry artificially sticky so that it will
    1918                 :  * probably still be there when the query finishes execution.  We do this by
    1919                 :  * giving it a median usage value rather than the normal value.  (Strictly
    1920                 :  * speaking, query strings are normalized on a best effort basis, though it
    1921                 :  * would be difficult to demonstrate this even under artificial conditions.)
    1922                 :  *
    1923                 :  * Note: despite needing exclusive lock, it's not an error for the target
    1924 ECB             :  * entry to already exist.  This is because pgss_store releases and
    1925                 :  * reacquires lock after failing to find a match; so someone else could
    1926                 :  * have made the entry while we waited to get exclusive lock.
    1927                 :  */
    1928                 : static pgssEntry *
    1929 GIC       24795 : entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
    1930                 :             bool sticky)
    1931 ECB             : {
    1932 EUB             :     pgssEntry  *entry;
    1933                 :     bool        found;
    1934                 : 
    1935 ECB             :     /* Make space if needed */
    1936 GIC       24795 :     while (hash_get_num_entries(pgss_hash) >= pgss_max)
    1937 LBC           0 :         entry_dealloc();
    1938                 : 
    1939                 :     /* Find or create an entry with desired hash code */
    1940 GIC       24795 :     entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
    1941                 : 
    1942 CBC       24795 :     if (!found)
    1943                 :     {
    1944 ECB             :         /* New entry, initialize it */
    1945                 : 
    1946                 :         /* reset the statistics */
    1947 GIC       24794 :         memset(&entry->counters, 0, sizeof(Counters));
    1948 ECB             :         /* set the appropriate initial usage count */
    1949 CBC       24794 :         entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
    1950 ECB             :         /* re-initialize the mutex each time ... we assume no one using it */
    1951 CBC       24794 :         SpinLockInit(&entry->mutex);
    1952                 :         /* ... and don't forget the query text metadata */
    1953 GIC       24794 :         Assert(query_len >= 0);
    1954 CBC       24794 :         entry->query_offset = query_offset;
    1955 GIC       24794 :         entry->query_len = query_len;
    1956           24794 :         entry->encoding = encoding;
    1957                 :     }
    1958                 : 
    1959           24795 :     return entry;
    1960                 : }
    1961 EUB             : 
    1962                 : /*
    1963                 :  * qsort comparator for sorting into increasing usage order
    1964                 :  */
    1965                 : static int
    1966 UBC           0 : entry_cmp(const void *lhs, const void *rhs)
    1967 EUB             : {
    1968 UBC           0 :     double      l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
    1969               0 :     double      r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
    1970                 : 
    1971               0 :     if (l_usage < r_usage)
    1972 UIC           0 :         return -1;
    1973               0 :     else if (l_usage > r_usage)
    1974               0 :         return +1;
    1975                 :     else
    1976               0 :         return 0;
    1977                 : }
    1978                 : 
    1979                 : /*
    1980 EUB             :  * Deallocate least-used entries.
    1981                 :  *
    1982                 :  * Caller must hold an exclusive lock on pgss->lock.
    1983                 :  */
    1984                 : static void
    1985 UIC           0 : entry_dealloc(void)
    1986                 : {
    1987                 :     HASH_SEQ_STATUS hash_seq;
    1988                 :     pgssEntry **entries;
    1989                 :     pgssEntry  *entry;
    1990                 :     int         nvictims;
    1991                 :     int         i;
    1992                 :     Size        tottextlen;
    1993                 :     int         nvalidtexts;
    1994                 : 
    1995                 :     /*
    1996                 :      * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
    1997                 :      * While we're scanning the table, apply the decay factor to the usage
    1998                 :      * values, and update the mean query length.
    1999                 :      *
    2000                 :      * Note that the mean query length is almost immediately obsolete, since
    2001                 :      * we compute it before not after discarding the least-used entries.
    2002 EUB             :      * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
    2003                 :      * making two passes to get a more current result.  Likewise, the new
    2004                 :      * cur_median_usage includes the entries we're about to zap.
    2005                 :      */
    2006                 : 
    2007 UIC           0 :     entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
    2008 EUB             : 
    2009 UBC           0 :     i = 0;
    2010 UIC           0 :     tottextlen = 0;
    2011 UBC           0 :     nvalidtexts = 0;
    2012                 : 
    2013               0 :     hash_seq_init(&hash_seq, pgss_hash);
    2014               0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2015                 :     {
    2016               0 :         entries[i++] = entry;
    2017                 :         /* "Sticky" entries get a different usage decay rate. */
    2018               0 :         if (IS_STICKY(entry->counters))
    2019 UIC           0 :             entry->counters.usage *= STICKY_DECREASE_FACTOR;
    2020 EUB             :         else
    2021 UBC           0 :             entry->counters.usage *= USAGE_DECREASE_FACTOR;
    2022                 :         /* In the mean length computation, ignore dropped texts. */
    2023 UIC           0 :         if (entry->query_len >= 0)
    2024                 :         {
    2025               0 :             tottextlen += entry->query_len + 1;
    2026 UBC           0 :             nvalidtexts++;
    2027                 :         }
    2028                 :     }
    2029 EUB             : 
    2030                 :     /* Sort into increasing order by usage */
    2031 UIC           0 :     qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
    2032 EUB             : 
    2033                 :     /* Record the (approximate) median usage */
    2034 UIC           0 :     if (i > 0)
    2035 UBC           0 :         pgss->cur_median_usage = entries[i / 2]->counters.usage;
    2036                 :     /* Record the mean query length */
    2037 UIC           0 :     if (nvalidtexts > 0)
    2038 UBC           0 :         pgss->mean_query_len = tottextlen / nvalidtexts;
    2039 EUB             :     else
    2040 UIC           0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2041 EUB             : 
    2042                 :     /* Now zap an appropriate fraction of lowest-usage entries */
    2043 UBC           0 :     nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
    2044 UIC           0 :     nvictims = Min(nvictims, i);
    2045                 : 
    2046 UBC           0 :     for (i = 0; i < nvictims; i++)
    2047                 :     {
    2048 UIC           0 :         hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
    2049                 :     }
    2050 EUB             : 
    2051 UIC           0 :     pfree(entries);
    2052 EUB             : 
    2053                 :     /* Increment the number of times entries are deallocated */
    2054                 :     {
    2055 UIC           0 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2056 EUB             : 
    2057 UIC           0 :         SpinLockAcquire(&s->mutex);
    2058               0 :         s->stats.dealloc += 1;
    2059               0 :         SpinLockRelease(&s->mutex);
    2060                 :     }
    2061               0 : }
    2062                 : 
    2063                 : /*
    2064                 :  * Given a query string (not necessarily null-terminated), allocate a new
    2065                 :  * entry in the external query text file and store the string there.
    2066                 :  *
    2067                 :  * If successful, returns true, and stores the new entry's offset in the file
    2068                 :  * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
    2069                 :  * number of garbage collections that have occurred so far.
    2070                 :  *
    2071                 :  * On failure, returns false.
    2072                 :  *
    2073                 :  * At least a shared lock on pgss->lock must be held by the caller, so as
    2074                 :  * to prevent a concurrent garbage collection.  Share-lock-holding callers
    2075 ECB             :  * should pass a gc_count pointer to obtain the number of garbage collections,
    2076                 :  * so that they can recheck the count after obtaining exclusive lock to
    2077                 :  * detect whether a garbage collection occurred (and removed this entry).
    2078                 :  */
    2079                 : static bool
    2080 GIC       24795 : qtext_store(const char *query, int query_len,
    2081                 :             Size *query_offset, int *gc_count)
    2082                 : {
    2083                 :     Size        off;
    2084                 :     int         fd;
    2085                 : 
    2086 ECB             :     /*
    2087                 :      * We use a spinlock to protect extent/n_writers/gc_count, so that
    2088                 :      * multiple processes may execute this function concurrently.
    2089                 :      */
    2090                 :     {
    2091 CBC       24795 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2092 ECB             : 
    2093 CBC       24795 :         SpinLockAcquire(&s->mutex);
    2094           24795 :         off = s->extent;
    2095 GIC       24795 :         s->extent += query_len + 1;
    2096           24795 :         s->n_writers++;
    2097 CBC       24795 :         if (gc_count)
    2098 GIC       24795 :             *gc_count = s->gc_count;
    2099           24795 :         SpinLockRelease(&s->mutex);
    2100                 :     }
    2101                 : 
    2102           24795 :     *query_offset = off;
    2103                 : 
    2104 ECB             :     /*
    2105                 :      * Don't allow the file to grow larger than what qtext_load_file can
    2106 EUB             :      * (theoretically) handle.  This has been seen to be reachable on 32-bit
    2107                 :      * platforms.
    2108                 :      */
    2109 GIC       24795 :     if (unlikely(query_len >= MaxAllocHugeSize - off))
    2110                 :     {
    2111 UIC           0 :         errno = EFBIG;          /* not quite right, but it'll do */
    2112 LBC           0 :         fd = -1;
    2113               0 :         goto error;
    2114 EUB             :     }
    2115                 : 
    2116 ECB             :     /* Now write the data into the successfully-reserved part of the file */
    2117 GBC       24795 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
    2118 CBC       24795 :     if (fd < 0)
    2119 UBC           0 :         goto error;
    2120                 : 
    2121 CBC       24795 :     if (pg_pwrite(fd, query, query_len, off) != query_len)
    2122 UIC           0 :         goto error;
    2123 GIC       24795 :     if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
    2124 UIC           0 :         goto error;
    2125 ECB             : 
    2126 GIC       24795 :     CloseTransientFile(fd);
    2127 ECB             : 
    2128                 :     /* Mark our write complete */
    2129                 :     {
    2130 GIC       24795 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2131                 : 
    2132 CBC       24795 :         SpinLockAcquire(&s->mutex);
    2133 GIC       24795 :         s->n_writers--;
    2134 GBC       24795 :         SpinLockRelease(&s->mutex);
    2135 EUB             :     }
    2136                 : 
    2137 GIC       24795 :     return true;
    2138                 : 
    2139 UIC           0 : error:
    2140 UBC           0 :     ereport(LOG,
    2141 EUB             :             (errcode_for_file_access(),
    2142                 :              errmsg("could not write file \"%s\": %m",
    2143                 :                     PGSS_TEXT_FILE)));
    2144                 : 
    2145 UBC           0 :     if (fd >= 0)
    2146 UIC           0 :         CloseTransientFile(fd);
    2147 EUB             : 
    2148                 :     /* Mark our write complete */
    2149                 :     {
    2150 UIC           0 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2151                 : 
    2152 UBC           0 :         SpinLockAcquire(&s->mutex);
    2153 UIC           0 :         s->n_writers--;
    2154               0 :         SpinLockRelease(&s->mutex);
    2155                 :     }
    2156                 : 
    2157               0 :     return false;
    2158                 : }
    2159                 : 
    2160                 : /*
    2161                 :  * Read the external query text file into a malloc'd buffer.
    2162                 :  *
    2163                 :  * Returns NULL (without throwing an error) if unable to read, e.g.
    2164                 :  * file not there or insufficient memory.
    2165                 :  *
    2166                 :  * On success, the buffer size is also returned into *buffer_size.
    2167 ECB             :  *
    2168                 :  * This can be called without any lock on pgss->lock, but in that case
    2169                 :  * the caller is responsible for verifying that the result is sane.
    2170                 :  */
    2171                 : static char *
    2172 GIC          46 : qtext_load_file(Size *buffer_size)
    2173                 : {
    2174 ECB             :     char       *buf;
    2175                 :     int         fd;
    2176                 :     struct stat stat;
    2177 EUB             :     Size        nread;
    2178                 : 
    2179 GIC          46 :     fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY);
    2180              46 :     if (fd < 0)
    2181                 :     {
    2182 UBC           0 :         if (errno != ENOENT)
    2183 UIC           0 :             ereport(LOG,
    2184                 :                     (errcode_for_file_access(),
    2185                 :                      errmsg("could not read file \"%s\": %m",
    2186 ECB             :                             PGSS_TEXT_FILE)));
    2187 UIC           0 :         return NULL;
    2188 EUB             :     }
    2189                 : 
    2190                 :     /* Get file length */
    2191 GIC          46 :     if (fstat(fd, &stat))
    2192 EUB             :     {
    2193 UBC           0 :         ereport(LOG,
    2194                 :                 (errcode_for_file_access(),
    2195                 :                  errmsg("could not stat file \"%s\": %m",
    2196                 :                         PGSS_TEXT_FILE)));
    2197 LBC           0 :         CloseTransientFile(fd);
    2198               0 :         return NULL;
    2199                 :     }
    2200 EUB             : 
    2201 ECB             :     /* Allocate buffer; beware that off_t might be wider than size_t */
    2202 GIC          46 :     if (stat.st_size <= MaxAllocHugeSize)
    2203 GBC          46 :         buf = (char *) malloc(stat.st_size);
    2204                 :     else
    2205 UIC           0 :         buf = NULL;
    2206 GIC          46 :     if (buf == NULL)
    2207                 :     {
    2208 UBC           0 :         ereport(LOG,
    2209 EUB             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    2210                 :                  errmsg("out of memory"),
    2211                 :                  errdetail("Could not allocate enough memory to read file \"%s\".",
    2212                 :                            PGSS_TEXT_FILE)));
    2213 UIC           0 :         CloseTransientFile(fd);
    2214               0 :         return NULL;
    2215                 :     }
    2216                 : 
    2217 ECB             :     /*
    2218                 :      * OK, slurp in the file.  Windows fails if we try to read more than
    2219                 :      * INT_MAX bytes at once, and other platforms might not like that either,
    2220                 :      * so read a very large file in 1GB segments.
    2221                 :      */
    2222 GIC          46 :     nread = 0;
    2223              91 :     while (nread < stat.st_size)
    2224                 :     {
    2225              45 :         int         toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
    2226                 : 
    2227                 :         /*
    2228                 :          * If we get a short read and errno doesn't get set, the reason is
    2229 ECB             :          * probably that garbage collection truncated the file since we did
    2230                 :          * the fstat(), so we don't log a complaint --- but we don't return
    2231                 :          * the data, either, since it's most likely corrupt due to concurrent
    2232 EUB             :          * writes from garbage collection.
    2233                 :          */
    2234 GIC          45 :         errno = 0;
    2235              45 :         if (read(fd, buf + nread, toread) != toread)
    2236                 :         {
    2237 UBC           0 :             if (errno)
    2238               0 :                 ereport(LOG,
    2239 EUB             :                         (errcode_for_file_access(),
    2240                 :                          errmsg("could not read file \"%s\": %m",
    2241 ECB             :                                 PGSS_TEXT_FILE)));
    2242 UIC           0 :             free(buf);
    2243               0 :             CloseTransientFile(fd);
    2244 LBC           0 :             return NULL;
    2245 EUB             :         }
    2246 GIC          45 :         nread += toread;
    2247                 :     }
    2248                 : 
    2249 CBC          46 :     if (CloseTransientFile(fd) != 0)
    2250 LBC           0 :         ereport(LOG,
    2251                 :                 (errcode_for_file_access(),
    2252                 :                  errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
    2253                 : 
    2254 GIC          46 :     *buffer_size = nread;
    2255              46 :     return buf;
    2256                 : }
    2257                 : 
    2258                 : /*
    2259                 :  * Locate a query text in the file image previously read by qtext_load_file().
    2260 ECB             :  *
    2261                 :  * We validate the given offset/length, and return NULL if bogus.  Otherwise,
    2262                 :  * the result points to a null-terminated string within the buffer.
    2263                 :  */
    2264                 : static char *
    2265 GBC       48544 : qtext_fetch(Size query_offset, int query_len,
    2266                 :             char *buffer, Size buffer_size)
    2267 ECB             : {
    2268                 :     /* File read failed? */
    2269 GBC       48544 :     if (buffer == NULL)
    2270 UIC           0 :         return NULL;
    2271 ECB             :     /* Bogus offset/length? */
    2272 GBC       48544 :     if (query_len < 0 ||
    2273 GIC       48544 :         query_offset + query_len >= buffer_size)
    2274 LBC           0 :         return NULL;
    2275                 :     /* As a further sanity check, make sure there's a trailing null */
    2276 GIC       48544 :     if (buffer[query_offset + query_len] != '\0')
    2277 UIC           0 :         return NULL;
    2278                 :     /* Looks OK */
    2279 GIC       48544 :     return buffer + query_offset;
    2280                 : }
    2281                 : 
    2282                 : /*
    2283 ECB             :  * Do we need to garbage-collect the external query text file?
    2284                 :  *
    2285                 :  * Caller should hold at least a shared lock on pgss->lock.
    2286                 :  */
    2287                 : static bool
    2288 GIC       24795 : need_gc_qtexts(void)
    2289 ECB             : {
    2290                 :     Size        extent;
    2291                 : 
    2292                 :     /* Read shared extent pointer */
    2293                 :     {
    2294 GIC       24795 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2295                 : 
    2296           24795 :         SpinLockAcquire(&s->mutex);
    2297           24795 :         extent = s->extent;
    2298           24795 :         SpinLockRelease(&s->mutex);
    2299                 :     }
    2300                 : 
    2301                 :     /*
    2302                 :      * Don't proceed if file does not exceed 512 bytes per possible entry.
    2303 ECB             :      *
    2304                 :      * Here and in the next test, 32-bit machines have overflow hazards if
    2305                 :      * pgss_max and/or mean_query_len are large.  Force the multiplications
    2306                 :      * and comparisons to be done in uint64 arithmetic to forestall trouble.
    2307                 :      */
    2308 GIC       24795 :     if ((uint64) extent < (uint64) 512 * pgss_max)
    2309           24795 :         return false;
    2310                 : 
    2311                 :     /*
    2312                 :      * Don't proceed if file is less than about 50% bloat.  Nothing can or
    2313 EUB             :      * should be done in the event of unusually large query texts accounting
    2314                 :      * for file's large size.  We go to the trouble of maintaining the mean
    2315                 :      * query length in order to prevent garbage collection from thrashing
    2316                 :      * uselessly.
    2317                 :      */
    2318 UIC           0 :     if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
    2319               0 :         return false;
    2320                 : 
    2321               0 :     return true;
    2322                 : }
    2323                 : 
    2324                 : /*
    2325                 :  * Garbage-collect orphaned query texts in external file.
    2326                 :  *
    2327                 :  * This won't be called often in the typical case, since it's likely that
    2328                 :  * there won't be too much churn, and besides, a similar compaction process
    2329                 :  * occurs when serializing to disk at shutdown or as part of resetting.
    2330                 :  * Despite this, it seems prudent to plan for the edge case where the file
    2331                 :  * becomes unreasonably large, with no other method of compaction likely to
    2332                 :  * occur in the foreseeable future.
    2333                 :  *
    2334                 :  * The caller must hold an exclusive lock on pgss->lock.
    2335                 :  *
    2336 EUB             :  * At the first sign of trouble we unlink the query text file to get a clean
    2337                 :  * slate (although existing statistics are retained), rather than risk
    2338                 :  * thrashing by allowing the same problem case to recur indefinitely.
    2339                 :  */
    2340                 : static void
    2341 UIC           0 : gc_qtexts(void)
    2342                 : {
    2343                 :     char       *qbuffer;
    2344                 :     Size        qbuffer_size;
    2345               0 :     FILE       *qfile = NULL;
    2346                 :     HASH_SEQ_STATUS hash_seq;
    2347                 :     pgssEntry  *entry;
    2348                 :     Size        extent;
    2349                 :     int         nentries;
    2350                 : 
    2351 EUB             :     /*
    2352                 :      * When called from pgss_store, some other session might have proceeded
    2353                 :      * with garbage collection in the no-lock-held interim of lock strength
    2354                 :      * escalation.  Check once more that this is actually necessary.
    2355                 :      */
    2356 UIC           0 :     if (!need_gc_qtexts())
    2357               0 :         return;
    2358                 : 
    2359                 :     /*
    2360                 :      * Load the old texts file.  If we fail (out of memory, for instance),
    2361 EUB             :      * invalidate query texts.  Hopefully this is rare.  It might seem better
    2362                 :      * to leave things alone on an OOM failure, but the problem is that the
    2363                 :      * file is only going to get bigger; hoping for a future non-OOM result is
    2364                 :      * risky and can easily lead to complete denial of service.
    2365                 :      */
    2366 UIC           0 :     qbuffer = qtext_load_file(&qbuffer_size);
    2367               0 :     if (qbuffer == NULL)
    2368               0 :         goto gc_fail;
    2369                 : 
    2370                 :     /*
    2371 EUB             :      * We overwrite the query texts file in place, so as to reduce the risk of
    2372                 :      * an out-of-disk-space failure.  Since the file is guaranteed not to get
    2373                 :      * larger, this should always work on traditional filesystems; though we
    2374                 :      * could still lose on copy-on-write filesystems.
    2375                 :      */
    2376 UIC           0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2377               0 :     if (qfile == NULL)
    2378 EUB             :     {
    2379 UIC           0 :         ereport(LOG,
    2380                 :                 (errcode_for_file_access(),
    2381 EUB             :                  errmsg("could not write file \"%s\": %m",
    2382                 :                         PGSS_TEXT_FILE)));
    2383 UIC           0 :         goto gc_fail;
    2384 EUB             :     }
    2385                 : 
    2386 UIC           0 :     extent = 0;
    2387 UBC           0 :     nentries = 0;
    2388 EUB             : 
    2389 UIC           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2390               0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2391                 :     {
    2392               0 :         int         query_len = entry->query_len;
    2393 UBC           0 :         char       *qry = qtext_fetch(entry->query_offset,
    2394                 :                                       query_len,
    2395                 :                                       qbuffer,
    2396 EUB             :                                       qbuffer_size);
    2397                 : 
    2398 UIC           0 :         if (qry == NULL)
    2399 EUB             :         {
    2400                 :             /* Trouble ... drop the text */
    2401 UIC           0 :             entry->query_offset = 0;
    2402 UBC           0 :             entry->query_len = -1;
    2403                 :             /* entry will not be counted in mean query length computation */
    2404               0 :             continue;
    2405                 :         }
    2406                 : 
    2407 UIC           0 :         if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
    2408 EUB             :         {
    2409 UBC           0 :             ereport(LOG,
    2410                 :                     (errcode_for_file_access(),
    2411                 :                      errmsg("could not write file \"%s\": %m",
    2412 EUB             :                             PGSS_TEXT_FILE)));
    2413 UBC           0 :             hash_seq_term(&hash_seq);
    2414               0 :             goto gc_fail;
    2415                 :         }
    2416                 : 
    2417 UIC           0 :         entry->query_offset = extent;
    2418               0 :         extent += query_len + 1;
    2419               0 :         nentries++;
    2420                 :     }
    2421 EUB             : 
    2422                 :     /*
    2423                 :      * Truncate away any now-unused space.  If this fails for some odd reason,
    2424                 :      * we log it, but there's no need to fail.
    2425                 :      */
    2426 UIC           0 :     if (ftruncate(fileno(qfile), extent) != 0)
    2427 UBC           0 :         ereport(LOG,
    2428                 :                 (errcode_for_file_access(),
    2429 EUB             :                  errmsg("could not truncate file \"%s\": %m",
    2430                 :                         PGSS_TEXT_FILE)));
    2431                 : 
    2432 UIC           0 :     if (FreeFile(qfile))
    2433 EUB             :     {
    2434 UBC           0 :         ereport(LOG,
    2435                 :                 (errcode_for_file_access(),
    2436                 :                  errmsg("could not write file \"%s\": %m",
    2437 EUB             :                         PGSS_TEXT_FILE)));
    2438 UIC           0 :         qfile = NULL;
    2439               0 :         goto gc_fail;
    2440                 :     }
    2441 EUB             : 
    2442 UIC           0 :     elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
    2443                 :          pgss->extent, extent);
    2444                 : 
    2445                 :     /* Reset the shared extent pointer */
    2446               0 :     pgss->extent = extent;
    2447 EUB             : 
    2448                 :     /*
    2449                 :      * Also update the mean query length, to be sure that need_gc_qtexts()
    2450                 :      * won't still think we have a problem.
    2451                 :      */
    2452 UBC           0 :     if (nentries > 0)
    2453 UIC           0 :         pgss->mean_query_len = extent / nentries;
    2454                 :     else
    2455               0 :         pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2456                 : 
    2457               0 :     free(qbuffer);
    2458                 : 
    2459                 :     /*
    2460                 :      * OK, count a garbage collection cycle.  (Note: even though we have
    2461 EUB             :      * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
    2462                 :      * other processes may examine gc_count while holding only the mutex.
    2463                 :      * Also, we have to advance the count *after* we've rewritten the file,
    2464                 :      * else other processes might not realize they read a stale file.)
    2465                 :      */
    2466 UIC           0 :     record_gc_qtexts();
    2467 EUB             : 
    2468 UBC           0 :     return;
    2469 EUB             : 
    2470 UIC           0 : gc_fail:
    2471                 :     /* clean up resources */
    2472               0 :     if (qfile)
    2473               0 :         FreeFile(qfile);
    2474 UNC           0 :     free(qbuffer);
    2475 EUB             : 
    2476                 :     /*
    2477                 :      * Since the contents of the external file are now uncertain, mark all
    2478                 :      * hashtable entries as having invalid texts.
    2479                 :      */
    2480 UIC           0 :     hash_seq_init(&hash_seq, pgss_hash);
    2481               0 :     while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2482                 :     {
    2483               0 :         entry->query_offset = 0;
    2484 UBC           0 :         entry->query_len = -1;
    2485 EUB             :     }
    2486                 : 
    2487                 :     /*
    2488                 :      * Destroy the query text file and create a new, empty one
    2489                 :      */
    2490 UIC           0 :     (void) unlink(PGSS_TEXT_FILE);
    2491               0 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2492 UBC           0 :     if (qfile == NULL)
    2493 UIC           0 :         ereport(LOG,
    2494                 :                 (errcode_for_file_access(),
    2495 EUB             :                  errmsg("could not recreate file \"%s\": %m",
    2496                 :                         PGSS_TEXT_FILE)));
    2497                 :     else
    2498 UBC           0 :         FreeFile(qfile);
    2499                 : 
    2500                 :     /* Reset the shared extent pointer */
    2501 UIC           0 :     pgss->extent = 0;
    2502                 : 
    2503                 :     /* Reset mean_query_len to match the new state */
    2504               0 :     pgss->mean_query_len = ASSUMED_LENGTH_INIT;
    2505                 : 
    2506                 :     /*
    2507                 :      * Bump the GC count even though we failed.
    2508                 :      *
    2509                 :      * This is needed to make concurrent readers of file without any lock on
    2510                 :      * pgss->lock notice existence of new version of file.  Once readers
    2511 EUB             :      * subsequently observe a change in GC count with pgss->lock held, that
    2512                 :      * forces a safe reopen of file.  Writers also require that we bump here,
    2513                 :      * of course.  (As required by locking protocol, readers and writers don't
    2514                 :      * trust earlier file contents until gc_count is found unchanged after
    2515                 :      * pgss->lock acquired in shared or exclusive mode respectively.)
    2516                 :      */
    2517 UIC           0 :     record_gc_qtexts();
    2518 ECB             : }
    2519                 : 
    2520                 : /*
    2521                 :  * Release entries corresponding to parameters passed.
    2522                 :  */
    2523                 : static void
    2524 CBC          41 : entry_reset(Oid userid, Oid dbid, uint64 queryid)
    2525                 : {
    2526                 :     HASH_SEQ_STATUS hash_seq;
    2527 ECB             :     pgssEntry  *entry;
    2528 EUB             :     FILE       *qfile;
    2529                 :     long        num_entries;
    2530 GIC          41 :     long        num_remove = 0;
    2531                 :     pgssHashKey key;
    2532 ECB             : 
    2533 CBC          41 :     if (!pgss || !pgss_hash)
    2534 UIC           0 :         ereport(ERROR,
    2535 ECB             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    2536                 :                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
    2537                 : 
    2538 CBC          41 :     LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
    2539              41 :     num_entries = hash_get_num_entries(pgss_hash);
    2540 ECB             : 
    2541 CBC          41 :     if (userid != 0 && dbid != 0 && queryid != UINT64CONST(0))
    2542                 :     {
    2543                 :         /* If all the parameters are available, use the fast path. */
    2544               1 :         memset(&key, 0, sizeof(pgssHashKey));
    2545               1 :         key.userid = userid;
    2546               1 :         key.dbid = dbid;
    2547 GBC           1 :         key.queryid = queryid;
    2548                 : 
    2549                 :         /* Remove the key if it exists, starting with the non-top-level entry */
    2550 CBC           1 :         key.toplevel = false;
    2551 GIC           1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
    2552               1 :         if (entry)              /* found */
    2553 LBC           0 :             num_remove++;
    2554 ECB             : 
    2555                 :         /* Also remove entries for top level statements */
    2556 GIC           1 :         key.toplevel = true;
    2557 ECB             : 
    2558                 :         /* Remove the key if exists */
    2559 GIC           1 :         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_REMOVE, NULL);
    2560 CBC           1 :         if (entry)              /* found */
    2561               1 :             num_remove++;
    2562                 :     }
    2563              40 :     else if (userid != 0 || dbid != 0 || queryid != UINT64CONST(0))
    2564 ECB             :     {
    2565                 :         /* Remove entries corresponding to valid parameters. */
    2566 GIC           3 :         hash_seq_init(&hash_seq, pgss_hash);
    2567 CBC          39 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2568 ECB             :         {
    2569 GIC          36 :             if ((!userid || entry->key.userid == userid) &&
    2570              26 :                 (!dbid || entry->key.dbid == dbid) &&
    2571              24 :                 (!queryid || entry->key.queryid == queryid))
    2572                 :             {
    2573               4 :                 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
    2574               4 :                 num_remove++;
    2575 ECB             :             }
    2576                 :         }
    2577                 :     }
    2578                 :     else
    2579                 :     {
    2580                 :         /* Remove all entries. */
    2581 GIC          37 :         hash_seq_init(&hash_seq, pgss_hash);
    2582             336 :         while ((entry = hash_seq_search(&hash_seq)) != NULL)
    2583                 :         {
    2584 CBC         299 :             hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
    2585             299 :             num_remove++;
    2586                 :         }
    2587                 :     }
    2588                 : 
    2589                 :     /* If some entries remain, skip the global reset and file truncation */
    2590 GIC          41 :     if (num_entries != num_remove)
    2591               4 :         goto release_lock;
    2592 ECB             : 
    2593                 :     /*
    2594                 :      * Reset global statistics for pg_stat_statements since all entries are
    2595                 :      * removed.
    2596                 :      */
    2597                 :     {
    2598 CBC          37 :         volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
    2599 GIC          37 :         TimestampTz stats_reset = GetCurrentTimestamp();
    2600                 : 
    2601              37 :         SpinLockAcquire(&s->mutex);
    2602              37 :         s->stats.dealloc = 0;
    2603              37 :         s->stats.stats_reset = stats_reset;
    2604              37 :         SpinLockRelease(&s->mutex);
    2605 ECB             :     }
    2606                 : 
    2607                 :     /*
    2608 EUB             :      * Write new empty query file, perhaps even creating a new one to recover
    2609                 :      * if the file was missing.
    2610                 :      */
    2611 GIC          37 :     qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
    2612 GBC          37 :     if (qfile == NULL)
    2613                 :     {
    2614 UIC           0 :         ereport(LOG,
    2615                 :                 (errcode_for_file_access(),
    2616 ECB             :                  errmsg("could not create file \"%s\": %m",
    2617 EUB             :                         PGSS_TEXT_FILE)));
    2618 UIC           0 :         goto done;
    2619                 :     }
    2620                 : 
    2621                 :     /* If ftruncate fails, log it, but it's not a fatal problem */
    2622 CBC          37 :     if (ftruncate(fileno(qfile), 0) != 0)
    2623 UIC           0 :         ereport(LOG,
    2624 ECB             :                 (errcode_for_file_access(),
    2625                 :                  errmsg("could not truncate file \"%s\": %m",
    2626                 :                         PGSS_TEXT_FILE)));
    2627                 : 
    2628 GIC          37 :     FreeFile(qfile);
    2629 ECB             : 
    2630 CBC          37 : done:
    2631              37 :     pgss->extent = 0;
    2632                 :     /* This counts as a query text garbage collection for our purposes */
    2633 GIC          37 :     record_gc_qtexts();
    2634                 : 
    2635              41 : release_lock:
    2636              41 :     LWLockRelease(pgss->lock);
    2637              41 : }
    2638                 : 
    2639                 : /*
    2640                 :  * Generate a normalized version of the query string that will be used to
    2641                 :  * represent all similar queries.
    2642                 :  *
    2643                 :  * Note that the normalized representation may well vary depending on
    2644                 :  * just which "equivalent" query is used to create the hashtable entry.
    2645                 :  * We assume this is OK.
    2646                 :  *
    2647                 :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2648                 :  * the original string start, so we need to translate the provided locations
    2649                 :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2650                 :  * of interest, so it's worth doing.)
    2651                 :  *
    2652                 :  * *query_len_p contains the input string length, and is updated with
    2653 ECB             :  * the result string length on exit.  The resulting string might be longer
    2654                 :  * or shorter depending on what happens with replacement of constants.
    2655                 :  *
    2656                 :  * Returns a palloc'd, NUL-terminated string; the reported length omits the NUL.
    2657                 :  */
    2658                 : static char *
    2659 GIC        8841 : generate_normalized_query(JumbleState *jstate, const char *query,
    2660                 :                           int query_loc, int *query_len_p)
    2661 ECB             : {
    2662                 :     char       *norm_query;
    2663 CBC        8841 :     int         query_len = *query_len_p;
    2664 ECB             :     int         i,
    2665                 :                 norm_query_buflen,  /* Space allowed for norm_query */
    2666                 :                 len_to_wrt,     /* Length (in bytes) to write */
    2667 GIC        8841 :                 quer_loc = 0,   /* Source query byte location */
    2668            8841 :                 n_quer_loc = 0, /* Normalized query byte location */
    2669            8841 :                 last_off = 0,   /* Offset from start for previous tok */
    2670 CBC        8841 :                 last_tok_len = 0;   /* Length (in bytes) of that tok */
    2671                 : 
    2672                 :     /*
    2673                 :      * Get constants' lengths (core system only gives us locations).  Note
    2674                 :      * this also ensures the items are sorted by location.
    2675                 :      */
    2676 GIC        8841 :     fill_in_constant_lengths(jstate, query, query_loc);
    2677                 : 
    2678                 :     /*
    2679 ECB             :      * Allow for $n symbols to be longer than the constants they replace.
    2680                 :      * Constants must take at least one byte in text form, while a $n symbol
    2681                 :      * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
    2682                 :      * could refine that limit based on the max value of n for the current
    2683                 :      * query, but it hardly seems worth any extra effort to do so.
    2684                 :      */
    2685 GIC        8841 :     norm_query_buflen = query_len + jstate->clocations_count * 10; /* 11 - 1 extra bytes per constant */
    2686                 : 
    2687                 :     /* Allocate result buffer, with one extra byte for the terminating NUL */
    2688            8841 :     norm_query = palloc(norm_query_buflen + 1);
    2689 ECB             : 
    2690 GIC       36766 :     for (i = 0; i < jstate->clocations_count; i++)
    2691 ECB             :     {
    2692                 :         int         off,        /* Offset from start for cur tok */
    2693                 :                     tok_len;    /* Length (in bytes) of that tok */
    2694                 : 
    2695 CBC       27925 :         off = jstate->clocations[i].location;
    2696 ECB             :         /* Adjust recorded location if we're dealing with partial string */
    2697 GIC       27925 :         off -= query_loc;
    2698                 : 
    2699 CBC       27925 :         tok_len = jstate->clocations[i].length;
    2700 ECB             : 
    2701 GIC       27925 :         if (tok_len < 0)
    2702 CBC         160 :             continue;           /* ignore any duplicates; i still advances, so their $n is skipped */
    2703 ECB             : 
    2704                 :         /* Copy next chunk (what lies between the previous constant's end and this one) */
    2705 GIC       27765 :         len_to_wrt = off - last_off;
    2706           27765 :         len_to_wrt -= last_tok_len;
    2707 ECB             : 
    2708 CBC       27765 :         Assert(len_to_wrt >= 0);
    2709 GIC       27765 :         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2710 CBC       27765 :         n_quer_loc += len_to_wrt;
    2711 ECB             : 
    2712                 :         /* And insert a param symbol in place of the constant token */
    2713 GIC       55530 :         n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
    2714           27765 :                               i + 1 + jstate->highest_extern_param_id); /* NOTE(review): presumably numbers past the query's own $n params -- confirm */
    2715                 : 
    2716           27765 :         quer_loc = off + tok_len;   /* resume reading just past the constant */
    2717           27765 :         last_off = off;
    2718           27765 :         last_tok_len = tok_len;
    2719 ECB             :     }
    2720                 : 
    2721                 :     /*
    2722                 :      * We've copied everything through the last replaced constant (quer_loc is
    2723                 :      * just past it, or 0 if none).  Copy over the remaining bytes of the query.
    2724                 :      */
    2725 CBC        8841 :     len_to_wrt = query_len - quer_loc;
    2726 ECB             : 
    2727 GIC        8841 :     Assert(len_to_wrt >= 0);
    2728 CBC        8841 :     memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
    2729            8841 :     n_quer_loc += len_to_wrt;
    2730                 : 
    2731 GIC        8841 :     Assert(n_quer_loc <= norm_query_buflen);    /* the headroom computed above must suffice */
    2732            8841 :     norm_query[n_quer_loc] = '\0';
    2733                 : 
    2734            8841 :     *query_len_p = n_quer_loc;
    2735            8841 :     return norm_query;
    2736                 : }
    2737                 : 
    2738                 : /*
    2739                 :  * Given a valid SQL string and an array of constant-location records,
    2740                 :  * fill in the textual lengths of those constants.
    2741                 :  *
    2742                 :  * The constants may use any allowed constant syntax, such as float literals,
    2743                 :  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
    2744                 :  * accomplished by using the public API for the core scanner.
    2745                 :  *
    2746                 :  * It is the caller's job to ensure that the string is a valid SQL statement
    2747                 :  * with constants at the indicated locations.  Since in practice the string
    2748                 :  * has already been parsed, and the locations that the caller provides will
    2749                 :  * have originated from within the authoritative parser, this should not be
    2750                 :  * a problem.
    2751                 :  *
    2752                 :  * Duplicate constant locations are possible, and will have their lengths
    2753                 :  * marked as '-1', so that they are later ignored.  (Actually, we assume the
    2754                 :  * lengths were initialized as -1 to start with, and don't change them here.)
    2755                 :  *
    2756                 :  * If query_loc > 0, then "query" has been advanced by that much compared to
    2757                 :  * the original string start, so we need to translate the provided locations
    2758                 :  * to compensate.  (This lets us avoid re-scanning statements before the one
    2759                 :  * of interest, so it's worth doing.)
    2760 ECB             :  *
    2761                 :  * N.B. There is an assumption that a '-' character at a Const location begins
    2762                 :  * a negative numeric constant.  This precludes there ever being another
    2763                 :  * reason for a constant to start with a '-'.
    2764                 :  */
    2765                 : static void
    2766 GIC        8841 : fill_in_constant_lengths(JumbleState *jstate, const char *query,
    2767                 :                          int query_loc)
    2768 ECB             : {
    2769                 :     LocationLen *locs;
    2770                 :     core_yyscan_t yyscanner;
    2771                 :     core_yy_extra_type yyextra;
    2772                 :     core_YYSTYPE yylval;
    2773                 :     YYLTYPE     yylloc;
    2774 GIC        8841 :     int         last_loc = -1;  /* last location measured; -1 = none yet */
    2775 ECB             :     int         i;
    2776                 : 
    2777                 :     /*
    2778                 :      * Sort the records by location so that we can process them in order while
    2779                 :      * scanning the query text.
    2780                 :      */
    2781 CBC        8841 :     if (jstate->clocations_count > 1)   /* nothing to sort for 0 or 1 records */
    2782 GIC        5774 :         qsort(jstate->clocations, jstate->clocations_count,
    2783                 :               sizeof(LocationLen), comp_location);
    2784            8841 :     locs = jstate->clocations;
    2785                 : 
    2786                 :     /* initialize the flex scanner --- should match raw_parser() */
    2787 CBC        8841 :     yyscanner = scanner_init(query,
    2788                 :                              &yyextra,
    2789                 :                              &ScanKeywords,
    2790 ECB             :                              ScanKeywordTokens);
    2791                 : 
    2792                 :     /* we don't want to re-emit any escape string warnings */
    2793 GIC        8841 :     yyextra.escape_string_warning = false;
    2794                 : 
    2795                 :     /* Search for each constant, in sequence */
    2796 CBC       36766 :     for (i = 0; i < jstate->clocations_count; i++)
    2797                 :     {
    2798           27925 :         int         loc = locs[i].location;
    2799                 :         int         tok;
    2800 ECB             : 
    2801                 :         /* Adjust recorded location if we're dealing with partial string */
    2802 GIC       27925 :         loc -= query_loc;
    2803                 : 
    2804           27925 :         Assert(loc >= 0);
    2805                 : 
    2806 CBC       27925 :         if (loc <= last_loc)
    2807 GIC         160 :             continue;           /* Duplicate constant, ignore (its length is left untouched) */
    2808                 : 
    2809 ECB             :         /* Lex tokens until we find the desired constant */
    2810 EUB             :         for (;;)
    2811                 :         {
    2812 GIC      207745 :             tok = core_yylex(&yylval, &yylloc, yyscanner);
    2813                 : 
    2814                 :             /* We should not hit end-of-string, but if we do, behave sanely */
    2815          207745 :             if (tok == 0)
    2816 LBC           0 :                 break;          /* out of inner for-loop */
    2817                 : 
    2818 ECB             :             /*
    2819                 :              * We should find the token position exactly, but if we somehow
    2820                 :              * run past it, work with that.
    2821                 :              */
    2822 GIC      207745 :             if (yylloc >= loc)
    2823                 :             {
    2824           27765 :                 if (query[loc] == '-')
    2825                 :                 {
    2826                 :                     /*
    2827                 :                      * It's a negative value - this is the one and only case
    2828                 :                      * where we replace more than a single token.
    2829                 :                      *
    2830                 :                      * Do not compensate for the core system's special-case
    2831                 :                      * adjustment of location to that of the leading '-'
    2832 ECB             :                      * operator in the event of a negative constant.  It is
    2833                 :                      * also useful for our purposes to start from the minus
    2834 EUB             :                      * symbol.  In this way, queries like "select * from foo
    2835                 :                      * where bar = 1" and "select * from foo where bar = -2"
    2836                 :                      * will have identical normalized query strings.
    2837                 :                      */
    2838 GIC         359 :                     tok = core_yylex(&yylval, &yylloc, yyscanner);  /* lex the token following the '-' */
    2839             359 :                     if (tok == 0)
    2840 UIC           0 :                         break;  /* out of inner for-loop */
    2841 ECB             :                 }
    2842                 : 
    2843                 :                 /*
    2844                 :                  * We now rely on the assumption that flex has placed a zero
    2845                 :                  * byte after the current token in scanbuf (a leading '-' is counted).
    2846                 :                  */
    2847 CBC       27765 :                 locs[i].length = strlen(yyextra.scanbuf + loc);
    2848 GBC       27765 :                 break;          /* out of inner for-loop */
    2849                 :             }
    2850 ECB             :         }
    2851                 : 
    2852                 :         /* If we hit end-of-string, give up, leaving remaining lengths -1 */
    2853 CBC       27765 :         if (tok == 0)
    2854 LBC           0 :             break;
    2855                 : 
    2856 GIC       27765 :         last_loc = loc;
    2857                 :     }
    2858                 : 
    2859            8841 :     scanner_finish(yyscanner);
    2860 CBC        8841 : }
    2861                 : 
    2862 ECB             : /*
    2863                 :  * comp_location: qsort comparator ordering LocationLen structs by ascending location
    2864                 :  */
    2865                 : static int
    2866 CBC       33062 : comp_location(const void *a, const void *b)
    2867 ECB             : {
    2868 CBC       33062 :     int         l = ((const LocationLen *) a)->location;
    2869 GIC       33062 :     int         r = ((const LocationLen *) b)->location;
    2870 ECB             : 
    2871 GIC       33062 :     if (l < r)
    2872           22541 :         return -1;
    2873           10521 :     else if (l > r)
    2874           10354 :         return +1;
    2875                 :     else
    2876             167 :         return 0;   /* same location recorded twice */
    2877                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a