/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *    display some contents of the buffer cache
 *
 * contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"


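/*
 * Editorial note (not part of the original file): these constants are the
 * output column counts of the SQL-callable functions below.  The _MIN_
 * value matches the 1.0 definition of pg_buffercache, which did not yet
 * have the pinning_backends column.
 */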
#define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8
#define NUM_BUFFERCACHE_PAGES_ELEM 9
#define NUM_BUFFERCACHE_SUMMARY_ELEM 5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4

PG_MODULE_MAGIC;

/*
 * Record structure holding the buffer cache data to be exposed.
 */
typedef struct
{
    uint32      bufferid;
    RelFileNumber relfilenumber;
    Oid         reltablespace;
    Oid         reldatabase;
    ForkNumber  forknum;
    BlockNumber blocknum;
    bool        isvalid;
    bool        isdirty;
    uint16      usagecount;

    /*
     * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
     * being pinned by too many backends and each backend will only pin once
     * because of bufmgr.c's PrivateRefCount infrastructure.
     */
    int32       pinning_backends;
} BufferCachePagesRec;


/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
    TupleDesc   tupdesc;
    BufferCachePagesRec *record;
} BufferCachePagesContext;


/*
 * Function returning data from the shared buffer cache - buffer number,
 * relation file number/tablespace/database/fork/block number, dirty
 * indicator, usage count and pin count.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
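/*
 * Editorial note (not part of the original file): these three functions are
 * the C implementations behind the extension's SQL-level interface, i.e.
 * the pg_buffercache view and the pg_buffercache_summary() and
 * pg_buffercache_usage_counts() functions created by the extension's
 * install script.
 */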

Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
    FuncCallContext *funcctx;
    Datum       result;
    MemoryContext oldcontext;
    BufferCachePagesContext *fctx;  /* User function context. */
    TupleDesc   tupledesc;
    TupleDesc   expected_tupledesc;
    HeapTuple   tuple;

    if (SRF_IS_FIRSTCALL())
    {
        int         i;

        funcctx = SRF_FIRSTCALL_INIT();

        /* Switch context when allocating stuff to be used in later calls */
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        /* Create a user function context for cross-call persistence */
        fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

        /*
         * To smoothly support upgrades from version 1.0 of this extension,
         * transparently handle the (non-)existence of the pinning_backends
         * column.  Unfortunately we have to look up the call's result type
         * for that: we can't simply use the result type implied by the
         * current function definition, or we might crash when somebody calls
         * us through the old (or even a wrong) SQL function definition.
         */
        if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
            elog(ERROR, "return type must be a row type");

        if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
            expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
            elog(ERROR, "incorrect number of output arguments");

        /* Construct a tuple descriptor for the result rows. */
        tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
                           INT4OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
                           OIDOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
                           INT2OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
                           BOOLOID, -1, 0);
        TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
                           INT2OID, -1, 0);

        if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
            TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
                               INT4OID, -1, 0);

        fctx->tupdesc = BlessTupleDesc(tupledesc);

        /* Allocate NBuffers worth of BufferCachePagesRec records. */
        fctx->record = (BufferCachePagesRec *)
            MemoryContextAllocHuge(CurrentMemoryContext,
                                   sizeof(BufferCachePagesRec) * NBuffers);

        /* Set max calls and remember the user function context. */
        funcctx->max_calls = NBuffers;
        funcctx->user_fctx = fctx;

        /* Return to original context when allocating transient memory */
        MemoryContextSwitchTo(oldcontext);

        /*
         * Scan through all the buffers, saving the relevant fields in the
         * fctx->record structure.
         *
         * We don't hold the partition locks, so we don't get a consistent
         * snapshot across all buffers, but we do grab the buffer header
         * locks, so the information of each buffer is self-consistent.
         */
        for (i = 0; i < NBuffers; i++)
        {
            BufferDesc *bufHdr;
            uint32      buf_state;

            bufHdr = GetBufferDescriptor(i);
            /* Lock each buffer header before inspecting. */
            buf_state = LockBufHdr(bufHdr);

            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
            fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
            fctx->record[i].reltablespace = bufHdr->tag.spcOid;
            fctx->record[i].reldatabase = bufHdr->tag.dbOid;
            fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
            fctx->record[i].blocknum = bufHdr->tag.blockNum;
            fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
            fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                fctx->record[i].isdirty = true;
            else
                fctx->record[i].isdirty = false;

            /* Note if the buffer is valid, and has storage created */
            if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
                fctx->record[i].isvalid = true;
            else
                fctx->record[i].isvalid = false;

            UnlockBufHdr(bufHdr, buf_state);
        }
    }

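    /* Per-call work: return the next saved buffer as a tuple, if any remain. */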
    funcctx = SRF_PERCALL_SETUP();

    /* Get the saved state */
    fctx = funcctx->user_fctx;

    if (funcctx->call_cntr < funcctx->max_calls)
    {
        uint32      i = funcctx->call_cntr;
        Datum       values[NUM_BUFFERCACHE_PAGES_ELEM];
        bool        nulls[NUM_BUFFERCACHE_PAGES_ELEM];

        values[0] = Int32GetDatum(fctx->record[i].bufferid);
        nulls[0] = false;

        /*
         * Set all fields except the bufferid to null if the buffer is unused
         * or not valid.
         */
        if (fctx->record[i].blocknum == InvalidBlockNumber ||
            fctx->record[i].isvalid == false)
        {
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
            nulls[7] = true;
            /* unused for v1.0 callers, but the array is always long enough */
            nulls[8] = true;
        }
        else
        {
            values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
            nulls[1] = false;
            values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
            nulls[2] = false;
            values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
            nulls[3] = false;
            values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
            nulls[4] = false;
            values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
            nulls[5] = false;
            values[6] = BoolGetDatum(fctx->record[i].isdirty);
            nulls[6] = false;
            values[7] = Int16GetDatum(fctx->record[i].usagecount);
            nulls[7] = false;
            /* unused for v1.0 callers, but the array is always long enough */
            values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
            nulls[8] = false;
        }

        /* Build and return the tuple. */
        tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }
    else
        SRF_RETURN_DONE(funcctx);
}

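/*
 * Illustrative only (not part of the original file): at the SQL level the
 * rows produced above are usually consumed through the pg_buffercache view.
 * A query along the lines of the example in the pg_buffercache
 * documentation counts buffers per relation in the current database:
 *
 *    SELECT c.relname, count(*) AS buffers
 *    FROM pg_buffercache b
 *        JOIN pg_class c ON b.relfilenode = pg_relation_filenode(c.oid)
 *            AND b.reldatabase IN (0, (SELECT oid FROM pg_database
 *                                      WHERE datname = current_database()))
 *    GROUP BY c.relname
 *    ORDER BY 2 DESC
 *    LIMIT 10;
 */
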
Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
    Datum       result;
    TupleDesc   tupledesc;
    HeapTuple   tuple;
    Datum       values[NUM_BUFFERCACHE_SUMMARY_ELEM];
    bool        nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];

    int32       buffers_used = 0;
    int32       buffers_unused = 0;
    int32       buffers_dirty = 0;
    int32       buffers_pinned = 0;
    int64       usagecount_total = 0;

    if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr;
        uint32      buf_state;

        /*
         * This function summarizes the state of all headers.  Locking the
         * buffer headers wouldn't provide an improved result as the state of
         * the buffer can still change after we release the lock and it'd
         * noticeably increase the cost of the function.
         */
        bufHdr = GetBufferDescriptor(i);
        buf_state = pg_atomic_read_u32(&bufHdr->state);

        if (buf_state & BM_VALID)
        {
            buffers_used++;
            usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);

            if (buf_state & BM_DIRTY)
                buffers_dirty++;
        }
        else
            buffers_unused++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            buffers_pinned++;
    }

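    /*
     * Fill the output columns; the average usage count is NULL when no
     * buffers are in use, so we never divide by zero.
     */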
    memset(nulls, 0, sizeof(nulls));
    values[0] = Int32GetDatum(buffers_used);
    values[1] = Int32GetDatum(buffers_unused);
    values[2] = Int32GetDatum(buffers_dirty);
    values[3] = Int32GetDatum(buffers_pinned);

    if (buffers_used != 0)
        values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
    else
        nulls[4] = true;

    /* Build and return the tuple. */
    tuple = heap_form_tuple(tupledesc, values, nulls);
    result = HeapTupleGetDatum(tuple);

    PG_RETURN_DATUM(result);
}

Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    int         usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
    int         dirty[BM_MAX_USAGE_COUNT + 1] = {0};
    int         pinned[BM_MAX_USAGE_COUNT + 1] = {0};
    Datum       values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
    bool        nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};

    InitMaterializedSRF(fcinfo, 0);

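    /*
     * Count buffers per usage count.  As in pg_buffercache_summary(), the
     * buffer headers are read without taking the header spinlock, so the
     * numbers are only approximate.
     */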
    for (int i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(i);
        uint32      buf_state = pg_atomic_read_u32(&bufHdr->state);
        int         usage_count;

        usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
        usage_counts[usage_count]++;

        if (buf_state & BM_DIRTY)
            dirty[usage_count]++;

        if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
            pinned[usage_count]++;
    }

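    /* Emit one result row per possible usage count, 0 .. BM_MAX_USAGE_COUNT. */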
    for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
    {
        values[0] = Int32GetDatum(i);
        values[1] = Int32GetDatum(usage_counts[i]);
        values[2] = Int32GetDatum(dirty[i]);
        values[3] = Int32GetDatum(pinned[i]);

        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
    }

    return (Datum) 0;
}