Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dsa.c
4 : * Dynamic shared memory areas.
5 : *
6 : * This module provides dynamic shared memory areas which are built on top of
 7 : * DSM segments. While dsm.c allows segments of shared memory to be
8 : * created and shared between backends, it isn't designed to deal with small
9 : * objects. A DSA area is a shared memory heap usually backed by one or more
10 : * DSM segments which can allocate memory using dsa_allocate() and dsa_free().
11 : * Alternatively, it can be created in pre-existing shared memory, including a
12 : * DSM segment, and then create extra DSM segments as required. Unlike the
13 : * regular system heap, it deals in pseudo-pointers which must be converted to
14 : * backend-local pointers before they are dereferenced. These pseudo-pointers
15 : * can however be shared with other backends, and can be used to construct
16 : * shared data structures.
17 : *
18 : * Each DSA area manages a set of DSM segments, adding new segments as
19 : * required and detaching them when they are no longer needed. Each segment
 20 : * contains a number of 4kB pages, a free page manager for tracking
21 : * consecutive runs of free pages, and a page map for tracking the source of
 22 : * objects allocated on each page. Allocation requests above 8kB are handled
23 : * by choosing a segment and finding consecutive free pages in its free page
24 : * manager. Allocation requests for smaller sizes are handled using pools of
25 : * objects of a selection of sizes. Each pool consists of a number of 16 page
 26 : * (64kB) superblocks allocated in the same way as large objects. Allocation
27 : * of large objects and new superblocks is serialized by a single LWLock, but
28 : * allocation of small objects from pre-existing superblocks uses one LWLock
29 : * per pool. Currently there is one pool, and therefore one lock, per size
30 : * class. Per-core pools to increase concurrency and strategies for reducing
31 : * the resulting fragmentation are areas for future research. Each superblock
32 : * is managed with a 'span', which tracks the superblock's freelist. Free
33 : * requests are handled by looking in the page map to find which span an
34 : * address was allocated from, so that small objects can be returned to the
35 : * appropriate free list, and large object pages can be returned directly to
36 : * the free page map. When allocating, simple heuristics for selecting
37 : * segments and superblocks try to encourage occupied memory to be
38 : * concentrated, increasing the likelihood that whole superblocks can become
39 : * empty and be returned to the free page manager, and whole segments can
40 : * become empty and be returned to the operating system.
41 : *
42 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
43 : * Portions Copyright (c) 1994, Regents of the University of California
44 : *
45 : * IDENTIFICATION
46 : * src/backend/utils/mmgr/dsa.c
47 : *
48 : *-------------------------------------------------------------------------
49 : */
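
/*
 * A minimal usage sketch, assuming a caller-chosen LWLock tranche id
 * MY_TRANCHE_ID (names that do not appear elsewhere in this file are
 * illustrative only): the owning backend creates an area, allocates from it,
 * and converts the resulting pseudo-pointer to a backend-local address
 * before dereferencing it.  Only the dsa_pointer, never the local address,
 * should be stored in shared data structures.
 *
 *		dsa_area   *area = dsa_create(MY_TRANCHE_ID);
 *		dsa_pointer dp = dsa_allocate(area, 1024);
 *		char	   *p = (char *) dsa_get_address(area, dp);
 *
 *		... publish dp (not p) in shared memory for other backends ...
 *
 *		dsa_free(area, dp);
 *		dsa_detach(area);
 */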
50 :
51 : #include "postgres.h"
52 :
53 : #include "port/atomics.h"
54 : #include "port/pg_bitutils.h"
55 : #include "storage/dsm.h"
56 : #include "storage/ipc.h"
57 : #include "storage/lwlock.h"
58 : #include "storage/shmem.h"
59 : #include "utils/dsa.h"
60 : #include "utils/freepage.h"
61 : #include "utils/memutils.h"
62 :
63 : /*
64 : * The size of the initial DSM segment that backs a dsa_area created by
65 : * dsa_create. After creating some number of segments of this size we'll
66 : * double this size, and so on. Larger segments may be created if necessary
67 : * to satisfy large requests.
68 : */
69 : #define DSA_INITIAL_SEGMENT_SIZE ((size_t) (1 * 1024 * 1024))
70 :
71 : /*
72 : * How many segments to create before we double the segment size. If this is
73 : * low, then there is likely to be a lot of wasted space in the largest
74 : * segment. If it is high, then we risk running out of segment slots (see
75 : * dsm.c's limits on total number of segments), or limiting the total size
76 : * an area can manage when using small pointers.
77 : */
78 : #define DSA_NUM_SEGMENTS_AT_EACH_SIZE 2
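
/*
 * For illustration (a sketch, assuming segments are created strictly on
 * demand): with the two values above, the sizes of successive backing
 * segments grow roughly as
 *
 *		1MB, 1MB, 2MB, 2MB, 4MB, 4MB, 8MB, ...
 *
 * capped at DSA_MAX_SEGMENT_SIZE, with an individual segment made larger
 * when needed to satisfy an unusually large allocation.
 */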
79 :
80 : /*
81 : * The number of bits used to represent the offset part of a dsa_pointer.
82 : * This controls the maximum size of a segment, the maximum possible
83 : * allocation size and also the maximum number of segments per area.
84 : */
85 : #if SIZEOF_DSA_POINTER == 4
86 : #define DSA_OFFSET_WIDTH 27 /* 32 segments of size up to 128MB */
87 : #else
88 : #define DSA_OFFSET_WIDTH 40 /* 1024 segments of size up to 1TB */
89 : #endif
90 :
91 : /*
92 : * The maximum number of DSM segments that an area can own, determined by
93 : * the number of bits remaining (but capped at 1024).
94 : */
95 : #define DSA_MAX_SEGMENTS \
96 : Min(1024, (1 << ((SIZEOF_DSA_POINTER * 8) - DSA_OFFSET_WIDTH)))
97 :
98 : /* The bitmask for extracting the offset from a dsa_pointer. */
99 : #define DSA_OFFSET_BITMASK (((dsa_pointer) 1 << DSA_OFFSET_WIDTH) - 1)
100 :
101 : /* The maximum size of a DSM segment. */
102 : #define DSA_MAX_SEGMENT_SIZE ((size_t) 1 << DSA_OFFSET_WIDTH)
103 :
104 : /* Number of pages (see FPM_PAGE_SIZE) per regular superblock. */
105 : #define DSA_PAGES_PER_SUPERBLOCK 16
106 :
107 : /*
108 : * A magic number used as a sanity check for following DSM segments belonging
109 : * to a DSA area (this number will be XORed with the area handle and
110 : * the segment index).
111 : */
112 : #define DSA_SEGMENT_HEADER_MAGIC 0x0ce26608
113 :
114 : /* Build a dsa_pointer given a segment number and offset. */
115 : #define DSA_MAKE_POINTER(segment_number, offset) \
116 : (((dsa_pointer) (segment_number) << DSA_OFFSET_WIDTH) | (offset))
117 :
118 : /* Extract the segment number from a dsa_pointer. */
119 : #define DSA_EXTRACT_SEGMENT_NUMBER(dp) ((dp) >> DSA_OFFSET_WIDTH)
120 :
121 : /* Extract the offset from a dsa_pointer. */
122 : #define DSA_EXTRACT_OFFSET(dp) ((dp) & DSA_OFFSET_BITMASK)
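
/*
 * A worked example of the encoding (assuming an 8-byte dsa_pointer, so
 * DSA_OFFSET_WIDTH is 40): a pointer to byte offset 0x1234 within segment 3
 * round-trips through these macros as follows.
 *
 *		dsa_pointer dp = DSA_MAKE_POINTER(3, 0x1234);	-- 0x0000030000001234
 *
 *		Assert(DSA_EXTRACT_SEGMENT_NUMBER(dp) == 3);
 *		Assert(DSA_EXTRACT_OFFSET(dp) == 0x1234);
 */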
123 :
 124 : /* The type used for segment indexes (zero based). */
125 : typedef size_t dsa_segment_index;
126 :
127 : /* Sentinel value for dsa_segment_index indicating 'none' or 'end'. */
128 : #define DSA_SEGMENT_INDEX_NONE (~(dsa_segment_index)0)
129 :
130 : /*
131 : * How many bins of segments do we have? The bins are used to categorize
132 : * segments by their largest contiguous run of free pages.
133 : */
134 : #define DSA_NUM_SEGMENT_BINS 16
135 :
136 : /*
137 : * What is the lowest bin that holds segments that *might* have n contiguous
138 : * free pages? There is no point in looking in segments in lower bins; they
139 : * definitely can't service a request for n free pages.
140 : */
141 : static inline size_t
261 tmunro 142 GNC 24346 : contiguous_pages_to_segment_bin(size_t n)
143 : {
144 : size_t bin;
145 :
146 24346 : if (n == 0)
147 646 : bin = 0;
148 : else
149 23700 : bin = pg_leftmost_one_pos_size_t(n) + 1;
150 :
151 24346 : return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
152 : }
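
/*
 * For illustration: the mapping above puts n = 0 in bin 0, n = 1 in bin 1,
 * n in [2, 3] in bin 2, n in [4, 7] in bin 3, and so on (capped at bin 15),
 * so a segment in bin k >= 1 has a largest contiguous free run of at least
 * 2^(k - 1) pages.  For example:
 *
 *		Assert(contiguous_pages_to_segment_bin(0) == 0);
 *		Assert(contiguous_pages_to_segment_bin(1) == 1);
 *		Assert(contiguous_pages_to_segment_bin(3) == 2);
 *		Assert(contiguous_pages_to_segment_bin(4) == 3);
 */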
153 :
2319 rhaas 154 ECB : /* Macros for access to locks. */
155 : #define DSA_AREA_LOCK(area) (&area->control->lock)
156 : #define DSA_SCLASS_LOCK(area, sclass) (&area->control->pools[sclass].lock)
157 :
158 : /*
159 : * The header for an individual segment. This lives at the start of each DSM
160 : * segment owned by a DSA area including the first segment (where it appears
161 : * as part of the dsa_area_control struct).
162 : */
163 : typedef struct
164 : {
165 : /* Sanity check magic value. */
166 : uint32 magic;
167 : /* Total number of pages in this segment (excluding metadata area). */
168 : size_t usable_pages;
169 : /* Total size of this segment in bytes. */
170 : size_t size;
171 :
172 : /*
173 : * Index of the segment that precedes this one in the same segment bin, or
174 : * DSA_SEGMENT_INDEX_NONE if this is the first one.
175 : */
176 : dsa_segment_index prev;
177 :
178 : /*
179 : * Index of the segment that follows this one in the same segment bin, or
180 : * DSA_SEGMENT_INDEX_NONE if this is the last one.
181 : */
182 : dsa_segment_index next;
183 : /* The index of the bin that contains this segment. */
184 : size_t bin;
185 :
186 : /*
187 : * A flag raised to indicate that this segment is being returned to the
188 : * operating system and has been unpinned.
189 : */
190 : bool freed;
191 : } dsa_segment_header;
192 :
193 : /*
194 : * Metadata for one superblock.
195 : *
196 : * For most blocks, span objects are stored out-of-line; that is, the span
197 : * object is not stored within the block itself. But, as an exception, for a
198 : * "span of spans", the span object is stored "inline". The allocation is
199 : * always exactly one page, and the dsa_area_span object is located at
200 : * the beginning of that page. The size class is DSA_SCLASS_BLOCK_OF_SPANS,
201 : * and the remaining fields are used just as they would be in an ordinary
202 : * block. We can't allocate spans out of ordinary superblocks because
203 : * creating an ordinary superblock requires us to be able to allocate a span
204 : * *first*. Doing it this way avoids that circularity.
205 : */
206 : typedef struct
207 : {
208 : dsa_pointer pool; /* Containing pool. */
209 : dsa_pointer prevspan; /* Previous span. */
210 : dsa_pointer nextspan; /* Next span. */
211 : dsa_pointer start; /* Starting address. */
212 : size_t npages; /* Length of span in pages. */
213 : uint16 size_class; /* Size class. */
214 : uint16 ninitialized; /* Maximum number of objects ever allocated. */
215 : uint16 nallocatable; /* Number of objects currently allocatable. */
216 : uint16 firstfree; /* First object on free list. */
217 : uint16 nmax; /* Maximum number of objects ever possible. */
218 : uint16 fclass; /* Current fullness class. */
219 : } dsa_area_span;
220 :
221 : /*
222 : * Given a pointer to an object in a span, access the index of the next free
223 : * object in the same span (ie in the span's freelist) as an L-value.
224 : */
225 : #define NextFreeObjectIndex(object) (* (uint16 *) (object))
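
/*
 * For illustration, a sketch of how the span freelist works: each free
 * object's first two bytes hold the index of the next free object in the
 * same span, so the list costs no space beyond the objects themselves.
 * Pushing an object back onto a span's freelist (as dsa_free() does) looks
 * roughly like
 *
 *		NextFreeObjectIndex(object) = span->firstfree;
 *		span->firstfree = (object - superblock) / size;
 *
 * and popping the head (as the allocator does) roughly reverses it:
 *
 *		object = superblock + size * span->firstfree;
 *		span->firstfree = NextFreeObjectIndex(object);
 *
 * with DSA_SPAN_NOTHING_FREE marking the end of the list.
 */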
226 :
227 : /*
228 : * Small allocations are handled by dividing a single block of memory into
229 : * many small objects of equal size. The possible allocation sizes are
230 : * defined by the following array. Larger size classes are spaced more widely
231 : * than smaller size classes. We fudge the spacing for size classes >1kB to
232 : * avoid space wastage: based on the knowledge that we plan to allocate 64kB
233 : * blocks, we bump the maximum object size up to the largest multiple of
234 : * 8 bytes that still lets us fit the same number of objects into one block.
235 : *
236 : * NB: Because of this fudging, if we were ever to use differently-sized blocks
237 : * for small allocations, these size classes would need to be reworked to be
238 : * optimal for the new size.
239 : *
240 : * NB: The optimal spacing for size classes, as well as the size of the blocks
241 : * out of which small objects are allocated, is not a question that has one
242 : * right answer. Some allocators (such as tcmalloc) use more closely-spaced
243 : * size classes than we do here, while others (like aset.c) use more
244 : * widely-spaced classes. Spacing the classes more closely avoids wasting
245 : * memory within individual chunks, but also means a larger number of
246 : * potentially-unfilled blocks.
247 : */
248 : static const uint16 dsa_size_classes[] = {
249 : sizeof(dsa_area_span), 0, /* special size classes */
250 : 8, 16, 24, 32, 40, 48, 56, 64, /* 8 classes separated by 8 bytes */
251 : 80, 96, 112, 128, /* 4 classes separated by 16 bytes */
252 : 160, 192, 224, 256, /* 4 classes separated by 32 bytes */
253 : 320, 384, 448, 512, /* 4 classes separated by 64 bytes */
254 : 640, 768, 896, 1024, /* 4 classes separated by 128 bytes */
255 : 1280, 1560, 1816, 2048, /* 4 classes separated by ~256 bytes */
256 : 2616, 3120, 3640, 4096, /* 4 classes separated by ~512 bytes */
257 : 5456, 6552, 7280, 8192 /* 4 classes separated by ~1024 bytes */
258 : };
259 : #define DSA_NUM_SIZE_CLASSES lengthof(dsa_size_classes)
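
/*
 * A worked example of the fudging described above (assuming 64kB
 * superblocks): an unfudged class of 1536 bytes would fit 42 objects in a
 * superblock (65536 / 1536 = 42.7), and the largest multiple of 8 bytes not
 * exceeding 65536 / 42 is 1560, hence the 1560 entry; likewise an unfudged
 * 1792 would fit 36 objects, giving the 1816 entry.
 */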
260 :
261 : /* Special size classes. */
262 : #define DSA_SCLASS_BLOCK_OF_SPANS 0
263 : #define DSA_SCLASS_SPAN_LARGE 1
264 :
265 : /*
266 : * The following lookup table is used to map the size of small objects
267 : * (less than 1kB) onto the corresponding size class. To use this table,
268 : * round the size of the object up to the next multiple of 8 bytes, and then
269 : * index into this array.
270 : */
271 : static const uint8 dsa_size_class_map[] = {
272 : 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13,
273 : 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17,
274 : 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
275 : 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
276 : 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
277 : 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
278 : 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
279 : 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25
280 : };
281 : #define DSA_SIZE_CLASS_MAP_QUANTUM 8
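
/*
 * A worked example (a sketch of the lookup done in dsa_allocate_extended()):
 * a request for 50 bytes rounds up to 56, indexes this table at
 * 56 / DSA_SIZE_CLASS_MAP_QUANTUM - 1 = 6, finds size class 8, and
 * dsa_size_classes[8] is 56, so the request is served from the 56-byte pool.
 */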
282 :
283 : /*
284 : * Superblocks are binned by how full they are. Generally, each fullness
285 : * class corresponds to one quartile, but the block being used for
286 : * allocations is always at the head of the list for fullness class 1,
287 : * regardless of how full it really is.
288 : */
289 : #define DSA_FULLNESS_CLASSES 4
290 :
291 : /*
292 : * A dsa_area_pool represents a set of objects of a given size class.
293 : *
294 : * Perhaps there should be multiple pools for the same size class for
295 : * contention avoidance, but for now there is just one!
296 : */
297 : typedef struct
298 : {
299 : /* A lock protecting access to this pool. */
300 : LWLock lock;
301 : /* A set of linked lists of spans, arranged by fullness. */
302 : dsa_pointer spans[DSA_FULLNESS_CLASSES];
303 : /* Should we pad this out to a cacheline boundary? */
304 : } dsa_area_pool;
305 :
306 : /*
307 : * The control block for an area. This lives in shared memory, at the start of
308 : * the first DSM segment controlled by this area.
309 : */
310 : typedef struct
311 : {
312 : /* The segment header for the first segment. */
313 : dsa_segment_header segment_header;
314 : /* The handle for this area. */
315 : dsa_handle handle;
316 : /* The handles of the segments owned by this area. */
317 : dsm_handle segment_handles[DSA_MAX_SEGMENTS];
318 : /* Lists of segments, binned by maximum contiguous run of free pages. */
319 : dsa_segment_index segment_bins[DSA_NUM_SEGMENT_BINS];
320 : /* The object pools for each size class. */
321 : dsa_area_pool pools[DSA_NUM_SIZE_CLASSES];
322 : /* The total size of all active segments. */
323 : size_t total_segment_size;
324 : /* The maximum total size of backing storage we are allowed. */
325 : size_t max_total_segment_size;
326 : /* Highest used segment index in the history of this area. */
327 : dsa_segment_index high_segment_index;
328 : /* The reference count for this area. */
329 : int refcnt;
330 : /* A flag indicating that this area has been pinned. */
331 : bool pinned;
332 : /* The number of times that segments have been freed. */
333 : size_t freed_segment_counter;
334 : /* The LWLock tranche ID. */
335 : int lwlock_tranche_id;
336 : /* The general lock (protects everything except object pools). */
337 : LWLock lock;
338 : } dsa_area_control;
339 :
340 : /* Given a pointer to a pool, find a dsa_pointer. */
341 : #define DsaAreaPoolToDsaPointer(area, p) \
342 : DSA_MAKE_POINTER(0, (char *) p - (char *) area->control)
343 :
344 : /*
345 : * A dsa_segment_map is stored within the backend-private memory of each
346 : * individual backend. It holds the base address of the segment within that
347 : * backend, plus the addresses of key objects within the segment. Those
348 : * could instead be derived from the base address but it's handy to have them
349 : * around.
350 : */
351 : typedef struct
352 : {
353 : dsm_segment *segment; /* DSM segment */
354 : char *mapped_address; /* Address at which segment is mapped */
355 : dsa_segment_header *header; /* Header (same as mapped_address) */
356 : FreePageManager *fpm; /* Free page manager within segment. */
357 : dsa_pointer *pagemap; /* Page map within segment. */
358 : } dsa_segment_map;
359 :
360 : /*
361 : * Per-backend state for a storage area. Backends obtain one of these by
362 : * creating an area or attaching to an existing one using a handle. Each
363 : * process that needs to use an area uses its own object to track where the
364 : * segments are mapped.
365 : */
366 : struct dsa_area
367 : {
368 : /* Pointer to the control object in shared memory. */
369 : dsa_area_control *control;
370 :
371 : /* Has the mapping been pinned? */
372 : bool mapping_pinned;
373 :
374 : /*
375 : * This backend's array of segment maps, ordered by segment index
376 : * corresponding to control->segment_handles. Some of the area's segments
377 : * may not be mapped in this backend yet, and some slots may have been
378 : * freed and need to be detached; these operations happen on demand.
379 : */
380 : dsa_segment_map segment_maps[DSA_MAX_SEGMENTS];
381 :
382 : /* The highest segment index this backend has ever mapped. */
383 : dsa_segment_index high_segment_index;
384 :
385 : /* The last observed freed_segment_counter. */
386 : size_t freed_segment_counter;
387 : };
388 :
389 : #define DSA_SPAN_NOTHING_FREE ((uint16) -1)
390 : #define DSA_SUPERBLOCK_SIZE (DSA_PAGES_PER_SUPERBLOCK * FPM_PAGE_SIZE)
391 :
392 : /* Given a pointer to a segment_map, obtain a segment index number. */
393 : #define get_segment_index(area, segment_map_ptr) \
394 : (segment_map_ptr - &area->segment_maps[0])
395 :
396 : static void init_span(dsa_area *area, dsa_pointer span_pointer,
397 : dsa_area_pool *pool, dsa_pointer start, size_t npages,
398 : uint16 size_class);
399 : static bool transfer_first_span(dsa_area *area, dsa_area_pool *pool,
400 : int fromclass, int toclass);
401 : static inline dsa_pointer alloc_object(dsa_area *area, int size_class);
402 : static bool ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
403 : int size_class);
404 : static dsa_segment_map *get_segment_by_index(dsa_area *area,
405 : dsa_segment_index index);
406 : static void destroy_superblock(dsa_area *area, dsa_pointer span_pointer);
407 : static void unlink_span(dsa_area *area, dsa_area_span *span);
408 : static void add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
409 : dsa_pointer span_pointer, int fclass);
410 : static void unlink_segment(dsa_area *area, dsa_segment_map *segment_map);
411 : static dsa_segment_map *get_best_segment(dsa_area *area, size_t npages);
412 : static dsa_segment_map *make_new_segment(dsa_area *area, size_t requested_pages);
413 : static dsa_area *create_internal(void *place, size_t size,
414 : int tranche_id,
415 : dsm_handle control_handle,
416 : dsm_segment *control_segment);
417 : static dsa_area *attach_internal(void *place, dsm_segment *segment,
418 : dsa_handle handle);
419 : static void check_for_freed_segments(dsa_area *area);
420 : static void check_for_freed_segments_locked(dsa_area *area);
421 :
422 : /*
423 : * Create a new shared area in a new DSM segment. Further DSM segments will
424 : * be allocated as required to extend the available space.
425 : *
426 : * We can't allocate a LWLock tranche_id within this function, because tranche
427 : * IDs are a scarce resource; there are only 64k available, using low numbers
428 : * when possible matters, and we have no provision for recycling them. So,
429 : * we require the caller to provide one.
430 : */
431 : dsa_area *
2285 rhaas 432 GIC 38 : dsa_create(int tranche_id)
433 : {
434 : dsm_segment *segment;
435 : dsa_area *area;
436 :
437 : /*
438 : * Create the DSM segment that will hold the shared control object and the
439 : * first segment of usable space.
440 : */
2319 441 38 : segment = dsm_create(DSA_INITIAL_SEGMENT_SIZE, 0);
442 :
443 : /*
2319 rhaas 444 ECB : * All segments backing this area are pinned, so that DSA can explicitly
445 : * control their lifetime (otherwise a newly created segment belonging to
446 : * this area might be freed when the only backend that happens to have it
447 : * mapped in ends, corrupting the area).
448 : */
2319 rhaas 449 GIC 38 : dsm_pin_segment(segment);
450 :
451 : /* Create a new DSA area with the control object in this segment. */
452 38 : area = create_internal(dsm_segment_address(segment),
2319 rhaas 453 ECB : DSA_INITIAL_SEGMENT_SIZE,
454 : tranche_id,
455 : dsm_segment_handle(segment), segment);
456 :
457 : /* Clean up when the control segment detaches. */
2319 rhaas 458 GIC 38 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
459 38 : PointerGetDatum(dsm_segment_address(segment)));
460 :
2319 rhaas 461 CBC 38 : return area;
462 : }
463 :
2319 rhaas 464 ECB : /*
465 : * Create a new shared area in an existing shared memory space, which may be
466 : * either DSM or Postmaster-initialized memory. DSM segments will be
467 : * allocated as required to extend the available space, though that can be
468 : * prevented with dsa_set_size_limit(area, size) using the same size provided
469 : * to dsa_create_in_place.
470 : *
471 : * Areas created in-place must eventually be released by the backend that
472 : * created them and all backends that attach to them. This can be done
473 : * explicitly with dsa_release_in_place, or, in the special case that 'place'
474 : * happens to be in a pre-existing DSM segment, by passing in a pointer to the
475 : * segment so that a detach hook can be registered with the containing DSM
476 : * segment.
477 : *
478 : * See dsa_create() for a note about the tranche arguments.
479 : */
480 : dsa_area *
2319 rhaas 481 GIC 2204 : dsa_create_in_place(void *place, size_t size,
482 : int tranche_id, dsm_segment *segment)
483 : {
484 : dsa_area *area;
485 :
2285 486 2204 : area = create_internal(place, size, tranche_id,
487 : DSM_HANDLE_INVALID, NULL);
488 :
489 : /*
490 : * Clean up when the control segment detaches, if a containing DSM segment
491 : * was provided.
492 : */
2319 rhaas 493 CBC 2204 : if (segment != NULL)
2319 rhaas 494 GIC 378 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
495 : PointerGetDatum(place));
496 :
497 2204 : return area;
2319 rhaas 498 ECB : }
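
/*
 * A minimal sketch of in-place creation (MY_AREA_SIZE and MY_TRANCHE_ID are
 * illustrative caller-chosen values): carving the area out of traditional
 * shared memory and capping it at that space so that no extra DSM segments
 * are ever created.
 *
 *		void	   *place = ShmemAlloc(MY_AREA_SIZE);	-- >= dsa_minimum_size()
 *		dsa_area   *area = dsa_create_in_place(place, MY_AREA_SIZE,
 *											   MY_TRANCHE_ID, NULL);
 *
 *		dsa_set_size_limit(area, MY_AREA_SIZE);
 */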
499 :
500 : /*
501 : * Obtain a handle that can be passed to other processes so that they can
502 : * attach to the given area. Cannot be called for areas created with
503 : * dsa_create_in_place.
504 : */
505 : dsa_handle
2319 rhaas 506 CBC 38 : dsa_get_handle(dsa_area *area)
507 : {
74 tgl 508 GNC 38 : Assert(area->control->handle != DSA_HANDLE_INVALID);
2319 rhaas 509 CBC 38 : return area->control->handle;
510 : }
511 :
512 : /*
513 : * Attach to an area given a handle generated (possibly in another process) by
514 : * dsa_get_handle. The area must have been created with dsa_create (not
515 : * dsa_create_in_place).
516 : */
517 : dsa_area *
518 79 : dsa_attach(dsa_handle handle)
519 : {
2319 rhaas 520 ECB : dsm_segment *segment;
521 : dsa_area *area;
522 :
523 : /*
524 : * An area handle is really a DSM segment handle for the first segment, so
525 : * we go ahead and attach to that.
526 : */
2319 rhaas 527 GIC 79 : segment = dsm_attach(handle);
528 79 : if (segment == NULL)
2319 rhaas 529 UIC 0 : ereport(ERROR,
2319 rhaas 530 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
531 : errmsg("could not attach to dynamic shared area")));
532 :
2319 rhaas 533 GIC 79 : area = attach_internal(dsm_segment_address(segment), segment, handle);
534 :
535 : /* Clean up when the control segment detaches. */
536 79 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
537 79 : PointerGetDatum(dsm_segment_address(segment)));
538 :
2319 rhaas 539 CBC 79 : return area;
2319 rhaas 540 ECB : }
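
/*
 * A minimal sketch of the handle exchange (my_shared_state is an
 * illustrative structure that both backends can already see):
 *
 *		-- in the backend that called dsa_create()
 *		my_shared_state->area_handle = dsa_get_handle(area);
 *
 *		-- in any other backend
 *		dsa_area   *area = dsa_attach(my_shared_state->area_handle);
 *
 *		dsa_pin_mapping(area);	-- optional: keep it mapped beyond the
 *								   current resource owner's scope
 */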
2319 rhaas 541 EUB :
542 : /*
543 : * Attach to an area that was created with dsa_create_in_place. The caller
544 : * must somehow know the location in memory that was used when the area was
2319 rhaas 545 ECB : * created, though it may be mapped at a different virtual address in this
546 : * process.
547 : *
548 : * See dsa_create_in_place for note about releasing in-place areas, and the
549 : * optional 'segment' argument which can be provided to allow automatic
550 : * release if the containing memory happens to be a DSM segment.
551 : */
552 : dsa_area *
2319 rhaas 553 GIC 15889 : dsa_attach_in_place(void *place, dsm_segment *segment)
554 : {
555 : dsa_area *area;
556 :
74 tgl 557 GNC 15889 : area = attach_internal(place, NULL, DSA_HANDLE_INVALID);
558 :
559 : /*
560 : * Clean up when the control segment detaches, if a containing DSM segment
561 : * was provided.
562 : */
2319 rhaas 563 GIC 15889 : if (segment != NULL)
564 2508 : on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
2319 rhaas 565 ECB : PointerGetDatum(place));
566 :
2319 rhaas 567 GIC 15889 : return area;
568 : }
2319 rhaas 569 ECB :
570 : /*
571 : * Release a DSA area that was produced by dsa_create_in_place or
572 : * dsa_attach_in_place. The 'segment' argument is ignored but provides an
573 : * interface suitable for on_dsm_detach, for the convenience of users who want
574 : * to create a DSA segment inside an existing DSM segment and have it
575 : * automatically released when the containing DSM segment is detached.
576 : * 'place' should be the address of the place where the area was created.
577 : *
578 : * This callback is automatically registered for the DSM segment containing
579 : * the control object of in-place areas when a segment is provided to
580 : * dsa_create_in_place or dsa_attach_in_place, and also for all areas created
581 : * with dsa_create.
582 : */
583 : void
2319 rhaas 584 GIC 3003 : dsa_on_dsm_detach_release_in_place(dsm_segment *segment, Datum place)
585 : {
586 3003 : dsa_release_in_place(DatumGetPointer(place));
587 3003 : }
588 :
589 : /*
590 : * Release a DSA area that was produced by dsa_create_in_place or
591 : * dsa_attach_in_place. The 'code' argument is ignored but provides an
592 : * interface suitable for on_shmem_exit or before_shmem_exit, for the
593 : * convenience of users who want to create a DSA segment inside shared memory
594 : * other than a DSM segment and have it automatically release at backend exit.
595 : * 'place' should be the address of the place where the area was created.
2319 rhaas 596 ECB : */
597 : void
2319 rhaas 598 LBC 0 : dsa_on_shmem_exit_release_in_place(int code, Datum place)
2319 rhaas 599 ECB : {
2319 rhaas 600 UIC 0 : dsa_release_in_place(DatumGetPointer(place));
601 0 : }
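
/*
 * For illustration, a sketch of registering that callback for an area
 * created in ordinary (non-DSM) shared memory, where 'place' is the address
 * that was passed to dsa_create_in_place():
 *
 *		on_shmem_exit(dsa_on_shmem_exit_release_in_place,
 *					  PointerGetDatum(place));
 */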
602 :
603 : /*
604 : * Release a DSA area that was produced by dsa_create_in_place or
605 : * dsa_attach_in_place. It is preferable to use one of the 'dsa_on_XXX'
606 : * callbacks so that this is managed automatically, because failure to release
607 : * an area created in-place leaks its segments permanently.
608 : *
609 : * This is also called automatically for areas produced by dsa_create or
2319 rhaas 610 EUB : * dsa_attach as an implementation detail.
611 : */
612 : void
2319 rhaas 613 GBC 3003 : dsa_release_in_place(void *place)
614 : {
2319 rhaas 615 GIC 3003 : dsa_area_control *control = (dsa_area_control *) place;
616 : int i;
617 :
618 3003 : LWLockAcquire(&control->lock, LW_EXCLUSIVE);
619 3003 : Assert(control->segment_header.magic ==
620 : (DSA_SEGMENT_HEADER_MAGIC ^ control->handle ^ 0));
621 3003 : Assert(control->refcnt > 0);
622 3003 : if (--control->refcnt == 0)
623 : {
624 870 : for (i = 0; i <= control->high_segment_index; ++i)
2319 rhaas 625 ECB : {
626 : dsm_handle handle;
627 :
2319 rhaas 628 GIC 492 : handle = control->segment_handles[i];
629 492 : if (handle != DSM_HANDLE_INVALID)
2319 rhaas 630 CBC 114 : dsm_unpin_segment(handle);
2319 rhaas 631 ECB : }
632 : }
2319 rhaas 633 CBC 3003 : LWLockRelease(&control->lock);
634 3003 : }
635 :
2319 rhaas 636 ECB : /*
637 : * Keep a DSA area attached until end of session or explicit detach.
638 : *
639 : * By default, areas are owned by the current resource owner, which means they
640 : * are detached automatically when that scope ends.
641 : */
642 : void
2319 rhaas 643 GIC 14851 : dsa_pin_mapping(dsa_area *area)
644 : {
2319 rhaas 645 ECB : int i;
646 :
2319 rhaas 647 GIC 14851 : Assert(!area->mapping_pinned);
648 14851 : area->mapping_pinned = true;
649 :
650 29708 : for (i = 0; i <= area->high_segment_index; ++i)
651 14857 : if (area->segment_maps[i].segment != NULL)
652 123 : dsm_pin_mapping(area->segment_maps[i].segment);
653 14851 : }
654 :
2319 rhaas 655 ECB : /*
656 : * Allocate memory in this storage area. The return value is a dsa_pointer
657 : * that can be passed to other processes, and converted to a local pointer
658 : * with dsa_get_address. 'flags' is a bitmap which should be constructed
2240 659 : * from the following values:
660 : *
661 : * DSA_ALLOC_HUGE allows allocations >= 1GB. Otherwise, such allocations
662 : * will result in an ERROR.
663 : *
664 : * DSA_ALLOC_NO_OOM causes this function to return InvalidDsaPointer when
1592 tmunro 665 : * no memory is available or a size limit established by dsa_set_size_limit
666 : * would be exceeded. Otherwise, such allocations will result in an ERROR.
667 : *
668 : * DSA_ALLOC_ZERO causes the allocated memory to be zeroed. Otherwise, the
669 : * contents of newly-allocated memory are indeterminate.
670 : *
671 : * These flags correspond to similarly named flags used by
672 : * MemoryContextAllocExtended(). See also the macros dsa_allocate and
673 : * dsa_allocate0 which expand to a call to this function with commonly used
674 : * flags.
675 : */
676 : dsa_pointer
1660 tmunro 677 GIC 614531 : dsa_allocate_extended(dsa_area *area, size_t size, int flags)
678 : {
679 : uint16 size_class;
680 : dsa_pointer start_pointer;
681 : dsa_segment_map *segment_map;
682 : dsa_pointer result;
683 :
2319 rhaas 684 614531 : Assert(size > 0);
685 :
686 : /* Sanity check on huge individual allocation size. */
2240 687 614531 : if (((flags & DSA_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
688 614531 : ((flags & DSA_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
2240 rhaas 689 LBC 0 : elog(ERROR, "invalid DSA memory alloc request size %zu", size);
690 :
691 : /*
692 : * If bigger than the largest size class, just grab a run of pages from
693 : * the free page manager, instead of allocating an object from a pool.
694 : * There will still be a span, but it's a special class of span that
695 : * manages this whole allocation and simply gives all pages back to the
2319 rhaas 696 ECB : * free page manager when dsa_free is called.
697 : */
2319 rhaas 698 GIC 614531 : if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
2319 rhaas 699 ECB : {
1660 tmunro 700 CBC 2950 : size_t npages = fpm_size_to_pages(size);
1660 tmunro 701 EUB : size_t first_page;
702 : dsa_pointer span_pointer;
2319 rhaas 703 GIC 2950 : dsa_area_pool *pool = &area->control->pools[DSA_SCLASS_SPAN_LARGE];
704 :
705 : /* Obtain a span object. */
706 2950 : span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
707 2950 : if (!DsaPointerIsValid(span_pointer))
708 : {
709 : /* Raise error unless asked not to. */
1504 tmunro 710 LBC 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
1504 tmunro 711 UIC 0 : ereport(ERROR,
1504 tmunro 712 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
713 : errmsg("out of memory"),
714 : errdetail("Failed on DSA request of size %zu.",
715 : size)));
2319 rhaas 716 UIC 0 : return InvalidDsaPointer;
717 : }
2319 rhaas 718 ECB :
2319 rhaas 719 CBC 2950 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
720 :
721 : /* Find a segment from which to allocate. */
2319 rhaas 722 GBC 2950 : segment_map = get_best_segment(area, npages);
723 2950 : if (segment_map == NULL)
2319 rhaas 724 GIC 21 : segment_map = make_new_segment(area, npages);
725 2950 : if (segment_map == NULL)
726 : {
727 : /* Can't make any more segments: game over. */
2319 rhaas 728 UBC 0 : LWLockRelease(DSA_AREA_LOCK(area));
2319 rhaas 729 UIC 0 : dsa_free(area, span_pointer);
730 :
2240 rhaas 731 ECB : /* Raise error unless asked not to. */
2054 andres 732 UIC 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
2240 rhaas 733 0 : ereport(ERROR,
2240 rhaas 734 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
735 : errmsg("out of memory"),
736 : errdetail("Failed on DSA request of size %zu.",
737 : size)));
2319 rhaas 738 UIC 0 : return InvalidDsaPointer;
739 : }
2319 rhaas 740 EUB :
741 : /*
742 : * Ask the free page manager for a run of pages. This should always
743 : * succeed, since both get_best_segment and make_new_segment should
744 : * only return a non-NULL pointer if it actually contains enough
745 : * contiguous freespace. If it does fail, something in our backend
746 : * private state is out of whack, so use FATAL to kill the process.
747 : */
2319 rhaas 748 GIC 2950 : if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
2319 rhaas 749 UIC 0 : elog(FATAL,
2319 rhaas 750 EUB : "dsa_allocate could not find %zu free pages", npages);
2319 rhaas 751 GIC 2950 : LWLockRelease(DSA_AREA_LOCK(area));
752 :
753 2950 : start_pointer = DSA_MAKE_POINTER(get_segment_index(area, segment_map),
754 : first_page * FPM_PAGE_SIZE);
755 :
756 : /* Initialize span and pagemap. */
757 2950 : LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
758 : LW_EXCLUSIVE);
759 2950 : init_span(area, span_pointer, pool, start_pointer, npages,
2319 rhaas 760 ECB : DSA_SCLASS_SPAN_LARGE);
2319 rhaas 761 GBC 2950 : segment_map->pagemap[first_page] = span_pointer;
2319 rhaas 762 GIC 2950 : LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
2319 rhaas 763 ECB :
764 : /* Zero-initialize the memory if requested. */
2240 rhaas 765 CBC 2950 : if ((flags & DSA_ALLOC_ZERO) != 0)
2240 rhaas 766 GIC 808 : memset(dsa_get_address(area, start_pointer), 0, size);
767 :
2319 768 2950 : return start_pointer;
2319 rhaas 769 ECB : }
770 :
771 : /* Map allocation to a size class. */
2319 rhaas 772 GIC 611581 : if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
2319 rhaas 773 ECB : {
774 : int mapidx;
775 :
776 : /* For smaller sizes we have a lookup table... */
2319 rhaas 777 CBC 604529 : mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
778 604529 : DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
2319 rhaas 779 GIC 604529 : size_class = dsa_size_class_map[mapidx];
2319 rhaas 780 ECB : }
781 : else
782 : {
783 : uint16 min;
784 : uint16 max;
785 :
786 : /* ... and for the rest we search by binary chop. */
2319 rhaas 787 GIC 7052 : min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
788 7052 : max = lengthof(dsa_size_classes) - 1;
2319 rhaas 789 ECB :
2319 rhaas 790 CBC 32504 : while (min < max)
2319 rhaas 791 ECB : {
2319 rhaas 792 GIC 25452 : uint16 mid = (min + max) / 2;
793 25452 : uint16 class_size = dsa_size_classes[mid];
794 :
795 25452 : if (class_size < size)
796 9686 : min = mid + 1;
797 : else
798 15766 : max = mid;
2319 rhaas 799 ECB : }
800 :
2319 rhaas 801 GIC 7052 : size_class = min;
2319 rhaas 802 ECB : }
2319 rhaas 803 GIC 611581 : Assert(size <= dsa_size_classes[size_class]);
2319 rhaas 804 CBC 611581 : Assert(size_class == 0 || size > dsa_size_classes[size_class - 1]);
2319 rhaas 805 ECB :
806 : /* Attempt to allocate an object from the appropriate pool. */
2240 rhaas 807 CBC 611581 : result = alloc_object(area, size_class);
2319 rhaas 808 ECB :
809 : /* Check for failure to allocate. */
2240 rhaas 810 CBC 611581 : if (!DsaPointerIsValid(result))
811 : {
812 : /* Raise error unless asked not to. */
2240 rhaas 813 LBC 0 : if ((flags & DSA_ALLOC_NO_OOM) == 0)
2240 rhaas 814 UIC 0 : ereport(ERROR,
2240 rhaas 815 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
816 : errmsg("out of memory"),
817 : errdetail("Failed on DSA request of size %zu.", size)));
2240 rhaas 818 UIC 0 : return InvalidDsaPointer;
2240 rhaas 819 ECB : }
820 :
821 : /* Zero-initialize the memory if requested. */
2240 rhaas 822 CBC 611581 : if ((flags & DSA_ALLOC_ZERO) != 0)
2240 rhaas 823 GIC 307206 : memset(dsa_get_address(area, result), 0, size);
824 :
2240 rhaas 825 GBC 611581 : return result;
2243 rhaas 826 EUB : }
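
/*
 * A caller-side sketch of the DSA_ALLOC_NO_OOM behavior described above
 * (request_size is an illustrative variable): instead of erroring out, the
 * caller tests for InvalidDsaPointer and falls back gracefully.
 *
 *		dsa_pointer dp = dsa_allocate_extended(area, request_size,
 *											   DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
 *
 *		if (!DsaPointerIsValid(dp))
 *			... retry with a smaller request, or report the failure ...
 */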
827 :
828 : /*
829 : * Free memory obtained with dsa_allocate.
2319 830 : */
831 : void
2319 rhaas 832 GIC 66782 : dsa_free(dsa_area *area, dsa_pointer dp)
833 : {
2319 rhaas 834 ECB : dsa_segment_map *segment_map;
835 : int pageno;
836 : dsa_pointer span_pointer;
837 : dsa_area_span *span;
838 : char *superblock;
839 : char *object;
840 : size_t size;
841 : int size_class;
842 :
843 : /* Make sure we don't have a stale segment in the slot 'dp' refers to. */
2319 rhaas 844 CBC 66782 : check_for_freed_segments(area);
845 :
846 : /* Locate the object, span and pool. */
2319 rhaas 847 GIC 66782 : segment_map = get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(dp));
848 66782 : pageno = DSA_EXTRACT_OFFSET(dp) / FPM_PAGE_SIZE;
849 66782 : span_pointer = segment_map->pagemap[pageno];
850 66782 : span = dsa_get_address(area, span_pointer);
851 66782 : superblock = dsa_get_address(area, span->start);
852 66782 : object = dsa_get_address(area, dp);
853 66782 : size_class = span->size_class;
854 66782 : size = dsa_size_classes[size_class];
855 :
2319 rhaas 856 ECB : /*
857 : * Special case for large objects that live in a special span: we return
858 : * those pages directly to the free page manager and free the span.
859 : */
2319 rhaas 860 CBC 66782 : if (span->size_class == DSA_SCLASS_SPAN_LARGE)
2319 rhaas 861 ECB : {
862 :
863 : #ifdef CLOBBER_FREED_MEMORY
2319 rhaas 864 CBC 2231 : memset(object, 0x7f, span->npages * FPM_PAGE_SIZE);
2319 rhaas 865 ECB : #endif
866 :
867 : /* Give pages back to free page manager. */
2319 rhaas 868 GIC 2231 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
869 2231 : FreePageManagerPut(segment_map->fpm,
870 2231 : DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
871 : span->npages);
2319 rhaas 872 CBC 2231 : LWLockRelease(DSA_AREA_LOCK(area));
873 : /* Unlink span. */
2319 rhaas 874 GIC 2231 : LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
875 : LW_EXCLUSIVE);
2319 rhaas 876 CBC 2231 : unlink_span(area, span);
2319 rhaas 877 GIC 2231 : LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
878 : /* Free the span object so it can be reused. */
879 2231 : dsa_free(area, span_pointer);
2319 rhaas 880 CBC 2231 : return;
2319 rhaas 881 ECB : }
882 :
883 : #ifdef CLOBBER_FREED_MEMORY
2319 rhaas 884 CBC 64551 : memset(object, 0x7f, size);
885 : #endif
2319 rhaas 886 ECB :
2319 rhaas 887 GIC 64551 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
2319 rhaas 888 ECB :
889 : /* Put the object on the span's freelist. */
2319 rhaas 890 GIC 64551 : Assert(object >= superblock);
2319 rhaas 891 CBC 64551 : Assert(object < superblock + DSA_SUPERBLOCK_SIZE);
892 64551 : Assert((object - superblock) % size == 0);
2319 rhaas 893 GIC 64551 : NextFreeObjectIndex(object) = span->firstfree;
894 64551 : span->firstfree = (object - superblock) / size;
895 64551 : ++span->nallocatable;
2319 rhaas 896 ECB :
897 : /*
898 : * See if the span needs to moved to a different fullness class, or be
899 : * freed so its pages can be given back to the segment.
900 : */
2319 rhaas 901 GIC 64551 : if (span->nallocatable == 1 && span->fclass == DSA_FULLNESS_CLASSES - 1)
2319 rhaas 902 ECB : {
903 : /*
904 : * The block was completely full and is located in the
905 : * highest-numbered fullness class, which is never scanned for free
906 : * chunks. We must move it to the next-lower fullness class.
907 : */
2319 rhaas 908 GIC 75 : unlink_span(area, span);
909 75 : add_span_to_fullness_class(area, span, span_pointer,
910 : DSA_FULLNESS_CLASSES - 2);
911 :
912 : /*
2319 rhaas 913 ECB : * If this is the only span, and there is no active span, then we
914 : * should probably move this span to fullness class 1. (Otherwise if
915 : * you allocate exactly all the objects in the only span, it moves to
916 : * class 3, then you free them all, it moves to 2, and then is given
917 : * back, leaving no active span).
918 : */
919 : }
2319 rhaas 920 CBC 64476 : else if (span->nallocatable == span->nmax &&
921 4072 : (span->fclass != 1 || span->prevspan != InvalidDsaPointer))
922 : {
923 : /*
924 : * This entire block is free, and it's not the active block for this
925 : * size class. Return the memory to the free page manager. We don't
926 : * do this for the active block to prevent hysteresis: if we
927 : * repeatedly allocate and free the only chunk in the active block, it
928 : * will be very inefficient if we deallocate and reallocate the block
929 : * every time.
930 : */
2319 rhaas 931 UIC 0 : destroy_superblock(area, span_pointer);
2319 rhaas 932 ECB : }
933 :
2319 rhaas 934 GIC 64551 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
935 : }
936 :
937 : /*
938 : * Obtain a backend-local address for a dsa_pointer. 'dp' must point to
939 : * memory allocated by the given area (possibly in another process) that
940 : * hasn't yet been freed. This may cause a segment to be mapped into the
941 : * current process if required, and may cause freed segments to be unmapped.
942 : */
2319 rhaas 943 EUB : void *
2319 rhaas 944 GIC 9755899 : dsa_get_address(dsa_area *area, dsa_pointer dp)
945 : {
2319 rhaas 946 ECB : dsa_segment_index index;
947 : size_t offset;
948 :
949 : /* Convert InvalidDsaPointer to NULL. */
2319 rhaas 950 GIC 9755899 : if (!DsaPointerIsValid(dp))
951 1374990 : return NULL;
952 :
953 : /* Process any requests to detach from freed segments. */
954 8380909 : check_for_freed_segments(area);
955 :
2319 rhaas 956 ECB : /* Break the dsa_pointer into its components. */
2319 rhaas 957 GIC 8380909 : index = DSA_EXTRACT_SEGMENT_NUMBER(dp);
958 8380909 : offset = DSA_EXTRACT_OFFSET(dp);
959 8380909 : Assert(index < DSA_MAX_SEGMENTS);
960 :
961 : /* Check if we need to cause this segment to be mapped in. */
2319 rhaas 962 CBC 8380909 : if (unlikely(area->segment_maps[index].mapped_address == NULL))
2319 rhaas 963 ECB : {
964 : /* Call for effect (we don't need the result). */
2319 rhaas 965 GIC 11620 : get_segment_by_index(area, index);
2319 rhaas 966 ECB : }
967 :
2319 rhaas 968 GIC 8380909 : return area->segment_maps[index].mapped_address + offset;
2319 rhaas 969 ECB : }
970 :
971 : /*
972 : * Pin this area, so that it will continue to exist even if all backends
973 : * detach from it. In that case, the area can still be reattached to if a
974 : * handle has been recorded somewhere.
975 : */
976 : void
2319 rhaas 977 CBC 1864 : dsa_pin(dsa_area *area)
978 : {
2319 rhaas 979 GIC 1864 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
2319 rhaas 980 CBC 1864 : if (area->control->pinned)
981 : {
2319 rhaas 982 UIC 0 : LWLockRelease(DSA_AREA_LOCK(area));
983 0 : elog(ERROR, "dsa_area already pinned");
984 : }
2319 rhaas 985 GIC 1864 : area->control->pinned = true;
986 1864 : ++area->control->refcnt;
987 1864 : LWLockRelease(DSA_AREA_LOCK(area));
988 1864 : }
2319 rhaas 989 ECB :
990 : /*
991 : * Undo the effects of dsa_pin, so that the given area can be freed when no
992 : * backends are attached to it. May be called only if dsa_pin has been
993 : * called.
2319 rhaas 994 EUB : */
995 : void
2319 rhaas 996 UIC 0 : dsa_unpin(dsa_area *area)
2319 rhaas 997 ECB : {
2319 rhaas 998 LBC 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
999 0 : Assert(area->control->refcnt > 1);
1000 0 : if (!area->control->pinned)
1001 : {
2319 rhaas 1002 UIC 0 : LWLockRelease(DSA_AREA_LOCK(area));
1003 0 : elog(ERROR, "dsa_area not pinned");
1004 : }
1005 0 : area->control->pinned = false;
1006 0 : --area->control->refcnt;
1007 0 : LWLockRelease(DSA_AREA_LOCK(area));
2319 rhaas 1008 UBC 0 : }
1009 :
2319 rhaas 1010 EUB : /*
1011 : * Set the total size limit for this area. This limit is checked whenever new
1012 : * segments need to be allocated from the operating system. If the new size
1013 : * limit is already exceeded, this has no immediate effect.
1014 : *
1015 : * Note that the total virtual memory usage may be temporarily larger than
1016 : * this limit when segments have been freed, but not yet detached by all
1017 : * backends that have attached to them.
1018 : */
1019 : void
1660 tmunro 1020 GBC 3652 : dsa_set_size_limit(dsa_area *area, size_t limit)
1021 : {
2319 rhaas 1022 GIC 3652 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1023 3652 : area->control->max_total_segment_size = limit;
1024 3652 : LWLockRelease(DSA_AREA_LOCK(area));
1025 3652 : }
1026 :
1027 : /*
1028 : * Aggressively free all spare memory in the hope of returning DSM segments to
1029 : * the operating system.
1030 : */
1031 : void
2319 rhaas 1032 LBC 0 : dsa_trim(dsa_area *area)
1033 : {
2319 rhaas 1034 ECB : int size_class;
1035 :
1036 : /*
1037 : * Trim in reverse pool order so we get to the spans-of-spans last, just
1038 : * in case any become entirely free while processing all the other pools.
1039 : */
2319 rhaas 1040 UIC 0 : for (size_class = DSA_NUM_SIZE_CLASSES - 1; size_class >= 0; --size_class)
1041 : {
1042 0 : dsa_area_pool *pool = &area->control->pools[size_class];
1043 : dsa_pointer span_pointer;
2319 rhaas 1044 EUB :
2319 rhaas 1045 UIC 0 : if (size_class == DSA_SCLASS_SPAN_LARGE)
1046 : {
1047 : /* Large object frees give back segments aggressively already. */
1048 0 : continue;
1049 : }
1050 :
1051 : /*
2319 rhaas 1052 EUB : * Search fullness class 1 only. That is where we expect to find an
1053 : * entirely empty superblock (entirely empty superblocks in other
1054 : * fullness classes are returned to the free page map by dsa_free).
1055 : */
2319 rhaas 1056 UIC 0 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
2319 rhaas 1057 UBC 0 : span_pointer = pool->spans[1];
2319 rhaas 1058 UIC 0 : while (DsaPointerIsValid(span_pointer))
1059 : {
2319 rhaas 1060 UBC 0 : dsa_area_span *span = dsa_get_address(area, span_pointer);
2319 rhaas 1061 UIC 0 : dsa_pointer next = span->nextspan;
1062 :
1063 0 : if (span->nallocatable == span->nmax)
1064 0 : destroy_superblock(area, span_pointer);
1065 :
1066 0 : span_pointer = next;
1067 : }
2319 rhaas 1068 UBC 0 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
2319 rhaas 1069 EUB : }
2319 rhaas 1070 UBC 0 : }
1071 :
2319 rhaas 1072 EUB : /*
1073 : * Print out debugging information about the internal state of the shared
1074 : * memory area.
1075 : */
1076 : void
2319 rhaas 1077 UIC 0 : dsa_dump(dsa_area *area)
2319 rhaas 1078 EUB : {
1079 : size_t i,
1080 : j;
1081 :
1082 : /*
1083 : * Note: This gives an inconsistent snapshot as it acquires and releases
1084 : * individual locks as it goes...
1085 : */
1086 :
2319 rhaas 1087 UIC 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1662 tmunro 1088 0 : check_for_freed_segments_locked(area);
2319 rhaas 1089 UBC 0 : fprintf(stderr, "dsa_area handle %x:\n", area->control->handle);
2319 rhaas 1090 UIC 0 : fprintf(stderr, " max_total_segment_size: %zu\n",
1091 0 : area->control->max_total_segment_size);
1092 0 : fprintf(stderr, " total_segment_size: %zu\n",
1093 0 : area->control->total_segment_size);
1094 0 : fprintf(stderr, " refcnt: %d\n", area->control->refcnt);
1095 0 : fprintf(stderr, " pinned: %c\n", area->control->pinned ? 't' : 'f');
1096 0 : fprintf(stderr, " segment bins:\n");
1097 0 : for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
1098 : {
2319 rhaas 1099 UBC 0 : if (area->control->segment_bins[i] != DSA_SEGMENT_INDEX_NONE)
2319 rhaas 1100 EUB : {
1101 : dsa_segment_index segment_index;
1102 :
2319 rhaas 1103 UBC 0 : fprintf(stderr,
2118 tgl 1104 EUB : " segment bin %zu (at least %d contiguous pages free):\n",
2319 rhaas 1105 UBC 0 : i, 1 << (i - 1));
1106 0 : segment_index = area->control->segment_bins[i];
1107 0 : while (segment_index != DSA_SEGMENT_INDEX_NONE)
2319 rhaas 1108 EUB : {
1109 : dsa_segment_map *segment_map;
1110 :
1111 : segment_map =
2319 rhaas 1112 UIC 0 : get_segment_by_index(area, segment_index);
1113 :
1114 0 : fprintf(stderr,
2319 rhaas 1115 EUB : " segment index %zu, usable_pages = %zu, "
1116 : "contiguous_pages = %zu, mapped at %p\n",
1117 : segment_index,
2319 rhaas 1118 UBC 0 : segment_map->header->usable_pages,
1119 0 : fpm_largest(segment_map->fpm),
1120 : segment_map->mapped_address);
2319 rhaas 1121 UIC 0 : segment_index = segment_map->header->next;
1122 : }
1123 : }
2319 rhaas 1124 EUB : }
2319 rhaas 1125 UIC 0 : LWLockRelease(DSA_AREA_LOCK(area));
2319 rhaas 1126 EUB :
2319 rhaas 1127 UIC 0 : fprintf(stderr, " pools:\n");
1128 0 : for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
1129 : {
2319 rhaas 1130 UBC 0 : bool found = false;
2319 rhaas 1131 EUB :
2319 rhaas 1132 UIC 0 : LWLockAcquire(DSA_SCLASS_LOCK(area, i), LW_EXCLUSIVE);
2319 rhaas 1133 UBC 0 : for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
2319 rhaas 1134 UIC 0 : if (DsaPointerIsValid(area->control->pools[i].spans[j]))
1135 0 : found = true;
1136 0 : if (found)
2319 rhaas 1137 EUB : {
2319 rhaas 1138 UIC 0 : if (i == DSA_SCLASS_BLOCK_OF_SPANS)
2319 rhaas 1139 UBC 0 : fprintf(stderr, " pool for blocks of span objects:\n");
1140 0 : else if (i == DSA_SCLASS_SPAN_LARGE)
2319 rhaas 1141 UIC 0 : fprintf(stderr, " pool for large object spans:\n");
2319 rhaas 1142 EUB : else
2319 rhaas 1143 UIC 0 : fprintf(stderr,
2118 tgl 1144 EUB : " pool for size class %zu (object size %hu bytes):\n",
2319 rhaas 1145 UBC 0 : i, dsa_size_classes[i]);
1146 0 : for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
2319 rhaas 1147 EUB : {
2319 rhaas 1148 UBC 0 : if (!DsaPointerIsValid(area->control->pools[i].spans[j]))
2319 rhaas 1149 UIC 0 : fprintf(stderr, " fullness class %zu is empty\n", j);
2319 rhaas 1150 EUB : else
1151 : {
2319 rhaas 1152 UBC 0 : dsa_pointer span_pointer = area->control->pools[i].spans[j];
2319 rhaas 1153 EUB :
2319 rhaas 1154 UIC 0 : fprintf(stderr, " fullness class %zu:\n", j);
2319 rhaas 1155 UBC 0 : while (DsaPointerIsValid(span_pointer))
1156 : {
2319 rhaas 1157 EUB : dsa_area_span *span;
1158 :
2319 rhaas 1159 UIC 0 : span = dsa_get_address(area, span_pointer);
2319 rhaas 1160 UBC 0 : fprintf(stderr,
2316 rhaas 1161 EUB : " span descriptor at "
1162 : DSA_POINTER_FORMAT ", superblock at "
1163 : DSA_POINTER_FORMAT
1164 : ", pages = %zu, objects free = %hu/%hu\n",
1165 : span_pointer, span->start, span->npages,
2319 rhaas 1166 UBC 0 : span->nallocatable, span->nmax);
1167 0 : span_pointer = span->nextspan;
1168 : }
1169 : }
1170 : }
2319 rhaas 1171 EUB : }
2319 rhaas 1172 UBC 0 : LWLockRelease(DSA_SCLASS_LOCK(area, i));
1173 : }
2319 rhaas 1174 UIC 0 : }
1175 :
1176 : /*
1177 : * Return the smallest size that you can successfully provide to
2319 rhaas 1178 EUB : * dsa_create_in_place.
1179 : */
1180 : size_t
2319 rhaas 1181 GIC 12607 : dsa_minimum_size(void)
1182 : {
1183 : size_t size;
2319 rhaas 1184 GBC 12607 : int pages = 0;
1185 :
1186 12607 : size = MAXALIGN(sizeof(dsa_area_control)) +
1187 : MAXALIGN(sizeof(FreePageManager));
1188 :
1189 : /* Figure out how many pages we need, including the page map... */
2319 rhaas 1190 GIC 37821 : while (((size + FPM_PAGE_SIZE - 1) / FPM_PAGE_SIZE) > pages)
1191 : {
1192 25214 : ++pages;
2319 rhaas 1193 CBC 25214 : size += sizeof(dsa_pointer);
1194 : }
1195 :
1196 12607 : return pages * FPM_PAGE_SIZE;
1197 : }
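
/*
 * For illustration, the first-segment layout that the loop above sizes and
 * that create_internal() below lays down is roughly
 *
 *		[ dsa_area_control | FreePageManager | page map | usable 4kB pages ]
 *
 * with the metadata padded out to the next FPM_PAGE_SIZE boundary so that
 * the usable pages start on a page boundary.
 */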
2319 rhaas 1198 ECB :
1199 : /*
1200 : * Workhorse function for dsa_create and dsa_create_in_place.
1201 : */
1202 : static dsa_area *
2319 rhaas 1203 GIC 2242 : create_internal(void *place, size_t size,
2285 rhaas 1204 ECB : int tranche_id,
2319 1205 : dsm_handle control_handle,
1206 : dsm_segment *control_segment)
1207 : {
1208 : dsa_area_control *control;
1209 : dsa_area *area;
1210 : dsa_segment_map *segment_map;
1211 : size_t usable_pages;
1212 : size_t total_pages;
1213 : size_t metadata_bytes;
1214 : int i;
1215 :
1216 : /* Sanity check on the space we have to work in. */
2319 rhaas 1217 GIC 2242 : if (size < dsa_minimum_size())
2319 rhaas 1218 UIC 0 : elog(ERROR, "dsa_area space must be at least %zu, but %zu provided",
1219 : dsa_minimum_size(), size);
1220 :
1221 : /* Now figure out how much space is usable */
2319 rhaas 1222 GIC 2242 : total_pages = size / FPM_PAGE_SIZE;
1223 2242 : metadata_bytes =
1224 : MAXALIGN(sizeof(dsa_area_control)) +
1225 2242 : MAXALIGN(sizeof(FreePageManager)) +
1226 : total_pages * sizeof(dsa_pointer);
1227 : /* Add padding up to next page boundary. */
1228 2242 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2319 rhaas 1229 CBC 2242 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2319 rhaas 1230 GBC 2242 : Assert(metadata_bytes <= size);
2319 rhaas 1231 GIC 2242 : usable_pages = (size - metadata_bytes) / FPM_PAGE_SIZE;
1232 :
1233 : /*
2319 rhaas 1234 ECB : * Initialize the dsa_area_control object located at the start of the
1235 : * space.
1236 : */
2319 rhaas 1237 CBC 2242 : control = (dsa_area_control *) place;
982 tmunro 1238 GIC 2242 : memset(place, 0, sizeof(*control));
2319 rhaas 1239 2242 : control->segment_header.magic =
2319 rhaas 1240 CBC 2242 : DSA_SEGMENT_HEADER_MAGIC ^ control_handle ^ 0;
1241 2242 : control->segment_header.next = DSA_SEGMENT_INDEX_NONE;
1242 2242 : control->segment_header.prev = DSA_SEGMENT_INDEX_NONE;
1243 2242 : control->segment_header.usable_pages = usable_pages;
2319 rhaas 1244 GIC 2242 : control->segment_header.freed = false;
1245 2242 : control->segment_header.size = DSA_INITIAL_SEGMENT_SIZE;
1246 2242 : control->handle = control_handle;
1660 tmunro 1247 2242 : control->max_total_segment_size = (size_t) -1;
2319 rhaas 1248 2242 : control->total_segment_size = size;
2319 rhaas 1249 CBC 2242 : control->segment_handles[0] = control_handle;
1250 38114 : for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
1251 35872 : control->segment_bins[i] = DSA_SEGMENT_INDEX_NONE;
1252 2242 : control->refcnt = 1;
1253 2242 : control->lwlock_tranche_id = tranche_id;
2319 rhaas 1254 ECB :
1255 : /*
1256 : * Create the dsa_area object that this backend will use to access the
1257 : * area. Other backends will need to obtain their own dsa_area object by
1258 : * attaching.
1259 : */
2319 rhaas 1260 CBC 2242 : area = palloc(sizeof(dsa_area));
1261 2242 : area->control = control;
1262 2242 : area->mapping_pinned = false;
1263 2242 : memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
1264 2242 : area->high_segment_index = 0;
2190 andres 1265 2242 : area->freed_segment_counter = 0;
2319 rhaas 1266 GIC 2242 : LWLockInitialize(&control->lock, control->lwlock_tranche_id);
1267 87438 : for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
1268 85196 : LWLockInitialize(DSA_SCLASS_LOCK(area, i),
1269 : control->lwlock_tranche_id);
1270 :
1271 : /* Set up the segment map for this process's mapping. */
2319 rhaas 1272 CBC 2242 : segment_map = &area->segment_maps[0];
1273 2242 : segment_map->segment = control_segment;
1274 2242 : segment_map->mapped_address = place;
1275 2242 : segment_map->header = (dsa_segment_header *) place;
1276 2242 : segment_map->fpm = (FreePageManager *)
1277 2242 : (segment_map->mapped_address +
2319 rhaas 1278 ECB : MAXALIGN(sizeof(dsa_area_control)));
2319 rhaas 1279 CBC 2242 : segment_map->pagemap = (dsa_pointer *)
1280 2242 : (segment_map->mapped_address +
2319 rhaas 1281 GIC 2242 : MAXALIGN(sizeof(dsa_area_control)) +
1282 : MAXALIGN(sizeof(FreePageManager)));
1283 :
2319 rhaas 1284 ECB : /* Set up the free page map. */
2319 rhaas 1285 CBC 2242 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
2319 rhaas 1286 ECB : /* There can be 0 usable pages if size is dsa_minimum_size(). */
1287 :
2319 rhaas 1288 CBC 2242 : if (usable_pages > 0)
1289 1919 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
1290 : usable_pages);
2319 rhaas 1291 ECB :
1292 : /* Put this segment into the appropriate bin. */
2319 rhaas 1293 CBC 2242 : control->segment_bins[contiguous_pages_to_segment_bin(usable_pages)] = 0;
2319 rhaas 1294 GIC 2242 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
1295 :
1296 2242 : return area;
2319 rhaas 1297 ECB : }
1298 :
1299 : /*
1300 : * Workhorse function for dsa_attach and dsa_attach_in_place.
1301 : */
1302 : static dsa_area *
2319 rhaas 1303 GIC 15968 : attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
1304 : {
2319 rhaas 1305 ECB : dsa_area_control *control;
1306 : dsa_area *area;
1307 : dsa_segment_map *segment_map;
1308 :
2319 rhaas 1309 GIC 15968 : control = (dsa_area_control *) place;
1310 15968 : Assert(control->handle == handle);
1311 15968 : Assert(control->segment_handles[0] == handle);
1312 15968 : Assert(control->segment_header.magic ==
1313 : (DSA_SEGMENT_HEADER_MAGIC ^ handle ^ 0));
1314 :
2319 rhaas 1315 ECB : /* Build the backend-local area object. */
2319 rhaas 1316 GIC 15968 : area = palloc(sizeof(dsa_area));
1317 15968 : area->control = control;
1318 15968 : area->mapping_pinned = false;
1319 15968 : memset(&area->segment_maps[0], 0,
1320 : sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
2319 rhaas 1321 CBC 15968 : area->high_segment_index = 0;
2319 rhaas 1322 ECB :
1323 : /* Set up the segment map for this process's mapping. */
2319 rhaas 1324 CBC 15968 : segment_map = &area->segment_maps[0];
2118 tgl 1325 GIC 15968 : segment_map->segment = segment; /* NULL for in-place */
2319 rhaas 1326 15968 : segment_map->mapped_address = place;
1327 15968 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
2319 rhaas 1328 CBC 15968 : segment_map->fpm = (FreePageManager *)
1329 15968 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)));
1330 15968 : segment_map->pagemap = (dsa_pointer *)
1331 15968 : (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)) +
1332 : MAXALIGN(sizeof(FreePageManager)));
2319 rhaas 1333 ECB :
1334 : /* Bump the reference count. */
2319 rhaas 1335 GIC 15968 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
2202 rhaas 1336 CBC 15968 : if (control->refcnt == 0)
2202 rhaas 1337 ECB : {
1338 : /* We can't attach to a DSA area that has already been destroyed. */
2202 rhaas 1339 LBC 0 : ereport(ERROR,
2202 rhaas 1340 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2193 1341 : errmsg("could not attach to dynamic shared area")));
2202 1342 : }
2319 rhaas 1343 CBC 15968 : ++control->refcnt;
2190 andres 1344 GIC 15968 : area->freed_segment_counter = area->control->freed_segment_counter;
2319 rhaas 1345 15968 : LWLockRelease(DSA_AREA_LOCK(area));
1346 :
2319 rhaas 1347 CBC 15968 : return area;
2319 rhaas 1348 ECB : }
1349 :
1350 : /*
2319 rhaas 1351 EUB : * Add a new span to fullness class 1 of the indicated pool.
1352 : */
1353 : static void
2319 rhaas 1354 GIC 15938 : init_span(dsa_area *area,
2319 rhaas 1355 ECB : dsa_pointer span_pointer,
1660 tmunro 1356 : dsa_area_pool *pool, dsa_pointer start, size_t npages,
2319 rhaas 1357 : uint16 size_class)
1358 : {
2319 rhaas 1359 CBC 15938 : dsa_area_span *span = dsa_get_address(area, span_pointer);
1660 tmunro 1360 GIC 15938 : size_t obsize = dsa_size_classes[size_class];
1361 :
1362 : /*
1363 : * The per-pool lock must be held because we manipulate the span list for
1364 : * this pool.
1365 : */
2319 rhaas 1366 CBC 15938 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1367 :
1368 : /* Push this span onto the front of the span list for fullness class 1. */
2319 rhaas 1369 GIC 15938 : if (DsaPointerIsValid(pool->spans[1]))
1370 : {
2319 rhaas 1371 ECB : dsa_area_span *head = (dsa_area_span *)
2319 rhaas 1372 CBC 2101 : dsa_get_address(area, pool->spans[1]);
1373 :
2319 rhaas 1374 GIC 2101 : head->prevspan = span_pointer;
1375 : }
1376 15938 : span->pool = DsaAreaPoolToDsaPointer(area, pool);
1377 15938 : span->nextspan = pool->spans[1];
2319 rhaas 1378 CBC 15938 : span->prevspan = InvalidDsaPointer;
2319 rhaas 1379 GIC 15938 : pool->spans[1] = span_pointer;
1380 :
2319 rhaas 1381 CBC 15938 : span->start = start;
2319 rhaas 1382 GIC 15938 : span->npages = npages;
1383 15938 : span->size_class = size_class;
2319 rhaas 1384 CBC 15938 : span->ninitialized = 0;
2319 rhaas 1385 GIC 15938 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
2319 rhaas 1386 ECB : {
1387 : /*
1388 : * A block-of-spans contains its own descriptor, so mark one object as
1389 : * initialized and reduce the count of allocatable objects by one.
1390 : * Doing this here has the side effect of also reducing nmax by one,
1391 : * which is important to make sure we free this object at the correct
1392 : * time.
1393 : */
2319 rhaas 1394 CBC 2000 : span->ninitialized = 1;
1395 2000 : span->nallocatable = FPM_PAGE_SIZE / obsize - 1;
2319 rhaas 1396 ECB : }
2319 rhaas 1397 CBC 13938 : else if (size_class != DSA_SCLASS_SPAN_LARGE)
2319 rhaas 1398 GIC 10988 : span->nallocatable = DSA_SUPERBLOCK_SIZE / obsize;
1399 15938 : span->firstfree = DSA_SPAN_NOTHING_FREE;
1400 15938 : span->nmax = span->nallocatable;
1401 15938 : span->fclass = 1;
1402 15938 : }
1403 :
1404 : /*
1405 : * Transfer the first span in one fullness class to the head of another
2319 rhaas 1406 ECB : * fullness class.
1407 : */
1408 : static bool
2319 rhaas 1409 CBC 26489 : transfer_first_span(dsa_area *area,
2319 rhaas 1410 ECB : dsa_area_pool *pool, int fromclass, int toclass)
1411 : {
1412 : dsa_pointer span_pointer;
1413 : dsa_area_span *span;
1414 : dsa_area_span *nextspan;
1415 :
1416 : /* Can't do it if source list is empty. */
2319 rhaas 1417 GIC 26489 : span_pointer = pool->spans[fromclass];
1418 26489 : if (!DsaPointerIsValid(span_pointer))
1419 25977 : return false;
1420 :
2319 rhaas 1421 ECB : /* Remove span from head of source list. */
2319 rhaas 1422 GIC 512 : span = dsa_get_address(area, span_pointer);
1423 512 : pool->spans[fromclass] = span->nextspan;
1424 512 : if (DsaPointerIsValid(span->nextspan))
1425 : {
1426 : nextspan = (dsa_area_span *)
1427 28 : dsa_get_address(area, span->nextspan);
1428 28 : nextspan->prevspan = InvalidDsaPointer;
2319 rhaas 1429 ECB : }
1430 :
1431 : /* Add span to head of target list. */
2319 rhaas 1432 GIC 512 : span->nextspan = pool->spans[toclass];
1433 512 : pool->spans[toclass] = span_pointer;
2319 rhaas 1434 CBC 512 : if (DsaPointerIsValid(span->nextspan))
2319 rhaas 1435 ECB : {
1436 : nextspan = (dsa_area_span *)
2319 rhaas 1437 GIC 106 : dsa_get_address(area, span->nextspan);
1438 106 : nextspan->prevspan = span_pointer;
2319 rhaas 1439 ECB : }
2319 rhaas 1440 CBC 512 : span->fclass = toclass;
1441 :
2319 rhaas 1442 GIC 512 : return true;
1443 : }
2319 rhaas 1444 ECB :
1445 : /*
1446 : * Allocate one object of the requested size class from the given area.
1447 : */
1448 : static inline dsa_pointer
2319 rhaas 1449 CBC 625519 : alloc_object(dsa_area *area, int size_class)
2319 rhaas 1450 ECB : {
2319 rhaas 1451 GIC 625519 : dsa_area_pool *pool = &area->control->pools[size_class];
2319 rhaas 1452 ECB : dsa_area_span *span;
1453 : dsa_pointer block;
1454 : dsa_pointer result;
1455 : char *object;
1456 : size_t size;
1457 :
1458 : /*
1459 : * Even though ensure_active_superblock can in turn call alloc_object if
1460 : * it needs to allocate a new span, that's always from a different pool,
1461 : * and the order of lock acquisition is always the same, so it's OK that
1462 : * we hold this lock for the duration of this function.
1463 : */
2319 rhaas 1464 GIC 625519 : Assert(!LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1465 625519 : LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
1466 :
1467 : /*
1468 : * If there's no active superblock, we must successfully obtain one or
1469 : * fail the request.
1470 : */
1471 625519 : if (!DsaPointerIsValid(pool->spans[1]) &&
1472 13049 : !ensure_active_superblock(area, pool, size_class))
1473 : {
2319 rhaas 1474 UIC 0 : result = InvalidDsaPointer;
1475 : }
2319 rhaas 1476 ECB : else
1477 : {
1478 : /*
1479 : * There should be a block in fullness class 1 at this point, and it
1480 : * should never be completely full. Thus we can either pop an object
1481 : * from the free list or, failing that, initialize a new object.
1482 : */
2319 rhaas 1483 CBC 625519 : Assert(DsaPointerIsValid(pool->spans[1]));
2319 rhaas 1484 ECB : span = (dsa_area_span *)
2319 rhaas 1485 GIC 625519 : dsa_get_address(area, pool->spans[1]);
2319 rhaas 1486 GBC 625519 : Assert(span->nallocatable > 0);
2319 rhaas 1487 GIC 625519 : block = span->start;
1488 625519 : Assert(size_class < DSA_NUM_SIZE_CLASSES);
1489 625519 : size = dsa_size_classes[size_class];
1490 625519 : if (span->firstfree != DSA_SPAN_NOTHING_FREE)
1491 : {
1492 57765 : result = block + span->firstfree * size;
1493 57765 : object = dsa_get_address(area, result);
1494 57765 : span->firstfree = NextFreeObjectIndex(object);
2319 rhaas 1495 ECB : }
1496 : else
1497 : {
2319 rhaas 1498 CBC 567754 : result = block + span->ninitialized * size;
1499 567754 : ++span->ninitialized;
2319 rhaas 1500 ECB : }
2319 rhaas 1501 CBC 625519 : --span->nallocatable;
2319 rhaas 1502 ECB :
1503 : /* If it's now full, move it to the highest-numbered fullness class. */
2319 rhaas 1504 CBC 625519 : if (span->nallocatable == 0)
1505 451 : transfer_first_span(area, pool, 1, DSA_FULLNESS_CLASSES - 1);
2319 rhaas 1506 ECB : }
1507 :
2319 rhaas 1508 GIC 625519 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1509 625519 : LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
2319 rhaas 1510 ECB :
2319 rhaas 1511 CBC 625519 : return result;
1512 : }
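/*
 * Illustrative sketch (not part of dsa.c): the intrusive free list used by
 * alloc_object() above.  A free object stores the index of the next free
 * object in its own first bytes (the role NextFreeObjectIndex plays above),
 * and objects that have never been handed out are tracked separately with an
 * "initialized" counter.  The object size, sentinel value and helper names
 * below are placeholders for illustration, not the real definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define EXAMPLE_OBJECT_SIZE   64
#define EXAMPLE_NOBJECTS      8
#define EXAMPLE_NOTHING_FREE  UINT16_MAX

static char     block[EXAMPLE_NOBJECTS * EXAMPLE_OBJECT_SIZE];
static uint16_t firstfree = EXAMPLE_NOTHING_FREE;
static uint16_t ninitialized = 0;

static char *
example_alloc(void)
{
    char *object;

    if (firstfree != EXAMPLE_NOTHING_FREE)
    {
        /* Pop the head of the free list; it stores the next free index. */
        object = block + firstfree * EXAMPLE_OBJECT_SIZE;
        memcpy(&firstfree, object, sizeof(firstfree));
    }
    else if (ninitialized < EXAMPLE_NOBJECTS)
    {
        /* No freed object available: hand out the next never-used one. */
        object = block + ninitialized * EXAMPLE_OBJECT_SIZE;
        ++ninitialized;
    }
    else
        return NULL;

    return object;
}

static void
example_free(char *object)
{
    /* Push onto the free list by writing the old head into the object. */
    uint16_t index = (uint16_t) ((object - block) / EXAMPLE_OBJECT_SIZE);

    memcpy(object, &firstfree, sizeof(firstfree));
    firstfree = index;
}

int
main(void)
{
    char *a = example_alloc();
    char *b = example_alloc();

    example_free(a);
    printf("recycled: %d\n", example_alloc() == a);  /* prints 1 */
    (void) b;
    return 0;
}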
2319 rhaas 1513 ECB :
1514 : /*
1515 : * Ensure an active (i.e. fullness class 1) superblock, unless all existing
1516 : * superblocks are completely full and no more can be allocated.
1517 : *
1518 : * Fullness classes K of 0..N are loosely intended to represent blocks whose
1519 : * utilization percentage is at least K/N, but we only enforce this rigorously
1520 : * for the highest-numbered fullness class, which always contains exactly
1521 : * those blocks that are completely full. It's otherwise acceptable for a
1522 : * block to be in a higher-numbered fullness class than the one to which it
1523 : * logically belongs. In addition, the active block, which is always the
1524 : * first block in fullness class 1, is permitted to have a higher allocation
1525 : * percentage than would normally be allowable for that fullness class; we
1526 : * don't move it until it's completely full, and then it goes to the
1527 : * highest-numbered fullness class.
1528 : *
1529 : * It might seem odd that the active block is the head of fullness class 1
1530 : * rather than fullness class 0, but experience with other allocators has
1531 : * shown that it's usually better to allocate from a block that's moderately
1532 : * full rather than one that's nearly empty. Insofar as is reasonably
1533 : * possible, we want to avoid performing new allocations in a block that would
1534 : * otherwise become empty soon.
1535 : */
1536 : static bool
2319 rhaas 1537 GIC 13049 : ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
1538 : int size_class)
1539 : {
1540 : dsa_pointer span_pointer;
1541 : dsa_pointer start_pointer;
1660 tmunro 1542 13049 : size_t obsize = dsa_size_classes[size_class];
1543 : size_t nmax;
1544 : int fclass;
1545 13049 : size_t npages = 1;
1546 : size_t first_page;
1547 : size_t i;
1548 : dsa_segment_map *segment_map;
2319 rhaas 1549 ECB :
2319 rhaas 1550 GIC 13049 : Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
1551 :
1552 : /*
1553 : * Compute the number of objects that will fit in a block of this size
2319 rhaas 1554 ECB : * class. Span-of-spans blocks are just a single page, and the first
1555 : * object isn't available for use because it describes the block-of-spans
1556 : * itself.
1557 : */
2319 rhaas 1558 GIC 13049 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
1559 2000 : nmax = FPM_PAGE_SIZE / obsize - 1;
1560 : else
1561 11049 : nmax = DSA_SUPERBLOCK_SIZE / obsize;
2319 rhaas 1562 ECB :
1563 : /*
1564 : * If fullness class 1 is empty, try to find a span to put in it by
1565 : * scanning higher-numbered fullness classes (excluding the last one,
1566 : * whose blocks are certain to all be completely full).
1567 : */
2319 rhaas 1568 GIC 26098 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
1569 : {
2319 rhaas 1570 CBC 13049 : span_pointer = pool->spans[fclass];
2319 rhaas 1571 ECB :
2319 rhaas 1572 GIC 13145 : while (DsaPointerIsValid(span_pointer))
2319 rhaas 1573 ECB : {
1574 : int tfclass;
1575 : dsa_area_span *span;
1576 : dsa_area_span *nextspan;
1577 : dsa_area_span *prevspan;
1578 : dsa_pointer next_span_pointer;
1579 :
1580 : span = (dsa_area_span *)
2319 rhaas 1581 GIC 96 : dsa_get_address(area, span_pointer);
2319 rhaas 1582 CBC 96 : next_span_pointer = span->nextspan;
1583 :
2319 rhaas 1584 ECB : /* Figure out what fullness class should contain this span. */
2319 rhaas 1585 GIC 96 : tfclass = (nmax - span->nallocatable)
1586 96 : * (DSA_FULLNESS_CLASSES - 1) / nmax;
1587 :
1588 : /* Look up next span. */
1589 96 : if (DsaPointerIsValid(span->nextspan))
1590 : nextspan = (dsa_area_span *)
1591 35 : dsa_get_address(area, span->nextspan);
1592 : else
2319 rhaas 1593 CBC 61 : nextspan = NULL;
2319 rhaas 1594 ECB :
1595 : /*
1596 : * If utilization has dropped enough that this now belongs in some
1597 : * other fullness class, move it there.
1598 : */
2319 rhaas 1599 GIC 96 : if (tfclass < fclass)
1600 : {
2319 rhaas 1601 ECB : /* Remove from the current fullness class list. */
2319 rhaas 1602 GIC 1 : if (pool->spans[fclass] == span_pointer)
2319 rhaas 1603 ECB : {
1604 : /* It was the head; remove it. */
2319 rhaas 1605 CBC 1 : Assert(!DsaPointerIsValid(span->prevspan));
2319 rhaas 1606 GIC 1 : pool->spans[fclass] = span->nextspan;
1607 1 : if (nextspan != NULL)
2319 rhaas 1608 UIC 0 : nextspan->prevspan = InvalidDsaPointer;
1609 : }
1610 : else
2319 rhaas 1611 ECB : {
1612 : /* It was not the head. */
2319 rhaas 1613 UIC 0 : Assert(DsaPointerIsValid(span->prevspan));
2319 rhaas 1614 ECB : prevspan = (dsa_area_span *)
2319 rhaas 1615 UIC 0 : dsa_get_address(area, span->prevspan);
1616 0 : prevspan->nextspan = span->nextspan;
2319 rhaas 1617 ECB : }
2319 rhaas 1618 CBC 1 : if (nextspan != NULL)
2319 rhaas 1619 LBC 0 : nextspan->prevspan = span->prevspan;
2319 rhaas 1620 EUB :
1621 : /* Push onto the head of the new fullness class list. */
2319 rhaas 1622 GIC 1 : span->nextspan = pool->spans[tfclass];
1623 1 : pool->spans[tfclass] = span_pointer;
1624 1 : span->prevspan = InvalidDsaPointer;
2319 rhaas 1625 GBC 1 : if (DsaPointerIsValid(span->nextspan))
1626 : {
2319 rhaas 1627 EUB : nextspan = (dsa_area_span *)
2319 rhaas 1628 UBC 0 : dsa_get_address(area, span->nextspan);
2319 rhaas 1629 UIC 0 : nextspan->prevspan = span_pointer;
2319 rhaas 1630 ECB : }
2319 rhaas 1631 GBC 1 : span->fclass = tfclass;
1632 : }
1633 :
2319 rhaas 1634 ECB : /* Advance to next span on list. */
2319 rhaas 1635 CBC 96 : span_pointer = next_span_pointer;
2319 rhaas 1636 ECB : }
1637 :
1638 : /* Stop now if we found a suitable block. */
2319 rhaas 1639 GIC 13049 : if (DsaPointerIsValid(pool->spans[1]))
2319 rhaas 1640 UBC 0 : return true;
2319 rhaas 1641 EUB : }
1642 :
2319 rhaas 1643 ECB : /*
1644 : * If there are no blocks that properly belong in fullness class 1, pick
1645 : * one from some other fullness class and move it there anyway, so that we
1646 : * have an allocation target. Our last choice is to transfer a block
1647 : * that's almost empty (and might become completely empty soon if left
1648 : * alone), but even that is better than failing, which is what we must do
1649 : * if there are no blocks at all with freespace.
1650 : */
2319 rhaas 1651 CBC 13049 : Assert(!DsaPointerIsValid(pool->spans[1]));
2319 rhaas 1652 GBC 26038 : for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
2319 rhaas 1653 GIC 13049 : if (transfer_first_span(area, pool, fclass, 1))
1654 60 : return true;
1655 25978 : if (!DsaPointerIsValid(pool->spans[1]) &&
1656 12989 : transfer_first_span(area, pool, 0, 1))
1657 1 : return true;
1658 :
1659 : /*
1660 : * We failed to find an existing span with free objects, so we need to
1661 : * allocate a new superblock and construct a new span to manage it.
1662 : *
2319 rhaas 1663 ECB :  * First, get a dsa_area_span object to describe the new superblock
1664 : * ... unless this allocation is for a dsa_area_span object, in which case
1665 : * that's surely not going to work. We handle that case by storing the
1666 : * span describing a block-of-spans inline.
1667 : */
2319 rhaas 1668 CBC 12988 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
2319 rhaas 1669 ECB : {
2319 rhaas 1670 GIC 10988 : span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
1671 10988 : if (!DsaPointerIsValid(span_pointer))
2319 rhaas 1672 UIC 0 : return false;
2319 rhaas 1673 GIC 10988 : npages = DSA_PAGES_PER_SUPERBLOCK;
1674 : }
1675 :
1676 : /* Find or create a segment and allocate the superblock. */
1677 12988 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1678 12988 : segment_map = get_best_segment(area, npages);
1679 12988 : if (segment_map == NULL)
2319 rhaas 1680 ECB : {
2319 rhaas 1681 GIC 1294 : segment_map = make_new_segment(area, npages);
2319 rhaas 1682 CBC 1294 : if (segment_map == NULL)
2319 rhaas 1683 ECB : {
2319 rhaas 1684 UBC 0 : LWLockRelease(DSA_AREA_LOCK(area));
2319 rhaas 1685 LBC 0 : return false;
1686 : }
1687 : }
1688 :
1504 tmunro 1689 ECB : /*
1690 : * This shouldn't happen: get_best_segment() or make_new_segment()
1691 : * promised that we can successfully allocate npages.
1692 : */
2319 rhaas 1693 CBC 12988 : if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
1504 tmunro 1694 LBC 0 : elog(FATAL,
1695 : "dsa_allocate could not find %zu free pages for superblock",
1504 tmunro 1696 EUB : npages);
2319 rhaas 1697 GBC 12988 : LWLockRelease(DSA_AREA_LOCK(area));
1698 :
1699 : /* Compute the start of the superblock. */
2319 rhaas 1700 GIC 12988 : start_pointer =
1701 12988 : DSA_MAKE_POINTER(get_segment_index(area, segment_map),
1702 : first_page * FPM_PAGE_SIZE);
1703 :
1704 : /*
2319 rhaas 1705 ECB : * If this is a block-of-spans, carve the descriptor right out of the
2319 rhaas 1706 EUB : * allocated space.
1707 : */
2319 rhaas 1708 GIC 12988 : if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
2319 rhaas 1709 ECB : {
1710 : /*
1711 : * We have a pointer into the segment. We need to build a dsa_pointer
1712 : * from the segment index and offset into the segment.
1713 : */
2319 rhaas 1714 GIC 2000 : span_pointer = start_pointer;
1715 : }
1716 :
1717 : /* Initialize span and pagemap. */
1718 12988 : init_span(area, span_pointer, pool, start_pointer, npages, size_class);
1719 190796 : for (i = 0; i < npages; ++i)
2319 rhaas 1720 CBC 177808 : segment_map->pagemap[first_page + i] = span_pointer;
1721 :
2319 rhaas 1722 GIC 12988 : return true;
1723 : }
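/*
 * Illustrative sketch (not part of dsa.c): the fullness-class computation
 * used in ensure_active_superblock() above, pulled out into a standalone
 * program.  DSA_FULLNESS_CLASSES is assumed to be 4 here purely for
 * illustration; the real value is defined elsewhere in this file.  A span
 * with nmax objects of which nallocatable are still free is binned by its
 * utilization, with the highest class reserved for completely full spans and
 * class 1 doubling as the "active" allocation target.
 */
#include <stdio.h>
#include <stddef.h>

#define EXAMPLE_FULLNESS_CLASSES 4

static int
example_fullness_class(size_t nmax, size_t nallocatable)
{
    /* Same integer arithmetic as the tfclass computation above. */
    return (int) ((nmax - nallocatable) * (EXAMPLE_FULLNESS_CLASSES - 1) / nmax);
}

int
main(void)
{
    /* Suppose a superblock holds nmax = 1024 objects. */
    printf("%d\n", example_fullness_class(1024, 1024));  /* empty     -> 0 */
    printf("%d\n", example_fullness_class(1024, 600));   /* ~41% used -> 1 */
    printf("%d\n", example_fullness_class(1024, 200));   /* ~80% used -> 2 */
    printf("%d\n", example_fullness_class(1024, 0));     /* full      -> 3 */
    return 0;
}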
1724 :
1725 : /*
2319 rhaas 1726 ECB : * Return the segment map corresponding to a given segment index, mapping the
1727 : * segment in if necessary. For internal segment book-keeping, this is called
1728 : * with the area lock held. It is also called by dsa_free and dsa_get_address
1729 :  * without any locking, relying on the fact that they have a known live
1730 :  * segment index and always call check_for_freed_segments to ensure that any

1731 : * freed segment occupying the same slot is detached first.
1732 : */
1733 : static dsa_segment_map *
2319 rhaas 1734 CBC 94259 : get_segment_by_index(dsa_area *area, dsa_segment_index index)
1735 : {
2319 rhaas 1736 GIC 94259 : if (unlikely(area->segment_maps[index].mapped_address == NULL))
1737 : {
1738 : dsm_handle handle;
1739 : dsm_segment *segment;
1740 : dsa_segment_map *segment_map;
1741 :
1742 : /*
1743 : * If we are reached by dsa_free or dsa_get_address, there must be at
1744 : * least one object allocated in the referenced segment. Otherwise,
1745 : * their caller has a double-free or access-after-free bug, which we
2319 rhaas 1746 ECB : * have no hope of detecting. So we know it's safe to access this
1747 : * array slot without holding a lock; it won't change underneath us.
1748 : * Furthermore, we know that we can see the latest contents of the
1749 : * slot, as explained in check_for_freed_segments, which those
1750 : * functions call before arriving here.
1751 : */
2319 rhaas 1752 GIC 11664 : handle = area->control->segment_handles[index];
1753 :
1754 : /* It's an error to try to access an unused slot. */
1755 11664 : if (handle == DSM_HANDLE_INVALID)
2319 rhaas 1756 UIC 0 : elog(ERROR,
1757 : "dsa_area could not attach to a segment that has been freed");
1758 :
2319 rhaas 1759 GIC 11664 : segment = dsm_attach(handle);
1760 11664 : if (segment == NULL)
2319 rhaas 1761 UIC 0 : elog(ERROR, "dsa_area could not attach to segment");
2319 rhaas 1762 GIC 11664 : if (area->mapping_pinned)
1763 11337 : dsm_pin_mapping(segment);
2319 rhaas 1764 CBC 11664 : segment_map = &area->segment_maps[index];
2319 rhaas 1765 GIC 11664 : segment_map->segment = segment;
1766 11664 : segment_map->mapped_address = dsm_segment_address(segment);
2319 rhaas 1767 CBC 11664 : segment_map->header =
2319 rhaas 1768 GBC 11664 : (dsa_segment_header *) segment_map->mapped_address;
2319 rhaas 1769 GIC 11664 : segment_map->fpm = (FreePageManager *)
1770 11664 : (segment_map->mapped_address +
2319 rhaas 1771 ECB : MAXALIGN(sizeof(dsa_segment_header)));
2319 rhaas 1772 CBC 11664 : segment_map->pagemap = (dsa_pointer *)
2319 rhaas 1773 GBC 11664 : (segment_map->mapped_address +
2319 rhaas 1774 CBC 11664 : MAXALIGN(sizeof(dsa_segment_header)) +
2319 rhaas 1775 ECB : MAXALIGN(sizeof(FreePageManager)));
1776 :
1777 : /* Remember the highest index this backend has ever mapped. */
2319 rhaas 1778 CBC 11664 : if (area->high_segment_index < index)
1779 11664 : area->high_segment_index = index;
2319 rhaas 1780 ECB :
2319 rhaas 1781 CBC 11664 : Assert(segment_map->header->magic ==
2319 rhaas 1782 ECB : (DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ index));
1783 : }
1784 :
1662 tmunro 1785 : /*
1786 : * Callers of dsa_get_address() and dsa_free() don't hold the area lock,
1787 : * but it's a bug in the calling code and undefined behavior if the
1788 :  * address is not live (i.e. if the segment might possibly have been freed,
1789 : * they're trying to use a dangling pointer).
1790 : *
1791 : * For dsa.c code that holds the area lock to manipulate segment_bins
1792 : * lists, it would be a bug if we ever reach a freed segment here. After
1793 : * it's marked as freed, the only thing any backend should do with it is
1794 : * unmap it, and it should always have done that in
1795 : * check_for_freed_segments_locked() before arriving here to resolve an
1796 : * index to a segment_map.
1797 : *
1798 : * Either way we can assert that we aren't returning a freed segment.
1799 : */
1662 tmunro 1800 GIC 94259 : Assert(!area->segment_maps[index].header->freed);
1801 :
2319 rhaas 1802 94259 : return &area->segment_maps[index];
1803 : }
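/*
 * Illustrative sketch (not part of dsa.c): how a dsa_pointer packs a segment
 * index together with a byte offset into that segment, which is what
 * get_segment_by_index() resolves back into a backend-local mapping.  The
 * 40-bit offset width below is an assumption for illustration only; the real
 * encoding is given by the DSA_MAKE_POINTER, DSA_EXTRACT_SEGMENT_NUMBER and
 * DSA_EXTRACT_OFFSET macros used elsewhere in this file.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define EXAMPLE_OFFSET_WIDTH 40
#define EXAMPLE_OFFSET_MASK  ((UINT64_C(1) << EXAMPLE_OFFSET_WIDTH) - 1)

static uint64_t
example_make_pointer(uint64_t segment_index, uint64_t offset)
{
    return (segment_index << EXAMPLE_OFFSET_WIDTH) | offset;
}

int
main(void)
{
    uint64_t dp = example_make_pointer(3, 0x12345);

    printf("segment %" PRIu64 ", offset 0x%" PRIx64 "\n",
           dp >> EXAMPLE_OFFSET_WIDTH,   /* -> 3       */
           dp & EXAMPLE_OFFSET_MASK);    /* -> 0x12345 */
    return 0;
}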
1804 :
1805 : /*
1806 : * Return a superblock to the free page manager. If the underlying segment
1807 : * has become entirely free, then return it to the operating system.
1808 : *
1809 : * The appropriate pool lock must be held.
1810 : */
1811 : static void
2319 rhaas 1812 LBC 0 : destroy_superblock(dsa_area *area, dsa_pointer span_pointer)
1813 : {
1814 0 : dsa_area_span *span = dsa_get_address(area, span_pointer);
2319 rhaas 1815 UIC 0 : int size_class = span->size_class;
1816 : dsa_segment_map *segment_map;
1817 :
1818 :
1819 : /* Remove it from its fullness class list. */
1820 0 : unlink_span(area, span);
1821 :
1822 : /*
1823 : * Note: Here we acquire the area lock while we already hold a per-pool
2319 rhaas 1824 EUB : * lock. We never hold the area lock and then take a pool lock, or we
1825 : * could deadlock.
1826 : */
2319 rhaas 1827 UBC 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1662 tmunro 1828 UIC 0 : check_for_freed_segments_locked(area);
1829 : segment_map =
1830 0 : get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(span->start));
2319 rhaas 1831 0 : FreePageManagerPut(segment_map->fpm,
2319 rhaas 1832 UBC 0 : DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
1833 : span->npages);
1834 : /* Check if the segment is now entirely free. */
2319 rhaas 1835 UIC 0 : if (fpm_largest(segment_map->fpm) == segment_map->header->usable_pages)
1836 : {
1837 0 : dsa_segment_index index = get_segment_index(area, segment_map);
1838 :
2319 rhaas 1839 EUB : /* If it's not the segment with extra control data, free it. */
2319 rhaas 1840 UBC 0 : if (index != 0)
1841 : {
2319 rhaas 1842 EUB : /*
1843 : * Give it back to the OS, and allow other backends to detect that
1844 : * they need to detach.
1845 : */
2319 rhaas 1846 UIC 0 : unlink_segment(area, segment_map);
2319 rhaas 1847 UBC 0 : segment_map->header->freed = true;
2319 rhaas 1848 UIC 0 : Assert(area->control->total_segment_size >=
2319 rhaas 1849 EUB : segment_map->header->size);
2319 rhaas 1850 UIC 0 : area->control->total_segment_size -=
1851 0 : segment_map->header->size;
2319 rhaas 1852 UBC 0 : dsm_unpin_segment(dsm_segment_handle(segment_map->segment));
2319 rhaas 1853 UIC 0 : dsm_detach(segment_map->segment);
1854 0 : area->control->segment_handles[index] = DSM_HANDLE_INVALID;
1855 0 : ++area->control->freed_segment_counter;
1856 0 : segment_map->segment = NULL;
1857 0 : segment_map->header = NULL;
2319 rhaas 1858 UBC 0 : segment_map->mapped_address = NULL;
2319 rhaas 1859 EUB : }
1860 : }
2319 rhaas 1861 UIC 0 : LWLockRelease(DSA_AREA_LOCK(area));
2319 rhaas 1862 EUB :
1863 : /*
1864 : * Span-of-spans blocks store the span which describes them within the
1865 : * block itself, so freeing the storage implicitly frees the descriptor
1866 : * also. If this is a block of any other type, we need to separately free
1867 : * the span object also. This recursive call to dsa_free will acquire the
1868 : * span pool's lock. We can't deadlock because the acquisition order is
1869 : * always some other pool and then the span pool.
1870 : */
2319 rhaas 1871 UIC 0 : if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
1872 0 : dsa_free(area, span_pointer);
2319 rhaas 1873 UBC 0 : }
1874 :
1875 : static void
2319 rhaas 1876 GIC 2306 : unlink_span(dsa_area *area, dsa_area_span *span)
1877 : {
1878 2306 : if (DsaPointerIsValid(span->nextspan))
1879 : {
1880 1850 : dsa_area_span *next = dsa_get_address(area, span->nextspan);
1881 :
1882 1850 : next->prevspan = span->prevspan;
2319 rhaas 1883 EUB : }
2319 rhaas 1884 GBC 2306 : if (DsaPointerIsValid(span->prevspan))
2319 rhaas 1885 EUB : {
2319 rhaas 1886 GIC 1330 : dsa_area_span *prev = dsa_get_address(area, span->prevspan);
1887 :
2319 rhaas 1888 CBC 1330 : prev->nextspan = span->nextspan;
1889 : }
2319 rhaas 1890 ECB : else
1891 : {
2319 rhaas 1892 CBC 976 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
1893 :
1894 976 : pool->spans[span->fclass] = span->nextspan;
1895 : }
1896 2306 : }
1897 :
2319 rhaas 1898 ECB : static void
2319 rhaas 1899 GIC 75 : add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
2319 rhaas 1900 ECB : dsa_pointer span_pointer,
1901 : int fclass)
1902 : {
2319 rhaas 1903 GIC 75 : dsa_area_pool *pool = dsa_get_address(area, span->pool);
2319 rhaas 1904 ECB :
2319 rhaas 1905 GIC 75 : if (DsaPointerIsValid(pool->spans[fclass]))
2319 rhaas 1906 ECB : {
2319 rhaas 1907 GIC 29 : dsa_area_span *head = dsa_get_address(area,
2319 rhaas 1908 ECB : pool->spans[fclass]);
1909 :
2319 rhaas 1910 GIC 29 : head->prevspan = span_pointer;
2319 rhaas 1911 ECB : }
2319 rhaas 1912 GIC 75 : span->prevspan = InvalidDsaPointer;
1913 75 : span->nextspan = pool->spans[fclass];
1914 75 : pool->spans[fclass] = span_pointer;
2319 rhaas 1915 CBC 75 : span->fclass = fclass;
2319 rhaas 1916 GIC 75 : }
2319 rhaas 1917 ECB :
1918 : /*
1919 : * Detach from an area that was either created or attached to by this process.
1920 : */
1921 : void
2319 rhaas 1922 CBC 18029 : dsa_detach(dsa_area *area)
1923 : {
2319 rhaas 1924 ECB : int i;
1925 :
1926 : /* Detach from all segments. */
2319 rhaas 1927 CBC 49025 : for (i = 0; i <= area->high_segment_index; ++i)
1928 30996 : if (area->segment_maps[i].segment != NULL)
2319 rhaas 1929 GIC 12967 : dsm_detach(area->segment_maps[i].segment);
1930 :
1931 : /*
1932 : * Note that 'detaching' (= detaching from DSM segments) doesn't include
1933 : * 'releasing' (= adjusting the reference count). It would be nice to
2319 rhaas 1934 ECB : * combine these operations, but client code might never get around to
1935 : * calling dsa_detach because of an error path, and a detach hook on any
1936 : * particular segment is too late to detach other segments in the area
1937 : * without risking a 'leak' warning in the non-error path.
1938 : */
1939 :
1940 : /* Free the backend-local area object. */
2319 rhaas 1941 CBC 18029 : pfree(area);
2319 rhaas 1942 GIC 18029 : }
1943 :
1944 : /*
1945 : * Unlink a segment from the bin that contains it.
1946 : */
1947 : static void
1948 2609 : unlink_segment(dsa_area *area, dsa_segment_map *segment_map)
1949 : {
1950 2609 : if (segment_map->header->prev != DSA_SEGMENT_INDEX_NONE)
1951 : {
1952 : dsa_segment_map *prev;
2319 rhaas 1953 ECB :
2319 rhaas 1954 LBC 0 : prev = get_segment_by_index(area, segment_map->header->prev);
2319 rhaas 1955 UIC 0 : prev->header->next = segment_map->header->next;
1956 : }
1957 : else
1958 : {
2319 rhaas 1959 GIC 2609 : Assert(area->control->segment_bins[segment_map->header->bin] ==
2319 rhaas 1960 ECB : get_segment_index(area, segment_map));
2319 rhaas 1961 GIC 2609 : area->control->segment_bins[segment_map->header->bin] =
2319 rhaas 1962 CBC 2609 : segment_map->header->next;
1963 : }
2319 rhaas 1964 GIC 2609 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
1965 : {
2319 rhaas 1966 EUB : dsa_segment_map *next;
1967 :
2319 rhaas 1968 UIC 0 : next = get_segment_by_index(area, segment_map->header->next);
1969 0 : next->header->prev = segment_map->header->prev;
1970 : }
2319 rhaas 1971 CBC 2609 : }
1972 :
2319 rhaas 1973 ECB : /*
1974 : * Find a segment that could satisfy a request for 'npages' of contiguous
1975 : * memory, or return NULL if none can be found. This may involve attaching to
1976 :  * segments that weren't previously attached so that we can query their
1977 :  * free page maps.
1978 : */
1979 : static dsa_segment_map *
1660 tmunro 1980 GBC 15938 : get_best_segment(dsa_area *area, size_t npages)
2319 rhaas 1981 EUB : {
1982 : size_t bin;
2319 rhaas 1983 ECB :
2319 rhaas 1984 GIC 15938 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
1662 tmunro 1985 15938 : check_for_freed_segments_locked(area);
1986 :
1987 : /*
1988 : * Start searching from the first bin that *might* have enough contiguous
1989 : * pages.
1990 : */
2319 rhaas 1991 15938 : for (bin = contiguous_pages_to_segment_bin(npages);
2319 rhaas 1992 CBC 68417 : bin < DSA_NUM_SEGMENT_BINS;
2319 rhaas 1993 GIC 52479 : ++bin)
1994 : {
1995 : /*
2319 rhaas 1996 ECB :  * The minimum number of contiguous free pages that any segment in
1997 :  * this bin should have; we'll re-bin any segment we see with fewer.
1998 : */
1660 tmunro 1999 GIC 67102 : size_t threshold = (size_t) 1 << (bin - 1);
2000 : dsa_segment_index segment_index;
2001 :
2002 : /* Search this bin for a segment with enough contiguous space. */
2319 rhaas 2003 CBC 67102 : segment_index = area->control->segment_bins[bin];
2004 68336 : while (segment_index != DSA_SEGMENT_INDEX_NONE)
2319 rhaas 2005 ECB : {
2006 : dsa_segment_map *segment_map;
2007 : dsa_segment_index next_segment_index;
2008 : size_t contiguous_pages;
2009 :
2319 rhaas 2010 GIC 15857 : segment_map = get_segment_by_index(area, segment_index);
2319 rhaas 2011 CBC 15857 : next_segment_index = segment_map->header->next;
2319 rhaas 2012 GIC 15857 : contiguous_pages = fpm_largest(segment_map->fpm);
2013 :
2014 : /* Not enough for the request, still enough for this bin. */
2319 rhaas 2015 CBC 15857 : if (contiguous_pages >= threshold && contiguous_pages < npages)
2319 rhaas 2016 ECB : {
2319 rhaas 2017 UIC 0 : segment_index = next_segment_index;
2018 0 : continue;
2019 : }
2020 :
2021 : /* Re-bin it if it's no longer in the appropriate bin. */
2319 rhaas 2022 CBC 15857 : if (contiguous_pages < threshold)
2319 rhaas 2023 ECB : {
1660 tmunro 2024 : size_t new_bin;
2025 :
2319 rhaas 2026 GIC 2609 : new_bin = contiguous_pages_to_segment_bin(contiguous_pages);
2319 rhaas 2027 ECB :
2028 : /* Remove it from its current bin. */
2319 rhaas 2029 GBC 2609 : unlink_segment(area, segment_map);
2319 rhaas 2030 EUB :
2031 : /* Push it onto the front of its new bin. */
2319 rhaas 2032 GIC 2609 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2033 2609 : segment_map->header->next =
2319 rhaas 2034 CBC 2609 : area->control->segment_bins[new_bin];
2319 rhaas 2035 GIC 2609 : segment_map->header->bin = new_bin;
2036 2609 : area->control->segment_bins[new_bin] = segment_index;
2037 2609 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2319 rhaas 2038 ECB : {
2039 : dsa_segment_map *next;
2040 :
2319 rhaas 2041 LBC 0 : next = get_segment_by_index(area,
2319 rhaas 2042 UIC 0 : segment_map->header->next);
2043 0 : Assert(next->header->bin == new_bin);
2319 rhaas 2044 LBC 0 : next->header->prev = segment_index;
2319 rhaas 2045 ECB : }
2046 :
2047 : /*
2048 : * But fall through to see if it's enough to satisfy this
2049 : * request anyway....
2050 : */
2051 : }
2052 :
2319 rhaas 2053 EUB : /* Check if we are done. */
2319 rhaas 2054 GBC 15857 : if (contiguous_pages >= npages)
2055 14623 : return segment_map;
2319 rhaas 2056 EUB :
2057 : /* Continue searching the same bin. */
2319 rhaas 2058 GIC 1234 : segment_index = next_segment_index;
2059 : }
2060 : }
2061 :
2062 : /* Not found. */
2063 1315 : return NULL;
2064 : }
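/*
 * Illustrative sketch (not part of dsa.c): the relationship between a
 * segment's largest run of contiguous free pages and the bin it lives in.
 * One reading consistent with the "threshold = (size_t) 1 << (bin - 1)" test
 * above is that bin 0 holds segments with no free pages and bin b > 0 holds
 * segments whose largest free run is in [2^(b-1), 2^b) pages; the function
 * below computes that bin number.  Any capping against DSA_NUM_SEGMENT_BINS
 * done by the real contiguous_pages_to_segment_bin() is omitted here.
 */
#include <stdio.h>
#include <stddef.h>

static size_t
example_pages_to_bin(size_t contiguous_pages)
{
    size_t bin = 0;

    while (contiguous_pages > 0)
    {
        contiguous_pages >>= 1;
        ++bin;                  /* ends up as floor(log2(n)) + 1 for n > 0 */
    }
    return bin;
}

int
main(void)
{
    printf("%zu %zu %zu %zu\n",
           example_pages_to_bin(0),    /* 0 */
           example_pages_to_bin(1),    /* 1 */
           example_pages_to_bin(15),   /* 4: largest run in [8, 16)  */
           example_pages_to_bin(16));  /* 5: largest run in [16, 32) */
    return 0;
}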
2065 :
2319 rhaas 2066 ECB : /*
2067 : * Create a new segment that can handle at least requested_pages. Returns
2068 : * NULL if the requested total size limit or maximum allowed number of
2069 : * segments would be exceeded.
2070 : */
2071 : static dsa_segment_map *
1660 tmunro 2072 GIC 1315 : make_new_segment(dsa_area *area, size_t requested_pages)
2073 : {
2074 : dsa_segment_index new_index;
1660 tmunro 2075 ECB : size_t metadata_bytes;
2076 : size_t total_size;
2077 : size_t total_pages;
2078 : size_t usable_pages;
2079 : dsa_segment_map *segment_map;
2080 : dsm_segment *segment;
2081 :
2319 rhaas 2082 GIC 1315 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2083 :
2319 rhaas 2084 ECB : /* Find a segment slot that is not in use (linearly for now). */
2319 rhaas 2085 GIC 1336 : for (new_index = 1; new_index < DSA_MAX_SEGMENTS; ++new_index)
2086 : {
2087 1336 : if (area->control->segment_handles[new_index] == DSM_HANDLE_INVALID)
2088 1315 : break;
2089 : }
2090 1315 : if (new_index == DSA_MAX_SEGMENTS)
2319 rhaas 2091 UIC 0 : return NULL;
2092 :
2093 : /*
2319 rhaas 2094 ECB : * If the total size limit is already exceeded, then we exit early and
2095 : * avoid arithmetic wraparound in the unsigned expressions below.
2096 : */
2319 rhaas 2097 CBC 1315 : if (area->control->total_segment_size >=
2319 rhaas 2098 GIC 1315 : area->control->max_total_segment_size)
2319 rhaas 2099 LBC 0 : return NULL;
2319 rhaas 2100 ECB :
2101 : /*
2102 : * The size should be at least as big as requested, and at least big
2319 rhaas 2103 EUB : * enough to follow a geometric series that approximately doubles the
2104 : * total storage each time we create a new segment. We use geometric
2105 : * growth because the underlying DSM system isn't designed for large
2106 : * numbers of segments (otherwise we might even consider just using one
2107 : * DSM segment for each large allocation and for each superblock, and then
2108 : * we wouldn't need to use FreePageManager).
2319 rhaas 2109 ECB : *
2110 : * We decide on a total segment size first, so that we produce tidy
2319 rhaas 2111 EUB : * power-of-two sized segments. This is a good property to have if we
2112 : * move to huge pages in the future. Then we work back to the number of
2113 : * pages we can fit.
2114 : */
2319 rhaas 2115 GIC 1315 : total_size = DSA_INITIAL_SEGMENT_SIZE *
1660 tmunro 2116 1315 : ((size_t) 1 << (new_index / DSA_NUM_SEGMENTS_AT_EACH_SIZE));
2319 rhaas 2117 1315 : total_size = Min(total_size, DSA_MAX_SEGMENT_SIZE);
2118 1315 : total_size = Min(total_size,
2119 : area->control->max_total_segment_size -
2120 : area->control->total_segment_size);
2121 :
2122 1315 : total_pages = total_size / FPM_PAGE_SIZE;
2123 1315 : metadata_bytes =
2124 : MAXALIGN(sizeof(dsa_segment_header)) +
2125 1315 : MAXALIGN(sizeof(FreePageManager)) +
2126 : sizeof(dsa_pointer) * total_pages;
2319 rhaas 2127 ECB :
2128 : /* Add padding up to next page boundary. */
2319 rhaas 2129 CBC 1315 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2130 1315 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2319 rhaas 2131 GIC 1315 : if (total_size <= metadata_bytes)
2319 rhaas 2132 UIC 0 : return NULL;
2319 rhaas 2133 GIC 1315 : usable_pages = (total_size - metadata_bytes) / FPM_PAGE_SIZE;
2319 rhaas 2134 CBC 1315 : Assert(metadata_bytes + usable_pages * FPM_PAGE_SIZE <= total_size);
2319 rhaas 2135 ECB :
2136 : /* See if that is enough... */
2319 rhaas 2137 CBC 1315 : if (requested_pages > usable_pages)
2138 : {
2139 : /*
2140 : * We'll make an odd-sized segment, working forward from the requested
2319 rhaas 2141 ECB : * number of pages.
2142 : */
2319 rhaas 2143 LBC 0 : usable_pages = requested_pages;
2319 rhaas 2144 UBC 0 : metadata_bytes =
2319 rhaas 2145 ECB : MAXALIGN(sizeof(dsa_segment_header)) +
2319 rhaas 2146 LBC 0 : MAXALIGN(sizeof(FreePageManager)) +
2147 : usable_pages * sizeof(dsa_pointer);
2148 :
2319 rhaas 2149 ECB : /* Add padding up to next page boundary. */
2319 rhaas 2150 UIC 0 : if (metadata_bytes % FPM_PAGE_SIZE != 0)
2151 0 : metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
2152 0 : total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
2153 :
2154 : /* Is that too large for dsa_pointer's addressing scheme? */
2319 rhaas 2155 UBC 0 : if (total_size > DSA_MAX_SEGMENT_SIZE)
2156 0 : return NULL;
2157 :
2319 rhaas 2158 EUB : /* Would that exceed the limit? */
2319 rhaas 2159 UIC 0 : if (total_size > area->control->max_total_segment_size -
2160 0 : area->control->total_segment_size)
2161 0 : return NULL;
2319 rhaas 2162 EUB : }
2163 :
2164 : /* Create the segment. */
2319 rhaas 2165 GIC 1315 : segment = dsm_create(total_size, 0);
2166 1315 : if (segment == NULL)
2319 rhaas 2167 UBC 0 : return NULL;
2319 rhaas 2168 GBC 1315 : dsm_pin_segment(segment);
2319 rhaas 2169 GIC 1315 : if (area->mapping_pinned)
2170 1207 : dsm_pin_mapping(segment);
2319 rhaas 2171 EUB :
2172 : /* Store the handle in shared memory to be found by index. */
2319 rhaas 2173 GBC 2630 : area->control->segment_handles[new_index] =
2319 rhaas 2174 GIC 1315 : dsm_segment_handle(segment);
2175 : /* Track the highest segment index in the history of the area. */
2176 1315 : if (area->control->high_segment_index < new_index)
2319 rhaas 2177 CBC 1315 : area->control->high_segment_index = new_index;
2319 rhaas 2178 ECB : /* Track the highest segment index this backend has ever mapped. */
2319 rhaas 2179 GBC 1315 : if (area->high_segment_index < new_index)
2319 rhaas 2180 CBC 1315 : area->high_segment_index = new_index;
2319 rhaas 2181 ECB : /* Track total size of all segments. */
2319 rhaas 2182 CBC 1315 : area->control->total_segment_size += total_size;
2319 rhaas 2183 GIC 1315 : Assert(area->control->total_segment_size <=
2184 : area->control->max_total_segment_size);
2319 rhaas 2185 ECB :
2186 : /* Build a segment map for this segment in this backend. */
2319 rhaas 2187 GIC 1315 : segment_map = &area->segment_maps[new_index];
2319 rhaas 2188 CBC 1315 : segment_map->segment = segment;
2189 1315 : segment_map->mapped_address = dsm_segment_address(segment);
2319 rhaas 2190 GIC 1315 : segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
2319 rhaas 2191 CBC 1315 : segment_map->fpm = (FreePageManager *)
2192 1315 : (segment_map->mapped_address +
2193 : MAXALIGN(sizeof(dsa_segment_header)));
2194 1315 : segment_map->pagemap = (dsa_pointer *)
2195 1315 : (segment_map->mapped_address +
2319 rhaas 2196 GIC 1315 : MAXALIGN(sizeof(dsa_segment_header)) +
2197 : MAXALIGN(sizeof(FreePageManager)));
2198 :
2319 rhaas 2199 ECB : /* Set up the free page map. */
2319 rhaas 2200 CBC 1315 : FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
2201 1315 : FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
2319 rhaas 2202 ECB : usable_pages);
2203 :
2204 : /* Set up the segment header and put it in the appropriate bin. */
2319 rhaas 2205 GIC 1315 : segment_map->header->magic =
2319 rhaas 2206 CBC 1315 : DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ new_index;
2207 1315 : segment_map->header->usable_pages = usable_pages;
2208 1315 : segment_map->header->size = total_size;
2319 rhaas 2209 GIC 1315 : segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
2210 1315 : segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
2211 1315 : segment_map->header->next =
2319 rhaas 2212 CBC 1315 : area->control->segment_bins[segment_map->header->bin];
2213 1315 : segment_map->header->freed = false;
2319 rhaas 2214 GIC 1315 : area->control->segment_bins[segment_map->header->bin] = new_index;
2215 1315 : if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
2216 : {
2319 rhaas 2217 ECB : dsa_segment_map *next =
2319 rhaas 2218 LBC 0 : get_segment_by_index(area, segment_map->header->next);
2319 rhaas 2219 ECB :
2319 rhaas 2220 LBC 0 : Assert(next->header->bin == segment_map->header->bin);
2221 0 : next->header->prev = new_index;
2319 rhaas 2222 ECB : }
2223 :
2319 rhaas 2224 CBC 1315 : return segment_map;
2319 rhaas 2225 ECB : }
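/*
 * Illustrative sketch (not part of dsa.c): the geometric growth schedule
 * used by make_new_segment() above.  The constants below are assumptions
 * for illustration only (a 1MB initial segment and two segments at each
 * size); the real values come from DSA_INITIAL_SEGMENT_SIZE and
 * DSA_NUM_SEGMENTS_AT_EACH_SIZE, and the Min() caps applied above
 * (DSA_MAX_SEGMENT_SIZE and the total-size limit) are omitted here.
 */
#include <stdio.h>
#include <stddef.h>

#define EXAMPLE_INITIAL_SEGMENT_SIZE   ((size_t) 1024 * 1024)
#define EXAMPLE_SEGMENTS_AT_EACH_SIZE  2

int
main(void)
{
    size_t new_index;

    /* Segment sizes double every EXAMPLE_SEGMENTS_AT_EACH_SIZE segments. */
    for (new_index = 1; new_index <= 8; ++new_index)
    {
        size_t total_size = EXAMPLE_INITIAL_SEGMENT_SIZE *
            ((size_t) 1 << (new_index / EXAMPLE_SEGMENTS_AT_EACH_SIZE));

        printf("segment %zu: %zu MB\n", new_index,
               total_size / (1024 * 1024));
    }
    /* Prints 1, 2, 2, 4, 4, 8, 8, 16 (MB). */
    return 0;
}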
2226 :
2227 : /*
2228 : * Check if any segments have been freed by destroy_superblock, so we can
2229 : * detach from them in this backend. This function is called by
2319 rhaas 2230 EUB : * dsa_get_address and dsa_free to make sure that a dsa_pointer they have
2231 : * received can be resolved to the correct segment.
2232 : *
2233 : * The danger we want to defend against is that there could be an old segment
2234 : * mapped into a given slot in this backend, and the dsa_pointer they have
2235 : * might refer to some new segment in the same slot. So those functions must
2319 rhaas 2236 ECB :  * be sure to process, before they call get_segment_by_index, every
2237 :  * detach-from-freed-segment instruction that had been generated by the
2238 :  * time this process received the dsa_pointer.
2239 : */
2240 : static void
2319 rhaas 2241 GIC 8447691 : check_for_freed_segments(dsa_area *area)
2242 : {
2243 : size_t freed_segment_counter;
2244 :
2245 : /*
2246 : * Any other process that has freed a segment has incremented
2247 : * freed_segment_counter while holding an LWLock, and that must precede
2248 : * any backend creating a new segment in the same slot while holding an
2249 : * LWLock, and that must precede the creation of any dsa_pointer pointing
2250 : * into the new segment which might reach us here, and the caller must
2251 : * have sent the dsa_pointer to this process using appropriate memory
2252 : * synchronization (some kind of locking or atomic primitive or system
2319 rhaas 2253 ECB : * call). So all we need to do on the reading side is ask for the load of
2254 : * freed_segment_counter to follow the caller's load of the dsa_pointer it
2255 : * has, and we can be sure to detect any segments that had been freed as
2256 : * of the time that the dsa_pointer reached this process.
2257 : */
2319 rhaas 2258 GIC 8447691 : pg_read_barrier();
2259 8447691 : freed_segment_counter = area->control->freed_segment_counter;
2260 8447691 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2261 : {
2262 : /* Check all currently mapped segments to find what's been freed. */
2319 rhaas 2263 UIC 0 : LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
1662 tmunro 2264 0 : check_for_freed_segments_locked(area);
2265 0 : LWLockRelease(DSA_AREA_LOCK(area));
2266 : }
1662 tmunro 2267 GIC 8447691 : }
2268 :
2269 : /*
1378 michael 2270 ECB :  * Workhorse for check_for_freed_segments(), and also used directly in paths
1662 tmunro 2271 : * where the area lock is already held. This should be called after acquiring
2272 : * the lock but before looking up any segment by index number, to make sure we
2273 : * unmap any stale segments that might have previously had the same index as a
2274 : * current segment.
1662 tmunro 2275 EUB : */
2276 : static void
1662 tmunro 2277 GBC 15938 : check_for_freed_segments_locked(dsa_area *area)
2278 : {
1660 tmunro 2279 ECB : size_t freed_segment_counter;
2280 : int i;
2281 :
1662 tmunro 2282 GIC 15938 : Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
2283 15938 : freed_segment_counter = area->control->freed_segment_counter;
2284 15938 : if (unlikely(area->freed_segment_counter != freed_segment_counter))
2285 : {
2319 rhaas 2286 UIC 0 : for (i = 0; i <= area->high_segment_index; ++i)
2287 : {
2288 0 : if (area->segment_maps[i].header != NULL &&
2319 rhaas 2289 LBC 0 : area->segment_maps[i].header->freed)
2290 : {
2319 rhaas 2291 UIC 0 : dsm_detach(area->segment_maps[i].segment);
2292 0 : area->segment_maps[i].segment = NULL;
2293 0 : area->segment_maps[i].header = NULL;
2319 rhaas 2294 LBC 0 : area->segment_maps[i].mapped_address = NULL;
2319 rhaas 2295 ECB : }
2296 : }
2319 rhaas 2297 UIC 0 : area->freed_segment_counter = freed_segment_counter;
2319 rhaas 2298 EUB : }
2319 rhaas 2299 GIC 15938 : }
|