Age Owner TLA Line data Source code
1 : /*
2 : * contrib/intarray/_int_gist.c
3 : */
4 : #include "postgres.h"
5 :
6 : #include <limits.h>
7 : #include <math.h>
8 :
9 : #include "_int.h"
10 : #include "access/gist.h"
11 : #include "access/reloptions.h"
12 : #include "access/stratnum.h"
13 :
14 : #define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
15 :
16 : /*
17 : * Control the maximum sparseness of compressed keys.
18 : *
19 : * The upper safe bound for this limit is half the maximum allocatable array
20 : * size. A lower bound would give more guarantees that pathological data
21 : * wouldn't eat excessive CPU and memory, but at the expense of breaking
22 : * possibly working (after a fashion) indexes.
23 : */
24 : #define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2)
25 : /* or: #define MAXNUMELTS 1000000 */
26 :
27 : /*
28 : ** GiST support methods
29 : */
7242 bruce 30 GIC 2 : PG_FUNCTION_INFO_V1(g_int_consistent);
7242 bruce 31 CBC 2 : PG_FUNCTION_INFO_V1(g_int_compress);
32 2 : PG_FUNCTION_INFO_V1(g_int_decompress);
33 2 : PG_FUNCTION_INFO_V1(g_int_penalty);
34 2 : PG_FUNCTION_INFO_V1(g_int_picksplit);
35 2 : PG_FUNCTION_INFO_V1(g_int_union);
36 2 : PG_FUNCTION_INFO_V1(g_int_same);
1105 akorotkov 37 2 : PG_FUNCTION_INFO_V1(g_int_options);
7242 bruce 38 ECB :
39 :
40 : /*
41 : ** The GiST Consistent method for _intments
42 : ** Should return false if for all data items x below entry,
43 : ** the predicate x op query == false, where op is the oper
44 : ** corresponding to strategy in the pg_amop table.
45 : */
46 : Datum
7242 bruce 47 GIC 88504 : g_int_consistent(PG_FUNCTION_ARGS)
7242 bruce 48 ECB : {
7242 bruce 49 GIC 88504 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
4473 tgl 50 CBC 88504 : ArrayType *query = PG_GETARG_ARRAYTYPE_P_COPY(1);
7242 bruce 51 88504 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
5050 bruce 52 ECB :
53 : /* Oid subtype = PG_GETARG_OID(3); */
5473 tgl 54 GIC 88504 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
194 peter 55 GNC 88504 : bool retval = false; /* silence compiler warning */
7242 bruce 56 ECB :
57 : /* this is exact except for RTSameStrategyNumber */
5473 tgl 58 GIC 88504 : *recheck = (strategy == RTSameStrategyNumber);
5473 tgl 59 ECB :
6031 bruce 60 GIC 88504 : if (strategy == BooleanSearchStrategy)
6031 bruce 61 ECB : {
6215 teodor 62 GIC 55498 : retval = execconsistent((QUERYTYPE *) query,
6031 bruce 63 CBC 55498 : (ArrayType *) DatumGetPointer(entry->key),
64 55498 : GIST_LEAF(entry));
6215 teodor 65 ECB :
6031 bruce 66 GIC 55498 : pfree(query);
6215 teodor 67 CBC 55498 : PG_RETURN_BOOL(retval);
6215 teodor 68 ECB : }
69 :
70 : /* sort query for fast search, key is already sorted */
6350 tgl 71 GIC 33006 : CHECKARRVALID(query);
7242 bruce 72 CBC 33006 : PREPAREARR(query);
7242 bruce 73 ECB :
7242 bruce 74 GIC 33006 : switch (strategy)
7242 bruce 75 ECB : {
7242 bruce 76 GIC 11080 : case RTOverlapStrategyNumber:
7242 bruce 77 CBC 11080 : retval = inner_int_overlap((ArrayType *) DatumGetPointer(entry->key),
7242 bruce 78 ECB : query);
7242 bruce 79 GIC 11080 : break;
7242 bruce 80 CBC 3125 : case RTSameStrategyNumber:
81 3125 : if (GIST_LEAF(entry))
4473 tgl 82 2964 : DirectFunctionCall3(g_int_same,
7242 bruce 83 ECB : entry->key,
84 : PointerGetDatum(query),
85 : PointerGetDatum(&retval));
86 : else
7242 bruce 87 GIC 161 : retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
7242 bruce 88 ECB : query);
7242 bruce 89 GIC 3125 : break;
7242 bruce 90 CBC 18801 : case RTContainsStrategyNumber:
6055 tgl 91 ECB : case RTOldContainsStrategyNumber:
7242 bruce 92 GIC 18801 : retval = inner_int_contains((ArrayType *) DatumGetPointer(entry->key),
7242 bruce 93 ECB : query);
7242 bruce 94 GIC 18801 : break;
7242 bruce 95 LBC 0 : case RTContainedByStrategyNumber:
6055 tgl 96 EUB : case RTOldContainedByStrategyNumber:
97 :
98 : /*
99 : * This code is unreachable as of intarray 1.4, because the <@
100 : * operator has been removed from the opclass. We keep it for now
101 : * to support older versions of the SQL definitions.
102 : */
7242 bruce 103 UIC 0 : if (GIST_LEAF(entry))
7242 bruce 104 UBC 0 : retval = inner_int_contains(query,
2118 tgl 105 0 : (ArrayType *) DatumGetPointer(entry->key));
7242 bruce 106 EUB : else
107 : {
108 : /*
109 : * Unfortunately, because empty arrays could be anywhere in
110 : * the index, we must search the whole tree.
111 : */
1342 tgl 112 UIC 0 : retval = true;
1342 tgl 113 EUB : }
7242 bruce 114 UIC 0 : break;
7242 bruce 115 UBC 0 : default:
2062 peter_e 116 0 : retval = false;
7242 bruce 117 EUB : }
6031 bruce 118 GIC 33006 : pfree(query);
7242 bruce 119 CBC 33006 : PG_RETURN_BOOL(retval);
7242 bruce 120 ECB : }
121 :
122 : Datum
7188 bruce 123 GIC 22546 : g_int_union(PG_FUNCTION_ARGS)
7188 bruce 124 ECB : {
6797 bruce 125 GIC 22546 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
7188 bruce 126 CBC 22546 : int *size = (int *) PG_GETARG_POINTER(1);
3940 peter_e 127 ECB : int32 i,
128 : *ptr;
129 : ArrayType *res;
6350 tgl 130 GIC 22546 : int totlen = 0;
7242 bruce 131 ECB :
6949 teodor 132 GIC 67967 : for (i = 0; i < entryvec->n; i++)
6350 tgl 133 ECB : {
6347 bruce 134 GIC 45421 : ArrayType *ent = GETENTRY(entryvec, i);
6350 tgl 135 ECB :
6350 tgl 136 GIC 45421 : CHECKARRVALID(ent);
6350 tgl 137 CBC 45421 : totlen += ARRNELEMS(ent);
6350 tgl 138 ECB : }
139 :
7188 bruce 140 GIC 22546 : res = new_intArrayType(totlen);
7188 bruce 141 CBC 22546 : ptr = ARRPTR(res);
7242 bruce 142 ECB :
6949 teodor 143 GIC 67967 : for (i = 0; i < entryvec->n; i++)
7188 bruce 144 ECB : {
6347 bruce 145 GIC 45421 : ArrayType *ent = GETENTRY(entryvec, i);
6347 bruce 146 ECB : int nel;
147 :
6350 tgl 148 GIC 45421 : nel = ARRNELEMS(ent);
3940 peter_e 149 CBC 45421 : memcpy(ptr, ARRPTR(ent), nel * sizeof(int32));
6350 tgl 150 45421 : ptr += nel;
7242 bruce 151 ECB : }
152 :
7188 bruce 153 GIC 22546 : QSORT(res, 1);
7188 bruce 154 CBC 22546 : res = _int_unique(res);
155 22546 : *size = VARSIZE(res);
7242 156 22546 : PG_RETURN_POINTER(res);
7242 bruce 157 ECB : }
158 :
159 : /*
160 : ** GiST Compress and Decompress methods
161 : */
162 : Datum
7242 bruce 163 GIC 17912 : g_int_compress(PG_FUNCTION_ARGS)
7242 bruce 164 ECB : {
7242 bruce 165 GIC 17912 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
7242 bruce 166 ECB : GISTENTRY *retval;
167 : ArrayType *r;
1105 akorotkov 168 GIC 17912 : int num_ranges = G_INT_GET_NUMRANGES();
1598 rhodiumtoad 169 ECB : int len,
170 : lenr;
171 : int *dr;
172 : int i,
173 : j,
174 : cand;
175 : int64 min;
176 :
7242 bruce 177 GIC 17912 : if (entry->leafkey)
7242 bruce 178 ECB : {
4473 tgl 179 GIC 13512 : r = DatumGetArrayTypePCopy(entry->key);
6350 tgl 180 CBC 13512 : CHECKARRVALID(r);
7242 bruce 181 13512 : PREPAREARR(r);
6355 teodor 182 ECB :
1105 akorotkov 183 GIC 13512 : if (ARRNELEMS(r) >= 2 * num_ranges)
6248 neilc 184 LBC 0 : elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead",
1105 akorotkov 185 EUB : 2 * num_ranges - 1, ARRNELEMS(r));
186 :
7242 bruce 187 GIC 13512 : retval = palloc(sizeof(GISTENTRY));
7242 bruce 188 CBC 13512 : gistentryinit(*retval, PointerGetDatum(r),
2062 peter_e 189 ECB : entry->rel, entry->page, entry->offset, false);
190 :
7242 bruce 191 GIC 13512 : PG_RETURN_POINTER(retval);
7242 bruce 192 ECB : }
193 :
194 : /*
195 : * leaf entries never compress one more time, only when entry->leafkey
196 : * ==true, so now we work only with internal keys
197 : */
198 :
4473 tgl 199 GIC 4400 : r = DatumGetArrayTypeP(entry->key);
6350 tgl 200 CBC 4400 : CHECKARRVALID(r);
4473 201 4400 : if (ARRISEMPTY(r))
7242 bruce 202 ECB : {
7242 bruce 203 UIC 0 : if (r != (ArrayType *) DatumGetPointer(entry->key))
7242 bruce 204 UBC 0 : pfree(r);
205 0 : PG_RETURN_POINTER(entry);
7242 bruce 206 EUB : }
207 :
1105 akorotkov 208 GIC 4400 : if ((len = ARRNELEMS(r)) >= 2 * num_ranges)
7242 bruce 209 ECB : { /* compress */
7242 bruce 210 GIC 164 : if (r == (ArrayType *) DatumGetPointer(entry->key))
4473 tgl 211 CBC 164 : r = DatumGetArrayTypePCopy(entry->key);
7242 bruce 212 164 : r = resize_intArrayType(r, 2 * (len));
7242 bruce 213 ECB :
7242 bruce 214 GIC 164 : dr = ARRPTR(r);
7242 bruce 215 ECB :
216 : /*
217 : * "len" at this point is the number of ranges we will construct.
218 : * "lenr" is the number of ranges we must eventually remove by
219 : * merging, we must be careful to remove no more than this number.
220 : */
1105 akorotkov 221 GIC 164 : lenr = len - num_ranges;
1598 rhodiumtoad 222 ECB :
223 : /*
224 : * Initially assume we can merge consecutive ints into a range. but we
225 : * must count every value removed and stop when lenr runs out
226 : */
1598 rhodiumtoad 227 GIC 4771 : for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
1598 rhodiumtoad 228 ECB : {
1418 tgl 229 GIC 4607 : int r_end = dr[i];
1418 tgl 230 CBC 4607 : int r_start = r_end;
1418 tgl 231 ECB :
1418 tgl 232 GIC 28468 : while (i > 0 && lenr > 0 && dr[i - 1] == r_start - 1)
1598 rhodiumtoad 233 CBC 23861 : --r_start, --i, --lenr;
1418 tgl 234 4607 : dr[2 * j] = r_start;
235 4607 : dr[2 * j + 1] = r_end;
1598 rhodiumtoad 236 ECB : }
237 : /* just copy the rest, if any, as trivial ranges */
1598 rhodiumtoad 238 GIC 11957 : for (; i >= 0; i--, j--)
1418 tgl 239 CBC 11793 : dr[2 * j] = dr[2 * j + 1] = dr[i];
7242 bruce 240 ECB :
1598 rhodiumtoad 241 GIC 164 : if (++j)
1598 rhodiumtoad 242 ECB : {
243 : /*
244 : * shunt everything down to start at the right place
245 : */
61 peter 246 GNC 164 : memmove(&dr[0], &dr[2 * j], 2 * (len - j) * sizeof(int32));
1598 rhodiumtoad 247 ECB : }
248 :
249 : /*
250 : * make "len" be number of array elements, not ranges
251 : */
1418 tgl 252 GIC 164 : len = 2 * (len - j);
7242 bruce 253 CBC 164 : cand = 1;
1105 akorotkov 254 164 : while (len > num_ranges * 2)
7242 bruce 255 ECB : {
1598 rhodiumtoad 256 UIC 0 : min = PG_INT64_MAX;
7242 bruce 257 UBC 0 : for (i = 2; i < len; i += 2)
1418 tgl 258 0 : if (min > ((int64) dr[i] - (int64) dr[i - 1]))
7242 bruce 259 EUB : {
1418 tgl 260 UIC 0 : min = ((int64) dr[i] - (int64) dr[i - 1]);
7242 bruce 261 UBC 0 : cand = i;
7242 bruce 262 EUB : }
61 peter 263 UNC 0 : memmove(&dr[cand - 1], &dr[cand + 1], (len - cand - 1) * sizeof(int32));
7242 bruce 264 UBC 0 : len -= 2;
7242 bruce 265 EUB : }
266 :
267 : /*
268 : * check sparseness of result
269 : */
1598 rhodiumtoad 270 GIC 164 : lenr = internal_size(dr, len);
1598 rhodiumtoad 271 CBC 164 : if (lenr < 0 || lenr > MAXNUMELTS)
1598 rhodiumtoad 272 LBC 0 : ereport(ERROR,
1598 rhodiumtoad 273 EUB : (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead")));
274 :
7242 bruce 275 GIC 164 : r = resize_intArrayType(r, len);
7242 bruce 276 CBC 164 : retval = palloc(sizeof(GISTENTRY));
277 164 : gistentryinit(*retval, PointerGetDatum(r),
2062 peter_e 278 ECB : entry->rel, entry->page, entry->offset, false);
7242 bruce 279 GIC 164 : PG_RETURN_POINTER(retval);
7242 bruce 280 ECB : }
281 : else
7242 bruce 282 GIC 4236 : PG_RETURN_POINTER(entry);
7242 bruce 283 ECB : }
284 :
285 : Datum
7242 bruce 286 GIC 332114 : g_int_decompress(PG_FUNCTION_ARGS)
7242 bruce 287 ECB : {
7242 bruce 288 GIC 332114 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
7242 bruce 289 ECB : GISTENTRY *retval;
290 : ArrayType *r;
1105 akorotkov 291 GIC 332114 : int num_ranges = G_INT_GET_NUMRANGES();
7242 bruce 292 ECB : int *dr,
293 : lenr;
294 : ArrayType *in;
295 : int lenin;
296 : int *din;
297 : int i,
298 : j;
299 :
4473 tgl 300 GIC 332114 : in = DatumGetArrayTypeP(entry->key);
7242 bruce 301 ECB :
6350 tgl 302 GIC 332114 : CHECKARRVALID(in);
4473 tgl 303 CBC 332114 : if (ARRISEMPTY(in))
5847 tgl 304 ECB : {
5624 bruce 305 GIC 258 : if (in != (ArrayType *) DatumGetPointer(entry->key))
5624 bruce 306 ECB : {
5847 tgl 307 GIC 258 : retval = palloc(sizeof(GISTENTRY));
5847 tgl 308 CBC 258 : gistentryinit(*retval, PointerGetDatum(in),
2062 peter_e 309 ECB : entry->rel, entry->page, entry->offset, false);
5847 tgl 310 GIC 258 : PG_RETURN_POINTER(retval);
5847 tgl 311 ECB : }
312 :
7242 bruce 313 UIC 0 : PG_RETURN_POINTER(entry);
5847 tgl 314 EUB : }
315 :
7242 bruce 316 GIC 331856 : lenin = ARRNELEMS(in);
7242 bruce 317 ECB :
1105 akorotkov 318 GIC 331856 : if (lenin < 2 * num_ranges)
7242 bruce 319 ECB : { /* not compressed value */
7242 bruce 320 GIC 329335 : if (in != (ArrayType *) DatumGetPointer(entry->key))
7242 bruce 321 ECB : {
7242 bruce 322 GIC 152062 : retval = palloc(sizeof(GISTENTRY));
7242 bruce 323 CBC 152062 : gistentryinit(*retval, PointerGetDatum(in),
2062 peter_e 324 ECB : entry->rel, entry->page, entry->offset, false);
325 :
7242 bruce 326 GIC 152062 : PG_RETURN_POINTER(retval);
7242 bruce 327 ECB : }
7242 bruce 328 GIC 177273 : PG_RETURN_POINTER(entry);
7242 bruce 329 ECB : }
330 :
7242 bruce 331 GIC 2521 : din = ARRPTR(in);
7242 bruce 332 CBC 2521 : lenr = internal_size(din, lenin);
1598 rhodiumtoad 333 2521 : if (lenr < 0 || lenr > MAXNUMELTS)
1598 rhodiumtoad 334 LBC 0 : ereport(ERROR,
1598 rhodiumtoad 335 EUB : (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead")));
336 :
7242 bruce 337 GIC 2521 : r = new_intArrayType(lenr);
7242 bruce 338 CBC 2521 : dr = ARRPTR(r);
7242 bruce 339 ECB :
7242 bruce 340 GIC 254621 : for (i = 0; i < lenin; i += 2)
7242 bruce 341 CBC 891392 : for (j = din[i]; j <= din[i + 1]; j++)
342 639292 : if ((!i) || *(dr - 1) != j)
343 639292 : *dr++ = j;
7242 bruce 344 ECB :
7242 bruce 345 GIC 2521 : if (in != (ArrayType *) DatumGetPointer(entry->key))
7242 bruce 346 CBC 2521 : pfree(in);
347 2521 : retval = palloc(sizeof(GISTENTRY));
348 2521 : gistentryinit(*retval, PointerGetDatum(r),
2062 peter_e 349 ECB : entry->rel, entry->page, entry->offset, false);
350 :
7242 bruce 351 GIC 2521 : PG_RETURN_POINTER(retval);
7242 bruce 352 ECB : }
353 :
354 : /*
355 : ** The GiST Penalty method for _intments
356 : */
357 : Datum
7188 bruce 358 GIC 152743 : g_int_penalty(PG_FUNCTION_ARGS)
7188 bruce 359 ECB : {
7188 bruce 360 GIC 152743 : GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
7188 bruce 361 CBC 152743 : GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
362 152743 : float *result = (float *) PG_GETARG_POINTER(2);
7242 bruce 363 ECB : ArrayType *ud;
364 : float tmp1,
365 : tmp2;
366 :
7242 bruce 367 GIC 152743 : ud = inner_int_union((ArrayType *) DatumGetPointer(origentry->key),
7188 bruce 368 CBC 152743 : (ArrayType *) DatumGetPointer(newentry->key));
7242 369 152743 : rt__int_size(ud, &tmp1);
370 152743 : rt__int_size((ArrayType *) DatumGetPointer(origentry->key), &tmp2);
371 152743 : *result = tmp1 - tmp2;
372 152743 : pfree(ud);
7242 bruce 373 ECB :
7188 bruce 374 GIC 152743 : PG_RETURN_POINTER(result);
7242 bruce 375 ECB : }
376 :
377 :
378 :
379 : Datum
7242 bruce 380 GIC 25501 : g_int_same(PG_FUNCTION_ARGS)
7242 bruce 381 ECB : {
4473 tgl 382 GIC 25501 : ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
4473 tgl 383 CBC 25501 : ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);
7242 bruce 384 25501 : bool *result = (bool *) PG_GETARG_POINTER(2);
3940 peter_e 385 25501 : int32 n = ARRNELEMS(a);
3940 peter_e 386 ECB : int32 *da,
387 : *db;
388 :
6350 tgl 389 GIC 25501 : CHECKARRVALID(a);
6350 tgl 390 CBC 25501 : CHECKARRVALID(b);
6350 tgl 391 ECB :
7242 bruce 392 GIC 25501 : if (n != ARRNELEMS(b))
7242 bruce 393 ECB : {
7242 bruce 394 GIC 6411 : *result = false;
7242 bruce 395 CBC 6411 : PG_RETURN_POINTER(result);
7242 bruce 396 ECB : }
2062 peter_e 397 GIC 19090 : *result = true;
7242 bruce 398 CBC 19090 : da = ARRPTR(a);
399 19090 : db = ARRPTR(b);
400 2023059 : while (n--)
4473 tgl 401 ECB : {
7242 bruce 402 GIC 2004567 : if (*da++ != *db++)
7242 bruce 403 ECB : {
2062 peter_e 404 GIC 598 : *result = false;
7242 bruce 405 CBC 598 : break;
7242 bruce 406 ECB : }
407 : }
408 :
7242 bruce 409 GIC 19090 : PG_RETURN_POINTER(result);
7242 bruce 410 ECB : }
411 :
412 : /*****************************************************************
413 : ** Common GiST Method
414 : *****************************************************************/
415 :
416 : typedef struct
417 : {
418 : OffsetNumber pos;
419 : float cost;
420 : } SPLITCOST;
421 :
422 : static int
7242 bruce 423 GIC 37571 : comparecost(const void *a, const void *b)
7242 bruce 424 ECB : {
4228 peter_e 425 GIC 37571 : if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
7242 bruce 426 CBC 22188 : return 0;
7242 bruce 427 ECB : else
4228 peter_e 428 GIC 15383 : return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
7242 bruce 429 ECB : }
430 :
431 : /*
432 : ** The GiST PickSplit method for _intments
433 : ** We use Guttman's poly time split algorithm
434 : */
435 : Datum
7188 bruce 436 GIC 174 : g_int_picksplit(PG_FUNCTION_ARGS)
7188 bruce 437 ECB : {
6797 bruce 438 GIC 174 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
7242 bruce 439 CBC 174 : GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
7242 bruce 440 ECB : OffsetNumber i,
441 : j;
442 : ArrayType *datum_alpha,
443 : *datum_beta;
444 : ArrayType *datum_l,
445 : *datum_r;
446 : ArrayType *union_d,
447 : *union_dl,
448 : *union_dr;
449 : ArrayType *inter_d;
450 : bool firsttime;
451 : float size_alpha,
452 : size_beta,
453 : size_union,
454 : size_inter;
455 : float size_waste,
456 : waste;
457 : float size_l,
458 : size_r;
459 : int nbytes;
7242 bruce 460 GIC 174 : OffsetNumber seed_1 = 0,
7242 bruce 461 CBC 174 : seed_2 = 0;
7242 bruce 462 ECB : OffsetNumber *left,
463 : *right;
464 : OffsetNumber maxoff;
465 : SPLITCOST *costvector;
466 :
467 : #ifdef GIST_DEBUG
468 : elog(DEBUG3, "--------picksplit %d", entryvec->n);
469 : #endif
470 :
6949 teodor 471 GIC 174 : maxoff = entryvec->n - 2;
7242 bruce 472 CBC 174 : nbytes = (maxoff + 2) * sizeof(OffsetNumber);
473 174 : v->spl_left = (OffsetNumber *) palloc(nbytes);
474 174 : v->spl_right = (OffsetNumber *) palloc(nbytes);
7242 bruce 475 ECB :
7242 bruce 476 GIC 174 : firsttime = true;
7242 bruce 477 CBC 174 : waste = 0.0;
478 20536 : for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
7242 bruce 479 ECB : {
6797 bruce 480 GIC 20362 : datum_alpha = GETENTRY(entryvec, i);
7242 bruce 481 CBC 1383601 : for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
7242 bruce 482 ECB : {
6797 bruce 483 GIC 1363239 : datum_beta = GETENTRY(entryvec, j);
7242 bruce 484 ECB :
485 : /* compute the wasted space by unioning these guys */
486 : /* size_waste = size_union - size_inter; */
7242 bruce 487 GIC 1363239 : union_d = inner_int_union(datum_alpha, datum_beta);
7242 bruce 488 CBC 1363239 : rt__int_size(union_d, &size_union);
489 1363239 : inter_d = inner_int_inter(datum_alpha, datum_beta);
490 1363239 : rt__int_size(inter_d, &size_inter);
491 1363239 : size_waste = size_union - size_inter;
7242 bruce 492 ECB :
7242 bruce 493 GIC 1363239 : pfree(union_d);
2974 kgrittn 494 CBC 1363239 : pfree(inter_d);
7242 bruce 495 ECB :
496 : /*
497 : * are these a more promising split that what we've already seen?
498 : */
499 :
7242 bruce 500 GIC 1363239 : if (size_waste > waste || firsttime)
7242 bruce 501 ECB : {
7242 bruce 502 GIC 974 : waste = size_waste;
7242 bruce 503 CBC 974 : seed_1 = i;
504 974 : seed_2 = j;
505 974 : firsttime = false;
7242 bruce 506 ECB : }
507 : }
508 : }
509 :
7242 bruce 510 GIC 174 : left = v->spl_left;
7242 bruce 511 CBC 174 : v->spl_nleft = 0;
512 174 : right = v->spl_right;
513 174 : v->spl_nright = 0;
514 174 : if (seed_1 == 0 || seed_2 == 0)
7242 bruce 515 ECB : {
7242 bruce 516 UIC 0 : seed_1 = 1;
7242 bruce 517 UBC 0 : seed_2 = 2;
7242 bruce 518 EUB : }
519 :
6797 bruce 520 GIC 174 : datum_alpha = GETENTRY(entryvec, seed_1);
7242 bruce 521 CBC 174 : datum_l = copy_intArrayType(datum_alpha);
522 174 : rt__int_size(datum_l, &size_l);
6797 523 174 : datum_beta = GETENTRY(entryvec, seed_2);
7242 524 174 : datum_r = copy_intArrayType(datum_beta);
525 174 : rt__int_size(datum_r, &size_r);
7242 bruce 526 ECB :
7242 bruce 527 GIC 174 : maxoff = OffsetNumberNext(maxoff);
7242 bruce 528 ECB :
529 : /*
530 : * sort entries
531 : */
7242 bruce 532 GIC 174 : costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
7242 bruce 533 CBC 20884 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
7242 bruce 534 ECB : {
7242 bruce 535 GIC 20710 : costvector[i - 1].pos = i;
6797 bruce 536 CBC 20710 : datum_alpha = GETENTRY(entryvec, i);
7242 537 20710 : union_d = inner_int_union(datum_l, datum_alpha);
538 20710 : rt__int_size(union_d, &size_alpha);
539 20710 : pfree(union_d);
540 20710 : union_d = inner_int_union(datum_r, datum_alpha);
541 20710 : rt__int_size(union_d, &size_beta);
542 20710 : pfree(union_d);
183 peter 543 GNC 20710 : costvector[i - 1].cost = fabsf((size_alpha - size_l) - (size_beta - size_r));
7242 bruce 544 ECB : }
61 peter 545 GNC 174 : qsort(costvector, maxoff, sizeof(SPLITCOST), comparecost);
7242 bruce 546 ECB :
547 : /*
548 : * Now split up the regions between the two seeds. An important property
549 : * of this split algorithm is that the split vector v has the indices of
550 : * items to be split in order in its left and right vectors. We exploit
551 : * this property by doing a merge in the code that actually splits the
552 : * page.
553 : *
554 : * For efficiency, we also place the new index tuple in this loop. This is
555 : * handled at the very end, when we have placed all the existing tuples
556 : * and i == maxoff + 1.
557 : */
558 :
559 :
7242 bruce 560 GIC 20884 : for (j = 0; j < maxoff; j++)
7242 bruce 561 ECB : {
7242 bruce 562 GIC 20710 : i = costvector[j].pos;
7242 bruce 563 ECB :
564 : /*
565 : * If we've already decided where to place this item, just put it on
566 : * the right list. Otherwise, we need to figure out which page needs
567 : * the least enlargement in order to store the item.
568 : */
569 :
7242 bruce 570 GIC 20710 : if (i == seed_1)
7242 bruce 571 ECB : {
7242 bruce 572 GIC 174 : *left++ = i;
7242 bruce 573 CBC 174 : v->spl_nleft++;
574 174 : continue;
7242 bruce 575 ECB : }
7242 bruce 576 GIC 20536 : else if (i == seed_2)
7242 bruce 577 ECB : {
7242 bruce 578 GIC 174 : *right++ = i;
7242 bruce 579 CBC 174 : v->spl_nright++;
580 174 : continue;
7242 bruce 581 ECB : }
582 :
583 : /* okay, which page needs least enlargement? */
6797 bruce 584 GIC 20362 : datum_alpha = GETENTRY(entryvec, i);
7242 bruce 585 CBC 20362 : union_dl = inner_int_union(datum_l, datum_alpha);
586 20362 : union_dr = inner_int_union(datum_r, datum_alpha);
587 20362 : rt__int_size(union_dl, &size_alpha);
588 20362 : rt__int_size(union_dr, &size_beta);
7242 bruce 589 ECB :
590 : /* pick which page to add it to */
7242 bruce 591 GIC 20362 : if (size_alpha - size_l < size_beta - size_r + WISH_F(v->spl_nleft, v->spl_nright, 0.01))
7242 bruce 592 ECB : {
2974 kgrittn 593 GIC 10071 : pfree(datum_l);
2974 kgrittn 594 CBC 10071 : pfree(union_dr);
7242 bruce 595 10071 : datum_l = union_dl;
596 10071 : size_l = size_alpha;
597 10071 : *left++ = i;
598 10071 : v->spl_nleft++;
7242 bruce 599 ECB : }
600 : else
601 : {
2974 kgrittn 602 GIC 10291 : pfree(datum_r);
2974 kgrittn 603 CBC 10291 : pfree(union_dl);
7242 bruce 604 10291 : datum_r = union_dr;
605 10291 : size_r = size_beta;
606 10291 : *right++ = i;
607 10291 : v->spl_nright++;
7242 bruce 608 ECB : }
609 : }
7242 bruce 610 GIC 174 : pfree(costvector);
7242 bruce 611 CBC 174 : *right = *left = FirstOffsetNumber;
7242 bruce 612 ECB :
7242 bruce 613 GIC 174 : v->spl_ldatum = PointerGetDatum(datum_l);
7242 bruce 614 CBC 174 : v->spl_rdatum = PointerGetDatum(datum_r);
7242 bruce 615 ECB :
7242 bruce 616 GIC 174 : PG_RETURN_POINTER(v);
7242 bruce 617 ECB : }
618 :
619 : Datum
1105 akorotkov 620 GIC 9 : g_int_options(PG_FUNCTION_ARGS)
1105 akorotkov 621 ECB : {
1105 akorotkov 622 GIC 9 : local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
1105 akorotkov 623 ECB :
1105 akorotkov 624 GIC 9 : init_local_reloptions(relopts, sizeof(GISTIntArrayOptions));
1105 akorotkov 625 CBC 9 : add_local_int_reloption(relopts, "numranges",
1105 akorotkov 626 ECB : "number of ranges for compression",
627 : G_INT_NUMRANGES_DEFAULT, 1, G_INT_NUMRANGES_MAX,
628 : offsetof(GISTIntArrayOptions, num_ranges));
629 :
1105 akorotkov 630 GIC 9 : PG_RETURN_VOID();
1105 akorotkov 631 ECB : }
|