Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * network_gist.c
4 : : * GiST support for network types.
5 : : *
6 : : * The key thing to understand about this code is the definition of the
7 : : * "union" of a set of INET/CIDR values. It works like this:
8 : : * 1. If the values are not all of the same IP address family, the "union"
9 : : * is a dummy value with family number zero, minbits zero, commonbits zero,
10 : : * address all zeroes. Otherwise:
11 : : * 2. The union has the common IP address family number.
12 : : * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 : : * of all the input values.
14 : : * 4. Let C be the number of leading address bits that are in common among
15 : : * all the input values (C ranges from 0 to ip_maxbits for the family).
16 : : * 5. The union's commonbits value is C.
17 : : * 6. The union's address value is the same as the common prefix for its
18 : : * first C bits, and is zeroes to the right of that. The physical width
19 : : * of the address value is ip_maxbits for the address family.
20 : : *
21 : : * In a leaf index entry (representing a single key), commonbits is equal to
22 : : * ip_maxbits for the address family, minbits is the same as the represented
23 : : * value's ip_bits, and the address is equal to the represented address.
24 : : * Although it may appear that we're wasting a byte by storing the union
25 : : * format and not just the represented INET/CIDR value in leaf keys, the
26 : : * extra byte is actually "free" because of alignment considerations.
27 : : *
28 : : * Note that this design tracks minbits and commonbits independently; in any
29 : : * given union value, either might be smaller than the other. This does not
30 : : * help us much when descending the tree, because of the way inet comparison
31 : : * is defined: at non-leaf nodes we can't compare more than minbits bits
32 : : * even if we know them. However, it greatly improves the quality of split
33 : : * decisions. Preliminary testing suggests that searches are as much as
34 : : * twice as fast as for a simpler design in which a single field doubles as
35 : : * the common prefix length and the minimum ip_bits value.
36 : : *
37 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
38 : : * Portions Copyright (c) 1994, Regents of the University of California
39 : : *
40 : : *
41 : : * IDENTIFICATION
42 : : * src/backend/utils/adt/network_gist.c
43 : : *
44 : : *-------------------------------------------------------------------------
45 : : */
46 : : #include "postgres.h"
47 : :
48 : : #include <sys/socket.h>
49 : :
50 : : #include "access/gist.h"
51 : : #include "access/stratnum.h"
52 : : #include "utils/fmgrprotos.h"
53 : : #include "utils/inet.h"
54 : : #include "varatt.h"
55 : :
56 : : /*
57 : : * Operator strategy numbers used in the GiST inet_ops opclass
58 : : */
59 : : #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
60 : : #define INETSTRAT_EQ RTEqualStrategyNumber
61 : : #define INETSTRAT_NE RTNotEqualStrategyNumber
62 : : #define INETSTRAT_LT RTLessStrategyNumber
63 : : #define INETSTRAT_LE RTLessEqualStrategyNumber
64 : : #define INETSTRAT_GT RTGreaterStrategyNumber
65 : : #define INETSTRAT_GE RTGreaterEqualStrategyNumber
66 : : #define INETSTRAT_SUB RTSubStrategyNumber
67 : : #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
68 : : #define INETSTRAT_SUP RTSuperStrategyNumber
69 : : #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
70 : :
71 : :
72 : : /*
73 : : * Representation of a GiST INET/CIDR index key. This is not identical to
74 : : * INET/CIDR because we need to keep track of the length of the common address
75 : : * prefix as well as the minimum netmask length. However, as long as it
76 : : * follows varlena header rules, the core GiST code won't know the difference.
77 : : * For simplicity we always use 1-byte-header varlena format.
78 : : */
79 : : typedef struct GistInetKey
80 : : {
81 : : uint8 va_header; /* varlena header --- don't touch directly */
82 : : unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
83 : : unsigned char minbits; /* minimum number of bits in netmask */
84 : : unsigned char commonbits; /* number of common prefix bits in addresses */
85 : : unsigned char ipaddr[16]; /* up to 128 bits of common address */
86 : : } GistInetKey;
87 : :
88 : : #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
89 : : #define InetKeyPGetDatum(X) PointerGetDatum(X)
90 : :
91 : : /*
92 : : * Access macros; not really exciting, but we use these for notational
93 : : * consistency with access to INET/CIDR values. Note that family-zero values
94 : : * are stored with 4 bytes of address, not 16.
95 : : */
96 : : #define gk_ip_family(gkptr) ((gkptr)->family)
97 : : #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
98 : : #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
99 : : #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
100 : : #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
101 : :
102 : : /* These require that the family field has been set: */
103 : : #define gk_ip_addrsize(gkptr) \
104 : : (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
105 : : #define gk_ip_maxbits(gkptr) \
106 : : ip_family_maxbits(gk_ip_family(gkptr))
107 : : #define SET_GK_VARSIZE(dst) \
108 : : SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
109 : :
110 : :
111 : : /*
112 : : * The GiST query consistency check
113 : : */
114 : : Datum
3659 tgl@sss.pgh.pa.us 115 :CBC 612 : inet_gist_consistent(PG_FUNCTION_ARGS)
116 : : {
117 : 612 : GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
118 : 612 : inet *query = PG_GETARG_INET_PP(1);
119 : 612 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
120 : :
121 : : /* Oid subtype = PG_GETARG_OID(3); */
122 : 612 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
123 : 612 : GistInetKey *key = DatumGetInetKeyP(ent->key);
124 : : int minbits,
125 : : order;
126 : :
127 : : /* All operators served by this function are exact. */
128 : 612 : *recheck = false;
129 : :
130 : : /*
131 : : * Check 0: different families
132 : : *
133 : : * If key represents multiple address families, its children could match
134 : : * anything. This can only happen on an inner index page.
135 : : */
136 [ - + ]: 612 : if (gk_ip_family(key) == 0)
137 : : {
3659 tgl@sss.pgh.pa.us 138 [ # # ]:UBC 0 : Assert(!GIST_LEAF(ent));
139 : 0 : PG_RETURN_BOOL(true);
140 : : }
141 : :
142 : : /*
143 : : * Check 1: different families
144 : : *
145 : : * Matching families do not help any of the strategies.
146 : : */
3659 tgl@sss.pgh.pa.us 147 [ - + + + ]:CBC 612 : if (gk_ip_family(key) != ip_family(query))
148 : : {
149 [ + + + + ]: 108 : switch (strategy)
150 : : {
151 : 18 : case INETSTRAT_LT:
152 : : case INETSTRAT_LE:
153 [ - + - + ]: 18 : if (gk_ip_family(key) < ip_family(query))
3659 tgl@sss.pgh.pa.us 154 :UBC 0 : PG_RETURN_BOOL(true);
3659 tgl@sss.pgh.pa.us 155 :CBC 18 : break;
156 : :
157 : 18 : case INETSTRAT_GE:
158 : : case INETSTRAT_GT:
159 [ - + + - ]: 18 : if (gk_ip_family(key) > ip_family(query))
160 : 18 : PG_RETURN_BOOL(true);
3659 tgl@sss.pgh.pa.us 161 :UBC 0 : break;
162 : :
3659 tgl@sss.pgh.pa.us 163 :CBC 9 : case INETSTRAT_NE:
164 : 9 : PG_RETURN_BOOL(true);
165 : : }
166 : : /* For all other cases, we can be sure there is no match */
167 : 81 : PG_RETURN_BOOL(false);
168 : : }
169 : :
170 : : /*
171 : : * Check 2: network bit count
172 : : *
173 : : * Network bit count (ip_bits) helps to check leaves for sub network and
174 : : * sup network operators. At non-leaf nodes, we know every child value
175 : : * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
176 : : * cases too.
177 : : */
178 [ + + + + : 504 : switch (strategy)
+ ]
179 : : {
180 : 84 : case INETSTRAT_SUB:
181 [ + - - + : 84 : if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
+ + ]
182 : 60 : PG_RETURN_BOOL(false);
183 : 24 : break;
184 : :
185 : 42 : case INETSTRAT_SUBEQ:
186 [ + - - + : 42 : if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
+ + ]
187 : 18 : PG_RETURN_BOOL(false);
188 : 24 : break;
189 : :
190 : 84 : case INETSTRAT_SUPEQ:
191 : : case INETSTRAT_EQ:
192 [ - + + + ]: 84 : if (gk_ip_minbits(key) > ip_bits(query))
193 : 24 : PG_RETURN_BOOL(false);
194 : 60 : break;
195 : :
196 : 42 : case INETSTRAT_SUP:
197 [ - + + + ]: 42 : if (gk_ip_minbits(key) >= ip_bits(query))
198 : 24 : PG_RETURN_BOOL(false);
199 : 18 : break;
200 : : }
201 : :
202 : : /*
203 : : * Check 3: common network bits
204 : : *
205 : : * Compare available common prefix bits to the query, but not beyond
206 : : * either the query's netmask or the minimum netmask among the represented
207 : : * values. If these bits don't match the query, we have our answer (and
208 : : * may or may not need to descend, depending on the operator). If they do
209 : : * match, and we are not at a leaf, we descend in all cases.
210 : : *
211 : : * Note this is the final check for operators that only consider the
212 : : * network part of the address.
213 : : */
214 : 378 : minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
215 [ - + ]: 378 : minbits = Min(minbits, ip_bits(query));
216 : :
217 [ - + ]: 378 : order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
218 : :
219 [ + + + + : 378 : switch (strategy)
+ - ]
220 : : {
221 : 138 : case INETSTRAT_SUB:
222 : : case INETSTRAT_SUBEQ:
223 : : case INETSTRAT_OVERLAPS:
224 : : case INETSTRAT_SUPEQ:
225 : : case INETSTRAT_SUP:
226 : 138 : PG_RETURN_BOOL(order == 0);
227 : :
228 : 84 : case INETSTRAT_LT:
229 : : case INETSTRAT_LE:
230 [ - + ]: 84 : if (order > 0)
3659 tgl@sss.pgh.pa.us 231 :UBC 0 : PG_RETURN_BOOL(false);
3659 tgl@sss.pgh.pa.us 232 [ + + - + ]:CBC 84 : if (order < 0 || !GIST_LEAF(ent))
233 : 48 : PG_RETURN_BOOL(true);
234 : 36 : break;
235 : :
236 : 30 : case INETSTRAT_EQ:
237 [ + + ]: 30 : if (order != 0)
238 : 21 : PG_RETURN_BOOL(false);
239 [ - + ]: 9 : if (!GIST_LEAF(ent))
3659 tgl@sss.pgh.pa.us 240 :UBC 0 : PG_RETURN_BOOL(true);
3659 tgl@sss.pgh.pa.us 241 :CBC 9 : break;
242 : :
243 : 84 : case INETSTRAT_GE:
244 : : case INETSTRAT_GT:
245 [ + + ]: 84 : if (order < 0)
246 : 48 : PG_RETURN_BOOL(false);
247 [ + - - + ]: 36 : if (order > 0 || !GIST_LEAF(ent))
3659 tgl@sss.pgh.pa.us 248 :UBC 0 : PG_RETURN_BOOL(true);
3659 tgl@sss.pgh.pa.us 249 :CBC 36 : break;
250 : :
251 : 42 : case INETSTRAT_NE:
252 [ + + - + ]: 42 : if (order != 0 || !GIST_LEAF(ent))
253 : 24 : PG_RETURN_BOOL(true);
254 : 18 : break;
255 : : }
256 : :
257 : : /*
258 : : * Remaining checks are only for leaves and basic comparison strategies.
259 : : * See network_cmp_internal() in network.c for the implementation we need
260 : : * to match. Note that in a leaf key, commonbits should equal the address
261 : : * length, so we compared the whole network parts above.
262 : : */
263 [ - + ]: 99 : Assert(GIST_LEAF(ent));
264 : :
265 : : /*
266 : : * Check 4: network bit count
267 : : *
268 : : * Next step is to compare netmask widths.
269 : : */
270 [ + + + + : 99 : switch (strategy)
- ]
271 : : {
272 : 36 : case INETSTRAT_LT:
273 : : case INETSTRAT_LE:
274 [ - + - + ]: 36 : if (gk_ip_minbits(key) < ip_bits(query))
3659 tgl@sss.pgh.pa.us 275 :UBC 0 : PG_RETURN_BOOL(true);
3659 tgl@sss.pgh.pa.us 276 [ - + + + ]:CBC 36 : if (gk_ip_minbits(key) > ip_bits(query))
277 : 18 : PG_RETURN_BOOL(false);
278 : 18 : break;
279 : :
280 : 9 : case INETSTRAT_EQ:
281 [ - + - + ]: 9 : if (gk_ip_minbits(key) != ip_bits(query))
3659 tgl@sss.pgh.pa.us 282 :UBC 0 : PG_RETURN_BOOL(false);
3659 tgl@sss.pgh.pa.us 283 :CBC 9 : break;
284 : :
285 : 36 : case INETSTRAT_GE:
286 : : case INETSTRAT_GT:
287 [ - + + + ]: 36 : if (gk_ip_minbits(key) > ip_bits(query))
288 : 18 : PG_RETURN_BOOL(true);
289 [ - + - + ]: 18 : if (gk_ip_minbits(key) < ip_bits(query))
3659 tgl@sss.pgh.pa.us 290 :UBC 0 : PG_RETURN_BOOL(false);
3659 tgl@sss.pgh.pa.us 291 :CBC 18 : break;
292 : :
293 : 18 : case INETSTRAT_NE:
294 [ - + + + ]: 18 : if (gk_ip_minbits(key) != ip_bits(query))
295 : 9 : PG_RETURN_BOOL(true);
296 : 9 : break;
297 : : }
298 : :
299 : : /*
300 : : * Check 5: whole address
301 : : *
302 : : * Netmask bit counts are the same, so check all the address bits.
303 : : */
304 [ - + - + ]: 54 : order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
305 : :
306 [ + + + + : 54 : switch (strategy)
+ + - ]
307 : : {
308 : 9 : case INETSTRAT_LT:
309 : 9 : PG_RETURN_BOOL(order < 0);
310 : :
311 : 9 : case INETSTRAT_LE:
312 : 9 : PG_RETURN_BOOL(order <= 0);
313 : :
314 : 9 : case INETSTRAT_EQ:
315 : 9 : PG_RETURN_BOOL(order == 0);
316 : :
317 : 9 : case INETSTRAT_GE:
318 : 9 : PG_RETURN_BOOL(order >= 0);
319 : :
320 : 9 : case INETSTRAT_GT:
321 : 9 : PG_RETURN_BOOL(order > 0);
322 : :
323 : 9 : case INETSTRAT_NE:
324 : 9 : PG_RETURN_BOOL(order != 0);
325 : : }
326 : :
3659 tgl@sss.pgh.pa.us 327 [ # # ]:UBC 0 : elog(ERROR, "unknown strategy for inet GiST");
328 : : PG_RETURN_BOOL(false); /* keep compiler quiet */
329 : : }
330 : :
331 : : /*
332 : : * Calculate parameters of the union of some GistInetKeys.
333 : : *
334 : : * Examine the keys in elements m..n inclusive of the GISTENTRY array,
335 : : * and compute these output parameters:
336 : : * *minfamily_p = minimum IP address family number
337 : : * *maxfamily_p = maximum IP address family number
338 : : * *minbits_p = minimum netmask width
339 : : * *commonbits_p = number of leading bits in common among the addresses
340 : : *
341 : : * minbits and commonbits are forced to zero if there's more than one
342 : : * address family.
343 : : */
344 : : static void
3659 tgl@sss.pgh.pa.us 345 :CBC 415 : calc_inet_union_params(GISTENTRY *ent,
346 : : int m, int n,
347 : : int *minfamily_p,
348 : : int *maxfamily_p,
349 : : int *minbits_p,
350 : : int *commonbits_p)
351 : : {
352 : : int minfamily,
353 : : maxfamily,
354 : : minbits,
355 : : commonbits;
356 : : unsigned char *addr;
357 : : GistInetKey *tmp;
358 : : int i;
359 : :
360 : : /* Must be at least one key. */
361 [ - + ]: 415 : Assert(m <= n);
362 : :
363 : : /* Initialize variables using the first key. */
364 : 415 : tmp = DatumGetInetKeyP(ent[m].key);
365 : 415 : minfamily = maxfamily = gk_ip_family(tmp);
366 : 415 : minbits = gk_ip_minbits(tmp);
367 : 415 : commonbits = gk_ip_commonbits(tmp);
368 : 415 : addr = gk_ip_addr(tmp);
369 : :
370 : : /* Scan remaining keys. */
371 [ + + ]: 1620 : for (i = m + 1; i <= n; i++)
372 : : {
373 : 1205 : tmp = DatumGetInetKeyP(ent[i].key);
374 : :
375 : : /* Determine range of family numbers */
376 [ - + ]: 1205 : if (minfamily > gk_ip_family(tmp))
3659 tgl@sss.pgh.pa.us 377 :UBC 0 : minfamily = gk_ip_family(tmp);
3659 tgl@sss.pgh.pa.us 378 [ - + ]:CBC 1205 : if (maxfamily < gk_ip_family(tmp))
3659 tgl@sss.pgh.pa.us 379 :UBC 0 : maxfamily = gk_ip_family(tmp);
380 : :
381 : : /* Find minimum minbits */
3659 tgl@sss.pgh.pa.us 382 [ - + ]:CBC 1205 : if (minbits > gk_ip_minbits(tmp))
3659 tgl@sss.pgh.pa.us 383 :UBC 0 : minbits = gk_ip_minbits(tmp);
384 : :
385 : : /* Find minimum number of bits in common */
3659 tgl@sss.pgh.pa.us 386 [ - + ]:CBC 1205 : if (commonbits > gk_ip_commonbits(tmp))
3659 tgl@sss.pgh.pa.us 387 :UBC 0 : commonbits = gk_ip_commonbits(tmp);
3659 tgl@sss.pgh.pa.us 388 [ + + ]:CBC 1205 : if (commonbits > 0)
389 : 823 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
390 : : }
391 : :
392 : : /* Force minbits/commonbits to zero if more than one family. */
393 [ - + ]: 415 : if (minfamily != maxfamily)
3659 tgl@sss.pgh.pa.us 394 :UBC 0 : minbits = commonbits = 0;
395 : :
3659 tgl@sss.pgh.pa.us 396 :CBC 415 : *minfamily_p = minfamily;
397 : 415 : *maxfamily_p = maxfamily;
398 : 415 : *minbits_p = minbits;
399 : 415 : *commonbits_p = commonbits;
400 : 415 : }
401 : :
402 : : /*
403 : : * Same as above, but the GISTENTRY elements to examine are those with
404 : : * indices listed in the offsets[] array.
405 : : */
406 : : static void
3659 tgl@sss.pgh.pa.us 407 :UBC 0 : calc_inet_union_params_indexed(GISTENTRY *ent,
408 : : OffsetNumber *offsets, int noffsets,
409 : : int *minfamily_p,
410 : : int *maxfamily_p,
411 : : int *minbits_p,
412 : : int *commonbits_p)
413 : : {
414 : : int minfamily,
415 : : maxfamily,
416 : : minbits,
417 : : commonbits;
418 : : unsigned char *addr;
419 : : GistInetKey *tmp;
420 : : int i;
421 : :
422 : : /* Must be at least one key. */
423 [ # # ]: 0 : Assert(noffsets > 0);
424 : :
425 : : /* Initialize variables using the first key. */
426 : 0 : tmp = DatumGetInetKeyP(ent[offsets[0]].key);
427 : 0 : minfamily = maxfamily = gk_ip_family(tmp);
428 : 0 : minbits = gk_ip_minbits(tmp);
429 : 0 : commonbits = gk_ip_commonbits(tmp);
430 : 0 : addr = gk_ip_addr(tmp);
431 : :
432 : : /* Scan remaining keys. */
433 [ # # ]: 0 : for (i = 1; i < noffsets; i++)
434 : : {
435 : 0 : tmp = DatumGetInetKeyP(ent[offsets[i]].key);
436 : :
437 : : /* Determine range of family numbers */
438 [ # # ]: 0 : if (minfamily > gk_ip_family(tmp))
439 : 0 : minfamily = gk_ip_family(tmp);
440 [ # # ]: 0 : if (maxfamily < gk_ip_family(tmp))
441 : 0 : maxfamily = gk_ip_family(tmp);
442 : :
443 : : /* Find minimum minbits */
444 [ # # ]: 0 : if (minbits > gk_ip_minbits(tmp))
445 : 0 : minbits = gk_ip_minbits(tmp);
446 : :
447 : : /* Find minimum number of bits in common */
448 [ # # ]: 0 : if (commonbits > gk_ip_commonbits(tmp))
449 : 0 : commonbits = gk_ip_commonbits(tmp);
450 [ # # ]: 0 : if (commonbits > 0)
451 : 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
452 : : }
453 : :
454 : : /* Force minbits/commonbits to zero if more than one family. */
455 [ # # ]: 0 : if (minfamily != maxfamily)
456 : 0 : minbits = commonbits = 0;
457 : :
458 : 0 : *minfamily_p = minfamily;
459 : 0 : *maxfamily_p = maxfamily;
460 : 0 : *minbits_p = minbits;
461 : 0 : *commonbits_p = commonbits;
462 : 0 : }
463 : :
464 : : /*
465 : : * Construct a GistInetKey representing a union value.
466 : : *
467 : : * Inputs are the family/minbits/commonbits values to use, plus a pointer to
468 : : * the address field of one of the union inputs. (Since we're going to copy
469 : : * just the bits-in-common, it doesn't matter which one.)
470 : : */
471 : : static GistInetKey *
3659 tgl@sss.pgh.pa.us 472 :CBC 415 : build_inet_union_key(int family, int minbits, int commonbits,
473 : : unsigned char *addr)
474 : : {
475 : : GistInetKey *result;
476 : :
477 : : /* Make sure any unused bits are zeroed. */
478 : 415 : result = (GistInetKey *) palloc0(sizeof(GistInetKey));
479 : :
480 : 415 : gk_ip_family(result) = family;
481 : 415 : gk_ip_minbits(result) = minbits;
482 : 415 : gk_ip_commonbits(result) = commonbits;
483 : :
484 : : /* Clone appropriate bytes of the address. */
485 [ + + ]: 415 : if (commonbits > 0)
486 : 244 : memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
487 : :
488 : : /* Clean any unwanted bits in the last partial byte. */
489 [ + + ]: 415 : if (commonbits % 8 != 0)
490 : 244 : gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
491 : :
492 : : /* Set varlena header correctly. */
493 [ - + ]: 415 : SET_GK_VARSIZE(result);
494 : :
495 : 415 : return result;
496 : : }
497 : :
498 : :
499 : : /*
500 : : * The GiST union function
501 : : *
502 : : * See comments at head of file for the definition of the union.
503 : : */
504 : : Datum
505 : 415 : inet_gist_union(PG_FUNCTION_ARGS)
506 : : {
507 : 415 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
508 : 415 : GISTENTRY *ent = entryvec->vector;
509 : : int minfamily,
510 : : maxfamily,
511 : : minbits,
512 : : commonbits;
513 : : unsigned char *addr;
514 : : GistInetKey *tmp,
515 : : *result;
516 : :
517 : : /* Determine parameters of the union. */
518 : 415 : calc_inet_union_params(ent, 0, entryvec->n - 1,
519 : : &minfamily, &maxfamily,
520 : : &minbits, &commonbits);
521 : :
522 : : /* If more than one family, emit family number zero. */
523 [ - + ]: 415 : if (minfamily != maxfamily)
3659 tgl@sss.pgh.pa.us 524 :UBC 0 : minfamily = 0;
525 : :
526 : : /* Initialize address using the first key. */
3659 tgl@sss.pgh.pa.us 527 :CBC 415 : tmp = DatumGetInetKeyP(ent[0].key);
528 : 415 : addr = gk_ip_addr(tmp);
529 : :
530 : : /* Construct the union value. */
531 : 415 : result = build_inet_union_key(minfamily, minbits, commonbits, addr);
532 : :
533 : 415 : PG_RETURN_POINTER(result);
534 : : }
535 : :
536 : : /*
537 : : * The GiST compress function
538 : : *
539 : : * Convert an inet value to GistInetKey.
540 : : */
541 : : Datum
542 : 662 : inet_gist_compress(PG_FUNCTION_ARGS)
543 : : {
544 : 662 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
545 : : GISTENTRY *retval;
546 : :
547 [ + + ]: 662 : if (entry->leafkey)
548 : : {
549 : 651 : retval = palloc(sizeof(GISTENTRY));
550 [ + - ]: 651 : if (DatumGetPointer(entry->key) != NULL)
551 : : {
552 : 651 : inet *in = DatumGetInetPP(entry->key);
553 : : GistInetKey *r;
554 : :
555 : 651 : r = (GistInetKey *) palloc0(sizeof(GistInetKey));
556 : :
557 [ + - ]: 651 : gk_ip_family(r) = ip_family(in);
558 [ + - ]: 651 : gk_ip_minbits(r) = ip_bits(in);
559 [ + + ]: 651 : gk_ip_commonbits(r) = gk_ip_maxbits(r);
560 [ + + + - ]: 651 : memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
561 [ + + ]: 651 : SET_GK_VARSIZE(r);
562 : :
563 : 651 : gistentryinit(*retval, PointerGetDatum(r),
564 : : entry->rel, entry->page,
565 : : entry->offset, false);
566 : : }
567 : : else
568 : : {
3659 tgl@sss.pgh.pa.us 569 :UBC 0 : gistentryinit(*retval, (Datum) 0,
570 : : entry->rel, entry->page,
571 : : entry->offset, false);
572 : : }
573 : : }
574 : : else
3659 tgl@sss.pgh.pa.us 575 :CBC 11 : retval = entry;
576 : 662 : PG_RETURN_POINTER(retval);
577 : : }
578 : :
579 : : /*
580 : : * We do not need a decompress function, because the other GiST inet
581 : : * support functions work with the GistInetKey representation.
582 : : */
583 : :
584 : : /*
585 : : * The GiST fetch function
586 : : *
587 : : * Reconstruct the original inet datum from a GistInetKey.
588 : : */
589 : : Datum
3305 heikki.linnakangas@i 590 : 10 : inet_gist_fetch(PG_FUNCTION_ARGS)
591 : : {
3249 bruce@momjian.us 592 : 10 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
593 : 10 : GistInetKey *key = DatumGetInetKeyP(entry->key);
594 : : GISTENTRY *retval;
595 : : inet *dst;
596 : :
3305 heikki.linnakangas@i 597 : 10 : dst = (inet *) palloc0(sizeof(inet));
598 : :
599 [ - + ]: 10 : ip_family(dst) = gk_ip_family(key);
600 [ - + ]: 10 : ip_bits(dst) = gk_ip_minbits(key);
601 [ - + + - : 10 : memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
- + ]
602 [ - + + - ]: 10 : SET_INET_VARSIZE(dst);
603 : :
604 : 10 : retval = palloc(sizeof(GISTENTRY));
605 : 10 : gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
606 : : entry->offset, false);
607 : :
608 : 10 : PG_RETURN_POINTER(retval);
609 : : }
610 : :
611 : : /*
612 : : * The GiST page split penalty function
613 : : *
614 : : * Charge a large penalty if address family doesn't match, or a somewhat
615 : : * smaller one if the new value would degrade the union's minbits
616 : : * (minimum netmask width). Otherwise, penalty is inverse of the
617 : : * new number of common address bits.
618 : : */
619 : : Datum
3659 tgl@sss.pgh.pa.us 620 : 723 : inet_gist_penalty(PG_FUNCTION_ARGS)
621 : : {
622 : 723 : GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
623 : 723 : GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
624 : 723 : float *penalty = (float *) PG_GETARG_POINTER(2);
625 : 723 : GistInetKey *orig = DatumGetInetKeyP(origent->key),
626 : 723 : *new = DatumGetInetKeyP(newent->key);
627 : : int commonbits;
628 : :
629 [ + - ]: 723 : if (gk_ip_family(orig) == gk_ip_family(new))
630 : : {
631 [ + - ]: 723 : if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
632 : : {
633 : 723 : commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
634 : 723 : Min(gk_ip_commonbits(orig),
635 : : gk_ip_commonbits(new)));
636 [ + + ]: 723 : if (commonbits > 0)
637 : 306 : *penalty = 1.0f / commonbits;
638 : : else
639 : 417 : *penalty = 2;
640 : : }
641 : : else
3659 tgl@sss.pgh.pa.us 642 :UBC 0 : *penalty = 3;
643 : : }
644 : : else
645 : 0 : *penalty = 4;
646 : :
3659 tgl@sss.pgh.pa.us 647 :CBC 723 : PG_RETURN_POINTER(penalty);
648 : : }
649 : :
650 : : /*
651 : : * The GiST PickSplit method
652 : : *
653 : : * There are two ways to split. First one is to split by address families,
654 : : * if there are multiple families appearing in the input.
655 : : *
656 : : * The second and more common way is to split by addresses. To achieve this,
657 : : * determine the number of leading bits shared by all the keys, then split on
658 : : * the next bit. (We don't currently consider the netmask widths while doing
659 : : * this; should we?) If we fail to get a nontrivial split that way, split
660 : : * 50-50.
661 : : */
662 : : Datum
3659 tgl@sss.pgh.pa.us 663 :UBC 0 : inet_gist_picksplit(PG_FUNCTION_ARGS)
664 : : {
665 : 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
666 : 0 : GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
667 : 0 : GISTENTRY *ent = entryvec->vector;
668 : : int minfamily,
669 : : maxfamily,
670 : : minbits,
671 : : commonbits;
672 : : unsigned char *addr;
673 : : GistInetKey *tmp,
674 : : *left_union,
675 : : *right_union;
676 : : int maxoff,
677 : : nbytes;
678 : : OffsetNumber i,
679 : : *left,
680 : : *right;
681 : :
682 : 0 : maxoff = entryvec->n - 1;
683 : 0 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
684 : :
685 : 0 : left = (OffsetNumber *) palloc(nbytes);
686 : 0 : right = (OffsetNumber *) palloc(nbytes);
687 : :
688 : 0 : splitvec->spl_left = left;
689 : 0 : splitvec->spl_right = right;
690 : :
691 : 0 : splitvec->spl_nleft = 0;
692 : 0 : splitvec->spl_nright = 0;
693 : :
694 : : /* Determine parameters of the union of all the inputs. */
695 : 0 : calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
696 : : &minfamily, &maxfamily,
697 : : &minbits, &commonbits);
698 : :
699 [ # # ]: 0 : if (minfamily != maxfamily)
700 : : {
701 : : /* Multiple families, so split by family. */
702 [ # # ]: 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
703 : : {
704 : : /*
705 : : * If there's more than 2 families, all but maxfamily go into the
706 : : * left union. This could only happen if the inputs include some
707 : : * IPv4, some IPv6, and some already-multiple-family unions.
708 : : */
709 : 0 : tmp = DatumGetInetKeyP(ent[i].key);
710 [ # # ]: 0 : if (gk_ip_family(tmp) != maxfamily)
711 : 0 : left[splitvec->spl_nleft++] = i;
712 : : else
713 : 0 : right[splitvec->spl_nright++] = i;
714 : : }
715 : : }
716 : : else
717 : : {
718 : : /*
719 : : * Split on the next bit after the common bits. If that yields a
720 : : * trivial split, try the next bit position to the right. Repeat till
721 : : * success; or if we run out of bits, do an arbitrary 50-50 split.
722 : : */
723 [ # # ]: 0 : int maxbits = ip_family_maxbits(minfamily);
724 : :
725 [ # # ]: 0 : while (commonbits < maxbits)
726 : : {
727 : : /* Split using the commonbits'th bit position. */
728 : 0 : int bitbyte = commonbits / 8;
729 : 0 : int bitmask = 0x80 >> (commonbits % 8);
730 : :
731 : 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
732 : :
733 [ # # ]: 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
734 : : {
735 : 0 : tmp = DatumGetInetKeyP(ent[i].key);
736 : 0 : addr = gk_ip_addr(tmp);
737 [ # # ]: 0 : if ((addr[bitbyte] & bitmask) == 0)
738 : 0 : left[splitvec->spl_nleft++] = i;
739 : : else
740 : 0 : right[splitvec->spl_nright++] = i;
741 : : }
742 : :
743 [ # # # # ]: 0 : if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
744 : 0 : break; /* success */
745 : 0 : commonbits++;
746 : : }
747 : :
748 [ # # ]: 0 : if (commonbits >= maxbits)
749 : : {
750 : : /* Failed ... do a 50-50 split. */
751 : 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
752 : :
753 [ # # ]: 0 : for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
754 : : {
755 : 0 : left[splitvec->spl_nleft++] = i;
756 : : }
757 [ # # ]: 0 : for (; i <= maxoff; i = OffsetNumberNext(i))
758 : : {
759 : 0 : right[splitvec->spl_nright++] = i;
760 : : }
761 : : }
762 : : }
763 : :
764 : : /*
765 : : * Compute the union value for each side from scratch. In most cases we
766 : : * could approximate the union values with what we already know, but this
767 : : * ensures that each side has minbits and commonbits set as high as
768 : : * possible.
769 : : */
770 : 0 : calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
771 : : &minfamily, &maxfamily,
772 : : &minbits, &commonbits);
773 [ # # ]: 0 : if (minfamily != maxfamily)
774 : 0 : minfamily = 0;
775 : 0 : tmp = DatumGetInetKeyP(ent[left[0]].key);
776 : 0 : addr = gk_ip_addr(tmp);
777 : 0 : left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
778 : 0 : splitvec->spl_ldatum = PointerGetDatum(left_union);
779 : :
780 : 0 : calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
781 : : &minfamily, &maxfamily,
782 : : &minbits, &commonbits);
783 [ # # ]: 0 : if (minfamily != maxfamily)
784 : 0 : minfamily = 0;
785 : 0 : tmp = DatumGetInetKeyP(ent[right[0]].key);
786 : 0 : addr = gk_ip_addr(tmp);
787 : 0 : right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
788 : 0 : splitvec->spl_rdatum = PointerGetDatum(right_union);
789 : :
790 : 0 : PG_RETURN_POINTER(splitvec);
791 : : }
792 : :
793 : : /*
794 : : * The GiST equality function
795 : : */
796 : : Datum
3659 tgl@sss.pgh.pa.us 797 :CBC 404 : inet_gist_same(PG_FUNCTION_ARGS)
798 : : {
799 : 404 : GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
800 : 404 : GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
801 : 404 : bool *result = (bool *) PG_GETARG_POINTER(2);
802 : :
803 : 1212 : *result = (gk_ip_family(left) == gk_ip_family(right) &&
804 [ + - ]: 404 : gk_ip_minbits(left) == gk_ip_minbits(right) &&
805 [ + - + - ]: 1212 : gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
806 [ + - ]: 404 : memcmp(gk_ip_addr(left), gk_ip_addr(right),
807 [ - + ]: 404 : gk_ip_addrsize(left)) == 0);
808 : :
809 : 404 : PG_RETURN_POINTER(result);
810 : : }
|