Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * network_gist.c
4 : * GiST support for network types.
5 : *
6 : * The key thing to understand about this code is the definition of the
7 : * "union" of a set of INET/CIDR values. It works like this:
8 : * 1. If the values are not all of the same IP address family, the "union"
9 : * is a dummy value with family number zero, minbits zero, commonbits zero,
10 : * address all zeroes. Otherwise:
11 : * 2. The union has the common IP address family number.
12 : * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 : * of all the input values.
14 : * 4. Let C be the number of leading address bits that are in common among
15 : * all the input values (C ranges from 0 to ip_maxbits for the family).
16 : * 5. The union's commonbits value is C.
17 : * 6. The union's address value is the same as the common prefix for its
18 : * first C bits, and is zeroes to the right of that. The physical width
19 : * of the address value is ip_maxbits for the address family.
20 : *
21 : * In a leaf index entry (representing a single key), commonbits is equal to
22 : * ip_maxbits for the address family, minbits is the same as the represented
23 : * value's ip_bits, and the address is equal to the represented address.
24 : * Although it may appear that we're wasting a byte by storing the union
25 : * format and not just the represented INET/CIDR value in leaf keys, the
26 : * extra byte is actually "free" because of alignment considerations.
27 : *
28 : * Note that this design tracks minbits and commonbits independently; in any
29 : * given union value, either might be smaller than the other. This does not
30 : * help us much when descending the tree, because of the way inet comparison
31 : * is defined: at non-leaf nodes we can't compare more than minbits bits
32 : * even if we know them. However, it greatly improves the quality of split
33 : * decisions. Preliminary testing suggests that searches are as much as
34 : * twice as fast as for a simpler design in which a single field doubles as
35 : * the common prefix length and the minimum ip_bits value.
36 : *
37 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
38 : * Portions Copyright (c) 1994, Regents of the University of California
39 : *
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/adt/network_gist.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/socket.h>
49 :
50 : #include "access/gist.h"
51 : #include "access/stratnum.h"
52 : #include "utils/builtins.h"
53 : #include "utils/inet.h"
54 : #include "varatt.h"
55 :
56 : /*
57 : * Operator strategy numbers used in the GiST inet_ops opclass
58 : */
59 : #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
60 : #define INETSTRAT_EQ RTEqualStrategyNumber
61 : #define INETSTRAT_NE RTNotEqualStrategyNumber
62 : #define INETSTRAT_LT RTLessStrategyNumber
63 : #define INETSTRAT_LE RTLessEqualStrategyNumber
64 : #define INETSTRAT_GT RTGreaterStrategyNumber
65 : #define INETSTRAT_GE RTGreaterEqualStrategyNumber
66 : #define INETSTRAT_SUB RTSubStrategyNumber
67 : #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
68 : #define INETSTRAT_SUP RTSuperStrategyNumber
69 : #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
70 :
71 :
72 : /*
73 : * Representation of a GiST INET/CIDR index key. This is not identical to
74 : * INET/CIDR because we need to keep track of the length of the common address
75 : * prefix as well as the minimum netmask length. However, as long as it
76 : * follows varlena header rules, the core GiST code won't know the difference.
77 : * For simplicity we always use 1-byte-header varlena format.
78 : */
79 : typedef struct GistInetKey
80 : {
81 : uint8 va_header; /* varlena header --- don't touch directly */
82 : unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
83 : unsigned char minbits; /* minimum number of bits in netmask */
84 : unsigned char commonbits; /* number of common prefix bits in addresses */
85 : unsigned char ipaddr[16]; /* up to 128 bits of common address */
86 : } GistInetKey;
87 :
88 : #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
89 : #define InetKeyPGetDatum(X) PointerGetDatum(X)
90 :
91 : /*
92 : * Access macros; not really exciting, but we use these for notational
93 : * consistency with access to INET/CIDR values. Note that family-zero values
94 : * are stored with 4 bytes of address, not 16.
95 : */
96 : #define gk_ip_family(gkptr) ((gkptr)->family)
97 : #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
98 : #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
99 : #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
100 : #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
101 :
102 : /* These require that the family field has been set: */
103 : #define gk_ip_addrsize(gkptr) \
104 : (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
105 : #define gk_ip_maxbits(gkptr) \
106 : ip_family_maxbits(gk_ip_family(gkptr))
107 : #define SET_GK_VARSIZE(dst) \
108 : SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
109 :
110 :
111 : /*
112 : * The GiST query consistency check
113 : */
114 : Datum
3288 tgl 115 GIC 612 : inet_gist_consistent(PG_FUNCTION_ARGS)
3288 tgl 116 ECB : {
3288 tgl 117 GIC 612 : GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
3288 tgl 118 CBC 612 : inet *query = PG_GETARG_INET_PP(1);
119 612 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
3288 tgl 120 ECB :
121 : /* Oid subtype = PG_GETARG_OID(3); */
3288 tgl 122 GIC 612 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
3288 tgl 123 CBC 612 : GistInetKey *key = DatumGetInetKeyP(ent->key);
3288 tgl 124 ECB : int minbits,
125 : order;
126 :
127 : /* All operators served by this function are exact. */
3288 tgl 128 GIC 612 : *recheck = false;
3288 tgl 129 ECB :
130 : /*
131 : * Check 0: different families
132 : *
133 : * If key represents multiple address families, its children could match
134 : * anything. This can only happen on an inner index page.
135 : */
3288 tgl 136 GIC 612 : if (gk_ip_family(key) == 0)
3288 tgl 137 ECB : {
3288 tgl 138 UIC 0 : Assert(!GIST_LEAF(ent));
3288 tgl 139 UBC 0 : PG_RETURN_BOOL(true);
3288 tgl 140 EUB : }
141 :
142 : /*
143 : * Check 1: different families
144 : *
145 : * Matching families do not help any of the strategies.
146 : */
3288 tgl 147 GIC 612 : if (gk_ip_family(key) != ip_family(query))
3288 tgl 148 ECB : {
3288 tgl 149 GIC 108 : switch (strategy)
3288 tgl 150 ECB : {
3288 tgl 151 GIC 18 : case INETSTRAT_LT:
3288 tgl 152 ECB : case INETSTRAT_LE:
3288 tgl 153 GIC 18 : if (gk_ip_family(key) < ip_family(query))
3288 tgl 154 LBC 0 : PG_RETURN_BOOL(true);
3288 tgl 155 GBC 18 : break;
3288 tgl 156 ECB :
3288 tgl 157 GIC 18 : case INETSTRAT_GE:
3288 tgl 158 ECB : case INETSTRAT_GT:
3288 tgl 159 GIC 18 : if (gk_ip_family(key) > ip_family(query))
3288 tgl 160 CBC 18 : PG_RETURN_BOOL(true);
3288 tgl 161 LBC 0 : break;
3288 tgl 162 EUB :
3288 tgl 163 GIC 9 : case INETSTRAT_NE:
3288 tgl 164 CBC 9 : PG_RETURN_BOOL(true);
3288 tgl 165 ECB : }
166 : /* For all other cases, we can be sure there is no match */
3288 tgl 167 GIC 81 : PG_RETURN_BOOL(false);
3288 tgl 168 ECB : }
169 :
170 : /*
171 : * Check 2: network bit count
172 : *
173 : * Network bit count (ip_bits) helps to check leaves for sub network and
174 : * sup network operators. At non-leaf nodes, we know every child value
175 : * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
176 : * cases too.
177 : */
3288 tgl 178 GIC 504 : switch (strategy)
3288 tgl 179 ECB : {
3288 tgl 180 GIC 84 : case INETSTRAT_SUB:
3288 tgl 181 CBC 84 : if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
182 60 : PG_RETURN_BOOL(false);
183 24 : break;
3288 tgl 184 ECB :
3288 tgl 185 GIC 42 : case INETSTRAT_SUBEQ:
3288 tgl 186 CBC 42 : if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
187 18 : PG_RETURN_BOOL(false);
188 24 : break;
3288 tgl 189 ECB :
3288 tgl 190 GIC 84 : case INETSTRAT_SUPEQ:
3288 tgl 191 ECB : case INETSTRAT_EQ:
3288 tgl 192 GIC 84 : if (gk_ip_minbits(key) > ip_bits(query))
3288 tgl 193 CBC 24 : PG_RETURN_BOOL(false);
194 60 : break;
3288 tgl 195 ECB :
3288 tgl 196 GIC 42 : case INETSTRAT_SUP:
3288 tgl 197 CBC 42 : if (gk_ip_minbits(key) >= ip_bits(query))
198 24 : PG_RETURN_BOOL(false);
199 18 : break;
3288 tgl 200 ECB : }
201 :
202 : /*
203 : * Check 3: common network bits
204 : *
205 : * Compare available common prefix bits to the query, but not beyond
206 : * either the query's netmask or the minimum netmask among the represented
207 : * values. If these bits don't match the query, we have our answer (and
208 : * may or may not need to descend, depending on the operator). If they do
209 : * match, and we are not at a leaf, we descend in all cases.
210 : *
211 : * Note this is the final check for operators that only consider the
212 : * network part of the address.
213 : */
3288 tgl 214 GIC 378 : minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
3288 tgl 215 CBC 378 : minbits = Min(minbits, ip_bits(query));
3288 tgl 216 ECB :
3288 tgl 217 GIC 378 : order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
3288 tgl 218 ECB :
3288 tgl 219 GIC 378 : switch (strategy)
3288 tgl 220 ECB : {
3288 tgl 221 GIC 138 : case INETSTRAT_SUB:
3288 tgl 222 ECB : case INETSTRAT_SUBEQ:
223 : case INETSTRAT_OVERLAPS:
224 : case INETSTRAT_SUPEQ:
225 : case INETSTRAT_SUP:
3288 tgl 226 GIC 138 : PG_RETURN_BOOL(order == 0);
3288 tgl 227 ECB :
3288 tgl 228 GIC 84 : case INETSTRAT_LT:
3288 tgl 229 ECB : case INETSTRAT_LE:
3288 tgl 230 GIC 84 : if (order > 0)
3288 tgl 231 LBC 0 : PG_RETURN_BOOL(false);
3288 tgl 232 GBC 84 : if (order < 0 || !GIST_LEAF(ent))
3288 tgl 233 CBC 48 : PG_RETURN_BOOL(true);
234 36 : break;
3288 tgl 235 ECB :
3288 tgl 236 GIC 30 : case INETSTRAT_EQ:
3288 tgl 237 CBC 30 : if (order != 0)
238 21 : PG_RETURN_BOOL(false);
239 9 : if (!GIST_LEAF(ent))
3288 tgl 240 LBC 0 : PG_RETURN_BOOL(true);
3288 tgl 241 GBC 9 : break;
3288 tgl 242 ECB :
3288 tgl 243 GIC 84 : case INETSTRAT_GE:
3288 tgl 244 ECB : case INETSTRAT_GT:
3288 tgl 245 GIC 84 : if (order < 0)
3288 tgl 246 CBC 48 : PG_RETURN_BOOL(false);
247 36 : if (order > 0 || !GIST_LEAF(ent))
3288 tgl 248 LBC 0 : PG_RETURN_BOOL(true);
3288 tgl 249 GBC 36 : break;
3288 tgl 250 ECB :
3288 tgl 251 GIC 42 : case INETSTRAT_NE:
3288 tgl 252 CBC 42 : if (order != 0 || !GIST_LEAF(ent))
253 24 : PG_RETURN_BOOL(true);
254 18 : break;
3288 tgl 255 ECB : }
256 :
257 : /*
258 : * Remaining checks are only for leaves and basic comparison strategies.
259 : * See network_cmp_internal() in network.c for the implementation we need
260 : * to match. Note that in a leaf key, commonbits should equal the address
261 : * length, so we compared the whole network parts above.
262 : */
3288 tgl 263 GIC 99 : Assert(GIST_LEAF(ent));
3288 tgl 264 ECB :
265 : /*
266 : * Check 4: network bit count
267 : *
268 : * Next step is to compare netmask widths.
269 : */
3288 tgl 270 GIC 99 : switch (strategy)
3288 tgl 271 ECB : {
3288 tgl 272 GIC 36 : case INETSTRAT_LT:
3288 tgl 273 ECB : case INETSTRAT_LE:
3288 tgl 274 GIC 36 : if (gk_ip_minbits(key) < ip_bits(query))
3288 tgl 275 LBC 0 : PG_RETURN_BOOL(true);
3288 tgl 276 GBC 36 : if (gk_ip_minbits(key) > ip_bits(query))
3288 tgl 277 CBC 18 : PG_RETURN_BOOL(false);
278 18 : break;
3288 tgl 279 ECB :
3288 tgl 280 GIC 9 : case INETSTRAT_EQ:
3288 tgl 281 CBC 9 : if (gk_ip_minbits(key) != ip_bits(query))
3288 tgl 282 LBC 0 : PG_RETURN_BOOL(false);
3288 tgl 283 GBC 9 : break;
3288 tgl 284 ECB :
3288 tgl 285 GIC 36 : case INETSTRAT_GE:
3288 tgl 286 ECB : case INETSTRAT_GT:
3288 tgl 287 GIC 36 : if (gk_ip_minbits(key) > ip_bits(query))
3288 tgl 288 CBC 18 : PG_RETURN_BOOL(true);
289 18 : if (gk_ip_minbits(key) < ip_bits(query))
3288 tgl 290 LBC 0 : PG_RETURN_BOOL(false);
3288 tgl 291 GBC 18 : break;
3288 tgl 292 ECB :
3288 tgl 293 GIC 18 : case INETSTRAT_NE:
3288 tgl 294 CBC 18 : if (gk_ip_minbits(key) != ip_bits(query))
295 9 : PG_RETURN_BOOL(true);
296 9 : break;
3288 tgl 297 ECB : }
298 :
299 : /*
300 : * Check 5: whole address
301 : *
302 : * Netmask bit counts are the same, so check all the address bits.
303 : */
3288 tgl 304 GIC 54 : order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
3288 tgl 305 ECB :
3288 tgl 306 GIC 54 : switch (strategy)
3288 tgl 307 ECB : {
3288 tgl 308 GIC 9 : case INETSTRAT_LT:
3288 tgl 309 CBC 9 : PG_RETURN_BOOL(order < 0);
3288 tgl 310 ECB :
3288 tgl 311 GIC 9 : case INETSTRAT_LE:
3288 tgl 312 CBC 9 : PG_RETURN_BOOL(order <= 0);
3288 tgl 313 ECB :
3288 tgl 314 GIC 9 : case INETSTRAT_EQ:
3288 tgl 315 CBC 9 : PG_RETURN_BOOL(order == 0);
3288 tgl 316 ECB :
3288 tgl 317 GIC 9 : case INETSTRAT_GE:
3288 tgl 318 CBC 9 : PG_RETURN_BOOL(order >= 0);
3288 tgl 319 ECB :
3288 tgl 320 GIC 9 : case INETSTRAT_GT:
3288 tgl 321 CBC 9 : PG_RETURN_BOOL(order > 0);
3288 tgl 322 ECB :
3288 tgl 323 GIC 9 : case INETSTRAT_NE:
3288 tgl 324 CBC 9 : PG_RETURN_BOOL(order != 0);
3288 tgl 325 ECB : }
326 :
3288 tgl 327 UIC 0 : elog(ERROR, "unknown strategy for inet GiST");
3288 tgl 328 EUB : PG_RETURN_BOOL(false); /* keep compiler quiet */
329 : }
330 :
331 : /*
332 : * Calculate parameters of the union of some GistInetKeys.
333 : *
334 : * Examine the keys in elements m..n inclusive of the GISTENTRY array,
335 : * and compute these output parameters:
336 : * *minfamily_p = minimum IP address family number
337 : * *maxfamily_p = maximum IP address family number
338 : * *minbits_p = minimum netmask width
339 : * *commonbits_p = number of leading bits in common among the addresses
340 : *
341 : * minbits and commonbits are forced to zero if there's more than one
342 : * address family.
343 : */
344 : static void
3288 tgl 345 GIC 415 : calc_inet_union_params(GISTENTRY *ent,
3288 tgl 346 ECB : int m, int n,
347 : int *minfamily_p,
348 : int *maxfamily_p,
349 : int *minbits_p,
350 : int *commonbits_p)
351 : {
352 : int minfamily,
353 : maxfamily,
354 : minbits,
355 : commonbits;
356 : unsigned char *addr;
357 : GistInetKey *tmp;
358 : int i;
359 :
360 : /* Must be at least one key. */
3288 tgl 361 GIC 415 : Assert(m <= n);
3288 tgl 362 ECB :
363 : /* Initialize variables using the first key. */
3288 tgl 364 GIC 415 : tmp = DatumGetInetKeyP(ent[m].key);
3288 tgl 365 CBC 415 : minfamily = maxfamily = gk_ip_family(tmp);
366 415 : minbits = gk_ip_minbits(tmp);
367 415 : commonbits = gk_ip_commonbits(tmp);
368 415 : addr = gk_ip_addr(tmp);
3288 tgl 369 ECB :
370 : /* Scan remaining keys. */
3288 tgl 371 GIC 1620 : for (i = m + 1; i <= n; i++)
3288 tgl 372 ECB : {
3288 tgl 373 GIC 1205 : tmp = DatumGetInetKeyP(ent[i].key);
3288 tgl 374 ECB :
375 : /* Determine range of family numbers */
3288 tgl 376 GIC 1205 : if (minfamily > gk_ip_family(tmp))
3288 tgl 377 LBC 0 : minfamily = gk_ip_family(tmp);
3288 tgl 378 GBC 1205 : if (maxfamily < gk_ip_family(tmp))
3288 tgl 379 LBC 0 : maxfamily = gk_ip_family(tmp);
3288 tgl 380 EUB :
381 : /* Find minimum minbits */
3288 tgl 382 GIC 1205 : if (minbits > gk_ip_minbits(tmp))
3288 tgl 383 LBC 0 : minbits = gk_ip_minbits(tmp);
3288 tgl 384 EUB :
385 : /* Find minimum number of bits in common */
3288 tgl 386 GIC 1205 : if (commonbits > gk_ip_commonbits(tmp))
3288 tgl 387 LBC 0 : commonbits = gk_ip_commonbits(tmp);
3288 tgl 388 GBC 1205 : if (commonbits > 0)
3288 tgl 389 CBC 823 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
3288 tgl 390 ECB : }
391 :
392 : /* Force minbits/commonbits to zero if more than one family. */
3288 tgl 393 GIC 415 : if (minfamily != maxfamily)
3288 tgl 394 LBC 0 : minbits = commonbits = 0;
3288 tgl 395 EUB :
3288 tgl 396 GIC 415 : *minfamily_p = minfamily;
3288 tgl 397 CBC 415 : *maxfamily_p = maxfamily;
398 415 : *minbits_p = minbits;
399 415 : *commonbits_p = commonbits;
400 415 : }
3288 tgl 401 ECB :
402 : /*
403 : * Same as above, but the GISTENTRY elements to examine are those with
404 : * indices listed in the offsets[] array.
405 : */
406 : static void
3288 tgl 407 UIC 0 : calc_inet_union_params_indexed(GISTENTRY *ent,
3288 tgl 408 EUB : OffsetNumber *offsets, int noffsets,
409 : int *minfamily_p,
410 : int *maxfamily_p,
411 : int *minbits_p,
412 : int *commonbits_p)
413 : {
414 : int minfamily,
415 : maxfamily,
416 : minbits,
417 : commonbits;
418 : unsigned char *addr;
419 : GistInetKey *tmp;
420 : int i;
421 :
422 : /* Must be at least one key. */
3288 tgl 423 UIC 0 : Assert(noffsets > 0);
3288 tgl 424 EUB :
425 : /* Initialize variables using the first key. */
3288 tgl 426 UIC 0 : tmp = DatumGetInetKeyP(ent[offsets[0]].key);
3288 tgl 427 UBC 0 : minfamily = maxfamily = gk_ip_family(tmp);
428 0 : minbits = gk_ip_minbits(tmp);
429 0 : commonbits = gk_ip_commonbits(tmp);
430 0 : addr = gk_ip_addr(tmp);
3288 tgl 431 EUB :
432 : /* Scan remaining keys. */
3288 tgl 433 UIC 0 : for (i = 1; i < noffsets; i++)
3288 tgl 434 EUB : {
3288 tgl 435 UIC 0 : tmp = DatumGetInetKeyP(ent[offsets[i]].key);
3288 tgl 436 EUB :
437 : /* Determine range of family numbers */
3288 tgl 438 UIC 0 : if (minfamily > gk_ip_family(tmp))
3288 tgl 439 UBC 0 : minfamily = gk_ip_family(tmp);
440 0 : if (maxfamily < gk_ip_family(tmp))
441 0 : maxfamily = gk_ip_family(tmp);
3288 tgl 442 EUB :
443 : /* Find minimum minbits */
3288 tgl 444 UIC 0 : if (minbits > gk_ip_minbits(tmp))
3288 tgl 445 UBC 0 : minbits = gk_ip_minbits(tmp);
3288 tgl 446 EUB :
447 : /* Find minimum number of bits in common */
3288 tgl 448 UIC 0 : if (commonbits > gk_ip_commonbits(tmp))
3288 tgl 449 UBC 0 : commonbits = gk_ip_commonbits(tmp);
450 0 : if (commonbits > 0)
451 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
3288 tgl 452 EUB : }
453 :
454 : /* Force minbits/commonbits to zero if more than one family. */
3288 tgl 455 UIC 0 : if (minfamily != maxfamily)
3288 tgl 456 UBC 0 : minbits = commonbits = 0;
3288 tgl 457 EUB :
3288 tgl 458 UIC 0 : *minfamily_p = minfamily;
3288 tgl 459 UBC 0 : *maxfamily_p = maxfamily;
460 0 : *minbits_p = minbits;
461 0 : *commonbits_p = commonbits;
462 0 : }
3288 tgl 463 EUB :
464 : /*
465 : * Construct a GistInetKey representing a union value.
466 : *
467 : * Inputs are the family/minbits/commonbits values to use, plus a pointer to
468 : * the address field of one of the union inputs. (Since we're going to copy
469 : * just the bits-in-common, it doesn't matter which one.)
470 : */
471 : static GistInetKey *
3288 tgl 472 GIC 415 : build_inet_union_key(int family, int minbits, int commonbits,
3288 tgl 473 ECB : unsigned char *addr)
474 : {
475 : GistInetKey *result;
476 :
477 : /* Make sure any unused bits are zeroed. */
3288 tgl 478 GIC 415 : result = (GistInetKey *) palloc0(sizeof(GistInetKey));
3288 tgl 479 ECB :
3288 tgl 480 GIC 415 : gk_ip_family(result) = family;
3288 tgl 481 CBC 415 : gk_ip_minbits(result) = minbits;
482 415 : gk_ip_commonbits(result) = commonbits;
3288 tgl 483 ECB :
484 : /* Clone appropriate bytes of the address. */
3288 tgl 485 GIC 415 : if (commonbits > 0)
3288 tgl 486 CBC 244 : memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
3288 tgl 487 ECB :
488 : /* Clean any unwanted bits in the last partial byte. */
3288 tgl 489 GIC 415 : if (commonbits % 8 != 0)
3288 tgl 490 CBC 244 : gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
3288 tgl 491 ECB :
492 : /* Set varlena header correctly. */
3288 tgl 493 GIC 415 : SET_GK_VARSIZE(result);
3288 tgl 494 ECB :
3288 tgl 495 GIC 415 : return result;
3288 tgl 496 ECB : }
497 :
498 :
499 : /*
500 : * The GiST union function
501 : *
502 : * See comments at head of file for the definition of the union.
503 : */
504 : Datum
3288 tgl 505 GIC 415 : inet_gist_union(PG_FUNCTION_ARGS)
3288 tgl 506 ECB : {
3288 tgl 507 GIC 415 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
3288 tgl 508 CBC 415 : GISTENTRY *ent = entryvec->vector;
3288 tgl 509 ECB : int minfamily,
510 : maxfamily,
511 : minbits,
512 : commonbits;
513 : unsigned char *addr;
514 : GistInetKey *tmp,
515 : *result;
516 :
517 : /* Determine parameters of the union. */
3288 tgl 518 GIC 415 : calc_inet_union_params(ent, 0, entryvec->n - 1,
3288 tgl 519 ECB : &minfamily, &maxfamily,
520 : &minbits, &commonbits);
521 :
522 : /* If more than one family, emit family number zero. */
3288 tgl 523 GIC 415 : if (minfamily != maxfamily)
3288 tgl 524 LBC 0 : minfamily = 0;
3288 tgl 525 EUB :
526 : /* Initialize address using the first key. */
3288 tgl 527 GIC 415 : tmp = DatumGetInetKeyP(ent[0].key);
3288 tgl 528 CBC 415 : addr = gk_ip_addr(tmp);
3288 tgl 529 ECB :
530 : /* Construct the union value. */
3288 tgl 531 GIC 415 : result = build_inet_union_key(minfamily, minbits, commonbits, addr);
3288 tgl 532 ECB :
3288 tgl 533 GIC 415 : PG_RETURN_POINTER(result);
3288 tgl 534 ECB : }
535 :
536 : /*
537 : * The GiST compress function
538 : *
539 : * Convert an inet value to GistInetKey.
540 : */
541 : Datum
3288 tgl 542 GIC 662 : inet_gist_compress(PG_FUNCTION_ARGS)
3288 tgl 543 ECB : {
3288 tgl 544 GIC 662 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
3288 tgl 545 ECB : GISTENTRY *retval;
546 :
3288 tgl 547 GIC 662 : if (entry->leafkey)
3288 tgl 548 ECB : {
3288 tgl 549 GIC 651 : retval = palloc(sizeof(GISTENTRY));
3288 tgl 550 CBC 651 : if (DatumGetPointer(entry->key) != NULL)
3288 tgl 551 ECB : {
3288 tgl 552 GIC 651 : inet *in = DatumGetInetPP(entry->key);
3288 tgl 553 ECB : GistInetKey *r;
554 :
3288 tgl 555 GIC 651 : r = (GistInetKey *) palloc0(sizeof(GistInetKey));
3288 tgl 556 ECB :
3288 tgl 557 GIC 651 : gk_ip_family(r) = ip_family(in);
3288 tgl 558 CBC 651 : gk_ip_minbits(r) = ip_bits(in);
559 651 : gk_ip_commonbits(r) = gk_ip_maxbits(r);
560 651 : memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
561 651 : SET_GK_VARSIZE(r);
3288 tgl 562 ECB :
3288 tgl 563 GIC 651 : gistentryinit(*retval, PointerGetDatum(r),
3288 tgl 564 ECB : entry->rel, entry->page,
565 : entry->offset, false);
566 : }
567 : else
568 : {
3288 tgl 569 UIC 0 : gistentryinit(*retval, (Datum) 0,
3288 tgl 570 EUB : entry->rel, entry->page,
571 : entry->offset, false);
572 : }
573 : }
574 : else
3288 tgl 575 GIC 11 : retval = entry;
3288 tgl 576 CBC 662 : PG_RETURN_POINTER(retval);
3288 tgl 577 ECB : }
578 :
579 : /*
580 : * We do not need a decompress function, because the other GiST inet
581 : * support functions work with the GistInetKey representation.
582 : */
583 :
584 : /*
585 : * The GiST fetch function
586 : *
587 : * Reconstruct the original inet datum from a GistInetKey.
588 : */
589 : Datum
2934 heikki.linnakangas 590 GIC 10 : inet_gist_fetch(PG_FUNCTION_ARGS)
2934 heikki.linnakangas 591 ECB : {
2878 bruce 592 GIC 10 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
2878 bruce 593 CBC 10 : GistInetKey *key = DatumGetInetKeyP(entry->key);
2878 bruce 594 ECB : GISTENTRY *retval;
595 : inet *dst;
596 :
2934 heikki.linnakangas 597 GIC 10 : dst = (inet *) palloc0(sizeof(inet));
2934 heikki.linnakangas 598 ECB :
2934 heikki.linnakangas 599 GIC 10 : ip_family(dst) = gk_ip_family(key);
2934 heikki.linnakangas 600 CBC 10 : ip_bits(dst) = gk_ip_minbits(key);
601 10 : memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
602 10 : SET_INET_VARSIZE(dst);
2934 heikki.linnakangas 603 ECB :
2934 heikki.linnakangas 604 GIC 10 : retval = palloc(sizeof(GISTENTRY));
2934 heikki.linnakangas 605 CBC 10 : gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
2062 peter_e 606 ECB : entry->offset, false);
607 :
2934 heikki.linnakangas 608 GIC 10 : PG_RETURN_POINTER(retval);
2934 heikki.linnakangas 609 ECB : }
610 :
611 : /*
612 : * The GiST page split penalty function
613 : *
614 : * Charge a large penalty if address family doesn't match, or a somewhat
615 : * smaller one if the new value would degrade the union's minbits
616 : * (minimum netmask width). Otherwise, penalty is inverse of the
617 : * new number of common address bits.
618 : */
619 : Datum
3288 tgl 620 GIC 723 : inet_gist_penalty(PG_FUNCTION_ARGS)
3288 tgl 621 ECB : {
3288 tgl 622 GIC 723 : GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
3288 tgl 623 CBC 723 : GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
624 723 : float *penalty = (float *) PG_GETARG_POINTER(2);
625 723 : GistInetKey *orig = DatumGetInetKeyP(origent->key),
626 723 : *new = DatumGetInetKeyP(newent->key);
3288 tgl 627 ECB : int commonbits;
628 :
3288 tgl 629 GIC 723 : if (gk_ip_family(orig) == gk_ip_family(new))
3288 tgl 630 ECB : {
3288 tgl 631 GIC 723 : if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
3288 tgl 632 ECB : {
3288 tgl 633 GIC 723 : commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
3288 tgl 634 CBC 723 : Min(gk_ip_commonbits(orig),
3288 tgl 635 ECB : gk_ip_commonbits(new)));
3288 tgl 636 GIC 723 : if (commonbits > 0)
3288 tgl 637 CBC 306 : *penalty = 1.0f / commonbits;
3288 tgl 638 ECB : else
3288 tgl 639 GIC 417 : *penalty = 2;
3288 tgl 640 ECB : }
641 : else
3288 tgl 642 UIC 0 : *penalty = 3;
3288 tgl 643 EUB : }
644 : else
3288 tgl 645 UIC 0 : *penalty = 4;
3288 tgl 646 EUB :
3288 tgl 647 GIC 723 : PG_RETURN_POINTER(penalty);
3288 tgl 648 ECB : }
649 :
650 : /*
651 : * The GiST PickSplit method
652 : *
653 : * There are two ways to split. First one is to split by address families,
654 : * if there are multiple families appearing in the input.
655 : *
656 : * The second and more common way is to split by addresses. To achieve this,
657 : * determine the number of leading bits shared by all the keys, then split on
658 : * the next bit. (We don't currently consider the netmask widths while doing
659 : * this; should we?) If we fail to get a nontrivial split that way, split
660 : * 50-50.
661 : */
662 : Datum
3288 tgl 663 UIC 0 : inet_gist_picksplit(PG_FUNCTION_ARGS)
3288 tgl 664 EUB : {
3288 tgl 665 UIC 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
3288 tgl 666 UBC 0 : GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
667 0 : GISTENTRY *ent = entryvec->vector;
3288 tgl 668 EUB : int minfamily,
669 : maxfamily,
670 : minbits,
671 : commonbits;
672 : unsigned char *addr;
673 : GistInetKey *tmp,
674 : *left_union,
675 : *right_union;
676 : int maxoff,
677 : nbytes;
678 : OffsetNumber i,
679 : *left,
680 : *right;
681 :
3288 tgl 682 UIC 0 : maxoff = entryvec->n - 1;
3288 tgl 683 UBC 0 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
3288 tgl 684 EUB :
3288 tgl 685 UIC 0 : left = (OffsetNumber *) palloc(nbytes);
3288 tgl 686 UBC 0 : right = (OffsetNumber *) palloc(nbytes);
3288 tgl 687 EUB :
3288 tgl 688 UIC 0 : splitvec->spl_left = left;
3288 tgl 689 UBC 0 : splitvec->spl_right = right;
3288 tgl 690 EUB :
3288 tgl 691 UIC 0 : splitvec->spl_nleft = 0;
3288 tgl 692 UBC 0 : splitvec->spl_nright = 0;
3288 tgl 693 EUB :
694 : /* Determine parameters of the union of all the inputs. */
3288 tgl 695 UIC 0 : calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
3288 tgl 696 EUB : &minfamily, &maxfamily,
697 : &minbits, &commonbits);
698 :
3288 tgl 699 UIC 0 : if (minfamily != maxfamily)
3288 tgl 700 EUB : {
701 : /* Multiple families, so split by family. */
3288 tgl 702 UIC 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
3288 tgl 703 EUB : {
704 : /*
705 : * If there's more than 2 families, all but maxfamily go into the
706 : * left union. This could only happen if the inputs include some
707 : * IPv4, some IPv6, and some already-multiple-family unions.
708 : */
3288 tgl 709 UIC 0 : tmp = DatumGetInetKeyP(ent[i].key);
3288 tgl 710 UBC 0 : if (gk_ip_family(tmp) != maxfamily)
711 0 : left[splitvec->spl_nleft++] = i;
3288 tgl 712 EUB : else
3288 tgl 713 UIC 0 : right[splitvec->spl_nright++] = i;
3288 tgl 714 EUB : }
715 : }
716 : else
717 : {
718 : /*
719 : * Split on the next bit after the common bits. If that yields a
720 : * trivial split, try the next bit position to the right. Repeat till
721 : * success; or if we run out of bits, do an arbitrary 50-50 split.
722 : */
3288 tgl 723 UIC 0 : int maxbits = ip_family_maxbits(minfamily);
3288 tgl 724 EUB :
3288 tgl 725 UIC 0 : while (commonbits < maxbits)
3288 tgl 726 EUB : {
727 : /* Split using the commonbits'th bit position. */
3288 tgl 728 UIC 0 : int bitbyte = commonbits / 8;
3288 tgl 729 UBC 0 : int bitmask = 0x80 >> (commonbits % 8);
3288 tgl 730 EUB :
3288 tgl 731 UIC 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
3288 tgl 732 EUB :
3288 tgl 733 UIC 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
3288 tgl 734 EUB : {
3288 tgl 735 UIC 0 : tmp = DatumGetInetKeyP(ent[i].key);
3288 tgl 736 UBC 0 : addr = gk_ip_addr(tmp);
737 0 : if ((addr[bitbyte] & bitmask) == 0)
738 0 : left[splitvec->spl_nleft++] = i;
3288 tgl 739 EUB : else
3288 tgl 740 UIC 0 : right[splitvec->spl_nright++] = i;
3288 tgl 741 EUB : }
742 :
3288 tgl 743 UIC 0 : if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
3288 tgl 744 UBC 0 : break; /* success */
745 0 : commonbits++;
3288 tgl 746 EUB : }
747 :
3288 tgl 748 UIC 0 : if (commonbits >= maxbits)
3288 tgl 749 EUB : {
750 : /* Failed ... do a 50-50 split. */
3288 tgl 751 UIC 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
3288 tgl 752 EUB :
3288 tgl 753 UIC 0 : for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
3288 tgl 754 EUB : {
3288 tgl 755 UIC 0 : left[splitvec->spl_nleft++] = i;
3288 tgl 756 EUB : }
3288 tgl 757 UIC 0 : for (; i <= maxoff; i = OffsetNumberNext(i))
3288 tgl 758 EUB : {
3288 tgl 759 UIC 0 : right[splitvec->spl_nright++] = i;
3288 tgl 760 EUB : }
761 : }
762 : }
763 :
764 : /*
765 : * Compute the union value for each side from scratch. In most cases we
766 : * could approximate the union values with what we already know, but this
767 : * ensures that each side has minbits and commonbits set as high as
768 : * possible.
769 : */
3288 tgl 770 UIC 0 : calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
3288 tgl 771 EUB : &minfamily, &maxfamily,
772 : &minbits, &commonbits);
3288 tgl 773 UIC 0 : if (minfamily != maxfamily)
3288 tgl 774 UBC 0 : minfamily = 0;
775 0 : tmp = DatumGetInetKeyP(ent[left[0]].key);
776 0 : addr = gk_ip_addr(tmp);
777 0 : left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
778 0 : splitvec->spl_ldatum = PointerGetDatum(left_union);
3288 tgl 779 EUB :
3288 tgl 780 UIC 0 : calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
3288 tgl 781 EUB : &minfamily, &maxfamily,
782 : &minbits, &commonbits);
3288 tgl 783 UIC 0 : if (minfamily != maxfamily)
3288 tgl 784 UBC 0 : minfamily = 0;
785 0 : tmp = DatumGetInetKeyP(ent[right[0]].key);
786 0 : addr = gk_ip_addr(tmp);
787 0 : right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
788 0 : splitvec->spl_rdatum = PointerGetDatum(right_union);
3288 tgl 789 EUB :
3288 tgl 790 UIC 0 : PG_RETURN_POINTER(splitvec);
3288 tgl 791 EUB : }
792 :
793 : /*
794 : * The GiST equality function
795 : */
796 : Datum
3288 tgl 797 GIC 404 : inet_gist_same(PG_FUNCTION_ARGS)
3288 tgl 798 ECB : {
3288 tgl 799 GIC 404 : GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
3288 tgl 800 CBC 404 : GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
801 404 : bool *result = (bool *) PG_GETARG_POINTER(2);
3288 tgl 802 ECB :
3288 tgl 803 GIC 1212 : *result = (gk_ip_family(left) == gk_ip_family(right) &&
3288 tgl 804 CBC 404 : gk_ip_minbits(left) == gk_ip_minbits(right) &&
805 1212 : gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
806 404 : memcmp(gk_ip_addr(left), gk_ip_addr(right),
807 404 : gk_ip_addrsize(left)) == 0);
3288 tgl 808 ECB :
3288 tgl 809 GIC 404 : PG_RETURN_POINTER(result);
3288 tgl 810 ECB : }
|