Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_lfind.h
4 : * Optimized linear search routines using SIMD intrinsics where
5 : * available.
6 : *
7 : * Copyright (c) 2022-2023, PostgreSQL Global Development Group
8 : *
9 : * IDENTIFICATION
10 : * src/include/port/pg_lfind.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef PG_LFIND_H
15 : #define PG_LFIND_H
16 :
17 : #include "port/simd.h"
18 :
19 : /*
20 : * pg_lfind8
21 : *
22 : * Return true if there is an element in 'base' that equals 'key', otherwise
23 : * return false.
24 : */
25 : static inline bool
232 john.naylor 26 GNC 1621808 : pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
27 : {
28 : uint32 i;
29 :
30 : /* round down to multiple of vector length */
31 1621808 : uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
32 : Vector8 chunk;
33 :
34 2593476 : for (i = 0; i < tail_idx; i += sizeof(Vector8))
35 : {
36 1621848 : vector8_load(&chunk, &base[i]);
37 1621848 : if (vector8_has(chunk, key))
38 650180 : return true;
39 : }
40 :
41 : /* Process the remaining elements one at a time. */
42 971681 : for (; i < nelem; i++)
43 : {
44 60 : if (key == base[i])
45 7 : return true;
46 : }
47 :
48 971621 : return false;
49 : }
50 :
51 : /*
52 : * pg_lfind8_le
53 : *
54 : * Return true if there is an element in 'base' that is less than or equal to
55 : * 'key', otherwise return false.
56 : */
57 : static inline bool
58 160874 : pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
59 : {
60 : uint32 i;
61 :
62 : /* round down to multiple of vector length */
63 160874 : uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1);
64 : Vector8 chunk;
65 :
66 321775 : for (i = 0; i < tail_idx; i += sizeof(Vector8))
67 : {
68 160914 : vector8_load(&chunk, &base[i]);
69 160914 : if (vector8_has_le(chunk, key))
70 13 : return true;
71 : }
72 :
73 : /* Process the remaining elements one at a time. */
74 160908 : for (; i < nelem; i++)
75 : {
76 60 : if (base[i] <= key)
77 13 : return true;
78 : }
79 :
80 160848 : return false;
81 : }
82 :
83 : /*
84 : * pg_lfind32
85 : *
86 : * Return true if there is an element in 'base' that equals 'key', otherwise
87 : * return false.
88 : */
89 : static inline bool
249 90 6372203 : pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
91 : {
92 6372203 : uint32 i = 0;
93 :
94 : #ifndef USE_NO_SIMD
95 :
96 : /*
97 : * For better instruction-level parallelism, each loop iteration operates
98 : * on a block of four registers. Testing for SSE2 has showed this is ~40%
99 : * faster than using a block of two registers.
100 : */
223 101 6372203 : const Vector32 keys = vector32_broadcast(key); /* load copies of key */
102 6372203 : const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
103 6372203 : const uint32 nelem_per_iteration = 4 * nelem_per_vector;
104 :
105 : /* round down to multiple of elements per iteration */
106 6372203 : const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
107 :
108 : #if defined(USE_ASSERT_CHECKING)
249 109 6372203 : bool assert_result = false;
110 :
111 : /* pre-compute the result for assert checking */
112 14008084 : for (i = 0; i < nelem; i++)
113 : {
114 7641820 : if (key == base[i])
115 : {
116 5939 : assert_result = true;
117 5939 : break;
118 : }
119 : }
120 : #endif
121 :
223 122 6372231 : for (i = 0; i < tail_idx; i += nelem_per_iteration)
123 : {
124 : Vector32 vals1,
125 : vals2,
126 : vals3,
127 : vals4,
128 : result1,
129 : result2,
130 : result3,
131 : result4,
132 : tmp1,
133 : tmp2,
134 : result;
135 :
136 : /* load the next block into 4 registers */
137 30 : vector32_load(&vals1, &base[i]);
138 30 : vector32_load(&vals2, &base[i + nelem_per_vector]);
139 30 : vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
140 30 : vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
141 :
142 : /* compare each value to the key */
143 30 : result1 = vector32_eq(keys, vals1);
144 30 : result2 = vector32_eq(keys, vals2);
145 30 : result3 = vector32_eq(keys, vals3);
146 30 : result4 = vector32_eq(keys, vals4);
147 :
148 : /* combine the results into a single variable */
149 30 : tmp1 = vector32_or(result1, result2);
150 30 : tmp2 = vector32_or(result3, result4);
151 30 : result = vector32_or(tmp1, tmp2);
152 :
153 : /* see if there was a match */
154 30 : if (vector32_is_highbit_set(result))
155 : {
249 156 2 : Assert(assert_result == true);
157 2 : return true;
158 : }
159 : }
160 : #endif /* ! USE_NO_SIMD */
161 :
162 : /* Process the remaining elements one at a time. */
163 14007626 : for (; i < nelem; i++)
164 : {
165 7641362 : if (key == base[i])
166 : {
167 : #ifndef USE_NO_SIMD
168 5937 : Assert(assert_result == true);
169 : #endif
170 5937 : return true;
171 : }
172 : }
173 :
174 : #ifndef USE_NO_SIMD
175 6366264 : Assert(assert_result == false);
176 : #endif
177 6366264 : return false;
178 : }
179 :
180 : #endif /* PG_LFIND_H */
|