Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * contrib/hstore/hstore_compat.c
3 : : *
4 : : * Notes on old/new hstore format disambiguation.
5 : : *
6 : : * There are three formats to consider:
7 : : * 1) old contrib/hstore (referred to as hstore-old)
8 : : * 2) prerelease pgfoundry hstore
9 : : * 3) new contrib/hstore
10 : : *
11 : : * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
12 : : * bit, which is set in (3) but not (2).
13 : : *
14 : : * Values that are already in format (3), or which are
15 : : * unambiguously in format (2), are handled by the first
16 : : * "return immediately" test in hstoreUpgrade().
17 : : *
18 : : * To stress a point: we ONLY get here with possibly-ambiguous
19 : : * values if we're doing some sort of in-place migration from an
20 : : * old prerelease pgfoundry hstore-new; and we explicitly don't
21 : : * support that without fixing up any potentially padded values
22 : : * first. Most of the code here is serious overkill, but the
23 : : * performance penalty isn't serious (especially compared to the
24 : : * palloc() that we have to do anyway) and the belt-and-braces
25 : : * validity checks provide some reassurance. (If for some reason
26 : : * we get a value that would have worked on the old code, but
27 : : * which would be botched by the conversion code, the validity
28 : : * checks will fail it first so we get an error rather than bad
29 : : * data.)
30 : : *
31 : : * Note also that empty hstores are the same in (2) and (3), so
32 : : * there are some special-case paths for them.
33 : : *
34 : : * We tell the difference between formats (2) and (3) as follows (but
35 : : * note that there are some edge cases where we can't tell; see
36 : : * comments in hstoreUpgrade):
37 : : *
38 : : * First, since there must be at least one entry, we look at
39 : : * how the bits line up. The new format looks like:
40 : : *
41 : : * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk (k..k = keylen)
42 : : * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv (v..v = keylen+vallen)
43 : : *
44 : : * The old format looks like one of these, depending on endianness
45 : : * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
46 : : * n = isnull)
47 : : *
48 : : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
49 : : * nppppppppppppppppppppppppppppppp
50 : : *
51 : : * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
52 : : * pppppppppppppppppppppppppppppppn
53 : : *
54 : : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
55 : : * nppppppppppppppppppppppppppppppp
56 : : *
57 : : * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
58 : : * pppppppppppppppppppppppppppppppn (usual i386 format)
59 : : *
60 : : * If the entry is in old format, for the first entry "pos" must be 0.
61 : : * We can obviously see that either keylen or vallen must be >32768
62 : : * for there to be any ambiguity (which is why lengths less than that
63 : : * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
64 : : * new-format interpretation can only be 0 or 1, which constrains all
65 : : * but three bits of the old-format's k and v fields. But in addition
66 : : * to all of this, the data length implied by the keylen and vallen
67 : : * must fit in the varlena size. So the only ambiguous edge case for
68 : : * hstores with only one entry occurs between a new-format entry with
69 : : * an excess (~32k) of padding, and an old-format entry. But we know
70 : : * which format to use in that case based on how we were compiled, so
71 : : * no actual data corruption can occur.
72 : : *
73 : : * If there is more than one entry, the requirement that keys do not
74 : : * decrease in length, and that positions increase contiguously, and
75 : : * that the end of the data not be beyond the end of the varlena
76 : : * itself, disambiguates in almost all other cases. There is a small
77 : : * set of ambiguous cases which could occur if the old-format value
78 : : * has a large excess of padding and just the right pattern of key
79 : : * sizes, but these are also handled based on how we were compiled.
80 : : *
81 : : * The otherwise undocumented function hstore_version_diag is provided
82 : : * for testing purposes.
83 : : */
84 : : #include "postgres.h"
85 : :
86 : :
87 : : #include "hstore.h"
88 : :
89 : : /*
90 : : * This is the structure used for entries in the old contrib/hstore
91 : : * implementation. Notice that this is the same size as the new entry
92 : : * (two 32-bit words per key/value pair) and that the header is the
93 : : * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
94 : : * etc. are compatible.
95 : : *
96 : : * If the above statement isn't true on some bizarre platform, we're
97 : : * a bit hosed (see StaticAssertStmt in hstoreValidOldFormat).
98 : : */
99 : : typedef struct
100 : : {
101 : : uint16 keylen;
102 : : uint16 vallen;
103 : : uint32
104 : : valisnull:1,
105 : : pos:31;
106 : : } HOldEntry;
107 : :
108 : : static int hstoreValidNewFormat(HStore *hs);
109 : : static int hstoreValidOldFormat(HStore *hs);
110 : :
111 : :
112 : : /*
113 : : * Validity test for a new-format hstore.
114 : : * 0 = not valid
115 : : * 1 = valid but with "slop" in the length
116 : : * 2 = exactly valid
117 : : */
118 : : static int
5310 tgl@sss.pgh.pa.us 119 :UBC 0 : hstoreValidNewFormat(HStore *hs)
120 : : {
5161 bruce@momjian.us 121 : 0 : int count = HS_COUNT(hs);
122 : 0 : HEntry *entries = ARRPTR(hs);
123 [ # # ]: 0 : int buflen = (count) ? HSE_ENDPOS(entries[2 * (count) - 1]) : 0;
124 : 0 : int vsize = CALCDATASIZE(count, buflen);
125 : : int i;
126 : :
5310 tgl@sss.pgh.pa.us 127 [ # # ]: 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
128 : 0 : return 2;
129 : :
130 [ # # ]: 0 : if (count == 0)
131 : 0 : return 2;
132 : :
133 [ # # ]: 0 : if (!HSE_ISFIRST(entries[0]))
134 : 0 : return 0;
135 : :
136 [ # # ]: 0 : if (vsize > VARSIZE(hs))
137 : 0 : return 0;
138 : :
139 : : /* entry position must be nondecreasing */
140 : :
5161 bruce@momjian.us 141 [ # # ]: 0 : for (i = 1; i < 2 * count; ++i)
142 : : {
5310 tgl@sss.pgh.pa.us 143 [ # # ]: 0 : if (HSE_ISFIRST(entries[i])
5161 bruce@momjian.us 144 [ # # ]: 0 : || (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
5310 tgl@sss.pgh.pa.us 145 : 0 : return 0;
146 : : }
147 : :
148 : : /* key length must be nondecreasing and keys must not be null */
149 : :
150 [ # # ]: 0 : for (i = 1; i < count; ++i)
151 : : {
3069 152 [ # # # # : 0 : if (HSTORE_KEYLEN(entries, i) < HSTORE_KEYLEN(entries, i - 1))
# # ]
5310 153 : 0 : return 0;
5161 bruce@momjian.us 154 [ # # ]: 0 : if (HSE_ISNULL(entries[2 * i]))
5310 tgl@sss.pgh.pa.us 155 : 0 : return 0;
156 : : }
157 : :
158 [ # # ]: 0 : if (vsize != VARSIZE(hs))
159 : 0 : return 1;
160 : :
161 : 0 : return 2;
162 : : }
163 : :
164 : : /*
165 : : * Validity test for an old-format hstore.
166 : : * 0 = not valid
167 : : * 1 = valid but with "slop" in the length
168 : : * 2 = exactly valid
169 : : */
170 : : static int
171 : 0 : hstoreValidOldFormat(HStore *hs)
172 : : {
5161 bruce@momjian.us 173 : 0 : int count = hs->size_;
174 : 0 : HOldEntry *entries = (HOldEntry *) ARRPTR(hs);
175 : : int vsize;
176 : 0 : int lastpos = 0;
177 : : int i;
178 : :
5310 tgl@sss.pgh.pa.us 179 [ # # ]: 0 : if (hs->size_ & HS_FLAG_NEWVERSION)
180 : 0 : return 0;
181 : :
182 : : /* New format uses an HEntry for key and another for value */
183 : : StaticAssertStmt(sizeof(HOldEntry) == 2 * sizeof(HEntry),
184 : : "old hstore format is not upward-compatible");
185 : :
186 [ # # ]: 0 : if (count == 0)
187 : 0 : return 2;
188 : :
189 [ # # ]: 0 : if (count > 0xFFFFFFF)
190 : 0 : return 0;
191 : :
5161 bruce@momjian.us 192 [ # # ]: 0 : if (CALCDATASIZE(count, 0) > VARSIZE(hs))
5310 tgl@sss.pgh.pa.us 193 : 0 : return 0;
194 : :
195 [ # # ]: 0 : if (entries[0].pos != 0)
196 : 0 : return 0;
197 : :
198 : : /* key length must be nondecreasing */
199 : :
200 [ # # ]: 0 : for (i = 1; i < count; ++i)
201 : : {
5161 bruce@momjian.us 202 [ # # ]: 0 : if (entries[i].keylen < entries[i - 1].keylen)
5310 tgl@sss.pgh.pa.us 203 : 0 : return 0;
204 : : }
205 : :
206 : : /*
207 : : * entry position must be strictly increasing, except for the first entry
208 : : * (which can be ""=>"" and thus zero-length); and all entries must be
209 : : * properly contiguous
210 : : */
211 : :
212 [ # # ]: 0 : for (i = 0; i < count; ++i)
213 : : {
214 [ # # ]: 0 : if (entries[i].pos != lastpos)
215 : 0 : return 0;
216 : 0 : lastpos += (entries[i].keylen
217 [ # # ]: 0 : + ((entries[i].valisnull) ? 0 : entries[i].vallen));
218 : : }
219 : :
5161 bruce@momjian.us 220 : 0 : vsize = CALCDATASIZE(count, lastpos);
221 : :
5310 tgl@sss.pgh.pa.us 222 [ # # ]: 0 : if (vsize > VARSIZE(hs))
223 : 0 : return 0;
224 : :
225 [ # # ]: 0 : if (vsize != VARSIZE(hs))
226 : 0 : return 1;
227 : :
228 : 0 : return 2;
229 : : }
230 : :
231 : :
232 : : /*
233 : : * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
234 : : */
235 : : HStore *
5310 tgl@sss.pgh.pa.us 236 :CBC 109996 : hstoreUpgrade(Datum orig)
237 : : {
238 : 109996 : HStore *hs = (HStore *) PG_DETOAST_DATUM(orig);
239 : : int valid_new;
240 : : int valid_old;
241 : :
242 : : /* Return immediately if no conversion needed */
1968 rhodiumtoad@postgres 243 [ + - ]: 109996 : if (hs->size_ & HS_FLAG_NEWVERSION)
244 : 109996 : return hs;
245 : :
246 : : /* Do we have a writable copy? If not, make one. */
1968 rhodiumtoad@postgres 247 [ # # ]:UBC 0 : if ((void *) hs == (void *) DatumGetPointer(orig))
248 : 0 : hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);
249 : :
250 [ # # ]: 0 : if (hs->size_ == 0 ||
5310 tgl@sss.pgh.pa.us 251 [ # # # # ]: 0 : (VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
252 : : {
1968 rhodiumtoad@postgres 253 : 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
254 [ # # ]: 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
5310 tgl@sss.pgh.pa.us 255 : 0 : return hs;
256 : : }
257 : :
258 : 0 : valid_new = hstoreValidNewFormat(hs);
259 : 0 : valid_old = hstoreValidOldFormat(hs);
260 : :
261 [ # # # # ]: 0 : if (!valid_old || hs->size_ == 0)
262 : : {
263 [ # # ]: 0 : if (valid_new)
264 : : {
265 : : /*
266 : : * force the "new version" flag and the correct varlena length.
267 : : */
1968 rhodiumtoad@postgres 268 : 0 : HS_SETCOUNT(hs, HS_COUNT(hs));
269 [ # # ]: 0 : HS_FIXSIZE(hs, HS_COUNT(hs));
5310 tgl@sss.pgh.pa.us 270 : 0 : return hs;
271 : : }
272 : : else
273 : : {
5161 bruce@momjian.us 274 [ # # ]: 0 : elog(ERROR, "invalid hstore value found");
275 : : }
276 : : }
277 : :
278 : : /*
279 : : * this is the tricky edge case. It is only possible in some quite extreme
280 : : * cases (the hstore must have had a lot of wasted padding space at the
281 : : * end). But the only way a "new" hstore value could get here is if we're
282 : : * upgrading in place from a pre-release version of hstore-new (NOT
283 : : * contrib/hstore), so we work off the following assumptions: 1. If you're
284 : : * moving from old contrib/hstore to hstore-new, you're required to fix up
285 : : * any potential conflicts first, e.g. by running ALTER TABLE ... USING
286 : : * col::text::hstore; on all hstore columns before upgrading. 2. If you're
287 : : * moving from old contrib/hstore to new contrib/hstore, then "new" values
288 : : * are impossible here 3. If you're moving from pre-release hstore-new to
289 : : * hstore-new, then "old" values are impossible here 4. If you're moving
290 : : * from pre-release hstore-new to new contrib/hstore, you're not doing so
291 : : * as an in-place upgrade, so there is no issue So the upshot of all this
292 : : * is that we can treat all the edge cases as "new" if we're being built
293 : : * as hstore-new, and "old" if we're being built as contrib/hstore.
294 : : *
295 : : * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
296 : : * beta-tested. But for now, it would be very useful to know if anyone can
297 : : * actually reach this case in a non-contrived setting.
298 : : */
299 : :
5310 tgl@sss.pgh.pa.us 300 [ # # ]: 0 : if (valid_new)
301 : : {
302 : : #ifdef HSTORE_IS_HSTORE_NEW
303 : : elog(WARNING, "ambiguous hstore value resolved as hstore-new");
304 : :
305 : : /*
306 : : * force the "new version" flag and the correct varlena length.
307 : : */
308 : : HS_SETCOUNT(hs, HS_COUNT(hs));
309 : : HS_FIXSIZE(hs, HS_COUNT(hs));
310 : : return hs;
311 : : #else
5161 bruce@momjian.us 312 [ # # ]: 0 : elog(WARNING, "ambiguous hstore value resolved as hstore-old");
313 : : #endif
314 : : }
315 : :
316 : : /*
317 : : * must have an old-style value. Overwrite it in place as a new-style one.
318 : : */
319 : : {
320 : 0 : int count = hs->size_;
321 : 0 : HEntry *new_entries = ARRPTR(hs);
322 : 0 : HOldEntry *old_entries = (HOldEntry *) ARRPTR(hs);
323 : : int i;
324 : :
5310 tgl@sss.pgh.pa.us 325 [ # # ]: 0 : for (i = 0; i < count; ++i)
326 : : {
5161 bruce@momjian.us 327 : 0 : uint32 pos = old_entries[i].pos;
328 : 0 : uint32 keylen = old_entries[i].keylen;
329 : 0 : uint32 vallen = old_entries[i].vallen;
330 : 0 : bool isnull = old_entries[i].valisnull;
331 : :
5310 tgl@sss.pgh.pa.us 332 [ # # ]: 0 : if (isnull)
333 : 0 : vallen = 0;
334 : :
5161 bruce@momjian.us 335 : 0 : new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
336 : 0 : new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
337 [ # # ]: 0 : | ((isnull) ? HENTRY_ISNULL : 0));
338 : : }
339 : :
5310 tgl@sss.pgh.pa.us 340 [ # # ]: 0 : if (count)
341 : 0 : new_entries[0].entry |= HENTRY_ISFIRST;
5161 bruce@momjian.us 342 : 0 : HS_SETCOUNT(hs, count);
343 [ # # ]: 0 : HS_FIXSIZE(hs, count);
344 : : }
345 : :
5310 tgl@sss.pgh.pa.us 346 : 0 : return hs;
347 : : }
348 : :
349 : :
5310 tgl@sss.pgh.pa.us 350 :CBC 7 : PG_FUNCTION_INFO_V1(hstore_version_diag);
351 : : Datum
5310 tgl@sss.pgh.pa.us 352 :UBC 0 : hstore_version_diag(PG_FUNCTION_ARGS)
353 : : {
5161 bruce@momjian.us 354 : 0 : HStore *hs = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
355 : 0 : int valid_new = hstoreValidNewFormat(hs);
356 : 0 : int valid_old = hstoreValidOldFormat(hs);
357 : :
358 : 0 : PG_RETURN_INT32(valid_old * 10 + valid_new);
359 : : }
|