Age Owner Branch data TLA Line data Source code
1 : : /*-----------------------------------------------------------------------
2 : : * ascii.h
3 : : *
4 : : * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
5 : : *
6 : : * src/include/utils/ascii.h
7 : : *
8 : : *-----------------------------------------------------------------------
9 : : */
10 : :
11 : : #ifndef _ASCII_H_
12 : : #define _ASCII_H_
13 : :
14 : : #include "port/simd.h"
15 : :
16 : : extern void ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz);
17 : :
18 : : /*
19 : : * Verify a chunk of bytes for valid ASCII, scanning all of it before testing the result (no early exit).
20 : : *
21 : : * Returns false if the input contains any zero bytes or bytes with the
22 : : * high-bit set, else true. Input len must be a multiple of the chunk size (8 or 16); only an Assert checks that.
23 : : */
24 : : static inline bool
76 nathan@postgresql.or 25 :CBC 1886993 : is_valid_ascii(const unsigned char *s, int len)
26 : : {
27 : 1886993 : const unsigned char *const s_end = s + len;
28 : : Vector8 chunk;
29 : 1886993 : Vector8 highbit_cum = vector8_broadcast(0);
30 : : #ifdef USE_NO_SIMD
31 : : Vector8 zero_cum = vector8_broadcast(0x80);
32 : : #endif
33 : :
34 [ - + ]: 1886993 : Assert(len % sizeof(chunk) == 0);
35 : :
36 [ + + ]: 5660979 : while (s < s_end)
37 : : {
38 : 3773986 : vector8_load(&chunk, s);
39 : :
40 : : /* Capture any zero bytes in this chunk; how depends on USE_NO_SIMD. */
41 : : #ifdef USE_NO_SIMD
42 : :
43 : : /*
44 : : * First, add 0x7f to each byte. This sets the high bit in each byte,
45 : : * unless it was a zero. If any resulting high bits are zero, the
46 : : * corresponding high bits in the zero accumulator will be cleared.
47 : : *
48 : : * If none of the bytes in the chunk had the high bit set, the max
49 : : * value each byte can have after the addition is 0x7f + 0x7f = 0xfe,
50 : : * and we don't need to worry about carrying over to the next byte. If
51 : : * any input bytes did have the high bit set, it doesn't matter
52 : : * because we check for those separately.
53 : : */
54 : : zero_cum &= (chunk + vector8_broadcast(0x7F));
55 : : #else
56 : :
57 : : /*
58 : : * Set all bits in each lane of the highbit accumulator where input
59 : : * bytes are zero, so the high-bit test after the loop also catches them.
60 : : */
61 : 3773986 : highbit_cum = vector8_or(highbit_cum,
62 : : vector8_eq(chunk, vector8_broadcast(0)));
63 : : #endif
64 : :
65 : : /* Capture all set bits in this chunk; only the high bits are tested after the loop. */
66 : 3773986 : highbit_cum = vector8_or(highbit_cum, chunk);
67 : :
68 : 3773986 : s += sizeof(chunk);
69 : : }
70 : :
71 : : /* Check if any high bits in the high bit accumulator got set. */
72 [ + + ]: 1886993 : if (vector8_is_highbit_set(highbit_cum))
73 : 492 : return false;
74 : :
75 : : #ifdef USE_NO_SIMD
76 : : /* Check if any high bits in the zero accumulator got cleared, i.e. a zero byte was seen. */
77 : : if (zero_cum != vector8_broadcast(0x80))
78 : : return false;
79 : : #endif
80 : :
81 : 1886501 : return true;
82 : : }
83 : :
84 : : #endif /* _ASCII_H_ */
|