Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pgstrcasecmp.c
4 : : * Portable SQL-like case-independent comparisons and conversions.
5 : : *
6 : : * SQL99 specifies Unicode-aware case normalization, which we don't yet
7 : : * have the infrastructure for. Instead we use tolower() to provide a
8 : : * locale-aware translation. However, there are some locales where this
9 : : * is not right either (eg, Turkish may do strange things with 'i' and
10 : : * 'I'). Our current compromise is to use tolower() for characters with
11 : : * the high bit set, and use an ASCII-only downcasing for 7-bit
12 : : * characters.
13 : : *
14 : : * NB: this code should match downcase_truncate_identifier() in scansup.c.
15 : : *
16 : : * We also provide strict ASCII-only case conversion functions, which can
17 : : * be used to implement C/POSIX case folding semantics no matter what the
18 : : * C library thinks the locale is.
19 : : *
20 : : *
21 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
22 : : *
23 : : * src/port/pgstrcasecmp.c
24 : : *
25 : : *-------------------------------------------------------------------------
26 : : */
27 : : #include "c.h"
28 : :
29 : : #include <ctype.h>
30 : :
31 : :
/*
 * pg_strcasecmp
 *
 * Compare two NUL-terminated strings, ignoring differences in letter case.
 *
 * Bytes are folded only when they differ: 'A'..'Z' are shifted to lower
 * case arithmetically, while a byte with the high bit set is passed
 * through tolower() if isupper() reports it as upper case.
 *
 * Returns 0 if the strings match under that folding; otherwise returns the
 * difference (first minus second) of the first pair of folded bytes that
 * do not match.
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	unsigned char c1;
	unsigned char c2;

	do
	{
		c1 = (unsigned char) *s1++;
		c2 = (unsigned char) *s2++;

		/* Identical bytes need no folding; go straight to the NUL test. */
		if (c1 == c2)
			continue;

		if (c1 >= 'A' && c1 <= 'Z')
			c1 += 'a' - 'A';
		else if (IS_HIGHBIT_SET(c1) && isupper(c1))
			c1 = tolower(c1);

		if (c2 >= 'A' && c2 <= 'Z')
			c2 += 'a' - 'A';
		else if (IS_HIGHBIT_SET(c2) && isupper(c2))
			c2 = tolower(c2);

		/* Still different after folding: this pair decides the result. */
		if (c1 != c2)
			return (int) c1 - (int) c2;
	} while (c1 != 0);

	return 0;
}
63 : :
/*
 * pg_strncasecmp
 *
 * Length-limited variant of the case-independent comparison.  The inputs
 * need not be NUL-terminated: no more than n bytes are read from either
 * string, and the scan also stops at the first NUL found in s1 once both
 * strings have matched up to that point.
 *
 * Folding rules: 'A'..'Z' are shifted to lower case arithmetically, and a
 * byte with the high bit set goes through tolower() when isupper() says it
 * is upper case.  Folding is applied only to byte pairs that differ.
 *
 * Returns 0 if no mismatch is found within the examined bytes; otherwise
 * the difference (first minus second) of the first mismatching folded pair.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	size_t		remaining;

	for (remaining = n; remaining > 0; remaining--)
	{
		unsigned char c1 = (unsigned char) *s1++;
		unsigned char c2 = (unsigned char) *s2++;

		if (c1 != c2)
		{
			if (c1 >= 'A' && c1 <= 'Z')
				c1 += 'a' - 'A';
			else if (IS_HIGHBIT_SET(c1) && isupper(c1))
				c1 = tolower(c1);

			if (c2 >= 'A' && c2 <= 'Z')
				c2 += 'a' - 'A';
			else if (IS_HIGHBIT_SET(c2) && isupper(c2))
				c2 = tolower(c2);

			if (c1 != c2)
				return (int) c1 - (int) c2;
		}

		/* Both strings ended together before the byte budget ran out. */
		if (c1 == 0)
			return 0;
	}

	return 0;
}
96 : :
/*
 * pg_toupper
 *
 * Return the upper-case form of a single byte.
 *
 * 'a'..'z' are converted arithmetically.  A byte with the high bit set is
 * handed to toupper() only when islower() reports it as lower case.  Every
 * other byte is returned unchanged, so the function may be called on any
 * value, not just lower-case letters.
 *
 * This works one byte at a time, so the result is of doubtful meaning for
 * multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	if (ch >= 'a' && ch <= 'z')
		return (unsigned char) (ch + ('A' - 'a'));

	if (IS_HIGHBIT_SET(ch) && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
113 : :
/*
 * pg_tolower
 *
 * Return the lower-case form of a single byte.
 *
 * 'A'..'Z' are converted arithmetically.  A byte with the high bit set is
 * handed to tolower() only when isupper() reports it as upper case.  Every
 * other byte is returned unchanged, so the function may be called on any
 * value, not just upper-case letters.
 *
 * This works one byte at a time, so the result is of doubtful meaning for
 * multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	if (ch >= 'A' && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	if (IS_HIGHBIT_SET(ch) && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
130 : :
/*
 * pg_ascii_toupper
 *
 * Upper-case a single byte using C/POSIX locale rules: only 'a'..'z' are
 * changed, and every other byte value is returned as-is.  No C library
 * locale functions are consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	const int	is_ascii_lower = (ch >= 'a' && ch <= 'z');

	return is_ascii_lower ? (unsigned char) (ch - ('a' - 'A')) : ch;
}
141 : :
/*
 * pg_ascii_tolower
 *
 * Lower-case a single byte using C/POSIX locale rules: only 'A'..'Z' are
 * changed, and every other byte value is returned as-is.  No C library
 * locale functions are consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	const int	is_ascii_upper = (ch >= 'A' && ch <= 'Z');

	return is_ascii_upper ? (unsigned char) (ch + ('a' - 'A')) : ch;
}
|