Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgstrcasecmp.c
4 : * Portable SQL-like case-independent comparisons and conversions.
5 : *
6 : * SQL99 specifies Unicode-aware case normalization, which we don't yet
7 : * have the infrastructure for. Instead we use tolower() to provide a
8 : * locale-aware translation. However, there are some locales where this
9 : * is not right either (eg, Turkish may do strange things with 'i' and
10 : * 'I'). Our current compromise is to use tolower() for characters with
11 : * the high bit set, and use an ASCII-only downcasing for 7-bit
12 : * characters.
13 : *
14 : * NB: this code should match downcase_truncate_identifier() in scansup.c.
15 : *
16 : * We also provide strict ASCII-only case conversion functions, which can
17 : * be used to implement C/POSIX case folding semantics no matter what the
18 : * C library thinks the locale is.
19 : *
20 : *
21 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
22 : *
23 : * src/port/pgstrcasecmp.c
24 : *
25 : *-------------------------------------------------------------------------
26 : */
27 : #include "c.h"
28 :
29 : #include <ctype.h>
30 :
31 :
/*
 * pg_strcasecmp
 *
 * Compare two NUL-terminated strings without regard to letter case.
 *
 * Returns zero when the strings match after case folding.  Otherwise the
 * result is the difference between the first pair of folded bytes that
 * disagree, both taken as unsigned char values.
 *
 * 7-bit upper-case letters are folded with plain ASCII arithmetic; bytes
 * with the high bit set are folded through isupper()/tolower().
 */
int
pg_strcasecmp(const char *s1, const char *s2)
{
	unsigned char c1;
	unsigned char c2;

	do
	{
		c1 = (unsigned char) *s1++;
		c2 = (unsigned char) *s2++;

		/* Identical bytes stay identical after folding; skip the work. */
		if (c1 == c2)
			continue;

		if ('A' <= c1 && c1 <= 'Z')
			c1 = (unsigned char) (c1 + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(c1) && isupper(c1))
			c1 = (unsigned char) tolower(c1);

		if ('A' <= c2 && c2 <= 'Z')
			c2 = (unsigned char) (c2 + ('a' - 'A'));
		else if (IS_HIGHBIT_SET(c2) && isupper(c2))
			c2 = (unsigned char) tolower(c2);

		/* Still different once folded: this pair decides the result. */
		if (c1 != c2)
			return (int) c1 - (int) c2;
	} while (c1 != 0);

	/* Reached the terminator with every byte pair matching. */
	return 0;
}
63 :
/*
 * pg_strncasecmp
 *
 * Case-insensitive comparison of two strings that need not be
 * NUL-terminated.  No more than n bytes are read from either string, and
 * the scan also stops at the first position where both strings hold a zero
 * byte after folding.
 *
 * Returns zero when no difference was found within the examined bytes.
 * Otherwise the result is the difference between the first pair of folded
 * bytes that disagree, both taken as unsigned char values.
 */
int
pg_strncasecmp(const char *s1, const char *s2, size_t n)
{
	for (; n > 0; n--)
	{
		unsigned char c1 = (unsigned char) *s1++;
		unsigned char c2 = (unsigned char) *s2++;

		/* Folding is only needed when the raw bytes disagree. */
		if (c1 != c2)
		{
			if ('A' <= c1 && c1 <= 'Z')
				c1 = (unsigned char) (c1 + ('a' - 'A'));
			else if (IS_HIGHBIT_SET(c1) && isupper(c1))
				c1 = (unsigned char) tolower(c1);

			if ('A' <= c2 && c2 <= 'Z')
				c2 = (unsigned char) (c2 + ('a' - 'A'));
			else if (IS_HIGHBIT_SET(c2) && isupper(c2))
				c2 = (unsigned char) tolower(c2);

			if (c1 != c2)
				return (int) c1 - (int) c2;
		}

		/* Both strings ended here without a mismatch. */
		if (c1 == 0)
			return 0;
	}

	/* All n bytes compared equal. */
	return 0;
}
96 :
/*
 * pg_toupper
 *
 * Return the upper-case form of ch.
 *
 * Any byte value is acceptable as input; bytes that are not lower-case
 * letters come back unchanged.  7-bit letters are converted with ASCII
 * arithmetic, while bytes with the high bit set are handed to
 * islower()/toupper().  Since this works one byte at a time, it is of
 * limited meaning for multibyte character sets.
 */
unsigned char
pg_toupper(unsigned char ch)
{
	/* Plain ASCII lower-case letter: shift into the upper-case range. */
	if ('a' <= ch && ch <= 'z')
		return (unsigned char) (ch + ('A' - 'a'));

	/* High-bit byte: let the C library decide. */
	if (IS_HIGHBIT_SET(ch) && islower(ch))
		return (unsigned char) toupper(ch);

	return ch;
}
113 :
/*
 * pg_tolower
 *
 * Return the lower-case form of ch.
 *
 * Any byte value is acceptable as input; bytes that are not upper-case
 * letters come back unchanged.  7-bit letters are converted with ASCII
 * arithmetic, while bytes with the high bit set are handed to
 * isupper()/tolower().  Since this works one byte at a time, it is of
 * limited meaning for multibyte character sets.
 */
unsigned char
pg_tolower(unsigned char ch)
{
	/* Plain ASCII upper-case letter: shift into the lower-case range. */
	if ('A' <= ch && ch <= 'Z')
		return (unsigned char) (ch + ('a' - 'A'));

	/* High-bit byte: let the C library decide. */
	if (IS_HIGHBIT_SET(ch) && isupper(ch))
		return (unsigned char) tolower(ch);

	return ch;
}
130 :
/*
 * pg_ascii_toupper
 *
 * Upper-case ch using C/POSIX locale rules only: the ASCII letters
 * 'a'..'z' map to 'A'..'Z', and every other byte value is returned
 * unchanged.  No C library locale facilities are consulted.
 */
unsigned char
pg_ascii_toupper(unsigned char ch)
{
	return ('a' <= ch && ch <= 'z')
		? (unsigned char) (ch + ('A' - 'a'))
		: ch;
}
141 :
/*
 * pg_ascii_tolower
 *
 * Lower-case ch using C/POSIX locale rules only: the ASCII letters
 * 'A'..'Z' map to 'a'..'z', and every other byte value is returned
 * unchanged.  No C library locale facilities are consulted.
 */
unsigned char
pg_ascii_tolower(unsigned char ch)
{
	return ('A' <= ch && ch <= 'Z')
		? (unsigned char) (ch + ('a' - 'A'))
		: ch;
}
|