Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * collationcmds.c
4 : * collation-related commands support code
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/collationcmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/htup_details.h"
18 : #include "access/table.h"
19 : #include "access/xact.h"
20 : #include "catalog/dependency.h"
21 : #include "catalog/indexing.h"
22 : #include "catalog/namespace.h"
23 : #include "catalog/objectaccess.h"
24 : #include "catalog/pg_collation.h"
25 : #include "catalog/pg_database.h"
26 : #include "catalog/pg_namespace.h"
27 : #include "commands/alter.h"
28 : #include "commands/collationcmds.h"
29 : #include "commands/comment.h"
30 : #include "commands/dbcommands.h"
31 : #include "commands/defrem.h"
32 : #include "common/string.h"
33 : #include "mb/pg_wchar.h"
34 : #include "miscadmin.h"
35 : #include "utils/acl.h"
36 : #include "utils/builtins.h"
37 : #include "utils/lsyscache.h"
38 : #include "utils/pg_locale.h"
39 : #include "utils/rel.h"
40 : #include "utils/syscache.h"
41 :
42 :
/*
 * One pending alias collation collected by pg_import_system_collations()
 * while reading "locale -a" output; the aliases are sorted and created only
 * after all of the output has been read.
 */
43 : typedef struct
44 : {
45 : char *localename; /* full locale name, as reported by "locale -a" */
46 : char *alias; /* shortened alias for same (encoding tag stripped) */
47 : int enc; /* encoding returned by create_collation_from_locale() for localename */
48 : } CollAliasData;
49 :
50 :
51 : /*
52 : * CREATE COLLATION
 *
 * Create the collation named by 'names' from the DefElem options listed in
 * 'parameters'. 'pstate' is used only to report error positions.
 *
 * The caller must have CREATE privilege on the target namespace.
 *
 * Recognized options are "from", "locale", "lc_collate", "lc_ctype",
 * "provider", "deterministic", "rules" and "version". Any other option
 * name is a syntax error, and giving the same option twice is reported via
 * errorConflictingDefElem(). "locale" may not be combined with
 * "lc_collate" or "lc_ctype", and "from" must be the only option given.
 *
 * With "from", provider, determinism, encoding, collate/ctype, ICU locale
 * and ICU rules are copied from the existing collation; a collation whose
 * provider is COLLPROVIDER_DEFAULT cannot be copied. Without "from", the
 * provider defaults to libc and "deterministic" defaults to true.
 *
 * If no version was supplied, it is obtained from
 * get_collation_actual_version().
 *
 * Returns the address of the new collation, or InvalidObjectAddress when
 * CollationCreate() does not return a valid OID.
53 : */
54 : ObjectAddress
2251 peter_e 55 GIC 185 : DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
56 : {
4439 peter_e 57 ECB : char *collName;
58 : Oid collNamespace;
59 : AclResult aclresult;
60 : ListCell *pl;
4382 bruce 61 GIC 185 : DefElem *fromEl = NULL;
62 185 : DefElem *localeEl = NULL;
4382 bruce 63 CBC 185 : DefElem *lccollateEl = NULL;
64 185 : DefElem *lcctypeEl = NULL;
2208 peter_e 65 185 : DefElem *providerEl = NULL;
1479 peter 66 185 : DefElem *deterministicEl = NULL;
32 peter 67 GNC 185 : DefElem *rulesEl = NULL;
702 tmunro 68 CBC 185 : DefElem *versionEl = NULL;
394 peter 69 ECB : char *collcollate;
70 : char *collctype;
388 71 : char *colliculocale;
72 : char *collicurules;
73 : bool collisdeterministic;
74 : int collencoding;
75 : char collprovider;
702 tmunro 76 GIC 185 : char *collversion = NULL;
77 : Oid newoid;
78 : ObjectAddress address;
79 :
4439 peter_e 80 CBC 185 : collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
81 :
147 peter 82 GNC 185 : aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
4439 peter_e 83 GIC 185 : if (aclresult != ACLCHECK_OK)
1954 peter_e 84 LBC 0 : aclcheck_error(aclresult, OBJECT_SCHEMA,
4439 peter_e 85 UIC 0 : get_namespace_name(collNamespace));
4439 peter_e 86 ECB :
/* Sort each option into its slot; unknown names and duplicates are errors. */
4439 peter_e 87 CBC 503 : foreach(pl, parameters)
4439 peter_e 88 EUB : {
2190 tgl 89 GBC 339 : DefElem *defel = lfirst_node(DefElem, pl);
90 : DefElem **defelp;
4439 peter_e 91 ECB :
1899 tgl 92 GIC 339 : if (strcmp(defel->defname, "from") == 0)
4439 peter_e 93 CBC 44 : defelp = &fromEl;
1899 tgl 94 GIC 295 : else if (strcmp(defel->defname, "locale") == 0)
4439 peter_e 95 104 : defelp = &localeEl;
1899 tgl 96 CBC 191 : else if (strcmp(defel->defname, "lc_collate") == 0)
4439 peter_e 97 34 : defelp = &lccollateEl;
1899 tgl 98 157 : else if (strcmp(defel->defname, "lc_ctype") == 0)
4439 peter_e 99 28 : defelp = &lcctypeEl;
1899 tgl 100 129 : else if (strcmp(defel->defname, "provider") == 0)
2208 peter_e 101 82 : defelp = &providerEl;
1479 peter 102 47 : else if (strcmp(defel->defname, "deterministic") == 0)
103 28 : defelp = &deterministicEl;
32 peter 104 GNC 19 : else if (strcmp(defel->defname, "rules") == 0)
105 6 : defelp = &rulesEl;
702 tmunro 106 CBC 13 : else if (strcmp(defel->defname, "version") == 0)
107 10 : defelp = &versionEl;
4439 peter_e 108 ECB : else
109 : {
4439 peter_e 110 CBC 3 : ereport(ERROR,
4439 peter_e 111 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
112 : errmsg("collation attribute \"%s\" not recognized",
2406 113 : defel->defname),
114 : parser_errposition(pstate, defel->location)));
115 : break;
4439 116 : }
630 dean.a.rasheed 117 GIC 336 : if (*defelp != NULL)
118 18 : errorConflictingDefElem(defel, pstate);
4439 peter_e 119 318 : *defelp = defel;
120 : }
121 :
630 dean.a.rasheed 122 164 : if (localeEl && (lccollateEl || lcctypeEl))
630 dean.a.rasheed 123 CBC 9 : ereport(ERROR,
630 dean.a.rasheed 124 ECB : errcode(ERRCODE_SYNTAX_ERROR),
125 : errmsg("conflicting or redundant options"),
126 : errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
127 :
630 dean.a.rasheed 128 CBC 155 : if (fromEl && list_length(parameters) != 1)
4439 peter_e 129 3 : ereport(ERROR,
130 : errcode(ERRCODE_SYNTAX_ERROR),
131 : errmsg("conflicting or redundant options"),
132 : errdetail("FROM cannot be specified together with any other options."));
133 :
/* FROM: copy the properties of an existing collation. */
134 152 : if (fromEl)
4439 peter_e 135 ECB : {
136 : Oid collid;
137 : HeapTuple tp;
138 : Datum datum;
139 : bool isnull;
140 :
4382 bruce 141 GIC 41 : collid = get_collation_oid(defGetQualifiedName(fromEl), false);
4439 peter_e 142 35 : tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
143 35 : if (!HeapTupleIsValid(tp))
4439 peter_e 144 UIC 0 : elog(ERROR, "cache lookup failed for collation %u", collid);
145 :
2208 peter_e 146 GIC 35 : collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
1479 peter 147 CBC 35 : collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
2109 peter_e 148 35 : collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
4439 peter_e 149 ECB :
437 peter 150 GBC 35 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
437 peter 151 GIC 35 : if (!isnull)
437 peter 152 CBC 26 : collcollate = TextDatumGetCString(datum);
437 peter 153 ECB : else
437 peter 154 CBC 9 : collcollate = NULL;
155 :
156 35 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
157 35 : if (!isnull)
158 26 : collctype = TextDatumGetCString(datum);
159 : else
160 9 : collctype = NULL;
161 :
388 162 35 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
163 35 : if (!isnull)
164 6 : colliculocale = TextDatumGetCString(datum);
165 : else
166 29 : colliculocale = NULL;
167 :
168 : /*
169 : * When the ICU locale comes from an existing collation, do not
170 : * canonicalize to a language tag.
171 : */
172 :
32 peter 173 GNC 35 : datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
174 35 : if (!isnull)
32 peter 175 UNC 0 : collicurules = TextDatumGetCString(datum);
176 : else
32 peter 177 GNC 35 : collicurules = NULL;
178 :
4439 peter_e 179 CBC 35 : ReleaseSysCache(tp);
2126 peter_e 180 ECB :
181 : /*
182 : * Copying the "default" collation is not allowed because most code
183 : * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
184 : * and so having a second collation with COLLPROVIDER_DEFAULT would
185 : * not work and potentially confuse or crash some code. This could be
186 : * fixed with some legwork.
187 : */
2126 peter_e 188 GIC 35 : if (collprovider == COLLPROVIDER_DEFAULT)
189 3 : ereport(ERROR,
2126 peter_e 190 ECB : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
191 : errmsg("collation \"default\" cannot be copied")));
4439 peter_e 192 EUB : }
193 : else
4439 peter_e 194 ECB : {
/* No FROM: derive every property from the explicitly given options. */
394 peter 195 GIC 111 : char *collproviderstr = NULL;
4439 peter_e 196 ECB :
394 peter 197 GIC 111 : collcollate = NULL;
198 111 : collctype = NULL;
388 199 111 : colliculocale = NULL;
32 peter 200 GNC 111 : collicurules = NULL;
201 :
394 peter 202 GIC 111 : if (providerEl)
203 76 : collproviderstr = defGetString(providerEl);
204 :
205 111 : if (deterministicEl)
394 peter 206 CBC 22 : collisdeterministic = defGetBoolean(deterministicEl);
2208 peter_e 207 ECB : else
394 peter 208 GIC 89 : collisdeterministic = true;
209 :
32 peter 210 GNC 111 : if (rulesEl)
211 6 : collicurules = defGetString(rulesEl);
212 :
394 peter 213 GIC 111 : if (versionEl)
214 1 : collversion = defGetString(versionEl);
215 :
394 peter 216 CBC 111 : if (collproviderstr)
217 : {
218 76 : if (pg_strcasecmp(collproviderstr, "icu") == 0)
219 76 : collprovider = COLLPROVIDER_ICU;
394 peter 220 LBC 0 : else if (pg_strcasecmp(collproviderstr, "libc") == 0)
221 0 : collprovider = COLLPROVIDER_LIBC;
222 : else
223 0 : ereport(ERROR,
394 peter 224 ECB : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
225 : errmsg("unrecognized collation provider: %s",
226 : collproviderstr)));
227 : }
228 : else
394 peter 229 CBC 35 : collprovider = COLLPROVIDER_LIBC;
230 :
/* LOCALE sets both collate and ctype for libc, or the ICU locale otherwise. */
388 231 111 : if (localeEl)
388 peter 232 ECB : {
388 peter 233 GIC 89 : if (collprovider == COLLPROVIDER_LIBC)
388 peter 234 ECB : {
388 peter 235 CBC 16 : collcollate = defGetString(localeEl);
388 peter 236 GIC 16 : collctype = defGetString(localeEl);
388 peter 237 ECB : }
238 : else
388 peter 239 CBC 73 : colliculocale = defGetString(localeEl);
388 peter 240 ECB : }
2208 peter_e 241 EUB :
388 peter 242 GBC 111 : if (lccollateEl)
388 peter 243 GIC 22 : collcollate = defGetString(lccollateEl);
388 peter 244 EUB :
388 peter 245 GIC 111 : if (lcctypeEl)
246 16 : collctype = defGetString(lcctypeEl);
247 :
248 111 : if (collprovider == COLLPROVIDER_LIBC)
249 : {
388 peter 250 CBC 35 : if (!collcollate)
388 peter 251 UIC 0 : ereport(ERROR,
388 peter 252 ECB : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
253 : errmsg("parameter \"lc_collate\" must be specified")));
254 :
388 peter 255 GIC 35 : if (!collctype)
388 peter 256 CBC 3 : ereport(ERROR,
388 peter 257 ECB : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
258 : errmsg("parameter \"lc_ctype\" must be specified")));
259 : }
388 peter 260 CBC 76 : else if (collprovider == COLLPROVIDER_ICU)
261 : {
388 peter 262 GIC 76 : if (!colliculocale)
388 peter 263 CBC 3 : ereport(ERROR,
388 peter 264 ECB : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
265 : errmsg("parameter \"locale\" must be specified")));
266 :
267 : /*
268 : * During binary upgrade, preserve the locale string. Otherwise,
269 : * canonicalize to a language tag.
270 : */
5 jdavis 271 GNC 73 : if (!IsBinaryUpgrade)
272 : {
273 72 : char *langtag = icu_language_tag(colliculocale,
274 : icu_validation_level);
275 :
276 69 : if (langtag && strcmp(colliculocale, langtag) != 0)
277 : {
278 52 : ereport(NOTICE,
279 : (errmsg("using standard form \"%s\" for locale \"%s\"",
280 : langtag, colliculocale)));
281 :
282 52 : colliculocale = langtag;
283 : }
284 : }
285 :
12 286 70 : icu_validate_locale(colliculocale);
388 peter 287 ECB : }
4439 peter_e 288 :
289 : /*
394 peter 290 : * Nondeterministic collations are currently only supported with ICU
291 : * because that's the only case where it can actually make a
332 tgl 292 : * difference. So we can save writing the code for the other
332 tgl 293 EUB : * providers.
294 : */
394 peter 295 GIC 96 : if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
296 3 : ereport(ERROR,
394 peter 297 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 : errmsg("nondeterministic collations not supported with this provider")));
299 :
31 peter 300 GNC 93 : if (collicurules && collprovider != COLLPROVIDER_ICU)
31 peter 301 UNC 0 : ereport(ERROR,
302 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
303 : errmsg("ICU rules cannot be specified unless locale provider is ICU")));
304 :
2109 peter_e 305 GIC 93 : if (collprovider == COLLPROVIDER_ICU)
306 : {
583 tgl 307 ECB : #ifdef USE_ICU
308 : /*
309 : * We could create ICU collations with collencoding == database
310 : * encoding, but it seems better to use -1 so that it matches the
311 : * way initdb would create ICU collations. However, only allow
312 : * one to be created when the current database's encoding is
313 : * supported. Otherwise the collation is useless, plus we get
314 : * surprising behaviors like not being able to drop the collation.
315 : *
316 : * Skip this test when !USE_ICU, because the error we want to
317 : * throw for that isn't thrown till later.
318 : */
583 tgl 319 GIC 64 : if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
583 tgl 320 LBC 0 : ereport(ERROR,
321 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
322 : errmsg("current database's encoding is not supported with this provider")));
583 tgl 323 ECB : #endif
2109 peter_e 324 GIC 64 : collencoding = -1;
583 tgl 325 ECB : }
326 : else
327 : {
2109 peter_e 328 GIC 29 : collencoding = GetDatabaseEncoding();
2109 peter_e 329 CBC 29 : check_encoding_locale_matches(collencoding, collcollate, collctype);
330 : }
331 : }
332 :
/* Unless a version was given explicitly, ask for the locale's actual version. */
702 tmunro 333 125 : if (!collversion)
388 peter 334 GIC 124 : collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate);
335 :
4419 peter_e 336 125 : newoid = CollationCreate(collName,
337 : collNamespace,
338 : GetUserId(),
339 : collprovider,
340 : collisdeterministic,
341 : collencoding,
4412 tgl 342 ECB : collcollate,
2272 peter_e 343 : collctype,
344 : colliculocale,
345 : collicurules,
346 : collversion,
347 : if_not_exists,
2116 tgl 348 : false); /* not quiet */
2272 peter_e 349 EUB :
2272 peter_e 350 GIC 118 : if (!OidIsValid(newoid))
351 7 : return InvalidObjectAddress;
352 :
2077 tgl 353 ECB : /*
354 : * Check that the locales can be loaded. NB: pg_newlocale_from_collation
355 : * is only supposed to be called on non-C-equivalent locales.
356 : */
4419 peter_e 357 GIC 111 : CommandCounterIncrement();
2077 tgl 358 111 : if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid))
359 82 : (void) pg_newlocale_from_collation(newoid);
360 :
361 105 : ObjectAddressSet(address, CollationRelationId, newoid);
362 :
2959 alvherre 363 105 : return address;
364 : }
365 :
366 : /*
3736 alvherre 367 ECB : * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
3736 alvherre 368 EUB : *
369 : * Is there a collation with the same name of the given collation already in
370 : * the given namespace? If so, raise an appropriate error message.
371 : */
3736 alvherre 372 ECB : void
3736 alvherre 373 GIC 18 : IsThereCollationInNamespace(const char *collname, Oid nspOid)
374 : {
375 : /* make sure the name doesn't already exist in new schema */
4412 tgl 376 CBC 18 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
3736 alvherre 377 ECB : CStringGetDatum(collname),
378 : Int32GetDatum(GetDatabaseEncoding()),
379 : ObjectIdGetDatum(nspOid)))
4412 tgl 380 GIC 3 : ereport(ERROR,
4412 tgl 381 ECB : (errcode(ERRCODE_DUPLICATE_OBJECT),
382 : errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
383 : collname, GetDatabaseEncodingName(),
3736 alvherre 384 : get_namespace_name(nspOid))));
385 :
386 : /* mustn't match an any-encoding entry, either */
4412 tgl 387 GIC 15 : if (SearchSysCacheExists3(COLLNAMEENCNSP,
388 : CStringGetDatum(collname),
389 : Int32GetDatum(-1),
390 : ObjectIdGetDatum(nspOid)))
391 3 : ereport(ERROR,
392 : (errcode(ERRCODE_DUPLICATE_OBJECT),
393 : errmsg("collation \"%s\" already exists in schema \"%s\"",
394 : collname, get_namespace_name(nspOid))));
4439 peter_e 395 12 : }
396 :
397 : /*
702 tmunro 398 ECB : * ALTER COLLATION
399 : */
400 : ObjectAddress
702 tmunro 401 GIC 6 : AlterCollation(AlterCollationStmt *stmt)
402 : {
403 : Relation rel;
404 : Oid collOid;
702 tmunro 405 ECB : HeapTuple tup;
406 : Form_pg_collation collForm;
437 peter 407 : Datum datum;
408 : bool isnull;
702 tmunro 409 : char *oldversion;
410 : char *newversion;
411 : ObjectAddress address;
412 :
702 tmunro 413 GIC 6 : rel = table_open(CollationRelationId, RowExclusiveLock);
414 6 : collOid = get_collation_oid(stmt->collname, false);
415 :
162 jdavis 416 GNC 6 : if (collOid == DEFAULT_COLLATION_OID)
162 jdavis 417 UNC 0 : ereport(ERROR,
418 : (errmsg("cannot refresh version of default collation"),
419 : errhint("Use ALTER DATABASE ... REFRESH COLLATION VERSION instead.")));
420 :
147 peter 421 GNC 6 : if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
702 tmunro 422 UIC 0 : aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
423 0 : NameListToString(stmt->collname));
424 :
702 tmunro 425 GIC 6 : tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
702 tmunro 426 CBC 6 : if (!HeapTupleIsValid(tup))
702 tmunro 427 UIC 0 : elog(ERROR, "cache lookup failed for collation %u", collOid);
428 :
702 tmunro 429 CBC 6 : collForm = (Form_pg_collation) GETSTRUCT(tup);
437 peter 430 GIC 6 : datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
431 6 : oldversion = isnull ? NULL : TextDatumGetCString(datum);
432 :
15 dgustafsson 433 GNC 6 : datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
437 peter 434 GIC 6 : newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum));
435 :
436 : /* cannot change from NULL to non-NULL or vice versa */
702 tmunro 437 6 : if ((!oldversion && newversion) || (oldversion && !newversion))
702 tmunro 438 LBC 0 : elog(ERROR, "invalid collation version change");
702 tmunro 439 GIC 6 : else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
702 tmunro 440 UIC 0 : {
441 : bool nulls[Natts_pg_collation];
702 tmunro 442 ECB : bool replaces[Natts_pg_collation];
443 : Datum values[Natts_pg_collation];
444 :
702 tmunro 445 UIC 0 : ereport(NOTICE,
702 tmunro 446 ECB : (errmsg("changing version from %s to %s",
447 : oldversion, newversion)));
448 :
702 tmunro 449 UIC 0 : memset(values, 0, sizeof(values));
450 0 : memset(nulls, false, sizeof(nulls));
451 0 : memset(replaces, false, sizeof(replaces));
702 tmunro 452 ECB :
702 tmunro 453 UIC 0 : values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
454 0 : replaces[Anum_pg_collation_collversion - 1] = true;
455 :
456 0 : tup = heap_modify_tuple(tup, RelationGetDescr(rel),
457 : values, nulls, replaces);
458 : }
459 : else
702 tmunro 460 GIC 6 : ereport(NOTICE,
461 : (errmsg("version has not changed")));
462 :
463 6 : CatalogTupleUpdate(rel, &tup->t_self, tup);
702 tmunro 464 ECB :
702 tmunro 465 CBC 6 : InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
466 :
467 6 : ObjectAddressSet(address, CollationRelationId, collOid);
702 tmunro 468 EUB :
702 tmunro 469 GIC 6 : heap_freetuple(tup);
470 6 : table_close(rel, NoLock);
471 :
702 tmunro 472 CBC 6 : return address;
702 tmunro 473 EUB : }
474 :
475 :
2208 peter_e 476 ECB : Datum
772 tmunro 477 LBC 0 : pg_collation_actual_version(PG_FUNCTION_ARGS)
2208 peter_e 478 EUB : {
162 jdavis 479 UNC 0 : Oid collid = PG_GETARG_OID(0);
480 : char provider;
481 : char *locale;
482 : char *version;
483 : Datum datum;
162 jdavis 484 ECB :
162 jdavis 485 UNC 0 : if (collid == DEFAULT_COLLATION_OID)
486 : {
487 : /* retrieve from pg_database */
488 :
489 0 : HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
490 0 : if (!HeapTupleIsValid(dbtup))
491 0 : ereport(ERROR,
492 : (errcode(ERRCODE_UNDEFINED_OBJECT),
493 : errmsg("database with OID %u does not exist", MyDatabaseId)));
494 :
495 0 : provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
496 :
15 dgustafsson 497 0 : datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup,
498 : provider == COLLPROVIDER_ICU ?
499 : Anum_pg_database_daticulocale : Anum_pg_database_datcollate);
500 :
162 jdavis 501 0 : locale = TextDatumGetCString(datum);
502 :
503 0 : ReleaseSysCache(dbtup);
388 peter 504 EUB : }
505 : else
506 : {
507 : /* retrieve from pg_collation */
508 :
162 jdavis 509 UNC 0 : HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
510 0 : if (!HeapTupleIsValid(colltp))
511 0 : ereport(ERROR,
512 : (errcode(ERRCODE_UNDEFINED_OBJECT),
513 : errmsg("collation with OID %u does not exist", collid)));
162 jdavis 514 EUB :
162 jdavis 515 UNC 0 : provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
516 0 : Assert(provider != COLLPROVIDER_DEFAULT);
15 dgustafsson 517 0 : datum = SysCacheGetAttrNotNull(COLLOID, colltp,
518 : provider == COLLPROVIDER_ICU ?
519 : Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
520 :
162 jdavis 521 0 : locale = TextDatumGetCString(datum);
522 :
523 0 : ReleaseSysCache(colltp);
524 : }
525 :
526 0 : version = get_collation_actual_version(provider, locale);
2208 peter_e 527 UIC 0 : if (version)
2208 peter_e 528 UBC 0 : PG_RETURN_TEXT_P(cstring_to_text(version));
529 : else
2208 peter_e 530 UIC 0 : PG_RETURN_NULL();
531 : }
2208 peter_e 532 ECB :
533 :
534 : /* will we use "locale -a" in pg_import_system_collations? */
2101 tgl 535 : #if defined(HAVE_LOCALE_T) && !defined(WIN32)
536 : #define READ_LOCALE_A_OUTPUT
537 : #endif
538 :
539 : /* will we use EnumSystemLocalesEx in pg_import_system_collations? */
540 : #ifdef WIN32
541 : #define ENUM_SYSTEM_LOCALE
542 : #endif
543 :
544 :
2116 545 : #ifdef READ_LOCALE_A_OUTPUT
/*
 * "Normalize" a libc locale name by removing encoding tags: each '.' and
 * the run of ASCII letters, digits and '-' that follows it is dropped
 * (e.g., "en_US.utf8" -> "en_US", and "br_FR.iso885915@euro" ->
 * "br_FR@euro").
 *
 * The result is written, NUL-terminated, to 'new', which the caller must
 * size to hold at least strlen(old) + 1 bytes.  Returns true if anything
 * was removed, i.e. the output differs from the input.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	bool		stripped = false;
	char	   *dst = new;
	const char *src = old;

	while (*src != '\0')
	{
		char		c;

		if (*src != '.')
		{
			/* ordinary character: copy through */
			*dst++ = *src++;
			continue;
		}

		/* skip the '.' and then the encoding tag, e.g. "utf8" or "UTF-8" */
		for (src++;; src++)
		{
			c = *src;
			if (!((c >= 'A' && c <= 'Z') ||
				  (c >= 'a' && c <= 'z') ||
				  (c >= '0' && c <= '9') ||
				  c == '-'))
				break;
		}
		stripped = true;
	}
	*dst = '\0';

	return stripped;
}
2272 peter_e 579 EUB :
580 : /*
2116 tgl 581 : * qsort comparator for CollAliasData items
582 : */
583 : static int
2116 tgl 584 GIC 244320 : cmpaliases(const void *a, const void *b)
585 : {
586 244320 : const CollAliasData *ca = (const CollAliasData *) a;
2116 tgl 587 GBC 244320 : const CollAliasData *cb = (const CollAliasData *) b;
2116 tgl 588 EUB :
589 : /* comparing localename is enough because other fields are derived */
2116 tgl 590 GIC 244320 : return strcmp(ca->localename, cb->localename);
591 : }
592 : #endif /* READ_LOCALE_A_OUTPUT */
2116 tgl 593 EUB :
2272 peter_e 594 :
2208 595 : #ifdef USE_ICU
596 : /*
597 : * Get a comment (specifically, the display name) for an ICU locale.
598 : * The result is a palloc'd string, or NULL if we can't get a comment
599 : * or find that it's not all ASCII. (We can *not* accept non-ASCII
600 : * comments, because the contents of template0 must be encoding-agnostic.)
601 : */
602 : static char *
2208 peter_e 603 GIC 244218 : get_icu_locale_comment(const char *localename)
604 : {
605 : UErrorCode status;
606 : UChar displayname[128];
607 : int32 len_uchar;
608 : int32 i;
609 : char *result;
610 :
2208 peter_e 611 CBC 244218 : status = U_ZERO_ERROR;
2116 tgl 612 GIC 244218 : len_uchar = uloc_getDisplayName(localename, "en",
2116 tgl 613 ECB : displayname, lengthof(displayname),
614 : &status);
2208 peter_e 615 CBC 244218 : if (U_FAILURE(status))
2115 tgl 616 UIC 0 : return NULL; /* no good reason to raise an error */
2115 tgl 617 ECB :
618 : /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
2115 tgl 619 CBC 4152312 : for (i = 0; i < len_uchar; i++)
620 : {
2115 tgl 621 GIC 3912033 : if (displayname[i] > 127)
2115 tgl 622 CBC 3939 : return NULL;
2115 tgl 623 ECB : }
2208 peter_e 624 :
2115 tgl 625 : /* OK, transcribe */
2115 tgl 626 CBC 240279 : result = palloc(len_uchar + 1);
627 4112013 : for (i = 0; i < len_uchar; i++)
628 3871734 : result[i] = displayname[i];
2115 tgl 629 GIC 240279 : result[len_uchar] = '\0';
630 :
2208 peter_e 631 CBC 240279 : return result;
632 : }
2118 tgl 633 ECB : #endif /* USE_ICU */
634 :
2208 peter_e 635 :
636 : /*
637 : * Create a new collation using the input locale 'locale'. (subroutine for
638 : * pg_import_system_collations())
639 : *
640 : * 'nspid' is the namespace id where the collation will be created.
641 : *
642 : * 'nvalidp' is incremented if the locale has a valid encoding.
643 : *
644 : * 'ncreatedp' is incremented if the collation is actually created. If the
645 : * collation already exists it will quietly do nothing.
646 : *
647 : * The returned value is the encoding of the locale, -1 if the locale is not
648 : * valid for creating a collation.
649 : *
650 : */
651 : pg_attribute_unused()
652 : static int
96 peter 653 GNC 152106 : create_collation_from_locale(const char *locale, int nspid,
654 : int *nvalidp, int *ncreatedp)
655 : {
656 : int enc;
657 : Oid collid;
658 :
659 : /*
660 : * Some systems have locale names that don't consist entirely of
661 : * ASCII letters (such as "bokmål" or "français").
662 : * This is pretty silly, since we need the locale itself to
663 : * interpret the non-ASCII characters. We can't do much with
664 : * those, so we filter them out.
665 : */
666 152106 : if (!pg_is_ascii(locale))
667 : {
96 peter 668 UNC 0 : elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
669 0 : return -1;
670 : }
671 :
96 peter 672 GNC 152106 : enc = pg_get_encoding_from_locale(locale, false);
673 152106 : if (enc < 0)
674 : {
675 1818 : elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
676 1818 : return -1;
677 : }
678 150288 : if (!PG_VALID_BE_ENCODING(enc))
679 : {
680 1515 : elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
681 1515 : return -1;
682 : }
683 148773 : if (enc == PG_SQL_ASCII)
684 606 : return -1; /* C/POSIX are already in the catalog */
685 :
686 : /* count valid locales found in operating system */
687 148167 : (*nvalidp)++;
688 :
689 : /*
690 : * Create a collation named the same as the locale, but quietly
691 : * doing nothing if it already exists. This is the behavior we
692 : * need even at initdb time, because some versions of "locale -a"
693 : * can report the same locale name more than once. And it's
694 : * convenient for later import runs, too, since you just about
695 : * always want to add on new locales without a lot of chatter
696 : * about existing ones.
697 : */
698 148167 : collid = CollationCreate(locale, nspid, GetUserId(),
699 : COLLPROVIDER_LIBC, true, enc,
700 : locale, locale, NULL, NULL,
701 148167 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
702 : true, true);
703 148167 : if (OidIsValid(collid))
704 : {
705 148167 : (*ncreatedp)++;
706 :
707 : /* Must do CCI between inserts to handle duplicates correctly */
708 148167 : CommandCounterIncrement();
709 : }
710 :
711 148167 : return enc;
712 : }
713 :
714 :
715 : #ifdef ENUM_SYSTEM_LOCALE
716 : /* parameter to be passed to the callback function win32_read_locale() */
717 : typedef struct
718 : {
719 : Oid nspid; /* namespace in which the collations are created */
720 : int *ncreatedp; /* incremented for each collation actually created */
721 : int *nvalidp; /* handed to create_collation_from_locale() as its valid-locale counter */
722 : } CollParam;
723 :
724 : /*
725 : * Callback function for EnumSystemLocalesEx() in
726 : * pg_import_system_collations(). Creates a collation for every valid locale
727 : * and a POSIX alias collation.
728 : *
729 : * The callback contract is to return TRUE to continue enumerating and FALSE
730 : * to stop enumerating. We always want to continue.
731 : */
732 : static BOOL CALLBACK
733 : win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
734 : {
735 : CollParam *param = (CollParam *) lparam;
736 : char localebuf[NAMEDATALEN];
737 : int result;
738 : int enc;
739 :
740 : (void) dwFlags;
741 :
742 : result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
743 : NULL, NULL);
744 :
745 : if (result == 0)
746 : {
747 : if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
748 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
749 : return TRUE;
750 : }
751 : if (localebuf[0] == '\0')
752 : return TRUE;
753 :
754 : enc = create_collation_from_locale(localebuf, param->nspid,
755 : param->nvalidp, param->ncreatedp);
756 : if (enc < 0)
757 : return TRUE;
758 :
759 : /*
760 : * Windows will use hyphens between language and territory, where POSIX
761 : * uses an underscore. Simply create a POSIX alias.
762 : */
763 : if (strchr(localebuf, '-'))
764 : {
765 : char alias[NAMEDATALEN];
766 : Oid collid;
767 :
768 : strcpy(alias, localebuf);
769 : for (char *p = alias; *p; p++)
770 : if (*p == '-')
771 : *p = '_';
772 :
773 : collid = CollationCreate(alias, param->nspid, GetUserId(),
774 : COLLPROVIDER_LIBC, true, enc,
775 : localebuf, localebuf, NULL, NULL,
776 : get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
777 : true, true);
778 : if (OidIsValid(collid))
779 : {
780 : (*param->ncreatedp)++;
781 :
782 : CommandCounterIncrement();
783 : }
784 : }
785 :
786 : return TRUE;
787 : }
788 : #endif /* ENUM_SYSTEM_LOCALE */
789 :
790 :
791 : /*
792 : * pg_import_system_collations: add known system collations to pg_collation
793 : */
794 : Datum
2272 peter_e 795 GIC 303 : pg_import_system_collations(PG_FUNCTION_ARGS)
796 : {
2116 tgl 797 CBC 303 : Oid nspid = PG_GETARG_OID(0);
2116 tgl 798 GIC 303 : int ncreated = 0;
2272 peter_e 799 ECB :
2272 peter_e 800 CBC 303 : if (!superuser())
2272 peter_e 801 UIC 0 : ereport(ERROR,
802 : (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
1165 alvherre 803 ECB : errmsg("must be superuser to import system collations")));
804 :
762 tgl 805 GIC 303 : if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
762 tgl 806 UIC 0 : ereport(ERROR,
807 : (errcode(ERRCODE_UNDEFINED_SCHEMA),
808 : errmsg("schema with OID %u does not exist", nspid)));
809 :
810 : /* Load collations known to libc, using "locale -a" to enumerate them */
811 : #ifdef READ_LOCALE_A_OUTPUT
812 : {
813 : FILE *locale_a_handle;
814 : char localebuf[LOCALE_NAME_BUFLEN];
2116 tgl 815 GIC 303 : int nvalid = 0;
2116 tgl 816 ECB : Oid collid;
817 : CollAliasData *aliases;
818 : int naliases,
819 : maxaliases,
820 : i;
821 :
822 : /* expansible array of aliases */
2116 tgl 823 GIC 303 : maxaliases = 100;
2116 tgl 824 CBC 303 : aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
825 303 : naliases = 0;
826 :
2116 tgl 827 GIC 303 : locale_a_handle = OpenPipeStream("locale -a", "r");
2116 tgl 828 CBC 303 : if (locale_a_handle == NULL)
2116 tgl 829 UBC 0 : ereport(ERROR,
830 : (errcode_for_file_access(),
831 : errmsg("could not execute command \"%s\": %m",
2116 tgl 832 ECB : "locale -a")));
833 :
2116 tgl 834 CBC 152409 : while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
2116 tgl 835 ECB : {
836 : size_t len;
837 : int enc;
838 : char alias[LOCALE_NAME_BUFLEN];
2272 peter_e 839 :
2116 tgl 840 CBC 152106 : len = strlen(localebuf);
2272 peter_e 841 ECB :
2116 tgl 842 CBC 152106 : if (len == 0 || localebuf[len - 1] != '\n')
843 : {
572 tgl 844 LBC 0 : elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
2116 tgl 845 GIC 3939 : continue;
846 : }
847 152106 : localebuf[len - 1] = '\0';
848 :
96 peter 849 GNC 152106 : enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
2116 tgl 850 GIC 152106 : if (enc < 0)
572 851 3939 : continue;
852 :
853 : /*
854 : * Generate aliases such as "en_US" in addition to "en_US.utf8"
855 : * for ease of use. Note that collation names are unique per
856 : * encoding only, so this doesn't clash with "en_US" for LATIN1,
857 : * say.
858 : *
859 : * However, it might conflict with a name we'll see later in the
860 : * "locale -a" output. So save up the aliases and try to add them
861 : * after we've read all the output.
2116 tgl 862 ECB : */
2116 tgl 863 GIC 148167 : if (normalize_libc_locale_name(alias, localebuf))
864 : {
2116 tgl 865 CBC 48783 : if (naliases >= maxaliases)
866 : {
867 303 : maxaliases *= 2;
868 : aliases = (CollAliasData *)
869 303 : repalloc(aliases, maxaliases * sizeof(CollAliasData));
870 : }
2116 tgl 871 GIC 48783 : aliases[naliases].localename = pstrdup(localebuf);
2116 tgl 872 CBC 48783 : aliases[naliases].alias = pstrdup(alias);
2116 tgl 873 GIC 48783 : aliases[naliases].enc = enc;
874 48783 : naliases++;
2116 tgl 875 ECB : }
876 : }
877 :
878 : /*
879 : * We don't check the return value of this, because we want to support
880 : * the case where the "locale" command does not exist. (This is
881 : * unusual but can happen on minimalized Linux distributions, for
882 : * example.) We will warn below if no locales could be found.
883 : */
143 peter 884 GIC 303 : ClosePipeStream(locale_a_handle);
885 :
886 : /*
887 : * Before processing the aliases, sort them by locale name. The point
888 : * here is that if "locale -a" gives us multiple locale names with the
889 : * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
890 : * want to pick a deterministic one of them. First in ASCII sort
891 : * order is a good enough rule. (Before PG 10, the code corresponding
892 : * to this logic in initdb.c had an additional ordering rule, to
893 : * prefer the locale name exactly matching the alias, if any. We
894 : * don't need to consider that here, because we would have already
895 : * created such a pg_collation entry above, and that one will win.)
896 : */
2116 tgl 897 303 : if (naliases > 1)
61 peter 898 GNC 303 : qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
899 :
900 : /* Now add aliases, ignoring any that match pre-existing entries */
2116 tgl 901 GIC 49086 : for (i = 0; i < naliases; i++)
902 : {
903 48783 : char *locale = aliases[i].localename;
904 48783 : char *alias = aliases[i].alias;
905 48783 : int enc = aliases[i].enc;
906 :
907 48783 : collid = CollationCreate(alias, nspid, GetUserId(),
908 : COLLPROVIDER_LIBC, true, enc,
909 : locale, locale, NULL, NULL,
702 tmunro 910 48783 : get_collation_actual_version(COLLPROVIDER_LIBC, locale),
911 : true, true);
2116 tgl 912 48783 : if (OidIsValid(collid))
913 : {
914 48480 : ncreated++;
915 :
916 48480 : CommandCounterIncrement();
917 : }
918 : }
919 :
920 : /* Give a warning if "locale -a" seems to be malfunctioning */
921 303 : if (nvalid == 0)
2116 tgl 922 UIC 0 : ereport(WARNING,
923 : (errmsg("no usable system locales were found")));
924 : }
925 : #endif /* READ_LOCALE_A_OUTPUT */
926 :
927 : /*
928 : * Load collations known to ICU
929 : *
930 : * We use uloc_countAvailable()/uloc_getAvailable() rather than
931 : * ucol_countAvailable()/ucol_getAvailable(). The former returns a full
932 : * set of language+region combinations, whereas the latter only returns
933 : * language+region combinations if they are distinct from the language's
934 : * base collation. So there might not be a de-DE or en-GB, which would be
935 : * confusing.
936 : */
937 : #ifdef USE_ICU
938 : {
939 : int i;
940 :
941 : /*
942 : * Start the loop at -1 to sneak in the root locale without too much
943 : * code duplication.
944 : */
2057 peter_e 945 GIC 244521 : for (i = -1; i < uloc_countAvailable(); i++)
946 : {
947 : const char *name;
948 : char *langtag;
949 : char *icucomment;
950 : Oid collid;
951 :
2208 952 244218 : if (i == -1)
2153 bruce 953 303 : name = ""; /* ICU root locale */
954 : else
2057 peter_e 955 243915 : name = uloc_getAvailable(i);
956 :
5 jdavis 957 GNC 244218 : langtag = icu_language_tag(name, ERROR);
958 :
959 : /*
960 : * Be paranoid about not allowing any non-ASCII strings into
961 : * pg_collation
962 : */
19 963 244218 : if (!pg_is_ascii(langtag))
2115 tgl 964 UIC 0 : continue;
2115 tgl 965 ECB :
2208 peter_e 966 CBC 244218 : collid = CollationCreate(psprintf("%s-x-icu", langtag),
967 : nspid, GetUserId(),
1479 peter 968 ECB : COLLPROVIDER_ICU, true, -1,
969 : NULL, NULL, langtag, NULL,
19 jdavis 970 GNC 244218 : get_collation_actual_version(COLLPROVIDER_ICU, langtag),
971 : true, true);
2116 tgl 972 GIC 244218 : if (OidIsValid(collid))
2116 tgl 973 ECB : {
2116 tgl 974 GBC 244218 : ncreated++;
975 :
2116 tgl 976 GIC 244218 : CommandCounterIncrement();
977 :
2115 978 244218 : icucomment = get_icu_locale_comment(name);
979 244218 : if (icucomment)
980 240279 : CreateComments(collid, CollationRelationId, 0,
981 : icucomment);
982 : }
2208 peter_e 983 ECB : }
984 : }
985 : #endif /* USE_ICU */
986 :
987 : /* Load collations known to WIN32 */
988 : #ifdef ENUM_SYSTEM_LOCALE
989 : {
990 : int nvalid = 0;
991 : CollParam param;
992 :
993 : param.nspid = nspid;
994 : param.ncreatedp = &ncreated;
995 : param.nvalidp = &nvalid;
996 :
997 : /*
998 : * Enumerate the locales that are either installed on or supported
999 : * by the OS.
1000 : */
1001 : if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1002 : (LPARAM) &param, NULL))
1003 : _dosmaperr(GetLastError());
1004 :
1005 : /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1006 : if (nvalid == 0)
1007 : ereport(WARNING,
1008 : (errmsg("no usable system locales were found")));
1009 : }
1010 : #endif /* ENUM_SYSTEM_LOCALE */
1011 :
2116 tgl 1012 GIC 303 : PG_RETURN_INT32(ncreated);
1013 : }
|