Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ts_utils.c
4 : * various support functions
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/tsearch/ts_utils.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 :
19 : #include "miscadmin.h"
20 : #include "tsearch/ts_locale.h"
21 : #include "tsearch/ts_utils.h"
22 :
23 :
24 : /*
25 : * Given the base name and extension of a tsearch config file, return
26 : * its full path name. The base name is assumed to be user-supplied,
27 : * and is checked to prevent pathname attacks. The extension is assumed
28 : * to be safe.
29 : *
30 : * The result is a palloc'd string.
31 : */
32 : char *
5710 tgl 33 CBC 192 : get_tsearch_config_filename(const char *basename,
34 : const char *extension)
35 : {
36 : char sharepath[MAXPGPATH];
37 : char *result;
38 :
39 : /*
40 : * We limit the basename to contain a-z, 0-9, and underscores. This may
41 : * be overly restrictive, but we don't want to allow access to anything
42 : * outside the tsearch_data directory, so for instance '/' *must* be
43 : * rejected, and on some platforms '\' and ':' are risky as well. Allowing
44 : * uppercase might result in incompatible behavior between case-sensitive
45 : * and case-insensitive filesystems, and non-ASCII characters create other
46 : * interesting risks, so on the whole a tight policy seems best.
47 : */
5696 48 192 : if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
5696 tgl 49 UBC 0 : ereport(ERROR,
50 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
51 : errmsg("invalid text search configuration file name \"%s\"",
52 : basename)));
53 :
5710 tgl 54 CBC 192 : get_share_path(my_exec_path, sharepath);
55 192 : result = palloc(MAXPGPATH);
56 192 : snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
57 : sharepath, basename, extension);
58 :
59 192 : return result;
60 : }
61 :
62 : /*
63 : * Reads a stop-word file. Each word is run through 'wordop'
64 : * function, if given. wordop may either modify the input in-place,
65 : * or palloc a new version.
66 : */
67 : void
5624 bruce 68 19 : readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
69 : {
5710 tgl 70 19 : char **stop = NULL;
71 :
72 19 : s->len = 0;
5706 73 19 : if (fname && *fname)
74 : {
75 19 : char *filename = get_tsearch_config_filename(fname, "stop");
76 : tsearch_readline_state trst;
77 : char *line;
5710 78 19 : int reallen = 0;
79 :
5408 80 19 : if (!tsearch_readline_begin(&trst, filename))
5710 tgl 81 UBC 0 : ereport(ERROR,
82 : (errcode(ERRCODE_CONFIG_FILE_ERROR),
83 : errmsg("could not open stop-word file \"%s\": %m",
84 : filename)));
85 :
5408 tgl 86 CBC 2432 : while ((line = tsearch_readline(&trst)) != NULL)
87 : {
5624 bruce 88 2413 : char *pbuf = line;
89 :
90 : /* Trim trailing space */
5706 tgl 91 11818 : while (*pbuf && !t_isspace(pbuf))
5407 92 9405 : pbuf += pg_mblen(pbuf);
5710 93 2413 : *pbuf = '\0';
94 :
95 : /* Skip empty lines */
5706 96 2413 : if (*line == '\0')
97 : {
5706 tgl 98 UBC 0 : pfree(line);
99 0 : continue;
100 : }
101 :
5710 tgl 102 CBC 2413 : if (s->len >= reallen)
103 : {
104 38 : if (reallen == 0)
105 : {
5706 106 19 : reallen = 64;
5710 107 19 : stop = (char **) palloc(sizeof(char *) * reallen);
108 : }
109 : else
110 : {
111 19 : reallen *= 2;
61 peter 112 GNC 19 : stop = (char **) repalloc(stop, sizeof(char *) * reallen);
113 : }
114 : }
5710 tgl 115 ECB :
5706 tgl 116 GIC 2413 : if (wordop)
5706 tgl 117 ECB : {
5706 tgl 118 CBC 2413 : stop[s->len] = wordop(line);
119 2413 : if (stop[s->len] != line)
5706 tgl 120 GIC 2413 : pfree(line);
121 : }
5710 tgl 122 EUB : else
5706 tgl 123 UIC 0 : stop[s->len] = line;
5710 tgl 124 ECB :
5710 tgl 125 GIC 2413 : (s->len)++;
126 : }
5706 tgl 127 ECB :
5408 tgl 128 CBC 19 : tsearch_readline_end(&trst);
5710 tgl 129 GIC 19 : pfree(filename);
130 : }
5710 tgl 131 ECB :
5710 tgl 132 GIC 19 : s->stop = stop;
133 :
5706 tgl 134 ECB : /* Sort to allow binary searching */
5710 tgl 135 CBC 19 : if (s->stop && s->len > 0)
3915 rhaas 136 19 : qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
5710 tgl 137 GIC 19 : }
138 :
5710 tgl 139 ECB : bool
5624 bruce 140 GIC 7629 : searchstoplist(StopList *s, char *key)
5710 tgl 141 ECB : {
5710 tgl 142 CBC 12764 : return (s->stop && s->len > 0 &&
5710 tgl 143 GIC 5135 : bsearch(&key, s->stop, s->len,
144 : sizeof(char *), pg_qsort_strcmp));
145 : }
|