PostgreSQL Source Code git master
dict_synonym.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * dict_synonym.c
4 * Synonym dictionary: replace word by its synonym
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/dict_synonym.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "catalog/pg_collation_d.h"
17#include "commands/defrem.h"
18#include "tsearch/ts_locale.h"
19#include "tsearch/ts_public.h"
20#include "utils/fmgrprotos.h"
21#include "utils/formatting.h"
22
23typedef struct
24{
25 char *in;
26 char *out;
27 int outlen;
29} Syn;
30
31typedef struct
32{
33 int len; /* length of syn array */
34 Syn *syn;
36} DictSyn;
37
38/*
39 * Finds the next whitespace-delimited word within the 'in' string.
40 * Returns a pointer to the first character of the word, and a pointer
41 * to the next byte after the last character in the word (in *end).
42 * Character '*' at the end of word will not be treated as word
43 * character if flags is not null.
44 */
45static char *
46findwrd(char *in, char **end, uint16 *flags)
47{
48 char *start;
49 char *lastchar;
50
51 /* Skip leading spaces */
52 while (*in && isspace((unsigned char) *in))
53 in += pg_mblen(in);
54
55 /* Return NULL on empty lines */
56 if (*in == '\0')
57 {
58 *end = NULL;
59 return NULL;
60 }
61
62 lastchar = start = in;
63
64 /* Find end of word */
65 while (*in && !isspace((unsigned char) *in))
66 {
67 lastchar = in;
68 in += pg_mblen(in);
69 }
70
71 if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
72 {
73 *flags = TSL_PREFIX;
74 *end = lastchar;
75 }
76 else
77 {
78 if (flags)
79 *flags = 0;
80 *end = in;
81 }
82
83 return start;
84}
85
86static int
87compareSyn(const void *a, const void *b)
88{
89 return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
90}
91
92
95{
96 List *dictoptions = (List *) PG_GETARG_POINTER(0);
97 DictSyn *d;
98 ListCell *l;
99 char *filename = NULL;
100 bool case_sensitive = false;
102 char *starti,
103 *starto,
104 *end = NULL;
105 int cur = 0;
106 char *line = NULL;
107 uint16 flags = 0;
108
109 foreach(l, dictoptions)
110 {
111 DefElem *defel = (DefElem *) lfirst(l);
112
113 if (strcmp(defel->defname, "synonyms") == 0)
114 filename = defGetString(defel);
115 else if (strcmp(defel->defname, "casesensitive") == 0)
116 case_sensitive = defGetBoolean(defel);
117 else
119 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
120 errmsg("unrecognized synonym parameter: \"%s\"",
121 defel->defname)));
122 }
123
124 if (!filename)
126 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
127 errmsg("missing Synonyms parameter")));
128
130
131 if (!tsearch_readline_begin(&trst, filename))
133 (errcode(ERRCODE_CONFIG_FILE_ERROR),
134 errmsg("could not open synonym file \"%s\": %m",
135 filename)));
136
137 d = (DictSyn *) palloc0(sizeof(DictSyn));
138
139 while ((line = tsearch_readline(&trst)) != NULL)
140 {
141 starti = findwrd(line, &end, NULL);
142 if (!starti)
143 {
144 /* Empty line */
145 goto skipline;
146 }
147 if (*end == '\0')
148 {
149 /* A line with only one word. Ignore silently. */
150 goto skipline;
151 }
152 *end = '\0';
153
154 starto = findwrd(end + 1, &end, &flags);
155 if (!starto)
156 {
157 /* A line with only one word (+whitespace). Ignore silently. */
158 goto skipline;
159 }
160 *end = '\0';
161
162 /*
163 * starti now points to the first word, and starto to the second word
164 * on the line, with a \0 terminator at the end of both words.
165 */
166
167 if (cur >= d->len)
168 {
169 if (d->len == 0)
170 {
171 d->len = 64;
172 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
173 }
174 else
175 {
176 d->len *= 2;
177 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
178 }
179 }
180
181 if (case_sensitive)
182 {
183 d->syn[cur].in = pstrdup(starti);
184 d->syn[cur].out = pstrdup(starto);
185 }
186 else
187 {
188 d->syn[cur].in = str_tolower(starti, strlen(starti), DEFAULT_COLLATION_OID);
189 d->syn[cur].out = str_tolower(starto, strlen(starto), DEFAULT_COLLATION_OID);
190 }
191
192 d->syn[cur].outlen = strlen(starto);
193 d->syn[cur].flags = flags;
194
195 cur++;
196
197skipline:
198 pfree(line);
199 }
200
202
203 d->len = cur;
204 qsort(d->syn, d->len, sizeof(Syn), compareSyn);
205
206 d->case_sensitive = case_sensitive;
207
209}
210
211Datum
213{
215 char *in = (char *) PG_GETARG_POINTER(1);
217 Syn key,
218 *found;
219 TSLexeme *res;
220
221 /* note: d->len test protects against Solaris bsearch-of-no-items bug */
222 if (len <= 0 || d->len <= 0)
223 PG_RETURN_POINTER(NULL);
224
225 if (d->case_sensitive)
226 key.in = pnstrdup(in, len);
227 else
228 key.in = str_tolower(in, len, DEFAULT_COLLATION_OID);
229
230 key.out = NULL;
231
232 found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
233 pfree(key.in);
234
235 if (!found)
236 PG_RETURN_POINTER(NULL);
237
238 res = palloc0(sizeof(TSLexeme) * 2);
239 res[0].lexeme = pnstrdup(found->out, found->outlen);
240 res[0].flags = found->flags;
241
243}
int32_t int32
Definition: c.h:498
uint16_t uint16
Definition: c.h:501
char * defGetString(DefElem *def)
Definition: define.c:35
bool defGetBoolean(DefElem *def)
Definition: define.c:94
static int compareSyn(const void *a, const void *b)
Definition: dict_synonym.c:87
static char * findwrd(char *in, char **end, uint16 *flags)
Definition: dict_synonym.c:46
Datum dsynonym_lexize(PG_FUNCTION_ARGS)
Definition: dict_synonym.c:212
Datum dsynonym_init(PG_FUNCTION_ARGS)
Definition: dict_synonym.c:94
struct cursor * cur
Definition: ecpg.c:29
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
char * str_tolower(const char *buff, size_t nbytes, Oid collid)
Definition: formatting.c:1637
return str start
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
char * pstrdup(const char *in)
Definition: mcxt.c:2325
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:2170
void pfree(void *pointer)
Definition: mcxt.c:2150
void * palloc0(Size size)
Definition: mcxt.c:1973
void * palloc(Size size)
Definition: mcxt.c:1943
char * pnstrdup(const char *in, Size len)
Definition: mcxt.c:2336
const void size_t len
static char * filename
Definition: pg_dumpall.c:123
#define lfirst(lc)
Definition: pg_list.h:172
#define qsort(a, b, c, d)
Definition: port.h:479
uintptr_t Datum
Definition: postgres.h:69
char * defname
Definition: parsenodes.h:826
bool case_sensitive
Definition: dict_synonym.c:35
Syn * syn
Definition: dict_xsyn.c:38
int len
Definition: dict_xsyn.c:37
Definition: pg_list.h:54
Definition: dict_xsyn.c:29
uint16 flags
Definition: dict_synonym.c:28
char * in
Definition: dict_synonym.c:25
char * out
Definition: dict_synonym.c:26
int outlen
Definition: dict_synonym.c:27
char * lexeme
Definition: ts_public.h:138
uint16 flags
Definition: ts_public.h:136
bool tsearch_readline_begin(tsearch_readline_state *stp, const char *filename)
Definition: ts_locale.c:89
char * tsearch_readline(tsearch_readline_state *stp)
Definition: ts_locale.c:112
void tsearch_readline_end(tsearch_readline_state *stp)
Definition: ts_locale.c:157
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define TSL_PREFIX
Definition: ts_public.h:143
char * get_tsearch_config_filename(const char *basename, const char *extension)
Definition: ts_utils.c:34