summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
author Tom Lane 2014-07-01 00:51:26 +0000
committer Tom Lane 2014-07-01 00:51:30 +0000
commit 97c40ce61465582b96944e41ed6ec06c2016b95c (patch)
tree 16f8fc36e2d2ae810f2e5ecba457b69826944298 /contrib
parent 55863274d98556acf57013f64f545d9a1e640bba (diff)
Allow empty replacement strings in contrib/unaccent.
This is useful in languages where diacritic signs are represented as separate characters; it's also one step towards letting unaccent be used for arbitrary substring substitutions.

In passing, improve the user documentation for unaccent, which was sadly vague about some important details.

Mohammad Alhashash, reviewed by Abhijit Menon-Sen
Diffstat (limited to 'contrib')
-rw-r--r--contrib/unaccent/unaccent.c29
1 files changed, 23 insertions, 6 deletions
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index a337df61af4..5a31f85a132 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -104,11 +104,21 @@ initTrie(char *filename)
while ((line = tsearch_readline(&trst)) != NULL)
{
- /*
- * The format of each line must be "src trg" where src and trg
- * are sequences of one or more non-whitespace characters,
- * separated by whitespace. Whitespace at start or end of
- * line is ignored.
+ /*----------
+ * The format of each line must be "src" or "src trg", where
+ * src and trg are sequences of one or more non-whitespace
+ * characters, separated by whitespace. Whitespace at start
+ * or end of line is ignored. If trg is omitted, an empty
+ * string is used as the replacement.
+ *
+ * We use a simple state machine, with states
+ * 0 initial (before src)
+ * 1 in src
+ * 2 in whitespace after src
+ * 3 in trg
+ * 4 in whitespace after trg
+ * -1 syntax error detected (line will be ignored)
+ *----------
*/
int state;
char *ptr;
@@ -160,7 +170,14 @@ initTrie(char *filename)
}
}
- if (state >= 3)
+ if (state == 1 || state == 2)
+ {
+ /* trg was omitted, so use "" */
+ trg = "";
+ trglen = 0;
+ }
+
+ if (state > 0)
rootTrie = placeChar(rootTrie,
(unsigned char *) src, srclen,
trg, trglen);