summary refs log tree commit diff
path: root/contrib/unaccent/unaccent.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/unaccent/unaccent.c')
-rw-r--r-- contrib/unaccent/unaccent.c 65
1 files changed, 31 insertions, 34 deletions
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index 262d5ec15f..a337df61af 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -3,7 +3,7 @@
* unaccent.c
* Text search unaccent dictionary
*
- * Copyright (c) 2009-2012, PostgreSQL Global Development Group
+ * Copyright (c) 2009-2014, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/unaccent/unaccent.c
@@ -23,30 +23,29 @@
PG_MODULE_MAGIC;
/*
- * Unaccent dictionary uses uncompressed suffix tree to find a
- * character to replace. Each node of tree is an array of
- * SuffixChar struct with length = 256 (n-th element of array
+ * Unaccent dictionary uses a trie to find a character to replace. Each node of
+ * the trie is an array of 256 TrieChar structs (n-th element of array
* corresponds to byte)
*/
-typedef struct SuffixChar
+typedef struct TrieChar
{
- struct SuffixChar *nextChar;
+ struct TrieChar *nextChar;
char *replaceTo;
int replacelen;
-} SuffixChar;
+} TrieChar;
/*
- * placeChar - put str into tree's structure, byte by byte.
+ * placeChar - put str into trie's structure, byte by byte.
*/
-static SuffixChar *
-placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
+static TrieChar *
+placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen)
{
- SuffixChar *curnode;
+ TrieChar *curnode;
if (!node)
{
- node = palloc(sizeof(SuffixChar) * 256);
- memset(node, 0, sizeof(SuffixChar) * 256);
+ node = palloc(sizeof(TrieChar) * 256);
+ memset(node, 0, sizeof(TrieChar) * 256);
}
curnode = node + *str;
@@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int
}
/*
- * initSuffixTree - create suffix tree from file. Function converts
- * UTF8-encoded file into current encoding.
+ * initTrie - create trie from file.
+ *
+ * Function converts UTF8-encoded file into current encoding.
*/
-static SuffixChar *
-initSuffixTree(char *filename)
+static TrieChar *
+initTrie(char *filename)
{
- SuffixChar *volatile rootSuffixTree = NULL;
+ TrieChar *volatile rootTrie = NULL;
MemoryContext ccxt = CurrentMemoryContext;
tsearch_readline_state trst;
volatile bool skip;
@@ -161,9 +161,9 @@ initSuffixTree(char *filename)
}
if (state >= 3)
- rootSuffixTree = placeChar(rootSuffixTree,
- (unsigned char *) src, srclen,
- trg, trglen);
+ rootTrie = placeChar(rootTrie,
+ (unsigned char *) src, srclen,
+ trg, trglen);
pfree(line);
}
@@ -192,14 +192,14 @@ initSuffixTree(char *filename)
tsearch_readline_end(&trst);
- return rootSuffixTree;
+ return rootTrie;
}
/*
- * findReplaceTo - find multibyte character in tree
+ * findReplaceTo - find multibyte character in trie
*/
-static SuffixChar *
-findReplaceTo(SuffixChar *node, unsigned char *src, int srclen)
+static TrieChar *
+findReplaceTo(TrieChar *node, unsigned char *src, int srclen)
{
while (node)
{
@@ -216,12 +216,11 @@ findReplaceTo(SuffixChar *node, unsigned char *src, int srclen)
}
PG_FUNCTION_INFO_V1(unaccent_init);
-Datum unaccent_init(PG_FUNCTION_ARGS);
Datum
unaccent_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
- SuffixChar *rootSuffixTree = NULL;
+ TrieChar *rootTrie = NULL;
bool fileloaded = false;
ListCell *l;
@@ -235,7 +234,7 @@ unaccent_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple Rules parameters")));
- rootSuffixTree = initSuffixTree(defGetString(defel));
+ rootTrie = initTrie(defGetString(defel));
fileloaded = true;
}
else
@@ -254,29 +253,28 @@ unaccent_init(PG_FUNCTION_ARGS)
errmsg("missing Rules parameter")));
}
- PG_RETURN_POINTER(rootSuffixTree);
+ PG_RETURN_POINTER(rootTrie);
}
PG_FUNCTION_INFO_V1(unaccent_lexize);
-Datum unaccent_lexize(PG_FUNCTION_ARGS);
Datum
unaccent_lexize(PG_FUNCTION_ARGS)
{
- SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
+ TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0);
char *srcchar = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *srcstart,
*trgchar = NULL;
int charlen;
TSLexeme *res = NULL;
- SuffixChar *node;
+ TrieChar *node;
srcstart = srcchar;
while (srcchar - srcstart < len)
{
charlen = pg_mblen(srcchar);
- node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
+ node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen);
if (node && node->replaceTo)
{
if (!res)
@@ -313,7 +311,6 @@ unaccent_lexize(PG_FUNCTION_ARGS)
* Function-like wrapper for dictionary
*/
PG_FUNCTION_INFO_V1(unaccent_dict);
-Datum unaccent_dict(PG_FUNCTION_ARGS);
Datum
unaccent_dict(PG_FUNCTION_ARGS)
{