diff options
Diffstat (limited to 'contrib/unaccent/unaccent.c')
-rw-r--r-- | contrib/unaccent/unaccent.c | 65 |
1 files changed, 31 insertions, 34 deletions
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 262d5ec15f..a337df61af 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -3,7 +3,7 @@ * unaccent.c * Text search unaccent dictionary * - * Copyright (c) 2009-2012, PostgreSQL Global Development Group + * Copyright (c) 2009-2014, PostgreSQL Global Development Group * * IDENTIFICATION * contrib/unaccent/unaccent.c @@ -23,30 +23,29 @@ PG_MODULE_MAGIC; /* - * Unaccent dictionary uses uncompressed suffix tree to find a - * character to replace. Each node of tree is an array of - * SuffixChar struct with length = 256 (n-th element of array + * Unaccent dictionary uses a trie to find a character to replace. Each node of + * the trie is an array of 256 TrieChar structs (n-th element of array * corresponds to byte) */ -typedef struct SuffixChar +typedef struct TrieChar { - struct SuffixChar *nextChar; + struct TrieChar *nextChar; char *replaceTo; int replacelen; -} SuffixChar; +} TrieChar; /* - * placeChar - put str into tree's structure, byte by byte. + * placeChar - put str into trie's structure, byte by byte. */ -static SuffixChar * -placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) +static TrieChar * +placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) { - SuffixChar *curnode; + TrieChar *curnode; if (!node) { - node = palloc(sizeof(SuffixChar) * 256); - memset(node, 0, sizeof(SuffixChar) * 256); + node = palloc(sizeof(TrieChar) * 256); + memset(node, 0, sizeof(TrieChar) * 256); } curnode = node + *str; @@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int } /* - * initSuffixTree - create suffix tree from file. Function converts - * UTF8-encoded file into current encoding. + * initTrie - create trie from file. + * + * Function converts UTF8-encoded file into current encoding. */ -static SuffixChar * -initSuffixTree(char *filename) +static TrieChar * +initTrie(char *filename) { - SuffixChar *volatile rootSuffixTree = NULL; + TrieChar *volatile rootTrie = NULL; MemoryContext ccxt = CurrentMemoryContext; tsearch_readline_state trst; volatile bool skip; @@ -161,9 +161,9 @@ initSuffixTree(char *filename) } if (state >= 3) - rootSuffixTree = placeChar(rootSuffixTree, - (unsigned char *) src, srclen, - trg, trglen); + rootTrie = placeChar(rootTrie, + (unsigned char *) src, srclen, + trg, trglen); pfree(line); } @@ -192,14 +192,14 @@ initSuffixTree(char *filename) tsearch_readline_end(&trst); - return rootSuffixTree; + return rootTrie; } /* - * findReplaceTo - find multibyte character in tree + * findReplaceTo - find multibyte character in trie */ -static SuffixChar * -findReplaceTo(SuffixChar *node, unsigned char *src, int srclen) +static TrieChar * +findReplaceTo(TrieChar *node, unsigned char *src, int srclen) { while (node) { @@ -216,12 +216,11 @@ findReplaceTo(SuffixChar *node, unsigned char *src, int srclen) } PG_FUNCTION_INFO_V1(unaccent_init); -Datum unaccent_init(PG_FUNCTION_ARGS); Datum unaccent_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); - SuffixChar *rootSuffixTree = NULL; + TrieChar *rootTrie = NULL; bool fileloaded = false; ListCell *l; @@ -235,7 +234,7 @@ unaccent_init(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple Rules parameters"))); - rootSuffixTree = initSuffixTree(defGetString(defel)); + rootTrie = initTrie(defGetString(defel)); fileloaded = true; } else @@ -254,29 +253,28 @@ unaccent_init(PG_FUNCTION_ARGS) errmsg("missing Rules parameter"))); } - PG_RETURN_POINTER(rootSuffixTree); + PG_RETURN_POINTER(rootTrie); } PG_FUNCTION_INFO_V1(unaccent_lexize); -Datum unaccent_lexize(PG_FUNCTION_ARGS); Datum unaccent_lexize(PG_FUNCTION_ARGS) { - SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0); + TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0); char *srcchar = (char *) PG_GETARG_POINTER(1); int32 len = PG_GETARG_INT32(2); char *srcstart, *trgchar = NULL; int charlen; TSLexeme *res = NULL; - SuffixChar *node; + TrieChar *node; srcstart = srcchar; while (srcchar - srcstart < len) { charlen = pg_mblen(srcchar); - node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen); + node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen); if (node && node->replaceTo) { if (!res) @@ -313,7 +311,6 @@ unaccent_lexize(PG_FUNCTION_ARGS) * Function-like wrapper for dictionary */ PG_FUNCTION_INFO_V1(unaccent_dict); -Datum unaccent_dict(PG_FUNCTION_ARGS); Datum unaccent_dict(PG_FUNCTION_ARGS) { |