summaryrefslogtreecommitdiff
path: root/contrib/dict_xsyn
diff options
context:
space:
mode:
authorTom Lane2007-10-15 21:36:50 +0000
committerTom Lane2007-10-15 21:36:50 +0000
commit5fcb079858bb392e87067b5526e9df950db38024 (patch)
tree4ffb764af092be94fbe0e033dce2f492e6c937f7 /contrib/dict_xsyn
parentfb631dba2a3c2c183bb99f2098491ecf96fb6664 (diff)
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
Diffstat (limited to 'contrib/dict_xsyn')
-rw-r--r--contrib/dict_xsyn/Makefile38
-rw-r--r--contrib/dict_xsyn/README.dict_xsyn52
-rw-r--r--contrib/dict_xsyn/dict_xsyn.c235
-rw-r--r--contrib/dict_xsyn/dict_xsyn.sql.in29
-rw-r--r--contrib/dict_xsyn/expected/dict_xsyn.out22
-rw-r--r--contrib/dict_xsyn/sql/dict_xsyn.sql16
-rw-r--r--contrib/dict_xsyn/uninstall_dict_xsyn.sql9
-rw-r--r--contrib/dict_xsyn/xsyn_sample.rules6
8 files changed, 407 insertions, 0 deletions
diff --git a/contrib/dict_xsyn/Makefile b/contrib/dict_xsyn/Makefile
new file mode 100644
index 00000000000..563f039e468
--- /dev/null
+++ b/contrib/dict_xsyn/Makefile
@@ -0,0 +1,38 @@
+# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# Module name, object files, SQL scripts, docs and regression test that are
+# handed to the PostgreSQL build rules included further down.
+MODULE_big = dict_xsyn
+OBJS = dict_xsyn.o
+DATA_built = dict_xsyn.sql
+DATA = uninstall_dict_xsyn.sql
+DOCS = README.dict_xsyn
+REGRESS = dict_xsyn
+
+# Rules files handled by install-data/uninstall-data, and the directory
+# (relative to $(datadir)) they are placed in.
+DICTDIR = tsearch_data
+DICTFILES = xsyn_sample.rules
+
+# With USE_PGXS defined, include the build rules reported by pg_config --pgxs;
+# otherwise include the rules of the enclosing source tree.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dict_xsyn
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# Hook the rules files into the regular install target: each file listed in
+# DICTFILES is copied from $(srcdir) to $(DESTDIR)$(datadir)/$(DICTDIR)/.
+install: install-data
+
+.PHONY: install-data
+install-data: $(DICTFILES)
+ for i in $(DICTFILES); \
+ do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
+ done
+
+# Counterpart of install-data: remove the same files again on uninstall.
+uninstall: uninstall-data
+
+.PHONY: uninstall-data
+uninstall-data:
+ for i in $(DICTFILES); \
+ do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
+ done
diff --git a/contrib/dict_xsyn/README.dict_xsyn b/contrib/dict_xsyn/README.dict_xsyn
new file mode 100644
index 00000000000..9565eefefbc
--- /dev/null
+++ b/contrib/dict_xsyn/README.dict_xsyn
@@ -0,0 +1,52 @@
+Extended Synonym dictionary
+===========================
+
+This is a simple synonym dictionary. It replaces words with groups of their
+synonyms, and so makes it possible to search for a word using any of its
+synonyms.
+
+* Configuration
+
+It accepts the following options:
+
+ - KEEPORIG controls whether the original word is included, or only its
+ synonyms. Default is 'true'.
+
+ - RULES is the base name of the file containing the list of synonyms.
+ This file must be in $(prefix)/share/tsearch_data/, and its name must
+ end in ".rules" (which is not included in the RULES parameter).
+
+The rules file has the following format:
+
+ - Each line represents a group of synonyms for a single word, which is
+ given first on the line. Synonyms are separated by whitespace:
+
+ word syn1 syn2 syn3
+
+ - The sharp ('#') sign is a comment delimiter. It may appear at the start of
+ the line or after whitespace; the rest of the line will be skipped.
+
+Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
+for an example.
+
+* Usage
+
+1. Compile and install
+
+2. Load dictionary
+
+ psql mydb < dict_xsyn.sql
+
+3. Test it
+
+ mydb=# SELECT ts_lexize('xsyn','word');
+ ts_lexize
+ ----------------
+ {word,syn1,syn2,syn3}
+
+4. Change the dictionary options as you wish
+
+ mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
+ ALTER TEXT SEARCH DICTIONARY
+
+That's all.
diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
new file mode 100644
index 00000000000..1cd53a26bd1
--- /dev/null
+++ b/contrib/dict_xsyn/dict_xsyn.c
@@ -0,0 +1,235 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_xsyn.c
+ * Extended synonym dictionary
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "commands/defrem.h"
+#include "fmgr.h"
+#include "storage/fd.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * One entry of the rules file: the word that starts a line, plus the
+ * lower-cased text of that whole line.
+ */
+typedef struct
+{
+ char *key; /* Word */
+ char *value; /* Unparsed list of synonyms, including the word itself */
+} Syn;
+
+/*
+ * Dictionary state: filled in by dxsyn_init() (via read_dictionary()) and
+ * consulted by dxsyn_lexize().
+ */
+typedef struct
+{
+ int len; /* number of entries in syn; while read_dictionary() is
+          * filling the array it temporarily holds the allocated size */
+ Syn *syn; /* entries, sorted by key using compare_syn() */
+
+ bool keeporig; /* also return the first word of the matched line? */
+} DictSyn;
+
+
+PG_FUNCTION_INFO_V1(dxsyn_init);
+Datum dxsyn_init(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(dxsyn_lexize);
+Datum dxsyn_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * find_word
+ *		Locate the next whitespace-delimited word in "in".
+ *
+ * Leading whitespace is skipped.  Returns a pointer to the first byte of the
+ * word and stores in *end a pointer just past its last byte.  Returns NULL,
+ * with *end set to NULL, when the rest of the string is empty or the next
+ * word begins with '#' (start of a comment).
+ */
+static char *
+find_word(char *in, char **end)
+{
+	char	   *cursor = in;
+	char	   *word;
+
+	*end = NULL;
+
+	/* step over leading whitespace, one (possibly multibyte) character at a time */
+	for (; *cursor != '\0' && t_isspace(cursor); cursor += pg_mblen(cursor))
+		;
+
+	if (*cursor == '\0' || *cursor == '#')
+		return NULL;
+
+	/* cursor now sits on a non-space character, so the word is non-empty */
+	word = cursor;
+	do
+	{
+		cursor += pg_mblen(cursor);
+	} while (*cursor != '\0' && !t_isspace(cursor));
+
+	*end = cursor;
+	return word;
+}
+
+/*
+ * compare_syn
+ *		qsort/bsearch comparator ordering Syn entries by their key string.
+ */
+static int
+compare_syn(const void *a, const void *b)
+{
+	const Syn  *lhs = (const Syn *) a;
+	const Syn  *rhs = (const Syn *) b;
+
+	return strcmp(lhs->key, rhs->key);
+}
+
+/*
+ * read_dictionary
+ *		Load the synonym rules file named by "filename" into d.
+ *
+ * The path is resolved with get_tsearch_config_filename() using the "rules"
+ * extension.  Every line is lower-cased; lines on which find_word() finds no
+ * word (blank lines, or lines whose first word starts a comment) are skipped.
+ * For each remaining line one Syn entry is stored whose key is a copy of the
+ * first word and whose value is the whole lower-cased line.
+ *
+ * On return d->syn is sorted by key and d->len is the number of entries.
+ * Raises ERROR if the file cannot be opened.
+ */
+static void
+read_dictionary(DictSyn *d, char *filename)
+{
+	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
+	FILE	   *fin;
+	char	   *line;
+	int			cur = 0;
+
+	if ((fin = AllocateFile(real_filename, "r")) == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open synonym file \"%s\": %m",
+						real_filename)));
+
+	while ((line = t_readline(fin)) != NULL)
+	{
+		char	   *value;
+		char	   *key;
+		char	   *end = NULL;
+
+		if (*line == '\0')
+		{
+			/* nothing to parse, but the line buffer must still be released */
+			pfree(line);
+			continue;
+		}
+
+		value = lowerstr(line);
+		pfree(line);
+
+		key = find_word(value, &end);
+		if (!key)
+		{
+			pfree(value);
+			continue;
+		}
+
+		/* d->len serves as the allocated size until loading is finished */
+		if (cur == d->len)
+		{
+			d->len = (d->len > 0) ? 2 * d->len : 16;
+			if (d->syn)
+				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+			else
+				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+		}
+
+		d->syn[cur].key = pnstrdup(key, end - key);
+		d->syn[cur].value = value;
+
+		cur++;
+	}
+
+	FreeFile(fin);
+
+	/* from here on d->len is the number of entries actually stored */
+	d->len = cur;
+	if (cur > 1)
+		qsort(d->syn, d->len, sizeof(Syn), compare_syn);
+
+	pfree(real_filename);
+}
+
+/*
+ * dxsyn_init
+ *		Template INIT function: build the dictionary state from its options.
+ *
+ * Argument 0 is the list of DefElem options.  KEEPORIG (boolean, defaults to
+ * true) and RULES (name passed to read_dictionary()) are accepted, matched
+ * case-insensitively; any other option name raises ERROR.  Returns a pointer
+ * to the palloc'd DictSyn.
+ */
+Datum
+dxsyn_init(PG_FUNCTION_ARGS)
+{
+	List	   *options = (List *) PG_GETARG_POINTER(0);
+	DictSyn    *dict = (DictSyn *) palloc0(sizeof(DictSyn));
+	ListCell   *cell;
+
+	/* defaults, in effect until an option overrides them */
+	dict->len = 0;
+	dict->syn = NULL;
+	dict->keeporig = true;
+
+	foreach(cell, options)
+	{
+		DefElem    *opt = (DefElem *) lfirst(cell);
+
+		if (pg_strcasecmp(opt->defname, "KEEPORIG") == 0)
+		{
+			dict->keeporig = defGetBoolean(opt);
+			continue;
+		}
+
+		if (pg_strcasecmp(opt->defname, "RULES") == 0)
+		{
+			read_dictionary(dict, defGetString(opt));
+			continue;
+		}
+
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("unrecognized xsyn parameter: \"%s\"",
+						opt->defname)));
+	}
+
+	PG_RETURN_POINTER(dict);
+}
+
+/*
+ * dxsyn_lexize
+ *		Template LEXIZE function: replace a word by its group of synonyms.
+ *
+ * Argument 0 is the DictSyn built by dxsyn_init(), argument 1 points to the
+ * input word and argument 2 is its length in bytes.  The word is lower-cased
+ * and looked up among the keys loaded from the rules file.
+ *
+ * Returns NULL if the length is zero, the dictionary holds no entries, or the
+ * word is not a key.  Otherwise returns a palloc'd array of TSLexeme holding
+ * the words of the matching rules line, terminated by an entry whose lexeme
+ * is NULL.  The first word of the line (the key itself) is included only when
+ * KEEPORIG is set, so the array may consist of the terminator alone.
+ */
+Datum
+dxsyn_lexize(PG_FUNCTION_ARGS)
+{
+	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int			length = PG_GETARG_INT32(2);
+	Syn			word;
+	Syn		   *found;
+	TSLexeme   *res = NULL;
+	char	   *pos;
+	char	   *syn;
+	char	   *end;
+	int			nsyns = 0;
+	bool		is_first = true;
+
+	if (!length || d->len == 0)
+		PG_RETURN_POINTER(NULL);
+
+	/* Create search pattern */
+	{
+		char	   *temp = pnstrdup(in, length);
+
+		word.key = lowerstr(temp);
+		pfree(temp);
+		word.value = NULL;
+	}
+
+	/* Look for matching syn */
+	found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
+	pfree(word.key);
+
+	if (!found)
+		PG_RETURN_POINTER(NULL);
+
+	/*
+	 * Parse the string of synonyms and return an array of words.
+	 *
+	 * Start with just the zeroed terminator entry so the result is a valid
+	 * array even if no word ends up being emitted.
+	 */
+	res = (TSLexeme *) palloc0(sizeof(TSLexeme));
+
+	pos = found->value;
+	while ((syn = find_word(pos, &end)) != NULL)
+	{
+		/* first word is added to result only if KEEPORIG flag is set */
+		if (d->keeporig || !is_first)
+		{
+			res = (TSLexeme *) repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
+
+			/*
+			 * Zero the new entry and the terminator after it, so that every
+			 * field of TSLexeme other than the lexeme is well defined.
+			 */
+			memset(&res[nsyns], 0, sizeof(TSLexeme) * 2);
+
+			/* copy by length: the stored rules line is left untouched */
+			res[nsyns].lexeme = pnstrdup(syn, end - syn);
+			nsyns++;
+		}
+
+		is_first = false;
+
+		/*
+		 * Resume at the delimiter itself; find_word() skips it using
+		 * pg_mblen(), which stays correct for multibyte whitespace.
+		 */
+		pos = end;
+	}
+
+	PG_RETURN_POINTER(res);
+}
diff --git a/contrib/dict_xsyn/dict_xsyn.sql.in b/contrib/dict_xsyn/dict_xsyn.sql.in
new file mode 100644
index 00000000000..0e5755e5b17
--- /dev/null
+++ b/contrib/dict_xsyn/dict_xsyn.sql.in
@@ -0,0 +1,29 @@
+-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+-- Create all objects in a single transaction.
+BEGIN;
+
+-- C function used as the INIT function of the template below.
+CREATE FUNCTION dxsyn_init(internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+-- C function used as the LEXIZE function of the template below.
+CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+-- Text search template tying the two functions together.
+CREATE TEXT SEARCH TEMPLATE xsyn_template (
+ LEXIZE = dxsyn_lexize,
+ INIT = dxsyn_init
+);
+
+-- Dictionary based on the template; no options (such as RULES) are set here.
+CREATE TEXT SEARCH DICTIONARY xsyn (
+ TEMPLATE = xsyn_template
+);
+
+COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
+
+END;
diff --git a/contrib/dict_xsyn/expected/dict_xsyn.out b/contrib/dict_xsyn/expected/dict_xsyn.out
new file mode 100644
index 00000000000..99071ea8c74
--- /dev/null
+++ b/contrib/dict_xsyn/expected/dict_xsyn.out
@@ -0,0 +1,22 @@
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+--configuration
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+----------------
+ {sn,sne,1987a}
+(1 row)
+
+SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+-----------
+
+(1 row)
+
diff --git a/contrib/dict_xsyn/sql/dict_xsyn.sql b/contrib/dict_xsyn/sql/dict_xsyn.sql
new file mode 100644
index 00000000000..17f6df9cf3d
--- /dev/null
+++ b/contrib/dict_xsyn/sql/dict_xsyn.sql
@@ -0,0 +1,16 @@
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i dict_xsyn.sql
+\set ECHO all
+RESET client_min_messages;
+
+--configuration
+ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
+
+--lexize
+SELECT ts_lexize('xsyn', 'supernova');
+SELECT ts_lexize('xsyn', 'grb');
diff --git a/contrib/dict_xsyn/uninstall_dict_xsyn.sql b/contrib/dict_xsyn/uninstall_dict_xsyn.sql
new file mode 100644
index 00000000000..7b7acea0d14
--- /dev/null
+++ b/contrib/dict_xsyn/uninstall_dict_xsyn.sql
@@ -0,0 +1,9 @@
+-- Drop the xsyn dictionary, then its template, then the two C functions
+-- the template refers to.
+SET search_path = public;
+
+DROP TEXT SEARCH DICTIONARY xsyn;
+
+DROP TEXT SEARCH TEMPLATE xsyn_template;
+
+DROP FUNCTION dxsyn_init(internal);
+
+DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
diff --git a/contrib/dict_xsyn/xsyn_sample.rules b/contrib/dict_xsyn/xsyn_sample.rules
new file mode 100644
index 00000000000..203bec793a1
--- /dev/null
+++ b/contrib/dict_xsyn/xsyn_sample.rules
@@ -0,0 +1,6 @@
+# Sample rules file for eXtended Synonym (xsyn) dictionary
+# format is as follows:
+#
+# word synonym1 synonym2 ...
+#
+supernova sn sne 1987a