summaryrefslogtreecommitdiff
path: root/src/include/tsearch
diff options
context:
space:
mode:
author: Tom Lane, 2007-08-25 00:03:59 +0000
committer: Tom Lane, 2007-08-25 00:03:59 +0000
commit: 7351b5fa1781c3942d2d5ff2116d2d0ba882bd42 (patch)
tree: 8b27ff0f6ef15b271ffbe644d4a934462a80f900 /src/include/tsearch
parent: b918bf86c65632a5716308d8a613f5538a770927 (diff)
Cleanup for some problems in tsearch patch:
- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch configuration files, by doing this in a single t_readline() subroutine (replacing direct usages of fgets). Cleaned up API for readstopwords too.

Heikki Linnakangas
Diffstat (limited to 'src/include/tsearch')
-rw-r--r-- src/include/tsearch/dicts/spell.h 40
-rw-r--r-- src/include/tsearch/ts_locale.h 12
-rw-r--r-- src/include/tsearch/ts_public.h 14
-rw-r--r-- src/include/tsearch/ts_utils.h 38
4 files changed, 48 insertions, 56 deletions
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h
index 6c15a672f33..3dc013fea1e 100644
--- a/src/include/tsearch/dicts/spell.h
+++ b/src/include/tsearch/dicts/spell.h
@@ -6,7 +6,7 @@
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,12 +18,17 @@
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
+/*
+ * Max length of a flag name. Names longer than this will be truncated
+ * to the maximum.
+ */
+#define MAXFLAGLEN 16
+
struct SPNode;
typedef struct
{
- uint32
- val:8,
+ uint32 val:8,
isword:1,
compoundflag:4,
affix:19;
@@ -54,22 +59,25 @@ typedef struct spell_struct
{
union
{
- char flag[16];
+ /*
+ * flag is filled in by NIImportDictionary. After NISortDictionary,
+ * d is valid and flag is invalid.
+ */
+ char flag[MAXFLAGLEN];
struct
{
int affix;
int len;
} d;
} p;
- char word[1];
+ char word[1]; /* variable length, null-terminated */
} SPELL;
#define SPELLHDRSZ (offsetof(SPELL, word))
typedef struct aff_struct
{
- uint32
- flag:8,
+ uint32 flag:8,
type:1,
flagflags:7,
issimple:1,
@@ -85,11 +93,16 @@ typedef struct aff_struct
} AFFIX;
/*
- * affixes use deictinary flags too
+ * affixes use dictionary flags too
*/
#define FF_COMPOUNDPERMITFLAG 0x10
#define FF_COMPOUNDFORBIDFLAG 0x20
#define FF_CROSSPRODUCT 0x40
+
+/*
+ * Don't change the order of these. Initialization sorts by these,
+ * and expects prefixes to come first after sorting.
+ */
#define FF_SUFFIX 1
#define FF_PREFIX 0
@@ -97,8 +110,7 @@ struct AffixNode;
typedef struct
{
- uint32
- val:8,
+ uint32 val:8,
naff:24;
AFFIX **aff;
struct AffixNode *node;
@@ -126,9 +138,13 @@ typedef struct
int naffixes;
AFFIX *Affix;
- int nspell;
- int mspell;
+ /*
+ * Temporary array of all words in the dict file. Only used during
+ * initialization
+ */
SPELL **Spell;
+ int nspell; /* number of valid entries in Spell array */
+ int mspell; /* allocated length of Spell array */
AffixNode *Suffix;
AffixNode *Prefix;
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index 8a197666473..dcae2af93a4 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -38,11 +38,11 @@
#ifdef TS_USE_WIDE
-size_t char2wchar(wchar_t *to, const char *from, size_t len);
+extern size_t char2wchar(wchar_t *to, const char *from, size_t len);
#ifdef WIN32
-size_t wchar2char(char *to, const wchar_t *from, size_t len);
+extern size_t wchar2char(char *to, const wchar_t *from, size_t len);
#else /* WIN32 */
/* correct wcstombs */
@@ -81,8 +81,8 @@ extern int _t_isprint(const char *ptr);
#define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s)
#endif
-char *lowerstr(char *str);
-char *lowerstr_with_len(char *str, int len);
-char *recode_and_lowerstr(char *str);
+extern char *lowerstr(char *str);
+extern char *lowerstr_with_len(char *str, int len);
+extern char *t_readline(FILE *fp);
#endif /* __TSLOCALE_H__ */
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 718abdb61d4..148129aa8bc 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.2 2007/08/22 01:39:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -71,12 +71,11 @@ typedef struct
{
int len;
char **stop;
- char *(*wordop) (char *);
} StopList;
-extern void sortstoplist(StopList * s);
-extern void readstoplist(char *in, StopList * s);
-extern bool searchstoplist(StopList * s, char *key);
+extern void readstoplist(const char *fname, StopList *s,
+ char *(*wordop) (char *));
+extern bool searchstoplist(StopList *s, char *key);
/*
* Interface with dictionaries
@@ -102,9 +101,8 @@ typedef struct
#define TSL_ADDPOS 0x01
/*
- * Struct for supporting complex dictionaries like
- * thesaurus, pointer to is an 4-th argument for
- * dictlexize method
+ * Struct for supporting complex dictionaries like thesaurus.
+ * 4th argument for dictlexize method is a pointer to this
*/
typedef struct
{
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f84db4c6e41..d2e5c8d8e49 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -13,6 +13,7 @@
#define _PG_TS_UTILS_H_
#include "tsearch/ts_type.h"
+#include "tsearch/ts_public.h"
/*
* Common parse definitions for tsvector and tsquery
@@ -38,7 +39,8 @@ typedef struct
extern bool gettoken_tsvector(TSVectorParseState *state);
-struct ParseQueryNode;
+struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */
+
typedef struct
{
char *buffer; /* entire string we are scanning */
@@ -46,7 +48,7 @@ typedef struct
int4 state;
int4 count;
- /* reverse polish notation in list (for temprorary usage) */
+ /* reverse polish notation in list (for temporary usage) */
struct ParseQueryNode *str;
/* number in str */
@@ -102,36 +104,12 @@ extern void parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen);
* headline framework, flow in common to generate:
* 1 parse text with hlparsetext
* 2 parser-specific function to find part
- * 3 generatHeadline to generate result text
+ * 3 generateHeadline to generate result text
*/
-typedef struct
-{
- uint32 selected:1,
- in:1,
- replace:1,
- repeated:1,
- unused:4,
- type:8,
- len:16;
- char *word;
- QueryItem *item;
-} HeadlineWord;
-
-typedef struct
-{
- HeadlineWord *words;
- int4 lenwords;
- int4 curwords;
- char *startsel;
- char *stopsel;
- int2 startsellen;
- int2 stopsellen;
-} HeadlineText;
-
-extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query,
+extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
char *buf, int4 buflen);
-extern text *generatHeadline(HeadlineText * prs);
+extern text *generateHeadline(HeadlineParsedText * prs);
/*
* token/node types for parsing