diff options
| author | Tom Lane | 2007-08-25 00:03:59 +0000 |
|---|---|---|
| committer | Tom Lane | 2007-08-25 00:03:59 +0000 |
| commit | 7351b5fa1781c3942d2d5ff2116d2d0ba882bd42 (patch) | |
| tree | 8b27ff0f6ef15b271ffbe644d4a934462a80f900 /src/include/tsearch | |
| parent | b918bf86c65632a5716308d8a613f5538a770927 (diff) | |
Cleanup for some problems in tsearch patch:
- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database
encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup
Introduced consistent encoding checking/conversion of data read from tsearch
configuration files, by doing this in a single t_readline() subroutine
(replacing direct usages of fgets). Cleaned up API for readstoplist too.
Heikki Linnakangas
Diffstat (limited to 'src/include/tsearch')
| -rw-r--r-- | src/include/tsearch/dicts/spell.h | 40 | ||||
| -rw-r--r-- | src/include/tsearch/ts_locale.h | 12 | ||||
| -rw-r--r-- | src/include/tsearch/ts_public.h | 14 | ||||
| -rw-r--r-- | src/include/tsearch/ts_utils.h | 38 |
4 files changed, 48 insertions, 56 deletions
diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 6c15a672f33..3dc013fea1e 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.1 2007/08/21 01:11:29 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/dicts/spell.h,v 1.2 2007/08/25 00:03:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,12 +18,17 @@ #include "tsearch/dicts/regis.h" #include "tsearch/ts_public.h" +/* + * Max length of a flag name. Names longer than this will be truncated + * to the maximum. + */ +#define MAXFLAGLEN 16 + struct SPNode; typedef struct { - uint32 - val:8, + uint32 val:8, isword:1, compoundflag:4, affix:19; @@ -54,22 +59,25 @@ typedef struct spell_struct { union { - char flag[16]; + /* + * flag is filled in by NIImportDictionary. After NISortDictionary, + * d is valid and flag is invalid. + */ + char flag[MAXFLAGLEN]; struct { int affix; int len; } d; } p; - char word[1]; + char word[1]; /* variable length, null-terminated */ } SPELL; #define SPELLHDRSZ (offsetof(SPELL, word)) typedef struct aff_struct { - uint32 - flag:8, + uint32 flag:8, type:1, flagflags:7, issimple:1, @@ -85,11 +93,16 @@ typedef struct aff_struct } AFFIX; /* - * affixes use deictinary flags too + * affixes use dictionary flags too */ #define FF_COMPOUNDPERMITFLAG 0x10 #define FF_COMPOUNDFORBIDFLAG 0x20 #define FF_CROSSPRODUCT 0x40 + +/* + * Don't change the order of these. Initialization sorts by these, + * and expects prefixes to come first after sorting. 
+ */ #define FF_SUFFIX 1 #define FF_PREFIX 0 @@ -97,8 +110,7 @@ struct AffixNode; typedef struct { - uint32 - val:8, + uint32 val:8, naff:24; AFFIX **aff; struct AffixNode *node; @@ -126,9 +138,13 @@ typedef struct int naffixes; AFFIX *Affix; - int nspell; - int mspell; + /* + * Temporary array of all words in the dict file. Only used during + * initialization + */ SPELL **Spell; + int nspell; /* number of valid entries in Spell array */ + int mspell; /* allocated length of Spell array */ AffixNode *Suffix; AffixNode *Prefix; diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h index 8a197666473..dcae2af93a4 100644 --- a/src/include/tsearch/ts_locale.h +++ b/src/include/tsearch/ts_locale.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.1 2007/08/21 01:11:29 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.2 2007/08/25 00:03:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,11 +38,11 @@ #ifdef TS_USE_WIDE -size_t char2wchar(wchar_t *to, const char *from, size_t len); +extern size_t char2wchar(wchar_t *to, const char *from, size_t len); #ifdef WIN32 -size_t wchar2char(char *to, const wchar_t *from, size_t len); +extern size_t wchar2char(char *to, const wchar_t *from, size_t len); #else /* WIN32 */ /* correct wcstombs */ @@ -81,8 +81,8 @@ extern int _t_isprint(const char *ptr); #define COPYCHAR(d,s) TOUCHAR(d) = TOUCHAR(s) #endif -char *lowerstr(char *str); -char *lowerstr_with_len(char *str, int len); -char *recode_and_lowerstr(char *str); +extern char *lowerstr(char *str); +extern char *lowerstr_with_len(char *str, int len); +extern char *t_readline(FILE *fp); #endif /* __TSLOCALE_H__ */ diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 718abdb61d4..148129aa8bc 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h 
@@ -6,7 +6,7 @@ * * Copyright (c) 1998-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.2 2007/08/22 01:39:46 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,12 +71,11 @@ typedef struct { int len; char **stop; - char *(*wordop) (char *); } StopList; -extern void sortstoplist(StopList * s); -extern void readstoplist(char *in, StopList * s); -extern bool searchstoplist(StopList * s, char *key); +extern void readstoplist(const char *fname, StopList *s, + char *(*wordop) (char *)); +extern bool searchstoplist(StopList *s, char *key); /* * Interface with dictionaries @@ -102,9 +101,8 @@ typedef struct #define TSL_ADDPOS 0x01 /* - * Struct for supporting complex dictionaries like - * thesaurus, pointer to is an 4-th argument for - * dictlexize method + * Struct for supporting complex dictionaries like thesaurus. 
+ * 4th argument for dictlexize method is a pointer to this */ typedef struct { diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index f84db4c6e41..d2e5c8d8e49 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.1 2007/08/21 01:11:29 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -13,6 +13,7 @@ #define _PG_TS_UTILS_H_ #include "tsearch/ts_type.h" +#include "tsearch/ts_public.h" /* * Common parse definitions for tsvector and tsquery @@ -38,7 +39,8 @@ typedef struct extern bool gettoken_tsvector(TSVectorParseState *state); -struct ParseQueryNode; +struct ParseQueryNode; /* private in backend/utils/adt/tsquery.c */ + typedef struct { char *buffer; /* entire string we are scanning */ @@ -46,7 +48,7 @@ typedef struct int4 state; int4 count; - /* reverse polish notation in list (for temprorary usage) */ + /* reverse polish notation in list (for temporary usage) */ struct ParseQueryNode *str; /* number in str */ @@ -102,36 +104,12 @@ extern void parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen); * headline framework, flow in common to generate: * 1 parse text with hlparsetext * 2 parser-specific function to find part - * 3 generatHeadline to generate result text + * 3 generateHeadline to generate result text */ -typedef struct -{ - uint32 selected:1, - in:1, - replace:1, - repeated:1, - unused:4, - type:8, - len:16; - char *word; - QueryItem *item; -} HeadlineWord; - -typedef struct -{ - HeadlineWord *words; - int4 lenwords; - int4 curwords; - char *startsel; - char *stopsel; - int2 startsellen; - int2 stopsellen; -} HeadlineText; - -extern void hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, +extern void hlparsetext(Oid 
cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen); -extern text *generatHeadline(HeadlineText * prs); +extern text *generateHeadline(HeadlineParsedText * prs); /* * token/node types for parsing |
