summaryrefslogtreecommitdiff
path: root/contrib/tsearch2/dict.h
blob: a0e9fe6facb52e9809c569f942cf707328fdb2df (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */

#ifndef __DICT_H__
#define __DICT_H__
#include "postgres.h"
#include "fmgr.h"
#include "ts_cfg.h"

/*
 * Stop-word list container used by the *stoplist() functions declared in
 * this header.  The per-field notes are inferred from the field names and
 * types only -- confirm against the implementation.
 */
typedef struct
{
	/* presumably the number of entries in stop -- TODO confirm */
	int			len;
	/* array of C-string pointers (presumably the stop words themselves) */
	char	  **stop;
	/*
	 * Callback that takes a char * and returns a char *; presumably applied
	 * to words (e.g. for normalization) -- when it is called and who owns the
	 * returned string is not visible here.
	 */
	char	   *(*wordop) (char *);
}	StopList;

/*
 * Operations on a StopList.  The definitions live outside this header, so
 * the summaries below are inferred from the names and signatures -- verify
 * against the definitions before relying on them.
 *
 * sortstoplist   - presumably sorts the entries of s
 * freestoplist   - presumably releases the storage held by s
 * readstoplist   - fills s from the text argument in (how in is interpreted
 *                  is not visible here)
 * searchstoplist - returns a bool, presumably true when key is found in s
 */
void		sortstoplist(StopList * s);
void		freestoplist(StopList * s);
void		readstoplist(text *in, StopList * s);
bool		searchstoplist(StopList * s, char *key);

/*
 * Per-dictionary information, filled in by init_dict() and returned by
 * finddict() (both declared in this header).
 */
typedef struct
{
	/* Oid identifying the dictionary */
	Oid			dict_id;
	/* fmgr call information for the dictionary's lexize function */
	FmgrInfo	lexize_info;
	/* opaque pointer; presumably the dictionary's own data -- TODO confirm */
	void	   *dictionary;
}	DictInfo;

/*
 * Dictionary lookup and setup.  The definitions live outside this header, so
 * the summaries below are inferred from the names and signatures -- verify
 * against the definitions.
 *
 * init_dict    - fills *dict for the dictionary identified by id
 * finddict     - returns the DictInfo for id (presumably from a cache;
 *                lifetime of the returned pointer is not visible here)
 * name2id_dict - maps a dictionary name to its Oid
 * reset_dict   - takes no arguments; presumably discards cached dictionary
 *                state -- TODO confirm
 */
void		init_dict(Oid id, DictInfo * dict);
DictInfo   *finddict(Oid id);
Oid			name2id_dict(text *name);
void		reset_dict(void);

/*
 * State exchanged with a dictionary's lexize function (DictInfo.lexize_info)
 * when the dictionary needs to look at more than one lexeme.
 */
typedef struct {
	bool isend; /* in: tells the lexize function that the end of the text has been reached */
	bool getnext; /* out: the dictionary wants the next lexeme */
	void	*private;  /* internal dictionary state kept between calls while getnext == true */
} DictSubState;

/*
 * Simple parser for a configuration string.
 *
 * Map holds one key/value pair of C strings.
 */
typedef struct
{
	char	   *key;
	char	   *value;
}	Map;

/*
 * Parses the configuration text in and hands the result back through *m.
 * Presumably *m receives an array of Map entries; how the array is
 * terminated and who frees it is not visible in this header -- TODO confirm.
 */
void		parse_cfgdict(text *in, Map ** m);

/* Return struct for any lexize function */
typedef struct
{
	/*
	 * Number of the variant of a split word.  For example, the Norwegian
	 * word 'fotballklubber' can be split in two ways: ( fotball, klubb )
	 * and ( fot, ball, klubb ).  So the dictionary should return:
	 *
	 *	nvariant	lexeme
	 *	   1		fotball
	 *	   1		klubb
	 *	   2		fot
	 *	   2		ball
	 *	   2		klubb
	 */
	uint16		nvariant;

	/* presumably a bit mask of TSL_* values (see TSL_ADDPOS) -- TODO confirm */
	uint16		flags;

	/* C-string */
	char	   *lexeme;
}	TSLexeme;

/*
 * Presumably a bit for TSLexeme.flags; its effect is defined by the code
 * that consumes the lexemes, which is not visible in this header.
 */
#define TSL_ADDPOS		0x01


/*
 * Lexize subsystem
 */

/*
 * One parsed lexeme, kept as a node of a singly linked list (see
 * ListParsedLex).  type, lemm and lenlemm have the same names as the
 * arguments of LexizeAddLemm(), which presumably creates these nodes.
 */
typedef struct ParsedLex {
	/* lexeme type code (the set of values is defined outside this header) */
    int     	type;
	/* lexeme text; its length is given by lenlemm */
    char    	*lemm;
    int     	lenlemm;
	/* NOTE(review): meaning is not visible in this header -- see users */
	bool		resfollow;
	/* next node in the list */
    struct ParsedLex *next;
} ParsedLex;

/*
 * List of ParsedLex nodes, tracked by pointers to its first and last
 * elements (nodes are presumably chained through ParsedLex.next).
 */
typedef struct ListParsedLex {
	ParsedLex	*head;
	ParsedLex	*tail;
} ListParsedLex;

/*
 * Working state of the lexize subsystem: set up by LexizeInit(), fed by
 * LexizeAddLemm() and consumed by LexizeExec().  Notes on fields without an
 * original comment are inferred from names and types -- confirm against the
 * implementation.
 */
typedef struct {
	/* configuration passed to LexizeInit() */
    TSCfgInfo       *cfg;
	/* presumably the Oid of the dictionary currently being used */
    Oid             curDictId;
	/* presumably the position of that dictionary in a list -- TODO confirm */
    int             posDict;
	/* state shared with the dictionary's lexize function (see DictSubState) */
    DictSubState    dictState;
	/* NOTE(review): role is not visible in this header -- see users */
    ParsedLex       *curSub;
	ListParsedLex	towork;   /* current list to work on */
	ListParsedLex	waste;    /* list of lexemes that have already been lexized */

	/*
	 * Fields that store the last variant to lexize (basically for a
	 * thesaurus or a similar dictionary, which wants several lexemes)
	 */

	ParsedLex		*lastRes;
	TSLexeme		*tmpRes;
} LexizeData;


/*
 * Entry points of the lexize subsystem.  The definitions live outside this
 * header, so the summaries below are inferred from the names and signatures
 * -- verify against the definitions.
 *
 * LexizeInit    - prepares *ld for use with the configuration cfg
 * LexizeAddLemm - hands one lexeme (type, text lemm, length lenlemm) to ld
 * LexizeExec    - returns a TSLexeme pointer (presumably an array of results,
 *                 or NULL when there is nothing to return -- TODO confirm)
 *                 and reports a related ParsedLex through *correspondLexem
 */
void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);

#endif