summaryrefslogtreecommitdiff
path: root/contrib/tsearch2/query.c
diff options
context:
space:
mode:
authorTeodor Sigaev2005-01-25 15:24:38 +0000
committerTeodor Sigaev2005-01-25 15:24:38 +0000
commit324300bc7ccba6988f16915468ee2b870ef3ae5f (patch)
tree0922e10a1c417c5bff0100730281ad22add28860 /contrib/tsearch2/query.c
parentd314616d128ba692aec434bd376bc40886f98f7b (diff)
Improve support for agglutinative languages (queries with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL.

typedef struct {
    /*
     * Number of the variant of the split word. For example, the word
     * 'fotballklubber' (Norwegian) has two variants of splitting:
     * ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
     * should return:
     *
     *   nvariant  lexeme
     *   1         fotball
     *   1         klubb
     *   2         fot
     *   2         ball
     *   2         klubb
     */
    uint16 nvariant;

    /* currently unused */
    uint16 flags;

    /* C-string */
    char *lexeme;
} TSLexeme;
Diffstat (limited to 'contrib/tsearch2/query.c')
-rw-r--r--contrib/tsearch2/query.c43
1 files changed, 33 insertions, 10 deletions
diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c
index 6787b63ae8..ee4f779d58 100644
--- a/contrib/tsearch2/query.c
+++ b/contrib/tsearch2/query.c
@@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
{
int4 count = 0;
PRSTEXT prs;
+ uint32 variant, pos, cntvar=0, cntpos=0, cnt=0;
prs.lenwords = 32;
prs.curwords = 0;
@@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
- for (count = 0; count < prs.curwords; count++)
- {
- pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
- pfree(prs.words[count].word);
- if (count)
- pushquery(state, OPR, (int4) '&', 0, 0, 0);
- }
- pfree(prs.words);
+ if ( prs.curwords>0 ) {
+
+ while (count < prs.curwords) {
+ pos = prs.words[count].pos.pos;
+ cntvar=0;
+ while(count < prs.curwords && pos==prs.words[count].pos.pos) {
+ variant = prs.words[count].nvariant;
+
+ cnt=0;
+ while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant) {
+
+ pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+ pfree(prs.words[count].word);
+ if ( cnt )
+ pushquery(state, OPR, (int4) '&', 0, 0, 0);
+ cnt++;
+ count++;
+ }
+
+ if ( cntvar )
+ pushquery(state, OPR, (int4) '|', 0, 0, 0);
+ cntvar++;
+ }
+
+ if (cntpos)
+ pushquery(state, OPR, (int4) '&', 0, 0, 0);
+
+ cntpos++;
+ }
+
+ pfree(prs.words);
- /* XXX */
- if (prs.curwords == 0)
+ } else
pushval_asis(state, VALSTOP, NULL, 0, 0);
}