diff options
author | Teodor Sigaev | 2005-01-25 15:24:38 +0000 |
---|---|---|
committer | Teodor Sigaev | 2005-01-25 15:24:38 +0000 |
commit | 324300bc7ccba6988f16915468ee2b870ef3ae5f (patch) | |
tree | 0922e10a1c417c5bff0100730281ad22add28860 /contrib/tsearch2/query.c | |
parent | d314616d128ba692aec434bd376bc40886f98f7b (diff) |
improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
to_tsquery
------------------------------------------------
'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)
So, the interface to dictionaries has changed: the lexize method of a dictionary should return
a pointer to an array of TSLexeme structs instead of char**. The last element should
have TSLexeme->lexeme == NULL.
/*
 * TSLexeme: one lexeme produced by a dictionary for an input word.
 */
typedef struct {
/* Number of the split variant this lexeme belongs to. For example,
the word 'fotballklubber' (Norwegian) can be split in two ways:
( fotball, klubb ) and ( fot, ball, klubb ). So the dictionary
should return:
nvariant lexeme
1 fotball
1 klubb
2 fot
2 ball
2 klubb
*/
uint16 nvariant;
/* currently unused */
uint16 flags;
/* the lexeme itself, as a C-string */
char *lexeme;
} TSLexeme;
Diffstat (limited to 'contrib/tsearch2/query.c')
-rw-r--r-- | contrib/tsearch2/query.c | 43 |
1 files changed, 33 insertions, 10 deletions
diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c index 6787b63ae8..ee4f779d58 100644 --- a/contrib/tsearch2/query.c +++ b/contrib/tsearch2/query.c @@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we { int4 count = 0; PRSTEXT prs; + uint32 variant, pos, cntvar=0, cntpos=0, cnt=0; prs.lenwords = 32; prs.curwords = 0; @@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval); - for (count = 0; count < prs.curwords; count++) - { - pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); - pfree(prs.words[count].word); - if (count) - pushquery(state, OPR, (int4) '&', 0, 0, 0); - } - pfree(prs.words); + if ( prs.curwords>0 ) { + + while (count < prs.curwords) { + pos = prs.words[count].pos.pos; + cntvar=0; + while(count < prs.curwords && pos==prs.words[count].pos.pos) { + variant = prs.words[count].nvariant; + + cnt=0; + while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant) { + + pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); + pfree(prs.words[count].word); + if ( cnt ) + pushquery(state, OPR, (int4) '&', 0, 0, 0); + cnt++; + count++; + } + + if ( cntvar ) + pushquery(state, OPR, (int4) '|', 0, 0, 0); + cntvar++; + } + + if (cntpos) + pushquery(state, OPR, (int4) '&', 0, 0, 0); + + cntpos++; + } + + pfree(prs.words); - /* XXX */ - if (prs.curwords == 0) + } else pushval_asis(state, VALSTOP, NULL, 0, 0); } |