diff options
Diffstat (limited to 'contrib/tsearch')
-rw-r--r-- | contrib/tsearch/crc32.c | 18 | ||||
-rw-r--r-- | contrib/tsearch/crc32.h | 3 | ||||
-rw-r--r-- | contrib/tsearch/deflex.h | 14 | ||||
-rw-r--r-- | contrib/tsearch/gistidx.h | 35 | ||||
-rw-r--r-- | contrib/tsearch/morph.c | 210 | ||||
-rw-r--r-- | contrib/tsearch/morph.h | 6 | ||||
-rw-r--r-- | contrib/tsearch/parser.h | 13 | ||||
-rw-r--r-- | contrib/tsearch/query.c | 748 | ||||
-rw-r--r-- | contrib/tsearch/query.h | 48 | ||||
-rw-r--r-- | contrib/tsearch/rewrite.c | 278 | ||||
-rw-r--r-- | contrib/tsearch/rewrite.h | 5 | ||||
-rw-r--r-- | contrib/tsearch/txtidx.c | 549 | ||||
-rw-r--r-- | contrib/tsearch/txtidx.h | 59 |
13 files changed, 1126 insertions, 860 deletions
diff --git a/contrib/tsearch/crc32.c b/contrib/tsearch/crc32.c index 61cb6ed3ed..dc93db727c 100644 --- a/contrib/tsearch/crc32.c +++ b/contrib/tsearch/crc32.c @@ -87,15 +87,17 @@ static const unsigned int crc32tab[256] = { 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, }; -unsigned int crc32_sz(char * buf, int size){ +unsigned int +crc32_sz(char *buf, int size) +{ unsigned int crc = ~0; - char *p ; - int len, nr; + char *p; + int len, + nr; - len = 0 ; - nr=size; - for (len += nr, p = buf; nr--; ++p) { - _CRC32_(crc, *p) ; - } + len = 0; + nr = size; + for (len += nr, p = buf; nr--; ++p) + _CRC32_(crc, *p); return ~crc; } diff --git a/contrib/tsearch/crc32.h b/contrib/tsearch/crc32.h index c4f5ed703c..b08065d8c8 100644 --- a/contrib/tsearch/crc32.h +++ b/contrib/tsearch/crc32.h @@ -2,9 +2,8 @@ #define _CRC32_H /* Returns crc32 of data block */ -extern unsigned int crc32_sz(char * buf, int size); +extern unsigned int crc32_sz(char *buf, int size); /* Returns crc32 of null-terminated string */ #define crc32(buf) crc32_sz((buf),strlen(buf)) - #endif diff --git a/contrib/tsearch/deflex.h b/contrib/tsearch/deflex.h index 8c74595a68..7fbd84adf1 100644 --- a/contrib/tsearch/deflex.h +++ b/contrib/tsearch/deflex.h @@ -12,12 +12,12 @@ #define HOST 6 #define FLOAT 7 #define FINT 8 -#define PARTWORD 9 -#define NONLATINPARTWORD 10 -#define LATPARTWORD 11 -#define SPACE 12 -#define SYMTAG 13 -#define HTTP 14 +#define PARTWORD 9 +#define NONLATINPARTWORD 10 +#define LATPARTWORD 11 +#define SPACE 12 +#define SYMTAG 13 +#define HTTP 14 #define DEFISWORD 15 #define DEFISLATWORD 16 #define DEFISNONLATINWORD 17 @@ -25,6 +25,4 @@ #define FILEPATH 19 extern const char *descr[]; - #endif - diff --git a/contrib/tsearch/gistidx.h b/contrib/tsearch/gistidx.h index dc991ab0d7..f4576a1d24 100644 --- a/contrib/tsearch/gistidx.h +++ b/contrib/tsearch/gistidx.h @@ -9,7 +9,8 @@ * signature defines */ #define BITBYTE 8 -#define SIGLENINT 64 /* >121 => key will toast, so it will not work !!! */ +#define SIGLENINT 64 /* >121 => key will toast, so it will not + * work !!! */ #define SIGLEN ( sizeof(int4)*SIGLENINT ) #define SIGLENBIT (SIGLEN*BITBYTE) @@ -21,9 +22,9 @@ typedef char *BITVECP; a;\ } #define LOOPBIT(a) \ - for(i=0;i<SIGLENBIT;i++) {\ - a;\ - } + for(i=0;i<SIGLENBIT;i++) {\ + a;\ + } #define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) ) #define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 ) @@ -38,27 +39,27 @@ typedef char *BITVECP; /* - * type of index key + * type of index key */ -typedef struct { - int4 len; - int4 flag; - char data[1]; -} GISTTYPE; +typedef struct +{ + int4 len; + int4 flag; + char data[1]; +} GISTTYPE; -#define ARRKEY 0x01 -#define SIGNKEY 0x02 -#define ALLISTRUE 0x04 +#define ARRKEY 0x01 +#define SIGNKEY 0x02 +#define ALLISTRUE 0x04 -#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY ) +#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY ) #define ISSIGNKEY(x) ( ((GISTTYPE*)x)->flag & SIGNKEY ) #define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE ) -#define GTHDRSIZE ( sizeof(int4)*2 ) +#define GTHDRSIZE ( sizeof(int4)*2 ) #define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) #define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) ) #define GETARR(x) ( (int4*)( (char*)x+GTHDRSIZE ) ) -#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) ) +#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) ) #endif - diff --git a/contrib/tsearch/morph.c b/contrib/tsearch/morph.c index a31946ac86..3a66ecbbc6 100644 --- a/contrib/tsearch/morph.c +++ b/contrib/tsearch/morph.c @@ -3,7 +3,7 @@ * New dictionary is include in dict.h. For languages which * use latin charset it may be need to modify mapdict table. * Teodor Sigaev <teodor@stack.net> - */ + */ #include "postgres.h" #include "utils/elog.h" @@ -20,157 +20,183 @@ * All of this methods are optional, but * if all methods are NULL, then dictionary does nothing :) * Return value of lemmatize must be palloced or the same. - * Return value of init must be malloced in other case - * it will be free in end of transaction! + * Return value of init must be malloced in other case + * it will be free in end of transaction! */ -typedef struct { - char localename[LOCALE_NAME_BUFLEN]; +typedef struct +{ + char localename[LOCALE_NAME_BUFLEN]; /* init dictionary */ - void* (*init)(void); + void *(*init) (void); /* close dictionary */ - void (*close)(void*); + void (*close) (void *); /* find in dictionary */ - char* (*lemmatize)(void*,char*,int*); - int (*is_stoplemm)(void*,char*,int); - int (*is_stemstoplemm)(void*,char*,int); -} DICT; + char *(*lemmatize) (void *, char *, int *); + int (*is_stoplemm) (void *, char *, int); + int (*is_stemstoplemm) (void *, char *, int); +} DICT; /* insert all dictionaries */ #define DICT_BODY #include "dict.h" -#undef DICT_BODY +#undef DICT_BODY -/* fill dictionary's structure */ +/* fill dictionary's structure */ #define DICT_TABLE -DICT dicts[] = { +DICT dicts[] = { { - "C",NULL,NULL,NULL,NULL,NULL /* fake dictionary */ + "C", NULL, NULL, NULL, NULL, NULL /* fake dictionary */ } #include "dict.h" }; + #undef DICT_TABLE /* array for storing dictinary's objects (if needed) */ -void* dictobjs[ lengthof(dicts) ]; +void *dictobjs[ + lengthof(dicts)]; #define STOPLEXEM -2 #define BYLOCALE -1 -#define NODICT 0 -#define DEFAULTDICT 1 - +#define NODICT 0 +#define DEFAULTDICT 1 + #define MAXNDICT 2 -typedef int2 MAPDICT[MAXNDICT]; +typedef int2 MAPDICT[MAXNDICT]; + #define GETDICT(x,i) *( ((int2*)(x)) + (i) ) /* map dictionaries for lexem type */ static MAPDICT mapdict[] = { - {NODICT, NODICT}, /* not used */ - {DEFAULTDICT, NODICT}, /* LATWORD */ - {BYLOCALE, NODICT}, /* NONLATINWORD */ - {BYLOCALE, DEFAULTDICT}, /* UWORD */ - {NODICT, NODICT}, /* EMAIL */ - {NODICT, NODICT}, /* FURL */ - {NODICT, NODICT}, /* HOST */ - {NODICT, NODICT}, /* FLOAT */ - {NODICT, NODICT}, /* FINT */ - {BYLOCALE, DEFAULTDICT}, /* PARTWORD */ - {BYLOCALE, NODICT}, /* NONLATINPARTWORD */ - {DEFAULTDICT, NODICT}, /* LATPARTWORD */ - {STOPLEXEM, NODICT}, /* SPACE */ - {STOPLEXEM, NODICT}, /* SYMTAG */ - {STOPLEXEM, NODICT}, /* HTTP */ - {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */ - {DEFAULTDICT, NODICT}, /* DEFISLATWORD */ - {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */ - {NODICT, NODICT}, /* URI */ - {NODICT, NODICT} /* FILEPATH */ + {NODICT, NODICT}, /* not used */ + {DEFAULTDICT, NODICT}, /* LATWORD */ + {BYLOCALE, NODICT}, /* NONLATINWORD */ + {BYLOCALE, DEFAULTDICT}, /* UWORD */ + {NODICT, NODICT}, /* EMAIL */ + {NODICT, NODICT}, /* FURL */ + {NODICT, NODICT}, /* HOST */ + {NODICT, NODICT}, /* FLOAT */ + {NODICT, NODICT}, /* FINT */ + {BYLOCALE, DEFAULTDICT}, /* PARTWORD */ + {BYLOCALE, NODICT}, /* NONLATINPARTWORD */ + {DEFAULTDICT, NODICT}, /* LATPARTWORD */ + {STOPLEXEM, NODICT}, /* SPACE */ + {STOPLEXEM, NODICT}, /* SYMTAG */ + {STOPLEXEM, NODICT}, /* HTTP */ + {BYLOCALE, DEFAULTDICT}, /* DEFISWORD */ + {DEFAULTDICT, NODICT}, /* DEFISLATWORD */ + {BYLOCALE, NODICT}, /* DEFISNONLATINWORD */ + {NODICT, NODICT}, /* URI */ + {NODICT, NODICT} /* FILEPATH */ }; -static bool inited=false; +static bool inited = false; + +void +initmorph(void) +{ + int i, + j, + k; + MAPDICT *md; + bool needinit[lengthof(dicts)]; -void initmorph(void) { - int i,j,k; - MAPDICT *md; - bool needinit[ lengthof(dicts) ]; #ifdef USE_LOCALE - PG_LocaleCategories lc; + PG_LocaleCategories lc; - int bylocaledict = NODICT; + int bylocaledict = NODICT; #endif - if ( inited ) return; - for(i=1; i<lengthof(dicts);i++) + if (inited) + return; + for (i = 1; i < lengthof(dicts); i++) needinit[i] = false; - + #ifdef USE_LOCALE PGLC_current(&lc); - for(i=1;i<lengthof(dicts);i++) - if (strcmp( dicts[i].localename, lc.lang ) == 0) { + for (i = 1; i < lengthof(dicts); i++) + if (strcmp(dicts[i].localename, lc.lang) == 0) + { bylocaledict = i; break; } PGLC_free_categories(&lc); #endif - for(i=1; i<lengthof(mapdict);i++) { - k=0; + for (i = 1; i < lengthof(mapdict); i++) + { + k = 0; md = &mapdict[i]; - for(j=0;j<MAXNDICT;j++) { - GETDICT(md,k) = GETDICT(md,j); - if ( GETDICT(md,k) == NODICT ) { + for (j = 0; j < MAXNDICT; j++) + { + GETDICT(md, k) = GETDICT(md, j); + if (GETDICT(md, k) == NODICT) break; - } else if ( GETDICT(md,k) == BYLOCALE ) { + else if (GETDICT(md, k) == BYLOCALE) + { #ifdef USE_LOCALE - if ( bylocaledict == NODICT ) + if (bylocaledict == NODICT) continue; - GETDICT(md,k) = bylocaledict; + GETDICT(md, k) = bylocaledict; #else continue; #endif } - if ( GETDICT(md,k) >= (int2)lengthof(dicts) ) + if (GETDICT(md, k) >= (int2) lengthof(dicts)) continue; - needinit[ GETDICT(md,k) ] = true; - k++; + needinit[GETDICT(md, k)] = true; + k++; } - for(;k<MAXNDICT;k++) - if ( GETDICT(md,k) != STOPLEXEM ) - GETDICT(md,k) = NODICT; + for (; k < MAXNDICT; k++) + if (GETDICT(md, k) != STOPLEXEM) + GETDICT(md, k) = NODICT; } - for(i=1; i<lengthof(dicts);i++) - if ( needinit[i] && dicts[i].init ) - dictobjs[i] = (*(dicts[i].init))(); - + for (i = 1; i < lengthof(dicts); i++) + if (needinit[i] && dicts[i].init) + dictobjs[i] = (*(dicts[i].init)) (); + inited = true; return; } -char* lemmatize( char* word, int *len, int type ) { - int2 nd; - int i; - DICT *dict; +char * +lemmatize(char *word, int *len, int type) +{ + int2 nd; + int i; + DICT *dict; - for(i=0;i<MAXNDICT;i++) { - nd = GETDICT( &mapdict[type], i ); - if ( nd == NODICT ) { - /* there is no dictionary */ + for (i = 0; i < MAXNDICT; i++) + { + nd = GETDICT(&mapdict[type], i); + if (nd == NODICT) + { + /* there is no dictionary */ return word; - } else if ( nd == STOPLEXEM ) { + } + else if (nd == STOPLEXEM) + { /* word is stopword */ return NULL; - } else { - dict = &dicts[ nd ]; - if ( dict->is_stoplemm && (*(dict->is_stoplemm))(dictobjs[nd], word, *len) ) + } + else + { + dict = &dicts[nd]; + if (dict->is_stoplemm && (*(dict->is_stoplemm)) (dictobjs[nd], word, *len)) return NULL; - if ( dict->lemmatize ) { - int oldlen = *len; - char *newword = (*(dict->lemmatize))(dictobjs[nd], word, len); + if (dict->lemmatize) + { + int oldlen = *len; + char *newword = (*(dict->lemmatize)) (dictobjs[nd], word, len); + /* word is recognized by distionary */ - if ( newword != word || *len != oldlen ) { - if ( dict->is_stemstoplemm && - (*(dict->is_stemstoplemm))(dictobjs[nd], word, *len) ) { - if ( newword != word && newword) + if (newword != word || *len != oldlen) + { + if (dict->is_stemstoplemm && + (*(dict->is_stemstoplemm)) (dictobjs[nd], word, *len)) + { + if (newword != word && newword) pfree(newword); return NULL; } @@ -183,6 +209,8 @@ char* lemmatize( char* word, int *len, int type ) { return word; } -bool is_stoptype(int type) { - return ( GETDICT( &mapdict[type], 0 ) == STOPLEXEM ) ? true : false; +bool +is_stoptype(int type) +{ + return (GETDICT(&mapdict[type], 0) == STOPLEXEM) ? true : false; } diff --git a/contrib/tsearch/morph.h b/contrib/tsearch/morph.h index 1e8f361ed9..1c64227fae 100644 --- a/contrib/tsearch/morph.h +++ b/contrib/tsearch/morph.h @@ -1,9 +1,9 @@ #ifndef __MORPH_H__ #define __MORPH_H__ -void initmorph(void); +void initmorph(void); -char* lemmatize( char* word, int *len, int type ); +char *lemmatize(char *word, int *len, int type); -bool is_stoptype(int type); +bool is_stoptype(int type); #endif diff --git a/contrib/tsearch/parser.h b/contrib/tsearch/parser.h index 780cb587d5..0e43e675fb 100644 --- a/contrib/tsearch/parser.h +++ b/contrib/tsearch/parser.h @@ -1,11 +1,10 @@ #ifndef __PARSER_H__ #define __PARSER_H__ -char *token; -int tokenlen; -int tsearch_yylex(void); -void start_parse_str(char*, int); -void start_parse_fh(FILE*, int); -void end_parse(void); - +char *token; +int tokenlen; +int tsearch_yylex(void); +void start_parse_str(char *, int); +void start_parse_fh(FILE *, int); +void end_parse(void); #endif diff --git a/contrib/tsearch/query.c b/contrib/tsearch/query.c index 238432ea43..90687d38cf 100644 --- a/contrib/tsearch/query.c +++ b/contrib/tsearch/query.c @@ -31,112 +31,131 @@ #include "parser.h" PG_FUNCTION_INFO_V1(mqtxt_in); -Datum mqtxt_in(PG_FUNCTION_ARGS); +Datum mqtxt_in(PG_FUNCTION_ARGS); + PG_FUNCTION_INFO_V1(qtxt_in); -Datum qtxt_in(PG_FUNCTION_ARGS); +Datum qtxt_in(PG_FUNCTION_ARGS); + PG_FUNCTION_INFO_V1(qtxt_out); -Datum qtxt_out(PG_FUNCTION_ARGS); +Datum qtxt_out(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(execqtxt); -Datum execqtxt(PG_FUNCTION_ARGS); +Datum execqtxt(PG_FUNCTION_ARGS); + PG_FUNCTION_INFO_V1(rexecqtxt); -Datum rexecqtxt(PG_FUNCTION_ARGS); +Datum rexecqtxt(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(querytree); -Datum querytree(PG_FUNCTION_ARGS); - -#define END 0 -#define ERR 1 -#define VAL 2 -#define OPR 3 -#define OPEN 4 -#define CLOSE 5 -#define VALTRUE 6 /* for stop words */ +Datum querytree(PG_FUNCTION_ARGS); + +#define END 0 +#define ERR 1 +#define VAL 2 +#define OPR 3 +#define OPEN 4 +#define CLOSE 5 +#define VALTRUE 6 /* for stop words */ #define VALFALSE 7 /* parser's states */ -#define WAITOPERAND 1 +#define WAITOPERAND 1 #define WAITOPERATOR 2 /* * node of query tree, also used * for storing polish notation in parser */ -typedef struct NODE { - int4 type; - int4 val; - int2 distance; - int2 length; - struct NODE *next; -} NODE; - -typedef struct { - char *buf; - int4 state; - int4 count; - /* reverse polish notation in list (for temprorary usage)*/ - NODE *str; +typedef struct NODE +{ + int4 type; + int4 val; + int2 distance; + int2 length; + struct NODE *next; +} NODE; + +typedef struct +{ + char *buf; + int4 state; + int4 count; + /* reverse polish notation in list (for temprorary usage) */ + NODE *str; /* number in str */ - int4 num; + int4 num; /* user-friendly operand */ - int4 lenop; - int4 sumlen; - char *op; - char *curop; + int4 lenop; + int4 sumlen; + char *op; + char *curop; /* state for value's parser */ - TI_IN_STATE valstate; -} QPRS_STATE; + TI_IN_STATE valstate; +} QPRS_STATE; /* * get token from query string */ -static int4 -gettoken_query( QPRS_STATE* state, int4* val, int4* lenval, char** strval ) { - while(1) { - switch(state->state) { - case WAITOPERAND: - if ( *(state->buf) == '!' ) { +static int4 +gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval) +{ + while (1) + { + switch (state->state) + { + case WAITOPERAND: + if (*(state->buf) == '!') + { (state->buf)++; - *val = (int4)'!'; + *val = (int4) '!'; return OPR; - } else if ( *(state->buf) == '(' ) { + } + else if (*(state->buf) == '(') + { state->count++; (state->buf)++; return OPEN; - } else if ( *(state->buf) != ' ' ) { + } + else if (*(state->buf) != ' ') + { state->valstate.prsbuf = state->buf; state->state = WAITOPERATOR; - if ( gettoken_txtidx( &(state->valstate) ) ) { + if (gettoken_txtidx(&(state->valstate))) + { *strval = state->valstate.word; *lenval = state->valstate.curpos - state->valstate.word; state->buf = state->valstate.prsbuf; return VAL; - } else + } + else elog(ERROR, "No operand"); } break; case WAITOPERATOR: - if ( *(state->buf) == '&' || *(state->buf) == '|' ) { + if (*(state->buf) == '&' || *(state->buf) == '|') + { state->state = WAITOPERAND; *val = (int4) *(state->buf); - (state->buf)++; + (state->buf)++; return OPR; - } else if ( *(state->buf) == ')' ) { + } + else if (*(state->buf) == ')') + { (state->buf)++; state->count--; - return ( state->count <0 ) ? ERR : CLOSE; - } else if ( *(state->buf) == '\0' ) { - return ( state->count ) ? ERR : END; - } else if ( *(state->buf) != ' ' ) + return (state->count < 0) ? ERR : CLOSE; + } + else if (*(state->buf) == '\0') + return (state->count) ? ERR : END; + else if (*(state->buf) != ' ') return ERR; break; - default: + default: return ERR; break; - } - (state->buf)++; + } + (state->buf)++; } return END; } @@ -145,16 +164,18 @@ gettoken_query( QPRS_STATE* state, int4* val, int4* lenval, char** strval ) { * push new one in polish notation reverse view */ static void -pushquery( QPRS_STATE *state, int4 type, int4 val, int4 distance, int4 lenval) { - NODE *tmp = (NODE*)palloc(sizeof(NODE)); - tmp->type=type; - tmp->val =val; - if ( distance>0xffff ) - elog(ERROR,"Value is too big"); - if ( lenval > 0xffff ) - elog(ERROR,"Operand is too long"); - tmp->distance=distance; - tmp->length=lenval; +pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval) +{ + NODE *tmp = (NODE *) palloc(sizeof(NODE)); + + tmp->type = type; + tmp->val = val; + if (distance > 0xffff) + elog(ERROR, "Value is too big"); + if (lenval > 0xffff) + elog(ERROR, "Operand is too long"); + tmp->distance = distance; + tmp->length = lenval; tmp->next = state->str; state->str = tmp; state->num++; @@ -164,20 +185,23 @@ pushquery( QPRS_STATE *state, int4 type, int4 val, int4 distance, int4 lenval) { * This function is used for query_txt parsing */ static void -pushval_asis(QPRS_STATE *state, int type, char* strval, int lenval) { - if ( lenval>0xffff ) +pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval) +{ + if (lenval > 0xffff) elog(ERROR, "Word is too long"); - pushquery(state, type, crc32_sz( (uint8*)strval, lenval ), - state->curop - state->op, lenval); + pushquery(state, type, crc32_sz((uint8 *) strval, lenval), + state->curop - state->op, lenval); + + while (state->curop - state->op + lenval + 1 >= state->lenop) + { + int4 tmp = state->curop - state->op; - while ( state->curop - state->op + lenval + 1 >= state->lenop ) { - int4 tmp = state->curop - state->op; state->lenop *= 2; - state->op = (char*)repalloc( (void*)state->op, state->lenop ); + state->op = (char *) repalloc((void *) state->op, state->lenop); state->curop = state->op + tmp; } - memcpy( (void*)state->curop, (void*)strval, lenval ); + memcpy((void *) state->curop, (void *) strval, lenval); state->curop += lenval; *(state->curop) = '\0'; state->curop++; @@ -189,28 +213,34 @@ pushval_asis(QPRS_STATE *state, int type, char* strval, int lenval) { * This function is used for mquery_txt parsing */ static void -pushval_morph(QPRS_STATE *state, int typeval, char* strval, int lenval) { - int4 type, lenlemm; - int4 count = 0; - char *lemm; - - start_parse_str( strval, lenval ); - while( (type=tsearch_yylex()) != 0 ) { - if ( tokenlen>0xffff ) { +pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval) +{ + int4 type, + lenlemm; + int4 count = 0; + char *lemm; + + start_parse_str(strval, lenval); + while ((type = tsearch_yylex()) != 0) + { + if (tokenlen > 0xffff) + { end_parse(); elog(ERROR, "Word is too long"); } lenlemm = tokenlen; - lemm = lemmatize( token, &lenlemm, type ); - if ( lemm ) { - pushval_asis(state,VAL,lemm,lenlemm); - if ( lemm != token ) pfree(lemm); - } else { - pushval_asis(state,VALTRUE,0,0); + lemm = lemmatize(token, &lenlemm, type); + if (lemm) + { + pushval_asis(state, VAL, lemm, lenlemm); + if (lemm != token) + pfree(lemm); } - if ( count ) - pushquery(state, OPR, (int4)'&', 0,0); - count++; + else + pushval_asis(state, VALTRUE, 0, 0); + if (count) + pushquery(state, OPR, (int4) '&', 0, 0); + count++; } end_parse(); } @@ -219,100 +249,114 @@ pushval_morph(QPRS_STATE *state, int typeval, char* strval, int lenval) { /* * make polish notaion of query */ -static int4 -makepol(QPRS_STATE *state, void (*pushval)(QPRS_STATE*,int,char*,int)) { - int4 val,type; - int4 lenval; - char *strval; - int4 stack[STACKDEPTH]; - int4 lenstack=0; - - while( (type=gettoken_query(state, &val, &lenval, &strval))!=END ) { - switch(type) { +static int4 +makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int)) +{ + int4 val, + type; + int4 lenval; + char *strval; + int4 stack[STACKDEPTH]; + int4 lenstack = 0; + + while ((type = gettoken_query(state, &val, &lenval, &strval)) != END) + { + switch (type) + { case VAL: - (*pushval)(state, VAL, strval, lenval); - while ( lenstack && (stack[ lenstack-1 ] == (int4)'&' || - stack[ lenstack-1 ] == (int4)'!') ) { + (*pushval) (state, VAL, strval, lenval); + while (lenstack && (stack[lenstack - 1] == (int4) '&' || + stack[lenstack - 1] == (int4) '!')) + { lenstack--; - pushquery(state, OPR, stack[ lenstack ], 0,0); + pushquery(state, OPR, stack[lenstack], 0, 0); } break; case OPR: - if ( lenstack && val == (int4) '|' ) { - pushquery(state, OPR, val, 0,0); - } else { - if ( lenstack == STACKDEPTH ) - elog(ERROR,"Stack too short"); - stack[ lenstack ] = val; + if (lenstack && val == (int4) '|') + pushquery(state, OPR, val, 0, 0); + else + { + if (lenstack == STACKDEPTH) + elog(ERROR, "Stack too short"); + stack[lenstack] = val; lenstack++; } break; case OPEN: - if ( makepol( state, pushval ) == ERR ) return ERR; - if ( lenstack && (stack[ lenstack-1 ] == (int4)'&' || - stack[ lenstack-1 ] == (int4)'!') ) { + if (makepol(state, pushval) == ERR) + return ERR; + if (lenstack && (stack[lenstack - 1] == (int4) '&' || + stack[lenstack - 1] == (int4) '!')) + { lenstack--; - pushquery(state, OPR, stack[ lenstack ], 0,0); + pushquery(state, OPR, stack[lenstack], 0, 0); } break; case CLOSE: - while ( lenstack ) { + while (lenstack) + { lenstack--; - pushquery(state, OPR, stack[ lenstack ], 0,0); + pushquery(state, OPR, stack[lenstack], 0, 0); }; return END; break; case ERR: default: - elog(ERROR,"Syntax error"); + elog(ERROR, "Syntax error"); return ERR; - + } } - while (lenstack) { + while (lenstack) + { lenstack--; - pushquery(state, OPR, stack[ lenstack ],0,0); + pushquery(state, OPR, stack[lenstack], 0, 0); }; return END; } -typedef struct { - WordEntry *arrb; - WordEntry *arre; - char *values; - char *operand; -} CHKVAL; +typedef struct +{ + WordEntry *arrb; + WordEntry *arre; + char *values; + char *operand; +} CHKVAL; /* * compare 2 string values */ static int4 -ValCompare( CHKVAL *chkval, WordEntry *ptr, ITEM *item ) { - if ( ptr->len == item->length ) +ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item) +{ + if (ptr->len == item->length) return strncmp( - &(chkval->values[ ptr->pos ]), - &(chkval->operand[item->distance]), - item->length ); + &(chkval->values[ptr->pos]), + &(chkval->operand[item->distance]), + item->length); - return ( ptr->len > item->length ) ? 1 : -1; -} + return (ptr->len > item->length) ? 1 : -1; +} /* * is there value 'val' in array or not ? */ static bool -checkcondition_str( void *checkval, ITEM* val ) { - WordEntry *StopLow = ((CHKVAL*)checkval)->arrb; - WordEntry *StopHigh = ((CHKVAL*)checkval)->arre; - WordEntry *StopMiddle; - int difference; - - /* Loop invariant: StopLow <= val < StopHigh */ - - while (StopLow < StopHigh) { +checkcondition_str(void *checkval, ITEM * val) +{ + WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; + WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= val < StopHigh */ + + while (StopLow < StopHigh) + { StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = ValCompare((CHKVAL*)checkval, StopMiddle, val); - if (difference == 0) + difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); + if (difference == 0) return (true); else if (difference < 0) StopLow = StopMiddle + 1; @@ -320,27 +364,33 @@ checkcondition_str( void *checkval, ITEM* val ) { StopHigh = StopMiddle; } - return (false); + return (false); } /* * check for boolean condition */ bool -execute( ITEM* curitem, void *checkval, bool calcnot, bool (*chkcond)(void *checkval, ITEM *val )) { - if ( curitem->type == VAL ) { - return (*chkcond)( checkval, curitem ); - } else if ( curitem->val == (int4)'!' ) { - return ( calcnot ) ? - ( ( execute(curitem + 1, checkval, calcnot, chkcond) ) ? false : true ) - : true; - } else if ( curitem->val == (int4)'&' ) { - if ( execute(curitem + curitem->left, checkval, calcnot, chkcond) ) +execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)) +{ + if (curitem->type == VAL) + return (*chkcond) (checkval, curitem); + else if (curitem->val == (int4) '!') + { + return (calcnot) ? + ((execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true) + : true; + } + else if (curitem->val == (int4) '&') + { + if (execute(curitem + curitem->left, checkval, calcnot, chkcond)) return execute(curitem + 1, checkval, calcnot, chkcond); else return false; - } else { /* |-operator */ - if ( execute(curitem + curitem->left, checkval, calcnot, chkcond) ) + } + else + { /* |-operator */ + if (execute(curitem + curitem->left, checkval, calcnot, chkcond)) return true; else return execute(curitem + 1, checkval, calcnot, chkcond); @@ -349,69 +399,79 @@ execute( ITEM* curitem, void *checkval, bool calcnot, bool (*chkcond)(void *chec } /* - * boolean operations + * boolean operations */ Datum -rexecqtxt(PG_FUNCTION_ARGS) { +rexecqtxt(PG_FUNCTION_ARGS) +{ return DirectFunctionCall2( - execqtxt, - PG_GETARG_DATUM(1), - PG_GETARG_DATUM(0) + execqtxt, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) ); } Datum -execqtxt(PG_FUNCTION_ARGS) { - txtidx *val = ( txtidx * )DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - QUERYTYPE *query = ( QUERYTYPE * )DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); - CHKVAL chkval; - bool result; - - if ( ! val->size ) { - PG_FREE_IF_COPY(val,0); - PG_FREE_IF_COPY(query,1); - PG_RETURN_BOOL( false ); +execqtxt(PG_FUNCTION_ARGS) +{ + txtidx *val = (txtidx *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + CHKVAL chkval; + bool result; + + if (!val->size) + { + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(false); } chkval.arrb = ARRPTR(val); chkval.arre = chkval.arrb + val->size; chkval.values = STRPTR(val); - chkval.operand = GETOPERAND( query ); - result = execute( - GETQUERY(query), - &chkval, - true, - checkcondition_str - ); - - PG_FREE_IF_COPY(val,0); - PG_FREE_IF_COPY(query,1); - PG_RETURN_BOOL( result ); + chkval.operand = GETOPERAND(query); + result = execute( + GETQUERY(query), + &chkval, + true, + checkcondition_str + ); + + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(result); } /* * find left operand in polish notation view */ static void -findoprnd( ITEM *ptr, int4 *pos ) { +findoprnd(ITEM * ptr, int4 *pos) +{ #ifdef BS_DEBUG - elog(NOTICE, ( ptr[*pos].type == OPR ) ? - "%d %c" : "%d %d ", *pos, ptr[*pos].val ); + elog(NOTICE, (ptr[*pos].type == OPR) ? + "%d %c" : "%d %d ", *pos, ptr[*pos].val); #endif - if ( ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE ) { + if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE) + { ptr[*pos].left = 0; (*pos)++; - } else if ( ptr[*pos].val == (int4)'!' ) { + } + else if (ptr[*pos].val == (int4) '!') + { ptr[*pos].left = 1; (*pos)++; - findoprnd( ptr, pos ); - } else { - ITEM *curitem = &ptr[*pos]; - int4 tmp = *pos; + findoprnd(ptr, pos); + } + else + { + ITEM *curitem = &ptr[*pos]; + int4 tmp = *pos; + (*pos)++; - findoprnd(ptr,pos); + findoprnd(ptr, pos); curitem->left = *pos - tmp; - findoprnd(ptr,pos); + findoprnd(ptr, pos); } } @@ -419,16 +479,20 @@ findoprnd( ITEM *ptr, int4 *pos ) { /* * input */ -static QUERYTYPE *queryin(char *buf, void (*pushval)(QPRS_STATE*,int,char*,int) ) { - QPRS_STATE state; - int4 i; - QUERYTYPE *query; - int4 commonlen; - ITEM *ptr; - NODE *tmp; - int4 pos=0; +static QUERYTYPE * +queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int)) +{ + QPRS_STATE state; + int4 i; + QUERYTYPE *query; + int4 commonlen; + ITEM *ptr; + NODE *tmp; + int4 pos = 0; + #ifdef BS_DEBUG - char pbuf[16384],*cur; + char pbuf[16384], + *cur; #endif /* init state */ @@ -436,62 +500,64 @@ static QUERYTYPE *queryin(char *buf, void (*pushval)(QPRS_STATE*,int,char*,int) state.state = WAITOPERAND; state.count = 0; state.num = 0; - state.str=NULL; + state.str = NULL; /* init value parser's state */ state.valstate.oprisdelim = true; - state.valstate.len=32; - state.valstate.word = (char*)palloc( state.valstate.len ); + state.valstate.len = 32; + state.valstate.word = (char *) palloc(state.valstate.len); /* init list of operand */ - state.sumlen=0; - state.lenop=64; - state.curop = state.op = (char*)palloc( state.lenop ); + state.sumlen = 0; + state.lenop = 64; + state.curop = state.op = (char *) palloc(state.lenop); *(state.curop) = '\0'; - + /* parse query & make polish notation (postfix, but in reverse order) */ - makepol( &state, pushval ); - pfree( state.valstate.word ); - if (!state.num) - elog( ERROR,"Empty query"); + makepol(&state, pushval); + pfree(state.valstate.word); + if (!state.num) + elog(ERROR, "Empty query"); /* make finish struct */ commonlen = COMPUTESIZE(state.num, state.sumlen); - query = (QUERYTYPE*) palloc( commonlen ); + query = (QUERYTYPE *) palloc(commonlen); query->len = commonlen; query->size = state.num; ptr = GETQUERY(query); /* set item in polish notation */ - for(i=0; i<state.num; i++ ) { - ptr[i].type = state.str->type; + for (i = 0; i < state.num; i++) + { + ptr[i].type = state.str->type; ptr[i].val = state.str->val; - ptr[i].distance = state.str->distance; + ptr[i].distance = state.str->distance; ptr[i].length = state.str->length; tmp = state.str->next; - pfree( state.str ); + pfree(state.str); state.str = tmp; } - /* set user friendly-operand view */ - memcpy( (void*)GETOPERAND(query), (void*)state.op, state.sumlen ); - pfree( state.op ); + /* set user friendly-operand view */ + memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); + pfree(state.op); - /* set left operand's position for every operator */ + /* set left operand's position for every operator */ pos = 0; - findoprnd( ptr, &pos ); + findoprnd(ptr, &pos); #ifdef BS_DEBUG cur = pbuf; *cur = '\0'; - for( i=0;i<query->size;i++ ) { - if ( ptr[i].type == OPR ) + for (i = 0; i < query->size; i++) + { + if (ptr[i].type == OPR) sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left); - else - sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance ); - cur = strchr(cur,'\0'); + else + sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); + cur = strchr(cur, '\0'); } - elog(NOTICE,"POR: %s", pbuf); + elog(NOTICE, "POR: %s", pbuf); #endif return query; @@ -501,59 +567,66 @@ static QUERYTYPE *queryin(char *buf, void (*pushval)(QPRS_STATE*,int,char*,int) * in without morphology */ Datum -qtxt_in(PG_FUNCTION_ARGS) { - PG_RETURN_POINTER( queryin((char*)PG_GETARG_POINTER(0),pushval_asis) ); +qtxt_in(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis)); } /* * in with morphology */ Datum -mqtxt_in(PG_FUNCTION_ARGS) { - QUERYTYPE *query; - ITEM* res; - int4 len; -#ifdef BS_DEBUG - ITEM *ptr; - int4 i; - char pbuf[16384],*cur; +mqtxt_in(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query; + ITEM *res; + int4 len; + +#ifdef BS_DEBUG + ITEM *ptr; + int4 i; + char pbuf[16384], + *cur; #endif initmorph(); - query = queryin((char*)PG_GETARG_POINTER(0),pushval_morph); - res = clean_fakeval( GETQUERY(query), &len ); - if ( ! res ) { + query = queryin((char *) PG_GETARG_POINTER(0), pushval_morph); + res = clean_fakeval(GETQUERY(query), &len); + if (!res) + { pfree(query); PG_RETURN_NULL(); } - memcpy( (void*)GETQUERY(query), (void*)res, len*sizeof(ITEM) ); + memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM)); #ifdef BS_DEBUG cur = pbuf; *cur = '\0'; ptr = GETQUERY(query); - for( i=0;i<len;i++ ) { - if ( ptr[i].type == OPR ) + for (i = 0; i < len; i++) + { + if (ptr[i].type == OPR) sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left); - else - sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance ); - cur = strchr(cur,'\0'); + else + sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); + cur = strchr(cur, '\0'); } - elog(NOTICE,"POR: %s", pbuf); + elog(NOTICE, "POR: %s", pbuf); #endif pfree(res); - PG_RETURN_POINTER( query ); + PG_RETURN_POINTER(query); } /* * out function */ -typedef struct { - ITEM *curpol; - char *buf; - char *cur; - char *op; - int4 buflen; -} INFIX; +typedef struct +{ + ITEM *curpol; + char *buf; + char *cur; + char *op; + int4 buflen; +} INFIX; #define RESIZEBUF(inf,addsize) \ while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \ @@ -565,98 +638,117 @@ while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \ } /* - * recursive walk on tree and print it in + * recursive walk on tree and print it in * infix (human-readable) view */ static void -infix(INFIX *in, bool first) { - if ( in->curpol->type == VAL ) { - char *op = in->op + in->curpol->distance; - RESIZEBUF(in, in->curpol->length*2 + 2); - *(in->cur) = '\''; in->cur++; - while( *op ) { - if ( *op == '\'' ) { - *(in->cur) = '\\'; in->cur++; +infix(INFIX * in, bool first) +{ + if (in->curpol->type == VAL) + { + char *op = in->op + in->curpol->distance; + + RESIZEBUF(in, in->curpol->length * 2 + 2); + *(in->cur) = '\''; + in->cur++; + while (*op) + { + if (*op == '\'') + { + *(in->cur) = '\\'; + in->cur++; } *(in->cur) = *op; - op++; in->cur++; - } - *(in->cur) = '\''; in->cur++; + op++; + in->cur++; + } + *(in->cur) = '\''; + in->cur++; *(in->cur) = '\0'; in->curpol++; - } else if ( in->curpol->val == (int4)'!' ) { - bool isopr = false; + } + else if (in->curpol->val == (int4) '!') + { + bool isopr = false; + RESIZEBUF(in, 1); *(in->cur) = '!'; in->cur++; *(in->cur) = '\0'; in->curpol++; - if ( in->curpol->type == OPR ) { + if (in->curpol->type == OPR) + { isopr = true; RESIZEBUF(in, 2); sprintf(in->cur, "( "); - in->cur = strchr( in->cur, '\0' ); - } - infix( in, isopr ); - if ( isopr ) { + in->cur = strchr(in->cur, '\0'); + } + infix(in, isopr); + if (isopr) + { RESIZEBUF(in, 2); sprintf(in->cur, " )"); - in->cur = strchr( in->cur, '\0' ); - } - } else { - int4 op = in->curpol->val; - INFIX nrm; - + in->cur = strchr(in->cur, '\0'); + } + } + else + { + int4 op = in->curpol->val; + INFIX nrm; + in->curpol++; - if ( op == (int4)'|' && ! first) { + if (op == (int4) '|' && !first) + { RESIZEBUF(in, 2); sprintf(in->cur, "( "); - in->cur = strchr( in->cur, '\0' ); + in->cur = strchr(in->cur, '\0'); } nrm.curpol = in->curpol; nrm.op = in->op; nrm.buflen = 16; - nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); - + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + /* get right operand */ - infix( &nrm, false ); - + infix(&nrm, false); + /* get & print left operand */ in->curpol = nrm.curpol; - infix( in, false ); + infix(in, false); - /* print operator & right operand*/ - RESIZEBUF(in, 3 + (nrm.cur - nrm.buf) ); + /* print operator & right operand */ + RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); sprintf(in->cur, " %c %s", op, nrm.buf); - in->cur = strchr( in->cur, '\0' ); - pfree( nrm.buf ); + in->cur = strchr(in->cur, '\0'); + pfree(nrm.buf); - if ( op == (int4)'|' && ! first) { + if (op == (int4) '|' && !first) + { RESIZEBUF(in, 2); sprintf(in->cur, " )"); - in->cur = strchr( in->cur, '\0' ); + in->cur = strchr(in->cur, '\0'); } } } Datum -qtxt_out(PG_FUNCTION_ARGS) { - QUERYTYPE *query = (QUERYTYPE*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - INFIX nrm; +qtxt_out(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + INFIX nrm; - if ( query->size == 0 ) - elog(ERROR,"Empty"); + if (query->size == 0) + elog(ERROR, "Empty"); nrm.curpol = GETQUERY(query); nrm.buflen = 32; - nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); *(nrm.cur) = '\0'; nrm.op = GETOPERAND(query); - infix( &nrm, true ); - - PG_FREE_IF_COPY(query,0); - PG_RETURN_POINTER( nrm.buf ); + infix(&nrm, true); + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_POINTER(nrm.buf); } /* @@ -664,38 +756,42 @@ qtxt_out(PG_FUNCTION_ARGS) { * which will be executed in non-leaf pages in index */ Datum -querytree(PG_FUNCTION_ARGS) { - QUERYTYPE *query = (QUERYTYPE*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - INFIX nrm; - text *res; - ITEM *q; - int4 len; +querytree(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + INFIX nrm; + text *res; + ITEM *q; + int4 len; - if ( query->size == 0 ) - elog(ERROR,"Empty"); + if (query->size == 0) + elog(ERROR, "Empty"); q = clean_NOT(GETQUERY(query), &len); - if ( ! q ) { - res = (text*) palloc( 1 + VARHDRSZ ); + if (!q) + { + res = (text *) palloc(1 + VARHDRSZ); VARATT_SIZEP(res) = 1 + VARHDRSZ; - *((char*)VARDATA(res)) = 'T'; - } else { + *((char *) VARDATA(res)) = 'T'; + } + else + { nrm.curpol = q; nrm.buflen = 32; - nrm.cur = nrm.buf = (char*)palloc( sizeof(char) * nrm.buflen ); + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); *(nrm.cur) = '\0'; nrm.op = GETOPERAND(query); - infix( &nrm, true ); + infix(&nrm, true); - res = (text*) palloc( nrm.cur-nrm.buf + VARHDRSZ ); - VARATT_SIZEP(res) = nrm.cur-nrm.buf + VARHDRSZ; - strncpy( VARDATA(res), nrm.buf, nrm.cur-nrm.buf ); + res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ); + VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ; + strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf); pfree(q); } - PG_FREE_IF_COPY(query,0); + PG_FREE_IF_COPY(query, 0); - PG_RETURN_POINTER( res ); + PG_RETURN_POINTER(res); } diff --git a/contrib/tsearch/query.h b/contrib/tsearch/query.h index 96117e78c8..a5f8d7deb8 100644 --- a/contrib/tsearch/query.h +++ b/contrib/tsearch/query.h @@ -9,41 +9,43 @@ * item in polish notation with back link * to left operand */ -typedef struct ITEM { - int2 type; - int2 left; - int4 val; +typedef struct ITEM +{ + int2 type; + int2 left; + int4 val; /* user-friendly value */ - uint16 distance; - uint16 length; -} ITEM; + uint16 distance; + uint16 length; +} ITEM; /* *Storage: * (len)(size)(array of ITEM)(array of operand in user-friendly form) */ -typedef struct { - int4 len; - int4 size; - char data[1]; -} QUERYTYPE; +typedef struct +{ + int4 len; + int4 size; + char data[1]; +} QUERYTYPE; #define HDRSIZEQT ( 2*sizeof(int4) ) #define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + size * sizeof(ITEM) + lenofoperand ) #define GETQUERY(x) (ITEM*)( (char*)(x)+HDRSIZEQT ) -#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)x)->size * sizeof(ITEM) ) +#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)x)->size * sizeof(ITEM) ) #define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' ) -#define END 0 -#define ERR 1 -#define VAL 2 -#define OPR 3 -#define OPEN 4 -#define CLOSE 5 -#define VALTRUE 6 /* for stop words */ -#define VALFALSE 7 +#define END 0 +#define ERR 1 +#define VAL 2 +#define OPR 3 +#define OPEN 4 +#define CLOSE 5 +#define VALTRUE 6 /* for stop words */ +#define VALFALSE 7 -bool execute( ITEM* curitem, void *checkval, - bool calcnot, bool (*chkcond)(void *checkval, ITEM* val )); +bool execute(ITEM * curitem, void *checkval, + bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)); #endif diff --git a/contrib/tsearch/rewrite.c b/contrib/tsearch/rewrite.c index 590b15e7ca..9ad7a0a86a 100644 --- a/contrib/tsearch/rewrite.c +++ b/contrib/tsearch/rewrite.c @@ -19,52 +19,63 @@ #include "query.h" #include "rewrite.h" -typedef struct NODE { - struct NODE *left; +typedef struct NODE +{ + struct NODE *left; struct NODE *right; - ITEM* valnode; -} NODE; + ITEM *valnode; +} NODE; /* * make query tree from plain view of query */ -static NODE* -maketree(ITEM *in) { - NODE *node = (NODE*)palloc(sizeof(NODE)); +static NODE * +maketree(ITEM * in) +{ + NODE *node = (NODE *) palloc(sizeof(NODE)); + node->valnode = in; node->right = node->left = NULL; - if ( in->type == OPR ) { - node->right = maketree( in + 1 ); - if ( in->val != (int4)'!' ) - node->left = maketree( in + in->left ); + if (in->type == OPR) + { + node->right = maketree(in + 1); + if (in->val != (int4) '!') + node->left = maketree(in + in->left); } return node; } -typedef struct { - ITEM* ptr; - int4 len; - int4 cur; -} PLAINTREE; +typedef struct +{ + ITEM *ptr; + int4 len; + int4 cur; +} PLAINTREE; static void -plainnode(PLAINTREE *state, NODE* node) { - if ( state->cur == state->len ) { +plainnode(PLAINTREE * state, NODE * node) +{ + if (state->cur == state->len) + { state->len *= 2; - state->ptr=(ITEM*)repalloc( (void*)state->ptr, state->len*sizeof(ITEM) ); + state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM)); } - memcpy( (void*)&(state->ptr[state->cur]), (void*)node->valnode, sizeof(ITEM) ); - if ( node->valnode->type == VAL ) { + memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM)); + if (node->valnode->type == VAL) state->cur++; - } else if ( node->valnode->val == (int4)'!' ) { - state->ptr[state->cur].left=1; + else if (node->valnode->val == (int4) '!') + { + state->ptr[state->cur].left = 1; state->cur++; plainnode(state, node->right); - } else { - int4 cur = state->cur; + } + else + { + int4 cur = state->cur; + state->cur++; plainnode(state, node->right); - state->ptr[cur].left = state->cur - cur; + state->ptr[cur].left = state->cur - cur; plainnode(state, node->left); } pfree(node); @@ -73,75 +84,96 @@ plainnode(PLAINTREE *state, NODE* node) { /* * make plain view of tree from 'normal' view of tree */ -static ITEM* -plaintree(NODE *root, int4 *len) { +static ITEM * +plaintree(NODE * root, int4 *len) +{ PLAINTREE pl; - pl.cur=0; - pl.len=16; - if ( root && (root->valnode->type == VAL || root->valnode->type == OPR) ) { - pl.ptr = (ITEM*)palloc( pl.len*sizeof(ITEM) ); + + pl.cur = 0; + pl.len = 16; + if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) + { + pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM)); plainnode(&pl, root); - } else { - pl.ptr = NULL; } + else + pl.ptr = NULL; *len = pl.cur; - return pl.ptr; + return pl.ptr; } static void -freetree(NODE *node) { - if ( !node ) return; - if ( node->left ) freetree(node->left); - if ( node->right ) freetree(node->right); - pfree( node ); +freetree(NODE * node) +{ + if (!node) + return; + if (node->left) + freetree(node->left); + if (node->right) + freetree(node->right); + pfree(node); } /* * clean tree for ! operator. - * It's usefull for debug, but in + * It's usefull for debug, but in * other case, such view is used with search in index. * Operator ! always return TRUE */ -static NODE* -clean_NOT_intree( NODE* node ) { - if ( node->valnode->type == VAL ) +static NODE * +clean_NOT_intree(NODE * node) +{ + if (node->valnode->type == VAL) return node; - if ( node->valnode->val == (int4)'!' ) { + if (node->valnode->val == (int4) '!') + { freetree(node); return NULL; } /* operator & or | */ - if ( node->valnode->val == (int4)'|' ) { - if ( (node->left=clean_NOT_intree(node->left)) == NULL || - (node->right=clean_NOT_intree(node->right)) == NULL ) { + if (node->valnode->val == (int4) '|') + { + if ((node->left = clean_NOT_intree(node->left)) == NULL || + (node->right = clean_NOT_intree(node->right)) == NULL) + { freetree(node); return NULL; } - } else { - NODE *res = node; - node->left=clean_NOT_intree(node->left); - node->right=clean_NOT_intree(node->right); - if ( node->left == NULL && node->right == NULL ) { + } + else + { + NODE *res = node; + + node->left = clean_NOT_intree(node->left); + node->right = clean_NOT_intree(node->right); + if (node->left == NULL && node->right == NULL) + { pfree(node); res = NULL; - } else if ( node->left == NULL ) { + } + else if (node->left == NULL) + { res = node->right; pfree(node); - } else if ( node->right == NULL ) { + } + else if (node->right == NULL) + { res = node->left; pfree(node); - } + } return res; } - return node; + return node; } -ITEM* -clean_NOT(ITEM* ptr, int4 *len) { - NODE *root = maketree( ptr ); - return plaintree(clean_NOT_intree(root), len); +ITEM * +clean_NOT(ITEM * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + + return plaintree(clean_NOT_intree(root), len); } #define V_UNKNOWN 0 @@ -149,86 +181,112 @@ clean_NOT(ITEM* ptr, int4 *len) { #define V_FALSE 2 /* - * Clean query tree from values which is always in + * Clean query tree from values which is always in * text (stopword) - */ -static NODE* -clean_fakeval_intree( NODE* node, char *result ) { - char lresult = V_UNKNOWN, rresult = V_UNKNOWN; - if ( node->valnode->type == VAL ) + */ +static NODE * +clean_fakeval_intree(NODE * node, char *result) +{ + char lresult = V_UNKNOWN, + rresult = V_UNKNOWN; + + if (node->valnode->type == VAL) return node; - else if ( node->valnode->type == VALTRUE ) { - pfree( node ); + else if (node->valnode->type == VALTRUE) + { + pfree(node); *result = V_TRUE; return NULL; - } - + } + - if ( node->valnode->val == (int4)'!' ) { - node->right = clean_fakeval_intree( node->right, &rresult ); - if ( ! node->right ) { - *result = ( rresult == V_TRUE ) ? V_FALSE : V_TRUE; + if (node->valnode->val == (int4) '!') + { + node->right = clean_fakeval_intree(node->right, &rresult); + if (!node->right) + { + *result = (rresult == V_TRUE) ? V_FALSE : V_TRUE; freetree(node); return NULL; } - } else if ( node->valnode->val == (int4)'|' ) { - NODE *res = node; - node->left =clean_fakeval_intree(node->left, &lresult); - node->right=clean_fakeval_intree(node->right,&rresult); - if ( lresult == V_TRUE || rresult == V_TRUE ) { + } + else if (node->valnode->val == (int4) '|') + { + NODE *res = node; + + node->left = clean_fakeval_intree(node->left, &lresult); + node->right = clean_fakeval_intree(node->right, &rresult); + if (lresult == V_TRUE || rresult == V_TRUE) + { freetree(node); - *result=V_TRUE; + *result = V_TRUE; return NULL; - } else if ( lresult == V_FALSE && rresult == V_FALSE ) { + } + else if (lresult == V_FALSE && rresult == V_FALSE) + { freetree(node); - *result=V_FALSE; + *result = V_FALSE; return NULL; - } else if ( lresult == V_FALSE ) { + } + else if (lresult == V_FALSE) + { res = node->right; pfree(node); - } else if ( rresult == V_FALSE ) { + } + else if (rresult == V_FALSE) + { res = node->left; pfree(node); } - return res; - } else { - NODE *res = node; - node->left =clean_fakeval_intree(node->left, &lresult); - node->right=clean_fakeval_intree(node->right,&rresult); - if ( lresult == V_FALSE || rresult == V_FALSE ) { + return res; + } + else + { + NODE *res = node; + + node->left = clean_fakeval_intree(node->left, &lresult); + node->right = clean_fakeval_intree(node->right, &rresult); + if (lresult == V_FALSE || rresult == V_FALSE) + { freetree(node); - *result=V_FALSE; + *result = V_FALSE; return NULL; - } else if ( lresult == V_TRUE && rresult == V_TRUE ) { + } + else if (lresult == V_TRUE && rresult == V_TRUE) + { freetree(node); - *result=V_TRUE; + *result = V_TRUE; return NULL; - } else if ( lresult == V_TRUE ) { + } + else if (lresult == V_TRUE) + { res = node->right; pfree(node); - } else if ( rresult == V_TRUE ) { + } + else if (rresult == V_TRUE) + { res = node->left; pfree(node); } - return res; + return res; } - return node; + return node; } -ITEM* -clean_fakeval(ITEM* ptr, int4 *len) { - NODE *root = maketree( ptr ); - char result = V_UNKNOWN; - NODE *resroot; - +ITEM * +clean_fakeval(ITEM * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + char result = V_UNKNOWN; + NODE *resroot; + resroot = clean_fakeval_intree(root, &result); - if ( result != V_UNKNOWN ) { - elog(ERROR,"Your query contained only stopword(s), ignored"); + if (result != V_UNKNOWN) + { + elog(ERROR, "Your query contained only stopword(s), ignored"); *len = 0; return NULL; } - - return plaintree(resroot, len); -} - + return plaintree(resroot, len); +} diff --git a/contrib/tsearch/rewrite.h b/contrib/tsearch/rewrite.h index 863ff9b672..c53e1c6dac 100644 --- a/contrib/tsearch/rewrite.h +++ b/contrib/tsearch/rewrite.h @@ -1,7 +1,6 @@ #ifndef __REWRITE_H__ #define __REWRITE_H__ -ITEM* clean_NOT(ITEM* ptr, int4 *len); -ITEM* clean_fakeval(ITEM* ptr, int4 *len); - +ITEM *clean_NOT(ITEM * ptr, int4 *len); +ITEM *clean_fakeval(ITEM * ptr, int4 *len); #endif diff --git a/contrib/tsearch/txtidx.c b/contrib/tsearch/txtidx.c index e1abd54d3b..6e1fe0edae 100644 --- a/contrib/tsearch/txtidx.c +++ b/contrib/tsearch/txtidx.c @@ -17,9 +17,9 @@ #include "utils/pg_locale.h" -#include <ctype.h> /* tolower */ -#include "txtidx.h" -#include "query.h" +#include <ctype.h> /* tolower */ +#include "txtidx.h" +#include "query.h" #include "deflex.h" #include "parser.h" @@ -27,51 +27,58 @@ #include "morph.h" PG_FUNCTION_INFO_V1(txtidx_in); -Datum txtidx_in(PG_FUNCTION_ARGS); +Datum txtidx_in(PG_FUNCTION_ARGS); + PG_FUNCTION_INFO_V1(txtidx_out); -Datum txtidx_out(PG_FUNCTION_ARGS); +Datum txtidx_out(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(txt2txtidx); -Datum txt2txtidx(PG_FUNCTION_ARGS); +Datum txt2txtidx(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(tsearch); -Datum tsearch(PG_FUNCTION_ARGS); +Datum tsearch(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(txtidxsize); -Datum txtidxsize(PG_FUNCTION_ARGS); +Datum txtidxsize(PG_FUNCTION_ARGS); /* * in/out text index type */ static char *BufferStr; static int -compareentry( const void * a, const void * b ) { - if ( ((WordEntry*)a)->len == ((WordEntry*)b)->len ) { - return strncmp( - &BufferStr[((WordEntry*)a)->pos], - &BufferStr[((WordEntry*)b)->pos], - ((WordEntry*)b)->len ); +compareentry(const void *a, const void *b) +{ + if (((WordEntry *) a)->len == ((WordEntry *) b)->len) + { + return strncmp( + &BufferStr[((WordEntry *) a)->pos], + &BufferStr[((WordEntry *) b)->pos], + ((WordEntry *) b)->len); } - return ( ((WordEntry*)a)->len > ((WordEntry*)b)->len ) ? 1 : -1; + return (((WordEntry *) a)->len > ((WordEntry *) b)->len) ? 1 : -1; } static int -uniqueentry( WordEntry* a, int4 l, char *buf, int4 *outbuflen ) { - WordEntry *ptr, *res; +uniqueentry(WordEntry * a, int4 l, char *buf, int4 *outbuflen) +{ + WordEntry *ptr, + *res; res = a; *outbuflen = res->len; - if ( l == 1 ) + if (l == 1) return l; - ptr = a+1; + ptr = a + 1; BufferStr = buf; - qsort((void*)a, l, sizeof(int4), compareentry ); + qsort((void *) a, l, sizeof(int4), compareentry); *outbuflen = res->len; - while (ptr - a < l) { - if ( ! (ptr->len == res->len && - strncmp(&buf[ ptr->pos ], &buf[ res->pos ],res->len) == 0 ) ) { + while (ptr - a < l) + { + if (!(ptr->len == res->len && + strncmp(&buf[ptr->pos], &buf[res->pos], res->len) == 0)) + { res++; res->len = ptr->len; res->pos = ptr->pos; @@ -81,10 +88,10 @@ uniqueentry( WordEntry* a, int4 l, char *buf, int4 *outbuflen ) { ptr++; } return res + 1 - a; -} +} #define WAITWORD 1 -#define WAITENDWORD 2 +#define WAITENDWORD 2 #define WAITNEXTCHAR 3 #define WAITENDCMPLX 4 @@ -92,81 +99,105 @@ uniqueentry( WordEntry* a, int4 l, char *buf, int4 *outbuflen ) { do { \ if ( state->curpos - state->word == state->len ) \ { \ - int4 clen = state->curpos - state->word; \ - state->len *= 2; \ - state->word = (char*)repalloc( (void*)state->word, state->len ); \ - state->curpos = state->word + clen; \ - } \ + int4 clen = state->curpos - state->word; \ + state->len *= 2; \ + state->word = (char*)repalloc( (void*)state->word, state->len ); \ + state->curpos = state->word + clen; \ + } \ } while (0) int4 -gettoken_txtidx( TI_IN_STATE *state ) { - int4 oldstate = 0; +gettoken_txtidx(TI_IN_STATE * state) +{ + int4 oldstate = 0; + state->curpos = state->word; state->state = WAITWORD; - while( 1 ) { - if ( state->state == WAITWORD ) { - if ( *(state->prsbuf) == '\0' ) { + while (1) + { + if (state->state == WAITWORD) + { + if (*(state->prsbuf) == '\0') return 0; - } else if ( *(state->prsbuf) == '\'' ) { - state->state = WAITENDCMPLX; - } else if ( *(state->prsbuf) == '\\' ) { + else if (*(state->prsbuf) == '\'') + state->state = WAITENDCMPLX; + else if (*(state->prsbuf) == '\\') + { state->state = WAITNEXTCHAR; oldstate = WAITENDWORD; - } else if ( state->oprisdelim && ISOPERATOR( *(state->prsbuf) ) ) { + } + else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf))) elog(ERROR, "Syntax error"); - } else if ( *(state->prsbuf) != ' ' ) { + else if (*(state->prsbuf) != ' ') + { *(state->curpos) = *(state->prsbuf); state->curpos++; state->state = WAITENDWORD; } - } else if ( state->state == WAITNEXTCHAR ) { - if ( *(state->prsbuf) == '\0' ) { - elog(ERROR,"There is no escaped character"); - } else { + } + else if (state->state == WAITNEXTCHAR) + { + if (*(state->prsbuf) == '\0') + elog(ERROR, "There is no escaped character"); + else + { RESIZEPRSBUF; *(state->curpos) = *(state->prsbuf); state->curpos++; state->state = oldstate; } - } else if ( state->state == WAITENDWORD ) { - if ( *(state->prsbuf) == '\\' ) { + } + else if (state->state == WAITENDWORD) + { + if (*(state->prsbuf) == '\\') + { state->state = WAITNEXTCHAR; oldstate = WAITENDWORD; - } else if ( *(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' || - ( state->oprisdelim && ISOPERATOR( *(state->prsbuf) ) ) ) { + } + else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' || + (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))) + { RESIZEPRSBUF; - if ( state->curpos == state->word ) + if (state->curpos == state->word) elog(ERROR, "Syntax error"); *(state->curpos) = '\0'; return 1; - } else { + } + else + { RESIZEPRSBUF; *(state->curpos) = *(state->prsbuf); state->curpos++; } - } else if ( state->state == WAITENDCMPLX ) { - if ( *(state->prsbuf) == '\'' ) { + } + else if (state->state == WAITENDCMPLX) + { + if (*(state->prsbuf) == '\'') + { RESIZEPRSBUF; *(state->curpos) = '\0'; - if ( state->curpos == state->word ) + if (state->curpos == state->word) elog(ERROR, "Syntax error"); state->prsbuf++; return 1; - } else if ( *(state->prsbuf) == '\\' ) { + } + else if (*(state->prsbuf) == '\\') + { state->state = WAITNEXTCHAR; oldstate = WAITENDCMPLX; - } else if ( *(state->prsbuf) == '\0' ) { - elog(ERROR,"Syntax error"); - } else { + } + else if (*(state->prsbuf) == '\0') + elog(ERROR, "Syntax error"); + else + { RESIZEPRSBUF; *(state->curpos) = *(state->prsbuf); state->curpos++; } - } else { + } + else elog(ERROR, "Inner bug :("); - } state->prsbuf++; } @@ -174,92 +205,111 @@ gettoken_txtidx( TI_IN_STATE *state ) { } Datum -txtidx_in(PG_FUNCTION_ARGS) { - char *buf = (char*)PG_GETARG_POINTER(0); +txtidx_in(PG_FUNCTION_ARGS) +{ + char *buf = (char *) PG_GETARG_POINTER(0); TI_IN_STATE state; - WordEntry *arr; - int4 len=0, totallen = 64; - txtidx *in; - char *tmpbuf, *cur; - int4 i,buflen = 256; + WordEntry *arr; + int4 len = 0, + totallen = 64; + txtidx *in; + char *tmpbuf, + *cur; + int4 i, + buflen = 256; state.prsbuf = buf; - state.len=32; - state.word = (char*)palloc( state.len ); + state.len = 32; + state.word = (char *) palloc(state.len); state.oprisdelim = false; - arr = (WordEntry*)palloc( sizeof(WordEntry) * totallen ); - cur = tmpbuf = (char*)palloc( buflen ); - while( gettoken_txtidx( &state ) ) { - if ( len == totallen ) { + arr = (WordEntry *) palloc(sizeof(WordEntry) * totallen); + cur = tmpbuf = (char *) palloc(buflen); + while (gettoken_txtidx(&state)) + { + if (len == totallen) + { totallen *= 2; - arr = (WordEntry*)repalloc( (void*)arr, sizeof(int4)*totallen ); + arr = (WordEntry *) repalloc((void *) arr, sizeof(int4) * totallen); } - while ( cur-tmpbuf + state.curpos - state.word >= buflen ) { - int4 dist = cur-tmpbuf; + while (cur - tmpbuf + state.curpos - state.word >= buflen) + { + int4 dist = cur - tmpbuf; + buflen *= 2; - tmpbuf = (char*)repalloc( (void*)tmpbuf, buflen ); - cur = tmpbuf+dist; + tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); + cur = tmpbuf + dist; } - if ( state.curpos - state.word > 0xffff ) - elog(ERROR,"Word is too long"); + if (state.curpos - state.word > 0xffff) + elog(ERROR, "Word is too long"); arr[len].len = state.curpos - state.word; - if ( cur - tmpbuf > 0xffff ) - elog(ERROR,"Too long value"); + if (cur - tmpbuf > 0xffff) + elog(ERROR, "Too long value"); arr[len].pos = cur - tmpbuf; - memcpy( (void*)cur, (void*)state.word, arr[len].len ); + memcpy((void *) cur, (void *) state.word, arr[len].len); cur += arr[len].len; len++; } pfree(state.word); - if ( !len ) - elog(ERROR,"Void value"); + if (!len) + elog(ERROR, "Void value"); - len = uniqueentry( arr, len, tmpbuf, &buflen ); - totallen = CALCDATASIZE( len, buflen ); - in = (txtidx*)palloc( totallen ); + len = uniqueentry(arr, len, tmpbuf, &buflen); + totallen = CALCDATASIZE(len, buflen); + in = (txtidx *) palloc(totallen); in->len = totallen; in->size = len; cur = STRPTR(in); - for(i=0;i<len;i++) { - memcpy( (void*)cur, (void*)&tmpbuf[ arr[i].pos ], arr[i].len ); + for (i = 0; i < len; i++) + { + memcpy((void *) cur, (void *) &tmpbuf[arr[i].pos], arr[i].len); arr[i].pos = cur - STRPTR(in); cur += arr[i].len; } pfree(tmpbuf); - memcpy( (void*)ARRPTR(in), (void*)arr, sizeof(int4)*len ); - pfree( arr ); - PG_RETURN_POINTER( in ); + memcpy((void *) ARRPTR(in), (void *) arr, sizeof(int4) * len); + pfree(arr); + PG_RETURN_POINTER(in); } Datum -txtidxsize(PG_FUNCTION_ARGS) { - txtidx *in=(txtidx*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - int4 ret = in->size; - PG_FREE_IF_COPY(in,0); - PG_RETURN_INT32( ret ); +txtidxsize(PG_FUNCTION_ARGS) +{ + txtidx *in = (txtidx *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + int4 ret = in->size; + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_INT32(ret); } Datum -txtidx_out(PG_FUNCTION_ARGS) { - txtidx *out=(txtidx*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - char *outbuf; - int4 i,j,lenbuf = STRSIZE(out) + 1 /* \0 */ + out->size*2 /* '' */ + out->size - 1 /* space */; - WordEntry *ptr = ARRPTR(out); - char *curin, *curout; - - curout = outbuf = (char*) palloc( lenbuf ); - for(i=0;i<out->size;i++) { +txtidx_out(PG_FUNCTION_ARGS) +{ + txtidx *out = (txtidx *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + char *outbuf; + int4 i, + j, + lenbuf = STRSIZE(out) + 1 /* \0 */ + out->size * 2 /* '' */ + out->size - 1 /* space */ ; + WordEntry *ptr = ARRPTR(out); + char *curin, + *curout; + + curout = outbuf = (char *) palloc(lenbuf); + for (i = 0; i < out->size; i++) + { curin = STRPTR(out) + ptr->pos; - if ( i!= 0 ) + if (i != 0) *curout++ = ' '; *curout++ = '\''; j = ptr->len; - while( j-- ) { - if ( *curin == '\'' ) { - int4 pos = curout - outbuf; - outbuf = (char*)repalloc((void*)outbuf, ++lenbuf ); + while (j--) + { + if (*curin == '\'') + { + int4 pos = curout - outbuf; + + outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); curout = outbuf + pos; *curout++ = '\\'; } @@ -268,171 +318,198 @@ txtidx_out(PG_FUNCTION_ARGS) { *curout++ = '\''; ptr++; } - outbuf[ lenbuf-1 ] = '\0'; - PG_FREE_IF_COPY(out,0); - PG_RETURN_POINTER( outbuf ); + outbuf[lenbuf - 1] = '\0'; + PG_FREE_IF_COPY(out, 0); + PG_RETURN_POINTER(outbuf); } -typedef struct { - uint16 len; - char* word; -} WORD; +typedef struct +{ + uint16 len; + char *word; +} WORD; -typedef struct { - WORD *words; - int4 lenwords; - int4 curwords; -} PRSTEXT; +typedef struct +{ + WORD *words; + int4 lenwords; + int4 curwords; +} PRSTEXT; /* * Parse text to lexems */ static void -parsetext( PRSTEXT *prs, char *buf, int4 buflen ) { - int type,lenlemm; - char *ptr,*ptrw; - char *lemm; - - start_parse_str( buf, buflen ); - while( (type=tsearch_yylex()) != 0 ) { - if ( prs->curwords == prs->lenwords ) { +parsetext(PRSTEXT * prs, char *buf, int4 buflen) +{ + int type, + lenlemm; + char *ptr, + *ptrw; + char *lemm; + + start_parse_str(buf, buflen); + while ((type = tsearch_yylex()) != 0) + { + if (prs->curwords == prs->lenwords) + { prs->lenwords *= 2; - prs->words = (WORD*)repalloc( (void*)prs->words, prs->lenwords * sizeof(WORD) ); + prs->words = (WORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(WORD)); } - if ( tokenlen>0xffff ) { + if (tokenlen > 0xffff) + { end_parse(); elog(ERROR, "Word is too long"); } lenlemm = tokenlen; - lemm = lemmatize( token, &lenlemm, type ); - - if ( ! lemm ) + lemm = lemmatize(token, &lenlemm, type); + + if (!lemm) continue; - if ( lemm != token ) { - prs->words[ prs->curwords ].len = lenlemm; - prs->words[ prs->curwords ].word = lemm; - } else { - prs->words[ prs->curwords ].len = lenlemm; - ptrw = prs->words[ prs->curwords ].word = (char*)palloc( lenlemm ); + if (lemm != token) + { + prs->words[prs->curwords].len = lenlemm; + prs->words[prs->curwords].word = lemm; + } + else + { + prs->words[prs->curwords].len = lenlemm; + ptrw = prs->words[prs->curwords].word = (char *) palloc(lenlemm); ptr = token; - while( ptr-token < lenlemm ) { - *ptrw = tolower( (unsigned char) *ptr ); - ptr++; ptrw++; + while (ptr - token < lenlemm) + { + *ptrw = tolower((unsigned char) *ptr); + ptr++; + ptrw++; } } - prs->curwords++; + prs->curwords++; } end_parse(); } static int -compareWORD( const void * a, const void * b ) { - if ( ((WORD*)a)->len == ((WORD*)b)->len ) - return strncmp( - ((WORD*)a)->word, - ((WORD*)b)->word, - ((WORD*)b)->len ); - return ( ((WORD*)a)->len > ((WORD*)b)->len ) ? 1 : -1; +compareWORD(const void *a, const void *b) +{ + if (((WORD *) a)->len == ((WORD *) b)->len) + return strncmp( + ((WORD *) a)->word, + ((WORD *) b)->word, + ((WORD *) b)->len); + return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1; } static int -uniqueWORD( WORD* a, int4 l ) { - WORD *ptr, *res; +uniqueWORD(WORD * a, int4 l) +{ + WORD *ptr, + *res; - if ( l == 1 ) + if (l == 1) return l; res = a; ptr = a + 1; - qsort((void*)a, l, sizeof(WORD), compareWORD ); + qsort((void *) a, l, sizeof(WORD), compareWORD); - while (ptr - a < l) { - if ( ! (ptr->len == res->len && - strncmp(ptr->word, res->word ,res->len) == 0 ) ) { + while (ptr - a < l) + { + if (!(ptr->len == res->len && + strncmp(ptr->word, res->word, res->len) == 0)) + { res++; - res->len = ptr->len; + res->len = ptr->len; res->word = ptr->word; - } else { - pfree(ptr->word); } + else + pfree(ptr->word); ptr++; } return res + 1 - a; -} +} /* * make value of txtidx */ static txtidx * -makevalue( PRSTEXT *prs ) { - int4 i, lenstr=0, totallen; - txtidx *in; - WordEntry *ptr; - char *str,*cur; - - prs->curwords = uniqueWORD( prs->words, prs->curwords ); - for(i=0;i<prs->curwords;i++) +makevalue(PRSTEXT * prs) +{ + int4 i, + lenstr = 0, + totallen; + txtidx *in; + WordEntry *ptr; + char *str, + *cur; + + prs->curwords = uniqueWORD(prs->words, prs->curwords); + for (i = 0; i < prs->curwords; i++) lenstr += prs->words[i].len; - totallen = CALCDATASIZE( prs->curwords, lenstr ); - in = (txtidx*)palloc( totallen ); + totallen = CALCDATASIZE(prs->curwords, lenstr); + in = (txtidx *) palloc(totallen); in->len = totallen; in->size = prs->curwords; ptr = ARRPTR(in); cur = str = STRPTR(in); - for(i=0;i<prs->curwords;i++) { + for (i = 0; i < prs->curwords; i++) + { ptr->len = prs->words[i].len; - if ( cur-str > 0xffff ) - elog(ERROR,"Value is too big"); - ptr->pos = cur-str; + if (cur - str > 0xffff) + elog(ERROR, "Value is too big"); + ptr->pos = cur - str; ptr++; - memcpy( (void*)cur, (void*)prs->words[i].word, prs->words[i].len ); + memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); pfree(prs->words[i].word); - cur += prs->words[i].len; + cur += prs->words[i].len; } pfree(prs->words); return in; } Datum -txt2txtidx(PG_FUNCTION_ARGS) { - text *in = (text*)DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); - PRSTEXT prs; - txtidx *out = NULL; - - prs.lenwords = 32; - prs.curwords = 0; - prs.words = (WORD*)palloc(sizeof(WORD)*prs.lenwords); +txt2txtidx(PG_FUNCTION_ARGS) +{ + text *in = (text *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + PRSTEXT prs; + txtidx *out = NULL; + + prs.lenwords = 32; + prs.curwords = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); initmorph(); - parsetext( &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ ); - PG_FREE_IF_COPY(in,0); - - if ( prs.curwords ) { - out = makevalue( &prs ); - PG_RETURN_POINTER( out ); + parsetext(&prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); + PG_FREE_IF_COPY(in, 0); + + if (prs.curwords) + { + out = makevalue(&prs); + PG_RETURN_POINTER(out); } pfree(prs.words); PG_RETURN_NULL(); -} +} /* * Trigger */ Datum -tsearch(PG_FUNCTION_ARGS) { +tsearch(PG_FUNCTION_ARGS) +{ TriggerData *trigdata; Trigger *trigger; - Relation rel; - HeapTuple rettuple = NULL; - int numidxattr,i; - PRSTEXT prs; - Datum datum = (Datum)0; + Relation rel; + HeapTuple rettuple = NULL; + int numidxattr, + i; + PRSTEXT prs; + Datum datum = (Datum) 0; if (!CALLED_AS_TRIGGER(fcinfo)) @@ -448,63 +525,69 @@ tsearch(PG_FUNCTION_ARGS) { rettuple = trigdata->tg_trigtuple; else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) rettuple = trigdata->tg_newtuple; - else + else elog(ERROR, "TSearch: Unknown event"); trigger = trigdata->tg_trigger; rel = trigdata->tg_relation; - if ( trigger->tgnargs < 2 ) - elog(ERROR,"TSearch: format tsearch(txtidx_field, text_field1,...)"); + if (trigger->tgnargs < 2) + elog(ERROR, "TSearch: format tsearch(txtidx_field, text_field1,...)"); numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); - if ( numidxattr < 0 ) - elog(ERROR,"TSearch: Can not find txtidx_field"); + if (numidxattr < 0) + elog(ERROR, "TSearch: Can not find txtidx_field"); - prs.lenwords = 32; - prs.curwords = 0; - prs.words = (WORD*)palloc(sizeof(WORD)*prs.lenwords); + prs.lenwords = 32; + prs.curwords = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); initmorph(); - /* find all words in indexable column */ - for(i=1; i<trigger->tgnargs; i++) { - int4 numattr; - text *txt_toasted, *txt; - bool isnull; - Oid oidtype; + /* find all words in indexable column */ + for (i = 1; i < trigger->tgnargs; i++) + { + int4 numattr; + text *txt_toasted, + *txt; + bool isnull; + Oid oidtype; numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); oidtype = SPI_gettypeid(rel->rd_att, numattr); - if ( numattr<0 || ( ! ( oidtype==TEXTOID || oidtype==VARCHAROID ) ) ) { + if (numattr < 0 || (!(oidtype == TEXTOID || oidtype == VARCHAROID))) + { elog(NOTICE, "TSearch: can not find field '%s'", trigger->tgargs[i]); continue; } - txt_toasted = (text*)DatumGetPointer( SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull ) ); - if ( isnull ) + txt_toasted = (text *) DatumGetPointer(SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull)); + if (isnull) continue; - txt = (text*)DatumGetPointer( PG_DETOAST_DATUM( PointerGetDatum ( txt_toasted ) ) ); + txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); - parsetext( &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ ); - if ( txt != txt_toasted ) + parsetext(&prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); + if (txt != txt_toasted) pfree(txt); } /* make txtidx value */ - if (prs.curwords) { - datum = PointerGetDatum( makevalue( &prs ) ); - rettuple = SPI_modifytuple( rel, rettuple, 1, &numidxattr, - &datum, NULL ); + if (prs.curwords) + { + datum = PointerGetDatum(makevalue(&prs)); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, NULL); pfree(DatumGetPointer(datum)); - } else { - char nulls = 'n'; - pfree( prs.words ); - rettuple = SPI_modifytuple( rel, rettuple, 1, &numidxattr, - &datum, &nulls ); } - + else + { + char nulls = 'n'; + + pfree(prs.words); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, &nulls); + } + if (rettuple == NULL) elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); - return PointerGetDatum( rettuple ); + return PointerGetDatum(rettuple); } - diff --git a/contrib/tsearch/txtidx.h b/contrib/tsearch/txtidx.h index 9a586ce518..d33bdf9d6d 100644 --- a/contrib/tsearch/txtidx.h +++ b/contrib/tsearch/txtidx.h @@ -14,33 +14,34 @@ #include "utils/builtins.h" #include "storage/bufpage.h" -typedef struct { - uint16 len; - uint16 pos; -} WordEntry; - -typedef struct { - int4 len; - int4 size; - char data[1]; -} txtidx; - -#define DATAHDRSIZE (sizeof(int4)*2) -#define CALCDATASIZE(x, lenstr) ( x * sizeof(WordEntry) + DATAHDRSIZE + lenstr ) -#define ARRPTR(x) ( (WordEntry*) ( (char*)x + DATAHDRSIZE ) ) -#define STRPTR(x) ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((txtidx*)x)->size ) ) -#define STRSIZE(x) ( ((txtidx*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((txtidx*)x)->size ) ) - -typedef struct { - char *prsbuf; - char *word; - char *curpos; - int4 len; - int4 state; - bool oprisdelim; -} TI_IN_STATE; - -int4 gettoken_txtidx( TI_IN_STATE *state ); - +typedef struct +{ + uint16 len; + uint16 pos; +} WordEntry; + +typedef struct +{ + int4 len; + int4 size; + char data[1]; +} txtidx; + +#define DATAHDRSIZE (sizeof(int4)*2) +#define CALCDATASIZE(x, lenstr) ( x * sizeof(WordEntry) + DATAHDRSIZE + lenstr ) +#define ARRPTR(x) ( (WordEntry*) ( (char*)x + DATAHDRSIZE ) ) +#define STRPTR(x) ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((txtidx*)x)->size ) ) +#define STRSIZE(x) ( ((txtidx*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((txtidx*)x)->size ) ) + +typedef struct +{ + char *prsbuf; + char *word; + char *curpos; + int4 len; + int4 state; + bool oprisdelim; +} TI_IN_STATE; + +int4 gettoken_txtidx(TI_IN_STATE * state); #endif - |