diff options
Diffstat (limited to 'contrib/fulltextindex')
-rw-r--r-- | contrib/fulltextindex/Makefile | 16 | ||||
-rw-r--r-- | contrib/fulltextindex/README.fti | 200 | ||||
-rw-r--r-- | contrib/fulltextindex/TODO | 1 | ||||
-rw-r--r-- | contrib/fulltextindex/WARNING | 25 | ||||
-rw-r--r-- | contrib/fulltextindex/fti.c | 468 | ||||
-rw-r--r-- | contrib/fulltextindex/fti.pl | 212 | ||||
-rw-r--r-- | contrib/fulltextindex/fti.sql.in | 6 | ||||
-rwxr-xr-x | contrib/fulltextindex/timings.sh | 350 | ||||
-rw-r--r-- | contrib/fulltextindex/uninstall.sql | 4 |
9 files changed, 0 insertions, 1282 deletions
diff --git a/contrib/fulltextindex/Makefile b/contrib/fulltextindex/Makefile deleted file mode 100644 index 981c91861a..0000000000 --- a/contrib/fulltextindex/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# $PostgreSQL: pgsql/contrib/fulltextindex/Makefile,v 1.14 2005/09/27 17:13:02 tgl Exp $ - -MODULES = fti -DATA_built = fti.sql -DOCS = README.fti -SCRIPTS = fti.pl - -ifdef USE_PGXS -PGXS := $(shell pg_config --pgxs) -include $(PGXS) -else -subdir = contrib/fulltextindex -top_builddir = ../.. -include $(top_builddir)/src/Makefile.global -include $(top_srcdir)/contrib/contrib-global.mk -endif diff --git a/contrib/fulltextindex/README.fti b/contrib/fulltextindex/README.fti deleted file mode 100644 index c2a24096c3..0000000000 --- a/contrib/fulltextindex/README.fti +++ /dev/null @@ -1,200 +0,0 @@ -An attempt at some sort of Full Text Indexing for PostgreSQL. - -The included software is an attempt to add some sort of Full Text Indexing -support to PostgreSQL. I mean by this that we can ask questions like: - - Give me all rows that have 'still' and 'nash' in the 'artist' or 'title' - fields. - -Ofcourse we can write this as: - - select * from cds where (artist ~* 'stills' or title ~* 'stills') and - (artist ~* 'nash' or title ~* 'nash'); - -But this does not use any indices, and therefore, if your database -gets very large, it will not have very high performance (the above query -requires a sequential scan of the table). - -The approach used by this add-on is to define a trigger on the table and -columns you want to do these queries on. On every insert to the table, it -takes the value in the specified columns, breaks the text in these columns -up into pieces, and stores all sub-strings into another table, together -with a reference to the row in the original table that contained this -sub-string (it uses the oid of that row). - -By now creating an index over the 'fti-table', we can search for -substrings that occur in the original table. By making a join between -the fti-table and the orig-table, we can get the actual rows we want -(this can also be done by using subselects - but subselects are currently -inefficient in PostgreSQL, and maybe there're other ways too). - -The trigger code also allows an array called StopWords, that prevents -certain words from being indexed. - -As an example we take the previous query, where we assume we have all -sub-strings in the table 'cds-fti': - - select c.* - from cds c, cds-fti f1, cds-fti f2 - where f1.string ~ '^stills' and - f2.string ~ '^nash' and - f1.id = c.oid and - f2.id = c.oid ; - -We can use the ~ (case-sensitive regular expression) here, because of -the way sub-strings are built: from right to left, ie. house -> 'se' + -'use' + 'ouse' + 'house'. If a ~ search starts with a ^ (match start of -string), btree indices can be used by PostgreSQL. - -Now, how do we create the trigger that maintains the fti-table? First: the -fti-table should have the following schema: - - create cds-fti ( string varchar(N), id oid ) without oids; - -Don't change the *names* of the columns, the varchar() can in fact also -be of text-type. If you do use varchar, make sure the largest possible -sub-string will fit. - -The create the function that contains the trigger:: - - create function fti() returns trigger as - '/path/to/fti.so' language C; - -And finally define the trigger on the 'cds' table: - - create trigger cds-fti-trigger after update or insert or delete on cds - for each row execute procedure fti(cds-fti, artist, title); - -Here, the trigger will be defined on table 'cds', it will create -sub-strings from the fields 'artist' and 'title', and it will place -those sub-strings in the table 'cds-fti'. - -Now populate the table 'cds'. This will also populate the table 'cds-fti'. -It's fastest to populate the table *before* you create the indices. Use the -supplied 'fti.pl' to assist you with this. - -Before you start using the system, you should at least have the following -indices: - - create index cds-fti-idx on cds-fti (string); -- String matching - create index cds-fti-idx on cds-fti (id); -- For deleting a cds row - create index cds-oid-idx on cds (oid); -- For joining cds to cds-fti - -To get the most performance out of this, you should have 'cds-fti' -clustered on disk, ie. all rows with the same sub-strings should be -close to each other. There are 3 ways of doing this: - -1. After you have created the indices, execute 'cluster cds-fti-idx on cds-fti'. -2. Do a 'select * into tmp-table from cds-fti order by string' *before* - you create the indices, then 'drop table cds-fti' and - 'alter table tmp-table rename to cds-fti' -3. *Before* creating indices, dump the contents of the cds-fti table using - 'pg_dump -a -t cds-fti dbase-name', remove the \connect - from the beginning and the \. from the end, and sort it using the - UNIX 'sort' program, and reload the data. - -Method 1 is very slow, 2 a lot faster, and for very large tables, 3 is -preferred. - - -BENCH: -~~~~~ - -Maarten Boekhold <maartenb@dutepp0.et.tudelft.nl> -The following data was generated by the 'timings.sh' script included -in this directory. It uses a very large table with music-related -articles as a source for the fti-table. The tables used are: - -product : contains product information : 540.429 rows -artist_fti : fti table for product : 4.501.321 rows -clustered : same as above, only clustered : 4.501.321 rows - -A sequential scan of the artist_fti table (and thus also the clustered table) -takes around 6:16 minutes.... - -Unfortunately I cannot provide anybody else with this test-data, since I -am not allowed to redistribute the data (it's a database being sold by -a couple of wholesale companies). Anyways, it's megabytes, so you probably -wouldn't want it in this distribution anyways. - -I haven't tested this with less data. - -The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database -on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This -is a very slow disk. - -The postmaster was running with: - - postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \ - -o -o /usr/local/pgsql/debug-output -F -d 1 - -('trashing' means a 'select count(*) from artist_fti' to completely trash -any disk-caches and buffers....) - -TESTING ON UNCLUSTERED FTI -trashing -1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01% -2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02% -3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01% -trashing -1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09% -2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38% -3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89% -trashing -1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19% -2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55% -3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85% -trashing -1: ^lling : 0.040u 0.010s 0m55.104s 0.09% -2: ^lling : 0.030u 0.030s 0m4.716s 1.27% -3: ^lling : 0.040u 0.010s 0m2.157s 2.31% -trashing -1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06% -2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11% -3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09% -trashing -1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28% -2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35% -3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48% -trashing -1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77% -2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56% -3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94% -trashing -1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10% -2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59% -3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06% - -TESTING ON CLUSTERED FTI -trashing -1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02% -2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03% -3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03% -trashing -1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27% -2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10% -3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08% -trashing -1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67% -2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17% -3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73% -trashing -1: ^lling : 0.040u 0.000s 0m28.623s 0.13% -2: ^lling : 0.030u 0.010s 0m2.339s 1.70% -3: ^lling : 0.030u 0.010s 0m1.975s 2.02% -trashing -1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16% -2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06% -3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45% -trashing -1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70% -2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90% -3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76% -trashing -1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39% -2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04% -3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72% -trashing -1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09% -2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39% -3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17% diff --git a/contrib/fulltextindex/TODO b/contrib/fulltextindex/TODO deleted file mode 100644 index a1b359256f..0000000000 --- a/contrib/fulltextindex/TODO +++ /dev/null @@ -1 +0,0 @@ -Place "stop" words in lookup table diff --git a/contrib/fulltextindex/WARNING b/contrib/fulltextindex/WARNING deleted file mode 100644 index ea60db30ee..0000000000 --- a/contrib/fulltextindex/WARNING +++ /dev/null @@ -1,25 +0,0 @@ -WARNING -------- - -This implementation of full text indexing is very slow and inefficient. It is -STRONGLY recommended that you switch to using contrib/tsearch which offers these -features: - -Advantages ----------- -* Actively developed and improved -* Tight integration with OpenFTS (openfts.sourceforge.net) -* Orders of magnitude faster (eg. 300 times faster for two keyword search) -* No extra tables or multi-way joins required -* Select syntax allows easy 'and'ing, 'or'ing and 'not'ing of keywords -* Built-in stemmer with customisable dictionaries (ie. searching for 'jellies' will find 'jelly') -* Stop words automatically ignored -* Supports non-C locales - -Disadvantages -------------- -* Only indexes full words - substring searches on words won't work. - eg. Searching for 'burg' won't find 'burger' - -Due to the deficiencies in this module, it is quite likely that it will be removed from the standard PostgreSQL distribution in the future. - diff --git a/contrib/fulltextindex/fti.c b/contrib/fulltextindex/fti.c deleted file mode 100644 index a0282ec95b..0000000000 --- a/contrib/fulltextindex/fti.c +++ /dev/null @@ -1,468 +0,0 @@ -#include "postgres.h" - -#include <ctype.h> - -#include "executor/spi.h" -#include "commands/trigger.h" - -/* - * Trigger function accepts variable number of arguments: - * - * $PostgreSQL: pgsql/contrib/fulltextindex/fti.c,v 1.27 2006/05/30 22:12:12 tgl Exp $ - * - * 1. relation in which to store the substrings - * 2. fields to extract substrings from - * - * The relation in which to insert *must* have the following layout: - * - * string varchar(#) - * id oid - * - * where # is the largest size of the varchar columns being indexed - * - * Example: - * - * -- Create the SQL function based on the compiled shared object - * create function fti() returns trigger as - * '/usr/local/pgsql/lib/contrib/fti.so' language C; - * - * -- Create the FTI table - * create table product_fti (string varchar(255), id oid) without oids; - * - * -- Create an index to assist string matches - * create index product_fti_string_idx on product_fti (string); - * - * -- Create an index to assist trigger'd deletes - * create index product_fti_id_idx on product_fti (id); - * - * -- Create an index on the product oid column to assist joins - * -- between the fti table and the product table - * create index product_oid_idx on product (oid); - * - * -- Create the trigger to perform incremental changes to the full text index. - * create trigger product_fti_trig after update or insert or delete on product - * for each row execute procedure fti(product_fti, title, artist); - * ^^^^^^^^^^^ - * table where full text index is stored - * ^^^^^^^^^^^^^ - * columns to index in the base table - * - * After populating 'product', try something like: - * - * SELECT DISTINCT(p.*) FROM product p, product_fti f1, product_fti f2 WHERE - * f1.string ~ '^slippery' AND f2.string ~ '^wet' AND p.oid=f1.id AND p.oid=f2.id; - * - * To check that your indicies are being used correctly, make sure you - * EXPLAIN SELECT ... your test query above. - * - * CHANGELOG - * --------- - * - * august 3 2001 - * Extended fti function to accept more than one column as a - * parameter and all specified columns are indexed. Changed - * all uses of sprintf to snprintf. Made error messages more - * consistent. - * - * march 4 1998 Changed breakup() to return less substrings. Only breakup - * in word parts which are in turn shortened from the start - * of the word (ie. word, ord, rd) - * Did allocation of substring buffer outside of breakup() - * - * oct. 5 1997, fixed a bug in string breakup (where there are more nonalpha - * characters between words then 1). - * - * oct 4-5 1997 implemented the thing, at least the basic functionallity - * of it all.... - * - * TODO - * ---- - * - * prevent generating duplicate words for an oid in the fti table - * save a plan for deletes - * create a function that will make the index *after* we have populated - * the main table (probably first delete all contents to be sure there's - * nothing in it, then re-populate the fti-table) - * - * can we do something with operator overloading or a seperate function - * that can build the final query automagically? - */ - -PG_MODULE_MAGIC; - -#define MAX_FTI_QUERY_LENGTH 8192 - -extern Datum fti(PG_FUNCTION_ARGS); -static char *breakup(char *, char *); -static bool is_stopword(char *); - -static bool new_tuple = false; - - -#ifdef USE_STOP_WORDS - -/* THIS LIST MUST BE IN SORTED ORDER, A BINARY SEARCH IS USED!!!! */ -char *StopWords[] = { /* list of words to skip in indexing */ - "no", - "the", - "yes" -}; -#endif /* USE_STOP_WORDS */ - -/* stuff for caching query-plans, stolen from contrib/spi/\*.c */ -typedef struct -{ - char *ident; - int nplans; - void **splan; -} EPlan; - -static EPlan *InsertPlans = NULL; -static EPlan *DeletePlans = NULL; -static int nInsertPlans = 0; -static int nDeletePlans = 0; - -static EPlan *find_plan(char *ident, EPlan ** eplan, int *nplans); - -/***********************************************************************/ -PG_FUNCTION_INFO_V1(fti); - -Datum -fti(PG_FUNCTION_ARGS) -{ - TriggerData *trigdata; - Trigger *trigger; /* to get trigger name */ - int nargs; /* # of arguments */ - char **args; /* arguments */ - char *relname; /* triggered relation name */ - Relation rel; /* triggered relation */ - char *indexname; /* name of table for substrings */ - HeapTuple rettuple = NULL; - TupleDesc tupdesc; /* tuple description */ - bool isinsert = false; - bool isdelete = false; - int ret; - char query[MAX_FTI_QUERY_LENGTH]; - Oid oid; - - /* - * FILE *debug; - */ - - /* - * debug = fopen("/dev/xconsole", "w"); fprintf(debug, "FTI: entered - * function\n"); fflush(debug); - */ - - if (!CALLED_AS_TRIGGER(fcinfo)) - /* internal error */ - elog(ERROR, "not fired by trigger manager"); - - /* It's safe to cast now that we've checked */ - trigdata = (TriggerData *) fcinfo->context; - - if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), - errmsg("can't process STATEMENT events"))); - - if (TRIGGER_FIRED_BEFORE(trigdata->tg_event)) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), - errmsg("must be fired AFTER event"))); - - if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) - isinsert = true; - if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - { - isdelete = true; - isinsert = true; - } - if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) - isdelete = true; - - trigger = trigdata->tg_trigger; - rel = trigdata->tg_relation; - relname = SPI_getrelname(rel); - rettuple = trigdata->tg_trigtuple; - if (isdelete && isinsert) /* is an UPDATE */ - rettuple = trigdata->tg_newtuple; - - if ((ret = SPI_connect()) < 0) - /* internal error */ - elog(ERROR, "SPI_connect failed, returned %d", ret); - - nargs = trigger->tgnargs; - if (nargs < 2) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), - errmsg("fti trigger must have at least 2 arguments"))); - - args = trigger->tgargs; - indexname = args[0]; - tupdesc = rel->rd_att; /* what the tuple looks like (?) */ - - /* get oid of current tuple, needed by all, so place here */ - oid = HeapTupleGetOid(rettuple); - if (!OidIsValid(oid)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("OID is not present"), - errhint("Full Text Index requires indexed tables be created WITH OIDS."))); - - if (isdelete) - { - void *pplan; - Oid *argtypes; - Datum values[1]; - EPlan *plan; - int i; - - snprintf(query, MAX_FTI_QUERY_LENGTH, "D%s", indexname); - for (i = 1; i < nargs; i++) - snprintf(query, MAX_FTI_QUERY_LENGTH, "%s$%s", query, args[i]); - - plan = find_plan(query, &DeletePlans, &nDeletePlans); - if (plan->nplans <= 0) - { - argtypes = (Oid *) palloc(sizeof(Oid)); - - argtypes[0] = OIDOID; - - snprintf(query, MAX_FTI_QUERY_LENGTH, "DELETE FROM %s WHERE id = $1", indexname); - pplan = SPI_prepare(query, 1, argtypes); - if (!pplan) - /* internal error */ - elog(ERROR, "SPI_prepare returned NULL in delete"); - pplan = SPI_saveplan(pplan); - if (pplan == NULL) - /* internal error */ - elog(ERROR, "SPI_saveplan returned NULL in delete"); - - plan->splan = (void **) malloc(sizeof(void *)); - *(plan->splan) = pplan; - plan->nplans = 1; - } - - values[0] = oid; - - ret = SPI_execp(*(plan->splan), values, NULL, 0); - if (ret != SPI_OK_DELETE) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), - errmsg("error executing delete"))); - } - - if (isinsert) - { - char *substring; - char *column; - void *pplan; - Oid *argtypes; - Datum values[2]; - int colnum; - struct varlena *data; - EPlan *plan; - int i; - char *buff; - char *string; - - snprintf(query, MAX_FTI_QUERY_LENGTH, "I%s", indexname); - for (i = 1; i < nargs; i++) - snprintf(query, MAX_FTI_QUERY_LENGTH, "%s$%s", query, args[i]); - - plan = find_plan(query, &InsertPlans, &nInsertPlans); - - /* no plan yet, so allocate mem for argtypes */ - if (plan->nplans <= 0) - { - argtypes = (Oid *) palloc(2 * sizeof(Oid)); - - argtypes[0] = VARCHAROID; /* create table t_name (string - * varchar, */ - argtypes[1] = OIDOID; /* id oid); */ - - /* prepare plan to gain speed */ - snprintf(query, MAX_FTI_QUERY_LENGTH, "INSERT INTO %s (string, id) VALUES ($1, $2)", - indexname); - pplan = SPI_prepare(query, 2, argtypes); - if (!pplan) - /* internal error */ - elog(ERROR, "SPI_prepare returned NULL in insert"); - - pplan = SPI_saveplan(pplan); - if (pplan == NULL) - /* internal error */ - elog(ERROR, "SPI_saveplan returned NULL in insert"); - - plan->splan = (void **) malloc(sizeof(void *)); - *(plan->splan) = pplan; - plan->nplans = 1; - } - - /* prepare plan for query */ - for (i = 0; i < nargs - 1; i++) - { - colnum = SPI_fnumber(tupdesc, args[i + 1]); - if (colnum == SPI_ERROR_NOATTRIBUTE) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" of \"%s\" does not exist", - args[i + 1], indexname))); - - /* Get the char* representation of the column */ - column = SPI_getvalue(rettuple, tupdesc, colnum); - - /* make sure we don't try to index NULL's */ - if (column) - { - string = column; - while (*string != '\0') - { - *string = tolower((unsigned char) *string); - string++; - } - - data = (struct varlena *) palloc(sizeof(int32) + strlen(column) +1); - buff = palloc(strlen(column) + 1); - /* saves lots of calls in while-loop and in breakup() */ - - new_tuple = true; - - while ((substring = breakup(column, buff))) - { - int l; - - l = strlen(substring); - - data->vl_len = l + sizeof(int32); - memcpy(VARDATA(data), substring, l); - values[0] = PointerGetDatum(data); - values[1] = oid; - - ret = SPI_execp(*(plan->splan), values, NULL, 0); - if (ret != SPI_OK_INSERT) - ereport(ERROR, - (errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION), - errmsg("error executing insert"))); - } - pfree(buff); - pfree(data); - } - } - } - - SPI_finish(); - return PointerGetDatum(rettuple); -} - -static char * -breakup(char *string, char *substring) -{ - static char *last_start; - static char *cur_pos; - - if (new_tuple) - { - cur_pos = last_start = &string[strlen(string) - 1]; - new_tuple = false; /* don't initialize this next time */ - } - - while (cur_pos > string) /* don't read before start of 'string' */ - { - /* - * skip pieces at the end of a string that are not alfa-numeric (ie. - * 'string$%^&', last_start first points to '&', and after this to 'g' - */ - if (!isalnum((unsigned char) *last_start)) - { - while (!isalnum((unsigned char) *last_start) && - last_start > string) - last_start--; - cur_pos = last_start; - } - - cur_pos--; /* substrings are at minimum 2 characters long */ - - if (isalnum((unsigned char) *cur_pos)) - { - /* Houston, we have a substring! :) */ - memcpy(substring, cur_pos, last_start - cur_pos + 1); - substring[last_start - cur_pos + 1] = '\0'; - if (!is_stopword(substring)) - return substring; - } - else - { - last_start = cur_pos - 1; - cur_pos = last_start; - } - } - - return NULL; /* we've processed all of 'string' */ -} - -/* copied from src/backend/parser/keywords.c and adjusted for our situation*/ -static bool -is_stopword(char *text) -{ -#ifdef USE_STOP_WORDS - char **StopLow; /* for list of stop-words */ - char **StopHigh; - char **StopMiddle; - int difference; - - StopLow = &StopWords[0]; /* initialize stuff for binary search */ - StopHigh = endof(StopWords); - - /* Loop invariant: *StopLow <= text < *StopHigh */ - - while (StopLow < StopHigh) - { - StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = strcmp(*StopMiddle, text); - if (difference == 0) - return (true); - else if (difference < 0) - StopLow = StopMiddle + 1; - else - StopHigh = StopMiddle; - } -#endif /* USE_STOP_WORDS */ - - return (false); -} - -/* for caching of query plans, stolen from contrib/spi/\*.c */ -static EPlan * -find_plan(char *ident, EPlan ** eplan, int *nplans) -{ - EPlan *newp; - int i; - - if (*nplans > 0) - { - for (i = 0; i < *nplans; i++) - { - if (strcmp((*eplan)[i].ident, ident) == 0) - break; - } - if (i != *nplans) - return (*eplan + i); - *eplan = (EPlan *) realloc(*eplan, (i + 1) * sizeof(EPlan)); - newp = *eplan + i; - } - else - { - newp = *eplan = (EPlan *) malloc(sizeof(EPlan)); - (*nplans) = i = 0; - } - - newp->ident = (char *) malloc(strlen(ident) + 1); - strcpy(newp->ident, ident); - newp->nplans = 0; - newp->splan = NULL; - (*nplans)++; - - return (newp); -} diff --git a/contrib/fulltextindex/fti.pl b/contrib/fulltextindex/fti.pl deleted file mode 100644 index 958e3a3273..0000000000 --- a/contrib/fulltextindex/fti.pl +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/perl -# -# $PostgreSQL: pgsql/contrib/fulltextindex/fti.pl,v 1.9 2006/03/11 04:38:29 momjian Exp $ -# -# This script substracts all suffixes of all words in a specific column in a table -# and generates output that can be loaded into a new table with the -# psql '\copy' command. The new table should have the following structure: -# -# create table tab ( -# string text, -# id oid -# ); -# -# Note that you cannot use 'copy' (the SQL-command) directly, because -# there's no '\.' included at the end of the output. -# -# The output can be fed through the UNIX commands 'uniq' and 'sort' -# to generate the smallest and sorted output to populate the fti-table. -# -# Example: -# -# fti.pl -u -d mydb -t mytable -c mycolumn,mycolumn2 -f myfile -# sort -o myoutfile myfile -# uniq myoutfile sorted-file -# -# psql -u mydb -# -# \copy my_fti_table from myfile -# -# create index fti_idx on my_fti_table (string,id); -# -# create function fti() returns trigger as -# '/path/to/fti/file/fti.so' -# language C; -# -# create trigger my_fti_trigger after update or insert or delete -# on mytable -# for each row execute procedure fti(my_fti_table, mycolumn); -# -# Make sure you have an index on mytable(oid) to be able to do somewhat -# efficient substring searches. - -#use lib '/usr/local/pgsql/lib/perl5/'; -use lib '/mnt/web/guide/postgres/lib/perl5/site_perl'; -use Pg; -use Getopt::Std; - -$PGRES_EMPTY_QUERY = 0 ; -$PGRES_COMMAND_OK = 1 ; -$PGRES_TUPLES_OK = 2 ; -$PGRES_COPY_OUT = 3 ; -$PGRES_COPY_IN = 4 ; -$PGRES_BAD_RESPONSE = 5 ; -$PGRES_NONFATAL_ERROR = 6 ; -$PGRES_FATAL_ERROR = 7 ; - -# the minimum length of word to include in the full text index -$MIN_WORD_LENGTH = 2; - -# the minimum length of the substrings in the full text index -$MIN_SUBSTRING_LENGTH = 2; - -$[ = 0; # make sure string offsets start at 0 - -sub break_up { - my $string = pop @_; - - # convert strings to lower case - $string = lc($string); - @strings = split(/\W+/, $string); - @subs = (); - - foreach $s (@strings) { - $len = length($s); - next if ($len <= $MIN_WORD_LENGTH); - for ($i = 0; $i <= $len - $MIN_SUBSTRING_LENGTH; $i++) { - $tmp = substr($s, $i); - push(@subs, $tmp); - } - } - - return @subs; -} - -sub connect_db { - my $dbname = shift @_; - my $user = shift @_; - my $passwd = shift @_; - - if (!defined($dbname) || $dbname eq "") { - return 1; - } - $connect_string = "dbname=$dbname"; - - if ($user ne "") { - if ($passwd eq "") { - return 0; - } - $connect_string = "$connect_string user=$user password=$passwd ". - "authtype=password"; - } - - $PG_CONN = PQconnectdb($connect_string); - - if (PQstatus($PG_CONN)) { - print STDERR "Couldn't make connection with database!\n"; - print STDERR PQerrorMessage($PG_CONN), "\n"; - return 0; - } - - return 1; -} - -sub quit_prog { - close(OUT); - unlink $opt_f; - if (defined($PG_CONN)) { - PQfinish($PG_CONN); - } - exit 1; -} - -sub get_username { - print "Username: "; - chop($n = <STDIN>); - - return $n;; -} - -sub get_password { - print "Password: "; - - system("stty -echo < /dev/tty"); - chop($pwd = <STDIN>); - print "\n"; - system("stty echo < /dev/tty"); - - return $pwd; -} - -sub main { - getopts('d:t:c:f:u'); - - if (!$opt_d || !$opt_t || !$opt_c || !$opt_f) { - print STDERR "usage: $0 [-u] -d database -t table -c column[,column...] ". - "-f output-file\n"; - return 1; - } - - @cols = split(/,/, $opt_c); - - if (defined($opt_u)) { - $uname = get_username(); - $pwd = get_password(); - } else { - $uname = ""; - $pwd = ""; - } - - $SIG{'INT'} = 'quit_prog'; - if (!connect_db($opt_d, $uname, $pwd)) { - print STDERR "Connecting to database failed!\n"; - return 1; - } - - if (!open(OUT, ">$opt_f")) { - print STDERR "Couldnt' open file '$opt_f' for output!\n"; - return 1; - } - - PQexec($PG_CONN, "SET search_path = public"); - PQexec($PG_CONN, "begin"); - - $query = "declare C cursor for select (\""; - $query .= join("\" || ' ' || \"", @cols); - $query .= "\") as string, oid from $opt_t"; - $res = PQexec($PG_CONN, $query); - if (!$res || (PQresultStatus($res) != $PGRES_COMMAND_OK)) { - print STDERR "Error declaring cursor!\n"; - print STDERR PQerrorMessage($PG_CONN), "\n"; - PQfinish($PG_CONN); - return 1; - } - PQclear($res); - - $query = "fetch in C"; - while (($res = PQexec($PG_CONN, $query)) && - (PQresultStatus($res) == $PGRES_TUPLES_OK) && - (PQntuples($res) == 1)) { - $col = PQgetvalue($res, 0, 0); - $oid = PQgetvalue($res, 0, 1); - - @subs = break_up($col); - foreach $i (@subs) { - print OUT "$i\t$oid\n"; - } - } - - if (!$res || (PQresultStatus($res) != PGRES_TUPLES_OK)) { - print STDERR "Error retrieving data from backend!\n"; - print STDERR PQerrorMEssage($PG_CONN), "\n"; - PQfinish($PG_CONN); - return 1; - } - - PQclear($res); - PQfinish($PG_CONN); - - return 0; -} - -exit main(); diff --git a/contrib/fulltextindex/fti.sql.in b/contrib/fulltextindex/fti.sql.in deleted file mode 100644 index c060ee8757..0000000000 --- a/contrib/fulltextindex/fti.sql.in +++ /dev/null @@ -1,6 +0,0 @@ --- Adjust this setting to control where the objects get created. -SET search_path = public; - -CREATE OR REPLACE FUNCTION fti() RETURNS trigger AS - 'MODULE_PATHNAME', 'fti' - LANGUAGE C VOLATILE CALLED ON NULL INPUT; diff --git a/contrib/fulltextindex/timings.sh b/contrib/fulltextindex/timings.sh deleted file mode 100755 index ad1495128c..0000000000 --- a/contrib/fulltextindex/timings.sh +++ /dev/null @@ -1,350 +0,0 @@ -#!/bin/sh - -PATH=${PATH}:/usr/local/pgsql/bin -TIMEFORMAT="%3Uu %3Ss %lR %P%%" -export PATH TIMEFORMAT - -case "$1" in - -n) - trashing=0 - ;; - *) - trashing=1 - ;; -esac - -echo "TESTING ON UNCLUSTERED FTI" - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 - where - f1.string ~ '^lapton' and f2.string ~ '^ric' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 - where - f1.string ~ '^lling' and f2.string ~ '^tones' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2 - where - f1.string ~ '^aughan' and f2.string ~ '^evie' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, artist_fti f1 - where - f1.string ~ '^lling' and - p.oid=f1.id;" - -echo -n "1: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, artist_fti f1, artist_fti f2, artist_fti f3 - where - f1.string ~ '^stev' and - f2.string ~ '^ray' and - f3.string ~ '^vaugh' and - p.oid=f1.id and p.oid=f2.id and p.oid=f3.id;" - -echo -n "1: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from artist_fti where string ~ '^lling';" - -echo -n "1: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from artist_fti where string ~ '^vaughan';" - -echo -n "1: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from artist_fti where string ~ '^rol';" - -echo -n "1: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - -echo -echo "TESTING ON CLUSTERED FTI" - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, clustered f1, clustered f2 - where - f1.string ~ '^lapton' and f2.string ~ '^ric' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lapton and ^ric : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, clustered f1, clustered f2 - where - f1.string ~ '^lling' and f2.string ~ '^tones' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling and ^tones : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, clustered f1, clustered f2 - where - f1.string ~ '^aughan' and f2.string ~ '^evie' and - f1.id=p.oid and f2.id=p.oid;" - -echo -n "1: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^aughan and ^evie : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, clustered f1 - where - f1.string ~ '^lling' and - p.oid=f1.id;" - -echo -n "1: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(p.oid) from product p, clustered f1, clustered f2, clustered f3 - where - f1.string ~ '^stev' and - f2.string ~ '^ray' and - f3.string ~ '^vaugh' and - p.oid=f1.id and p.oid=f2.id and p.oid=f3.id;" - -echo -n "1: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^stev and ^ray and ^vaugh : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from clustered where string ~ '^lling';" - -echo -n "1: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^lling (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from clustered where string ~ '^vaughan';" - -echo -n "1: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^vaughan (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - -# trash disk -if [ $trashing = 1 ] -then - echo "trashing" - psql -q -n -o /dev/null -c "select count(*) from product;" test -else - echo -fi - -Q="select count(*) from clustered where string ~ '^rol';" - -echo -n "1: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "2: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test -echo -n "3: ^rol (no join) : " -time psql -q -n -o /dev/null -c "$Q" test - - - - - - - - - diff --git a/contrib/fulltextindex/uninstall.sql b/contrib/fulltextindex/uninstall.sql deleted file mode 100644 index a06bab860f..0000000000 --- a/contrib/fulltextindex/uninstall.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Adjust this setting to control where the objects get created. -SET search_path = public; - -DROP FUNCTION fti() CASCADE; |