summaryrefslogtreecommitdiff
path: root/contrib/fulltextindex
diff options
context:
space:
mode:
authorBruce Momjian2000-06-15 18:55:34 +0000
committerBruce Momjian2000-06-15 18:55:34 +0000
commitf7f177d372750e4f766ccefdf20e1b30d66cba0a (patch)
treed4e2a148640ba36d9a1e8cbf8557faa606ce5ba1 /contrib/fulltextindex
parent82c4733116813ff862dade1984b6fb74149f4124 (diff)
/contrib patch from Karel.
Diffstat (limited to 'contrib/fulltextindex')
-rw-r--r--contrib/fulltextindex/BENCH97
-rw-r--r--contrib/fulltextindex/Makefile66
-rw-r--r--contrib/fulltextindex/README100
-rw-r--r--contrib/fulltextindex/fticopy204
4 files changed, 150 insertions, 317 deletions
diff --git a/contrib/fulltextindex/BENCH b/contrib/fulltextindex/BENCH
index d8375fe606..e69de29bb2 100644
--- a/contrib/fulltextindex/BENCH
+++ b/contrib/fulltextindex/BENCH
@@ -1,97 +0,0 @@
-The following data was generated by the 'timings.sh' script included
-in this directory. It uses a very large table with music-related
-articles as a source for the fti-table. The tables used are:
-
-product : contains product information : 540.429 rows
-artist_fti : fti table for product : 4.501.321 rows
-clustered : same as above, only clustered : 4.501.321 rows
-
-A sequential scan of the artist_fti table (and thus also the clustered table)
-takes around 6:16 minutes....
-
-Unfortunately I cannot probide anybody else with this test-date, since I
-am not allowed to redistribute the data (it's a database being sold by
-a couple of wholesale companies). Anyways, it's megabytes, so you probably
-wouldn't want it in this distribution anyways.
-
-I haven't tested this with less data.
-
-The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database
-on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This
-is a very slow disk.
-
-The postmaster was running with:
-
- postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \
- -o -o /usr/local/pgsql/debug-output -F -d 1
-
-('trashing' means a 'select count(*) from artist_fti' to completely trash
-any disk-caches and buffers....)
-
-TESTING ON UNCLUSTERED FTI
-trashing
-1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01%
-2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02%
-3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01%
-trashing
-1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09%
-2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38%
-3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89%
-trashing
-1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19%
-2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55%
-3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85%
-trashing
-1: ^lling : 0.040u 0.010s 0m55.104s 0.09%
-2: ^lling : 0.030u 0.030s 0m4.716s 1.27%
-3: ^lling : 0.040u 0.010s 0m2.157s 2.31%
-trashing
-1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06%
-2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11%
-3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09%
-trashing
-1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28%
-2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35%
-3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48%
-trashing
-1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77%
-2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56%
-3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94%
-trashing
-1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10%
-2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59%
-3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06%
-
-TESTING ON CLUSTERED FTI
-trashing
-1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02%
-2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03%
-3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03%
-trashing
-1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27%
-2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10%
-3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08%
-trashing
-1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67%
-2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17%
-3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73%
-trashing
-1: ^lling : 0.040u 0.000s 0m28.623s 0.13%
-2: ^lling : 0.030u 0.010s 0m2.339s 1.70%
-3: ^lling : 0.030u 0.010s 0m1.975s 2.02%
-trashing
-1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16%
-2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06%
-3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45%
-trashing
-1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70%
-2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90%
-3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76%
-trashing
-1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39%
-2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04%
-3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72%
-trashing
-1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09%
-2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39%
-3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17%
diff --git a/contrib/fulltextindex/Makefile b/contrib/fulltextindex/Makefile
index bf1d02d227..a417b4a2bf 100644
--- a/contrib/fulltextindex/Makefile
+++ b/contrib/fulltextindex/Makefile
@@ -1,24 +1,58 @@
-SRCDIR= ../../src
+#
+# $Header: /cvsroot/pgsql/contrib/fulltextindex/Attic/Makefile,v 1.3 2000/06/15 18:54:51 momjian Exp $
+#
-include $(SRCDIR)/Makefile.global
+TOPDIR=../..
-CONTRIBDIR=$(LIBDIR)/contrib
+include ../Makefile.global
-CFLAGS+= $(CFLAGS_SL)
+NAME = fti
-TARGETS= fti$(DLSUFFIX)
-CLEANFILES+= $(TARGETS)
-CURDIR=`pwd`
+PROGRAM =
+OBJS = $(NAME).o
+DOCS = $(NAME).doc
+SQLS = $(NAME).sql
+BINS = fti.pl
+EXAMPLES=
+MODS = $(NAME)$(DLSUFFIX)
-all:: $(TARGETS)
+CFLAGS += -I. $(CFLAGS_SL)
-%.sql: %.source
- rm -f $@; \
- sed -e "s:_CURRENTDIR_:$(CURDIR):g" \
- -e "s:_DLSUFFIX_:$(DLSUFFIX):g" < $< > $@
+OTHER_CLEAN = $(SQLS)
-clean:
- rm -f $(TARGETS) *.o
+all: $(MODS) $(SQLS)
-dist:
- tar cf fti.tar README BENCH Makefile fti.c timings.sh
+%.sql: %.sql.in
+ $(SED) "s|MODULE_PATHNAME|$(CONTRIB_MODDIR)/$@|" < $< > $@
+
+install: install_doc install_sql install_mod install_bin
+
+install_doc:
+ for inst_file in $(DOCS); do \
+ $(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_DOCDIR); \
+ done
+
+install_sql:
+ for inst_file in $(SQLS); do \
+ $(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_SQLDIR); \
+ done
+
+install_mod:
+ for inst_file in $(MODS); do \
+ $(INSTALL) $(INSTL_SHLIB_OPTS) $$inst_file $(CONTRIB_MODDIR); \
+ done
+
+install_bin:
+ for inst_file in $(BINS); do \
+ $(INSTALL) $(INSTL_EXE_OPTS) $$inst_file $(CONTRIB_BINDIR); \
+ done
+
+depend dep:
+ $(CC) -MM -MG $(CFLAGS) *.c > depend
+
+clean:
+ $(RM) *~ $(OBJS) $(MODS) $(PROGRAM) depend $(OTHER_CLEAN) core log
+
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff --git a/contrib/fulltextindex/README b/contrib/fulltextindex/README
index 06850f7493..fdb6fcf3b1 100644
--- a/contrib/fulltextindex/README
+++ b/contrib/fulltextindex/README
@@ -94,4 +94,104 @@ Method 1 is very slow, 2 a lot faster, and for very large tables, 3 is
preferred.
+BENCH:
+~~~~~
+
Maarten Boekhold <maartenb@dutepp0.et.tudelft.nl>
+The following data was generated by the 'timings.sh' script included
+in this directory. It uses a very large table with music-related
+articles as a source for the fti-table. The tables used are:
+
+product : contains product information : 540.429 rows
+artist_fti : fti table for product : 4.501.321 rows
+clustered : same as above, only clustered : 4.501.321 rows
+
+A sequential scan of the artist_fti table (and thus also the clustered table)
+takes around 6:16 minutes....
+
+Unfortunately I cannot probide anybody else with this test-date, since I
+am not allowed to redistribute the data (it's a database being sold by
+a couple of wholesale companies). Anyways, it's megabytes, so you probably
+wouldn't want it in this distribution anyways.
+
+I haven't tested this with less data.
+
+The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database
+on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This
+is a very slow disk.
+
+The postmaster was running with:
+
+ postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \
+ -o -o /usr/local/pgsql/debug-output -F -d 1
+
+('trashing' means a 'select count(*) from artist_fti' to completely trash
+any disk-caches and buffers....)
+
+TESTING ON UNCLUSTERED FTI
+trashing
+1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01%
+2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02%
+3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01%
+trashing
+1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09%
+2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38%
+3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89%
+trashing
+1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19%
+2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55%
+3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85%
+trashing
+1: ^lling : 0.040u 0.010s 0m55.104s 0.09%
+2: ^lling : 0.030u 0.030s 0m4.716s 1.27%
+3: ^lling : 0.040u 0.010s 0m2.157s 2.31%
+trashing
+1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06%
+2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11%
+3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09%
+trashing
+1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28%
+2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35%
+3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48%
+trashing
+1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77%
+2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56%
+3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94%
+trashing
+1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10%
+2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59%
+3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06%
+
+TESTING ON CLUSTERED FTI
+trashing
+1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02%
+2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03%
+3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03%
+trashing
+1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27%
+2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10%
+3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08%
+trashing
+1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67%
+2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17%
+3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73%
+trashing
+1: ^lling : 0.040u 0.000s 0m28.623s 0.13%
+2: ^lling : 0.030u 0.010s 0m2.339s 1.70%
+3: ^lling : 0.030u 0.010s 0m1.975s 2.02%
+trashing
+1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16%
+2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06%
+3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45%
+trashing
+1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70%
+2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90%
+3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76%
+trashing
+1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39%
+2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04%
+3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72%
+trashing
+1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09%
+2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39%
+3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17%
diff --git a/contrib/fulltextindex/fticopy b/contrib/fulltextindex/fticopy
index 6b6d68e490..e69de29bb2 100644
--- a/contrib/fulltextindex/fticopy
+++ b/contrib/fulltextindex/fticopy
@@ -1,204 +0,0 @@
-#!/usr/bin/perl
-#
-# This script substracts all substrings out of a specific column in a table
-# and generates output that can be loaded into a new table with the
-# psql '\copy' command. The new table should have the following structure:
-#
-# create table tab (
-# string text,
-# id oid
-# );
-#
-# Note that you cannot use 'copy' (the SQL-command) directly, because
-# there's no '\.' included at the end of the output.
-#
-# The output can be fed through the UNIX commands 'uniq' and 'sort'
-# to generate the smallest and sorted output to populate the fti-table.
-#
-# Example:
-#
-# fti.pl -u -d mydb -t mytable -c mycolumn -f myfile
-# sort -o myoutfile myfile
-# uniq myoutfile sorted-file
-#
-# psql -u mydb
-#
-# \copy my_fti_table from myfile
-#
-# create index fti_idx on my_fti_table (string,id);
-#
-# create function fti() returns opaque as
-# '/path/to/fti/file/fti.so'
-# language 'newC';
-#
-# create trigger my_fti_trigger after update or insert or delete
-# on mytable
-# for each row execute procedure fti(my_fti_table, mycolumn);
-#
-# Make sure you have an index on mytable(oid) to be able to do somewhat
-# efficient substring searches.
-
-#use lib '/usr/local/pgsql/lib/perl5/';
-use lib '/mnt/web/guide/postgres/lib/perl5/site_perl';
-use Pg;
-use Getopt::Std;
-
-$PGRES_EMPTY_QUERY = 0 ;
-$PGRES_COMMAND_OK = 1 ;
-$PGRES_TUPLES_OK = 2 ;
-$PGRES_COPY_OUT = 3 ;
-$PGRES_COPY_IN = 4 ;
-$PGRES_BAD_RESPONSE = 5 ;
-$PGRES_NONFATAL_ERROR = 6 ;
-$PGRES_FATAL_ERROR = 7 ;
-
-$[ = 0; # make sure string offsets start at 0
-
-sub break_up {
- my $string = pop @_;
-
- @strings = split(/\W+/, $string);
- @subs = ();
-
- foreach $s (@strings) {
- $len = length($s);
- next if ($len < 4);
-
- $lpos = $len-1;
- while ($lpos >= 3) {
- $fpos = $lpos - 3;
- while ($fpos >= 0) {
- $sub = substr($s, $fpos, $lpos - $fpos + 1);
- push(@subs, $sub);
- $fpos = $fpos - 1;
- }
- $lpos = $lpos - 1;
- }
- }
-
- return @subs;
-}
-
-sub connect_db {
- my $dbname = shift @_;
- my $user = shift @_;
- my $passwd = shift @_;
-
- if (!defined($dbname) || $dbname eq "") {
- return 1;
- }
- $connect_string = "dbname=$dbname";
-
- if ($user ne "") {
- if ($passwd eq "") {
- return 0;
- }
- $connect_string = "$connect_string user=$user password=$passwd ".
- "authtype=password";
- }
-
- $PG_CONN = PQconnectdb($connect_string);
-
- if (PQstatus($PG_CONN)) {
- print STDERR "Couldn't make connection with database!\n";
- print STDERR PQerrorMessage($PG_CONN), "\n";
- return 0;
- }
-
- return 1;
-}
-
-sub quit_prog {
- close(OUT);
- unlink $opt_f;
- if (defined($PG_CONN)) {
- PQfinish($PG_CONN);
- }
- exit 1;
-}
-
-sub get_username {
- print "Username: ";
- chop($n = <STDIN>);
-
- return $n;;
-}
-
-sub get_password {
- print "Password: ";
-
- system("stty -echo < /dev/tty");
- chop($pwd = <STDIN>);
- print "\n";
- system("stty echo < /dev/tty");
-
- return $pwd;
-}
-
-sub main {
- getopts('d:t:c:f:u');
-
- if (!$opt_d || !$opt_t || !$opt_c || !$opt_f) {
- print STDERR "usage: $0 [-u] -d database -t table -c column ".
- "-f output-file\n";
- return 1;
- }
-
- if (defined($opt_u)) {
- $uname = get_username();
- $pwd = get_password();
- } else {
- $uname = "";
- $pwd = "";
- }
-
- $SIG{'INT'} = 'quit_prog';
- if (!connect_db($opt_d, $uname, $pwd)) {
- print STDERR "Connecting to database failed!\n";
- return 1;
- }
-
- if (!open(OUT, ">$opt_f")) {
- print STDERR "Couldnt' open file '$opt_f' for output!\n";
- return 1;
- }
-
- PQexec($PG_CONN, "begin");
-
- $query = "declare C cursor for select $opt_c, oid from $opt_t";
- $res = PQexec($PG_CONN, $query);
- if (!$res || (PQresultStatus($res) != $PGRES_COMMAND_OK)) {
- print STDERR "Error declaring cursor!\n";
- print STDERR PQerrorMessage($PG_CONN), "\n";
- PQfinish($PG_CONN);
- return 1;
- }
- PQclear($res);
-
- $query = "fetch in C";
- while (($res = PQexec($PG_CONN, $query)) &&
- (PQresultStatus($res) == $PGRES_TUPLES_OK) &&
- (PQntuples($res) == 1)) {
- $col = PQgetvalue($res, 0, 0);
- $oid = PQgetvalue($res, 0, 1);
-
- @subs = break_up($col);
- foreach $i (@subs) {
- print OUT "$i\t$oid\n";
- }
- }
-
- if (!$res || (PQresultStatus($res) != PGRES_TUPLES_OK)) {
- print STDERR "Error retrieving data from backend!\n";
- print STDERR PQerrorMEssage($PG_CONN), "\n";
- PQfinish($PG_CONN);
- return 1;
- }
-
- PQclear($res);
- PQfinish($PG_CONN);
-
- return 0;
-}
-
-exit main();