diff options
author | Bruce Momjian | 2000-06-15 18:55:34 +0000 |
---|---|---|
committer | Bruce Momjian | 2000-06-15 18:55:34 +0000 |
commit | f7f177d372750e4f766ccefdf20e1b30d66cba0a (patch) | |
tree | d4e2a148640ba36d9a1e8cbf8557faa606ce5ba1 /contrib/fulltextindex | |
parent | 82c4733116813ff862dade1984b6fb74149f4124 (diff) |
/contrib patch from Karel.
Diffstat (limited to 'contrib/fulltextindex')
-rw-r--r-- | contrib/fulltextindex/BENCH | 97 | ||||
-rw-r--r-- | contrib/fulltextindex/Makefile | 66 | ||||
-rw-r--r-- | contrib/fulltextindex/README | 100 | ||||
-rw-r--r-- | contrib/fulltextindex/fticopy | 204 |
4 files changed, 150 insertions, 317 deletions
diff --git a/contrib/fulltextindex/BENCH b/contrib/fulltextindex/BENCH index d8375fe606..e69de29bb2 100644 --- a/contrib/fulltextindex/BENCH +++ b/contrib/fulltextindex/BENCH @@ -1,97 +0,0 @@ -The following data was generated by the 'timings.sh' script included -in this directory. It uses a very large table with music-related -articles as a source for the fti-table. The tables used are: - -product : contains product information : 540.429 rows -artist_fti : fti table for product : 4.501.321 rows -clustered : same as above, only clustered : 4.501.321 rows - -A sequential scan of the artist_fti table (and thus also the clustered table) -takes around 6:16 minutes.... - -Unfortunately I cannot probide anybody else with this test-date, since I -am not allowed to redistribute the data (it's a database being sold by -a couple of wholesale companies). Anyways, it's megabytes, so you probably -wouldn't want it in this distribution anyways. - -I haven't tested this with less data. - -The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database -on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This -is a very slow disk. - -The postmaster was running with: - - postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \ - -o -o /usr/local/pgsql/debug-output -F -d 1 - -('trashing' means a 'select count(*) from artist_fti' to completely trash -any disk-caches and buffers....) - -TESTING ON UNCLUSTERED FTI -trashing -1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01% -2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02% -3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01% -trashing -1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09% -2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38% -3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89% -trashing -1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19% -2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55% -3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85% -trashing -1: ^lling : 0.040u 0.010s 0m55.104s 0.09% -2: ^lling : 0.030u 0.030s 0m4.716s 1.27% -3: ^lling : 0.040u 0.010s 0m2.157s 2.31% -trashing -1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06% -2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11% -3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09% -trashing -1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28% -2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35% -3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48% -trashing -1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77% -2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56% -3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94% -trashing -1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10% -2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59% -3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06% - -TESTING ON CLUSTERED FTI -trashing -1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02% -2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03% -3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03% -trashing -1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27% -2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10% -3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08% -trashing -1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67% -2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17% -3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73% -trashing -1: ^lling : 0.040u 0.000s 0m28.623s 0.13% -2: ^lling : 0.030u 0.010s 0m2.339s 1.70% -3: ^lling : 0.030u 0.010s 0m1.975s 2.02% -trashing -1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16% -2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06% -3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45% -trashing -1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70% -2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90% -3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76% -trashing -1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39% -2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04% -3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72% -trashing -1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09% -2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39% -3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17% diff --git a/contrib/fulltextindex/Makefile b/contrib/fulltextindex/Makefile index bf1d02d227..a417b4a2bf 100644 --- a/contrib/fulltextindex/Makefile +++ b/contrib/fulltextindex/Makefile @@ -1,24 +1,58 @@ -SRCDIR= ../../src +# +# $Header: /cvsroot/pgsql/contrib/fulltextindex/Attic/Makefile,v 1.3 2000/06/15 18:54:51 momjian Exp $ +# -include $(SRCDIR)/Makefile.global +TOPDIR=../.. -CONTRIBDIR=$(LIBDIR)/contrib +include ../Makefile.global -CFLAGS+= $(CFLAGS_SL) +NAME = fti -TARGETS= fti$(DLSUFFIX) -CLEANFILES+= $(TARGETS) -CURDIR=`pwd` +PROGRAM = +OBJS = $(NAME).o +DOCS = $(NAME).doc +SQLS = $(NAME).sql +BINS = fti.pl +EXAMPLES= +MODS = $(NAME)$(DLSUFFIX) -all:: $(TARGETS) +CFLAGS += -I. $(CFLAGS_SL) -%.sql: %.source - rm -f $@; \ - sed -e "s:_CURRENTDIR_:$(CURDIR):g" \ - -e "s:_DLSUFFIX_:$(DLSUFFIX):g" < $< > $@ +OTHER_CLEAN = $(SQLS) -clean: - rm -f $(TARGETS) *.o +all: $(MODS) $(SQLS) -dist: - tar cf fti.tar README BENCH Makefile fti.c timings.sh +%.sql: %.sql.in + $(SED) "s|MODULE_PATHNAME|$(CONTRIB_MODDIR)/$@|" < $< > $@ + +install: install_doc install_sql install_mod install_bin + +install_doc: + for inst_file in $(DOCS); do \ + $(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_DOCDIR); \ + done + +install_sql: + for inst_file in $(SQLS); do \ + $(INSTALL) $(INSTL_LIB_OPTS) $$inst_file $(CONTRIB_SQLDIR); \ + done + +install_mod: + for inst_file in $(MODS); do \ + $(INSTALL) $(INSTL_SHLIB_OPTS) $$inst_file $(CONTRIB_MODDIR); \ + done + +install_bin: + for inst_file in $(BINS); do \ + $(INSTALL) $(INSTL_EXE_OPTS) $$inst_file $(CONTRIB_BINDIR); \ + done + +depend dep: + $(CC) -MM -MG $(CFLAGS) *.c > depend + +clean: + $(RM) *~ $(OBJS) $(MODS) $(PROGRAM) depend $(OTHER_CLEAN) core log + +ifeq (depend,$(wildcard depend)) +include depend +endif diff --git a/contrib/fulltextindex/README b/contrib/fulltextindex/README index 06850f7493..fdb6fcf3b1 100644 --- a/contrib/fulltextindex/README +++ b/contrib/fulltextindex/README @@ -94,4 +94,104 @@ Method 1 is very slow, 2 a lot faster, and for very large tables, 3 is preferred. +BENCH: +~~~~~ + Maarten Boekhold <maartenb@dutepp0.et.tudelft.nl> +The following data was generated by the 'timings.sh' script included +in this directory. It uses a very large table with music-related +articles as a source for the fti-table. The tables used are: + +product : contains product information : 540.429 rows +artist_fti : fti table for product : 4.501.321 rows +clustered : same as above, only clustered : 4.501.321 rows + +A sequential scan of the artist_fti table (and thus also the clustered table) +takes around 6:16 minutes.... + +Unfortunately I cannot probide anybody else with this test-date, since I +am not allowed to redistribute the data (it's a database being sold by +a couple of wholesale companies). Anyways, it's megabytes, so you probably +wouldn't want it in this distribution anyways. + +I haven't tested this with less data. + +The test-machine is a Pentium 133, 64 MB, Linux 2.0.32 with the database +on a 'QUANTUM BIGFOOT_CY4320A, 4134MB w/67kB Cache, CHS=8960/15/63'. This +is a very slow disk. + +The postmaster was running with: + + postmaster -i -b /usr/local/pgsql/bin/postgres -S 1024 -B 256 \ + -o -o /usr/local/pgsql/debug-output -F -d 1 + +('trashing' means a 'select count(*) from artist_fti' to completely trash +any disk-caches and buffers....) + +TESTING ON UNCLUSTERED FTI +trashing +1: ^lapton and ^ric : 0.050u 0.000s 5m37.484s 0.01% +2: ^lapton and ^ric : 0.050u 0.030s 5m32.447s 0.02% +3: ^lapton and ^ric : 0.030u 0.020s 5m28.822s 0.01% +trashing +1: ^lling and ^tones : 0.020u 0.030s 0m54.313s 0.09% +2: ^lling and ^tones : 0.040u 0.030s 0m5.057s 1.38% +3: ^lling and ^tones : 0.010u 0.050s 0m2.072s 2.89% +trashing +1: ^aughan and ^evie : 0.020u 0.030s 0m26.241s 0.19% +2: ^aughan and ^evie : 0.050u 0.010s 0m1.316s 4.55% +3: ^aughan and ^evie : 0.030u 0.020s 0m1.029s 4.85% +trashing +1: ^lling : 0.040u 0.010s 0m55.104s 0.09% +2: ^lling : 0.030u 0.030s 0m4.716s 1.27% +3: ^lling : 0.040u 0.010s 0m2.157s 2.31% +trashing +1: ^stev and ^ray and ^vaugh : 0.040u 0.000s 1m5.630s 0.06% +2: ^stev and ^ray and ^vaugh : 0.050u 0.020s 1m3.561s 0.11% +3: ^stev and ^ray and ^vaugh : 0.050u 0.010s 1m5.923s 0.09% +trashing +1: ^lling (no join) : 0.050u 0.020s 0m24.139s 0.28% +2: ^lling (no join) : 0.040u 0.040s 0m1.087s 7.35% +3: ^lling (no join) : 0.020u 0.030s 0m0.772s 6.48% +trashing +1: ^vaughan (no join) : 0.040u 0.030s 0m9.075s 0.77% +2: ^vaughan (no join) : 0.030u 0.010s 0m0.609s 6.56% +3: ^vaughan (no join) : 0.040u 0.010s 0m0.503s 9.94% +trashing +1: ^rol (no join) : 0.020u 0.030s 0m49.898s 0.10% +2: ^rol (no join) : 0.030u 0.020s 0m3.136s 1.59% +3: ^rol (no join) : 0.030u 0.020s 0m1.231s 4.06% + +TESTING ON CLUSTERED FTI +trashing +1: ^lapton and ^ric : 0.020u 0.020s 2m17.120s 0.02% +2: ^lapton and ^ric : 0.030u 0.020s 2m11.767s 0.03% +3: ^lapton and ^ric : 0.040u 0.010s 2m8.128s 0.03% +trashing +1: ^lling and ^tones : 0.020u 0.030s 0m18.179s 0.27% +2: ^lling and ^tones : 0.030u 0.010s 0m1.897s 2.10% +3: ^lling and ^tones : 0.040u 0.010s 0m1.619s 3.08% +trashing +1: ^aughan and ^evie : 0.070u 0.010s 0m11.765s 0.67% +2: ^aughan and ^evie : 0.040u 0.010s 0m1.198s 4.17% +3: ^aughan and ^evie : 0.030u 0.020s 0m0.872s 5.73% +trashing +1: ^lling : 0.040u 0.000s 0m28.623s 0.13% +2: ^lling : 0.030u 0.010s 0m2.339s 1.70% +3: ^lling : 0.030u 0.010s 0m1.975s 2.02% +trashing +1: ^stev and ^ray and ^vaugh : 0.020u 0.010s 0m17.667s 0.16% +2: ^stev and ^ray and ^vaugh : 0.030u 0.010s 0m3.745s 1.06% +3: ^stev and ^ray and ^vaugh : 0.030u 0.020s 0m3.439s 1.45% +trashing +1: ^lling (no join) : 0.020u 0.040s 0m2.218s 2.70% +2: ^lling (no join) : 0.020u 0.020s 0m0.506s 7.90% +3: ^lling (no join) : 0.030u 0.030s 0m0.510s 11.76% +trashing +1: ^vaughan (no join) : 0.040u 0.050s 0m2.048s 4.39% +2: ^vaughan (no join) : 0.030u 0.020s 0m0.332s 15.04% +3: ^vaughan (no join) : 0.040u 0.010s 0m0.318s 15.72% +trashing +1: ^rol (no join) : 0.020u 0.030s 0m2.384s 2.09% +2: ^rol (no join) : 0.020u 0.030s 0m0.676s 7.39% +3: ^rol (no join) : 0.020u 0.030s 0m0.697s 7.17% diff --git a/contrib/fulltextindex/fticopy b/contrib/fulltextindex/fticopy index 6b6d68e490..e69de29bb2 100644 --- a/contrib/fulltextindex/fticopy +++ b/contrib/fulltextindex/fticopy @@ -1,204 +0,0 @@ -#!/usr/bin/perl -# -# This script substracts all substrings out of a specific column in a table -# and generates output that can be loaded into a new table with the -# psql '\copy' command. The new table should have the following structure: -# -# create table tab ( -# string text, -# id oid -# ); -# -# Note that you cannot use 'copy' (the SQL-command) directly, because -# there's no '\.' included at the end of the output. -# -# The output can be fed through the UNIX commands 'uniq' and 'sort' -# to generate the smallest and sorted output to populate the fti-table. -# -# Example: -# -# fti.pl -u -d mydb -t mytable -c mycolumn -f myfile -# sort -o myoutfile myfile -# uniq myoutfile sorted-file -# -# psql -u mydb -# -# \copy my_fti_table from myfile -# -# create index fti_idx on my_fti_table (string,id); -# -# create function fti() returns opaque as -# '/path/to/fti/file/fti.so' -# language 'newC'; -# -# create trigger my_fti_trigger after update or insert or delete -# on mytable -# for each row execute procedure fti(my_fti_table, mycolumn); -# -# Make sure you have an index on mytable(oid) to be able to do somewhat -# efficient substring searches. - -#use lib '/usr/local/pgsql/lib/perl5/'; -use lib '/mnt/web/guide/postgres/lib/perl5/site_perl'; -use Pg; -use Getopt::Std; - -$PGRES_EMPTY_QUERY = 0 ; -$PGRES_COMMAND_OK = 1 ; -$PGRES_TUPLES_OK = 2 ; -$PGRES_COPY_OUT = 3 ; -$PGRES_COPY_IN = 4 ; -$PGRES_BAD_RESPONSE = 5 ; -$PGRES_NONFATAL_ERROR = 6 ; -$PGRES_FATAL_ERROR = 7 ; - -$[ = 0; # make sure string offsets start at 0 - -sub break_up { - my $string = pop @_; - - @strings = split(/\W+/, $string); - @subs = (); - - foreach $s (@strings) { - $len = length($s); - next if ($len < 4); - - $lpos = $len-1; - while ($lpos >= 3) { - $fpos = $lpos - 3; - while ($fpos >= 0) { - $sub = substr($s, $fpos, $lpos - $fpos + 1); - push(@subs, $sub); - $fpos = $fpos - 1; - } - $lpos = $lpos - 1; - } - } - - return @subs; -} - -sub connect_db { - my $dbname = shift @_; - my $user = shift @_; - my $passwd = shift @_; - - if (!defined($dbname) || $dbname eq "") { - return 1; - } - $connect_string = "dbname=$dbname"; - - if ($user ne "") { - if ($passwd eq "") { - return 0; - } - $connect_string = "$connect_string user=$user password=$passwd ". - "authtype=password"; - } - - $PG_CONN = PQconnectdb($connect_string); - - if (PQstatus($PG_CONN)) { - print STDERR "Couldn't make connection with database!\n"; - print STDERR PQerrorMessage($PG_CONN), "\n"; - return 0; - } - - return 1; -} - -sub quit_prog { - close(OUT); - unlink $opt_f; - if (defined($PG_CONN)) { - PQfinish($PG_CONN); - } - exit 1; -} - -sub get_username { - print "Username: "; - chop($n = <STDIN>); - - return $n;; -} - -sub get_password { - print "Password: "; - - system("stty -echo < /dev/tty"); - chop($pwd = <STDIN>); - print "\n"; - system("stty echo < /dev/tty"); - - return $pwd; -} - -sub main { - getopts('d:t:c:f:u'); - - if (!$opt_d || !$opt_t || !$opt_c || !$opt_f) { - print STDERR "usage: $0 [-u] -d database -t table -c column ". - "-f output-file\n"; - return 1; - } - - if (defined($opt_u)) { - $uname = get_username(); - $pwd = get_password(); - } else { - $uname = ""; - $pwd = ""; - } - - $SIG{'INT'} = 'quit_prog'; - if (!connect_db($opt_d, $uname, $pwd)) { - print STDERR "Connecting to database failed!\n"; - return 1; - } - - if (!open(OUT, ">$opt_f")) { - print STDERR "Couldnt' open file '$opt_f' for output!\n"; - return 1; - } - - PQexec($PG_CONN, "begin"); - - $query = "declare C cursor for select $opt_c, oid from $opt_t"; - $res = PQexec($PG_CONN, $query); - if (!$res || (PQresultStatus($res) != $PGRES_COMMAND_OK)) { - print STDERR "Error declaring cursor!\n"; - print STDERR PQerrorMessage($PG_CONN), "\n"; - PQfinish($PG_CONN); - return 1; - } - PQclear($res); - - $query = "fetch in C"; - while (($res = PQexec($PG_CONN, $query)) && - (PQresultStatus($res) == $PGRES_TUPLES_OK) && - (PQntuples($res) == 1)) { - $col = PQgetvalue($res, 0, 0); - $oid = PQgetvalue($res, 0, 1); - - @subs = break_up($col); - foreach $i (@subs) { - print OUT "$i\t$oid\n"; - } - } - - if (!$res || (PQresultStatus($res) != PGRES_TUPLES_OK)) { - print STDERR "Error retrieving data from backend!\n"; - print STDERR PQerrorMEssage($PG_CONN), "\n"; - PQfinish($PG_CONN); - return 1; - } - - PQclear($res); - PQfinish($PG_CONN); - - return 0; -} - -exit main(); |