From 64def09592535dc043741fb46f16eb37e152d90f Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 11 Sep 2007 11:54:42 +0000 Subject: Add regression tests for ispell, synonym and thesaurus dictionaries. Rename synonym.syn.sample and thesaurus.ths.sample to synonym_sample.syn and thesaurus_sample.ths respectively, so that they can be used in regression tests. The ispell dictionary tests use simple synthetic dictionary files. --- src/test/regress/expected/tsdicts.out | 320 ++++++++++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 4 +- src/test/regress/serial_schedule | 3 +- src/test/regress/sql/tsdicts.sql | 121 +++++++++++++ 4 files changed, 445 insertions(+), 3 deletions(-) create mode 100644 src/test/regress/expected/tsdicts.out create mode 100644 src/test/regress/sql/tsdicts.sql (limited to 'src/test') diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out new file mode 100644 index 00000000000..859c7bb5226 --- /dev/null +++ b/src/test/regress/expected/tsdicts.out @@ -0,0 +1,320 @@ +--Test text search dictionaries and configurations +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); +SELECT ts_lexize('ispell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('ispell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('ispell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('ispell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('ispell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT
ts_lexize('ispell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('ispell', 'footklubber'); + ts_lexize +---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('ispell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY hunspell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); +SELECT ts_lexize('hunspell', 'skies'); + ts_lexize +----------- + {sky} +(1 row) + +SELECT ts_lexize('hunspell', 'bookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'booking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'foot'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('hunspell', 'foots'); + ts_lexize +----------- + {foot} +(1 row) + +SELECT ts_lexize('hunspell', 'rebookings'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'rebooking'); + ts_lexize +---------------- + {booking,book} +(1 row) + +SELECT ts_lexize('hunspell', 'rebook'); + ts_lexize +----------- + +(1 row) + +SELECT ts_lexize('hunspell', 'unbookings'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'unbooking'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'unbook'); + ts_lexize +----------- + {book} +(1 row) + +SELECT ts_lexize('hunspell', 'footklubber'); + ts_lexize 
+---------------- + {foot,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'footballklubber'); + ts_lexize +------------------------------------------------------ + {footballklubber,foot,ball,klubber,football,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'ballyklubber'); + ts_lexize +---------------- + {ball,klubber} +(1 row) + +SELECT ts_lexize('hunspell', 'footballyklubber'); + ts_lexize +--------------------- + {foot,ball,klubber} +(1 row) + +-- Synonym dictionary +CREATE TEXT SEARCH DICTIONARY synonym ( + Template=synonym, + Synonyms=synonym_sample +); +SELECT ts_lexize('synonym', 'PoStGrEs'); + ts_lexize +----------- + {pgsql} +(1 row) + +SELECT ts_lexize('synonym', 'Gogle'); + ts_lexize +----------- + {googl} +(1 row) + +-- Create and simply test the thesaurus dictionary +-- More tests are in the configuration checks because ts_lexize +-- cannot feed more than one word to the dictionary, as a thesaurus may need. +CREATE TEXT SEARCH DICTIONARY thesaurus ( + Template=thesaurus, + DictFile=thesaurus_sample, + Dictionary=english_stem +); +NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) +SELECT ts_lexize('thesaurus', 'one'); +NOTICE: thesaurus word-sample "the" is recognized as stop-word, assign any stop-word (rule 8) + ts_lexize +----------- + {1} +(1 row) + +-- Test ispell dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION ispell_tst ( + COPY=english +); +ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR + hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word + WITH ispell, english_stem; +SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); + to_tsvector +---------------------------------------------------------------------------------------------------- + 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 +(1 row) + +SELECT to_tsquery('ispell_tst', 'footballklubber'); + to_tsquery
+------------------------------------------------------------------------------ + ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' +(1 row) + +SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); + to_tsquery +------------------------------------------------------------------------ + 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' +(1 row) + +-- Test ispell dictionary with hunspell affix in configuration +CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( + COPY=ispell_tst +); +ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING + REPLACE ispell WITH hunspell; +SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); + to_tsvector +---------------------------------------------------------------------------------------------------- + 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 +(1 row) + +SELECT to_tsquery('hunspell_tst', 'footballklubber'); + to_tsquery +------------------------------------------------------------------------------ + ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber' +(1 row) + +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); + to_tsquery +------------------------------------------------------------------------ + 'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky' +(1 row) + +-- Test synonym dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION synonym_tst ( + COPY=english +); +ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, english_stem; +SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre'); + to_tsvector +--------------------------------------------------- + 'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10 +(1 row) + +SELECT to_tsvector('synonym_tst', 'Most common
mistake is to write Gogle instead of Google'); + to_tsvector +---------------------------------------------------------- + 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8 +(1 row) + +-- test thesaurus in configuration +-- see thesaurus_sample.ths to understand 'odd' resulting tsvector +CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( + COPY=synonym_tst +); +ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, thesaurus, english_stem; +SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); + to_tsvector +---------------------------------- + '1':1,5 '12':3 '123':4 'pgsql':2 +(1 row) + +SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); + to_tsvector +------------------------------------------------------------- + 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10 +(1 row) + +SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); + to_tsvector +------------------------------------------------------- + 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8 +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a8f5c799d9a..4d5af5b16d5 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -1,5 +1,5 @@ # ---------- -# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.43 2007/08/21 01:11:30 tgl Exp $ +# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.44 2007/09/11 11:54:42 teodor Exp $ # # By convention, we put no more than twenty tests in any one parallel group; # this limits the number of connections needed to run the tests.
@@ -77,7 +77,7 @@ test: misc # ---------- # Another group of parallel tests # ---------- -test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch +test: select_views portals_p2 rules foreign_key cluster dependency guc combocid tsearch tsdicts # ---------- # Another group of parallel tests diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index a11a8702334..856682469cc 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.40 2007/08/21 01:11:30 tgl Exp $ +# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.41 2007/09/11 11:54:42 teodor Exp $ # This should probably be in an order similar to parallel_schedule. test: boolean test: char @@ -102,6 +102,7 @@ test: rangefuncs test: prepare test: without_oid test: conversion +test: tsdicts test: truncate test: alter_table test: sequence diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql new file mode 100644 index 00000000000..2e6cf791d87 --- /dev/null +++ b/src/test/regress/sql/tsdicts.sql @@ -0,0 +1,121 @@ +--Test text search dictionaries and configurations + +-- Test ISpell dictionary with ispell affix file +CREATE TEXT SEARCH DICTIONARY ispell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=ispell_sample +); + +SELECT ts_lexize('ispell', 'skies'); +SELECT ts_lexize('ispell', 'bookings'); +SELECT ts_lexize('ispell', 'booking'); +SELECT ts_lexize('ispell', 'foot'); +SELECT ts_lexize('ispell', 'foots'); +SELECT ts_lexize('ispell', 'rebookings'); +SELECT ts_lexize('ispell', 'rebooking'); +SELECT ts_lexize('ispell', 'rebook'); +SELECT ts_lexize('ispell', 'unbookings'); +SELECT ts_lexize('ispell', 'unbooking'); +SELECT ts_lexize('ispell', 'unbook'); + +SELECT ts_lexize('ispell', 'footklubber'); +SELECT ts_lexize('ispell', 'footballklubber'); +SELECT ts_lexize('ispell', 'ballyklubber'); +SELECT ts_lexize('ispell',
'footballyklubber'); + +-- Test ISpell dictionary with hunspell affix file +CREATE TEXT SEARCH DICTIONARY hunspell ( + Template=ispell, + DictFile=ispell_sample, + AffFile=hunspell_sample +); + +SELECT ts_lexize('hunspell', 'skies'); +SELECT ts_lexize('hunspell', 'bookings'); +SELECT ts_lexize('hunspell', 'booking'); +SELECT ts_lexize('hunspell', 'foot'); +SELECT ts_lexize('hunspell', 'foots'); +SELECT ts_lexize('hunspell', 'rebookings'); +SELECT ts_lexize('hunspell', 'rebooking'); +SELECT ts_lexize('hunspell', 'rebook'); +SELECT ts_lexize('hunspell', 'unbookings'); +SELECT ts_lexize('hunspell', 'unbooking'); +SELECT ts_lexize('hunspell', 'unbook'); + +SELECT ts_lexize('hunspell', 'footklubber'); +SELECT ts_lexize('hunspell', 'footballklubber'); +SELECT ts_lexize('hunspell', 'ballyklubber'); +SELECT ts_lexize('hunspell', 'footballyklubber'); + +-- Synonym dictionary +CREATE TEXT SEARCH DICTIONARY synonym ( + Template=synonym, + Synonyms=synonym_sample +); + +SELECT ts_lexize('synonym', 'PoStGrEs'); +SELECT ts_lexize('synonym', 'Gogle'); + +-- Create and simply test the thesaurus dictionary +-- More tests are in the configuration checks because ts_lexize +-- cannot feed more than one word to the dictionary, as a thesaurus may need.
+CREATE TEXT SEARCH DICTIONARY thesaurus ( + Template=thesaurus, + DictFile=thesaurus_sample, + Dictionary=english_stem +); + +SELECT ts_lexize('thesaurus', 'one'); + +-- Test ispell dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION ispell_tst ( + COPY=english +); + +ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR + hword, lhword, lpart_hword, lword, nlhword, nlpart_hword, nlword, part_hword, word + WITH ispell, english_stem; + +SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); +SELECT to_tsquery('ispell_tst', 'footballklubber'); +SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky'); + +-- Test ispell dictionary with hunspell affix in configuration +CREATE TEXT SEARCH CONFIGURATION hunspell_tst ( + COPY=ispell_tst +); + +ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING + REPLACE ispell WITH hunspell; + +SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); +SELECT to_tsquery('hunspell_tst', 'footballklubber'); +SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky'); + +-- Test synonym dictionary in configuration +CREATE TEXT SEARCH CONFIGURATION synonym_tst ( + COPY=english +); + +ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, english_stem; + +SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre'); +SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); + +-- test thesaurus in configuration +-- see thesaurus_sample.ths to understand 'odd' resulting tsvector +CREATE TEXT SEARCH CONFIGURATION thesaurus_tst ( + COPY=synonym_tst +); + +ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR + lword, lpart_hword, lhword + WITH synonym, thesaurus, english_stem; + +SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two 
three one'); +SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); +SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); + -- cgit v1.2.3