Extend GIN to support partial-match searches, and extend tsquery to support

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml

index fb813d70423a33addf978795d904058dbb6401ae..48dfe0a9c4772350937e210eac9507bd3e9f69cc 100644 (file)
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.226 2008/03/30 04:08:14 neilc Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.227 2008/05/16 16:31:01 tgl Exp $ -->
  
   <chapter id="datatype">
    <title id="datatype-title">Data Types</title>
@@ -3298,18 +3298,17 @@ SELECT * FROM test;
  SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
                        tsvector
  ----------------------------------------------------
- 'a' 'on' 'and' 'ate' 'cat' 'fat' 'mat' 'rat' 'sat'
+ 'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'
  </programlisting>
  
-     (As the example shows, the sorting is first by length and then
-     alphabetically, but that detail is seldom important.)  To represent
+     To represent
       lexemes containing whitespace or punctuation, surround them with quotes:
  
  <programlisting>
  SELECT $$the lexeme '    ' contains spaces$$::tsvector;
                   tsvector                  
  -------------------------------------------
- 'the' '    ' 'lexeme' 'spaces' 'contains'
+ '    ' 'contains' 'lexeme' 'spaces' 'the'
  </programlisting>
  
       (We use dollar-quoted string literals in this example and the next one,
@@ -3320,7 +3319,7 @@ SELECT $$the lexeme '    ' contains spaces$$::tsvector;
  SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
                      tsvector                    
  ------------------------------------------------
- 'a' 'the' 'Joe''s' 'quote' 'lexeme' 'contains'
+ 'Joe''s' 'a' 'contains' 'lexeme' 'quote' 'the'
  </programlisting>
  
       Optionally, integer <firstterm>position(s)</>
@@ -3330,7 +3329,7 @@ SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
  SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
                                    tsvector
  -------------------------------------------------------------------------------
- 'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
+ 'a':1,6,10 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'on':5 'rat':12 'sat':4
  </programlisting>
  
       A position normally indicates the source word's location in the
@@ -3369,7 +3368,7 @@ SELECT 'a:1A fat:2B,4C cat:5D'::tsvector;
  select 'The Fat Rats'::tsvector;
        tsvector      
  --------------------
- 'Fat' 'The' 'Rats'
+ 'Fat' 'Rats' 'The'
  </programlisting>
  
       For most English-text-searching applications the above words would
@@ -3439,6 +3438,19 @@ SELECT 'fat:ab &amp; cat'::tsquery;
  </programlisting>
      </para>
  
+    <para>
+     Also, lexemes in a <type>tsquery</type> can be labeled with <literal>*</>
+     to specify prefix matching:
+<programlisting>
+SELECT 'super:*'::tsquery;
+  tsquery  
+-----------
+ 'super':*
+</programlisting>
+     This query will match any word in a <type>tsvector</> that begins
+     with <quote>super</>.
+    </para>
+
      <para>
       Quoting rules for lexemes are the same as described above for
       lexemes in <type>tsvector</>; and, as with <type>tsvector</>,
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml

index ad82da6b38e7b28787ffe8ab46ec9ffe556e111f..961451f714a8fe7c9a9bec56193b5ad7d096f8c6 100644 (file)
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.14 2008/04/14 17:05:32 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/gin.sgml,v 2.15 2008/05/16 16:31:01 tgl Exp $ -->
  
  <chapter id="GIN">
  <title>GIN Indexes</title>
@@ -52,15 +52,15 @@
   </para>
  
   <para>
-   All it takes to get a <acronym>GIN</acronym> access method working
-   is to implement four user-defined methods, which define the behavior of
+   All it takes to get a <acronym>GIN</acronym> access method working is to
+   implement four (or five) user-defined methods, which define the behavior of
     keys in the tree and the relationships between keys, indexed values,
     and indexable queries. In short, <acronym>GIN</acronym> combines
     extensibility with generality, code reuse, and a clean interface.
   </para>
  
   <para>
-   The four methods that an index operator class for
+   The four methods that an operator class for
     <acronym>GIN</acronym> must provide are:
   </para>
  
@@ -77,7 +77,7 @@
      </varlistentry>
  
      <varlistentry>
-     <term>Datum* extractValue(Datum inputValue, int32 *nkeys)</term>
+     <term>Datum *extractValue(Datum inputValue, int32 *nkeys)</term>
       <listitem>
        <para>
         Returns an array of keys given a value to be indexed.  The
@@ -87,8 +87,8 @@
      </varlistentry>
  
      <varlistentry>
-     <term>Datum* extractQuery(Datum query, int32 *nkeys,
-        StrategyNumber n)</term>
+     <term>Datum *extractQuery(Datum query, int32 *nkeys,
+        StrategyNumber n, bool **pmatch)</term>
       <listitem>
        <para>
         Returns an array of keys given a value to be queried; that is,
@@ -100,13 +100,22 @@
         to consult <literal>n</> to determine the data type of
         <literal>query</> and the key values that need to be extracted.
         The number of returned keys must be stored into <literal>*nkeys</>.
-       If number of keys is equal to zero then <function>extractQuery</> 
-       should store 0 or -1 into <literal>*nkeys</>. 0 means that any 
-       row matches the <literal>query</> and sequence scan should be 
-       produced. -1 means nothing can satisfy <literal>query</>. 
-       Choice of value should be based on semantics meaning of operation with 
-       given strategy number.
+       If the query contains no keys then <function>extractQuery</> 
+       should store 0 or -1 into <literal>*nkeys</>, depending on the
+       semantics of the operator.  0 means that every
+       value matches the <literal>query</> and a sequential scan should be 
+       produced.  -1 means nothing can match the <literal>query</>. 
+       <literal>pmatch</> is an output argument for use when partial match
+       is supported.  To use it, <function>extractQuery</> must allocate
+       an array of <literal>*nkeys</> booleans and store its address at
+       <literal>*pmatch</>.  Each element of the array should be set to TRUE
+       if the corresponding key requires partial match, FALSE if not.
+       If <literal>*pmatch</> is set to NULL then GIN assumes partial match
+       is not required.  The variable is initialized to NULL before call,
+       so this argument can simply be ignored by operator classes that do
+       not support partial match.
        </para>
+
       </listitem>
      </varlistentry>
  
@@ -133,6 +142,39 @@
  
    </variablelist>
  
+ <para>
+  Optionally, an operator class for
+  <acronym>GIN</acronym> can supply a fifth method:
+ </para>
+
+  <variablelist>
+
+    <varlistentry>
+     <term>int comparePartial(Datum partial_key, Datum key, StrategyNumber n)</term>
+     <listitem>
+      <para>
+       Compare a partial-match query to an index key.  Returns an integer
+       whose sign indicates the result: less than zero means the index key
+       does not match the query, but the index scan should continue; zero
+       means that the index key does match the query; greater than zero
+       indicates that the index scan should stop because no more matches
+       are possible.  The strategy number <literal>n</> of the operator
+       that generated the partial match query is provided, in case its
+       semantics are needed to determine when to end the scan.
+      </para>
+     </listitem>
+    </varlistentry>
+
+  </variablelist>
+
+ <para>
+  To support <quote>partial match</> queries, an operator class must
+  provide the <function>comparePartial</> method, and its
+  <function>extractQuery</> method must set the <literal>pmatch</>
+  parameter when a partial-match query is encountered.  See
+  <xref linkend="gin-partial-match"> for details.
+ </para>
+
  </sect1>
  
  <sect1 id="gin-implementation">
@@ -146,6 +188,33 @@
    list of heap pointers (PL, posting list) if the list is small enough.
   </para>
  
+ <sect2 id="gin-partial-match">
+  <title>Partial match algorithm</title>
+  
+  <para>
+   GIN can support <quote>partial match</> queries, in which the query
+   does not determine an exact match for one or more keys, but the possible
+   matches fall within a reasonably narrow range of key values (within the
+   key sorting order determined by the <function>compare</> support method).
+   The <function>extractQuery</> method, instead of returning a key value
+   to be matched exactly, returns a key value that is the lower bound of
+   the range to be searched, and sets the <literal>pmatch</> flag true.
+   The key range is then searched using the <function>comparePartial</>
+   method.  <function>comparePartial</> must return zero for an actual
+   match, less than zero for a non-match that is still within the range
+   to be searched, or greater than zero if the index key is past the range
+   that could match.
+  </para>
+
+  <para>
+   During a partial-match scan, all <literal>itemPointer</>s for matching keys
+   are OR'ed into a <literal>TIDBitmap</>.
+   The scan fails if the <literal>TIDBitmap</> becomes lossy.
+   In this case an error message will be reported with advice
+   to increase <literal>work_mem</>.
+  </para>
+ </sect2>
+
  </sect1>
  
  <sect1 id="gin-tips">
@@ -236,8 +305,14 @@
   </para>
  
   <para>
-  <acronym>GIN</acronym> searches keys only by equality matching.  This might
-  be improved in future.
+  It is possible for an operator class to circumvent the restriction against
+  full index scan.  To do that, <function>extractValue</> must return at least
+  one (possibly dummy) key for every indexed value, and
+  <function>extractQuery</function> must convert an unrestricted search into
+  a partial-match query that will scan the whole index.  This is inefficient
+  but might be necessary to avoid corner-case failures with operators such
+  as LIKE.  Note however that failure could still occur if the intermediate
+  <literal>TIDBitmap</> becomes lossy.
   </para>
  </sect1>
  
@@ -247,9 +322,11 @@
   <para>
    The <productname>PostgreSQL</productname> source distribution includes
    <acronym>GIN</acronym> operator classes for <type>tsvector</> and
-  for one-dimensional arrays of all internal types.  The following
-  <filename>contrib</> modules also contain <acronym>GIN</acronym>
-  operator classes:
+  for one-dimensional arrays of all internal types.  Prefix searching in
+  <type>tsvector</> is implemented using the <acronym>GIN</> partial match
+  feature.
+  The following <filename>contrib</> modules also contain
+  <acronym>GIN</acronym> operator classes:
   </para>
  
   <variablelist>
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index caa8847ef8e6bf78ff5a5b00fc7069a063ca2f29..41db566b6cc48614b9d4ec44f756194f68943a16 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.43 2008/04/14 17:05:32 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.44 2008/05/16 16:31:01 tgl Exp $ -->
  
  <chapter id="textsearch">
   <title id="textsearch-title">Full Text Search</title>
@@ -754,6 +754,20 @@ SELECT to_tsquery('english', 'Fat | Rats:AB');
   'fat' | 'rat':AB
  </programlisting>
  
+    Also, <literal>*</> can be attached to a lexeme to specify prefix matching:
+
+<programlisting>
+SELECT to_tsquery('supern:*A &amp; star:A*B');
+        to_tsquery        
+--------------------------
+ 'supern':*A &amp; 'star':*AB
+</programlisting>
+
+    Such a lexeme will match any word in a <type>tsvector</> that begins
+    with the given string.
+   </para>
+
+   <para>
      <function>to_tsquery</function> can also accept single-quoted
      phrases.  This is primarily useful when the configuration includes a
      thesaurus dictionary that may trigger on such phrases.
@@ -798,7 +812,8 @@ SELECT to_tsquery('''supernovae stars'' &amp; !crab');
  </programlisting>
  
      Note that <function>plainto_tsquery</> cannot
-    recognize either Boolean operators or weight labels in its input:
+    recognize Boolean operators, weight labels, or prefix-match labels
+    in its input:
  
  <programlisting>
  SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml

index 6bf7535b636300bcc4581f0a54320d3e8424e03c..84b2c9050a1a198f2ac515f9eca596085c839805 100644 (file)
--- a/doc/src/sgml/xindex.sgml
+++ b/doc/src/sgml/xindex.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/xindex.sgml,v 1.62 2008/04/14 17:05:32 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/xindex.sgml,v 1.63 2008/05/16 16:31:01 tgl Exp $ -->
  
  <sect1 id="xindex">
   <title>Interfacing Extensions To Indexes</title>
@@ -444,6 +444,13 @@
         <entry>consistent - determine whether value matches query condition</entry>
         <entry>4</entry>
        </row>
+      <row>
+       <entry>comparePartial - (optional method) compare partial key from
+        query and key from index, and return an integer less than zero, zero,
+        or greater than zero, indicating whether GIN should ignore this index
+        entry, treat the entry as a match, or stop the index scan</entry>
+       <entry>5</entry>
+      </row>
       </tbody>
      </tgroup>
     </table>
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c

index 3bedcc99606b7fbeba6aa5094ba1b0436a9269fb..3d60d337df4e7fa3ccd6b9d01fe96955aba9ea89 100644 (file)
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *         $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $
+ *         $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
   *-------------------------------------------------------------------------
   */
  
@@ -18,8 +18,13 @@
  #include "catalog/index.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
+#include "utils/datum.h"
  #include "utils/memutils.h"
  
+
+/*
+ * Tries to refind previously taken ItemPointer on page.
+ */
  static bool
  findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
  {
@@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
  }
  
  /*
- * Start* functions setup state of searches: find correct buffer and locks it,
- * Stop* functions unlock buffer (but don't release!)
+ * Goes to the next page if current offset is outside of bounds
+ */
+static bool
+moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack )
+{
+   Page        curPage = BufferGetPage(stack->buffer);
+
+   /* The offset still falls within this page: no need to move */
+   if ( stack->off <= PageGetMaxOffsetNumber(curPage) )
+       return true;
+
+   /*
+    * Every item on this page has been consumed, so step to the right
+    * sibling.  stack->blkno is updated even if this turns out to be
+    * the rightmost page.
+    */
+   stack->blkno = GinPageGetOpaque(curPage)->rightlink;
+   if ( GinPageRightMost(curPage) )
+       return false;  /* no more pages */
+
+   /* Hand the lock and pin over from this page to its right sibling */
+   LockBuffer(stack->buffer, GIN_UNLOCK);
+   stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
+   LockBuffer(stack->buffer, GIN_SHARE);
+   stack->off = FirstOffsetNumber;
+
+   return true;
+}
+
+/*
+ * Walks every leaf page of the posting tree rooted at rootPostingTree,
+ * left to right, and adds all ItemPointers found there to the
+ * scanEntry->partialMatch TIDBitmap.  scanEntry->predictNumberResult is
+ * increased by the number of items added.
+ */
+static void
+scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree )
+{
+   GinPostingTreeScan *treeScan;
+   Buffer              leafBuf;
+   Page                leaf;
+   BlockNumber         nextBlk;
+
+   treeScan = prepareScanPostingTree(index, rootPostingTree, TRUE);
+   leafBuf = scanBeginPostingTree(treeScan);
+
+   /* prevent unpin in freeGinBtreeStack */
+   IncrBufferRefCount(leafBuf);
+
+   /* Only the starting leaf buffer is needed from here on */
+   freeGinBtreeStack(treeScan->stack);
+   pfree(treeScan);
+
+   /*
+    * Visit each leaf in turn, following right links
+    */
+   for(;;)
+   {
+       leaf = BufferGetPage(leafBuf);
+
+       /* Harvest the page unless it is deleted or holds no items */
+       if ( !(GinPageGetOpaque(leaf)->flags & GIN_DELETED) &&
+            GinPageGetOpaque(leaf)->maxoff >= FirstOffsetNumber )
+       {
+           tbm_add_tuples( scanEntry->partialMatch,
+                           (ItemPointer)GinDataPageGetItem(leaf, FirstOffsetNumber),
+                           GinPageGetOpaque(leaf)->maxoff, false);
+           scanEntry->predictNumberResult += GinPageGetOpaque(leaf)->maxoff;
+       }
+
+       if ( !GinPageRightMost(leaf) )
+       {
+           /* Exchange the current leaf for its right sibling */
+           nextBlk = GinPageGetOpaque(leaf)->rightlink;
+           LockBuffer(leafBuf, GIN_UNLOCK);
+           leafBuf = ReleaseAndReadBuffer(leafBuf, index, nextBlk);
+           LockBuffer(leafBuf, GIN_SHARE);
+           continue;
+       }
+
+       /* no more pages */
+       UnlockReleaseBuffer(leafBuf);
+       return;
+   }
+}
+
+/*
+ * Collects into scanEntry->partialMatch (a TIDBitmap) the ItemPointers of
+ * every index entry that partially matches the search entry.
+ *
+ * On entry stack->buffer is already locked and stack->off points to the
+ * first entry to examine.  Entries are tested with the operator class's
+ * comparePartial function until it reports that no further match is
+ * possible, or until the rightmost entry page has been exhausted.
+ *
+ * Returns true if done, false if the scan must be restarted from scratch
+ * (the page we were on stopped being a leaf while it was unlocked).
+ */
+static bool
+computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry )
+{
+   Page        page;
+   IndexTuple  itup;
+   Datum       idatum;
+   bool        isnull;
+   int32       cmp;
+
+   scanEntry->partialMatch = tbm_create( work_mem * 1024L );
+
+   for(;;)
+   {
+       /*
+        * stack->off points to the entry of interest, buffer is already locked
+        */
+       if ( moveRightIfItNeeded(btree, stack) == false )
+           return true;
+
+       page = BufferGetPage(stack->buffer);
+       itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+       idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull);
+       Assert(!isnull);
+
+       /*----------
+        * Check of partial match.
+        * case cmp == 0 => match
+        * case cmp > 0 => not match and finish scan
+        * case cmp < 0 => not match and continue scan
+        *----------
+        */
+       cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn,
+                                         scanEntry->entry,
+                                         idatum,
+                                         UInt16GetDatum(scanEntry->strategy)));
+
+       if ( cmp > 0 )
+           return true;
+       else if ( cmp < 0 )
+       {
+           stack->off++;
+           continue;
+       }
+
+       if ( GinIsPostingTree(itup) )
+       {
+           BlockNumber rootPostingTree = GinGetPostingTree(itup);
+           bool        keyByVal = btree->ginstate->tupdesc->attrs[0]->attbyval;
+           Datum       newDatum,
+                       savedDatum = datumCopy (
+                                       idatum,
+                                       keyByVal,
+                                       btree->ginstate->tupdesc->attrs[0]->attlen
+                                   );
+           /*
+            * We should unlock current page (but not unpin) during
+            * tree scan to prevent deadlock with vacuum processes.
+            *
+            * We save current entry value (savedDatum) to be able to refind
+            * our tuple after re-locking
+            */
+           LockBuffer(stack->buffer, GIN_UNLOCK);
+           scanForItems( btree->index, scanEntry, rootPostingTree );
+
+           /*
+            * We lock the entry page again.  While it was unlocked an
+            * insert might have occurred, so we need to refind our position
+            */
+           LockBuffer(stack->buffer, GIN_SHARE);
+           page = BufferGetPage(stack->buffer);
+           if ( !GinPageIsLeaf(page) )
+           {
+               /*
+                * The root page became a non-leaf while we had it
+                * unlocked.  We will start again; this situation is
+                * rare - the root can become a non-leaf only once per
+                * life of the index.
+                *
+                * Release the saved key copy first so that the restart
+                * path does not leak it.
+                */
+               if ( keyByVal == false )
+                   pfree( DatumGetPointer(savedDatum) );
+
+               return false;
+           }
+
+           for(;;)
+           {
+               if ( moveRightIfItNeeded(btree, stack) == false )
+                   elog(ERROR, "lost saved point in index"); /* must not happen !!! */
+
+               page = BufferGetPage(stack->buffer);
+               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+               /* attribute number 1: the key column, as in the fetch above */
+               newDatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull);
+
+               if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 )
+               {
+                   /* Found!  */
+                   if ( keyByVal == false )
+                       pfree( DatumGetPointer(savedDatum) );
+                   break;
+               }
+
+               stack->off++;
+           }
+       }
+       else
+       {
+           tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup),  GinGetNPosting(itup), false);
+           scanEntry->predictNumberResult +=  GinGetNPosting(itup);
+       }
+
+       /*
+        * Ok, we saved the ItemPointers, go to the next entry
+        */
+       stack->off++;
+   }
+
+   /* not reached: the loop above only exits by returning */
+   return true;
+}
+
+/*
+ * Start* functions set up the beginning state of searches: they find the correct buffer and pin it.
   */
  static void
  startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
@@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
     entry->offset = InvalidOffsetNumber;
     entry->list = NULL;
     entry->nlist = 0;
+   entry->partialMatch = NULL;
+   entry->partialMatchResult = NULL;
     entry->reduceResult = FALSE;
     entry->predictNumberResult = 0;
  
-   if (btreeEntry.findItem(&btreeEntry, stackEntry))
+   if ( entry->isPartialMatch )
+   {
+       /*
+        * btreeEntry.findItem points to the first equal or greater value
+        * than needed. So we will scan further and collect all
+        * ItemPointers
+        */
+       btreeEntry.findItem(&btreeEntry, stackEntry);
+       if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false )
+       {
+           /*
+            * GIN tree was seriously restructured, so we will
+            * cleanup all found data and rescan. See comments near
+            * 'return false' in computePartialMatchList()
+            */
+           if ( entry->partialMatch )
+           {
+               tbm_free( entry->partialMatch );
+               entry->partialMatch = NULL;
+           }
+           LockBuffer(stackEntry->buffer, GIN_UNLOCK);
+           freeGinBtreeStack(stackEntry);
+
+           startScanEntry(index, ginstate, entry);
+           return;
+       }
+
+       if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) )
+       {
+           tbm_begin_iterate(entry->partialMatch);
+           entry->isFinished = FALSE;
+       }
+   }
+   else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     {
         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
  
@@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
             GinPostingTreeScan *gdi;
             Page        page;
  
+           /*
+            * We should unlock entry page before make deal with
+            * posting tree to prevent deadlocks with vacuum processes.
+            * Because entry is never deleted from page and posting tree is
+            * never reduced to the posting list, we can unlock page after
+            * getting BlockNumber of root of posting tree.
+            */
             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
             needUnlock = FALSE;
             gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
@@ -111,7 +354,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
              */
             entry->list = (ItemPointerData *) palloc( BLCKSZ );
             entry->nlist = GinPageGetOpaque(page)->maxoff;
-           memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), 
+           memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
                         GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
  
             LockBuffer(entry->buffer, GIN_UNLOCK);
@@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
         return;
  
     for (i = 0; i < key->nentries; i++)
+   {
         startScanEntry(index, ginstate, key->scanEntry + i);
+       /*
+        * Copy strategy number to each entry of key to
+        * use in comparePartialFn call
+        */
+       key->scanEntry[i].strategy = key->strategy;
+   }
  
     memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
     key->isFinished = FALSE;
@@ -233,12 +483,12 @@ entryGetNextItem(Relation index, GinScanEntry entry)
                  * Found position equal to or greater than stored
                  */
                 entry->nlist = GinPageGetOpaque(page)->maxoff;
-               memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), 
+               memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
                             GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
  
                 LockBuffer(entry->buffer, GIN_UNLOCK);
  
-               if ( !ItemPointerIsValid(&entry->curItem) || 
+               if ( !ItemPointerIsValid(&entry->curItem) ||
                      compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
                 {
                     /*
@@ -248,7 +498,7 @@ entryGetNextItem(Relation index, GinScanEntry entry)
  
                      break;
                 }
-           
+
                 /*
                  * Find greater than entry->curItem position, store it.
                  */
@@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry)
         entry->isFinished = entry->master->isFinished;
         entry->curItem = entry->master->curItem;
     }
+   else if ( entry->partialMatch )
+   {
+       do
+       {
+           if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+           {
+               entry->partialMatchResult = tbm_iterate( entry->partialMatch );
+
+               if ( entry->partialMatchResult == NULL )
+               {
+                   ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
+                   entry->isFinished = TRUE;
+                   break;
+               }
+               else if ( entry->partialMatchResult->ntuples < 0 )
+               {
+                   /* bitmap became lossy */
+                   ereport(ERROR,
+                           (errcode(ERRCODE_OUT_OF_MEMORY),
+                           errmsg("not enough memory to store result of partial match operator" ),
+                           errhint("Increase the \"work_mem\" parameter.")));
+               }
+               entry->offset = 0;
+           }
+
+           ItemPointerSet(&entry->curItem,
+                           entry->partialMatchResult->blockno,
+                           entry->partialMatchResult->offsets[ entry->offset ]);
+           entry->offset ++;
+
+       } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+   }
     else if (!BufferIsValid(entry->buffer))
     {
         entry->offset++;
@@ -297,6 +579,54 @@ entryGetItem(Relation index, GinScanEntry entry)
     return entry->isFinished;
  }
  
+/*
+ * Restart an entry from its saved position (entry->curItem).  This is
+ * needed only for partial-match entries; slave entries and entries without
+ * a partial-match bitmap are left untouched.  The function is called only
+ * by ginrestpos().
+ *
+ * The bitmap iterator is rewound and advanced again until it returns the
+ * saved item, so that the next fetch continues right after it.
+ */
+void
+ginrestartentry(GinScanEntry entry)
+{
+   ItemPointerData stopItem = entry->curItem;
+   bool savedReduceResult;
+
+   if ( entry->master || entry->partialMatch == NULL )
+       return; /* entry is slave or not a partial match type */
+
+   if ( entry->isFinished )
+       return; /* entry was finished before ginmarkpos() call */
+
+   if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber )
+       return; /* entry hadn't been started before ginmarkpos() call */
+
+   /*
+    * Reset iterator
+    */
+   tbm_begin_iterate( entry->partialMatch );
+   entry->partialMatchResult = NULL;
+   entry->offset = 0;
+
+   /*
+    * Temporarily reset reduceResult flag to guarantee refinding
+    * of curItem
+    */
+   savedReduceResult = entry->reduceResult;
+   entry->reduceResult = FALSE;
+
+   do
+   {
+       /*
+        * We can use null instead of index because
+        * partial match doesn't use it.
+        *
+        * entryGetItem() returns entry->isFinished, so a true result
+        * means the bitmap ran out before we met stopItem again.
+        */
+       if ( entryGetItem( NULL, entry ) )
+           elog(ERROR, "cannot refind scan position"); /* must not be here! */
+   } while( compareItemPointers( &stopItem, &entry->curItem ) != 0 );
+
+   Assert( entry->isFinished == FALSE );
+
+   entry->reduceResult = savedReduceResult;
+}
+
  /*
   * Sets key->curItem to new found heap item pointer for one scan key
   * Returns isFinished, ie TRUE means we did NOT get a new item pointer!
@@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS)
     bool        res;
  
     if (dir != ForwardScanDirection)
-       elog(ERROR, "Gin doesn't support other scan directions than forward");
+       elog(ERROR, "GIN doesn't support other scan directions than forward");
  
     if (GinIsNewKey(scan))
         newScanKey(scan);
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c

index 10a528817e6909dcd1c0086b4a156f0beba90507..cec24fbfdbd190656e996464cc95d1fca4c0a62a 100644 (file)
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *         $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $
+ *         $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
   *-------------------------------------------------------------------------
   */
  
@@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS)
  
  static void
  fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
-           Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
+           Datum *entryValues, bool *partial_matches, uint32 nEntryValues, 
+           StrategyNumber strategy)
  {
     uint32      i,
                 j;
@@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
         key->scanEntry[i].buffer = InvalidBuffer;
         key->scanEntry[i].list = NULL;
         key->scanEntry[i].nlist = 0;
+       key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches ) 
+                                               ? partial_matches[i] : false;
  
         /* link to the equals entry in current scan key */
         key->scanEntry[i].master = NULL;
@@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
             key->scanEntry[j].buffer = InvalidBuffer;
             key->scanEntry[j].list = NULL;
             key->scanEntry[j].nlist = 0;
+           key->scanEntry[j].partialMatch = NULL;
+           key->scanEntry[j].partialMatchResult = NULL;
         }
     }
  }
@@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
                 ReleaseBuffer(key->scanEntry[j].buffer);
             if (removeRes && key->scanEntry[j].list)
                 pfree(key->scanEntry[j].list);
+           if (removeRes && key->scanEntry[j].partialMatch)
+               tbm_free(key->scanEntry[j].partialMatch);
         }
  
         if (removeRes)
@@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan)
     {
         Datum      *entryValues;
         int32       nEntryValues;
+       bool        *partial_matches = NULL;
  
-       if (scankey[i].sk_flags & SK_ISNULL)
-           elog(ERROR, "Gin doesn't support NULL as scan key");
         Assert(scankey[i].sk_attno == 1);
  
-       entryValues = (Datum *) DatumGetPointer(
-                                               FunctionCall3(
+       /* XXX can't we treat nulls by just setting isVoidRes? */
+       /* This would amount to assuming that all GIN operators are strict */
+       if (scankey[i].sk_flags & SK_ISNULL)
+           elog(ERROR, "GIN doesn't support NULL as scan key");
+
+       entryValues = (Datum *) DatumGetPointer(FunctionCall4(
                                                 &so->ginstate.extractQueryFn,
                                                       scankey[i].sk_argument,
                                               PointerGetDatum(&nEntryValues),
-                                      UInt16GetDatum(scankey[i].sk_strategy)
-                                                             )
-           );
+                                      UInt16GetDatum(scankey[i].sk_strategy),
+                                       PointerGetDatum(&partial_matches)));
         if (nEntryValues < 0)
         {
             /*
@@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan)
             so->isVoidRes = true;
             break;
         }
+
+       /*
+        * extractQueryFn signals that everything matches
+        */
         if (entryValues == NULL || nEntryValues == 0)
             /* full scan... */
             continue;
  
         fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
-                   entryValues, nEntryValues, scankey[i].sk_strategy);
+                   entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy);
         nkeys++;
     }
  
@@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS)
  }
  
  static GinScanKey
-copyScanKeys(GinScanKey keys, uint32 nkeys)
+copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart)
  {
     GinScanKey  newkeys;
     uint32      i,
@@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys)
  
                 newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
             }
+
+           if ( restart )
+               ginrestartentry( &keys[i].scanEntry[j] );
         }
     }
  
@@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS)
     GinScanOpaque so = (GinScanOpaque) scan->opaque;
  
     freeScanKeys(so->markPos, so->nkeys, FALSE);
-   so->markPos = copyScanKeys(so->keys, so->nkeys);
+   so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE);
  
     PG_RETURN_VOID();
  }
@@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS)
     GinScanOpaque so = (GinScanOpaque) scan->opaque;
  
     freeScanKeys(so->keys, so->nkeys, FALSE);
-   so->keys = copyScanKeys(so->markPos, so->nkeys);
+   so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE);
  
     PG_RETURN_VOID();
  }
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c

index 7da7689f826e3be30e162aae0f3d0eca831f4642..36105e20d2d6b4b3f87424d7f5e4128300816be6 100644 (file)
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *         $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $
+ *         $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $
   *-------------------------------------------------------------------------
   */
  
@@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index)
     fmgr_info_copy(&(state->consistentFn),
                    index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
                    CurrentMemoryContext);
+   
+   /*
+    * Check opclass capability to do partial match. 
+    */
+   if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
+   {
+       fmgr_info_copy(&(state->comparePartialFn),
+                      index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC),
+                      CurrentMemoryContext);
+
+       state->canPartialMatch = true;
+   }
+   else
+   {
+       state->canPartialMatch = false;
+   }
  }
  
  /*
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c

index 908dbc481f2edfcc845339dd40385dc308685ffe..4fc7c53654899fd413ea4b9bc4eca8ea0b4bd67b 100644 (file)
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.229 2008/04/13 20:51:20 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.230 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -2364,7 +2364,10 @@ expand_boolean_index_clause(Node *clause,
   * expand_indexqual_opclause --- expand a single indexqual condition
   *     that is an operator clause
   *
- * The input is a single RestrictInfo, the output a list of RestrictInfos
+ * The input is a single RestrictInfo, the output a list of RestrictInfos.
+ *
+ * In the base case this is just list_make1(), but we have to be prepared to
+ * expand special cases that were accepted by match_special_index_operator().
   */
  static List *
  expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
@@ -2379,63 +2382,77 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
     Const      *prefix = NULL;
     Const      *rest = NULL;
     Pattern_Prefix_Status pstatus;
-   List       *result;
  
+   /*
+    * LIKE and regex operators are not members of any btree index opfamily,
+    * but they can be members of opfamilies for more exotic index types such
+    * as GIN.  Therefore, we should only do expansion if the operator is
+    * actually not in the opfamily.  But checking that requires a syscache
+    * lookup, so it's best to first see if the operator is one we are
+    * interested in.
+    */
     switch (expr_op)
     {
-           /*
-            * LIKE and regex operators are not members of any index opfamily,
-            * so if we find one in an indexqual list we can assume that it
-            * was accepted by match_special_index_operator().
-            */
         case OID_TEXT_LIKE_OP:
         case OID_BPCHAR_LIKE_OP:
         case OID_NAME_LIKE_OP:
         case OID_BYTEA_LIKE_OP:
-           pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
-                                          &prefix, &rest);
-           result = prefix_quals(leftop, opfamily, prefix, pstatus);
+           if (!op_in_opfamily(expr_op, opfamily))
+           {
+               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+                                              &prefix, &rest);
+               return prefix_quals(leftop, opfamily, prefix, pstatus);
+           }
             break;
  
         case OID_TEXT_ICLIKE_OP:
         case OID_BPCHAR_ICLIKE_OP:
         case OID_NAME_ICLIKE_OP:
-           /* the right-hand const is type text for all of these */
-           pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
-                                          &prefix, &rest);
-           result = prefix_quals(leftop, opfamily, prefix, pstatus);
+           if (!op_in_opfamily(expr_op, opfamily))
+           {
+               /* the right-hand const is type text for all of these */
+               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
+                                              &prefix, &rest);
+               return prefix_quals(leftop, opfamily, prefix, pstatus);
+           }
             break;
  
         case OID_TEXT_REGEXEQ_OP:
         case OID_BPCHAR_REGEXEQ_OP:
         case OID_NAME_REGEXEQ_OP:
-           /* the right-hand const is type text for all of these */
-           pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
-                                          &prefix, &rest);
-           result = prefix_quals(leftop, opfamily, prefix, pstatus);
+           if (!op_in_opfamily(expr_op, opfamily))
+           {
+               /* the right-hand const is type text for all of these */
+               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+                                              &prefix, &rest);
+               return prefix_quals(leftop, opfamily, prefix, pstatus);
+           }
             break;
  
         case OID_TEXT_ICREGEXEQ_OP:
         case OID_BPCHAR_ICREGEXEQ_OP:
         case OID_NAME_ICREGEXEQ_OP:
-           /* the right-hand const is type text for all of these */
-           pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
-                                          &prefix, &rest);
-           result = prefix_quals(leftop, opfamily, prefix, pstatus);
+           if (!op_in_opfamily(expr_op, opfamily))
+           {
+               /* the right-hand const is type text for all of these */
+               pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+                                              &prefix, &rest);
+               return prefix_quals(leftop, opfamily, prefix, pstatus);
+           }
             break;
  
         case OID_INET_SUB_OP:
         case OID_INET_SUBEQ_OP:
-           result = network_prefix_quals(leftop, expr_op, opfamily,
-                                         patt->constvalue);
-           break;
-
-       default:
-           result = list_make1(rinfo);
+           if (!op_in_opfamily(expr_op, opfamily))
+           {
+               return network_prefix_quals(leftop, expr_op, opfamily,
+                                           patt->constvalue);
+           }
             break;
     }
  
-   return result;
+   /* Default case: just make a list of the unmodified indexqual */
+   return list_make1(rinfo);
  }
  
  /*
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c

index a217ff0c5bc935ba7c4f43671e526f068c0a265e..0e00252daae8859a4af9f3f4952e0a71e6c3aa75 100644 (file)
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.11 2008/03/25 22:42:43 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -32,23 +32,22 @@ get_current_ts_config(PG_FUNCTION_ARGS)
  static int
  compareWORD(const void *a, const void *b)
  {
-   if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len)
+   int res;   /* comparison result handed back to the caller */
+
+   res = tsCompareString(   /* primary ordering: the word text itself */
+                   ((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
+                   ((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
+                   false );   /* false: whole-string comparison, not prefix matching */
+   /* identical words: break the tie by position */
+   if (res == 0)
     {
-       int         res = strncmp(
-                                 ((ParsedWord *) a)->word,
-                                 ((ParsedWord *) b)->word,
-                                 ((ParsedWord *) b)->len);
+       if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
+           return 0;   /* same word at the same position */
  
-       if (res == 0)
-       {
-           if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
-               return 0;
-
-           return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
-       }
-       return res;
+       res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;   /* larger position sorts later */
     }
-   return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1;
+
+   return res;
  }
  
  static int
@@ -268,7 +267,7 @@ to_tsvector(PG_FUNCTION_ARGS)
   * and different variants are ORred together.
   */
  static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
  {
     int4        count = 0;
     ParsedText  prs;
@@ -302,7 +301,8 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
                 while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
                 {
  
-                   pushValue(state, prs.words[count].word, prs.words[count].len, weight);
+                   pushValue(state, prs.words[count].word, prs.words[count].len, weight, 
+                           (  (prs.words[count].flags & TSL_PREFIX) || prefix  ) ? true : false );
                     pfree(prs.words[count].word);
                     if (cnt)
                         pushOperator(state, OP_AND);
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c

index 22862bdb806b363069a8c84473cb56b2e4b43296..0634f54a71ba7fdc8f314bf3fc58ae93ad166e47 100644 (file)
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.8 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -415,6 +415,7 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
                 prs->words[prs->curwords].len = strlen(ptr->lexeme);
                 prs->words[prs->curwords].word = ptr->lexeme;
                 prs->words[prs->curwords].nvariant = ptr->nvariant;
+               prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
                 prs->words[prs->curwords].alen = 0;
                 prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
                 ptr++;
@@ -463,8 +464,8 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
     for (i = 0; i < query->size; i++)
     {
         if (item->type == QI_VAL &&
-           item->operand.length == buflen &&
-       strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
+           tsCompareString( GETOPERAND(query) + item->operand.distance, item->operand.length,
+                            buf, buflen, item->operand.prefix ) == 0 )
         {
             if (word->item)
             {
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c

index 55518834ae97a8df2e144d40f0dba625ee787d9e..a09c92eebea3877b41942c4300d1179573dafef4 100644 (file)
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.11 2008/04/14 17:05:33 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,6 +19,46 @@
  #include "utils/builtins.h"
  
  
+Datum
+gin_cmp_tslexeme(PG_FUNCTION_ARGS)   /* compares two text lexemes and returns the result as int32 */
+{
+   text    *a = PG_GETARG_TEXT_P(0);
+   text    *b = PG_GETARG_TEXT_P(1);
+   int     cmp;
+
+   cmp = tsCompareString(   /* only the payload bytes are compared, the varlena header is excluded */
+                   VARDATA(a), VARSIZE(a) - VARHDRSZ,
+                   VARDATA(b), VARSIZE(b) - VARHDRSZ,
+                   false );   /* false: whole-string comparison, not prefix matching */
+
+   PG_FREE_IF_COPY(a,0);   /* NOTE(review): presumably releases the copy PG_GETARG_TEXT_P may have made -- confirm */
+   PG_FREE_IF_COPY(b,1);
+   PG_RETURN_INT32( cmp );
+}
+
+Datum
+gin_cmp_prefix(PG_FUNCTION_ARGS)   /* prefix-mode comparison of two text lexemes; never returns a negative value */
+{
+   text    *a = PG_GETARG_TEXT_P(0);   /* NOTE(review): presumably the prefix being searched for, since it is passed first -- confirm */
+   text    *b = PG_GETARG_TEXT_P(1);
+#ifdef NOT_USED
+   StrategyNumber strategy = PG_GETARG_UINT16(2);   /* third argument is not read by the compiled code */
+#endif
+   int     cmp;
+
+   cmp = tsCompareString(   /* only the payload bytes are compared, the varlena header is excluded */
+                   VARDATA(a), VARSIZE(a) - VARHDRSZ,
+                   VARDATA(b), VARSIZE(b) - VARHDRSZ,
+                   true );   /* true: prefix comparison */
+
+   if ( cmp < 0 )
+       cmp = 1;  /* report "less than" as "greater than" to prevent the scan from continuing */
+
+   PG_FREE_IF_COPY(a,0);   /* NOTE(review): presumably releases the copy PG_GETARG_TEXT_P may have made -- confirm */
+   PG_FREE_IF_COPY(b,1);
+   PG_RETURN_INT32( cmp );
+}
+
  Datum
  gin_extract_tsvector(PG_FUNCTION_ARGS)
  {
@@ -55,7 +95,9 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
     TSQuery     query = PG_GETARG_TSQUERY(0);
     int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
     /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
+   bool      **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
     Datum      *entries = NULL;
+   bool       *partialmatch;
  
     *nentries = 0;
  
@@ -65,12 +107,14 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
                     j = 0,
                     len;
         QueryItem  *item;
+       bool        use_fullscan=false;
  
         item = clean_NOT(GETQUERY(query), &len);
         if (!item)
-           ereport(ERROR,
-                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                    errmsg("query requires full scan, which is not supported by GIN indexes")));
+       {
+           use_fullscan = true;
+           *nentries = 1;
+       }
  
         item = GETQUERY(query);
  
@@ -79,6 +123,7 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
                 (*nentries)++;
  
         entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
+       partialmatch = *ptr_partialmatch = (bool*) palloc(sizeof(bool) * (*nentries));
  
         for (i = 0; i < query->size; i++)
             if (item[i].type == QI_VAL)
@@ -88,8 +133,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
  
                 txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
                                                val->length);
+               partialmatch[j] = val->prefix;
                 entries[j++] = PointerGetDatum(txt);
             }
+
+       if ( use_fullscan )
+           entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
     }
     else
         *nentries = -1;         /* nothing can be found */
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c

index ecbac7b40f23c2ae4918ed97f69dce788a8976cb..b18f71315458b6e79bb70ca3d4e3b2e3870b1b21 100644 (file)
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.8 2008/04/14 17:05:33 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -307,6 +307,12 @@ checkcondition_arr(void *checkval, QueryOperand *val)
  
     /* Loop invariant: StopLow <= val < StopHigh */
  
+   /* 
+    * we are not able to find a prefix by hash value, so report a possible match
+    */
+   if ( val->prefix )
+       return true;
+
     while (StopLow < StopHigh)
     {
         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
@@ -324,6 +330,11 @@ checkcondition_arr(void *checkval, QueryOperand *val)
  static bool
  checkcondition_bit(void *checkval, QueryOperand *val)
  {
+   /*
+    * we are not able to find a prefix in the signature tree, so always report a match
+    */
+   if ( val->prefix )
+       return true; 
     return GETBIT(checkval, HASHVAL(val->valcrc));
  }
  
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c

index e4f1beba905438f5f737525a4e53c8dfdba791a2..9a890d2ae687e8bee4916e90f3e73262a32678b5 100644 (file)
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -56,12 +56,14 @@ struct TSQueryParserStateData
  #define WAITSINGLEOPERAND 4
  
  /*
- * subroutine to parse the weight part, like ':1AB' of a query.
+ * subroutine to parse the modifiers part of a query operand, like ':1AB'
+ * (currently the modifiers are the weight and the prefix flag).
   */
  static char *
-get_weight(char *buf, int16 *weight)
+get_modifiers(char *buf, int16 *weight, bool *prefix)
  {
     *weight = 0;
+   *prefix = false;
  
     if (!t_iseq(buf, ':'))
         return buf;
@@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
             case 'D':
                 *weight |= 1;
                 break;
+           case '*':
+               *prefix = true;
+               break;
             default:
                 return buf;
         }
@@ -118,8 +123,11 @@ typedef enum
  static ts_tokentype
  gettoken_query(TSQueryParserState state,
                int8 *operator,
-              int *lenval, char **strval, int16 *weight)
+              int *lenval, char **strval, int16 *weight, bool *prefix)
  {
+   *weight = 0;
+   *prefix = false;
+
     while (1)
     {
         switch (state->state)
@@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
                     reset_tsvector_parser(state->valstate, state->buf);
                     if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
                     {
-                       state->buf = get_weight(state->buf, weight);
+                       state->buf = get_modifiers(state->buf, weight, prefix);
                         state->state = WAITOPERATOR;
                         return PT_VAL;
                     }
@@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
  }
  
  static void
-pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
  {
     QueryOperand *tmp;
  
@@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
     tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
     tmp->type = QI_VAL;
     tmp->weight = weight;
+   tmp->prefix = prefix;
     tmp->valcrc = (int32) valcrc;
     tmp->length = lenval;
     tmp->distance = distance;
@@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
   * of the string.
   */
  void
-pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
  {
     pg_crc32    valcrc;
  
@@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
     INIT_CRC32(valcrc);
     COMP_CRC32(valcrc, strval, lenval);
     FIN_CRC32(valcrc);
-   pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
+   pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
  
     /* append the value string to state.op, enlarging buffer if needed first */
     while (state->curop - state->op + lenval + 1 >= state->lenop)
@@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
     int8        opstack[STACKDEPTH];
     int         lenstack = 0;
     int16       weight = 0;
+   bool        prefix;
  
     /* since this function recurses, it could be driven to stack overflow */
     check_stack_depth();
  
-   while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
+   while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
     {
         switch (type)
         {
             case PT_VAL:
-               pushval(opaque, state, strval, lenval, weight);
+               pushval(opaque, state, strval, lenval, weight, prefix);
                 while (lenstack && (opstack[lenstack - 1] == OP_AND ||
                                     opstack[lenstack - 1] == OP_NOT))
                 {
@@ -549,9 +559,9 @@ parse_tsquery(char *buf,
  
  static void
  pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
-            int16 weight)
+            int16 weight, bool prefix)
  {
-   pushValue(state, strval, lenval, weight);
+   pushValue(state, strval, lenval, weight, prefix);   /* push the operand exactly as given; 'opaque' is not used here */
  }
  
  /*
@@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
         char       *op = in->op + curpol->distance;
         int         clen;
  
-       RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+       RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
         *(in->cur) = '\'';
         in->cur++;
         while (*op)
@@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
         }
         *(in->cur) = '\'';
         in->cur++;
-       if (curpol->weight)
+       if (curpol->weight || curpol->prefix)
         {
             *(in->cur) = ':';
             in->cur++;
+           if ( curpol->prefix )
+           {
+               *(in->cur) = '*';
+               in->cur++;
+           }
             if (curpol->weight & (1 << 3))
             {
                 *(in->cur) = 'A';
@@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
   * uint8   type, QI_VAL
   * uint8   weight
   *         operand text in client encoding, null-terminated
+ * uint8   prefix (NOTE: on the wire this byte is sent right after weight, before the operand text)
   *
   * For each operator:
   * uint8   type, QI_OPR
@@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
         {
             case QI_VAL:
                 pq_sendint(&buf, item->operand.weight, sizeof(uint8));
+               pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
                 pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
                 break;
             case QI_OPR:
@@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         {
             size_t      val_len;    /* length after recoding to server encoding */
             uint8       weight;
+           uint8       prefix;
             const char *val;
             pg_crc32    valcrc;
  
             weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+           prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
             val = pq_getmsgstring(buf);
             val_len = strlen(val);
  
@@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
             FIN_CRC32(valcrc);
  
             item->operand.weight = weight;
+           item->operand.prefix = (prefix) ? true : false;
             item->operand.valcrc = (int32) valcrc;
             item->operand.length = val_len;
             item->operand.distance = datalen;
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c

index b81835c9692a2ddea80c07bfd224ea1842dec68d..4cc1a2a21eba990a804f512ebba8a00bfcd120ec 100644 (file)
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.8 2008/01/01 19:45:53 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -125,10 +125,7 @@ QTNodeCompare(QTNode *an, QTNode *bn)
             return (ao->valcrc > bo->valcrc) ? -1 : 1;
         }
  
-       if (ao->length == bo->length)
-           return strncmp(an->word, bn->word, ao->length);
-       else
-           return (ao->length > bo->length) ? -1 : 1;
+       return tsCompareString( an->word, ao->length, bn->word, bo->length, false);
     }
  }
  
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c

index 065c94d20977c9e4e29985d039dad2d21adf6ac7..d23e05e9939060559dc481bc71cd89f466afaa51 100644 (file)
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -71,45 +71,60 @@ cnt_length(TSVector t)
     return len;
  }
  
-static int
-WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
-{
-   if (ptr->len == item->length)
-       return strncmp(
-                      eval + ptr->pos,
-                      qval + item->distance,
-                      item->length);
  
-   return (ptr->len > item->length) ? 1 : -1;
-}
+#define    WordECompareQueryItem(e,q,p,i,m) \
+   tsCompareString((q) + (i)->distance, (i)->length,   \
+                   (e) + (p)->pos, (p)->len, (m))
+/* e,p: entry string area and WordEntry; q,i: query operand area and QueryOperand (compared first); m: forwarded as tsCompareString's last (prefix) argument */
  
  /*
- * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q'
- * is the TSQuery containing 'item'. Returns NULL if not found.
+ * Returns a pointer to the first of the WordEntry(s) in tsvector 't' that
+ * match 'item' ('q' is the TSQuery containing 'item'), and stores the number
+ * of matching entries in *nitem.  Returns NULL, with *nitem == 0, if not found.
   */
  static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
  {
     WordEntry  *StopLow = ARRPTR(t);
     WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
-   WordEntry  *StopMiddle;
+   WordEntry  *StopMiddle = StopHigh;   /* initialized so the prefix loop below is safe when the search loop never runs */
     int         difference;
  
-   /* Loop invariant: StopLow <= item < StopHigh */
+   *nitem=0;   /* nothing found yet */
  
+   /* Loop invariant: StopLow <= item < StopHigh */
     while (StopLow < StopHigh)
     {
         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-       difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
+       difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);   /* exact (non-prefix) comparison */
         if (difference == 0)
-           return StopMiddle;
-       else if (difference < 0)
+       {
+           StopHigh = StopMiddle;   /* remember the exact match; StopHigh is what gets returned */
+           *nitem=1;
+           break;
+       }
+       else if (difference > 0)   /* sign test is flipped relative to the old code: the macro now compares the query item first */
             StopLow = StopMiddle + 1;
         else
             StopHigh = StopMiddle;
     }
  
-   return NULL;
+   if ( item->prefix == true )   /* prefix operand: count the run of consecutive entries that match in prefix mode */
+   {
+       if ( StopLow >= StopHigh )   /* no exact match: start counting where the binary search ended */
+           StopMiddle = StopHigh;
+
+       *nitem=0;   /* recount from scratch; an exact match, if any, is where the loop below starts */
+
+       while( StopMiddle < (WordEntry *) STRPTR(t) &&    /* stop at the end of the WordEntry array */
+               WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 )
+       {
+           (*nitem)++;
+           StopMiddle++;
+       }
+   }
+
+   return ( *nitem > 0 ) ? StopHigh : NULL;   /* StopHigh points at the first matching entry */
  }
  
  
@@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg)
     QueryOperand *qa = (*(QueryOperand **) a);
     QueryOperand *qb = (*(QueryOperand **) b);
  
-   if (qa->length == qb->length)
-       return strncmp(operand + qa->distance,
-                      operand + qb->distance,
-                      qb->length);
-
-   return (qa->length > qb->length) ? 1 : -1;
+   return tsCompareString(operand + qa->distance, qa->length,
+                          operand + qb->distance, qb->length,
+                          false);
  }
  
  /*
@@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
                 k,
                 l,
                 p;
-   WordEntry  *entry;
+   WordEntry  *entry,
+              *firstentry;
     WordEntryPos *post,
                *ct;
     int4        dimt,
                 lenct,
-               dist;
+               dist,
+               nitem;
     float       res = -1.0;
     QueryOperand **item;
     int         size = q->size;
@@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
  
     for (i = 0; i < size; i++)
     {
-       entry = find_wordentry(t, q, item[i]);
+       firstentry = entry = find_wordentry(t, q, item[i], &nitem);
         if (!entry)
             continue;
  
-       if (entry->haspos)
-           pos[i] = _POSVECPTR(t, entry);
-       else
-           pos[i] = &POSNULL;
-
-
-       dimt = pos[i]->npos;
-       post = pos[i]->pos;
-       for (k = 0; k < i; k++)
+       while( entry - firstentry < nitem )
         {
-           if (!pos[k])
-               continue;
-           lenct = pos[k]->npos;
-           ct = pos[k]->pos;
-           for (l = 0; l < dimt; l++)
+           if (entry->haspos)
+               pos[i] = _POSVECPTR(t, entry);
+           else
+               pos[i] = &POSNULL;
+
+           dimt = pos[i]->npos;
+           post = pos[i]->pos;
+           for (k = 0; k < i; k++)
             {
-               for (p = 0; p < lenct; p++)
+               if (!pos[k])
+                   continue;
+               lenct = pos[k]->npos;
+               ct = pos[k]->pos;
+               for (l = 0; l < dimt; l++)
                 {
-                   dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
-                   if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+                   for (p = 0; p < lenct; p++)
                     {
-                       float       curw;
-
-                       if (!dist)
-                           dist = MAXENTRYPOS;
-                       curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
-                       res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+                       dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+                       if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+                       {
+                           float       curw;
+   
+                           if (!dist)
+                               dist = MAXENTRYPOS;
+                           curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+                           res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+                       }
                     }
                 }
             }
+
+           entry++;
         }
     }
     pfree(pos);
@@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
  static float
  calc_rank_or(float *w, TSVector t, TSQuery q)
  {
-   WordEntry  *entry;
+   WordEntry  *entry,
+              *firstentry;
     WordEntryPos *post;
     int4        dimt,
                 j,
-               i;
+               i,
+               nitem;
     float       res = 0.0;
     QueryOperand **item;
     int         size = q->size;
@@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
                     wjm;
         int4        jm;
  
-       entry = find_wordentry(t, q, item[i]);
+       firstentry = entry = find_wordentry(t, q, item[i], &nitem);
         if (!entry)
             continue;
  
-       if (entry->haspos)
-       {
-           dimt = POSDATALEN(t, entry);
-           post = POSDATAPTR(t, entry);
-       }
-       else
+       while( entry - firstentry < nitem )
         {
-           dimt = POSNULL.npos;
-           post = POSNULL.pos;
-       }
+           if (entry->haspos)
+           {
+               dimt = POSDATALEN(t, entry);
+               post = POSDATAPTR(t, entry);
+           }
+           else
+           {
+               dimt = POSNULL.npos;
+               post = POSNULL.pos;
+           }
  
-       resj = 0.0;
-       wjm = -1.0;
-       jm = 0;
-       for (j = 0; j < dimt; j++)
-       {
-           resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
-           if (wpos(post[j]) > wjm)
+           resj = 0.0;
+           wjm = -1.0;
+           jm = 0;
+           for (j = 0; j < dimt; j++)
             {
-               wjm = wpos(post[j]);
-               jm = j;
+               resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+               if (wpos(post[j]) > wjm)
+               {
+                   wjm = wpos(post[j]);
+                   jm = j;
+               }
             }
-       }
  /*
-       limit (sum(i/i^2),i->inf) = pi^2/6
-       resj = sum(wi/i^2),i=1,noccurence,
-       wi - should be sorted desc,
-       don't sort for now, just choose maximum weight. This should be corrected
-       Oleg Bartunov
+           limit (sum(1/i^2),i->inf) = pi^2/6
+           resj = sum(wi/i^2),i=1,noccurence,
+           wi - should be sorted desc,
+           don't sort for now, just choose maximum weight. This should be corrected
+           Oleg Bartunov
  */
-       res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+           res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+
+           entry++;
+       }
     }
     if (size > 0)
         res = res / size;
@@ -594,11 +619,13 @@ static DocRepresentation *
  get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
  {
     QueryItem  *item = GETQUERY(qr->query);
-   WordEntry  *entry;
+   WordEntry  *entry,
+              *firstentry;
     WordEntryPos *post;
     int4        dimt,
                 j,
-               i;
+               i,
+               nitem;
     int         len = qr->query->size * 4,
                 cur = 0;
     DocRepresentation *doc;
@@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
         if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
             continue;
  
-       entry = find_wordentry(txt, qr->query, curoperand);
+       firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
         if (!entry)
             continue;
  
-       if (entry->haspos)
-       {
-           dimt = POSDATALEN(txt, entry);
-           post = POSDATAPTR(txt, entry);
-       }
-       else
-       {
-           dimt = POSNULL.npos;
-           post = POSNULL.pos;
-       }
-
-       while (cur + dimt >= len)
+       while( entry - firstentry < nitem )
         {
-           len *= 2;
-           doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
-       }
-
-       for (j = 0; j < dimt; j++)
-       {
-           if (j == 0)
+           if (entry->haspos)
+           {
+               dimt = POSDATALEN(txt, entry);
+               post = POSDATAPTR(txt, entry);
+           }
+           else
             {
-               int         k;
+               dimt = POSNULL.npos;
+               post = POSNULL.pos;
+           }
  
-               doc[cur].nitem = 0;
-               doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+           while (cur + dimt >= len)
+           {
+               len *= 2;
+               doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
+           }
  
-               for (k = 0; k < qr->query->size; k++)
+           for (j = 0; j < dimt; j++)
+           {
+               if (j == 0)
                 {
-                   QueryOperand *kptr = &item[k].operand;
-                   QueryOperand *iptr = &item[i].operand;
-
-                   if (k == i ||
-                       (item[k].type == QI_VAL &&
-                        compareQueryOperand(&kptr, &iptr, operand) == 0))
+                   int         k;
+   
+                   doc[cur].nitem = 0;
+                   doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+   
+                   for (k = 0; k < qr->query->size; k++)
                     {
-                       /*
-                        * if k == i, we've already checked above that it's
-                        * type == Q_VAL
-                        */
-                       doc[cur].item[doc[cur].nitem] = item + k;
-                       doc[cur].nitem++;
-                       QR_SET_OPERAND_EXISTS(qr, item + k);
+                       QueryOperand *kptr = &item[k].operand;
+                       QueryOperand *iptr = &item[i].operand;
+   
+                       if (k == i ||
+                           (item[k].type == QI_VAL &&
+                            compareQueryOperand(&kptr, &iptr, operand) == 0))
+                       {
+                           /*
+                            * if k == i, we've already checked above that it's
+                            * type == QI_VAL
+                            */
+                           doc[cur].item[doc[cur].nitem] = item + k;
+                           doc[cur].nitem++;
+                           QR_SET_OPERAND_EXISTS(qr, item + k);
+                       }
                     }
                 }
+               else
+               {
+                   doc[cur].nitem = doc[cur - 1].nitem;
+                   doc[cur].item = doc[cur - 1].item;
+               }
+               doc[cur].pos = WEP_GETPOS(post[j]);
+               doc[cur].wclass = WEP_GETWEIGHT(post[j]);
+               cur++;
             }
-           else
-           {
-               doc[cur].nitem = doc[cur - 1].nitem;
-               doc[cur].item = doc[cur - 1].item;
-           }
-           doc[cur].pos = WEP_GETPOS(post[j]);
-           doc[cur].wclass = WEP_GETWEIGHT(post[j]);
-           cur++;
+
+           entry++;
         }
     }
  
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c

index 18342800cc006847bbcae862e3da6316390d9192..7a8da86423ff7767aa827cb44de74a4edd1df85d 100644 (file)
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.13 2008/03/10 12:57:05 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -85,14 +85,9 @@ compareentry(const void *va, const void *vb, void *arg)
     const WordEntryIN *b = (const WordEntryIN *) vb;
     char       *BufferStr = (char *) arg;
  
-   if (a->entry.len == b->entry.len)
-   {
-       return strncmp(&BufferStr[a->entry.pos],
-                      &BufferStr[b->entry.pos],
-                      a->entry.len);
-   }
-
-   return (a->entry.len > b->entry.len) ? 1 : -1;
+   return tsCompareString( &BufferStr[a->entry.pos], a->entry.len,
+                           &BufferStr[b->entry.pos], b->entry.len,
+                           false );
  }
  
  /*
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c

index 539a9be9789cea96957530fd3588e5b6b0e99f37..4e7d50b526a830a4b589b9497d87d652f0131847 100644 (file)
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.15 2008/04/08 18:20:29 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -127,11 +127,7 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
             {
                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
             }
-           else if (aptr->len != bptr->len)
-           {
-               return (aptr->len > bptr->len) ? -1 : 1;
-           }
-           else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
+           else if ( (res=tsCompareString( STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) !=0 )
             {
                 return res;
             }
@@ -286,18 +282,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
     PG_RETURN_POINTER(out);
  }
  
-static int
-compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
-{
-   if (a->len == b->len)
-   {
-       return strncmp(
-                      ptra + a->pos,
-                      ptrb + b->pos,
-                      a->len);
-   }
-   return (a->len > b->len) ? 1 : -1;
-}
+#define compareEntry(pa, a, pb, b) \
+   tsCompareString((pa) + (a)->pos, (a)->len,  \
+                   (pb) + (b)->pos, (b)->len,  \
+                   false)
  
  /*
   * Add positions from src to dest after offsetting them by maxpos.
@@ -534,18 +522,46 @@ tsvector_concat(PG_FUNCTION_ARGS)
  }
  
  /*
- * compare 2 string values
+ * Compare two strings by tsvector rules. 
+ * if prefix = true then it returns zero value iff b has prefix a
   */
-static int4
-ValCompare(CHKVAL *chkval, WordEntry *ptr, QueryOperand *item)
+int4
+tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
  {
-   if (ptr->len == item->length)
-       return strncmp(
-                      &(chkval->values[ptr->pos]),
-                      &(chkval->operand[item->distance]),
-                      item->length);
+   int cmp;
+
+   if ( lena == 0 )
+   {
+       if ( prefix )
+           cmp = 0; /* empty string matches any string in a prefix match */
+       else
+           cmp = (lenb>0) ? -1 : 0;
+   }
+   else if ( lenb == 0 )
+   {
+       cmp = (lena>0) ? 1 : 0;
+   }
+   else
+   {
+       cmp = memcmp(a, b, Min(lena, lenb));
  
-   return (ptr->len > item->length) ? 1 : -1;
+       if ( prefix )
+       {
+           if ( cmp == 0 && lena > lenb )
+           {
+               /*
+                * argument b does not begin with argument a
+                */
+               cmp=1;
+           }
+       }
+       else if ( (cmp == 0) && (lena != lenb) )
+       {
+           cmp = (lena < lenb) ? -1 : 1;
+       }
+   }
+
+   return cmp;
  }
  
  /*
@@ -582,25 +598,52 @@ checkcondition_str(void *checkval, QueryOperand *val)
     CHKVAL     *chkval = (CHKVAL *) checkval;
     WordEntry  *StopLow = chkval->arrb;
     WordEntry  *StopHigh = chkval->arre;
-   WordEntry  *StopMiddle;
-   int         difference;
+   WordEntry  *StopMiddle = StopHigh;
+   int         difference = -1; 
+   bool        res=false;
  
     /* Loop invariant: StopLow <= val < StopHigh */
-
     while (StopLow < StopHigh)
     {
         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-       difference = ValCompare(chkval, StopMiddle, val);
+       difference = tsCompareString( chkval->operand + val->distance, val->length,
+                                     chkval->values + StopMiddle->pos, StopMiddle->len,
+                                     false);
+
         if (difference == 0)
-           return (val->weight && StopMiddle->haspos) ?
+       {
+           res = (val->weight && StopMiddle->haspos) ?
                 checkclass_str(chkval, StopMiddle, val) : true;
-       else if (difference < 0)
+           break;
+       }
+       else if (difference > 0)
             StopLow = StopMiddle + 1;
         else
             StopHigh = StopMiddle;
     }
  
-   return (false);
+   if ( res == false && val->prefix == true )
+   {
+       /*
+        * there was a failed exact search, so we should scan further to find
+        * a prefix match.
+        */
+       if ( StopLow >= StopHigh )
+           StopMiddle = StopHigh;
+
+       while( res == false && StopMiddle < chkval->arre && 
+               tsCompareString( chkval->operand + val->distance, val->length,
+                                chkval->values + StopMiddle->pos, StopMiddle->len,
+                                true) == 0 )
+       {
+           res = (val->weight && StopMiddle->haspos) ?
+               checkclass_str(chkval, StopMiddle, val) : true;
+
+           StopMiddle++;
+       }
+   }
+
+   return res; 
  }
  
  /*
@@ -758,50 +801,38 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
     return num;
  }
  
-static WordEntry **
-SEI_realloc(WordEntry **in, uint32 *len)
-{
-   if (*len == 0 || in == NULL)
-   {
-       *len = 8;
-       in = palloc(sizeof(WordEntry *) * (*len));
-   }
-   else
-   {
-       *len *= 2;
-       in = repalloc(in, sizeof(WordEntry *) * (*len));
-   }
-   return in;
-}
+#define compareStatWord(a,e,s,t) \
+   tsCompareString(STATSTRPTR(s) + (a)->pos, (a)->len, \
+                   STRPTR(t) + (e)->pos, (e)->len,     \
+                   false)
  
-static int
-compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, TSVector txt)
+typedef struct WordEntryMark
  {
-   if (a->len == b->len)
-       return strncmp(
-                      STATSTRPTR(stat) + a->pos,
-                      STRPTR(txt) + b->pos,
-                      a->len
-           );
-   return (a->len > b->len) ? 1 : -1;
-}
+   WordEntry   *newentry;
+   StatEntry   *pos;
+} WordEntryMark;
  
  static tsstat *
-formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
+formstat(tsstat *stat, TSVector txt, List *entries)
  {
-   tsstat     *newstat;
-   uint32      totallen,
-               nentry;
-   uint32      slen = 0;
-   WordEntry **ptr = entry;
-   char       *curptr;
-   StatEntry  *sptr,
-              *nptr;
-
-   while (ptr - entry < len)
+   tsstat         *newstat;
+   uint32          totallen,
+                   nentry,
+                   len = list_length(entries);
+   uint32          slen = 0;
+   WordEntry      *ptr;
+   char           *curptr;
+   StatEntry      *sptr,
+                  *nptr;
+   ListCell       *entry;
+   StatEntry      *PosSE = STATPTR(stat),
+                  *prevPosSE;
+   WordEntryMark  *mark;
+
+   foreach( entry, entries )
     {
-       slen += (*ptr)->len;
-       ptr++;
+       mark = (WordEntryMark*)lfirst(entry);
+       slen += mark->newentry->len;
     }
  
     nentry = stat->size + len;
@@ -815,78 +846,46 @@ formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
     memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
     curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
  
-   ptr = entry;
     sptr = STATPTR(stat);
     nptr = STATPTR(newstat);
  
-   if (len == 1)
+   foreach(entry, entries)
     {
-       StatEntry  *StopLow = STATPTR(stat);
-       StatEntry  *StopHigh = (StatEntry *) STATSTRPTR(stat);
+       prevPosSE = PosSE;
  
-       while (StopLow < StopHigh)
+       mark = (WordEntryMark*)lfirst(entry);
+       ptr  = mark->newentry;
+       PosSE = mark->pos;
+
+       /*
+        * Copy missed entries 
+        */
+       if ( PosSE > prevPosSE )
         {
-           sptr = StopLow + (StopHigh - StopLow) / 2;
-           if (compareStatWord(sptr, *ptr, stat, txt) < 0)
-               StopLow = sptr + 1;
-           else
-               StopHigh = sptr;
+           memcpy( nptr, prevPosSE, sizeof(StatEntry) * (PosSE-prevPosSE) );
+           nptr += PosSE-prevPosSE;
         }
-       nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
-       memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
-       if ((*ptr)->haspos)
-           nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+
+       /*
+        * Copy new entry
+        */
+       if (ptr->haspos)
+           nptr->nentry = (stat->weight) ? check_weight(txt, ptr, stat->weight) : POSDATALEN(txt, ptr);
         else
             nptr->nentry = 1;
         nptr->ndoc = 1;
-       nptr->len = (*ptr)->len;
-       memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+       nptr->len = ptr->len;
+       memcpy(curptr, STRPTR(txt) + ptr->pos, nptr->len);
         nptr->pos = curptr - STATSTRPTR(newstat);
-       memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
-   }
-   else
-   {
-       while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
-       {
-           if (compareStatWord(sptr, *ptr, stat, txt) < 0)
-           {
-               memcpy(nptr, sptr, sizeof(StatEntry));
-               sptr++;
-           }
-           else
-           {
-               if ((*ptr)->haspos)
-                   nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
-               else
-                   nptr->nentry = 1;
-               nptr->ndoc = 1;
-               nptr->len = (*ptr)->len;
-               memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
-               nptr->pos = curptr - STATSTRPTR(newstat);
-               curptr += nptr->len;
-               ptr++;
-           }
-           nptr++;
-       }
+       curptr += nptr->len;
+       nptr++;
  
-       memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
-
-       while (ptr - entry < len)
-       {
-           if ((*ptr)->haspos)
-               nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
-           else
-               nptr->nentry = 1;
-           nptr->ndoc = 1;
-           nptr->len = (*ptr)->len;
-           memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
-           nptr->pos = curptr - STATSTRPTR(newstat);
-           curptr += nptr->len;
-           ptr++;
-           nptr++;
-       }
+       pfree(mark);
     }
  
+   if ( PosSE < (StatEntry *) STATSTRPTR(stat) )
+       memcpy(nptr, PosSE, sizeof(StatEntry) * (stat->size - (PosSE - STATPTR(stat))));
+
     return newstat;
  }
  
@@ -907,12 +906,11 @@ ts_accum(tsstat *stat, Datum data)
  {
     tsstat     *newstat;
     TSVector    txt = DatumGetTSVector(data);
-   WordEntry **newentry = NULL;
-   uint32      len = 0,
-               cur = 0;
     StatEntry  *sptr;
     WordEntry  *wptr;
     int         n = 0;
+   List       *newentries=NIL;
+   StatEntry  *StopLow;
  
     if (stat == NULL)
     {                           /* Init in first */
@@ -932,16 +930,23 @@ ts_accum(tsstat *stat, Datum data)
  
     sptr = STATPTR(stat);
     wptr = ARRPTR(txt);
+   StopLow = STATPTR(stat);
  
-   if (stat->size < 100 * txt->size)
-   {                           /* merge */
-       while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
-       {
-           int         cmp = compareStatWord(sptr, wptr, stat, txt);
+   while (wptr - ARRPTR(txt) < txt->size)
+   {
+       StatEntry  *StopHigh = (StatEntry *) STATSTRPTR(stat);
+       int         cmp;
+
+       /*
+        * We do not reset StopLow to the beginning of the array because the
+        * tsvector is ordered by the same rule, so we can search from the last stopped position
+        */
  
-           if (cmp < 0)
-               sptr++;
-           else if (cmp == 0)
+       while (StopLow < StopHigh)
+       {
+           sptr = StopLow + (StopHigh - StopLow) / 2;
+           cmp = compareStatWord(sptr, wptr, stat, txt);
+           if (cmp == 0)
             {
                 if (stat->weight == 0)
                 {
@@ -953,90 +958,38 @@ ts_accum(tsstat *stat, Datum data)
                     sptr->ndoc++;
                     sptr->nentry += n;
                 }
-               sptr++;
-               wptr++;
+               break;
             }
+           else if (cmp < 0)
+               StopLow = sptr + 1;
             else
-           {
-               if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
-               {
-                   if (cur == len)
-                       newentry = SEI_realloc(newentry, &len);
-                   newentry[cur] = wptr;
-                   cur++;
-               }
-               wptr++;
-           }
+               StopHigh = sptr;
         }
  
-       while (wptr - ARRPTR(txt) < txt->size)
-       {
+       if (StopLow >= StopHigh)
+       {                   /* not found */
             if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
             {
-               if (cur == len)
-                   newentry = SEI_realloc(newentry, &len);
-               newentry[cur] = wptr;
-               cur++;
-           }
-           wptr++;
-       }
-   }
-   else
-   {                           /* search */
-       while (wptr - ARRPTR(txt) < txt->size)
-       {
-           StatEntry  *StopLow = STATPTR(stat);
-           StatEntry  *StopHigh = (StatEntry *) STATSTRPTR(stat);
-           int         cmp;
+               WordEntryMark *mark = (WordEntryMark*)palloc(sizeof(WordEntryMark));
  
-           while (StopLow < StopHigh)
-           {
-               sptr = StopLow + (StopHigh - StopLow) / 2;
-               cmp = compareStatWord(sptr, wptr, stat, txt);
-               if (cmp == 0)
-               {
-                   if (stat->weight == 0)
-                   {
-                       sptr->ndoc++;
-                       sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
-                   }
-                   else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
-                   {
-                       sptr->ndoc++;
-                       sptr->nentry += n;
-                   }
-                   break;
-               }
-               else if (cmp < 0)
-                   StopLow = sptr + 1;
-               else
-                   StopHigh = sptr;
-           }
+               mark->newentry = wptr;
+               mark->pos = StopLow;
+               newentries = lappend( newentries, mark );
  
-           if (StopLow >= StopHigh)
-           {                   /* not found */
-               if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
-               {
-                   if (cur == len)
-                       newentry = SEI_realloc(newentry, &len);
-                   newentry[cur] = wptr;
-                   cur++;
-               }
             }
-           wptr++;
         }
+       wptr++;
     }
  
-
-   if (cur == 0)
+   if (list_length(newentries) == 0)
     {                           /* no new words */
         if (txt != (TSVector) DatumGetPointer(data))
             pfree(txt);
         return stat;
     }
  
-   newstat = formstat(stat, txt, newentry, cur);
-   pfree(newentry);
+   newstat = formstat(stat, txt, newentries);
+   list_free(newentries);
  
     if (txt != (TSVector) DatumGetPointer(data))
         pfree(txt);
diff --git a/src/include/access/gin.h b/src/include/access/gin.h

index add5df6ba11e4da4fd373db78878cc25f1460c97..353dd4e3f728eac7cb6633ca66b367f1f6fdf6d1 100644 (file)
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -4,7 +4,7 @@
   *
   * Copyright (c) 2006-2008, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.19 2008/05/12 00:00:53 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.20 2008/05/16 16:31:01 tgl Exp $
   *--------------------------------------------------------------------------
   */
  
@@ -15,6 +15,7 @@
  #include "access/itup.h"
  #include "access/relscan.h"
  #include "fmgr.h"
+#include "nodes/tidbitmap.h"
  #include "storage/block.h"
  #include "storage/buf.h"
  #include "storage/off.h"
@@ -28,7 +29,8 @@
  #define GIN_EXTRACTVALUE_PROC         2
  #define GIN_EXTRACTQUERY_PROC         3
  #define GIN_CONSISTENT_PROC               4
-#define GINNProcs                     4
+#define GIN_COMPARE_PARTIAL_PROC      5
+#define GINNProcs                     5
  
  /*
   * Page opaque data in a inverted index page.
@@ -141,7 +143,10 @@ typedef struct GinState
     FmgrInfo    extractValueFn;
     FmgrInfo    extractQueryFn;
     FmgrInfo    consistentFn;
+   FmgrInfo    comparePartialFn;   /* optional method */
  
+   bool        canPartialMatch;    /* can opclass perform partial
+                                    * match (prefix search)? */
     TupleDesc   tupdesc;
  } GinState;
  
@@ -360,6 +365,12 @@ typedef struct GinScanEntryData
     /* current ItemPointer to heap */
     ItemPointerData curItem;
  
+   /* partial match support */
+   bool        isPartialMatch;
+   TIDBitmap  *partialMatch;
+   TBMIterateResult *partialMatchResult;
+   StrategyNumber strategy;
+
     /* used for Posting list and one page in Posting tree */
     ItemPointerData *list;
     uint32           nlist;
@@ -424,6 +435,7 @@ extern PGDLLIMPORT int GinFuzzySearchLimit;
  
  extern Datum gingetbitmap(PG_FUNCTION_ARGS);
  extern Datum gingettuple(PG_FUNCTION_ARGS);
+extern void ginrestartentry(GinScanEntry entry);
  
  /* ginvacuum.c */
  extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index e0bd027fdac62926e88665534461f38d56afc68e..94d70c2c8779ad07dd31da302c6ea127c0ad96f7 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.459 2008/05/15 00:17:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.460 2008/05/16 16:31:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 200805141
+#define CATALOG_VERSION_NO 200805161
  
  #endif
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h

index 9837a8c200382bb1e5ad8abd13be7c6b6df92ed8..0fe5d05e7c6942ae9570e7e3a4548b56b8012149 100644 (file)
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.55 2008/04/10 22:25:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.56 2008/05/16 16:31:01 tgl Exp $
   *
   * NOTES
   *     the genbki.sh script reads this file and generates .bki
@@ -114,7 +114,7 @@ DESCR("hash index access method");
  DATA(insert OID = 783 (  gist  0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
  DESCR("GiST index access method");
  #define GIST_AM_OID 783
-DATA(insert OID = 2742 (  gin  0 4 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
+DATA(insert OID = 2742 (  gin  0 5 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
  DESCR("GIN index access method");
  #define GIN_AM_OID 2742
  
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h

index 8a80446953865ddb83a130ce4e16e897999ab604..36bd0f8409603179fcde636330b3330354922ad7 100644 (file)
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -22,7 +22,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.71 2008/03/27 03:57:34 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.72 2008/05/16 16:31:01 tgl Exp $
   *
   * NOTES
   *   the genbki.sh script reads this file and generates .bki
@@ -321,10 +321,11 @@ DATA(insert ( 2745   1025 1025 1 381 ));
  DATA(insert (  2745   1025 1025 2 2743 ));
  DATA(insert (  2745   1025 1025 3 2774 ));
  DATA(insert (  2745   1025 1025 4 2744 ));
-DATA(insert (  3659   3614 3614 1 360 ));
+DATA(insert (  3659   3614 3614 1 3724 ));
  DATA(insert (  3659   3614 3614 2 3656 ));
  DATA(insert (  3659   3614 3614 3 3657 ));
  DATA(insert (  3659   3614 3614 4 3658 ));
+DATA(insert (  3659   3614 3614 5 2700 ));
  DATA(insert (  3626   3614 3614 1 3622 ));
  DATA(insert (  3683   3615 3615 1 3668 ));
  
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index cc1d5db786b3cab23adf8fef9dac5bb9cc384eaf..21e094ad5700f6ac838770592d4991a1f73885f2 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.499 2008/05/15 00:17:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.500 2008/05/16 16:31:01 tgl Exp $
   *
   * NOTES
   *   The script catalog/genbki.sh reads this file and generates .bki
@@ -4018,7 +4018,7 @@ DESCR("gin(internal)");
  /* GIN array support */
  DATA(insert OID = 2743 (  ginarrayextract   PGNSP PGUID 12 1 0 f f t f i 2 2281 "2277 2281" _null_ _null_ _null_   ginarrayextract - _null_ _null_ ));
  DESCR("GIN array support");
-DATA(insert OID = 2774 (  ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 3 2281 "2277 2281 21" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
+DATA(insert OID = 2774 (  ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 4 2281 "2277 2281 21 2281" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
  DESCR("GIN array support");
  DATA(insert OID = 2744 (  ginarrayconsistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 2281 2281" _null_ _null_ _null_    ginarrayconsistent - _null_ _null_ ));
  DESCR("GIN array support");
@@ -4253,10 +4253,14 @@ DESCR("GiST tsvector support");
  
  DATA(insert OID = 3656 (  gin_extract_tsvector PGNSP PGUID 12 1 0 f f t f i 2 2281 "3614 2281" _null_ _null_ _null_    gin_extract_tsvector - _null_ _null_ ));
  DESCR("GIN tsvector support");
-DATA(insert OID = 3657 (  gin_extract_tsquery  PGNSP PGUID 12 1 0 f f t f i 3 2281 "3615 2281 21" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
+DATA(insert OID = 3657 (  gin_extract_tsquery  PGNSP PGUID 12 1 0 f f t f i 4 2281 "3615 2281 21 2281" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
  DESCR("GIN tsvector support");
  DATA(insert OID = 3658 (  gin_tsquery_consistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 3615 2281" _null_ _null_ _null_    gin_tsquery_consistent - _null_ _null_ ));
  DESCR("GIN tsvector support");
+DATA(insert OID = 3724 (  gin_cmp_tslexeme      PGNSP PGUID 12 1 0 f f t f i 2 23 "25 25" _null_ _null_ _null_ gin_cmp_tslexeme - _null_ _null_ ));
+DESCR("GIN tsvector support");
+DATA(insert OID = 2700 (  gin_cmp_prefix        PGNSP PGUID 12 1 0 f f t f i 3 23 "25 25 21" _null_ _null_ _null_ gin_cmp_prefix - _null_ _null_ ));
+DESCR("GIN tsvector support");
  
  DATA(insert OID = 3662 (  tsquery_lt           PGNSP PGUID 12 1 0 f f t f i 2 16 "3615 3615" _null_ _null_ _null_ tsquery_lt - _null_ _null_ ));
  DESCR("less-than");
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h

index 8f21abf032cd1a02751950efb3b612e9969268a8..d08d35db1931a5f99664e2158f1edc5c65e102cf 100644 (file)
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1998-2008, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.8 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -99,6 +99,7 @@ typedef struct
  } TSLexeme;
  
  #define TSL_ADDPOS     0x01
+#define TSL_PREFIX     0x02
  
  /*
   * Struct for supporting complex dictionaries like thesaurus.
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h

index 9f5e63e2273e8fcd882de9c7f0184408b9b4f323..42680408a914f8b6731344cddd4d9ec549c0c994 100644 (file)
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1998-2008, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.10 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.11 2008/05/16 16:31:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -179,6 +179,7 @@ typedef struct
                                  * bitmask of allowed weights. if it =0 then
                                  * any weight are allowed. Weights and bit
                                  * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
+   bool        prefix;         /* true if it's a prefix search */
     int32       valcrc;         /* XXX: pg_crc32 would be a more appropriate
                                  * data type, but we use comparisons to signed
                                  * integers in the code. They would need to be
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h

index 0085b64951a885b346dfea58687606dcbe6b07d8..6afd4c99f7a03fcb84d5628bb9b45d703af9ff4d 100644 (file)
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1998-2008, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.14 2008/04/21 00:26:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.15 2008/05/16 16:31:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -42,9 +42,10 @@ typedef struct TSQueryParserStateData *TSQueryParserState;
  
  typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
                                           char *token, int tokenlen,
-                                         int2 tokenweights     /* bitmap as described
+                                         int2 tokenweights,    /* bitmap as described
                                                                  * in QueryOperand
-                                 struct */ );
+                                                                * struct */
+                                         bool  prefix);
  
  extern TSQuery parse_tsquery(char *buf,
               PushFunction pushval,
@@ -52,7 +53,7 @@ extern TSQuery parse_tsquery(char *buf,
  
  /* Functions for use by PushFunction implementations */
  extern void pushValue(TSQueryParserState state,
-         char *strval, int lenval, int2 weight);
+         char *strval, int lenval, int2 weight, bool prefix);
  extern void pushStop(TSQueryParserState state);
  extern void pushOperator(TSQueryParserState state, int8 operator);
  
@@ -74,6 +75,7 @@ typedef struct
          */
         uint16     *apos;
     }           pos;
+   uint16      flags;  /* currently, only TSL_PREFIX */
     char       *word;
     uint32      alen;
  } ParsedWord;
@@ -110,6 +112,7 @@ extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
   * to_ts* - text transformation to tsvector, tsquery
   */
  extern TSVector make_tsvector(ParsedText *prs);
+extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
  
  extern Datum to_tsvector_byid(PG_FUNCTION_ARGS);
  extern Datum to_tsvector(PG_FUNCTION_ARGS);
@@ -142,6 +145,8 @@ extern Datum gtsvectorout(PG_FUNCTION_ARGS);
   */
  
  extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
+extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS);
+extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS);
  extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
  extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS);
  
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index ef787115742fdcb1cc298eb6678ed0a3d1580a24..efab1354fe38e02687d37ea44f51d38194701126 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -935,9 +935,11 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
  
  -- Detect missing pg_amproc entries: should have as many support functions
  -- as AM expects for each datatype combination supported by the opfamily.
+-- GIN is a special case because it has an optional support function.
  SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
  FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
  WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+    p1.amname <> 'gin' AND
      p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
                       WHERE p4.amprocfamily = p2.oid AND
                             p4.amproclefttype = p3.amproclefttype AND
@@ -946,18 +948,43 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
  --------+---------+----------------+-----------------
  (0 rows)
  
+-- Similar check for GIN, allowing one optional proc
+SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
+FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
+WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+    p1.amname = 'gin' AND
+    p1.amsupport - 1 >  (SELECT count(*) FROM pg_amproc AS p4
+                         WHERE p4.amprocfamily = p2.oid AND
+                           p4.amproclefttype = p3.amproclefttype AND
+                           p4.amprocrighttype = p3.amprocrighttype);
+ amname | opfname | amproclefttype | amprocrighttype 
+--------+---------+----------------+-----------------
+(0 rows)
+
  -- Also, check if there are any pg_opclass entries that don't seem to have
--- pg_amproc support.
+-- pg_amproc support.  Again, GIN has to be checked separately.
  SELECT amname, opcname, count(*)
  FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
       LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
           amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname <> 'gin'
  GROUP BY amname, amsupport, opcname, amprocfamily
  HAVING count(*) != amsupport OR amprocfamily IS NULL;
   amname | opcname | count 
  --------+---------+-------
  (0 rows)
  
+SELECT amname, opcname, count(*)
+FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
+     LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
+         amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname = 'gin'
+GROUP BY amname, amsupport, opcname, amprocfamily
+HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
+ amname | opcname | count 
+--------+---------+-------
+(0 rows)
+
  -- Unfortunately, we can't check the amproc link very well because the
  -- signature of the function may be different for different support routines
  -- or different base data types.
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out

index 4b8929361a85e09e6fc30f4bddce7a0612b06f07..3ae6a671dad4dd63deda69b4a41436ba92543aed 100644 (file)
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -232,7 +232,7 @@ ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
  SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
                                              to_tsvector                                             
  ----------------------------------------------------------------------------------------------------
- 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
  (1 row)
  
  SELECT to_tsquery('ispell_tst', 'footballklubber');
@@ -256,7 +256,7 @@ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
  SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
                                              to_tsvector                                             
  ----------------------------------------------------------------------------------------------------
- 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
  (1 row)
  
  SELECT to_tsquery('hunspell_tst', 'footballklubber');
@@ -287,7 +287,7 @@ SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgs
  SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
                         to_tsvector                        
  ----------------------------------------------------------
- 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8
+ 'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
  (1 row)
  
  -- test thesaurus in configuration
@@ -307,12 +307,12 @@ SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
  SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
                           to_tsvector                         
  -------------------------------------------------------------
- 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10
+ 'abbrev':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7
  (1 row)
  
  SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
                        to_tsvector                      
  -------------------------------------------------------
- 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
+ 'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8
  (1 row)
  
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out

index 1f7e6ffbafcd38016fa70ecc575c7340f2c68785..468a623e973f7c91a2d48506ee6276a7ca779678 100644 (file)
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -92,6 +92,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
      39
  (1 row)
  
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count 
+-------
+   494
+(1 row)
+
  create index wowidx on test_tsvector using gist (a);
  SET enable_seqscan=OFF;
  SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
@@ -130,6 +136,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
      39
  (1 row)
  
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count 
+-------
+   494
+(1 row)
+
  RESET enable_seqscan;
  DROP INDEX wowidx;
  CREATE INDEX wowidx ON test_tsvector USING gin (a);
@@ -170,6 +182,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
      39
  (1 row)
  
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count 
+-------
+   494
+(1 row)
+
    
  RESET enable_seqscan;
  INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
@@ -380,7 +398,7 @@ SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.
  <i <b> wow  < jqw <> qwerty');
                                                                                                                                                                                                                                                                                                                                                                                                                        to_tsvector                                                                                                                                                                                                                                                                                                                                                                                                                       
  --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- 'ad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
+ '+4.0e-10':26 '-4.2':58,60 '/?ad=qwe&dw':7,10,14,22 '/?ad=qwe&dw=%20%32':25 '/awdf/dwqe/4325':46 '/usr/local/fff':45 '/wqe-324/ewr':49 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':61 '234.435':30 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':54,55,56 '455':31 '4aew.werc.ewr':15 '5.005':32 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100':24 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 'ad':17 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':37 'dw':19 'efd.r':3 'ewr1':43 'ewri2':44 'gist.c':52 'gist.h':50 'gist.h.c':51 'hjwer':42 'jf':39 'jqw':64 'qwe':2,18,27,28,35 'qwe-wer':34 'qwer':38 'qwerti':65 'qwqwe':29 'readlin':53,57,59 'rewt/ewr':47 'sdjk':40 'teodor@stack.net':33 'wefjn':48 'wer':36 'wow':63 'www.com':4
  (1 row)
  
  SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/?  ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
@@ -852,7 +870,7 @@ SET default_text_search_config=simple;
  SELECT to_tsvector('SKIES My booKs');
          to_tsvector         
  ----------------------------
- 'my':2 'books':3 'skies':1
+ 'books':3 'my':2 'skies':1
  (1 row)
  
  SELECT plainto_tsquery('SKIES My booKs');
@@ -871,7 +889,7 @@ SET default_text_search_config=english;
  SELECT to_tsvector('SKIES My booKs');
     to_tsvector    
  ------------------
- 'sky':1 'book':3
+ 'book':3 'sky':1
  (1 row)
  
  SELECT plainto_tsquery('SKIES My booKs');
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out

index 4672f099e0ac8613e8abe14709a71283d70b51cf..6284fb6181340dd806089384c8d74e9392869132 100644 (file)
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -44,31 +44,31 @@ SELECT E'''1 \\''2'''::tsvector;
  SELECT E'''1 \\''2''3'::tsvector;
    tsvector   
  -------------
- '3' '1 ''2'
+ '1 ''2' '3'
  (1 row)
  
  SELECT E'''1 \\''2'' 3'::tsvector;
    tsvector   
  -------------
- '3' '1 ''2'
+ '1 ''2' '3'
  (1 row)
  
  SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector;
       tsvector     
  ------------------
- '4' ' 3' '1 ''2'
+ ' 3' '1 ''2' '4'
  (1 row)
  
  SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
                  tsvector                
  ----------------------------------------
- '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
+ 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
  (1 row)
  
  SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
                 tsvectorin               
  ----------------------------------------
- '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
+ 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
  (1 row)
  
  SELECT '''w'':4A,3B,2C,1D,5 a:8';
@@ -86,13 +86,13 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
  SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
                          setweight                         
  ----------------------------------------------------------
- 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+ 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
  (1 row)
  
  SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
       strip     
  ---------------
- 'a' 'w' 'asd'
+ 'a' 'asd' 'w'
  (1 row)
  
  --Base tsquery test
@@ -336,6 +336,12 @@ SELECT $$'\\as'$$::tsquery;
   '\\as'
  (1 row)
  
+SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
+                 tsquery                  
+------------------------------------------
+ ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo'
+(1 row)
+
  SELECT 'a' < 'b & c'::tsquery as "true";
   true 
  ------
@@ -439,12 +445,96 @@ SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
   t
  (1 row)
  
+SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a b:89  ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a b:89  ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a b:89  ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
  SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
    ts_rank  
  -----------
   0.0911891
  (1 row)
  
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
+  ts_rank  
+-----------
+ 0.0303964
+(1 row)
+
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+  ts_rank  
+-----------
+ 0.0911891
+(1 row)
+
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+  ts_rank  
+-----------
+ 0.0911891
+(1 row)
+
  SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
   ts_rank  
  ----------
@@ -481,6 +571,30 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
          0.3
  (1 row)
  
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+ ts_rank_cd 
+------------
+        0.3
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+ ts_rank_cd 
+------------
+        0.3
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
+ ts_rank_cd 
+------------
+        0.5
+(1 row)
+
  SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
   ts_rank_cd 
  ------------
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index deaa3714feff0977870550f1548109d43254fb51..84b2d800f780c43365a37c7aac50bb9aa46f33aa 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -746,25 +746,47 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
  
  -- Detect missing pg_amproc entries: should have as many support functions
  -- as AM expects for each datatype combination supported by the opfamily.
+-- GIN is a special case because it has an optional support function.
  
  SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
  FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
  WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+    p1.amname <> 'gin' AND
      p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
                       WHERE p4.amprocfamily = p2.oid AND
                             p4.amproclefttype = p3.amproclefttype AND
                             p4.amprocrighttype = p3.amprocrighttype);
  
+-- Similar check for GIN, allowing one optional proc
+
+SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
+FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
+WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+    p1.amname = 'gin' AND
+    p1.amsupport - 1 >  (SELECT count(*) FROM pg_amproc AS p4
+                         WHERE p4.amprocfamily = p2.oid AND
+                           p4.amproclefttype = p3.amproclefttype AND
+                           p4.amprocrighttype = p3.amprocrighttype);
+
  -- Also, check if there are any pg_opclass entries that don't seem to have
--- pg_amproc support.
+-- pg_amproc support.  Again, GIN has to be checked separately.
  
  SELECT amname, opcname, count(*)
  FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
       LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
           amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname <> 'gin'
  GROUP BY amname, amsupport, opcname, amprocfamily
  HAVING count(*) != amsupport OR amprocfamily IS NULL;
  
+SELECT amname, opcname, count(*)
+FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
+     LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
+         amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname = 'gin'
+GROUP BY amname, amsupport, opcname, amprocfamily
+HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
+
  -- Unfortunately, we can't check the amproc link very well because the
  -- signature of the function may be different for different support routines
  -- or different base data types.
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql

index 3cf34524d349f0fa8a0746ead37df577b9538fce..dc7427d3b1df30e4e3c1e9e4328aa9b0c99daa09 100644 (file)
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -47,6 +47,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
  
  create index wowidx on test_tsvector using gist (a);
  
@@ -58,6 +59,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
  
  RESET enable_seqscan;
  
@@ -73,6 +75,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
  SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
    
  RESET enable_seqscan;
  INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql

index 49afc3b23a285819e609f81b9d364a7c81be46a6..fd7c7024f5e547018323b1766eb229afb8c541d9 100644 (file)
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -58,6 +58,7 @@ SELECT '1&(2&(4&(5|6)))'::tsquery;
  SELECT '1&(2&(4&(5|!6)))'::tsquery;
  SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
  SELECT $$'\\as'$$::tsquery;
+SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
  
  SELECT 'a' < 'b & c'::tsquery as "true";
  SELECT 'a' > 'b & c'::tsquery as "false";
@@ -81,8 +82,23 @@ SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B' as "true";
  SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true";
  SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false";
  SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
+SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
+SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
+SELECT 'a b:89  ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+SELECT 'a b:89  ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+SELECT 'a b:89  ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
+
+SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
  
  SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
  SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
  SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s');
  SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s');
@@ -90,6 +106,10 @@ SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a & s');
  SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s');
  
  SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
  SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
  SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
  SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 16 May 2008 16:31:02 +0000 (16:31 +0000)
doc/src/sgml/datatype.sgml		patch \| blob \| blame \| history
doc/src/sgml/gin.sgml		patch \| blob \| blame \| history
doc/src/sgml/textsearch.sgml		patch \| blob \| blame \| history
doc/src/sgml/xindex.sgml		patch \| blob \| blame \| history
src/backend/access/gin/ginget.c		patch \| blob \| blame \| history
src/backend/access/gin/ginscan.c		patch \| blob \| blame \| history
src/backend/access/gin/ginutil.c		patch \| blob \| blame \| history
src/backend/optimizer/path/indxpath.c		patch \| blob \| blame \| history
src/backend/tsearch/to_tsany.c		patch \| blob \| blame \| history
src/backend/tsearch/ts_parse.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsginidx.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsgistidx.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsquery.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsquery_util.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsrank.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsvector.c		patch \| blob \| blame \| history
src/backend/utils/adt/tsvector_op.c		patch \| blob \| blame \| history
src/include/access/gin.h		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_am.h		patch \| blob \| blame \| history
src/include/catalog/pg_amproc.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/tsearch/ts_public.h		patch \| blob \| blame \| history
src/include/tsearch/ts_type.h		patch \| blob \| blame \| history
src/include/tsearch/ts_utils.h		patch \| blob \| blame \| history
src/test/regress/expected/opr_sanity.out		patch \| blob \| blame \| history
src/test/regress/expected/tsdicts.out		patch \| blob \| blame \| history
src/test/regress/expected/tsearch.out		patch \| blob \| blame \| history
src/test/regress/expected/tstypes.out		patch \| blob \| blame \| history
src/test/regress/sql/opr_sanity.sql		patch \| blob \| blame \| history
src/test/regress/sql/tsearch.sql		patch \| blob \| blame \| history
src/test/regress/sql/tstypes.sql		patch \| blob \| blame \| history