Improve make_tsvector() to handle empty input, and simplify its callers.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Jul 2017 17:13:47 +0000 (13:13 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Jul 2017 17:13:47 +0000 (13:13 -0400)
It seemed a bit silly that each caller of make_tsvector() was laboriously
special-casing the situation where no lexemes were found, when it would
be easy and much more bullet-proof to make make_tsvector() handle that.

src/backend/tsearch/to_tsany.c
src/backend/utils/adt/tsvector_op.c

index b410a49908add20e0ccf2a15dc14f9f1ab2a8336..35d9ab276cfd7a76a638ff6f50fbb9d4c2cf3e55 100644 (file)
@@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l)
 
 /*
  * make value of tsvector, given parsed text
+ *
+ * Note: frees prs->words and subsidiary data.
  */
 TSVector
 make_tsvector(ParsedText *prs)
@@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs)
    char       *str;
    int         stroff;
 
-   prs->curwords = uniqueWORD(prs->words, prs->curwords);
+   /* Merge duplicate words */
+   if (prs->curwords > 0)
+       prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+   /* Determine space needed */
    for (i = 0; i < prs->curwords; i++)
    {
        lenstr += prs->words[i].len;
@@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs)
            ptr->haspos = 0;
        ptr++;
    }
-   pfree(prs->words);
+
+   if (prs->words)
+       pfree(prs->words);
+
    return in;
 }
 
@@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
 
    prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6;   /* just estimation of word's
                                                 * number */
-   if (prs.lenwords == 0)
+   if (prs.lenwords < 2)
        prs.lenwords = 2;
    prs.curwords = 0;
    prs.pos = 0;
    prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
    parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
+
    PG_FREE_IF_COPY(in, 1);
 
-   if (prs.curwords)
-       out = make_tsvector(&prs);
-   else
-   {
-       pfree(prs.words);
-       out = palloc(CALCDATASIZE(0, 0));
-       SET_VARSIZE(out, CALCDATASIZE(0, 0));
-       out->size = 0;
-   }
+   out = make_tsvector(&prs);
 
-   PG_RETURN_POINTER(out);
+   PG_RETURN_TSVECTOR(out);
 }
 
 Datum
@@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
 
    iterate_jsonb_string_values(jb, &state, add_to_tsvector);
 
-   if (prs.curwords > 0)
-       result = make_tsvector(&prs);
-   else
-   {
-       /*
-        * There weren't any string elements in jsonb, so we need to return an
-        * empty vector
-        */
-       result = palloc(CALCDATASIZE(0, 0));
-       SET_VARSIZE(result, CALCDATASIZE(0, 0));
-       result->size = 0;
-   }
-
    PG_FREE_IF_COPY(jb, 1);
 
+   result = make_tsvector(&prs);
+
    PG_RETURN_TSVECTOR(result);
 }
 
@@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
 
    iterate_json_string_values(json, &state, add_to_tsvector);
 
-   if (prs.curwords > 0)
-       result = make_tsvector(&prs);
-   else
-   {
-       /*
-        * There weren't any string elements in json, so we need to return an
-        * empty vector
-        */
-       result = palloc(CALCDATASIZE(0, 0));
-       SET_VARSIZE(result, CALCDATASIZE(0, 0));
-       result->size = 0;
-   }
-
    PG_FREE_IF_COPY(json, 1);
 
+   result = make_tsvector(&prs);
+
    PG_RETURN_TSVECTOR(result);
 }
 
index 2d7407c29cbde9671fc43ecbcbd462149ce9ee7e..822520299ed525633f474858ac816fe0e464ca4e 100644 (file)
@@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    }
 
    /* make tsvector value */
-   if (prs.curwords)
-   {
-       datum = PointerGetDatum(make_tsvector(&prs));
-       isnull = false;
-       rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-                                            1, &tsvector_attr_num,
-                                            &datum, &isnull);
-       pfree(DatumGetPointer(datum));
-   }
-   else
-   {
-       TSVector    out = palloc(CALCDATASIZE(0, 0));
-
-       SET_VARSIZE(out, CALCDATASIZE(0, 0));
-       out->size = 0;
-       datum = PointerGetDatum(out);
-       isnull = false;
-       rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-                                            1, &tsvector_attr_num,
-                                            &datum, &isnull);
-       pfree(prs.words);
-   }
+   datum = TSVectorGetDatum(make_tsvector(&prs));
+   isnull = false;
+
+   /* and insert it into tuple */
+   rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+                                        1, &tsvector_attr_num,
+                                        &datum, &isnull);
+
+   pfree(DatumGetPointer(datum));
 
    return PointerGetDatum(rettuple);
 }