Further cleanup of ts_headline code.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 9 Apr 2020 19:38:43 +0000 (15:38 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 9 Apr 2020 19:38:43 +0000 (15:38 -0400)
Suppress a probably-meaningless uninitialized-variable warning
(induced by my previous patch, I'm sorry to say).

Improve mark_hl_fragments()'s test for overlapping cover strings:
it failed to consider the possibility that the current string is
strictly within another one.  That's unlikely given the preceding
splitting into MaxWords fragments, but I don't think it's impossible.

Discussion: https://postgr.es/m/16345-2e0cf5cddbdcd3b4@postgresql.org

src/backend/tsearch/wparser_def.c

index e5c4d7cb04911a87cb834f82c453ecafb5f8b199..48e55e141a4115945865e8002b38c77403e9eeee 100644 (file)
@@ -2337,22 +2337,24 @@ mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
                        /* Mark the chosen fragments (covers) */
                        mark_fragment(prs, highlightall, startpos, endpos);
                        num_f++;
-                       /* exclude overlapping covers */
+                       /* Exclude covers overlapping this one from future consideration */
                        for (i = 0; i < numcovers; i++)
                        {
                                if (i != minI &&
-                                       ((covers[i].startpos >= covers[minI].startpos &&
-                                         covers[i].startpos <= covers[minI].endpos) ||
-                                        (covers[i].endpos >= covers[minI].startpos &&
-                                         covers[i].endpos <= covers[minI].endpos)))
+                                       ((covers[i].startpos >= startpos &&
+                                         covers[i].startpos <= endpos) ||
+                                        (covers[i].endpos >= startpos &&
+                                         covers[i].endpos <= endpos) ||
+                                        (covers[i].startpos < startpos &&
+                                         covers[i].endpos > endpos)))
                                        covers[i].excluded = true;
                        }
                }
                else
-                       break;
+                       break;                          /* no selectable covers remain */
        }
 
-       /* show at least min_words if we have not marked anything */
+       /* show the first min_words words if we have not marked anything */
        if (num_f <= 0)
        {
                startpos = endpos = curlen = 0;
@@ -2510,6 +2512,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
                if (bestlen < 0)
                {
                        curlen = 0;
+                       pose = 0;
                        for (i = 0; i < prs->curwords && curlen < min_words; i++)
                        {
                                if (!NONWORDTOKEN(prs->words[i].type))