summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane 2013-02-13 19:07:26 +0000
committer Tom Lane 2013-02-13 19:07:26 +0000
commit 697f5cadf5d81eeee3f3018d711ef33261dc7cd5 (patch)
tree 3c736f0a8e9eb1e8f4e707db6b7a9d5607003eb6
parent 06af3f96568646e7f878622bfa6e73f9eaa5f3f0 (diff)
Fix contrib/pg_trgm's similarity() function for trigram-free strings.
Cases such as similarity('', '') produced a NaN result due to computing 0/0. Per discussion, make it return zero instead. This appears to be the basic cause of bug #7867 from Michele Baravalle, although it remains unclear why her installation doesn't think Cyrillic letters are letters. Back-patch to all active branches.
-rw-r--r-- contrib/pg_trgm/expected/pg_trgm.out 6
-rw-r--r-- contrib/pg_trgm/sql/pg_trgm.sql 2
-rw-r--r-- contrib/pg_trgm/trgm_op.c 8
3 files changed, 14 insertions, 2 deletions
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 98385347295..641e8cecf6b 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -59,6 +59,12 @@ select similarity('wow',' WOW ');
1
(1 row)
+select similarity('---', '####---');
+ similarity
+------------
+ 0
+(1 row)
+
CREATE TABLE test_trgm(t text);
\copy test_trgm from 'data/trgm.data
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 13045827ac7..1bcf2c9146c 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -19,6 +19,8 @@ select show_trgm('a b C0*%^');
select similarity('wow','WOWa ');
select similarity('wow',' WOW ');
+select similarity('---', '####---');
+
CREATE TABLE test_trgm(t text);
\copy test_trgm from 'data/trgm.data
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 19b6747d68b..30965f818cb 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -311,6 +311,10 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
len1 = ARRNELEM(trg1);
len2 = ARRNELEM(trg2);
+ /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
+ if (len1 <= 0 || len2 <= 0)
+ return (float4) 0.0;
+
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
{
int res = CMPTRGM(ptr1, ptr2);
@@ -328,9 +332,9 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
}
#ifdef DIVUNION
- return ((((float4) count) / ((float4) (len1 + len2 - count))));
+ return ((float4) count) / ((float4) (len1 + len2 - count));
#else
- return (((float) count) / ((float) ((len1 > len2) ? len1 : len2)));
+ return ((float4) count) / ((float4) ((len1 > len2) ? len1 : len2));
#endif
}