summaryrefslogtreecommitdiff
path: root/contrib/fuzzystrmatch
diff options
context:
space:
mode:
authorBruce Momjian2001-08-07 18:16:01 +0000
committerBruce Momjian2001-08-07 18:16:01 +0000
commitcdd02cdf00f020292cdcc8dafa5475e0149c34a6 (patch)
tree317c5c67dbecac152c6c67766ac86d3f5c8e9536 /contrib/fuzzystrmatch
parentfb5b85a8f2663883f1e3287680dbe0db54e1b617 (diff)
Sorry - I should have gotten to this sooner. Here's a patch which you should
be able to apply against what you just committed. It rolls soundex into fuzzystrmatch. Remove soundex/metaphone and merge into fuzzystrmatch. Joe Conway
Diffstat (limited to 'contrib/fuzzystrmatch')
-rw-r--r--contrib/fuzzystrmatch/README.fuzzystrmatch10
-rw-r--r--contrib/fuzzystrmatch/README.soundex62
-rw-r--r--contrib/fuzzystrmatch/fuzzystrmatch.c68
-rw-r--r--contrib/fuzzystrmatch/fuzzystrmatch.h29
-rw-r--r--contrib/fuzzystrmatch/fuzzystrmatch.sql.in6
5 files changed, 165 insertions, 10 deletions
diff --git a/contrib/fuzzystrmatch/README.fuzzystrmatch b/contrib/fuzzystrmatch/README.fuzzystrmatch
index 9109ea7e06e..8d310b4ade4 100644
--- a/contrib/fuzzystrmatch/README.fuzzystrmatch
+++ b/contrib/fuzzystrmatch/README.fuzzystrmatch
@@ -20,6 +20,11 @@
* Metaphone was originally created by Lawrence Philips and presented in article
* in "Computer Language" December 1990 issue.
*
+ * soundex()
+ * -----------
+ * Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
+ * to soundex() for consistency.
+ *
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
@@ -40,12 +45,15 @@
*/
-Version 0.1 (3 August, 2001):
+Version 0.2 (7 August, 2001):
Functions to calculate the degree to which two strings match in a "fuzzy" way
Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel
Release Notes:
+ Version 0.2
+ - folded soundex contrib into this one
+
Version 0.1
- initial release
diff --git a/contrib/fuzzystrmatch/README.soundex b/contrib/fuzzystrmatch/README.soundex
new file mode 100644
index 00000000000..b9a61495422
--- /dev/null
+++ b/contrib/fuzzystrmatch/README.soundex
@@ -0,0 +1,62 @@
+NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy
+ after combining soundex code into the fuzzystrmatch contrib
+---------------------------------------------------------------------
+The Soundex system is a method of matching similar sounding names
+(or any words) to the same code. It was initially used by the
+United States Census in 1880, 1900, and 1910, but it has little use
+beyond English names (or the English pronunciation of names), and
+it is not a linguistic tool.
+
+The following are some usage examples:
+
+SELECT soundex('hello world!');
+
+CREATE TABLE s (nm text)\g
+
+insert into s values ('john')\g
+insert into s values ('joan')\g
+insert into s values ('wobbly')\g
+
+select * from s
+where soundex(nm) = soundex('john')\g
+
+select a.nm, b.nm from s a, s b
+where soundex(a.nm) = soundex(b.nm)
+and a.oid <> b.oid\g
+
+CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
+'select soundex($1) = soundex($2)'
+LANGUAGE 'sql'\g
+
+CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
+'select soundex($1) < soundex($2)'
+LANGUAGE 'sql'\g
+
+CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
+'select soundex($1) > soundex($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
+'select soundex($1) <= soundex($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
+'select soundex($1) >= soundex($2)'
+LANGUAGE 'sql';
+
+CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
+'select soundex($1) <> soundex($2)'
+LANGUAGE 'sql';
+
+DROP OPERATOR #= (text,text)\g
+
+CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
+commutator = #=)\g
+
+SELECT *
+FROM s
+WHERE text_sx_eq(nm,'john')\g
+
+SELECT *
+from s
+where s.nm #= 'john';
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index 80e9e69cfad..4a2d3f932e7 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -629,3 +629,71 @@ int _metaphone (
return(META_SUCCESS);
} /* END metaphone */
+
+
+/*
+ * soundex(text) returns text
+ *
+ * SQL-callable entry point.  The text argument is converted to a C string
+ * with textout, its Soundex code is computed by _soundex(), and the code
+ * is converted back to a text value with textin.
+ */
+PG_FUNCTION_INFO_V1(soundex);
+
+Datum
+soundex(PG_FUNCTION_ARGS)
+{
+	char	   *input = _textout(PG_GETARG_TEXT_P(0));
+	char		code[SOUNDEX_LEN + 1];	/* SOUNDEX_LEN characters plus NUL */
+
+	_soundex(input, code);
+
+	PG_RETURN_TEXT_P(_textin(code));
+}
+
+/*
+ * Look up the soundex digit for a single character.
+ *
+ * Returns '0' -- the digit soundex_table assigns to letters that carry no
+ * code, such as vowels -- for anything that is not an ASCII letter.  The
+ * soundex_code() macro indexes soundex_table without a range check, and
+ * isalpha() may accept locale-specific letters outside A-Z, so the range
+ * is verified here before the table is touched.
+ */
+static char
+soundex_code_or_zero(char letter)
+{
+	int			upper = toupper((unsigned char) letter);
+
+	if (upper < 'A' || upper > 'Z')
+		return '0';
+	return soundex_code(upper);
+}
+
+/*
+ * Compute the Soundex code of instr and store it in outstr.
+ *
+ * instr:	NUL-terminated input string.
+ * outstr:	output buffer of at least SOUNDEX_LEN + 1 bytes.
+ *
+ * Leading non-alphabetic characters are skipped.  If nothing is left,
+ * outstr is set to the empty string.  Otherwise outstr receives the first
+ * letter in upper case followed by digits, padded with '0' to exactly
+ * SOUNDEX_LEN characters, and is NUL-terminated.
+ *
+ * A character contributes a digit only when its code is not '0' and
+ * differs from the code of the character immediately before it in the
+ * input.  Non-letters count as code '0', so they never contribute a digit
+ * and never cause a table lookup outside soundex_table.
+ */
+static void
+_soundex(const char *instr, char *outstr)
+{
+	int			count;
+	char		prev_code;
+
+	AssertArg(instr);
+	AssertArg(outstr);
+
+	outstr[SOUNDEX_LEN] = '\0';
+
+	/* Skip leading non-alphabetic characters */
+	while (instr[0] && !isalpha((unsigned char) instr[0]))
+		++instr;
+
+	/* No string left */
+	if (!instr[0])
+	{
+		outstr[0] = '\0';
+		return;
+	}
+
+	/* Take the first letter as is, remembering its code for the next step */
+	prev_code = soundex_code_or_zero(*instr);
+	*outstr++ = (char) toupper((unsigned char) *instr++);
+
+	count = 1;
+	while (*instr && count < SOUNDEX_LEN)
+	{
+		char		cur_code = soundex_code_or_zero(*instr);
+
+		/* Emit only real codes that differ from the preceding character's */
+		if (cur_code != '0' && cur_code != prev_code)
+		{
+			*outstr++ = cur_code;
+			++count;
+		}
+
+		prev_code = cur_code;
+		++instr;
+	}
+
+	/* Fill with 0's */
+	while (count < SOUNDEX_LEN)
+	{
+		*outstr++ = '0';
+		++count;
+	}
+}
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h
index 5ed9c3746e3..9a9be1e9a01 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.h
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.h
@@ -51,31 +51,42 @@
#include "utils/builtins.h"
-#define MAX_LEVENSHTEIN_STRLEN 255
-#define MAX_METAPHONE_STRLEN 255
-
-typedef struct dynmatrix
-{
- int value;
-} dynmat;
-
/*
* External declarations
*/
extern Datum levenshtein(PG_FUNCTION_ARGS);
extern Datum metaphone(PG_FUNCTION_ARGS);
+extern Datum soundex(PG_FUNCTION_ARGS);
/*
- * Internal declarations
+ * Soundex
*/
+/* Worker that computes the soundex code of instr into outstr */
+static void _soundex(const char *instr, char *outstr);
+/* Number of characters in a soundex code, not counting the terminating NUL */
+#define SOUNDEX_LEN 4
+/* Build a text Datum from a C string by calling textin */
+#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
+/* Get a C string pointer from a text value by calling textout */
+#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
+/* Soundex digit per letter; each entry lines up with the letter listed here */
+/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
+static const char *soundex_table = "01230120022455012623010202";
+/*
+ * Soundex digit for a letter, looked up case-insensitively.
+ * There is no range check: the argument must be one of A-Z or a-z,
+ * otherwise the computed index falls outside soundex_table.
+ */
+#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
+
+
+/*
+ * Levenshtein
+ */
#define STRLEN(p) strlen(p)
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p) ((p)++)
+#define MAX_LEVENSHTEIN_STRLEN 255
+
+/*
+ * Metaphone
+ */
+#define MAX_METAPHONE_STRLEN 255
/*
* Original code by Michael G Schwern starts here.
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
index 4125b75d144..b02f1b28ebc 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in
@@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int
CREATE FUNCTION metaphone (text,int) RETURNS text
AS 'MODULE_PATHNAME','metaphone' LANGUAGE 'c' with (iscachable, isstrict);
+
+-- soundex(text): Soundex code of the argument.
+CREATE FUNCTION soundex(text) RETURNS text
+ AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
+
+-- text_soundex(text): second SQL name bound to the same C function.
+-- It must be declared isstrict as well: the C code fetches its argument
+-- without a NULL check, so a NULL input must never reach it.
+CREATE FUNCTION text_soundex(text) RETURNS text
+ AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);