diff options
| field | value | date |
|---|---|---|
| author | Bruce Momjian | 2001-08-07 18:16:01 +0000 |
| committer | Bruce Momjian | 2001-08-07 18:16:01 +0000 |
| commit | cdd02cdf00f020292cdcc8dafa5475e0149c34a6 (patch) | |
| tree | 317c5c67dbecac152c6c67766ac86d3f5c8e9536 /contrib/fuzzystrmatch | |
| parent | fb5b85a8f2663883f1e3287680dbe0db54e1b617 (diff) | |
Sorry - I should have gotten to this sooner. Here's a patch which you should
be able to apply against what you just committed. It rolls soundex into
fuzzystrmatch.
Remove soundex/metaphone and merge into fuzzystrmatch.
Joe Conway
Diffstat (limited to 'contrib/fuzzystrmatch')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/fuzzystrmatch/README.fuzzystrmatch | 10 |
| -rw-r--r-- | contrib/fuzzystrmatch/README.soundex | 62 |
| -rw-r--r-- | contrib/fuzzystrmatch/fuzzystrmatch.c | 68 |
| -rw-r--r-- | contrib/fuzzystrmatch/fuzzystrmatch.h | 29 |
| -rw-r--r-- | contrib/fuzzystrmatch/fuzzystrmatch.sql.in | 6 |
5 files changed, 165 insertions, 10 deletions
diff --git a/contrib/fuzzystrmatch/README.fuzzystrmatch b/contrib/fuzzystrmatch/README.fuzzystrmatch index 9109ea7e06e..8d310b4ade4 100644 --- a/contrib/fuzzystrmatch/README.fuzzystrmatch +++ b/contrib/fuzzystrmatch/README.fuzzystrmatch @@ -20,6 +20,11 @@ * Metaphone was originally created by Lawrence Philips and presented in article * in "Computer Language" December 1990 issue. * + * soundex() + * ----------- + * Folded existing soundex contrib into this one. Renamed text_soundex() (C function) + * to soundex() for consistency. + * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement * is hereby granted, provided that the above copyright notice and this @@ -40,12 +45,15 @@ */ -Version 0.1 (3 August, 2001): +Version 0.2 (7 August, 2001): Functions to calculate the degree to which two strings match in a "fuzzy" way Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel Release Notes: + Version 0.2 + - folded soundex contrib into this one + Version 0.1 - initial release diff --git a/contrib/fuzzystrmatch/README.soundex b/contrib/fuzzystrmatch/README.soundex new file mode 100644 index 00000000000..b9a61495422 --- /dev/null +++ b/contrib/fuzzystrmatch/README.soundex @@ -0,0 +1,62 @@ +NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy + after combining soundex code into the fuzzystrmatch contrib +--------------------------------------------------------------------- +The Soundex system is a method of matching similar sounding names +(or any words) to the same code. It was initially used by the +United States Census in 1880, 1900, and 1910, but it has little use +beyond English names (or the English pronunciation of names), and +it is not a linguistic tool. 
+ +The following are some usage examples: + +SELECT soundex('hello world!'); + +CREATE TABLE s (nm text)\g + +insert into s values ('john')\g +insert into s values ('joan')\g +insert into s values ('wobbly')\g + +select * from s +where soundex(nm) = soundex('john')\g + +select nm from s a, s b +where soundex(a.nm) = soundex(b.nm) +and a.oid <> b.oid\g + +CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS +'select soundex($1) = soundex($2)' +LANGUAGE 'sql'\g + +CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS +'select soundex($1) < soundex($2)' +LANGUAGE 'sql'\g + +CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS +'select soundex($1) > soundex($2)' +LANGUAGE 'sql'; + +CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS +'select soundex($1) <= soundex($2)' +LANGUAGE 'sql'; + +CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS +'select soundex($1) >= soundex($2)' +LANGUAGE 'sql'; + +CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS +'select soundex($1) <> soundex($2)' +LANGUAGE 'sql'; + +DROP OPERATOR #= (text,text)\g + +CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq, +commutator=text_sx_eq)\g + +SELECT * +FROM s +WHERE text_sx_eq(nm,'john')\g + +SELECT * +from s +where s.nm #= 'john'; diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c index 80e9e69cfad..4a2d3f932e7 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.c +++ b/contrib/fuzzystrmatch/fuzzystrmatch.c @@ -629,3 +629,71 @@ int _metaphone ( return(META_SUCCESS); } /* END metaphone */ + + +/* + * SQL function: soundex(text) returns text + */ +PG_FUNCTION_INFO_V1(soundex); + +Datum +soundex(PG_FUNCTION_ARGS) +{ + char outstr[SOUNDEX_LEN + 1]; + char *arg; + + arg = _textout(PG_GETARG_TEXT_P(0)); + + _soundex(arg, outstr); + + PG_RETURN_TEXT_P(_textin(outstr)); +} + +static void +_soundex(const char *instr, char *outstr) +{ + int count; + + AssertArg(instr); + AssertArg(outstr); + + outstr[SOUNDEX_LEN] = '\0'; + + /* Skip leading 
non-alphabetic characters */ + while (!isalpha((unsigned char) instr[0]) && instr[0]) + ++instr; + + /* No string left */ + if (!instr[0]) + { + outstr[0] = (char) 0; + return; + } + + /* Take the first letter as is */ + *outstr++ = (char) toupper((unsigned char) *instr++); + + count = 1; + while (*instr && count < SOUNDEX_LEN) + { + if (isalpha((unsigned char) *instr) && + soundex_code(*instr) != soundex_code(*(instr - 1))) + { + *outstr = soundex_code(instr[0]); + if (*outstr != '0') + { + ++outstr; + ++count; + } + } + ++instr; + } + + /* Fill with 0's */ + while (count < SOUNDEX_LEN) + { + *outstr = '0'; + ++outstr; + ++count; + } +} diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h index 5ed9c3746e3..9a9be1e9a01 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.h +++ b/contrib/fuzzystrmatch/fuzzystrmatch.h @@ -51,31 +51,42 @@ #include "utils/builtins.h" -#define MAX_LEVENSHTEIN_STRLEN 255 -#define MAX_METAPHONE_STRLEN 255 - -typedef struct dynmatrix -{ - int value; -} dynmat; - /* * External declarations */ extern Datum levenshtein(PG_FUNCTION_ARGS); extern Datum metaphone(PG_FUNCTION_ARGS); +extern Datum soundex(PG_FUNCTION_ARGS); /* - * Internal declarations + * Soundex */ +static void _soundex(const char *instr, char *outstr); +#define SOUNDEX_LEN 4 +#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) +#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str))) +/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ +static const char *soundex_table = "01230120022455012623010202"; +#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A'] + + +/* + * Levenshtein + */ #define STRLEN(p) strlen(p) #define CHAREQ(p1, p2) (*(p1) == *(p2)) #define NextChar(p) ((p)++) +#define MAX_LEVENSHTEIN_STRLEN 255 + +/* + * Metaphone + */ +#define MAX_METAPHONE_STRLEN 255 /* * Original code by Michael G Schwern starts here. 
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in index 4125b75d144..b02f1b28ebc 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.sql.in +++ b/contrib/fuzzystrmatch/fuzzystrmatch.sql.in @@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int CREATE FUNCTION metaphone (text,int) RETURNS text AS 'MODULE_PATHNAME','metaphone' LANGUAGE 'c' with (iscachable, isstrict); + +CREATE FUNCTION soundex(text) RETURNS text + AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict); + +CREATE FUNCTION text_soundex(text) RETURNS text + AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c'; |
