diff options
author | Thomas G. Lockhart | 2002-06-11 15:44:38 +0000 |
---|---|---|
committer | Thomas G. Lockhart | 2002-06-11 15:44:38 +0000 |
commit | ea01a451ccec55cada68cb45a54519a750fd3d60 (patch) | |
tree | e6f38191dbf7383eaf35f41009b37a13b2ce1fe7 /src/backend | |
parent | 090dd22de67e6a7e50cfc3efb92a8472fa8750ba (diff) |
Implement SQL99 OVERLAY(). Allows substitution of a substring in a string.
Implement SQL99 SIMILAR TO as a synonym for our existing operator "~".
Implement SQL99 regular expression SUBSTRING(string FROM pat FOR escape).
Extend the definition to make the FOR clause optional.
Define textregexsubstr() to actually implement this feature.
Update the regression test to include these new string features.
All tests pass.
Rename the regular expression support routines from "pg95_xxx" to "pg_xxx".
Define CREATE CHARACTER SET in the parser per SQL99. No implementation yet.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/parser/gram.y | 98 | ||||
-rw-r--r-- | src/backend/parser/keywords.c | 6 | ||||
-rw-r--r-- | src/backend/parser/scan.l | 26 | ||||
-rw-r--r-- | src/backend/regex/regcomp.c | 8 | ||||
-rw-r--r-- | src/backend/regex/regerror.c | 4 | ||||
-rw-r--r-- | src/backend/regex/regexec.c | 4 | ||||
-rw-r--r-- | src/backend/regex/regfree.c | 2 | ||||
-rw-r--r-- | src/backend/regex/retest.c | 6 | ||||
-rw-r--r-- | src/backend/utils/adt/regexp.c | 99 |
9 files changed, 203 insertions, 50 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index fbf63db85e2..b8fd3d03489 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.320 2002/06/11 13:40:50 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.321 2002/06/11 15:41:37 thomas Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -227,10 +227,10 @@ static void doNegateFloat(Value *v); %type <node> join_outer, join_qual %type <jtype> join_type -%type <list> extract_list, position_list +%type <list> extract_list, overlay_list, position_list %type <list> substr_list, trim_list %type <ival> opt_interval -%type <node> substr_from, substr_for +%type <node> overlay_placing, substr_from, substr_for %type <boolean> opt_binary, opt_using, opt_instead, opt_cursor %type <boolean> opt_with_copy, index_opt_unique, opt_verbose, opt_full @@ -336,7 +336,7 @@ static void doNegateFloat(Value *v); FALSE_P, FETCH, FLOAT_P, FOR, FORCE, FOREIGN, FORWARD, FREEZE, FROM, FULL, FUNCTION, - GLOBAL, GRANT, GROUP_P, + GET, GLOBAL, GRANT, GROUP_P, HANDLER, HAVING, HOUR_P, ILIKE, IMMEDIATE, IMMUTABLE, IMPLICIT, IN_P, INCREMENT, INDEX, INHERITS, @@ -356,16 +356,16 @@ static void doNegateFloat(Value *v); NUMERIC, OF, OFF, OFFSET, OIDS, OLD, ON, ONLY, OPERATOR, OPTION, OR, ORDER, - OUT_P, OUTER_P, OVERLAPS, OWNER, + OUT_P, OUTER_P, OVERLAPS, OVERLAY, OWNER, - PARTIAL, PASSWORD, PATH_P, PENDANT, POSITION, PRECISION, PRIMARY, + PARTIAL, PASSWORD, PATH_P, PENDANT, PLACING, POSITION, PRECISION, PRIMARY, PRIOR, PRIVILEGES, PROCEDURE, PROCEDURAL, READ, REAL, REFERENCES, REINDEX, RELATIVE, RENAME, REPLACE, RESET, RESTRICT, RETURNS, REVOKE, RIGHT, ROLLBACK, ROW, RULE, SCHEMA, SCROLL, SECOND_P, SECURITY, SELECT, SEQUENCE, SERIALIZABLE, - SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SMALLINT, SOME, + SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SIMILAR, SMALLINT, SOME, STABLE, START, STATEMENT, STATISTICS, STDIN, STDOUT, STORAGE, STRICT, SUBSTRING, SYSID, @@ -402,7 +402,7 @@ static void doNegateFloat(Value *v); %right NOT %right '=' %nonassoc '<' '>' -%nonassoc LIKE ILIKE +%nonassoc LIKE ILIKE SIMILAR %nonassoc ESCAPE %nonassoc OVERLAPS %nonassoc BETWEEN @@ -420,6 +420,7 @@ static void doNegateFloat(Value *v); %right UMINUS %left '[' ']' %left '(' ')' +%left COLLATE %left TYPECAST %left '.' %% @@ -2139,6 +2140,14 @@ DefineStmt: CREATE AGGREGATE func_name definition n->definition = $4; $$ = (Node *)n; } + | CREATE CHARACTER SET opt_as any_name GET definition opt_collate + { + DefineStmt *n = makeNode(DefineStmt); + n->defType = CHARACTER; + n->defnames = $5; + n->definition = $7; + $$ = (Node *)n; + } ; definition: '(' def_list ')' { $$ = $2; } @@ -4978,9 +4987,18 @@ qual_all_Op: all_Op * it's factored out just to eliminate redundant coding. */ a_expr: c_expr - { $$ = $1; } + { $$ = $1; } | a_expr TYPECAST Typename { $$ = makeTypeCast($1, $3); } + | a_expr COLLATE ColId + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName($3); + n->args = makeList1($1); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) n; + } | a_expr AT TIME ZONE c_expr { FuncCall *n = makeNode(FuncCall); @@ -5088,6 +5106,30 @@ a_expr: c_expr n->agg_distinct = FALSE; $$ = (Node *) makeSimpleA_Expr(OP, "!~~*", $1, (Node *) n); } + + | a_expr SIMILAR TO a_expr %prec SIMILAR + { $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); } + | a_expr SIMILAR TO a_expr ESCAPE a_expr + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("like_escape"); + n->args = makeList2($4, $6); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n); + } + | a_expr NOT SIMILAR TO a_expr %prec SIMILAR + { $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); } + | a_expr NOT SIMILAR TO a_expr ESCAPE a_expr + { + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("like_escape"); + n->args = makeList2($5, $7); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n); + } + /* NullTest clause * Define SQL92-style Null test clause. * Allow two forms described in the standard: @@ -5568,6 +5610,20 @@ c_expr: columnref n->agg_distinct = FALSE; $$ = (Node *)n; } + | OVERLAY '(' overlay_list ')' + { + /* overlay(A PLACING B FROM C FOR D) is converted to + * substring(A, 1, C-1) || B || substring(A, C+1, C+D) + * overlay(A PLACING B FROM C) is converted to + * substring(A, 1, C-1) || B || substring(A, C+1, C+char_length(B)) + */ + FuncCall *n = makeNode(FuncCall); + n->funcname = SystemFuncName("overlay"); + n->args = $3; + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *)n; + } | POSITION '(' position_list ')' { /* position(A in B) is converted to position(B, A) */ @@ -5706,6 +5762,25 @@ extract_arg: IDENT { $$ = $1; } | SCONST { $$ = $1; } ; +/* OVERLAY() arguments + * SQL99 defines the OVERLAY() function: + * o overlay(text placing text from int for int) + * o overlay(text placing text from int) + */ +overlay_list: a_expr overlay_placing substr_from substr_for + { + $$ = makeList4($1, $2, $3, $4); + } + | a_expr overlay_placing substr_from + { + $$ = makeList3($1, $2, $3); + } + ; + +overlay_placing: PLACING a_expr + { $$ = $2; } + ; + /* position_list uses b_expr not a_expr to avoid conflict with general IN */ position_list: b_expr IN_P b_expr @@ -6259,6 +6334,7 @@ unreserved_keyword: | FORCE | FORWARD | FUNCTION + | GET | GLOBAL | HANDLER | HOUR_P @@ -6404,6 +6480,7 @@ col_name_keyword: | NONE | NULLIF | NUMERIC + | OVERLAY | POSITION | REAL | SETOF @@ -6423,7 +6500,7 @@ col_name_keyword: * * Do not include POSITION, SUBSTRING, etc here since they have explicit * productions in a_expr to support the goofy SQL9x argument syntax. - * - thomas 2000-11-28 + * - thomas 2000-11-28 */ func_name_keyword: AUTHORIZATION @@ -6445,6 +6522,7 @@ func_name_keyword: | OUTER_P | OVERLAPS | RIGHT + | SIMILAR | VERBOSE ; diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index c60b2df2a6d..dca44c448f9 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.112 2002/06/11 13:40:51 wieck Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.113 2002/06/11 15:41:37 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -129,6 +129,7 @@ static const ScanKeyword ScanKeywords[] = { {"from", FROM}, {"full", FULL}, {"function", FUNCTION}, + {"get", GET}, {"global", GLOBAL}, {"grant", GRANT}, {"group", GROUP_P}, @@ -211,11 +212,13 @@ static const ScanKeyword ScanKeywords[] = { {"out", OUT_P}, {"outer", OUTER_P}, {"overlaps", OVERLAPS}, + {"overlay", OVERLAY}, {"owner", OWNER}, {"partial", PARTIAL}, {"password", PASSWORD}, {"path", PATH_P}, {"pendant", PENDANT}, + {"placing", PLACING}, {"position", POSITION}, {"precision", PRECISION}, {"primary", PRIMARY}, @@ -251,6 +254,7 @@ static const ScanKeyword ScanKeywords[] = { {"setof", SETOF}, {"share", SHARE}, {"show", SHOW}, + {"similar", SIMILAR}, {"smallint", SMALLINT}, {"some", SOME}, {"stable", STABLE}, diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index a6bc3549e17..e3d88b4521b 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.94 2002/05/02 18:44:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.95 2002/06/11 15:41:37 thomas Exp $ * *------------------------------------------------------------------------- */ @@ -92,14 +92,14 @@ unsigned char unescape_single_char(unsigned char c); * We use exclusive states for quoted strings, extended comments, * and to eliminate parsing troubles for numeric strings. * Exclusive states: - * <xbit> bit string literal + * <xb> bit string literal * <xc> extended C-style comments - thomas 1997-07-12 * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27 * <xh> hexadecimal numeric string - thomas 1997-11-16 * <xq> quoted strings - thomas 1997-07-30 */ -%x xbit +%x xb %x xc %x xd %x xh @@ -107,10 +107,10 @@ unsigned char unescape_single_char(unsigned char c); /* Bit string */ -xbitstart [bB]{quote} -xbitstop {quote} -xbitinside [^']* -xbitcat {quote}{whitespace_with_newline}{quote} +xbstart [bB]{quote} +xbstop {quote} +xbinside [^']* +xbcat {quote}{whitespace_with_newline}{quote} /* Hexadecimal number */ @@ -285,13 +285,13 @@ other . <xc><<EOF>> { yyerror("unterminated /* comment"); } -{xbitstart} { +{xbstart} { token_start = yytext; - BEGIN(xbit); + BEGIN(xb); startlit(); addlitchar('b'); } -<xbit>{xbitstop} { +<xb>{xbstop} { BEGIN(INITIAL); if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') yyerror("invalid bit string input"); @@ -299,14 +299,14 @@ other . return BITCONST; } <xh>{xhinside} | -<xbit>{xbitinside} { +<xb>{xbinside} { addlit(yytext, yyleng); } <xh>{xhcat} | -<xbit>{xbitcat} { +<xb>{xbcat} { /* ignore */ } -<xbit><<EOF>> { yyerror("unterminated bit string literal"); } +<xb><<EOF>> { yyerror("unterminated bit string literal"); } {xhstart} { token_start = yytext; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index bea464bd2ae..d6f7b26fa1a 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -174,9 +174,10 @@ static int never = 0; /* for use in asserts; shuts lint up */ /* * regcomp - interface for parser and compilation + * returns 0 success, otherwise REG_something */ -int /* 0 success, otherwise REG_something */ -pg95_regcomp(regex_t *preg, const char *pattern, int cflags) +int +pg_regcomp(regex_t *preg, const char *pattern, int cflags) { struct parse pa; struct re_guts *g; @@ -224,7 +225,6 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags) (void) pg_mb2wchar((unsigned char *) pattern, wcp); len = pg_wchar_strlen(wcp); #else - len = strlen((char *) pattern); #endif } @@ -305,7 +305,7 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags) /* win or lose, we're done */ if (p->error != 0) /* lose */ - pg95_regfree(preg); + pg_regfree(preg); return p->error; } diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index 3fa00946aa4..fb12cba3048 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -117,8 +117,8 @@ static struct rerr */ /* ARGSUSED */ size_t -pg95_regerror(int errcode, const regex_t *preg, - char *errbuf, size_t errbuf_size) +pg_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size) { struct rerr *r; size_t len; diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 47677304413..06459ef1dbc 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -149,8 +149,8 @@ do { \ * when choosing which matcher to call. */ int /* 0 success, REG_NOMATCH failure */ -pg95_regexec(const regex_t *preg, const char *string, size_t nmatch, - regmatch_t *pmatch, int eflags) +pg_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t *pmatch, int eflags) { struct re_guts *g = preg->re_g; diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c index 87e0c99ef96..5672fcf240f 100644 --- a/src/backend/regex/regfree.c +++ b/src/backend/regex/regfree.c @@ -49,7 +49,7 @@ * regfree - free everything */ void -pg95_regfree(regex_t *preg) +pg_regfree(regex_t *preg) { struct re_guts *g; diff --git a/src/backend/regex/retest.c b/src/backend/regex/retest.c index eee8ef09ae3..ca5d6c5394a 100644 --- a/src/backend/regex/retest.c +++ b/src/backend/regex/retest.c @@ -1,7 +1,7 @@ /* * a simple regexp debug program * - * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.4 1999/07/17 20:17:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.5 2002/06/11 15:41:37 thomas Exp $ */ #include "postgres.h" @@ -22,7 +22,7 @@ main() if (p) *p = '\0'; - sts = pg95_regcomp(&re, buf, 1); + sts = pg_regcomp(&re, buf, 1); printf("regcomp: parses \"%s\" and returns %d\n", buf, sts); for (;;) { @@ -33,7 +33,7 @@ main() if (p) *p = '\0'; - sts = pg95_regexec(&re, buf, 0, 0, 0); + sts = pg_regexec(&re, buf, 0, 0, 0); printf("regexec: returns %d\n", sts); } } diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index a61717a8602..9afc58feee7 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.38 2001/11/05 17:46:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.39 2002/06/11 15:41:37 thomas Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -19,7 +19,7 @@ * * agc - incorporated Keith Bostic's Berkeley regex code into * the tree for all ports. To distinguish this regex code from any that - * is existent on a platform, I've prepended the string "pg95_" to + * is existent on a platform, I've prepended the string "pg_" to * the functions regcomp, regerror, regexec and regfree. * Fixed a bug that was originally a typo by me, where `i' was used * instead of `oldest' when compiling regular expressions - benign @@ -53,11 +53,13 @@ struct cached_re_str static int rec = 0; /* # of cached re's */ static struct cached_re_str rev[MAX_CACHED_RES]; /* cached re's */ static unsigned long lru; /* system lru tag */ +static int pg_lastre = 0; /* attempt to compile `re' as an re, then match it against text */ /* cflags - flag to regcomp indicates case sensitivity */ static bool -RE_compile_and_execute(text *text_re, char *text, int cflags) +RE_compile_and_execute(text *text_re, char *text, int cflags, + int nmatch, regmatch_t *pmatch) { char *re; int oldest; @@ -68,9 +70,27 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) re = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(text_re))); + if ((i = pg_lastre) < rec) + { + if (rev[i].cre_s) + { + if (strcmp(rev[i].cre_s, re) == 0 && + rev[i].cre_type == cflags) + { + rev[i].cre_lru = ++lru; + pfree(re); + return (pg_regexec(&rev[i].cre_re, + text, nmatch, + pmatch, 0) == 0); + } + } + } + /* find a previously compiled regular expression */ for (i = 0; i < rec; i++) { + if (i == pg_lastre) continue; + if (rev[i].cre_s) { if (strcmp(rev[i].cre_s, re) == 0 && @@ -78,9 +98,9 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) { rev[i].cre_lru = ++lru; pfree(re); - return (pg95_regexec(&rev[i].cre_re, - text, 0, - (regmatch_t *) NULL, 0) == 0); + return (pg_regexec(&rev[i].cre_re, + text, nmatch, + pmatch, 0) == 0); } } } @@ -107,7 +127,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) if (rev[i].cre_lru > lru) lru = rev[i].cre_lru; } - pg95_regfree(&rev[oldest].cre_re); + pg_regfree(&rev[oldest].cre_re); /* * use malloc/free for the cre_s field because the storage has to @@ -118,7 +138,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) } /* compile the re */ - regcomp_result = pg95_regcomp(&rev[oldest].cre_re, re, cflags); + regcomp_result = pg_regcomp(&rev[oldest].cre_re, re, cflags); if (regcomp_result == 0) { /* @@ -130,16 +150,16 @@ RE_compile_and_execute(text *text_re, char *text, int cflags) rev[oldest].cre_type = cflags; pfree(re); /* agc - fixed an old typo here */ - return (pg95_regexec(&rev[oldest].cre_re, text, 0, - (regmatch_t *) NULL, 0) == 0); + return (pg_regexec(&rev[oldest].cre_re, text, + nmatch, pmatch, 0) == 0); } else { char errMsg[1000]; /* re didn't compile */ - pg95_regerror(regcomp_result, &rev[oldest].cre_re, errMsg, - sizeof(errMsg)); + pg_regerror(regcomp_result, &rev[oldest].cre_re, errMsg, + sizeof(errMsg)); elog(ERROR, "Invalid regular expression: %s", errMsg); } @@ -167,7 +187,7 @@ fixedlen_regexeq(char *s, text *p, int charlen, int cflags) memcpy(sterm, s, charlen); sterm[charlen] = '\0'; - result = RE_compile_and_execute(p, sterm, cflags); + result = RE_compile_and_execute(p, sterm, cflags, 0, NULL); pfree(sterm); @@ -230,7 +250,7 @@ textregexne(PG_FUNCTION_ARGS) /* * routines that use the regexp stuff, but ignore the case. - * for this, we use the REG_ICASE flag to pg95_regcomp + * for this, we use the REG_ICASE flag to pg_regcomp */ @@ -281,3 +301,54 @@ nameicregexne(PG_FUNCTION_ARGS) strlen(NameStr(*n)), REG_ICASE | REG_EXTENDED)); } + + +/* textregexsubstr() + * Return a substring matched by a regular expression. + */ +Datum +textregexsubstr(PG_FUNCTION_ARGS) +{ + text *s = PG_GETARG_TEXT_P(0); + text *p = PG_GETARG_TEXT_P(1); + text *result; + char *sterm; + int len; + bool match; + int nmatch = 1; + regmatch_t pmatch; + + /* be sure sterm is null-terminated */ + len = VARSIZE(s) - VARHDRSZ; + sterm = (char *) palloc(len + 1); + memcpy(sterm, VARDATA(s), len); + sterm[len] = '\0'; + /* We need the match info back from the pattern match + * to be able to actually extract the substring. + * It seems to be adequate to pass in a structure to return + * only one result. + */ + match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch); + pfree(sterm); + + /* match? then return the substring matching the pattern */ + if (match) + { + return (DirectFunctionCall3(text_substr, + PointerGetDatum(s), + Int32GetDatum(pmatch.rm_so+1), + Int32GetDatum(pmatch.rm_eo-pmatch.rm_so))); + } +#if 0 + /* otherwise, return a zero-length string */ + else + { + result = palloc(VARHDRSZ); + VARATT_SIZEP(result) = VARHDRSZ; + PG_RETURN_TEXT_P(result); + } +#endif + + /* not reached */ + PG_RETURN_NULL(); +} |