summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorTom Lane2012-07-10 22:00:39 +0000
committerTom Lane2012-07-10 22:00:39 +0000
commit18c8dc32ce6759a8837f01e3dd0036cd19ee8683 (patch)
treed2d058f0f362cf22af5856a4e20cc7f814835f87 /src/include
parentf12960d8c9ce454a37c2082549b7766ce36627d2 (diff)
Back-patch fix for extraction of fixed prefixes from regular expressions.
Back-patch of commits 628cbb50ba80c83917b07a7609ddec12cda172d0 and c6aae3042be5249e672b731ebeb21875b5343010. This has been broken since 7.3, so back-patch to all supported branches.
Diffstat (limited to 'src/include')
-rw-r--r--src/include/regex/regex.h4
-rw-r--r--src/include/regex/regguts.h39
-rw-r--r--src/include/utils/builtins.h2
3 files changed, 33 insertions, 12 deletions
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index cec4b837cd1..616c2c6450d 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -156,6 +156,9 @@ typedef struct
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
+/* non-error result codes for pg_regprefix */
+#define REG_PREFIX (-1) /* identified a common prefix */
+#define REG_EXACT (-2) /* identified an exact match */
@@ -164,6 +167,7 @@ typedef struct
*/
extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
+extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);
extern void pg_regfree(regex_t *);
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
extern void pg_set_regex_collation(Oid collation);
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index e8415799ec6..e1e406f4eaa 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -199,19 +199,21 @@ struct colordesc
color sub; /* open subcolor, if any; or free-chain ptr */
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
struct arc *arcs; /* chain of all arcs of this color */
+ chr firstchr; /* char first assigned to this color */
int flags; /* bit values defined next */
#define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */
-#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
+#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
union tree *block; /* block of solid color, if any */
};
/*
* The color map itself
*
- * Only the "tree" part is used at execution time, and that only via the
- * GETCOLOR() macro. Possibly that should be separated from the compile-time
- * data.
+ * Much of the data in the colormap struct is only used at compile time.
+ * However, the bulk of the space usage is in the "tree" structure, so it's
+ * not clear that there's much point in converting the rest to a more compact
+ * form when compilation is finished.
*/
struct colormap
{
@@ -279,15 +281,14 @@ struct state;
struct arc
{
- int type;
-#define ARCFREE '\0'
+ int type; /* 0 if free, else an NFA arc type code */
color co;
struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */
- struct arc *outchain; /* *from's outs chain or free chain */
+ struct arc *outchain; /* link in *from's outs chain or free chain */
#define freechain outchain
- struct arc *inchain; /* *to's ins chain */
- struct arc *colorchain; /* color's arc chain */
+ struct arc *inchain; /* link in *to's ins chain */
+ struct arc *colorchain; /* link in color's arc chain */
struct arc *colorchainRev; /* back-link in color's arc chain */
};
@@ -339,24 +340,38 @@ struct nfa
/*
* definitions for compacted NFA
+ *
+ * The main space savings in a compacted NFA is from making the arcs as small
+ * as possible. We store only the transition color and next-state number for
+ * each arc. The list of out arcs for each state is an array beginning at
+ * cnfa.states[statenumber], and terminated by a dummy carc struct with
+ * co == COLORLESS.
+ *
+ * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
+ * Plain arcs just store the transition color number as "co". LACON arcs
+ * store the lookahead constraint number plus cnfa.ncolors as "co". LACON
+ * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
*/
struct carc
{
color co; /* COLORLESS is list terminator */
- int to; /* state number */
+ int to; /* next-state number */
};
struct cnfa
{
int nstates; /* number of states */
- int ncolors; /* number of colors */
+ int ncolors; /* number of colors (max color in use + 1) */
int flags;
-#define HASLACONS 01 /* uses lookahead constraints */
+#define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ char *stflags; /* vector of per-state flags bytes */
+#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */
struct carc **states; /* vector of pointers to outarc lists */
+ /* states[n] are pointers into a single malloc'd array of arcs */
struct carc *arcs; /* the area for the lists */
};
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index d1e83707606..6ec3cce8226 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -568,6 +568,8 @@ extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
+extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive,
+ Oid collation, bool *exact);
/* regproc.c */
extern Datum regprocin(PG_FUNCTION_ARGS);