summaryrefslogtreecommitdiff
path: root/src/backend/regex
diff options
context:
space:
mode:
authorTom Lane2012-07-07 21:39:50 +0000
committerTom Lane2012-07-07 21:39:50 +0000
commitc6aae3042be5249e672b731ebeb21875b5343010 (patch)
treecc5a7b837854edb3573a915766377501a7ac949e /src/backend/regex
parenta184e4db83c80d28103774ced984c7790fbd44ba (diff)
Simplify and document regex library's compact-NFA representation.
The previous coding abused the first element of a cNFA state's arcs list to hold a per-state flag bit, which was confusing, undocumented, and not even particularly efficient. Get rid of that in favor of a separate "stflags" vector. Since there's only one bit in use, I chose to allocate a char per state; we could possibly replace this with a bitmap at some point, but that would make accesses a little slower. It's already about 8X smaller than before, so let's not get overly tense. Also document the representation better than it was before, which is to say not at all. This patch is a byproduct of investigations towards extracting a "fixed prefix" string from the compact-NFA representation of regex patterns. Might need to back-patch it if we decide to back-patch that fix, but for now it's just code cleanup so I'll just put it in HEAD.
Diffstat (limited to 'src/backend/regex')
-rw-r--r--src/backend/regex/regc_nfa.c34
-rw-r--r--src/backend/regex/regcomp.c2
-rw-r--r--src/backend/regex/rege_dfa.c11
3 files changed, 23 insertions, 24 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 66a361ee2ff..085842c92b7 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
for (s = nfa->states; s != NULL; s = s->next)
{
nstates++;
- narcs += 1 + s->nouts + 1;
- /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
+ narcs += s->nouts + 1; /* need one extra for endmarker */
}
+ cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
- if (cnfa->states == NULL || cnfa->arcs == NULL)
+ if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
{
+ if (cnfa->stflags != NULL)
+ FREE(cnfa->stflags);
if (cnfa->states != NULL)
FREE(cnfa->states);
if (cnfa->arcs != NULL)
@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
for (s = nfa->states; s != NULL; s = s->next)
{
assert((size_t) s->no < nstates);
+ cnfa->stflags[s->no] = 0;
cnfa->states[s->no] = ca;
- ca->co = 0; /* clear and skip flags "arc" */
- ca++;
first = ca;
for (a = s->outs; a != NULL; a = a->outchain)
switch (a->type)
@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
/* mark no-progress states */
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
- cnfa->states[a->to->no]->co = 1;
- cnfa->states[nfa->pre->no]->co = 1;
+ cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
+ cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
}
/*
@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
{
assert(cnfa->nstates != 0); /* not empty already */
cnfa->nstates = 0;
+ FREE(cnfa->stflags);
FREE(cnfa->states);
FREE(cnfa->arcs);
}
@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
fprintf(f, ", haslacons");
fprintf(f, "\n");
for (st = 0; st < cnfa->nstates; st++)
- dumpcstate(st, cnfa->states[st], cnfa, f);
+ dumpcstate(st, cnfa, f);
fflush(f);
}
#endif
@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
*/
static void
dumpcstate(int st,
- struct carc * ca,
struct cnfa * cnfa,
FILE *f)
{
- int i;
+ struct carc * ca;
int pos;
- fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
+ fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
pos = 1;
- for (i = 1; ca[i].co != COLORLESS; i++)
+ for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
{
- if (ca[i].co < cnfa->ncolors)
- fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
+ if (ca->co < cnfa->ncolors)
+ fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
else
- fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
- ca[i].to);
+ fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
if (pos == 5)
{
fprintf(f, "\n");
@@ -1653,7 +1653,7 @@ dumpcstate(int st,
else
pos++;
}
- if (i == 1 || pos != 1)
+ if (ca == cnfa->states[st] || pos != 1)
fprintf(f, "\n");
fflush(f);
}
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 57055f04abb..ceb6f0f8737 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *);
-static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+static void dumpcstate(int, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
static struct cvec *newcvec(int, int);
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index da7a0bf402f..7a7ba5b89cf 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */
gotstate = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(css->states, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
+ for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
if (ca->co == co)
{
BSET(d->work, ca->to);
gotstate = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (!cnfa->states[ca->to]->co)
+ if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
@@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
- for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
- ca++)
+ for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
{
- if (ca->co <= cnfa->ncolors)
+ if (ca->co < cnfa->ncolors)
continue; /* NOTE CONTINUE */
sawlacons = 1;
if (ISBSET(d->work, ca->to))
@@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */
dolacons = 1;
if (ca->to == cnfa->post)
ispost = 1;
- if (!cnfa->states[ca->to]->co)
+ if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d :> %d\n", i, ca->to));
}