From c6aae3042be5249e672b731ebeb21875b5343010 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 7 Jul 2012 17:39:50 -0400 Subject: Simplify and document regex library's compact-NFA representation. The previous coding abused the first element of a cNFA state's arcs list to hold a per-state flag bit, which was confusing, undocumented, and not even particularly efficient. Get rid of that in favor of a separate "stflags" vector. Since there's only one bit in use, I chose to allocate a char per state; we could possibly replace this with a bitmap at some point, but that would make accesses a little slower. It's already about 8X smaller than before, so let's not get overly tense. Also document the representation better than it was before, which is to say not at all. This patch is a byproduct of investigations towards extracting a "fixed prefix" string from the compact-NFA representation of regex patterns. Might need to back-patch it if we decide to back-patch that fix, but for now it's just code cleanup so I'll just put it in HEAD. --- src/backend/regex/regc_nfa.c | 34 +++++++++++++++++----------------- src/backend/regex/regcomp.c | 2 +- src/backend/regex/rege_dfa.c | 11 +++++------ 3 files changed, 23 insertions(+), 24 deletions(-) (limited to 'src/backend/regex') diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 66a361ee2ff..085842c92b7 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -1330,14 +1330,16 @@ compact(struct nfa * nfa, for (s = nfa->states; s != NULL; s = s->next) { nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ + narcs += s->nouts + 1; /* need one extra for endmarker */ } + cnfa->stflags = (char *) MALLOC(nstates * sizeof(char)); cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) + if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL) { + if (cnfa->stflags != NULL) + FREE(cnfa->stflags); if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) @@ -1359,9 +1361,8 @@ compact(struct nfa * nfa, for (s = nfa->states; s != NULL; s = s->next) { assert((size_t) s->no < nstates); + cnfa->stflags[s->no] = 0; cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ - ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) switch (a->type) @@ -1392,8 +1393,8 @@ compact(struct nfa * nfa, /* mark no-progress states */ for (a = nfa->pre->outs; a != NULL; a = a->outchain) - cnfa->states[a->to->no]->co = 1; - cnfa->states[nfa->pre->no]->co = 1; + cnfa->stflags[a->to->no] = CNFA_NOPROGRESS; + cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS; } /* @@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa) { assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; + FREE(cnfa->stflags); FREE(cnfa->states); FREE(cnfa->arcs); } @@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa, fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) - dumpcstate(st, cnfa->states[st], cnfa, f); + dumpcstate(st, cnfa, f); fflush(f); } #endif @@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa, */ static void dumpcstate(int st, - struct carc * ca, struct cnfa * cnfa, FILE *f) { - int i; + struct carc * ca; int pos; - fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); + fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : "."); pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) { - if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); + if (ca->co < cnfa->ncolors) + fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to); else - fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, - ca[i].to); + fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to); if (pos == 5) { fprintf(f, "\n"); @@ -1653,7 +1653,7 @@ dumpcstate(int st, else pos++; } - if (i == 1 || pos != 1) + if (ca == cnfa->states[st] || pos != 1) fprintf(f, "\n"); fflush(f); } diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 57055f04abb..ceb6f0f8737 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *); static int dumprarcs(struct arc *, struct state *, FILE *, int); static void dumparc(struct arc *, struct state *, FILE *); static void dumpcnfa(struct cnfa *, FILE *); -static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); +static void dumpcstate(int, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ static struct cvec *newcvec(int, int); diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index da7a0bf402f..7a7ba5b89cf 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */ gotstate = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(css->states, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) if (ca->co == co) { BSET(d->work, ca->to); gotstate = 1; if (ca->to == cnfa->post) ispost = 1; - if (!cnfa->states[ca->to]->co) + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } @@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */ dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; - ca++) + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { - if (ca->co <= cnfa->ncolors) + if (ca->co < cnfa->ncolors) continue; /* NOTE CONTINUE */ sawlacons = 1; if (ISBSET(d->work, ca->to)) @@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */ dolacons = 1; if (ca->to == cnfa->post) ispost = 1; - if (!cnfa->states[ca->to]->co) + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) noprogress = 0; FDEBUG(("%d :> %d\n", i, ca->to)); } -- cgit v1.2.3