diff options
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/Makefile | 9 | ||||
-rw-r--r-- | src/common/kwlookup.c | 73 |
2 files changed, 39 insertions, 43 deletions
diff --git a/src/common/Makefile b/src/common/Makefile
index 317b071e026..d0c2b970eb3 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -63,6 +63,11 @@ OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o file_utils.o restricted_token.o
 OBJS_SHLIB = $(OBJS_FRONTEND:%.o=%_shlib.o)
 OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)
 
+# where to find gen_keywordlist.pl and subsidiary files
+TOOLSDIR = $(top_srcdir)/src/tools
+GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl
+GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm
+
 all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
 
 distprep: kwlist_d.h
@@ -118,8 +123,8 @@ libpgcommon_srv.a: $(OBJS_SRV)
 	$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
 
 # generate SQL keyword lookup table to be included into keywords*.o.
-kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl
-	$(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $<
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS)
+	$(GEN_KEYWORDLIST) --extern $<
 
 # Dependencies of keywords*.o need to be managed explicitly to make sure
 # that you don't get broken parsing code, even in a non-enable-depend build.
diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c
index d72842e7592..6545480b5c7 100644
--- a/src/common/kwlookup.c
+++ b/src/common/kwlookup.c
@@ -35,60 +35,51 @@
  * receive a different case-normalization mapping.
  */
 int
-ScanKeywordLookup(const char *text,
+ScanKeywordLookup(const char *str,
 				  const ScanKeywordList *keywords)
 {
-	int len,
-		i;
-	char word[NAMEDATALEN];
-	const char *kw_string;
-	const uint16 *kw_offsets;
-	const uint16 *low;
-	const uint16 *high;
-
-	len = strlen(text);
+	size_t len;
+	int h;
+	const char *kw;
 
+	/*
+	 * Reject immediately if too long to be any keyword. This saves useless
+	 * hashing and downcasing work on long strings.
+	 */
+	len = strlen(str);
 	if (len > keywords->max_kw_len)
-		return -1; /* too long to be any keyword */
-
-	/* We assume all keywords are shorter than NAMEDATALEN. */
-	Assert(len < NAMEDATALEN);
+		return -1;
 
 	/*
-	 * Apply an ASCII-only downcasing. We must not use tolower() since it may
-	 * produce the wrong translation in some locales (eg, Turkish).
+	 * Compute the hash function. We assume it was generated to produce
+	 * case-insensitive results. Since it's a perfect hash, we need only
+	 * match to the specific keyword it identifies.
 	 */
-	for (i = 0; i < len; i++)
-	{
-		char ch = text[i];
+	h = keywords->hash(str, len);
 
-		if (ch >= 'A' && ch <= 'Z')
-			ch += 'a' - 'A';
-		word[i] = ch;
-	}
-	word[len] = '\0';
+	/* An out-of-range result implies no match */
+	if (h < 0 || h >= keywords->num_keywords)
+		return -1;
 
 	/*
-	 * Now do a binary search using plain strcmp() comparison.
+	 * Compare character-by-character to see if we have a match, applying an
+	 * ASCII-only downcasing to the input characters. We must not use
+	 * tolower() since it may produce the wrong translation in some locales
+	 * (eg, Turkish).
 	 */
-	kw_string = keywords->kw_string;
-	kw_offsets = keywords->kw_offsets;
-	low = kw_offsets;
-	high = kw_offsets + (keywords->num_keywords - 1);
-	while (low <= high)
+	kw = GetScanKeyword(h, keywords);
+	while (*str != '\0')
 	{
-		const uint16 *middle;
-		int difference;
+		char ch = *str++;
 
-		middle = low + (high - low) / 2;
-		difference = strcmp(kw_string + *middle, word);
-		if (difference == 0)
-			return middle - kw_offsets;
-		else if (difference < 0)
-			low = middle + 1;
-		else
-			high = middle - 1;
+		if (ch >= 'A' && ch <= 'Z')
+			ch += 'a' - 'A';
+		if (ch != *kw++)
+			return -1;
 	}
+	if (*kw != '\0')
+		return -1;
 
-	return -1;
+	/* Success! */
+	return h;
 }