summary | refs | log | tree | commit | diff
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r-- src/common/Makefile 9
-rw-r--r-- src/common/kwlookup.c 73
2 files changed, 39 insertions, 43 deletions
diff --git a/src/common/Makefile b/src/common/Makefile
index 317b071e026..d0c2b970eb3 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -63,6 +63,11 @@ OBJS_FRONTEND = $(OBJS_COMMON) fe_memutils.o file_utils.o restricted_token.o
OBJS_SHLIB = $(OBJS_FRONTEND:%.o=%_shlib.o)
OBJS_SRV = $(OBJS_COMMON:%.o=%_srv.o)
+# where to find gen_keywordlist.pl and subsidiary files
+TOOLSDIR = $(top_srcdir)/src/tools
+GEN_KEYWORDLIST = $(PERL) -I $(TOOLSDIR) $(TOOLSDIR)/gen_keywordlist.pl
+GEN_KEYWORDLIST_DEPS = $(TOOLSDIR)/gen_keywordlist.pl $(TOOLSDIR)/PerfectHash.pm
+
all: libpgcommon.a libpgcommon_shlib.a libpgcommon_srv.a
distprep: kwlist_d.h
@@ -118,8 +123,8 @@ libpgcommon_srv.a: $(OBJS_SRV)
$(CC) $(CFLAGS) $(subst -DFRONTEND,, $(CPPFLAGS)) -c $< -o $@
# generate SQL keyword lookup table to be included into keywords*.o.
-kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(top_srcdir)/src/tools/gen_keywordlist.pl
- $(PERL) $(top_srcdir)/src/tools/gen_keywordlist.pl --extern $<
+kwlist_d.h: $(top_srcdir)/src/include/parser/kwlist.h $(GEN_KEYWORDLIST_DEPS)
+ $(GEN_KEYWORDLIST) --extern $<
# Dependencies of keywords*.o need to be managed explicitly to make sure
# that you don't get broken parsing code, even in a non-enable-depend build.
diff --git a/src/common/kwlookup.c b/src/common/kwlookup.c
index d72842e7592..6545480b5c7 100644
--- a/src/common/kwlookup.c
+++ b/src/common/kwlookup.c
@@ -35,60 +35,51 @@
* receive a different case-normalization mapping.
*/
int
-ScanKeywordLookup(const char *text,
+ScanKeywordLookup(const char *str,
const ScanKeywordList *keywords)
{
- int len,
- i;
- char word[NAMEDATALEN];
- const char *kw_string;
- const uint16 *kw_offsets;
- const uint16 *low;
- const uint16 *high;
-
- len = strlen(text);
+ size_t len;
+ int h;
+ const char *kw;
+ /*
+ * Reject immediately if too long to be any keyword. This saves useless
+ * hashing and downcasing work on long strings.
+ */
+ len = strlen(str);
if (len > keywords->max_kw_len)
- return -1; /* too long to be any keyword */
-
- /* We assume all keywords are shorter than NAMEDATALEN. */
- Assert(len < NAMEDATALEN);
+ return -1;
/*
- * Apply an ASCII-only downcasing. We must not use tolower() since it may
- * produce the wrong translation in some locales (eg, Turkish).
+ * Compute the hash function. We assume it was generated to produce
+ * case-insensitive results. Since it's a perfect hash, we need only
+ * match to the specific keyword it identifies.
*/
- for (i = 0; i < len; i++)
- {
- char ch = text[i];
+ h = keywords->hash(str, len);
- if (ch >= 'A' && ch <= 'Z')
- ch += 'a' - 'A';
- word[i] = ch;
- }
- word[len] = '\0';
+ /* An out-of-range result implies no match */
+ if (h < 0 || h >= keywords->num_keywords)
+ return -1;
/*
- * Now do a binary search using plain strcmp() comparison.
+ * Compare character-by-character to see if we have a match, applying an
+ * ASCII-only downcasing to the input characters. We must not use
+ * tolower() since it may produce the wrong translation in some locales
+ * (eg, Turkish).
*/
- kw_string = keywords->kw_string;
- kw_offsets = keywords->kw_offsets;
- low = kw_offsets;
- high = kw_offsets + (keywords->num_keywords - 1);
- while (low <= high)
+ kw = GetScanKeyword(h, keywords);
+ while (*str != '\0')
{
- const uint16 *middle;
- int difference;
+ char ch = *str++;
- middle = low + (high - low) / 2;
- difference = strcmp(kw_string + *middle, word);
- if (difference == 0)
- return middle - kw_offsets;
- else if (difference < 0)
- low = middle + 1;
- else
- high = middle - 1;
+ if (ch >= 'A' && ch <= 'Z')
+ ch += 'a' - 'A';
+ if (ch != *kw++)
+ return -1;
}
+ if (*kw != '\0')
+ return -1;
- return -1;
+ /* Success! */
+ return h;
}