/*-------------------------------------------------------------------------
 *
 * kwlookup.c
 *	  Key word lookup for PostgreSQL
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/common/kwlookup.c
 *
 *-------------------------------------------------------------------------
 */
#include "c.h"

#include "common/kwlookup.h"


/*
 * ScanKeywordLookup - see if a given word is a keyword
 *
 * The list of keywords to be matched against is passed as a ScanKeywordList.
 *
 * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
 * Callers typically use the keyword number to index into information
 * arrays, but that is no concern of this code.
 *
 * The match is done case-insensitively.  Note that we deliberately use a
 * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
 * even if we are in a locale where tolower() would produce more or different
 * translations.  This is to conform to the SQL99 spec, which says that
 * keywords are to be matched in this way even though non-keyword identifiers
 * receive a different case-normalization mapping.
 *
 * The lookup is a binary search, so it relies on the entries reachable
 * through keywords->kw_offsets being ordered according to strcmp().  Each
 * kw_offsets[] element is used as an offset into keywords->kw_string at
 * which a NUL-terminated keyword starts.
 */
int
ScanKeywordLookup(const char *text,
				  const ScanKeywordList *keywords)
{
	int			len,
				i;
	char		word[NAMEDATALEN];
	const char *kw_string;
	const uint16 *kw_offsets;
	int			low;
	int			high;

	len = strlen(text);

	if (len > keywords->max_kw_len)
		return -1;				/* too long to be any keyword */

	/* We assume all keywords are shorter than NAMEDATALEN. */
	Assert(len < NAMEDATALEN);

	/*
	 * Apply an ASCII-only downcasing.  We must not use tolower() since it may
	 * produce the wrong translation in some locales (eg, Turkish).
	 */
	for (i = 0; i < len; i++)
	{
		char		ch = text[i];

		if (ch >= 'A' && ch <= 'Z')
			ch += 'a' - 'A';
		word[i] = ch;
	}
	word[len] = '\0';

	/*
	 * Now do a binary search using plain strcmp() comparison.
	 *
	 * The search bounds are kept as integer indexes rather than pointers
	 * into kw_offsets[].  With a closed [low, high] interval the upper bound
	 * must be able to drop to -1 (empty list, or a word that sorts before
	 * the first keyword); that is fine for an int, whereas forming a pointer
	 * one element before the start of an array is undefined behavior in C.
	 */
	kw_string = keywords->kw_string;
	kw_offsets = keywords->kw_offsets;
	low = 0;
	high = keywords->num_keywords - 1;

	while (low <= high)
	{
		int			middle;
		int			difference;

		/* Written this way to avoid overflow in (low + high). */
		middle = low + (high - low) / 2;
		difference = strcmp(kw_string + kw_offsets[middle], word);
		if (difference == 0)
			return middle;
		else if (difference < 0)
			low = middle + 1;
		else
			high = middle - 1;
	}

	return -1;
}