t_isdigit(), t_isspace(), and t_isprint() do the same thing as the
standard isdigit(), isspace(), and isprint() but with multibyte and
encoding support. However, all the callers are only interested in
analyzing single-byte ASCII characters, so this extra layer is
overkill and we can replace the uses with the standard functions.
All the t_is*() functions in ts_locale.c are under scrutiny because
they don't use the common locale provider framework but instead use
the global libc locale settings. For the functions being touched by
this patch, we don't need all that anyway, as mentioned above, so the
simplest solution is to just remove them. The few remaining t_is*()
functions will need a different treatment in a separate patch.
pg_trgm has some compile-time options controlled by macros such as
KEEPONLYALNUM. These are not documented, and the non-default variant
is not covered by any test cases. As part of this undertaking, I'm
removing the non-default variant, as it is in the way of cleanup. So
in this case, the code path used when KEEPONLYALNUM is not defined is
gone.
Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
char *start;
*end = NULL;
- while (*in && t_isspace(in))
+ while (*in && isspace((unsigned char) *in))
in += pg_mblen(in);
if (!*in || *in == '#')
return NULL;
start = in;
- while (*in && !t_isspace(in))
+ while (*in && !isspace((unsigned char) *in))
in += pg_mblen(in);
*end = in;
case LQPRS_WAITFNUM:
if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (t_isdigit(ptr))
+ else if (isdigit((unsigned char) *ptr))
{
int low = atoi(ptr);
UNCHAR;
break;
case LQPRS_WAITSNUM:
- if (t_isdigit(ptr))
+ if (isdigit((unsigned char) *ptr))
{
int high = atoi(ptr);
case LQPRS_WAITCLOSE:
if (t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
- else if (!t_isdigit(ptr))
+ else if (!isdigit((unsigned char) *ptr))
UNCHAR;
break;
case LQPRS_WAITND:
}
else if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
- else if (!t_isdigit(ptr))
+ else if (!isdigit((unsigned char) *ptr))
UNCHAR;
break;
case LQPRS_WAITEND:
*lenval = charlen;
*flag = 0;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
ereturn(state->escontext, ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("operand syntax error")));
*/
#define LPADDING 2
#define RPADDING 1
-#define KEEPONLYALNUM
/*
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
* If this macro is disabled, the ~* and ~~* operators must be removed from
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0)
-#ifdef KEEPONLYALNUM
#define ISWORDCHR(c) (t_isalnum(c))
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
-#else
-#define ISWORDCHR(c) (!t_isspace(c))
-#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
-#endif
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
#define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */
{
ptrlen = pg_mblen(ptr);
/* ignore whitespace, but end src or trg */
- if (t_isspace(ptr))
+ if (isspace((unsigned char) *ptr))
{
if (state == 1)
state = 2;
char *lastchar;
/* Skip leading spaces */
- while (*in && t_isspace(in))
+ while (*in && isspace((unsigned char) *in))
in += pg_mblen(in);
/* Return NULL on empty lines */
lastchar = start = in;
/* Find end of word */
- while (*in && !t_isspace(in))
+ while (*in && !isspace((unsigned char) *in))
{
lastchar = in;
in += pg_mblen(in);
ptr = line;
/* is it a comment? */
- while (*ptr && t_isspace(ptr))
+ while (*ptr && isspace((unsigned char) *ptr))
ptr += pg_mblen(ptr);
if (t_iseq(ptr, '#') || *ptr == '\0' ||
errmsg("unexpected delimiter")));
state = TR_WAITSUBS;
}
- else if (!t_isspace(ptr))
+ else if (!isspace((unsigned char) *ptr))
{
beginwrd = ptr;
state = TR_INLEX;
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITSUBS;
}
- else if (t_isspace(ptr))
+ else if (isspace((unsigned char) *ptr))
{
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITLEX;
state = TR_INSUBS;
beginwrd = ptr + pg_mblen(ptr);
}
- else if (!t_isspace(ptr))
+ else if (!isspace((unsigned char) *ptr))
{
useasis = false;
beginwrd = ptr;
}
else if (state == TR_INSUBS)
{
- if (t_isspace(ptr))
+ if (isspace((unsigned char) *ptr))
{
if (ptr == beginwrd)
ereport(ERROR,
*sflagset = next;
while (**sflagset)
{
- if (t_isdigit(*sflagset))
+ if (isdigit((unsigned char) **sflagset))
{
if (!met_comma)
ereport(ERROR,
*sflagset)));
met_comma = true;
}
- else if (!t_isspace(*sflagset))
+ else if (!isspace((unsigned char) **sflagset))
{
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
while (*s)
{
/* we allow only single encoded flags for faster works */
- if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
+ if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
s++;
else
{
s = line;
while (*s)
{
- if (t_isspace(s))
+ if (isspace((unsigned char) *s))
{
*s = '\0';
break;
{
if (t_iseq(*str, '#'))
return false;
- else if (!t_isspace(*str))
+ else if (!isspace((unsigned char) **str))
{
int clen = pg_mblen(*str);
}
else /* state == PAE_INMASK */
{
- if (t_isspace(*str))
+ if (isspace((unsigned char) **str))
{
*next = '\0';
return true;
{
if (t_iseq(str, '#'))
return false;
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
*pmask = '\0';
state = PAE_WAIT_FIND;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
prepl += pg_mblen(str);
state = PAE_INREPL;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
COPYCHAR(pfind, str);
pfind += pg_mblen(str);
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
prepl += pg_mblen(str);
state = PAE_INREPL;
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
}
- else if (!t_isspace(str))
+ else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
char *sflag;
int clen;
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (!*s)
/* Get flag without \n */
sflag = sbuf;
- while (*s && !t_isspace(s) && *s != '\n')
+ while (*s && !isspace((unsigned char) *s) && *s != '\n')
{
clen = pg_mblen(s);
COPYCHAR(sflag, s);
while ((recoded = tsearch_readline(&trst)) != NULL)
{
- if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+ if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
{
pfree(recoded);
continue;
{
char *s = recoded + strlen("FLAG");
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s)
{
int fields_read;
- if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
+ if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
goto nextline;
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
s = findchar2(recoded, 'l', 'L');
if (s)
{
- while (*s && !t_isspace(s))
+ while (*s && !isspace((unsigned char) *s))
s += pg_mblen(s);
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s && pg_mblen(s) == 1)
s = recoded + 4; /* we need non-lowercased string */
flagflags = 0;
- while (*s && t_isspace(s))
+ while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s == '*')
s++;
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
- t_isspace(s))
+ isspace((unsigned char) *s))
{
oldformat = true;
goto nextline;
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
Conf->Spell[i]->p.flag)));
- if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
+ if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
*/
#define WC_BUF_LEN 3
-int
-t_isdigit(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isdigit(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswdigit((wint_t) character[0]);
-}
-
-int
-t_isspace(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isspace(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswspace((wint_t) character[0]);
-}
-
int
t_isalpha(const char *ptr)
{
return iswalnum((wint_t) character[0]);
}
-int
-t_isprint(const char *ptr)
-{
- int clen = pg_mblen(ptr);
- wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
-
- if (clen == 1 || database_ctype_is_c)
- return isprint(TOUCHAR(ptr));
-
- char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
-
- return iswprint((wint_t) character[0]);
-}
-
/*
* Set up to read a file using tsearch_readline(). This facility is
char *pbuf = line;
/* Trim trailing space */
- while (*pbuf && !t_isspace(pbuf))
+ while (*pbuf && !isspace((unsigned char) *pbuf))
pbuf += pg_mblen(pbuf);
*pbuf = '\0';
continue;
}
- if (!t_isdigit(ptr))
+ if (!isdigit((unsigned char) *ptr))
return false;
errno = 0;
* So we still treat OR literal as operation with possibly incorrect
* operand and will not search it as lexeme
*/
- if (!t_isspace(ptr))
+ if (!isspace((unsigned char) *ptr))
break;
}
/* generic syntax error message is fine */
return PT_ERR;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
{
return (state->count) ? PT_ERR : PT_END;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
return PT_ERR;
}
state->state = WAITOPERAND;
continue;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/*
* We rely on the tsvector parser to parse the value for
state->buf++;
continue;
}
- else if (!t_isspace(state->buf))
+ else if (!isspace((unsigned char) *state->buf))
{
/* insert implicit AND between operands */
state->state = WAITOPERAND;
else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR;
- else if (!t_isspace(state->prsbuf))
+ else if (!isspace((unsigned char) *state->prsbuf))
{
COPYCHAR(curpos, state->prsbuf);
curpos += pg_mblen(state->prsbuf);
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
- else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
(state->is_web && t_iseq(state->prsbuf, '"')))
{
}
else if (statecode == INPOSINFO)
{
- if (t_isdigit(state->prsbuf))
+ if (isdigit((unsigned char) *state->prsbuf))
{
if (posalen == 0)
{
PRSSYNTAXERROR;
WEP_SETWEIGHT(pos[npos - 1], 0);
}
- else if (t_isspace(state->prsbuf) ||
+ else if (isspace((unsigned char) *state->prsbuf) ||
*(state->prsbuf) == '\0')
RETURN_TOKEN;
- else if (!t_isdigit(state->prsbuf))
+ else if (!isdigit((unsigned char) *state->prsbuf))
PRSSYNTAXERROR;
}
else /* internal error */
#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
-extern int t_isdigit(const char *ptr);
-extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
extern int t_isalnum(const char *ptr);
-extern int t_isprint(const char *ptr);
extern char *lowerstr(const char *str);
extern char *lowerstr_with_len(const char *str, int len);