summaryrefslogtreecommitdiff
path: root/src/fe_utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/fe_utils')
-rw-r--r-- src/fe_utils/psqlscan.l | 124
1 files changed, 44 insertions, 80 deletions
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 02cb356f343..08dffde1ba0 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -114,12 +114,11 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
* <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string
* <xq> standard quoted strings
+ * <xqs> quote stop (detect continued strings)
* <xe> extended quoted strings (support backslash escape sequences)
* <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes
- * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
* <xus> quoted string with Unicode escapes
- * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
*
* Note: we intentionally don't mimic the backend's <xeu> state; we have
* no need to distinguish it from <xe> state, and no good way to get out
@@ -132,12 +131,11 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
%x xd
%x xh
%x xq
+%x xqs
%x xe
%x xdolq
%x xui
-%x xuiend
%x xus
-%x xusend
/*
* In order to make the world safe for Windows and Mac clients as well as
@@ -177,19 +175,18 @@ special_whitespace ({space}+|{comment}{newline})
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
+quote '
+/* If we see {quote} then {quotecontinue}, the quoted string continues */
+quotecontinue {whitespace_with_newline}{quote}
+
/*
- * To ensure that {quotecontinue} can be scanned without having to back up
- * if the full pattern isn't matched, we include trailing whitespace in
- * {quotestop}. This matches all cases where {quotecontinue} fails to match,
- * except for {quote} followed by whitespace and just one "-" (not two,
- * which would start a {comment}). To cover that we have {quotefail}.
- * The actions for {quotestop} and {quotefail} must throw back characters
- * beyond the quote proper.
+ * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
+ * {quotecontinue}. It might seem that this could just be {whitespace}*,
+ * but if there's a dash after {whitespace_with_newline}, it must be consumed
+ * to see if there's another dash --- which would start a {comment} and thus
+ * allow continuation of the {quotecontinue} token.
*/
-quote '
-quotestop {quote}{whitespace}*
-quotecontinue {quote}{whitespace_with_newline}{quote}
-quotefail {quote}{whitespace}*"-"
+quotecontinuefail {whitespace}*"-"?
/* Bit string
* It is tempting to scan the string for only those characters
@@ -250,21 +247,12 @@ xdstop {dquote}
xddouble {dquote}{dquote}
xdinside [^"]+
-/* Unicode escapes */
-uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
-/* error rule to avoid backup */
-uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
-
/* Quoted identifier with Unicode escapes */
xuistart [uU]&{dquote}
/* Quoted string with Unicode escapes */
xusstart [uU]&{quote}
-/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
-xustop1 {uescapefail}?
-xustop2 {uescape}
-
/* error rule to avoid backup */
xufailed [uU]&
@@ -438,20 +426,10 @@ other .
BEGIN(xb);
ECHO;
}
-<xb>{quotestop} |
-<xb>{quotefail} {
- yyless(1);
- BEGIN(INITIAL);
- ECHO;
- }
<xh>{xhinside} |
<xb>{xbinside} {
ECHO;
}
-<xh>{quotecontinue} |
-<xb>{quotecontinue} {
- ECHO;
- }
{xhstart} {
/* Hexadecimal bit type.
@@ -463,12 +441,6 @@ other .
BEGIN(xh);
ECHO;
}
-<xh>{quotestop} |
-<xh>{quotefail} {
- yyless(1);
- BEGIN(INITIAL);
- ECHO;
- }
{xnstart} {
yyless(1); /* eat only 'n' this time */
@@ -490,32 +462,41 @@ other .
BEGIN(xus);
ECHO;
}
-<xq,xe>{quotestop} |
-<xq,xe>{quotefail} {
- yyless(1);
- BEGIN(INITIAL);
- ECHO;
- }
-<xus>{quotestop} |
-<xus>{quotefail} {
- /* throw back all but the quote */
- yyless(1);
- BEGIN(xusend);
+
+<xb,xh,xq,xe,xus>{quote} {
+ /*
+ * When we are scanning a quoted string and see an end
+ * quote, we must look ahead for a possible continuation.
+ * If we don't see one, we know the end quote was in fact
+ * the end of the string. To reduce the lexer table size,
+ * we use a single "xqs" state to do the lookahead for all
+ * types of strings.
+ */
+ cur_state->state_before_str_stop = YYSTATE;
+ BEGIN(xqs);
ECHO;
}
-<xusend>{whitespace} {
+<xqs>{quotecontinue} {
+ /*
+ * Found a quote continuation, so return to the in-quote
+ * state and continue scanning the literal. Nothing is
+ * added to the literal's contents.
+ */
+ BEGIN(cur_state->state_before_str_stop);
ECHO;
}
-<xusend>{other} |
-<xusend>{xustop1} {
+<xqs>{quotecontinuefail} |
+<xqs>{other} {
+ /*
+ * Failed to see a quote continuation. Throw back
+ * everything after the end quote and return to INITIAL;
+ * no per-string-type processing is done in this action.
+ */
yyless(0);
BEGIN(INITIAL);
- ECHO;
- }
-<xusend>{xustop2} {
- BEGIN(INITIAL);
- ECHO;
+ /* There's nothing to echo ... */
}
+
<xq,xe,xus>{xqdouble} {
ECHO;
}
@@ -540,9 +521,6 @@ other .
<xe>{xehexesc} {
ECHO;
}
-<xq,xe,xus>{quotecontinue} {
- ECHO;
- }
<xe>. {
/* This is only needed for \ just before EOF */
ECHO;
@@ -599,21 +577,7 @@ other .
BEGIN(INITIAL);
ECHO;
}
-<xui>{dquote} {
- yyless(1);
- BEGIN(xuiend);
- ECHO;
- }
-<xuiend>{whitespace} {
- ECHO;
- }
-<xuiend>{other} |
-<xuiend>{xustop1} {
- yyless(0);
- BEGIN(INITIAL);
- ECHO;
- }
-<xuiend>{xustop2} {
+<xui>{dquote} {
BEGIN(INITIAL);
ECHO;
}
@@ -1084,8 +1048,7 @@ psql_scan(PsqlScanState state,
switch (state->start_state)
{
case INITIAL:
- case xuiend: /* we treat these like INITIAL */
- case xusend:
+ case xqs: /* we treat this like INITIAL */
if (state->paren_depth > 0)
{
result = PSCAN_INCOMPLETE;
@@ -1240,7 +1203,8 @@ psql_scan_reselect_sql_lexer(PsqlScanState state)
bool
psql_scan_in_quote(PsqlScanState state)
{
- return state->start_state != INITIAL;
+ return state->start_state != INITIAL &&
+ state->start_state != xqs;
}
/*