diff options
Diffstat (limited to 'src/fe_utils')
| -rw-r--r-- | src/fe_utils/psqlscan.l | 124 |
1 files changed, 44 insertions, 80 deletions
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index 02cb356f343..08dffde1ba0 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -114,12 +114,11 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); * <xd> delimited identifiers (double-quoted identifiers) * <xh> hexadecimal numeric string * <xq> standard quoted strings + * <xqs> quote stop (detect continued strings) * <xe> extended quoted strings (support backslash escape sequences) * <xdolq> $foo$ quoted strings * <xui> quoted identifier with Unicode escapes - * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow * <xus> quoted string with Unicode escapes - * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow * * Note: we intentionally don't mimic the backend's <xeu> state; we have * no need to distinguish it from <xe> state, and no good way to get out @@ -132,12 +131,11 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); %x xd %x xh %x xq +%x xqs %x xe %x xdolq %x xui -%x xuiend %x xus -%x xusend /* * In order to make the world safe for Windows and Mac clients as well as @@ -177,19 +175,18 @@ special_whitespace ({space}+|{comment}{newline}) horiz_whitespace ({horiz_space}|{comment}) whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*) +quote ' +/* If we see {quote} then {quotecontinue}, the quoted string continues */ +quotecontinue {whitespace_with_newline}{quote} + /* - * To ensure that {quotecontinue} can be scanned without having to back up - * if the full pattern isn't matched, we include trailing whitespace in - * {quotestop}. This matches all cases where {quotecontinue} fails to match, - * except for {quote} followed by whitespace and just one "-" (not two, - * which would start a {comment}). To cover that we have {quotefail}. - * The actions for {quotestop} and {quotefail} must throw back characters - * beyond the quote proper. + * {quotecontinuefail} is needed to avoid lexer backup when we fail to match + * {quotecontinue}. It might seem that this could just be {whitespace}*, + * but if there's a dash after {whitespace_with_newline}, it must be consumed + * to see if there's another dash --- which would start a {comment} and thus + * allow continuation of the {quotecontinue} token. */ -quote ' -quotestop {quote}{whitespace}* -quotecontinue {quote}{whitespace_with_newline}{quote} -quotefail {quote}{whitespace}*"-" +quotecontinuefail {whitespace}*"-"? /* Bit string * It is tempting to scan the string for only those characters @@ -250,21 +247,12 @@ xdstop {dquote} xddouble {dquote}{dquote} xdinside [^"]+ -/* Unicode escapes */ -uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote} -/* error rule to avoid backup */ -uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU] - /* Quoted identifier with Unicode escapes */ xuistart [uU]&{dquote} /* Quoted string with Unicode escapes */ xusstart [uU]&{quote} -/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */ -xustop1 {uescapefail}? -xustop2 {uescape} - /* error rule to avoid backup */ xufailed [uU]& @@ -438,20 +426,10 @@ other . BEGIN(xb); ECHO; } -<xb>{quotestop} | -<xb>{quotefail} { - yyless(1); - BEGIN(INITIAL); - ECHO; - } <xh>{xhinside} | <xb>{xbinside} { ECHO; } -<xh>{quotecontinue} | -<xb>{quotecontinue} { - ECHO; - } {xhstart} { /* Hexadecimal bit type. @@ -463,12 +441,6 @@ other . BEGIN(xh); ECHO; } -<xh>{quotestop} | -<xh>{quotefail} { - yyless(1); - BEGIN(INITIAL); - ECHO; - } {xnstart} { yyless(1); /* eat only 'n' this time */ @@ -490,32 +462,41 @@ other . BEGIN(xus); ECHO; } -<xq,xe>{quotestop} | -<xq,xe>{quotefail} { - yyless(1); - BEGIN(INITIAL); - ECHO; - } -<xus>{quotestop} | -<xus>{quotefail} { - /* throw back all but the quote */ - yyless(1); - BEGIN(xusend); + +<xb,xh,xq,xe,xus>{quote} { + /* + * When we are scanning a quoted string and see an end + * quote, we must look ahead for a possible continuation. + * If we don't see one, we know the end quote was in fact + * the end of the string. To reduce the lexer table size, + * we use a single "xqs" state to do the lookahead for all + * types of strings. + */ + cur_state->state_before_str_stop = YYSTATE; + BEGIN(xqs); ECHO; } -<xusend>{whitespace} { +<xqs>{quotecontinue} { + /* + * Found a quote continuation, so return to the in-quote + * state and continue scanning the literal. Nothing is + * added to the literal's contents. + */ + BEGIN(cur_state->state_before_str_stop); ECHO; } -<xusend>{other} | -<xusend>{xustop1} { +<xqs>{quotecontinuefail} | +<xqs>{other} { + /* + * Failed to see a quote continuation. Throw back + * everything after the end quote, and handle the string + * according to the state we were in previously. + */ yyless(0); BEGIN(INITIAL); - ECHO; - } -<xusend>{xustop2} { - BEGIN(INITIAL); - ECHO; + /* There's nothing to echo ... */ } + <xq,xe,xus>{xqdouble} { ECHO; } @@ -540,9 +521,6 @@ other . <xe>{xehexesc} { ECHO; } -<xq,xe,xus>{quotecontinue} { - ECHO; - } <xe>. { /* This is only needed for \ just before EOF */ ECHO; @@ -599,21 +577,7 @@ other . BEGIN(INITIAL); ECHO; } -<xui>{dquote} { - yyless(1); - BEGIN(xuiend); - ECHO; - } -<xuiend>{whitespace} { - ECHO; - } -<xuiend>{other} | -<xuiend>{xustop1} { - yyless(0); - BEGIN(INITIAL); - ECHO; - } -<xuiend>{xustop2} { +<xui>{dquote} { BEGIN(INITIAL); ECHO; } @@ -1084,8 +1048,7 @@ psql_scan(PsqlScanState state, switch (state->start_state) { case INITIAL: - case xuiend: /* we treat these like INITIAL */ - case xusend: + case xqs: /* we treat this like INITIAL */ if (state->paren_depth > 0) { result = PSCAN_INCOMPLETE; @@ -1240,7 +1203,8 @@ psql_scan_reselect_sql_lexer(PsqlScanState state) bool psql_scan_in_quote(PsqlScanState state) { - return state->start_state != INITIAL; + return state->start_state != INITIAL && + state->start_state != xqs; } /* |
