Tweak the backend scanner (and psqlscan.l, which must track the backend scanner anyway) to avoid having any backup states. According to the flex manual, this should speed things up, and indeed the backend scanner is about a third faster according to some quick profiling checks. I haven't tried to measure the speed change in psql, but it probably is similar.
author: Tom Lane 2005-05-26 01:24:29 +0000
committer: Tom Lane 2005-05-26 01:24:29 +0000
commit: 15e4d1e2a7f565d805692daad895a07802279aea (patch)
tree: 67ef5cf474bad2e9d9ce1bee91c6317339a760ed /src/bin
parent: 38af680ad51d98e895f1968c6cc9f808c88a7725 (diff)
1 files changed, 76 insertions, 19 deletions
diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l
index 147d77872d8..88763d504bc 100644
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -11,7 +11,9 @@
  * are (except for a few) the same as the backend's, but their actions are
  * just ECHO whereas the backend's actions generally do other things.
  *
- * XXX The rules in this file must be kept in sync with the main parser!!!
+ * XXX The rules in this file must be kept in sync with the backend lexer!!!
+ *
+ * XXX Avoid creating backtracking cases --- see the backend lexer for info.
  *
  * The most difficult aspect of this code is that we need to work in multibyte
  * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
@@ -31,7 +33,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.9 2004/12/31 22:03:15 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.10 2005/05/26 01:24:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -207,6 +209,20 @@ special_whitespace		({space}+|{comment}{newline})
 horiz_whitespace		({horiz_space}|{comment})
 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
 
+/*
+ * To ensure that {quotecontinue} can be scanned without having to back up
+ * if the full pattern isn't matched, we include trailing whitespace in
+ * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
+ * except for {quote} followed by whitespace and just one "-" (not two,
+ * which would start a {comment}).  To cover that we have {quotefail}.
+ * The actions for {quotestop} and {quotefail} must throw back characters
+ * beyond the quote proper.
+ */
+quote			'
+quotestop		{quote}{whitespace}*
+quotecontinue	{quote}{whitespace_with_newline}{quote}
+quotefail		{quote}{whitespace}*"-"
+
 /* Bit string
  * It is tempting to scan the string for only those characters
  * which are allowed. However, this leads to silently swallowed
@@ -217,16 +233,12 @@ whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
  * validate the contents.
  */
 xbstart			[bB]{quote}
-xbstop			{quote}
 xbinside		[^']*
-xbcat			{quote}{whitespace_with_newline}{quote}
 
 /* Hexadecimal number
  */
 xhstart			[xX]{quote}
-xhstop			{quote}
 xhinside		[^']*
-xhcat			{quote}{whitespace_with_newline}{quote}
 
 /* National character
  */
@@ -234,26 +246,26 @@ xnstart			[nN]{quote}
 
 /* Extended quote
  * xqdouble implements embedded quote
- * xqcat allows strings to cross input lines
  */
-quote			'
 xqstart			{quote}
-xqstop			{quote}
 xqdouble		{quote}{quote}
 xqinside		[^\\']+
 xqescape		[\\][^0-7]
 xqoctesc		[\\][0-7]{1,3}
-xqcat			{quote}{whitespace_with_newline}{quote}
 
 /* $foo$ style quotes ("dollar quoting")
  * The quoted string starts with $foo$ where "foo" is an optional string
  * in the form of an identifier, except that it may not contain "$", 
  * and extends to the first occurrence of an identical string.  
  * There is *no* processing of the quoted text.
+ *
+ * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
+ * fails to match its trailing "$".
  */
 dolq_start		[A-Za-z\200-\377_]
 dolq_cont		[A-Za-z\200-\377_0-9]
 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
+dolqfailed		\${dolq_start}{dolq_cont}*
 dolqinside		[^$]+
 
 /* Double quote
@@ -311,12 +323,17 @@ operator		{op_chars}+
 
 /* we no longer allow unary minus in numbers. 
  * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999 
+ * coerced via doNegate() -- Leon aug 20 1999
+ *
+ * {realfail1} and {realfail2} are added to prevent the need for scanner
+ * backup when the {real} rule fails to match completely.
  */
 
 integer			{digit}+
 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-real			((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
+real			({integer}|{decimal})[Ee][-+]?{digit}+
+realfail1		({integer}|{decimal})[Ee]
+realfail2		({integer}|{decimal})[Ee][-+]
 
 param			\${integer}
 
@@ -383,11 +400,17 @@ other			.
 					ECHO;
 				}
 
+<xc>\*+			{
+					ECHO;
+				}
+
 {xbstart}		{
 					BEGIN(xb);
 					ECHO;
 				}
-<xb>{xbstop}	{
+<xb>{quotestop}	|
+<xb>{quotefail} {
+					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
@@ -395,8 +418,8 @@ other			.
 <xb>{xbinside}	{
 					ECHO;
 				}
-<xh>{xhcat}		|
-<xb>{xbcat}		{
+<xh>{quotecontinue}	|
+<xb>{quotecontinue}	{
 					ECHO;
 				}
 
@@ -410,13 +433,15 @@ other			.
 					BEGIN(xh);
 					ECHO;
 				}
-<xh>{xhstop}	{
+<xh>{quotestop}	|
+<xh>{quotefail} {
+					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
 
 {xnstart}		{
-					BEGIN(xq);
+					yyless(1);				/* eat only 'n' this time */
 					ECHO;
 				}
 
@@ -424,7 +449,9 @@ other			.
 					BEGIN(xq);
 					ECHO;
 				}
-<xq>{xqstop}	{
+<xq>{quotestop}	|
+<xq>{quotefail} {
+					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
@@ -440,7 +467,7 @@ other			.
 <xq>{xqoctesc}  {
 					ECHO;
 				}
-<xq>{xqcat}		{
+<xq>{quotecontinue} {
 					ECHO;
 				}
 <xq>.			{
@@ -453,6 +480,11 @@ other			.
 					BEGIN(xdolq);
 					ECHO;
 				}
+{dolqfailed}	{
+					/* throw back all but the initial "$" */
+					yyless(1);
+					ECHO;
+				}
 <xdolq>{dolqdelim} {
 					if (strcmp(yytext, cur_state->dolqstart) == 0)
 					{
@@ -474,6 +506,9 @@ other			.
 <xdolq>{dolqinside} {
 					ECHO;
 				}
+<xdolq>{dolqfailed} {
+					ECHO;
+				}
 <xdolq>.		{
 					/* This is only needed for $ inside the quoted text */
 					ECHO;
@@ -636,6 +671,21 @@ other			.
 {real}			{
 					ECHO;
 				}
+{realfail1}		{
+					/*
+					 * throw back the [Ee], and treat as {decimal}.  Note
+					 * that it is possible the input is actually {integer},
+					 * but since this case will almost certainly lead to a
+					 * syntax error anyway, we don't bother to distinguish.
+					 */
+					yyless(yyleng-1);
+					ECHO;
+				}
+{realfail2}		{
+					/* throw back the [Ee][+-], and proceed as above */
+					yyless(yyleng-2);
+					ECHO;
+				}
 
 
 {identifier}	{
@@ -817,6 +867,13 @@ other			.
 										  (char) strtol(yytext + 1, NULL, 0));
 				}
 
+"\\"0[xX]	{
+					/* failed hex case */
+					yyless(2);
+					appendPQExpBufferChar(output_buf,
+										  (char) strtol(yytext + 1, NULL, 0));
+				}
+
 "\\".			{ emit(yytext + 1, 1); }
 
 {other}|\n		{ ECHO; }
author	Tom Lane	2005-05-26 01:24:29 +0000
committer	Tom Lane	2005-05-26 01:24:29 +0000
commit	15e4d1e2a7f565d805692daad895a07802279aea (patch)
tree	67ef5cf474bad2e9d9ce1bee91c6317339a760ed /src/bin
parent	38af680ad51d98e895f1968c6cc9f808c88a7725 (diff)