diff options
author | Tom Lane | 2025-02-27 15:53:38 +0000 |
---|---|---|
committer | Tom Lane | 2025-02-27 15:53:38 +0000 |
commit | c8c74ad7e1cbc71b616f8ae786776c521729976b (patch) | |
tree | a6dbcc5abaa8aa43df77f923a12b5b1cc34463f8 /src/bin/pgbench | |
parent | e167191dc146b65146fbd32e147be30dd8f1f166 (diff) |
Get rid of O(N^2) script-parsing overhead in pgbench.
pgbench wants to record the starting line number of each command
in its scripts. It was computing that by scanning from the script
start and counting newlines, so that O(N^2) work had to be done
for an N-command script. In a script with 50K lines, this adds
up to about 10 seconds on my machine.
To add insult to injury, the results were subtly wrong, because
expr_scanner_offset() scanned to find the NUL that flex inserts
at the end of the current token --- and before the first yylex
call, no such NUL has been inserted. So we ended up computing the
script's last line number, not its first one. This was visible only
in case of \gset at the start of a script, which perhaps accounts
for the lack of complaints.
To fix, steal an idea from plpgsql and track the current lexer
ending position and line count as we advance through the script.
(It's a bit simpler than plpgsql since we never need to back up.)
Also adjust a couple of other places that were invoking scans
from script start when they didn't really need to. I made a new
psqlscan function psql_scan_get_location() that replaces both
expr_scanner_offset() and expr_scanner_get_lineno(), since in
practice expr_scanner_get_lineno() was only being invoked to find
the line number of the current lexer end position.
Reported-by: Daniel Vérité <daniel@manitou-mail.org>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/84a8a89e-adb8-47a9-9d34-c13f7150ee45@manitou-mail.org
Diffstat (limited to 'src/bin/pgbench')
-rw-r--r-- | src/bin/pgbench/exprscan.l | 65 | ||||
-rw-r--r-- | src/bin/pgbench/pgbench.c | 14 | ||||
-rw-r--r-- | src/bin/pgbench/pgbench.h | 4 |
3 files changed, 28 insertions, 55 deletions
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l index 8943a52e9f0..5747af38cb2 100644 --- a/src/bin/pgbench/exprscan.l +++ b/src/bin/pgbench/exprscan.l @@ -271,10 +271,14 @@ void expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) { PsqlScanState state = yyget_extra(yyscanner); - int error_detection_offset = expr_scanner_offset(state) - 1; + int lineno; + int error_detection_offset; YYSTYPE lval; char *full_line; + psql_scan_get_location(state, &lineno, &error_detection_offset); + error_detection_offset--; + /* * While parsing an expression, we may not have collected the whole line * yet from the input source. Lex till EOL so we can report whole line. @@ -289,7 +293,6 @@ expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more) /* Extract the line, trimming trailing newline if any */ full_line = expr_scanner_get_substring(state, expr_start_offset, - expr_scanner_offset(state), true); syntax_error(expr_source, expr_lineno, full_line, expr_command, @@ -336,12 +339,15 @@ expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset) /* And lex. */ lexresult = yylex(&lval, state->scanner); - /* - * Save start offset of word, if any. We could do this more efficiently, - * but for now this seems fine. - */ + /* Save start offset of word, if any. */ if (lexresult) - *offset = expr_scanner_offset(state) - word_buf->len; + { + int lineno; + int end_offset; + + psql_scan_get_location(state, &lineno, &end_offset); + *offset = end_offset - word_buf->len; + } else *offset = -1; @@ -404,35 +410,25 @@ expr_scanner_finish(yyscan_t yyscanner) } /* - * Get offset from start of string to end of current lexer token. + * Get a malloc'd copy of the lexer input string from start_offset + * to end of current lexer token. If chomp is true, drop any trailing + * newline(s). * * We rely on the knowledge that flex modifies the scan buffer by storing * a NUL at the end of the current token (yytext). 
Note that this might * not work quite right if we were parsing a sub-buffer, but since pgbench - * never invokes that functionality, it doesn't matter. - */ -int -expr_scanner_offset(PsqlScanState state) -{ - return strlen(state->scanbuf); -} - -/* - * Get a malloc'd copy of the lexer input string from start_offset - * to just before end_offset. If chomp is true, drop any trailing - * newline(s). + * never invokes that functionality, it doesn't matter. Also, this will + * give the wrong answer (the whole remainder of the input) if called + * before any yylex() call has been done. */ char * expr_scanner_get_substring(PsqlScanState state, - int start_offset, int end_offset, + int start_offset, bool chomp) { char *result; const char *scanptr = state->scanbuf + start_offset; - int slen = end_offset - start_offset; - - Assert(slen >= 0); - Assert(end_offset <= strlen(state->scanbuf)); + size_t slen = strlen(scanptr); if (chomp) { @@ -447,22 +443,3 @@ expr_scanner_get_substring(PsqlScanState state, return result; } - -/* - * Get the line number associated with the given string offset - * (which must not be past the end of where we've lexed to). 
- */ -int -expr_scanner_get_lineno(PsqlScanState state, int offset) -{ - int lineno = 1; - const char *p = state->scanbuf; - - while (*p && offset > 0) - { - if (*p == '\n') - lineno++; - p++, offset--; - } - return lineno; -} diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index fdc957fa34d..38f8bc11bcd 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -5690,8 +5690,8 @@ process_backslash_command(PsqlScanState sstate, const char *source) initPQExpBuffer(&word_buf); /* Remember location of the backslash */ - start_offset = expr_scanner_offset(sstate) - 1; - lineno = expr_scanner_get_lineno(sstate, start_offset); + psql_scan_get_location(sstate, &lineno, &start_offset); + start_offset--; /* Collect first word of command */ if (!expr_lex_one_word(sstate, &word_buf, &word_offset)) @@ -5747,7 +5747,6 @@ process_backslash_command(PsqlScanState sstate, const char *source) my_command->first_line = expr_scanner_get_substring(sstate, start_offset, - expr_scanner_offset(sstate), true); expr_scanner_finish(yyscanner); @@ -5777,7 +5776,6 @@ process_backslash_command(PsqlScanState sstate, const char *source) my_command->first_line = expr_scanner_get_substring(sstate, start_offset, - expr_scanner_offset(sstate), true); if (my_command->meta == META_SLEEP) @@ -5952,8 +5950,6 @@ ParseScript(const char *script, const char *desc, int weight) PQExpBufferData line_buf; int alloc_num; int index; - int lineno; - int start_offset; #define COMMANDS_ALLOC_NUM 128 alloc_num = COMMANDS_ALLOC_NUM; @@ -5977,7 +5973,6 @@ ParseScript(const char *script, const char *desc, int weight) * stdstrings should be true, which is a bit riskier. 
*/ psql_scan_setup(sstate, script, strlen(script), 0, true); - start_offset = expr_scanner_offset(sstate) - 1; initPQExpBuffer(&line_buf); @@ -5985,12 +5980,15 @@ ParseScript(const char *script, const char *desc, int weight) for (;;) { + int lineno; + int start_offset; PsqlScanResult sr; promptStatus_t prompt; Command *command = NULL; resetPQExpBuffer(&line_buf); - lineno = expr_scanner_get_lineno(sstate, start_offset); + + psql_scan_get_location(sstate, &lineno, &start_offset); sr = psql_scan(sstate, &line_buf, &prompt); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index f6a883611c5..0ba216e5f72 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -149,11 +149,9 @@ extern yyscan_t expr_scanner_init(PsqlScanState state, const char *source, int lineno, int start_offset, const char *command); extern void expr_scanner_finish(yyscan_t yyscanner); -extern int expr_scanner_offset(PsqlScanState state); extern char *expr_scanner_get_substring(PsqlScanState state, - int start_offset, int end_offset, + int start_offset, bool chomp); -extern int expr_scanner_get_lineno(PsqlScanState state, int offset); extern void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, |