pgbench: Add a real expression syntax to \set
authorRobert Haas <rhaas@postgresql.org>
Mon, 2 Mar 2015 19:21:41 +0000 (14:21 -0500)
committerRobert Haas <rhaas@postgresql.org>
Mon, 2 Mar 2015 19:21:41 +0000 (14:21 -0500)
Previously, you could do \set variable operand1 operator operand2, but
nothing more complicated.  Now, you can \set variable expression, which
makes it much simpler to do multi-step calculations here.  This also
adds support for the modulo operator (%), with the same semantics as in
C.

Robert Haas and Fabien Coelho, reviewed by Álvaro Herrera and
Stephen Frost

contrib/pgbench/.gitignore
contrib/pgbench/Makefile
contrib/pgbench/exprparse.y [new file with mode: 0644]
contrib/pgbench/exprscan.l [new file with mode: 0644]
contrib/pgbench/pgbench.c
contrib/pgbench/pgbench.h [new file with mode: 0644]
doc/src/sgml/pgbench.sgml
src/tools/msvc/Mkvcbuild.pm

index 489a2d62d0685e2ba31ccf2be7ff3d277d87350a..aae819ed70f9f4e7d8cdcd26ef0094b7d8d883e9 100644 (file)
@@ -1 +1,3 @@
+/exprparse.c
+/exprscan.c
 /pgbench
index b8e2fc841ec38b252e0a65cc41a55207ce5284c2..6d132228dd7848c350ef008825d33eed50bb16d9 100644 (file)
@@ -4,7 +4,9 @@ PGFILEDESC = "pgbench - a simple program for running benchmark tests"
 PGAPPICON = win32
 
 PROGRAM = pgbench
-OBJS   = pgbench.o $(WIN32RES)
+OBJS   = pgbench.o exprparse.o $(WIN32RES)
+
+EXTRA_CLEAN    = exprparse.c exprscan.c
 
 PG_CPPFLAGS = -I$(libpq_srcdir)
 PG_LIBS = $(libpq_pgport) $(PTHREAD_LIBS)
@@ -18,8 +20,21 @@ subdir = contrib/pgbench
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
+
+distprep: exprparse.c exprscan.c
 endif
 
 ifneq ($(PORTNAME), win32)
 override CFLAGS += $(PTHREAD_CFLAGS)
 endif
+
+# There is no correct way to write a rule that generates two files.
+# Rules with two targets don't have that meaning, they are merely
+# shorthand for two otherwise separate rules.  To be safe for parallel
+# make, we must chain the dependencies like this.  The semicolon is
+# important; otherwise, make will choose the built-in rule.
+
+exprparse.h: exprparse.c ;
+
+# exprscan is compiled as part of exprparse
+exprparse.o: exprscan.c
diff --git a/contrib/pgbench/exprparse.y b/contrib/pgbench/exprparse.y
new file mode 100644 (file)
index 0000000..243c6b9
--- /dev/null
@@ -0,0 +1,96 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * exprparse.y
+ *       bison grammar for a simple expression syntax
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "pgbench.h"
+
+PgBenchExpr *expr_parse_result;
+
+static PgBenchExpr *make_integer_constant(int64 ival);
+static PgBenchExpr *make_variable(char *varname);
+static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
+               PgBenchExpr *rexpr);
+
+%}
+
+%expect 0
+%name-prefix="expr_yy"
+
+%union
+{
+       int64           ival;
+       char       *str;
+       PgBenchExpr *expr;
+}
+
+%type <expr> expr
+%type <ival> INTEGER
+%type <str> VARIABLE
+%token INTEGER VARIABLE
+%token CHAR_ERROR /* never used, will raise a syntax error */
+
+%left  '+' '-'
+%left  '*' '/' '%'
+%right UMINUS
+
+%%
+
+result: expr                           { expr_parse_result = $1; }
+
+expr: '(' expr ')'                     { $$ = $2; }
+       | '+' expr %prec UMINUS { $$ = $2; }
+       | '-' expr %prec UMINUS { $$ = make_op('-', make_integer_constant(0), $2); }
+       | expr '+' expr                 { $$ = make_op('+', $1, $3); }
+       | expr '-' expr                 { $$ = make_op('-', $1, $3); }
+       | expr '*' expr                 { $$ = make_op('*', $1, $3); }
+       | expr '/' expr                 { $$ = make_op('/', $1, $3); }
+       | expr '%' expr                 { $$ = make_op('%', $1, $3); }
+       | INTEGER                               { $$ = make_integer_constant($1); }
+       | VARIABLE                              { $$ = make_variable($1); }
+       ;
+
+%%
+
+static PgBenchExpr *
+make_integer_constant(int64 ival)
+{
+       PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+       expr->etype = ENODE_INTEGER_CONSTANT;
+       expr->u.integer_constant.ival = ival;
+       return expr;
+}
+
+static PgBenchExpr *
+make_variable(char *varname)
+{
+       PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+       expr->etype = ENODE_VARIABLE;
+       expr->u.variable.varname = varname;
+       return expr;
+}
+
+static PgBenchExpr *
+make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+{
+       PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+       expr->etype = ENODE_OPERATOR;
+       expr->u.operator.operator = operator;
+       expr->u.operator.lexpr = lexpr;
+       expr->u.operator.rexpr = rexpr;
+       return expr;
+}
+
+#include "exprscan.c"
diff --git a/contrib/pgbench/exprscan.l b/contrib/pgbench/exprscan.l
new file mode 100644 (file)
index 0000000..4c9229c
--- /dev/null
@@ -0,0 +1,105 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * exprscan.l
+ *       a lexical scanner for a simple expression syntax
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* line and column number for error reporting */
+static int     yyline = 0, yycol = 0;
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+static int     scanbuflen;
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+int expr_yylex(void);
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="expr_yy"
+
+non_newline            [^\n\r]
+space                  [ \t\r\f]
+
+%%
+
+"+"                            { yycol += yyleng; return '+'; }
+"-"                            { yycol += yyleng; return '-'; }
+"*"                            { yycol += yyleng; return '*'; }
+"/"                            { yycol += yyleng; return '/'; }
+"%"                            { yycol += yyleng; return '%'; }
+"("                            { yycol += yyleng; return '('; }
+")"                            { yycol += yyleng; return ')'; }
+:[a-zA-Z0-9_]+ { yycol += yyleng; yylval.str = pg_strdup(yytext + 1); return VARIABLE; }
+[0-9]+                 { yycol += yyleng; yylval.ival = strtoint64(yytext); return INTEGER; }
+
+[\n]                   { yycol = 0; yyline++; }
+{space}                        { yycol += yyleng; /* ignore */ }
+
+.                              {
+                                       yycol += yyleng;
+                                       fprintf(stderr, "unexpected character '%s'\n", yytext);
+                                       return CHAR_ERROR;
+                               }
+%%
+
+void
+yyerror(const char *message)
+{
+       /* yyline is always 1 as pgbench calls the parser for each line...
+        * so the interesting location information is the column number */
+       fprintf(stderr, "%s at column %d\n", message, yycol);
+       /* go on to raise the error from pgbench with more information */
+       /* exit(1); */
+}
+
+/*
+ * Called before any actual parsing is done
+ */
+void
+expr_scanner_init(const char *str)
+{
+       Size    slen = strlen(str);
+
+       /*
+        * Might be left over after error
+        */
+       if (YY_CURRENT_BUFFER)
+               yy_delete_buffer(YY_CURRENT_BUFFER);
+
+       /*
+        * Make a scan buffer with special termination needed by flex.
+        */
+       scanbuflen = slen;
+       scanbuf = pg_malloc(slen + 2);
+       memcpy(scanbuf, str, slen);
+       scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+       scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+       BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after seg_scanner_init()
+ */
+void
+expr_scanner_finish(void)
+{
+       yy_delete_buffer(scanbufhandle);
+       pg_free(scanbuf);
+}
index ddd11a09c5b1762b59a91f2c2b5be54dc9a903ed..706fdf5b197bb328ae829dc3639409ac2e42e0b9 100644 (file)
@@ -57,6 +57,8 @@
 #define M_PI 3.14159265358979323846
 #endif
 
+#include "pgbench.h"
+
 /*
  * Multi-platform pthread implementations
  */
@@ -289,6 +291,7 @@ typedef struct
        int                     type;                   /* command type (SQL_COMMAND or META_COMMAND) */
        int                     argc;                   /* number of command words */
        char       *argv[MAX_ARGS]; /* command word list */
+       PgBenchExpr *expr;                      /* parsed expression */
 } Command;
 
 typedef struct
@@ -423,7 +426,7 @@ usage(void)
  * This function is a modified version of scanint8() from
  * src/backend/utils/adt/int8.c.
  */
-static int64
+int64
 strtoint64(const char *str)
 {
        const char *ptr = str;
@@ -879,6 +882,91 @@ getQueryParams(CState *st, const Command *command, const char **params)
                params[i] = getVariable(st, command->argv[i + 1]);
 }
 
+/*
+ * Recursive evaluation of an expression in a pgbench script
+ * using the current state of variables.
+ * Returns whether the evaluation was ok,
+ * the value itself is returned through the retval pointer.
+ */
+static bool
+evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+{
+       switch (expr->etype)
+       {
+               case ENODE_INTEGER_CONSTANT:
+                       {
+                               *retval = expr->u.integer_constant.ival;
+                               return true;
+                       }
+
+               case ENODE_VARIABLE:
+                       {
+                               char       *var;
+
+                               if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
+                               {
+                                       fprintf(stderr, "undefined variable %s\n",
+                                               expr->u.variable.varname);
+                                       return false;
+                               }
+                               *retval = strtoint64(var);
+                               return true;
+                       }
+
+               case ENODE_OPERATOR:
+                       {
+                               int64   lval;
+                               int64   rval;
+
+                               if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
+                                       return false;
+                               if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
+                                       return false;
+                               switch (expr->u.operator.operator)
+                               {
+                                       case '+':
+                                               *retval = lval + rval;
+                                               return true;
+
+                                       case '-':
+                                               *retval = lval - rval;
+                                               return true;
+
+                                       case '*':
+                                               *retval = lval * rval;
+                                               return true;
+
+                                       case '/':
+                                               if (rval == 0)
+                                               {
+                                                       fprintf(stderr, "division by zero\n");
+                                                       return false;
+                                               }
+                                               *retval = lval / rval;
+                                               return true;
+
+                                       case '%':
+                                               if (rval == 0)
+                                               {
+                                                       fprintf(stderr, "division by zero\n");
+                                                       return false;
+                                               }
+                                               *retval = lval % rval;
+                                               return true;
+                               }
+
+                               fprintf(stderr, "bad operator\n");
+                               return false;
+                       }
+
+               default:
+                       break;
+       }
+
+       fprintf(stderr, "bad expression\n");
+       return false;
+}
+
 /*
  * Run a shell command. The result is assigned to the variable if not NULL.
  * Return true if succeeded, or false on error.
@@ -1515,64 +1603,16 @@ top:
                }
                else if (pg_strcasecmp(argv[0], "set") == 0)
                {
-                       char       *var;
-                       int64           ope1,
-                                               ope2;
                        char            res[64];
+                       PgBenchExpr *expr = commands[st->state]->expr;
+                       int64           result;
 
-                       if (*argv[2] == ':')
+                       if (!evaluateExpr(st, expr, &result))
                        {
-                               if ((var = getVariable(st, argv[2] + 1)) == NULL)
-                               {
-                                       fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]);
-                                       st->ecnt++;
-                                       return true;
-                               }
-                               ope1 = strtoint64(var);
-                       }
-                       else
-                               ope1 = strtoint64(argv[2]);
-
-                       if (argc < 5)
-                               snprintf(res, sizeof(res), INT64_FORMAT, ope1);
-                       else
-                       {
-                               if (*argv[4] == ':')
-                               {
-                                       if ((var = getVariable(st, argv[4] + 1)) == NULL)
-                                       {
-                                               fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[4]);
-                                               st->ecnt++;
-                                               return true;
-                                       }
-                                       ope2 = strtoint64(var);
-                               }
-                               else
-                                       ope2 = strtoint64(argv[4]);
-
-                               if (strcmp(argv[3], "+") == 0)
-                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 + ope2);
-                               else if (strcmp(argv[3], "-") == 0)
-                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 - ope2);
-                               else if (strcmp(argv[3], "*") == 0)
-                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 * ope2);
-                               else if (strcmp(argv[3], "/") == 0)
-                               {
-                                       if (ope2 == 0)
-                                       {
-                                               fprintf(stderr, "%s: division by zero\n", argv[0]);
-                                               st->ecnt++;
-                                               return true;
-                                       }
-                                       snprintf(res, sizeof(res), INT64_FORMAT, ope1 / ope2);
-                               }
-                               else
-                               {
-                                       fprintf(stderr, "%s: unsupported operator %s\n", argv[0], argv[3]);
-                                       st->ecnt++;
-                                       return true;
-                               }
+                               st->ecnt++;
+                               return true;
                        }
+                       sprintf(res, INT64_FORMAT, result);
 
                        if (!putVariable(st, argv[0], argv[1], res))
                        {
@@ -2151,7 +2191,7 @@ parseQuery(Command *cmd, const char *raw_sql)
 
 /* Parse a command; return a Command struct, or NULL if it's a comment */
 static Command *
-process_commands(char *buf)
+process_commands(char *buf, const char *source, const int lineno)
 {
        const char      delim[] = " \f\n\r\t\v";
 
@@ -2182,16 +2222,23 @@ process_commands(char *buf)
 
        if (*p == '\\')
        {
+               int             max_args = -1;
                my_commands->type = META_COMMAND;
 
                j = 0;
                tok = strtok(++p, delim);
 
+               if (tok != NULL && pg_strcasecmp(tok, "set") == 0)
+                       max_args = 2;
+
                while (tok != NULL)
                {
                        my_commands->argv[j++] = pg_strdup(tok);
                        my_commands->argc++;
-                       tok = strtok(NULL, delim);
+                       if (max_args >= 0 && my_commands->argc >= max_args)
+                               tok = strtok(NULL, "");
+                       else
+                               tok = strtok(NULL, delim);
                }
 
                if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
@@ -2250,9 +2297,17 @@ process_commands(char *buf)
                                exit(1);
                        }
 
-                       for (j = my_commands->argc < 5 ? 3 : 5; j < my_commands->argc; j++)
-                               fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
-                                               my_commands->argv[0], my_commands->argv[j]);
+                       expr_scanner_init(my_commands->argv[2]);
+
+                       if (expr_yyparse() != 0)
+                       {
+                               fprintf(stderr, "%s: parse error\n", my_commands->argv[0]);
+                               exit(1);
+                       }
+
+                       my_commands->expr = expr_parse_result;
+
+                       expr_scanner_finish();
                }
                else if (pg_strcasecmp(my_commands->argv[0], "sleep") == 0)
                {
@@ -2393,7 +2448,7 @@ process_file(char *filename)
 
        Command   **my_commands;
        FILE       *fd;
-       int                     lineno;
+       int                     lineno, index;
        char       *buf;
        int                     alloc_num;
 
@@ -2416,22 +2471,24 @@ process_file(char *filename)
        }
 
        lineno = 0;
+       index = 0;
 
        while ((buf = read_line_from_file(fd)) != NULL)
        {
                Command    *command;
+               lineno += 1;
 
-               command = process_commands(buf);
+               command = process_commands(buf, filename, lineno);
 
                free(buf);
 
                if (command == NULL)
                        continue;
 
-               my_commands[lineno] = command;
-               lineno++;
+               my_commands[index] = command;
+               index++;
 
-               if (lineno >= alloc_num)
+               if (index >= alloc_num)
                {
                        alloc_num += COMMANDS_ALLOC_NUM;
                        my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
@@ -2439,7 +2496,7 @@ process_file(char *filename)
        }
        fclose(fd);
 
-       my_commands[lineno] = NULL;
+       my_commands[index] = NULL;
 
        sql_files[num_files++] = my_commands;
 
@@ -2447,12 +2504,12 @@ process_file(char *filename)
 }
 
 static Command **
-process_builtin(char *tb)
+process_builtin(char *tb, const char *source)
 {
 #define COMMANDS_ALLOC_NUM 128
 
        Command   **my_commands;
-       int                     lineno;
+       int                     lineno, index;
        char            buf[BUFSIZ];
        int                     alloc_num;
 
@@ -2460,6 +2517,7 @@ process_builtin(char *tb)
        my_commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
 
        lineno = 0;
+       index = 0;
 
        for (;;)
        {
@@ -2478,21 +2536,23 @@ process_builtin(char *tb)
 
                *p = '\0';
 
-               command = process_commands(buf);
+               lineno += 1;
+
+               command = process_commands(buf, source, lineno);
                if (command == NULL)
                        continue;
 
-               my_commands[lineno] = command;
-               lineno++;
+               my_commands[index] = command;
+               index++;
 
-               if (lineno >= alloc_num)
+               if (index >= alloc_num)
                {
                        alloc_num += COMMANDS_ALLOC_NUM;
                        my_commands = pg_realloc(my_commands, sizeof(Command *) * alloc_num);
                }
        }
 
-       my_commands[lineno] = NULL;
+       my_commands[index] = NULL;
 
        return my_commands;
 }
@@ -3222,17 +3282,20 @@ main(int argc, char **argv)
        switch (ttype)
        {
                case 0:
-                       sql_files[0] = process_builtin(tpc_b);
+                       sql_files[0] = process_builtin(tpc_b,
+                                                                                  "<builtin: TPC-B (sort of)>");
                        num_files = 1;
                        break;
 
                case 1:
-                       sql_files[0] = process_builtin(select_only);
+                       sql_files[0] = process_builtin(select_only,
+                                                                                  "<builtin: select only>");
                        num_files = 1;
                        break;
 
                case 2:
-                       sql_files[0] = process_builtin(simple_update);
+                       sql_files[0] = process_builtin(simple_update,
+                                                                                  "<builtin: simple update>");
                        num_files = 1;
                        break;
 
diff --git a/contrib/pgbench/pgbench.h b/contrib/pgbench/pgbench.h
new file mode 100644 (file)
index 0000000..128bf11
--- /dev/null
@@ -0,0 +1,56 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgbench.h
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGBENCH_H
+#define PGBENCH_H
+
+typedef enum PgBenchExprType
+{
+       ENODE_INTEGER_CONSTANT,
+       ENODE_VARIABLE,
+       ENODE_OPERATOR
+} PgBenchExprType;
+
+struct PgBenchExpr;
+typedef struct PgBenchExpr PgBenchExpr;
+
+struct PgBenchExpr
+{
+       PgBenchExprType etype;
+       union
+       {
+               struct
+               {
+                       int64 ival;
+               } integer_constant;
+               struct
+               {
+                       char *varname;
+               } variable;
+               struct
+               {
+                       char operator;
+                       PgBenchExpr     *lexpr;
+                       PgBenchExpr *rexpr;
+               } operator;
+       } u;
+};
+
+extern PgBenchExpr *expr_parse_result;
+
+extern int      expr_yyparse(void);
+extern int      expr_yylex(void);
+extern void expr_yyerror(const char *str);
+extern void expr_scanner_init(const char *str);
+extern void expr_scanner_finish(void);
+
+extern int64 strtoint64(const char *str);
+
+#endif
index 7d203cda84c7740197d97510dbaa81700808e737..16b82a3bbe04337858b7df7020956091cc93c3a1 100644 (file)
@@ -751,22 +751,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
   <variablelist>
    <varlistentry>
     <term>
-     <literal>\set <replaceable>varname</> <replaceable>operand1</> [ <replaceable>operator</> <replaceable>operand2</> ]</literal>
+     <literal>\set <replaceable>varname</> <replaceable>expression</>
     </term>
 
     <listitem>
      <para>
-      Sets variable <replaceable>varname</> to a calculated integer value.
-      Each <replaceable>operand</> is either an integer constant or a
-      <literal>:</><replaceable>variablename</> reference to a variable
-      having an integer value.  The <replaceable>operator</> can be
-      <literal>+</>, <literal>-</>, <literal>*</>, or <literal>/</>.
+      Sets variable <replaceable>varname</> to an integer value calculated
+      from <replaceable>expression</>.
+      The expression may contain integer constants such as <literal>5432</>,
+      references to variables <literal>:</><replaceable>variablename</>,
+      and expressions composed of unary (<literal>-</>) or binary operators
+      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>, <literal>%</>)
+      with their usual associativity, and parentheses.
      </para>
 
      <para>
-      Example:
+      Examples:
 <programlisting>
 \set ntellers 10 * :scale
+\set aid (1021 * :aid) % (100000 * :scale) + 1
 </programlisting></para>
     </listitem>
    </varlistentry>
index dba9b631688457a821eeef24cf90d7ffbaac2c3e..5dc8426b9351dacc0610d7f7d41c87e1e2d2ad4b 100644 (file)
@@ -49,6 +49,7 @@ my $contrib_extraincludes =
   { 'tsearch2' => ['contrib/tsearch2'], 'dblink' => ['src/backend'] };
 my $contrib_extrasource = {
        'cube' => [ 'cubescan.l', 'cubeparse.y' ],
+       'pgbench' => [ 'exprscan.l', 'exprparse.y' ],
        'seg'  => [ 'segscan.l',  'segparse.y' ], };
 my @contrib_excludes = ('pgcrypto', 'intagg', 'sepgsql');