Extend pgbench's expression syntax to support a few built-in functions.
author Robert Haas <rhaas@postgresql.org>
Tue, 1 Mar 2016 18:04:09 +0000 (13:04 -0500)
committer Robert Haas <rhaas@postgresql.org>
Tue, 1 Mar 2016 18:08:30 +0000 (13:08 -0500)
Fabien Coelho, reviewed mostly by Michael Paquier and me, but also by
Heikki Linnakangas, BeomYong Lee, Kyotaro Horiguchi, Oleksandr
Shulgin, and Álvaro Herrera.

doc/src/sgml/ref/pgbench.sgml
src/bin/pgbench/exprparse.y
src/bin/pgbench/exprscan.l
src/bin/pgbench/pgbench.c
src/bin/pgbench/pgbench.h

index ade1b530d26968eed8c0e44fbcc27ab6dc0f2fb9..f39f341a269f87b76300f1ef65cbe1a84b9861e3 100644 (file)
@@ -786,7 +786,7 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
   </para>
 
   <variablelist>
-   <varlistentry>
+   <varlistentry id='pgbench-metacommand-set'>
     <term>
      <literal>\set <replaceable>varname</> <replaceable>expression</></literal>
     </term>
@@ -798,8 +798,10 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       The expression may contain integer constants such as <literal>5432</>,
       references to variables <literal>:</><replaceable>variablename</>,
       and expressions composed of unary (<literal>-</>) or binary operators
-      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>, <literal>%</>)
-      with their usual associativity, and parentheses.
+      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>,
+      <literal>%</>) with their usual associativity,
+      <link linkend="pgbench-builtin-functions">function calls</>, and
+      parentheses.
      </para>
 
      <para>
@@ -994,6 +996,62 @@ END;
 
  </refsect2>
 
+ <refsect2 id="pgbench-builtin-functions">
+  <title>Built-In Functions</title>
+
+   <para>
+     The following functions are built into <application>pgbench</> and
+     may be used in conjunction with
+     <link linkend="pgbench-metacommand-set"><literal>\set</literal></link>.
+   </para>
+
+   <!-- list pgbench functions in alphabetical order -->
+   <table>
+    <title>pgbench Functions</title>
+    <tgroup cols="5">
+     <thead>
+      <row>
+       <entry>Function</entry>
+       <entry>Return Type</entry>
+       <entry>Description</entry>
+       <entry>Example</entry>
+       <entry>Result</entry>
+      </row>
+     </thead>
+     <tbody>
+      <row>
+       <entry><literal><function>abs(<replaceable>a</>)</></></>
+       <entry>same as <replaceable>a</></>
+       <entry>absolute value</>
+       <entry><literal>abs(-17)</></>
+       <entry><literal>17</></>
+      </row>
+      <row>
+       <entry><literal><function>debug(<replaceable>a</>)</></></>
+       <entry>same as <replaceable>a</></>
+       <entry>print <replaceable>a</> to <systemitem>stderr</systemitem>, and return <replaceable>a</></>
+       <entry><literal>debug(5432)</></>
+       <entry><literal>5432</></>
+      </row>
+      <row>
+       <entry><literal><function>max(<replaceable>i</> [, <replaceable>...</> ] )</></></>
+       <entry>integer</>
+       <entry>maximum value</>
+       <entry><literal>max(5, 4, 3, 2)</></>
+       <entry><literal>5</></>
+      </row>
+      <row>
+       <entry><literal><function>min(<replaceable>i</> [, <replaceable>...</> ] )</></></>
+       <entry>integer</>
+       <entry>minimum value</>
+       <entry><literal>min(5, 4, 3, 2)</></>
+       <entry><literal>2</></>
+      </row>
+     </tbody>
+     </tgroup>
+   </table>
+ </refsect2>
+
  <refsect2>
   <title>Per-Transaction Logging</title>
 
index 06ee04b68732e552565f348dfaea03c0df41544e..cac4d5e4f44ab14122b2e816dd31b973bf7d6022 100644 (file)
 
 PgBenchExpr *expr_parse_result;
 
+static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
 static PgBenchExpr *make_integer_constant(int64 ival);
 static PgBenchExpr *make_variable(char *varname);
-static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
+static PgBenchExpr *make_op(const char *operator, PgBenchExpr *lexpr,
        PgBenchExpr *rexpr);
+static int find_func(const char *fname);
+static PgBenchExpr *make_func(const int fnumber, PgBenchExprList *args);
 
 %}
 
@@ -31,13 +34,15 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
    int64       ival;
    char       *str;
    PgBenchExpr *expr;
+   PgBenchExprList *elist;
 }
 
+%type <elist> elist
 %type <expr> expr
-%type <ival> INTEGER
-%type <str> VARIABLE
+%type <ival> INTEGER function
+%type <str> VARIABLE FUNCTION
 
-%token INTEGER VARIABLE
+%token INTEGER VARIABLE FUNCTION
 %token CHAR_ERROR /* never used, will raise a syntax error */
 
 /* Precedence: lowest to highest */
@@ -49,16 +54,25 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 
 result: expr               { expr_parse_result = $1; }
 
+elist:                     { $$ = NULL; }
+   | expr                  { $$ = make_elist($1, NULL); }
+   | elist ',' expr        { $$ = make_elist($3, $1); }
+   ;
+
 expr: '(' expr ')'         { $$ = $2; }
    | '+' expr %prec UMINUS { $$ = $2; }
-   | '-' expr %prec UMINUS { $$ = make_op('-', make_integer_constant(0), $2); }
-   | expr '+' expr         { $$ = make_op('+', $1, $3); }
-   | expr '-' expr         { $$ = make_op('-', $1, $3); }
-   | expr '*' expr         { $$ = make_op('*', $1, $3); }
-   | expr '/' expr         { $$ = make_op('/', $1, $3); }
-   | expr '%' expr         { $$ = make_op('%', $1, $3); }
+   | '-' expr %prec UMINUS { $$ = make_op("-", make_integer_constant(0), $2); }
+   | expr '+' expr         { $$ = make_op("+", $1, $3); }
+   | expr '-' expr         { $$ = make_op("-", $1, $3); }
+   | expr '*' expr         { $$ = make_op("*", $1, $3); }
+   | expr '/' expr         { $$ = make_op("/", $1, $3); }
+   | expr '%' expr         { $$ = make_op("%", $1, $3); }
    | INTEGER               { $$ = make_integer_constant($1); }
    | VARIABLE              { $$ = make_variable($1); }
+   | function '(' elist ')'{ $$ = make_func($1, $3); }
+   ;
+
+function: FUNCTION         { $$ = find_func($1); pg_free($1); }
    ;
 
 %%
@@ -84,14 +98,131 @@ make_variable(char *varname)
 }
 
 static PgBenchExpr *
-make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+make_op(const char *operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+{
+   return make_func(find_func(operator),
+                    make_elist(rexpr, make_elist(lexpr, NULL)));
+}
+
+/*
+ * Table of operators and functions known to the expression parser:
+ * - fname: name as written in a script (operators use their symbol)
+ * - nargs: number of arguments
+ *          -1 is a special value for min & max meaning #args >= 1
+ * - tag: function identifier from PgBenchFunction enum
+ */
+static const struct
+{
+   const char *fname;
+   int nargs;
+   PgBenchFunction tag;
+} PGBENCH_FUNCTIONS[] = {
+   /* parsed as operators, executed as functions */
+   { "+", 2, PGBENCH_ADD },
+   { "-", 2, PGBENCH_SUB },
+   { "*", 2, PGBENCH_MUL },
+   { "/", 2, PGBENCH_DIV },
+   { "%", 2, PGBENCH_MOD },
+   /* actual functions */
+   { "abs", 1, PGBENCH_ABS },
+   { "min", -1, PGBENCH_MIN },
+   { "max", -1, PGBENCH_MAX },
+   { "debug", 1, PGBENCH_DEBUG },
+   /* NULL fname ends the table for find_func(); keep as last element */
+   { NULL, 0, 0 }
+};
+
+/*
+ * Look up a function or operator by name
+ *
+ * Walks PGBENCH_FUNCTIONS up to its NULL-named terminator and returns the
+ * index of the entry matching fname; unknown names are reported as errors.
+ */
+static int
+find_func(const char * fname)
+{
+   int idx;
+
+   for (idx = 0; PGBENCH_FUNCTIONS[idx].fname != NULL; idx++)
+   {
+       if (pg_strcasecmp(fname, PGBENCH_FUNCTIONS[idx].fname) == 0)
+           return idx;
+   }
+
+   /* no table entry matched */
+   expr_yyerror_more("unexpected function name", fname);
+
+   /* not reached */
+   return -1;
+}
+
+/* Append expr to list, allocating a new list first when list is NULL */
+static PgBenchExprList *
+make_elist(PgBenchExpr *expr, PgBenchExprList *list)
+{
+   PgBenchExprLink *cell;
+
+   /* first use: start from an empty list */
+   if (list == NULL)
+   {
+       list = pg_malloc(sizeof(PgBenchExprList));
+       list->head = list->tail = NULL;
+   }
+
+   /* the new cell always ends up last */
+   cell = pg_malloc(sizeof(PgBenchExprLink));
+   cell->expr = expr;
+   cell->next = NULL;
+
+   if (list->head != NULL)
+       list->tail->next = cell;
+   else
+       list->head = cell;
+   list->tail = cell;
+
+   return list;
+}
+
+/* Count the expressions in a list (a NULL list counts as empty) */
+static int
+elist_length(PgBenchExprList *list)
+{
+   PgBenchExprLink *cell;
+   int count = 0;
+
+   for (cell = list ? list->head : NULL; cell != NULL; cell = cell->next)
+       count++;
+
+   return count;
+}
+
+/* Build a function call node for entry fnumber of PGBENCH_FUNCTIONS */
+static PgBenchExpr *
+make_func(const int fnumber, PgBenchExprList *args)
 {
    PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
 
-   expr->etype = ENODE_OPERATOR;
-   expr->u.operator.operator = operator;
-   expr->u.operator.lexpr = lexpr;
-   expr->u.operator.rexpr = rexpr;
+   Assert(fnumber >= 0);
+
+   if (PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&    /* fixed arity: exact match */
+       PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args))
+       expr_yyerror_more("unexpected number of arguments",
+                         PGBENCH_FUNCTIONS[fnumber].fname);
+
+   /* nargs == -1 (min & max): any argument count is fine except zero */
+   if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
+       elist_length(args) == 0)
+       expr_yyerror_more("at least one argument expected",
+                         PGBENCH_FUNCTIONS[fnumber].fname);
+
+   expr->etype = ENODE_FUNCTION;
+   expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag;
+
+   /* keep only the chain of links; the head/tail wrapper is freed below */
+   expr->u.function.args = args != NULL? args->head: NULL;
+   if (args)
+       pg_free(args);
+
    return expr;
 }
 
index f1c4c7e5142dd970ad6cd96b0e22f7b668f4f2cc..df673f09f203bcc707cb940abca8dd1e9fdc2186 100644 (file)
@@ -46,6 +46,7 @@ space         [ \t\r\f]
 "%"                { yycol += yyleng; return '%'; }
 "("                { yycol += yyleng; return '('; }
 ")"                { yycol += yyleng; return ')'; }
+","                { yycol += yyleng; return ','; }
 
 :[a-zA-Z0-9_]+ {
                    yycol += yyleng;
@@ -57,8 +58,14 @@ space            [ \t\r\f]
                    yylval.ival = strtoint64(yytext);
                    return INTEGER;
                }
+[a-zA-Z0-9_]+   {
+                   yycol += yyleng;
+                   yylval.str = pg_strdup(yytext);
+                   return FUNCTION;
+               }
 
 [\n]           { yycol = 0; yyline++; }
+
 {space}+       { yycol += yyleng; /* ignore */ }
 
 .              {
@@ -71,10 +78,16 @@ space           [ \t\r\f]
 %%
 
 void
-yyerror(const char *message)
+expr_yyerror_more(const char *message, const char *more)
 {
    syntax_error(expr_source, expr_lineno, expr_full_line, expr_command,
-                message, NULL, expr_col + yycol);
+                message, more, expr_col + yycol);
+}
+
+void
+yyerror(const char *message)
+{
+   expr_yyerror_more(message, NULL);
 }
 
 /*
@@ -94,6 +107,9 @@ expr_scanner_init(const char *str, const char *source,
    expr_command = (char *) cmd;
    expr_col = (int) ecol;
 
+   /* reset column count for this scan */
+   yycol = 0;
+
    /*
     * Might be left over after error
     */
index 596d112d5babdc7f44e7c283cf15756ffd0d2d24..66cfdc9af8bbd25b20d760036f0e580f543d2894 100644 (file)
@@ -372,6 +372,8 @@ static void doLog(TState *thread, CState *st, instr_time *now,
      StatsData *agg, bool skipped, double latency, double lag);
 
 
+static bool evaluateExpr(CState *, PgBenchExpr *, int64 *);
+
 static void
 usage(void)
 {
@@ -990,117 +992,191 @@ getQueryParams(CState *st, const Command *command, const char **params)
        params[i] = getVariable(st, command->argv[i + 1]);
 }
 
+/* maximum number of function arguments */
+#define MAX_FARGS 16
+
 /*
- * Recursive evaluation of an expression in a pgbench script
- * using the current state of variables.
- * Returns whether the evaluation was ok,
- * the value itself is returned through the retval pointer.
+ * Recursive evaluation of functions
  */
 static bool
-evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+evalFunc(CState *st,
+        PgBenchFunction func, PgBenchExprLink *args, int64 *retval)
 {
-   switch (expr->etype)
-   {
-       case ENODE_INTEGER_CONSTANT:
-           {
-               *retval = expr->u.integer_constant.ival;
-               return true;
-           }
+   /* evaluate all function arguments */
+   int         nargs = 0;
+   int64       iargs[MAX_FARGS];
+   PgBenchExprLink *l = args;
 
-       case ENODE_VARIABLE:
-           {
-               char       *var;
+   for (nargs = 0; nargs < MAX_FARGS && l != NULL; nargs++, l = l->next)
+       if (!evaluateExpr(st, l->expr, &iargs[nargs]))
+           return false;
 
-               if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
-               {
-                   fprintf(stderr, "undefined variable \"%s\"\n",
-                           expr->u.variable.varname);
-                   return false;
-               }
-               *retval = strtoint64(var);
-               return true;
-           }
+   if (l != NULL)
+   {
+       fprintf(stderr,
+               "too many function arguments, maximum is %d\n", MAX_FARGS);
+       return false;
+   }
 
-       case ENODE_OPERATOR:
+   /* then evaluate function */
+   switch (func)
+   {
+       case PGBENCH_ADD:
+       case PGBENCH_SUB:
+       case PGBENCH_MUL:
+       case PGBENCH_DIV:
+       case PGBENCH_MOD:
            {
-               int64       lval;
-               int64       rval;
+               int64       lval = iargs[0],
+                           rval = iargs[1];
 
-               if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
-                   return false;
-               if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
-                   return false;
-               switch (expr->u.operator.operator)
+               Assert(nargs == 2);
+
+               switch (func)
                {
-                   case '+':
+                   case PGBENCH_ADD:
                        *retval = lval + rval;
                        return true;
 
-                   case '-':
+                   case PGBENCH_SUB:
                        *retval = lval - rval;
                        return true;
 
-                   case '*':
+                   case PGBENCH_MUL:
                        *retval = lval * rval;
                        return true;
 
-                   case '/':
+                   case PGBENCH_DIV:
+                   case PGBENCH_MOD:
                        if (rval == 0)
                        {
                            fprintf(stderr, "division by zero\n");
                            return false;
                        }
-
-                       /*
-                        * INT64_MIN / -1 is problematic, since the result
-                        * can't be represented on a two's-complement machine.
-                        * Some machines produce INT64_MIN, some produce zero,
-                        * some throw an exception. We can dodge the problem
-                        * by recognizing that division by -1 is the same as
-                        * negation.
-                        */
+                       /* special handling of -1 divisor */
                        if (rval == -1)
                        {
-                           *retval = -lval;
-
-                           /* overflow check (needed for INT64_MIN) */
-                           if (lval == PG_INT64_MIN)
+                           if (func == PGBENCH_DIV)
                            {
-                               fprintf(stderr, "bigint out of range\n");
-                               return false;
+                               /* overflow check (needed for INT64_MIN) */
+                               if (lval == PG_INT64_MIN)
+                               {
+                                   fprintf(stderr, "bigint out of range\n");
+                                   return false;
+                               }
+                               else
+                                   *retval = -lval;
                            }
+                           else
+                               *retval = 0;
+                           return true;
                        }
-                       else
+                       /* divisor is not -1 */
+                       if (func == PGBENCH_DIV)
                            *retval = lval / rval;
-
+                       else    /* func == PGBENCH_MOD */
+                           *retval = lval % rval;
                        return true;
 
-                   case '%':
-                       if (rval == 0)
-                       {
-                           fprintf(stderr, "division by zero\n");
-                           return false;
-                       }
+                   default:
+                       /* cannot get here */
+                       Assert(0);
+               }
+           }
 
-                       /*
-                        * Some machines throw a floating-point exception for
-                        * INT64_MIN % -1.  Dodge that problem by noting that
-                        * any value modulo -1 is 0.
-                        */
-                       if (rval == -1)
-                           *retval = 0;
-                       else
-                           *retval = lval % rval;
+       case PGBENCH_ABS:
+           {
+               Assert(nargs == 1);
 
-                       return true;
+               if (iargs[0] < 0)
+                   *retval = -iargs[0];
+               else
+                   *retval = iargs[0];
+
+               return true;
+           }
+
+       case PGBENCH_DEBUG:
+           {
+               Assert(nargs == 1);
+
+               fprintf(stderr, "debug(script=%d,command=%d): " INT64_FORMAT "\n",
+                       st->use_file, st->state + 1, iargs[0]);
+
+               *retval = iargs[0];
+
+               return true;
+           }
+
+       case PGBENCH_MIN:
+       case PGBENCH_MAX:
+           {
+               int64       extremum = iargs[0];
+               int         i;
+
+               Assert(nargs >= 1);
+
+               for (i = 1; i < nargs; i++)
+               {
+                   int64       ival = iargs[i];
+
+                   if (func == PGBENCH_MIN)
+                       extremum = extremum < ival ? extremum : ival;
+                   else if (func == PGBENCH_MAX)
+                       extremum = extremum > ival ? extremum : ival;
                }
 
-               fprintf(stderr, "bad operator\n");
-               return false;
+               *retval = extremum;
+               return true;
            }
 
        default:
-           break;
+           fprintf(stderr, "unexpected function tag: %d\n", func);
+           exit(1);
+   }
+}
+
+/*
+ * Recursive evaluation of an expression in a pgbench script
+ * using the current state of variables.
+ * Returns whether the evaluation was ok,
+ * the value itself is returned through the retval pointer.
+ */
+static bool
+evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+{
+   switch (expr->etype)
+   {
+       case ENODE_INTEGER_CONSTANT:
+           {
+               *retval = expr->u.integer_constant.ival;
+               return true;
+           }
+
+       case ENODE_VARIABLE:
+           {
+               char       *var;
+
+               if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
+               {
+                   fprintf(stderr, "undefined variable \"%s\"\n",
+                           expr->u.variable.varname);
+                   return false;
+               }
+               *retval = strtoint64(var);
+               return true;
+           }
+
+       case ENODE_FUNCTION:
+           return evalFunc(st,
+                           expr->u.function.function,
+                           expr->u.function.args,
+                           retval);
+
+       default:
+           fprintf(stderr, "unexpected enode type in evaluation: %d\n",
+                   expr->etype);
+           exit(1);
    }
 
    fprintf(stderr, "bad expression\n");
@@ -1710,6 +1786,7 @@ top:
                st->ecnt++;
                return true;
            }
+
            sprintf(res, INT64_FORMAT, result);
 
            if (!putVariable(st, argv[0], argv[1], res))
index 5bb24807c15c57928bc4ec338eeddad8311fad4a..c6aeb5b6dbd077d4db2b414d7fc8df192a35a4b0 100644 (file)
 #ifndef PGBENCH_H
 #define PGBENCH_H
 
+/* Types of expression nodes */
 typedef enum PgBenchExprType
 {
    ENODE_INTEGER_CONSTANT,
    ENODE_VARIABLE,
-   ENODE_OPERATOR
+   ENODE_FUNCTION
 } PgBenchExprType;
 
+/* Operation tags stored in ENODE_FUNCTION nodes (operators and functions) */
+typedef enum PgBenchFunction
+{
+   PGBENCH_ADD,
+   PGBENCH_SUB,
+   PGBENCH_MUL,
+   PGBENCH_DIV,
+   PGBENCH_MOD,
+   PGBENCH_DEBUG,
+   PGBENCH_ABS,
+   PGBENCH_MIN,
+   PGBENCH_MAX             /* last entry: no trailing comma (C89) */
+} PgBenchFunction;
+
 typedef struct PgBenchExpr PgBenchExpr;
+typedef struct PgBenchExprLink PgBenchExprLink;
+typedef struct PgBenchExprList PgBenchExprList;
 
 struct PgBenchExpr
 {
@@ -35,18 +52,31 @@ struct PgBenchExpr
        }           variable;
        struct
        {
-           char        operator;
-           PgBenchExpr *lexpr;
-           PgBenchExpr *rexpr;
-       }           operator;
+           PgBenchFunction function;
+           PgBenchExprLink *args;
+       }           function;
    }           u;
 };
 
+/* Singly-linked chain of expression nodes, plus a head/tail handle on it */
+struct PgBenchExprLink
+{
+   PgBenchExpr *expr;          /* expression held by this cell */
+   PgBenchExprLink *next;      /* following cell, NULL for the last one */
+};
+
+struct PgBenchExprList
+{
+   PgBenchExprLink *head;      /* first cell of the chain */
+   PgBenchExprLink *tail;      /* last cell, where new cells are appended */
+};
+
 extern PgBenchExpr *expr_parse_result;
 
 extern int expr_yyparse(void);
 extern int expr_yylex(void);
 extern void expr_yyerror(const char *str);
+extern void expr_yyerror_more(const char *str, const char *more);
 extern void expr_scanner_init(const char *str, const char *source,
                  const int lineno, const char *line,
                  const char *cmd, const int ecol);