Skip to content

Commit 6a651d8

Browse files
committed
Add --aggregate-interval option.
The new option specifies length of aggregation interval (in seconds). May be used only together with -l. With this option, the log contains per-interval summary (number of transactions, min/max latency and two additional fields useful for variance estimation). Patch contributed by Tomas Vondra, reviewed by Pavel Stehule. Slight change by Tatsuo Ishii, suggested by Robert Hass to emit an error message indicating that the option is not currently supported on Windows.
1 parent 2ab218b commit 6a651d8

File tree

2 files changed

+192
-14
lines changed

2 files changed

+192
-14
lines changed

contrib/pgbench/pgbench.c

+135-12
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ char *index_tablespace = NULL;
162162

163163
bool use_log; /* log transaction latencies to a file */
164164
bool use_quiet; /* quiet logging onto stderr */
165+
int agg_interval; /* log aggregates instead of individual transactions */
165166
bool is_connect; /* establish connection for each transaction */
166167
bool is_latencies; /* report per-command latencies */
167168
int main_pid; /* main process id used in log filename */
@@ -257,6 +258,18 @@ typedef struct
257258
char *argv[MAX_ARGS]; /* command word list */
258259
} Command;
259260

261+
typedef struct
262+
{
263+
264+
long start_time; /* when does the interval start */
265+
int cnt; /* number of transactions */
266+
double min_duration; /* min/max durations */
267+
double max_duration;
268+
double sum; /* sum(duration), sum(duration^2) - for estimates */
269+
double sum2;
270+
271+
} AggVals;
272+
260273
static Command **sql_files[MAX_FILES]; /* SQL script files */
261274
static int num_files; /* number of script files */
262275
static int num_commands = 0; /* total number of Command structs */
@@ -390,6 +403,8 @@ usage(void)
390403
" -l write transaction times to log file\n"
391404
" --sampling-rate NUM\n"
392405
" fraction of transactions to log (e.g. 0.01 for 1%% sample)\n"
406+
" --aggregate-interval NUM\n"
407+
" aggregate data over NUM seconds\n"
393408
" -M simple|extended|prepared\n"
394409
" protocol for submitting queries to server (default: simple)\n"
395410
" -n do not run VACUUM before tests\n"
@@ -911,9 +926,25 @@ clientDone(CState *st, bool ok)
911926
return false; /* always false */
912927
}
913928

929+
static
930+
void agg_vals_init(AggVals * aggs, instr_time start)
931+
{
932+
/* basic counters */
933+
aggs->cnt = 0; /* number of transactions */
934+
aggs->sum = 0; /* SUM(duration) */
935+
aggs->sum2 = 0; /* SUM(duration*duration) */
936+
937+
/* min and max transaction duration */
938+
aggs->min_duration = 0;
939+
aggs->max_duration = 0;
940+
941+
/* start of the current interval */
942+
aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
943+
}
944+
914945
/* return false iff client should be disconnected */
915946
static bool
916-
doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile)
947+
doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVals * agg)
917948
{
918949
PGresult *res;
919950
Command **commands;
@@ -978,22 +1009,74 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile)
9781009
if (sample_rate == 0.0 ||
9791010
pg_erand48(thread->random_state) <= sample_rate)
9801011
{
981-
9821012
INSTR_TIME_SET_CURRENT(now);
9831013
diff = now;
9841014
INSTR_TIME_SUBTRACT(diff, st->txn_begin);
9851015
usec = (double) INSTR_TIME_GET_MICROSEC(diff);
9861016

1017+
/* should we aggregate the results or not? */
1018+
if (agg_interval > 0)
1019+
{
1020+
/* are we still in the same interval? if yes, accumulate the
1021+
* values (print them otherwise) */
1022+
if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now))
1023+
{
1024+
agg->cnt += 1;
1025+
agg->sum += usec;
1026+
agg->sum2 += usec * usec;
1027+
1028+
/* first in this aggregation interval */
1029+
if ((agg->cnt == 1) || (usec < agg->min_duration))
1030+
agg->min_duration = usec;
1031+
1032+
if ((agg->cnt == 1) || (usec > agg->max_duration))
1033+
agg->max_duration = usec;
1034+
}
1035+
else
1036+
{
1037+
/* Loop until we reach the interval of the current transaction (and
1038+
* print all the empty intervals in between). */
1039+
while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(now))
1040+
{
1041+
/* This is a non-Windows branch (thanks to the ifdef in usage), so
1042+
* we don't need to handle this in a special way (see below). */
1043+
fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f\n",
1044+
agg->start_time, agg->cnt, agg->sum, agg->sum2,
1045+
agg->min_duration, agg->max_duration);
1046+
1047+
/* move to the next inteval */
1048+
agg->start_time = agg->start_time + agg_interval;
1049+
1050+
/* reset for "no transaction" intervals */
1051+
agg->cnt = 0;
1052+
agg->min_duration = 0;
1053+
agg->max_duration = 0;
1054+
agg->sum = 0;
1055+
agg->sum2 = 0;
1056+
}
1057+
1058+
/* and now update the reset values (include the current) */
1059+
agg->cnt = 1;
1060+
agg->min_duration = usec;
1061+
agg->max_duration = usec;
1062+
agg->sum = usec;
1063+
agg->sum2 = usec * usec;
1064+
}
1065+
}
1066+
else
1067+
{
1068+
/* no, print raw transactions */
9871069
#ifndef WIN32
988-
/* This is more than we really ought to know about instr_time */
989-
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
990-
st->id, st->cnt, usec, st->use_file,
991-
(long) now.tv_sec, (long) now.tv_usec);
1070+
/* This is more than we really ought to know about instr_time */
1071+
fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
1072+
st->id, st->cnt, usec, st->use_file,
1073+
(long) now.tv_sec, (long) now.tv_usec);
9921074
#else
993-
/* On Windows, instr_time doesn't provide a timestamp anyway */
994-
fprintf(logfile, "%d %d %.0f %d 0 0\n",
995-
st->id, st->cnt, usec, st->use_file);
1075+
/* On Windows, instr_time doesn't provide a timestamp anyway */
1076+
fprintf(logfile, "%d %d %.0f %d 0 0\n",
1077+
st->id, st->cnt, usec, st->use_file);
9961078
#endif
1079+
}
9971080
}
9981081
}
9991082

@@ -2055,6 +2138,7 @@ main(int argc, char **argv)
20552138
{"tablespace", required_argument, NULL, 2},
20562139
{"unlogged-tables", no_argument, &unlogged_tables, 1},
20572140
{"sampling-rate", required_argument, NULL, 4},
2141+
{"aggregate-interval", required_argument, NULL, 5},
20582142
{NULL, 0, NULL, 0}
20592143
};
20602144

@@ -2293,6 +2377,19 @@ main(int argc, char **argv)
22932377
exit(1);
22942378
}
22952379
break;
2380+
case 5:
2381+
#ifdef WIN32
2382+
fprintf(stderr, "--aggregate-interval is not currently supported on Windows");
2383+
exit(1);
2384+
#else
2385+
agg_interval = atoi(optarg);
2386+
if (agg_interval <= 0)
2387+
{
2388+
fprintf(stderr, "invalid number of seconds for aggregation: %d\n", agg_interval);
2389+
exit(1);
2390+
}
2391+
#endif
2392+
break;
22962393
default:
22972394
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
22982395
exit(1);
@@ -2342,6 +2439,28 @@ main(int argc, char **argv)
23422439
exit(1);
23432440
}
23442441

2442+
/* --sampling-rate may must not be used with --aggregate-interval */
2443+
if (sample_rate > 0.0 && agg_interval > 0)
2444+
{
2445+
fprintf(stderr, "log sampling (--sampling-rate) and aggregation (--aggregate-interval) can't be used at the same time\n");
2446+
exit(1);
2447+
}
2448+
2449+
if (agg_interval > 0 && (! use_log)) {
2450+
fprintf(stderr, "log aggregation is allowed only when actually logging transactions\n");
2451+
exit(1);
2452+
}
2453+
2454+
if ((duration > 0) && (agg_interval > duration)) {
2455+
fprintf(stderr, "number of seconds for aggregation (%d) must not be higher that test duration (%d)\n", agg_interval, duration);
2456+
exit(1);
2457+
}
2458+
2459+
if ((duration > 0) && (agg_interval > 0) && (duration % agg_interval != 0)) {
2460+
fprintf(stderr, "duration (%d) must be a multiple of aggregation interval (%d)\n", duration, agg_interval);
2461+
exit(1);
2462+
}
2463+
23452464
/*
23462465
* is_latencies only works with multiple threads in thread-based
23472466
* implementations, not fork-based ones, because it supposes that the
@@ -2601,7 +2720,10 @@ threadRun(void *arg)
26012720
int remains = nstate; /* number of remaining clients */
26022721
int i;
26032722

2723+
AggVals aggs;
2724+
26042725
result = pg_malloc(sizeof(TResult));
2726+
26052727
INSTR_TIME_SET_ZERO(result->conn_time);
26062728

26072729
/* open log file if requested */
@@ -2636,6 +2758,8 @@ threadRun(void *arg)
26362758
INSTR_TIME_SET_CURRENT(result->conn_time);
26372759
INSTR_TIME_SUBTRACT(result->conn_time, thread->start_time);
26382760

2761+
agg_vals_init(&aggs, thread->start_time);
2762+
26392763
/* send start up queries in async manner */
26402764
for (i = 0; i < nstate; i++)
26412765
{
@@ -2644,7 +2768,7 @@ threadRun(void *arg)
26442768
int prev_ecnt = st->ecnt;
26452769

26462770
st->use_file = getrand(thread, 0, num_files - 1);
2647-
if (!doCustom(thread, st, &result->conn_time, logfile))
2771+
if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
26482772
remains--; /* I've aborted */
26492773

26502774
if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
@@ -2746,7 +2870,7 @@ threadRun(void *arg)
27462870
if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
27472871
|| commands[st->state]->type == META_COMMAND))
27482872
{
2749-
if (!doCustom(thread, st, &result->conn_time, logfile))
2873+
if (!doCustom(thread, st, &result->conn_time, logfile, &aggs))
27502874
remains--; /* I've aborted */
27512875
}
27522876

@@ -2773,7 +2897,6 @@ threadRun(void *arg)
27732897
return result;
27742898
}
27752899

2776-
27772900
/*
27782901
* Support for duration option: set timer_exceeded after so many seconds.
27792902
*/

doc/src/sgml/pgbench.sgml

+57-2
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,21 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
350350
</listitem>
351351
</varlistentry>
352352

353+
<varlistentry>
354+
<term><option>--aggregate-interval</option> <replaceable>seconds</></term>
355+
<listitem>
356+
<para>
357+
Length of aggregation interval (in seconds). May be used only together
358+
with <application>-l</application> - with this option, the log contains
359+
per-interval summary (number of transactions, min/max latency and two
360+
additional fields useful for variance estimation).
361+
</para>
362+
<para>
363+
This option is not currently supported on Windows.
364+
</para>
365+
</listitem>
366+
</varlistentry>
367+
353368
<varlistentry>
354369
<term><option>-M</option> <replaceable>querymode</></term>
355370
<listitem>
@@ -746,8 +761,9 @@ END;
746761
<title>Per-Transaction Logging</title>
747762

748763
<para>
749-
With the <option>-l</> option, <application>pgbench</> writes the time
750-
taken by each transaction to a log file. The log file will be named
764+
With the <option>-l</> option but without the <option>--aggregate-interval</option>,
765+
<application>pgbench</> writes the time taken by each transaction
766+
to a log file. The log file will be named
751767
<filename>pgbench_log.<replaceable>nnn</></filename>, where
752768
<replaceable>nnn</> is the PID of the pgbench process.
753769
If the <option>-j</> option is 2 or higher, creating multiple worker
@@ -792,6 +808,45 @@ END;
792808
</para>
793809
</refsect2>
794810

811+
<refsect2>
812+
<title>Aggregated Logging</title>
813+
814+
<para>
815+
With the <option>--aggregate-interval</option> option, the logs use a bit different format:
816+
817+
<synopsis>
818+
<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</>
819+
</synopsis>
820+
821+
where <replaceable>interval_start</> is the start of the interval (UNIX epoch
822+
format timestamp), <replaceable>num_of_transactions</> is the number of transactions
823+
within the interval, <replaceable>latency_sum</replaceable> is a sum of latencies
824+
(so you can compute average latency easily). The following two fields are useful
825+
for variance estimation - <replaceable>latency_sum</> is a sum of latencies and
826+
<replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two
827+
fields are <replaceable>min_latency</> - a minimum latency within the interval, and
828+
<replaceable>max_latency</> - maximum latency within the interval. A transaction is
829+
counted into the interval when it was committed.
830+
</para>
831+
832+
<para>
833+
Here is example outputs:
834+
<screen>
835+
1345828501 5601 1542744 483552416 61 2573
836+
1345828503 7884 1979812 565806736 60 1479
837+
1345828505 7208 1979422 567277552 59 1391
838+
1345828507 7685 1980268 569784714 60 1398
839+
1345828509 7073 1979779 573489941 236 1411
840+
</screen></para>
841+
842+
<para>
843+
Notice that while the plain (unaggregated) log file contains index
844+
of the custom script files, the aggregated log does not. Therefore if
845+
you need per script data, you need to aggregate the data on your own.
846+
</para>
847+
848+
</refsect2>
849+
795850
<refsect2>
796851
<title>Per-Statement Latencies</title>
797852

0 commit comments

Comments
 (0)