#!/bin/bash

source ./config
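# The config file is expected to provide the test and results database
# connection settings (TESTHOST/TESTUSER/TESTPORT/TESTDB and the RESULT*
# equivalents) plus PGBENCHBIN, BASEDIR, TESTDIR, SCRIPT, TABLEPREFIX,
# MAX_WORKERS and SKIPINIT, and may also set CLIENTS, TOTTRANS or RUNTIME.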

# Take the client count and total transaction count from the command line
# if passed; use the defaults otherwise

# Clients
if [ -n "$1" ]; then
  CLIENTS="$1"
elif [ -z "$CLIENTS" ]; then
  CLIENTS=4
fi

# Transactions
if [ -n "$2" ]; then
  TOTTRANS="$2"
elif [ -z "$TOTTRANS" ]; then
  TOTTRANS=1000
fi
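
# Example with hypothetical values:  "./benchwarmer 8 4000" runs 8 clients
# and 4000 total transactions, overriding any CLIENTS/TOTTRANS set in the
# config file.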

# Query the test database and put its background writer statistics into
# a set of shell variables, so those statistics can later be inserted
# into the results database.
#
# This presumes that you have already set $BACKEND_FSYNC before
# calling.
function get_bgwriter {
  BGW=`$TESTPSQL -A -t -F" " -c "select checkpoints_timed,checkpoints_req,buffers_checkpoint,buffers_clean,maxwritten_clean,buffers_backend,buffers_alloc,$BACKEND_FSYNC from pg_stat_bgwriter"`
  set -- $BGW
  checkpoints_timed=$1
  checkpoints_req=$2
  buffers_checkpoint=$3
  buffers_clean=$4
  maxwritten_clean=$5
  buffers_backend=$6
  buffers_alloc=$7
  buffers_backend_fsync=$8
}
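
# get_bgwriter is called once before and once after the pgbench run; the
# difference between the two sets of counters (computed below) is the
# background writer activity caused by the test itself.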

# Figure out how many transactions per client, then recompute
# a new total; this copes with rounding issues
TRANS=`expr $TOTTRANS / $CLIENTS`
TOTTRANS=`expr $TRANS \* $CLIENTS`
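# For example, TOTTRANS=1000 with CLIENTS=3 gives TRANS=333 per client and
# a corrected TOTTRANS of 999.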

# If a runtime has been specified instead, throw away the transaction
# counts and use the runtime instead
if [ -n "$RUNTIME" ] ; then
  TESTLEN="-T $RUNTIME"
  TOTTRANS=""
else
  TESTLEN="-t $TRANS"
fi
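
# For example, RUNTIME=300 in the config produces "-T 300" (a timed 300
# second run); otherwise the default CLIENTS=4 and TOTTRANS=1000 work out
# to "-t 250" transactions per client.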

# Set the WORKERS string so that the largest possible worker count
# up to MAX_WORKERS is used, while still dividing CLIENTS evenly
# among the workers.
WORKERS=""
NUM_WORKERS="1"

if [ -n "$MAX_WORKERS" ] ; then

  # Only bother adjusting WORKERS if the maximum is >1.
  # That keeps us out of trouble if using a pgbench before 9.0,
  # where any value for "-j" will be rejected, as long as the
  # config file we're given isn't set up incorrectly.

  if [ "$MAX_WORKERS" -gt 1 ]; then
    NUM_WORKERS=$MAX_WORKERS

    while [ "$NUM_WORKERS" -gt 1 ]; do
      (( remainder=$CLIENTS % $NUM_WORKERS ))
      if [ $remainder -eq 0 ] ; then
        break
      fi
      (( NUM_WORKERS = $NUM_WORKERS - 1 ))
    done

    WORKERS="-j ${NUM_WORKERS}"
  fi
fi
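
# For example, CLIENTS=6 with MAX_WORKERS=4 steps down from 4 until it finds
# 3, which divides 6 evenly, so pgbench runs with "-j 3".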

# psql statements for the test database and the result database
TESTPSQL="psql -h $TESTHOST -U $TESTUSER -p $TESTPORT -d $TESTDB"
RESULTPSQL="psql -h $RESULTHOST -U $RESULTUSER -p $RESULTPORT -d $RESULTDB"
echo Running tests using:  $TESTPSQL
echo Storing results using:  $RESULTPSQL

# Determine database scale
# TODO If running a custom test, this won't return a useful value.  Should
# include a config file setting for whether we're testing with the
# standard tables or not.
SCALE=`$TESTPSQL -A -t -c "select count(*) from ${TABLEPREFIX}branches"`
if [ -z "$SCALE" ]; then
  echo ERROR:  Attempt to determine database scale returned \"$SCALE\", aborting
  exit 1
fi

# Confirm we have a useful pgbench to run
if [ ! -f "$PGBENCHBIN" ]; then
  echo ERROR:  cannot find pgbench binary $PGBENCHBIN, aborting
  exit 1
fi

# Find current test set
SET=`$RESULTPSQL -A -t -c "select max(set) from testset"`
if [ "$?" -ne "0" ]; then
  echo ERROR:  Attempt to determine test set failed
  # TODO Create a fake first set if this happens?  Right now,
  # the likely case is that the test DDL was never added, which
  # makes that sort of correction attempt unlikely to be useful
  exit 1
fi

# Cleanup pgbench tables, unless we've been told to skip that
if [ "$SKIPINIT" != "1" ]; then
  echo Cleaning up database $TESTDB
  $TESTPSQL -c "truncate table ${TABLEPREFIX}history"
  $TESTPSQL -c "vacuum analyze"
  $TESTPSQL -c "checkpoint"
  # We want clean stats from the pg_stat_bgwriter, but those won't show up
  # until after the checkpoint is done.  Wait a bit for the stats to update
  sleep 20
fi

# Create the tests record
DBSIZE=`$TESTPSQL -A -t -c "select pg_database_size('$TESTDB')"`
$RESULTPSQL -q -c "insert into tests (script,clients,workers,set,scale,dbsize) values('$SCRIPT','$CLIENTS','$NUM_WORKERS','$SET','$SCALE','$DBSIZE')"
TEST=`$RESULTPSQL -A -t -c "select max(test) from tests"`
if [ "$?" -ne "0" ]; then
  echo "ERROR:  Can't read from tests table.  Was the test data installed?"
  exit 1
fi

if [ -z "$TEST" ]; then
  echo ERROR:  Attempt to get a test number returned \"$TEST\", aborting
  exit 1
fi

# Figure out if this version of PostgreSQL includes buffers_backend_fsync
$TESTPSQL -c "SELECT buffers_backend_fsync FROM pg_stat_bgwriter" > /dev/null 2>&1
if [ "$?" -ne "0" ]; then
  BACKEND_FSYNC="0"
else
  BACKEND_FSYNC="buffers_backend_fsync"
fi
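
# buffers_backend_fsync only appeared in pg_stat_bgwriter in PostgreSQL 9.1;
# on older servers the literal 0 is selected in its place so the statistics
# queries stay the same.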

# Grab starting values for statistics
get_bgwriter
$RESULTPSQL -c "insert into test_bgwriter(test,checkpoints_timed,checkpoints_req,buffers_checkpoint,buffers_clean,maxwritten_clean,buffers_backend,buffers_alloc,buffers_backend_fsync) values('$TEST','$checkpoints_timed','$checkpoints_req','$buffers_checkpoint','$buffers_clean','$maxwritten_clean','$buffers_backend','$buffers_alloc','$buffers_backend_fsync')"

echo This is test $TEST

mkdir -p results/$TEST
cd results/$TEST

echo Script $SCRIPT executing for $CLIENTS concurrent users... 1>&2
$PGBENCHBIN -f $BASEDIR/$TESTDIR/$SCRIPT -s $SCALE -l -n $TESTLEN -U $TESTUSER -h $TESTHOST -p $TESTPORT -c $CLIENTS $WORKERS $TESTDB > results.txt &
P=$!
wait $P
$RESULTPSQL -q -c "update tests set end_time=now() where test=$TEST"

# Update bgwriter data with delta
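# The shell expands $checkpoints_timed and friends to the end-of-test
# values here, so each column ends up as (end value - start value).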
get_bgwriter
$RESULTPSQL -c "update test_bgwriter set \
  checkpoints_timed=$checkpoints_timed - checkpoints_timed,\
  checkpoints_req=$checkpoints_req - checkpoints_req,\
  buffers_checkpoint=$buffers_checkpoint - buffers_checkpoint,\
  buffers_clean=$buffers_clean - buffers_clean,\
  maxwritten_clean=$maxwritten_clean - maxwritten_clean,\
  buffers_backend=$buffers_backend - buffers_backend,\
  buffers_alloc=$buffers_alloc - buffers_alloc,\
  buffers_backend_fsync=$buffers_backend_fsync - buffers_backend_fsync \
  where test='$TEST'"

# Save pgbench log and results
# In the multi-threaded case, there may be a number of files, so copy
# them all in
cat pgbench_log.${P}* > pgbench.log
cat results.txt
echo Worst latency results:

# TODO On Solaris, this may need to use /usr/xpg4/bin/tail instead
cut -f 3 -d " " pgbench.log | sort -n | tail -n 5

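# The lines of interest in results.txt look roughly like
#   tps = 1234.56 (including connections establishing)
#   number of transactions actually processed: 1000/1000
# (pgbench output of this era), so the cut calls below pull the number
# out of each line.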
tps=`grep "(including connections establishing)" results.txt | cut -d " " -f 3`
trans=`grep "number of transactions actually processed:" results.txt | cut -d":" -f 2 | cut -d "/" -f 1`
$RESULTPSQL -q -c "update tests set tps='$tps',trans='$trans' where test=$TEST"

# Confirm we have a patched version of pgbench that logs transaction timestamps
TESTFORTS=`head -n 1 pgbench.log | cut -d" " -f 6`
if [ -z "$TESTFORTS" ]; then
  echo
  echo "ERROR:  the pgbench used for this test is missing transaction"
  echo "timestamps.  No latency information will be imported into"
  echo "the database, and no plots will be generated."
  echo

else

  # Import timestamp information
  $BASEDIR/log-to-csv.py $TEST < pgbench.log > timing.csv
  $RESULTPSQL -c "copy timing from stdin with csv" < timing.csv

  # Plot result graphs
  $RESULTPSQL -A -t -F' ' -c "select extract(epoch from date_trunc('second',ts)),count(*) from timing where test=$TEST group by date_trunc('second',ts) order by date_trunc('second',ts)" > tpsdata.txt
  gnuplot $BASEDIR/tps.script

  $RESULTPSQL -A -t -F' ' -c "select extract(epoch from ts),latency from timing where test=$TEST" > latency.txt
  gnuplot $BASEDIR/latency.script

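  # The 90th percentile is approximated by ordering this test's latencies
  # and skipping the first 90% of rows, using the transaction count stored
  # in the tests table, then taking the next row.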
  $RESULTPSQL -q -c "update tests set avg_latency=(select avg(latency) from timing where tests.test=timing.test), max_latency=(select max(latency)from timing where tests.test=timing.test), percentile_90_latency=(select latency from timing where tests.test=timing.test order by latency offset (round(0.90*trans)) limit 1) where tests.test='$TEST'"
fi

cp $BASEDIR/test-index.html index.html

# Now that we're done plotting and computing stats, wipe the low-level 
# data we don't need anymore
$RESULTPSQL -q -c "truncate table timing"

# Save some configuration information about the server
CLIENTHOST=`hostname`
SERVERHOST="$TESTHOST"
if [ "$SERVERHOST" = "localhost" ]; then
  SERVERHOST="$CLIENTHOST"
fi
SETTINGS="pg_settings.txt"

# PostgreSQL and results
echo Test results: > $SETTINGS
$RESULTPSQL -c "select script,clients,round(tps) as tps,round(1000*avg_latency)/1000 as avg_latency,round(1000*max_latency)/1000 as max_latency from tests where test=$TEST" | grep -v " row)" >> $SETTINGS
echo Server $SERVERHOST, client $CLIENTHOST >> $SETTINGS
echo >> $SETTINGS
echo Server settings in postgresql.conf: >> $SETTINGS
$RESULTPSQL -c "select name,current_setting(name) from pg_settings where source='configuration file' and not name in ('DateStyle','lc_messages','lc_monetary','lc_numeric','lc_time','listen_addresses','log_directory','log_rotation_age','log_rotation_size','log_truncate_on_rotation');" | grep -v " rows)" >> $SETTINGS

# Operating system information 
echo >> $SETTINGS
echo "benchmark client OS Configuration (may not be the same as the server)" >> $SETTINGS
uname -a >> $SETTINGS
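# The Linux VM writeback settings (dirty_background_ratio, dirty_ratio and
# related knobs) have a large effect on write-heavy benchmarks, so record
# whatever /proc exposes for them.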
for f in /proc/sys/vm/dirty_* ; do
  S=`cat $f`
  echo  $f=$S >> $SETTINGS
done
echo >> $SETTINGS

for f in `ls /etc/lsb-release /etc/debian_version /etc/redhat-release 2>/dev/null` ; do
  echo $f: >> $SETTINGS
  cat $f >> $SETTINGS
  echo >> $SETTINGS
done


# Remove temporary files
rm -f pgbench_log.${P}*
rm -f pgbench.log
rm -f timing.csv
rm -f tpsdata.txt
rm -f latency.txt