diff options
| author | Tom Lane | 2021-11-29 02:32:36 +0000 |
|---|---|---|
| committer | Tom Lane | 2021-11-29 02:33:07 +0000 |
| commit | 3804539e48e794781c6145c7f988f5d507418fa8 (patch) | |
| tree | 317904b43ca8c1d510b23cb8fdd7b05a75e971bc /src/bin/pgbench | |
| parent | f44ceb46ec2d8da48f6e145bf462d5620c25e079 (diff) | |
Replace random(), pg_erand48(), etc with a better PRNG API and algorithm.
Standardize on xoroshiro128** as our basic PRNG algorithm, eliminating
a bunch of platform dependencies as well as fundamentally-obsolete PRNG
code. In addition, this API replacement will ease replacing the
algorithm again in future, should that become necessary.
xoroshiro128** is a few percent slower than the drand48 family,
but it can produce full-width 64-bit random values not only 48-bit,
and it should be much more trustworthy. It's likely to be noticeably
faster than the platform's random(), depending on which platform you
are thinking about; and we can have non-global state vectors easily,
unlike with random(). It is not cryptographically strong, but neither
are the functions it replaces.
Fabien Coelho, reviewed by Dean Rasheed, Aleksander Alekseev, and myself
Discussion: https://postgr.es/m/alpine.DEB.2.22.394.2105241211230.165418@pseudo
Diffstat (limited to 'src/bin/pgbench')
| -rw-r--r-- | src/bin/pgbench/pgbench.c | 122 | ||||
| -rw-r--r-- | src/bin/pgbench/t/001_pgbench_with_server.pl | 23 |
2 files changed, 62 insertions, 83 deletions
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index c12b6f06159..ea9639984cf 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -59,6 +59,7 @@ #include "common/int.h" #include "common/logging.h" +#include "common/pg_prng.h" #include "common/string.h" #include "common/username.h" #include "fe_utils/cancel.h" @@ -350,16 +351,8 @@ typedef struct StatsData */ pg_time_usec_t epoch_shift; -/* - * Struct to keep random state. - */ -typedef struct RandomState -{ - unsigned short xseed[3]; -} RandomState; - /* Various random sequences are initialized from this one. */ -static RandomState base_random_sequence; +static pg_prng_state base_random_sequence; /* Synchronization barrier for start and connection */ static THREAD_BARRIER_T barrier; @@ -461,7 +454,7 @@ typedef struct * Separate randomness for each client. This is used for random functions * PGBENCH_RANDOM_* during the execution of the script. */ - RandomState cs_func_rs; + pg_prng_state cs_func_rs; int use_file; /* index in sql_script for this client */ int command; /* command number in script */ @@ -498,9 +491,9 @@ typedef struct * random state to make all of them independent of each other and * therefore deterministic at the thread level. */ - RandomState ts_choose_rs; /* random state for selecting a script */ - RandomState ts_throttle_rs; /* random state for transaction throttling */ - RandomState ts_sample_rs; /* random state for log sampling */ + pg_prng_state ts_choose_rs; /* random state for selecting a script */ + pg_prng_state ts_throttle_rs; /* random state for transaction throttling */ + pg_prng_state ts_sample_rs; /* random state for log sampling */ int64 throttle_trigger; /* previous/next throttling (us) */ FILE *logfile; /* where to log, or NULL */ @@ -898,42 +891,28 @@ strtodouble(const char *str, bool errorOK, double *dv) } /* - * Initialize a random state struct. + * Initialize a prng state struct. * * We derive the seed from base_random_sequence, which must be set up already. */ static void -initRandomState(RandomState *random_state) -{ - random_state->xseed[0] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); - random_state->xseed[1] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); - random_state->xseed[2] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); +initRandomState(pg_prng_state *state) +{ + pg_prng_seed(state, pg_prng_uint64(&base_random_sequence)); } + /* - * Random number generator: uniform distribution from min to max inclusive. + * random number generator: uniform distribution from min to max inclusive. * * Although the limits are expressed as int64, you can't generate the full * int64 range in one call, because the difference of the limits mustn't - * overflow int64. In practice it's unwise to ask for more than an int32 - * range, because of the limited precision of pg_erand48(). + * overflow int64. This is not checked. */ static int64 -getrand(RandomState *random_state, int64 min, int64 max) +getrand(pg_prng_state *state, int64 min, int64 max) { - /* - * Odd coding is so that min and max have approximately the same chance of - * being selected as do numbers between them. - * - * pg_erand48() is thread-safe and concurrent, which is why we use it - * rather than random(), which in glibc is non-reentrant, and therefore - * protected by a mutex, and therefore a bottleneck on machines with many - * CPUs. - */ - return min + (int64) ((max - min + 1) * pg_erand48(random_state->xseed)); + return min + (int64) pg_prng_uint64_range(state, 0, max - min); } /* @@ -942,7 +921,7 @@ getrand(RandomState *random_state, int64 min, int64 max) * value is exp(-parameter). */ static int64 -getExponentialRand(RandomState *random_state, int64 min, int64 max, +getExponentialRand(pg_prng_state *state, int64 min, int64 max, double parameter) { double cut, @@ -952,8 +931,8 @@ getExponentialRand(RandomState *random_state, int64 min, int64 max, /* abort if wrong parameter, but must really be checked beforehand */ Assert(parameter > 0.0); cut = exp(-parameter); - /* erand in [0, 1), uniform in (0, 1] */ - uniform = 1.0 - pg_erand48(random_state->xseed); + /* pg_prng_double value in [0, 1), uniform in (0, 1] */ + uniform = 1.0 - pg_prng_double(state); /* * inner expression in (cut, 1] (if parameter > 0), rand in [0, 1) @@ -966,7 +945,7 @@ getExponentialRand(RandomState *random_state, int64 min, int64 max, /* random number generator: gaussian distribution from min to max inclusive */ static int64 -getGaussianRand(RandomState *random_state, int64 min, int64 max, +getGaussianRand(pg_prng_state *state, int64 min, int64 max, double parameter) { double stdev; @@ -990,13 +969,13 @@ getGaussianRand(RandomState *random_state, int64 min, int64 max, do { /* - * pg_erand48 generates [0,1), but for the basic version of the + * pg_prng_double generates [0, 1), but for the basic version of the * Box-Muller transform the two uniformly distributed random numbers - * are expected in (0, 1] (see + * are expected to be in (0, 1] (see * https://en.wikipedia.org/wiki/Box-Muller_transform) */ - double rand1 = 1.0 - pg_erand48(random_state->xseed); - double rand2 = 1.0 - pg_erand48(random_state->xseed); + double rand1 = 1.0 - pg_prng_double(state); + double rand2 = 1.0 - pg_prng_double(state); /* Box-Muller basic form transform */ double var_sqrt = sqrt(-2.0 * log(rand1)); @@ -1026,7 +1005,7 @@ getGaussianRand(RandomState *random_state, int64 min, int64 max, * not be one. */ static int64 -getPoissonRand(RandomState *random_state, double center) +getPoissonRand(pg_prng_state *state, double center) { /* * Use inverse transform sampling to generate a value > 0, such that the @@ -1034,8 +1013,8 @@ getPoissonRand(RandomState *random_state, double center) */ double uniform; - /* erand in [0, 1), uniform in (0, 1] */ - uniform = 1.0 - pg_erand48(random_state->xseed); + /* pg_prng_double value in [0, 1), uniform in (0, 1] */ + uniform = 1.0 - pg_prng_double(state); return (int64) (-log(uniform) * center + 0.5); } @@ -1048,7 +1027,7 @@ getPoissonRand(RandomState *random_state, double center) * This works for s > 1.0, but may perform badly for s very close to 1.0. */ static int64 -computeIterativeZipfian(RandomState *random_state, int64 n, double s) +computeIterativeZipfian(pg_prng_state *state, int64 n, double s) { double b = pow(2.0, s - 1.0); double x, @@ -1063,8 +1042,8 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) while (true) { /* random variates */ - u = pg_erand48(random_state->xseed); - v = pg_erand48(random_state->xseed); + u = pg_prng_double(state); + v = pg_prng_double(state); x = floor(pow(u, -1.0 / (s - 1.0))); @@ -1078,14 +1057,14 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) /* random number generator: zipfian distribution from min to max inclusive */ static int64 -getZipfianRand(RandomState *random_state, int64 min, int64 max, double s) +getZipfianRand(pg_prng_state *state, int64 min, int64 max, double s) { int64 n = max - min + 1; /* abort if parameter is invalid */ Assert(MIN_ZIPFIAN_PARAM <= s && s <= MAX_ZIPFIAN_PARAM); - return min - 1 + computeIterativeZipfian(random_state, n, s); + return min - 1 + computeIterativeZipfian(state, n, s); } /* @@ -1142,7 +1121,7 @@ getHashMurmur2(int64 val, uint64 seed) * For small sizes, this generates each of the (size!) possible permutations * of integers in the range [0, size) with roughly equal probability. Once * the size is larger than 20, the number of possible permutations exceeds the - * number of distinct states of the internal pseudorandom number generators, + * number of distinct states of the internal pseudorandom number generator, * and so not all possible permutations can be generated, but the permutations * chosen should continue to give the appearance of being random. * @@ -1152,8 +1131,8 @@ getHashMurmur2(int64 val, uint64 seed) static int64 permute(const int64 val, const int64 isize, const int64 seed) { - RandomState random_state1; - RandomState random_state2; + /* using a high-end PRNG is probably overkill */ + pg_prng_state state; uint64 size; uint64 v; int masklen; @@ -1163,14 +1142,8 @@ permute(const int64 val, const int64 isize, const int64 seed) if (isize < 2) return 0; /* nothing to permute */ - /* Initialize a pair of random states using the seed */ - random_state1.xseed[0] = seed & 0xFFFF; - random_state1.xseed[1] = (seed >> 16) & 0xFFFF; - random_state1.xseed[2] = (seed >> 32) & 0xFFFF; - - random_state2.xseed[0] = (((uint64) seed) >> 48) & 0xFFFF; - random_state2.xseed[1] = seed & 0xFFFF; - random_state2.xseed[2] = (seed >> 16) & 0xFFFF; + /* Initialize prng state using the seed */ + pg_prng_seed(&state, (uint64) seed); /* Computations are performed on unsigned values */ size = (uint64) isize; @@ -1216,8 +1189,8 @@ permute(const int64 val, const int64 isize, const int64 seed) t; /* Random multiply (by an odd number), XOR and rotate of lower half */ - m = (uint64) getrand(&random_state1, 0, mask) | 1; - r = (uint64) getrand(&random_state2, 0, mask); + m = (pg_prng_uint64(&state) & mask) | 1; + r = pg_prng_uint64(&state) & mask; if (v <= mask) { v = ((v * m) ^ r) & mask; @@ -1225,8 +1198,8 @@ permute(const int64 val, const int64 isize, const int64 seed) } /* Random multiply (by an odd number), XOR and rotate of upper half */ - m = (uint64) getrand(&random_state1, 0, mask) | 1; - r = (uint64) getrand(&random_state2, 0, mask); + m = (pg_prng_uint64(&state) & mask) | 1; + r = pg_prng_uint64(&state) & mask; t = size - 1 - v; if (t <= mask) { @@ -1236,7 +1209,7 @@ permute(const int64 val, const int64 isize, const int64 seed) } /* Random offset */ - r = (uint64) getrand(&random_state2, 0, size - 1); + r = pg_prng_uint64_range(&state, 0, size - 1); v = (v + r) % size; } @@ -3831,7 +3804,7 @@ doLog(TState *thread, CState *st, * to the random sample. */ if (sample_rate != 0.0 && - pg_erand48(thread->ts_sample_rs.xseed) > sample_rate) + pg_prng_double(&thread->ts_sample_rs) > sample_rate) return; /* should we aggregate the results or not? */ @@ -5770,12 +5743,11 @@ set_random_seed(const char *seed) if (seed != NULL) pg_log_info("setting random seed to %llu", (unsigned long long) iseed); + random_seed = iseed; - /* Fill base_random_sequence with low-order bits of seed */ - base_random_sequence.xseed[0] = iseed & 0xFFFF; - base_random_sequence.xseed[1] = (iseed >> 16) & 0xFFFF; - base_random_sequence.xseed[2] = (iseed >> 32) & 0xFFFF; + /* Initialize base_random_sequence using seed */ + pg_prng_seed(&base_random_sequence, (uint64) iseed); return true; } @@ -6449,9 +6421,7 @@ main(int argc, char **argv) /* set default seed for hash functions */ if (lookupVariable(&state[0], "default_seed") == NULL) { - uint64 seed = - ((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) | - (((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32); + uint64 seed = pg_prng_uint64(&base_random_sequence); for (i = 0; i < nclients; i++) if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed)) diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 69ffa595dd4..97ed6c71edf 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -369,7 +369,6 @@ $node->append_conf('postgresql.conf', $node->reload; # test expressions -# command 1..3 and 23 depend on random seed which is used to call srandom. $node->pgbench( '--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dn=null -Dt=t -Df=of -Dd=1.0', 0, @@ -378,9 +377,9 @@ $node->pgbench( qr{setting random seed to 5432\b}, # After explicit seeding, the four random checks (1-3,20) are - # deterministic - qr{command=1.: int 13\b}, # uniform random - qr{command=2.: int 116\b}, # exponential random + # deterministic; but see also magic values in checks 111,113. + qr{command=1.: int 17\b}, # uniform random + qr{command=2.: int 104\b}, # exponential random qr{command=3.: int 1498\b}, # gaussian random qr{command=4.: int 4\b}, qr{command=5.: int 5\b}, @@ -394,7 +393,7 @@ $node->pgbench( qr{command=15.: double 15\b}, qr{command=16.: double 16\b}, qr{command=17.: double 17\b}, - qr{command=20.: int 1\b}, # zipfian random + qr{command=20.: int 3\b}, # zipfian random qr{command=21.: double -27\b}, qr{command=22.: double 1024\b}, qr{command=23.: double 1\b}, @@ -448,6 +447,7 @@ $node->pgbench( qr{command=109.: boolean true\b}, qr{command=110.: boolean true\b}, qr{command=111.: boolean true\b}, + qr{command=113.: boolean true\b}, ], 'pgbench expressions', { @@ -591,8 +591,17 @@ SELECT :v0, :v1, :v2, :v3; \set t debug(0 <= :p and :p < :size and :p = permute(:v + :size, :size) and :p <> permute(:v + 1, :size)) -- actual values \set t debug(permute(:v, 1) = 0) -\set t debug(permute(0, 2, 5432) = 0 and permute(1, 2, 5432) = 1 and \ - permute(0, 2, 5435) = 1 and permute(1, 2, 5435) = 0) +\set t debug(permute(0, 2, 5431) = 0 and permute(1, 2, 5431) = 1 and \ + permute(0, 2, 5433) = 1 and permute(1, 2, 5433) = 0) +-- check permute's portability across architectures +\set size debug(:max - 10) +\set t debug(permute(:size-1, :size, 5432) = 520382784483822430 and \ + permute(:size-2, :size, 5432) = 1143715004660802862 and \ + permute(:size-3, :size, 5432) = 447293596416496998 and \ + permute(:size-4, :size, 5432) = 916527772266572956 and \ + permute(:size-5, :size, 5432) = 2763809008686028849 and \ + permute(:size-6, :size, 5432) = 8648551549198294572 and \ + permute(:size-7, :size, 5432) = 4542876852200565125) } }); |
