Improved uniformity of RandomPCG::randf.

When generating single precision floats, Godot casts a uint32_t to float,
causing uniformity loss.

This new randf, inspired by T. R. Campbell's random_real, samples the output
of rand as the fraction part of an infinite binary number, with some tricks
to reduce ops and branching. This method provides "good enough" uniformity at
decent speed, for floats greater than 2^-64. Smaller numbers are floored to 0.
This commit is contained in:
toasteater 2019-03-18 11:41:16 +08:00
parent df7d3708c5
commit 5f1b9a2313
2 changed files with 62 additions and 8 deletions

View File

@ -44,13 +44,9 @@ void RandomPCG::randomize() {
}
double RandomPCG::random(double p_from, double p_to) {
unsigned int r = rand();
double ret = (double)r / (double)RANDOM_MAX;
return (ret) * (p_to - p_from) + p_from;
return randd() * (p_to - p_from) + p_from;
}
float RandomPCG::random(float p_from, float p_to) {
unsigned int r = rand();
float ret = (float)r / (float)RANDOM_MAX;
return (ret) * (p_to - p_from) + p_from;
return randf() * (p_to - p_from) + p_from;
}

View File

@ -35,6 +35,28 @@
#include "thirdparty/misc/pcg.h"
#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_clz))
#define CLZ32(x) __builtin_clz(x)
#elif defined(_MSC_VER)
#include "intrin.h"
static int __bsr_clz32(uint32_t x) {
unsigned long index;
_BitScanReverse(&index, x);
return 31 - index;
}
#define CLZ32(x) __bsr_clz32(x)
#else
#endif
#if defined(__GNUC__) || (_llvm_has_builtin(__builtin_ldexp) && _llvm_has_builtin(__builtin_ldexpf))
#define LDEXP(s, e) __builtin_ldexp(s, e)
#define LDEXPF(s, e) __builtin_ldexpf(s, e)
#else
#include "math.h"
#define LDEXP(s, e) ldexp(s, e)
#define LDEXPF(s, e) ldexp(s, e)
#endif
class RandomPCG {
pcg32_random_t pcg;
uint64_t current_seed; // seed with this to get the same state
@ -58,8 +80,44 @@ public:
current_seed = pcg.state;
return pcg32_random_r(&pcg);
}
_FORCE_INLINE_ double randd() { return (double)rand() / (double)RANDOM_MAX; }
_FORCE_INLINE_ float randf() { return (float)rand() / (float)RANDOM_MAX; }
// Obtaining floating point numbers in [0, 1] range with "good enough" uniformity.
// These functions sample the output of rand() as the fraction part of an infinite binary number,
// with some tricks applied to reduce ops and branching:
// 1. Instead of shifting to the first 1 and connecting random bits, we simply set the MSB and LSB to 1.
// Provided that the RNG is actually uniform bit by bit, this should have the exact same effect.
// 2. In order to compensate for exponent info loss, we count zeros from another random number,
// and just add that to the initial offset.
// This has the same probability as counting and shifting an actual bit stream: 2^-n for n zeroes.
// For all numbers above 2^-96 (2^-64 for floats), the functions should be uniform.
// However, all numbers below that threshold are floored to 0.
// The thresholds are chosen to minimize rand() calls while keeping the numbers within a totally subjective quality standard.
// If clz or ldexp isn't available, fall back to bit truncation for performance, sacrificing uniformity.
_FORCE_INLINE_ double randd() {
#if defined(CLZ32)
uint32_t proto_exp_offset = rand();
if (unlikely(proto_exp_offset == 0)) {
return 0;
}
uint64_t significand = (((uint64_t)rand()) << 32) | rand() | 0x8000000000000001U;
return LDEXP((double)significand, -64 - CLZ32(proto_exp_offset));
#else
#pragma message("RandomPCG::randd - intrinsic clz is not available, falling back to bit truncation")
return (double)(((((uint64_t)rand()) << 32) | rand()) & 0x1FFFFFFFFFFFFFU) / (double)0x1FFFFFFFFFFFFFU;
#endif
}
_FORCE_INLINE_ float randf() {
#if defined(CLZ32)
uint32_t proto_exp_offset = rand();
if (unlikely(proto_exp_offset == 0)) {
return 0;
}
return LDEXPF((float)(rand() | 0x80000001), -32 - CLZ32(proto_exp_offset));
#else
#pragma message("RandomPCG::randf - intrinsic clz is not available, falling back to bit truncation")
return (float)(rand() & 0xFFFFFF) / (float)0xFFFFFF;
#endif
}
double random(double p_from, double p_to);
float random(float p_from, float p_to);