514 lines
12 KiB
C
514 lines
12 KiB
C
/*
|
|
* Sccsid: @(#)rnd.c 9.1.1.9 9/7/95 16:24:04
|
|
*
|
|
* RANDOM.C -- Implements Park & Miller's "Minimum Standard" RNG
|
|
*
|
|
* (Reference: CACM, Oct 1988, pp 1192-1201)
|
|
*
|
|
* NextRand: Computes next random integer
|
|
* UnifInt: Yields an long uniformly distributed between given bounds
|
|
* UnifReal: ields a real uniformly distributed between given bounds
|
|
* Exponential: Yields a real exponentially distributed with given mean
|
|
*
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "dss.h"
|
|
#include "rnd.h"
|
|
|
|
char *env_config PROTO((char *tag, char *dflt));
|
|
#define PI (3.141597)
|
|
|
|
|
|
/******************************************************************
|
|
|
|
NextRand: Computes next random integer
|
|
|
|
*******************************************************************/
|
|
|
|
/*
|
|
* long NextRand( long nSeed )
|
|
*/
|
|
long
|
|
NextRand(long nSeed)
|
|
|
|
/*
|
|
* nSeed is the previous random number; the returned value is the
|
|
* next random number. The routine generates all numbers in the
|
|
* range 1 .. nM-1.
|
|
*/
|
|
|
|
{
|
|
|
|
/*
|
|
* The routine returns (nSeed * nA) mod nM, where nA (the
|
|
* multiplier) is 16807, and nM (the modulus) is
|
|
* 2147483647 = 2^31 - 1.
|
|
*
|
|
* nM is prime and nA is a primitive element of the range 1..nM-1.
|
|
* This * means that the map nSeed = (nSeed*nA) mod nM, starting
|
|
* from any nSeed in 1..nM-1, runs through all elements of 1..nM-1
|
|
* before repeating. It never hits 0 or nM.
|
|
*
|
|
* To compute (nSeed * nA) mod nM without overflow, use the
|
|
* following trick. Write nM as nQ * nA + nR, where nQ = nM / nA
|
|
* and nR = nM % nA. (For nM = 2147483647 and nA = 16807,
|
|
* get nQ = 127773 and nR = 2836.) Write nSeed as nU * nQ + nV,
|
|
* where nU = nSeed / nQ and nV = nSeed % nQ. Then we have:
|
|
*
|
|
* nM = nA * nQ + nR nQ = nM / nA nR < nA < nQ
|
|
*
|
|
* nSeed = nU * nQ + nV nU = nSeed / nQ nV < nU
|
|
*
|
|
* Since nA < nQ, we have nA*nQ < nM < nA*nQ + nA < nA*nQ + nQ,
|
|
* i.e., nM/nQ = nA. This gives bounds on nU and nV as well:
|
|
* nM > nSeed => nM/nQ * >= nSeed/nQ => nA >= nU ( > nV ).
|
|
*
|
|
* Using ~ to mean "congruent mod nM" this gives:
|
|
*
|
|
* nA * nSeed ~ nA * (nU*nQ + nV)
|
|
*
|
|
* ~ nA*nU*nQ + nA*nV
|
|
*
|
|
* ~ nU * (-nR) + nA*nV (as nA*nQ ~ -nR)
|
|
*
|
|
* Both products in the last sum can be computed without overflow
|
|
* (i.e., both have absolute value < nM) since nU*nR < nA*nQ < nM,
|
|
* and nA*nV < nA*nQ < nM. Since the two products have opposite
|
|
* sign, their sum lies between -(nM-1) and +(nM-1). If
|
|
* non-negative, it is the answer (i.e., it's congruent to
|
|
* nA*nSeed and lies between 0 and nM-1). Otherwise adding nM
|
|
* yields a number still congruent to nA*nSeed, but now between
|
|
* 0 and nM-1, so that's the answer.
|
|
*/
|
|
|
|
long nU, nV;
|
|
|
|
nU = nSeed / nQ;
|
|
nV = nSeed - nQ * nU; /* i.e., nV = nSeed % nQ */
|
|
nSeed = nA * nV - nU * nR;
|
|
if (nSeed < 0)
|
|
nSeed += nM;
|
|
return (nSeed);
|
|
}
|
|
|
|
/******************************************************************
|
|
|
|
UnifInt: Yields an long uniformly distributed between given bounds
|
|
|
|
*******************************************************************/
|
|
|
|
/*
|
|
* long UnifInt( long nLow, long nHigh, long nStream )
|
|
*/
|
|
long
|
|
UnifInt(long nLow, long nHigh, long nStream)
|
|
|
|
/*
|
|
* Returns an integer uniformly distributed between nLow and nHigh,
|
|
* including * the endpoints. nStream is the random number stream.
|
|
* Stream 0 is used if nStream is not in the range 0..MAX_STREAM.
|
|
*/
|
|
|
|
{
|
|
double dRange;
|
|
long nTemp;
|
|
|
|
if (nStream < 0 || nStream > MAX_STREAM)
|
|
nStream = 0;
|
|
if (nLow == nHigh)
|
|
return (nLow);
|
|
if (nLow > nHigh)
|
|
{
|
|
nTemp = nLow;
|
|
nLow = nHigh;
|
|
nHigh = nTemp;
|
|
}
|
|
dRange = (double) (nHigh - nLow + 1);
|
|
Seed[nStream] = NextRand(Seed[nStream]);
|
|
nTemp = (long) (((double) Seed[nStream] / dM) * (dRange));
|
|
return (nLow + nTemp);
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
|
|
|
UnifReal: Yields a real uniformly distributed between given bounds
|
|
|
|
*******************************************************************/
|
|
|
|
/*
|
|
* double UnifReal( double dLow, double dHigh, long nStream )
|
|
*/
|
|
double
|
|
UnifReal(double dLow, double dHigh, long nStream)
|
|
|
|
/*
|
|
* Returns a double uniformly distributed between dLow and dHigh,
|
|
* excluding the endpoints. nStream is the random number stream.
|
|
* Stream 0 is used if nStream is not in the range 0..MAX_STREAM.
|
|
*/
|
|
|
|
{
|
|
double dTemp;
|
|
|
|
if (nStream < 0 || nStream > MAX_STREAM)
|
|
nStream = 0;
|
|
if (dLow == dHigh)
|
|
return (dLow);
|
|
if (dLow > dHigh)
|
|
{
|
|
dTemp = dLow;
|
|
dLow = dHigh;
|
|
dHigh = dTemp;
|
|
}
|
|
Seed[nStream] = NextRand(Seed[nStream]);
|
|
dTemp = ((double) Seed[nStream] / dM) * (dHigh - dLow);
|
|
return (dLow + dTemp);
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************%
|
|
|
|
Exponential: Yields a real exponentially distributed with given mean
|
|
|
|
*******************************************************************/
|
|
|
|
/*
|
|
* double Exponential( double dMean, long nStream )
|
|
*/
|
|
double
|
|
Exponential(double dMean, long nStream)
|
|
|
|
/*
|
|
* Returns a double uniformly distributed with mean dMean.
|
|
* 0.0 is returned iff dMean <= 0.0. nStream is the random number
|
|
* stream. Stream 0 is used if nStream is not in the range
|
|
* 0..MAX_STREAM.
|
|
*/
|
|
|
|
{
|
|
double dTemp;
|
|
|
|
if (nStream < 0 || nStream > MAX_STREAM)
|
|
nStream = 0;
|
|
if (dMean <= 0.0)
|
|
return (0.0);
|
|
|
|
Seed[nStream] = NextRand(Seed[nStream]);
|
|
dTemp = (double) Seed[nStream] / dM; /* unif between 0..1 */
|
|
return (-dMean * log(1.0 - dTemp));
|
|
}
|
|
|
|
/*
|
|
* Extensions to dbgen for generation of skewed data.
|
|
* Surajit Chaudhuri, Vivek Narasayya.
|
|
* (Jan '99)
|
|
*/
|
|
|
|
#define EPSILON (0.0001)
|
|
|
|
/*
|
|
* Round the number x to the nearest integer.
|
|
*/
|
|
double
|
|
round(double x)
|
|
{
|
|
double fx = floor(x);
|
|
double cx = ceil(x);
|
|
|
|
if((x-fx) <= (cx-x))
|
|
return fx;
|
|
else
|
|
return cx;
|
|
}
|
|
|
|
/*
|
|
* Perform binary search to find D (number of distinct values for given n, zipf)
|
|
*
|
|
*/
|
|
double
|
|
SolveForMultipler(long n, double zipf)
|
|
{
|
|
long lowD = 1;
|
|
long highD = n;
|
|
long mid;
|
|
double sumMult;
|
|
double nRowsPrime;
|
|
long numRows;
|
|
long i;
|
|
|
|
while ( (highD - lowD) > 1)
|
|
{
|
|
mid = (highD + lowD) / 2;
|
|
sumMult = 0.0;
|
|
|
|
/* compute multiplier */
|
|
for(i=1;i<=mid;i++)
|
|
{
|
|
sumMult += 1.0 / (pow((double) i, zipf));
|
|
}
|
|
|
|
/* compute number of rows generated for this mulitpler */
|
|
numRows = 0;
|
|
for(i=1;i<=mid;i++)
|
|
{
|
|
nRowsPrime = ((n / sumMult) / pow((double) i, zipf));
|
|
numRows += (long) round(nRowsPrime);
|
|
}
|
|
if(((double)(n-numRows))/n < EPSILON)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if(numRows > n)
|
|
{
|
|
/* we overestimated numRows -- we need fewer distinct values */
|
|
highD = mid;
|
|
}
|
|
else
|
|
{
|
|
/* underestimation of numRows -- need lower multiplier */
|
|
lowD = mid;
|
|
}
|
|
}
|
|
return sumMult;
|
|
}
|
|
|
|
|
|
double
|
|
GetMultiplier(long n, double zipf)
|
|
{
|
|
double multiplier;
|
|
double term;
|
|
long i;
|
|
|
|
if(zipf == 0.0)
|
|
multiplier = n;
|
|
else if(zipf==1.0)
|
|
multiplier = log(n) + 0.577 ;
|
|
else if(zipf==2.0)
|
|
multiplier = (PI*PI)/6.0;
|
|
else if(zipf==3.0)
|
|
multiplier = 1.202;
|
|
else if(zipf==4.0)
|
|
multiplier = (PI*PI*PI*PI)/90.0;
|
|
else
|
|
{
|
|
|
|
/* compute multiplier (must be within given bounds) */
|
|
multiplier = 0;
|
|
for(i=1;i<=n;i++)
|
|
{
|
|
term = 1.0 / pow((double)i, zipf);
|
|
multiplier += term;
|
|
|
|
/* if later terms add very little we can stop */
|
|
if(term < EPSILON)
|
|
break;
|
|
}
|
|
}
|
|
return multiplier;
|
|
}
|
|
|
|
/******************************************************************%
|
|
|
|
RANDOM: Yields a random number from a Zipfian distribution with
|
|
a specified skewVal. Skew is an integer in the range 0..3
|
|
|
|
*******************************************************************/
|
|
|
|
/*
|
|
*
|
|
*/
|
|
long
|
|
SkewInt(long nLow, long nHigh, long nStream, double skewVal, long n)
|
|
/*
|
|
*
|
|
*/
|
|
{
|
|
double zipf;
|
|
double dRange;
|
|
long nTemp;
|
|
double multiplier;
|
|
double Czn;
|
|
long numDistinctValuesGenerated;
|
|
|
|
|
|
/* check for validity of skewVal */
|
|
if(skewVal < 0 || skewVal > 5)
|
|
zipf = 0; /* assume uniform */
|
|
else if(skewVal==5.0)
|
|
{
|
|
/* special case */
|
|
/* check if any values have been generated for this column */
|
|
if(NumDistinctValuesGenerated[nStream]==0)
|
|
{
|
|
/* pick a skew value to be used for this column*/
|
|
zipf = (int) UnifInt(0, 4, 0);
|
|
ColumnSkewValue[nStream] = zipf;
|
|
}
|
|
else
|
|
{
|
|
/* column skew value has been initialized before */
|
|
zipf = ColumnSkewValue[nStream];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* skewVal is between 0 and 4 */
|
|
zipf = skewVal;
|
|
}
|
|
|
|
/* If no values have been generated for this stream as yet, get multiplier */
|
|
if(NumDistinctValuesGenerated[nStream]==0)
|
|
{
|
|
Multiplier[nStream] = GetMultiplier(n, zipf);
|
|
}
|
|
multiplier = Multiplier[nStream];
|
|
|
|
/*
|
|
* Check how many copies of the current value
|
|
* have already been generated for this stream.
|
|
* If we have generated enough, proceed to
|
|
* next value, and decide how many copies of
|
|
* this value should be generated.
|
|
*/
|
|
|
|
if(CurrentValueCounter[nStream] == CurrentValueTarget[nStream])
|
|
{
|
|
/* proceed to next value */
|
|
if (nStream < 0 || nStream > MAX_STREAM)
|
|
nStream = 0;
|
|
if (nLow == nHigh)
|
|
nTemp = nLow;
|
|
else
|
|
{
|
|
if (nLow > nHigh)
|
|
{
|
|
nTemp = nLow;
|
|
nLow = nHigh;
|
|
nHigh = nTemp;
|
|
}
|
|
dRange = (double) (nHigh - nLow + 1);
|
|
Seed[nStream] = NextRand(Seed[nStream]);
|
|
nTemp = (long) (((double) Seed[nStream] / dM) * (dRange));
|
|
nTemp += nLow;
|
|
CurrentValue[nStream] = nTemp;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* return another copy of current value */
|
|
nTemp = CurrentValue[nStream];
|
|
CurrentValueCounter[nStream]++;
|
|
return nTemp;
|
|
}
|
|
|
|
/*
|
|
* check how many distinct values for this column
|
|
* have already been generated.
|
|
*/
|
|
numDistinctValuesGenerated = NumDistinctValuesGenerated[nStream] + 1;
|
|
|
|
if(n<1)
|
|
n = (nHigh - nLow + 1);
|
|
|
|
/*
|
|
* zipf is guaranteed to be in the range 0..4
|
|
* pick the multiplier
|
|
if(zipf == 0)
|
|
multiplier = n;
|
|
else if(zipf==1)
|
|
multiplier = log(n) + 0.577 ;
|
|
else if(zipf==2)
|
|
multiplier = (PI*PI)/6.0;
|
|
else if(zipf==3)
|
|
multiplier = 1.202;
|
|
else if(zipf==4)
|
|
multiplier = (PI*PI*PI*PI)/90.0;
|
|
*/
|
|
|
|
Czn = n/multiplier;
|
|
CurrentValueTarget[nStream]=
|
|
(long) (Czn/pow((double)numDistinctValuesGenerated, zipf));
|
|
/* ensure that there is at least one value*/
|
|
CurrentValueTarget[nStream] = (CurrentValueTarget[nStream] < 1) ? 1: CurrentValueTarget[nStream];
|
|
CurrentValueCounter[nStream] = 1;
|
|
NumDistinctValuesGenerated[nStream]++;
|
|
|
|
return nTemp;
|
|
}
|
|
|
|
|
|
/******************************************************************%
|
|
|
|
Returns the number of copies of the next value
|
|
in the given distribution. Skew is an integer in the range 0..4
|
|
|
|
*******************************************************************/
|
|
/*
|
|
*
|
|
*/
|
|
long
|
|
SkewIntCount(long nStream, double skewVal, long n)
|
|
/*
|
|
*
|
|
*/
|
|
{
|
|
double zipf;
|
|
double multiplier;
|
|
double Czn;
|
|
long numDistinctValuesGenerated;
|
|
long numCopies;
|
|
|
|
|
|
/* check for validity of skewVal */
|
|
if(skewVal < 0 || skewVal > 4)
|
|
zipf = 0; /* assume uniform */
|
|
else if(skewVal==5.0)
|
|
{
|
|
/* column skew value has been initialized before */
|
|
zipf = ColumnSkewValue[nStream];
|
|
}
|
|
else
|
|
{
|
|
/* skewVal is between 0 and 4 */
|
|
zipf = skewVal;
|
|
}
|
|
|
|
/*
|
|
* check how many distinct values for this column
|
|
* have already been generated.
|
|
*/
|
|
numDistinctValuesGenerated = NumDistinctValuesGenerated[nStream] + 1;
|
|
|
|
multiplier = Multiplier[nStream];
|
|
/*
|
|
* zipf is guaranteed to be in the range 0..4
|
|
* pick the multiplier
|
|
if(zipf == 0)
|
|
multiplier = n;
|
|
else if(zipf==1)
|
|
multiplier = log(n) + 0.577 ;
|
|
else if(zipf==2)
|
|
multiplier = (PI*PI)/6.0;
|
|
else if(zipf==3)
|
|
multiplier = 1.202;
|
|
else if(zipf==4)
|
|
multiplier = (PI*PI*PI*PI)/90.0;
|
|
*/
|
|
|
|
Czn = n/multiplier;
|
|
numCopies=
|
|
(long) (Czn/pow((double)numDistinctValuesGenerated, zipf));
|
|
/* ensure that there is at least one value*/
|
|
CurrentValueTarget[nStream] = (CurrentValueTarget[nStream] < 1) ? 1: CurrentValueTarget[nStream];
|
|
NumDistinctValuesGenerated[nStream]++;
|
|
return numCopies;
|
|
}
|