Probability Distributions

steve_bank · Jun 20, 2024

For a distribution such as normal the practical problem is generating random samples for arbitrary means and standard deviations.

That is the quantile function. For a normal distribution the inverse error function is part of the normal quantile. Random normally distrusted numbers have a range of uses for example Monte Carlo methods. Same with the exponential distribution.

Using a factorial table improves speed.

Part of my small math library I have been working on.

I tried numeral integration for the erf() and it was not as acurate as the math tools. The approximation closely matches libraries.

Code:

class RAND_DIST{
    private:

    //URAND random number algorithm
    //URAND A Universal Random Number Generator  by Malcom and Moler
    unsigned long long randll = 1;  //random number
    unsigned long long m = 0x7fffffff;
    //unsigned long long k1 = 2*floor(m*(.5 - sqrt(3)/6.)) + 1;
    //unsigned long long k2 = 8*ceil(m*atan(1.)/8.) + 5;
    unsigned long long k1 = 2.*(m*(.5 - sqrt(3)/6.)) + 1;
    unsigned long long k2 = 8.*(m*(atan(1.)/8.) + 5.);

    public:
    void init(unsigned long long seed);
    int rand_exp(long long n,double *y,double u);
    int rand_nor(long long n,double *y,double u,double sigma);
    int uni_float(int n,double *y,int lo,int hi,int ndp);
    int uni_int(int n,int *y,int lo,int hi);
    double erf(double x);
    double ierf(double x);
};//RAND_DIST

void  RAND_DIST::init(unsigned long long seed){
        randll = seed;
    }//init()

double RAND_DIST::erf(double x){
        double factorial[23] = {
            1.,
            1.,
            2.,
            6.,
            24.,
            120.,
            720.,
            5040.,
            40320.,
            362880.,
            3628800.,
            39916800.,
            479001600.,
            6227020800.,
            87178291200.,
            1307674368000.,
            20922789888000.,
            355687428096000.,
            6402373705728000.,
            121645100408832000.,
            2432902008176640000.,
            51090942171709440000.,
            1124000727777607680000.};

        int i,n = 20;
        double s = 0,k;
        for(i=0;i<23;i++){
            k =   (1./(2*i+1))*pow(-1,i)/factorial[i];
            s += k * pow(x,2*i+1);
            }
        return s*2./sqrt(_PI);
}//erf()

double RAND_DIST::ierf(double x){
    //inverse error function
    if(x==0)return 0;
   //if(x <= -1 || x >= 1.)return 999;
    int k,m,nc = 1000;
    double einv = 0.,q = sqrt(_PI)*x/2.;
    #include "coef.txt"  //c[]
    //double c[nc];
    //for(k=0;k<nc;k++)c[k] = 0;
    //c[0] = 1;
    //for(k=0;k<nc;k++){
        //for(m=0;m<k;m++)
           // c[k]+= (c[m]*c[k-1-m])/((m+1)*(2*m + 1));
      //  }
   for(k=0;k<nc;k++)einv += ((c[k])/(2*k+1))*pow(q,(2*k+1));
    return einv;
}//ierf()

int RAND_DIST::rand_exp(long long n,double *y,double u){
    // array of random numbers from an exonentual distribution
    // p represents random probabilities >0 <1
    if(u <= 0 || n <= 0)return -1;
    double p;
    long long i;
    for(i=0;i<n;i++){
        randll = randll*k2 + k1;
        p = (double)(randll%m+1)/(double)(m+2);
        // quantile - probability to  variable for a given mean
        y[i] = -1.* log(1. - p) * u;
    }
    return 0;
}//rand_exp()


int RAND_DIST::rand_nor(long long n,double *y,double u,double sigma){
    // array of random numbers from a normal distribution
    // URAND random number algorithm
    // p random probabilities >0 <1
    if(n <= 0)return - 1;
    double q,p,k1;
    int i,k,nc = 0;
    q = sqrt(2)*sigma;
    for(i=0;i<n;i++){

        randll = randll*k2 + k1;
        // p >0  <1
        //p = (double(rand())+.01)/double(RAND_MAX + 1);
        p = (double)(randll%m+1e-6)/(double)(m+1e-3);
        p = 2.*p-1.;
        y[i] = u + ierf(p)*q;  //normal qantile
    }
    return 0;
}//rand_nor()

int RAND_DIST::uni_float(int n,double *y,int lo,int hi,int ndp){
    //uniform distribution floats > lo< hi + 1
    if(lo >= hi || n <= 0)return -1;
    int xint,i,nnums = (hi-lo),dp = pow(10,ndp);
    double xdec;
    for(i=0;i<n;i++){
        randll = randll*k2 + k1;
        xdec = (double)(randll%m)/(double)(m+1);
        if(ndp>0){
            xdec = xdec*dp;
            xdec = int(xdec);
            xdec = xdec/dp;
        }
        randll = randll*k2 + k1;
        xint = (randll%m)%nnums + lo;
        y[i] = xint + xdec;
    }
    return 0;
}//uni_float()

int RAND_DIST::uni_int(int n,int *y,int lo,int hi){
    //uniform distribution ints lo -> hi-1
    if(lo >= hi || n <= 0)return -1;
    int i,nnums = hi-lo;
    for(i=0;i<n;i++){
        randll = randll*k2 + k1;
        if(hi < 2)y[i] = (randll%m)%nnums;
        y[i] = (randll%m)%nnums + lo;
    }
    return 0;
}//uni_int(

Swammerdami · Jun 21, 2024

We've done this at least twice before. I still like my version: It needs only 1½ transcendental functions and 1 uniform variate per normal variate. It's terse. Studying it gives insights about the uniform distribution.

Code:

double unif_random(void); // return a uniform variate on (0, 1)

double expon_random(void) // return an exponential variate with mean 1 and std-dev 1
{
         return -log(unif_random());
}

// return a normal variate with mean 0 and std-dev 1
// (mean + std_dev * norm_random()) for a  non-standard variate
double norm_random(void) 
{
         static double r, theta;
         static int iset = 1;
         if (iset ^= 1)
                 return r * sin(theta);
         r = sqrt(2 * expon_random());
         theta = unif_random() * 2 * 3.14159265358979323844;
         return r * cos(theta);
}

steve_bank · Jun 21, 2024

The normal distribution I coded is 6 to 7 sigma depending on the number of samples. To get that I used URAND with a larger max rand number, IOW finer resolution of the probabilities. Using C rand() resulted in +- 2.5 sigma, not good enough for simulations.

The 6 sigma tails are important when doing Monte Carlo simulations.

lpetrich · Jun 21, 2024

What kind of input values?

Discrete
Continuous

What is the range of input values?

Finite
Semi-infinite (usually 0 to +infinity)
Complete infinite (-infinity to +infinity)

A sampling of them:

Binomial
Parameters: total number n, individual probability p
Discrete, finite
$ \displaystyle{ P(k) = \frac{n!}{k! (n-k)!} (1-p)^{n-k} p^k } $

Poisson
Parameters: mean m
Discrete, semi-infinite
$ \displaystyle{ P(k) = \frac{m^k}{k!} e^{-m} } $

Poisson: limit of binomial for n -> oo with m = n*p

Chi-squared
Parameters: number of degrees of freedom n
Continuous, semi-infinite
$ \displaystyle{ P(x) = 2 \frac{(x/2)^{n-1} e^{-x/2}}{\Gamma(n/2)} } $

Normal
Parameters: mean m and standard deviation s
Continuous, infinite
$ \displaystyle{ P(x) = \frac{1}{s\sqrt{2\pi}} e^{(x-m)^2/(2s^2)} } $

Central limit theorem - the normal distribution is a limiting case of many distributions

steve_bank · Jun 21, 2024

Testing the normal and exponential distributions.

I compared the cumulative distributions to the Scilab functions and they are close. They will never exactly match because the numbers are random.

Being symmetrical the median and the median of the normal distribution are the same.

For the exponential distribution the mean and median are not the same. The median = log(2)*mean.
The mean and starboard deviation may appear correct but for the exponential distribution the median = log(2)*mean. t as to be checked.

The methods are not 'mine' or the way I do it. I went through a learning curve, the methods are standard math theory.

Gnuplot script to plot the histogram an cumulative distribution for the normal and exponential,

norm.plt
set term windows background rgb "white" title "DFT" fontscale 1
clear
reset
set key off
set multiplot layout 2,1 columnsfirst

set border 3
# Add a vertical dotted line at x=0 to show centre (mean) of distribution.
set yzeroaxis
# Each bar is4half the (visual) width of its x-range.
set boxwidth 4 absolute
set style fill solid 1.0 noborder
bin_width = 5;
bin_number(x) = floor(x/bin_width)
rounded(x) = bin_width * ( bin_number(x) + 0.5 )
set xrange[*:*]
plot 'norm.dat' using (rounded($1))

1) smooth frequency with boxes

set grid lt 1 lw 1 lc rgb "black" dashtype solid
# signal
set autoscale x
show autoscale
set zeroaxis
set yrange[*:*]
plot 'norm.dat' using 1:2 with lines ls 4 lt -1 lw 3
show grid
unset autoscale y

exp.plt
set term windows background rgb "white" title "DFT" fontscale 1
clear
reset
set key off
set multiplot layout 2,1 columnsfirst

set border 3
# Add a vertical dotted line at x=0 to show centre (mean) of distribution.
set yzeroaxis
# Each bar is4half the (visual) width of its x-range.
set boxwidth 4 absolute
set style fill solid 1.0 noborder
bin_width = 5;
bin_number(x) = floor(x/bin_width)
rounded(x) = bin_width * ( bin_number(x) + 0.5 )
set xrange[0:*]
plot 'exp.dat' using (rounded($1))

1) smooth frequency with boxes

set grid lt 1 lw 1 lc rgb "black" dashtype solid
# signal
set autoscale x
show autoscale
set zeroaxis
set yrange[*:*]
plot 'exp.dat' using 1:2 with lines ls 4 lt -1 lw 3
show grid
unset autoscale y

show grid
unset multiplot

Code:

oid save(int n,string fname, double *x1,double *x2){
    int i;
    FILE *p = fopen(fname.c_str(),"w");
    for(i=0;i<n;i++)fprintf(p,"%.10f\t %.10f\t \n",x1[i],x2[i]);
    fclose(p);
}//save()

int comp_doub_ascend(const void * a, const void * b) {
  if (*(double*)a > *(double*)b) return 1;
  else if (*(double*)a < *(double*)b) return -1;
  else return 0;
}
double cum_distf(int n,double *y,double *cd){
    int i,flag = 0;
    double m=0;
    qsort(y,n,sizeof(double),comp_doub_ascend);
    for(i = 0;i < n;i++){
        cd[i] = 100.* double(i)/double(n-1);
        if(!flag && cd[i] > 50.){
            flag = 1;
            m = y[i];
            }
    }//for
    return m;
}//cum_distf()

double medianf(int n,double *y){
    //y is sorted
    int i ;
    double c;
    for(i=0;i<n;i++){
        c = double(i)/double(n-1);
        if(c >= 0.5)break;
        }
    return y[i];
}//medianf()


void avestd(int n,double *y,double *av,double *std){
    int i;

    double asum = 0, ssum = 0;
    for(i=0;i<n;i++)asum += y[i];
    *av = asum/double(n);
    for(i=0;i<n;i++)ssum += pow(*av-y[i],2);
    *std = sqrt(ssum/double(n));
}//ave()

void minmaxi(int n,int *y,int *min,int *max){
    *max = y[0],*min = y[0];
    //printf("%d   %d\n\n",y[0],y[n-1]);
    int i;
    for(i=0;i<n;i++){
        if(y[i] < *min)*min = y[i];
        if(y[i] > *max)*max = y[i];
    }
}

void rand_nor_test(void){
    cout<<"norm test"<<endl;
    RAND_DIST rd;
    unsigned long long seed = unsigned (time(NULL));
    rd.init(seed);
    double m,u = 100,sigma = 20;
    double ave,std,minf,maxf,sx;
    int n = 1000;
    double y[n],cd[n];
    rd.rand_nor(n,y,u,sigma);
    cum_distf(n,y,cd);
    save(n,"norm.dat",y,cd);
    m = medianf(n,y);
    avestd(n,y,&ave,&std);
    printf("av  %.5f   st  %.5f\n",ave,std);
    minmaxf(n,y,&minf,&maxf);
    sx = (maxf-minf)/std; //data spread in standard deviations
    printf("min %.5f  max %.5f\n",minf,maxf);
    printf("median %.5f\n",m);
    printf("sx  %f\n",sx);
}//rand_nor_test()

void rand_exp_test(void){
    cout<<"exp test"<<endl;
    RAND_DIST rd;
    unsigned long long seed = unsigned (time(NULL));
    rd.init(seed);
    double m,u = 100,sigma = 20;
    double ave,std,minf,maxf,sx;
    int n = 1000;
    double y[n],cd[n];
   rd.rand_exp(n,y,u);
    cum_distf(n,y,cd);
    save(n,"exp.dat",y,cd);
    m = medianf(n,y);
    avestd(n,y,&ave,&std);
    printf("av  %.5f   st  %.5f\n",ave,std);
    minmaxf(n,y,&minf,&maxf);
    sx = (maxf-minf)/std;
    printf("min %.5f  max %.5f\n",minf,maxf);
    printf("expected median %.5f  actual median %.5f\n",log(2)*u,m);
    printf("sx  %f\n",sx);
}//rand_exp_test()

int main() {
    RAND_DIST rd;
    unsigned long long seed = unsigned (time(NULL));
    rd.init(seed);
   rand_nor_test();
   rand_exp_test();
   system("norm.plt");
   system("exp.plt");
    return 0;
}//main()

Swammerdami · Jun 21, 2024

When the topic of probability distributions comes up I cannot resist mentioning the Cauchy distribution, named after its discoverer Augustin-Louis Cauchy. There are comments about it in a thread from 2½ years ago on "The Central Limit Theorem" -- a Theorem the Cauchy distribution does NOT obey.

Here's an image comparing the Cauchy and Gaussian bell-shapes. Does the "bell" of this weird distribution seem innocuous enough?

steve_bank · Jun 21, 2024

The Central Limit Theorem is important in applied samplng statistics. It says the distribution of sample means regardless of the underlying distributed will be normally distributed, or close enough to use normal statistics.

Central limit theorem - Wikipedia

en.wikipedia.org

I know it holds true if the sample size is big enough.

This leads to the Chi^2 distribution another important applied statistic.and confidence intervals.

It is said quitting a statistic without a confidence interval is meaningless.

There used to be plastic circular calculators to do Chi^2 and confidence intervals.Dermig sample size for a given confidence interval.

Chi-squared test - Wikipedia

en.wikipedia.org

Confidence interval - Wikipedia

en.wikipedia.org

A walk down memory lane. At my first job in a small company I was the manufacturing, test, reliability, and quality engineer. At my second job I was a full time reliability engineer.

steve_bank · Jun 22, 2024

Swammerdami said:
We've done this at least twice before. I still like my version: It needs only 1½ transcendental functions and 1 uniform variate per normal variate. It's terse. Studying it gives insights about the uniform distribution.

Code:

double unif_random(void); // return a uniform variate on (0, 1) double expon_random(void) // return an exponential variate with mean 1 and std-dev 1 { return -log(unif_random()); } // return a normal variate with mean 0 and std-dev 1 // (mean + std_dev * norm_random()) for a non-standard variate double norm_random(void) { static double r, theta; static int iset = 1; if (iset ^= 1) return r * sin(theta); r = sqrt(2 * expon_random()); theta = unif_random() * 2 * 3.14159265358979323844; return r * cos(theta); }

Gone around twice?

I have not seen you post these before.

Your exponential function uses the exponential distribution quintuple function and does create a distribution with mean 1 as written. The same as the function I posted so there is no 'going around it again';

There is one issue, your function is technically incorrect. It is -log(1-p) not -log(p).

For p = ,2, -log(p) = 1.6904 -log(1-p) = .2231 different points on the distribution. For generating a sequence of random variables it does not matter, but if you are looking for a specific point on the distribution it is incorrect.

unsigned int seed = time(NULL);
srand(seed);
//random number >0,<1
double unif_rand(void){return double(rand()+1)/(RAND_MAX+2);}

double expon_random(double u) {return -log(1.-unif_rand())*u;}

Make u 1 to get a mean of 1.

The n0rmal function works but it is incomprehensible to me, can you elaborate on how it works or provide a link? Yes, you can shift the normalized distribution to any mean and standard deviation, that is basic textbook theory. Much easier to use the normal quantile.

Your function requires an exponential and uniform distribution, the more standard way requires the inverse error function and a uniform distribution.

You used the exponential quartile -log(1-p)*um why not use the nornal quartile?

You would have to add more code to shift the mean and standard deviation to make it a usable function.

If I combine the inverse error function and the normal quartile using rand(). No transcendental functions.

Derivation of norrnal quartile.

Quantile function of the normal distribution

The Book of Statistical Proofs – a centralized, open and collaboratively edited archive of statistical theorems for the computational sciences

statproofbook.github.io

Code:

double rand_norm(double mean,double sigma){
    #include "coef.txt"  //c[]
    double y,p,ierf,q ;
    int k,m,nc = 1000;
    //p>0,p < 1
    p = (double)(rand() +1)/(RAND_MAX+2);
    p = 2.*p-1.;
    q =  sqrt(_PI)*p/2.;
   for(k=0;k<nc;k++)ierf += ((c[k])/(2*k+1))*pow(q,(2*k+1));
   y = mean + ierf*sqrt(2)*sigma;  //normal quantile
   return y;
}

Swammerdami · Jun 22, 2024

steve_bank said:
Swammerdami said:

...
// return a normal variate with mean 0 and std-dev 1
// (mean + std_dev * norm_random()) for a non-standard variate

Click to expand...

Gone around twice?

I have not seen you post these before.

I did. I may have changed variable names or such. Some of my libraries are available on-line under my real name; I make minor changes to avoid Google hits and thus keep that "firewall" intact!

steve_bank said:
There is one issue, your [exponential] function is technically incorrect. It is -log(1-p) not -log(p).

You seem to admit that the function returns variates correctly, what difference do the internals matter? When p is uniform on (0, 1), it and (1-p) provide exactly equivalent distributions.

steve_bank said:
The n0rmal function works but it is incomprehensible to me, can you elaborate on how it works or provide a link?

We went through this before also. Did you notice that the code produces (r, theta) on HALF the calls and returns (r * sin(theta)) trivially on the other half of its calls? It is actually producing a variate on a TWO-D gaussian distribution, and generating two ONE-D variates from that. That that works at all -- and is easier than producing a ONE-D variate -- seems very interesting to me, and reminds us of mathematical remarks about the Gaussian.

steve_bank said:
You would have to add more code to shift the mean and standard deviation to make it a usable function.

You had the same objection last time. I added a comment (in RED above) showing how trivial that conversion is.

There's a simple reason why I like my approach, avoiding unnecessary function arguments. Anyone using the function will know how to convert a standard normal variate into one with specified mean and standard deviation. Not only is it unhelpful for the library routine to do this, but it's much easier to memorize the argument order when there are only zero arguments instead of two!

steve_bank · Jul 6, 2024

Don't remember the last time.
The functions I posted for exponential and normal distributions use the quantile functions for the distributions. Derivations are on the net, quantile functions convert a uniform distribution into the probability distribution.

You are using the exponential quartile function. The quantifiable function is 1 - p not p.

Picture a normal distribution with zero mean. On the left are negative numbers, positive on the right. A probability of .2 is on the left skirt of the distribution. 1-.2 = .8 is on the right.

Approximations Of The Inverse Error Function – Mimir Games

www.mimirgames.com

At first, second and even third glance it is very hard to see the importance of the inverse error function. What makes the inverse error function so important is that it allows you to take a perfectly flat random distribution and transform it into a Gaussian distribution.
As seen in Figure 3, we generated a histogram by generating random numbers between [-1,1] using a flat distribution. We then took each of those random numbers and put it through the inverse error function. Generating a histogram we see that the flat distribution transformed into a Gaussian distribution.

Did you understand why -log(p) generates an exponential distribution?

Quantile function - Wikipedia

en.wikipedia.org

Swammerdami · Jul 7, 2024

Hi steve. It appears you have TWO threads going on almost the same topic. This must be convenient! When stymied in one of the threads, you ignore the stymie and post in the second thread.

But, except for two unanswered questions there is nothing you can teach me. And it appears, reciprocally, that you are unable to learn anything from me. Your attempts at "conversation", if that's what these are, are pointless. Perhaps you should solicit queries from your disciples here, and educate THEM.

BUT I do have two questions for you. Here's one I've asked TWICE. Will the third time be a charm?

Swammerdami said:
My question was about your "URAND LG". Perhaps you should show us that code.

Swammerdami said:

steve_bank said:

Execution times C++ Mersenne Twister vs LG.

for 10^10 iterations C++ took 68 seconds, URAND LG took .016 seconds.

Click to expand...

Obviously something went badly wrong in your experiment! I'm sure you'll agree that you didn't do 10^10 iterations of anything in 16 milliseconds!

Click to expand...

No comment about this?

According to your result, your "RAND LG" was more than 4500 times faster than Mersenne Twister. Do you want to recant this peculiar claim? I've already mentioned that Jenkin's Burble PRNG -- optimized for speed -- is only about twice as fast as the Twister.

Is your "URAND LG" proprietary? Patent pending?

By the way, I displayed the EXACT 2147483648-sized set of real numbers produced (with equal probability) by default when a uniform real over (0, 1) is needed.

Swammerdami said:
... when choosing a uniform real variate over (0, 1) one starts with a specified number of bits. By default my library routines use 31 bits. The variate is then chosen to be distributed uniformly over
{1,3,5,7,9,... , 4294967291, 4294967293, 2³²-1 } ÷ 2³²
Note that this chooses a number from (0, 1) -- not [0,1) as many or most implementations do. Note that the mean will be 0.5 exactly. Perfectionistic? Sure, but why not? Steve, modify your code to use, say 8 bits instead of 32 bits and se what mean value you get.

Did you do the same? How did your code fare when you invoked log(uniform()) billions of times?

steve_bank · Jul 7, 2024

Weer you implying -log(p) was your invention? Did you knpw why it worked?

When I was working if I was in a meeting and I presented code or hardware designs I could not explain how it worked I'd get laughed out of the room.

Can you expain how yourn ormal gertor works?

You can look at chapter 3 of Knuth's Semi Numeral Algorithms for a detailed random number statistical analysis. Online PDF.

The generator I posted uses 32 bits, I increased it to 64 to increase the period. _int128 could be used..

As to the CLT I speak from experience with using statistics. It is easy enough to demonstrate with a simulation, which i did way back to convince myself.

Run the frequewncy anakysis code I posted on yiur genertor and post the results.

A sign on someone's office. 'In god we trust, all else bring data'.

Data always trunps debate.

Usng thereored repoted run time the genertor I coded ran faster than the C++ built in. The C++ fgnctions prbaly have sver levels of execution. Note that 10^10 is a lot of iterations.

I posted the code warts and all along with the test code I used, feel free to run your own evaluation.

Swammerdami · Jul 7, 2024

There's only one question to which we ant to hear your answer. When will that come?

Here, I'll make the font a bit bigger.

Swammerdami said:
BUT I do have two questions for you. Here's one I've asked TWICE. Will the third time be a charm?

Swammerdami said:

My question was about your "URAND LG". Perhaps you should show us that code.

Swammerdami said:

steve_bank said:

Execution times C++ Mersenne Twister vs LG.

for 10^10 iterations C++ took 68 seconds, URAND LG took .016 seconds.

Click to expand...

Obviously something went badly wrong in your experiment! I'm sure you'll agree that you didn't do 10^10 iterations of anything in 16 milliseconds!

Click to expand...

No comment about this?

Click to expand...

According to your result, your "RAND LG" was more than 4500 times faster than Mersenne Twister. Do you want to recant this peculiar claim? I've already mentioned that Jenkin's Burble PRNG -- optimized for speed -- is only about twice as fast as the Twister.

Is your "URAND LG" proprietary? Patent pending?

Until you figure out an answer to this, everything else you say is just blather, worthless.

I think You simply made a big typo, or a big "thinko." Why don't you just say so? Or are you a narcissist who doubles down, then triples down on your blunders, hoping they'll be forgotten?

steve_bank · Jul 8, 2024

Demonstrating the Central Limit Theorem

Create a random array of mixed exponential and normal distributions.
Take samples and calculate the means.

There is not enough points to get a good histogram but the cumulative distribution is clearly normal.

Code:

include <random>
#include <bits/stdc++.h>
#include <algorithm>
#include <functional>




int Rand(int n)
{
    return rand() % n ;
}

double sdev(int n,double *y){
    int i;
    double s = 0, avg;
    for(i=0;i<n;i++)s += y[i];
    avg = s/double(n);
    s = 0;
    for(i=0;i<n;i++)s += pow(avg-y[i],2);
    return sqrt(s/double(n));
}//sdev

double mean(int n,double *y){
    double s = 0;
    int i;
    for(i=0;i<n;i++)s += y[i];
    return s/n;
}

void clt(void){
    typedef mt19937 random_engine;
    unsigned seed = chrono::system_clock::now().time_since_epoch().count();
    random_engine g (seed);
    double un = 100,std = 20,ue = 50;      //distributin means amd stndard deviation
    exponential_distribution<> diste(1/ue);
    normal_distribution<> distn(un, std);

    int nsamples =150, navg = 500,m,j,k,i,n = 100000;
    double s[10000],pavg,pstdev,savg[navg],sstdev[navg];  //sampand population means and standard deviation
    double y[n],cd[navg];

    //create randomized array of exponnential and normal distributions
    for(i=0;i<n/2;i++)y[i] = distn(g);
    for(i=n/2;i<n;i++)y[i] = diste(g);
   double *start = &y[0], *end = &y[n-1];
   random_shuffle(start, end, pointer_to_unary_function<int, int>(Rand));
   random_shuffle(start, end, pointer_to_unary_function<int, int>(Rand));
  random_shuffle(start, end, pointer_to_unary_function<int, int>(Rand));
  k = 0;
  for(i=0;i<navg;i++){
        m = 0;
        for(j=k;j<k+nsamples;j++)s[m++] = y[j]; // take samples
        savg[i] = mean(nsamples,s); //sample mean standard deviation
        sstdev[i] = sdev(nsamples,s);
        k += nsamples;
    }//i
    pavg = mean(n,y); //samplen mean standard deviation
    pstdev = sdev(n,y); //population mean standard deviation
    printf("Populatio mean %.5f   std  %.5f\n\n",pavg,pstdev);
    for(i=0;i<navg;i++)cd[i] = 100*double(i)/double(navg-1);
    sort(savg,savg+navg);
   // for(i=0;i<navg;i++)printf("%.5f\t %.5f\n",savg[i],cd[i]);
    FILE *p = fopen("cd.dat","w");
    for(i=0;i<navg;i++)fprintf(p,"%.5f\t %.5f\n",savg[i],cd[i]);
    fclose(p);
}

int main()
{
    clt();
    system(clt.plt"); //Gnuplot
    return 0;
}

clt.plt
set term windows background rgb "white" title "DFT" fontscale 1
clear
reset
set key off
set multiplot layout 2,1 columnsfirst

set border 3
# Add a vertical dotted line at x=0 to show centre (mean) of distribution.
set yzeroaxis
# Each bar is4half the (visual) width of its x-range.
set boxwidth 4 absolute
set style fill solid 1.0 noborder
bin_width = 5;
bin_number(x) = floor(x/bin_width)
rounded(x) = bin_width * ( bin_number(x) + 0.5 )
set xrange[*:*]
plot 'cd.dat' using (rounded($1))

1) smooth frequency with boxes
#

set grid lt 1 lw 1 lc rgb "black" dashtype solid
# signal
set autoscale x
show autoscale
set zeroaxis
set yrange[*:*]
plot 'cd.dat' using 1:2 with lines ls 4 lt -1 lw 3
show grid
unset autoscale y

steve_bank · Jul 8, 2024

The paper URAND A universal Random Number Generator is in the public domain from the 70s, my copy came from a Stanford site.

URAND is an LG generator with specific constants, the wiki page on LG generators has a list of different constants that are used.

URAND and I doubt any LG set of constants are patenable. C++ has an LG genertor.

I picked URAND because the derivcation of the constants is straightforward to follow abd udesnd. The opposite of cryptc and convoluted.

Post your random number code.

steve_bank · Jul 8, 2024

Swammerdami said:
There's only one question to which we ant to hear your answer. When will that come?

Here, I'll make the font a bit bigger.

Swammerdami said:

BUT I do have two questions for you. Here's one I've asked TWICE. Will the third time be a charm?

Swammerdami said:

My question was about your "URAND LG". Perhaps you should show us that code.

Swammerdami said:

steve_bank said:

Execution times C++ Mersenne Twister vs LG.

for 10^10 iterations C++ took 68 seconds, URAND LG took .016 seconds.

Click to expand...

Obviously something went badly wrong in your experiment! I'm sure you'll agree that you didn't do 10^10 iterations of anything in 16 milliseconds!

Click to expand...

No comment about this?

Click to expand...

According to your result, your "RAND LG" was more than 4500 times faster than Mersenne Twister. Do you want to recant this peculiar claim? I've already mentioned that Jenkin's Burble PRNG -- optimized for speed -- is only about twice as fast as the Twister.

Is your "URAND LG" proprietary? Patent pending?

Click to expand...

Until you figure out an answer to this, everything else you say is just blather, worthless.

I think You simply made a big typo, or a big "thinko." Why don't you just say so? Or are you a narcissist who doubles down, then triples down on your blunders, hoping they'll be forgotten?

I make a reasonable effort to be correct on what I post, if I get something wrong who cares. This is an informal forum.

ou talk as if you are in some kind of completion with me and you are not. You are too irrational and too shallow in statistics to have a conversation.

From past threads you lack an ujnderstndin of theory. You posted -log(1) probably from something oiu saw on the net without undemanding where it comes from, and with an difference it came from you.

If you never post any substantive code then you can be critical of others never posting anything that can be wrong or questioned.

If you think you are in a battle with me it is all in your head.

Post your random number generator so I can quantitatively compare it with other generators.

I posted a link to the paper several times and noted authorship.

steve_bank · Jul 8, 2024

Here you go Swami, after a leisurely meandering process this is what I have.

It is from here, if you have any issues you will have to get a psychic to talk to the dead authors.

http://i.stanford.edu/pub/cstr/reports/cs/tr/73/334/CS-TR-73-334.pdf

Knuths text, chaprter 3 random numbers.

Loading…

seriouscomputerist.atariverse.com

From the paper the period is on the order 0f m. Numerical problems prevented getting a full 64 bit long long. m is 52 bits for an approximate period of 2^52.

The output is a 32 bit signed integer.

From the simple frequency test I did it compares to the C++ implementation of Merssene Twister in terms of closeness to a theoretical uniform distribution.

A completer evaluation as outlined in Knuth;s Semi Numerical Algorithms would take some time. There are software tools on the net for testing generators.

For me it was about learning something about random number generation.

Code:

    static unsigned long long rand_num = 1;
    unsigned long long rand_max = 0x7fffffff;
    unsigned long long k1 = 951722585092923;
    unsigned long long k2 = 1768559438007117;
    int sf = 1 + hi - lo;
    static int init = 1;
    if(init){
        rand_num = time(NULL);
        init = 0;
        }
    int xint;
    double xdec;
    rand_num = rand_num*k2 + k1;
    xdec = double(rand_num%(rand_max-1))/double(rand_max);
    rand_num = rand_num*k2 + k1;
    xint = (rand_num%rand_max)%sf + lo;
    return xint + xdec;
}

int unif_int(int lo,int hi){
    static unsigned long long rand_num = 1;
    unsigned long long rand_max = 0x7fffffff;
    unsigned long long k1 = 951722585092923;
    unsigned long long k2 = 1768559438007117;
    int sf = 1 + hi - lo;
    static int init = 1;
    if(init){
        rand_num = time(NULL);
        init = 0;
        }
    rand_num = rand_num*k2 + k1;
    return (rand_num%rand_max)%sf + lo;
}

void comstants(void){
    unsigned long long m =0x000fffffffffffff;
    unsigned long long mh = m/2;
    unsigned long long k1 = 2 * ceil(mh*(.5 - sqrt(3)/6.)) + 1;
    unsigned long long k2 = 8 * ceil(mh*(atan(1.)/8.)) + 5;
    printf("k1   %llu           k2    %llu\n",k1,k2);
    printf("k1  0x%llx    k2    0x%llx\n",k1,k2);
    FILE *p = fopen("k.dat","w");
    fprintf(p,"K1  %lld   K2  %lld\n",k1,k2);
    fclose(p);
}

void test(void){
    int i,lo = -10,hi = 10,xi;
    double xd;
    for(i=0;i<100;i++){
        xi = unif_int(lo,hi);
        xd = unif_float(lo,hi);
        printf("%+10d\t  %+10.5f\n",xi,xd);
        }
}

Python has Numpy\Scipy and C++ has built ins. There are several well known tested libraries for
C/C++.

Swammerdami · Jul 8, 2024

I've had enough of your insults and misinformation; so by default your posts no longer appear for me. There's only one thing I want from you -- an answer to the VERY simple question I've asked 4 times, lately in a very large font.

I want to know if you are capable of the gumption and humility to admit that post was gibberish. It appears that you are incapable. Do you think that nobody can see you if you keep your head buried in the sand?

If you DO deign to answer the question, please ask a friend -- if you have one -- to PM me. I just want to know how many weeks (months? ever?) it takes you to admit your post -- intended as an insult against me -- was some kind of giant brain fart.

steve_bank · Jul 9, 2024

If you obsess about a trivial at I wat I e as a speed difference go fr it.

You are evading my request to post your random number generator for evaluation. You ponly past snippetys.

Insults? Harley. You are making excuses.

Your did not understand -log(1) that you posted.
You can not explain how your normal distribution works.

As requested yet again I posted a link to the paper that has the ordinal FORTRAN code. You probably will not read the paper.

A very basic and important question about your normal generator, how wide is the distribution in terms of standard deviations? Not a trick question. Basic statistics.

The question of course is rhetorical as there will be no answer.

My guess is that in your work you never had to deal with peer reviews and explaining what you did.

Adios amigo. I do not like to put people on ignore but that is the solution.

steve_bank · Jul 10, 2024

Generating large random numbers.

Hours of entertainer for those who look for patterns in numubers.

Code:

import random as rn

def humungous_unif_int(nnums,ndigits):
    f = open(path+"big_rand.txt",'w')
    for i in range(nnums):
        s = ' '
        for j in range(ndigits):
            digit = rn.randint(0,9)
            s += repr(digit)
        print(s)
        s += '\n' 
        f.write(s)
    f.close()
  
void humonous_unif_int(int nnums,int ndigits){
    int k,i,digit;
    srand(time(NULL));
    FILE *p = fopen("big_rand.txt","w");
    for(k=0;k<nnums;k++){
        for(i=0;i<ndigits;i++){
            digit = rand()%10;
            fprintf(p,"%d",digit);
            cout<<digit;
        }
        fprintf(p,"\n");
        cout<<endl;
    }
    fclose (p);
}

93952269091253643003946470064505381847165797936426
42651725712591588053194624950718166088011758835489
02710681592054797500957270124673411601222363593427
78217608916512203550629419799222712557887059481503
78784177576821675256342265734117257900401580642471
16625972006046475007303109084051832400704005764923
45136014863689278440904248739134707630063362927183
34935266049819795825463592860557858599900477350820
70792222464331881530331730644286373062353009349612
95107461533643981079757050289981923404281604644919

Probability Distributions

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Contributor

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Diabetic retinopathy and poor eyesight. Typos ...

Diabetic retinopathy and poor eyesight. Typos ...

Diabetic retinopathy and poor eyesight. Typos ...

Squadron Leader

Diabetic retinopathy and poor eyesight. Typos ...

Diabetic retinopathy and poor eyesight. Typos ...