#! /usr/bin/perl -w # # Amit Singh, iitD # Generate data conforming to the Pareto Distribution # $Id: 30/Nov/1997/21:17:00 $ # # 1. The Pareto Distribution # # Pareto/Power-Law/Double-Exponential Distribution # # P[X <= x] = 1 - (alpha / x) ** beta # alpha: location parameter # beta: shape parameter # alpha, beta >= 0 # x >= alpha # # pdf: f(x) = beta * alpha ^ beta * x ^ (-beta - 1) # # Note that Pareto is heavy-tailed # We have P[X >= x] ~ c * x^(-beta) as x -> Infinity, beta >= 0 # # Integration of the pdf between limits 'u' and 'l' gives # # beta # ---------- * alpha^(beta) * [ (1 / l^(beta)) - (1 / u^(beta)) ] # (1 + beta) # # # 2. The Integral Transform # # Theorem: Let X be a random variable with pdf f and cdf F. [Assume that # f(x) = 0, for x not belonging to (a, b)]. Let Y be the random variable # defined by Y = F(X). Then Y is uniformly distributed over [0, 1]. Y is # called the Integral Transform of X. # # We can use the above result in order to generate a random sample from # a random variable with specified distribution (in our case, the Pareto # distribution). # # Let X be a random variable with cdf F from which a sample is required. # Let y1 be a value (between 0 and 1) obtained from a table of random # numbers (we shall use the 'rand' function). Since Y = F(X) is uniformly # distributed over [0, 1], we may consider y1 as an observation from that # random variable. Solving the equation y1 = F(x1) for x1, we obtain a # value from a random variable whose cdf is F. # # 3. The Sample # # Now, cdf_Pareto = 1 - (alpha / x)^(beta) # Thus, the final expression which shall give us the required sample is: # # alpha # x(i) = ------------------- # (1 - y(i))^(1/beta) # # Hardcoded values of the parameters my $alpha = 32; my $beta = 0.5; my $betainv = 1 / ($beta); # The ugliest part - highly _KERNEL_ dependent, uh. # We'd need the second 'word' of the first 'line' of this file :-( # my $entropysrc = "/proc/interrupts"; # The 'command' to get the appropriate number is equally kludgy # my $extract_entropy = "head -1 $entropysrc"; ($#ARGV eq -1) && die "usage: $0 "; my ($i, $x, $s) = (0, 0, 0); for ($i=0; $i<$ARGV[0]; $i++) { $x = `$extract_entropy`; @x = split(' ', $x); srand($x[1]); $x = rand 1; $s = int($alpha / ((1 - $x) ** $betainv)); print "$s\n"; }