Module stikpetP.distributions.dist_spearman
Expand source code
import pandas as pd
from math import factorial
from math import atanh
from math import exp
from statistics import NormalDist
from scipy.stats import t
from scipy.stats import norm
from ..other.table_cross import tab_cross
def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method="as89", iters=500):
    '''
    Spearman Rank Cumulative Distribution Function
    ----------------------------------------------
    A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.

    Parameters
    ----------
    ordField1 : pandas series
        the ordinal or scale scores of the first variable
    ordField2 : pandas series
        the ordinal or scale scores of the second variable
    levels1 : list or dictionary, optional
        the categories to use from ordField1
    levels2 : list or dictionary, optional
        the categories to use from ordField2
    method : {"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}, optional
        which method to use
    iters : int, optional
        maximum number of steps for the iterative search used by method="iman-conover". Default is 500.

    Returns
    -------
    Depending on the method used:

    * for "t" a pandas dataframe with *cdf*, *statistic* and *df*
    * for "z-fieller" and "z-olds" a pandas dataframe with *cdf* and *statistic*
    * for "exact", "as89" and "iman-conover" only the value itself (a number)

    with:

    * *cdf*, the cdf value
    * *statistic*, the statistic used for the cdf
    * *df*, the degrees of freedom

    Raises
    ------
    ValueError
        if *method* is not one of the listed options

    Notes
    -----
    The sig. formulas below show how a two-sided significance follows from each approximation; the function itself returns the distribution value.

    The "exact" method uses the following four steps.

    1. Determine all possible permutations of the scores in the first variable
    2. Determine for each the Spearman rho with the second variable
    3. Count how often that Spearman rho is below the original Spearman rho (the complement of how often it is above or equal)
    4. Divide the result by n!.

    A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937):
    $$t_{ks} = r_s\\times\\sqrt{\\frac{n-2}{1-r_s^2}}$$
    $$df = n - 2$$
    $$sig. = 2\\times\\left(1-T\\left(\\left|t_s\\right|, df\\right)\\right)$$

    A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646):
    $$z_F = \\frac{\\text{atanh}\\left(r_s\\right)}{\\sqrt{\\frac{1.06}{n-3}}}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_F\\right|\\right)\\right)$$

    An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56):
    $$z_O = \\frac{x}{ASE}$$
    $$x = \\frac{S}{2} - \\frac{n^3 - n}{12}$$
    $$ASE = \\sqrt{n-1}\\times\\frac{n\\times\\left(n+1\\right)}{12}$$
    $$S = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_O\\right|\\right)\\right)$$

    Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution:
    $$J = \\frac{r_s}{2}\\times\\left(\\sqrt{n-1}+\\sqrt{\\frac{n-2}{1-r_s^2}}\\right)$$

    Reject the null hypothesis at \\(\\alpha\\) level if:
    $$J > \\frac{Q\\left(\\Phi\\left(1-\\frac{\\alpha}{2}\\right)\\right) + Q\\left(T\\left(1-\\frac{\\alpha}{2}, df\\right)\\right)}{2}$$

    The function will use an iterative search (bisection, at most *iters* steps) for the approximate p-value.

    Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so a value of S below its mean is first reflected by using:
    $$S^* = \\frac{n^3 - n}{3} - S$$
    $$S = \\sum_{i=1}^n d_i^2 = \\sum_{i=1}^n \\left(R_{x,i}-R_{y,i}\\right)^2 = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$

    *Symbols Used*

    * \\(n\\), the sample size
    * \\(r_s\\), the Spearman rho
    * \\(\\Phi\\left(\\dots\\right)\\), the cumulative distribution function of the standard normal distribution.
    * \\(T\\left(\\dots\\right)\\), the cumulative distribution function of the Student T distribution.
    * \\(Q\\left(\\dots\\right)\\), the inverse cumulative distribution function of the function between parenthesis.

    References
    ----------
    Best, D. J., & Roberts, D. E. (1975). Algorithm AS 89: The upper tail probabilities of Spearman’s rho. *Applied Statistics, 24*(3), 377–379. doi:10.2307/2347111

    Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. *Biometrika, 64*(3), 645–647. doi:10.1093/biomet/64.3.645

    Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. *Biometrika, 44*(3–4), 470–481. doi:10.1093/biomet/44.3-4.470

    Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. *Communications in Statistics - Theory and Methods, 17*(1), 55–59. doi:10.1080/03610928808829609

    Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. *Communications in Statistics - Simulation and Computation, 7*(3), 269–282. doi:10.1080/03610917808812076

    Kendall, M., & Stuart, A. (1979). *The advanced theory of statistics. Volume 2: Inference and relationship* (4th ed., Vol. 2). Griffin.

    Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. *The Annals of Mathematical Statistics, 9*(2), 133–148. doi:10.1214/aoms/1177732332

    Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. *The Annals of Mathematical Statistics, 20*(1), 117–118. doi:10.1214/aoms/1177730099

    Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. *Supplement to the Journal of the Royal Statistical Society, 4*(2), 225–232. doi:10.2307/2983647

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    # imported here (not at module level), as in the original source
    from ..correlations.cor_spearman_rho import r_spearman_rho

    # sample size from the cross table, and the observed Spearman rho
    ct = tab_cross(ordField1, ordField2, order1=levels1, order2=levels2)
    n = ct.sum().sum()
    rs = r_spearman_rho(ordField1, ordField2, levels1, levels2, test="none")

    if method == "exact":
        permu = he_perm(n)
        nPerm = factorial(n)
        # the counter has to start at zero before it can be incremented
        nAbove = 0
        for i in range(nPerm):
            testPerm = list(permu.iloc[i])
            rTest = r_spearman_rho(ordField1, testPerm, levels1, test="none")
            # strictly below the observed rho
            if rTest < rs:
                nAbove = nAbove + 1
        result = nAbove / nPerm

    elif method == "as89":
        S = (n**3 - n) * (1 - rs) / 6
        # he_AS89 gives an upper tail probability; reflect S around its mean
        # (n^3 - n)/6 when it lies below it
        if S < (n**3 - n) / 6:
            S = (n**3 - n) / 3 - S
        result = 1 - he_AS89(n, S)

    elif method == "t":
        # (Kendall & Stuart, 1979, p. 503)
        ts = rs * ((n - 2) / (1 - rs**2))**0.5
        df = n - 2
        pvalue = t.cdf(abs(ts), df)
        if rs < 0:
            pvalue = 1 - pvalue
        result = pd.DataFrame([[pvalue, ts, df]])
        result.columns = ["cdf", "statistic", "df"]

    elif method == "z-fieller":
        # (Fieller et al. 1957, p. 472; Choi, 1977, p. 646)
        zs = atanh(rs) / (1.06 / (n - 3))**0.5
        pvalue = NormalDist().cdf(zs)
        result = pd.DataFrame([[pvalue, zs]])
        result.columns = ["cdf", "statistic"]

    elif method == "z-olds":
        # (Olds, 1938, p. 142; Olds, 1949, p. 117)
        S = (n**3 - n) * (1 - rs) / 6
        x = S / 2 - (n**3 - n) / 12
        ase = (n - 1)**0.5 * (n * (n + 1) / 12)
        Z = x / ase
        pvalue = 1 - NormalDist().cdf(Z)
        result = pd.DataFrame([[pvalue, Z]])
        result.columns = ["cdf", "statistic"]

    elif method == "iman-conover":
        # (Iman & Conover, 1978, p. 272)
        j = abs(rs) / 2 * ((n - 1)**0.5 + ((n - 2) / (1 - abs(rs)**2))**0.5)
        df = n - 2

        # bisection on the two-sided p-value: find the p for which the
        # averaged normal/t critical value equals j
        pLow = 0
        pHigh = 1
        pvalue = 0.05
        nIter = 1
        while True:
            zCrit = norm.ppf(1 - pvalue / 2)
            tCrit = t.ppf(1 - pvalue / 2, df)
            Jcrit = (zCrit + tCrit) / 2
            # '>=' so that a non-positive iters also ends the search
            if Jcrit == j or nIter >= iters:
                break
            if Jcrit < j:
                # critical value too small -> the p-value must be smaller
                pHigh = pvalue
                pvalue = (pLow + pvalue) / 2
            elif Jcrit > j:
                # critical value too large -> the p-value must be larger
                pLow = pvalue
                pvalue = (pHigh + pvalue) / 2
            nIter = nIter + 1

        # convert the two-sided p-value to a distribution value
        if rs > 0:
            pvalue = 1 - pvalue / 2
        else:
            pvalue = pvalue / 2
        result = pvalue

    else:
        raise ValueError(
            'unknown method "' + str(method) + '", use one of: "as89", "t", '
            '"exact", "iman-conover", "z-fieller", "z-olds"')

    return result
def he_perm(n):
    '''
    Helper Function - All Permutations
    ----------------------------------
    Generates every permutation of the numbers 1 to n, using the
    non-recursive version of Heap's algorithm, see
    https://sedgewick.io/wp-content/uploads/2022/03/2002PermGeneration.pdf

    Parameters
    ----------
    n : int
        the number of items to permute

    Returns
    -------
    pandas dataframe with n! rows and n columns, each row one permutation.
    The first row is 1, 2, ..., n.
    '''
    a = list(range(1, n + 1))
    # c[i] counts how many swaps have been done at level i
    c = [0] * n

    # collect the rows in a plain list and build the dataframe once at the end
    rows = [list(a)]
    i = 1
    while i < n:
        if c[i] < i:
            if i % 2 == 0:
                a[0], a[i] = a[i], a[0]
            else:
                a[c[i]], a[i] = a[i], a[c[i]]
            # store the permutation exactly as it is (index ff, not ff - 1;
            # the shifted index duplicated 1..n and dropped n, 1, ..., n-1)
            rows.append(list(a))
            c[i] = c[i] + 1
            i = 1
        else:
            c[i] = 0
            i = i + 1

    return pd.DataFrame(rows)
def he_AS89(n, IST):
    '''
    Helper Function - Algorithm AS 89
    ---------------------------------
    Upper tail probability of the sum of squared rank differences, i.e. the
    probability of a value greater than or equal to IST, following
    Algorithm AS 89 (Best & Roberts, 1975).

    Parameters
    ----------
    n : int
        the sample size
    IST : int or float
        the sum of squared rank differences, S = (n^3 - n)*(1 - r_s)/6

    Returns
    -------
    the probability P(S >= IST):

    * 1 if n <= 1 or IST <= 0
    * 0 if IST is above the maximum possible value n*(n^2 - 1)/3
    * an exact value (by enumerating all n! permutations) if n <= 6
    * an Edgeworth series approximation if n > 6, limited to the range 0 to 1

    Notes
    -----
    Without ties S is always even, so a whole odd value of IST is raised to
    the next even number, as in the published algorithm. Values that are not
    whole numbers are used as given.
    '''
    from itertools import permutations

    # test admissibility of arguments
    if n <= 1 or IST <= 0:
        return 1
    maxS = n * (n * n - 1) / 3
    if IST > maxS:
        return 0

    # round a whole odd value up to the next even value
    JS = IST
    if JS % 2 == 1:
        JS = JS + 1

    if n > 6:
        # evaluation by Edgeworth series expansion
        b = 1 / n
        x = (6 * (JS - 1) * b / (1 / (b * b) - 1) - 1) * (1 / b - 1)**0.5
        y = x * x
        u = x * b * (0.2274 + b * (0.2531 + 0.1745 * b)
                     + y * (-0.0758 + b * (0.1033 + 0.3932 * b)
                            - y * b * (0.0879 + 0.0151 * b
                                       - y * (0.0072 - 0.0831 * b
                                              + y * b * (0.0131 - 0.00046 * y)))))
        PRHO = u / exp(y / 2) + (1 - NormalDist().cdf(x))
        # the approximation can overshoot, keep it a valid probability
        if PRHO < 0:
            PRHO = 0
        elif PRHO > 1:
            PRHO = 1
        return PRHO

    # exact evaluation of probability
    NFAC = factorial(n)
    if JS == maxS:
        # only the fully reversed ranking reaches the maximum
        return 1 / NFAC

    ranks = range(1, n + 1)
    IFR = 0
    for perm in permutations(ranks):
        ISE = sum((i - p)**2 for i, p in zip(ranks, perm))
        if JS <= ISE:
            IFR = IFR + 1
    return IFR / NFAC
Functions
def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method='as89', iters=500)
-
Spearman Rank Cumulative Distribution Function
A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.
Parameters
ordField1
:pandas series
- the ordinal or scale scores of the first variable
ordField2
:pandas series
- the ordinal or scale scores of the second variable
levels1
:list or dictionary, optional
- the categories to use from ordField1
levels2
:list or dictionary, optional
- the categories to use from ordField2
method
:{"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}
, optional- which method to use
Returns
Depending on the method used:
- cdf, the cdf value
- statistic, the statistic used for the cdf
- df, the degrees of freedom
Notes
The "exact" method uses the following four steps.
- Determine all possible permutations of the scores in the first variable
- Determine for each the Spearman rho with the second variable
- Count how often the Spearman rho is above or equal to the original Spearman rho
- Divide the result by n!.
A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937): t_{ks} = r_s\times\sqrt{\frac{n-2}{1-r_s^2}} df = n - 2 sig. = 2\times\left(1-T\left(\left|t_s\right|, df\right)\right)
A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646): z_F = \frac{\text{atanh}\left(r_s\right)}{\sqrt{\frac{1.06}{n-3}}} sig. = 2\times\left(1-\Phi\left(\left|z_F\right|\right)\right)
An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56): z_O = \frac{x}{ASE} x = \frac{S}{2} - \frac{n^3 - n}{12} ASE = \sqrt{n-1}\times\frac{n\times\left(n+1\right)}{12} S = \frac{\left(n^3-n\right)\times\left(1-r_s\right)}{6} sig. = 2\times\left(1-\Phi\left(\left|z_O\right|\right)\right)
Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution: J = \frac{r_s}{2}\times\left(\sqrt{n-1}+\sqrt{\frac{n-2}{1-r_s^2}}\right) Reject the null hypothesis at \alpha level if: J > \frac{Q\left(\Phi\left(1-\frac{\alpha}{2}\right)\right) + Q\left(T\left(1-\frac{\alpha}{2}, df\right)\right)}{2}
The function will use an iterative search for the approximate p-value.
Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so a value of S below its mean is first reflected by using: S^* = \frac{n^3 - n}{3} - S S = \sum_{i=1}^n d_i^2 = \sum_{i=1}^n \left(R_{x,i}-R_{y,i}\right)^2 = \frac{\left(n^3-n\right)\times\left(1-r_s\right)}{6}
Symbols Used
- n, the sample size
- r_s, the Spearman rho
- \Phi\left(\dots\right), the cumulative distribution function of the standard normal distribution.
- T\left(\dots\right), the cumulative distribution function of the Student T distribution.
- Q\left(\dots\right), the inverse cumulative distribution function of the function between parenthesis.
References
Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. Biometrika, 64(3), 645–647. doi:10.1093/biomet/64.3.645
Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. Biometrika, 44(3–4), 470–481. doi:10.1093/biomet/44.3-4.470
Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. Communications in Statistics - Theory and Methods, 17(1), 55–59. doi:10.1080/03610928808829609
Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. Communications in Statistics - Simulation and Computation, 7(3), 269–282. doi:10.1080/03610917808812076
Kendall, M., & Stuart, A. (1979). The advanced theory of statistics. Volume 2: Inference and relationship (4th ed., Vol. 2). Griffin.
Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. The Annals of Mathematical Statistics, 9(2), 133–148. doi:10.1214/aoms/1177732332
Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. The Annals of Mathematical Statistics, 20(1), 117–118. doi:10.1214/aoms/1177730099
Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. Supplement to the Journal of the Royal Statistical Society, 4(2), 225–232. doi:10.2307/2983647
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method="as89", iters=500):
    '''
    Spearman Rank Cumulative Distribution Function
    ----------------------------------------------
    A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.

    Parameters
    ----------
    ordField1 : pandas series
        the ordinal or scale scores of the first variable
    ordField2 : pandas series
        the ordinal or scale scores of the second variable
    levels1 : list or dictionary, optional
        the categories to use from ordField1
    levels2 : list or dictionary, optional
        the categories to use from ordField2
    method : {"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}, optional
        which method to use
    iters : int, optional
        maximum number of steps for the iterative search used by method="iman-conover". Default is 500.

    Returns
    -------
    Depending on the method used:

    * for "t" a pandas dataframe with *cdf*, *statistic* and *df*
    * for "z-fieller" and "z-olds" a pandas dataframe with *cdf* and *statistic*
    * for "exact", "as89" and "iman-conover" only the value itself (a number)

    with:

    * *cdf*, the cdf value
    * *statistic*, the statistic used for the cdf
    * *df*, the degrees of freedom

    Raises
    ------
    ValueError
        if *method* is not one of the listed options

    Notes
    -----
    The sig. formulas below show how a two-sided significance follows from each approximation; the function itself returns the distribution value.

    The "exact" method uses the following four steps.

    1. Determine all possible permutations of the scores in the first variable
    2. Determine for each the Spearman rho with the second variable
    3. Count how often that Spearman rho is below the original Spearman rho (the complement of how often it is above or equal)
    4. Divide the result by n!.

    A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937):
    $$t_{ks} = r_s\\times\\sqrt{\\frac{n-2}{1-r_s^2}}$$
    $$df = n - 2$$
    $$sig. = 2\\times\\left(1-T\\left(\\left|t_s\\right|, df\\right)\\right)$$

    A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646):
    $$z_F = \\frac{\\text{atanh}\\left(r_s\\right)}{\\sqrt{\\frac{1.06}{n-3}}}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_F\\right|\\right)\\right)$$

    An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56):
    $$z_O = \\frac{x}{ASE}$$
    $$x = \\frac{S}{2} - \\frac{n^3 - n}{12}$$
    $$ASE = \\sqrt{n-1}\\times\\frac{n\\times\\left(n+1\\right)}{12}$$
    $$S = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_O\\right|\\right)\\right)$$

    Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution:
    $$J = \\frac{r_s}{2}\\times\\left(\\sqrt{n-1}+\\sqrt{\\frac{n-2}{1-r_s^2}}\\right)$$

    Reject the null hypothesis at \\(\\alpha\\) level if:
    $$J > \\frac{Q\\left(\\Phi\\left(1-\\frac{\\alpha}{2}\\right)\\right) + Q\\left(T\\left(1-\\frac{\\alpha}{2}, df\\right)\\right)}{2}$$

    The function will use an iterative search (bisection, at most *iters* steps) for the approximate p-value.

    Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so a value of S below its mean is first reflected by using:
    $$S^* = \\frac{n^3 - n}{3} - S$$
    $$S = \\sum_{i=1}^n d_i^2 = \\sum_{i=1}^n \\left(R_{x,i}-R_{y,i}\\right)^2 = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$

    *Symbols Used*

    * \\(n\\), the sample size
    * \\(r_s\\), the Spearman rho
    * \\(\\Phi\\left(\\dots\\right)\\), the cumulative distribution function of the standard normal distribution.
    * \\(T\\left(\\dots\\right)\\), the cumulative distribution function of the Student T distribution.
    * \\(Q\\left(\\dots\\right)\\), the inverse cumulative distribution function of the function between parenthesis.

    References
    ----------
    Best, D. J., & Roberts, D. E. (1975). Algorithm AS 89: The upper tail probabilities of Spearman’s rho. *Applied Statistics, 24*(3), 377–379. doi:10.2307/2347111

    Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. *Biometrika, 64*(3), 645–647. doi:10.1093/biomet/64.3.645

    Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. *Biometrika, 44*(3–4), 470–481. doi:10.1093/biomet/44.3-4.470

    Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. *Communications in Statistics - Theory and Methods, 17*(1), 55–59. doi:10.1080/03610928808829609

    Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. *Communications in Statistics - Simulation and Computation, 7*(3), 269–282. doi:10.1080/03610917808812076

    Kendall, M., & Stuart, A. (1979). *The advanced theory of statistics. Volume 2: Inference and relationship* (4th ed., Vol. 2). Griffin.

    Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. *The Annals of Mathematical Statistics, 9*(2), 133–148. doi:10.1214/aoms/1177732332

    Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. *The Annals of Mathematical Statistics, 20*(1), 117–118. doi:10.1214/aoms/1177730099

    Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. *Supplement to the Journal of the Royal Statistical Society, 4*(2), 225–232. doi:10.2307/2983647

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    # imported here (not at module level), as in the original source
    from ..correlations.cor_spearman_rho import r_spearman_rho

    # sample size from the cross table, and the observed Spearman rho
    ct = tab_cross(ordField1, ordField2, order1=levels1, order2=levels2)
    n = ct.sum().sum()
    rs = r_spearman_rho(ordField1, ordField2, levels1, levels2, test="none")

    if method == "exact":
        permu = he_perm(n)
        nPerm = factorial(n)
        # the counter has to start at zero before it can be incremented
        nAbove = 0
        for i in range(nPerm):
            testPerm = list(permu.iloc[i])
            rTest = r_spearman_rho(ordField1, testPerm, levels1, test="none")
            # strictly below the observed rho
            if rTest < rs:
                nAbove = nAbove + 1
        result = nAbove / nPerm

    elif method == "as89":
        S = (n**3 - n) * (1 - rs) / 6
        # he_AS89 gives an upper tail probability; reflect S around its mean
        # (n^3 - n)/6 when it lies below it
        if S < (n**3 - n) / 6:
            S = (n**3 - n) / 3 - S
        result = 1 - he_AS89(n, S)

    elif method == "t":
        # (Kendall & Stuart, 1979, p. 503)
        ts = rs * ((n - 2) / (1 - rs**2))**0.5
        df = n - 2
        pvalue = t.cdf(abs(ts), df)
        if rs < 0:
            pvalue = 1 - pvalue
        result = pd.DataFrame([[pvalue, ts, df]])
        result.columns = ["cdf", "statistic", "df"]

    elif method == "z-fieller":
        # (Fieller et al. 1957, p. 472; Choi, 1977, p. 646)
        zs = atanh(rs) / (1.06 / (n - 3))**0.5
        pvalue = NormalDist().cdf(zs)
        result = pd.DataFrame([[pvalue, zs]])
        result.columns = ["cdf", "statistic"]

    elif method == "z-olds":
        # (Olds, 1938, p. 142; Olds, 1949, p. 117)
        S = (n**3 - n) * (1 - rs) / 6
        x = S / 2 - (n**3 - n) / 12
        ase = (n - 1)**0.5 * (n * (n + 1) / 12)
        Z = x / ase
        pvalue = 1 - NormalDist().cdf(Z)
        result = pd.DataFrame([[pvalue, Z]])
        result.columns = ["cdf", "statistic"]

    elif method == "iman-conover":
        # (Iman & Conover, 1978, p. 272)
        j = abs(rs) / 2 * ((n - 1)**0.5 + ((n - 2) / (1 - abs(rs)**2))**0.5)
        df = n - 2

        # bisection on the two-sided p-value: find the p for which the
        # averaged normal/t critical value equals j
        pLow = 0
        pHigh = 1
        pvalue = 0.05
        nIter = 1
        while True:
            zCrit = norm.ppf(1 - pvalue / 2)
            tCrit = t.ppf(1 - pvalue / 2, df)
            Jcrit = (zCrit + tCrit) / 2
            # '>=' so that a non-positive iters also ends the search
            if Jcrit == j or nIter >= iters:
                break
            if Jcrit < j:
                # critical value too small -> the p-value must be smaller
                pHigh = pvalue
                pvalue = (pLow + pvalue) / 2
            elif Jcrit > j:
                # critical value too large -> the p-value must be larger
                pLow = pvalue
                pvalue = (pHigh + pvalue) / 2
            nIter = nIter + 1

        # convert the two-sided p-value to a distribution value
        if rs > 0:
            pvalue = 1 - pvalue / 2
        else:
            pvalue = pvalue / 2
        result = pvalue

    else:
        raise ValueError(
            'unknown method "' + str(method) + '", use one of: "as89", "t", '
            '"exact", "iman-conover", "z-fieller", "z-olds"')

    return result
def he_AS89(n, IST)
-
Expand source code
def he_AS89(n, IST):
    '''
    Helper Function - Algorithm AS 89
    ---------------------------------
    Upper tail probability of the sum of squared rank differences, i.e. the
    probability of a value greater than or equal to IST, following
    Algorithm AS 89 (Best & Roberts, 1975).

    Parameters
    ----------
    n : int
        the sample size
    IST : int or float
        the sum of squared rank differences, S = (n^3 - n)*(1 - r_s)/6

    Returns
    -------
    the probability P(S >= IST):

    * 1 if n <= 1 or IST <= 0
    * 0 if IST is above the maximum possible value n*(n^2 - 1)/3
    * an exact value (by enumerating all n! permutations) if n <= 6
    * an Edgeworth series approximation if n > 6, limited to the range 0 to 1

    Notes
    -----
    Without ties S is always even, so a whole odd value of IST is raised to
    the next even number, as in the published algorithm. Values that are not
    whole numbers are used as given.
    '''
    from itertools import permutations

    # test admissibility of arguments
    if n <= 1 or IST <= 0:
        return 1
    maxS = n * (n * n - 1) / 3
    if IST > maxS:
        return 0

    # round a whole odd value up to the next even value
    JS = IST
    if JS % 2 == 1:
        JS = JS + 1

    if n > 6:
        # evaluation by Edgeworth series expansion
        b = 1 / n
        x = (6 * (JS - 1) * b / (1 / (b * b) - 1) - 1) * (1 / b - 1)**0.5
        y = x * x
        u = x * b * (0.2274 + b * (0.2531 + 0.1745 * b)
                     + y * (-0.0758 + b * (0.1033 + 0.3932 * b)
                            - y * b * (0.0879 + 0.0151 * b
                                       - y * (0.0072 - 0.0831 * b
                                              + y * b * (0.0131 - 0.00046 * y)))))
        PRHO = u / exp(y / 2) + (1 - NormalDist().cdf(x))
        # the approximation can overshoot, keep it a valid probability
        if PRHO < 0:
            PRHO = 0
        elif PRHO > 1:
            PRHO = 1
        return PRHO

    # exact evaluation of probability
    NFAC = factorial(n)
    if JS == maxS:
        # only the fully reversed ranking reaches the maximum
        return 1 / NFAC

    ranks = range(1, n + 1)
    IFR = 0
    for perm in permutations(ranks):
        ISE = sum((i - p)**2 for i, p in zip(ranks, perm))
        if JS <= ISE:
            IFR = IFR + 1
    return IFR / NFAC
def he_perm(n)
-
Expand source code
def he_perm(n):
    '''
    Helper Function - All Permutations
    ----------------------------------
    Generates every permutation of the numbers 1 to n, using the
    non-recursive version of Heap's algorithm, see
    https://sedgewick.io/wp-content/uploads/2022/03/2002PermGeneration.pdf

    Parameters
    ----------
    n : int
        the number of items to permute

    Returns
    -------
    pandas dataframe with n! rows and n columns, each row one permutation.
    The first row is 1, 2, ..., n.
    '''
    a = list(range(1, n + 1))
    # c[i] counts how many swaps have been done at level i
    c = [0] * n

    # collect the rows in a plain list and build the dataframe once at the end
    rows = [list(a)]
    i = 1
    while i < n:
        if c[i] < i:
            if i % 2 == 0:
                a[0], a[i] = a[i], a[0]
            else:
                a[c[i]], a[i] = a[i], a[c[i]]
            # store the permutation exactly as it is (index ff, not ff - 1;
            # the shifted index duplicated 1..n and dropped n, 1, ..., n-1)
            rows.append(list(a))
            c[i] = c[i] + 1
            i = 1
        else:
            c[i] = 0
            i = i + 1

    return pd.DataFrame(rows)