Module `stikpetP.distributions.dist_spearman`

Expand source code

import pandas as pd
from math import factorial
from math import atanh
from math import exp
from statistics import NormalDist
from scipy.stats import t
from scipy.stats import norm
from ..other.table_cross import tab_cross

def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method="as89", iters=500):
    '''
    Spearman Rank Cumulative Distribution Function
    ----------------------------------------------
    A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.
    
    Parameters
    ----------
    ordField1 : pandas series
        the ordinal or scale scores of the first variable
    ordField2 : pandas series
        the ordinal or scale scores of the second variable
    levels1 : list or dictionary, optional
        the categories to use from ordField1
    levels2 : list or dictionary, optional
        the categories to use from ordField2
    method : {"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}, optional
        which method to use
    
    Returns
    -------
    Depending on the method used:
    
    * *cdf*, the cdf value
    * *statistic*, the statistic used for the cdf
    * *df*, the degrees of freedom
    
    
    Notes
    -----
    The "exact" method uses the following four steps.
    
    1. Determine all possible permutations of the scores in the first variable
    2. Determine for each the Spearman rho with the second variable
    3. Count how often the Spearman rho is above or equal to the original Spearman rho
    4. Divide the result by n!.
    
    A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937):
    $$t_{ks} = r_s\\times\\sqrt{\\frac{n-2}{1-r_s^2}}$$
    $$df = n - 2$$
    $$sig. = 2\\times\\left(1-T\\left(\\left|t_s\\right|, df\\right)\\right)$$
    
    A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646):
    $$z_F = \\frac{\\text{atanh}\\left(r_s\\right)}{\\sqrt{\\frac{1.06}{n-3}}}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_F\\right|\\right)\\right)$$
    
    An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56):
    $$z_O = \\frac{x}{ASE}$$
    $$x = \\frac{S}{2} - \\frac{n^3 - n}{12}$$
    $$ASE = \\sqrt{n-1}\\times\\frac{n\\times\\left(n+1\\right)}{12}$$
    $$S = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_O\\right|\\right)\\right)$$
    
    Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution:
    $$J = \\frac{r_s}{2}\\times\\left(\\sqrt{n-1}+\\sqrt{\\frac{n-2}{1-r_s^2}}\\right)$$
    Reject the null hypothesis at \\(\\alpha\\) level if:
    $$J > \\frac{Q\\left(\\Phi\\left(1-\\frac{\\alpha}{2}\\right)\\right) + Q\\left(T\\left(1-\\frac{\\alpha}{2}\\right), df)\\right)}{n-2}$$
    
    The function will use an iterative search for the approximate p-value.
    
    Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so it has been converted to lower tail by using:
    $$S^* = \\frac{n^3 - n}{6} - S$$
    $$S = \\sum_{i=1}^n d_i^2 = \\sum_{i=1}^n \\left(R_{x,i}-R_{y,i}\\right)^2 = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    
    *Symbols Used*
    
    * \\(n\\), the sample size
    * \\(r_s\\), the Spearman rho
    * \\(\\Phi\\left(\\dots\\right)\\), the cumulative density function of the standard normal distribution.
    * \\(T\\left(\\dots\\right)\\), the cumulative density function of the Student T distribution.
    * \\(Q\\left(\\dots\\right)\\), the inverse cumulative density function of the function between parenthesis.
    
    References
    ----------
    Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. *Biometrika, 64*(3), 645–647. doi:10.1093/biomet/64.3.645
    
    Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. *Biometrika, 44*(3–4), 470–481. doi:10.1093/biomet/44.3-4.470
    
    Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. *Communications in Statistics - Theory and Methods, 17*(1), 55–59. doi:10.1080/03610928808829609
    
    Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. *Communications in Statistics - Simulation and Computation, 7*(3), 269–282. doi:10.1080/03610917808812076
    
    Kendall, M., & Stuart, A. (1979). *The advanced theory of statistics. Volume 2: Inference and relationship* (4th ed., Vol. 2). Griffin.
    
    Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. *The Annals of Mathematical Statistics, 9*(2), 133–148. doi:10.1214/aoms/1177732332
    
    Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. *The Annals of Mathematical Statistics, 20*(1), 117–118. doi:10.1214/aoms/1177730099
    
    Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. *Supplement to the Journal of the Royal Statistical Society, 4*(2), 225–232. doi:10.2307/2983647
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076   
    
    '''
    from ..correlations.cor_spearman_rho import r_spearman_rho
    
    ct = tab_cross(ordField1, ordField2, order1=levels1, order2=levels2)    
    n = ct.sum().sum()
    
    rs = r_spearman_rho(ordField1, ordField2, levels1, levels2, test="none")    
    res = []
    
    if method == "exact":
        permu = he_perm(n)
        nPerm = factorial(n)
            
        testPerm = [0]*n
        for i in range(0, nPerm):
            for j in range(0, n):
                testPerm[j] = permu.iloc[i, j]
            
            rTest = r_spearman_rho(ordField1, testPerm, levels1, test="none")
            
            if rTest < rs:
                nAbove = nAbove + 1
        
        pvalue = nAbove / nPerm
        
        di_scdf = pvalue
        
    elif method == "as89":
        S = (n**3 - n) * (1 - rs) / 6
        statistic = S
        if (S < (n**3 - n) / 6):
            S = (n**3 - n) / 3 - S
        
        pvalue = 1 - he_AS89(n, S)
        
        di_scdf = pvalue
    
    elif method == "t":
        #(Kendall & Stuart, 1979, p. 503)
        ts = rs * ((n - 2) / (1 - rs**2))**0.5
        df = n - 2
        pvalue = t.cdf(abs(ts), df)
        if rs < 0:
            pvalue = 1 - pvalue
        
        res = pd.DataFrame([[pvalue, ts, df]])
        res.columns = ["cdf", "statistic", "df"]
        
        di_scdf = res
    
    elif method == "z-fieller":
        #Fieller et al. 1957, p. 472; Choi, 1977, p. 646)
        zs = atanh(rs) / (1.06 / (n - 3))**0.5
        pvalue = NormalDist().cdf(zs)
        
        res = pd.DataFrame([[pvalue, zs]])
        res.columns = ["cdf", "statistic"]
        
        di_scdf = res
    
    elif method == "z-olds":
        #Olds, 1938, p. 142; Olds, 1949, p. 117
        S = (n**3 - n) * (1 - rs) / 6
        x = S / 2 - (n**3 - n) / 12
        ase = (n - 1)**0.5 * (n * (n + 1) / 12)
        Z = x / ase
        pvalue = 1 - NormalDist().cdf(Z)
        
        res = pd.DataFrame([[pvalue, Z]])
        res.columns = ["cdf", "statistic"]
        
        di_scdf = res
    
    elif method == "iman-conover":
        #Iman & Conover, 1978, p. 272
        j = abs(rs) / 2 * ((n - 1)**0.5 + ((n - 2) / (1 - abs(rs)**2))**0.5)
        #zpval = 2 * (1 - NormalDist().cdf(abs(j)))
        
        df = n - 2
        #tpval = 2*(1 - t.cdf(abs(j), df))
        #pvalue = (zpval + tpval) / 2
    
        pLow = 0
        pHigh = 1
        pvalue = 0.05
        nIter = 1
        
        Repeat = True
        while Repeat:
        
            zCrit = norm.ppf(1 - pvalue / 2)
            tCrit = t.ppf(1 - pvalue/2, df)
            Jcrit = (zCrit + tCrit) / 2
            if Jcrit == j or nIter == iters:
                Repeat = False
            else:
                if (Jcrit < j):
                    pHigh = pvalue
                    pvalue = (pLow + pvalue) / 2
                
                elif (Jcrit > j):
                    pLow = pvalue
                    pvalue = (pHigh + pvalue) / 2
                
                nIter = nIter + 1
            
        if rs > 0:
            pvalue = 1 - pvalue / 2
        else:
            pvalue = pvalue / 2
        
        di_scdf = pvalue
        
    return di_scdf


def he_perm(n):
    #Using Heap Algorithm non-recursive
    #from https://sedgewick.io/wp-content/uploads/2022/03/2002PermGeneration.pdf
    
    nPerm = factorial(n)
    
    c = [0]*n
    res = [[0]*(n)]*(nPerm)
    res = pd.DataFrame(res)
    
    a = [0]*n
    for i in range(0, n):
        a[i] = i + 1
        res.iloc[0, i] = i + 1
    resRow = 1
    
    i = 1
    while i < n:
        if c[i] < i:
            if i % 2 == 0:
                ff = a[0]
                a[0] = a[i]
                a[i] = ff
            else:
                ff = a[c[i]]
                a[c[i]] = a[i]
                a[i] = ff
            for ff in range(0, n):
                res.iloc[resRow, ff] = a[ff - 1]

            resRow = resRow + 1
            
            c[i] = c[i] + 1
            i = 1
        else:
            c[i] = 0
            i = i + 1
    return res

def he_AS89(n, IST):
    L = [0]*7
    
    #Test admissibility of arguments and initialize
    PRHO = 1
    if (n <= 1 or IST <= 0):
        he_AS89 = PRHO
    else:
        PRHO = 0
        if (IST > n * (n * n - 1) / 3):
            he_AS89 = PRHO
        else:
            JS = IST
            if (JS != 2 * (JS / 2)):
                JS = JS + 1
            if (n > 6):
                #GOTO 6
                #6
                b = 1 / n
                x = (6 * (JS - 1) * b / (1 / (b * b) - 1) - 1) * (1 / b - 1)**0.5
                y = x * x
                u = x * b * (0.2274 + b * (0.2531 + 0.1745 * b) + y * (-0.0758 + b * (0.1033 + 0.3932 * b) - y * b * (0.0879 + 0.0151 * b - y * (0.0072 - 0.0831 * b + y * b * (0.0131 - 0.00046 * y)))))
                
                PRHO = u / exp(y / 2) + (1 - NormalDist().cdf(x))
                if (PRHO < 0):
                    PRHO = 0
                elif (PRHO > 1):
                    PRHO = 0
                he_AS89 = PRHO
            
            else:
                #Exact evaluation of probability                
                NFAC = 1                
                for i in range(1, n+1):
                    NFAC = NFAC * i
                    L[i] = i

                #1 continue                
                PRHO = 1 / NFAC
                
                if (JS == (n * (n * n - 1) / 3)):
                    he_AS89 = PRHO
                else:
                    IFR = 0
                    
                    for m in range(1, NFAC+1):
                        ISE = 0
                        for i in range(0, n):
                            ISE = ISE + (i - L[i])**2
                        
                        #2 continue
                        if (JS <= ISE):
                            IFR = IFR + 1
                        
                        n1 = n
                        
                        #3
                        LOOP3 = True
                        while (LOOP3):
                            LOOP3 = False
                            MT = L[1]
                            NN = n1 - 1
                            for i in range(1, NN+1):
                                L[i] = L[i + 1]
                            
                            #4 continue
                            
                            L[n1] = MT
                            if (L[n1] == n1 and n1 == 2):
                                n1 = n1 - 1
                              
                                if (m != NFAC):
                                    LOOP3 = True
                                  
                    PRHO = IFR / NFAC
                    he_AS89 = PRHO
    
    return he_AS89

Functions

def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method='as89', iters=500)

Spearman Rank Cumulative Distribution Function

A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.

Parameters

ordField1 : pandas series: the ordinal or scale scores of the first variable
ordField2 : pandas series: the ordinal or scale scores of the second variable
levels1 : list or dictionary, optional: the categories to use from ordField1
levels2 : list or dictionary, optional: the categories to use from ordField2
method : {"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}, optional: which method to use

Returns

Depending on the method used:

cdf, the cdf value
statistic, the statistic used for the cdf
df, the degrees of freedom

Notes

The "exact" method uses the following four steps.

Determine all possible permutations of the scores in the first variable
Determine for each the Spearman rho with the second variable
Count how often the Spearman rho is above or equal to the original Spearman rho
Divide the result by n!.

A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937): $t_{ks} = r_s\times\sqrt{\frac{n-2}{1-r_s^2}}$ $df = n - 2$ $sig. = 2\times\left(1-T\left(\left|t_s\right|, df\right)\right)$

A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646): $z_F = \frac{\text{atanh}\left(r_s\right)}{\sqrt{\frac{1.06}{n-3}}}$ $sig. = 2\times\left(1-\Phi\left(\left|z_F\right|\right)\right)$

An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56): $z_O = \frac{x}{ASE}$ $x = \frac{S}{2} - \frac{n^3 - n}{12}$ $ASE = \sqrt{n-1}\times\frac{n\times\left(n+1\right)}{12}$ $S = \frac{\left(n^3-n\right)\times\left(1-r_s\right)}{6}$ $sig. = 2\times\left(1-\Phi\left(\left|z_O\right|\right)\right)$

Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution: $J = \frac{r_s}{2}\times\left(\sqrt{n-1}+\sqrt{\frac{n-2}{1-r_s^2}}\right)$ Reject the null hypothesis at $\alpha$ level if: $J > \frac{Q\left(\Phi\left(1-\frac{\alpha}{2}\right)\right) + Q\left(T\left(1-\frac{\alpha}{2}\right), df)\right)}{n-2}$

The function will use an iterative search for the approximate p-value.

Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so it has been converted to lower tail by using: $S^* = \frac{n^3 - n}{6} - S$ $S = \sum_{i=1}^n d_i^2 = \sum_{i=1}^n \left(R_{x,i}-R_{y,i}\right)^2 = \frac{\left(n^3-n\right)\times\left(1-r_s\right)}{6}$

Symbols Used

$n$ , the sample size
$r_s$ , the Spearman rho
$\Phi\left(\dots\right)$ , the cumulative density function of the standard normal distribution.
$T\left(\dots\right)$ , the cumulative density function of the Student T distribution.
$Q\left(\dots\right)$ , the inverse cumulative density function of the function between parenthesis.

References

Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. Biometrika, 64(3), 645–647. doi:10.1093/biomet/64.3.645

Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. Biometrika, 44(3–4), 470–481. doi:10.1093/biomet/44.3-4.470

Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. Communications in Statistics - Theory and Methods, 17(1), 55–59. doi:10.1080/03610928808829609

Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. Communications in Statistics - Simulation and Computation, 7(3), 269–282. doi:10.1080/03610917808812076

Kendall, M., & Stuart, A. (1979). The advanced theory of statistics. Volume 2: Inference and relationship (4th ed., Vol. 2). Griffin.

Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. The Annals of Mathematical Statistics, 9(2), 133–148. doi:10.1214/aoms/1177732332

Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. The Annals of Mathematical Statistics, 20(1), 117–118. doi:10.1214/aoms/1177730099

Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. Supplement to the Journal of the Royal Statistical Society, 4(2), 225–232. doi:10.2307/2983647

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def di_scdf(ordField1, ordField2, levels1=None, levels2=None, method="as89", iters=500):
    '''
    Spearman Rank Cumulative Distribution Function
    ----------------------------------------------
    A function to determine the cdf of the Spearman Rank Distribution. A few different options are available. See the notes for details.
    
    Parameters
    ----------
    ordField1 : pandas series
        the ordinal or scale scores of the first variable
    ordField2 : pandas series
        the ordinal or scale scores of the second variable
    levels1 : list or dictionary, optional
        the categories to use from ordField1
    levels2 : list or dictionary, optional
        the categories to use from ordField2
    method : {"as89", "t", "exact", "iman-conover", "z-fieller", "z-olds"}, optional
        which method to use
    
    Returns
    -------
    Depending on the method used:
    
    * *cdf*, the cdf value
    * *statistic*, the statistic used for the cdf
    * *df*, the degrees of freedom
    
    
    Notes
    -----
    The "exact" method uses the following four steps.
    
    1. Determine all possible permutations of the scores in the first variable
    2. Determine for each the Spearman rho with the second variable
    3. Count how often the Spearman rho is above or equal to the original Spearman rho
    4. Divide the result by n!.
    
    A t-distribution approximation (method="t") was proposed by Kendall and Stuart (1979, p. 503), while Iman and Conover (1978, p. 271) refer to Pitman (1937):
    $$t_{ks} = r_s\\times\\sqrt{\\frac{n-2}{1-r_s^2}}$$
    $$df = n - 2$$
    $$sig. = 2\\times\\left(1-T\\left(\\left|t_s\\right|, df\\right)\\right)$$
    
    A normal approximation was proposed by Fieller et al. (1957, p. 472), and Choi (1977, p. 646):
    $$z_F = \\frac{\\text{atanh}\\left(r_s\\right)}{\\sqrt{\\frac{1.06}{n-3}}}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_F\\right|\\right)\\right)$$
    
    An alternative normal approximation was proposed by Olds (1938, p. 142; 1949, p. 117) and Franklin (1988, p. 56):
    $$z_O = \\frac{x}{ASE}$$
    $$x = \\frac{S}{2} - \\frac{n^3 - n}{12}$$
    $$ASE = \\sqrt{n-1}\\times\\frac{n\\times\\left(n+1\\right)}{12}$$
    $$S = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    $$sig. = 2\\times\\left(1-\\Phi\\left(\\left|z_O\\right|\\right)\\right)$$
    
    Iman and Conover (1978, p. 272) use a different approach, using both the Normal and Student t distribution:
    $$J = \\frac{r_s}{2}\\times\\left(\\sqrt{n-1}+\\sqrt{\\frac{n-2}{1-r_s^2}}\\right)$$
    Reject the null hypothesis at \\(\\alpha\\) level if:
    $$J > \\frac{Q\\left(\\Phi\\left(1-\\frac{\\alpha}{2}\\right)\\right) + Q\\left(T\\left(1-\\frac{\\alpha}{2}\\right), df)\\right)}{n-2}$$
    
    The function will use an iterative search for the approximate p-value.
    
    Algorithm AS 89 (Best & Roberts, 1975) is based on Olds. This algorithm is for upper-tail only, so it has been converted to lower tail by using:
    $$S^* = \\frac{n^3 - n}{6} - S$$
    $$S = \\sum_{i=1}^n d_i^2 = \\sum_{i=1}^n \\left(R_{x,i}-R_{y,i}\\right)^2 = \\frac{\\left(n^3-n\\right)\\times\\left(1-r_s\\right)}{6}$$
    
    *Symbols Used*
    
    * \\(n\\), the sample size
    * \\(r_s\\), the Spearman rho
    * \\(\\Phi\\left(\\dots\\right)\\), the cumulative density function of the standard normal distribution.
    * \\(T\\left(\\dots\\right)\\), the cumulative density function of the Student T distribution.
    * \\(Q\\left(\\dots\\right)\\), the inverse cumulative density function of the function between parenthesis.
    
    References
    ----------
    Choi, S. C. (1977). Tests of equality of dependent correlation coefficients. *Biometrika, 64*(3), 645–647. doi:10.1093/biomet/64.3.645
    
    Fieller, E. C., Hartley, H. O., & Pearson, E. S. (1957). Tests for rank correlation coefficients. I. *Biometrika, 44*(3–4), 470–481. doi:10.1093/biomet/44.3-4.470
    
    Franklin, L. A. (1988). A note on approximations and convergence in distribution for spearman’s rank correlation coefficient. *Communications in Statistics - Theory and Methods, 17*(1), 55–59. doi:10.1080/03610928808829609
    
    Iman, R. L., & Conover, W. J. (1978). Approximations of the critical region for spearman’s rho with and without ties present. *Communications in Statistics - Simulation and Computation, 7*(3), 269–282. doi:10.1080/03610917808812076
    
    Kendall, M., & Stuart, A. (1979). *The advanced theory of statistics. Volume 2: Inference and relationship* (4th ed., Vol. 2). Griffin.
    
    Olds, E. G. (1938). Distributions of sums of squares of rank differences for small numbers of individuals. *The Annals of Mathematical Statistics, 9*(2), 133–148. doi:10.1214/aoms/1177732332
    
    Olds, E. G. (1949). The 5% significance levels for sums of squares of rank differences and a correction. *The Annals of Mathematical Statistics, 20*(1), 117–118. doi:10.1214/aoms/1177730099
    
    Pitman, E. J. G. (1937). Significance tests which may be applied to samples from any populations. II. The correlation coefficient test. *Supplement to the Journal of the Royal Statistical Society, 4*(2), 225–232. doi:10.2307/2983647
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076   
    
    '''
    from ..correlations.cor_spearman_rho import r_spearman_rho
    
    ct = tab_cross(ordField1, ordField2, order1=levels1, order2=levels2)    
    n = ct.sum().sum()
    
    rs = r_spearman_rho(ordField1, ordField2, levels1, levels2, test="none")    
    res = []
    
    if method == "exact":
        permu = he_perm(n)
        nPerm = factorial(n)
            
        testPerm = [0]*n
        for i in range(0, nPerm):
            for j in range(0, n):
                testPerm[j] = permu.iloc[i, j]
            
            rTest = r_spearman_rho(ordField1, testPerm, levels1, test="none")
            
            if rTest < rs:
                nAbove = nAbove + 1
        
        pvalue = nAbove / nPerm
        
        di_scdf = pvalue
        
    elif method == "as89":
        S = (n**3 - n) * (1 - rs) / 6
        statistic = S
        if (S < (n**3 - n) / 6):
            S = (n**3 - n) / 3 - S
        
        pvalue = 1 - he_AS89(n, S)
        
        di_scdf = pvalue
    
    elif method == "t":
        #(Kendall & Stuart, 1979, p. 503)
        ts = rs * ((n - 2) / (1 - rs**2))**0.5
        df = n - 2
        pvalue = t.cdf(abs(ts), df)
        if rs < 0:
            pvalue = 1 - pvalue
        
        res = pd.DataFrame([[pvalue, ts, df]])
        res.columns = ["cdf", "statistic", "df"]
        
        di_scdf = res
    
    elif method == "z-fieller":
        #Fieller et al. 1957, p. 472; Choi, 1977, p. 646)
        zs = atanh(rs) / (1.06 / (n - 3))**0.5
        pvalue = NormalDist().cdf(zs)
        
        res = pd.DataFrame([[pvalue, zs]])
        res.columns = ["cdf", "statistic"]
        
        di_scdf = res
    
    elif method == "z-olds":
        #Olds, 1938, p. 142; Olds, 1949, p. 117
        S = (n**3 - n) * (1 - rs) / 6
        x = S / 2 - (n**3 - n) / 12
        ase = (n - 1)**0.5 * (n * (n + 1) / 12)
        Z = x / ase
        pvalue = 1 - NormalDist().cdf(Z)
        
        res = pd.DataFrame([[pvalue, Z]])
        res.columns = ["cdf", "statistic"]
        
        di_scdf = res
    
    elif method == "iman-conover":
        #Iman & Conover, 1978, p. 272
        j = abs(rs) / 2 * ((n - 1)**0.5 + ((n - 2) / (1 - abs(rs)**2))**0.5)
        #zpval = 2 * (1 - NormalDist().cdf(abs(j)))
        
        df = n - 2
        #tpval = 2*(1 - t.cdf(abs(j), df))
        #pvalue = (zpval + tpval) / 2
    
        pLow = 0
        pHigh = 1
        pvalue = 0.05
        nIter = 1
        
        Repeat = True
        while Repeat:
        
            zCrit = norm.ppf(1 - pvalue / 2)
            tCrit = t.ppf(1 - pvalue/2, df)
            Jcrit = (zCrit + tCrit) / 2
            if Jcrit == j or nIter == iters:
                Repeat = False
            else:
                if (Jcrit < j):
                    pHigh = pvalue
                    pvalue = (pLow + pvalue) / 2
                
                elif (Jcrit > j):
                    pLow = pvalue
                    pvalue = (pHigh + pvalue) / 2
                
                nIter = nIter + 1
            
        if rs > 0:
            pvalue = 1 - pvalue / 2
        else:
            pvalue = pvalue / 2
        
        di_scdf = pvalue
        
    return di_scdf

def he_AS89(n, IST)

Expand source code

def he_AS89(n, IST):
    L = [0]*7
    
    #Test admissibility of arguments and initialize
    PRHO = 1
    if (n <= 1 or IST <= 0):
        he_AS89 = PRHO
    else:
        PRHO = 0
        if (IST > n * (n * n - 1) / 3):
            he_AS89 = PRHO
        else:
            JS = IST
            if (JS != 2 * (JS / 2)):
                JS = JS + 1
            if (n > 6):
                #GOTO 6
                #6
                b = 1 / n
                x = (6 * (JS - 1) * b / (1 / (b * b) - 1) - 1) * (1 / b - 1)**0.5
                y = x * x
                u = x * b * (0.2274 + b * (0.2531 + 0.1745 * b) + y * (-0.0758 + b * (0.1033 + 0.3932 * b) - y * b * (0.0879 + 0.0151 * b - y * (0.0072 - 0.0831 * b + y * b * (0.0131 - 0.00046 * y)))))
                
                PRHO = u / exp(y / 2) + (1 - NormalDist().cdf(x))
                if (PRHO < 0):
                    PRHO = 0
                elif (PRHO > 1):
                    PRHO = 0
                he_AS89 = PRHO
            
            else:
                #Exact evaluation of probability                
                NFAC = 1                
                for i in range(1, n+1):
                    NFAC = NFAC * i
                    L[i] = i

                #1 continue                
                PRHO = 1 / NFAC
                
                if (JS == (n * (n * n - 1) / 3)):
                    he_AS89 = PRHO
                else:
                    IFR = 0
                    
                    for m in range(1, NFAC+1):
                        ISE = 0
                        for i in range(0, n):
                            ISE = ISE + (i - L[i])**2
                        
                        #2 continue
                        if (JS <= ISE):
                            IFR = IFR + 1
                        
                        n1 = n
                        
                        #3
                        LOOP3 = True
                        while (LOOP3):
                            LOOP3 = False
                            MT = L[1]
                            NN = n1 - 1
                            for i in range(1, NN+1):
                                L[i] = L[i + 1]
                            
                            #4 continue
                            
                            L[n1] = MT
                            if (L[n1] == n1 and n1 == 2):
                                n1 = n1 - 1
                              
                                if (m != NFAC):
                                    LOOP3 = True
                                  
                    PRHO = IFR / NFAC
                    he_AS89 = PRHO
    
    return he_AS89

def he_perm(n)

Expand source code

def he_perm(n):
    #Using Heap Algorithm non-recursive
    #from https://sedgewick.io/wp-content/uploads/2022/03/2002PermGeneration.pdf
    
    nPerm = factorial(n)
    
    c = [0]*n
    res = [[0]*(n)]*(nPerm)
    res = pd.DataFrame(res)
    
    a = [0]*n
    for i in range(0, n):
        a[i] = i + 1
        res.iloc[0, i] = i + 1
    resRow = 1
    
    i = 1
    while i < n:
        if c[i] < i:
            if i % 2 == 0:
                ff = a[0]
                a[0] = a[i]
                a[i] = ff
            else:
                ff = a[c[i]]
                a[c[i]] = a[i]
                a[i] = ff
            for ff in range(0, n):
                res.iloc[resRow, ff] = a[ff - 1]

            resRow = resRow + 1
            
            c[i] = c[i] + 1
            i = 1
        else:
            c[i] = 0
            i = i + 1
    return res