from statistics import NormalDist
from math import comb
import pandas as pd
from scipy.stats import t

def wcdf(W, n):
    '''
    Wilcoxon cumulative distribution function
    '''
    pVal=0
    for i in range(0,W+1):
        pVal = pVal + srf(i, n)
    return pVal/(2**n)

def srf(x,y): #srf = sum rank frequency
    if x<0:
        return 0
    elif x > comb(y+1, 2):
        return 0
    elif y==1 and (x==0 or x==1):
        return 1
    elif y>=0:
        return srf(x-y, y-1) + srf(x, y-1)

def ts_wilcoxon_os(data, hypMed = "none", ties = True, 
               appr = "wilcoxon", eqMed = "wilcoxon", cc = False):
    '''
    one-sample Wilcoxon signed rank test
     
    This function will perform one-sample Wilcoxon signed rank test
    
    Parameters
    ----------
    data : list or Pandas data series with the data as numbers
    hypMed : optional hypothesized median, otherwise the midrange will be used
    ties : optional boolean to use a tie correction (default is True)
    appr : optional method to use for approximation. Either "wilcoxon" (default), "exact" "imanz" or "imant" for Iman's z or t approximation
    eqMed : optional method to deal with scores equal to hypMed. Either "wilcoxon" (default), "pratt" or "zsplit"
    cc : optional boolean to use a continuity correction (default is False)
        
    Returns
    -------
    testResults : Pandas dataframe with the Wilcoxon W, test statistic, degrees of freedom (only applicable for Iman t approximation), significance (p-value) and test used
   
    Notes
    -----
    This uses the ranks function from Pandas, the NormalDist function from Python's statistics, the t function from scipy.stats, comb function from Python's math library and two helper functions wcdf and srf for the exact Wilcoxon distribution.
    
    Author
    ------
    Made by P. Stikker
    
    Please visit: https://PeterStatistics.com
    
    YouTube channel: https://www.youtube.com/stikpet
    
    '''
    #set hypothesized median to mid range if not provided
    if (hypMed=="none"):
        hypMed = (min(data) + max(data)) / 2
        
    #sample size (n)
    n = len(data)
    
    #adjust sample size if wilcoxon method is used for equal distance
    if (eqMed == "wilcoxon"):
        nr = n - len(data[data == hypMed])
    else:
        nr = n
        
    absDiffs = 0
    #remove scores equal to hypMed if eqMed is wilcoxon
    if (eqMed == "wilcoxon" or appr=="exact"):
        data = data[data != hypMed]
        
    #determine the absolute deviations from the hypMed
    diffs = data - hypMed
    absDiffs = abs(diffs)
    ranks = absDiffs.rank()
    W = sum(ranks[diffs > 0])
    
    if appr=="exact":
        #check if ties exist
        if max(ranks.value_counts()) > 1:
            return "ties exist, cannot compute exact method"
        else:
            Wmin = sum(ranks[diffs < 0])
            statistic = min(W, Wmin)
            pVal = wcdf(int(statistic), len(ranks))*2
            testUsed = "one-sample Wilcoxon signed rank exact test"
            df = "n.a."
            
    else:
        #add half the equal to median ranks if zsplit is used
        nD0 = sum(diffs==0)
        if (eqMed == "zsplit"):
            W = W + sum(ranks[diffs == 0])/2

        rAvg = nr*(nr + 1)/4
        s2 = nr * (nr+1) * (2*nr + 1)/24
        #adjust if Pratt method is used
        if (eqMed == "pratt"):
            #normal approximation adjustment based on Cureton (1967)
            s2 = s2 - nD0 * (nD0 + 1) * (2 * nD0 + 1) / 24
            rAvg = (nr * (nr + 1) - nD0 * (nD0 + 1)) / 4

        #the ties correction
        tCorr = 0
        if (ties):
            #remove ranks of scores equal to hypMed for Pratt (wilcoxon already removed)
            if (eqMed == "pratt"):
                ranks = ranks[absDiffs != 0]
            for i in ranks.value_counts():
                tCorr = tCorr + (i**3 - i)
            tCorr = tCorr/48
            s2 = s2 - tCorr

        se = (s2)**0.5
        num = abs(W - rAvg)
        #apply continuity correction if needed
        if (cc):
            num = num - 0.5

        df = "n.a."

        if (appr=="imant"):
            statistic = num / ((s2 * nr - (W - rAvg)**2) / (nr - 1))**0.5
            df = nr - 1
            pVal = 2 * (1 - t.cdf(abs(statistic), df))
        else:
            statistic = num / se

            if (appr == "imanz"):
                statistic = statistic / 2 * (1 + ((nr - 1) / (nr - statistic**2))**0.5)

            pVal = 2 * (1 - NormalDist().cdf(abs(statistic))) 

        testUsed = "one-sample Wilcoxon signed rank test"
        if (ties and cc):
            testUsed = ", ".join([testUsed, ", with ties and continuity correction"])
        elif (ties):
            testUsed = ", ".join([testUsed, ", with ties correction"])
        elif (cc):
            testUsed = ", ".join([testUsed, ", with continuity correction"])

        if (appr == "imant"):
            testUsed = ", ".join([testUsed, ", using Iman (1974) t approximation"])
        elif (appr == "imanz"):
            testUsed = ", ".join([testUsed, ", using Iman (1974) z approximation"])

        if (eqMed == "pratt"):
            testUsed = ", ".join([testUsed, " Pratt method for equal to hyp. med. (inc. Cureton adjustment for normal approximation)"])
        elif (eqMed == "zsplit"):
            testUsed = ", ".join([testUsed, ", z-split method for equal to hyp. med."])

    testResults = pd.DataFrame([[W, statistic, df, pVal, testUsed]], columns=["W", "statistic", "df", "p-value", "test"])
    pd.set_option('display.max_colwidth', None)
    
    return testResults

#Example
dataList = [1, 2, 5, 1, 1, 5, 3, 1, 5, 1, 1, 5, 1, 1, 3, 3, 3, 4, 2, 4]
data = pd.Series(dataList)

ts_wilcoxon_os(data)

ts_wilcoxon_os(data, ties=False, appr="imanz", eqMed = "zsplit", cc = False)

dataList2 = [1,2,5,6,8,9,12,13,17,19]
data2 = pd.Series(dataList2)

ts_wilcoxon_os(data2, hypMed= 8.1, appr="exact")