Module stikpetP.other.poho_pairwise_iso

Expand source code
import pandas as pd
from ..other.table_cross import tab_cross
from ..tests.test_mann_whitney import ts_mann_whitney
from ..tests.test_mood_median import ts_mood_median
from ..tests.test_fligner_policello import ts_fligner_policello

def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test="mann-whitney", **kwargs):
    '''
    Post-Hoc Pairwise Independent-Samples Test for Ordinal Data
    -------------------------------------
    The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()). 
    The test compares each possible pair of categories from the catField and their mean rank. The null hypothesis is that these are then equal. A simple Bonferroni adjustment is also made for the multiple testing.
    Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.
        
    Parameters
    ----------
    catField : pandas series
        data with categories
    ordField : pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    is_test : {"mann-whitney", "mood", "fligner-policello"}, optional
        the test to use
    **kwargs : various, optional
        other arguments to pass on for the specific test used.
        
    Returns
    -------
    A dataframe with one row per pair of categories and the columns:
    
    * *category 1*, one of the two categories being compared
    * *category 2*, second of the two categories being compared
    * *statistic*, the test statistic
    * *df*, the df value reported by ts_mood_median() (only if is_test="mood")
    * *p-value*, the p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-value
    * *test*, description of the test used, as reported by the underlying test function
    
    If there are fewer than two categories an empty dataframe (with the columns above) is returned.
    
    Raises
    ------
    ValueError
        if is_test is not one of the supported tests.
    
    Notes
    -----
    This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.
    
    See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via *kwargs*.
    
    The Bonferroni adjustment is simply:
    $$p_{adj} = \\min \\left(p \\times n_{comp}, 1\\right)$$
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    *Symbols used:*
    
    * \\(n_{comp}\\), number of comparisons (pairs)
    * \\(k\\), number of categories
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    #validate the requested test up front, so an unknown name fails fast
    #with a clear message instead of an unbound 'colNames' at the very end
    if is_test == "mood":
        colNames = ["category 1","category 2","statistic", "df", "p-value","adj. p-value","test"]
    elif is_test in ("mann-whitney", "fligner-policello"):
        colNames = ["category 1","category 2","statistic","p-value","adj. p-value","test"]
    else:
        raise ValueError(
            'is_test must be one of "mann-whitney", "mood" or "fligner-policello"; got {!r}'.format(is_test))
    
    #create the cross table    
    ct = tab_cross(ordField, catField, order1=levels, order2=categories, totals="include")
    
    #basic counts
    #one column is subtracted because totals="include" is requested
    #(presumably tab_cross then appends a totals column - confirm in tab_cross)
    k = ct.shape[1]-1
    
    #number of pairwise comparisons, used as the Bonferroni multiplier
    ncomp = (k * (k - 1)) / 2
    
    #collect one list per comparison and build the dataframe once at the end
    rows = []
    for i in range(0, k-1):
        for j in range(i + 1, k):
            cat1 = ct.columns[i]
            cat2 = ct.columns[j]
            #the two categories to pass on to the two-sample test
            selCats = pd.Series([cat1, cat2], dtype="object")

            if is_test=="mann-whitney":
                tstRes = ts_mann_whitney(catField, ordField, selCats, levels, **kwargs)
                statistic = tstRes.iloc[0, 3]
                pValue = tstRes.iloc[0, 4]
                testUsed = tstRes.iloc[0, 5]
                rows.append([cat1, cat2, statistic, pValue, min(pValue * ncomp, 1), testUsed])
                
            elif is_test=="mood":
                tstRes = ts_mood_median(catField, ordField, selCats, levels, **kwargs)
                if isinstance(tstRes, float):
                    #a bare float result is treated as the p-value of a
                    #Fisher exact test: no statistic or df is available then
                    statistic = None
                    df = None
                    pValue = tstRes
                    testUsed = "Fisher exact"
                else:
                    statistic = tstRes.iloc[0,3]
                    df = tstRes.iloc[0,4]
                    pValue = tstRes.iloc[0,5]
                    testUsed = tstRes.iloc[0,8]
                rows.append([cat1, cat2, statistic, df, pValue, min(pValue * ncomp, 1), testUsed])
                
            else:
                #is_test == "fligner-policello" (guaranteed by the validation above)
                tstRes = ts_fligner_policello(catField, ordField, selCats, levels, **kwargs)
                statistic = tstRes.iloc[0, 1]
                pValue = tstRes.iloc[0, 2]
                testUsed = tstRes.iloc[0, 3]
                rows.append([cat1, cat2, statistic, pValue, min(pValue * ncomp, 1), testUsed])

    #passing the column names here also gives a well-formed (empty) result
    #when there are no pairs to compare
    res = pd.DataFrame(rows, columns=colNames)
    
    return res

Functions

def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test='mann-whitney', **kwargs)

Post-Hoc Pairwise Independent-Samples Test for Ordinal Data

The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()). The test compares each possible pair of categories from the catField and their mean rank. The null hypothesis is that these are then equal. A simple Bonferroni adjustment is also made for the multiple testing. Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.

Parameters

catField : pandas series
data with categories
ordField : pandas series
data with the scores
categories : list or dictionary, optional
the categories to use from catField
levels : list or dictionary, optional
the levels or order used in ordField.
is_test : {"mann-whitney", "mood", "fligner-policello"}, optional
the test to use
**kwargs : various, optional
other arguments to pass on for the specific test used.

Returns

A dataframe with:
 
  • category 1, one of the two categories being compared
  • category 2, second of the two categories being compared
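  • statistic, the test statistic
  • df, the df value reported by ts_mood_median() (only if is_test="mood")
  • p-value, the p-value (significance)
  • adj. p-value, the Bonferroni adjusted p-value
  • test, description of the test used, as reported by the underlying test function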

Notes

This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.

See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via kwargs.

The Bonferroni adjustment is simply:

$$p_{adj} = \min \left(p \times n_{comp}, 1\right)$$

$$n_{comp} = \frac{k\times\left(k-1\right)}{2}$$

Symbols used:

  • n_{comp}, number of comparisons (pairs)
  • k, number of categories

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code
def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test="mann-whitney", **kwargs):
    '''
    Post-Hoc Pairwise Independent-Samples Test for Ordinal Data
    -------------------------------------
    The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()). 
    The test compares each possible pair of categories from the catField and their mean rank. The null hypothesis is that these are then equal. A simple Bonferroni adjustment is also made for the multiple testing.
    Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.
        
    Parameters
    ----------
    catField : pandas series
        data with categories
    ordField : pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    is_test : {"mann-whitney", "mood", "fligner-policello"}, optional
        the test to use
    **kwargs : various, optional
        other arguments to pass on for the specific test used.
        
    Returns
    -------
    A dataframe with one row per pair of categories and the columns:
    
    * *category 1*, one of the two categories being compared
    * *category 2*, second of the two categories being compared
    * *statistic*, the test statistic
    * *df*, the df value reported by ts_mood_median() (only if is_test="mood")
    * *p-value*, the p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-value
    * *test*, description of the test used, as reported by the underlying test function
    
    If there are fewer than two categories an empty dataframe (with the columns above) is returned.
    
    Raises
    ------
    ValueError
        if is_test is not one of the supported tests.
    
    Notes
    -----
    This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.
    
    See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via *kwargs*.
    
    The Bonferroni adjustment is simply:
    $$p_{adj} = \\min \\left(p \\times n_{comp}, 1\\right)$$
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    *Symbols used:*
    
    * \\(n_{comp}\\), number of comparisons (pairs)
    * \\(k\\), number of categories
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    #validate the requested test up front, so an unknown name fails fast
    #with a clear message instead of an unbound 'colNames' at the very end
    if is_test == "mood":
        colNames = ["category 1","category 2","statistic", "df", "p-value","adj. p-value","test"]
    elif is_test in ("mann-whitney", "fligner-policello"):
        colNames = ["category 1","category 2","statistic","p-value","adj. p-value","test"]
    else:
        raise ValueError(
            'is_test must be one of "mann-whitney", "mood" or "fligner-policello"; got {!r}'.format(is_test))
    
    #create the cross table    
    ct = tab_cross(ordField, catField, order1=levels, order2=categories, totals="include")
    
    #basic counts
    #one column is subtracted because totals="include" is requested
    #(presumably tab_cross then appends a totals column - confirm in tab_cross)
    k = ct.shape[1]-1
    
    #number of pairwise comparisons, used as the Bonferroni multiplier
    ncomp = (k * (k - 1)) / 2
    
    #collect one list per comparison and build the dataframe once at the end
    rows = []
    for i in range(0, k-1):
        for j in range(i + 1, k):
            cat1 = ct.columns[i]
            cat2 = ct.columns[j]
            #the two categories to pass on to the two-sample test
            selCats = pd.Series([cat1, cat2], dtype="object")

            if is_test=="mann-whitney":
                tstRes = ts_mann_whitney(catField, ordField, selCats, levels, **kwargs)
                statistic = tstRes.iloc[0, 3]
                pValue = tstRes.iloc[0, 4]
                testUsed = tstRes.iloc[0, 5]
                rows.append([cat1, cat2, statistic, pValue, min(pValue * ncomp, 1), testUsed])
                
            elif is_test=="mood":
                tstRes = ts_mood_median(catField, ordField, selCats, levels, **kwargs)
                if isinstance(tstRes, float):
                    #a bare float result is treated as the p-value of a
                    #Fisher exact test: no statistic or df is available then
                    statistic = None
                    df = None
                    pValue = tstRes
                    testUsed = "Fisher exact"
                else:
                    statistic = tstRes.iloc[0,3]
                    df = tstRes.iloc[0,4]
                    pValue = tstRes.iloc[0,5]
                    testUsed = tstRes.iloc[0,8]
                rows.append([cat1, cat2, statistic, df, pValue, min(pValue * ncomp, 1), testUsed])
                
            else:
                #is_test == "fligner-policello" (guaranteed by the validation above)
                tstRes = ts_fligner_policello(catField, ordField, selCats, levels, **kwargs)
                statistic = tstRes.iloc[0, 1]
                pValue = tstRes.iloc[0, 2]
                testUsed = tstRes.iloc[0, 3]
                rows.append([cat1, cat2, statistic, pValue, min(pValue * ncomp, 1), testUsed])

    #passing the column names here also gives a well-formed (empty) result
    #when there are no pairs to compare
    res = pd.DataFrame(rows, columns=colNames)
    
    return res