Module stikpetP.other.poho_pairwise_is

Expand source code
import pandas as pd
from ..tests.test_student_t_is import ts_student_t_is
from ..tests.test_welch_t_is import ts_welch_t_is
from ..tests.test_trimmed_mean_is import ts_trimmed_mean_is
from ..tests.test_z_is import ts_z_is

def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.
    
    A simple Bonferroni correction is also applied.
    
    The independent samples tests that can be used are:
    
    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details
    
    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.
        
    Returns
    -------
    A data frame with one row per pair of categories (no rows if fewer than two categories remain) and the columns:
    
    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z-test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used
    
    Raises
    ------
    ValueError
        if isTest is not one of the supported test names
    
    Notes
    -----
    
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$
    
    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    Where \\(k\\) is the number of categories.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    from itertools import combinations
    
    #fail early with a clear message instead of an unbound-variable error inside the loop
    validTests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in validTests:
        raise ValueError("isTest must be one of " + ", ".join(validTests) + "; got " + repr(isTest))
    
    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
        
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)
        
    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]
    
    #remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]
    
    #Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)
    
    #only categories that still have complete cases take part in the comparisons
    cats = pd.unique(data["category"])
    
    k = len(cats)
    ncomp = k * (k - 1) / 2
    
    colNames = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2", "sample diff.", "hyp diff.", "statistic", "df", "p-value", "adj. p-value", "test"]
    
    rows = []
    for cat1, cat2 in combinations(cats, 2):
        sel2cat = [cat1, cat2]
        if isTest == "student":
            isRes = ts_student_t_is(nomField, scaleField, sel2cat)
        elif isTest == "welch":
            isRes = ts_welch_t_is(nomField, scaleField, sel2cat)
        elif isTest == "trimmed":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
        elif isTest == "yuen":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
        else:
            isRes = ts_z_is(nomField, scaleField, sel2cat)
        
        #the first seven result columns (n1 up to the statistic) are positioned the same for every test
        shared = [isRes.iloc[0, c] for c in range(7)]
        
        #the z-test result has no df column, so its p-value and description sit one position earlier
        if isTest == "z":
            dfVal = None
            pVal = isRes.iloc[0, 7]
            testName = isRes.iloc[0, 8]
        else:
            dfVal = isRes.iloc[0, 7]
            pVal = isRes.iloc[0, 8]
            testName = isRes.iloc[0, 9]
        
        #Bonferroni adjustment, capped at 1
        pAdj = pVal * ncomp
        if pAdj > 1:
            pAdj = 1
        
        rows.append([cat1, cat2, *shared, dfVal, pVal, pAdj, testName])
    
    #building from a list of rows also yields correctly labelled columns when there are no pairs
    res = pd.DataFrame(rows, columns=colNames)
    return res

Functions

def ph_pairwise_is(nomField, scaleField, categories=None, isTest='student', trimProp=0.1)

Post-Hoc Pairwise Independent Samples Test

This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.

A simple Bonferroni correction is also applied.

The independent samples tests that can be used are:

  • Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
  • Welch t, see ts_welch_t_is() for details
  • Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
  • Z, see ts_z_is() for details

Parameters

nomField : pandas series
data with categories
scaleField : pandas series
data with the scores
categories : list or dictionary, optional
the categories to use from nomField
isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
the independent samples test to use. Default is "student"
trimProp : float, optional
the trim proportion to use, if applicable. Default is 0.1.

Returns

A data frame with:
 
  • category 1, the first category in the pair
  • category 2, the second category in the pair
  • n1, sample size of first category
  • n2, sample size of second category
  • mean 1, arithmetic mean of scores in first category
  • mean 2, arithmetic mean of scores in second category
  • sample diff., difference between the two arithmetic means
  • hyp diff., the hypothesized difference
  • statistic, the test-statistic
  • df, the degrees of freedom
  • p-value, the unadjusted p-value (significance)
  • adj. p-value, the Bonferroni adjusted p-values
  • test, description of test used

Notes

A simple Bonferroni correction is applied for the multiple comparisons. This is simply: $$sig._{adj} = \min \left(sig. \times n_{comp}, 1\right)$$

With: $$n_{comp} = \frac{k\times\left(k-1\right)}{2}$$

Where k is the number of categories.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code
def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.
    
    A simple Bonferroni correction is also applied.
    
    The independent samples tests that can be used are:
    
    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details
    
    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.
        
    Returns
    -------
    A data frame with one row per pair of categories (no rows if fewer than two categories remain) and the columns:
    
    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z-test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used
    
    Raises
    ------
    ValueError
        if isTest is not one of the supported test names
    
    Notes
    -----
    
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$
    
    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    Where \\(k\\) is the number of categories.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    from itertools import combinations
    
    #fail early with a clear message instead of an unbound-variable error inside the loop
    validTests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in validTests:
        raise ValueError("isTest must be one of " + ", ".join(validTests) + "; got " + repr(isTest))
    
    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
        
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)
        
    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]
    
    #remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]
    
    #Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)
    
    #only categories that still have complete cases take part in the comparisons
    cats = pd.unique(data["category"])
    
    k = len(cats)
    ncomp = k * (k - 1) / 2
    
    colNames = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2", "sample diff.", "hyp diff.", "statistic", "df", "p-value", "adj. p-value", "test"]
    
    rows = []
    for cat1, cat2 in combinations(cats, 2):
        sel2cat = [cat1, cat2]
        if isTest == "student":
            isRes = ts_student_t_is(nomField, scaleField, sel2cat)
        elif isTest == "welch":
            isRes = ts_welch_t_is(nomField, scaleField, sel2cat)
        elif isTest == "trimmed":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
        elif isTest == "yuen":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
        else:
            isRes = ts_z_is(nomField, scaleField, sel2cat)
        
        #the first seven result columns (n1 up to the statistic) are positioned the same for every test
        shared = [isRes.iloc[0, c] for c in range(7)]
        
        #the z-test result has no df column, so its p-value and description sit one position earlier
        if isTest == "z":
            dfVal = None
            pVal = isRes.iloc[0, 7]
            testName = isRes.iloc[0, 8]
        else:
            dfVal = isRes.iloc[0, 7]
            pVal = isRes.iloc[0, 8]
            testName = isRes.iloc[0, 9]
        
        #Bonferroni adjustment, capped at 1
        pAdj = pVal * ncomp
        if pAdj > 1:
            pAdj = 1
        
        rows.append([cat1, cat2, *shared, dfVal, pVal, pAdj, testName])
    
    #building from a list of rows also yields correctly labelled columns when there are no pairs
    res = pd.DataFrame(rows, columns=colNames)
    return res