Module `stikpetP.other.poho_pairwise_is`

Expand source code

import pandas as pd
from ..tests.test_student_t_is import ts_student_t_is
from ..tests.test_welch_t_is import ts_welch_t_is
from ..tests.test_trimmed_mean_is import ts_trimmed_mean_is
from ..tests.test_z_is import ts_z_is

def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.
    
    A simple Bonferroni correction is also applied.
    
    The independent samples tests that can be used are:
    
    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details
    
    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.
        
    Returns
    -------
    A data frame with one row per pair of categories (no rows if fewer than two categories remain after filtering), and columns:
    
    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z-test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used
    
    Raises
    ------
    ValueError
        if isTest is not one of the supported test names
    
    Notes
    -----
    
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$
    
    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    Where \\(k\\) is the number of categories.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    from itertools import combinations
    
    # Reject unknown test names up front; otherwise the failure would only
    # surface later as an unbound result variable inside the loop.
    valid_tests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in valid_tests:
        raise ValueError(f"isTest must be one of {valid_tests}, got {isTest!r}")
    
    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
        
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)
        
    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]
    
    #remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]
    
    #Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)
    
    cats = pd.unique(data["category"])
    
    # number of pairwise comparisons, used as the Bonferroni multiplier
    k = len(cats)
    ncomp = k * (k - 1) / 2
    
    columns = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2", "sample diff.", "hyp diff.", "statistic", "df", "p-value", "adj. p-value", "test"]
    
    rows = []
    for cat1, cat2 in combinations(cats, 2):
        sel2cat = [cat1, cat2]
        # the individual tests do their own selection of the two categories
        if isTest == "student":
            isRes = ts_student_t_is(nomField, scaleField, sel2cat)
        elif isTest == "welch":
            isRes = ts_welch_t_is(nomField, scaleField, sel2cat)
        elif isTest == "trimmed":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
        elif isTest == "yuen":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
        else:
            isRes = ts_z_is(nomField, scaleField, sel2cat)
        
        # first seven result columns: n1, n2, mean 1, mean 2, sample diff.,
        # hyp diff., statistic
        shared = [isRes.iloc[0, c] for c in range(7)]
        
        # the z-test result is read as having no df column, so its p-value
        # and test description sit one position earlier than for the others
        if isTest == "z":
            dfVal = None
            pVal = isRes.iloc[0, 7]
            testDescr = isRes.iloc[0, 8]
        else:
            dfVal = isRes.iloc[0, 7]
            pVal = isRes.iloc[0, 8]
            testDescr = isRes.iloc[0, 9]
        
        # Bonferroni adjustment, capped at 1
        pAdj = pVal * ncomp
        if pAdj > 1:
            pAdj = 1.0
        
        rows.append([cat1, cat2, *shared, dfVal, pVal, pAdj, testDescr])
    
    # Building from a row list with explicit columns also yields a properly
    # labelled, empty frame when there are fewer than two categories.
    res = pd.DataFrame(rows, columns=columns)
    return res

Functions

def ph_pairwise_is(nomField, scaleField, categories=None, isTest='student', trimProp=0.1)

Post-Hoc Pairwise Independent Samples Test

This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.

A simple Bonferroni correction is also applied.

The independent samples tests that can be used are:

Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
Welch t, see ts_welch_t_is() for details
Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
Z, see ts_z_is() for details

Parameters

nomField : pandas series: data with categories
scaleField : pandas series: data with the scores
categories : list or dictionary, optional: the categories to use from nomField
isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional: the independent samples test to use. Default is "student"
trimProp : float, optional: the trim proportion to use, if applicable. Default is 0.1.

Returns

A data frame with:

category 1, the first category in the pair
category 2, the second category in the pair
n1, sample size of first category
n2, sample size of second category
mean 1, arithmetic mean of scores in first category
mean 2, arithmetic mean of scores in second category
sample diff., difference between the two arithmetic means
hyp diff., the hypothesized difference
statistic, the test-statistic
df, the degrees of freedom
p-value, the unadjusted p-value (significance)
adj. p-value, the Bonferroni adjusted p-values
test, description of test used

Notes

A simple Bonferroni correction is applied for the multiple comparisons. This is simply: $sig._{adj} = \min \left(sig. \times n_{comp}, 1\right)$

With: $$n_{comp} = \frac{k\times\left(k-1\right)}{2}$$

Where $k$ is the number of categories.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.
    
    A simple Bonferroni correction is also applied.
    
    The independent samples tests that can be used are:
    
    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details
    
    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.
        
    Returns
    -------
    A data frame with one row per pair of categories (no rows if fewer than two categories remain after filtering), and columns:
    
    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z-test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used
    
    Raises
    ------
    ValueError
        if isTest is not one of the supported test names
    
    Notes
    -----
    
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$
    
    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$
    
    Where \\(k\\) is the number of categories.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    from itertools import combinations
    
    # Reject unknown test names up front; otherwise the failure would only
    # surface later as an unbound result variable inside the loop.
    valid_tests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in valid_tests:
        raise ValueError(f"isTest must be one of {valid_tests}, got {isTest!r}")
    
    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
        
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)
        
    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]
    
    #remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]
    
    #Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)
    
    cats = pd.unique(data["category"])
    
    # number of pairwise comparisons, used as the Bonferroni multiplier
    k = len(cats)
    ncomp = k * (k - 1) / 2
    
    columns = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2", "sample diff.", "hyp diff.", "statistic", "df", "p-value", "adj. p-value", "test"]
    
    rows = []
    for cat1, cat2 in combinations(cats, 2):
        sel2cat = [cat1, cat2]
        # the individual tests do their own selection of the two categories
        if isTest == "student":
            isRes = ts_student_t_is(nomField, scaleField, sel2cat)
        elif isTest == "welch":
            isRes = ts_welch_t_is(nomField, scaleField, sel2cat)
        elif isTest == "trimmed":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
        elif isTest == "yuen":
            isRes = ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
        else:
            isRes = ts_z_is(nomField, scaleField, sel2cat)
        
        # first seven result columns: n1, n2, mean 1, mean 2, sample diff.,
        # hyp diff., statistic
        shared = [isRes.iloc[0, c] for c in range(7)]
        
        # the z-test result is read as having no df column, so its p-value
        # and test description sit one position earlier than for the others
        if isTest == "z":
            dfVal = None
            pVal = isRes.iloc[0, 7]
            testDescr = isRes.iloc[0, 8]
        else:
            dfVal = isRes.iloc[0, 7]
            pVal = isRes.iloc[0, 8]
            testDescr = isRes.iloc[0, 9]
        
        # Bonferroni adjustment, capped at 1
        pAdj = pVal * ncomp
        if pAdj > 1:
            pAdj = 1.0
        
        rows.append([cat1, cat2, *shared, dfVal, pVal, pAdj, testDescr])
    
    # Building from a row list with explicit columns also yields a properly
    # labelled, empty frame when there are fewer than two categories.
    res = pd.DataFrame(rows, columns=columns)
    return res