Module stikpetP.other.poho_pairwise_iso
Expand source code
import pandas as pd
from ..other.table_cross import tab_cross
from ..tests.test_mann_whitney import ts_mann_whitney
from ..tests.test_mood_median import ts_mood_median
from ..tests.test_fligner_policello import ts_fligner_policello
def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test="mann-whitney", **kwargs):
    '''
    Post-Hoc Pairwise Independent-Samples Test for Ordinal Data
    -------------------------------------
    The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()).

    Each possible pair of categories from the catField is compared with the requested two-sample test. A simple Bonferroni adjustment is also made for the multiple testing.

    Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.

    Parameters
    ----------
    catField : pandas series
        data with categories
    ordField : pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    is_test : {"mann-whitney", "mood", "fligner-policello"}, optional
        the test to use
    **kwargs : various, optional
        other arguments to pass on for the specific test used.

    Returns
    -------
    A dataframe with one row per pair of categories and the columns:

    * *category 1*, one of the two categories being compared
    * *category 2*, second of the two categories being compared
    * *statistic*, the test statistic
    * *df*, the degrees of freedom (only for is_test="mood")
    * *p-value*, the p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-value
    * *test*, description of the test used, as reported by the underlying test function

    If there are fewer than two categories the dataframe has these columns but no rows.

    Raises
    ------
    ValueError
        if is_test is not one of the supported test names.

    Notes
    -----
    This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.

    See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via *kwargs*.

    The Bonferroni adjustment is simply:
    $$p_{adj} = \\min \\left(p \\times n_{comp}, 1\\right)$$
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$

    *Symbols used:*

    * \\(n_{comp}\\), number of comparisons (pairs)
    * \\(k\\), number of categories

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    # Validate the requested test before doing any work, so that a typo gives a
    # clear error instead of failing at the very end on an unbound variable.
    valid_tests = ("mann-whitney", "mood", "fligner-policello")
    if not any(is_test == name for name in valid_tests):
        raise ValueError(
            f"is_test must be one of {valid_tests}, got {is_test!r}")

    if is_test == "mood":
        colNames = ["category 1", "category 2", "statistic", "df", "p-value", "adj. p-value", "test"]
    else:
        colNames = ["category 1", "category 2", "statistic", "p-value", "adj. p-value", "test"]

    #create the cross table
    ct = tab_cross(ordField, catField, order1=levels, order2=categories, totals="include")

    #basic counts
    # one column is subtracted; presumably the totals column added by
    # totals="include" (assumed to be the last column - confirm in tab_cross)
    k = ct.shape[1] - 1
    ncomp = (k * (k - 1)) / 2

    rows = []
    for i in range(0, k - 1):
        for j in range(i + 1, k):
            cat1 = ct.columns[i]
            cat2 = ct.columns[j]
            # the two-sample tests receive only the current pair as categories
            selCats = pd.Series([cat1, cat2], dtype="object")

            if is_test == "mann-whitney":
                tstRes = ts_mann_whitney(catField, ordField, selCats, levels, **kwargs)
                pVal = tstRes.iloc[0, 4]
                rows.append((cat1, cat2, tstRes.iloc[0, 3], pVal,
                             _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 5]))

            elif is_test == "mood":
                tstRes = ts_mood_median(catField, ordField, selCats, levels, **kwargs)
                if isinstance(tstRes, float):
                    # a bare float result is treated as the p-value of a
                    # Fisher exact test, which has no statistic or df
                    rows.append((cat1, cat2, None, None, tstRes,
                                 _bonferroni_adjust(tstRes, ncomp), "Fisher exact"))
                else:
                    pVal = tstRes.iloc[0, 5]
                    rows.append((cat1, cat2, tstRes.iloc[0, 3], tstRes.iloc[0, 4], pVal,
                                 _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 8]))

            else:
                # is_test == "fligner-policello" (guaranteed by the validation above)
                tstRes = ts_fligner_policello(catField, ordField, selCats, levels, **kwargs)
                pVal = tstRes.iloc[0, 2]
                rows.append((cat1, cat2, tstRes.iloc[0, 1], pVal,
                             _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 3]))

    # building the frame in one go also yields a correctly labelled empty
    # result when there are no pairs to compare
    return pd.DataFrame(rows, columns=colNames)


def _bonferroni_adjust(p_value, n_comparisons):
    # Bonferroni adjusted p-value, capped at 1. A NaN p-value stays NaN
    # because the comparison below is then False.
    adjusted = p_value * n_comparisons
    if adjusted > 1:
        return 1.0
    return adjusted
Functions
def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test='mann-whitney', **kwargs)-
Post-Hoc Pairwise Independent-Samples Test for Ordinal Data
The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()). The test compares each possible pair of categories from the catField and their mean rank. The null hypothesis is that these are then equal. A simple Bonferroni adjustment is also made for the multiple testing. Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.
Parameters
catField:pandas series- data with categories
ordField:pandas series- data with the scores
categories:list or dictionary, optional- the categories to use from catField
levels:list or dictionary, optional- the levels or order used in ordField.
is_test:{"mann-whitney", "mood", "fligner-policello"}, optional- the test to use
**kwargs:various, optional- other arguments to pass on for the specific test used.
Returns
A dataframe with:
- category 1, one of the two categories being compared
- category 2, second of the two categories being compared
- statistic, the test statistic
- p-value, the p-value (significance)
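- adj. p-value, the Bonferroni adjusted p-value
- test, a description of the test that was used
- df, the degrees of freedom (only when is_test is "mood", placed between statistic and p-value)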
Notes
This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.
See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via kwargs.
The Bonferroni adjustment is simply:
$$p_{adj} = \min \left(p \times n_{comp}, 1\right)$$
$$n_{comp} = \frac{k\times\left(k-1\right)}{2}$$
Symbols used:
- \(n_{comp}\), number of comparisons (pairs)
- \(k\), number of categories
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def ph_pairwise_iso(catField, ordField, categories=None, levels=None, is_test="mann-whitney", **kwargs):
    '''
    Post-Hoc Pairwise Independent-Samples Test for Ordinal Data
    -------------------------------------
    The Mann-Whitney U, Mood Median and Fligner-Policello test are designed for two categories and an ordinal variable. These can therefore be used as a post-hoc test for a Kruskal-Wallis test (see ts_kruskal_wallis()).

    Each possible pair of categories from the catField is compared with the requested two-sample test. A simple Bonferroni adjustment is also made for the multiple testing.

    Other post-hoc tests that could be considered are Dunn, Nemenyi, Steel-Dwass, Conover-Iman.

    Parameters
    ----------
    catField : pandas series
        data with categories
    ordField : pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    is_test : {"mann-whitney", "mood", "fligner-policello"}, optional
        the test to use
    **kwargs : various, optional
        other arguments to pass on for the specific test used.

    Returns
    -------
    A dataframe with one row per pair of categories and the columns:

    * *category 1*, one of the two categories being compared
    * *category 2*, second of the two categories being compared
    * *statistic*, the test statistic
    * *df*, the degrees of freedom (only for is_test="mood")
    * *p-value*, the p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-value
    * *test*, description of the test used, as reported by the underlying test function

    If there are fewer than two categories the dataframe has these columns but no rows.

    Raises
    ------
    ValueError
        if is_test is not one of the supported test names.

    Notes
    -----
    This function selects each possible pair of categories and then simply runs the requested test, using only those two categories.

    See ts_mann_whitney(), ts_mood_median() and/or ts_fligner_policello() for details of the calculations and the additional parameters that can be passed via *kwargs*.

    The Bonferroni adjustment is simply:
    $$p_{adj} = \\min \\left(p \\times n_{comp}, 1\\right)$$
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$

    *Symbols used:*

    * \\(n_{comp}\\), number of comparisons (pairs)
    * \\(k\\), number of categories

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    # Validate the requested test before doing any work, so that a typo gives a
    # clear error instead of failing at the very end on an unbound variable.
    valid_tests = ("mann-whitney", "mood", "fligner-policello")
    if not any(is_test == name for name in valid_tests):
        raise ValueError(
            f"is_test must be one of {valid_tests}, got {is_test!r}")

    if is_test == "mood":
        colNames = ["category 1", "category 2", "statistic", "df", "p-value", "adj. p-value", "test"]
    else:
        colNames = ["category 1", "category 2", "statistic", "p-value", "adj. p-value", "test"]

    #create the cross table
    ct = tab_cross(ordField, catField, order1=levels, order2=categories, totals="include")

    #basic counts
    # one column is subtracted; presumably the totals column added by
    # totals="include" (assumed to be the last column - confirm in tab_cross)
    k = ct.shape[1] - 1
    ncomp = (k * (k - 1)) / 2

    rows = []
    for i in range(0, k - 1):
        for j in range(i + 1, k):
            cat1 = ct.columns[i]
            cat2 = ct.columns[j]
            # the two-sample tests receive only the current pair as categories
            selCats = pd.Series([cat1, cat2], dtype="object")

            if is_test == "mann-whitney":
                tstRes = ts_mann_whitney(catField, ordField, selCats, levels, **kwargs)
                pVal = tstRes.iloc[0, 4]
                rows.append((cat1, cat2, tstRes.iloc[0, 3], pVal,
                             _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 5]))

            elif is_test == "mood":
                tstRes = ts_mood_median(catField, ordField, selCats, levels, **kwargs)
                if isinstance(tstRes, float):
                    # a bare float result is treated as the p-value of a
                    # Fisher exact test, which has no statistic or df
                    rows.append((cat1, cat2, None, None, tstRes,
                                 _bonferroni_adjust(tstRes, ncomp), "Fisher exact"))
                else:
                    pVal = tstRes.iloc[0, 5]
                    rows.append((cat1, cat2, tstRes.iloc[0, 3], tstRes.iloc[0, 4], pVal,
                                 _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 8]))

            else:
                # is_test == "fligner-policello" (guaranteed by the validation above)
                tstRes = ts_fligner_policello(catField, ordField, selCats, levels, **kwargs)
                pVal = tstRes.iloc[0, 2]
                rows.append((cat1, cat2, tstRes.iloc[0, 1], pVal,
                             _bonferroni_adjust(pVal, ncomp), tstRes.iloc[0, 3]))

    # building the frame in one go also yields a correctly labelled empty
    # result when there are no pairs to compare
    return pd.DataFrame(rows, columns=colNames)


def _bonferroni_adjust(p_value, n_comparisons):
    # Bonferroni adjusted p-value, capped at 1. A NaN p-value stays NaN
    # because the comparison below is then False.
    adjusted = p_value * n_comparisons
    if adjusted > 1:
        return 1.0
    return adjusted