Module stikpetP.other.poho_pairwise_is
Expand source code
import pandas as pd
from ..tests.test_student_t_is import ts_student_t_is
from ..tests.test_welch_t_is import ts_welch_t_is
from ..tests.test_trimmed_mean_is import ts_trimmed_mean_is
from ..tests.test_z_is import ts_z_is
def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.

    A simple Bonferroni correction is also applied.

    The independent samples tests that can be used are:

    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details

    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.

    Returns
    -------
    A data frame with one row per pair of categories and the columns:

    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used

    If fewer than two categories remain after filtering, the data frame has
    these columns but no rows.

    Raises
    ------
    ValueError
        if isTest is not one of the supported test names.

    Notes
    -----
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$

    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$

    Where \\(k\\) is the number of categories.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    # Reject unknown test names up front; otherwise no test would be run and
    # the failure would surface later as an unrelated unbound-variable error.
    validTests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in validTests:
        raise ValueError(f"isTest must be one of {validTests}, got {isTest!r}")

    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)

    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]

    # remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]

    # Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)

    cats = pd.unique(data["category"])
    k = len(cats)
    ncomp = k * (k - 1) / 2

    # For the z test no df value is read from the test result, and the p-value
    # and test description are taken one column earlier than for the other tests.
    isZ = isTest == "z"
    pCol = 7 if isZ else 8
    testCol = 8 if isZ else 9

    columns = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2",
               "sample diff.", "hyp diff.", "statistic", "df", "p-value",
               "adj. p-value", "test"]

    rows = []
    for i in range(k - 1):
        for j in range(i + 1, k):
            sel2cat = [cats[i], cats[j]]
            # The full input fields are passed; the pair list selects the two groups.
            isRes = _ph_pairwise_is_run(nomField, scaleField, sel2cat, isTest, trimProp)

            row = [cats[i], cats[j]]
            # n1, n2, mean 1, mean 2, sample diff., hyp diff., statistic
            row.extend(isRes.iloc[0, c] for c in range(7))
            row.append(None if isZ else isRes.iloc[0, 7])

            pValue = isRes.iloc[0, pCol]
            row.append(pValue)
            # Bonferroni adjustment, capped at 1 (a NaN p-value stays NaN)
            row.append(min(pValue * ncomp, 1.0))
            row.append(isRes.iloc[0, testCol])
            rows.append(row)

    # Building from a list of rows also yields a correctly labelled, empty
    # table when there are fewer than two categories.
    return pd.DataFrame(rows, columns=columns)


def _ph_pairwise_is_run(nomField, scaleField, sel2cat, isTest, trimProp):
    # Run the requested independent samples test on a single pair of categories.
    if isTest == "student":
        return ts_student_t_is(nomField, scaleField, sel2cat)
    if isTest == "welch":
        return ts_welch_t_is(nomField, scaleField, sel2cat)
    if isTest == "trimmed":
        return ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
    if isTest == "yuen":
        return ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
    return ts_z_is(nomField, scaleField, sel2cat)
Functions
def ph_pairwise_is(nomField, scaleField, categories=None, isTest='student', trimProp=0.1)
-
Post-Hoc Pairwise Independent Samples Test
This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.
A simple Bonferroni correction is also applied.
The independent samples tests that can be used are:
- Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
- Welch t, see ts_welch_t_is() for details
- Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
- Z, see ts_z_is() for details
Parameters
nomField : pandas series
- data with categories
scaleField : pandas series
- data with the scores
categories : list or dictionary, optional
- the categories to use from nomField
isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
- the independent samples test to use. Default is "student"
trimProp : float, optional
- the trim proportion to use, if applicable. Default is 0.1.
Returns
A data frame with:
- category 1, the first category in the pair
- category 2, the second category in the pair
- n1, sample size of first category
- n2, sample size of second category
- mean 1, arithmetic mean of scores in first category
- mean 2, arithmetic mean of scores in second category
- sample diff., difference between the two arithmetic means
- hyp diff., the hypothesized difference
- statistic, the test-statistic
- df, the degrees of freedom
- p-value, the unadjusted p-value (significance)
- adj. p-value, the Bonferroni adjusted p-values
- test, description of test used
Notes
A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
$$sig._{adj} = \min \left(sig. \times n_{comp}, 1\right)$$
With:
$$n_{comp} = \frac{k\times\left(k-1\right)}{2}$$
Where \(k\) is the number of categories.
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def ph_pairwise_is(nomField, scaleField, categories=None, isTest = "student", trimProp = 0.1):
    '''
    Post-Hoc Pairwise Independent Samples Test
    ------------------------------------------
    This function can perform various pairwise independent samples tests, for use after a one-way ANOVA, to determine which categories significantly differ from each other.

    A simple Bonferroni correction is also applied.

    The independent samples tests that can be used are:

    * Student t, see ts_student_t_is() for details. An alternative version for this is available by using the ph_pairwise_t() function.
    * Welch t, see ts_welch_t_is() for details
    * Trimmed Mean / Yuen, see ts_trimmed_mean_is() for details
    * Z, see ts_z_is() for details

    Parameters
    ----------
    nomField : pandas series or list
        data with categories
    scaleField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from nomField
    isTest : {"student", "welch", "trimmed", "yuen", "z"}, optional
        the independent samples test to use. Default is "student"
    trimProp : float, optional
        the trim proportion to use, if applicable. Default is 0.1.

    Returns
    -------
    A data frame with one row per pair of categories and the columns:

    * *category 1*, the first category in the pair
    * *category 2*, the second category in the pair
    * *n1*, sample size of first category
    * *n2*, sample size of second category
    * *mean 1*, arithmetic mean of scores in first category
    * *mean 2*, arithmetic mean of scores in second category
    * *sample diff.*, difference between the two arithmetic means
    * *hyp diff.*, the hypothesized difference
    * *statistic*, the test-statistic
    * *df*, the degrees of freedom (None for the z test)
    * *p-value*, the unadjusted p-value (significance)
    * *adj. p-value*, the Bonferroni adjusted p-values
    * *test*, description of test used

    If fewer than two categories remain after filtering, the data frame has
    these columns but no rows.

    Raises
    ------
    ValueError
        if isTest is not one of the supported test names.

    Notes
    -----
    A simple Bonferroni correction is applied for the multiple comparisons. This is simply:
    $$sig._{adj} = \\min \\left(sig. \\times n_{comp}, 1\\right)$$

    With:
    $$n_{comp} = \\frac{k\\times\\left(k-1\\right)}{2}$$

    Where \\(k\\) is the number of categories.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    # Reject unknown test names up front; otherwise no test would be run and
    # the failure would surface later as an unrelated unbound-variable error.
    validTests = ("student", "welch", "trimmed", "yuen", "z")
    if isTest not in validTests:
        raise ValueError(f"isTest must be one of {validTests}, got {isTest!r}")

    if isinstance(nomField, list):
        nomField = pd.Series(nomField)
    if isinstance(scaleField, list):
        scaleField = pd.Series(scaleField)

    data = pd.concat([nomField, scaleField], axis=1)
    data.columns = ["category", "score"]

    # remove unused categories
    if categories is not None:
        data = data[data.category.isin(categories)]

    # Remove rows with missing values and reset index
    data = data.dropna().reset_index(drop=True)

    cats = pd.unique(data["category"])
    k = len(cats)
    ncomp = k * (k - 1) / 2

    # For the z test no df value is read from the test result, and the p-value
    # and test description are taken one column earlier than for the other tests.
    isZ = isTest == "z"
    pCol = 7 if isZ else 8
    testCol = 8 if isZ else 9

    columns = ["category 1", "category 2", "n1", "n2", "mean 1", "mean 2",
               "sample diff.", "hyp diff.", "statistic", "df", "p-value",
               "adj. p-value", "test"]

    rows = []
    for i in range(k - 1):
        for j in range(i + 1, k):
            sel2cat = [cats[i], cats[j]]
            # The full input fields are passed; the pair list selects the two groups.
            isRes = _ph_pairwise_is_run(nomField, scaleField, sel2cat, isTest, trimProp)

            row = [cats[i], cats[j]]
            # n1, n2, mean 1, mean 2, sample diff., hyp diff., statistic
            row.extend(isRes.iloc[0, c] for c in range(7))
            row.append(None if isZ else isRes.iloc[0, 7])

            pValue = isRes.iloc[0, pCol]
            row.append(pValue)
            # Bonferroni adjustment, capped at 1 (a NaN p-value stays NaN)
            row.append(min(pValue * ncomp, 1.0))
            row.append(isRes.iloc[0, testCol])
            rows.append(row)

    # Building from a list of rows also yields a correctly labelled, empty
    # table when there are fewer than two categories.
    return pd.DataFrame(rows, columns=columns)


def _ph_pairwise_is_run(nomField, scaleField, sel2cat, isTest, trimProp):
    # Run the requested independent samples test on a single pair of categories.
    if isTest == "student":
        return ts_student_t_is(nomField, scaleField, sel2cat)
    if isTest == "welch":
        return ts_welch_t_is(nomField, scaleField, sel2cat)
    if isTest == "trimmed":
        return ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen-dixon")
    if isTest == "yuen":
        return ts_trimmed_mean_is(nomField, scaleField, sel2cat, trimProp=trimProp, se="yuen")
    return ts_z_is(nomField, scaleField, sel2cat)