Module stikpetP.effect_sizes.eff_size_pairwise_bin_bin
Expand source code
import pandas as pd
from ..effect_sizes.eff_size_bin_bin import es_bin_bin
def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    Determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

    Parameters
    ----------
    field1 : pandas series or list
        data of the first field
    field2 : pandas series or list
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1
    categories2 : list or dictionary, optional
        the categories to use from field2
    es : string, optional
        any of the possible effect sizes from es_bin_bin(). It is passed on as the *method* argument of es_bin_bin() and is also used as the name of the result column.
    collapse : {None, "both", "field1", "field2"}, optional
        how the 2x2 tables are created from the nxk table, see the notes.

    Returns
    -------
    A dataframe with one row for each 2x2 table. The columns *field1 cat. 1*, *field1 cat. 2*, *field2 cat. 1* and *field2 cat 2* hold the labels of the two rows and two columns of the 2x2 table, and a column named after *es* holds the effect size. If no 2x2 table can be formed, the dataframe has these columns but no rows.

    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all its other categories combined, and one category from field2 against all its other categories combined.

    Any other value for *collapse* is treated the same as *None*.

    A combined ("all other") group is labelled *'not '* followed by the selected category in the result. When collapsing, the categories are processed in order of first appearance in the data.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    from itertools import combinations

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # Labels and effect sizes are collected in plain lists for every branch and
    # turned into a dataframe once at the end. This also covers the case where
    # no 2x2 table can be formed at all.
    rows = []
    es_values = []

    if collapse in ("both", "field1", "field2"):
        # combine as one dataframe and only keep complete cases
        df = pd.concat([field1, field2], axis=1).dropna()

        # only use given categories
        if categories1 is not None:
            df = df[df.iloc[:, 0].isin(categories1)]
        if categories2 is not None:
            df = df[df.iloc[:, 1].isin(categories2)]

        data1 = df.iloc[:, 0]
        data2 = df.iloc[:, 1]

        # unique categories in order of first appearance, so the order of the
        # result rows does not change between runs
        cats1 = list(dict.fromkeys(data1))
        cats2 = list(dict.fromkeys(data2))

        for i, c1 in enumerate(cats1):
            if collapse != "field2":
                # selected field1 category versus all others combined
                collapsed1 = data1.apply(
                    lambda x, keep=c1: 'other' if x != keep else x
                ).rename('cat1 collapsed')

            if collapse == "field1":
                # every pair of field2 categories against (c1, not c1)
                for c2, c2_other in combinations(cats2, 2):
                    es_values.append(
                        es_bin_bin(collapsed1, data2, categories2=[c2, c2_other], method=es))
                    rows.append([c1, 'not ' + str(c1), c2, c2_other])
                continue

            for c2 in cats2:
                # selected field2 category versus all others combined
                collapsed2 = data2.apply(
                    lambda x, keep=c2: 'other' if x != keep else x
                ).rename('cat2 collapsed')

                if collapse == "field2":
                    # every pair of field1 categories that starts with c1
                    for c1_other in cats1[i + 1:]:
                        es_values.append(
                            es_bin_bin(data1, collapsed2, categories1=[c1, c1_other], method=es))
                        rows.append([c1, c1_other, c2, 'not ' + str(c2)])
                else:
                    # collapse == "both"
                    es_values.append(es_bin_bin(collapsed1, collapsed2, method=es))
                    rows.append([c1, 'not ' + str(c1), c2, 'not ' + str(c2)])
    else:
        # create the cross table, only used to obtain the row and column categories
        # NOTE(review): tab_cross is not among the imports visible at the top of
        # this module; it has to be in scope for this branch to run - confirm.
        ct = tab_cross(field1, field2, order1=categories1, order2=categories2, totals="exclude")

        # every pair of rows combined with every pair of columns
        for r1, r2 in combinations(ct.index, 2):
            for k1, k2 in combinations(ct.columns, 2):
                es_values.append(
                    es_bin_bin(field1, field2, categories1=[r1, r2], categories2=[k1, k2], method=es))
                rows.append([r1, r2, k1, k2])

    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res
Functions
def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es='pearson', collapse=None)-
Pairwise Binary-Binary Effect Size
This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.
Parameters
field1:pandas series- data of the first field
field2:pandas series- data of the second field
categories1:list or dictionary, optional- the categories to use from field1
categories2:list or dictionary, optional- the categories to use from field2
es:string, optional- any of possible effect sizes from es_bin_bin()
collapse:{None, "both", "field1", "field2"}, optional- whether a selected row or column is compared against all other rows/columns combined, or against each of the other individual rows/columns (see Notes)
Returns
A dataframe with the effect sizes for each possible 2x2 table.
Notes
With the collapse parameter it is possible to choose how to create the 2x2 tables from a nxk table
- None, will choose every possible combination of 2 rows and 2 columns
- field1, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
- field2, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    Determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

    Parameters
    ----------
    field1 : pandas series or list
        data of the first field
    field2 : pandas series or list
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1
    categories2 : list or dictionary, optional
        the categories to use from field2
    es : string, optional
        any of the possible effect sizes from es_bin_bin(). It is passed on as the *method* argument of es_bin_bin() and is also used as the name of the result column.
    collapse : {None, "both", "field1", "field2"}, optional
        how the 2x2 tables are created from the nxk table, see the notes.

    Returns
    -------
    A dataframe with one row for each 2x2 table. The columns *field1 cat. 1*, *field1 cat. 2*, *field2 cat. 1* and *field2 cat 2* hold the labels of the two rows and two columns of the 2x2 table, and a column named after *es* holds the effect size. If no 2x2 table can be formed, the dataframe has these columns but no rows.

    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all its other categories combined, and one category from field2 against all its other categories combined.

    Any other value for *collapse* is treated the same as *None*.

    A combined ("all other") group is labelled *'not '* followed by the selected category in the result. When collapsing, the categories are processed in order of first appearance in the data.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    from itertools import combinations

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # Labels and effect sizes are collected in plain lists for every branch and
    # turned into a dataframe once at the end. This also covers the case where
    # no 2x2 table can be formed at all.
    rows = []
    es_values = []

    if collapse in ("both", "field1", "field2"):
        # combine as one dataframe and only keep complete cases
        df = pd.concat([field1, field2], axis=1).dropna()

        # only use given categories
        if categories1 is not None:
            df = df[df.iloc[:, 0].isin(categories1)]
        if categories2 is not None:
            df = df[df.iloc[:, 1].isin(categories2)]

        data1 = df.iloc[:, 0]
        data2 = df.iloc[:, 1]

        # unique categories in order of first appearance, so the order of the
        # result rows does not change between runs
        cats1 = list(dict.fromkeys(data1))
        cats2 = list(dict.fromkeys(data2))

        for i, c1 in enumerate(cats1):
            if collapse != "field2":
                # selected field1 category versus all others combined
                collapsed1 = data1.apply(
                    lambda x, keep=c1: 'other' if x != keep else x
                ).rename('cat1 collapsed')

            if collapse == "field1":
                # every pair of field2 categories against (c1, not c1)
                for c2, c2_other in combinations(cats2, 2):
                    es_values.append(
                        es_bin_bin(collapsed1, data2, categories2=[c2, c2_other], method=es))
                    rows.append([c1, 'not ' + str(c1), c2, c2_other])
                continue

            for c2 in cats2:
                # selected field2 category versus all others combined
                collapsed2 = data2.apply(
                    lambda x, keep=c2: 'other' if x != keep else x
                ).rename('cat2 collapsed')

                if collapse == "field2":
                    # every pair of field1 categories that starts with c1
                    for c1_other in cats1[i + 1:]:
                        es_values.append(
                            es_bin_bin(data1, collapsed2, categories1=[c1, c1_other], method=es))
                        rows.append([c1, c1_other, c2, 'not ' + str(c2)])
                else:
                    # collapse == "both"
                    es_values.append(es_bin_bin(collapsed1, collapsed2, method=es))
                    rows.append([c1, 'not ' + str(c1), c2, 'not ' + str(c2)])
    else:
        # create the cross table, only used to obtain the row and column categories
        # NOTE(review): tab_cross is not among the imports visible at the top of
        # this module; it has to be in scope for this branch to run - confirm.
        ct = tab_cross(field1, field2, order1=categories1, order2=categories2, totals="exclude")

        # every pair of rows combined with every pair of columns
        for r1, r2 in combinations(ct.index, 2):
            for k1, k2 in combinations(ct.columns, 2):
                es_values.append(
                    es_bin_bin(field1, field2, categories1=[r1, r2], categories2=[k1, k2], method=es))
                rows.append([r1, r2, k1, k2])

    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res