Module stikpetP.effect_sizes.eff_size_pairwise_bin_bin

Expand source code
import pandas as pd
from ..effect_sizes.eff_size_bin_bin import es_bin_bin

def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table. 
            
    Parameters
    ----------
    field1 : pandas series or list
        data of the first field
    field2 : pandas series or list
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1. When given, their order is also the order used in the output.
    categories2 : list or dictionary, optional
        the categories to use from field2. When given, their order is also the order used in the output.
    es : string, optional
        any of possible effect sizes from es_bin_bin(), passed on as its *method* argument
    collapse : {None, "both", "field1", "field2"} : string, optional
        how to build the 2x2 tables: compare a category with all other categories combined, or with each other category individually (see Notes)
        
    Returns
    -------
    A dataframe with the effect size for each possible 2x2 table. The first four columns
    ('field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2') identify the two
    rows and two columns of the 2x2 table, the last column is named after *es* and holds
    whatever es_bin_bin() returned for that table. If no 2x2 table can be formed the
    dataframe has these columns but no rows.
    
    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all other field1 categories combined, and one category from field2 against all other field2 categories combined.

    Only records with a value in both fields are used to determine the available categories.
    Without *categories1*/*categories2* the categories are used in sorted order (or order of
    first appearance if they cannot be sorted).
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    # local import so the block does not depend on a module-level import being added
    from itertools import combinations

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # pair the two fields and keep only complete records
    df = pd.concat([field1, field2], axis=1).dropna()

    # only use given categories
    if categories1 is not None:
        df = df[df.iloc[:, 0].isin(categories1)]
    if categories2 is not None:
        df = df[df.iloc[:, 1].isin(categories2)]

    col1 = df.iloc[:, 0]
    col2 = df.iloc[:, 1]

    # deterministic category order (a plain set() gives a hash-dependent order)
    cats1 = _ordered_categories(col1, categories1)
    cats2 = _ordered_categories(col2, categories2)

    # rows and es_values are collected in lists and turned into a dataframe once,
    # which avoids the quadratic cost of growing a dataframe inside the loops
    rows = []
    es_values = []

    if collapse == "both":
        # each field2 category against the rest only depends on the category, so build once
        collapsed2 = [_collapse_others(col2, cat2, 'cat2 collapsed') for cat2 in cats2]
        for cat1 in cats1:
            collapsed1 = _collapse_others(col1, cat1, 'cat1 collapsed')
            for cat2, coll2 in zip(cats2, collapsed2):
                es_values.append(es_bin_bin(collapsed1, coll2, method=es))
                rows.append((cat1, f"not {cat1}", cat2, f"not {cat2}"))

    elif collapse == "field1":
        for cat1 in cats1:
            collapsed1 = _collapse_others(col1, cat1, 'cat1 collapsed')
            for cat2_a, cat2_b in combinations(cats2, 2):
                es_values.append(
                    es_bin_bin(collapsed1, col2, categories2=[cat2_a, cat2_b], method=es))
                rows.append((cat1, f"not {cat1}", cat2_a, cat2_b))

    elif collapse == "field2":
        collapsed2 = [_collapse_others(col2, cat2, 'cat2 collapsed') for cat2 in cats2]
        for pos, cat1_a in enumerate(cats1):
            for cat2, coll2 in zip(cats2, collapsed2):
                for cat1_b in cats1[pos + 1:]:
                    es_values.append(
                        es_bin_bin(col1, coll2, categories1=[cat1_a, cat1_b], method=es))
                    rows.append((cat1_a, cat1_b, cat2, f"not {cat2}"))

    else:
        # no collapsing: every pair of field1 categories with every pair of field2 categories.
        # The categories are taken from the paired data directly; the previously used
        # tab_cross helper is not imported in this module.
        for cat1_a, cat1_b in combinations(cats1, 2):
            for cat2_a, cat2_b in combinations(cats2, 2):
                es_values.append(
                    es_bin_bin(field1, field2,
                               categories1=[cat1_a, cat1_b],
                               categories2=[cat2_a, cat2_b],
                               method=es))
                rows.append((cat1_a, cat1_b, cat2_a, cat2_b))

    # 'field2 cat 2' (without the dot) is kept as-is so existing callers that
    # select this column by name keep working
    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res


def _ordered_categories(values, requested):
    '''Return the categories present in *values*, in *requested* order if given, else sorted.'''
    present = list(values.unique())
    if requested is not None:
        available = set(present)
        # dict.fromkeys removes duplicates while keeping the requested order
        return [cat for cat in dict.fromkeys(requested) if cat in available]
    try:
        return sorted(present)
    except TypeError:
        # mixed, non-comparable labels: fall back to order of first appearance
        return present


def _collapse_others(values, keep, name):
    '''Return a copy of *values* (named *name*) where every value except *keep* is replaced by 'other'.'''
    return pd.Series([v if v == keep else 'other' for v in values], index=values.index, name=name)

Functions

def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es='pearson', collapse=None)

Pairwise Binary-Binary Effect Size

This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

Parameters

field1 : pandas series
data of the first field
field2 : pandas series
data of the second field
categories1 : list or dictionary, optional
the categories to use from field1
categories2 : list or dictionary, optional
the categories to use from field2
es : string, optional
any of possible effect sizes from es_bin_bin()
collapse : {None, "both", "field1", "field2"} : string, optional
how to build the 2x2 tables: compare a selected row/column with all other rows/columns combined, or with each other row/column individually (see Notes)

Returns

A dataframe with the effect sizes for each possible 2x2 table.

Notes

With the collapse parameter it is possible to choose how to create the 2x2 tables from a nxk table

  • None, will choose every possible combination of 2 rows and 2 columns
  • field1, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
  • field2, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
  • both, will compare one category from field1 with all other field1 categories combined, and one category from field2 with all other field2 categories combined.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code
def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table. 
            
    Parameters
    ----------
    field1 : pandas series or list
        data of the first field
    field2 : pandas series or list
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1. When given, their order is also the order used in the output.
    categories2 : list or dictionary, optional
        the categories to use from field2. When given, their order is also the order used in the output.
    es : string, optional
        any of possible effect sizes from es_bin_bin(), passed on as its *method* argument
    collapse : {None, "both", "field1", "field2"} : string, optional
        how to build the 2x2 tables: compare a category with all other categories combined, or with each other category individually (see Notes)
        
    Returns
    -------
    A dataframe with the effect size for each possible 2x2 table. The first four columns
    ('field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2') identify the two
    rows and two columns of the 2x2 table, the last column is named after *es* and holds
    whatever es_bin_bin() returned for that table. If no 2x2 table can be formed the
    dataframe has these columns but no rows.
    
    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all other field1 categories combined, and one category from field2 against all other field2 categories combined.

    Only records with a value in both fields are used to determine the available categories.
    Without *categories1*/*categories2* the categories are used in sorted order (or order of
    first appearance if they cannot be sorted).
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    # local import so the block does not depend on a module-level import being added
    from itertools import combinations

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # pair the two fields and keep only complete records
    df = pd.concat([field1, field2], axis=1).dropna()

    # only use given categories
    if categories1 is not None:
        df = df[df.iloc[:, 0].isin(categories1)]
    if categories2 is not None:
        df = df[df.iloc[:, 1].isin(categories2)]

    col1 = df.iloc[:, 0]
    col2 = df.iloc[:, 1]

    # deterministic category order (a plain set() gives a hash-dependent order)
    cats1 = _ordered_categories(col1, categories1)
    cats2 = _ordered_categories(col2, categories2)

    # rows and es_values are collected in lists and turned into a dataframe once,
    # which avoids the quadratic cost of growing a dataframe inside the loops
    rows = []
    es_values = []

    if collapse == "both":
        # each field2 category against the rest only depends on the category, so build once
        collapsed2 = [_collapse_others(col2, cat2, 'cat2 collapsed') for cat2 in cats2]
        for cat1 in cats1:
            collapsed1 = _collapse_others(col1, cat1, 'cat1 collapsed')
            for cat2, coll2 in zip(cats2, collapsed2):
                es_values.append(es_bin_bin(collapsed1, coll2, method=es))
                rows.append((cat1, f"not {cat1}", cat2, f"not {cat2}"))

    elif collapse == "field1":
        for cat1 in cats1:
            collapsed1 = _collapse_others(col1, cat1, 'cat1 collapsed')
            for cat2_a, cat2_b in combinations(cats2, 2):
                es_values.append(
                    es_bin_bin(collapsed1, col2, categories2=[cat2_a, cat2_b], method=es))
                rows.append((cat1, f"not {cat1}", cat2_a, cat2_b))

    elif collapse == "field2":
        collapsed2 = [_collapse_others(col2, cat2, 'cat2 collapsed') for cat2 in cats2]
        for pos, cat1_a in enumerate(cats1):
            for cat2, coll2 in zip(cats2, collapsed2):
                for cat1_b in cats1[pos + 1:]:
                    es_values.append(
                        es_bin_bin(col1, coll2, categories1=[cat1_a, cat1_b], method=es))
                    rows.append((cat1_a, cat1_b, cat2, f"not {cat2}"))

    else:
        # no collapsing: every pair of field1 categories with every pair of field2 categories.
        # The categories are taken from the paired data directly; the previously used
        # tab_cross helper is not imported in this module.
        for cat1_a, cat1_b in combinations(cats1, 2):
            for cat2_a, cat2_b in combinations(cats2, 2):
                es_values.append(
                    es_bin_bin(field1, field2,
                               categories1=[cat1_a, cat1_b],
                               categories2=[cat2_a, cat2_b],
                               method=es))
                rows.append((cat1_a, cat1_b, cat2_a, cat2_b))

    # 'field2 cat 2' (without the dot) is kept as-is so existing callers that
    # select this column by name keep working
    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res


def _ordered_categories(values, requested):
    '''Return the categories present in *values*, in *requested* order if given, else sorted.'''
    present = list(values.unique())
    if requested is not None:
        available = set(present)
        # dict.fromkeys removes duplicates while keeping the requested order
        return [cat for cat in dict.fromkeys(requested) if cat in available]
    try:
        return sorted(present)
    except TypeError:
        # mixed, non-comparable labels: fall back to order of first appearance
        return present


def _collapse_others(values, keep, name):
    '''Return a copy of *values* (named *name*) where every value except *keep* is replaced by 'other'.'''
    return pd.Series([v if v == keep else 'other' for v in values], index=values.index, name=name)