Module `stikpetP.effect_sizes.eff_size_pairwise_bin_bin`

Expand source code

import pandas as pd
from ..effect_sizes.eff_size_bin_bin import es_bin_bin

def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

    Parameters
    ----------
    field1 : list or pandas series
        data of the first field
    field2 : list or pandas series
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1
    categories2 : list or dictionary, optional
        the categories to use from field2
    es : string, optional
        any of possible effect sizes from es_bin_bin(), passed on as its *method* argument
    collapse : {None, "both", "field1", "field2"}, optional
        which field(s) get all remaining categories combined into one, see Notes. Any other value is treated as None.

    Returns
    -------
    A dataframe with the effect sizes for each possible 2x2 table. The columns are
    'field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2' and one column named after *es*.
    If no 2x2 table can be formed, the dataframe has no rows.

    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all its other categories combined, with one category from field2 against all its other categories combined.

    Only pairs of scores where both fields are not missing are used to determine the categories.
    Categories follow the order of *categories1*/*categories2* when given, otherwise the order
    in which they first appear in the data.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    def ordered_categories(values, requested):
        # observed categories, in order of first appearance
        observed = values.unique().tolist()
        if requested is None:
            return observed
        # follow the order the caller asked for (stable sort keeps appearance order for ties)
        position = {}
        for k, cat in enumerate(requested):
            position.setdefault(cat, k)
        return sorted(observed, key=lambda cat: position.get(cat, len(position)))

    def pairs(items):
        # every unordered pair, earlier item first
        return [(a, b) for k, a in enumerate(items) for b in items[k + 1:]]

    def one_vs_rest(values, keep, name):
        # keep one category and label every other score as 'other'
        return values.apply(lambda x: 'other' if x != keep else x).rename(name)

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # combine as one dataframe, keeping only complete pairs
    df = pd.concat([field1, field2], axis=1).dropna()

    # only use given categories
    if categories1 is not None:
        df = df[df.iloc[:, 0].isin(categories1)]
    if categories2 is not None:
        df = df[df.iloc[:, 1].isin(categories2)]

    data1 = df.iloc[:, 0]
    data2 = df.iloc[:, 1]
    cats1 = ordered_categories(data1, categories1)
    cats2 = ordered_categories(data2, categories2)

    # collected as plain lists; the dataframe is built once at the end
    rows = []
    es_values = []

    if collapse == "both":
        for c1 in cats1:
            rest1 = one_vs_rest(data1, c1, 'cat1 collapsed')
            for c2 in cats2:
                rest2 = one_vs_rest(data2, c2, 'cat2 collapsed')
                es_values.append(es_bin_bin(rest1, rest2, method=es))
                rows.append((c1, f'not {c1}', c2, f'not {c2}'))

    elif collapse == "field1":
        pairs2 = pairs(cats2)
        for c1 in cats1:
            rest1 = one_vs_rest(data1, c1, 'cat1 collapsed')
            for c2a, c2b in pairs2:
                es_values.append(es_bin_bin(rest1, data2, categories2=[c2a, c2b], method=es))
                rows.append((c1, f'not {c1}', c2a, c2b))

    elif collapse == "field2":
        # the collapsed field2 only depends on the field2 category, so build each once
        rest2_per_cat = [one_vs_rest(data2, c2, 'cat2 collapsed') for c2 in cats2]
        for k, c1a in enumerate(cats1):
            for c2, rest2 in zip(cats2, rest2_per_cat):
                for c1b in cats1[k + 1:]:
                    es_values.append(es_bin_bin(data1, rest2, categories1=[c1a, c1b], method=es))
                    rows.append((c1a, c1b, c2, f'not {c2}'))

    else:
        pairs2 = pairs(cats2)
        for c1a, c1b in pairs(cats1):
            for c2a, c2b in pairs2:
                es_values.append(es_bin_bin(field1, field2, categories1=[c1a, c1b], categories2=[c2a, c2b], method=es))
                rows.append((c1a, c1b, c2a, c2b))

    # 'field2 cat 2' (without a period) is kept as-is for backward compatibility
    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res

Functions

def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es='pearson', collapse=None)

Pairwise Binary-Binary Effect Size

This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

Parameters

field1 : pandas series: data of the first field
field2 : pandas series: data of the second field
categories1 : list or dictionary, optional: the categories to use from field1
categories2 : list or dictionary, optional: the categories to use from field2
es : string, optional: any of possible effect sizes from es_bin_bin()
collapse : {None, "both", "field1", "field2"}, optional: which field(s) get all remaining categories combined into a single 'other' category when forming each 2x2 table (see Notes); with None no categories are combined

Returns

A dataframe with the effect sizes for each possible 2x2 table.

Notes

With the collapse parameter it is possible to choose how to create the 2x2 tables from a nxk table

None, will choose every possible combination of 2 rows and 2 columns
field1, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
field2, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
both, will compare one category from field1 against all its other categories combined, with one category from field2 against all its other categories combined.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def es_pairwise_bin_bin(field1, field2, categories1=None, categories2=None, es="pearson", collapse=None):
    '''
    Pairwise Binary-Binary Effect Size
    -------------------------------------
    This determines the requested binary-binary effect size for every possible 2x2 table in a larger nxk table.

    Parameters
    ----------
    field1 : list or pandas series
        data of the first field
    field2 : list or pandas series
        data of the second field
    categories1 : list or dictionary, optional
        the categories to use from field1
    categories2 : list or dictionary, optional
        the categories to use from field2
    es : string, optional
        any of possible effect sizes from es_bin_bin(), passed on as its *method* argument
    collapse : {None, "both", "field1", "field2"}, optional
        which field(s) get all remaining categories combined into one, see Notes. Any other value is treated as None.

    Returns
    -------
    A dataframe with the effect sizes for each possible 2x2 table. The columns are
    'field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2' and one column named after *es*.
    If no 2x2 table can be formed, the dataframe has no rows.

    Notes
    -----
    With the *collapse* parameter it is possible to choose how to create the 2x2 tables from a nxk table

    * *None*, will choose every possible combination of 2 rows and 2 columns
    * *field1*, will choose every possible pair of field2 categories and compare with one category from field1 and combine all other categories from field1.
    * *field2*, will choose every possible pair of field1 categories and compare with one category from field2 and combine all other categories from field2.
    * *both*, will compare one category from field1 against all its other categories combined, with one category from field2 against all its other categories combined.

    Only pairs of scores where both fields are not missing are used to determine the categories.
    Categories follow the order of *categories1*/*categories2* when given, otherwise the order
    in which they first appear in the data.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    '''
    def ordered_categories(values, requested):
        # observed categories, in order of first appearance
        observed = values.unique().tolist()
        if requested is None:
            return observed
        # follow the order the caller asked for (stable sort keeps appearance order for ties)
        position = {}
        for k, cat in enumerate(requested):
            position.setdefault(cat, k)
        return sorted(observed, key=lambda cat: position.get(cat, len(position)))

    def pairs(items):
        # every unordered pair, earlier item first
        return [(a, b) for k, a in enumerate(items) for b in items[k + 1:]]

    def one_vs_rest(values, keep, name):
        # keep one category and label every other score as 'other'
        return values.apply(lambda x: 'other' if x != keep else x).rename(name)

    if isinstance(field1, list):
        field1 = pd.Series(field1)
    if isinstance(field2, list):
        field2 = pd.Series(field2)

    # combine as one dataframe, keeping only complete pairs
    df = pd.concat([field1, field2], axis=1).dropna()

    # only use given categories
    if categories1 is not None:
        df = df[df.iloc[:, 0].isin(categories1)]
    if categories2 is not None:
        df = df[df.iloc[:, 1].isin(categories2)]

    data1 = df.iloc[:, 0]
    data2 = df.iloc[:, 1]
    cats1 = ordered_categories(data1, categories1)
    cats2 = ordered_categories(data2, categories2)

    # collected as plain lists; the dataframe is built once at the end
    rows = []
    es_values = []

    if collapse == "both":
        for c1 in cats1:
            rest1 = one_vs_rest(data1, c1, 'cat1 collapsed')
            for c2 in cats2:
                rest2 = one_vs_rest(data2, c2, 'cat2 collapsed')
                es_values.append(es_bin_bin(rest1, rest2, method=es))
                rows.append((c1, f'not {c1}', c2, f'not {c2}'))

    elif collapse == "field1":
        pairs2 = pairs(cats2)
        for c1 in cats1:
            rest1 = one_vs_rest(data1, c1, 'cat1 collapsed')
            for c2a, c2b in pairs2:
                es_values.append(es_bin_bin(rest1, data2, categories2=[c2a, c2b], method=es))
                rows.append((c1, f'not {c1}', c2a, c2b))

    elif collapse == "field2":
        # the collapsed field2 only depends on the field2 category, so build each once
        rest2_per_cat = [one_vs_rest(data2, c2, 'cat2 collapsed') for c2 in cats2]
        for k, c1a in enumerate(cats1):
            for c2, rest2 in zip(cats2, rest2_per_cat):
                for c1b in cats1[k + 1:]:
                    es_values.append(es_bin_bin(data1, rest2, categories1=[c1a, c1b], method=es))
                    rows.append((c1a, c1b, c2, f'not {c2}'))

    else:
        pairs2 = pairs(cats2)
        for c1a, c1b in pairs(cats1):
            for c2a, c2b in pairs2:
                es_values.append(es_bin_bin(field1, field2, categories1=[c1a, c1b], categories2=[c2a, c2b], method=es))
                rows.append((c1a, c1b, c2a, c2b))

    # 'field2 cat 2' (without a period) is kept as-is for backward compatibility
    res = pd.DataFrame(rows, columns=['field1 cat. 1', 'field1 cat. 2', 'field2 cat. 1', 'field2 cat 2'])
    res[es] = es_values
    return res