Module stikpetP.effect_sizes.eff_size_cohen_g

Expand source code
import pandas as pd

def es_cohen_g(data, p0Cat=None, codes=None):
    '''
    Cohen's g
    ---------

    Cohen's g (Cohen, 1988) is an effect size measure that can accompany a one-sample binomial (see Rosnow & Rosenthal, 2003), score or Wald test. It is simply the difference between the sample proportion and 0.5.

    A video explanation of Cohen g can be found at https://youtu.be/tPZMvB8QrM0. This function is shown in this [YouTube video](https://youtu.be/UqpkM8LIo-M) and the effect size is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/EffectSizes/CohenG.html)

    Parameters
    ----------
    data : list, tuple, array-like or pandas data series
        the data; anything that is not already a pandas Series is wrapped in one, and missing values are removed before counting
    p0Cat : optional
        the category for which p0=0.5 was used (ignored if codes is given)
    codes : list, optional
        list with the two codes to use; only observations equal to one of these two codes are counted

    Returns
    -------
    pandas.DataFrame or None
        A dataframe with one row and the following columns:

        - *g for cat. 1* : Cohen g value for category 1
        - *g for cat. 2* : Cohen g value for category 2

        In the actual column names "cat. 1" and "cat. 2" are replaced by the category labels (or "all other" for a merged category 2).

        None is returned, after printing a warning, if the categories cannot be determined (neither codes nor p0Cat given and the data does not have exactly two categories) or if there are no observations to base a proportion on.

    Notes
    -----
    To decide which category is associated with p0 (fixed at 0.5) the following is used:
    * If codes are provided, the first code is assumed to be the category for p0.
    * If p0Cat is specified, it is used for p0 and all other categories are considered as category 2. This means that if there are more than two categories, the remaining ones (besides p0Cat) are merged into one large category labelled "all other". The same label is used when p0Cat does not occur in the data.
    * If neither codes nor p0Cat is specified and the data does not have exactly two categories, a warning is printed and no results are returned.
    * If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the first category of the frequency table (pandas *value_counts()*, which lists the most frequent category first).

    The formula used is (Cohen, 1988, p. 147):
    $$g=p-0.5$$

    *Symbols used*:

    * $p$ is the sample proportion

    *Classification*

    Use the **th_cohen_g()** function for a classification of the value.

    Before, After and Alternatives
    ------------------------------
    Before this effect size you might first want to perform a test:
    * [ts_binomial_os](../tests/test_binomial_os.html#ts_binomial_os) for a One-Sample Binomial Test
    * [ts_score_os](../tests/test_score_os.html#ts_score_os) for One-Sample Score Test
    * [ts_wald_os](../tests/test_wald_os.html#ts_wald_os) for One-Sample Wald Test

    After this, you might want a rule-of-thumb:
    * [th_cohen_g](../other.thumb/cohen_g.html#th_cohen_g) for rules-of-thumb for Cohen g

    Alternatives could be:
    * [es_cohen_h_os](../effect_sizes/eff_size_cohen_h_os.html#es_cohen_h_os) for Cohen h'
    * [es_alt_ratio](../effect_sizes/eff_size_alt_ratio.html#es_alt_ratio) for Alternative Ratio
    * [r_rosenthal](../correlations/cor_rosenthal.html#r_rosenthal) for Rosenthal Correlation if a z-value is available

    References
    ----------
    Cohen, J. (1988). *Statistical power analysis for the behavioral sciences* (2nd ed.). L. Erlbaum Associates.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    Examples
    --------
    Example 1: Numeric list
    >>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
    >>> es_cohen_g(ex1)
       g for 1  g for 2
    0    0.125   -0.125

    Example 2: pandas Series
    >>> import pandas as pd
    >>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> es_cohen_g(df1['sex'])
       g for FEMALE  g for MALE
    0      0.051165   -0.051165
    >>> es_cohen_g(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
       g for DIVORCED  g for NEVER MARRIED
    0       -0.057123             0.057123

    '''

    # wrap any non-Series input (list, tuple, numpy array, ...) so the pandas
    # methods used below are available
    if not isinstance(data, pd.Series):
        data = pd.Series(data)

    # remove missing values
    data = data.dropna()

    if codes is not None:
        # only the two requested codes are counted, all other values are ignored
        cat1_lbl, cat2_lbl = codes[0], codes[1]
        n1 = int((data == cat1_lbl).sum())
        n2 = int((data == cat2_lbl).sum())
        n = n1 + n2
    else:
        # create a frequency table
        freq = data.value_counts()

        if p0Cat is not None:
            # p0Cat versus everything else
            cat1_lbl = p0Cat
            n = int(freq.sum())
            n1 = int((data == p0Cat).sum())
            n2 = n - n1
            # n2 only belongs to a single named category when p0Cat is one of
            # exactly two categories; otherwise it is a merged count
            if len(freq) == 2 and n1 > 0:
                cat2_lbl = freq.index[1] if freq.index[0] == p0Cat else freq.index[0]
            else:
                cat2_lbl = "all other"
        else:
            # check if there were exactly two categories or not
            if len(freq) != 2:
                # unable to determine which category p0 would belong to, so print warning and end
                print("WARNING: data does not have two unique categories, please specify two categories using codes parameter")
                return None
            # assume p0 was for the first category in the frequency table
            cat1_lbl, cat2_lbl = freq.index[0], freq.index[1]
            n1, n2 = int(freq.iloc[0]), int(freq.iloc[1])
            n = n1 + n2

    if n == 0:
        # a proportion is undefined without observations; warn and end, as is
        # done when the categories cannot be determined
        print("WARNING: no observations found for the requested categories, unable to determine Cohen g")
        return None

    p1 = n1 / n
    p2 = 1 - p1
    g1 = p1 - 0.5
    g2 = p2 - 0.5

    results = pd.DataFrame([[g1, g2]], columns=['g for ' + str(cat1_lbl), 'g for ' + str(cat2_lbl)])

    return results

Functions

def es_cohen_g(data, p0Cat=None, codes=None)

Cohen's g

Cohen’s g (Cohen, 1988) is an effect size measure that can accompany a one-sample binomial (see Rosnow & Rosenthal, 2003), score or Wald test. It is simply the difference between the sample proportion and 0.5.

A video explanation of Cohen g can be found at https://youtu.be/tPZMvB8QrM0. This function is shown in this YouTube video and the effect size is also described at PeterStatistics.com

Parameters

data : list or pandas data series
the data
p0Cat : optional
the category for which p0=0.5 was used
codes : list, optional
list with the two codes to use

Returns

pandas.DataFrame

A dataframe with the following columns:

  • g for cat. 1 : Cohen g value for category 1
  • g for cat. 2 : Cohen g value for category 2

Notes

To decide which category is associated with p0 (fixed at 0.5), the following rules are used:

  • If codes are provided, the first code is assumed to be the category for p0.
  • If p0Cat is specified, it is used for p0 and all other categories are considered as category 2; this means that if there are more than two categories, the remaining two or more (besides p0Cat) are merged into one large category.
  • If neither codes nor p0Cat is specified and the data does not contain exactly two categories, a warning is printed and no results are returned.
  • If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the first category found (the first entry of the frequency table).

The formula used is (Cohen, 1988, p. 147): g=p-0.5

Symbols used:

  • $p$ is the sample proportion

Classification

Use the th_cohen_g() function for a classification of the value.

Before, After and Alternatives

Before this effect size you might first want to perform a test:

  • ts_binomial_os for a One-Sample Binomial Test
  • ts_score_os for a One-Sample Score Test
  • ts_wald_os for a One-Sample Wald Test

After this, you might want a rule-of-thumb:

  • th_cohen_g for rules-of-thumb for Cohen g

Alternatives could be:

  • es_cohen_h_os for Cohen h'
  • es_alt_ratio for Alternative Ratio
  • r_rosenthal for Rosenthal Correlation if a z-value is available

References

Cohen, J. (1988). Statistical power analysis for the behavioral sciences (2nd ed.). L. Erlbaum Associates.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Examples

Example 1: Numeric list

>>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
>>> es_cohen_g(ex1)
   g for 1  g for 2
0    0.125   -0.125

Example 2: pandas Series

>>> import pandas as pd
>>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
>>> es_cohen_g(df1['sex'])
   g for FEMALE  g for MALE
0      0.051165   -0.051165
>>> es_cohen_g(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
   g for DIVORCED  g for NEVER MARRIED
0       -0.057123             0.057123
Expand source code
def es_cohen_g(data, p0Cat=None, codes=None):
    '''
    Cohen's g
    ---------

    Cohen's g (Cohen, 1988) is an effect size measure that can accompany a one-sample binomial (see Rosnow & Rosenthal, 2003), score or Wald test. It is simply the difference between the sample proportion and 0.5.

    A video explanation of Cohen g can be found at https://youtu.be/tPZMvB8QrM0. This function is shown in this [YouTube video](https://youtu.be/UqpkM8LIo-M) and the effect size is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/EffectSizes/CohenG.html)

    Parameters
    ----------
    data : list, tuple, array-like or pandas data series
        the data; anything that is not already a pandas Series is wrapped in one, and missing values are removed before counting
    p0Cat : optional
        the category for which p0=0.5 was used (ignored if codes is given)
    codes : list, optional
        list with the two codes to use; only observations equal to one of these two codes are counted

    Returns
    -------
    pandas.DataFrame or None
        A dataframe with one row and the following columns:

        - *g for cat. 1* : Cohen g value for category 1
        - *g for cat. 2* : Cohen g value for category 2

        In the actual column names "cat. 1" and "cat. 2" are replaced by the category labels (or "all other" for a merged category 2).

        None is returned, after printing a warning, if the categories cannot be determined (neither codes nor p0Cat given and the data does not have exactly two categories) or if there are no observations to base a proportion on.

    Notes
    -----
    To decide which category is associated with p0 (fixed at 0.5) the following is used:
    * If codes are provided, the first code is assumed to be the category for p0.
    * If p0Cat is specified, it is used for p0 and all other categories are considered as category 2. This means that if there are more than two categories, the remaining ones (besides p0Cat) are merged into one large category labelled "all other". The same label is used when p0Cat does not occur in the data.
    * If neither codes nor p0Cat is specified and the data does not have exactly two categories, a warning is printed and no results are returned.
    * If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the first category of the frequency table (pandas *value_counts()*, which lists the most frequent category first).

    The formula used is (Cohen, 1988, p. 147):
    $$g=p-0.5$$

    *Symbols used*:

    * $p$ is the sample proportion

    *Classification*

    Use the **th_cohen_g()** function for a classification of the value.

    Before, After and Alternatives
    ------------------------------
    Before this effect size you might first want to perform a test:
    * [ts_binomial_os](../tests/test_binomial_os.html#ts_binomial_os) for a One-Sample Binomial Test
    * [ts_score_os](../tests/test_score_os.html#ts_score_os) for One-Sample Score Test
    * [ts_wald_os](../tests/test_wald_os.html#ts_wald_os) for One-Sample Wald Test

    After this, you might want a rule-of-thumb:
    * [th_cohen_g](../other.thumb/cohen_g.html#th_cohen_g) for rules-of-thumb for Cohen g

    Alternatives could be:
    * [es_cohen_h_os](../effect_sizes/eff_size_cohen_h_os.html#es_cohen_h_os) for Cohen h'
    * [es_alt_ratio](../effect_sizes/eff_size_alt_ratio.html#es_alt_ratio) for Alternative Ratio
    * [r_rosenthal](../correlations/cor_rosenthal.html#r_rosenthal) for Rosenthal Correlation if a z-value is available

    References
    ----------
    Cohen, J. (1988). *Statistical power analysis for the behavioral sciences* (2nd ed.). L. Erlbaum Associates.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    Examples
    --------
    Example 1: Numeric list
    >>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
    >>> es_cohen_g(ex1)
       g for 1  g for 2
    0    0.125   -0.125

    Example 2: pandas Series
    >>> import pandas as pd
    >>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> es_cohen_g(df1['sex'])
       g for FEMALE  g for MALE
    0      0.051165   -0.051165
    >>> es_cohen_g(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
       g for DIVORCED  g for NEVER MARRIED
    0       -0.057123             0.057123

    '''

    # wrap any non-Series input (list, tuple, numpy array, ...) so the pandas
    # methods used below are available
    if not isinstance(data, pd.Series):
        data = pd.Series(data)

    # remove missing values
    data = data.dropna()

    if codes is not None:
        # only the two requested codes are counted, all other values are ignored
        cat1_lbl, cat2_lbl = codes[0], codes[1]
        n1 = int((data == cat1_lbl).sum())
        n2 = int((data == cat2_lbl).sum())
        n = n1 + n2
    else:
        # create a frequency table
        freq = data.value_counts()

        if p0Cat is not None:
            # p0Cat versus everything else
            cat1_lbl = p0Cat
            n = int(freq.sum())
            n1 = int((data == p0Cat).sum())
            n2 = n - n1
            # n2 only belongs to a single named category when p0Cat is one of
            # exactly two categories; otherwise it is a merged count
            if len(freq) == 2 and n1 > 0:
                cat2_lbl = freq.index[1] if freq.index[0] == p0Cat else freq.index[0]
            else:
                cat2_lbl = "all other"
        else:
            # check if there were exactly two categories or not
            if len(freq) != 2:
                # unable to determine which category p0 would belong to, so print warning and end
                print("WARNING: data does not have two unique categories, please specify two categories using codes parameter")
                return None
            # assume p0 was for the first category in the frequency table
            cat1_lbl, cat2_lbl = freq.index[0], freq.index[1]
            n1, n2 = int(freq.iloc[0]), int(freq.iloc[1])
            n = n1 + n2

    if n == 0:
        # a proportion is undefined without observations; warn and end, as is
        # done when the categories cannot be determined
        print("WARNING: no observations found for the requested categories, unable to determine Cohen g")
        return None

    p1 = n1 / n
    p2 = 1 - p1
    g1 = p1 - 0.5
    g2 = p2 - 0.5

    results = pd.DataFrame([[g1, g2]], columns=['g for ' + str(cat1_lbl), 'g for ' + str(cat2_lbl)])

    return results