Module stikpetP.effect_sizes.eff_size_alt_ratio

Expand source code
import pandas as pd

def es_alt_ratio(data, p0=0.5, p0Cat=None, codes=None):
    '''
    Alternative Ratio
    -----------------
     
    The Alternative Ratio is an effect size measure that could accompany a one-sample binomial, score or Wald test. It is simply the sample proportion (percentage), divided by the expected population proportion (often set at 0.5).
    
    The Alternative Ratio is only mentioned in the documentation of a program called PASS from NCSS (n.d.), and referred to as Relative Risk by JonB (2015).
    
    This function is shown in this [YouTube video](https://youtu.be/cpkzLBOh3zA) and the effect size is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/EffectSizes/AlternativeRatio.html)
    
    Parameters
    ----------
    data : list, tuple, array-like or pandas data series 
        the data; missing values are removed before counting
    p0 : float, optional 
        hypothesized proportion for the first category (default is 0.5)
    p0Cat : optional
        the category for which p0 was used (ignored if codes is provided)
    codes : list, optional 
        the two codes to use, the first one is the category for p0
        
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        - *Alt.Ratio Cat. 1* : the alternative ratio for the category associated with p0  
        - *Alt.Ratio Cat. 2* : the alternative ratio for the other category (using 1 - p0)  
        - *comment* : the category for which p0 was used
    
        Nothing (None) is returned if neither codes nor p0Cat is specified and the data does not have exactly two categories.
   
    Notes
    -----
    To decide on which category is associated with p0 the following is used:
    
    * If codes are provided, the first code is assumed to be the category for the p0. Only scores equal to one of the two codes are counted.
    * If p0Cat is specified that will be used for p0 and all other categories will be considered as category 2, this means if there are more than two categories the remaining two or more (besides p0Cat) will be merged as one large category.
    * If neither codes nor p0Cat is specified and the data does not have exactly two categories, a warning is printed and no results are returned.
    * If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the first category in the frequency table, i.e. the category with the highest count.
    
    If no scores remain to be counted (e.g. none of the codes occur in the data), both ratios are reported as NaN.
    
    The formula used is:
    $$AR=\\frac{p}{\\pi}$$
    
    *Symbols used*:
    
    * $p$ is the sample proportion of one of the categories
    * $\\pi$ the expected proportion ($\\pi = p_0$ for the first category, and $\\pi = 1 - p_0$ for the second)

    Before, After and Alternatives
    ------------------------------
    Before this effect size you might first want to perform a test:
    
    * [ts_binomial_os](../tests/test_binomial_os.html#ts_binomial_os) for a One-Sample Binomial Test
    * [ts_score_os](../tests/test_score_os.html#ts_score_os) for One-Sample Score Test
    * [ts_wald_os](../tests/test_wald_os.html#ts_wald_os) for One-Sample Wald Test

    Unfortunately I'm not aware of any rule-of-thumb for this measure.

    Alternatives could be:
    
    * [es_cohen_g](../effect_sizes/eff_size_cohen_g.html#es_cohen_g) for Cohen g
    * [es_cohen_h_os](../effect_sizes/eff_size_cohen_h_os.html#es_cohen_h_os) for Cohen h'
    * [r_rosenthal](../correlations/cor_rosenthal.html#r_rosenthal) for Rosenthal Correlation if a z-value is available
    
    References
    ----------
    JonB. (2015, October 14). Effect size of a binomial test and its relation to other measures of effect size. StackExchange - Cross Validated. https://stats.stackexchange.com/q/176856
    
    NCSS. (n.d.). Tests for one proportion. In PASS Sample Size Software (pp. 100-1-100–132). Retrieved November 10, 2018, from https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/PASS/Tests_for_One_Proportion.pdf
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    Examples
    --------
    
    Example 1: Numeric list
    >>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
    >>> es_alt_ratio(ex1)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
    0              1.25              0.75  assuming p0 for 1
    
    >>> es_alt_ratio(ex1, p0=0.3)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
    0          2.083333          0.535714  assuming p0 for 1
    
    Example 2: Text list
    >>> ex2 = ["Female", "Male", "Male", "Female", "Male", "Male"]
    >>> es_alt_ratio(ex2)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
    0          1.333333          0.666667  assuming p0 for Male
    
    >>> es_alt_ratio(ex2, p0Cat='Female')
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
    0          0.666667          1.333333  with p0 for Female
    
    >>> es_alt_ratio(ex2, codes=['Female', 'Male'])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
    0          0.666667          1.333333  with p0 for Female
    
    Example 3: pandas Series
    >>> import pandas as pd
    >>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> es_alt_ratio(df1['sex'])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2                 comment
    0           1.10233           0.89767  assuming p0 for FEMALE

    >>> es_alt_ratio(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
    0          0.885755          1.114245  with p0 for DIVORCED
    
    '''
    
    # Convert any non-pandas input (list, tuple, numpy array, ...) to a Series;
    # pandas objects are passed through untouched.
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        data = pd.Series(data)

    #remove missing values
    data = data.dropna()
        
    #Determine number of successes, failures, and total sample size
    if codes is not None:
        # only scores equal to one of the two codes count towards n
        n1 = (data == codes[0]).sum()
        n2 = (data == codes[1]).sum()
        n = n1 + n2
        cat_used = "with p0 for " + str(codes[0])
    elif p0Cat is not None:
        # everything that is not p0Cat is merged into the second category
        n = len(data)
        n1 = (data == p0Cat).sum()
        n2 = n - n1
        cat_used = "with p0 for " + str(p0Cat)
    else:
        #create a frequency table (sorted from highest to lowest count)
        freq = data.value_counts()

        #check if there were exactly two categories or not
        if len(freq) != 2:
            # unable to determine which category p0 would belong to, so print warning and end
            print("WARNING: data does not have two unique categories, please specify two categories using codes parameter")
            return

        # the frequency table is sorted descending, so the first category is
        # the most frequent one and is the one assumed to belong to p0
        n1 = freq.values[0]
        n2 = freq.values[1]
        n = n1 + n2
        cat_used = "assuming p0 for " + str(freq.index[0])
        
    if n == 0:
        # no scores to count: the sample proportions (and so the ratios) are undefined
        AR1 = AR2 = float("nan")
    else:
        AR1 = (n1 / n) / p0
        AR2 = (n2 / n) / (1 - p0)
    
    results = pd.DataFrame([[AR1, AR2, cat_used]], columns=["Alt.Ratio Cat. 1", "Alt.Ratio Cat. 2", "comment"])
    
    return results

Functions

def es_alt_ratio(data, p0=0.5, p0Cat=None, codes=None)

Alternative Ratio

The Alternative Ratio is an effect size measure that could accompany a one-sample binomial, score or Wald test. It is simply the sample proportion (percentage), divided by the expected population proportion (often set at 0.5).

The Alternative Ratio is only mentioned in the documentation of a program called PASS from NCSS (n.d.), and referred to as Relative Risk by JonB (2015).

This function is shown in this YouTube video and the effect size is also described at PeterStatistics.com

Parameters

data : list or pandas data series
the data
p0 : float, optional
hypothesized proportion for the first category (default is 0.5)
p0Cat : optional
the category for which p0 was used
codes : list, optional
the two codes to use

Returns

pandas.DataFrame

A dataframe with the following columns:

  • AR1 : the alternative ratio for one category
  • AR2 : the alternative ratio for the other category
  • comment : the category for which p0 was used

Notes

To decide on which category is associated with p0 the following is used:

  • If codes are provided, the first code is assumed to be the category for the p0.
  • If p0Cat is specified that will be used for p0 and all other categories will be considered as category 2, this means if there are more than two categories the remaining two or more (besides p0Cat) will be merged as one large category.
  • If neither codes nor p0Cat is specified and the data does not have exactly two categories, a warning is printed and no results are returned.
  • If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the category with the highest count.

The formula used is: $$AR=\frac{p}{\pi}$$

Symbols used:

  • $p$ is the sample proportion of one of the categories
  • $\pi$ the expected proportion

Before, After and Alternatives

Before this effect size you might first want to perform a test:

  • ts_binomial_os for a One-Sample Binomial Test
  • ts_score_os for One-Sample Score Test
  • ts_wald_os for One-Sample Wald Test

Unfortunately I'm not aware of any rule-of-thumb for this measure.

Alternatives could be:

  • es_cohen_g for Cohen g
  • es_cohen_h_os for Cohen h'
  • r_rosenthal for Rosenthal Correlation if a z-value is available

References

JonB. (2015, October 14). Effect size of a binomial test and its relation to other measures of effect size. StackExchange - Cross Validated. https://stats.stackexchange.com/q/176856

NCSS. (n.d.). Tests for one proportion. In PASS Sample Size Software (pp. 100-1-100–132). Retrieved November 10, 2018, from https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/PASS/Tests_for_One_Proportion.pdf

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Examples

Example 1: Numeric list

>>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
>>> es_alt_ratio(ex1)
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
0              1.25              0.75  assuming p0 for 1
>>> es_alt_ratio(ex1, p0=0.3)
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
0          2.083333          0.535714  assuming p0 for 1

Example 2: Text list

>>> ex2 = ["Female", "Male", "Male", "Female", "Male", "Male"]
>>> es_alt_ratio(ex2)
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
0          1.333333          0.666667  assuming p0 for Male
>>> es_alt_ratio(ex2, p0Cat='Female')
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
0          0.666667          1.333333  with p0 for Female
>>> es_alt_ratio(ex2, codes=['Female', 'Male'])
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
0          0.666667          1.333333  with p0 for Female

Example 3: pandas Series

>>> import pandas as pd
>>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
>>> es_alt_ratio(df1['sex'])
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2                 comment
0           1.10233           0.89767  assuming p0 for FEMALE
>>> es_alt_ratio(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
   Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
0          0.885755          1.114245  with p0 for DIVORCED
Expand source code
def es_alt_ratio(data, p0=0.5, p0Cat=None, codes=None):
    '''
    Alternative Ratio
    -----------------
     
    The Alternative Ratio is an effect size measure that could accompany a one-sample binomial, score or Wald test. It is simply the sample proportion (percentage), divided by the expected population proportion (often set at 0.5).
    
    The Alternative Ratio is only mentioned in the documentation of a program called PASS from NCSS (n.d.), and referred to as Relative Risk by JonB (2015).
    
    This function is shown in this [YouTube video](https://youtu.be/cpkzLBOh3zA) and the effect size is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/EffectSizes/AlternativeRatio.html)
    
    Parameters
    ----------
    data : list, tuple, array-like or pandas data series 
        the data; missing values are removed before counting
    p0 : float, optional 
        hypothesized proportion for the first category (default is 0.5)
    p0Cat : optional
        the category for which p0 was used (ignored if codes is provided)
    codes : list, optional 
        the two codes to use, the first one is the category for p0
        
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        - *Alt.Ratio Cat. 1* : the alternative ratio for the category associated with p0  
        - *Alt.Ratio Cat. 2* : the alternative ratio for the other category (using 1 - p0)  
        - *comment* : the category for which p0 was used
    
        Nothing (None) is returned if neither codes nor p0Cat is specified and the data does not have exactly two categories.
   
    Notes
    -----
    To decide on which category is associated with p0 the following is used:
    
    * If codes are provided, the first code is assumed to be the category for the p0. Only scores equal to one of the two codes are counted.
    * If p0Cat is specified that will be used for p0 and all other categories will be considered as category 2, this means if there are more than two categories the remaining two or more (besides p0Cat) will be merged as one large category.
    * If neither codes nor p0Cat is specified and the data does not have exactly two categories, a warning is printed and no results are returned.
    * If neither codes nor p0Cat is specified and there are two categories, p0 is assumed to be for the first category in the frequency table, i.e. the category with the highest count.
    
    If no scores remain to be counted (e.g. none of the codes occur in the data), both ratios are reported as NaN.
    
    The formula used is:
    $$AR=\\frac{p}{\\pi}$$
    
    *Symbols used*:
    
    * $p$ is the sample proportion of one of the categories
    * $\\pi$ the expected proportion ($\\pi = p_0$ for the first category, and $\\pi = 1 - p_0$ for the second)

    Before, After and Alternatives
    ------------------------------
    Before this effect size you might first want to perform a test:
    
    * [ts_binomial_os](../tests/test_binomial_os.html#ts_binomial_os) for a One-Sample Binomial Test
    * [ts_score_os](../tests/test_score_os.html#ts_score_os) for One-Sample Score Test
    * [ts_wald_os](../tests/test_wald_os.html#ts_wald_os) for One-Sample Wald Test

    Unfortunately I'm not aware of any rule-of-thumb for this measure.

    Alternatives could be:
    
    * [es_cohen_g](../effect_sizes/eff_size_cohen_g.html#es_cohen_g) for Cohen g
    * [es_cohen_h_os](../effect_sizes/eff_size_cohen_h_os.html#es_cohen_h_os) for Cohen h'
    * [r_rosenthal](../correlations/cor_rosenthal.html#r_rosenthal) for Rosenthal Correlation if a z-value is available
    
    References
    ----------
    JonB. (2015, October 14). Effect size of a binomial test and its relation to other measures of effect size. StackExchange - Cross Validated. https://stats.stackexchange.com/q/176856
    
    NCSS. (n.d.). Tests for one proportion. In PASS Sample Size Software (pp. 100-1-100–132). Retrieved November 10, 2018, from https://www.ncss.com/wp-content/themes/ncss/pdf/Procedures/PASS/Tests_for_One_Proportion.pdf
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    Examples
    --------
    
    Example 1: Numeric list
    >>> ex1 = [1, 1, 2, 1, 2, 1, 2, 1]
    >>> es_alt_ratio(ex1)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
    0              1.25              0.75  assuming p0 for 1
    
    >>> es_alt_ratio(ex1, p0=0.3)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2            comment
    0          2.083333          0.535714  assuming p0 for 1
    
    Example 2: Text list
    >>> ex2 = ["Female", "Male", "Male", "Female", "Male", "Male"]
    >>> es_alt_ratio(ex2)
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
    0          1.333333          0.666667  assuming p0 for Male
    
    >>> es_alt_ratio(ex2, p0Cat='Female')
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
    0          0.666667          1.333333  with p0 for Female
    
    >>> es_alt_ratio(ex2, codes=['Female', 'Male'])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2             comment
    0          0.666667          1.333333  with p0 for Female
    
    Example 3: pandas Series
    >>> import pandas as pd
    >>> df1 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> es_alt_ratio(df1['sex'])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2                 comment
    0           1.10233           0.89767  assuming p0 for FEMALE

    >>> es_alt_ratio(df1['mar1'], codes=["DIVORCED", "NEVER MARRIED"])
       Alt.Ratio Cat. 1  Alt.Ratio Cat. 2               comment
    0          0.885755          1.114245  with p0 for DIVORCED
    
    '''
    
    # Convert any non-pandas input (list, tuple, numpy array, ...) to a Series;
    # pandas objects are passed through untouched.
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        data = pd.Series(data)

    #remove missing values
    data = data.dropna()
        
    #Determine number of successes, failures, and total sample size
    if codes is not None:
        # only scores equal to one of the two codes count towards n
        n1 = (data == codes[0]).sum()
        n2 = (data == codes[1]).sum()
        n = n1 + n2
        cat_used = "with p0 for " + str(codes[0])
    elif p0Cat is not None:
        # everything that is not p0Cat is merged into the second category
        n = len(data)
        n1 = (data == p0Cat).sum()
        n2 = n - n1
        cat_used = "with p0 for " + str(p0Cat)
    else:
        #create a frequency table (sorted from highest to lowest count)
        freq = data.value_counts()

        #check if there were exactly two categories or not
        if len(freq) != 2:
            # unable to determine which category p0 would belong to, so print warning and end
            print("WARNING: data does not have two unique categories, please specify two categories using codes parameter")
            return

        # the frequency table is sorted descending, so the first category is
        # the most frequent one and is the one assumed to belong to p0
        n1 = freq.values[0]
        n2 = freq.values[1]
        n = n1 + n2
        cat_used = "assuming p0 for " + str(freq.index[0])
        
    if n == 0:
        # no scores to count: the sample proportions (and so the ratios) are undefined
        AR1 = AR2 = float("nan")
    else:
        AR1 = (n1 / n) / p0
        AR2 = (n2 / n) / (1 - p0)
    
    results = pd.DataFrame([[AR1, AR2, cat_used]], columns=["Alt.Ratio Cat. 1", "Alt.Ratio Cat. 2", "comment"])
    
    return results