Module `stikpetP.other.poho_pairwise_gof`

Expand source code

import pandas as pd
from ..tests.test_multinomial_gof import ts_multinomial_gof
from ..tests.test_powerdivergence_gof import ts_powerdivergence_gof
from ..tests.test_neyman_gof import ts_neyman_gof
from ..tests.test_mod_log_likelihood_gof import ts_mod_log_likelihood_gof
from ..tests.test_g_gof import ts_g_gof
from ..tests.test_freeman_tukey_read import ts_freeman_tukey_read
from ..tests.test_freeman_tukey_gof import ts_freeman_tukey_gof
from ..tests.test_pearson_gof import ts_pearson_gof
from ..other.p_adjustments import p_adjust


def ph_pairwise_gof(data, test="pearson", expCount=None, mtc='bonferroni', **kwargs):
    '''
    Pairwise Goodness-of-Fit Tests for Post-Hoc Analysis
    ----------------------------------------------------
    
    This function will perform a goodness-of-fit test for each possible pair of categories in the data. This could be any of the goodness-of-fit tests, e.g. a Pearson chi-square.

    The unadjusted p-values and the p-values adjusted for multiple testing (Bonferroni by default, see *mtc*) are both determined.

    This function is shown in this [YouTube video](https://youtu.be/DNx7-eVp16g) and the test is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/Tests/PostHocAfterGoF.html)
    
    Parameters
    ----------
    data : list or pandas series
    test : {"pearson", "freeman-tukey", "freeman-tukey-read", "g", "mod-log-g", "neyman", "powerdivergence", "multinomial"}, optional
        test to use for each pair
    expCount : pandas dataframe, optional 
        categories (first column) and expected counts (second column). If not provided, all categories found in the data are expected to be equally likely.
    mtc : string, optional
        any of the methods available in p_adjust() to correct for multiple tests
    **kwargs : optional
        additional arguments for the specific test that are passed along.
    
    Returns
    -------
    pandas.DataFrame
        A dataframe with one row per pair and the following columns:
    
        - *category 1* : the label of the first category
        - *category 2* : the label of the second category
        - *n1* : the sample size of the first category
        - *n2* : the sample size of the second category
        - *obs. prop. 1* : the proportion in the sample of the first category
        - *exp. prop. 1* : the expected proportion for the first category
        
        - *statistic* : the chi-square test statistic
        - *df* : the degrees of freedom of the test
        - *p-value* : the unadjusted significance
        - *adj. p-value* : the adjusted significance
        - *minExp* : the minimum expected count
        - *percBelow5* : the percentage of cells with an expected count below 5
        - *test* : description of the test used
        
        - In case of a multinomial test, the same columns except:
    
        - *p obs* instead of *statistic* : showing the probability of the observed sample table
        - *n combs.*, instead of *df* : showing the number of possible tables
        - no *minExp* and *percBelow5* column.

        If there are fewer than two categories there are no pairs to test, and an empty dataframe with these columns is returned.

    Raises
    ------
    ValueError
        If *test* is not one of the supported test names.
    KeyError
        If *expCount* lists a category that does not occur in *data*.

    Notes
    -----
    For each pair the expected counts are rescaled so that they sum to the combined size of the two categories, keeping the ratio between the two expected counts.
    
    Before, After and Alternatives
    ------------------------------
    Before this an omnibus test might be helpful, these are also the tests used on each pair:

    * [ts_pearson_gof](../tests/test_pearson_gof.html#ts_pearson_gof) for Pearson Chi-Square Goodness-of-Fit Test
    * [ts_freeman_tukey_gof](../tests/test_freeman_tukey_gof.html#ts_freeman_tukey_gof) for Freeman-Tukey Test of Goodness-of-Fit
    * [ts_freeman_tukey_read](../tests/test_freeman_tukey_read.html#ts_freeman_tukey_read) for Freeman-Tukey-Read Test of Goodness-of-Fit
    * [ts_g_gof](../tests/test_g_gof.html#ts_g_gof) for G (Likelihood Ratio) Goodness-of-Fit Test
    * [ts_mod_log_likelihood_gof](../tests/test_mod_log_likelihood_gof.html#ts_mod_log_likelihood_gof) for Mod-Log Likelihood Test of Goodness-of-Fit
    * [ts_multinomial_gof](../tests/test_multinomial_gof.html#ts_multinomial_gof) for Multinomial Goodness-of-Fit Test
    * [ts_neyman_gof](../tests/test_neyman_gof.html#ts_neyman_gof) for Neyman Test of Goodness-of-Fit
    * [ts_powerdivergence_gof](../tests/test_powerdivergence_gof.html#ts_powerdivergence_gof) for Power Divergence GoF Test
    
    After this you might want to add an effect size measure:

    * [es_post_hoc_gof](../effect_sizes/eff_size_post_hoc_gof.html#es_post_hoc_gof) for various effect sizes
    
    Alternative post-hoc tests:

    * [ph_pairwise_bin](../other/poho_pairwise_bin.html#ph_pairwise_bin) for Pairwise Binary Test
    * [ph_residual_gof_bin](../other/poho_residual_gof_bin.html#ph_residual_gof_bin) for Residuals Tests
    * [ph_residual_gof_gof](../other/poho_residual_gof_gof.html#ph_residual_gof_gof) for Residuals Using Goodness-of-Fit Tests

    More info on the adjustment for multiple testing:

    * [p_adjust](../other/p_adjustments.html#p_adjust)
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    Examples
    --------
    Examples: get data
    >>> import pandas as pd
    >>> pd.set_option('display.width',1000)
    >>> pd.set_option('display.max_columns', 1000)    
    >>> gss_df = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> ex1 = gss_df['mar1'];

    Example 1 using default settings:
    >>> ph_pairwise_gof(ex1)
          category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                        test
    0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  243.547184  1.0   6.626501e-55   6.626501e-54   683.5         0.0  Pearson chi-square test of goodness-of-fit
    1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  336.674961  1.0   3.380767e-75   3.380767e-74   643.0         0.0  Pearson chi-square test of goodness-of-fit
    2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  542.654814  1.0  4.987536e-120  4.987536e-119   576.5         0.0  Pearson chi-square test of goodness-of-fit
    3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  758.752617  1.0  5.015130e-167  5.015130e-166   525.5         0.0  Pearson chi-square test of goodness-of-fit
    4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.253879  1.0   2.349972e-03   2.349972e-02   354.5         0.0  Pearson chi-square test of goodness-of-fit
    5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   79.506944  1.0   4.805333e-19   4.805333e-18   288.0         0.0  Pearson chi-square test of goodness-of-fit
    6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  210.666667  1.0   9.826958e-48   9.826958e-47   237.0         0.0  Pearson chi-square test of goodness-of-fit
    7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   35.735354  1.0   2.260249e-09   2.260249e-08   247.5         0.0  Pearson chi-square test of goodness-of-fit
    8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  140.521628  1.0   2.047138e-32   2.047138e-31   196.5         0.0  Pearson chi-square test of goodness-of-fit
    9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.015385  1.0   2.519705e-10   2.519705e-09   130.0         0.0  Pearson chi-square test of goodness-of-fit

    Example 2 using a G test with Pearson correction:
    >>> ph_pairwise_gof(ex1, test="g", mtc='holm', cc='pearson')
          category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                               test
    0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  251.165891  1.0   1.446330e-56   1.012431e-55   683.5         0.0  G test of goodness-of-fit, and Pearson correction
    1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  352.887628  1.0   9.961507e-79   7.969206e-78   643.0         0.0  G test of goodness-of-fit, and Pearson correction
    2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  595.621443  1.0  1.500292e-131  1.350263e-130   576.5         0.0  G test of goodness-of-fit, and Pearson correction
    3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  895.323655  1.0  1.019541e-196  1.019541e-195   525.5         0.0  G test of goodness-of-fit, and Pearson correction
    4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.261034  1.0   2.340808e-03   2.340808e-03   354.5         0.0  G test of goodness-of-fit, and Pearson correction
    5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   81.303809  1.0   1.935532e-19   7.742128e-19   288.0         0.0  G test of goodness-of-fit, and Pearson correction
    6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  229.486329  1.0   7.715537e-52   4.629322e-51   237.0         0.0  G test of goodness-of-fit, and Pearson correction
    7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   36.105157  1.0   1.869514e-09   3.739029e-09   247.5         0.0  G test of goodness-of-fit, and Pearson correction
    8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  150.008211  1.0   1.726494e-34   8.632469e-34   196.5         0.0  G test of goodness-of-fit, and Pearson correction
    9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.952653  1.0   1.559621e-10   4.678864e-10   130.0         0.0  G test of goodness-of-fit, and Pearson correction
    
    '''
    # Validate the test name up front, so a typo gives a clear error instead
    # of failing later on an unassigned result variable.
    valid_tests = ("pearson", "freeman-tukey", "freeman-tukey-read", "g",
                   "mod-log-g", "neyman", "powerdivergence", "multinomial")
    if test not in valid_tests:
        raise ValueError(
            "unknown test '" + str(test) + "', use one of: " + ", ".join(valid_tests))

    is_multinomial = test == "multinomial"

    # Final column labels; the multinomial test reports different quantities.
    columns = ["category 1", "category 2", "n1", "n2", "obs. prop. 1", "exp. prop. 1"]
    if is_multinomial:
        columns = columns + ["p obs", "n combs.", "p-value", "adj. p-value", "test"]
    else:
        columns = columns + ["statistic", "df", "p-value", "adj. p-value",
                             "minExp", "percBelow5", "test"]

    if isinstance(data, list):
        data = pd.Series(data)

    freq = data.value_counts()

    if expCount is None:
        # assume all categories to be equally likely
        n = sum(freq)
        k = len(freq)
        categories = list(freq.index)
        expC = [n/k] * k if k > 0 else []

    else:
        # Only the categories listed in expCount are used; the expected
        # counts are rescaled to the number of observations in those
        # categories. A listed category missing from the data raises KeyError.
        categories = list(expCount.iloc[:, 0])
        given_counts = list(expCount.iloc[:, 1])
        nE = sum(given_counts)
        n = sum(freq[category] for category in categories)
        expC = [count/nE*n for count in given_counts]
        k = len(expC)

    # Without at least two categories there is nothing to compare.
    if k < 2:
        return pd.DataFrame(columns=columns)

    # Built only after validation and the early return above.
    gof_tests = {
        "pearson": ts_pearson_gof,
        "freeman-tukey": ts_freeman_tukey_gof,
        "freeman-tukey-read": ts_freeman_tukey_read,
        "g": ts_g_gof,
        "mod-log-g": ts_mod_log_likelihood_gof,
        "neyman": ts_neyman_gof,
        "powerdivergence": ts_powerdivergence_gof,
        "multinomial": ts_multinomial_gof,
    }
    run_test = gof_tests[test]

    # Positions in the single-pair test output that are copied to the result:
    # first three go to columns 6-8, the remainder to column 10 onwards.
    if is_multinomial:
        source_cols = (0, 1, 2, 3)
    else:
        source_cols = (2, 3, 4, 5, 6, 7)

    results = pd.DataFrame()
    resRow = 0
    for i in range(0, k-1):
        for j in range(i+1, k):
            # category names
            results.at[resRow, 0] = categories[i]
            results.at[resRow, 1] = categories[j]
            # category sizes
            n1 = freq[categories[i]]
            n2 = freq[categories[j]]
            results.at[resRow, 2] = n1
            results.at[resRow, 3] = n2

            # expected counts for this pair: keep the ratio between the two
            # expected proportions, but let them sum to n1 + n2
            expected_proportion_1 = expC[i]/n
            expected_proportion_2 = expC[j]/n
            proportion_sum = expected_proportion_1 + expected_proportion_2
            exp_count_1 = (n1 + n2)*(expected_proportion_1/proportion_sum)
            exp_count_2 = (n1 + n2)*(expected_proportion_2/proportion_sum)
            exP = pd.DataFrame([[categories[i], exp_count_1], [categories[j], exp_count_2]], columns=['category', 'count'])
            results.at[resRow, 4] = n1/(n1 + n2)
            results.at[resRow, 5] = exp_count_1/(n1 + n2)

            # only the observations belonging to this pair
            pair = [categories[i], categories[j]]
            data_pair = data[data.isin(pair)]

            pair_test_result = run_test(data_pair, expCounts=exP, **kwargs)
            picked = [pair_test_result.iloc[0, col] for col in source_cols]

            results.at[resRow, 6] = picked[0]
            results.at[resRow, 7] = picked[1]
            results.at[resRow, 8] = picked[2]
            # placeholder, replaced by the adjusted p-values after the loop
            results.at[resRow, 9] = results.at[resRow, 8]
            for offset, value in enumerate(picked[3:]):
                results.at[resRow, 10 + offset] = value

            resRow = resRow + 1

    # adjust all p-values together for the number of tests performed
    results.iloc[:,9] = p_adjust(results.iloc[:,8], method=mtc)

    results.columns = columns

    return results

Functions

def ph_pairwise_gof(data, test='pearson', expCount=None, mtc='bonferroni', **kwargs)

Pairwise Goodness-of-Fit Tests for Post-Hoc Analysis

This function will perform a goodness-of-fit test for each possible pair in the data. This could be any of the goodness-of-fit tests, e.g. a Pearson chi-square.

The unadjusted p-values and the p-values adjusted for multiple testing (Bonferroni by default, see mtc) are both determined.

This function is shown in this YouTube video and the test is also described at PeterStatistics.com

Parameters

data : list or pandas series
test : {"pearson", "freeman-tukey", "freeman-tukey-read", "g", "mod-log-g", "neyman", "powerdivergence", "multinomial"}, optional: test to use for each pair
expCount : pandas dataframe, optional: categories and expected counts
mtc : string, optional: any of the methods available in p_adjust() to correct for multiple tests
**kwargs : optional: additional arguments for the specific test that are passed along.

Returns

pandas.DataFrame

A dataframe with the following columns:

category 1 : the label of the first category
category 2 : the label of the second category
n1 : the sample size of the first category
n2 : the sample size of the second category
obs. prop. 1 : the proportion in the sample of the first category
exp. prop. 1 : the expected proportion for the first category
statistic : the chi-square test statistic
df : the degrees of freedom of the test
p-value : the unadjusted significance
adj. p-value : the adjusted significance
minExp : the minimum expected count
percBelow5 : the percentage of cells with an expected count below 5
test : description of the test used
In case of a multinomial test, the same columns except:
p obs instead of statistic : showing the probability of the observed sample table
n combs., instead of df : showing the number of possible tables
no minExp and percBelow5 columns.

Notes

None

Before, After and Alternatives

Before this an omnibus test might be helpful, these are also the tests used on each pair:

* ts_pearson_gof for Pearson Chi-Square Goodness-of-Fit Test
* ts_freeman_tukey_gof for Freeman-Tukey Test of Goodness-of-Fit
* ts_freeman_tukey_read for Freeman-Tukey-Read Test of Goodness-of-Fit
* ts_g_gof for G (Likelihood Ratio) Goodness-of-Fit Test
* ts_mod_log_likelihood_gof for Mod-Log Likelihood Test of Goodness-of-Fit
* ts_multinomial_gof for Multinomial Goodness-of-Fit Test
* ts_neyman_gof for Neyman Test of Goodness-of-Fit
* ts_powerdivergence_gof for Power Divergence GoF Test

After this you might want to add an effect size measure:

* es_post_hoc_gof for various effect sizes

Alternative post-hoc tests:

* ph_pairwise_bin for Pairwise Binary Test
* ph_residual_gof_bin for Residuals Tests
* ph_residual_gof_gof for Residuals Using Goodness-of-Fit Tests

More info on the adjustment for multiple testing:

* p_adjust

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Examples

Examples: get data

>>> import pandas as pd
>>> pd.set_option('display.width',1000)
>>> pd.set_option('display.max_columns', 1000)    
>>> gss_df = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
>>> ex1 = gss_df['mar1'];

Example 1 using default settings:

>>> ph_pairwise_gof(ex1)
      category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                        test
0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  243.547184  1.0   6.626501e-55   6.626501e-54   683.5         0.0  Pearson chi-square test of goodness-of-fit
1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  336.674961  1.0   3.380767e-75   3.380767e-74   643.0         0.0  Pearson chi-square test of goodness-of-fit
2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  542.654814  1.0  4.987536e-120  4.987536e-119   576.5         0.0  Pearson chi-square test of goodness-of-fit
3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  758.752617  1.0  5.015130e-167  5.015130e-166   525.5         0.0  Pearson chi-square test of goodness-of-fit
4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.253879  1.0   2.349972e-03   2.349972e-02   354.5         0.0  Pearson chi-square test of goodness-of-fit
5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   79.506944  1.0   4.805333e-19   4.805333e-18   288.0         0.0  Pearson chi-square test of goodness-of-fit
6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  210.666667  1.0   9.826958e-48   9.826958e-47   237.0         0.0  Pearson chi-square test of goodness-of-fit
7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   35.735354  1.0   2.260249e-09   2.260249e-08   247.5         0.0  Pearson chi-square test of goodness-of-fit
8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  140.521628  1.0   2.047138e-32   2.047138e-31   196.5         0.0  Pearson chi-square test of goodness-of-fit
9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.015385  1.0   2.519705e-10   2.519705e-09   130.0         0.0  Pearson chi-square test of goodness-of-fit

Example 2 using a G test with Pearson correction:

>>> ph_pairwise_gof(ex1, test="g", mtc='holm', cc='pearson')
      category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                               test
0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  251.165891  1.0   1.446330e-56   1.012431e-55   683.5         0.0  G test of goodness-of-fit, and Pearson correction
1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  352.887628  1.0   9.961507e-79   7.969206e-78   643.0         0.0  G test of goodness-of-fit, and Pearson correction
2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  595.621443  1.0  1.500292e-131  1.350263e-130   576.5         0.0  G test of goodness-of-fit, and Pearson correction
3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  895.323655  1.0  1.019541e-196  1.019541e-195   525.5         0.0  G test of goodness-of-fit, and Pearson correction
4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.261034  1.0   2.340808e-03   2.340808e-03   354.5         0.0  G test of goodness-of-fit, and Pearson correction
5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   81.303809  1.0   1.935532e-19   7.742128e-19   288.0         0.0  G test of goodness-of-fit, and Pearson correction
6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  229.486329  1.0   7.715537e-52   4.629322e-51   237.0         0.0  G test of goodness-of-fit, and Pearson correction
7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   36.105157  1.0   1.869514e-09   3.739029e-09   247.5         0.0  G test of goodness-of-fit, and Pearson correction
8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  150.008211  1.0   1.726494e-34   8.632469e-34   196.5         0.0  G test of goodness-of-fit, and Pearson correction
9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.952653  1.0   1.559621e-10   4.678864e-10   130.0         0.0  G test of goodness-of-fit, and Pearson correction

Expand source code

def ph_pairwise_gof(data, test="pearson", expCount=None, mtc='bonferroni', **kwargs):
    '''
    Pairwise Goodness-of-Fit Tests for Post-Hoc Analysis
    ----------------------------------------------------
    
    This function will perform a goodness-of-fit test for each possible pair of categories in the data. This could be any of the goodness-of-fit tests, e.g. a Pearson chi-square.

    The unadjusted p-values and the p-values adjusted for multiple testing (Bonferroni by default, see *mtc*) are both determined.

    This function is shown in this [YouTube video](https://youtu.be/DNx7-eVp16g) and the test is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/Tests/PostHocAfterGoF.html)
    
    Parameters
    ----------
    data : list or pandas series
    test : {"pearson", "freeman-tukey", "freeman-tukey-read", "g", "mod-log-g", "neyman", "powerdivergence", "multinomial"}, optional
        test to use for each pair
    expCount : pandas dataframe, optional 
        categories (first column) and expected counts (second column). If not provided, all categories found in the data are expected to be equally likely.
    mtc : string, optional
        any of the methods available in p_adjust() to correct for multiple tests
    **kwargs : optional
        additional arguments for the specific test that are passed along.
    
    Returns
    -------
    pandas.DataFrame
        A dataframe with one row per pair and the following columns:
    
        - *category 1* : the label of the first category
        - *category 2* : the label of the second category
        - *n1* : the sample size of the first category
        - *n2* : the sample size of the second category
        - *obs. prop. 1* : the proportion in the sample of the first category
        - *exp. prop. 1* : the expected proportion for the first category
        
        - *statistic* : the chi-square test statistic
        - *df* : the degrees of freedom of the test
        - *p-value* : the unadjusted significance
        - *adj. p-value* : the adjusted significance
        - *minExp* : the minimum expected count
        - *percBelow5* : the percentage of cells with an expected count below 5
        - *test* : description of the test used
        
        - In case of a multinomial test, the same columns except:
    
        - *p obs* instead of *statistic* : showing the probability of the observed sample table
        - *n combs.*, instead of *df* : showing the number of possible tables
        - no *minExp* and *percBelow5* column.

        If there are fewer than two categories there are no pairs to test, and an empty dataframe with these columns is returned.

    Raises
    ------
    ValueError
        If *test* is not one of the supported test names.
    KeyError
        If *expCount* lists a category that does not occur in *data*.

    Notes
    -----
    For each pair the expected counts are rescaled so that they sum to the combined size of the two categories, keeping the ratio between the two expected counts.
    
    Before, After and Alternatives
    ------------------------------
    Before this an omnibus test might be helpful, these are also the tests used on each pair:

    * [ts_pearson_gof](../tests/test_pearson_gof.html#ts_pearson_gof) for Pearson Chi-Square Goodness-of-Fit Test
    * [ts_freeman_tukey_gof](../tests/test_freeman_tukey_gof.html#ts_freeman_tukey_gof) for Freeman-Tukey Test of Goodness-of-Fit
    * [ts_freeman_tukey_read](../tests/test_freeman_tukey_read.html#ts_freeman_tukey_read) for Freeman-Tukey-Read Test of Goodness-of-Fit
    * [ts_g_gof](../tests/test_g_gof.html#ts_g_gof) for G (Likelihood Ratio) Goodness-of-Fit Test
    * [ts_mod_log_likelihood_gof](../tests/test_mod_log_likelihood_gof.html#ts_mod_log_likelihood_gof) for Mod-Log Likelihood Test of Goodness-of-Fit
    * [ts_multinomial_gof](../tests/test_multinomial_gof.html#ts_multinomial_gof) for Multinomial Goodness-of-Fit Test
    * [ts_neyman_gof](../tests/test_neyman_gof.html#ts_neyman_gof) for Neyman Test of Goodness-of-Fit
    * [ts_powerdivergence_gof](../tests/test_powerdivergence_gof.html#ts_powerdivergence_gof) for Power Divergence GoF Test
    
    After this you might want to add an effect size measure:

    * [es_post_hoc_gof](../effect_sizes/eff_size_post_hoc_gof.html#es_post_hoc_gof) for various effect sizes
    
    Alternative post-hoc tests:

    * [ph_pairwise_bin](../other/poho_pairwise_bin.html#ph_pairwise_bin) for Pairwise Binary Test
    * [ph_residual_gof_bin](../other/poho_residual_gof_bin.html#ph_residual_gof_bin) for Residuals Tests
    * [ph_residual_gof_gof](../other/poho_residual_gof_gof.html#ph_residual_gof_gof) for Residuals Using Goodness-of-Fit Tests

    More info on the adjustment for multiple testing:

    * [p_adjust](../other/p_adjustments.html#p_adjust)
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076

    Examples
    --------
    Examples: get data
    >>> import pandas as pd
    >>> pd.set_option('display.width',1000)
    >>> pd.set_option('display.max_columns', 1000)    
    >>> gss_df = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> ex1 = gss_df['mar1'];

    Example 1 using default settings:
    >>> ph_pairwise_gof(ex1)
          category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                        test
    0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  243.547184  1.0   6.626501e-55   6.626501e-54   683.5         0.0  Pearson chi-square test of goodness-of-fit
    1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  336.674961  1.0   3.380767e-75   3.380767e-74   643.0         0.0  Pearson chi-square test of goodness-of-fit
    2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  542.654814  1.0  4.987536e-120  4.987536e-119   576.5         0.0  Pearson chi-square test of goodness-of-fit
    3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  758.752617  1.0  5.015130e-167  5.015130e-166   525.5         0.0  Pearson chi-square test of goodness-of-fit
    4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.253879  1.0   2.349972e-03   2.349972e-02   354.5         0.0  Pearson chi-square test of goodness-of-fit
    5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   79.506944  1.0   4.805333e-19   4.805333e-18   288.0         0.0  Pearson chi-square test of goodness-of-fit
    6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  210.666667  1.0   9.826958e-48   9.826958e-47   237.0         0.0  Pearson chi-square test of goodness-of-fit
    7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   35.735354  1.0   2.260249e-09   2.260249e-08   247.5         0.0  Pearson chi-square test of goodness-of-fit
    8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  140.521628  1.0   2.047138e-32   2.047138e-31   196.5         0.0  Pearson chi-square test of goodness-of-fit
    9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.015385  1.0   2.519705e-10   2.519705e-09   130.0         0.0  Pearson chi-square test of goodness-of-fit

    Example 2 using a G test with Pearson correction:
    >>> ph_pairwise_gof(ex1, test="g", mtc='holm', cc='pearson')
          category 1     category 2     n1     n2  obs. prop. 1  exp. prop. 1   statistic   df        p-value   adj. p-value  minExp  percBelow5                                               test
    0        MARRIED  NEVER MARRIED  972.0  395.0      0.711046           0.5  251.165891  1.0   1.446330e-56   1.012431e-55   683.5         0.0  G test of goodness-of-fit, and Pearson correction
    1        MARRIED       DIVORCED  972.0  314.0      0.755832           0.5  352.887628  1.0   9.961507e-79   7.969206e-78   643.0         0.0  G test of goodness-of-fit, and Pearson correction
    2        MARRIED        WIDOWED  972.0  181.0      0.843018           0.5  595.621443  1.0  1.500292e-131  1.350263e-130   576.5         0.0  G test of goodness-of-fit, and Pearson correction
    3        MARRIED      SEPARATED  972.0   79.0      0.924833           0.5  895.323655  1.0  1.019541e-196  1.019541e-195   525.5         0.0  G test of goodness-of-fit, and Pearson correction
    4  NEVER MARRIED       DIVORCED  395.0  314.0      0.557123           0.5    9.261034  1.0   2.340808e-03   2.340808e-03   354.5         0.0  G test of goodness-of-fit, and Pearson correction
    5  NEVER MARRIED        WIDOWED  395.0  181.0      0.685764           0.5   81.303809  1.0   1.935532e-19   7.742128e-19   288.0         0.0  G test of goodness-of-fit, and Pearson correction
    6  NEVER MARRIED      SEPARATED  395.0   79.0      0.833333           0.5  229.486329  1.0   7.715537e-52   4.629322e-51   237.0         0.0  G test of goodness-of-fit, and Pearson correction
    7       DIVORCED        WIDOWED  314.0  181.0      0.634343           0.5   36.105157  1.0   1.869514e-09   3.739029e-09   247.5         0.0  G test of goodness-of-fit, and Pearson correction
    8       DIVORCED      SEPARATED  314.0   79.0      0.798982           0.5  150.008211  1.0   1.726494e-34   8.632469e-34   196.5         0.0  G test of goodness-of-fit, and Pearson correction
    9        WIDOWED      SEPARATED  181.0   79.0      0.696154           0.5   40.952653  1.0   1.559621e-10   4.678864e-10   130.0         0.0  G test of goodness-of-fit, and Pearson correction
    
    '''
    # Validate the test name up front, so a typo gives a clear error instead
    # of failing later on an unassigned result variable.
    valid_tests = ("pearson", "freeman-tukey", "freeman-tukey-read", "g",
                   "mod-log-g", "neyman", "powerdivergence", "multinomial")
    if test not in valid_tests:
        raise ValueError(
            "unknown test '" + str(test) + "', use one of: " + ", ".join(valid_tests))

    is_multinomial = test == "multinomial"

    # Final column labels; the multinomial test reports different quantities.
    columns = ["category 1", "category 2", "n1", "n2", "obs. prop. 1", "exp. prop. 1"]
    if is_multinomial:
        columns = columns + ["p obs", "n combs.", "p-value", "adj. p-value", "test"]
    else:
        columns = columns + ["statistic", "df", "p-value", "adj. p-value",
                             "minExp", "percBelow5", "test"]

    if isinstance(data, list):
        data = pd.Series(data)

    freq = data.value_counts()

    if expCount is None:
        # assume all categories to be equally likely
        n = sum(freq)
        k = len(freq)
        categories = list(freq.index)
        expC = [n/k] * k if k > 0 else []

    else:
        # Only the categories listed in expCount are used; the expected
        # counts are rescaled to the number of observations in those
        # categories. A listed category missing from the data raises KeyError.
        categories = list(expCount.iloc[:, 0])
        given_counts = list(expCount.iloc[:, 1])
        nE = sum(given_counts)
        n = sum(freq[category] for category in categories)
        expC = [count/nE*n for count in given_counts]
        k = len(expC)

    # Without at least two categories there is nothing to compare.
    if k < 2:
        return pd.DataFrame(columns=columns)

    # Built only after validation and the early return above.
    gof_tests = {
        "pearson": ts_pearson_gof,
        "freeman-tukey": ts_freeman_tukey_gof,
        "freeman-tukey-read": ts_freeman_tukey_read,
        "g": ts_g_gof,
        "mod-log-g": ts_mod_log_likelihood_gof,
        "neyman": ts_neyman_gof,
        "powerdivergence": ts_powerdivergence_gof,
        "multinomial": ts_multinomial_gof,
    }
    run_test = gof_tests[test]

    # Positions in the single-pair test output that are copied to the result:
    # first three go to columns 6-8, the remainder to column 10 onwards.
    if is_multinomial:
        source_cols = (0, 1, 2, 3)
    else:
        source_cols = (2, 3, 4, 5, 6, 7)

    results = pd.DataFrame()
    resRow = 0
    for i in range(0, k-1):
        for j in range(i+1, k):
            # category names
            results.at[resRow, 0] = categories[i]
            results.at[resRow, 1] = categories[j]
            # category sizes
            n1 = freq[categories[i]]
            n2 = freq[categories[j]]
            results.at[resRow, 2] = n1
            results.at[resRow, 3] = n2

            # expected counts for this pair: keep the ratio between the two
            # expected proportions, but let them sum to n1 + n2
            expected_proportion_1 = expC[i]/n
            expected_proportion_2 = expC[j]/n
            proportion_sum = expected_proportion_1 + expected_proportion_2
            exp_count_1 = (n1 + n2)*(expected_proportion_1/proportion_sum)
            exp_count_2 = (n1 + n2)*(expected_proportion_2/proportion_sum)
            exP = pd.DataFrame([[categories[i], exp_count_1], [categories[j], exp_count_2]], columns=['category', 'count'])
            results.at[resRow, 4] = n1/(n1 + n2)
            results.at[resRow, 5] = exp_count_1/(n1 + n2)

            # only the observations belonging to this pair
            pair = [categories[i], categories[j]]
            data_pair = data[data.isin(pair)]

            pair_test_result = run_test(data_pair, expCounts=exP, **kwargs)
            picked = [pair_test_result.iloc[0, col] for col in source_cols]

            results.at[resRow, 6] = picked[0]
            results.at[resRow, 7] = picked[1]
            results.at[resRow, 8] = picked[2]
            # placeholder, replaced by the adjusted p-values after the loop
            results.at[resRow, 9] = results.at[resRow, 8]
            for offset, value in enumerate(picked[3:]):
                results.at[resRow, 10 + offset] = value

            resRow = resRow + 1

    # adjust all p-values together for the number of tests performed
    results.iloc[:,9] = p_adjust(results.iloc[:,8], method=mtc)

    results.columns = columns

    return results