Module stikpetP.measures.meas_means_stds

Expand source code
import pandas as pd

def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------

    This function returns the count, mean and standard deviation of the scores for each category (and optionally for all scores combined).

    Parameters
    ----------
    catField : pandas series, list or tuple
        data with categories
    scaleField : pandas series, list or tuple
        data with the scores
    categories : list, optional
        the categories to use from catField. If not set, all categories are used
    ddof : float, optional
        adjustment for the denominator (n - ddof) in the calculation of the standard deviation. Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        add a row labelled 'TOTAL' with the results for all (remaining) scores. Default is True

    Returns
    -------
    Dataframe with one row per category (and the optional 'TOTAL' row) and the columns:
    
    * *category*, name of the category
    * *count*, the number of scores in the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    Notes
    -----
    Rows with a missing category or a missing score are removed before any calculation.

    If both fields are pandas objects they are paired on their index. If exactly one of them is a list or tuple, the two are paired by position.

    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    #convert to pandas series if needed (isinstance also accepts list subclasses and tuples)
    cat_is_seq = isinstance(catField, (list, tuple))
    scale_is_seq = isinstance(scaleField, (list, tuple))

    if cat_is_seq:
        catField = pd.Series(catField)

    if scale_is_seq:
        scaleField = pd.Series(scaleField)

    #a plain sequence has no index of its own, so when it is combined with a
    #pandas object the pairing has to be positional. Without this the concat
    #below aligns on index labels and can silently drop or mismatch rows.
    if cat_is_seq != scale_is_seq:
        catField = catField.reset_index(drop=True)
        scaleField = scaleField.reset_index(drop=True)

    #combine as one dataframe
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    #remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub.category.isin(categories)]

    #the counts, means and standard deviation of each group (grouping only once)
    grouped = dfr_sub.groupby('category')['score']
    counts = grouped.count()
    means = grouped.mean()
    stds = grouped.std(ddof=ddof)

    # create a dataframe from them
    res = pd.concat([counts, means, stds], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores
        n = dfr_sub['score'].count()
        m = dfr_sub['score'].mean()
        s = dfr_sub['score'].std(ddof=ddof)
        # add it to the results, as a new last row
        res.loc[len(res)] = ['TOTAL', n, m, s]

    return res

Functions

def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True)

Means And Standard Deviations

This function returns the count, mean and standard deviation for each category (and optional total)

Parameters

catField : pandas series or list
data with categories
scaleField : pandas series or list
data with the scores
categories : list, optional
the categories to use from catField
ddof : float, optional
adjustment for the denominator (n - ddof) in the calculation of the standard deviation. Use 0 for population or biased sample, 1 (default) for sample
show_total : bool, optional
show the results for all scores.

Returns

Dataframe with:
 
  • category, name of the category
  • count, the number of scores in the category
  • mean, the arithmetic mean of the category
  • std., the standard deviation of the category

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code
def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------

    This function returns the count, mean and standard deviation of the scores for each category (and optionally for all scores combined).

    Parameters
    ----------
    catField : pandas series, list or tuple
        data with categories
    scaleField : pandas series, list or tuple
        data with the scores
    categories : list, optional
        the categories to use from catField. If not set, all categories are used
    ddof : float, optional
        adjustment for the denominator (n - ddof) in the calculation of the standard deviation. Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        add a row labelled 'TOTAL' with the results for all (remaining) scores. Default is True

    Returns
    -------
    Dataframe with one row per category (and the optional 'TOTAL' row) and the columns:
    
    * *category*, name of the category
    * *count*, the number of scores in the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    Notes
    -----
    Rows with a missing category or a missing score are removed before any calculation.

    If both fields are pandas objects they are paired on their index. If exactly one of them is a list or tuple, the two are paired by position.

    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    #convert to pandas series if needed (isinstance also accepts list subclasses and tuples)
    cat_is_seq = isinstance(catField, (list, tuple))
    scale_is_seq = isinstance(scaleField, (list, tuple))

    if cat_is_seq:
        catField = pd.Series(catField)

    if scale_is_seq:
        scaleField = pd.Series(scaleField)

    #a plain sequence has no index of its own, so when it is combined with a
    #pandas object the pairing has to be positional. Without this the concat
    #below aligns on index labels and can silently drop or mismatch rows.
    if cat_is_seq != scale_is_seq:
        catField = catField.reset_index(drop=True)
        scaleField = scaleField.reset_index(drop=True)

    #combine as one dataframe
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    #remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub.category.isin(categories)]

    #the counts, means and standard deviation of each group (grouping only once)
    grouped = dfr_sub.groupby('category')['score']
    counts = grouped.count()
    means = grouped.mean()
    stds = grouped.std(ddof=ddof)

    # create a dataframe from them
    res = pd.concat([counts, means, stds], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores
        n = dfr_sub['score'].count()
        m = dfr_sub['score'].mean()
        s = dfr_sub['score'].std(ddof=ddof)
        # add it to the results, as a new last row
        res.loc[len(res)] = ['TOTAL', n, m, s]

    return res