Module `stikpetP.measures.meas_means_stds`

Expand source code

import pandas as pd

def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------

    This function returns the count, mean and standard deviation for each category (and optional total)

    Parameters
    ----------
    catField : pandas series or list-like
        data with categories
    scaleField : pandas series or list-like
        data with the scores
    categories : list, optional
        the categories to use from catField
    ddof : float, optional
        adjustment for denominator (N - ddof) in calculation of standard deviation. Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        show the results for all scores.

    Returns
    -------
    Dataframe with:
    
    * *category*, name of the category
    * *count*, the count of the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    If *show_total* is True a last row with category 'TOTAL' is added, based on all scores that were used.

    Notes
    -----
    Rows with a missing value in either field are removed before the calculations.

    The two fields are combined with pandas concat, so two pandas series are aligned on their index, 
    while list-like input gets a default index (0, 1, 2, ...).

    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # convert to pandas series if needed
    # (anything that is not already a pandas object: list, tuple, numpy array, ...)
    if not isinstance(catField, (pd.Series, pd.DataFrame)):
        catField = pd.Series(catField)

    if not isinstance(scaleField, (pd.Series, pd.DataFrame)):
        scaleField = pd.Series(scaleField)

    # combine as one dataframe, and only keep the complete cases
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    # remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub.category.isin(categories)]

    # the counts, means and standard deviation of each group
    grouped = dfr_sub.groupby('category')
    counts = grouped.count()
    means = grouped.mean()
    stds = grouped.std(ddof=ddof)

    # create a dataframe from them
    res = pd.concat([counts, means, stds], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores
        n = dfr_sub['score'].count()
        m = dfr_sub['score'].mean()
        s = dfr_sub['score'].std(ddof=ddof)
        # add it to the results (the index was reset, so len(res) is the next free row label)
        res.loc[len(res)] = ['TOTAL', n, m, s]

    return res

Functions

def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True)

Means And Standard Deviations

This function returns the count, mean and standard deviation for each category (and optional total)

Parameters

catField : pandas series or list: data with categories
scaleField : pandas series or list: data with the scores
categories : list, optional: the categories to use from catField
ddof : float, optional: adjustment for denominator (N - ddof) in calculation of standard deviation. Use 0 for population or biased sample, 1 (default) for sample
show_total : bool, optional: show the results for all scores.

Returns

Dataframe with:

category, name of the category
count, the count of the category
mean, the arithmetic mean of the category
std., the standard deviation of the category

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------

    This function returns the count, mean and standard deviation for each category (and optional total)

    Parameters
    ----------
    catField : pandas series or list-like
        data with categories
    scaleField : pandas series or list-like
        data with the scores
    categories : list, optional
        the categories to use from catField
    ddof : float, optional
        adjustment for denominator (N - ddof) in calculation of standard deviation. Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        show the results for all scores.

    Returns
    -------
    Dataframe with:
    
    * *category*, name of the category
    * *count*, the count of the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    If *show_total* is True a last row with category 'TOTAL' is added, based on all scores that were used.

    Notes
    -----
    Rows with a missing value in either field are removed before the calculations.

    The two fields are combined with pandas concat, so two pandas series are aligned on their index, 
    while list-like input gets a default index (0, 1, 2, ...).

    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # convert to pandas series if needed
    # (anything that is not already a pandas object: list, tuple, numpy array, ...)
    if not isinstance(catField, (pd.Series, pd.DataFrame)):
        catField = pd.Series(catField)

    if not isinstance(scaleField, (pd.Series, pd.DataFrame)):
        scaleField = pd.Series(scaleField)

    # combine as one dataframe, and only keep the complete cases
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    # remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub.category.isin(categories)]

    # the counts, means and standard deviation of each group
    grouped = dfr_sub.groupby('category')
    counts = grouped.count()
    means = grouped.mean()
    stds = grouped.std(ddof=ddof)

    # create a dataframe from them
    res = pd.concat([counts, means, stds], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores
        n = dfr_sub['score'].count()
        m = dfr_sub['score'].mean()
        s = dfr_sub['score'].std(ddof=ddof)
        # add it to the results (the index was reset, so len(res) is the next free row label)
        res.loc[len(res)] = ['TOTAL', n, m, s]

    return res