Module stikpetP.measures.meas_means_stds
Expand source code
import pandas as pd
def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------
    Return the count, mean and standard deviation of the scores for each category (and optionally for all scores combined).

    Parameters
    ----------
    catField : pandas series or array-like (list, tuple, numpy array)
        data with categories
    scaleField : pandas series or array-like (list, tuple, numpy array)
        data with the scores
    categories : list, optional
        the categories to use from catField. Default (None) uses every category found.
    ddof : float, optional
        delta degrees of freedom; the standard deviation divides by (n - ddof). Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        add a final row labelled 'TOTAL' with the results for all scores that were used.

    Returns
    -------
    Dataframe with:

    * *category*, name of the category
    * *count*, the count of the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    Notes
    -----
    Rows in which the category or the score is missing are removed before any calculation.
    When *categories* is given, the 'TOTAL' row only covers the scores of the selected categories.

    If exactly one of the two inputs is a pandas series and the other is a plain
    sequence of the same length, the two are paired by position.
    Two pandas series are paired by their index labels.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    # convert to pandas series if needed (any non-series input, not only lists)
    cat_is_series = isinstance(catField, pd.Series)
    scale_is_series = isinstance(scaleField, pd.Series)
    if not cat_is_series:
        catField = pd.Series(catField)
    if not scale_is_series:
        scaleField = pd.Series(scaleField)

    # A plain sequence carries no index of its own. Give the freshly created
    # series the index of the other one, otherwise concat would align the
    # default 0..n-1 labels against the other index and mismatch/drop rows.
    if len(catField) == len(scaleField):
        if cat_is_series and not scale_is_series:
            scaleField.index = catField.index
        elif scale_is_series and not cat_is_series:
            catField.index = scaleField.index

    # combine as one dataframe, keeping only complete pairs
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    # remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub['category'].isin(categories)]

    # the counts, means and standard deviations of each group (one groupby, reused)
    grouped = dfr_sub.groupby('category')['score']
    res = pd.concat([grouped.count(), grouped.mean(), grouped.std(ddof=ddof)], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores that were used
        scores = dfr_sub['score']
        # add it to the results as an extra last row
        res.loc[len(res)] = ['TOTAL', scores.count(), scores.mean(), scores.std(ddof=ddof)]

    return res
Functions
def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True)
-
Means And Standard Deviations
This function returns the count, mean and standard deviation for each category (and optional total)
Parameters
catField : pandas series or list
- data with categories
scaleField : pandas series or list
- data with the scores
categories : list, optional
- the categories to use from catField
ddof : float, optional
- adjustment for the denominator (n - ddof) in the calculation of the standard deviation. Use 0 for population or biased sample, 1 (default) for sample
show_total : bool, optional
- show the results for all scores.
Returns
Dataframe with:
- category, name of the category
- count, the count of the category
- mean, the arithmetic mean of the category
- std., the standard deviation of the category
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def me_means_stds(catField, scaleField, categories=None, ddof=1, show_total=True):
    '''
    Means and Standard Deviations
    -----------------------------
    Return the count, mean and standard deviation of the scores for each category (and optionally for all scores combined).

    Parameters
    ----------
    catField : pandas series or array-like (list, tuple, numpy array)
        data with categories
    scaleField : pandas series or array-like (list, tuple, numpy array)
        data with the scores
    categories : list, optional
        the categories to use from catField. Default (None) uses every category found.
    ddof : float, optional
        delta degrees of freedom; the standard deviation divides by (n - ddof). Use 0 for population or biased sample, 1 (default) for sample
    show_total : bool, optional
        add a final row labelled 'TOTAL' with the results for all scores that were used.

    Returns
    -------
    Dataframe with:

    * *category*, name of the category
    * *count*, the count of the category
    * *mean*, the arithmetic mean of the category
    * *std.*, the standard deviation of the category

    Notes
    -----
    Rows in which the category or the score is missing are removed before any calculation.
    When *categories* is given, the 'TOTAL' row only covers the scores of the selected categories.

    If exactly one of the two inputs is a pandas series and the other is a plain
    sequence of the same length, the two are paired by position.
    Two pandas series are paired by their index labels.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    # convert to pandas series if needed (any non-series input, not only lists)
    cat_is_series = isinstance(catField, pd.Series)
    scale_is_series = isinstance(scaleField, pd.Series)
    if not cat_is_series:
        catField = pd.Series(catField)
    if not scale_is_series:
        scaleField = pd.Series(scaleField)

    # A plain sequence carries no index of its own. Give the freshly created
    # series the index of the other one, otherwise concat would align the
    # default 0..n-1 labels against the other index and mismatch/drop rows.
    if len(catField) == len(scaleField):
        if cat_is_series and not scale_is_series:
            scaleField.index = catField.index
        elif scale_is_series and not cat_is_series:
            catField.index = scaleField.index

    # combine as one dataframe, keeping only complete pairs
    dfr_sub = pd.concat([catField, scaleField], axis=1)
    dfr_sub = dfr_sub.dropna()
    dfr_sub.columns = ['category', 'score']

    # remove unused categories
    if categories is not None:
        dfr_sub = dfr_sub[dfr_sub['category'].isin(categories)]

    # the counts, means and standard deviations of each group (one groupby, reused)
    grouped = dfr_sub.groupby('category')['score']
    res = pd.concat([grouped.count(), grouped.mean(), grouped.std(ddof=ddof)], axis=1)
    res.columns = ['count', 'mean', 'std.']
    res = res.reset_index()

    if show_total:
        # count, mean and standard deviation of all scores that were used
        scores = dfr_sub['score']
        # add it to the results as an extra last row
        res.loc[len(res)] = ['TOTAL', scores.count(), scores.mean(), scores.std(ddof=ddof)]

    return res