Module stikpetP.visualisations.vis_bar_stacked_single

Expand source code
import pandas as pd
import matplotlib.pyplot as plt

def vi_bar_stacked_single(data, catCoding = None, orientation = "h"):
    '''
    Single Stacked Bar-Chart
    ------------------------
    
    A regular bar-chart but with the bars on top of each other, instead of next to each other. This is called a compound bar chart, stacked bar chart (Wilkinson, 2005, p. 157) or component bar chart (Zedeck, 2014, p. 54). 
    
    It can be defined as: “a bar chart showing multiple bars stacked at each x-axis category, each representing a value of the stacking variable” (Upton & Cook, 2014, p. 88).

    This function is shown in this [YouTube video](https://youtu.be/j92bv5gFwpI) and the visualisation is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/Visualisations/bar-chart.html)
    
    Parameters
    ----------
    data : list, tuple or pandas series 
        the data
    catCoding : dictionary, optional 
        the coding to use, as {label: integer code}. The codes determine the order of the segments. Values in the data that are not a key of the dictionary are ignored.
    orientation : {"h", "v"}, optional 
        indicate horizontal or vertical chart, Default is "h"
    
    Returns
    -------
    None, the chart is shown using pyplot's **show()**
    
    Raises
    ------
    ValueError
        if no values remain to plot after removing missing (and uncoded) values
    
    Notes
    -----
    This function uses the **barh()** function from pyplot for a horizontal chart, and the **bar()** function for a vertical chart.
    
    Each segment shows the percentage of a category, and each segment starts where the previous one ended.
    
    The legend labels follow the order of the segments. If *catCoding* assigns the same code to more than one label, those labels are combined into one legend entry separated by " / ".

    Before, After and Alternatives
    ------------------------------
    Before the visualisation you might first want to get an impression using a frequency table:
    * [tab_frequency](../other/table_frequency.html#tab_frequency)

    After visualisation you might want some descriptive measures:
    * [me_consensus](../measures/meas_consensus.html#me_consensus) for the Consensus
    * [me_hodges_lehmann_os](../measures/meas_hodges_lehmann_os.html#me_hodges_lehmann_os) for the Hodges-Lehmann Estimate (One-Sample)
    * [me_median](../measures/meas_median.html#me_median) for the Median
    * [me_quantiles](../measures/meas_quantiles.html#me_quantiles) for Quantiles
    * [me_quartiles](../measures/meas_quartiles.html#me_quartiles) for Quartiles / Hinges
    * [me_quartile_range](../measures/meas_quartile_range.html#me_quartile_range) for Interquartile Range, Semi-Interquartile Range and Mid-Quartile Range
    
    or perform a test:
    * [ts_sign_os](../tests/test_sign_os.html#ts_sign_os) for One-Sample Sign Test
    * [ts_trinomial_os](../tests/test_trinomial_os.html#ts_trinomial_os) for One-Sample Trinomial Test
    * [ts_wilcoxon_os](../tests/test_wilcoxon_os.html#ts_wilcoxon_os) for One-Sample Wilcoxon Signed Rank Test
    
    Alternatives for this visualisation could be:
    * [vi_bar_dual_axis](../visualisations/vis_bar_dual_axis.html#vi_bar_dual_axis) for Dual-Axis Bar Chart
    
    References 
    ----------
    Upton, G. J. G., & Cook, I. (2014). *Dictionary of statistics* (3rd ed.). Oxford University Press.
    
    Wilkinson, L. (2005). *The grammar of graphics* (2nd ed). Springer.
    
    Zedeck, S. (Ed.). (2014). *APA dictionary of statistics and research methods*. American Psychological Association.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    Examples
    ---------    
    Example 1: pandas series
    >>> df2 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/StudentStatistics.csv', sep=';', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> ex1 = df2['Teach_Motivate']
    >>> order = {"Fully Disagree":1, "Disagree":2, "Neither disagree nor agree":3, "Agree":4, "Fully agree":5}
    >>> vi_bar_stacked_single(ex1, catCoding=order);
    >>> vi_bar_stacked_single(ex1, catCoding=order, orientation="v");
    
    Example 2: Numeric data
    >>> ex2 = [1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5]
    >>> vi_bar_stacked_single(ex2);

    
    '''
    # lists and tuples are wrapped in a Series so the pandas methods below are available
    if isinstance(data, (list, tuple)):
        data = pd.Series(data)
        
    data = data.dropna()
    
    if catCoding is not None:
        # recode the labels to their codes; labels that are not in catCoding
        # become <NA> and are left out by value_counts() below.
        # Int64 (rather than Int8) so codes above 127 are supported as well.
        data = data.map(catCoding).astype('Int64')
    
    # frequencies in the order in which the segments will be stacked
    myFreq = data.value_counts().sort_index()
    if len(myFreq) == 0:
        raise ValueError("no values left to plot after removing missing values")
    
    if catCoding is None:
        categories = myFreq.index.tolist()
    else:
        # the legend has to follow the segments that are actually drawn (sorted
        # codes that occur in the data), not the order of the dictionary keys
        labelsByCode = {}
        for label, code in catCoding.items():
            labelsByCode.setdefault(code, []).append(str(label))
        categories = [" / ".join(labelsByCode[code]) for code in myFreq.index.tolist()]
    
    myPerc = myFreq/myFreq.sum()*100
    
    vertical = orientation=="v"
    plt.figure(figsize=(2, 5) if vertical else (5, 2))
    
    # each segment starts where the previous one ended
    start = 0.0
    for share in myPerc.tolist():
        if vertical:
            plt.bar(0, share, bottom=start, edgecolor='white', width = 0.2)
        else:
            plt.barh(0, share, left=start, edgecolor='white', height = 0.2)
        start += share
    
    plt.legend(categories, bbox_to_anchor=(1.05, 1))
    
    # only the percentage axis is meaningful, the other axis is hidden
    frame1 = plt.gca()
    if vertical:
        plt.ylabel('percent')
        frame1.axes.get_xaxis().set_visible(False)
    else:
        plt.xlabel('percent')
        frame1.axes.get_yaxis().set_visible(False)
        
    plt.show()
        
    return

Functions

def vi_bar_stacked_single(data, catCoding=None, orientation='h')

Single Stacked Bar-Chart

A regular bar-chart but with the bars on top of each other, instead of next to each other. This is called a compound bar chart, stacked bar chart (Wilkinson, 2005, p. 157) or component bar chart (Zedeck, 2014, p. 54).

It can be defined as: “a bar chart showing multiple bars stacked at each x-axis category, each representing a value of the stacking variable” (Upton & Cook, 2014, p. 88).

This function is shown in this YouTube video and the visualisation is also described at PeterStatistics.com

Parameters

data : list or pandas series
the data
catCoding : dictionary, optional
the coding to use
orientation : {"h", "v"}, optional
indicate horizontal or vertical chart, Default is "h"

Notes

This function uses the barh() function from pyplot for a horizontal chart, and the bar() function for a vertical chart.

Before, After and Alternatives

Before the visualisation you might first want to get an impression using a frequency table:
* tab_frequency

After visualisation you might want some descriptive measures:
* me_consensus for the Consensus
* me_hodges_lehmann_os for the Hodges-Lehmann Estimate (One-Sample)
* me_median for the Median
* me_quantiles for Quantiles
* me_quartiles for Quartiles / Hinges
* me_quartile_range for Interquartile Range, Semi-Interquartile Range and Mid-Quartile Range

or perform a test:
* ts_sign_os for One-Sample Sign Test
* ts_trinomial_os for One-Sample Trinomial Test
* ts_wilcoxon_os for One-Sample Wilcoxon Signed Rank Test

Alternatives for this visualisation could be:
* vi_bar_dual_axis for Dual-Axis Bar Chart

References

Upton, G. J. G., & Cook, I. (2014). Dictionary of statistics (3rd ed.). Oxford University Press.

Wilkinson, L. (2005). The grammar of graphics (2nd ed). Springer.

Zedeck, S. (Ed.). (2014). APA dictionary of statistics and research methods. American Psychological Association.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Examples

Example 1: pandas series

>>> df2 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/StudentStatistics.csv', sep=';', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
>>> ex1 = df2['Teach_Motivate']
>>> order = {"Fully Disagree":1, "Disagree":2, "Neither disagree nor agree":3, "Agree":4, "Fully agree":5}
>>> vi_bar_stacked_single(ex1, catCoding=order);
>>> vi_bar_stacked_single(ex1, catCoding=order, orientation="v");

Example 2: Numeric data

>>> ex2 = [1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5]
>>> vi_bar_stacked_single(ex2);
>>> ex2 = [1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5]
>>> vi_bar_stacked_single(ex2);
Expand source code
def vi_bar_stacked_single(data, catCoding = None, orientation = "h"):
    '''
    Single Stacked Bar-Chart
    ------------------------
    
    A regular bar-chart but with the bars on top of each other, instead of next to each other. This is called a compound bar chart, stacked bar chart (Wilkinson, 2005, p. 157) or component bar chart (Zedeck, 2014, p. 54). 
    
    It can be defined as: “a bar chart showing multiple bars stacked at each x-axis category, each representing a value of the stacking variable” (Upton & Cook, 2014, p. 88).

    This function is shown in this [YouTube video](https://youtu.be/j92bv5gFwpI) and the visualisation is also described at [PeterStatistics.com](https://peterstatistics.com/Terms/Visualisations/bar-chart.html)
    
    Parameters
    ----------
    data : list, tuple or pandas series 
        the data
    catCoding : dictionary, optional 
        the coding to use, as {label: integer code}. The codes determine the order of the segments. Values in the data that are not a key of the dictionary are ignored.
    orientation : {"h", "v"}, optional 
        indicate horizontal or vertical chart, Default is "h"
    
    Returns
    -------
    None, the chart is shown using pyplot's **show()**
    
    Raises
    ------
    ValueError
        if no values remain to plot after removing missing (and uncoded) values
    
    Notes
    -----
    This function uses the **barh()** function from pyplot for a horizontal chart, and the **bar()** function for a vertical chart.
    
    Each segment shows the percentage of a category, and each segment starts where the previous one ended.
    
    The legend labels follow the order of the segments. If *catCoding* assigns the same code to more than one label, those labels are combined into one legend entry separated by " / ".

    Before, After and Alternatives
    ------------------------------
    Before the visualisation you might first want to get an impression using a frequency table:
    * [tab_frequency](../other/table_frequency.html#tab_frequency)

    After visualisation you might want some descriptive measures:
    * [me_consensus](../measures/meas_consensus.html#me_consensus) for the Consensus
    * [me_hodges_lehmann_os](../measures/meas_hodges_lehmann_os.html#me_hodges_lehmann_os) for the Hodges-Lehmann Estimate (One-Sample)
    * [me_median](../measures/meas_median.html#me_median) for the Median
    * [me_quantiles](../measures/meas_quantiles.html#me_quantiles) for Quantiles
    * [me_quartiles](../measures/meas_quartiles.html#me_quartiles) for Quartiles / Hinges
    * [me_quartile_range](../measures/meas_quartile_range.html#me_quartile_range) for Interquartile Range, Semi-Interquartile Range and Mid-Quartile Range
    
    or perform a test:
    * [ts_sign_os](../tests/test_sign_os.html#ts_sign_os) for One-Sample Sign Test
    * [ts_trinomial_os](../tests/test_trinomial_os.html#ts_trinomial_os) for One-Sample Trinomial Test
    * [ts_wilcoxon_os](../tests/test_wilcoxon_os.html#ts_wilcoxon_os) for One-Sample Wilcoxon Signed Rank Test
    
    Alternatives for this visualisation could be:
    * [vi_bar_dual_axis](../visualisations/vis_bar_dual_axis.html#vi_bar_dual_axis) for Dual-Axis Bar Chart
    
    References 
    ----------
    Upton, G. J. G., & Cook, I. (2014). *Dictionary of statistics* (3rd ed.). Oxford University Press.
    
    Wilkinson, L. (2005). *The grammar of graphics* (2nd ed). Springer.
    
    Zedeck, S. (Ed.). (2014). *APA dictionary of statistics and research methods*. American Psychological Association.
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    Examples
    ---------    
    Example 1: pandas series
    >>> df2 = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/StudentStatistics.csv', sep=';', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'})
    >>> ex1 = df2['Teach_Motivate']
    >>> order = {"Fully Disagree":1, "Disagree":2, "Neither disagree nor agree":3, "Agree":4, "Fully agree":5}
    >>> vi_bar_stacked_single(ex1, catCoding=order);
    >>> vi_bar_stacked_single(ex1, catCoding=order, orientation="v");
    
    Example 2: Numeric data
    >>> ex2 = [1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5]
    >>> vi_bar_stacked_single(ex2);

    
    '''
    # lists and tuples are wrapped in a Series so the pandas methods below are available
    if isinstance(data, (list, tuple)):
        data = pd.Series(data)
        
    data = data.dropna()
    
    if catCoding is not None:
        # recode the labels to their codes; labels that are not in catCoding
        # become <NA> and are left out by value_counts() below.
        # Int64 (rather than Int8) so codes above 127 are supported as well.
        data = data.map(catCoding).astype('Int64')
    
    # frequencies in the order in which the segments will be stacked
    myFreq = data.value_counts().sort_index()
    if len(myFreq) == 0:
        raise ValueError("no values left to plot after removing missing values")
    
    if catCoding is None:
        categories = myFreq.index.tolist()
    else:
        # the legend has to follow the segments that are actually drawn (sorted
        # codes that occur in the data), not the order of the dictionary keys
        labelsByCode = {}
        for label, code in catCoding.items():
            labelsByCode.setdefault(code, []).append(str(label))
        categories = [" / ".join(labelsByCode[code]) for code in myFreq.index.tolist()]
    
    myPerc = myFreq/myFreq.sum()*100
    
    vertical = orientation=="v"
    plt.figure(figsize=(2, 5) if vertical else (5, 2))
    
    # each segment starts where the previous one ended
    start = 0.0
    for share in myPerc.tolist():
        if vertical:
            plt.bar(0, share, bottom=start, edgecolor='white', width = 0.2)
        else:
            plt.barh(0, share, left=start, edgecolor='white', height = 0.2)
        start += share
    
    plt.legend(categories, bbox_to_anchor=(1.05, 1))
    
    # only the percentage axis is meaningful, the other axis is hidden
    frame1 = plt.gca()
    if vertical:
        plt.ylabel('percent')
        frame1.axes.get_xaxis().set_visible(False)
    else:
        plt.xlabel('percent')
        frame1.axes.get_yaxis().set_visible(False)
        
    plt.show()
        
    return