1. Bar Plots#

1.1. Two axes combined#

# Install and load packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wbgapi as wb
import matplotlib.patches as patches
import warnings
warnings.filterwarnings('ignore')
# Load an example dataset

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/BarPlotsCombineTwoAxes.dta?raw=true")

data2 = data[data.food_group.isin(['animal', 'fruit', 'grain', 'veg', 'starch', 'processed_sugar'])]
# Plot

def twin_barplot(x,y,color, alpha, **kwargs):
    ax = plt.twinx()
    sns.barplot(x=x,y=y,color=color,alpha = alpha, **kwargs, ax=ax)


g2 = sns.FacetGrid(data2, col = "food_group", height=4, col_wrap=3)

g2.map(sns.lineplot, "int1mo", "total_exp", color = 'black')

g2.map(twin_barplot, "int1mo", "number_group", color = 'grey', alpha = 0.5)

plt.plot();
../_images/dd45669055e1a11b94f64d5a286a48a06b33627ea0407b49c948a1681d7bf065.png

1.2. Multiple variables#

#Load an example dataset

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/BarPlotsMultipleVariables.dta?raw=true")
# Collapse the dataset across each treatment group

data2= (data
       .groupby(['treatment_group'])
       .agg({'treatment_group':'mean', 
             'bl_w_Main_Paddy_Prod':'mean','ml_w_Main_Paddy_Prod':'mean', 
             'bl_w_Spring_Winter_Potato_Prod':'mean', 'ml_w_Spring_Winter_Potato_Prod':'mean',
             'bl_w_Summer_Maize_Prod':'mean', 'ml_w_Summer_Maize_Prod':'mean',
            }))

data2_reset=data2.reset_index(drop=True)


# Reshape data
data3 = pd.melt(data2_reset, id_vars=['treatment_group'])

cond = data3['variable'].str.contains('bl', case = False)
data3['name']= np.where(cond, 'bl', 'ml')

def change (q):
    variable = str(q['variable'])
    if variable == 'ml_w_Main_Paddy_Prod' or 'bl_w_Main_Paddy_Prod':
        if 'Main' in variable:
            return ('Main_Paddy_Prod')
        elif 'Potato' in variable:
            return ('Spring_Winter_Potato_Prod')
        elif 'Maize' in variable:
            return ('Summer_Maize_Prod')
    else:
        pass
data3["variable"] = data3.apply(change, axis=1)

data3['name.group'] = data3.name.astype(str).str.cat(data3[['treatment_group']].astype(str), sep = '.')
# Plot

plt.figure(figsize=(14,10))

ax = sns.barplot(data = data3, x = 'value', y = 'variable', hue = 'name.group', palette ='pastel')

plt.bar_label(ax.containers[0], fmt='%.1f', fontsize = 14)
plt.bar_label(ax.containers[1], fmt='%.1f', fontsize = 14)
plt.bar_label(ax.containers[2], fmt='%.1f', fontsize = 14)
plt.bar_label(ax.containers[3], fmt='%.1f', fontsize = 14)

plt.legend(loc="lower right", ncol = len(ax.lines))
plt.show()
../_images/4222520b13c876ab7c9ec91f4171026f7781487596b819ab5a75b8af504529fa.png

1.3. Two variables#

# Load an example dataset 

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/BarPlotsTwoVariables.dta?raw=true")
# Construct an interaction 

data['sc'] = data.study.astype(str).str.cat(data[['case']].astype(str), sep = '.')
# Plot 

plt.figure(figsize=(14,10))

ax = sns.barplot(data=data, x = 'sc', y = 'treat_correct', hue = 'type', palette = 'pastel')

plt.ylabel('Providers ordering correct treatment $→$', fontsize=20)
plt.xlabel('', fontsize=20)

plt.bar_label(ax.containers[0], fmt='%.2f', fontsize = 15)
plt.bar_label(ax.containers[1], fmt='%.2f', fontsize = 15)

plt.legend(title = 'Measurement', loc="upper center", ncol = len(ax.lines), fontsize=14)
plt.show()
../_images/8e7eeb9eca82ae597a1c453fa764d4e749ffd1935ecfa411b1a706df793c7ef1.png

1.4. Horizontal multiple variables#

# Load an example dataset

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/HorizontalBarMultipleVars.dta?raw=true")
# Collapse the dataset across each treatment group

data['refer'] = data['refer'].map({'No': 0, 'Yes': 1})
data[["refer"]] = data[["refer"]].apply(pd.to_numeric)
data['med_class_any_16'] = data['med_class_any_16'].map({'No': 0, 'Steroids': 1})
data[["med_class_any_16"]] = data[["med_class_any_16"]].apply(pd.to_numeric)
data['med_class_any_6'] = data['med_class_any_6'].map({'No': 0, 'Antibiotics': 1})
data[["med_class_any_6"]] = data[["med_class_any_6"]].apply(pd.to_numeric)
data[["med_any"]] = data[["med_any"]].apply(pd.to_numeric)

data2= (data
       .groupby(['facility_type'])
       .agg({'facility_type':'first','as_correct':'mean','ch_correct': 'mean','cp_correct' : 'mean',
             'tb_correct': 'mean', 'med_any': 'mean',
            'checklist': 'mean', 'refer': 'mean', 
            'med_any':'mean', 'med_class_any_6': 'mean',
            'med_class_any_16': 'mean'}))

data2_reset=data2.reset_index(drop=True)


# Reshape data
data3 = pd.melt(data2_reset, id_vars=['facility_type'], value_vars=['as_correct', 'ch_correct', 'cp_correct', 'tb_correct','med_any','checklist','refer','med_class_any_6','med_class_any_16'])

data3["variable"] = data3["variable"].astype("category")
data3["variable"] = data3["variable"].cat.rename_categories(["Asthma: Inhaler/Bronchodilator",
                                                           "Child Diarrhoea: ORS",
                                                             "Checklist",
                                                           "Chest Pain: Referral/Aspirin/ECG",
                                                             "Any medication",
                                                             "Steroids",
                                                             "Antibiotics",
                                                             "Referred (non-diarrhea)",
                                                           "Tuberculosis: AFB Smear"])
# Plot

plt.figure(figsize=(14,10))

ax = sns.barplot(data=data3, x = 'value', y = 'variable', hue = 'facility_type', palette ='pastel')

plt.bar_label(ax.containers[0], fmt='%.2f', fontsize = 11)
plt.bar_label(ax.containers[1], fmt='%.2f', fontsize = 11)
plt.bar_label(ax.containers[2], fmt='%.2f', fontsize = 11)

plt.legend(loc="lower right")
plt.show()
../_images/03a793cbcb1170a41cbb33477233eb126bf2371fc1b6a5397df015535c665a5a.png

1.5. Decreasing order#

# Load example datasets 

data = wb.data.DataFrame('NY.GDP.PCAP.KD', time = 2018)
country_code = wb.economy.DataFrame()

data2 = pd.concat([data, country_code], axis=1)
data3 = data2[data2['aggregate'] == False]

data4 = data3.nsmallest(30, 'NY.GDP.PCAP.KD', keep = 'all')
#Plot

plt.figure(figsize=(14,10))

ax = sns.barplot(data=data4, x = 'NY.GDP.PCAP.KD', y = 'name', hue = 'region', 
                 order=data4.sort_values('NY.GDP.PCAP.KD',ascending = False).name,
                palette='pastel')
plt.bar_label(ax.containers[0], fmt='%.1f', fontsize = 11)
plt.bar_label(ax.containers[1], fmt='%.1f', fontsize = 11)
plt.bar_label(ax.containers[2], fmt='%.1f', fontsize = 11)
plt.bar_label(ax.containers[3], fmt='%.1f', fontsize = 11)
plt.bar_label(ax.containers[4], fmt='%.1f', fontsize = 11)

ax.set_xlim(1,1500)

plt.ylabel('')
plt.xlabel('GDP per capita (constant 2000 US$) in 2018', fontsize = 14)
plt.legend(title = 'Region', loc = "lower right", fontsize=12)
plt.show()
../_images/57fae305f6f5e6d43d834799af2814b95d89ae1c931a7498c40b0d46d7de5d11.png

1.6. Horizontal bar plot#

#  Load example dataset

data1 = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/HorizontalBarPlot1.dta?raw=true")
data2 = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/HorizontalBarPlot2.dta?raw=true")

data = pd.concat([data2, data1], axis=0)
# Collapse data

data4= (data1
       .groupby(['study'])
       .agg({'study':'first','hours':'mean'}))
# Plot

plt.figure(figsize=(14,10))

ax = sns.barplot(data = data4, x = 'hours', y = 'study', color = 'grey')

plt.bar_label(ax.containers[0], fmt='%.1f', fontsize = 14)

plt.show()
../_images/2e9251797f5d4bc911743965775cff097bf1afdb2e976ad688c5312b0bd7b624.png

1.7. Horizontal decreasing order#

# Load an example data set

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/HorizontalBetterbar.dta?raw=true")
data['sp1_h1'] = data['sp1_h1'].map({'No': 0, 'Yes': 1})
data[["sp1_h1"]] = data[["sp1_h1"]].apply(pd.to_numeric)

data['sp1_h2'] = data['sp1_h2'].map({'No': 0, 'Yes': 1})
data[["sp1_h2"]] = data[["sp1_h2"]].apply(pd.to_numeric)

data['sp1_h3'] = data['sp1_h3'].map({'No': 0, 'Yes': 1})
data[["sp1_h3"]] = data[["sp1_h3"]].apply(pd.to_numeric)

data['sp1_h4'] = data['sp1_h4'].map({'No': 0, 'Yes': 1})
data[["sp1_h4"]] = data[["sp1_h4"]].apply(pd.to_numeric)

data['sp1_h5'] = data['sp1_h5'].map({'No': 0, 'Yes': 1})
data[["sp1_h5"]] = data[["sp1_h5"]].apply(pd.to_numeric)

data['sp1_h7'] = data['sp1_h7'].map({'No': 0, 'Yes': 1})
data[["sp1_h7"]] = data[["sp1_h7"]].apply(pd.to_numeric)

data['sp1_h8'] = data['sp1_h8'].map({'No': 0, 'Yes': 1})
data[["sp1_h8"]] = data[["sp1_h8"]].apply(pd.to_numeric)

data['sp1_h9'] = data['sp1_h9'].map({'No': 0, 'Yes': 1})
data[["sp1_h9"]] = data[["sp1_h9"]].apply(pd.to_numeric)

data['sp1_h10'] = data['sp1_h10'].map({'No': 0, 'Yes': 1})
data[["sp1_h10"]] = data[["sp1_h10"]].apply(pd.to_numeric)

data['sp1_h11'] = data['sp1_h11'].map({'No': 0, 'Yes': 1})
data[["sp1_h11"]] = data[["sp1_h11"]].apply(pd.to_numeric)

data['sp1_h12'] = data['sp1_h12'].map({'No': 0, 'Yes': 1})
data[["sp1_h12"]] = data[["sp1_h12"]].apply(pd.to_numeric)

data['sp1_h13'] = data['sp1_h13'].map({'No': 0, 'Yes': 1})
data[["sp1_h13"]] = data[["sp1_h13"]].apply(pd.to_numeric)

data['sp1_h14'] = data['sp1_h14'].map({'No': 0, 'Yes': 1})
data[["sp1_h14"]] = data[["sp1_h14"]].apply(pd.to_numeric)

data['sp1_h15'] = data['sp1_h15'].map({'No': 0, 'Yes': 1})
data[["sp1_h15"]] = data[["sp1_h15"]].apply(pd.to_numeric)

data['sp1_h16'] = data['sp1_h16'].map({'No': 0, 'Yes': 1})
data[["sp1_h16"]] = data[["sp1_h16"]].apply(pd.to_numeric)

data['sp1_h17'] = data['sp1_h17'].map({'No': 0, 'Yes': 1})
data[["sp1_h17"]] = data[["sp1_h17"]].apply(pd.to_numeric)

data['sp1_h18'] = data['sp1_h18'].map({'No': 0, 'Yes': 1})
data[["sp1_h18"]] = data[["sp1_h18"]].apply(pd.to_numeric)

data['sp1_h19'] = data['sp1_h19'].map({'No': 0, 'Yes': 1})
data[["sp1_h19"]] = data[["sp1_h19"]].apply(pd.to_numeric)

data['sp1_h20'] = data['sp1_h20'].map({'No': 0, 'Yes': 1})
data[["sp1_h20"]] = data[["sp1_h20"]].apply(pd.to_numeric)

data['sp1_h21'] = data['sp1_h21'].map({'No': 0, 'Yes': 1})
data[["sp1_h21"]] = data[["sp1_h21"]].apply(pd.to_numeric)

data['sp1_e1'] = data['sp1_e1'].map({'No': 0, 'Yes': 1})
data[["sp1_e1"]] = data[["sp1_e1"]].apply(pd.to_numeric)

data['sp1_e2'] = data['sp1_e2'].map({'No': 0, 'Yes': 1})
data[["sp1_e2"]] = data[["sp1_e2"]].apply(pd.to_numeric)

data['sp1_e3'] = data['sp1_e3'].map({'No': 0, 'Yes': 1})
data[["sp1_e3"]] = data[["sp1_e3"]].apply(pd.to_numeric)

data['sp1_e5'] = data['sp1_e5'].map({'No': 0, 'Yes': 1})
data[["sp1_e5"]] = data[["sp1_e5"]].apply(pd.to_numeric)

data['sp1_e6'] = data['sp1_e6'].map({'No': 0, 'Yes': 1})
data[["sp1_e6"]] = data[["sp1_e6"]].apply(pd.to_numeric)
# Select data

data2 = data[["sp1_h1", "sp1_h2", "sp1_h3", "sp1_h4", "sp1_h5", "sp1_h7", "sp1_h8", 
              "sp1_h9", "sp1_h10", "sp1_h11", "sp1_h12", "sp1_h13", "sp1_h14", "sp1_h15",
              "sp1_h16", "sp1_h17", "sp1_h18", "sp1_h19", "sp1_h20", "sp1_h21", 
              "sp1_e1", "sp1_e2", "sp1_e3", "sp1_e5", "sp1_e6"]]


# Collapse data

data3= data2.agg(['mean'])


# Reshape data

data4 = pd.melt(data3, value_vars=["sp1_h1", "sp1_h2", "sp1_h3", "sp1_h4", "sp1_h5", "sp1_h7", "sp1_h8", 
              "sp1_h9", "sp1_h10", "sp1_h11", "sp1_h12", "sp1_h13", "sp1_h14", "sp1_h15",
              "sp1_h16", "sp1_h17", "sp1_h18", "sp1_h19", "sp1_h20", "sp1_h21", 
              "sp1_e1", "sp1_e2", "sp1_e3", "sp1_e5", "sp1_e6"])
# Plot

plt.figure(figsize=(14,10))

ax = sns.barplot(data = data4, x = 'value', y = 'variable', color = 'grey',
                order=data4.sort_values('value',ascending = False).variable)

plt.bar_label(ax.containers[0], fmt='%.2f', fontsize = 11)
plt.show()
../_images/60b220d38b5c611b3d809c7a3a4f1f9f086c53ad7ae19dbb3eb08f822c8ee708.png

1.8. Pre-defined error bars#

# Load an example dataset

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/HorizontalGroupSE.dta?raw=true")
data['dr_3'] = data['dr_3'].map({'No Referral': 0, 'Referral': 1})
data[["dr_3"]] = data[["dr_3"]].apply(pd.to_numeric)
data['med_b2_any_antibiotic'] = data['med_b2_any_antibiotic'].map({'No Antibiotic': 0, 'Gave Antibiotic': 1})
data[["med_b2_any_antibiotic"]] = data[["med_b2_any_antibiotic"]].apply(pd.to_numeric)
data['med_b2_any_steroid'] = data['med_b2_any_steroid'].map({'No Steroid': 0, 'Gave Steroid': 1})
data[["med_b2_any_steroid"]] = data[["med_b2_any_steroid"]].apply(pd.to_numeric)
data['med_l_any_2'] = data['med_l_any_2'].map({'No': 0, 'Fluoroquinolone': 1})
data[["med_l_any_2"]] = data[["med_l_any_2"]].apply(pd.to_numeric)
data['med_l_any_1'] = data['med_l_any_1'].map({'No': 0})
data[["med_l_any_1"]] = data[["med_l_any_1"]].apply(pd.to_numeric)
# Reshape and collapse data for means

data2= (data
       .groupby(['city'])
       .agg({'city':'first', "dr_3":'mean', 
             "correct_treatment":'mean', "med_b2_any_antibiotic":'mean', 
             "med_b2_any_steroid":'mean', "med_b2_any_antister":'mean', 
              "med_l_any_2":'mean', "med_b2_any_schedule_h":'mean', 
             "med_b2_any_schedule_h1":'mean', "med_b2_any_schedule_x":'mean', 
             "med_l_any_1":'mean'}))

data2_2 = pd.melt(data2, id_vars=['city'], value_vars=['dr_3', 'correct_treatment', 'med_b2_any_antibiotic', 'med_b2_any_steroid', 'med_b2_any_antister', 'med_l_any_2', 'med_b2_any_schedule_h', 'med_b2_any_schedule_h1', 'med_b2_any_schedule_x', 'med_l_any_1'])
data2_2.rename(columns={'value': 'mean'}, inplace=True)


# Reshape and collapse data for error bars

data3= (data
       .groupby(['city'])
       .agg({'city':'first', "dr_3":'sem', 
             "correct_treatment":'sem', "med_b2_any_antibiotic":'sem', 
             "med_b2_any_steroid":'sem', "med_b2_any_antister":'sem', 
              "med_l_any_2":'sem', "med_b2_any_schedule_h":'sem', 
             "med_b2_any_schedule_h1":'sem', "med_b2_any_schedule_x":'sem', 
             "med_l_any_1":'sem'}))

data3_2 = pd.melt(data3, id_vars=['city'], value_vars=['dr_3', 'correct_treatment', 'med_b2_any_antibiotic', 'med_b2_any_steroid', 'med_b2_any_antister', 'med_l_any_2', 'med_b2_any_schedule_h', 'med_b2_any_schedule_h1', 'med_b2_any_schedule_x', 'med_l_any_1'])
data3_2.rename(columns={'value': 'sem', 'city': 'city2', 'variable': 'variable2'}, inplace=True)


# Concat datas

dataf = pd.concat([data2_2, data3_2], axis=1)
# Plot

dataf = dataf.sort_values(['city', 'variable'])
plt.figure(figsize=(14,10))

ax = sns.barplot(data=dataf, y = 'variable', x ='mean', hue ='city', 
                 palette = 'pastel')

y_coords = [p.get_y() + 0.5 * p.get_height() for p in ax.patches]
x_coords = [p.get_width() for p in ax.patches]
ax.errorbar(x=x_coords, y=y_coords, xerr=dataf["sem"], fmt="none", c="k")

plt.bar_label(ax.containers[0], fmt='%.2f', fontsize = 11, padding=60)
plt.bar_label(ax.containers[1], fmt='%.2f', fontsize = 11, padding=30)
plt.bar_label(ax.containers[2], fmt='%.2f', fontsize = 11, padding=30)

plt.show()
../_images/e1240f148bc46271b249ed297e5c358356c622e61e5fabe00a71b7732f1177af.png

1.9. Stacked#

# Load an example data set

data = pd.read_stata("https://github.com/d2cml-ai/python_visual_library/blob/main/data/StackBarGraphs.dta?raw=true")
data['med_t'] = data.med_b2_antister_cat_1.astype(str).str.cat(data[['med_b2_antister_cat_2', 
                                                                     'med_b2_antister_cat_3',
                                                                    'med_b2_antister_cat_4',
                                                                    'med_b2_antister_cat_5']].astype(str), sep = '.')

data["med_t"] = data["med_t"].astype("category")
data["med_t"] = data["med_t"].cat.rename_categories(["No antibiotic or steroid", 
                                                     "Antibiotic and steroid",
                                                    "Steroid",
                                                    "No medication",
                                                    "Antibiotic"])
plt.figure(figsize=(10,7))

hue_order = data.med_t.unique()[::-1]

g = sns.FacetGrid(data, col = "dr_3", height=10, sharex=False);
g.map_dataframe(sns.histplot, data = data, x = "checkgroup", hue = "med_t",
                multiple="fill", stat="proportion", discrete=True, shrink= 0.7)

labels = hue_order
colors = sns.color_palette('deep').as_hex()[:len(labels)]
handles = [patches.Patch(color = col, label = lab) for col, lab in zip(colors, labels)]
plt.legend(handles = handles, title = '', loc = 'center left', fontsize = 15)
plt.show();
<Figure size 1000x700 with 0 Axes>
../_images/532037587d3737af4e1ae5a24eadb1a2bb112b590c9305484bcaa1dc6347e968.png