How to add additional text to matplotlib annotations

Question

I have used seaborn's titanic dataset as a proxy for my very large dataset; the chart and the data shown below are based on it.

The following code runs without any errors:

import seaborn as sns
import pandas as pd
import numpy as np
sns.set_theme(style="darkgrid")

# Load the example Titanic dataset
df = sns.load_dataset("titanic")

# Split fare into decile groups and order them.
# qcut is run twice over the same ten quantile bins: once keeping the
# interval label and once with labels=False to get the integer bin number.
# Both results are cast to str so they can be concatenated below.
df['fare_grp'] = pd.qcut(df['fare'], q=10, precision=0).astype(str)
df['fare_grp_num'] = pd.qcut(df['fare'], q=10, labels=False, precision=0).astype(str)

# Prefix each interval label with its bin number; with ten bins the numbers
# are single digits, so sorting these strings also sorts the groups by fare.
df['fare_ord_grp'] = df['fare_grp_num'] + ' ' + df['fare_grp']

# set variables
target = 'survived'
ydim = 'fare_ord_grp'
xdim = 'embark_town'


def _count_by(frame, keys, count_name):
    """Count non-null `target` values of `frame` within each `keys` group.

    Groups whose key is NaN are kept (dropna=False) and relabelled
    'Missing'. The count column is renamed to `count_name`.
    """
    counts = (
        frame.groupby(keys, as_index=False, dropna=False)[target]
        .count()
        .rename(columns={target: count_name})
    )
    counts[keys] = counts[keys].fillna('Missing')
    return counts


# Rows where the target is 0 are "non-events", rows where it is 1 are "events".
non_event_rows = df[df[target] == 0]
event_rows = df[df[target] == 1]

# Counts per (ydim, xdim) cell and per xdim value.
non_events = _count_by(non_event_rows, [ydim, xdim], 'non_events')
non_events_total = _count_by(non_event_rows, [xdim], 'non_events_total_by_xdim')
events = _count_by(event_rows, [ydim, xdim], 'events')
events_total = _count_by(event_rows, [xdim], 'events_total_by_xdim')
grand_total = _count_by(df, [xdim], 'total_by_xdim')

# Gather the three per-xdim totals into a single lookup table.
grand_total = (
    grand_total
    .merge(non_events_total, how='left', on=xdim)
    .merge(events_total, how='left', on=xdim)
)

# Outer merge keeps cells that have only events or only non-events; the
# side that is absent is NaN, so it is treated as 0 when summing the total.
result = pd.merge(non_events, events, how='outer', on=[ydim, xdim])
result['total'] = result['non_events'].fillna(0) + result['events'].fillna(0)
result = pd.merge(result, grand_total, how='left', on=[xdim])

# NOTE: 'events' / 'non_events' are left as NaN where a cell has none, so the
# rates derived from them are NaN (not 0) for those cells.
result['survival rate %'] = (result['events'] / result['total'] * 100).round(2)
result['% event dist by xdim'] = (result['events'] / result['events_total_by_xdim'] * 100).round(2)
result['% non-event dist by xdim'] = (result['non_events'] / result['non_events_total_by_xdim'] * 100).round(2)
result['% total dist by xdim'] = (result['total'] / result['total_by_xdim'] * 100).round(2)

display(result)
# Label for the plotted measure; the chart title uses the same text.
value_name1 = "% dist by " + str(xdim)
title1 = value_name1

# One row per ydim group and one column per xdim value, holding the share of
# each xdim total that falls into that group.
dfl2 = result.pivot(index=ydim, columns=xdim, values='% total dist by xdim')
print(dfl2)

# Transposed so that each xdim value is one bar and the ydim groups are the
# stacked segments. rot=45 sets the x tick label rotation directly.
ax = dfl2.T.plot(kind='bar', stacked=True, rot=45, figsize=(8, 8), title=title1)
ax.legend(bbox_to_anchor=(1.0, 1.0), title='Fare Range')
ax.set_ylabel('% Dist')

# Write each segment's percentage at the centre of the segment.
for p in ax.patches:
    height = p.get_height()
    # Zero-height segments (no data for that combination) get no label;
    # otherwise a pile of "0%" texts lands on the segment boundaries.
    if height == 0:
        continue
    x, y = p.get_xy()
    ax.text(x + p.get_width() / 2, y + height / 2, f'{height:.0f}%',
            horizontalalignment='center', verticalalignment='center')

It produces the following stacked percent bar chart, which shows the % of total distribution by embark town.

I also want to show the survival rate along with the % distribution in each block. For example, for Queenstown, fare range 1 (7.6, 7.9], the % total distribution is 56% and the survival rate is 37.21%, so I want that block to be labelled (56%, 37.21%). I have not been able to figure out how to do this. Kindly offer any suggestions. Thanks.

Here is the output summary table for reference:

	fare_ord_grp	embark_town	non_events	events	total	total_by_xdim	non_events_total_by_xdim	events_total_by_xdim	survival rate %	% event dist by xdim	% non-event dist by xdim	% total dist by xdim
0	0 (-0.1,7.6]	Cherbourg	22	7	29	168	75	93	24.14	7.53	29.33	17.26
1	0 (-0.1,7.6]	Queenstown	4	NaN	4	77	47	30	NaN	NaN	8.51	5.19
2	0 (-0.1,7.6]	Southampton	53	6	59	644	427	217	10.17	2.76	12.41	9.16
3	1 (7.6,7.9]	Queenstown	27	16	43	77	47	30	37.21	53.33	57.45	55.84
4	1 (7.6,7.9]	Southampton	34	10	44	644	427	217	22.73	4.61	7.96	6.83
5	2 (7.9,8]	Cherbourg	4	1	5	168	75	93	20	1.08	5.33	2.98
6	2 (7.9,8]	Southampton	83	13	96	644	427	217	13.54	5.99	19.44	14.91
7	3 (8.0,10.5]	Cherbourg	2	1	3	168	75	93	33.33	1.08	2.67	1.79
8	3 (8.0,10.5]	Queenstown	2	NaN	2	77	47	30	NaN	NaN	4.26	2.6
9	3 (8.0,10.5]	Southampton	56	17	73	644	427	217	23.29	7.83	13.11	11.34
10	4 (10.5,14.5]	Cherbourg	7	8	15	168	75	93	53.33	8.6	9.33	8.93
11	4 (10.5,14.5]	Queenstown	1	2	3	77	47	30	66.67	6.67	2.13	3.9
12	4 (10.5,14.5]	Southampton	40	26	66	644	427	217	39.39	11.98	9.37	10.25
13	5 (14.5,21.7]	Cherbourg	9	10	19	168	75	93	52.63	10.75	12	11.31
14	5 (14.5,21.7]	Queenstown	5	3	8	77	47	30	37.5	10	10.64	10.39
15	5 (14.5,21.7]	Southampton	37	24	61	644	427	217	39.34	11.06	8.67	9.47
16	6 (21.7,27]	Cherbourg	1	4	5	168	75	93	80	4.3	1.33	2.98
17	6 (21.7,27]	Queenstown	2	3	5	77	47	30	60	10	4.26	6.49
18	6 (21.7,27]	Southampton	40	39	79	644	427	217	49.37	17.97	9.37	12.27
19	7 (27.0,39.7]	Cherbourg	14	10	24	168	75	93	41.67	10.75	18.67	14.29
20	7 (27.0,39.7]	Queenstown	5	NaN	5	77	47	30	NaN	NaN	10.64	6.49
21	7 (27.0,39.7]	Southampton	38	24	62	644	427	217	38.71	11.06	8.9	9.63
22	8 (39.7,78]	Cherbourg	5	19	24	168	75	93	79.17	20.43	6.67	14.29
23	8 (39.7,78]	Southampton	37	28	65	644	427	217	43.08	12.9	8.67	10.09
24	9 (78.0,512.3]	Cherbourg	11	33	44	168	75	93	75	35.48	14.67	26.19
25	9 (78.0,512.3]	Queenstown	1	1	2	77	47	30	50	3.33	2.13	2.6
26	9 (78.0,512.3]	Southampton	9	30	39	644	427	217	76.92	13.82	2.11	6.06
27	2 (7.9,8]	Queenstown	NaN	5	5	77	47	30	100	16.67	NaN	6.49
28	9 (78.0,512.3]	Missing	NaN	2	2	2	NaN	2	100	100	NaN	100

Trenton McKinney · Accepted Answer · 2021-05-11 02:53:55Z

dfl2.T is being plotted, but 'survival rate %' is in result. As such, the indices for the values from dfl2.T do not correspond with 'survival rate %'.
Because not all of the values in result['% total dist by xdim'] are unique, we can't use a dict of matched key-values.
Create a corresponding pivoted DataFrame for 'survival rate %', and then flatten it. All of the values will be in the same order as the '% total dist by xdim' values from dfl2.T. As such, they can be indexed.
With respect to dfl2.T, the plot API plots in column order, which means .flatten(order='F') must be used to flatten the array in the correct order to be indexed.

# create a corresponding pivoted dataframe for survival rate %
# (same index and columns as the pivot that was plotted, so the two tables
# line up cell for cell)
dfl4 = result.pivot(index=ydim, columns=xdim, values='survival rate %')

# flatten dfl4.T in column order, which is the order the bar patches are drawn in.
# The survival rate is NaN for cells that have passengers but no survivors,
# so NaN is filled with 0 here (for labelling only; dfl4 itself is unchanged)
# to avoid "nan%" labels. Cells with no passengers at all are also filled,
# but they are skipped below because their segment height is 0.
dfl4_flattened = dfl4.T.fillna(0).to_numpy().flatten(order='F')

for p, survival_rate in zip(ax.patches, dfl4_flattened):
    height = p.get_height()

    # only print values when height is not 0
    if height == 0:
        continue

    x, y = p.get_xy()

    # create the text string: "<% of total>, <survival rate %>"
    text = f'{height:.0f}%, {survival_rate:.0f}%'

    # annotate the bar segment at its centre
    ax.text(x + p.get_width() / 2, y + height / 2, text,
            horizontalalignment='center', verticalalignment='center')

Notes

Here we can see dfl2.T and dfl4.T

# dfl2.T
fare_ord_grp  0 (-0.1, 7.6]  1 (7.6, 7.9]  2 (7.9, 8.0]  3 (8.0, 10.5]  4 (10.5, 14.5]  5 (14.5, 21.7]  6 (21.7, 27.0]  7 (27.0, 39.7]  8 (39.7, 78.0]  9 (78.0, 512.3]
embark_town                                                                                                                                                            
Cherbourg             17.26           NaN          2.98           1.79            8.93           11.31            2.98           14.29           14.29            26.19
Missing                 NaN           NaN           NaN            NaN             NaN             NaN             NaN             NaN             NaN           100.00
Queenstown             5.19         55.84          6.49           2.60            3.90           10.39            6.49            6.49             NaN             2.60
Southampton            9.16          6.83         14.91          11.34           10.25            9.47           12.27            9.63           10.09             6.06

# dfl4.T
fare_ord_grp  0 (-0.1, 7.6]  1 (7.6, 7.9]  2 (7.9, 8.0]  3 (8.0, 10.5]  4 (10.5, 14.5]  5 (14.5, 21.7]  6 (21.7, 27.0]  7 (27.0, 39.7]  8 (39.7, 78.0]  9 (78.0, 512.3]
embark_town                                                                                                                                                            
Cherbourg             24.14           NaN         20.00          33.33           53.33           52.63           80.00           41.67           79.17            75.00
Missing                 NaN           NaN           NaN            NaN             NaN             NaN             NaN             NaN             NaN           100.00
Queenstown              NaN         37.21        100.00            NaN           66.67           37.50           60.00             NaN             NaN            50.00
Southampton           10.17         22.73         13.54          23.29           39.39           39.34           49.37           38.71           43.08            76.92

Collectives™ on Stack Overflow

How to add additional text to matplotlib annotations

1 Answer 1

Notes

Comments

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Notes

Comments

Related