Cash Flow

Published

February 17, 2026

Abstract

Quick EDAs to explore cash flow

Baseline Cash Flow

Income Sources

Code
```{python}
#| label: fig-income-sources-pct
#| fig-cap: Average percentage of cash income from each source at baseline

# One row per inflow-share column: its display name (falling back to the raw
# column name when it has no entry in inflow_name_map) and its mean over all
# rows of `baseline`.
inflow_labels = [inflow_name_map.get(col, col) for col in inflow_pct_cols]
inflow_means = [baseline[col].mean() for col in inflow_pct_cols]
inflow_df = pd.DataFrame({'Category': inflow_labels, 'Mean': inflow_means})
inflow_df = inflow_df.sort_values('Mean', ascending=True)

# Use the sorted order as the category order so the axis follows the means
# instead of the default ordering of the labels.
inflow_df['Category'] = pd.Categorical(
    inflow_df['Category'],
    categories=inflow_df['Category'],
    ordered=True,
)

# Horizontal bar chart with the mean printed just past the end of each bar.
(
    ggplot(inflow_df, aes(x='Category', y='Mean'))
    + geom_col(fill='steelblue')
    + geom_text(
        aes(label='Mean'),
        format_string='{:.1f}%',
        ha='left',
        nudge_y=0.5,
        size=8,
    )
    + coord_flip()
    + labs(
        x='',
        y='Mean % of Total Cash Inflow',
        title='Income Sources at Baseline',
    )
    + theme(
        figure_size=(7, 5),
        axis_text_y=element_text(angle=45, ha='right'),
    )
)
```
Figure 1: Average percentage of cash income from each source at baseline
Code
```{python}
#| label: fig-income-sources-amount
#| fig-cap: Average amount of cash income from each source at baseline (% × total inflow)


def _amount_label(row):
    """Return the bar label text: rounded mean amount plus the positive-share count."""
    return f"{row['Amount']:,.0f} (n={row['N']})"


# Only rows that report a positive cash inflow enter this figure.
bl_pos = baseline[baseline['cash_inflow'] > 0]

# Per source: mean of (share / 100 * cash_inflow_99) and the number of rows
# whose share is positive.
# NOTE(review): cash_inflow_99 is presumably a 99th-percentile-capped version
# of cash_inflow -- confirm against where the column is built.
amount_df = pd.DataFrame({
    'Category': [inflow_name_map.get(col, col) for col in inflow_pct_cols],
    'Amount': [
        (bl_pos[col] / 100 * bl_pos['cash_inflow_99']).mean()
        for col in inflow_pct_cols
    ],
    'N': [(bl_pos[col] > 0).sum() for col in inflow_pct_cols],
})
amount_df = amount_df.sort_values('Amount', ascending=True)

# Keep the sorted order on the axis by making it the category order.
amount_df['Category'] = pd.Categorical(
    amount_df['Category'],
    categories=amount_df['Category'],
    ordered=True,
)
amount_df['label'] = amount_df.apply(_amount_label, axis=1)

(
    ggplot(amount_df, aes(x='Category', y='Amount'))
    + geom_col(fill='steelblue')
    + geom_text(aes(label='label'), ha='left', nudge_y=50, size=7)
    + coord_flip()
    + labs(
        x='',
        y='Mean Amount',
        title='Income Sources at Baseline: Amount',
    )
    + theme(
        figure_size=(7, 5),
        axis_text_y=element_text(angle=45, ha='right'),
    )
)
```
Figure 2: Average amount of cash income from each source at baseline (% × total inflow)

915 households (40%) report zero cash inflow at baseline.

Code
```{python}
#| label: fig-dominant-income-source
#| fig-cap: Number of households with concentrated income (≥70% from single source)

# Copy the positive-inflow subset so the helper columns below are added to
# this frame rather than to a slice of `baseline`.
bl_pos = baseline[baseline['cash_inflow'] > 0].copy()

# Largest single-source share per row, and the source it comes from.
bl_pos['dominant_pct'] = bl_pos[inflow_pct_cols].max(axis=1)
# Fall back to the raw column name when a source has no display name, as the
# other income figures do. A plain .map(dict) would turn unmapped sources into
# NaN, and value_counts() below would then silently drop those households.
bl_pos['dominant_src'] = (
    bl_pos[inflow_pct_cols]
    .idxmax(axis=1)
    .map(lambda col: inflow_name_map.get(col, col))
)

# "Concentrated" = at least 70% of inflow from one source.
concentrated = bl_pos[bl_pos['dominant_pct'] >= 70]
n_conc, n_tot = len(concentrated), len(bl_pos)
# Share among households with positive inflow (the population this figure is
# built from); guard against an empty subset to avoid ZeroDivisionError.
conc_share = n_conc / n_tot * 100 if n_tot else 0.0

# Count concentrated households per dominant source, sorted so the axis
# follows the counts.
conc_counts = concentrated['dominant_src'].value_counts().reset_index()
conc_counts.columns = ['Source', 'N']
conc_counts = conc_counts.sort_values('N', ascending=True)
conc_counts['Source'] = pd.Categorical(conc_counts['Source'], categories=conc_counts['Source'], ordered=True)

(ggplot(conc_counts, aes(x='Source', y='N'))
 + geom_col(fill='steelblue')
 + geom_text(aes(label='N'), ha='left', nudge_y=1, size=8)
 + coord_flip()
 + labs(x='', y='Number of Households',
        # Denominator is the computed n_tot (was a hard-coded 2272), matching
        # the concentrated-spending figure.
        title=f'Concentrated Income (≥70%): {n_conc}/{n_tot} HH ({conc_share:.1f}%)')
 + theme(figure_size=(7, 5), axis_text_y=element_text(angle=45, ha='right')))
```
Figure 3: Number of households with concentrated income (≥70% from single source)

Spending Categories

Code
```{python}
#| label: fig-outflow-sources-pct
#| fig-cap: Average percentage of cash outflow to each category at baseline

# One row per outflow-share column: its display name (falling back to the raw
# column name when it has no entry in outflow_name_map) and its mean over all
# rows of `baseline`.
outflow_labels = [outflow_name_map.get(col, col) for col in outflow_pct_cols]
outflow_means = [baseline[col].mean() for col in outflow_pct_cols]
outflow_df = pd.DataFrame({'Category': outflow_labels, 'Mean': outflow_means})
outflow_df = outflow_df.sort_values('Mean', ascending=True)

# Use the sorted order as the category order so the axis follows the means.
outflow_df['Category'] = pd.Categorical(
    outflow_df['Category'],
    categories=outflow_df['Category'],
    ordered=True,
)

# Horizontal bar chart with the mean printed just past the end of each bar.
(
    ggplot(outflow_df, aes(x='Category', y='Mean'))
    + geom_col(fill='coral')
    + geom_text(
        aes(label='Mean'),
        format_string='{:.1f}%',
        ha='left',
        nudge_y=0.5,
        size=8,
    )
    + coord_flip()
    + labs(
        x='',
        y='Mean % of Total Cash Outflow',
        title='Spending Categories at Baseline',
    )
    + theme(
        figure_size=(7, 5),
        axis_text_y=element_text(angle=45, ha='right'),
    )
)
```
Figure 4: Average percentage of cash outflow to each category at baseline
Code
```{python}
#| label: fig-outflow-sources-amount
#| fig-cap: Average amount of cash outflow to each category at baseline (% × total outflow)


def _out_amount_label(row):
    """Return the bar label text: rounded mean amount plus the positive-share count."""
    return f"{row['Amount']:,.0f} (n={row['N']})"


# Only rows that report a positive cash outflow enter this figure.
bl_out_pos = baseline[baseline['cash_outflow'] > 0]

# Per category: mean of (share / 100 * cash_outflow_99) and the number of rows
# whose share is positive.
# NOTE(review): cash_outflow_99 is presumably a 99th-percentile-capped version
# of cash_outflow -- confirm against where the column is built.
out_amount_df = pd.DataFrame({
    'Category': [outflow_name_map.get(col, col) for col in outflow_pct_cols],
    'Amount': [
        (bl_out_pos[col] / 100 * bl_out_pos['cash_outflow_99']).mean()
        for col in outflow_pct_cols
    ],
    'N': [(bl_out_pos[col] > 0).sum() for col in outflow_pct_cols],
})
out_amount_df = out_amount_df.sort_values('Amount', ascending=True)

# Keep the sorted order on the axis by making it the category order.
out_amount_df['Category'] = pd.Categorical(
    out_amount_df['Category'],
    categories=out_amount_df['Category'],
    ordered=True,
)
out_amount_df['label'] = out_amount_df.apply(_out_amount_label, axis=1)

(
    ggplot(out_amount_df, aes(x='Category', y='Amount'))
    + geom_col(fill='coral')
    + geom_text(aes(label='label'), ha='left', nudge_y=50, size=7)
    + coord_flip()
    + labs(
        x='',
        y='Mean Amount',
        title='Spending Categories at Baseline: Amount',
    )
    + theme(
        figure_size=(7, 5),
        axis_text_y=element_text(angle=45, ha='right'),
    )
)
```
Figure 5: Average amount of cash outflow to each category at baseline (% × total outflow)
Code
```{python}
#| label: fig-dominant-outflow-source
#| fig-cap: Number of households with concentrated spending (≥70% to single category)

# Copy the positive-outflow subset so the helper columns below are added to
# this frame rather than to a slice of `baseline`.
bl_out_pos = baseline[baseline['cash_outflow'] > 0].copy()

# Largest single-category share per row, and the category it comes from.
bl_out_pos['dominant_pct'] = bl_out_pos[outflow_pct_cols].max(axis=1)
# Fall back to the raw column name when a category has no display name, as the
# other spending figures do. A plain .map(dict) would turn unmapped categories
# into NaN, and value_counts() below would then silently drop those households.
bl_out_pos['dominant_src'] = (
    bl_out_pos[outflow_pct_cols]
    .idxmax(axis=1)
    .map(lambda col: outflow_name_map.get(col, col))
)

# "Concentrated" = at least 70% of outflow to one category.
concentrated_out = bl_out_pos[bl_out_pos['dominant_pct'] >= 70]
n_conc_out, n_tot_out = len(concentrated_out), len(bl_out_pos)
# Guard against an empty positive-outflow subset to avoid ZeroDivisionError.
conc_out_share = n_conc_out / n_tot_out * 100 if n_tot_out else 0.0

# Count concentrated households per dominant category, sorted so the axis
# follows the counts.
out_counts = concentrated_out['dominant_src'].value_counts().reset_index()
out_counts.columns = ['Source', 'N']
out_counts = out_counts.sort_values('N', ascending=True)
out_counts['Source'] = pd.Categorical(out_counts['Source'], categories=out_counts['Source'], ordered=True)

(ggplot(out_counts, aes(x='Source', y='N'))
 + geom_col(fill='coral')
 + geom_text(aes(label='N'), ha='left', nudge_y=1, size=8)
 + coord_flip()
 + labs(x='', y='Number of Households',
        title=f'Concentrated Spending (≥70%): {n_conc_out}/{n_tot_out} HH ({conc_out_share:.1f}%)')
 + theme(figure_size=(7, 5), axis_text_y=element_text(angle=45, ha='right')))
```
Figure 6: Number of households with concentrated spending (≥70% to single category)