CDFs

Published

March 17, 2026

Abstract

Empirical cumulative distribution functions (CDFs) comparing outcomes across treatment arms.

Code
```{python}
#| warning: false

# Load the merged household-by-period panel. convert_categoricals=False keeps
# value-labelled Stata columns as their stored codes instead of pandas
# Categoricals, which the numeric comparisons on `treatment` below rely on.
panel_df = pd.read_stata(
    TRANSFORM / "30_merged_panel-hh_id-period.dta",
    convert_categoricals=False,
)

# log(1 + x) transform of food purchases; log1p is the numerically stable
# spelling of np.log(x + 1).
panel_df["log_food_purchase_total_99_ae"] = np.log1p(
    panel_df["food_purchase_total_99_ae"]
)

# Generate arms as strings
panel_df["arm"] = panel_df["treatment"].map(TREATMENT_LABELS)
panel_df["arm_split"] = panel_df["treatment_split"].map(TREATMENT_SPLIT_LABELS)

# Generate a new arm split:
#   treatment == 2 and draw == "H" -> 6 (predictable high)
#   treatment == 2 and draw == "L" -> 7 (predictable low)
#   treatment == 1                 -> 1 (preserved)
#   treatment == 0                 -> 0 (preserved)
#   anything else                  -> missing (NaN)
# np.select takes the first matching condition, mirroring the order of the
# original nested conditional, but evaluates column-wise instead of calling a
# Python lambda once per row. The result is a float column because of the NaN
# default.
is_predictable = panel_df["treatment"] == 2
panel_df["treatment_pred_split"] = np.select(
    [
        is_predictable & (panel_df["draw"] == "H"),
        is_predictable & (panel_df["draw"] == "L"),
        panel_df["treatment"] == 1,
        panel_df["treatment"] == 0,
    ],
    [6, 7, 1, 0],
    default=np.nan,
)
panel_df["arm_pred_split"] = panel_df["treatment_pred_split"].map(TREATMENT_PRED_SPLIT_LABELS)
# Fresh copy after the run of column insertions above.
panel_df = panel_df.copy()

# Keep completed surveys only, ordered by period. reset_index() (without
# drop=True) keeps the previous index as an "index" column.
panel_df = panel_df[panel_df["survey_completed"] == 1]
panel_df = panel_df.sort_values("period").reset_index()
full_panel_df = panel_df.copy()

# Period 0 is the baseline; every later period is a follow-up.
baseline_df = panel_df[panel_df["period"] == 0].copy()
panel_df = panel_df[panel_df["period"] > 0].copy()

# Attach each household's baseline outcomes as `<outcome>_bl` columns. This is
# pandas' default inner join, so follow-up rows whose hh_id has no baseline
# row are dropped.
# NOTE(review): assumes one baseline row per hh_id; duplicates would multiply
# follow-up rows — confirm, or pass validate="many_to_one".
panel_df = panel_df.merge(
    baseline_df[["hh_id"] + list(outcomes.keys())],
    on="hh_id",
    suffixes=("", "_bl"),
)

# Residualise every outcome on its own baseline value: OLS of the outcome on a
# constant plus the `_bl` column. Rows with missing values are excluded from
# the fit (missing='drop'), so they have no residual and end up NaN once the
# residual columns are aligned on the index in the concat below.
resid_cols = {}
for var in outcomes.keys():
    X = sm.add_constant(panel_df[f"{var}_bl"])
    model = sm.OLS(panel_df[var], X, missing="drop").fit()
    resid_cols[f"{var}_resid"] = model.resid

# Add all residual columns in a single concat rather than one insert per column.
panel_df = pd.concat([panel_df, pd.DataFrame(resid_cols)], axis=1)

# Sample used for the CDFs: follow-up periods strictly between 0 and 6.
phone_survey_panel_w_control = panel_df[(panel_df["period"] > 0) & (panel_df["period"] < 6)].copy()



def make_interactive_cdf(outcome: str) -> "go.Figure":
    """Build a Plotly figure with one empirical-CDF step trace per arm.

    Reads the module-level ``phone_survey_panel_w_control`` frame, selects the
    ``outcome`` column for each arm (dropping missing values) and draws the
    empirical CDF of each selection as a step line.

    Args:
        outcome: Name of the column in ``phone_survey_panel_w_control`` to plot.

    Returns:
        A ``go.Figure`` with nine traces (in the order listed in
        ``group_specs``), the ``plotly_white`` template, and an x-axis range
        pinned to the pooled min/max of all plotted values.
    """
    df = phone_survey_panel_w_control

    # (trace name, column holding the arm code, code selecting the group)
    group_specs = [
        ("Control", "treatment", 0),
        ("Stable", "treatment", 1),
        ("Predictable", "treatment", 2),
        ("Predictable High", "treatment_pred_split", 6),
        ("Predictable Low", "treatment_pred_split", 7),
        ("Risky", "treatment", 3),
        ("Risky Balanced", "treatment_split", 3),
        ("Risky High", "treatment_split", 4),
        ("Risky Low", "treatment_split", 5),
    ]
    groups = {
        name: df.loc[df[column] == code, outcome].dropna()
        for name, column, code in group_specs
    }

    fig = go.Figure()
    for name, data in groups.items():
        sorted_data = np.sort(data)
        # Empirical CDF: the i-th smallest value sits at height i / n.
        cdf = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
        fig.add_trace(go.Scatter(
            x=sorted_data, y=cdf,
            mode="lines",
            line_shape="hv",  # makes it a step function
            name=name,
        ))

    # Make the figure have a white background
    fig.update_layout(template="plotly_white")

    # Ensure the figure's x-scale doesn't change when traces are toggled by
    # pinning it to the min and max values of the outcome. The groups are
    # pooled row-wise so that min()/max() are scalars; concatenating them as
    # columns would yield one min/max per group and an invalid axis range.
    all_data = pd.concat(list(groups.values()), ignore_index=True)
    if not all_data.empty:
        fig.update_xaxes(range=[all_data.min(), all_data.max()])
    return fig


# Write one Plotly figure per outcome to OUTDIR as "<outcome>_cdf.json",
# plotting the baseline-residualised column "<outcome>_resid".
# NOTE(review): presumably OUTDIR is the resources/plotly-data directory the
# OJS chart cell fetches from — confirm.
for outcome in outcomes:
    fig = make_interactive_cdf(f"{outcome}_resid")
    # Serialise before opening the file so a failed encode never leaves a
    # truncated file behind.
    payload = json.dumps(fig, cls=PlotlyJSONEncoder)
    out_path = OUTDIR / f"{outcome}_cdf.json"
    with open(out_path, "w") as handle:
        handle.write(payload)

# Hand the `outcomes` mapping to the OJS cells.
ojs_define(outcomes=outcomes)
```
Code
```{ojs}
//| panel: input
outcomeOptions = new Map(Object.entries(outcomes))

viewof selectedOutcome = Inputs.select(
  Array.from(outcomeOptions.keys()),
  {
    label: "Outcome",
    value: Array.from(outcomeOptions.keys())[0],
    format: d => outcomeOptions.get(d)
  }
)
```
Code
```{ojs}
//| panel: fill
Plotly = require("https://cdn.plot.ly/plotly-2.35.2.min.js")

plotlyChart = {
  const el = html`<div style="height:520px;"></div>`;
  const url = `resources/plotly-data/${selectedOutcome}_cdf.json`;
  const fig = await fetch(url).then(r => r.json());
  await Plotly.newPlot(el, fig.data, fig.layout, {responsive: true});
  return el;
}
```