```{python}
#| warning: false
# Load the merged household-by-period panel. Categorical value labels are not
# applied, so the arm columns keep their raw codes, which the comparisons
# below rely on.
panel_df = pd.read_stata(
    TRANSFORM / "30_merged_panel-hh_id-period.dta",
    convert_categoricals=False,
)
panel_df["log_food_purchase_total_99_ae"] = np.log(
    panel_df["food_purchase_total_99_ae"] + 1
)

# Generate arms as strings
panel_df["arm"] = panel_df["treatment"].map(TREATMENT_LABELS)
panel_df["arm_split"] = panel_df["treatment_split"].map(TREATMENT_SPLIT_LABELS)

# Generate a new arm split: treatment 0 and 1 keep their codes, treatment 2
# becomes 6 when draw == "H" (predictable high) and 7 when draw == "L"
# (predictable low). Every other row (e.g. treatment 3, or treatment 2 with any
# other draw) is left missing. np.select evaluates the conditions in order,
# matching the precedence of the nested conditional it replaces, without a
# Python-level pass over every row.
is_predictable = panel_df["treatment"] == 2
panel_df["treatment_pred_split"] = np.select(
    [
        is_predictable & (panel_df["draw"] == "H"),
        is_predictable & (panel_df["draw"] == "L"),
        panel_df["treatment"] == 1,
        panel_df["treatment"] == 0,
    ],
    [6, 7, 1, 0],
    default=np.nan,
)
panel_df["arm_pred_split"] = panel_df["treatment_pred_split"].map(
    TREATMENT_PRED_SPLIT_LABELS
)

# Keep completed surveys only, ordered by period.
# NOTE: reset_index() is called without drop=True, so the previous row index
# is kept as an "index" column in every frame derived from here on.
panel_df = panel_df[panel_df["survey_completed"] == 1]
panel_df = panel_df.sort_values("period").reset_index()
full_panel_df = panel_df.copy()

# Split off the baseline round (period 0) and attach each household's baseline
# outcomes to its follow-up rows as "<outcome>_bl" columns. The merge is an
# inner join, so follow-up rows without a baseline row are dropped.
baseline_df = panel_df[panel_df["period"] == 0].copy()
panel_df = panel_df[panel_df["period"] > 0].copy()
panel_df = panel_df.merge(
    baseline_df[["hh_id"] + list(outcomes)],
    on="hh_id",
    suffixes=("", "_bl"),
)

# Residualise every outcome on its own baseline value (OLS with a constant).
# Rows with missing data are dropped from each fit; presumably they come back
# as NaN residuals when concat aligns on the row index - verify if relied on.
resid_cols = {}
for var in outcomes:
    X = sm.add_constant(panel_df[f"{var}_bl"])
    model = sm.OLS(panel_df[var], X, missing="drop").fit()
    resid_cols[f"{var}_resid"] = model.resid
panel_df = pd.concat([panel_df, pd.DataFrame(resid_cols)], axis=1)

# Restrict to periods 1-5; period-0 rows were already removed above, so only
# the upper bound needs checking here.
phone_survey_panel_w_control = panel_df[panel_df["period"] < 6].copy()
def make_interactive_cdf(outcome: str):
    """Build a Plotly figure with one empirical CDF trace of ``outcome`` per arm.

    Rows are taken from the module-level ``phone_survey_panel_w_control``
    frame. Each arm is selected by comparing one of the ``treatment``,
    ``treatment_split`` or ``treatment_pred_split`` columns to a code, and
    missing outcome values are dropped before the CDF is computed.

    Args:
        outcome: Name of the column in ``phone_survey_panel_w_control`` to plot.

    Returns:
        A ``go.Figure`` with one step-function trace per arm and an x-axis
        range fixed to the overall min/max of the plotted values.
    """
    df = phone_survey_panel_w_control

    # (trace name, column holding the arm code, code selecting the arm)
    arm_specs = [
        ("Control", "treatment", 0),
        ("Stable", "treatment", 1),
        ("Predictable", "treatment", 2),
        ("Predictable High", "treatment_pred_split", 6),
        ("Predictable Low", "treatment_pred_split", 7),
        ("Risky", "treatment", 3),
        ("Risky Balanced", "treatment_split", 3),
        ("Risky High", "treatment_split", 4),
        ("Risky Low", "treatment_split", 5),
    ]
    groups = {
        name: df.loc[df[column] == code, outcome].dropna()
        for name, column, code in arm_specs
    }

    fig = go.Figure()
    for name, data in groups.items():
        sorted_data = np.sort(data.to_numpy())
        # Empirical CDF: the i-th smallest value has cumulative share i / n.
        cdf = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
        fig.add_trace(go.Scatter(
            x=sorted_data,
            y=cdf,
            mode='lines',
            line_shape='hv',  # makes it a step function
            name=name,
        ))

    # Make the figure have a white background
    fig.update_layout(template="plotly_white")

    # Ensure the figure's x-scale doesn't change when traces are toggled by
    # pinning it to the min and max values across all groups. The bounds must
    # be two scalars: a column-wise min()/max() over a wide frame would give
    # one value per group instead of a usable axis range.
    all_values = np.concatenate([data.to_numpy() for data in groups.values()])
    if all_values.size:
        fig.update_xaxes(
            range=[float(all_values.min()), float(all_values.max())]
        )
    return fig
# Write one CDF figure per residualised outcome to OUTDIR as
# "<outcome>_cdf.json", serialised with Plotly's JSON encoder.
for outcome in outcomes:
    resid_column = f"{outcome}_resid"
    json_path = OUTDIR / f"{outcome}_cdf.json"

    fig = make_interactive_cdf(resid_column)
    fig_json = json.dumps(fig, cls=PlotlyJSONEncoder)

    with open(json_path, "w") as f:
        f.write(fig_json)

# Pass the outcomes mapping on through ojs_define.
ojs_define(outcomes=outcomes)
```