Skip to main content

Journal Article

In [1]:
%run prelude.ipy

Deriving Binary Performance Metrics

In [7]:
# Derive a per-line duration plus binarized outcome columns on `trials`
# (assumes `trials` is the DataFrame loaded by prelude.ipy).
trials["norm_duration_ms"] = trials["duration_ms"] / trials["code_lines"]
trials["response_ge_half"] = trials["response_proportion"] >= 0.5
trials["keystroke_ge_one"] = trials["keystroke_coefficient"] >= 1

# Binarize duration by splitting at the overall median across all trials.
duration_all_med = trials["duration_ms"].median()
trials["duration_ge_med"] = trials["duration_ms"] >= duration_all_med

Complexity/Demographic/Performance Columns

In [2]:
# Column groupings used throughout the analysis below.
# Demographic features of the participant.
demo_cols = [
    "age",
    "py_years",
    "prog_years",
    "degree_num",
    "cs_major_num",
]
# Static complexity metrics of the program shown.
complexity_cols = [
    "code_lines",
    "cyclo_comp",
    "hal_effort",
    "hal_volume",
]
# Continuous performance outcomes.
cont_perf_cols = [
    "grade_value",
    "duration_ms",
    "keystroke_coefficient",
    "response_proportion",
]
# Binarized performance outcomes (derived earlier in the notebook).
bin_perf_cols = [
    "grade_correct",
    "duration_ge_med",
    "keystroke_ge_one",
    "response_ge_half",
]

By Code Metrics and Demographics

In [4]:
from eyecode import classify
In [3]:
# Independent-variable sets paired (by position) with short labels used in
# result keys: complexity only, demographics only, and both combined.
ind_names = ["comp", "demo", "comp_demo"]
ind_vars = [complexity_cols, demo_cols, complexity_cols + demo_cols]

# Dependent-variable sets: continuous metrics vs. binarized metrics.
dep_names = ["cont", "bin"]
dep_vars = [cont_perf_cols, bin_perf_cols]
In [12]:
# Fit a model for every (feature set, outcome) combination, recording both
# feature importances and repeated cross-validation scores.
results = {}
for ind_n, ind_vs in zip(ind_names, ind_vars):
    for dep_n, dep_vs in zip(dep_names, dep_vars):
        # Continuous outcomes get a regressor; binary outcomes a classifier.
        is_regression = (dep_n == "cont")
        for dep_v in dep_vs:
            imp = classify.feature_importances(trials, ind_vs, dep_v,
                                               regressor=is_regression)
            cv = classify.cross_validation(trials, ind_vs, dep_v,
                                           regressor=is_regression, repeat=10)
            results[(ind_n, dep_v)] = (imp, cv)
In [72]:
def plot_performance(items, ax=None, figsize=None):
    """Bar-plot mean cross-validation performance with error bars.

    Parameters
    ----------
    items : iterable of ((ind_name, dep_var), (importances, cross_val))
        pairs, i.e. entries of the `results` dict built above.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure and axes are created when None.
    figsize : tuple, optional
        Figure size, used only when `ax` is None.

    Returns
    -------
    matplotlib axes containing the sorted bar plot.
    """
    if ax is None:
        fig = pyplot.figure(figsize=figsize)
        ax = pyplot.axes()

    names, means, err = [], [], []
    for name, (importances, cross_val) in items:
        # Label bars as "dep_var (feature-set)".
        names.append("{0} ({1})".format(name[1], name[0]))
        scores = classify.cross_val_performance(cross_val)
        means.append(scores.mean())
        # NOTE(review): this is the std. *deviation* of the CV scores, even
        # though the calling cells label the axis "Std. Err." — confirm intent.
        err.append(scores.std())

    # DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
    # sort_values() is the drop-in replacement with identical semantics here.
    data = pandas.DataFrame({"mean": means, "err": err, "name": names})\
        .sort_values("mean", ascending=False)
    ax = data.plot(x="name", y="mean", kind="bar", yerr=data.err,
                   color=kelly_colors, error_kw={"ecolor": "black"},
                   ax=ax)
    ax.set_xticklabels(data.name, rotation=60, ha="right")
    return ax

Continuous Performance Metrics

In [73]:
# Plot classifier performance for the continuous outcomes only.
cont_items = [(key, val) for key, val in results.iteritems()
              if key[1] in cont_perf_cols]
ax = plot_performance(cont_items, figsize=(10, 8))
ax.set_title("Classifier Performance (Continuous)")
ax.set_ylabel("Avg. $R^2$ (Std. Err.)")
ax.set_ylim(-0.4, 0.7)
ax.set_xlabel("")
ax.figure.tight_layout()
ax.figure
Out[73]:

Binary Performance Metrics

In [74]:
ax = plot_performance([(k, v) for k, v in results.iteritems() if k[1] in bin_perf_cols],
                      figsize=(12, 8))
ax.set_title("Classifier Performance (Binary)")
ax.set_ylabel("Avg. AUC Score (Std. Err.)")
ax.set_ylim(0.5, 1)
ax.set_xlabel("")
ax.figure.tight_layout()
ax.figure
Out[74]: