In [15]:
%run prelude.ipy
import warnings
# Ignore every DeprecationWarning raised from this point on in the session.
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [3]:
from eyecode import classify
In [49]:
# Label every trial with "<base> <version>" so each program variant has one name.
trials["program_name"] = trials.apply(lambda t: "{0} {1}".format(t["base"], t["version"]), axis=1)

# Integer-encode each categorical column into "<col>_num". Codes are assigned
# in order of first appearance of each distinct value in the column.
for col in ["base", "version", "program_name"]:
    values = list(trials[col].unique())
    # Build the value -> code lookup once so encoding a row is a dict lookup
    # instead of a linear scan of `values` for every trial.
    codes = {value: code for code, value in enumerate(values)}
    trials[col + "_num"] = trials[col].map(codes)
# Add binary performance columns
# Each flag is True when the trial's metric is at or above its threshold:
# 0.5 for response proportion, 1 for keystroke coefficient, and the median
# over all trials for duration.
duration_all_med = trials["duration_ms"].median()
trials["response_ge_half"] = trials["response_proportion"] >= 0.5
trials["keystroke_ge_one"] = trials["keystroke_coefficient"] >= 1
trials["duration_ge_med"] = trials["duration_ms"] >= duration_all_med
In [42]:
# Add continuous experiment metrics
# Index experiments by exp_id so the per-experiment means computed from
# `trials` line up by label when they are assigned as new columns.
experiments.index = experiments["exp_id"]
trials_by_exp = trials.groupby("exp_id")
experiments["avg_keystroke_coefficient"] = trials_by_exp["keystroke_coefficient"].mean()
experiments["avg_response_proportion"] = trials_by_exp["response_proportion"].mean()
In [53]:
# Column groups passed to plot.misc.classify_boxplots in the cells below.
# demo_cols are read from `experiments`; exp_per_cols includes the avg_*
# columns added to `experiments`; bin_perf_cols includes the *_ge_* flags
# added to `trials`.
demo_cols = ["age", "py_years", "prog_years", "degree_num", "cs_major_num"]
complexity_cols = ["code_lines", "cyclo_comp", "hal_effort", "hal_volume"]
cont_perf_cols = ["grade_value", "duration_ms", "keystroke_coefficient", "response_proportion"]
bin_perf_cols = ["grade_correct", "duration_ge_med", "keystroke_ge_one", "response_ge_half"]
exp_per_cols = ["total_grade", "duration_sec", "avg_keystroke_coefficient", "avg_response_proportion"]

# Add demographics info to trials
# The experiment-metrics cell sets experiments.index to the exp_id column, so
# "exp_id" names both an index level and a column of `experiments`. Recent
# pandas refuses to merge on such an ambiguous key; dropping the index from
# the right-hand frame leaves only the column, and the merged rows are
# unchanged because a column merge never used that index.
demo_trials = trials.merge(
    experiments[["exp_id"] + demo_cols].reset_index(drop=True),
    on="exp_id",
)
Experiments - Demographics Only
In [44]:
# Experiment-level frame: demo_cols against exp_per_cols, with regressor=True.
# NOTE(review): assumes the 2nd argument lists feature columns and the 3rd
# lists target columns — confirm in plot.misc.classify_boxplots.
plot.misc.classify_boxplots(
    experiments,
    demo_cols,
    exp_per_cols,
    regressor=True,
)
Out[44]:
Trials - Demographics Only
Don't forget: demographics don't change per trial! Every trial from the same experiment carries identical demographic values.
In [18]:
# Trial-level frame: demo_cols against the continuous performance columns,
# with regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    demo_cols,
    cont_perf_cols,
    regressor=True,
)
Out[18]:
In [17]:
# Trial-level frame: demo_cols against the binary performance columns
# (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    demo_cols,
    bin_perf_cols,
)
Out[17]:
Trials - Demographics + Complexity Metrics
Continuous
In [27]:
# Complexity metrics followed by demographics, against the continuous
# performance columns, with regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    complexity_cols + demo_cols,
    cont_perf_cols,
    regressor=True,
)
Out[27]:
Binary
In [24]:
# Complexity metrics followed by demographics, against the binary
# performance columns (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    complexity_cols + demo_cols,
    bin_perf_cols,
)
Out[24]:
Trials - Best Demographics and Complexity
Continuous
In [31]:
# Only prog_years and code_lines, against the continuous performance
# columns, with regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    ["prog_years", "code_lines"],
    cont_perf_cols,
    regressor=True,
)
Out[31]:
Binary
In [25]:
# Only prog_years and code_lines, against the binary performance columns
# (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    ["prog_years", "code_lines"],
    bin_perf_cols,
)
Out[25]:
Trials - Best Complexity
Continuous
In [32]:
# code_lines alone, against the continuous performance columns, with
# regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    ["code_lines"],
    cont_perf_cols,
    regressor=True,
)
Out[32]:
Binary
In [26]:
# code_lines alone, against the binary performance columns (no regressor
# flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    ["code_lines"],
    bin_perf_cols,
)
Out[26]:
Trials - Base, Version, and Demographics
Continuous
In [65]:
# Integer-coded base and version only, against the continuous performance
# columns, with regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    ["base_num", "version_num"],
    cont_perf_cols,
    regressor=True,
)
Out[65]:
In [66]:
# Integer-coded base and version plus demographics, against the continuous
# performance columns, with regressor=True.
plot.misc.classify_boxplots(
    demo_trials,
    ["base_num", "version_num"] + demo_cols,
    cont_perf_cols,
    regressor=True,
)
Out[66]:
Binary
In [54]:
# Integer-coded base and version only, against the binary performance
# columns (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    ["base_num", "version_num"],
    bin_perf_cols,
)
Out[54]:
In [67]:
# Integer-coded base and version plus demographics, against the binary
# performance columns (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    ["base_num", "version_num"] + demo_cols,
    bin_perf_cols,
)
Out[67]:
Trials - Base, Version, and Complexity
In [68]:
# Integer-coded base and version plus complexity metrics, against the binary
# performance columns (no regressor flag passed).
plot.misc.classify_boxplots(
    demo_trials,
    ["base_num", "version_num"] + complexity_cols,
    bin_perf_cols,
)
Out[68]: