Skip to main content

Analysis by Demographics

In [15]:
%run prelude.ipy

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [3]:
from eyecode import classify
In [49]:
# Human-readable program label built from the "base" and "version" columns.
# Vectorised string concatenation replaces a row-wise apply(axis=1).
trials["program_name"] = trials["base"].astype(str) + " " + trials["version"].astype(str)

# Integer-encode each categorical column in order of first appearance
# (the same numbering the previous list.index() lookup produced).
# factorize() does this in one pass and labels missing values -1 instead
# of failing the lookup.
for col in ["base", "version", "program_name"]:
    trials[col + "_num"] = trials[col].factorize()[0]

# Add binary performance columns
trials["response_ge_half"] = trials.response_proportion >= 0.5
trials["keystroke_ge_one"] = trials.keystroke_coefficient >= 1

# Split trials at the median duration computed over all trials.
duration_all_med = trials.duration_ms.median()
trials["duration_ge_med"] = trials.duration_ms >= duration_all_med
In [42]:
# Add continuous experiment metrics: the per-experiment mean of each trial
# measure. Indexing experiments by exp_id lets the grouped means (which are
# keyed by exp_id) align on assignment.
experiments.index = experiments["exp_id"]
trials_by_exp = trials.groupby("exp_id")
for metric in ("keystroke_coefficient", "response_proportion"):
    experiments["avg_" + metric] = trials_by_exp[metric].mean()
In [53]:
# Column groups reused by the analysis cells below.
demo_cols = ["age", "py_years", "prog_years", "degree_num", "cs_major_num"]
complexity_cols = ["code_lines", "cyclo_comp", "hal_effort", "hal_volume"]
cont_perf_cols = ["grade_value", "duration_ms", "keystroke_coefficient", "response_proportion"]
bin_perf_cols = ["grade_correct", "duration_ge_med", "keystroke_ge_one", "response_ge_half"]
exp_per_cols = ["total_grade", "duration_sec", "avg_keystroke_coefficient", "avg_response_proportion"]

# Add demographics info to trials.
# experiments may carry an index that is also named "exp_id"; recent pandas
# refuses to merge on a key that is both an index level and a column label,
# so drop the index from the right-hand slice before joining on the column.
demo_trials = trials.merge(
    experiments[["exp_id"] + demo_cols].reset_index(drop=True),
    on="exp_id",
)

Experiments - Demographics Only

In [44]:
# Experiment-level frame with demo_cols and exp_per_cols, regressor=True.
# NOTE(review): argument roles (predictor columns, then target columns) are
# inferred from the variable names — confirm against plot.misc.classify_boxplots.
plot.misc.classify_boxplots(experiments, demo_cols, exp_per_cols, regressor=True)
Out[44]:

Trials - Demographics Only

Don't forget: demographics don't change per trial! Every trial from the same experiment carries identical demographic values.

In [18]:
# Trial-level frame with demo_cols and the continuous performance columns,
# regressor=True.
# NOTE(review): presumably demo_cols are predictors and cont_perf_cols are
# targets — confirm against plot.misc.classify_boxplots.
plot.misc.classify_boxplots(demo_trials, demo_cols, cont_perf_cols, regressor=True)
Out[18]:
In [17]:
# Same demographic columns against the binary performance columns; the
# regressor flag is left at the helper's default here.
plot.misc.classify_boxplots(demo_trials, demo_cols, bin_perf_cols)
Out[17]:

Trials - Demographics + Complexity Metrics

Continuous

In [27]:
# Complexity columns followed by demographic columns, against the continuous
# performance columns, regressor=True.
# NOTE(review): assumes demo_trials also carries the complexity columns — verify.
plot.misc.classify_boxplots(demo_trials, complexity_cols + demo_cols, cont_perf_cols, regressor=True)
Out[27]:

Binary

In [24]:
# Complexity columns followed by demographic columns, against the binary
# performance columns; regressor flag left at the helper's default.
plot.misc.classify_boxplots(demo_trials, complexity_cols + demo_cols, bin_perf_cols)
Out[24]:

Trials - Best Demographics and Complexity

Continuous

In [31]:
# Only "prog_years" and "code_lines", against the continuous performance
# columns, regressor=True.
# NOTE(review): why these two columns count as "best" is not shown in this
# notebook — record the selection criterion.
plot.misc.classify_boxplots(demo_trials, ["prog_years", "code_lines"], cont_perf_cols, regressor=True)
Out[31]:

Binary

In [25]:
# Only "prog_years" and "code_lines", against the binary performance columns;
# regressor flag left at the helper's default.
plot.misc.classify_boxplots(demo_trials, ["prog_years", "code_lines"], bin_perf_cols)
Out[25]:

Trials - Best Complexity

Continuous

In [32]:
# Single column "code_lines" against the continuous performance columns,
# regressor=True.
plot.misc.classify_boxplots(demo_trials, ["code_lines"], cont_perf_cols, regressor=True)
Out[32]:

Binary

In [26]:
# Single column "code_lines" against the binary performance columns;
# regressor flag left at the helper's default.
plot.misc.classify_boxplots(demo_trials, ["code_lines"], bin_perf_cols)
Out[26]:

Trials - Base, Version, and Demographics

Continuous

In [65]:
# Integer-encoded program base and version only, against the continuous
# performance columns, regressor=True.
plot.misc.classify_boxplots(demo_trials, ["base_num", "version_num"], cont_perf_cols, regressor=True)
Out[65]:
In [66]:
# Integer-encoded base and version plus the demographic columns, against the
# continuous performance columns, regressor=True.
plot.misc.classify_boxplots(demo_trials, ["base_num", "version_num"] + demo_cols, cont_perf_cols, regressor=True)
Out[66]:

Binary

In [54]:
# Integer-encoded program base and version only, against the binary
# performance columns; regressor flag left at the helper's default.
plot.misc.classify_boxplots(demo_trials, ["base_num", "version_num"], bin_perf_cols)
Out[54]:
In [67]:
# Integer-encoded base and version plus the demographic columns, against the
# binary performance columns; regressor flag left at the helper's default.
plot.misc.classify_boxplots(demo_trials, ["base_num", "version_num"] + demo_cols, bin_perf_cols)
Out[67]:

Trials - Base, Version, and Complexity

In [68]:
# Integer-encoded base and version plus the complexity columns, against the
# binary performance columns; regressor flag left at the helper's default.
# NOTE(review): this section has no matching continuous (regressor=True) run,
# unlike the sections above — confirm whether that is intentional.
plot.misc.classify_boxplots(demo_trials, ["base_num", "version_num"] + complexity_cols, bin_perf_cols)
Out[68]: