
Performance Models

In [1]:
%run prelude.ipy
In [2]:
import statsmodels.formula.api as sm
from statsmodels import graphics
from statsmodels.api import families
from patsy import Treatment
from eyecode import classify
In [5]:
y = np.log(trials.duration_ms)
X = trials[["base_num", "version_num"]]
sm.GLM(y, X).fit().summary()
Out[5]:
Generalized Linear Model Regression Results
Dep. Variable: duration_ms No. Observations: 1602
Model: GLM Df Residuals: 1600
Model Family: Gaussian Df Model: 1
Link Function: identity Scale: 19.9525928208
Method: IRLS Log-Likelihood: -4669.8
Date: Wed, 06 Nov 2013 Deviance: 31924.
Time: 20:36:14 Pearson chi2: 3.19e+04
No. Iterations: 3
coef std err t P>|t| [95.0% Conf. Int.]
base_num 0.9645 0.031 31.293 0.000 0.904 1.025
version_num 0.4365 0.013 34.385 0.000 0.412 0.461
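Without an intercept, the base_num and version_num coefficients above also absorb the overall mean log-duration. A minimal sketch of the same fit with an explicit constant, using statsmodels' add_constant (not part of the original cell, shown only for comparison):

import numpy as np
import statsmodels.api as sma

y = np.log(trials.duration_ms)
X = sma.add_constant(trials[["base_num", "version_num"]])  # prepend an intercept column
sma.GLM(y, X).fit().summary()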
In [15]:
plot.misc.classify_boxplots(trials, ["base_num"], ["duration_ms_log"], regressor=True)
Out[15]:
In [24]:
import scipy.stats
In [55]:
base = "overload"
versions = programs[programs.base == base].version.values
b_trials = util.filter_program(trials, base)
axes = plot.misc.respprop_distributions(b_trials)
axes[0].figure
Out[55]:
In [57]:
samples = []
for v in versions:
    samples.append(util.filter_program(b_trials, base, v).response_proportion.values)
In [58]:
scipy.stats.mstats.kruskalwallis(*samples)
Out[58]:
(1.4703770023136522, 0.47941508015969614)
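The Kruskal-Wallis test above covers only the overload base (p ≈ 0.48, so no evidence that response proportion differs across its versions). A sketch that repeats the test for every base, assuming the same util.filter_program helper used above:

for b in sorted(programs.base.unique()):
    b_trials = util.filter_program(trials, b)
    samples = [util.filter_program(b_trials, b, v).response_proportion.values
               for v in programs[programs.base == b].version.values]
    h, p = scipy.stats.mstats.kruskalwallis(*samples)
    print b, p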
In [31]:
experiments
Out[31]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 162 entries, 0 to 161
Data columns (total 17 columns):
exp_id               162  non-null values
age                  162  non-null values
degree               162  non-null values
gender               162  non-null values
py_years             162  non-null values
prog_years           162  non-null values
cs_major             162  non-null values
difficulty           162  non-null values
guess_correct        162  non-null values
total_grade          162  non-null values
duration_sec         162  non-null values
location             162  non-null values
gender_num           162  non-null values
degree_num           162  non-null values
cs_major_num         162  non-null values
difficulty_num       162  non-null values
guess_correct_num    162  non-null values
dtypes: float64(3), int64(8), object(6)
In [16]:
y = experiments.total_grade
X = experiments[["age", "py_years", "prog_years"]]
sm.GLM(y, X).fit().summary()
Out[16]:
Generalized Linear Model Regression Results
Dep. Variable: total_grade No. Observations: 162
Model: GLM Df Residuals: 159
Model Family: Gaussian Df Model: 2
Link Function: identity Scale: 579.368216602
Method: IRLS Log-Likelihood: -743.67
Date: Thu, 12 Sep 2013 Deviance: 92120.
Time: 19:20:33 Pearson chi2: 9.21e+04
No. Iterations: 3
coef std err t P>|t| [95.0% Conf. Int.]
age 2.5830 0.120 21.566 0.000 2.348 2.818
py_years 4.4059 1.055 4.175 0.000 2.337 6.474
prog_years -1.1169 0.392 -2.847 0.005 -1.886 -0.348
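As with the trial-level model above, this fit has no intercept term. Later cells use the patsy formula interface, which adds an intercept automatically; a sketch of the same demographic model written that way (reusing the families import from the top of the notebook):

m = sm.glm(formula="total_grade ~ age + py_years + prog_years",
           data=experiments, family=families.Gaussian())
m.fit().summary()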
In [17]:
trials
Out[17]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1602 entries, 0 to 1601
Data columns (total 25 columns):
trial_id                 1602  non-null values
exp_id                   1602  non-null values
base                     1602  non-null values
version                  1602  non-null values
grade_value              1602  non-null values
grade_category           1602  non-null values
started_ms               1602  non-null values
ended_ms                 1602  non-null values
duration_ms              1602  non-null values
keystroke_duration_ms    1602  non-null values
keystroke_count          1602  non-null values
keystroke_efficiency     1602  non-null values
response_proportion      1602  non-null values
code_chars               1602  non-null values
code_lines               1602  non-null values
cyclo_comp               1602  non-null values
hal_eff                  1602  non-null values
hal_vol                  1602  non-null values
output_chars             1602  non-null values
output_lines             1602  non-null values
true_output              1602  non-null values
pred_output              1602  non-null values
grade_perfect            1602  non-null values
grade_correct            1602  non-null values
grade_common             1602  non-null values
dtypes: bool(3), float64(4), int64(13), object(5)
In [21]:
y = trials.grade_value
X = trials[["code_lines", "cyclo_comp", "hal_eff", "hal_vol", "response_proportion", "duration_ms", "keystroke_efficiency"]]
sm.GLM(y, X).fit().summary()
Out[21]:
Generalized Linear Model Regression Results
Dep. Variable: grade_value No. Observations: 1602
Model: GLM Df Residuals: 1595
Model Family: Gaussian Df Model: 6
Link Function: identity Scale: 13.6558380381
Method: IRLS Log-Likelihood: -4363.6
Date: Thu, 12 Sep 2013 Deviance: 21781.
Time: 19:23:46 Pearson chi2: 2.18e+04
No. Iterations: 3
coef std err t P>|t| [95.0% Conf. Int.]
code_lines 0.5147 0.035 14.752 0.000 0.446 0.583
cyclo_comp 0.8050 0.097 8.336 0.000 0.616 0.994
hal_eff -0.0002 1.26e-05 -15.836 0.000 -0.000 -0.000
hal_vol 0.0021 0.002 0.855 0.393 -0.003 0.007
response_proportion 2.8366 0.314 9.023 0.000 2.220 3.453
duration_ms 3.741e-06 1.37e-06 2.733 0.006 1.06e-06 6.42e-06
keystroke_efficiency -0.1747 0.056 -3.142 0.002 -0.284 -0.066
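The predictors in this model sit on very different scales (hal_eff is orders of magnitude larger than response_proportion), so the raw coefficient sizes above are not directly comparable. A sketch that z-scores each column first, so every coefficient measures the effect of one standard deviation of its predictor:

cols = ["code_lines", "cyclo_comp", "hal_eff", "hal_vol",
        "response_proportion", "duration_ms", "keystroke_efficiency"]
X = trials[cols]
X_std = (X - X.mean()) / X.std()   # z-score each predictor
sm.GLM(trials.grade_value, X_std).fit().summary()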
In [41]:
y = trials.grade_value
X = trials[["duration_ms", "response_proportion"]]
sm.GLM(y, X).fit().summary()
Out[41]:
Generalized Linear Model Regression Results
Dep. Variable: grade_value No. Observations: 1602
Model: GLM Df Residuals: 1600
Model Family: Gaussian Df Model: 1
Link Function: identity Scale: 28.9801792829
Method: IRLS Log-Likelihood: -4968.8
Date: Thu, 12 Sep 2013 Deviance: 46368.
Time: 20:18:04 Pearson chi2: 4.64e+04
No. Iterations: 3
coef std err t P>|t| [95.0% Conf. Int.]
duration_ms 2.558e-05 1.52e-06 16.834 0.000 2.26e-05 2.86e-05
response_proportion 9.9774 0.359 27.796 0.000 9.274 10.681
In [23]:
y = trials.grade_correct
X = trials[["code_lines", "cyclo_comp", "hal_eff", "hal_vol", "response_proportion", "duration_ms", "keystroke_efficiency"]]
sm.Logit(y, X).fit().summary()
Optimization terminated successfully.
         Current function value: 848.329276
         Iterations 6

Out[23]:
Logit Regression Results
Dep. Variable: grade_correct No. Observations: 1602
Model: Logit Df Residuals: 1595
Method: MLE Df Model: 6
Date: Thu, 12 Sep 2013 Pseudo R-squ.: 0.07538
Time: 19:28:13 Log-Likelihood: -848.33
converged: True LL-Null: -917.49
LLR p-value: 2.259e-27
coef std err z P>|z| [95.0% Conf. Int.]
code_lines 0.0350 0.023 1.511 0.131 -0.010 0.080
cyclo_comp -0.4135 0.074 -5.552 0.000 -0.560 -0.268
hal_eff -9.632e-05 9.56e-06 -10.072 0.000 -0.000 -7.76e-05
hal_vol 0.0112 0.002 5.941 0.000 0.007 0.015
response_proportion 0.5612 0.201 2.797 0.005 0.168 0.954
duration_ms 1.905e-06 1.02e-06 1.867 0.062 -9.52e-08 3.91e-06
keystroke_efficiency -0.1560 0.034 -4.617 0.000 -0.222 -0.090
In [46]:
cols = ["code_lines", "cyclo_comp", "hal_eff", "hal_vol", "duration_ms", "response_proportion", "keystroke_efficiency"]
df = classify.feature_importances(trials, cols, "grade_correct")
fig = plot.misc.feature_importances(df).figure
fig.tight_layout()
fig
Out[46]:
In [52]:
trials[["response_proportion", "grade_correct"]].corr()
Out[52]:
response_proportion grade_correct
response_proportion 1.000000 -0.044063
grade_correct -0.044063 1.000000
In [28]:
cols = ["code_lines", "cyclo_comp", "hal_eff", "hal_vol", "duration_ms"]
classify.feature_importances(trials, cols, "grade_value", regressor=True)
Out[28]:
column importance importance_std
0 cyclo_comp 0.359428 0.273018
1 hal_vol 0.245420 0.235357
2 hal_eff 0.209919 0.219426
3 code_lines 0.124042 0.148497
4 duration_ms 0.061191 0.013429
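classify.feature_importances is part of the eyecode package; a rough, hypothetical stand-in, assuming it wraps a scikit-learn random forest and summarizes the per-tree importances (which would produce a table shaped like the one above):

from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas

cols = ["code_lines", "cyclo_comp", "hal_eff", "hal_vol", "duration_ms"]
forest = RandomForestRegressor(n_estimators=100)
forest.fit(trials[cols].values, trials.grade_value.values)
per_tree = np.array([t.feature_importances_ for t in forest.estimators_])  # importance per tree
pandas.DataFrame({"column": cols,
                  "importance": forest.feature_importances_,
                  "importance_std": per_tree.std(axis=0)})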
In [36]:
cols = ["age", "py_years", "prog_years", "degree_num", "gender_num", "cs_major_num"]
df = classify.feature_importances(experiments, cols, "total_grade", regressor=True, num_estimators=1000)
plot.misc.feature_importances(df).figure
Out[36]:
In [78]:
cols = ["cyclo_comp", "hal_eff", "hal_vol", "duration_ms", "response_proportion", "keystroke_efficiency"]
df = classify.area_under_curve(trials, cols, "grade_correct")
plot.misc.area_under_curve(df).figure
Out[78]:
In [94]:
cols = ["cyclo_comp", "hal_eff", "hal_vol", "duration_ms", "response_proportion", "keystroke_efficiency"]
df = classify.area_under_curve(trials, cols, "grade_value", regressor=True)
plot.misc.area_under_curve(df).figure
Out[94]:
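Like feature_importances, area_under_curve lives in the eyecode classify module. A minimal per-feature sketch for the binary grade_correct case, assuming scikit-learn's roc_auc_score (the regressor=True call above presumably substitutes a regression-appropriate score):

from sklearn.metrics import roc_auc_score
import pandas

cols = ["cyclo_comp", "hal_eff", "hal_vol", "duration_ms",
        "response_proportion", "keystroke_efficiency"]
rows = [(c, roc_auc_score(trials.grade_correct.values, trials[c].values))
        for c in cols]
pandas.DataFrame(rows, columns=["column", "auc"])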

Grade from Program Base/Version

In []:
def to_int(x):
    return x.astype(int)
In [126]:
rows = []
g = True
for _, df in trials.groupby("exp_id"):
    rows.append({ "base": "_chance", "grade_correct": g})
    g = not g
fake_trials = pandas.concat([trials, pandas.DataFrame(rows)], ignore_index=True)
In [134]:
for b, df in trials.groupby("base"):
    print b, df.grade_correct.sum() / float(len(df))
between 0.40251572327
counting 0.590062111801
funcall 0.888888888889
initvar 0.7375
order 0.925465838509
overload 0.832298136646
partition 0.69375
rectangle 0.949685534591
scope 0.51572327044
whitespace 0.8625

In [192]:
m = sm.glm(formula="to_int(grade_correct) ~ base - 1", data=trials, family=families.Binomial())
fit = m.fit()
fit.summary()
Out[192]:
Generalized Linear Model Regression Results
Dep. Variable: to_int(grade_correct) No. Observations: 1602
Model: GLM Df Residuals: 1592
Model Family: Binomial Df Model: 9
Link Function: logit Scale: 1.0
Method: IRLS Log-Likelihood: -784.74
Date: Thu, 07 Nov 2013 Deviance: 1569.5
Time: 15:32:19 Pearson chi2: 1.60e+03
No. Iterations: 7
coef std err t P>|t| [95.0% Conf. Int.]
base[between] -0.3950 0.162 -2.443 0.015 -0.712 -0.078
base[counting] 0.3642 0.160 2.273 0.023 0.050 0.678
base[funcall] 2.0794 0.250 8.318 0.000 1.589 2.569
base[initvar] 1.0330 0.180 5.749 0.000 0.681 1.385
base[order] 2.5190 0.300 8.395 0.000 1.931 3.107
base[overload] 1.6020 0.211 7.594 0.000 1.189 2.015
base[partition] 0.8177 0.172 4.768 0.000 0.482 1.154
base[rectangle] 2.9378 0.363 8.098 0.000 2.227 3.649
base[scope] 0.0629 0.159 0.396 0.692 -0.248 0.374
base[whitespace] 1.8362 0.230 7.999 0.000 1.386 2.286
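Because this is a logit-link model with one indicator per base and no intercept, each coefficient is simply the log-odds of a correct grade for that base; applying the inverse logit recovers the raw proportions printed earlier (e.g. between: 1/(1 + exp(0.3950)) ≈ 0.403). A quick check:

import numpy as np
1.0 / (1.0 + np.exp(-fit.params))   # inverse logit of each base coefficient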
In [193]:
ax = plot.misc.fit_coefficients_base(fit)
ax.figure.tight_layout()
ax.figure
Out[193]:
In [174]:
m = sm.logit(formula="to_int(grade_correct) ~ program_name - 1", data=trials)
fit = m.fit()
fit.summary()
Optimization terminated successfully.
         Current function value: 0.473106
         Iterations 8

Out[174]:
Logit Regression Results
Dep. Variable: to_int(grade_correct) No. Observations: 1602
Model: Logit Df Residuals: 1577
Method: MLE Df Model: 24
Date: Thu, 07 Nov 2013 Pseudo R-squ.: 0.1739
Time: 15:11:16 Log-Likelihood: -757.92
converged: True LL-Null: -917.49
LLR p-value: 2.284e-53
coef std err z P>|z| [95.0% Conf. Int.]
program_name[between_functions] -0.6931 0.255 -2.714 0.007 -1.194 -0.193
program_name[between_inline] -0.1782 0.212 -0.842 0.400 -0.593 0.237
program_name[counting_nospace] 1.3581 0.264 5.139 0.000 0.840 1.876
program_name[counting_twospaces] -0.6523 0.247 -2.645 0.008 -1.136 -0.169
program_name[funcall_nospace] 2.0369 0.434 4.693 0.000 1.186 2.888
program_name[funcall_space] 1.5686 0.348 4.513 0.000 0.887 2.250
program_name[funcall_vars] 3.2189 0.721 4.464 0.000 1.806 4.632
program_name[initvar_bothbad] 1.5640 0.367 4.267 0.000 0.846 2.282
program_name[initvar_good] 0.6931 0.289 2.401 0.016 0.127 1.259
program_name[initvar_onebad] 0.9555 0.304 3.145 0.002 0.360 1.551
program_name[order_inorder] 2.3979 0.426 5.624 0.000 1.562 3.234
program_name[order_shuffled] 2.6271 0.423 6.214 0.000 1.799 3.456
program_name[overload_multmixed] 1.7148 0.362 4.736 0.000 1.005 2.424
program_name[overload_plusmixed] 1.7430 0.410 4.254 0.000 0.940 2.546
program_name[overload_strings] 1.3863 0.337 4.112 0.000 0.726 2.047
program_name[partition_balanced] 0.4055 0.289 1.405 0.160 -0.160 0.971
program_name[partition_unbalanced] 1.0745 0.310 3.471 0.001 0.468 1.681
program_name[partition_unbalanced_pivot] 0.9808 0.303 3.240 0.001 0.387 1.574
program_name[rectangle_basic] 3.8286 1.011 3.788 0.000 1.847 5.810
program_name[rectangle_class] 2.6027 0.518 5.023 0.000 1.587 3.618
program_name[rectangle_tuples] 2.8332 0.594 4.769 0.000 1.669 3.998
program_name[scope_diffname] 0.2177 0.221 0.986 0.324 -0.215 0.651
program_name[scope_samename] -0.1054 0.230 -0.459 0.647 -0.556 0.345
program_name[whitespace_linedup] 2.0513 0.354 5.793 0.000 1.357 2.745
program_name[whitespace_zigzag] 1.6546 0.303 5.466 0.000 1.061 2.248
In [187]:
ax = plot.misc.fit_coefficients_version(fit, figsize=(12, 8))
ax.figure.tight_layout()
ax.figure
Out[187]:
In [82]:
means = fit.conf_int().apply(lambda x: np.mean(x), axis=1)
err = fit.conf_int().apply(lambda x: (x[1] - x[0]) / 2.0, axis=1)
names = sorted(trials.program_name.unique())
colors = []
last_base = None
color_i = -1
for n in names:
    base = n.split("_")[0]
    if base != last_base:
        color_i += 1
        last_base = base
    colors.append(kelly_colors[color_i])
    
ax = means.plot(kind="bar", yerr=err, error_kw={ "ecolor": "black"}, color=colors, figsize=(12, 8))
ax.set_title("Logit Coefficients for Correct Grade by Base/Version")
ax.set_ylabel("Coefficient")
ax.set_xlabel("Program Base/Version")
ax.set_xticklabels(names)
fig = ax.figure
fig.tight_layout()
fig
Out[82]:

Duration from Program Base/Version

In [47]:
m = sm.ols(formula="duration_ms_log ~ base -1", data=trials)
fit = m.fit()
fit.summary()
Out[47]:
OLS Regression Results
Dep. Variable: duration_ms_log R-squared: 0.997
Model: OLS Adj. R-squared: 0.997
Method: Least Squares F-statistic: 6.035e+04
Date: Thu, 07 Nov 2013 Prob (F-statistic): 0.00
Time: 11:17:38 Log-Likelihood: -1357.4
No. Observations: 1602 AIC: 2735.
Df Residuals: 1592 BIC: 2789.
Df Model: 10
coef std err t P>|t| [95.0% Conf. Int.]
base[between] 11.9904 0.045 266.945 0.000 11.902 12.079
base[counting] 10.8884 0.045 243.931 0.000 10.801 10.976
base[funcall] 10.3847 0.044 233.368 0.000 10.297 10.472
base[initvar] 10.9474 0.045 244.490 0.000 10.860 11.035
base[order] 10.9793 0.045 245.967 0.000 10.892 11.067
base[overload] 10.3769 0.045 232.472 0.000 10.289 10.464
base[partition] 10.6605 0.045 238.083 0.000 10.573 10.748
base[rectangle] 11.2353 0.045 250.133 0.000 11.147 11.323
base[scope] 10.7979 0.045 240.395 0.000 10.710 10.886
base[whitespace] 11.5817 0.045 258.655 0.000 11.494 11.670
Omnibus: 155.980 Durbin-Watson: 1.140
Prob(Omnibus): 0.000 Jarque-Bera (JB): 218.183
Skew: 0.758 Prob(JB): 4.19e-48
Kurtosis: 3.985 Cond. No. 1.01
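Since the response is log duration in milliseconds, exponentiating the per-base coefficients puts them back on the original scale; exp(11.9904) is roughly 161,000 ms, i.e. about 2.7 minutes for the between programs. A one-liner:

import numpy as np
np.exp(fit.params) / 1000.0   # per-base geometric-mean duration, in seconds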
In [52]:
means = fit.conf_int().apply(lambda x: np.mean(x), axis=1)
err = fit.conf_int().apply(lambda x: (x[1] - x[0]) / 2.0, axis=1)
ax = means.plot(kind="bar", yerr=err, error_kw={ "ecolor": "black"}, color=kelly_colors)
ax.set_title("OLS Coefficients for Log Duration (ms) by Program")
ax.set_ylabel("Coefficient (95% CI)")
ax.set_ylim(10, 12.2)
ax.set_xlabel("Program Base")
ax.set_xticklabels(sorted(trials.base.unique()))
fig = ax.figure
fig.tight_layout()
fig
Out[52]:
In [41]:
m = sm.ols(formula="duration_ms_log ~ program_name -1", data=trials)
fit = m.fit()
fit.summary()
Out[41]:
OLS Regression Results
Dep. Variable: duration_ms_log R-squared: 0.997
Model: OLS Adj. R-squared: 0.997
Method: Least Squares F-statistic: 2.417e+04
Date: Thu, 07 Nov 2013 Prob (F-statistic): 0.00
Time: 11:07:16 Log-Likelihood: -1349.0
No. Observations: 1602 AIC: 2748.
Df Residuals: 1577 BIC: 2882.
Df Model: 25
coef std err t P>|t| [95.0% Conf. Int.]
program_name[between_functions] 11.9825 0.068 175.829 0.000 11.849 12.116
program_name[between_inline] 11.9965 0.060 201.046 0.000 11.879 12.114
program_name[counting_nospace] 10.9755 0.060 181.881 0.000 10.857 11.094
program_name[counting_twospaces] 10.7834 0.066 162.756 0.000 10.653 10.913
program_name[funcall_nospace] 10.4056 0.079 132.553 0.000 10.252 10.560
program_name[funcall_space] 10.3400 0.074 139.109 0.000 10.194 10.486
program_name[funcall_vars] 10.4137 0.079 132.656 0.000 10.260 10.568
program_name[initvar_bothbad] 10.9568 0.079 139.574 0.000 10.803 11.111
program_name[initvar_good] 10.9287 0.077 141.868 0.000 10.778 11.080
program_name[initvar_onebad] 10.9572 0.077 142.238 0.000 10.806 11.108
program_name[order_inorder] 10.8999 0.067 163.383 0.000 10.769 11.031
program_name[order_shuffled] 11.0436 0.060 184.045 0.000 10.926 11.161
program_name[overload_multmixed] 10.2579 0.074 139.188 0.000 10.113 10.402
program_name[overload_plusmixed] 10.4209 0.083 126.204 0.000 10.259 10.583
program_name[overload_strings] 10.4672 0.076 137.129 0.000 10.317 10.617
program_name[partition_balanced] 10.6149 0.080 132.593 0.000 10.458 10.772
program_name[partition_unbalanced] 10.6486 0.076 139.506 0.000 10.499 10.798
program_name[partition_unbalanced_pivot] 10.7140 0.076 140.363 0.000 10.564 10.864
program_name[rectangle_basic] 11.1206 0.083 134.678 0.000 10.959 11.283
program_name[rectangle_class] 11.2706 0.074 151.628 0.000 11.125 11.416
program_name[rectangle_tuples] 11.2971 0.077 146.651 0.000 11.146 11.448
program_name[scope_diffname] 10.7570 0.062 173.122 0.000 10.635 10.879
program_name[scope_samename] 10.8425 0.065 166.976 0.000 10.715 10.970
program_name[whitespace_linedup] 11.5761 0.064 181.758 0.000 11.451 11.701
program_name[whitespace_zigzag] 11.5872 0.063 184.222 0.000 11.464 11.711
Omnibus: 169.169 Durbin-Watson: 1.139
Prob(Omnibus): 0.000 Jarque-Bera (JB): 243.154
Skew: 0.795 Prob(JB): 1.58e-53
Kurtosis: 4.056 Cond. No. 1.38
In [45]:
means = fit.conf_int().apply(lambda x: np.mean(x), axis=1) - 10
err = fit.conf_int().apply(lambda x: (x[1] - x[0]) / 2.0, axis=1)
names = sorted(trials.program_name.unique())
colors = []
last_base = None
color_i = -1
for n in names:
    base = n.split("_")[0]
    if base != last_base:
        color_i += 1
        last_base = base
    colors.append(kelly_colors[color_i])
    
ax = means.plot(kind="bar", yerr=err, error_kw={ "ecolor": "black"}, color=colors, figsize=(12, 8))
ax.set_title("OLS Coefficients for Log Duration (ms) by Base/Version")
ax.set_ylabel("Coefficient - 10")
ax.set_xlabel("Program Base/Version")
ax.set_xticklabels(names)
fig = ax.figure
fig.tight_layout()
fig
Out[45]:
In [66]:
m = sm.ols(formula="duration_ms_log ~ code_lines", data=trials)
fit = m.fit()
fit.summary()
Out[66]:
OLS Regression Results
Dep. Variable: duration_ms_log R-squared: 0.227
Model: OLS Adj. R-squared: 0.226
Method: Least Squares F-statistic: 469.2
Date: Thu, 07 Nov 2013 Prob (F-statistic): 1.85e-91
Time: 11:34:07 Log-Likelihood: -1587.6
No. Observations: 1602 AIC: 3179.
Df Residuals: 1600 BIC: 3190.
Df Model: 1
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 10.2632 0.037 277.496 0.000 10.191 10.336
code_lines 0.0637 0.003 21.662 0.000 0.058 0.069
Omnibus: 20.175 Durbin-Watson: 1.407
Prob(Omnibus): 0.000 Jarque-Bera (JB): 20.790
Skew: 0.276 Prob(JB): 3.06e-05
Kurtosis: 2.921 Cond. No. 28.7
In [67]:
graphics.regressionplots.plot_fit(fit, 1)
Out[67]:
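On the original scale, the code_lines fit says each extra line multiplies the expected duration by exp(0.0637) ≈ 1.066, roughly 6-7% per line. A sketch of the implied curve over an illustrative range of program lengths:

import numpy as np
import pandas

lines = np.arange(5, 30)
pred_sec = np.exp(10.2632 + 0.0637 * lines) / 1000.0   # fitted model, back-transformed to seconds
pandas.Series(pred_sec, index=lines)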
In [208]:
t = trials[trials.response_proportion < .4]
t.grade_value.hist().figure
Out[208]:
In [207]:
t = trials[trials.response_proportion >= .4]
t.grade_value.hist().figure
Out[207]:
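These last two cells split the trials at a response proportion of 0.4 and compare the grade distributions by eye. One possible follow-up, reusing the scipy.stats import from earlier, would be a Mann-Whitney U test on grade_value between the two groups:

low = trials[trials.response_proportion < .4].grade_value.values
high = trials[trials.response_proportion >= .4].grade_value.values
scipy.stats.mannwhitneyu(low, high)   # (U statistic, one-sided p-value in older scipy)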