In [6]:
# Shared setup: presumably defines trials/experiments/programs, util, plot,
# stats, pandas/np/pyplot used below — confirm against prelude.ipy.
%run prelude.ipy
In [2]:
from eyecode.plot.kelly_colors import kelly_colors

# Per-line duration: total trial time normalized by program length.
trials["norm_duration_ms"] = trials["duration_ms"] / trials["code_lines"]

# Outcome measures and participant demographics used throughout the analysis.
performance_cols = [
    "grade_value",
    "duration_ms",
    "norm_duration_ms",
    "keystroke_coefficient",
    "response_proportion",
]
demographic_cols = ["age", "py_years", "prog_years", "degree_num"]

# Attach per-participant demographics to every trial via the experiment id.
trials = trials.merge(experiments[["exp_id"] + demographic_cols], on="exp_id")
Performance Measures
- Grade
  - A grade of 7 or higher (out of 10) is correct
  - More complex programs should result in a lower grade
- Trial duration
  - Time from start to finish (reading + responding)
  - More complex programs should take longer to read and respond to (higher duration)
- Keystroke coefficient
  - Number of actual keystrokes / required keystrokes
  - More complex programs should require more keystrokes due to mistakes/corrections (higher coefficient)
- Response proportion
  - Time spent responding / trial duration
  - More complex programs should require more reading time up front (higher proportion)
In [17]:
# Focus on one program base and compare grade distributions across its versions.
base = "between"
versions = programs.loc[programs.base == base, "version"].values
b_trials = util.filter_program(trials, base)
# One grade histogram per version; display the shared figure.
axes = plot.misc.grade_distributions(b_trials)
axes[0].figure
Out[17]:
In [12]:
# Grade samples for the two versions of this base (assumes exactly two versions).
grade_groups = [b_trials.loc[b_trials.version == v, "grade_value"] for v in versions]
fun_grades, in_grades = grade_groups
# Non-parametric test for a difference between the two grade distributions.
stats.wilcox_test(fun_grades, in_grades)
Out[12]:
In [22]:
# Run the Wilcoxon test on every performance and demographic measure,
# comparing the two versions of the current base.
rows = []
for col in performance_cols + demographic_cols:
    values_a, values_b = (b_trials.loc[b_trials.version == v, col] for v in versions)
    p_value = stats.wilcox_test(values_a, values_b)
    rows.append([col, p_value, p_value < 0.05])
df = pandas.DataFrame(rows, columns=["Measure", "P-Value", "Significant?"])
show_dataframe(df)
Out[22]:
In [32]:
# Split trials by whether the participant gave the common answer, then
# compare experience distributions (Python years / programming years).
common, not_common = util.split_by_boolean(b_trials, "grade_common")
fig, axes = pyplot.subplots(2, 2)
hist_specs = [
    (common.py_years, axes[0, 0], kelly_colors[0]),
    (not_common.py_years, axes[0, 1], kelly_colors[1]),
    (common.prog_years, axes[1, 0], kelly_colors[2]),
    (not_common.prog_years, axes[1, 1], kelly_colors[3]),
]
for series, ax, color in hist_specs:
    series.hist(ax=ax, color=color)
fig.tight_layout()
fig
Out[32]:
In [34]:
# Do common-answer and uncommon-answer groups differ in Python experience?
stats.wilcox_test(common.py_years, not_common.py_years)
Out[34]:
In [33]:
# Same comparison for overall programming experience.
stats.wilcox_test(common.prog_years, not_common.prog_years)
Out[33]:
Grade Distributions (all programs)
In [14]:
# Save a grade-distribution figure for every program base.
# NOTE: iterate unique bases — `programs` has one row per *version* (see the
# `versions` extraction below), so looping over programs.base directly would
# regenerate and re-save each base's figure once per version.
for base in programs.base.unique():
    versions = programs[programs.base == base].version.values
    b_trials = util.filter_program(trials, base)
    # One panel per version plus a combined panel.
    width = (len(versions) + 1) * 4
    axes = plot.misc.grade_distributions(b_trials, figsize=(width, 4))
    fig = axes[0].figure
    fig.tight_layout()
    fig.savefig("../../../Journal Article/figures/{0}_grade_distributions.png".format(base))
    # Release figure memory between iterations.
    pyplot.close("all")
Duration Distributions (all programs)
In [22]:
# Save a duration-distribution figure for every program base.
# NOTE: iterate unique bases — `programs` has one row per *version*, so
# looping over programs.base directly would redo each base several times.
for base in programs.base.unique():
    versions = programs[programs.base == base].version.values
    b_trials = util.filter_program(trials, base)
    # One panel per version plus a combined panel.
    width = (len(versions) + 1) * 4
    axes = plot.misc.duration_distributions(b_trials, figsize=(width, 4), colors=kelly_colors[5:])
    fig = axes[0].figure
    fig.tight_layout()
    fig.savefig("../../../Journal Article/figures/{0}_duration_distributions.png".format(base))
    # Release figure memory between iterations.
    pyplot.close("all")
Correct Grade Correlations by Base
In [111]:
# Correlation of correct-grade outcomes across program bases.
# NOTE(review): this cell is an exact duplicate of the next one — likely a
# stale re-run copy; consider deleting one.
ax = plot.misc.grade_correlations(trials, figsize=(10, 8))
ax.figure
Out[111]:
In [3]:
# Correlation of correct-grade outcomes across program bases
# (duplicate of the previous cell — kept as in the original).
ax = plot.misc.grade_correlations(trials, figsize=(10, 8))
ax.figure
Out[3]:
In [5]:
# Build a participant-by-base matrix of grade_correct values, dropping
# participants who did not see every base.
bases = list(programs.base.unique())
rows = []
for exp_id, exp_trials in trials.groupby("exp_id"):
    row = [exp_id]
    for b in bases:
        base_trials = util.filter_program(exp_trials, b)
        # One trial per (participant, base) is expected; NaN if the
        # participant never saw this base.
        row.append(base_trials.grade_correct.values[0] if len(base_trials) > 0 else np.NaN)
    rows.append(row)
df = pandas.DataFrame(rows, columns=["exp_id"] + bases).dropna()
In [17]:
def entropy(X):
    """Shannon entropy (in nats) of the empirical distribution of X.

    Parameters
    ----------
    X : pandas.Series
        Sample of discrete values.

    Returns
    -------
    float
        -sum(p * log(p)) over the distinct values of X, natural log.
    """
    # value_counts makes one pass over X instead of re-scanning it once
    # per distinct value as the per-value filtering loop did.
    probs = X.value_counts(normalize=True).values
    # probs are strictly positive, so log() is always finite here.
    return float(-np.sum(probs * np.log(probs)))
In [20]:
def mutual_information(XY, x_col, y_col):
    """Mutual information (in nats) between two discrete columns of XY.

    Parameters
    ----------
    XY : pandas.DataFrame
        Joint sample, one observation per row.
    x_col, y_col : str
        Names of the two discrete columns.

    Returns
    -------
    float
        sum over (x, y) of p(x, y) * log(p(x, y) / (p(x) * p(y))).
    """
    n = float(len(XY))
    x_series = XY[x_col]
    y_series = XY[y_col]
    mi = 0.0
    for x_v in x_series.unique():
        # Hoist the x marginal out of the inner loop.
        x_mask = x_series == x_v
        p_x = x_mask.sum() / n
        for y_v in y_series.unique():
            p_xy = (x_mask & (y_series == y_v)).sum() / n
            # Empty joint cells contribute 0 to MI. Skipping them up front
            # avoids the log(0) RuntimeWarning and the NaN check the
            # original needed to filter those terms out.
            if p_xy == 0:
                continue
            p_y = (y_series == y_v).sum() / n
            mi += p_xy * np.log(p_xy / (p_x * p_y))
    return mi
In [60]:
import itertools as it

# Pairwise normalized mutual information between program bases.
rows = []
for b1, b2 in it.product(bases, repeat=2):
    h_b1 = entropy(df[b1])
    h_b2 = entropy(df[b2])
    mi = mutual_information(df, b1, b2)
    # Redundancy: MI scaled by the sum of the marginal entropies.
    r = mi / float(h_b1 + h_b2)
    rows.append([b1, b2, r * 2])
x = pandas.DataFrame(rows, columns=["Base 1", "Base 2", "MI"])
x = x.pivot("Base 1", "Base 2", "MI")
# Mask the diagonal (~1.0) and near-zero pairs to highlight real overlap.
x[(x < .99) & (x >= 0.015)]
Out[60]:
In [73]:
import networkx
In [92]:
# Build a complete graph over bases, edge-weighted by the pairwise MI
# values in the matrix `x`, and draw it with a spring layout.
g = networkx.Graph()
for idx1, row1 in x.iterrows():
    for idx2, row2 in row1.iteritems():
        if idx1 != idx2:
            g.add_edge(idx1, idx2, weight=row2)
fig = pyplot.figure()
# NOTE(review): weight="weight.sum" looks suspect — no edge attribute named
# "weight.sum" is set above; confirm whether weight="weight" was intended.
networkx.draw_spring(g, weight="weight.sum")
fig
Out[92]:
In [37]:
# Heatmap of the pairwise MI matrix, labeled by base on both axes.
fig = pyplot.figure(figsize=(8, 8))
ax = pyplot.axes()
pyplot.pcolor(x.values)
# Center the tick labels on the pcolor cells.
tick_positions = np.arange(len(bases)) + 0.5
ax.set_yticks(tick_positions)
ax.set_yticklabels(bases)
ax.set_xticks(tick_positions)
ax.set_xticklabels(bases, rotation=90)
pyplot.close(fig)
fig
Out[37]:
In [100]:
from scipy.cluster.vq import whiten, kmeans2
In [142]:
# Cluster program bases into k=2 groups by their mean performance profile
# (grade, duration, keystroke coefficient, response proportion).
obs = []
# NOTE(review): this rebinds `bases`, shadowing the list built earlier from
# programs.base.unique() — confirm the groupby order matches downstream uses.
bases = []
for base, ts in trials.groupby("base"):
    bases.append(base)
    obs.append([ts.grade_value.mean(), ts.duration_ms.mean(),
    ts.keystroke_coefficient.mean(), ts.response_proportion.mean()])
# Whiten (scale each feature to unit variance) before k-means.
w = whiten(obs)
k = 2
# kmeans2 uses random initialization, so cluster assignments are not
# deterministic across runs.
_, labels = kmeans2(w, k)
for i in range(k):
    cls = []
    for j, l in enumerate(labels):
        if l == i:
            cls.append(bases[j])
    # Python 2 print statement — the notebook predates Python 3.
    print cls
Category
In [146]:
# Encode each program category as an integer: its index of first appearance.
values = list(programs["category"].unique())
programs["category_num"] = programs["category"].map(values.index)
programs
Out[146]:
Visualizing Performance Metrics
In [13]:
# 3D scatter of the three performance metrics for one program base,
# rendered from several viewpoints by plot.misc.plot3d_views.
base = "whitespace"
b_trials = util.filter_program(trials, base)
xs, ys, zs = b_trials.grade_value, np.log(b_trials.duration_ms / 1000.0), b_trials.response_proportion
# NOTE(review): b_trials holds a single base, so base_num is constant here
# and every point gets the same color — confirm whether coloring the full
# `trials` set by base was the intent.
colors = [kelly_colors[b] for b in b_trials.base_num]
def plot_metrics(ax):
    # Callback invoked once per view; closes over xs/ys/zs/colors above.
    ax.set_xlabel("Grade (0-10)")
    ax.set_ylabel("Duration (log sec)")
    ax.set_zlabel("Response Prop.")
    ax.scatter3D(xs, ys, zs, color=colors)
fig = plot.misc.plot3d_views(plot_metrics, figsize=(20, 10))
fig.tight_layout()
fig.suptitle("Performance Scatter Plot ({0})".format(base))
fig
Out[13]:
In [14]:
def pareto_frontier_multi(myArray):
    """Greedy dominance-chain filter over the rows of a 2-D array.

    Sorts the rows by their first column, always keeps the first row, and
    then keeps each later row only if it is >= the most recently kept row
    in every dimension.

    NOTE(review): this yields a monotone chain anchored at the smallest
    first-column row, not the full Pareto-optimal set — behavior kept
    exactly as in the original.

    Parameters
    ----------
    myArray : numpy.ndarray of shape (n, d)

    Returns
    -------
    numpy.ndarray
        The kept rows, in first-column sorted order.
    """
    ordered = myArray[myArray[:, 0].argsort()]
    # Seed the chain with the row having the smallest first coordinate.
    frontier = ordered[0:1, :]
    for candidate in ordered[1:, :]:
        # Keep only rows that dominate the last kept row in every dimension.
        if np.all(candidate >= frontier[-1]):
            frontier = np.concatenate((frontier, [candidate]))
    return frontier
In [26]:
# Normalize the four performance measures to a common [0, 1]-ish scale:
# flip grade so higher = worse everywhere, log-compress duration, then
# divide each column by its maximum.
x = trials[["grade_value", "duration_ms", "keystroke_coefficient", "response_proportion"]].copy()
x["grade_value"] = 10 - x["grade_value"]
x["duration_ms"] = np.log(x["duration_ms"])
# DataFrame / Series broadcasts column-wise: same effect as the per-column loop.
x /= x.max()
pareto_frontier_multi(x.values)
Out[26]:
In [24]:
# Raw normalized matrix that was fed to pareto_frontier_multi above.
x.values
Out[24]: