# Small directed example graph: seven nodes, four edges, drawn at fixed
# coordinates with a per-node colour value.
example_graph = nx.DiGraph()
example_graph.add_nodes_from([1, 2, 3, 4, 5, 6, 7])
example_graph.add_edges_from([(3, 2), (4, 2), (6, 5), (7, 6)])
matplotlib.style.use(SBSTYLE)
nx.draw_networkx(
    example_graph,
    pos={1: (0, 0), 2: (0, 1), 3: (1, 1), 4: (1, 1.5),
         5: (0, 2), 6: (1, 2), 7: (2, 2)},
    # The keyword is `nodelist`; the previous spelling `node_list` is not a
    # draw_networkx parameter, so the node order was never actually pinned.
    nodelist=[1, 2, 3, 4, 5, 6, 7],
    # One colour value per entry of nodelist, in the same order.
    node_color=[1, 2, 1, 3, 2, 1, 2],
)
# Assigning the return value keeps the notebook from echoing the axis limits.
limits = plt.axis("off")
# Second example: a weighted directed graph between three named nodes.
example_graph2 = nx.DiGraph()
example_graph2.add_nodes_from(['Alice', 'Bob', 'Carol'])
example_graph2.add_weighted_edges_from(
    [('Alice', 'Bob', 2), ('Bob', 'Alice', 1), ('Carol', 'Bob', 1)])
matplotlib.style.use(SBSTYLE)
# No `pos` is given, so networkx picks the layout itself.
# `nodelist` (not `node_list`) is the keyword that fixes the order the
# node_color values are matched against.
nx.draw_networkx(example_graph2,
                 nodelist=['Alice', 'Bob', 'Carol'],
                 node_color=[1, 3, 2],
                 node_size=1000)
# Assigning the return value keeps the notebook from echoing the axis limits.
limits = plt.axis("off")
from matplotlib.ticker import FuncFormatter
# Stacked bar chart of participants per thread type for "Polymath 1" to
# "Polymath 10", overlaid with the number of comments per project on a
# square-root-scaled secondary axis.
PROJECTS_TO_C = ["Polymath {}".format(i) for i in range(1, 11)]

# Last recorded value of each per-project column in PM_FRAME.
# NOTE(review): 'authors (accumulated)' is presumably a set of author names
# (it is combined with sets below) -- confirm against PM_FRAME's builder.
PARTICIPANTS = Series(
    [PM_FRAME.loc[project]['authors (accumulated)'].iloc[-1]
     for project in PROJECTS_TO_C],
    index=PROJECTS_TO_C)
R_NETWORKS = Series(
    [PM_FRAME.loc[project]['r_network'].dropna().iloc[-1]
     for project in PROJECTS_TO_C],
    index=PROJECTS_TO_C)
# Projects with at least one row whose 'research' flag is falsy; only these
# are looked up in the 'd_network' column.
WITH_D = [project for project in PROJECTS_TO_C
          if not PM_FRAME.loc[project]['research'].all()]
D_NETWORKS = Series(
    [PM_FRAME.loc[project]['d_network'].dropna().iloc[-1]
     for project in WITH_D],
    index=WITH_D)
R_PARTICIPANTS = R_NETWORKS.apply(lambda network: set(network.author_frame.index))
D_PARTICIPANTS = D_NETWORKS.apply(lambda network: set(network.author_frame.index))
COMMENTS = Series(
    [PM_FRAME.loc[project]['number of comments (accumulated)'].iloc[-1]
     for project in PROJECTS_TO_C],
    index=PROJECTS_TO_C)

# Column labels reused below; the exact strings also end up in the legend.
_RESEARCH_ONLY = 'authors only active in research threads'
_DISCUSSION_ONLY = 'authors only active in "discussion" threads'
_BOTH = 'authors active in both types of threads'

# Projects missing from D_PARTICIPANTS get NaN in 'discussion threads', and
# that NaN propagates through the set differences below.
df = DataFrame({'all threads': PARTICIPANTS,
                'research threads': R_PARTICIPANTS,
                'discussion threads': D_PARTICIPANTS},
               index=PROJECTS_TO_C)
df[_RESEARCH_ONLY] = df['research threads'] - df['discussion threads']
df[_DISCUSSION_ONLY] = df['discussion threads'] - df['research threads']
df[_BOTH] = df['all threads'] - df[_RESEARCH_ONLY] - df[_DISCUSSION_ONLY]

# Where the research-only cell is missing, count every participant as
# research-only. `.at` writes into the frame itself; the former chained
# `df.loc[project][col] = ...` could assign to a temporary copy and be lost.
for project in PROJECTS_TO_C:
    if pd.isnull(df.at[project, _RESEARCH_ONLY]):
        df.at[project, _RESEARCH_ONLY] = df.at[project, 'all threads']

# Turn each set into its size; cells that are still missing count as 0.
data = df[[_RESEARCH_ONLY, _DISCUSSION_ONLY, _BOTH]]
data = data.applymap(
    lambda members: len(members) if pd.notnull(members) else 0)

matplotlib.style.use(SBSTYLE)
axes = data.plot(
    kind='bar', stacked=True,
    color=['steelblue', 'lightsteelblue', 'lightgrey'],
    title="Number of participants per thread-type in each Polymath project\n Number of comments per project")
axes.set_ylabel("Number of participants")

# Hand-placed arrows: (label, arrow tip, text position, arrow colour).
for label, tip, text_at, colour in [
        ('published', (0, 115), (0, 130), 'steelblue'),
        ('published', (3, 60), (1.5, 80), 'steelblue'),
        ('re-used', (4, 130), (4.5, 140), 'lightsteelblue'),
        ('published', (7, 155), (7.5, 170), 'steelblue'),
]:
    axes.annotate(label, xy=tip, xytext=text_at,
                  arrowprops=dict(facecolor=colour, shrink=0.05))

# Comment counts are plotted as square roots on a twin axis; the tick
# formatter squares the tick positions again so the labels show real counts.
data2 = np.sqrt(COMMENTS)
axes2 = axes.twinx()
axes2.yaxis.set_major_formatter(
    FuncFormatter(lambda x, pos: "{:0.0f}".format(np.square(x))))
axes2.set_ylabel("Number of comments")
axes2.plot(axes.get_xticks(), data2.values,
           linestyle='-', marker='.', linewidth=.5,
           color='darkgrey')
[<matplotlib.lines.Line2D at 0x1c7773f28>]
# Call the project helper plot_community_evolution (defined outside this
# section) for "Polymaths"; the recorded notebook output shows a matplotlib
# figure being produced.
plot_community_evolution("Polymaths")
<matplotlib.figure.Figure at 0x1c730ca20>
# Keep the helper's return value: it is consumed further down through
# `.items()` as (author, flag) pairs.
# NOTE(review): n=2 presumably is the minimum number of projects an author
# must have joined -- confirm against plot_participation_evolution.
select_n = plot_participation_evolution("Polymath", n=2)
(threshold: participation in at least two projects)
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Sorted list of the authors whose flag in select_n is truthy.
# The loop variable is named `selected` so the builtin `bool` is not shadowed.
authors_n = sorted(author for author, selected in select_n.items() if selected)
def general_heatmap(authors=None, binary=False, thread_level=True,
                    binary_method='average', method='ward', log=True,
                    fontsize=8):
    """Plot a hierarchically clustered heatmap of author activity.

    Rows come from a Series of per-row comment counters, columns are
    authors.  Rows and columns are both reordered by the leaf order of a
    hierarchical clustering, and the cophenetic correlation coefficient of
    the author clustering is printed.

    Args:
        authors: Optional list of author names to use as columns.  Only
            honoured when ``thread_level`` is False; when empty or None,
            ``ALL_AUTHORS`` is used.  The list is copied, never modified.
        binary: If True, each cell only records whether the author appears
            in the row's counter; clustering then uses the Hamming metric
            with ``binary_method``, the binary colormap is used and no
            colorbar is drawn.  If False, cells hold the counter values and
            clustering uses the Euclidean metric with ``method``.
        thread_level: If True, rows are taken from
            ``PM_FRAME['comment_counter']`` and ``authors`` is ignored.
            Otherwise rows are taken from
            ``get_last(POLYMATHS)[0]['comment_counter (accumulated)']``.
        binary_method: Linkage method used when ``binary`` is True.
        method: Linkage method used when ``binary`` is False.
        log: If True, colours are mapped through a logarithmic norm.
        fontsize: Font size of the tick labels on both axes.

    Returns:
        None.  The figure is drawn on a freshly created matplotlib figure.
    """
    if thread_level:
        authors_filtered = list(ALL_AUTHORS)
        data = PM_FRAME['comment_counter']
    else:
        # Copy so that dropping "Anonymous" below cannot mutate the list the
        # caller passed in.
        authors_filtered = list(authors) if authors else list(ALL_AUTHORS)
        data = get_last(POLYMATHS)[0]['comment_counter (accumulated)']
    try:
        authors_filtered.remove("Anonymous")
    except ValueError:
        # "Anonymous" was not among the authors; nothing to drop.
        pass

    if binary:
        as_matrix = np.array(
            [[author in data[thread] for author in authors_filtered]
             for thread in data.index])
        linkage_method, metric = binary_method, 'hamming'
    else:
        as_matrix = np.array(
            [[data.loc[thread][author] for author in authors_filtered]
             for thread in data.index])
        linkage_method, metric = method, 'euclidean'

    # Cluster columns (authors) and rows separately with the same settings.
    Z_author = linkage(as_matrix.T, method=linkage_method, metric=metric)
    Z_thread = linkage(as_matrix, method=linkage_method, metric=metric)
    # Compare against the distances the linkage was actually built from;
    # previously the binary case was scored against Euclidean distances
    # although the clustering itself used Hamming distances.
    c, _ = cophenet(Z_author, pdist(as_matrix.T, metric=metric))
    print("Cophenetic Correlation Coefficient with {}: {}".format(
        linkage_method, c))

    # start setting up plots
    matplotlib.style.use(SBSTYLE)
    fig, ax_heatmap = plt.subplots()

    # The dendrograms are only computed (no_plot=True) to obtain the leaf
    # order used for reordering columns and rows.
    ddata_author = dendrogram(Z_author, color_threshold=.07, no_plot=True)
    ddata_thread = dendrogram(Z_thread, color_threshold=.07, no_plot=True)
    df = DataFrame(as_matrix, columns=authors_filtered)
    cols = [authors_filtered[i] for i in ddata_author['leaves']]
    df = df[cols]
    rows = [df.index[i] for i in ddata_thread['leaves']]
    df = df.reindex(rows)

    # plot heatmap
    heatmap = ax_heatmap.pcolor(
        df,
        edgecolors='w',
        cmap=mpl.cm.binary if binary else mpl.cm.GnBu,
        norm=mpl.colors.LogNorm() if log else None)
    ax_heatmap.autoscale(tight=True)  # get rid of whitespace in margins of heatmap
    ax_heatmap.set_aspect('equal')  # ensure heatmap cells are square
    ax_heatmap.xaxis.set_ticks_position('bottom')  # put column labels at the bottom
    # Booleans, not the strings 'off': a non-empty string is truthy and would
    # switch the ticks on in current matplotlib.
    ax_heatmap.tick_params(bottom=False, top=False, left=False, right=False)
    ax_heatmap.set_title("Project-Engagement in Polymath")
    ax_heatmap.set_yticks(np.arange(0.5, len(df.index) + .5, 1))
    # Rows are labelled by their 1-based position in the original data.
    ax_heatmap.set_yticklabels(df.index + 1, fontsize=fontsize)
    ax_heatmap.set_xticks(np.arange(len(df.columns)) + 0.5)
    ax_heatmap.set_xticklabels(df.columns, rotation=90, fontsize=fontsize)

    if not binary:
        # A colorbar is only drawn for count data.
        divider_h = make_axes_locatable(ax_heatmap)
        cax = divider_h.append_axes("right", "3%", pad="1%")
        plt.colorbar(heatmap, cax=cax)

    lines = (ax_heatmap.xaxis.get_ticklines() +
             ax_heatmap.yaxis.get_ticklines())
    plt.setp(lines, visible=False)
    plt.tight_layout()
# Heatmap of raw counts (binary=False) on a logarithmic colour scale, using
# the accumulated counters (thread_level=False) and only the authors in
# authors_n as columns.
general_heatmap(authors=authors_n, thread_level=False,
binary=False, log=True)
Cophenetic Correlation Coefficient with ward: 0.9424851136308227
Note: Zollman's model could be seen as private announcements/observations only.
# Per-project figures for "Polymath 4" and "Polymath 1", produced by helpers
# defined outside this section (project_heatmap, draw_network).
# NOTE(review): presumably cluster_threads/method/log mirror the clustering
# and colour-scale options of general_heatmap -- confirm in project_heatmap.
project_heatmap("Polymath 4", cluster_threads=True, method='average', log=True, fontsize=10)
project_heatmap("Polymath 1", cluster_threads=True, method='average', log=True, fontsize=9)
draw_network("Polymath 4", graph_type="interaction", reset=True)
import io
import base64
from IPython.display import HTML
# Embed FIGS/out.m4v inline: the file's bytes are base64-encoded and placed
# in a data URI inside an HTML <video> element.
# Read-only binary mode is enough (the old 'r+b' also demanded write
# permission), and the context manager closes the handle after reading.
with open('FIGS/out.m4v', 'rb') as video_file:
    video = video_file.read()
encoded = base64.b64encode(video)
# Must stay the last expression so the notebook renders the HTML object.
HTML(data='''<video alt="test" controls>
<source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))