Python matplotlib.pylab 模块,tight_layout() 实例源码
我们从Python开源项目中,提取了以下25个代码示例,用于说明如何使用matplotlib.pylab.tight_layout()。
def plot(params_dir):
    """Draw a box plot of validation objectives, one box per model directory.

    Every sub-directory of ``params_dir`` is treated as one model. For each
    ``*.h5`` file inside it, ``['best_epoch']['validate_objective']`` is read
    via ``dd.io.load``. The collected values are written to a CSV file named
    after the last path component of ``params_dir`` (relative to the current
    working directory) and plotted to
    ``<IMAGES_DIRECTORY>/<name>_errors_box_plot.png``.

    Args:
        params_dir: directory holding one sub-directory per model.
    """
    run_name = os.path.basename(os.path.normpath(params_dir))
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    scores = {}
    for model_dir in model_dirs:
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        pattern = os.path.join(params_dir, model_dir) + '/*.h5'
        scores[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(pattern)]

    # Wrapping each list in a Series lets columns of different lengths share
    # one frame. ``items()`` works on both Python 2 and Python 3.
    df = pd.DataFrame({k: pd.Series(v) for k, v in scores.items()})
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    # Pass the frame by keyword so it is read as wide-form data (one box per
    # column) regardless of the positional-argument order of the seaborn
    # version in use.
    g = sns.boxplot(data=df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))
def show_bars_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show square-image visualization of the topics at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_topics_KV = cur_model.obsModel.getTopics()
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.BarsViz.show_square_images(
            cur_topics_KV,
            vmin=0.0, vmax=0.06,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
#
# Show the clusters over time
def show_bars_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show square-image visualization of the topics at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_topics_KV = cur_model.obsModel.getTopics()
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.BarsViz.show_square_images(
            cur_topics_KV,
            vmin=0.0, vmax=0.06,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
#
# Show the clusters over time
def showExampleDocs(pylab=None, nrows=3, ncols=3):
    ''' Scatter-plot a random selection of documents on a nrows x ncols grid.

    The dataset comes from ``get_data(seed=0, nObsPerDoc=200)`` and the
    documents are drawn without replacement using a generator seeded with 0.

    Args
    ----
    pylab : plotting module to draw with; ``matplotlib.pylab`` is imported
        when None
    nrows, ncols : grid shape; ``nrows * ncols`` documents are shown
    '''
    if pylab is None:
        from matplotlib import pylab
    Data = get_data(seed=0, nObsPerDoc=200)
    rng = np.random.RandomState(0)
    n_panels = nrows * ncols
    doc_ids = rng.choice(Data.nDoc, n_panels, replace=False)
    axis_limits = [-1.5, 1.5]
    for panel_id, doc_id in enumerate(doc_ids, 1):
        # Rows of X belonging to this document
        first_row = Data.doc_range[doc_id]
        last_row = Data.doc_range[doc_id + 1]
        X_doc = Data.X[first_row:last_row]
        pylab.subplot(nrows, ncols, panel_id)
        pylab.plot(X_doc[:, 0], X_doc[:, 1], 'k.')
        pylab.axis('image')
        pylab.xlim(list(axis_limits))
        pylab.ylim(list(axis_limits))
        pylab.xticks([])
        pylab.yticks([])
    pylab.tight_layout()
# Set Toy Parameters
###########################################################
def save(self, out_path):
    '''Saves a figure for the monitor

    Each entry of ``self.d`` gets its own axes on a two-row grid; when the
    same key is present in ``self.d_valid`` that series is overlaid with a
    "(valid)" label.

    Args:
        out_path: str
    '''
    plt.clf()
    np.set_printoptions(precision=4)
    matplotlib.rc('font', size=7)
    n_rows = 2
    # At least one column, so an empty ``self.d`` still yields a valid grid.
    n_cols = max(1, ((len(self.d) - 1) // n_rows) + 1)
    # squeeze=False keeps ``axes`` 2D even for a single column; otherwise the
    # two-index lookup below fails when there are one or two series.
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)
    fig.set_size_inches(20, 8)
    # ``items()`` works on both Python 2 and Python 3.
    for j, (k, v) in enumerate(self.d.items()):
        ax = axes[j // n_cols, j % n_cols]
        ax.plot(v, label=k)
        if k in self.d_valid:
            ax.plot(self.d_valid[k], label=k + '(valid)')
        ax.set_title(k)
        ax.legend()
    plt.tight_layout()
    plt.savefig(out_path, facecolor=(1, 1, 1))
    plt.close()
def plot1D_mat(a, b, M, title=''):
    """ Plot matrix M together with the source and target 1D distributions

    Lays out a 3x3 grid: the target distribution b is drawn along the top,
    the source distribution a along the left (with both of its axes
    inverted), and the matrix M fills the remaining cells, sharing its x axis
    with the top panel and its y axis with the left panel.

    Parameters
    ----------
    a : np.array, shape (na,)
        Source distribution
    b : np.array, shape (nb,)
        Target distribution
    M : np.array, shape (na,nb)
        Matrix to plot
    title : str, optional
        Title placed on the top (target) panel
    """
    n_source, n_target = M.shape
    grid = gridspec.GridSpec(3, 3)
    source_pos = np.arange(n_source)
    target_pos = np.arange(n_target)

    # Top strip: target distribution
    ax_target = pl.subplot(grid[0, 1:])
    pl.plot(target_pos, b, 'r', label='Target distribution')
    pl.yticks(())
    pl.title(title)

    # Left strip: source distribution, drawn sideways
    ax_source = pl.subplot(grid[1:, 0])
    pl.plot(a, source_pos, 'b', label='Source distribution')
    ax_source.invert_xaxis()
    ax_source.invert_yaxis()
    pl.xticks(())

    # Main panel: the matrix itself
    pl.subplot(grid[1:, 1:], sharex=ax_target, sharey=ax_source)
    pl.imshow(M, interpolation='nearest')
    pl.axis('off')
    pl.xlim((0, n_target))

    pl.tight_layout()
    pl.subplots_adjust(wspace=0., hspace=0.2)
def run_frey():
    """Fit a ``vfe.SGPLVM`` to the Brendan faces data and plot the results.

    The data are standardised per column, a model with ``D = 20`` and
    ``M = 30`` is optimised for 10 L-BFGS-B iterations, and two figures are
    produced: a scatter of the first two posterior-mean coordinates together
    with ``sgp_layer.zu``, and a 30 x 30 mosaic of ``predict_f`` means over a
    grid on [-5, 5]^2.
    """
    # import dataset
    data = pods.datasets.brendan_faces()
    # Y = data['Y'][:50, :]
    Y = data['Y']
    Yn = Y - np.mean(Y, axis=0)
    Yn /= np.std(Y, axis=0)
    Y = Yn

    # inference
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("inference ...")
    M = 30
    D = 20
    lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian')
    lvm.optimise(method='L-BFGS-B', maxiter=10)

    plt.figure()
    mx, _ = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1])
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    # Each prediction is reshaped to one sx-by-sy tile of the mosaic.
    sx = 28
    sy = 20
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, _ = lvm.predict_f(z_mu)
            # Rows are filled bottom-up so larger yi appears higher up.
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = x_mean.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()
def run_frey():
    """Fit an ``aep.SGPLVM`` to the Brendan faces data and plot the results.

    The data are standardised per column, a model with ``D = 20`` and
    ``M = 30`` is optimised for 10 L-BFGS-B iterations with ``alpha=0.1``,
    and two figures are produced: a scatter of the first two posterior-mean
    coordinates together with ``sgp_layer.zu``, and a 30 x 30 mosaic of
    ``predict_f`` means over a grid on [-5, 5]^2.
    """
    # import dataset
    data = pods.datasets.brendan_faces()
    # Y = data['Y'][:50, :]
    Y = data['Y']
    Yn = Y - np.mean(Y, axis=0)
    Yn /= np.std(Y, axis=0)
    Y = Yn

    # inference
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("inference ...")
    M = 30
    D = 20
    lvm = aep.SGPLVM(Y, D, M, lik='Gaussian')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B', alpha=0.1, maxiter=10)

    plt.figure()
    mx, _ = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1])
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    # Each prediction is reshaped to one sx-by-sy tile of the mosaic.
    sx = 28
    sy = 20
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, _ = lvm.predict_f(z_mu)
            # Rows are filled bottom-up so larger yi appears higher up.
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = x_mean.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()
def plot_graphs(df, trending_daily, day_from, day_to, limit, country_code, folder_out=None):
    """Render one trending-places chart per day and store it via TrendingDb.

    For every day between ``day_from`` and ``day_to`` (inclusive) the day's
    group is taken from ``trending_daily``, the top places and clusters are
    selected with ``top_trending``, and the ``count`` over ``date`` series of
    each selected place (looked up in ``df``) is drawn on a log-scaled axis.
    The finished figure is handed to ``TrendingDb().update_table_img``.

    Args:
        df: frame indexed by (lat, lon, country) with 'date' and 'count'
            columns.
        trending_daily: grouped data keyed by the day's date string.
        day_from: first day to plot.
        day_to: last day to plot.
        limit: number of places passed to ``top_trending``.
        country_code: stored as ``region`` alongside each image.
        folder_out: unused by this function.
    """
    # ``pd.date_range`` replaces the DatetimeIndex(start=..., end=...)
    # constructor form, which newer pandas releases no longer accept.
    days = pd.date_range(start=day_from, end=day_to, freq='D')
    for day in days:
        day_label = str(day.date())
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.rc('lines', linewidth=2)
        data = trending_daily.get_group(day_label)
        places, clusters = top_trending(data, limit)
        for cluster in clusters:
            places.add(max_from_cluster(cluster, data))
        # NOTE(review): cyclers can only be added when they have the same
        # length, and the linestyle cycle has 10 entries, so this appears to
        # require limit == 10 -- confirm against callers.
        ax.set_prop_cycle(
            plt.cycler('color', ['r', 'b', 'yellow'] +
                       [plt.cm.Accent(i) for i in np.linspace(0, 1, limit - 3)]) +
            plt.cycler('linestyle',
                       ['-', '-', '-', '-', '-', '--', '--', '--', '--', '--']))
        frame = export(places, clusters, data)
        frame.sort_values('trending_rank', ascending=False, inplace=True)
        for item in frame.index:
            lat, lon, country = item
            result_items = ReverseGeoCode().get_address_attributes(
                lat, lon, 10, 'city', 'country_code')
            # Fall back to the country from the index whenever the lookup
            # returned no country code (previously only the no-city branch
            # was guarded; the other two raised KeyError).
            if 'country_code' in result_items:
                code = result_items['country_code'].upper()
            else:
                code = country
            if 'city' not in result_items:
                mark = "%s (%s)" % (
                    manipulate_display_name(result_items['display_name']), code)
            elif check_eng(result_items['city']):
                mark = "%s (%s)" % (result_items['city'], code)
            else:
                mark = "%.2f %.2f (%s)" % (lat, lon, code)
            df.loc[item].plot(ax=ax, x='date', y='count', label=mark)
        ax.tick_params(axis='both', which='major', labelsize=10)
        # NOTE(review): newer matplotlib spells this keyword ``nonpositive``;
        # kept as-is to match the version this file targets.
        ax.set_yscale("log", nonposy='clip')
        plt.xlabel('Date', fontsize='small', verticalalignment='baseline', horizontalalignment='right')
        plt.ylabel('Total number of views (log)', fontsize='small', verticalalignment='center', horizontalalignment='center', labelpad=6)
        # Use ``ax`` directly (it is the axes every series was drawn on) so
        # an empty frame does not leave the handle undefined.
        ax.legend(loc='best', fontsize='xx-small', ncol=2)
        ax.set_title('Top 10 OSM trending places on ' + day_label, {'fontsize': 'large', 'verticalalignment': 'bottom'})
        plt.tight_layout()
        db = TrendingDb()
        db.update_table_img(plt, day_label, region=country_code)
        plt.close()
def main(args_dict):
    """Plot MSE curves for the Gumbel and Exponential estimators.

    The left panel shows the curves for estimating Z (for M from 3 up to
    ``args_dict['M']``), the right panel those for estimating ln Z (for M
    from 1). Both panels use log-log axes. The figure is handed to
    ``save_plot`` with the target 'figures/fig1'.

    Args:
        args_dict: mapping; only the key 'M' (largest number of samples) is
            read here.
    """
    # Set up plot
    matplotlib_configure_as_notebook()
    fig, ax = plt.subplots(1, 2, facecolor='w', figsize=(9.25, 3.25))

    # Estimating Z
    Ms = np.arange(3, args_dict['M'] + 1)
    ax[0].set_xlabel('number of samples $M$')
    # Raw strings: the TeX backslashes are not valid Python escapes.
    ax[0].set_ylabel(r'MSE of $\hat{Z}$, in units of $Z^2$')
    ax[0].set_xlim((np.min(Ms), np.max(Ms)))
    ax[0].set_xscale('log')
    ax[0].set_yscale('log')
    # The on/off flag is passed positionally because its keyword name
    # differs between matplotlib versions (``b`` vs ``visible``).
    ax[0].grid(True, which='major', linestyle='dotted', lw=.5, color='black', alpha=0.5)
    ax[0].plot(Ms, Z_Gumbel_MSE(Ms), linestyle='-', color=tableau20(0), label='Gumbel: MSE')
    ax[0].plot(Ms, Z_Gumbel_var(Ms), linestyle='dashed', color=tableau20(0), label='Gumbel: var')
    ax[0].plot(Ms, Z_Exponential_MSE(Ms), linestyle='-', color=tableau20(2), label='Exponential: MSE')
    ax[0].plot(Ms, Z_Exponential_var(Ms), linestyle='dashed', color=tableau20(2), label='Exponential: var')

    # Estimating ln Z
    Ms = np.arange(1, args_dict['M'] + 1)
    ax[1].set_xlabel('number of samples $M$')
    ax[1].set_ylabel(r'MSE of $\widehat{\ln Z}$, in units of $1$')
    ax[1].set_xlim((np.min(Ms), np.max(Ms)))
    ax[1].set_xscale('log')
    ax[1].set_yscale('log')
    ax[1].grid(True, which='major', linestyle='dotted', lw=0.5, color='black', alpha=0.5)
    ax[1].plot(Ms, lnZ_Gumbel_MSE(Ms), linestyle='-', color=tableau20(0), label='Gumbel: MSE')
    ax[1].plot(Ms, lnZ_Exponential_MSE(Ms), linestyle='-', color=tableau20(2), label='Exponential: MSE')
    ax[1].plot(Ms, lnZ_Exponential_var(Ms), linestyle='dashed', color=tableau20(2), label='Exponential: var')

    # Finalize plot
    lgd0 = ax[0].legend()
    lgd1 = ax[1].legend()
    plt.tight_layout()
    save_plot(fig, 'figures/fig1', (lgd0, lgd1,))
def show_clusters_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, 10, None],
        nrows=2):
    ''' Read model snapshots from provided folder and make visualizations

    One axes is used per entry of ``query_laps``, laid out on ``nrows`` rows.
    Each entry is handed to ``bnpy.load_model_at_lap``; the lap value it
    returns labels the panel.

    Post Condition
    --------------
    New matplotlib plot with some nice pictures.
    '''
    # True division before the ceil, so lap counts that are not a multiple
    # of nrows still get enough columns (floor division dropped a column).
    ncols = int(np.ceil(len(query_laps) / float(nrows)))
    # squeeze=False guarantees an array of axes even for a 1x1 grid.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(FIG_SIZE[0] * ncols, FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for plot_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        # Plot the current model
        cur_ax_handle = flat_ax_handles[plot_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, Data=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_xticks([-2, -1, 0, 1, 2])
        cur_ax_handle.set_yticks([-2, -1, 0, 1, 2])
        cur_ax_handle.set_xlabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
# Training from K=1 cluster
# -------------------------
#
# Using 1 initial cluster, with birth and merge proposal moves.
def show_clusters_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, 10, None],
        nrows=2):
    ''' Read model snapshots from provided folder and make visualizations

    One axes is used per entry of ``query_laps``, laid out on ``nrows`` rows.
    Each entry is handed to ``bnpy.load_model_at_lap``; the lap value it
    returns labels the panel.

    Post Condition
    --------------
    New matplotlib plot with some nice pictures.
    '''
    # True division before the ceil, so lap counts that are not a multiple
    # of nrows still get enough columns (floor division dropped a column).
    ncols = int(np.ceil(len(query_laps) / float(nrows)))
    # squeeze=False guarantees an array of axes even for a 1x1 grid.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(FIG_SIZE[0] * ncols, FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for plot_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        # Plot the current model
        cur_ax_handle = flat_ax_handles[plot_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, Data=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_xticks([-2, -1, 0, 1, 2])
        cur_ax_handle.set_yticks([-2, -1, 0, 1, 2])
        cur_ax_handle.set_xlabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
#
# Show the estimated clusters over time
def show_clusters_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 10, 20, None],
        nrows=2):
    ''' Plot the model components at each queried lap, one panel per lap.

    Panels are laid out on ``nrows`` rows. Each entry of ``query_laps`` is
    handed to ``bnpy.load_model_at_lap``; the lap value it returns labels
    the panel.
    '''
    # True division before the ceil, so lap counts that are not a multiple
    # of nrows still get enough columns (floor division dropped a column).
    ncols = int(np.ceil(len(query_laps) / float(nrows)))
    # squeeze=False guarantees an array of axes even for a 1x1 grid.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(FIG_SIZE[0] * ncols, FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for plot_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_ax_handle = flat_ax_handles[plot_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, dataset=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_xlim([-4.5, 4.5])
        cur_ax_handle.set_xlabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
#
# Run with *merge* moves only, from K=5 initial clusters
# --------------------------------------------------------
#
# Unfortunately, no pairwise merge is accepted.
# The model is stuck using 5 clusters when one cluster would do.
def show_single_sequence(seq_id):
    ''' Plot the first 12 columns of one sequence as lines over time.

    The rows of ``dataset.X`` belonging to sequence ``seq_id`` are located
    through ``dataset.doc_range``.
    '''
    start = dataset.doc_range[seq_id]
    stop = dataset.doc_range[seq_id + 1]
    # The slice does not depend on the dimension, so take it once.
    X_seq = dataset.X[start:stop]
    # ``range`` works on both Python 2 and Python 3.
    for dim in range(12):
        pylab.plot(X_seq[:, dim], '.-')
    pylab.xlabel('time')
    pylab.ylabel('angle')
    pylab.tight_layout()
###############################################################################
#
# Visualization of the first sequence
# -----------------------------------
def show_top_words_over_time(
        task_output_path=None,
        vocabList=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show the top words of each topic at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    vocabList : passed through to the topic plotting routine
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.PrintTopics.plotCompsFromHModel(
            cur_model,
            vocabList=vocabList,
            fontsize=9,
            Ktop=7,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    # NOTE(review): tight_layout recomputes the subplot parameters, so it
    # may override the spacing set just before it -- confirm the intended
    # order.
    pylab.subplots_adjust(
        wspace=0.04, hspace=0.1,
        left=0.01, right=0.99, top=0.99, bottom=0.1)
    pylab.tight_layout()
###############################################################################
#
# Show the topics over time
def show_top_words_over_time(
        task_output_path=None,
        vocabList=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show the top words of each topic at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    vocabList : passed through to the topic plotting routine
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.PrintTopics.plotCompsFromHModel(
            cur_model,
            vocabList=vocabList,
            fontsize=9,
            Ktop=7,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    # NOTE(review): tight_layout recomputes the subplot parameters, so it
    # may override the spacing set just before it -- confirm the intended
    # order.
    pylab.subplots_adjust(
        wspace=0.04, hspace=0.1,
        left=0.01, right=0.99, top=0.99, bottom=0.1)
    pylab.tight_layout()
###############################################################################
#
# Show the topics over time
def show_clusters_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 10, 20, None],
        nrows=2):
    ''' Plot the model components at each queried lap, one panel per lap.

    Panels are laid out on ``nrows`` rows. Each entry of ``query_laps`` is
    handed to ``bnpy.load_model_at_lap``; the lap value it returns titles
    the panel. Axis labels come from the first two entries of
    ``dataset.column_names`` and axis limits are copied from ``data_ax_h``.
    '''
    # True division before the ceil, so lap counts that are not a multiple
    # of nrows still get enough columns (floor division dropped a column).
    ncols = int(np.ceil(len(query_laps) / float(nrows)))
    # squeeze=False guarantees an array of axes even for a 1x1 grid.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(FIG_SIZE[0] * ncols, FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for plot_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_ax_handle = flat_ax_handles[plot_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, dataset=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_title("lap: %d" % lap_val)
        cur_ax_handle.set_xlabel(dataset.column_names[0])
        cur_ax_handle.set_ylabel(dataset.column_names[1])
        cur_ax_handle.set_xlim(data_ax_h.get_xlim())
        cur_ax_handle.set_ylim(data_ax_h.get_ylim())
    pylab.tight_layout()
###############################################################################
#
# *DiagGauss* observation model, without moves
# --------------------------------------------
#
# Start with too many clusters (K=25)
def show_clusters_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 10, 20, None],
        nrows=2):
    ''' Show 2D elliptical contours overlaid on raw data.

    One panel is drawn per entry of ``query_laps``, laid out on ``nrows``
    rows. Each entry is handed to ``bnpy.load_model_at_lap``; the lap value
    it returns titles the panel. Axis labels come from the first two entries
    of ``dataset.column_names`` and axis limits are copied from
    ``data_ax_h``.
    '''
    # True division before the ceil, so lap counts that are not a multiple
    # of nrows still get enough columns (floor division dropped a column).
    ncols = int(np.ceil(len(query_laps) / float(nrows)))
    # squeeze=False guarantees an array of axes even for a 1x1 grid.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for plot_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_ax_handle = flat_ax_handles[plot_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, dataset=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_title("lap: %d" % lap_val)
        cur_ax_handle.set_xlabel(dataset.column_names[0])
        cur_ax_handle.set_ylabel(dataset.column_names[1])
        cur_ax_handle.set_xlim(data_ax_h.get_xlim())
        cur_ax_handle.set_ylim(data_ax_h.get_ylim())
    pylab.tight_layout()
###############################################################################
#
# *DiagGauss* observation model
# -----------------------------
#
# Assume diagonal covariances.
#
# Start with too many clusters (K=20)
def show_bars_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show square-image visualization of the topics at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_topics_KV = cur_model.obsModel.getTopics()
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.BarsViz.show_square_images(
            cur_topics_KV,
            vmin=0.0, vmax=0.1,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
# From K=2 initial clusters
# -------------------------
#
# Using random initialization
def show_bars_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show square-image visualization of estimated topics over time.

    Each entry of ``query_laps`` is handed to ``bnpy.load_model_at_lap``;
    the lap value it returns is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row

    Post Condition
    --------------
    New matplotlib figure with visualization (one row per lap).
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_topics_KV = cur_model.obsModel.getTopics()
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.BarsViz.show_square_images(
            cur_topics_KV,
            vmin=0.0, vmax=0.06,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
#
# Examine the bars over time
def show_bars_over_time(
        task_output_path=None,
        query_laps=[0, 1, 2, 5, None],
        ncols=10):
    ''' Show square-image visualization of the topics at each queried lap.

    One row of ``ncols`` axes is created per entry of ``query_laps``. Each
    entry is handed to ``bnpy.load_model_at_lap``; the lap value it returns
    is used to label the row.

    Args
    ----
    task_output_path : passed through to ``bnpy.load_model_at_lap``
    query_laps : sequence of lap queries, one figure row each
    ncols : number of axes per row
    '''
    nrows = len(query_laps)
    # squeeze=False keeps the axes grid 2D even when nrows == 1 or
    # ncols == 1, so indexing a row always yields an array of axes.
    fig_handle, ax_handles_RC = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    for row_id, lap_val in enumerate(query_laps):
        cur_model, lap_val = bnpy.load_model_at_lap(task_output_path, lap_val)
        cur_topics_KV = cur_model.obsModel.getTopics()
        # Plot the current model
        cur_ax_list = ax_handles_RC[row_id].flatten().tolist()
        bnpy.viz.BarsViz.show_square_images(
            cur_topics_KV,
            vmin=0.0, vmax=0.06,
            ax_list=cur_ax_list)
        cur_ax_list[0].set_ylabel("lap: %d" % lap_val)
    pylab.tight_layout()
###############################################################################
# Train LDA topic model
# ---------------------
#
# Using 10 clusters and the 'randexamples' initialization procedure.
def run_mnist():
    """Fit a ``vfe.SGPLVM`` with a probit likelihood to 100 MNIST digits.

    The first 100 training images are thresholded at 0.5 to -1 / +1, a model
    with ``D = 2`` and ``M = 30`` is optimised with L-BFGS-B, and two figures
    are produced: a label-coloured scatter of the posterior means together
    with ``sgp_layer.zu``, and a 30 x 30 mosaic of predicted pixel
    probabilities over a grid on [-5, 5]^2.
    """
    np.random.seed(42)

    # import dataset
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The context manager closes the file even if loading fails.
    with gzip.open('./tmp/data/mnist.pkl.gz', 'rb') as f:
        (x_train, t_train), _, _ = cPickle.load(f)

    Y = x_train[:100, :]
    labels = t_train[:100]
    Y[Y < 0.5] = -1
    Y[Y > 0.5] = 1

    # inference
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("inference ...")
    M = 30
    D = 2
    # lvm = vfe.SGPLVM(Y, D, M, lik='Gaussian')
    lvm = vfe.SGPLVM(Y, D, M, lik='Probit')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B')

    plt.figure()
    mx, _ = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1], c=labels)
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    # Each prediction is reshaped to one sx-by-sy tile of the mosaic.
    sx = 28
    sy = 28
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            # Standard normal CDF of the scaled latent mean.
            t = x_mean / np.sqrt(1 + x_var)
            Z = 0.5 * (1 + special.erf(t / np.sqrt(2)))
            # Rows are filled bottom-up so larger yi appears higher up.
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = Z.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()
def run_mnist():
    """Fit an ``aep.SGPLVM`` with a probit likelihood to 100 MNIST digits.

    The first 100 training images are thresholded at 0.5 to -1 / +1, a model
    with ``D = 2`` and ``M = 30`` is optimised with L-BFGS-B and
    ``alpha=0.1``, and two figures are produced: a label-coloured scatter of
    the posterior means together with ``sgp_layer.zu``, and a 30 x 30 mosaic
    of predicted pixel probabilities over a grid on [-5, 5]^2.
    """
    np.random.seed(42)

    # import dataset
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The context manager closes the file even if loading fails.
    with gzip.open('./tmp/data/mnist.pkl.gz', 'rb') as f:
        (x_train, t_train), _, _ = cPickle.load(f)

    Y = x_train[:100, :]
    labels = t_train[:100]
    Y[Y < 0.5] = -1
    Y[Y > 0.5] = 1

    # inference
    # Parenthesised form is valid on both Python 2 and Python 3.
    print("inference ...")
    M = 30
    D = 2
    # lvm = aep.SGPLVM(Y, D, M, lik='Gaussian')
    lvm = aep.SGPLVM(Y, D, M, lik='Probit')
    # lvm.train(alpha=0.5, no_epochs=10, n_per_mb=100, lrate=0.1, fixed_params=['sn'])
    lvm.optimise(method='L-BFGS-B', alpha=0.1)

    plt.figure()
    mx, _ = lvm.get_posterior_x()
    zu = lvm.sgp_layer.zu
    plt.scatter(mx[:, 0], mx[:, 1], c=labels)
    plt.plot(zu[:, 0], zu[:, 1], 'ko')

    nx = ny = 30
    x_values = np.linspace(-5, 5, nx)
    y_values = np.linspace(-5, 5, ny)
    # Each prediction is reshaped to one sx-by-sy tile of the mosaic.
    sx = 28
    sy = 28
    canvas = np.empty((sx * ny, sy * nx))
    for i, yi in enumerate(x_values):
        for j, xi in enumerate(y_values):
            z_mu = np.array([[xi, yi]])
            x_mean, x_var = lvm.predict_f(z_mu)
            # Standard normal CDF of the scaled latent mean.
            t = x_mean / np.sqrt(1 + x_var)
            Z = 0.5 * (1 + special.erf(t / np.sqrt(2)))
            # Rows are filled bottom-up so larger yi appears higher up.
            canvas[(nx - i - 1) * sx:(nx - i) * sx,
                   j * sy:(j + 1) * sy] = Z.reshape(sx, sy)

    plt.figure(figsize=(8, 10))
    plt.imshow(canvas, origin="upper", cmap="gray")
    plt.tight_layout()
    plt.show()
def main(args_dict):
    """Plot estimator MSE as a function of alpha for several sample sizes.

    MSEs for estimating Z and ln Z are obtained from
    ``estimate_MSE_vs_alpha`` and drawn in two panels; the Gumbel (alpha=0)
    and Exponential (alpha=1) points come from closed-form helpers. A narrow
    third panel holds only the shared legend. The figure is handed to
    ``save_plot`` with the target 'figures/fig2_K<K>'.

    Args:
        args_dict: mapping with keys 'Ms', 'alpha_min', 'alpha_max',
            'alpha_num', 'K' and 'confidence'.
    """
    # Extract configuration from command line arguments
    Ms = np.array(args_dict['Ms'])
    alphas = np.linspace(args_dict['alpha_min'], args_dict['alpha_max'], args_dict['alpha_num'])
    K = args_dict['K']
    do_confidence = args_dict['confidence']

    # Estimate MSEs by sampling
    print('Estimating MSE of estimators of Z...')
    MSEs_Z, MSE_stdevs_Z = estimate_MSE_vs_alpha(lambda x: x, Ms, alphas, K)
    print('Estimating MSE of estimators of ln(Z)...')
    MSEs_lnZ, MSE_stdevs_lnZ = estimate_MSE_vs_alpha(np.log, Ms, alphas, K)

    # Set up plot
    matplotlib_configure_as_notebook()
    fig = plt.figure(facecolor='w', figsize=(8.25, 3.25))
    gs = gridspec.GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.5])
    # ax[1] is the narrow right-hand cell, used only for the legend.
    ax = [plt.subplot(gs[0]), plt.subplot(gs[2]), plt.subplot(gs[1])]
    ax[0].set_xlabel('$\\alpha$')
    ax[2].set_xlabel('$\\alpha$')
    ax[0].set_ylabel('MSE of estimators of $Z$, in units of $Z^2$')
    # Raw string: the TeX backslash is not a valid Python escape.
    ax[2].set_ylabel(r'MSE of estimators of $\ln Z$, in units of $1$')

    # ``range`` works on both Python 2 and Python 3.
    colors = [plt.cm.plasma(0.8 - 1.0 * i / len(Ms)) for i in range(len(Ms))]

    # Gumbel (alpha=0) and Exponential (alpha=1) tricks can be handled analytically
    legend_Gumbel = 'Gumbel trick\n($\\alpha=0$, theoretical)'
    legend_Exponential = 'Exponential trick\n($\\alpha=1$, theoretical)'
    ax[0].scatter(np.zeros(len(Ms)), Z_Gumbel_MSE(Ms), marker='o', color=colors, label=legend_Gumbel)
    ax[0].scatter(np.ones(len(Ms)), Z_Exponential_MSE(Ms), marker='^', color=colors, label=legend_Exponential)
    ax[2].scatter(np.zeros(len(Ms)), lnZ_Gumbel_MSE(Ms), marker='o', color=colors, label=legend_Gumbel)
    ax[2].scatter(np.ones(len(Ms)), lnZ_Exponential_MSE(Ms), marker='^', color=colors, label=legend_Exponential)

    # Remaining tricks MSE were estimated by sampling
    curve_labels = ['$M=%d$' % (M) for M in Ms]
    plot_MSEs_to_axis(ax[0], alphas, MSEs_Z, MSE_stdevs_Z, do_confidence, curve_labels, colors)
    plot_MSEs_to_axis(ax[2], alphas, MSEs_lnZ, MSE_stdevs_lnZ, do_confidence, curve_labels, colors)

    # Finalize plot
    ax[0].set_ylim((5*1e-3, 10))
    ax[2].set_ylim((5*1e-3, 10))
    handles, legend_labels = ax[0].get_legend_handles_labels()
    remove_chartjunk(ax[1])
    ax[1].spines["bottom"].set_visible(False)
    # Booleans rather than "off": current matplotlib treats any non-empty
    # string as true, which would leave the ticks and labels visible.
    ax[1].tick_params(axis="both", which="both", bottom=False, top=False, labelbottom=False, left=False, right=False, labelleft=False)
    ax[1].legend(handles, legend_labels, frameon=False, loc='upper center', bbox_to_anchor=[0.44, 1.05])
    plt.tight_layout()
    save_plot(fig, 'figures/fig2_K%d' % (K))
def show_many_random_initial_models(
        obsPriorArgsDict,
        initArgsDict,
        nrows=1, ncols=6):
    ''' Create plot of many different random initializations

    One model is built per axes with ``bnpy.make_initialized_model``, using
    the panel index as the seed, and its components are drawn over
    ``dataset``.

    Args
    ----
    obsPriorArgsDict : passed through to ``bnpy.make_initialized_model``
    initArgsDict : passed through to ``bnpy.make_initialized_model``
    nrows, ncols : grid shape; ``nrows * ncols`` models are shown
    '''
    # squeeze=False guarantees an array of axes even for a 1x1 grid, where
    # a bare axes object would otherwise be returned.
    fig_handle, ax_handle_list = pylab.subplots(
        figsize=(SMALL_FIG_SIZE[0] * ncols, SMALL_FIG_SIZE[1] * nrows),
        nrows=nrows, ncols=ncols, sharex=True, sharey=True,
        squeeze=False)
    flat_ax_handles = ax_handle_list.flatten()
    for trial_id in range(nrows * ncols):
        cur_model = bnpy.make_initialized_model(
            dataset,
            allocModelName='FiniteMixtureModel',
            obsModelName='Gauss',
            algName='VB',
            allocPriorArgsDict=dict(gamma=10.0),
            obsPriorArgsDict=obsPriorArgsDict,
            initArgsDict=initArgsDict,
            seed=int(trial_id),
            )
        # Plot the current model
        cur_ax_handle = flat_ax_handles[trial_id]
        bnpy.viz.PlotComps.plotCompsFromHModel(
            cur_model, Data=dataset, ax_handle=cur_ax_handle)
        cur_ax_handle.set_xticks([-2, -1, 0, 1, 2])
        cur_ax_handle.set_yticks([-2, -1, 0, 1, 2])
    pylab.tight_layout()
###############################################################################
# initname: 'randexamples'
# ------------------------
# This procedure selects K examples uniformly at random.
# Each cluster is then initialized from one selected example,
# using a standard global step update.
#
# **Example 1**:
# Initialize with 8 clusters, with prior biased towards small covariances
#
# .. math::
#
# \E_{\mbox{prior}}[ \Sigma_k ] = 0.01 I_D