The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.gaussian_kde().
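Before the project examples, here is a minimal, self-contained sketch of the basic gaussian_kde workflow they all rely on (fit on a sample, then evaluate on a grid); the variable names and the standard-normal test data are illustrative only:

import numpy as np
from scipy.stats import gaussian_kde

# Illustrative data: 1000 draws from a standard normal distribution.
samples = np.random.normal(loc=0.0, scale=1.0, size=1000)

# Fit the estimator; bw_method may be "scott" (the default), "silverman",
# a scalar factor, or a callable returning the bandwidth factor.
kde = gaussian_kde(samples, bw_method="scott")

# Evaluate the estimated density on a regular grid; kde(grid) is
# equivalent to kde.evaluate(grid).
grid = np.linspace(samples.min(), samples.max(), 200)
density = kde.evaluate(grid)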
def kde_scipy(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scipy

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It has `n x p`
        dimensions, representing n points and p variables.
    grid : numpy.array
        Data points at which the density will be estimated. It has `m x p`
        dimensions, representing m points and p variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde = gaussian_kde(data.T, **kwargs)
    return kde.evaluate(grid.T)
def plot(samples, path=None, true_value=5, title='ABC posterior'):
    Bayes_estimate = np.mean(samples, axis=0)
    theta = true_value
    xmin, xmax = max(samples[:, 0]), min(samples[:, 0])
    positions = np.linspace(xmin, xmax, samples.shape[0])
    gaussian_kernel = gaussian_kde(samples[:, 0].reshape(samples.shape[0],))
    values = gaussian_kernel(positions)
    plt.figure()
    plt.plot(positions, gaussian_kernel(positions))
    plt.plot([theta, theta], [min(values), max(values) + .1 * (max(values) - min(values))])
    plt.plot([Bayes_estimate, Bayes_estimate], [min(values), max(values) + .1 * (max(values) - min(values))])
    plt.ylim([min(values), max(values) + .1 * (max(values) - min(values))])
    plt.xlabel(r'$\theta$')
    plt.ylabel('density')
    #plt.xlim([0,1])
    plt.rc('axes', labelsize=15)
    plt.legend(loc='best', frameon=False, numpoints=1)
    font = {'size': 15}
    plt.rc('font', **font)
    plt.title(title)
    if path is not None:
        plt.savefig(path)
    return plt
def KDE_entropy(x):
    '''
    Estimate the entropy H(X) from samples {x_i}_{i=1}^N
    Using Kernel Density Estimator (KDE) and resubstitution

    Input: x: 2D list of size N*d_x
    Output: one number of H(X)
    '''
    N = len(x)
    d = len(x[0])
    local_est = np.zeros(N)
    for i in range(N):
        kernel = sst.gaussian_kde(x.transpose())
        local_est[i] = kernel.evaluate(x[i].transpose())
    return -np.mean(map(log, local_est))
def kde_opt2c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt2c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
def kde_opt3c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt3c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            kde = gaussian_kde(X_feat, kde.factor * 0.741379)
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
def _scipy_bivariate_kde(x, y, bw, gridsize, cut, clip):
    """Compute a bivariate kde using scipy."""
    data = np.c_[x, y]
    kde = stats.gaussian_kde(data.T)
    data_std = data.std(axis=0, ddof=1)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw_x = getattr(kde, "%s_factor" % bw)() * data_std[0]
        bw_y = getattr(kde, "%s_factor" % bw)() * data_std[1]
    elif np.isscalar(bw):
        bw_x, bw_y = bw, bw
    else:
        msg = ("Cannot specify a different bandwidth for each dimension "
               "with the scipy backend. You should install statsmodels.")
        raise ValueError(msg)
    x_support = _kde_support(data[:, 0], bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(data[:, 1], bw_y, gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde([xx.ravel(), yy.ravel()]).reshape(xx.shape)
    return xx, yy, z
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
          column_num=None, stacking_id=None, **kwds):
    from scipy.stats import gaussian_kde
    from scipy import __version__ as spv
    y = remove_na(y)
    if LooseVersion(spv) >= '0.11.0':
        gkde = gaussian_kde(y, bw_method=bw_method)
    else:
        gkde = gaussian_kde(y)
        if bw_method is not None:
            msg = ('bw_method was added in Scipy 0.11.0.' +
                   ' Scipy version in use is %s.' % spv)
            warnings.warn(msg)
    y = gkde.evaluate(ind)
    lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
    return lines
def work(self, fig=None, ax=None):
    """Draw a one dimensional kernel density plot.
    You can specify either a figure or an axis to draw on.

    Parameters:
    -----------
    fig: matplotlib figure object
    ax: matplotlib axis object to draw on

    Returns:
    --------
    fig, ax: matplotlib figure and axis objects
    """
    if ax is None:
        if fig is None:
            return fig, ax
        else:
            ax = fig.gca()
    from scipy.stats import gaussian_kde
    x = self.data[self.aes['x']]
    gkde = gaussian_kde(x)
    ind = np.linspace(x.min(), x.max(), 200)
    ax.plot(ind, gkde.evaluate(ind))
    return fig, ax
def estimate_mode(acc):
    """
    Estimate the mode of a set of float values between 0 and 1.

    :param acc: Data.
    :returns: The mode of the sample
    :rtype: float
    """
    # Taken from sloika.
    if len(acc) > 1:
        da = gaussian_kde(acc)
        optimization_result = minimize_scalar(lambda x: -da(x), bounds=(0, 1), method='Bounded')
        if optimization_result.success:
            try:
                mode = optimization_result.x[0]
            except IndexError:
                mode = optimization_result.x
        else:
            sys.stderr.write("Mode computation failed")
            mode = 0
    else:
        mode = acc[0]
    return mode
def density(data, nbins=500, rng=None):
    """
    Computes density for metrics which return vectors

    Required parameters:
        data:
            - Dictionary of the vectors of data
    """
    density = OrderedDict()
    xs = OrderedDict()
    for subj in data:
        hist = np.histogram(data[subj], nbins)
        hist = np.max(hist[0])
        dens = gaussian_kde(data[subj])
        if rng is not None:
            xs[subj] = np.linspace(rng[0], rng[1], nbins)
        else:
            xs[subj] = np.linspace(0, np.max(data[subj]), nbins)
        density[subj] = dens.evaluate(xs[subj]) * np.max(data[subj] * hist)
    return {"xs": xs, "pdfs": density}
def __init__(self, hyperparameter, param_name, data, oob_strategy='resample', bandwith=0.4):
    if oob_strategy not in ['resample', 'round', 'ignore']:
        raise ValueError()
    self.oob_strategy = oob_strategy
    self.param_name = param_name
    self.hyperparameter = hyperparameter
    reshaped = np.reshape(data, (len(data), 1))
    if self.hyperparameter.log:
        if isinstance(self.hyperparameter, UniformIntegerHyperparameter):
            # self.probabilities = {val: self.distrib.pdf(np.log2(val)) for val in range(self.hyperparameter.lower, self.hyperparameter.upper)}
            raise ValueError('Log Integer hyperparameter not supported: %s' % param_name)
        # self.distrib = gaussian_kde(np.log2(data))
        # self.distrib = KernelDensity(kernel='gaussian').fit(np.log2(np.reshape(data, (len(data), 1))))
        self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(np.log2(reshaped))
    else:
        # self.distrib = gaussian_kde(data)
        self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(reshaped)
    pass
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)
    for h in [0.01, 0.1, 1]:
        bt = BallTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old version of scipy, doesn't accept "
                           "explicit bandwidth.")
        dens_bt = bt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)
        assert_array_almost_equal(dens_bt, dens_gkde, decimal=3)
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)
    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old scipy, does not accept explicit bandwidth.")
        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)
        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
def kl_divergence(p_samples, q_samples):
    # estimate densities
    # p_samples = np.nan_to_num(p_samples)
    # q_samples = np.nan_to_num(q_samples)
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples
        if idx not in _cached_p_pdf:
            _cached_p_pdf[idx] = sc.gaussian_kde(p_samples)
        p_pdf = _cached_p_pdf[idx]
    else:
        p_pdf = sc.gaussian_kde(p_samples)
    q_pdf = sc.gaussian_kde(q_samples)

    # joint support
    left = min(min(p_samples), min(q_samples))
    right = max(max(p_samples), max(q_samples))

    p_samples_num = p_samples.shape[0]
    q_samples_num = q_samples.shape[0]

    # quantise
    lin = np.linspace(left, right, min(max(p_samples_num, q_samples_num), MAX_GRID_POINTS))
    p = p_pdf.pdf(lin)
    q = q_pdf.pdf(lin)

    # KL
    kl = min(sc.entropy(p, q), MAX_KL)
    return kl
def ci_metrics(p_samples, q_samples, alpha=.95):
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples
    # compute CIs
    p_left, p_right = _compute_ci(p_samples, alpha)
    q_left, q_right = _compute_ci(q_samples, alpha)
    precision = 0
    iou = 0
    recall = 0
    f1 = 0
    if (p_right > q_left) and (q_right > p_left):
        # intersection
        int_left = max(q_left, p_left)
        int_right = min(p_right, q_right)
        union_left = min(p_left, q_left)
        union_right = max(p_right, q_right)
        iou = (int_right - int_left) / (union_right - union_left)
        precision = (int_right - int_left) / (q_right - q_left)
        recall = (int_right - int_left) / (p_right - p_left)
        f1 = 2 * precision * recall / (precision + recall)
        # estimate densities
        # p_pdf = sc.gaussian_kde(p_samples)
        # q_pdf = sc.gaussian_kde(q_samples)
        #
        # precision = q_pdf.integrate_box(int_left, int_right) / alpha
        # recall = p_pdf.integrate_box(int_left, int_right) / alpha
    return iou, precision, recall, f1
def visualize_distribution(data):
    """ Visualize bucket distribution, a distribution over:
        (sentence_number, question_length, sentence_length)"""
    bucket_distribution = []
    for qid, value in data.iteritems():
        q_length = len(value['question'])
        s_number = len(value['sentences'])
        s_length = max([len(sent) for sent in value['sentences'].itervalues()])
        bucket_distribution.append([s_number, q_length, s_length])
    # pdb.set_trace()
    distribution = np.transpose(np.array(bucket_distribution))
    kde = stats.gaussian_kde(distribution)
    density = kde(distribution)
    idx = density.argsort()
    x = distribution[0, idx]
    y = distribution[1, idx]
    z = distribution[2, idx]
    density = density[idx]
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=density)
    ax.set_xlabel('sentence number')
    ax.set_ylabel('question length')
    ax.set_zlabel('sentence length')
    plt.show()
def fit(self, y):
    self.lb = y.min() - 1e6
    self.ub = y.max() + 1e6
    self.kde = gaussian_kde(y)
def bayes_factor(x, y, distribution='normal', num_iters=25000, inference='sampling'):
    """
    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling
        inference: sampling or variational inference method for approximating the posterior

    Returns:
        dictionary with statistics
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution,
                                                   num_iters=num_iters, inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    kde = gaussian_kde(trace_normalized_effect_size)
    prior = cauchy.pdf(0, loc=0, scale=1)
    # BF_01
    bf = kde.evaluate(0)[0] / prior
    stop = bf > 3 or bf < 1 / 3.

    credibleMass = 0.95  # another magic number
    leftOut = 1.0 - credibleMass
    p1 = round(leftOut / 2.0, 5)
    p2 = round(1.0 - leftOut / 2.0, 5)
    credible_interval = HDI_from_MCMC(trace_absolute_effect_size, credibleMass)

    return {'stop': bool(stop),
            'delta': float(mu_x - mu_y),
            'confidence_interval': [{'percentile': p * 100, 'value': v}
                                    for p, v in zip([p1, p2], credible_interval)],
            'treatment_sample_size': int(n_x),
            'control_sample_size': int(n_y),
            'treatment_mean': float(mu_x),
            'control_mean': float(mu_y),
            'number_of_iterations': num_iters}
def __plot_scatter(prl, max_points=None, fileout="PR_scatter.png", title="Precision Recall Scatter Plot"):
    """
    :param prl: list of tuples (precision, recall)
    :param max_points: max number of tuples to plot
    :param fileout: output filename
    :param title: plot title
    """
    prs = [i[0] for i in prl]
    recs = [i[1] for i in prl]
    if max_points is not None:
        prs = prs[:max_points]
        recs = recs[:max_points]
    xy = np.vstack([prs, recs])
    z = gaussian_kde(xy)(xy)
    x = np.array(prs)
    y = np.array(recs)
    base = min(z)
    rg = max(z) - base
    z = np.array(z)
    idx = z.argsort()
    x, y, z = x[idx], y[idx], (z[idx] - base) / rg
    fig, ax = plt.subplots()
    sca = ax.scatter(x, y, c=z, s=50, edgecolor='', cmap=plt.cm.jet)
    fig.colorbar(sca)
    plt.ylabel("Recall", fontsize=20, labelpad=15)
    plt.xlabel("Precision", fontsize=20)
    plt.ylim([-0.01, 1.01])
    plt.xlim([-0.01, 1.01])
    plt.title(title)
    if matplotlib.get_backend().lower() in ['agg', 'macosx']:
        fig.set_tight_layout(True)
    else:
        fig.tight_layout()
    plt.savefig("%s" % fileout)
def setHistogram(self, values=None, bins=None, use_kde=False, histogram=None):
    """ Set background histogram (or density estimation, violin plot)

    The histogram of bins is calculated from values, optionally as a
    Gaussian KDE. If histogram is provided, its values are used directly
    and other parameters are ignored.
    """
    if (values is None or not len(values)) and histogram is None:
        self.setPixmap(None)
        return
    if histogram is not None:
        self._histogram = hist = histogram
    else:
        if bins is None:
            bins = min(100, max(10, len(values) // 20))
        if use_kde:
            hist = gaussian_kde(values,
                                None if isinstance(use_kde, bool) else use_kde)(
                np.linspace(np.min(values), np.max(values), bins))
        else:
            hist = np.histogram(values, bins)[0]
        self._histogram = hist = hist / hist.max()

    HEIGHT = self.rect().height() / 2
    OFFSET = HEIGHT * .3
    pixmap = QPixmap(QSize(len(hist), 2 * (HEIGHT + OFFSET)))  # +1 avoids right/bottom frame border shadow
    pixmap.fill(Qt.transparent)
    painter = QPainter(pixmap)
    painter.setPen(QPen(Qt.darkGray))
    for x, value in enumerate(hist):
        painter.drawLine(x, HEIGHT * (1 - value) + OFFSET,
                         x, HEIGHT * (1 + value) + OFFSET)
    if self.orientation() != Qt.Horizontal:
        pixmap = pixmap.transformed(QTransform().rotate(-90))
    self.setPixmap(pixmap)
def make_kde(self):
    """
    Makes the kernel density estimation(s) for create_distplot().

    This is called when curve_type = 'kde' in create_distplot().

    :rtype (list) curve: list of kde representations
    """
    curve = [None] * self.trace_number
    for index in range(self.trace_number):
        self.curve_x[index] = [self.start[index] +
                               x * (self.end[index] - self.start[index]) / 500
                               for x in range(500)]
        self.curve_y[index] = (scipy.stats.gaussian_kde
                               (self.hist_data[index])
                               (self.curve_x[index]))
        if self.histnorm == ALTERNATIVE_HISTNORM:
            self.curve_y[index] *= self.bin_size[index]

    for index in range(self.trace_number):
        curve[index] = dict(type='scatter',
                            x=self.curve_x[index],
                            y=self.curve_y[index],
                            xaxis='x1',
                            yaxis='y1',
                            mode='lines',
                            name=self.group_labels[index],
                            legendgroup=self.group_labels[index],
                            showlegend=False if self.show_hist else True,
                            marker=dict(color=self.colors[index]))
    return curve
def getDensityKernel(x, y):
    # filter NaNs and infinite numbers
    idx = (np.isfinite(x) * np.isfinite(y))
    x = x[idx]
    y = y[idx]
    # Calculate the point density
    xy = np.vstack([x, y])
    z = gaussian_kde(xy)(xy)
    idx = z.argsort()
    return x[idx], y[idx], z[idx]

# RADIAL PROFILES
def maximize(samples):
    '''
    Use Kernel Density Estimation to find a continuous PDF
    from the discrete sampling. Maximize that distribution.

    Parameters
    ----------
    samples : list (shape = (nsamples, ndim))
        Observations drawn from the distribution which is
        going to be fit.

    Returns
    -------
    maximum : list(ndim)
        Maxima of the probability distributions along each axis.
    '''
    from scipy.optimize import minimize
    from scipy.stats import gaussian_kde as kde

    # Give the samples array the proper shape.
    samples = np.transpose(samples)
    # Estimate the continuous PDF.
    estimate = kde(samples)

    # Take the minimum of the estimate.
    def PDF(x):
        return -estimate(x)

    # Initial guess on maximum is made from 50th percentile of the sample.
    p0 = [np.percentile(samples[i], 50) for i in range(samples.shape[0])]

    # Calculate the maximum of the distribution.
    maximum = minimize(PDF, p0).x
    return maximum
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
def weighted_kde(Bn, conf):
    # N.B. apply WKDE function to the instance multiplied by its confidence value
    weighted_ins = np.vstack([conf[i] * B.data() for i, B in enumerate(Bn)])
    return stats.gaussian_kde(weighted_ins.T)
def _scipy_univariate_kde(data, bw, gridsize, cut, clip):
    """Compute a univariate kernel density estimate using scipy."""
    kde = stats.gaussian_kde(data, bw_method=bw)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw = getattr(kde, "%s_factor" % bw)() * np.std(data)
    grid = _kde_support(data, bw, gridsize, cut, clip)
    y = kde(grid)
    return grid, y
def plot_distributions(data, cmap=plt.cm.Spectral_r,
                       percentiles=[5, 25, 50, 75, 95],
                       percentiles_colors=['gray', 'gray', 'red', 'gray', 'gray']):
    """Plots the data point color as local density"""
    npoints, ntimes = data.shape
    for t in range(ntimes):
        cols = gaussian_kde(data[:, t])(data[:, t])
        idx = cols.argsort()
        plt.scatter(np.ones(npoints) * t, data[idx, t], c=cols[idx], s=30,
                    edgecolor='face', cmap=cmap)
    pct = np.percentile(data, percentiles, axis=0)
    for i in range(len(percentiles)):
        #plt.plot(iqr[s0][i,:], c = plt.cm.Spectral(i/5.0), linewidth = 3);
        plt.plot(pct[i, :], c=percentiles_colors[i], linewidth=2)
def plot_embedding_contours(Y, contours=10, cmap=plt.cm.plasma,
                            xmin=None, xmax=None, ymin=None, ymax=None,
                            npts=100, density=False):
    """Plot a 2d density map of the embedding Y"""
    if xmin is None:
        xmin = np.min(Y[:, 0])
    if xmax is None:
        xmax = np.max(Y[:, 0])
    if ymin is None:
        ymin = np.min(Y[:, 1])
    if ymax is None:
        ymax = np.max(Y[:, 1])
    #print xmin,xmax,ymin,ymax
    dx = float(xmax - xmin) / npts
    dy = float(xmax - xmin) / npts
    xx, yy = np.mgrid[xmin:xmax:dx, ymin:ymax:dy]
    positions = np.vstack([xx.ravel(), yy.ravel()])
    kernel = st.gaussian_kde(Y.T)
    #print xx.shape
    #print positions.shape
    #print Y.shape
    #print kernel(positions).shape
    f = kernel(positions)
    f = np.reshape(f, xx.shape)
    #print f.shape

    ax = plt.gca()
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    # Contourf plot
    if density:
        cfset = ax.contourf(xx, yy, f, cmap=cmap)
    ## Or kernel density estimate plot instead of the contourf plot
    #ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
    # Contour plot
    if contours is not None:
        cset = ax.contour(xx, yy, f, contours, cmap=cmap)
    # Label plot
    #ax.clabel(cset, inline=1, fontsize=10)
    ax.set_xlabel('Y0')
    ax.set_ylabel('Y1')
def violin_plot(figure, X, data, colors, xlabel="", ylabel="", n_points=400,
                violin_width=None, linewidth=3, marker_size=20):
    font = fm.FontProperties(family='Trebuchet', weight='light')
    #font = fm.FontProperties(family='CenturyGothic', fname='/Library/Fonts/Microsoft/Century Gothic', weight='light')
    figure.patch.set_facecolor('white')
    axes = figure.add_subplot(111)
    if violin_width is None:
        if len(X) > 1:
            violin_width = ((np.array(X)[1:] - np.array(X)[:-1]).mean()) / 3.
        else:
            violin_width = 0.33
    for x in xrange(len(X)):
        color = colors[x]
        Y = gaussian_kde(data[x])
        D_smooth = np.linspace(np.percentile(Y.dataset, 1), np.percentile(Y.dataset, 99), n_points)
        Y_smooth = Y.evaluate(D_smooth)
        Y_smooth = violin_width * Y_smooth / Y_smooth.max()
        axes.fill_betweenx(D_smooth, X[x], X[x] + Y_smooth, facecolor=color, alpha=0.1)
        axes.fill_betweenx(D_smooth, X[x], X[x] - Y_smooth, facecolor=color, alpha=0.1)
        axes.plot(X[x] + Y_smooth, D_smooth, color=color, linewidth=linewidth, alpha=0.8)
        axes.plot(X[x] - Y_smooth, D_smooth, color=color, linewidth=linewidth, alpha=0.8)
        axes.plot([X[x] - Y_smooth[0], X[x] + Y_smooth[0]], [D_smooth[0], D_smooth[0]],
                  color=color, linewidth=linewidth, alpha=0.8)
        axes.plot([X[x] - Y_smooth[-1], X[x] + Y_smooth[-1]], [D_smooth[-1], D_smooth[-1]],
                  color=color, linewidth=linewidth, alpha=0.8)
        axes.plot(X[x] - Y_smooth, D_smooth, color=color, linewidth=linewidth, alpha=0.8)
        axes.plot(X[x], np.percentile(data[x], 50), 'o', markersize=marker_size,
                  markeredgewidth=linewidth, color=color)
        axes.plot([X[x], X[x]], [np.percentile(data[x], 25), np.percentile(data[x], 75)],
                  color=color, linewidth=2 * linewidth, alpha=0.5)
    axes.set_xlim(min(X) - 1, max(X) + 1)
    axes.set_xlabel(xlabel, fontproperties=font, size=10, style='italic')
    axes.set_xticklabels(axes.get_xticks(), fontproperties=font, size=12)
    axes.set_ylabel(ylabel, fontproperties=font, size=10, style='italic')
    axes.set_yticklabels(axes.get_yticks(), fontproperties=font, size=12)
def work(self, fig=None, ax=None):
    """Draw a two dimensional kernel density plot.
    You can specify either a figure or an axis to draw on.

    Parameters:
    -----------
    fig: matplotlib figure object
    ax: matplotlib axis object to draw on

    Returns:
    --------
    fig, ax: matplotlib figure and axis objects
    """
    if ax is None:
        if fig is None:
            return fig, ax
        else:
            ax = fig.gca()
    x = self.data[self.aes['x']]
    y = self.data[self.aes['y']]
    # TODO: unused?
    # rvs = np.array([x, y])
    x_min = x.min()
    x_max = x.max()
    y_min = y.min()
    y_max = y.max()
    X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([x, y])
    import scipy.stats as stats
    kernel = stats.gaussian_kde(values)
    Z = np.reshape(kernel(positions).T, X.shape)
    ax.contour(Z, extent=[x_min, x_max, y_min, y_max])
    return fig, ax
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    density = gaussian_kde(x)
    xgrid = np.linspace(min(x), max(x), 200)
    if cov_factor is not None:
        density.covariance_factor = lambda: cov_factor
        density._compute_covariance()
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs)
def model_accuracy(results, scatter_lims, error_density_lims, output_file):
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    ax1.scatter(results['actuals'], results['estimates'])
    max_point = results[['actuals', 'estimates']].max().max()
    ax1.plot([0, max_point], [0, max_point])
    ax1.set_xlim(scatter_lims)
    ax1.set_ylim(scatter_lims)
    density = gaussian_kde(results['error'][results['estimates'] > 0])
    x = np.linspace(error_density_lims[0], error_density_lims[1], 10000)
    ax2.plot(x, density(x))
    plt.savefig(output_file)
def __init__(self, sample, label=None):
    """Estimates the density function based on a sample.

    sample: sequence of data
    label: string
    """
    self.label = label if label is not None else '_nolegend_'
    self.kde = stats.gaussian_kde(sample)
    low = min(sample)
    high = max(sample)
    self.linspace = np.linspace(low, high, 101)
def plot_rugdensity(series, name=None, ylab=None, xlab=None):
    if len(series) > 1:
        dens = gaussian_kde(series)
        x = np.linspace(np.min(series), np.max(series), 100)
        y = dens.evaluate(x) * np.max(series)
        d_rug = Scatter(
            x=series,
            y=[0] * len(series),
            mode='markers',
            marker=Marker(
                color='rgba(0,0,0,0.9)',
                symbol='line-ns-open',
                size=10,
                opacity=0.5
            ),
            name=name
        )
    else:
        x = 0
        y = series
    d_dens = Scatter(
        x=x,
        y=y,
        line=Line(
            color='rgba(0,0,0,0.9)'
        ),
        hoverinfo='x',
        name=name,
    )
    if len(series) > 1:
        data = [d_dens, d_rug]
    else:
        data = [d_dens]
    layout = std_layout(name, ylab, xlab)
    fig = Figure(data=data, layout=layout)
    return fig
def featureMapDensity(lang='py', normed=True):
    freq = getValue(lang, type='freq')
    raw = np.array([])
    for i in xrange(len(freq)):
        raw = np.append(raw, [i] * freq[i])
    print "[SUCCESS] Calculated raw for", lang
    gaussKDE = gaussian_kde(raw, bw_method=0.5)
    ind = np.linspace(1, 115, 200)
    plt.plot(ind, gaussKDE(ind), label="%s" % lang)
def __init__(self, param_priors, param_hyperparameters, n_iter):
    self.hyperparameters = collections.OrderedDict(sorted(param_hyperparameters.items()))
    self.n_iter = n_iter
    self.param_index = []
    self.distributions = {}
    kde_data = None
    for name, hyperparameter in param_hyperparameters.items():
        if isinstance(hyperparameter, UniformFloatHyperparameter):
            self.param_index.append(name)
            data = np.array(param_priors[name])
            if hyperparameter.log:
                data = np.log2(data)
            if kde_data is None:
                kde_data = np.reshape(np.array(data), (1, len(data)))
            else:
                reshaped = np.reshape(np.array(data), (1, len(data)))
                kde_data = np.concatenate((kde_data, reshaped), axis=0)
        elif isinstance(hyperparameter, UniformIntegerHyperparameter):
            raise ValueError('UniformIntegerHyperparameter not yet implemented:', name)
        elif isinstance(hyperparameter, CategoricalHyperparameter):
            self.distributions[name] = openmlpimp.utils.rv_discrete_wrapper(name, param_priors[name])
        else:
            raise ValueError()
    if len(self.param_index) < 2:
        raise ValueError('Need at least 2 float hyperparameters')
    self.kde = gaussian_kde(kde_data)
def weighted_kde(self, data, weights, xs):
    data_wt = np.array([data[i] for i in range(len(data)) for w in range(weights[i])])
    density = gaussian_kde(data_wt)
    ys = density(xs)
    return ys
def __init__(self, pe):
    self.MINPE = pe.MINPE
    self.x_grid = np.arange(SPAN)
    # Build KDE from global_lens
    kde = gaussian_kde(pe.global_lens)
    pdf = kde.evaluate(self.x_grid)
    #pdf[pdf < SMALL_VALUE] = SMALL_VALUE
    self.pdf = pdf / pdf.sum()
    #self.cdf = np.cumsum(self.pdf)
    self.target_lens = pe.target_lens
    self.ref = pe.ref
    self.db = {}
def kdepeak(x, x_grid=None):
    '''
    Parameters
    ----------

    Returns
    -------
    '''
    if x_grid is None:
        x_grid = np.linspace(np.min(x), np.max(x), 201)
    kde = gaussian_kde(x)
    return x_grid, kde.evaluate(x_grid)
def visual_get_kde(self):
    mu = self.__mu.ravel()
    density = gaussian_kde(mu)
    xs = np.linspace(mu.min(), mu.max(), 200)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    return xs, density(xs)
def create_x_pdf(self, where=None, color=None):
    if where is None:
        where = self.good
    if color is None:
        color = '#bbbbbb'
    xmin, xmax = self.xlim
    ymin, ymax = self.ylim
    x = np.linspace(xmin, xmax, 100)
    x_kde = stats.gaussian_kde(self.x[where])
    x_pdf = x_kde(x)
    x_pdf_n = x_pdf / np.nanmax(x_pdf) * (ymax - ymin) * self._FAC + ymin
    """
    x_pdf = self.crossplot_ax.fill_between(x, x_pdf_n, ymin, color=color,
                                           lw=0, alpha=alpha, zorder=-1)
    """
    x_pdf, = self.crossplot_ax.plot(x, x_pdf_n, color=color[:3],
                                    alpha=color[-1], zorder=-1)
    #"""
    self.x_pdfs.append(x_pdf)
    self.crossplot_ax.set_xlim(*self.xlim)
    self.crossplot_ax.set_ylim(*self.ylim)
def create_xy_pdf(self, where=None, color=None):
    if where is None:
        where = self.good
    if color is None:
        color = '#bbbbbb'
    xmin, xmax = self.xlim
    ymin, ymax = self.ylim
    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([self.x[where], self.y[where]])
    xy_kde = stats.gaussian_kde(values)
    xy_pdf = np.reshape(xy_kde(positions), X.shape)
    """
    vmin = np.nanmin(xy_pdf)
    vmax = np.nanmax(xy_pdf)
    cmap = matplotlib.cm.gray_r
    xy_pdf = self.crossplot_ax.contourf(X, Y, xy_pdf, 20, cmap=cmap,
                                        vmin=vmin, vmax=2*vmax,
                                        alpha=alpha, zorder=-2)
    """
    xy_pdf = self.crossplot_ax.contour(X, Y, xy_pdf, 5, colors=color[:3],
                                       alpha=color[-1], zorder=-2)
    #"""
    self.xy_pdfs.append(xy_pdf)
    self.crossplot_ax.set_xlim(*self.xlim)
    self.crossplot_ax.set_ylim(*self.ylim)
def fit(self, data):
    self.gkde = stats.gaussian_kde(data.T, self.bw)
def _step(self, action):
    obs, reward, done, info = self.env.step(action)
    location = info.get('location')
    if location is not None:
        """
        self.locations.append(location)
        if len(self.locations) == self.buffer_size:
            # rebuild the kde
            self.kde = stats.gaussian_kde(np.array(self.locations).T, self.bandwidth)

            # plot it?
            dims = obs.shape[:2]
            grid = np.indices(dims)
            kde = self.kde.logpdf(grid.reshape([2, -1]))
            kde = kde.reshape(dims)
            info['kde'] = kde
            #plt.imsave('test.png', kde)

            # drop the older locations
            self.locations = self.locations[self.buffer_size//2:]
            #plt.imsave('counts.png', self.counts)
            #info['logprob'] = logprob

        if self.kde:
            logpdf = self.kde.logpdf(np.array(location))
            info['logpdf'] = logpdf
            reward -= logpdf
        """
        location = location + self.breadth  # padding
        index = tuple(location.tolist())
        patch = extract_patch(self.counts, index, self.breadth)
        count = (self.kernel * patch).sum()
        info['log/visits'] = count

        logprob = np.log(count / self.total)
        info['log/visit_logprob'] = logprob

        #reward = 0
        bonus = self.explore_scale * (self.logprob - logprob)
        info['log/explore_bonus'] = np.abs(bonus)
        reward += bonus
        self.logprob = logprob

        if self.decay:
            self.counts *= self.decay
        else:
            self.total += 1
        self.counts[index] += 1
    return obs, reward, done, info