Python numpy module: histogram() example source code
The following code examples, extracted from open-source Python projects, illustrate how to use numpy.histogram().
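Before the project examples, here is a minimal, self-contained call showing the basic contract of numpy.histogram(): it returns the bin counts together with the bin edges (one more edge than there are counts).

import numpy as np

# Ten samples histogrammed into four equal-width bins over [0, 1].
samples = np.array([0.1, 0.15, 0.2, 0.4, 0.45, 0.5, 0.55, 0.8, 0.85, 0.9])
counts, edges = np.histogram(samples, bins=4, range=(0.0, 1.0))
print(counts)  # [3 2 2 3]
print(edges)   # [0.   0.25 0.5  0.75 1.  ]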
def mypsd(Rates, time_range, bin_w=5., nmax=4000):
    bins = np.arange(0, len(time_range), 1)
    a, b = np.histogram(Rates, bins)
    # Power spectrum of the mean-subtracted rate signal
    ff = (1. / len(bins)) * abs(np.fft.fft(Rates - np.mean(Rates))) ** 2
    Fs = 1. / (1 * 0.001)
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins) // 2 + 1]  # d= dt
    freq = np.fft.fftfreq(len(bins))[:len(ff) // 2 + 1]
    px = ff[0:len(ff) // 2 + 1]
    max_px = np.max(px[1:])
    idx = px == max_px
    corr_freq = freq[np.flatnonzero(idx)]
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
    return new_px, freq, corr_freq[0], freq2, max_pow
def get_histogram(self, data):
"""
Project the descriptions on to the codebook/vocabulary,
returning the histogram of words
[N x 1] => [1 x K] histogram
"""
if self.method == 'vq' or self.method == 'bow':
code = self.get_code(data)
code_hist = self.bow(data, code, self.K)
elif self.method == 'vlad':
code = self.get_code(data)
code_hist = self.vlad(data, code)
elif self.method == 'fisher':
code = self.get_code(data)
code_hist = self.fisher(data, code)
else:
raise NotImplementedError('''Histogram method %s not implemented. '''
'''Use vq/bow or vlad or fisher!''' % self.method)
return code_hist
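For the 'vq'/'bow' branch above, the bag-of-words step amounts to counting codeword assignments. A minimal sketch, assuming `code` holds the index of the nearest codeword for each descriptor (bow_histogram is an illustrative name, not the class's actual helper):

import numpy as np

def bow_histogram(code, K):
    # Count how often each of the K codewords was assigned, then L1-normalize.
    hist, _ = np.histogram(code, bins=np.arange(K + 1))
    return hist.astype(float) / max(hist.sum(), 1)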
def histogram(name, values, bins, collections=None):
# pylint: disable=line-too-long
"""Outputs a `Summary` protocol buffer with a histogram.
The generated
[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
has one summary value containing a histogram for `values`.
This op reports an `InvalidArgument` error if any value is not finite.
Args:
name: A name for the generated node. Will also serve as a series name in
TensorBoard.
    values: A real numeric `Tensor`. Any shape. Values to use to
      build the histogram.
    bins: Histogram bin specification forwarded to the histogram builder,
      e.g. an int bin count or NumPy-style bin edges.
    collections: Optional list of graph collections keys. The new summary op is
      added to these collections. Defaults to `[GraphKeys.SUMMARIES]`.
Returns:
A scalar `Tensor` of type `string`. The serialized `Summary` protocol
buffer.
"""
name = _clean_tag(name)
values = makenp(values)
hist = make_histogram(values.astype(float), bins)
return Summary(value=[Summary.Value(tag=name, histo=hist)])
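A hedged usage sketch for the summary op above, assuming numpy is imported as np and this histogram() function is in scope:

# Serialize a histogram of 1000 activation values into a Summary proto.
acts = np.random.randn(1000)
summ = histogram('activations', acts, bins=30)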
def modeFilter(data, window=500, step=None, bins=None):
"""Filter based on histogram-based mode function"""
d1 = data.view(np.ndarray)
vals = []
l2 = int(window/2.)
if step is None:
step = l2
i = 0
while True:
if i > len(data)-step:
break
vals.append(mode(d1[i:i+window], bins))
i += step
chunks = [np.linspace(vals[0], vals[0], l2)]
for i in range(len(vals)-1):
chunks.append(np.linspace(vals[i], vals[i+1], step))
remain = len(data) - step*(len(vals)-1) - l2
chunks.append(np.linspace(vals[-1], vals[-1], remain))
d2 = np.hstack(chunks)
if (hasattr(data, 'implements') and data.implements('MetaArray')):
return MetaArray(d2, info=data.infoCopy())
return d2
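modeFilter() relies on a mode() helper that is not shown here; a plausible histogram-based sketch of it (the project's own implementation may differ):

def mode(data, bins=None):
    # Histogram-based mode: the center of the most populated bin.
    if bins is None:
        bins = int(np.sqrt(len(data)))
    counts, edges = np.histogram(data, bins=bins)
    i = np.argmax(counts)
    return 0.5 * (edges[i] + edges[i + 1])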
def makedists(pdata,binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
pds=pdata.shape; nlat=pds[1]; nlon=pds[0]; nd=pds[2]
bins=np.append(0,binl)
n=np.empty((nlon,nlat,len(binl)))
binno=np.empty(pdata.shape)
for ilon in range(nlon):
for ilat in range(nlat):
# this is the histogram - we'll get frequency from this
thisn,thisbin=np.histogram(pdata[ilon,ilat,:],bins)
n[ilon,ilat,:]=thisn
# these are the bin locations. we'll use these for the amount dist
binno[ilon,ilat,:]=np.digitize(pdata[ilon,ilat,:],bins)
#### Calculate the number of days with non-missing data, for normalization
ndmat=np.tile(np.expand_dims(np.nansum(n,axis=2),axis=2),(1,1,len(bins)-1))
thisppdfmap=n/ndmat
#### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
testpamtmap=np.empty(thisppdfmap.shape)
for ibin in range(len(bins)-1):
testpamtmap[:,:,ibin]=(pdata*(ibin==binno)).sum(axis=2)
thispamtmap=testpamtmap/ndmat
return thisppdfmap,thispamtmap
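One subtlety in makedists(): np.digitize() numbers bins from 1, while np.histogram() counts from 0, so histogram bin k corresponds to digitize label k + 1, and the `ibin == binno` comparison implicitly offsets by one bin. A minimal illustration:

vals = np.array([0.2, 1.5, 3.7])
edges = np.array([0.0, 1.0, 2.0, 4.0])
print(np.histogram(vals, edges)[0])  # [1 1 1] -- counts for bins 0..2
print(np.digitize(vals, edges))      # [1 2 3] -- labels are one-based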
def add_column(self, table):
"""Add single column DataFrame to the histogram object.
If multiple columns share the same name, a (n) will be appended to the name, where n is
the next available number.
Args:
:table: (:obj:`dataframe`)
A PySpark DataFrame with a single column
"""
if len(table.columns) > 1:
            raise ValueError('More than one column is being added, use add_data() to add multi-column DataFrames')
column_name = table.columns[0]
if not isinstance(table.schema.fields[0].dataType, NumericType):
raise ValueError('Column %s has a non-numeric type (%s), only numeric types are supported'
% (column_name, str(table.schema.fields[0].dataType)))
self.col_list.append((table, column_name))
def to_pandas(self, kind='hist'):
"""Returns a pandas dataframe from the Histogram object.
This function calculates the Histogram function in Spark if it was not done yet.
Args:
:kind: (:obj:`str`, optional):
'hist' or 'density'. When using hist this returns the histogram object
as pandas dataframe. When using density the index contains the bin centers, and the values in the
DataFrame are the scaled values. Defaults to 'hist'
Returns:
A pandas DataFrame from the Histogram object.
"""
self.build()
if kind == 'hist':
return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
elif kind == 'density':
result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
return result.apply(lambda x: x / x.max(), axis=0)
def add_data(self, data):
"""Ads 1 or more columns to a histogram.
Multiple options are available:
* Add a single column dataframe
* Add a list of single column dataframes
* Add a dataframe with multiple columns
Args:
:data:
A single column Spark dataframe, a list of single column Spark
dataframes, or a multi column Spark dataframe.
"""
if isinstance(data, list):
for df_column in data:
self.add_column(df_column)
elif len(data.columns) > 1:
for col_name in data.columns:
self.add_column(data.select(col_name))
else:
self.add_column(data)
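A hypothetical usage sketch for the class above. Only add_data() and to_pandas() come from the code shown; the class name Histogram, its constructor arguments, and the SparkSession `spark` are assumptions for illustration:

df = spark.createDataFrame([(1.0,), (2.0,), (2.5,)], ['value'])
hist = Histogram(bin_width=1)  # constructor signature assumed
hist.add_data(df)
pdf = hist.to_pandas(kind='hist')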
def calculate_plane_histogram(plane, doseplane, dosegridpoints,
maxdose, dd, id, structure, hist):
"""Calculate the DVH for the given plane in the structure."""
contours = [[x[0:2] for x in c['data']] for c in plane]
# If there is no dose for the current plane, go to the next plane
if not len(doseplane):
return (np.arange(0, maxdose), 0)
# Create a zero valued bool grid
grid = np.zeros((dd['rows'], dd['columns']), dtype=np.uint8)
# Calculate the histogram for each contour in the plane
# and boolean xor to remove holes
for i, contour in enumerate(contours):
m = get_contour_mask(dd, id, dosegridpoints, contour)
        grid = np.logical_xor(m.astype(np.uint8), grid).astype(bool)
hist, vol = calculate_contour_dvh(
grid, doseplane, maxdose, dd, id, structure)
return (hist, vol)
def calculate_contour_dvh(mask, doseplane, maxdose, dd, id, structure):
"""Calculate the differential DVH for the given contour and dose plane."""
# Multiply the structure mask by the dose plane to get the dose mask
mask = ma.array(doseplane * dd['dosegridscaling'] * 100, mask=~mask)
# Calculate the differential dvh
hist, edges = np.histogram(mask.compressed(),
bins=maxdose,
range=(0, maxdose))
# Calculate the volume for the contour for the given dose plane
vol = sum(hist) * ((id['pixelspacing'][0]) *
(id['pixelspacing'][1]) *
(structure['thickness']))
return hist, vol
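The histogram returned above is a differential DVH; the cumulative DVH ("volume receiving at least dose D") follows by summing from the high-dose end. A small sketch:

def differential_to_cumulative(hist):
    # Reverse cumulative sum: entry k is the volume at or above dose bin k.
    return np.cumsum(hist[::-1])[::-1]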
# ========================== Test DVH Calculation =========================== #
def rdf(coords, bins=100, r_max=None):
"""
Radial distribution function
Parameters
----------
coords :
list of coordinate arrays
bins : int or numpy array
distance bins
r_max : positive float or None
maximum distance
"""
if np.ndim(coords) == 2: coords = [coords]
    d = np.sqrt(np.concatenate(list(map(calc_distances, coords)), 0))
if r_max is not None: d = d[d<r_max]
g, bins = np.histogram(d, bins=bins)
r = 0.5 * (bins[1:]+bins[:-1])
return r, g/r**2
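The trailing g / r**2 divides out the spherical shell area up to a constant; a fully normalized g(r) would also account for bin width and particle density. A hedged sketch under those assumptions (d is a flat array of pair distances, density the number density):

def rdf_normalized(d, bins, density):
    g, edges = np.histogram(d, bins=bins)
    r = 0.5 * (edges[1:] + edges[:-1])
    shell = 4.0 * np.pi * r**2 * np.diff(edges)  # shell volume per bin
    return r, g / (shell * density)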
def get_hist_val(self, var_value):
"""Get bin count for bin by value of histogram variable
:param var_value: a specific value to find corresponding bin.
:returns: bin counter value
:rtype: int
"""
try:
bin_label = self.value_to_bin_label(var_value)
except Exception as exc:
self.log().error(
'bin label for variable value "%s" not found (%s)',
str(var_value),
                str(exc))
return 0
return self.get_bin_count(bin_label)
def to_normalized(self, **kwargs):
"""Return a normalized copy of this histogram
:param str new_var_name: assign new variable name
:param list variable_range: variable range used for finding the right bins to get values from.
:param bool combine_values: if bin_specs is not set, combine existing bin labels with variable range.
"""
# convert to normalized histogram
new_var_name = str(kwargs.pop('variable', self.variable))
bin_vals = self.get_bin_vals(**kwargs)
values = np.float64(bin_vals[0]) / bin_vals[0].sum()
# When values is a numpy array of 1 element np.float64() returns a 0-dimensional array. See
# https://github.com/numpy/numpy/issues/3161. The following
# if-statement is a workaround for this issue.
if not values.shape:
values = values.reshape((1,))
return Histogram(counts=(values, bin_vals[1]), variable=new_var_name)
def _from_numpy(self, counts, bin_edges):
"""Create Histogram from NumPy-style histogram
:param array counts: numpy histogram counts array
:param array bin_edges: bin edges
"""
# initialize from NumPy-style histogram
_check_num_vals(counts)
if len(counts) == len(bin_edges) - 1:
# interpret specified variable values as bin edges
del self._bin_specs
self.bin_specs = {'bin_edges': list(bin_edges)}
bin_edges = list(range(len(counts)))
elif len(counts) != len(bin_edges):
# cannot interpret specified variable values as bin values
self.log().critical('numbers of specified variable values (%d) and value counts (%d) do not match',
len(bin_edges), len(counts))
raise AssertionError('specified variable values and value counts do not match')
self._val_counts = ValueCounts((self.variable,), (self.variable,),
dict(((v,), c) for c, v in zip(counts, bin_edges)))
def to_root_hist(histogram, **kwargs):
"""Convert Eskapade histogram to root histogram
Input Eskapade histogram first gets converted to a numpy histogram,
which is then converted to a root histogram. All kwargs besides the
input histograms are passed on to histogram.get_bin_vals(), which makes
the numpy histogram.
:param histogram: input Eskapade histogram
:returns: root histogram
:rtype: ROOT.TH1
"""
if not isinstance(histogram, Histogram):
raise TypeError('histogram not of type %s' % Histogram)
# convert to ROOT histogram
new_var_name = str(kwargs.pop('variable', histogram.variable))
return bin_vals_to_hist(histogram.get_bin_vals(**kwargs), var_name=new_var_name)
def hist_to_bin_vals(hist):
"""Convert root histogram to numpy bin_vals
Create bin_counts and bin_edges lists, similar to np.histogram()
function.
:param ROOT.TH1 hist: input root histogram, assumed to be 1-dimensional.
    :returns: two arrays: bin_entries, bin_edges
"""
# check input type
    assert isinstance(hist, ROOT.TH1), 'input must be a 1-dimensional ROOT.TH1 histogram'
# create bin_counts and bin_edges lists, similar to np.histogram() function
bin_entries = []
bin_edges = []
n_bins = hist.GetNbinsX()
for i in range(n_bins):
bin_entries.append(hist.GetBinContent(i + 1))
bin_edges.append(hist.GetBinLowEdge(i + 1))
bin_edges.append(hist.GetBinLowEdge(n_bins + 1))
return bin_entries, bin_edges
def plot_entropy_distribution():
fig = plt.figure()
ax = fig.add_subplot(111)
entropy = read_pickle('output/normalized_entropy.obj')
hist, bin_edges = np.histogram(entropy, bins=10000)
    print(hist, bin_edges)
#ax.set_yscale('log')
#ax.set_xscale('log')
ax.plot(bin_edges[:-1], hist, marker='o', markersize=3, markeredgecolor='none', color='#D65F5F')
#ax.set_ylim([10**0, 10**6])
#ax.set_xlim([10**0, 10**6])
ax.set_xlabel('Entropy')
ax.set_ylabel('Frequency')
fig.tight_layout()
fig.savefig( 'output/normalized_entropy_distribution.pdf', bbox_inches='tight')
def test_outliers(self):
# Check that outliers are not tallied
a = np.arange(10) + .5
# Lower outliers
h, b = histogram(a, range=[0, 9])
assert_equal(h.sum(), 9)
# Upper outliers
h, b = histogram(a, range=[1, 10])
assert_equal(h.sum(), 9)
# Normalization
    h, b = histogram(a, range=[1, 9], density=True)
assert_almost_equal((h * diff(b)).sum(), 1, decimal=15)
# Weights
w = np.arange(10) + .5
    h, b = histogram(a, range=[1, 9], weights=w, density=True)
assert_equal((h * diff(b)).sum(), 1)
h, b = histogram(a, bins=8, range=[1, 9], weights=w)
assert_equal(h, w[1:-1])
def test_simple(self):
"""
Straightforward testing with a mixture of linspace data (for
consistency). All test values have been precomputed and the values
shouldn't change
"""
# Some basic sanity checking, with some fixed data.
# Checking for the correct number of bins
basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
'doane': 8, 'sqrt': 8, 'auto': 7},
500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
'doane': 12, 'sqrt': 23, 'auto': 10},
5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
'doane': 17, 'sqrt': 71, 'auto': 17}}
for testlen, expectedResults in basic_test.items():
# Create some sort of non uniform data to test with
# (2 peak uniform mixture)
x1 = np.linspace(-10, -1, testlen // 5 * 2)
x2 = np.linspace(1, 10, testlen // 5 * 3)
x = np.concatenate((x1, x2))
for estimator, numbins in expectedResults.items():
a, b = np.histogram(x, estimator)
assert_equal(len(a), numbins, err_msg="For the {0} estimator "
"with datasize of {1}".format(estimator, testlen))
def test_small(self):
"""
Smaller datasets have the potential to cause issues with the data
adaptive methods, especially the FD method. All bin numbers have been
precalculated.
"""
small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
'doane': 1, 'sqrt': 1},
2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
'doane': 1, 'sqrt': 2},
3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
'doane': 3, 'sqrt': 2}}
for testlen, expectedResults in small_dat.items():
testdat = np.arange(testlen)
for estimator, expbins in expectedResults.items():
a, b = np.histogram(testdat, estimator)
assert_equal(len(a), expbins, err_msg="For the {0} estimator "
"with datasize of {1}".format(estimator, testlen))
def test_outlier(self):
"""
Check the FD, Scott and Doane with outliers.
The FD estimates a smaller binwidth since it's less affected by
outliers. Since the range is so (artificially) large, this means more
bins, most of which will be empty, but the data of interest usually is
unaffected. The Scott estimator is more affected and returns fewer bins,
despite most of the variance being in one area of the data. The Doane
estimator lies somewhere between the other two.
"""
xcenter = np.linspace(-10, 10, 50)
outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}
for estimator, numbins in outlier_resultdict.items():
a, b = np.histogram(outlier_dataset, estimator)
assert_equal(len(a), numbins)
def _hist_bin_sqrt(x):
"""
Square root histogram bin estimator.
Bin width is inversely proportional to the data size. Used by many
programs for its simplicity.
Parameters
----------
x : array_like
Input data that is to be histogrammed, trimmed to range. May not
be empty.
Returns
-------
h : An estimate of the optimal bin width for the given data.
"""
return x.ptp() / np.sqrt(x.size)
def _hist_bin_sturges(x):
"""
Sturges histogram bin estimator.
A very simplistic estimator based on the assumption of normality of
the data. This estimator has poor performance for non-normal data,
which becomes especially obvious for large data sets. The estimate
depends only on size of the data.
Parameters
----------
x : array_like
Input data that is to be histogrammed, trimmed to range. May not
be empty.
Returns
-------
h : An estimate of the optimal bin width for the given data.
"""
return x.ptp() / (np.log2(x.size) + 1.0)
def _hist_bin_rice(x):
"""
Rice histogram bin estimator.
Another simple estimator with no normality assumption. It has better
performance for large data than Sturges, but tends to overestimate
the number of bins. The number of bins is proportional to the cube
root of data size (asymptotically optimal). The estimate depends
only on size of the data.
Parameters
----------
x : array_like
Input data that is to be histogrammed, trimmed to range. May not
be empty.
Returns
-------
h : An estimate of the optimal bin width for the given data.
"""
return x.ptp() / (2.0 * x.size ** (1.0 / 3))
def _hist_bin_scott(x):
"""
Scott histogram bin estimator.
The binwidth is proportional to the standard deviation of the data
and inversely proportional to the cube root of data size
(asymptotically optimal).
Parameters
----------
x : array_like
Input data that is to be histogrammed, trimmed to range. May not
be empty.
Returns
-------
h : An estimate of the optimal bin width for the given data.
"""
return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
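These width estimators back the string values of the `bins` argument, so they can be exercised directly through np.histogram:

x = np.random.randn(5000)
for rule in ('sqrt', 'sturges', 'rice', 'scott', 'fd', 'auto'):
    counts, edges = np.histogram(x, bins=rule)
    print(rule, len(counts))  # number of bins chosen by each rule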
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
unique_inverse_y, calc_DKL=False):
bins = bins.astype(np.float32)
num_of_bins = bins.shape[0]
# bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
# hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
b2 = np.ascontiguousarray(digitized).view(
np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
unique_array, unique_inverse_t, unique_counts = \
np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
p_ts = unique_counts / float(sum(unique_counts))
PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
if calc_DKL:
pxy_given_T = np.array(
[calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
)
p_XgT = np.vstack(pxy_given_T[:, 0])
p_YgT = pxy_given_T[:, 1]
p_YgT = np.vstack(p_YgT).T
DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
unique_array)
return local_IXT, local_ITY
def fit_koff(nmax=523, NN=4e8, **params):
tbind = params.pop("tbind")
params["kd"] = 1e9/tbind
dx = params.pop("dx")
rw = randomwalk.get_rw(NAME, params, setup=setup_rw, calc=True)
rw.domains[1].dx = dx
times = draw_empirically(rw, N=NN, nmax=nmax, success=False)
bins = np.logspace(np.log10(min(times)), np.log10(max(times)), 35)
#bins = np.logspace(-3., 2., 35)
hist, _ = np.histogram(times, bins=bins)
cfd = np.cumsum(hist)/float(np.sum(hist))
t = 0.5*(bins[:-1] + bins[1:])
tmean = times.mean()
toff = NLS(t, cfd, t0=tmean)
koff = 1./toff
return dict(t=t, cfd=cfd, toff=toff, tmean=tmean, koff=koff)
##### run rw in collect mode and draw bindings from empirical distributions
def compute_normal_histograms(normal_cloud):
norm_x_vals = []
norm_y_vals = []
norm_z_vals = []
numBins = 64
for norm_component in pc2.read_points(normal_cloud,
field_names = ('normal_x', 'normal_y', 'normal_z'),
skip_nans=True):
norm_x_vals.append(norm_component[0])
norm_y_vals.append(norm_component[1])
norm_z_vals.append(norm_component[2])
    # Compute histograms for the normals in the point cloud
    # (surface normal components lie in [-1, 1], so bin over that range)
    norm1_hist = np.histogram(norm_x_vals, bins=numBins, range=(-1.0, 1.0))
    norm2_hist = np.histogram(norm_y_vals, bins=numBins, range=(-1.0, 1.0))
    norm3_hist = np.histogram(norm_z_vals, bins=numBins, range=(-1.0, 1.0))
# Concatenate and normalize the histograms
norm_hist_features = np.concatenate((norm1_hist[0],norm2_hist[0], norm3_hist[0])).astype(np.float64)
norm_features = norm_hist_features / np.sum(norm_hist_features)
return norm_features
def build_histogram(feature_id, bins=50):
feature = Feature.objects.get(pk=feature_id)
if feature.is_categorical:
bins = len(feature.categories)
# Only read column with that name
dataframe = _get_dataframe(feature.dataset.id)
bin_set = []
bins, bin_edges = np.histogram(dataframe[feature.name], bins=bins)
for bin_index, bin_value in enumerate(bins):
from_value = bin_edges[bin_index]
to_value = bin_edges[bin_index + 1]
bin = Bin(
feature=feature,
from_value=from_value,
to_value=to_value,
count=bin_value
)
bin_set.append(bin)
Bin.objects.bulk_create(bin_set)
del bins, bin_edges, bin_set
def from_data(cls, data, binsize=1):
"""Initialization for a DVH from raw data.
Parameters
----------
data : iterable or numpy array
An iterable of dose data that is used to create the histogram
binsize : int, optional
Bin width size (in cGy used to create the histogram)
"""
data = np.array(data)
bins = np.arange(0, data.max() + 1, binsize)
if bins.size == 1:
bins = np.array([0, data.max()])
if data.max() not in bins:
bins = np.append(bins, data.max())
counts, bins = np.histogram(data, bins)
return cls(counts, bins)
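A hypothetical call of the classmethod above (the owning class name DVH is assumed for illustration):

dvh = DVH.from_data([50.0, 120.5, 170.2, 340.0], binsize=1)  # doses in cGy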
def density(x, nbins, normalize=True):
"""
Histogram of univariate input data: basically calls numpy's histogram method and
does a proper normalization.
@param x: input numpy array
@param nbins: number of bins
@type nbins: integer
@param normalize: if true, histogram will be normalized
"""
from numpy import histogram
hy, hx = histogram(x, nbins)
hx = 0.5 * (hx[1:] + hx[:-1])
hy = hy.astype('d')
if normalize:
hy /= (hx[1] - hx[0]) * hy.sum()
return hx, hy
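Because the counts are divided by the bin width times their sum, the returned values integrate to roughly one, which is easy to check:

centers, dens = density(np.random.randn(10000), nbins=50)
print(np.trapz(dens, centers))  # close to 1.0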
def log_histogram(self, name, value, step=None):
"""Log a histogram for given name on given step.
Args:
name (str): name of the variable (it will be converted to a valid
tensorflow summary name).
value (tuple or list): either list of numbers
to be summarized as a histogram, or a tuple of bin_edges and
bincounts that directly define a histogram.
step (int): non-negative integer used for visualization
"""
if isinstance(value, six.string_types):
raise TypeError('"value" should be a number, got {}'
.format(type(value)))
self._check_step(step)
tf_name = self._ensure_tf_name(name)
summary = self._histogram_summary(tf_name, value, step=step)
self._log_summary(tf_name, summary, value, step=step)
def estimate_basket_length(baskets):
basket_lengths = list()
basket_ids = baskets['data']
for basket_id in basket_ids:
basket = baskets['data'][basket_id]['basket']
basket_len = len(basket)
basket_lengths.append(basket_len)
if len(basket_lengths) <= 10:
return int(np.round(np.median(basket_lengths)))
    nbr_bins = int(np.round(estimate_nbr_bins(basket_lengths)))
    val, bins = np.histogram(basket_lengths, bins=nbr_bins)
ebl = int(np.round(bins[np.argmax(val)]))
ebl = ebl + 1 if ebl == 1 else ebl
return ebl
def estimate_month_basket_length(baskets):
    month_basket_length = [[] for x in range(12)]
    basket_ids = baskets['data']
    for basket_id in basket_ids:
        date_object = datetime.datetime.strptime(basket_id[0:10], '%Y_%m_%d')
        basket = baskets['data'][basket_id]['basket']
        month_id = date_object.month - 1
        basket_len = len(basket)
        month_basket_length[month_id].append(basket_len)
    month_ebl = list()
    for month_id in range(12):
        nbr_bins = estimate_nbr_bins(month_basket_length[month_id])
        nbr_bins = int(np.round(nbr_bins))
        val, bins = np.histogram(month_basket_length[month_id], bins=nbr_bins)
        mebl = int(np.round(bins[np.argmax(val)]))
        mebl = mebl + 1 if mebl == 1 else mebl
        month_ebl.append(mebl)
    return month_ebl
def generate_data(sample_size=200, pd=[[0.4, 0.4], [0.1, 0.1]]):
pd = np.array(pd)
pd /= pd.sum()
offset = 50
bins = np.r_[np.zeros((1,)), np.cumsum(pd)]
bin_counts = np.histogram(np.random.rand(sample_size), bins)[0]
data = np.empty((0, 2))
targets = []
for ((i, j), p), count in zip(np.ndenumerate(pd), bin_counts):
xs = np.random.uniform(low=0.0, high=50.0, size=count) + j * offset
ys = np.random.uniform(low=0.0, high=50.0, size=count) + -i * offset
data = np.vstack((data, np.c_[xs, ys]))
if i == j:
targets.extend([1] * count)
else:
targets.extend([-1] * count)
return np.c_[data, targets]
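Histogramming uniform draws against the cumulative probabilities, as done above, is one way to draw multinomial cell counts; an equivalent draw via NumPy's own sampler:

pd = np.array([[0.4, 0.4], [0.1, 0.1]]).ravel()
bin_counts = np.random.multinomial(200, pd / pd.sum())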
def get_mode_pth_from_array(posterior, tuningcurve=None):
"""If tuningcurve is provided, then we map it back to the external coordinates / units.
Otherwise, we stay in the bin space."""
n_xbins = posterior.shape[0]
if tuningcurve is None:
xmin = 0
xmax = n_xbins
else:
# TODO: this only works for TuningCurve1D currently
if isinstance(tuningcurve, auxiliary.TuningCurve1D):
xmin = tuningcurve.bins[0]
xmax = tuningcurve.bins[-1]
else:
raise TypeError("tuningcurve type not yet supported!")
_, bins = np.histogram([], bins=n_xbins, range=(xmin,xmax))
xbins = (bins + xmax/n_xbins)[:-1]
mode_pth = np.argmax(posterior, axis=0)*xmax/n_xbins
mode_pth = np.where(np.isnan(posterior.sum(axis=0)), np.nan, mode_pth)
return mode_pth
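The empty-data np.histogram call above is used purely to obtain bin edges; it is equivalent to an np.linspace over the same range:

edges = np.histogram([], bins=4, range=(0.0, 10.0))[1]
assert np.allclose(edges, np.linspace(0.0, 10.0, 5))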
def get_mean_pth_from_array(posterior, tuningcurve=None):
"""If tuningcurve is provided, then we map it back to the external coordinates / units.
Otherwise, we stay in the bin space."""
n_xbins = posterior.shape[0]
if tuningcurve is None:
xmin = 0
xmax = 1
else:
# TODO: this only works for TuningCurve1D currently
if isinstance(tuningcurve, auxiliary.TuningCurve1D):
xmin = tuningcurve.bins[0]
xmax = tuningcurve.bins[-1]
else:
raise TypeError("tuningcurve type not yet supported!")
_, bins = np.histogram([], bins=n_xbins, range=(xmin,xmax))
xbins = (bins + xmax/n_xbins)[:-1]
mean_pth = (xbins * posterior.T).sum(axis=1)
return mean_pth
def generateHistogram(self):
# 10 equal-width bins computed on all the data
if not self.has_true_labels:
hist, bin_edges = np.histogram(self.plot_datasets['all'].values, bins = 10, density = False)
else:
hist, bin_edges = np.histogram(self.plot_datasets['malicious'].values, bins = 10, density = False)
x_labels = [str(bin_edges[e]) + ' - ' + str(bin_edges[e+1]) for e in range(len(bin_edges)-1)]
barplot = BarPlot(x_labels)
    for label, dataset in self.plot_datasets.items():
hist, bin_edges = np.histogram(dataset.values, bins = bin_edges, density = False)
hist_dataset = PlotDataset(hist, dataset.label)
hist_dataset.setColor(dataset.color)
barplot.addDataset(hist_dataset)
output_filename = self.output_directory + 'histogram.json'
with open(output_filename, 'w') as f:
barplot.exportJson(f)
def reorganize_histogram_data(self, data):
data_x, data_y = data
try:
data_x = float(data_x)
    except Exception:
logger.error("Channel X Must be Scalar Data")
try:
data_y = data_y.flatten()
bins = 10 # default bins
if "BINS" in self.params:
bins = self.params['BINS']
data_y = np.histogram(data_y, bins)
    except Exception:
logger.error("Channel Y Must be Numpy Array")
return (data_x, data_y)
def plot(data=None, x=None, y=None, hue=None, kind='line',
offset=0.75, cmap='Dark2', smooth=1, order=None, bins=10, weights=None, figsize=None):
'''
Create 'Joy Plot':
data (pd.DataFrame): DataFrame holding all data
x (str) : DataFrame column to use as x value
y (str) : DataFrame column to use as y values
hue (str): DataFrame column to use to group data
kind (str): specify plot type; line or hist
    offset (int/float): vertical separation between plots
cmap (str/list): name of matplotlib cmap, or list
of colors to be used for plots
smooth (int): smoothing window, if smoothing to be applied
order (list): order of categories - top to bottom
bins (int/list): bins if using hist. int for all hists to have same bins
else list of bin no. for each hist
weights (boolean/list): should the histogram be weighted?
'''
plotter = _pyjoyplotter(data=data, x=x, y=y, hue=hue,
offset=offset, cmap=cmap, smooth=smooth, kind=kind,
order=order, bins=bins, weights=weights, figsize=figsize)
return plotter._plot()
def classify(self, image):
"""
Given a 28x28 image, returns an array representing the 2 highest
probable prediction
:param image:
:return: array of 2 highest prob-digit tuples
"""
if cv2.__version__[0] == '2':
res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
else:
res = self.model.findNearest(np.array([self.feature(image)]), k=11)
        hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]
zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
return np.array(zipped[:2])
def length_histogram(fqin, name):
'''
Create a histogram, and return the bin edges of the bin containing the most reads
'''
logging.info("Creating length histogram to find bin with most reads.")
lengths = get_lengths(fqin)
plt.hist(lengths, bins='auto')
plt.savefig(name, format='png', dpi=100)
plt.close("all")
hist, bin_edges = np.histogram(lengths, bins='auto')
maxindex = np.argmax(hist)
return (bin_edges[maxindex], bin_edges[maxindex + 1])
def test_against_numpy(self):
source = [np.random.random((16, 12, 5)) for _ in range(10)]
stack = np.stack(source, axis = -1)
bins = np.linspace(0, 1, num = 10)
from_numpy = np.histogram(stack, bins = bins)[0]
from_ihistogram = last(ihistogram(source, bins = bins))
# Since histogram output is int, cannot use allclose
self.assertTrue(np.all(np.equal(from_numpy, from_ihistogram)))
def ihistogram(arrays, bins):
"""
Streaming histogram calculation.
Parameters
----------
arrays : iterable of ndarrays
        Arrays to be combined. This iterable can also be a generator. Arrays in this stream
        can be of any shape; the histogram is computed over the flattened array.
bins : iterable
Bin edges, including the rightmost edge, allowing for non-uniform bin widths.
Yields
------
hist : `~numpy.ndarray`
Streamed histogram.
See Also
--------
numpy.histogram : 1D histogram of dense arrays.
"""
# TODO: weights
    arrays = iter(arrays)  # accept any iterable, including plain lists
    bins = np.asarray(bins)
    # np.histogram also returns the bin edges, which we ignore
    hist_func = lambda arr: np.histogram(arr, bins=bins)[0]
    hist = hist_func(next(arrays))
yield hist
for arr in arrays:
hist += hist_func(arr)
yield hist
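A short check that the streamed result matches a dense histogram over the concatenated data:

chunks = [np.random.random(100) for _ in range(5)]
edges = np.linspace(0, 1, 11)
for h in ihistogram(chunks, bins=edges):
    pass  # h accumulates across chunks
assert np.array_equal(h, np.histogram(np.concatenate(chunks), bins=edges)[0])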
def gradient_histogram(flow_img, binsize=12):
""" calculate histogram """
assert len(flow_img.shape) == 3, "Wrong flow image."
# NOTE the frame is in RGB, while cv2 is in BGR, so do REMEMBER to reverse it.
img_mag, img_v, img_u = np.split(flow_img, 3, 2)
# NOTE the role reversal: the "y-coordinate" is the first function parameter, the "x-coordinate" is the second.
# NOTE that we use same axis configure as image axis(x is larger to the right, y is larger to the bottom),
# so add a minus sign before img_v, to make the two axis align.
orientation = np.arctan2(-img_v, img_u)
# Original result not applicable
# Directly use full 360 degree
new_orient = orientation
# Prune zero motion
_mag_greater_zero = img_mag > 0.0
pruned_orient = new_orient[_mag_greater_zero]
# Histogram of optical flow
hofbins = np.arange(-math.pi, math.pi+1e-6, 2*math.pi/binsize)
hist, bin_edges = np.histogram(pruned_orient.flatten(), bins= hofbins) #, density=True)
# Normalize
hist = hist.astype(np.float32) / (np.sum(_mag_greater_zero) + 1e-6)
return hist, bin_edges
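A hypothetical call of gradient_histogram() on a random (H, W, 3) flow image holding (magnitude, v, u) planes, assuming numpy and math are imported as in the original project:

flow = np.random.rand(4, 4, 3)
hof, edges = gradient_histogram(flow, binsize=12)
print(hof.shape)  # (12,)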