Python numpy 模块,number() 实例源码
我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.number()。
def _check_annotations(value):
"""
Recursively check that value is either of a "simple" type (number, string,
date/time) or is a (possibly nested) dict, list or numpy array containing
only simple types.
"""
if isinstance(value, np.ndarray):
if not issubclass(value.dtype.type, ALLOWED_ANNOTATION_TYPES):
raise ValueError("Invalid annotation. NumPy arrays with dtype %s"
"are not allowed" % value.dtype.type)
elif isinstance(value, dict):
for element in value.values():
_check_annotations(element)
elif isinstance(value, (list, tuple)):
for element in value:
_check_annotations(element)
elif not isinstance(value, ALLOWED_ANNOTATION_TYPES):
raise ValueError("Invalid annotation. Annotations of type %s are not"
"allowed" % type(value))
def _check_annotations(value):
"""
Recursively check that value is either of a "simple" type (number, string,
date/time) or is a (possibly nested) dict, list or numpy array containing
only simple types.
"""
if isinstance(value, np.ndarray):
if not issubclass(value.dtype.type, ALLOWED_ANNOTATION_TYPES):
raise ValueError("Invalid annotation. NumPy arrays with dtype %s"
"are not allowed" % value.dtype.type)
elif isinstance(value, dict):
for element in value.values():
_check_annotations(element)
elif isinstance(value, (list, tuple)):
for element in value:
_check_annotations(element)
elif not isinstance(value, ALLOWED_ANNOTATION_TYPES):
raise ValueError("Invalid annotation. Annotations of type %s are not"
"allowed" % type(value))
def linear_trajectory_to(self, target_tf, traj_len):
    """Creates a trajectory of poses linearly interpolated from this tf to a target tf.

    Parameters
    ----------
    target_tf : :obj:`RigidTransform`
        The RigidTransform to interpolate to.
    traj_len : int
        The number of intermediate interpolation steps. Must be >= 0.

    Returns
    -------
    :obj:`list` of :obj:`RigidTransform`
        ``traj_len + 2`` entries: the results of ``interpolate_with`` at
        ``t = 0, 1/(traj_len+1), ..., traj_len/(traj_len+1)`` followed by
        ``target_tf`` itself.

    Raises
    ------
    ValueError
        If ``traj_len`` is negative.
    """
    if traj_len < 0:
        raise ValueError('Traj len must at least 0')
    delta_t = 1.0 / (traj_len + 1)
    traj = []
    # Drive the loop with an integer counter instead of accumulating
    # ``t += delta_t``: repeated float addition can stop just short of 1.0
    # (e.g. ten additions of 0.1) and produce a spurious extra pose.
    step = 0
    while step < traj_len + 1:
        traj.append(self.interpolate_with(target_tf, step * delta_t))
        step += 1
    traj.append(target_tf)
    return traj
def drop_inconsistent_keys(self, columns, obj):
    """Drop inconsistent keys

    Build, for every requested column, the key type that is expected in
    ``obj`` and ask ``obj`` to remove keys of any other type.

    :param list columns: columns key to retrieve desired datatypes
    :param object obj: ValueCounts or Histogram object to drop inconsistent keys from
    :returns: the same ``obj`` that was passed in
    """
    converted_kinds = (np.number, np.datetime64)

    def _comparison_type(col):
        # A default-constructed scalar of the column's dtype tells us whether
        # the column is numeric or a timestamp; those are compared as int64.
        sample = np.dtype(self.var_dtype[col]).type()
        if isinstance(sample, converted_kinds):
            return np.int64
        return self.var_dtype[col]

    comp_dtype = [_comparison_type(col) for col in columns]
    # keep only keys of types in comp_dtype
    obj.remove_keys_of_inconsistent_type(prefered_key_type=comp_dtype)
    return obj
def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    Walks every column named in ``self.columns`` (an iterable of column
    groups), records its data type in ``self.var_dtype`` when not already
    present, and appends the column name to one of ``self.num_cols``,
    ``self.dt_cols`` or ``self.str_cols``.

    :param df: input (pandas) data frame
    :raises KeyError: if a requested column is not in the data frame
    :raises TypeError: if the column's data type is in none of the
        STRING_SUBSTR / NUMERIC_SUBSTR / TIME_SUBSTR collections
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost -- confirm against the upstream file.
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for c in self.columns:
        for col in c:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                # first time this column is seen: remember its scalar type,
                # mapping NumPy string/object types to the builtin str
                self.var_dtype[col] = dt.type
                if (self.var_dtype[col] is np.string_) or (self.var_dtype[col] is np.object_):
                    self.var_dtype[col] = str
            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
            # classify using a default-constructed scalar of the dtype
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            # anything that is neither numeric nor a timestamp is treated as a string column
            colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
            if col not in colset:
                colset.append(col)
            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Round the underlying data to `decimals` places, keeping the mask.

    The rounding itself is delegated to the ``round`` method of ``self._data``;
    refer to `numpy.around` for full documentation. When `out` is given it is
    returned, and receives this array's mask if it is a MaskedArray.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out).view(type(self))
    if rounded.ndim > 0:
        rounded._mask = self._mask
        rounded._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        rounded = masked
    if out is not None:
        # Explicit output: propagate the mask when the target supports one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return rounded
def setup_class(cls):
    """Load the decathlon dataset and precompute the fixtures shared by the tests.

    Stores on ``cls``: the categorical and numeric parts of the frame, the
    numeric shape (``n``, ``p``), the standardised values and their
    cross-product, and a PCA fitted with as many components as there are
    numeric columns.
    """
    raw = pd.read_csv('tests/data/decathlon.csv', index_col=0)

    # Split the frame by dtype: everything that is not numeric is categorical
    categorical = raw.select_dtypes(exclude=[np.number])
    numeric = raw.drop(categorical.columns, axis='columns')
    cls.df_categorical = categorical
    cls.df_numeric = numeric
    cls.n, cls.p = numeric.shape

    # Centre and scale each numeric column, then form the cross-product matrix
    standardised = numeric.copy()
    standardised = (standardised - standardised.mean()) / standardised.std()
    cls.center_reduced = standardised.values
    cls.cov = cls.center_reduced.T @ cls.center_reduced

    # Full PCA: one component per numeric column
    cls.n_components = len(numeric.columns)
    cls.pca = PCA(raw, n_components=cls.n_components, scaled=True)
def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
# Extract the categorical columns
self.categorical_columns = dataframe.select_dtypes(exclude=[np.number])
# Extract the supplementary rows
self.supplementary_rows = dataframe.loc[supplementary_row_names].copy()
self.supplementary_rows.drop(supplementary_column_names, axis=1, inplace=True)
# Extract the supplementary columns
self.supplementary_columns = dataframe[supplementary_column_names].copy()
self.supplementary_columns.drop(supplementary_row_names, axis=0, inplace=True)
# Remove the the supplementary columns and rows from the dataframe
dataframe.drop(supplementary_row_names, axis=0, inplace=True)
dataframe.drop(supplementary_column_names, axis=1, inplace=True)
def _filter(self, dataframe, supplementary_row_names, supplementary_column_names):
# Extract the categorical columns
self.categorical_columns = dataframe.select_dtypes(exclude=[np.number])
# Extract the supplementary rows
self.supplementary_rows = dataframe.loc[supplementary_row_names].copy()
self.supplementary_rows.drop(self.categorical_columns.columns, axis='columns', inplace=True)
# Extract the supplementary columns
self.supplementary_columns = dataframe[supplementary_column_names].copy()
self.supplementary_columns.drop(supplementary_row_names, axis='rows', inplace=True)
# Remove the categorical column and the supplementary columns and rows from the dataframe
dataframe.drop(supplementary_row_names, axis='rows', inplace=True)
dataframe.drop(supplementary_column_names, axis='columns', inplace=True)
dataframe.drop(self.categorical_columns.columns, axis='columns', inplace=True)
def __init__(self, bin_type, *repr_args):
"""
Constructor for a bin object.
:param id: identifier (e.g. bin number) of the bin
:param bin_type: "numerical" or "categorical"
:param repr_args: arguments to represent this bin.
args for numerical bin includes lower, upper, lower_closed, upper_closed
args for categorical bin includes a list of categories for this bin.
"""
if bin_type == "numerical" and len(repr_args) != 4:
raise ValueError("args for numerical bin are lower, upper, lower_closed, upper_closed.")
if bin_type == "categorical" and len(repr_args) != 1 and type(repr_args[0]) is not list:
raise ValueError("args for categorical bin is a list of categorical values for this bin.")
self.bin_type = bin_type
if bin_type == "numerical":
self.representation = NumericalRepresentation(*repr_args)
elif bin_type == "categorical":
self.representation = CategoricalRepresentation(*repr_args)
def _get_power(mean1, std1, n1, mean2, std2, n2, z_1_minus_alpha):
    """
    Compute statistical power.

    This is a helper function for compute_statistical_power(x, y, alpha=0.05)

    Args:
        mean1 (float): mean value of the treatment distribution
        std1 (float): standard deviation of the treatment distribution
        n1 (integer): number of samples of the treatment distribution
        mean2 (float): mean value of the control distribution
        std2 (float): standard deviation of the control distribution
        n2 (integer): number of samples of the control distribution
        z_1_minus_alpha (float): critical value for significance level alpha. That is, z-value for 1-alpha.

    Returns:
        float: statistical power --- that is, the probability of a test to detect an effect,
            if the effect actually exists.
    """
    mean_difference = mean1 - mean2
    spread = pooled_std(std1, n1, std2, n2)
    # squared shift of the test statistic, in units of the pooled spread
    shift_squared = (n1 * n2 * mean_difference**2) / ((n1 + n2) * spread**2)
    z_beta = z_1_minus_alpha - np.sqrt(shift_squared)
    # power is the complement of the type-II error probability
    return 1 - stats.norm.cdf(z_beta)
def test_import_trajectory_interp_nans(self):
    """Import the sample trajectory with interpolation on and check row values and NaN filling."""
    fields = ['mdy', 'hms', 'lat', 'long', 'ell_ht', 'ortho_ht', 'num_sats', 'pdop']
    sample_path = os.path.abspath('tests/sample_trajectory.txt')
    df = ti.import_trajectory(sample_path, columns=fields, skiprows=1,
                              timeformat='hms', interp=True)

    # Reference values for one arbitrary line of the file, keyed by field name
    line11 = ['3/22/2017', '9:59:00.20', 76.5350241071, -68.7218956324, 65.898, 82.778, 11, 2.00]
    expected = dict(zip(fields, line11))
    np.testing.assert_almost_equal(df.lat[10], expected['lat'], decimal=10)
    np.testing.assert_almost_equal(df.long[10], expected['long'], decimal=10)

    # After interpolation the third row must hold no nulls in numeric columns
    numeric_part = df.select_dtypes(include=[np.number])
    third_row = numeric_part.iloc[[2]]
    self.assertTrue(third_row.notnull().values.all())
def test_import_trajectory_fields(self):
    """Check the resulting column names for two different ``columns`` lists.

    In both cases the frame's columns must equal the named (non-``None``)
    fields with the first two entries removed.
    """
    field_lists = (
        # number of fields in data greater than number of fields named
        ['mdy', 'hms', 'lat', 'long', 'ell_ht'],
        # fields in the middle are dropped
        ['mdy', 'hms', 'lat', 'long', 'ell_ht', None, 'num_sats', 'pdop'],
    )
    for fields in field_lists:
        df = ti.import_trajectory(os.path.abspath('tests/sample_trajectory.txt'),
                                  columns=fields, skiprows=1, timeformat='hms')
        named = [name for name in fields if name is not None]
        np.testing.assert_array_equal(df.columns, named[2:])
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Round the underlying data to `decimals` places, keeping the mask.

    The rounding itself is delegated to the ``round`` method of ``self._data``;
    refer to `numpy.around` for full documentation. When `out` is given it is
    returned, and receives this array's mask if it is a MaskedArray.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out).view(type(self))
    if rounded.ndim > 0:
        rounded._mask = self._mask
        rounded._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        rounded = masked
    if out is not None:
        # Explicit output: propagate the mask when the target supports one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return rounded
def get_binary_op_return_class(cls1, cls2):
    """Pick the class of the result of a binary operation on `cls1` and `cls2`.

    Identical classes return themselves. A plain array / number / sequence
    class yields to the other operand, as does a YTQuantity subclass. For the
    remaining cases, when one class is a subclass of the other, that subclass
    is returned.

    Raises RuntimeError when none of these rules applies.
    """
    if cls1 is cls2:
        return cls1

    array_classes = (np.ndarray, np.matrix, np.ma.masked_array)

    def _yields_to_other(cls):
        # membership is tested first so the issubclass call is skipped for
        # the array classes themselves
        return cls in array_classes or issubclass(
            cls, (numeric_type, np.number, list, tuple))

    if _yields_to_other(cls1):
        return cls2
    if _yields_to_other(cls2):
        return cls1
    if issubclass(cls1, YTQuantity):
        return cls2
    if issubclass(cls2, YTQuantity):
        return cls1
    if issubclass(cls1, cls2):
        return cls1
    if issubclass(cls2, cls1):
        return cls2
    raise RuntimeError("Undefined operation for a YTArray subclass. "
                       "Received operand types (%s) and (%s)" % (cls1, cls2))
def transform(self, X, y=None):
    """Select the columns of X flagged by the fitted mask.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.
    y : ignored

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
        The validated input indexed with ``self.mask_`` along the columns.
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted

    # the estimator must already carry a ``mask_`` attribute
    check_is_fitted(self, ['mask_'], all_or_any=all)
    validated = check_array(X)
    selected = validated[:, self.mask_]
    return selected
def transform(self, X, y=None):
    """Select the columns of X flagged by the fitted mask, then validate them.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features. Objects exposing a
        ``columns`` attribute are replaced by their ``.values`` first.
    y : ignored

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    from sklearn.utils import check_array
    from sklearn.utils.validation import check_is_fitted

    # the estimator must already carry a ``mask_`` attribute
    check_is_fitted(self, ['mask_'], all_or_any=all)
    data = X.values if hasattr(X, 'columns') else X
    # mask first, so only the selected columns go through validation
    return check_array(data[:, self.mask_])
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def fit(self, X, y=None):
    """Learn the per-column fill values used for imputation.

    For object and categorical columns the fill value is the most frequent
    value; for every other column it is the column mean. The values are
    stored in ``self.fill`` (a Series indexed by column name) and the names
    of the object-typed columns in ``self.object_columns``. Nothing is
    learned when ``self.impute`` is ``False``.

    :param X: pandas DataFrame to learn the fill values from
    :param y: unused
    :returns: self
    """
    # Return if not imputing
    if self.impute is False:
        return self

    # Grab list of object column names before doing imputation
    self.object_columns = X.select_dtypes(include=['object']).columns.values

    def _fill_value(column):
        # pd.core.common.is_categorical_dtype is a private helper that is
        # absent from current pandas; test the dtype class directly instead.
        if (column.dtype == np.dtype('O')
                or isinstance(column.dtype, pd.api.types.CategoricalDtype)):
            return column.value_counts().index[0]
        return column.mean()

    self.fill = pd.Series([_fill_value(X[c]) for c in X], index=X.columns)

    if self.verbose:
        numeric = X.select_dtypes(include=[np.number])
        num_nans = sum(numeric.isnull().sum())
        # count() only counts non-null entries
        num_total = sum(numeric.count())
        percentage_imputed = num_nans / num_total * 100
        print("Percentage Imputed: %.2f%%" % percentage_imputed)
        print("Note: Impute will always happen on prediction dataframe, otherwise rows are dropped, and will lead "
              "to missing predictions")

    # return self for scikit compatibility
    return self
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Round the underlying data to `decimals` places, keeping the mask.

    The rounding itself is delegated to the ``round`` method of ``self._data``;
    refer to `numpy.around` for full documentation. When `out` is given it is
    returned, and receives this array's mask if it is a MaskedArray.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out).view(type(self))
    # carry the mask and the remaining attributes over from the source array
    rounded._mask = self._mask
    rounded._update_from(self)
    if out is not None:
        # Explicit output: propagate the mask when the target supports one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return rounded
def load_MNIST_images(filename):
    """
    Load the raw images from an MNIST image file.

    The file is read as four big-endian 32-bit integers (magic number,
    number of images, rows, columns) followed by one unsigned byte per pixel.

    :param filename: input data file
    :returns: float64 array of shape (num_rows * num_cols, num_images);
        each column is one flattened image with pixel values divided by 255
    """
    header_dtype = np.dtype('>i4')
    # Binary mode is required: the payload is raw bytes, and text mode can
    # translate or decode them depending on the platform.
    with open(filename, "rb") as f:
        header = np.fromfile(f, dtype=header_dtype, count=4)
        # The magic number is read only to move past it. Indexing yields
        # scalars, avoiding int() on a one-element array.
        _magic, num_images, num_rows, num_cols = (int(v) for v in header)
        images = np.fromfile(f, dtype=np.ubyte)
    # one row per image on disk -> one column per image in the result
    images = images.reshape((num_images, num_rows * num_cols)).transpose()
    return images.astype(np.float64) / 255
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], np.bool_) # not x[0] because it is unordered
failures = []
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Round each element to `decimals` places, keeping the mask.

    The rounding itself is delegated to the ``round`` method of ``self._data``;
    refer to `numpy.around` for full documentation. When `out` is given it is
    returned, and receives this array's mask if it is a MaskedArray.

    See Also
    --------
    ndarray.around : corresponding function for ndarrays
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out).view(type(self))
    if rounded.ndim > 0:
        rounded._mask = self._mask
        rounded._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        rounded = masked
    if out is not None:
        # Explicit output: propagate the mask when the target supports one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return rounded
def get_numeric_subclasses(cls=numpy.number, ignore=None):
    """
    Collect `cls` and its subclasses from the numpy scalar hierarchy.

    Only classes that map to a not-yet-seen data type number are returned.
    `ignore` is the list of dtype numbers already seen; it is extended in
    place as new ones are found.

    The hierarchy can be seen here:
    http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html
    """
    if ignore is None:
        ignore = []
    found = []
    dtype = numpy.dtype(cls)
    if dtype.num not in ignore:
        # Safety check: we should be able to represent 0 with this data type.
        numpy.array(0, dtype=dtype)
        found.append(cls)
        ignore.append(dtype.num)
    # depth-first walk; the shared `ignore` list prevents duplicates
    for child in cls.__subclasses__():
        found.extend(get_numeric_subclasses(child, ignore=ignore))
    return found
def largest(*args):
    """
    Return the [elementwise] largest of a variable number of arguments.

    Like python's max. Exactly two arguments are compared with a single
    `switch`; any other count is stacked and reduced along axis 0.
    """
    if len(args) != 2:
        return max(stack(args), axis=0)
    first, second = args
    return switch(first > second, first, second)
##########################
# Comparison
##########################
def reshape(x, newshape, ndim=None):
    """Apply a `Reshape` op to `x`, working out the output rank when `ndim` is omitted.

    When `ndim` is None, `newshape` is converted to a tensor variable, which
    must be 1-dimensional (TypeError otherwise), and its length is used as
    the number of output dimensions (ValueError if that length cannot be
    determined).
    """
    if ndim is None:
        newshape = as_tensor_variable(newshape)
        if newshape.ndim != 1:
            not_a_vector = (
                "New shape in reshape must be a vector or a list/tuple of"
                " scalar. Got %s after conversion to a vector.")
            raise TypeError(not_a_vector % newshape)
        try:
            ndim = get_vector_length(newshape)
        except ValueError:
            unknown_length = (
                "The length of the provided shape (%s) cannot "
                "be automatically determined, so Theano is not able "
                "to know what the number of dimensions of the reshaped "
                "variable will be. You can provide the 'ndim' keyword "
                "argument to 'reshape' to avoid this problem.")
            raise ValueError(unknown_length % newshape)
    return Reshape(ndim)(x, newshape)
def infer_shape(self, node, shapes):
    """Infer the output shape from the shapes of the node's inputs.

    When the second input is a TensorVariable, each output dimension is
    taken from ``shapes[1][1:]`` where the first input is broadcastable
    along that dimension, and from ``shapes[0]`` otherwise.

    Raises ShapeError when the second input is a typed list (not
    implemented).
    """
    if isinstance(node.inputs[1], TensorVariable):
        # We have padded node.inputs[0] to the right number of
        # dimensions for the output
        out_shape = [
            sh2 if b1 else sh1
            for sh1, sh2, b1 in zip(shapes[0],
                                    shapes[1][1:],
                                    node.inputs[0].broadcastable)
        ]
        return [tuple(out_shape)]

    import theano.typed_list
    assert isinstance(node.inputs[1],
                      theano.typed_list.TypedListVariable)
    # The statements that used to follow this raise could never run and have
    # been removed.
    raise ShapeError("Case not implemented")
def check_for_x_over_absX(numerators, denominators):
    """Convert x/abs(x) into sign(x).

    Both lists are modified in place and returned as a pair. Complex-typed
    x is left untouched.
    """
    # TODO: this function should dig/search through dimshuffles
    # This won't catch a dimshuffled absolute value
    for den in tuple(denominators):  # snapshot: the list is edited below
        node = den.owner
        if not (node and node.op == T.abs_ and
                node.inputs[0] in numerators):
            continue
        arg = node.inputs[0]
        if arg.type.dtype.startswith('complex'):
            # TODO: Make an Op that projects a complex number to
            #      have unit length but projects 0 to 0.  That
            #      would be a weird Op, but consistent with the
            #      special case below.  I heard there's some
            #      convention in Matlab that is similar to
            #      this... but not sure.
            continue
        denominators.remove(den)
        numerators.remove(arg)
        numerators.append(T.sgn(arg))
    return numerators, denominators
def test_axis_statistics():
    """Compare ``pp.axis_statistics`` output with the stored reference CSV files.

    The computed frames are also written to disk as ``actual_*.csv``. Only
    the numeric columns are compared, within RELATIVE_TOLERANCE /
    ABSOLUTE_TOLERANCE.
    """
    adel_output_df = pd.read_csv(INPUTS_DIRPATH/ADEL_OUTPUT_FILENAME)
    adel_output_df['species'] = '0'
    axis_statistics_df, intermediate_df = pp.axis_statistics(adel_output_df, domain_area=1)

    # ``axis`` is passed by keyword: recent pandas releases accept only
    # ``labels`` positionally in DataFrame.drop.
    axis_statistics_df.drop('species', axis=1, inplace=True)
    intermediate_df.drop('species', axis=1, inplace=True)

    axis_statistics_df.to_csv(OUTPUTS_DIRPATH/'actual_axis_statistics.csv', index=False, na_rep='NA')
    intermediate_df.to_csv(OUTPUTS_DIRPATH/'actual_intermediate.csv', index=False, na_rep='NA')

    desired_axis_statistics_df = pd.read_csv(OUTPUTS_DIRPATH/'desired_axis_statistics.csv')
    desired_axis_statistics_df.drop('has_ear', axis=1, inplace=True)
    axis_statistics_df = axis_statistics_df.select_dtypes(include=[np.number])
    desired_axis_statistics_df = desired_axis_statistics_df.select_dtypes(include=[np.number])
    np.testing.assert_allclose(axis_statistics_df.values, desired_axis_statistics_df.values, RELATIVE_TOLERANCE, ABSOLUTE_TOLERANCE)

    desired_intermediate_df = pd.read_csv(OUTPUTS_DIRPATH/'desired_intermediate.csv')
    desired_intermediate_df.drop('has_ear', axis=1, inplace=True)
    intermediate_df = intermediate_df.select_dtypes(include=[np.number])
    desired_intermediate_df = desired_intermediate_df.select_dtypes(include=[np.number])
    np.testing.assert_allclose(intermediate_df.values, desired_intermediate_df.values, RELATIVE_TOLERANCE, ABSOLUTE_TOLERANCE)
def _chart_csv_response(chart, name, data_set_name=None):
    """Respond with the data from a chart.

    When `data_set_name` is not given, the third underscore-separated part
    of `name` is used. Outside DEBUG the response is a CSV attachment named
    after `name`; in DEBUG it is served as text/html. Scalar rows are
    written as single-cell rows.
    """
    data_set_name = data_set_name or name.split('_')[2]

    if settings.DEBUG:
        response = HttpResponse(mimetype='text/html')
    else:
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
            'attachment; filename=%s.csv' % name

    writer = csv.writer(response)
    scalar_types = (float, int, numpy.number)
    for row in chart.get_data(data_set_name):
        # wrap bare numbers so the writer always receives a sequence
        cells = [row] if isinstance(row, scalar_types) else row
        writer.writerow(cells)
    return response
def test_ticket_1539(self):
dtypes = [x for x in np.typeDict.values()
if (issubclass(x, np.number)
and not issubclass(x, np.timedelta64))]
a = np.array([], dtypes[0])
failures = []
# ignore complex warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', np.ComplexWarning)
for x in dtypes:
b = a.astype(x)
for y in dtypes:
c = a.astype(y)
try:
np.dot(b, c)
except TypeError:
failures.append((x, y))
if failures:
raise AssertionError("Failures: %r" % failures)
def round(self, decimals=0, out=None):
    """
    Round the underlying data to `decimals` places, keeping the mask.

    The rounding itself is delegated to the ``round`` method of ``self._data``;
    refer to `numpy.around` for full documentation. When `out` is given it is
    returned, and receives this array's mask if it is a MaskedArray.

    See Also
    --------
    numpy.around : equivalent function
    """
    rounded = self._data.round(decimals=decimals, out=out).view(type(self))
    if rounded.ndim > 0:
        rounded._mask = self._mask
        rounded._update_from(self)
    elif self._mask:
        # A masked scalar rounds to the masked constant
        rounded = masked
    if out is not None:
        # Explicit output: propagate the mask when the target supports one
        if isinstance(out, MaskedArray):
            out.__setmask__(self._mask)
        return out
    return rounded
def prefer_alignment(value_type):
    """Return ALIGN.RIGHT for numeric value types and ALIGN.LEFT for everything else."""
    is_numeric = np.issubdtype(value_type, np.number)
    return ALIGN.RIGHT if is_numeric else ALIGN.LEFT
def _check_valid_rotation(self, rotation):
"""Checks that the given rotation matrix is valid.
"""
if not isinstance(rotation, np.ndarray) or not np.issubdtype(rotation.dtype, np.number):
raise ValueError('Rotation must be specified as numeric numpy array')
if len(rotation.shape) != 2 or rotation.shape[0] != 3 or rotation.shape[1] != 3:
raise ValueError('Rotation must be specified as a 3x3 ndarray')
if np.abs(np.linalg.det(rotation) - 1.0) > 1e-3:
raise ValueError('Illegal rotation. Must have determinant == 1.0')
def _check_valid_translation(self, translation):
"""Checks that the translation vector is valid.
"""
if not isinstance(translation, np.ndarray) or not np.issubdtype(translation.dtype, np.number):
raise ValueError('Translation must be specified as numeric numpy array')
t = translation.squeeze()
if len(t.shape) != 1 or t.shape[0] != 3:
raise ValueError('Translation must be specified as a 3-vector, 3x1 ndarray, or 1x3 ndarray')
def check(self,df):
    """Validate this object's settings against the data frame `df`.

    Raises a plain ``Exception`` with a user-facing message for the first
    problem found; returns None when nothing is wrong.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost. In particular, which ``if`` the first ``else``
    # belongs to is an assumption -- confirm against the upstream file.
    if self.objective == "regression" or self.objective == "classification":
        if self.input_type == "text":
            if not self.text_field:
                raise Exception("Please specify a text field")
        else:
            if not self.target:
                raise Exception("Please specify a target field")
            if len(self.fields) == 0:
                raise Exception("Please specify at least one predictor field")
            numericTarget = False
            # NOTE(review): this compares a concrete dtype with the abstract
            # np.number class, which presumably does not match integer
            # columns -- np.issubdtype may be what was intended; confirm.
            if df[self.target].dtype == np.number:
                numericTarget = True
            if self.objective == "regression" and not numericTarget:
                raise Exception("Please use a numeric target field for the regression objective")
            if self.objective == "classification" and numericTarget:
                raise Exception("Please use a string target field for the classification objective")
    elif self.objective == "time_series":
        if not self.target:
            raise Exception("Please specify a target field")
        if not self.order_field:
            raise Exception("Please specify an index field")
        # NOTE(review): same dtype comparison caveat as above
        if df[self.target].dtype != np.number:
            raise Exception("Please use a numeric target field for the time series objective")
    else:
        # every other objective only needs predictor fields
        if len(self.fields) == 0:
            raise Exception("Please specify at least one predictor field")
def process_columns(self, df):
"""Process columns before histogram filling
Specifically, convert timestamp columns to integers
and numeric variables are converted to indices
:param df: input (pandas) data frame
:returns: output (pandas) data frame with converted timestamp columns
:rtype: pandas DataFrame
"""
# timestamp variables are converted to ns here
# make temp df for value counting (used below)
idf = df[self.str_cols].copy(deep=False)
for col in self.dt_cols:
self.log().debug('Converting column "%s" of type "%s" to nanosec', col, self.var_dtype[col])
idf[col] = df[col].apply(hf.to_ns)
# numerical variables are converted to indices here
for col in self.num_cols + self.dt_cols:
self.log().debug('Converting column "%s" of type "%s" to index', col, self.var_dtype[col])
# find column specific bin_specs. if not found, use dict of default
# values.
dt = df[col].dtype
is_number = isinstance(dt.type(), np.number)
is_timestamp = isinstance(dt.type(), np.datetime64)
sf = idf if is_timestamp else df
bin_specs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
idf[col] = sf[col].apply(hf.value_to_bin_index, **bin_specs)
return idf
def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    """Run skbio's bioenv on the numeric metadata columns and render an HTML report.

    Non-numeric columns, rows containing NaNs and zero-variance columns are
    removed from the metadata first; the distance matrix is filtered to the
    remaining samples. The report lists what was filtered out.
    """
    # convert metadata to numeric values where applicable
    df = metadata.to_dataframe().apply(
        lambda column: pd.to_numeric(column, errors='ignore'))

    # filter categorical columns, then drop samples that contain NaNs
    all_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    numeric_cols = set(df.columns)
    filtered_categorical_cols = all_cols - numeric_cols

    # filter 0 variance numerical columns
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = numeric_cols - set(df.columns)

    # keep only the samples that survived the metadata filtering, and track
    # the sizes before and after so they can be shown to the user
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = q2templates.df_to_html(
        skbio.stats.distance.bioenv(distance_matrix, df))

    context = {
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(filtered_categorical_cols),
        'filtered_zero_variance_cols': ', '.join(filtered_zero_variance_cols),
        'result': result}
    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    q2templates.render(index, output_dir, context=context)
def sanitize(x: Any) -> Any:  # pylint: disable=invalid-name,too-many-return-statements
    """
    Convert PyTorch and Numpy values into basic Python types so that the
    result can be serialized into JSON.

    Containers (dicts, lists, tuples) are processed recursively; tuples come
    back as lists. Raises ValueError for any type that is not handled.
    """
    # already serializable as-is
    if isinstance(x, (str, float, int, bool)):
        return x
    # unwrap the Variable and sanitize the tensor it holds
    if isinstance(x, torch.autograd.Variable):
        return sanitize(x.data)
    # tensors become lists (after moving to cpu if necessary)
    if isinstance(x, torch._TensorBase):  # pylint: disable=protected-access
        return x.cpu().tolist()
    # arrays become lists
    if isinstance(x, numpy.ndarray):
        return x.tolist()
    # NumPy scalars become Python numbers
    if isinstance(x, numpy.number):
        return x.item()
    # dicts keep their keys; values are sanitized
    if isinstance(x, dict):
        return {name: sanitize(entry) for name, entry in x.items()}
    # sequences are sanitized element by element
    if isinstance(x, (list, tuple)):
        return [sanitize(entry) for entry in x]
    raise ValueError("cannot sanitize {} of type {}".format(x, type(x)))
def test_array_side_effect(self):
# The second use of itemsize was throwing an exception because in
# ctors.c, discover_itemsize was calling PyObject_Length without
# checking the return code. This failed to get the length of the
# number 2, and the exception hung around until something checked
# PyErr_Occurred() and returned an error.
assert_equal(np.dtype('S10').itemsize, 10)
np.array([['abc', 2], ['long ', '0123456789']], dtype=np.string_)
assert_equal(np.dtype('S10').itemsize, 10)
def test_simple(self):
a = [[1, 2], [3, 4]]
a_str = [[b'1', b'2'], [b'3', b'4']]
modes = ['raise', 'wrap', 'clip']
indices = [-1, 4]
index_arrays = [np.empty(0, dtype=np.intp),
np.empty(tuple(), dtype=np.intp),
np.empty((1, 1), dtype=np.intp)]
real_indices = {'raise': {-1: 1, 4: IndexError},
'wrap': {-1: 1, 4: 0},
'clip': {-1: 0, 4: 1}}
# Currently all types but object, use the same function generation.
# So it should not be necessary to test all. However test also a non
# refcounted struct on top of object.
types = np.int, np.object, np.dtype([('', 'i', 2)])
for t in types:
# ta works, even if the array may be odd if buffer interface is used
ta = np.array(a if np.issubdtype(t, np.number) else a_str, dtype=t)
tresult = list(ta.T.copy())
for index_array in index_arrays:
if index_array.size != 0:
tresult[0].shape = (2,) + index_array.shape
tresult[1].shape = (2,) + index_array.shape
for mode in modes:
for index in indices:
real_index = real_indices[mode][index]
if real_index is IndexError and index_array.size != 0:
index_array.put(0, index)
assert_raises(IndexError, ta.take, index_array,
mode=mode, axis=1)
elif index_array.size != 0:
index_array.put(0, index)
res = ta.take(index_array, mode=mode, axis=1)
assert_array_equal(res, tresult[real_index])
else:
res = ta.take(index_array, mode=mode, axis=1)
assert_(res.shape == (2,) + index_array.shape)
def _delegate_binop(self, other):
# This emulates the logic in
# multiarray/number.c:PyArray_GenericBinaryFunction
if (not isinstance(other, np.ndarray)
and not hasattr(other, "__numpy_ufunc__")):
other_priority = getattr(other, "__array_priority__", -1000000)
if self.__array_priority__ < other_priority:
return True
return False
def _convert_array(self, array):
try:
global np
import numpy as np
except ImportError as ex:
raise ImportError('DataFrameClient requires Numpy, '
'"{ex}" problem importing'.format(ex=str(ex)))
if self.ignore_nan:
number_types = (int, float, np.number)
condition = (all(isinstance(el, number_types) for el in array) and
np.isnan(array))
return list(np.where(condition, None, array))
else:
return list(array)
def maybe_format(item):
    """Pretty-format a string, integer, float, or percent

    Parameters
    ----------
    item : pandas.Series
        A single-item series containing a .name attribute and a value in the
        first (0th) index

    Returns
    -------
    str
        'N/A' for nulls, strings unchanged, two decimals and a percent sign
        when the series name contains "percent", the date for timestamps,
        and a locale-grouped number otherwise.

    Raises
    ------
    TypeError
        If the value is of none of the supported types.
    """
    value = item[0]
    if pd.isnull(value):
        return 'N/A'
    elif isinstance(value, str):
        return value
    elif 'percent' in item.name.lower():
        return '{:.2f}%'.format(value)
    elif isinstance(value, pd.Timestamp):
        return str(np.datetime64(value, 'D'))
    # locale.format_string replaces locale.format, which Python 3.12 no
    # longer provides. isinstance replaces np.issubdtype, which expects a
    # dtype rather than a value and raised on plain Python ints.
    elif isinstance(value, (float, np.number)):  # this must go before ints!
        # NumPy integers land here too, exactly as before
        if value >= 1e3:
            return locale.format_string("%d", int(value), grouping=True)
        else:
            return locale.format_string("%.3g", value, grouping=True)
    elif isinstance(value, int):
        return locale.format_string("%d", value, grouping=True)
    else:
        raise TypeError
def q(self):
    """The number of columns in the initial dataframe

    As opposed to `p` which is the number of columns in the indicator matrix of the initial
    dataframe.
    """
    frame_shape = self.initial_dataframe.shape
    return frame_shape[1]