The following 17 code examples, extracted from open-source Python projects, illustrate how to use scipy.empty().
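Before the project examples, a minimal sketch of what scipy.empty() does may help: it allocates an array without initializing its contents, so the values must be written before they are read. scipy.empty is simply NumPy's empty() exposed through the SciPy namespace; newer SciPy releases deprecate these top-level NumPy aliases, so numpy.empty is the portable spelling. The shapes and dtypes below are chosen only for illustration.

import scipy as sp   # sp.empty is an alias of numpy.empty in the SciPy versions these examples target

# Allocate a 3 x 4 float64 array; its contents are uninitialized memory.
a = sp.empty((3, 4))
a[:] = 0.0           # fill before reading

# A typed 1-D buffer that is filled element by element.
counts = sp.empty(10, dtype='int64')
for i in range(10):
    counts[i] = i * i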
import scipy as sp
from scipy import linalg

def annopred_inf(beta_hats, pr_sigi, n=1000, reference_ld_mats=None, ld_window_size=100):
    """
    Infinitesimal model with SNP-specific heritability derived from annotation,
    used as the initial values for MCMC of the non-infinitesimal model.
    """
    num_betas = len(beta_hats)
    updated_betas = sp.empty(num_betas)
    m = len(beta_hats)
    for i, wi in enumerate(range(0, num_betas, ld_window_size)):
        start_i = wi
        stop_i = min(num_betas, wi + ld_window_size)
        curr_window_size = stop_i - start_i
        Li = 1.0 / pr_sigi[start_i: stop_i]
        D = reference_ld_mats[i]
        A = (n / (1)) * D + sp.diag(Li)
        A_inv = linalg.pinv(A)
        updated_betas[start_i: stop_i] = sp.dot(A_inv / (1.0 / n), beta_hats[start_i: stop_i])  # Adjust the beta_hats
    return updated_betas
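The update in annopred_inf is a per-window ridge-style shrinkage: with LD matrix D and per-SNP prior variances sigma_i, it computes beta = (n*D + diag(1/sigma_i))^-1 * n * beta_hat. Here is a self-contained sketch of the same computation on a made-up 3-SNP window (the LD matrix, prior variances and effect estimates are invented for illustration, and linalg.solve replaces the pseudo-inverse):

import scipy as sp
from scipy import linalg

n = 1000
beta_hats = sp.array([0.01, -0.02, 0.005])       # hypothetical marginal effect estimates
pr_sigi = sp.array([1e-4, 2e-4, 5e-5])           # hypothetical per-SNP prior variances
D = sp.array([[1.0, 0.2, 0.1],
              [0.2, 1.0, 0.3],
              [0.1, 0.3, 1.0]])                  # hypothetical LD (correlation) matrix

A = n * D + sp.diag(1.0 / pr_sigi)
updated_betas = linalg.solve(A, n * beta_hats)   # same quantity as sp.dot(pinv(A) * n, beta_hats)
print(updated_betas)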
import scipy as sp

def split_data(self, n, v=5):
    '''
    The function splits the data into v folds, whatever the number of samples per class.
    Input:
        n : the number of samples
        v : the number of folds
    Output: None
    '''
    step = n // v                        # Compute the number of samples in each fold
    sp.random.seed(1)                    # Set the random generator to the same initial state
    t = sp.random.permutation(n)         # Generate a random sampling of the indices
    indices = []
    for i in range(v - 1):               # Group into v folds
        indices.append(t[i * step:(i + 1) * step])
    indices.append(t[(v - 1) * step:n])

    for i in range(v):
        self.iT.append(sp.asarray(indices[i]))
        l = list(range(v))               # list() is needed in Python 3, where range() has no remove()
        l.remove(i)
        temp = sp.empty(0, dtype=sp.int64)
        for j in l:
            temp = sp.concatenate((temp, sp.asarray(indices[j])))
        self.it.append(temp)
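The same v-fold bookkeeping can be exercised outside the class. This stand-alone sketch (the sample count and fold count are arbitrary) builds the folds exactly as split_data does and checks that, for every fold, the test and training indices together cover all samples exactly once:

import scipy as sp

n, v = 23, 5
step = n // v
sp.random.seed(1)
t = sp.random.permutation(n)

folds = [t[i * step:(i + 1) * step] for i in range(v - 1)]
folds.append(t[(v - 1) * step:n])        # the last fold absorbs the remainder

for i in range(v):
    test_idx = folds[i]
    train_idx = sp.concatenate([folds[j] for j in range(v) if j != i])
    assert sorted(sp.concatenate((test_idx, train_idx))) == list(range(n))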
def glmnetSet(opts=None):
    import scipy

    # default options
    options = {
        "weights": scipy.empty([0]),
        "offset": scipy.empty([0]),
        "alpha": scipy.float64(1.0),
        "nlambda": scipy.int32(100),
        "lambda_min": scipy.empty([0]),
        "lambdau": scipy.empty([0]),
        "standardize": True,
        "intr": True,
        "thresh": scipy.float64(1e-7),
        "dfmax": scipy.empty([0]),
        "pmax": scipy.empty([0]),
        "exclude": scipy.empty([0], dtype=scipy.integer),
        "penalty_factor": scipy.empty([0]),
        "cl": scipy.array([[scipy.float64(-scipy.inf)], [scipy.float64(scipy.inf)]]),
        "maxit": scipy.int32(1e5),
        "gtype": [],
        "ltype": 'Newton',
        "standardize_resp": False,
        "mtype": 'ungrouped'
    }

    # quick return if no user opts
    if opts is None:
        print('pdco default options:')
        print(options)
        return options

    # if options are passed in by the user, update options with values from opts
    optsInOptions = set(opts.keys()) - set(options.keys())
    if len(optsInOptions) > 0:  # 'opts' keys must be a subset of 'options' keys
        print(optsInOptions, ' : unknown option for glmnetSet')
        raise ValueError('attempting to set glmnet options that are not known to glmnetSet')
    else:
        options = merge_dicts(options, opts)  # merge_dicts is a helper defined elsewhere in the same package

    return options
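glmnetSet is a defaults-plus-override pattern: a dictionary of defaults is validated against the user's keys and then merged. The following stand-alone sketch shows the same pattern with a handful of illustrative option names; the merge_dicts helper from the original package is replaced by a plain dict update:

import scipy

def set_options(user_opts=None):
    defaults = {
        "alpha": scipy.float64(1.0),
        "nlambda": scipy.int32(100),
        "weights": scipy.empty([0]),     # empty placeholder, meant to be filled by the caller
    }
    if user_opts is None:
        return defaults
    unknown = set(user_opts) - set(defaults)
    if unknown:
        raise ValueError('unknown options: %s' % unknown)
    merged = dict(defaults)
    merged.update(user_opts)             # user values override the defaults
    return merged

opts = set_options({"alpha": 0.5})
print(opts["alpha"], opts["nlambda"])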
import scipy as sp
from plinkio import plinkfile  # plinkio provides the PLINK .bed reader used here

def _parse_plink_snps_(genotype_file, snp_indices):
    plinkf = plinkfile.PlinkFile(genotype_file)
    samples = plinkf.get_samples()
    num_individs = len(samples)
    num_snps = len(snp_indices)
    raw_snps = sp.empty((num_snps, num_individs), dtype='int8')
    # If these indices are not in order, then we place them in the right place while parsing SNPs.
    snp_order = sp.argsort(snp_indices)
    ordered_snp_indices = list(snp_indices[snp_order])
    ordered_snp_indices.reverse()
    print('Iterating over file to load SNPs')
    snp_i = 0
    next_i = ordered_snp_indices.pop()
    line_i = 0
    max_i = ordered_snp_indices[0]
    while line_i <= max_i:
        if line_i < next_i:
            next(plinkf)
        elif line_i == next_i:
            line = next(plinkf)
            snp = sp.array(line, dtype='int8')
            bin_counts = line.allele_counts()
            if bin_counts[-1] > 0:
                mode_v = sp.argmax(bin_counts[:2])
                snp[snp == 3] = mode_v
            s_i = snp_order[snp_i]
            raw_snps[s_i] = snp
            if line_i < max_i:
                next_i = ordered_snp_indices.pop()
            snp_i += 1
        line_i += 1
    plinkf.close()
    assert snp_i == len(raw_snps), 'Failed to parse SNPs?'
    num_indivs = len(raw_snps[0])
    freqs = sp.sum(raw_snps, 1, dtype='float32') / (2 * float(num_indivs))
    return raw_snps, freqs
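One detail worth isolating is the missing-genotype handling: PLINK encodes a missing call as 3, and the snippet replaces it with the most frequent of the first two genotype classes in the per-SNP counts. A stand-alone illustration with a made-up genotype vector (sp.bincount stands in for the allele_counts() method of the plinkio row object):

import scipy as sp

snp = sp.array([0, 1, 2, 3, 0, 3, 1], dtype='int8')   # 3 marks a missing genotype
bin_counts = sp.bincount(snp, minlength=4)            # counts of genotype codes 0, 1, 2 and 3
if bin_counts[-1] > 0:                                # there are missing calls
    mode_v = sp.argmax(bin_counts[:2])                # most common of the 0/1 classes, as in the snippet
    snp[snp == 3] = mode_v
print(snp)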
import scipy as sp
from scipy import linalg

def learn(self, x, y):
    '''
    Function that learns the GMM with ridge regularization from training samples
    Input:
        x : the training samples
        y : the labels
    Output:
        the mean, covariance and proportion of each class, as well as
        the spectral decomposition of the covariance matrix
    '''
    # Get information from the data
    C = sp.unique(y).shape[0]   # C = int(y.max(0))  # Number of classes
    n = x.shape[0]              # Number of samples
    d = x.shape[1]              # Number of variables
    eps = sp.finfo(sp.float64).eps

    # Initialization
    self.ni = sp.empty((C, 1))              # Vector of numbers of samples for each class
    self.prop = sp.empty((C, 1))            # Vector of proportions
    self.mean = sp.empty((C, d))            # Vector of means
    self.cov = sp.empty((C, d, d))          # Matrix of covariances
    self.Q = sp.empty((C, d, d))            # Matrix of eigenvectors
    self.L = sp.empty((C, d))               # Vector of eigenvalues
    self.classnum = sp.empty(C).astype('uint8')

    # Learn the parameters of the model for each class
    for c, cR in enumerate(sp.unique(y)):
        j = sp.where(y == (cR))[0]
        self.classnum[c] = cR               # Save the right label
        self.ni[c] = float(j.size)
        self.prop[c] = self.ni[c] / n
        self.mean[c, :] = sp.mean(x[j, :], axis=0)
        self.cov[c, :, :] = sp.cov(x[j, :], bias=1, rowvar=0)  # Normalized by ni to be consistent with the update formulae
        # Spectral decomposition
        L, Q = linalg.eigh(self.cov[c, :, :])
        idx = L.argsort()[::-1]
        self.L[c, :] = L[idx]
        self.Q[c, :, :] = Q[:, idx]
import scipy as sp

def predict(self, xt, tau=None, proba=None):
    '''
    Function that predicts the label for samples xt using the learned model
    Inputs:
        xt : the samples to be classified
    Outputs:
        y : the class
        K : the decision value for each class
    '''
    # Get information from the data
    nt = xt.shape[0]            # Number of testing samples
    C = self.ni.shape[0]        # Number of classes

    # Initialization
    K = sp.empty((nt, C))
    if tau is None:
        TAU = self.tau
    else:
        TAU = tau

    for c in range(C):
        invCov, logdet = self.compute_inverse_logdet(c, TAU)
        cst = logdet - 2 * sp.log(self.prop[c])    # Pre-compute the constant term
        xtc = xt - self.mean[c, :]
        temp = sp.dot(invCov, xtc.T).T
        K[:, c] = sp.sum(xtc * temp, axis=1) + cst
        del temp, xtc

    # Assign the label saved in classnum corresponding to the minimum value of K
    yp = self.classnum[sp.argmin(K, 1)]

    # Reassign labels with their real values
    if proba is None:
        return yp
    else:
        return yp, K
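Together, learn and predict implement a Gaussian (quadratic discriminant) classifier: for each class c the decision value is the squared Mahalanobis distance plus a log-determinant and prior term, and the predicted label is the class minimizing K. Below is a stand-alone sketch of that decision rule on made-up two-class data; compute_inverse_logdet from the class is replaced here by a direct inverse and determinant:

import scipy as sp
from scipy import linalg

sp.random.seed(0)
# Hypothetical training data: two Gaussian classes in 2-D.
x0 = sp.random.randn(100, 2)
x1 = 1.5 * sp.random.randn(100, 2) + 3.0
xt = sp.array([[0.5, 0.2], [2.8, 3.1]])              # samples to classify

K = sp.empty((xt.shape[0], 2))
for c, (xc, prop) in enumerate([(x0, 0.5), (x1, 0.5)]):
    mean = sp.mean(xc, axis=0)
    cov = sp.cov(xc, bias=1, rowvar=0)
    invCov = linalg.inv(cov)
    logdet = sp.log(linalg.det(cov))
    xtc = xt - mean
    K[:, c] = sp.sum(xtc * sp.dot(invCov, xtc.T).T, axis=1) + logdet - 2 * sp.log(prop)

print(sp.argmin(K, 1))                               # expected: [0 1]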
import scipy

def glmnetCoef(obj, s=None, exact=False):
    if s is None:
        s = obj['lambdau']

    if exact and len(s) > 0:
        raise NotImplementedError('exact = True is not implemented in glmnetCoef')

    # glmnetPredict is provided elsewhere in the same package
    result = glmnetPredict(obj, scipy.empty([0]), s, 'coefficients')

    return result
import scipy as sp
import scipy.io

def fake_mldata(columns_dict, dataname, matfile, ordering=None):
    """Create a fake mldata data set.

    Parameters
    ----------
    columns_dict : dict, keys=str, values=ndarray
        Contains data as columns_dict[column_name] = array of data.
    dataname : string
        Name of data set.
    matfile : string or file object
        The file name string or the file-like object of the output file.
    ordering : list, default None
        List of column_names, determines the ordering in the data set.

    Notes
    -----
    This function transposes all arrays, while fetch_mldata only transposes
    'data'; keep that in mind in the tests.
    """
    datasets = dict(columns_dict)

    # transpose all variables
    for name in datasets:
        datasets[name] = datasets[name].T

    if ordering is None:
        ordering = sorted(list(datasets.keys()))
    # NOTE: setting up this array is tricky, because of the way Matlab
    # re-packages 1D arrays
    datasets['mldata_descr_ordering'] = sp.empty((1, len(ordering)), dtype='object')
    for i, name in enumerate(ordering):
        datasets['mldata_descr_ordering'][0, i] = name

    scipy.io.savemat(matfile, datasets, oned_as='column')
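A hypothetical call, just to show the expected argument shapes (the file name, dataset name and values are made up; the result is a MATLAB file whose variables are the transposed columns plus the mldata_descr_ordering cell array):

import scipy as sp

columns = {
    'data': sp.empty((5, 3)),     # 5 samples, 3 features
    'label': sp.empty(5),
}
columns['data'][:] = 1.0          # sp.empty() leaves values uninitialized, so fill them
columns['label'][:] = 0.0

fake_mldata(columns, 'some_dataset', 'fake_dataset.mat', ordering=['label', 'data'])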
import scipy as sp
import scipy.linalg as spla
from scipy.stats import multivariate_normal

def fit(self, X):
    # Constants
    max_iter = self.max_iter
    tol = self.tol
    n_samples, n_features = X.shape
    n_components = self.n_components

    # Initialize parameters
    self._init_params(X)

    # Initialize
    responsibility = sp.empty((n_samples, n_components))
    log_likelihood = self.log_likelihood(X)

    for iter in range(max_iter):
        # E step
        for n in range(n_samples):
            for k in range(n_components):
                responsibility[n][k] = self.pdf(X[n], k) / self.pdf(X[n])

        # M step
        eff = sp.sum(responsibility, axis=0)
        for k in range(n_components):
            # Update mean
            mean = sp.dot(responsibility[:, k], X) / eff[k]

            # Update covariance
            cov = sp.zeros((n_features, n_features))
            for n in range(n_samples):
                cov += responsibility[n][k] * sp.outer(X[n] - mean, X[n] - mean)
            cov /= eff[k]

            # Update the k-th component
            self.comps[k] = multivariate_normal(mean, cov, allow_singular=True)

            # Update the mixture coefficient
            self.coef[k] = eff[k] / n_samples

        # Convergence test
        log_likelihood_new = self.log_likelihood(X)
        diff = log_likelihood_new - log_likelihood
        print('GMM: {0:5d}: {1:10.5e} {2:10.5e}'.format(
            iter, log_likelihood_new, spla.norm(diff) / spla.norm(log_likelihood)))
        if spla.norm(diff) < tol * spla.norm(log_likelihood):
            break
        log_likelihood = log_likelihood_new

    return self
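The E step above computes, for every sample, the responsibility of component k as coef[k] * N(x | mu_k, Sigma_k) divided by the full mixture density. A stand-alone sketch of that computation with made-up mixture parameters, using scipy.stats.multivariate_normal as the snippet does:

import scipy as sp
from scipy.stats import multivariate_normal

# Hypothetical two-component mixture in 2-D.
coef = [0.4, 0.6]
comps = [multivariate_normal([0.0, 0.0], sp.eye(2)),
         multivariate_normal([3.0, 3.0], 2.0 * sp.eye(2))]

X = sp.array([[0.1, -0.2], [2.9, 3.4], [1.5, 1.5]])
n_samples, n_components = X.shape[0], len(comps)

responsibility = sp.empty((n_samples, n_components))
for n in range(n_samples):
    weighted = [coef[k] * comps[k].pdf(X[n]) for k in range(n_components)]
    responsibility[n] = sp.array(weighted) / sum(weighted)

print(responsibility)             # each row sums to 1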
import scipy as sp
from osgeo import gdal

def open_data(filename):
    '''
    The function opens and loads the image given its name.
    The type of the data is checked from the file and the scipy array is initialized accordingly.
    Input:
        filename: the name of the file
    Output:
        im: the data cube
        GeoTransform: the geotransform information
        Projection: the projection information
    '''
    data = gdal.Open(filename, gdal.GA_ReadOnly)
    if data is None:
        print('Impossible to open ' + filename)
        exit()
    nc = data.RasterXSize
    nl = data.RasterYSize
    d = data.RasterCount

    # Get the type of the data
    gdal_dt = data.GetRasterBand(1).DataType
    if gdal_dt == gdal.GDT_Byte:
        dt = 'uint8'
    elif gdal_dt == gdal.GDT_Int16:
        dt = 'int16'
    elif gdal_dt == gdal.GDT_UInt16:
        dt = 'uint16'
    elif gdal_dt == gdal.GDT_Int32:
        dt = 'int32'
    elif gdal_dt == gdal.GDT_UInt32:
        dt = 'uint32'
    elif gdal_dt == gdal.GDT_Float32:
        dt = 'float32'
    elif gdal_dt == gdal.GDT_Float64:
        dt = 'float64'
    elif gdal_dt in (gdal.GDT_CInt16, gdal.GDT_CInt32, gdal.GDT_CFloat32, gdal.GDT_CFloat64):
        dt = 'complex64'
    else:
        print('Data type unknown')
        exit()

    # Initialize the array
    if d == 1:
        im = sp.empty((nl, nc), dtype=dt)
    else:
        im = sp.empty((nl, nc, d), dtype=dt)

    if d == 1:
        im[:, :] = data.GetRasterBand(1).ReadAsArray()
    else:
        for i in range(d):
            im[:, :, i] = data.GetRasterBand(i + 1).ReadAsArray()

    GeoTransform = data.GetGeoTransform()
    Projection = data.GetProjection()
    data = None
    return im, GeoTransform, Projection