The following 33 code examples, extracted from open-source Python projects, illustrate how to use scipy.cluster.
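Before the extracted examples, here is a minimal orientation sketch (not taken from any of the projects below; the data and variable names are made up for illustration) showing the two scipy.cluster submodules that most of the examples rely on: scipy.cluster.vq for k-means quantization and scipy.cluster.hierarchy for agglomerative clustering.

import numpy as np
import scipy.cluster.hierarchy
import scipy.cluster.vq
import scipy.spatial.distance

# Synthetic data: 100 observations with 3 features (illustrative only).
obs = np.random.rand(100, 3)

# scipy.cluster.vq -- k-means on whitened observations.
whitened = scipy.cluster.vq.whiten(obs)                        # scale each feature to unit variance
centroids, distortion = scipy.cluster.vq.kmeans(whitened, 5)   # 5 cluster centroids
labels, dists = scipy.cluster.vq.vq(whitened, centroids)       # assign each point to its nearest centroid

# scipy.cluster.hierarchy -- agglomerative clustering.
condensed = scipy.spatial.distance.pdist(obs)                  # condensed pairwise distance matrix
Z = scipy.cluster.hierarchy.linkage(condensed, method='ward')  # linkage matrix
flat = scipy.cluster.hierarchy.fcluster(Z, t=5, criterion='maxclust')  # flat cluster labels
order = scipy.cluster.hierarchy.leaves_list(Z)                 # leaf order, e.g. for sorting a heatmap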
def debugplot(self, u = None):
    print 'creating plot...'
    n = len(self.region['members'][0]) / 2
    plt.figure(figsize=(6, n/2*4+1))
    m = self.region['members']
    d = self.region['maxdistance']
    for i in range(n):
        plt.subplot(numpy.ceil(n / 2.), 2, 1+i)
        j = i * 2
        k = i * 2 + 1
        plt.plot(m[:,j], m[:,k], 'x', color='b', ms=1)
        plt.gca().add_artist(plt.Circle((m[0,j], m[0,k]), d, color='g', alpha=0.3))
        if u is not None:
            plt.plot(u[j], u[k], 's', color='r')
            plt.gca().add_artist(plt.Circle((u[j], u[k]), d, color='r', alpha=0.3))
    prefix = 'friends%s-%s_' % ('1' if self.jackknife else '', self.metric)
    plt.savefig(prefix + 'cluster.pdf')
    plt.close()
    print 'creating plot... done'
def find_a_dominant_color(image):
    # K-mean clustering to find the k most dominant color, from:
    # http://stackoverflow.com/questions/3241929/python-find-dominant-most-common-color-in-an-image
    n_clusters = 5

    # Get image into a workable form
    im = image.copy()
    im = im.resize((150, 150))      # optional, to reduce time
    ar = scipy.misc.fromimage(im)
    im_shape = ar.shape
    ar = ar.reshape(scipy.product(im_shape[:2]), im_shape[2])
    ar = np.float_(ar)

    # Compute clusters
    codes, dist = scipy.cluster.vq.kmeans(ar, n_clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

    # Get the indexes of the most frequent, 2nd most frequent, 3rd, ...
    sorted_idxs = np.argsort(counts)

    # Get the color
    peak = codes[sorted_idxs[1]]  # get second most frequent color

    return [int(i) for i in peak.tolist()]  # list comprehension to quickly cast everything to int
def process_options(args):
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.eps <= 0.0:
        raise ValueError('eps must be > 0')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options
def write_fasta(file_list):
    ''' Writes fasta files for cluster sequence from bedfile '''
    for infile in file_list:
        ifi = open(infile, "r")
        out_fa_file = infile + ".fa"

        with open(out_fa_file, "w") as of:
            of.close()
            pass

        of = open(out_fa_file, 'a+')

        for line in ifi:
            f_header, f_seq = fasta_header(line)
            ''' Create outfile '''
            of.write("{}\n{}\n".format(f_header, f_seq))

        print("done {}".format(out_fa_file))
        ifi.close()
def compute_group_overlap_score(ref_labels, pred_labels,
                                threshold_overlap_pred=0.5,
                                threshold_overlap_ref=0.5):
    """How well do the pred_labels explain the ref_labels?

    A predicted cluster explains a reference cluster if it is contained within
    the reference cluster with at least 50% (threshold_overlap_pred) of its
    points and these correspond to at least 50% (threshold_overlap_ref) of the
    reference cluster.
    """
    ref_unique, ref_counts = np.unique(ref_labels, return_counts=True)
    ref_dict = dict(zip(ref_unique, ref_counts))
    pred_unique, pred_counts = np.unique(pred_labels, return_counts=True)
    pred_dict = dict(zip(pred_unique, pred_counts))
    summary = []
    for true in ref_unique:
        sub_pred_unique, sub_pred_counts = np.unique(
            pred_labels[true == ref_labels], return_counts=True)
        relative_overlaps_pred = [sub_pred_counts[i] / pred_dict[n]
                                  for i, n in enumerate(sub_pred_unique)]
        relative_overlaps_ref = [sub_pred_counts[i] / ref_dict[true]
                                 for i, n in enumerate(sub_pred_unique)]
        pred_best_index = np.argmax(relative_overlaps_pred)
        summary.append(1 if (relative_overlaps_pred[pred_best_index] >= threshold_overlap_pred
                             and relative_overlaps_ref[pred_best_index] >= threshold_overlap_ref)
                       else 0)
        # print(true, sub_pred_unique[pred_best_index], relative_overlaps_pred[pred_best_index],
        #       relative_overlaps_ref[pred_best_index], summary[-1])
    return sum(summary)/len(summary)
def hierarch_cluster(M):
    """Cluster matrix using hierarchical clustering.

    Parameters
    ----------
    M : np.ndarray
        Matrix, for example, distance matrix.

    Returns
    -------
    Mclus : np.ndarray
        Clustered matrix.
    indices : np.ndarray
        Indices used to cluster the matrix.
    """
    import scipy as sp
    import scipy.cluster
    link = sp.cluster.hierarchy.linkage(M)
    indices = sp.cluster.hierarchy.leaves_list(link)
    Mclus = np.array(M[:, indices])
    Mclus = Mclus[indices, :]
    if False:
        pl.matshow(Mclus)
        pl.colorbar()
    return Mclus, indices
def get_leafs(self, use_labels=False):
    """ Use to retrieve labels.

    Parameters
    ----------
    use_labels : bool, optional
                 If True, self.labels will be returned. If False, will use
                 either columns (if matrix is pandas DataFrame) or indices
                 (if matrix is np.ndarray)
    """
    if isinstance(self.mat, pd.DataFrame):
        if use_labels:
            return [self.labels[i] for i in scipy.cluster.hierarchy.leaves_list(self.linkage)]
        else:
            return [self.mat.columns.tolist()[i] for i in scipy.cluster.hierarchy.leaves_list(self.linkage)]
    else:
        return scipy.cluster.hierarchy.leaves_list(self.linkage)
def calc_agg_coeff(self):
    """ Returns the agglomerative coefficient, measuring the clustering
    structure of the linkage matrix.

    Because it grows with the number of observations, this measure should not
    be used to compare datasets of very different sizes.

    For each observation i, denote by m(i) its dissimilarity to the first
    cluster it is merged with, divided by the dissimilarity of the merger in
    the final step of the algorithm. The agglomerative coefficient is the
    average of all 1 - m(i).
    """
    # Turn into pandas DataFrame for fancy indexing
    Z = pd.DataFrame(self.linkage, columns=['obs1', 'obs2', 'dist', 'n_org'])

    # Get all distances at which an original observation is merged
    all_dist = Z[(Z.obs1.isin(self.leafs)) | (Z.obs2.isin(self.leafs))].dist.values

    # Divide all distances by last merger
    all_dist /= self.linkage[-1][2]

    # Calc final coefficient
    coeff = np.mean(1 - all_dist)

    return coeff
def main():
    from optparse import OptionParser
    parser = OptionParser(usage="%prog --XSCALEfile=<LP filename> --outname=<output dendogram>")
    parser.add_option("-o", "--outname", dest="outname", default='Dendrogram',
                      help="output dendogram file name")
    parser.add_option("-t", "--threshold", dest="threshold", default='0.4',
                      help="Distance threshold for clustering")
    parser.add_option("-c", "--count", action="store_true", dest="count", default=False,
                      help="Counts datasets in the biggest cluster and exit")
    (options, args) = parser.parse_args()
    thr = float(options.threshold)
    CC = Clustering('Cluster_log.txt')
    link = CC.tree()
    if options.count:
        CC.checkMultiplicity(thr)
        print(CC.thrEstimation())
    else:
        CC.checkMultiplicity(thr)
        CC.merge('ano', thr)
def plot_feature_overlap(df, cmap='binary', method='cluster'):
    """Plot feature-feature presence overlap of a pandas dataframe.

    Args:
        df: A pandas dataframe.
        cmap: A matplotlib colormap.
        method: Method of clustering, one of 'cluster' or 'tree'.
    """
    V = len(df.columns)
    present = (df == df).as_matrix().astype(np.float32)
    overlap = np.dot(present.T, present)
    assert overlap.shape == (V, V)
    # Sort features to make blocks contiguous.
    if method == 'tree':
        # TODO(fritzo) Fix this to not look awful.
        grid = make_complete_graph(V)
        weights = np.empty(grid.shape[1], dtype=np.float32)
        for k, v1, v2 in grid.T:
            weights[k] = overlap[v1, v2]
        edges = estimate_tree(grid, weights)
        order, order_inv = order_vertices(edges)
    elif method == 'cluster':
        distance = scipy.spatial.distance.pdist(overlap)
        clustering = scipy.cluster.hierarchy.complete(distance)
        order_inv = scipy.cluster.hierarchy.leaves_list(clustering)
    else:
        raise ValueError(method)
    overlap = overlap[order_inv, :]
    overlap = overlap[:, order_inv]
    assert overlap.shape == (V, V)
    pyplot.imshow(overlap**0.5, cmap=cmap)
    pyplot.axis('off')
def cluster(self, u, ndim, keepRadius=False):
    """ """
    if self.verbose: print 'building region ...'
    if len(u) > 10:
        if keepRadius and self.region is not None and 'maxdistance' in self.region:
            maxdistance = self.region['maxdistance']
        else:
            if self.radial:
                if self.jackknife:
                    #maxdistance = initial_rdistance_guess(u, k=1, metric=self.metric)
                    maxdistance = nearest_rdistance_guess(u, metric=self.metric)
                else:
                    maxdistance = find_rdistance(u, nbootstraps=20,
                                                 metric=self.metric, verbose=self.verbose)
            else:
                maxdistance = find_maxdistance(u)
            if self.force_shrink and self.region is not None and 'maxdistance' in self.region:
                maxdistance = min(maxdistance, self.region['maxdistance'])
        if self.keep_phantom_points and len(self.phantom_points) > 0:
            # add phantoms to u now
            print 'including phantom points in cluster members', self.phantom_points
            u = numpy.vstack((u, self.phantom_points))
        ulow = numpy.max([u.min(axis=0) - maxdistance, numpy.zeros(ndim)], axis=0)
        uhigh = numpy.min([u.max(axis=0) + maxdistance, numpy.ones(ndim)], axis=0)
    else:
        maxdistance = None
        ulow = numpy.zeros(ndim)
        uhigh = numpy.ones(ndim)
    if self.verbose: print 'setting sampling region:', (ulow, uhigh), maxdistance
    self.region = dict(members=u, maxdistance=maxdistance, ulow=ulow, uhigh=uhigh)
    self.generator = None
def rebuild(self, u, ndim, keepRadius=False):
    if self.last_cluster_points is None or \
            len(self.last_cluster_points) != len(u) or \
            numpy.any(self.last_cluster_points != u):
        self.cluster(u=self.transform_new_points(u), ndim=ndim, keepRadius=keepRadius)
        self.last_cluster_points = u
        # reset generator
        self.generator = self.generate(ndim=ndim)
def rebuild(self, u, ndim, keepMetric=False):
    if self.last_cluster_points is not None and \
            len(self.last_cluster_points) == len(u) and \
            numpy.all(self.last_cluster_points == u):
        # do nothing if everything stayed the same
        return
    self.cluster(u=u, ndim=ndim, keepMetric=keepMetric)
    self.last_cluster_points = u
    print 'maxdistance:', self.region.maxdistance
    self.generator = self.generate(ndim)
def start_clustering(self):
    functions.log('Calculate {0} distances...'.format(int(len(self.orfs) * (len(self.orfs) + 1) / 2)))
    self.distances = self.create_distance_matrix()
    functions.log('Start clustering...')
    self.linkage_matrix = scipy.cluster.hierarchy.linkage(ssd.squareform(self.distances), method='complete')
    functions.log('Clustering done.')
def clustering(self, clustering_distance):
    """
    Create clusters
    :param clustering_distance: max distance between ORFs in one cluster
    :return:
    """
    functions.log('Create clusters...')
    for ind, o in enumerate(scipy.cluster.hierarchy.fcluster(self.linkage_matrix, clustering_distance, 'distance')):
        if self.clusters.get(o) is None:
            self.clusters[o] = set()
        self.clusters[o].add(ind)
        self.orfs_clusters[ind] = o
def get_args():
    parser = argparse.ArgumentParser(description='Clustering ORFs by position in the graph')
    parser.add_argument('sequences', type=str, help='Path to ORFs sequences file')
    parser.add_argument('paths', type=str, help='Path to ORFs paths file from ORFFinderInGraph.py')
    parser.add_argument('graph', type=str, help='Path to graph in GFA format')
    parser.add_argument('threshold', type=int,
                        help='Max level of dissimilarity of ORFs in one cluster(between 0 and 1)')
    parser.add_argument('-o', '--output', type=str, default='', help='Output folder')
    parser.add_argument('-sd', '--savedistances', default='False', action='store_true',
                        help='Save pairwise distances matrix (in .npy format)')
    parser.set_defaults(savedistances=False)
    return parser.parse_args()
async def _auto_color(self, url:str, ranks):
    phrases = ["Calculating colors..."]  # in case I want more
    #try:
    await self.bot.say("**{}**".format(random.choice(phrases)))

    clusters = 10

    async with self.session.get(url) as r:
        image = await r.content.read()
    with open('data/leveler/temp_auto.png', 'wb') as f:
        f.write(image)

    im = Image.open('data/leveler/temp_auto.png').convert('RGBA')
    im = im.resize((290, 290))  # resized to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])

    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)         # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))    # count occurrences

    # sort counts
    freq_index = []
    index = 0
    for count in counts:
        freq_index.append((index, count))
        index += 1
    sorted_list = sorted(freq_index, key=operator.itemgetter(1), reverse=True)

    colors = []
    for rank in ranks:
        color_index = min(rank, len(codes))
        peak = codes[sorted_list[color_index][0]]  # gets the original index
        peak = peak.astype(int)

        colors.append(''.join(format(c, '02x') for c in peak))
    return colors  # returns array
    #except:
        #await self.bot.say("```Error or no scipy. Install scipy doing 'pip3 install numpy' and 'pip3 install scipy' or read here: https://github.com/AznStevy/Maybe-Useful-Cogs/blob/master/README.md```")

# converts hex to rgb
def write_cluster_ids(words, cluster_ids, out=None):
    """Write given list of words and their corresponding cluster ids to out."""

    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'

    if out is None:
        out = sys.stdout

    for word, cid in izip(words, cluster_ids):
        print >> out, '%s\t%d' % (word, cid)
def main(argv=None):
    if argv is None:
        argv = sys.argv

    try:
        words, vectors, options = process_options(argv[1:])
    except Exception, e:
        if str(e):
            print >> sys.stderr, 'Error: %s' % str(e)
            return 1
        else:
            raise

    dbscan = sklearn.cluster.DBSCAN(eps=options.eps, metric=options.metric)
    dbscan.fit(numpy.array(vectors))

    noisy = sum(1 for l in dbscan.labels_ if l == -1)
    unique = len(set(dbscan.labels_))
    logging.info('%d clusters, %d noisy, %d vectors' % (unique, noisy, len(vectors)))

    if noisy >= len(vectors) / 4:
        logging.warning('%d/%d noisy (-1) labels (try higher eps?)' %
                        (noisy, len(vectors)))
    elif unique < (len(vectors)/2)**0.5:
        logging.warning('only %d clusters (try lower eps?)' % unique)

    write_cluster_ids(words, dbscan.labels_)

    return 0
def process_options(args):
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.k is not None and options.k < 2:
        raise ValueError('cluster number must be >= 2')

    if options.method == MINIBATCH_KMEANS and not with_sklearn:
        logging.warning('minibatch kmeans not available, using kmeans (slow)')
        options.method = KMEANS

    if options.jobs != 1 and (options.method != KMEANS or not with_sklearn):
        logging.warning('jobs > 1 only supported scikit-learn %s' % KMEANS)
        options.jobs = 1

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.k is None:
        options.k = int(math.ceil((len(wv.words())/2)**0.5))
        logging.info('set k=%d (%d words)' % (options.k, len(wv.words())))

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)

    return words, vectors, options
def minibatch_kmeans(vectors, k):
    if not with_sklearn:
        raise NotImplementedError
    # Sculley (http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf)
    # uses batch size 1000. sklearn KMeans defaults to n_init 10
    kmeans = sklearn.cluster.MiniBatchKMeans(k, batch_size=1000, n_init=10)
    kmeans.fit(vectors)
    return kmeans.labels_
def __getattr__(self, key):
    if key == 'linkage':
        self.cluster()
        return self.linkage
    elif key in ['leafs', 'leaves']:
        return self.get_leafs()
    elif key == 'cophenet':
        return self.calc_cophenet()
    elif key == 'agg_coeff':
        return self.calc_agg_coeff()
def calc_cophenet(self):
    """ Returns Cophenetic Correlation coefficient of your clustering.

    This (very very briefly) compares (correlates) the actual pairwise
    distances of all your samples to those implied by the hierarchical
    clustering. The closer the value is to 1, the better the clustering
    preserves the original distances.
    """
    return scipy.cluster.hierarchy.cophenet(self.linkage, self.condensed_dist_mat)
def cluster(self, method='ward'):
    """ Cluster distance matrix. This will automatically be called when
    attribute linkage is requested for the first time.

    Parameters
    ----------
    method : str, optional
             Clustering method (see scipy.cluster.hierarchy.linkage
             for reference)
    """
    # First, convert similarity matrix to distance matrix
    if self.mat_type != 'distance':
        if isinstance(self.mat, pd.DataFrame):
            self.dist_mat = (self.mat.as_matrix() - self.mat.max().max()) * -1
        else:
            self.dist_mat = (self.mat - self.mat.max()) * -1
    else:
        if isinstance(self.mat, pd.DataFrame):
            self.dist_mat = self.mat.as_matrix()
        else:
            self.dist_mat = self.mat

    # Second, convert into condensed distance matrix - otherwise clustering
    # thinks we are passing observations instead of final scores
    self.condensed_dist_mat = scipy.spatial.distance.squareform(self.dist_mat, checks=False)

    self.linkage = scipy.cluster.hierarchy.linkage(self.condensed_dist_mat, method=method)

    # Save method in case we want to look it up later
    self.method = method

    module_logger.info('Clustering done using method "{0}"'.format(method))
def plot3d(self, k=5, criterion='maxclust', **kwargs):
    """Plot neuron using :func:`pymaid.plot.plot3d`. Will only work if
    instance has neurons attached to it.

    Parameters
    ----------
    k : {int, float}
    criterion : {'maxclust','distance'}, optional
                If `maxclust`, `k` clusters will be formed. If `distance`,
                clusters will be created at threshold `k`.
    **kwargs
                will be passed to plot.plot3d()
                see help(plot.plot3d) for a list of keywords

    See Also
    --------
    :func:`pymaid.plot.plot3d`
            Function called to generate 3d plot
    """

    if 'neurons' not in self.__dict__:
        module_logger.error('This works only with cluster results from neurons')
        return None

    cmap = self.get_colormap(k=k, criterion=criterion)

    kwargs.update({'color': cmap})

    return plotting.plot3d(self.neurons, **kwargs)
def to_json(self, fname='cluster.json', k=5, criterion='maxclust'):
    """ Convert clustered neurons into json file that can be loaded into
    CATMAID selection table.

    Parameters
    ----------
    fname : str, optional
            Filename to save selection to
    k : {int, float}
    criterion : {'maxclust','distance'}, optional
                If `maxclust`, `k` clusters will be formed. If `distance`,
                clusters will be created at threshold `k`.

    See Also
    --------
    :func:`pymaid.plot.plot3d`
            Function called to generate 3d plot
    """

    cmap = self.get_colormap(k=k, criterion=criterion)

    # Convert to 0-255
    cmap = {n: [int(v * 255) for v in cmap[n]] for n in cmap}

    data = [dict(skeleton_id=int(n),
                 color="#{:02x}{:02x}{:02x}".format(cmap[n][0], cmap[n][1], cmap[n][2]),
                 opacity=1) for n in cmap]

    with open(fname, 'w') as outfile:
        json.dump(data, outfile)

    module_logger.info('Selection saved as %s in %s' % (fname, os.getcwd()))
    return
def get_clusters(self, k, criterion='maxclust', return_type='labels'):
    """ Wrapper for cluster.hierarchy.fcluster to get clusters.

    Parameters
    ----------
    k : {int, float}
    criterion : {'maxclust','distance'}, optional
                If `maxclust`, `k` clusters will be formed. If `distance`,
                clusters will be created at threshold `k`.
    return_type : {'labels','indices','columns','rows'}
                Determines what to construct the clusters of. 'labels' only
                works if labels are provided. 'indices' refers to index in
                distance matrix. 'columns'/'rows' works if distance matrix
                is pandas DataFrame

    Returns
    -------
    list
        list of clusters [ [leaf1, leaf5], [leaf2, ...], ... ]
    """
    cl = scipy.cluster.hierarchy.fcluster(self.linkage, k, criterion=criterion)

    if self.labels and return_type.lower() == 'labels':
        return [[self.labels[j] for j in range(len(cl)) if cl[j] == i]
                for i in range(min(cl), max(cl) + 1)]
    elif return_type.lower() == 'rows':
        return [[self.mat.columns.tolist()[j] for j in range(len(cl)) if cl[j] == i]
                for i in range(min(cl), max(cl) + 1)]
    elif return_type.lower() == 'columns':
        return [[self.mat.index.tolist()[j] for j in range(len(cl)) if cl[j] == i]
                for i in range(min(cl), max(cl) + 1)]
    else:
        return [[j for j in range(len(cl)) if cl[j] == i]
                for i in range(min(cl), max(cl) + 1)]
def checkMultiplicity(self, thr):
    FlatC = hierarchy.fcluster(self.Tree, thr, criterion='distance')
    counter = collections.Counter(FlatC)
    Best = max(counter.iteritems(), key=operator.itemgetter(1))[0]
    print('You are clustering with a threshold of %s' % (thr))
    print('The biggest cluster contains %s datasets from a total of %s' % (counter[Best], len(self.labelList)))
def minimalForCompleteness(self):
    print("Running estimator for minimal threshold for completeness")
    labels = self.createLabels()
    x = 0.00
    dx = 0.05
    countsList = {}
    x_list = []
    while x < 1:
        Arrays = {}
        FlatC = hierarchy.fcluster(self.Tree, x, criterion='distance')
        counter = collections.Counter(FlatC)
        Best = max(counter.iteritems(), key=operator.itemgetter(1))[0]
        toProcess = [Best]
        y = 0
        for cluster, filename in zip(FlatC, labels):
            if cluster in toProcess:
                hklFile = any_reflection_file(filename)
                b = hklFile.as_miller_arrays()
                for column in b:
                    if column.is_xray_intensity_array():
                        Arrays[y] = column
                        break
                y += 1
        try:
            Arr = Arrays[0]
        except:
            countsList.append(0)
        for label in range(1, y):
            try:
                Arr = Arr.concatenate(Arrays[label])
            except:
                pass
        countsList[x] = (Arr.completeness())
        x += dx

    # return minimal for max
    L = []
    for key in countsList:
        if countsList[key] > 0.98:
            L.append(key)
    L.sort()
    return L[0]
def cluster(self, u, ndim, keepMetric=False):
    w = self.metric.transform(u)
    prev_region = self.region
    if keepMetric:
        self.region = RadFriendsRegion(members=w)
        if self.force_shrink and self.region.maxdistance > self.prev_maxdistance:
            self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance)
        self.prev_maxdistance = self.region.maxdistance
        print 'keeping metric, not reclustering.'
        return

    metric_updated = False
    clustermetric = self.metric
    print 'computing distances for clustering...'
    # Overlay all clusters (shift by cluster mean)
    print 'Metric update ...'
    cluster_mean = numpy.mean(u, axis=0)
    shifted_cluster_members = u - cluster_mean

    # Using original points and new metric, compute RadFriends bootstrapped distance and store
    if self.metriclearner == 'none':
        metric = self.metric  # stay with identity matrix
        metric_updated = False
    elif self.metriclearner == 'simplescaling':
        metric = SimpleScaling()
        metric.fit(shifted_cluster_members)
        metric_updated = True
    elif self.metriclearner == 'truncatedscaling':
        metric = TruncatedScaling()
        metric.fit(shifted_cluster_members)
        metric_updated = self.metric == IdentityMetric() or not numpy.all(self.metric.scale == metric.scale)
    else:
        assert False, self.metriclearner

    self.metric = metric

    wnew = self.metric.transform(u)
    print 'Region update ...'
    self.region = RadFriendsRegion(members=wnew)  #, maxdistance=shifted_region.maxdistance)
    if not metric_updated and self.force_shrink and self.prev_maxdistance is not None:
        if self.region.maxdistance > self.prev_maxdistance:
            self.region = RadFriendsRegion(members=w, maxdistance=self.prev_maxdistance)
    self.prev_maxdistance = self.region.maxdistance
    print 'done.'
def plot_matrix(self):
    """ Plot distance matrix and dendrogram using matplotlib.

    Returns
    -------
    matplotlib figure
    """

    # Compute and plot first dendrogram for all nodes.
    fig = pylab.figure(figsize=(8, 8))
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = scipy.cluster.hierarchy.dendrogram(self.linkage, orientation='left',
                                            labels=self.labels)
    ax1.set_xticks([])
    ax1.set_yticks([])

    # Compute and plot second dendrogram.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = scipy.cluster.hierarchy.dendrogram(self.linkage, labels=self.labels)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Plot distance matrix.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = self.mat.copy()

    if isinstance(D, pd.DataFrame):
        D = D.as_matrix()

    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Plot colorbar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)

    module_logger.info('Use matplotlib.pyplot.show() to render figure.')

    return fig
def merge(self, anomFlag, thr):
    FlatC = hierarchy.fcluster(self.Tree, thr, criterion='distance')
    Log = open(self.CurrentDir + '/.cc_cluster.log', 'a')
    counter = collections.Counter(FlatC)
    Best = max(counter.iteritems(), key=operator.itemgetter(1))[0]
    Process = True

    # change checkboxes to standard variables
    if Process:
        ToProcess = [Best]
    else:
        ToProcess = set(Clusters)

    for key in ToProcess:
        if counter[key] == 1:
            ToProcess = [x for x in ToProcess if x != key]

    # Processing pipeline,
    # Does all the XSCALE run
    for x in ToProcess:
        if [thr, x, anomFlag] not in self.alreadyDone:
            os.mkdir(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s' % (float(thr), x, anomFlag))
            Xscale = open(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/XSCALE.INP' % (float(thr), x, anomFlag), 'a')
            Pointless = open(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/launch_pointless.sh' % (float(thr), x, anomFlag), 'a')
            print('OUTPUT_FILE=scaled.hkl', file=Xscale)
            print('MERGE= TRUE', file=Xscale)
            print('pointless hklout clustered.mtz << eof', file=Pointless)
            if anomFlag == 'ano':
                print('FRIEDEL\'S_LAW= FALSE', file=Xscale)
            elif anomFlag == 'no_ano':
                print('FRIEDEL\'S_LAW= TRUE', file=Xscale)
            Xscale.close()
            Pointless.close()

    for cluster, filename in zip(FlatC, self.labelList):
        if cluster in ToProcess:
            OUT = open(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/XSCALE.INP' % (float(thr), cluster, anomFlag), 'a')
            Pointless = open(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/launch_pointless.sh' % (float(thr), cluster, anomFlag), 'a')
            print('INPUT_FILE= ../%s' % (filename), file=OUT)
            #print('INCLUDE_RESOLUTION_RANGE=20, 1.8', file=OUT)
            print('MINIMUM_I/SIGMA= 0', file=OUT)
            print('XDSIN ../%s' % (filename), file=Pointless)
            OUT.close()
            Pointless.close()

    # optional run of XSCALE
    newProcesses = []
    for x in ToProcess:
        if [thr, x, anomFlag] not in self.alreadyDone:
            plt.savefig(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/Dendrogram.png' % (float(thr), x, anomFlag))
            P = subprocess.Popen('/opt/pxsoft/xds/vdefault/linux-x86_64/xscale_par',
                                 cwd=self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/' % (float(thr), x, anomFlag))
            P.wait()
            print('Cluster, %s , %s , %s' % (float(thr), x, anomFlag), file=Log)
            Pointless = open(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/launch_pointless.sh' % (float(thr), x, anomFlag), 'a')
            print('COPY \n bg\n TOLERANCE 4 \n eof', file=Pointless)
            Pointless.close()
            os.chmod(self.CurrentDir + '/cc_Cluster_%.2f_%s_%s/launch_pointless.sh' % (self.threshold, x, self.anomFlag),
                     st.st_mode | 0o111)
            newProcesses.append([thr, x, anomFlag])