The following 50 code examples, extracted from open source Python projects, illustrate how to use six.moves.range().
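Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of why six.moves.range is used: on Python 2 it aliases the lazy xrange, and on Python 3 it aliases the built-in range, so the same loop avoids materializing an intermediate list on both interpreters.

from six.moves import range

total = 0
for i in range(1000000):  # lazy iteration on both Python 2 and Python 3
    total += i
print(total)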
def multinomial_entropy(probs, count):
    """Compute entropy of multinomial distribution with given probs and count.

    Args:
        probs: A 1-dimensional array of normalized probabilities.
        count: The number of draws in a multinomial distribution.

    Returns:
        A number in [0, count * len(probs)] representing entropy.
    """
    assert count > 0
    multi_probs = probs
    for _ in range(count - 1):
        if len(probs) > 2:
            raise NotImplementedError(
                'Only categorical and binomial are supported')
        multi_probs = np.convolve(multi_probs, probs)
    return entropy(multi_probs)
def test_gc_stresstest():
    with Storage('gs://seunglab-test/cloudvolume/connection_pool/', n_threads=0) as stor:
        stor.put_file('test', 'some string')

    n_trials = 500
    pbar = tqdm(total=n_trials)

    @retry
    def create_conn(interface):
        # assert GC_POOL.total_connections() <= GC_POOL.max_connections * 5
        bucket = GC_POOL.get_connection()
        blob = bucket.get_blob('cloudvolume/connection_pool/test')
        blob.download_as_string()
        GC_POOL.release_connection(bucket)
        pbar.update()

    with ThreadedQueue(n_threads=20) as tq:
        for _ in range(n_trials):
            tq.put(create_conn)

    pbar.close()
def wers(originals, results):
    count = len(originals)
    rates = []
    mean = 0.0
    assert count == len(results)
    for i in range(count):
        rate = wer(originals[i], results[i])
        mean = mean + rate
        rates.append(rate)
    return rates, mean / float(count)


# The following code is from: http://hetland.org/coding/python/levenshtein.py
# This is a straightforward implementation of a well-known algorithm, and thus
# probably shouldn't be covered by copyright to begin with. But in case it is,
# the author (Magnus Lie Hetland) has, to the extent possible under law,
# dedicated all copyright and related and neighboring rights to this software
# to the public domain worldwide, by distributing it under the CC0 license,
# version 1.0. This software is distributed without any warranty. For more
# information, see <http://creativecommons.org/publicdomain/zero/1.0>
def levenshtein(a, b):
    "Calculates the Levenshtein distance between a and b."
    n, m = len(a), len(b)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        a, b = b, a
        n, m = m, n

    current = list(range(n + 1))
    for i in range(1, m + 1):
        previous, current = current, [i] + [0] * n
        for j in range(1, n + 1):
            add, delete = previous[j] + 1, current[j - 1] + 1
            change = previous[j - 1]
            if a[j - 1] != b[i - 1]:
                change = change + 1
            current[j] = min(add, delete, change)

    return current[n]


# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
#
# Unfortunately we can't just use tf.gather_nd because it does not have gradients
# implemented yet, so we need this workaround.
#
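The gather_nd workaround itself is not reproduced in this example. As a rough illustration only, the general trick the comment refers to is to flatten the parameter tensor, convert N-dimensional indices into flat offsets, and use tf.gather (which does have gradients). The sketch below is an assumption written against a current TensorFlow API, not the implementation from the linked issue:

import tensorflow as tf

def gather_nd_workaround(params, indices):
    # params: tensor of shape [d0, d1, ..., dk]
    # indices: int tensor of shape [n, k+1], each row addressing one scalar entry
    shape = tf.shape(params)
    flat_params = tf.reshape(params, [-1])
    # Row-major strides, e.g. shape [4, 3, 2] -> strides [6, 2, 1].
    strides = tf.math.cumprod(shape, exclusive=True, reverse=True)
    flat_indices = tf.reduce_sum(indices * strides, axis=1)
    return tf.gather(flat_params, flat_indices)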
def sparse_tuple_from(sequences, dtype=np.int32):
    r"""Creates a sparse representation of ``sequences``.

    Args:
        * sequences: a list of lists of type dtype where each element is a sequence

    Returns a tuple with (indices, values, shape)
    """
    indices = []
    values = []

    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)

    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(0)[1] + 1], dtype=np.int64)

    return tf.SparseTensor(indices=indices, values=values, shape=shape)
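For reference, a small illustration (plain numpy, hypothetical values, no TensorFlow call) of the (indices, values, shape) layout that sparse_tuple_from builds before wrapping it in a SparseTensor:

import numpy as np

sequences = [[7, 8, 9], [4, 5]]  # hypothetical label sequences
indices = np.asarray([(n, t) for n, seq in enumerate(sequences)
                      for t in range(len(seq))], dtype=np.int64)
values = np.asarray([x for seq in sequences for x in seq], dtype=np.int32)
shape = np.asarray([len(sequences), indices.max(0)[1] + 1], dtype=np.int64)
# indices -> [[0 0] [0 1] [0 2] [1 0] [1 1]]
# values  -> [7 8 9 4 5]
# shape   -> [2 3]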
def logprob(self):
    """Compute non-normalized log probability of data and latent state.

    This is used for testing goodness of fit of the latent state kernel.
    """
    assert len(self._added_rows) == self._num_rows
    V, E, K, M = self._VEKM
    vertex_logits = logprob_dc(self._vert_ss, self._vert_prior, axis=1)
    logprob = vertex_logits.sum()
    for e, v1, v2 in self._tree.tree_grid.T:
        logprob += (logprob_dc(self._edge_ss[e, :, :], self._edge_prior) -
                    vertex_logits[v1] - vertex_logits[v2])
    for v in range(V):
        beg, end = self._table.ragged_index[v:v + 2]
        logprob += logprob_dc(self._feat_ss[beg:end, :], self._feat_prior)
        logprob -= logprob_dc(self._meas_ss[v, :], self._meas_prior[v])
    return logprob
def treegauss_remove_row(
        data_row,
        tree_grid,
        latent_row,
        vert_ss,
        edge_ss,
        feat_ss, ):
    # Update sufficient statistics.
    for v in range(latent_row.shape[0]):
        z = latent_row[v, :]
        vert_ss[v, :, :] -= np.outer(z, z)
    for e in range(tree_grid.shape[1]):
        z1 = latent_row[tree_grid[1, e], :]
        z2 = latent_row[tree_grid[2, e], :]
        edge_ss[e, :, :] -= np.outer(z1, z2)
    for v, x in enumerate(data_row):
        if np.isnan(x):
            continue
        z = latent_row[v, :]
        feat_ss[v] -= 1
        feat_ss[v, 1] -= x
        feat_ss[v, 2:] -= x * z  # TODO Use central covariance.
def train_ensemble(table, tree_prior, config):
    """Train a TreeCat ensemble model using subsample-annealed MCMC.

    The ensemble size is controlled by config['model_ensemble_size'].

    Let N be the number of data rows and V be the number of features.

    Args:
        table: A Table instance holding N rows of V features of data.
        tree_prior: A [K]-shaped numpy array of prior edge log odds, where
            K is the number of edges in the complete graph on V vertices.
        config: A global config dict.

    Returns:
        A trained model as a dictionary with keys:
            tree: A TreeStructure instance with the learned latent structure.
            suffstats: Sufficient statistics of features, vertices, and edges.
            assignments: An [N, V] numpy array of latent cluster ids for each
                cell in the dataset.
    """
    tasks = []
    for sub_seed in range(config['model_ensemble_size']):
        sub_config = config.copy()
        sub_config['seed'] += sub_seed
        tasks.append((table, tree_prior, sub_config))
    return parallel_map(_train_model, tasks)
def observed_perplexity(self, counts):
    """Compute perplexity = exp(entropy) of observed variables.

    Perplexity is an information theoretic measure of the number of
    clusters or latent classes. Perplexity is a real number in the range
    [1, M], where M is model_num_clusters.

    Args:
        counts: A [V]-shaped array of multinomial counts.

    Returns:
        A [V]-shaped numpy array of perplexity.
    """
    V, E, M, R = self._VEMR
    if counts is not None:
        counts = np.ones(V, dtype=np.int8)
    assert counts.shape == (V, )
    assert counts.dtype == np.int8
    assert np.all(counts > 0)
    observed_entropy = np.empty(V, dtype=np.float32)
    for v in range(V):
        beg, end = self._ragged_index[v:v + 2]
        probs = np.dot(self._feat_cond[beg:end, :], self._vert_probs[v, :])
        observed_entropy[v] = multinomial_entropy(probs, counts[v])
    return np.exp(observed_entropy)
def jit_remove_edge(grid, e2k, neighbors, components, e):
    """Remove an edge from a spanning tree."""
    k = e2k[e]
    v1, v2 = grid[1:3, k]
    jit_set_remove(neighbors[v1], v2)
    jit_set_remove(neighbors[v2], v1)
    stack = np.zeros(neighbors.shape[0], np.int16)
    jit_set_add(stack, v1)
    while stack[0]:
        v1 = jit_set_pop(stack)
        components[v1] = True
        for i in range(neighbors[v1, 0]):
            v2 = neighbors[v1, i + 1]
            if not components[v2]:
                jit_set_add(stack, v2)
    return k
def find_valid_edges(components, valid_edges):
    """Find all edges between two components in a complete undirected graph.

    Args:
        components: A [V]-shaped array of boolean component ids. This assumes
            there are exactly two nonempty components.
        valid_edges: An uninitialized array where output is written. On return,
            the subarray valid_edges[:end] will contain edge ids k for all
            valid edges.

    Returns:
        The number of valid edges found.
    """
    k = 0
    end = 0
    for v2, c2 in enumerate(components):
        for v1 in range(v2):
            if c2 ^ components[v1]:
                valid_edges[end] = k
                end += 1
            k += 1
    return end
def quantize_from_probs2(probs, resolution):
    """Quantize multiple non-normalized probs to given resolution.

    Args:
        probs: An [N, M]-shaped numpy array of non-normalized probabilities.

    Returns:
        An [N, M]-shaped array of quantized probabilities such that
        np.all(result.sum(axis=1) == resolution).
    """
    assert len(probs.shape) == 2
    N, M = probs.shape
    probs = probs / probs.sum(axis=1, keepdims=True)
    result = np.zeros(probs.shape, np.int8)
    range_N = np.arange(N, dtype=np.int32)
    for _ in range(resolution):
        sample = probs.argmax(axis=1)
        result[range_N, sample] += 1
        probs[range_N, sample] -= 1.0 / resolution
    return result
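A small worked example may help (hypothetical input, assuming numpy and the function above are in scope): each row is normalized and then `resolution` unit increments are handed out greedily, so every row of the result sums to the resolution.

import numpy as np

probs = np.array([[0.2, 0.8],
                  [0.5, 0.5]])
out = quantize_from_probs2(probs, resolution=4)
# Greedy allocation gives roughly proportional integer counts, e.g.
#   [[1, 3],
#    [2, 2]]
assert (out.sum(axis=1) == 4).all()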
def make_ragged_mask(ragged_index, mask):
    """Convert a boolean mask from dense to ragged format.

    Args:
        ragged_index: A [V+1]-shaped numpy array as returned by
            make_ragged_index.
        mask: A [V,...]-shaped numpy array of booleans.

    Returns:
        A [R,...]-shaped numpy array, where R = ragged_index[-1].
    """
    V = ragged_index.shape[0] - 1
    R = ragged_index[-1]
    assert mask.shape[0] == V
    assert mask.dtype == np.bool_
    ragged_mask = np.empty((R, ) + mask.shape[1:], dtype=np.bool_)
    for v in range(V):
        beg, end = ragged_index[v:v + 2]
        ragged_mask[beg:end] = mask[v]
    return ragged_mask
def count_observations(ragged_index, data):
    """Count the observations in each cell of a ragged data array.

    Args:
        ragged_index: A [V+1]-shaped numpy array as returned by
            make_ragged_index.
        data: A [N, R]-shaped ragged array of multinomial count data, where
            N is the number of rows and R = ragged_index[-1].

    Returns:
        A [N, V]-shaped array whose entries are the number of observations
        in each cell of data.
    """
    N, R = data.shape
    assert R == ragged_index[-1]
    V = len(ragged_index) - 1
    counts = np.zeros([N, V], np.int8)
    for v in range(V):
        beg, end = ragged_index[v:v + 2]
        counts[:, v] = data[:, beg:end].sum(axis=1)
    return counts
def test_algo_record_vars(self):
    test_algo = TradingAlgorithm(
        script=record_variables,
        sim_params=self.sim_params,
        env=self.env,
    )
    set_algo_instance(test_algo)

    self.zipline_test_config['algorithm'] = test_algo
    self.zipline_test_config['trade_count'] = 200

    zipline = simfactory.create_test_zipline(
        **self.zipline_test_config)
    output, _ = drain_zipline(self, zipline)
    self.assertEqual(len(output), 252)

    incr = []
    for o in output[:200]:
        incr.append(o['daily_perf']['recorded_vars']['incr'])

    np.testing.assert_array_equal(incr, range(1, 201))
def minutes_for_days():
    """
    500 randomly selected days.
    This is used to make sure our test coverage is unbiased towards any rules.
    We use a random sample because testing on all the trading days took
    around 180 seconds on my laptop, which is far too much for normal unit
    testing.

    We manually set the seed so that this will be deterministic.
    Results of multiple runs were compared to make sure that this is actually
    true.

    This returns a generator of tuples each wrapping a single generator.
    Iterating over this yields a single day, iterating over the day yields
    the minutes for that day.
    """
    env = TradingEnvironment()
    random.seed('deterministic')
    return ((env.market_minutes_for_day(random.choice(env.trading_days)),)
            for _ in range(500))
def test_s3_stresstest():
    with Storage('s3://seunglab-test/cloudvolume/connection_pool/', n_threads=0) as stor:
        stor.put_file('test', 'some string')

    n_trials = 500
    pbar = tqdm(total=n_trials)

    @retry
    def create_conn(interface):
        conn = S3_POOL.get_connection()
        # assert S3_POOL.total_connections() <= S3_POOL.max_connections * 5
        bucket = conn.get_object(
            Bucket='seunglab-test',
            Key='cloudvolume/connection_pool/test',
        )
        S3_POOL.release_connection(conn)
        pbar.update()

    with ThreadedQueue(n_threads=20) as tq:
        for _ in range(n_trials):
            tq.put(create_conn)

    pbar.close()
def test_derived_class():
    def what_fun(should_be_fun):
        assert should_be_fun == 'fun'

    class DerivedThreadedQueue(ThreadedQueue):
        def _initialize_interface(self):
            return 'fun'

    with DerivedThreadedQueue(n_threads=1) as tq:
        for _ in range(1000):
            tq.put(what_fun)
        tq.wait()
        assert tq.processed == 1000

    # shouldn't crash w/ 0 threads because it's a derived class
    with DerivedThreadedQueue(n_threads=0) as tq:
        pass
def xyzrange(start_vec, end_vec=None, stride_vec=(1, 1, 1)):
    if end_vec is None:
        end_vec = start_vec
        start_vec = (0, 0, 0)

    start_vec = np.array(start_vec, dtype=int)
    end_vec = np.array(end_vec, dtype=int)

    rangeargs = ((start, end, stride) for start, end, stride
                 in zip(start_vec, end_vec, stride_vec))
    xyzranges = [range(*arg) for arg in rangeargs]

    # Iterate over x first, then y, then z. This way you process the volume
    # in the xy plane slice by slice, but you don't process lots of
    # prefix-adjacent keys, since all the keys start with X.
    zyxranges = xyzranges[::-1]

    def vectorize():
        pt = Vec(0, 0, 0)
        for z, y, x in product(*zyxranges):
            pt.x, pt.y, pt.z = x, y, z
            yield pt

    return vectorize()
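A brief usage sketch (hypothetical bounds) to make the iteration order concrete; note the generator reuses and mutates a single Vec, so copy the point if you need to keep it:

# Walks the 2x2x2 grid with x varying fastest, then y, then z:
# (0,0,0), (1,0,0), (0,1,0), (1,1,0), (0,0,1), (1,0,1), (0,1,1), (1,1,1)
for pt in xyzrange((2, 2, 2)):
    print(pt.x, pt.y, pt.z)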
def test_in_flight_timeout(self):
    """
    Test to ensure that connection id fetching will block when max_id is
    reached. In previous versions of the driver this test would cause a
    NoHostAvailable exception to be thrown when max_id was restricted.

    @since 3.3
    @jira_ticket PYTHON-514
    @expected_result When many requests are run on a single node, connection
    acquisition should block until a connection is available or the request
    times out.

    @test_category connection timeout
    """
    futures = []
    query = '''SELECT * FROM system.local'''
    for i in range(100):
        futures.append(self.session.execute_async(query))

    for future in futures:
        future.result()
def get_connection(self, timeout=5):
    """
    Helper method to solve automated testing issues within Jenkins.
    Officially patched under the 2.0 branch through
    17998ef72a2fe2e67d27dd602b6ced33a58ad8ef, but left as is for the
    1.0 branch due to possible regressions for fixing an automated
    testing edge-case.
    """
    conn = None
    e = None
    for i in range(5):
        try:
            contact_point = CASSANDRA_IP
            conn = self.klass.factory(host=contact_point, timeout=timeout,
                                      protocol_version=PROTOCOL_VERSION)
            break
        except (OperationTimedOut, NoHostAvailable, ConnectionShutdown) as e:
            continue

    if conn:
        return conn
    else:
        raise e
def test_single_connection_pipelined_requests(self):
    """
    Test a single connection with pipelined requests.
    """
    conn = self.get_connection()
    query = "SELECT keyspace_name FROM system.schema_keyspaces LIMIT 1"
    responses = [False] * 100
    event = Event()

    def cb(response_list, request_num, *args, **kwargs):
        response_list[request_num] = True
        if all(response_list):
            conn.close()
            event.set()

    for i in range(100):
        conn.send_msg(
            QueryMessage(query=query, consistency_level=ConsistencyLevel.ONE),
            request_id=i,
            cb=partial(cb, responses, i))

    event.wait()
def test_connect_timeout(self):
    # Underlying socket implementations don't always throw a socket timeout even with min float
    # This can be timing sensitive, added retry to ensure failure occurs if it can
    max_retry_count = 10
    exception_thrown = False
    for i in range(max_retry_count):
        start = time.time()
        try:
            conn = self.get_connection(timeout=sys.float_info.min)
            conn.close()
        except Exception as e:
            end = time.time()
            self.assertAlmostEqual(start, end, 1)
            exception_thrown = True
            break
    self.assertTrue(exception_thrown)
def recv_results_rows(cls, f, protocol_version, user_type_map, result_metadata):
    paging_state, column_metadata = cls.recv_results_metadata(f, user_type_map)
    column_metadata = column_metadata or result_metadata
    rowcount = read_int(f)
    rows = [cls.recv_row(f, len(column_metadata)) for _ in range(rowcount)]
    colnames = [c[2] for c in column_metadata]
    coltypes = [c[3] for c in column_metadata]
    try:
        parsed_rows = [
            tuple(ctype.from_binary(val, protocol_version)
                  for ctype, val in zip(coltypes, row))
            for row in rows]
    except Exception:
        for row in rows:
            for i in range(len(row)):
                try:
                    coltypes[i].from_binary(row[i], protocol_version)
                except Exception as e:
                    raise DriverException('Failed decoding result column "%s" of type %s: %s'
                                          % (colnames[i],
                                             coltypes[i].cql_parameterized_type(),
                                             str(e)))
    return paging_state, coltypes, (colnames, parsed_rows)
def recv_schema_change(cls, f, protocol_version):
    # "CREATED", "DROPPED", or "UPDATED"
    change_type = read_string(f)
    if protocol_version >= 3:
        target = read_string(f)
        keyspace = read_string(f)
        event = {'target_type': target, 'change_type': change_type, 'keyspace': keyspace}
        if target != SchemaTargetType.KEYSPACE:
            target_name = read_string(f)
            if target == SchemaTargetType.FUNCTION:
                event['function'] = UserFunctionDescriptor(target_name, [read_string(f) for _ in range(read_short(f))])
            elif target == SchemaTargetType.AGGREGATE:
                event['aggregate'] = UserAggregateDescriptor(target_name, [read_string(f) for _ in range(read_short(f))])
            else:
                event[target.lower()] = target_name
    else:
        keyspace = read_string(f)
        table = read_string(f)
        if table:
            event = {'target_type': SchemaTargetType.TABLE, 'change_type': change_type,
                     'keyspace': keyspace, 'table': table}
        else:
            event = {'target_type': SchemaTargetType.KEYSPACE, 'change_type': change_type,
                     'keyspace': keyspace}
    return event
def deserialize_safe(cls, byts, protocol_version):
    subtype, = cls.subtypes
    if protocol_version >= 3:
        unpack = int32_unpack
        length = 4
    else:
        unpack = uint16_unpack
        length = 2
    numelements = unpack(byts[:length])
    p = length
    result = []
    inner_proto = max(3, protocol_version)
    for _ in range(numelements):
        itemlen = unpack(byts[p:p + length])
        p += length
        item = byts[p:p + itemlen]
        p += itemlen
        result.append(subtype.from_binary(item, inner_proto))
    return cls.adapter(result)
def deserialize_safe(cls, byts, protocol_version):
    key_type, value_type = cls.subtypes
    if protocol_version >= 3:
        unpack = int32_unpack
        length = 4
    else:
        unpack = uint16_unpack
        length = 2
    numelements = unpack(byts[:length])
    p = length
    themap = util.OrderedMapSerializedKey(key_type, protocol_version)
    inner_proto = max(3, protocol_version)
    for _ in range(numelements):
        key_len = unpack(byts[p:p + length])
        p += length
        keybytes = byts[p:p + key_len]
        p += key_len
        val_len = unpack(byts[p:p + length])
        p += length
        valbytes = byts[p:p + val_len]
        p += val_len
        key = key_type.from_binary(keybytes, inner_proto)
        val = value_type.from_binary(valbytes, inner_proto)
        themap._insert_unchecked(key, keybytes, val)
    return themap
def save_super_images(self, images, sample_batchs, filenames,
                      sentenceID, save_dir, subset):
    # batch_size samples for each embedding
    numSamples = len(sample_batchs)
    for j in range(len(filenames)):
        s_tmp = '%s-1real-%dsamples/%s/%s' %\
            (save_dir, numSamples, subset, filenames[j])
        folder = s_tmp[:s_tmp.rfind('/')]
        if not os.path.isdir(folder):
            print('Make a new folder: ', folder)
            mkdir_p(folder)

        superimage = [images[j]]
        # cfg.TRAIN.NUM_COPY samples for each text embedding/sentence
        for i in range(len(sample_batchs)):
            superimage.append(sample_batchs[i][j])

        superimage = np.concatenate(superimage, axis=1)
        fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
        scipy.misc.imsave(fullpath, superimage)
def eval_one_dataset(self, sess, dataset, save_dir, subset='train'):
    count = 0
    print('num_examples:', dataset._num_examples)
    while count < dataset._num_examples:
        start = count % dataset._num_examples
        images, embeddings_batchs, filenames, _ =\
            dataset.next_batch_test(self.batch_size, start, 1)
        print('count = ', count, 'start = ', start)
        for i in range(len(embeddings_batchs)):
            samples_batchs = []
            # Generate up to 16 images for each sentence,
            # with randomness from noise z and conditioning augmentation.
            for j in range(np.minimum(16, cfg.TRAIN.NUM_COPY)):
                samples = sess.run(self.fake_images,
                                   {self.embeddings: embeddings_batchs[i]})
                samples_batchs.append(samples)
            self.save_super_images(images, samples_batchs, filenames, i,
                                   save_dir, subset)

        count += self.batch_size
def x_povm(dim):
    """The X POVM simplifies to measuring Pauli X eigenvectors for dim=2.

    :param dim: Dimension of the system
    :returns: POVM with generalized X measurements

    """
    vectors = np.zeros([dim * (dim - 1), dim])
    k = 0
    for i in range(dim - 1):
        for j in range(i + 1, dim):
            vectors[k, i], vectors[k, j] = 1.0, 1.0
            k += 1
            vectors[k, i], vectors[k, j] = 1.0, -1.0
            k += 1

    vectors /= np.sqrt(2 * (dim - 1))
    return POVM.from_vectors(vectors, info_complete=False)
def y_povm(dim):
    """The Y POVM simplifies to measuring Pauli Y eigenvectors for dim=2.

    :param dim: Dimension of the system
    :returns: POVM with generalized Y measurements

    """
    vectors = np.zeros([dim * (dim - 1), dim], dtype=complex)
    k = 0
    for i in range(dim - 1):
        for j in range(i + 1, dim):
            vectors[k, i], vectors[k, j] = 1.0, 1.0j
            k += 1
            vectors[k, i], vectors[k, j] = 1.0, -1.0j
            k += 1

    vectors /= np.sqrt(2 * (dim - 1))
    return POVM.from_vectors(vectors, info_complete=False)
def random_local_ham(sites, ldim=2, intlen=2, randstate=None):
    """Generates a random Hamiltonian on `sites` sites with local dimension
    `ldim`, which is a sum of local Hamiltonians with interaction length
    `intlen`.

    :param sites: Number of sites
    :param ldim: Local dimension
    :param intlen: Interaction length of the local Hamiltonians
    :returns: MPA representation of the global Hamiltonian

    """
    def get_local_ham():
        op = _random_op(intlen, ldim, hermitian=True, normalized=True)
        op = global_to_local(op, sites=intlen)
        return mp.MPArray.from_array(op, ndims=2)

    assert sites >= intlen
    local_hams = [get_local_ham() for _ in range(sites + 1 - intlen)]
    return mp.local_sum(local_hams)
def group_sites(self, sites_per_group):
    """Group several MPA sites into one site.

    The resulting MPA has length ``len(self) // sites_per_group`` and
    ``sites_per_group * self.ndims[i]`` physical legs on site ``i``. The
    physical legs on each site are in local form.

    :param int sites_per_group: Number of sites to be grouped into one
    :returns: An MPA with ``sites_per_group`` fewer sites and more ndims

    """
    if (len(self) % sites_per_group) != 0:
        raise ValueError('Cannot group: {} not a multiple of {}'
                         .format(len(self), sites_per_group))

    if sites_per_group == 1:
        return self
    ltens = [_ltens_to_array(self._lt[i:i + sites_per_group])
             for i in range(0, len(self), sites_per_group)]
    return MPArray(ltens)
def split_sites(self, sites_per_group):
    """Split MPA sites into several sites.

    The resulting MPA has length ``len(self) * sites_per_group`` and
    ``self.ndims[i] // sites_per_group`` indices on site i.

    :param int sites_per_group: Split each site into that many sites
    :returns: An mpa with ``sites_per_group`` more sites and fewer ``ndims``

    """
    ltens = []
    for i in range(len(self)):
        ndims = self.ndims[i]
        assert (ndims % sites_per_group) == 0, \
            'ndims not a multiple of sites_per_group'
        ltens += _extract_factors(self._lt[i], ndims // sites_per_group)

    return MPArray(ltens)
def _rcanonicalize(self, to_site):
    """Left-canonicalizes all local tensors _ltens[:to_site] in place

    :param to_site: Index of the site up to which canonicalization is to be
        performed

    """
    assert 0 <= to_site < len(self), 'to_site={!r}'.format(to_site)

    lcanon, rcanon = self._lt.canonical_form
    for site in range(lcanon, to_site):
        ltens = self._lt[site]
        q, r = qr(ltens.reshape((-1, ltens.shape[-1])))
        # if ltens.shape[-1] > prod(ltens.phys_shape) --> trivial comp.
        # can be accounted by adapting rank here
        newtens = (q.reshape(ltens.shape[:-1] + (-1,)),
                   matdot(r, self._lt[site + 1]))
        self._lt.update(slice(site, site + 2), newtens,
                        canonicalization=('left', None))
def singularvals(self):
    """Return singular values of ``self`` for all bipartitions

    :returns: Iterate over bipartitions with 1, 2, ... len(self) - 1 sites
        on the left hand side. Yields a ``np.ndarray`` containing singular
        values for each bipartition.

    .. note:: May decrease the rank (without changing the represented
        tensor).

    """
    if len(self) == 1:
        return  # No bipartitions with two non-empty parts for a single site
    self.canonicalize(right=1)
    iterator = self._compress_svd_r(max(self.ranks), None, truncated_svd)
    # We want everything from the iterator except for the last element.
    for _, (sv, rank) in zip(range(len(self) - 1), iterator):
        # We could verify that `rank` did not decrease but it may
        # decrease because of zero singular values -- let's trust
        # that relerr=0.0 behaves as intended.
        # assert old_rank == rank
        yield sv
def full_rank(ldims):
    """Computes a list of maximal ranks for a tensor with given local dimensions

    :param ldims: Dimensions of the legs of the tensor per site. Can be
        either passed as one number per site (``[2, 5, 2]``) or, if there are
        multiple legs per site, as a list of tuples similar to
        :py:attr:`MPArray.shape` (e.g. ``[(2,), (3, 4), (5,)]``)
    :returns: Tuple of ranks that are maximal for the local dimensions
        ``ldims``

    >>> full_rank([3] * 5)
    [3, 9, 9, 3]
    >>> full_rank([2] * 8)
    [2, 4, 8, 16, 8, 4, 2]
    >>> full_rank([(2, 3)] * 4)
    [6, 36, 6]

    """
    # Use arbitrary-size Python integers for dimension
    # computation. Fixed-size numpy integers will overflow quickly
    # (e.g. 2**63 = -9223372036854775808 on 64-bit systems).
    ldims_raveled = np.array([int(np.prod(ldim)) for ldim in ldims],
                             dtype=object)
    return [min(np.prod(ldims_raveled[:cut]), np.prod(ldims_raveled[cut:]))
            for cut in range(1, len(ldims))]
def _check_reductions_args(nr_sites, width, startsites, stopsites):
    """Expand the arguments of :func:`reductions_mpo()` et al.

    The arguments are documented in :func:`reductions_mpo()`.

    """
    if stopsites is None:
        assert width is not None
        if startsites is None:
            startsites = range(nr_sites - width + 1)
        else:
            startsites = tuple(startsites)  # Allow iterables
        stopsites = (start + width for start in startsites)
    else:
        assert width is None
        assert startsites is not None
    return startsites, stopsites
def test_sum(nr_sites, local_dim, rank, rgen, dtype):
    """Compare mpa.sum() with full array computation"""
    mpa = factory.random_mpa(nr_sites, local_dim, rank, rgen, dtype)
    array_sum = mpa.to_array().sum()
    # Test summation over all indices and different argument values.
    assert_almost_equal(mpa.sum(), array_sum)
    assert_almost_equal(mpa.sum(0), array_sum)
    assert_almost_equal(mpa.sum([0]), array_sum)
    assert_almost_equal(mpa.sum([[0]] * nr_sites), array_sum)

    # Test summation over site-dependent indices
    n_plegs = 3 if nr_sites <= 4 and local_dim <= 2 else 2
    mpa = factory.random_mpa(nr_sites, [local_dim] * n_plegs, rank, rgen, dtype)
    # Pseudo-randomly choose how many physical legs to sum over at each site.
    num_sum = ((rgen.choice(range(ndims + 1)), ndims) for ndims in mpa.ndims)
    # Pseudo-randomly choose which physical legs to sum over.
    axes = tuple(
        rgen.choice(range(ndims), num, replace=False) for num, ndims in num_sum)
    array_axes = tuple(n_plegs * pos + a
                       for pos, ax in enumerate(axes) for a in ax)
    mpa_sum = mpa.sum(axes)
    if hasattr(mpa_sum, 'to_array'):  # possibly, no physical legs are left
        mpa_sum = mpa_sum.to_array()
    array_sum = mpa.to_array().sum(array_axes)
    assert_array_almost_equal(mpa_sum, array_sum)
def test_sumup(nr_sites, local_dim, rank, rgen, dtype):
    mpas = [factory.random_mpa(nr_sites, local_dim, 3, dtype=dtype, randstate=rgen)
            for _ in range(rank if rank is not np.nan else 1)]
    sum_naive = ft.reduce(mp.MPArray.__add__, mpas)
    sum_mp = mp.sumup(mpas)

    assert_array_almost_equal(sum_naive.to_array(), sum_mp.to_array())
    assert all(r <= 3 * rank for r in sum_mp.ranks)
    assert(sum_mp.dtype is dtype)

    weights = rgen.randn(len(mpas))
    summands = [w * mpa for w, mpa in zip(weights, mpas)]
    sum_naive = ft.reduce(mp.MPArray.__add__, summands)
    sum_mp = mp.sumup(mpas, weights=weights)
    assert_array_almost_equal(sum_naive.to_array(), sum_mp.to_array())
    assert all(r <= 3 * rank for r in sum_mp.ranks)
    assert(sum_mp.dtype is dtype)
def test_diag_2plegs(nr_sites, local_dim, rank, rgen):
    mpa = factory.random_mpa(nr_sites, 2 * (local_dim,), rank, randstate=rgen)
    mpa_np = mpa.to_array()
    # this should be a single, 1D numpy array
    diag_mp = mp.diag(mpa, axis=1)
    diag_np = np.array([mpa_np[(slice(None), i) * nr_sites]
                        for i in range(local_dim)])
    for a, b in zip(diag_mp, diag_np):
        assert a.ndims[0] == 1
        assert_array_almost_equal(a.to_array(), b)


###############################################################################
#                       Shape changes, conversions                            #
###############################################################################

# nr_sites, local_dim, rank, sites_per_group
def test_split(nr_sites, local_dim, rank, rgen):
    if nr_sites < 2:
        return
    mpa = factory.random_mpa(nr_sites, local_dim, rank, randstate=rgen)
    for pos in range(nr_sites - 1):
        mpa_l, mpa_r = mpa.split(pos)
        assert len(mpa_l) == pos + 1
        assert len(mpa_l) + len(mpa_r) == nr_sites
        assert_correct_normalization(mpa_l)
        assert_correct_normalization(mpa_r)
        recons = np.tensordot(mpa_l.to_array(), mpa_r.to_array(), axes=(-1, 0))
        assert_array_almost_equal(mpa.to_array(), recons)

    for (lnorm, rnorm) in it.product(range(nr_sites - 1), range(1, nr_sites)):
        mpa_l, mpa_r = mpa.split(nr_sites // 2 - 1)
        assert_correct_normalization(mpa_l)
        assert_correct_normalization(mpa_r)
def test_singularvals(nr_sites, local_dim, rank, dtype, rgen):
    mps = factory.random_mpa(nr_sites, local_dim, rank, randstate=rgen,
                             dtype=dtype, normalized=True, force_rank=True)
    psi = mps.to_array()
    # Start from a non-normalized state
    assert mps.canonical_form == (0, nr_sites)
    svals = list(mps.singularvals())
    if nr_sites == 1:
        assert mps.canonical_form == (0, 1)
    else:
        # The last local tensor update from _compress_svd_r() is not
        # carried out. This behaviour may change.
        assert mps.canonical_form == (nr_sites - 2, nr_sites - 1)
    assert len(svals) == nr_sites - 1
    for n_left in range(1, nr_sites):
        sv = svals[n_left - 1]
        mat = psi.reshape((local_dim**n_left, -1))
        sv2 = np.linalg.svd(mat, full_matrices=False, compute_uv=False)
        n_sv = min(len(sv), len(sv2))
        # Output from `svd()` is always in descending order
        assert_almost_equal(sv[n_sv:], 0.0)
        assert_almost_equal(sv2[n_sv:], 0.0)
        assert_array_almost_equal(sv[:n_sv], sv2[:n_sv])
def read(self, device=None, offset=0, bs=None, count=1):
    """
    Using O_DIRECT, read from the specified block device to stdout
    (Without any optional arguments will read the first 4k from the device)
    """
    volume = self.get_volume(device)
    block_size = bs or BLOCK_SIZE
    offset = int(offset) * block_size
    count = int(count)

    print("Offset: ", offset)
    total = 0
    with directio.open(volume['path'], buffered=block_size) as file:
        file.seek(offset)
        for i in range(0, count):
            total += os.write(sys.stdout.fileno(), file.read(block_size))

    os.write(sys.stdout.fileno(), "\nRead: %d Bytes\n" % total)
def write(self, device=None, char=0, bs=None, count=None):
    """
    Using O_DIRECT, write a character in 4k chunks to a specified block device
    (Without any optional arguments will write NULL's to the entire device)
    """
    volume = self.get_volume(device)
    block_size = bs or BLOCK_SIZE
    # Calculate the number of blocks that are in the volume
    # (integer division so range() below receives an int).
    count = count or (volume['size'] // block_size)
    data = "".join([chr(int(char)) for i in range(0, block_size)])

    print("Writing: '%c'" % data[0])
    total = 0
    with directio.open(volume['path'], buffered=block_size) as file:
        for i in range(0, count):
            self.dot()
            total += file.write(data)

    print("\nWrote: ", total)
    return 0
def generate_batch(batch_size, bag_window):
    global data_index
    span = 2 * bag_window + 1  # [ bag_window target bag_window ]
    batch = np.ndarray(shape=(batch_size, span - 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    buffer = collections.deque(maxlen=span)
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    for i in range(batch_size):
        # just for testing
        buffer_list = list(buffer)
        labels[i, 0] = buffer_list.pop(bag_window)
        batch[i] = buffer_list
        # iterate to the next buffer
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
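A toy invocation (hypothetical word ids; generate_batch above expects module-level `data` and `data_index` globals) to show what one CBOW batch looks like:

data = [0, 1, 2, 3, 4, 5, 6, 7]  # hypothetical corpus of word ids
data_index = 0

batch, labels = generate_batch(batch_size=4, bag_window=1)
# With bag_window=1 each row of `batch` holds the two context ids around
# the label, e.g. batch[0] == [0, 2] and labels[0, 0] == 1.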
def main():
    args = parse_args()

    if check_manifest(args.manifest):
        print('manifest exists; exiting')
        return

    manifest = driver.DriverManager(
        namespace='ekko.manifest.drivers',
        name=args.driver,
        invoke_on_load=True,
        invoke_args=[args.manifest]
    ).driver

    size_of_disk = args.backupsize * 1024**3  # Convert GB to B
    incremental = 0

    metadata = manifest_structure.Metadata(incremental, size_of_disk)
    manifest.initialize()
    manifest.put_metadata(metadata)

    num_of_segments = int(size_of_disk / metadata.segment_size)
    segments = read_segments(range(0, num_of_segments - 1), metadata)
    manifest.put_segments(segments, metadata)