Python six.moves module: reduce() example source code

We extracted the following 40 code examples from open-source Python projects to illustrate how to use six.moves.reduce().
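
A minimal usage sketch (not taken from any of the projects below): on Python 3, six.moves.reduce is functools.reduce, and on Python 2 it is the built-in reduce, so a left fold looks the same under both:

from six.moves import reduce

# Fold a list into a single value with a two-argument function and a seed.
total = reduce(lambda acc, x: acc + x, [1, 2, 3, 4], 0)
assert total == 10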

Project: ecpy    Author: elliptic-shiho    | project source | file source
def crt(ak, nk):
  """
  Chinese Remainder Theorem implementation,
  using Gauss's proof and a generalization to gcd(n1, n2) != 1.
  Requires len(ak) == len(nk).
  Original: https://gist.github.com/elliptic-shiho/901d223135965308a5f9ff0cf99dd7c8
  Explanation: http://elliptic-shiho.hatenablog.com/entry/2016/04/03/020117

  Args:
    ak: the residues [a1, a2, ..., ak]
    nk: the moduli [n1, n2, ..., nk]
  """
  from six.moves import reduce
  assert len(ak) == len(nk)
  N = reduce(lambda x, y: x * y, nk, 1)
  l = lcm(*nk)
  s = 0
  for n, a in zip(nk, ak):
    m = N // n
    g, x, y = egcd(m, n)
    s += (m // g) * x * a
    s %= l
  return s
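
A hedged usage sketch of crt() above (it relies on ecpy's own egcd and lcm helpers): for pairwise-coprime moduli it returns the usual CRT solution.

# x ≡ 2 (mod 3) and x ≡ 3 (mod 5) has the unique solution 8 modulo 15.
x = crt([2, 3], [3, 5])
assert x % 3 == 2 and x % 5 == 3   # x == 8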
Project: etc    Author: sublee    | project source | file source
def get(self, key, recursive=False, sorted=False, quorum=False,
            wait=False, wait_index=None, timeout=None):
        key_chunks = split_key(key)
        if not wait:
            # Get immediately.
            try:
                node = reduce(MockNode.get_node, key_chunks, self.root)
            except KeyError:
                raise KeyNotFound(index=self.index)
            return self.make_result(Got, node, remember=False, sorted=sorted)
        # Wait...
        if wait_index is not None:
            indices = self.indices.get(key_chunks, ())
            x = bisect.bisect_left(indices, (wait_index, False))
            for index, exact in indices[x:]:
                if recursive or exact:
                    # Matched past result found.
                    return self.history[index]
        # Register an event and wait...
        event_key = (recursive, key_chunks)
        event = self.events.setdefault(event_key, threading.Event())
        if not event.wait(timeout):
            raise TimedOut
        index, __ = self.indices[key_chunks][-1]
        return self.history[index]
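
The reduce(MockNode.get_node, key_chunks, self.root) call above walks a key path one chunk at a time. A stand-alone sketch of the same pattern using plain dicts (an assumption made only for illustration):

from six.moves import reduce

tree = {'a': {'b': {'c': 42}}}
key_chunks = ('a', 'b', 'c')
node = reduce(lambda parent, chunk: parent[chunk], key_chunks, tree)
assert node == 42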
Project: edx-enterprise    Author: edx    | project source | file source
def get_common_course_modes(course_runs):
    """
    Fake implementation returning common course modes.

    Arguments:
        course_run_ids(Iterable[str]): Target Course run IDs.

    Returns:
        set: course modes found in all given course runs
    """
    course_run_modes = [
        set(seat.get("type") for seat in course_run.get("seats"))
        for course_run in FAKE_COURSE_RUNS_RESPONSE
        if course_run.get("key") in course_runs
    ]

    return six_reduce(lambda left, right: left & right, course_run_modes)
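
The six_reduce call above intersects the per-run mode sets; the same fold in isolation:

from six.moves import reduce

course_run_modes = [{'audit', 'verified'}, {'verified', 'professional'}]
common = reduce(lambda left, right: left & right, course_run_modes)
assert common == {'verified'}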
Project: artemis    Author: QUVA-Lab    | project source | file source
def _bitwise_filter_op(op, *filter_sets):

    output_set = filter_sets[0].copy()
    if op=='not':
        assert len(filter_sets)==1
        for k in output_set.keys():
            output_set[k] = _bitwise_not(filter_sets[0][k])
    elif op in ('and', 'or'):
        for k in output_set.keys():
            output_set[k] = reduce(_bitwise_and if op=='and' else _bitwise_or, [fs[k] for fs in filter_sets])
    elif op=='andcascade':
        for k in output_set.keys():
            output_set[k] = reduce(_bitwise_andcascade, [fs[k] for fs in filter_sets[::-1]])
    else:
        raise AssertionError('op should be one of {}'.format(('and', 'or', 'andcascade', 'not')))
    return output_set
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def mul_calculate(num, denum, aslist=False, out_type=None):
    if not num and not denum:
        # Smallest 1 possible.
        if aslist:
            return []
        else:
            return numpy.int8(1)

    # Make sure we do not accidently upcast data types.
    if out_type is None:
        out_dtype = scalar.upcast(*[v.dtype for v in (num + denum)])
    else:
        out_dtype = out_type.dtype
    one = theano._asarray(1, dtype=out_dtype)

    v = reduce(numpy.multiply, num, one) / reduce(numpy.multiply, denum, one)
    if aslist:
        if numpy.all(v == 1):
            return []
        else:
            return [v]
    return v
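
The two reduce(numpy.multiply, ...) calls above each collapse a list of factors into a single product, seeded with a 1 of the output dtype; a minimal stand-alone sketch:

import numpy
from six.moves import reduce

num = [numpy.float32(2.0), numpy.float32(3.0)]
denum = [numpy.float32(4.0)]
one = numpy.float32(1.0)
v = reduce(numpy.multiply, num, one) / reduce(numpy.multiply, denum, one)
assert v == numpy.float32(1.5)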
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def build_gemm_call(self):

        return reduce(str.__add__, (
            self.declare_NS,
            self.check_xyz_rank2,
            self.setup_z_Nz_Sz,
            self.check_xyz_double_or_float,
            self.check_ab_double_or_float,
            self.check_dims,
            self.check_strides,
            self.encode_strides_in_unit,
            self.compute_strides,
            self.begin_switch_typenum,
            self.case_float,
            self.case_float_ab_constants,
            self.case_float_gemm,
            self.case_double,
            self.case_double_ab_constants,
            self.case_double_gemm,
            self.end_switch_typenum), '')
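
The reduce(str.__add__, ..., '') call above just concatenates the code fragments in order; for example:

from six.moves import reduce

assert reduce(str.__add__, ('x', 'y', 'z'), '') == 'xyz'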
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def values_eq_approx_high_tol(a, b):
    """
    This fct is needed to don't have DebugMode raise useless
    error due to ronding error.

    This happen as We reduce on the two last dimensions, so this
    can raise the absolute error if the number of element we
    reduce on is significant.

    """
    assert a.ndim == 4
    atol = None
    if a.shape[-1] * a.shape[-2] > 100:
        # For float32 the default atol is 1e-5
        atol = 3e-5
    return CudaNdarrayType.values_eq_approx(a, b, atol=atol)
Project: chatbot-MemN2N-tensorflow    Author: vyraun    | project source | file source
def build_vocab(self, data, candidates):
        vocab = reduce(lambda x, y: x | y, (set(
            list(chain.from_iterable(s)) + q) for s, q, a in data))
        vocab |= reduce(lambda x, y: x | y, (set(candidate)
                                             for candidate in candidates))
        vocab = sorted(vocab)
        self.word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
        max_story_size = max(map(len, (s for s, _, _ in data)))
        mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
        self.sentence_size = max(
            map(len, chain.from_iterable(s for s, _, _ in data)))
        self.candidate_sentence_size = max(map(len, candidates))
        query_size = max(map(len, (q for _, q, _ in data)))
        self.memory_size = min(self.memory_size, max_story_size)
        self.vocab_size = len(self.word_idx) + 1  # +1 for nil word
        self.sentence_size = max(
            query_size, self.sentence_size)  # for the position
        # params
        print("vocab size:", self.vocab_size)
        print("Longest sentence length", self.sentence_size)
        print("Longest candidate sentence length",
              self.candidate_sentence_size)
        print("Longest story length", max_story_size)
        print("Average story length", mean_story_size)
Project: GeneGAN    Author: Prinsphield    | project source | file source
def discriminator(self, name, image):
        X = image / 255.0
        if name in self.reuse.keys():
            reuse = self.reuse[name]
        else:
            self.reuse[name] = True
            reuse = False

        with tf.variable_scope(name, reuse=reuse) as scope:   
            X = self.make_conv('conv1', X, shape=[4,4,3,128], strides=[1,2,2,1])
            X = self.leakyRelu(X, 0.2)
            # print(name, X.get_shape())

            X = self.make_conv_bn('conv2', X, shape=[4,4,128,256], strides=[1,2,2,1])
            X = self.leakyRelu(X, 0.2)
            # print(name, X.get_shape())

            X = self.make_conv_bn('conv3', X, shape=[4,4,256,512], strides=[1,2,2,1])
            X = self.leakyRelu(X, 0.2)
            # print(name, X.get_shape())

            X = self.make_conv_bn('conv4', X, shape=[4,4,512,512], strides=[1,2,2,1])
            X = self.leakyRelu(X, 0.2)
            # print(name, X.get_shape())

            flat_dim = reduce(lambda x,y: x*y, X.get_shape().as_list()[1:])
            X = tf.reshape(X, [-1, flat_dim])
            X = self.make_fc('fct', X, 1)
            # X = tf.nn.sigmoid(X)
            return X
Project: typecaster    Author: nbedi    | project source | file source
def html_to_ssml(text):
    """
    Replaces specific html tags with probable SSML counterparts.
    """
    ssml_text = reduce(lambda x, y: x.replace(y, html_to_ssml_maps[y]), html_to_ssml_maps, text)
    return ssml_text
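
A sketch of how the fold above is used, with a hypothetical html_to_ssml_maps (the real mapping lives elsewhere in typecaster):

from six.moves import reduce

# Hypothetical mapping, for illustration only.
html_to_ssml_maps = {'<b>': '<emphasis>', '</b>': '</emphasis>'}

text = 'a <b>bold</b> claim'
ssml_text = reduce(lambda x, y: x.replace(y, html_to_ssml_maps[y]), html_to_ssml_maps, text)
assert ssml_text == 'a <emphasis>bold</emphasis> claim'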
Project: InplusTrader_Linux    Author: zhengwsh    | project source | file source
def get_realtime_quotes(code_list, open_only=False):
    import tushare as ts

    max_len = 800
    loop_cnt = int(math.ceil(float(len(code_list)) / max_len))

    total_df = reduce(lambda df1, df2: df1.append(df2),
                      [ts.get_realtime_quotes([code for code in code_list[i::loop_cnt]])
                       for i in range(loop_cnt)])
    total_df["is_index"] = False

    index_symbol = ["sh", "sz", "hs300", "sz50", "zxb", "cyb"]
    index_df = ts.get_realtime_quotes(index_symbol)
    index_df["code"] = index_symbol
    index_df["is_index"] = True
    total_df = total_df.append(index_df)
    total_df = total_df.set_index("code").sort_index()

    columns = set(total_df.columns) - set(["name", "time", "date"])
    # columns = filter(lambda x: "_v" not in x, columns)
    for label in columns:
        total_df[label] = total_df[label].map(lambda x: 0 if str(x).strip() == "" else x)
        total_df[label] = total_df[label].astype(float)

    total_df["chg"] = total_df["price"] / total_df["pre_close"] - 1

    total_df["order_book_id"] = total_df.index
    total_df["order_book_id"] = total_df["order_book_id"].apply(tushare_code_2_order_book_id)

    total_df["datetime"] = total_df["date"] + " " + total_df["time"]
    total_df["datetime"] = total_df["datetime"].apply(lambda x: convert_dt_to_int(datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")))

    total_df["close"] = total_df["price"]

    if open_only:
        total_df = total_df[total_df.open > 0]

    return total_df
Project: ecpy    Author: elliptic-shiho    | project source | file source
def gcd_multiple(*a):
  """
  Apply gcd to several variables.

  Args:
    *a: argument list
  """
  from six.moves import reduce
  return reduce(gcd, a)
Project: ecpy    Author: elliptic-shiho    | project source | file source
def lcm(*a):
  """
  Calculate the Least Common Multiple.

  Args:
    *a: argument list
  """
  from six.moves import reduce
  return reduce(op.mul, a) // gcd_multiple(*a)
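
Expected behaviour of the two helpers above, assuming ecpy's gcd is the ordinary integer gcd:

assert gcd_multiple(12, 18, 24) == 6
assert lcm(4, 6) == 12   # 4 * 6 // gcd_multiple(4, 6) == 24 // 2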
Project: QDREN    Author: andreamad8    | project source | file source
def get_train_test(which_task='data/tasks_1-20_v1-2/en/',task_num=1):
    train, val, test = load_task(which_task,task_num)
    data = train + test + val

    vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([ len(s) for s, _, _ in data ]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    if (task_num==3):
        max_story_size = min(130, max_story_size)
    else:
        max_story_size = min(70, max_story_size)


    vocab_size = len(word_idx) + 1  # +1 for nil word
    sentence_size = max(query_size, sentence_size)  # for the position
    sentence_size += 1
    logging.info("Longest sentence length: "+ str( sentence_size))
    logging.info("Longest story length: "+ str( max_story_size))
    logging.info("Average story length: "+ str( mean_story_size))
    logging.info("Training sample: "+ str(len(train)))
    logging.info("Validation sample: "+ str(len(val)))
    logging.info("Test sample: "+ str(len(test)))
    logging.info("Vocab size : "+ str(vocab_size))


    S, Q, A = vectorize_data(train, word_idx, sentence_size, max_story_size)
    valS, valQ, valA = vectorize_data(val, word_idx, sentence_size, max_story_size)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, max_story_size)
    return {'train':{'S':S, 'Q':np.expand_dims(Q, axis=1), 'A':A},
            'val':{'S':valS, 'Q':np.expand_dims(valQ, axis=1), 'A':valA},
            'test':{'S':testS, 'Q':np.expand_dims(testQ, axis=1), 'A':testA},
            'vocab':vocab,
            'vocab_size':vocab_size,
            'sent_len':sentence_size,
            'sent_numb':max_story_size,
            'word_idx':word_idx,
            'len_training':len(train)}
Project: bx-python    Author: bxlab    | project source | file source
def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd):
    BED4_FRM = "%s\t%d\t%d\t%s\n"
    BED12_FRM = "%s\t%d\t%d\t%s\t1000\t+\t%d\t%d\t0,0,0\t%d\t%s\t%s\n"
    assert len( set(from_elem_list['chrom']) ) <= 1

    mapped_elem_count = 0
    for from_elem in from_elem_list:
        matching_block_ids = [attrgetter("value")(_) for _ in tree.find(chrom, from_elem['start'], from_elem['end'])]

        # do the actual mapping
        to_elem_slices = [_ for _ in (transform(from_elem, all_epo[i], opt.gap) for i in matching_block_ids) if _]
        if len(to_elem_slices) > 1 or len(to_elem_slices) == 0:
            log.debug("%s no match or in different chain/chromosomes" % (str(from_elem)))
            continue
        to_elem_slices = to_elem_slices[0]

        # apply threshold
        if (from_elem[2] - from_elem[1]) * opt.threshold > reduce(lambda b,a: a[2]-a[1] + b, to_elem_slices, 0):
            log.debug("%s did not pass threshold" % (str(from_elem)))
            continue

        # if to_species had insertions you can join elements
        to_elem_list = sorted(union_elements(to_elem_slices), key=lambda a: a[1])
        if to_elem_list:
            mapped_elem_count += 1
            log.debug("\tjoined to %d elements" % (len(to_elem_list)))
            if opt.format == "BED4":
                map(lambda tel: out_fd.write(BED4_FRM % tel), to_elem_list)
            else:
                start = to_elem_list[0][1]
                end = to_elem_list[-1][2]
                out_fd.write(BED12_FRM % (to_elem_list[0][0], start, end, from_elem['id'],
                        start, end, len(to_elem_list),
                        ",".join( "%d" % (e[2]-e[1]) for e in to_elem_list ),
                        ",".join( "%d" % (e[1]-start) for e in to_elem_list ) )
                        )
    log.info("%s %d of %d elements mapped" % (chrom, mapped_elem_count, from_elem_list.shape[0]))
Project: bx-python    Author: bxlab    | project source | file source
def _zero_mantissa(dval):
    """Determine whether the mantissa bits of the given double are all
    zero."""
    bb = _double_as_bytes(dval)
    return ((bb[1] & 0x0f) | reduce(operator.or_, bb[2:])) == 0

##
## Functions to test for IEEE 754 special values
##
Project: bx-python    Author: bxlab    | project source | file source
def cdbhash( s ):
    return reduce( lambda h, c: (((h << 5) + h) ^ ord(c)) & 0xffffffff, s, 5381 )
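
This is the CDB/DJB string hash: start from 5381 and fold each character in with h*33 XOR ord(c), masked to 32 bits. For example:

assert cdbhash('') == 5381                                 # the empty string keeps the seed
assert cdbhash('a') == ((5381 << 5) + 5381) ^ ord('a')     # == 177604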
Project: ebb-lint    Author: pyga    | project source | file source
def last(seq):
    return reduce(lambda l, r: r, seq)
Project: wiki-sem-500    Author: belph    | project source | file source
def lookup_phrase(self, phrase, normalize_vec=False):
        """Looks up the given phrase in this embedding"""
        phrase = self.get_normalized(phrase)
        vectors = []
        split = phrase.split('_')
        if self.supports_phrases:
            i = 0
            while i < len(split):
                # Yes, this is O(n^2). Since phrases are all short in
                # outlier detection, this is still tractable, but it
                # could be better.
                best_match = [(l, x) for (l, x) in phrase_gen(split[i:]) if self.in_vocabulary(x)]
                if best_match:
                    i += best_match[-1][0]
                    vectors.append(np.asarray(self.get_vector(best_match[-1][1])))
                else:
                    i += 1
        else:
            for word in (w for w in split if self.in_vocabulary(w)):
                vectors.append(np.asarray(self.get_vector(word)))
        if len(vectors) == 0:
            #print("OOV: " + phrase + " (split: " + str(split) + ")")
            return None
        else:
            summed = reduce(operator.add, vectors)
            average = summed / len(vectors)
            if normalize_vec:
                average = average / np.linalg.norm(average)
            return average
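
The reduce(operator.add, vectors) call above sums the word vectors element-wise before averaging; in isolation:

import operator
import numpy as np
from six.moves import reduce

vectors = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
average = reduce(operator.add, vectors) / len(vectors)
assert np.allclose(average, [2.0, 3.0])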
Project: qtip    Author: opnfv    | project source | file source
def run(self):
        collected = []
        for log_item_config in self._config[self.LOGS]:
            log_item = LogItem(log_item_config, self)
            matches = [load_parser(c[CProp.TYPE])(c, log_item).run()
                       for c in log_item.get_config(CProp.PARSERS)]
            collected = chain(collected, reduce(chain, matches))
        return reduce(merge_matchobj_to_dict, collected, {'groups': (), 'groupdict': {}})
Project: etc    Author: sublee    | project source | file source
def set(self, key, value=None, dir=False, ttl=None, refresh=False,
            prev_value=None, prev_index=None, prev_exist=None, timeout=None):
        if refresh:
            prev_exist = True
            if value is not None:
                raise RefreshValue(index=self.index)
            elif ttl is None:
                raise RefreshTTLRequired(index=self.index)
        expiration = ttl and (datetime.utcnow() + timedelta(ttl))
        key_chunks = split_key(key)
        index = self.next_index()
        should_test = prev_value is not None or prev_index is not None
        parent_node = reduce(MockNode.get_node, key_chunks[:-1], self.root)
        try:
            node = parent_node.get_node(key_chunks[-1])
        except KeyError:
            if prev_exist or should_test:
                raise KeyNotFound(index=self.index)
            node = MockNode(key, index, value, dir, ttl, expiration)
            parent_node.add_node(node)
        else:
            if prev_exist is not None and not prev_exist:
                raise NodeExist(index=self.index)
            if refresh:
                if node.dir:
                    raise NotFile(index=self.index)
                value = node.value
            self.compare(node, prev_value, prev_index)
            node.set(index, value, dir, ttl, expiration)
        if refresh:
            result_class = ComparedThenSwapped if should_test else Set
            notify = False
        else:
            result_class = Updated if prev_exist or should_test else Set
            notify = True
        return self.make_result(result_class, node,
                                key_chunks=key_chunks, notify=notify)
Project: etc    Author: sublee    | project source | file source
def append(self, key, value=None, dir=False, ttl=None, timeout=None):
        expiration = ttl and (datetime.utcnow() + timedelta(ttl))
        key_chunks = split_key(key)
        parent_node = reduce(MockNode.get_node, key_chunks, self.root)
        for x in itertools.count(len(parent_node.nodes)):
            item_key = '%020d' % x
            if not parent_node.has_node(item_key):
                break
        key = os.path.join(key, item_key)
        index = self.next_index()
        node = MockNode(key, index, value, dir, ttl, expiration)
        parent_node.add_node(node)
        return self.make_result(Created, node, key_chunks=key_chunks)
Project: etc    Author: sublee    | project source | file source
def delete(self, key, dir=False, recursive=False,
               prev_value=None, prev_index=None, timeout=None):
        key_chunks = split_key(key)
        parent_node = reduce(MockNode.get_node, key_chunks[:-1], self.root)
        try:
            node = parent_node.get_node(key_chunks[-1])
        except KeyError:
            raise KeyNotFound(index=self.index)
        self.compare(node, prev_value, prev_index)
        parent_node.pop_node(key_chunks[-1])
        return self.make_result(Deleted, prev_node=node, key_chunks=key_chunks)
Project: wub    Author: nanoporetech    | project source | file source
def reverse_complement(seq):
    """ Return reverse complement of a string (base) sequence.

    :param seq: Input sequence.
    :returns: Reverse complement of input sequence.
    :rtype: str

    """
    if len(seq) == 0:
        return seq
    return reduce(lambda x, y: x + y, map(base_complement, seq[::-1]))
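
A usage sketch, assuming wub's base_complement maps A<->T and C<->G in the usual way:

assert reverse_complement('') == ''
assert reverse_complement('ATTG') == 'CAAT'   # reversed: 'GTTA'; complemented: 'CAAT'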
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def test_not_lazy_if_inplace(self):
        # Tests that if the outputs are scalars and the graph is big,
        # we disable the inplace opt to speed up optimization
        x = tensor.vector('x', dtype=self.dtype)
        y = tensor.vector('y', dtype=self.dtype)
        c = tensor.iscalar('c')
        mode = theano.compile.get_mode(self.mode).excluding(
            # Disable many opt to keep the graph big enough to disable
            # the opt.
            'fusion', 'local_add_canonizer',
            'inplace', 'constant_folding', 'constant_folding')
        y2 = reduce(lambda x, y: x + y, [y] + list(range(200)))
        f = theano.function([c, x, y], ifelse(c, x, y2), mode=mode)
        # For not inplace ifelse
        ifnode = [n for n in f.maker.fgraph.toposort()
                  if isinstance(n.op, IfElse)]
        assert len(ifnode) == 1
        assert not ifnode[0].op.as_view
        rng = numpy.random.RandomState(utt.fetch_seed())

        xlen = rng.randint(200)
        ylen = rng.randint(200)

        vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
        vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

        assert numpy.allclose(vx, f(1, vx, vy))
        assert numpy.allclose(vy + sum(range(200)), f(0, vx, vy))
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def local_useless_reduce(node):
    """Sum(a, axis=[]) -> a  """
    if isinstance(node.op, T.CAReduce):
        summed, = node.inputs
        # if reduce were doing anything, the output ndim would be reduced
        if summed.type == node.outputs[0].type:
            return [summed]


# Enabling this optimization at canonicalization step break this test:
# theano/tensor/tests/test_opt.py:T_local_reduce.test_local_reduce_broadcast_some_0
# see gh-790 issue.
#
# @register_canonicalize
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def add_calculate(num, denum, aslist=False, out_type=None):
    # TODO: make sure that this function and mul_calculate are similar
    if out_type is None:
        zero = 0.0
    else:
        zero = theano._asarray(0, dtype=out_type.dtype)
    # zero = 0.0 if out_type is None else theano._asarray(0,
    # dtype=out_type.dtype)
    v = reduce(numpy.add, num, zero) - reduce(numpy.add, denum, zero)
    if aslist:
        if numpy.all(v == 0):
            return []
        else:
            return [v]
    return v
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def check_chain(r, *chain):
    """
    WRITEME

    """
    if isinstance(r, graph.Apply):
        r = r.outputs[0]
    return _check_chain(r, reduce(list.__iadd__, ([x, 0] for x in chain)))
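
The reduce(list.__iadd__, ...) call above flattens the (x, 0) pairs into one alternating list; stand-alone:

from six.moves import reduce

chain = ['op_a', 'op_b']
flat = reduce(list.__iadd__, ([x, 0] for x in chain))
assert flat == ['op_a', 0, 'op_b', 0]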
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def local_gpu_split(node):
    if isinstance(node.op, tensor.Split):
        input = node.inputs[0]
        outs_clients = reduce(list.__add__,
                              [out.clients for out in node.outputs])
        if (input.owner and isinstance(input.owner.op, HostFromGpu) or
            any(c != 'output' and isinstance(c.op, GpuFromHost) for c, idx
                in outs_clients)):
            new_op = GpuSplit(**node.op._props_dict())
            split_res = new_op(as_cuda_ndarray_variable(input),
                               *node.inputs[1:], return_list=True)
            return [host_from_gpu(o) for o in split_res]
    return False
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        files = ['corr_gemm.cu']
        codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
                 for f in files]
        return reduce(str.__add__, codes)
Project: Theano-Deep-learning    Author: GeekLiB    | project source | file source
def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        files = ['corr3d_gemm.cu']
        codes = [open(os.path.join(os.path.split(__file__)[0], f)).read()
                 for f in files]
        return reduce(str.__add__, codes)
Project: personalized-dialog    Author: chaitjo    | project source | file source
def build_vocab(self,data,candidates,save=False,load=False):
        if load:
            vocab_file = open('vocab.obj', 'rb')
            vocab = pickle.load(vocab_file)
        else:
            vocab = reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q) for s, q, a in data))
            vocab |= reduce(lambda x,y: x|y, (set(candidate) for candidate in candidates) )
            vocab=sorted(vocab)

        self.word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
        max_story_size = max(map(len, (s for s, _, _ in data)))
        mean_story_size = int(np.mean([ len(s) for s, _, _ in data ]))
        self.sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
        self.candidate_sentence_size=max(map(len,candidates))
        query_size = max(map(len, (q for _, q, _ in data)))
        self.memory_size = min(self.memory_size, max_story_size)
        self.vocab_size = len(self.word_idx) + 1 # +1 for nil word
        self.sentence_size = max(query_size, self.sentence_size) # for the position
        # params
        print("vocab size:",self.vocab_size)
        print("Longest sentence length", self.sentence_size)
        print("Longest candidate sentence length", self.candidate_sentence_size)
        print("Longest story length", max_story_size)
        print("Average story length", mean_story_size)

        if save:
            vocab_file = open('vocab.obj', 'wb')
            pickle.dump(vocab, vocab_file)
Project: puppet    Author: Raytone-D    | project source | file source
def get_realtime_quotes(code_list, open_only=False):
    import tushare as ts

    max_len = 800
    loop_cnt = int(math.ceil(float(len(code_list)) / max_len))

    total_df = reduce(lambda df1, df2: df1.append(df2),
                      [ts.get_realtime_quotes([code for code in code_list[i::loop_cnt]])
                       for i in range(loop_cnt)])
    total_df["is_index"] = False

    index_symbol = ["sh", "sz", "hs300", "sz50", "zxb", "cyb"]
    index_df = ts.get_realtime_quotes(index_symbol)
    index_df["code"] = index_symbol
    index_df["is_index"] = True
    total_df = total_df.append(index_df)
    total_df = total_df.set_index("code").sort_index()

    columns = set(total_df.columns) - set(["name", "time", "date"])
    # columns = filter(lambda x: "_v" not in x, columns)
    for label in columns:
        total_df[label] = total_df[label].map(lambda x: 0 if str(x).strip() == "" else x)
        total_df[label] = total_df[label].astype(float)

    total_df["chg"] = total_df["price"] / total_df["pre_close"] - 1

    total_df["order_book_id"] = total_df.index
    total_df["order_book_id"] = total_df["order_book_id"].apply(tushare_code_2_order_book_id)

    total_df["datetime"] = total_df["date"] + " " + total_df["time"]
    total_df["datetime"] = total_df["datetime"].apply(lambda x: convert_dt_to_int(datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")))

    total_df["close"] = total_df["price"]

    if open_only:
        total_df = total_df[total_df.open > 0]

    return total_df
Project: dplython    Author: dodger487    | project source | file source
def evaluate(self, previousResult, original=None, special=None):
    if special not in {None, "transform", "agg"}:
      raise Exception("Special must be one of None, 'transform', or 'agg'.")

    original = original if original is not None else previousResult

    if (original._grouped_self
        and special 
        and not isinstance(self._queue[0], FunctionStep)):
      name = GetName(self)

      # TODO: Rewrite this, this is terrible.
      go_to_index = len(self._queue)
      for idx, item in enumerate(self._queue):
        if isinstance(item, OperatorStep):
          go_to_index = idx
          break

      transform_input = lambda x: reduce(
          lambda prevResult, f: f.evaluate(prevResult, original, special=special),
          self._queue[1:go_to_index],
          x
      )
      if special == "transform":
        out = original._grouped_self[name].transform(transform_input)
      elif special == "agg":
        out = original._grouped_self[name].agg(transform_input)
      out = reduce(lambda prevResult, f: f.evaluate(prevResult, original, special=special),
                      self._queue[go_to_index:],
                      out)
      return out
    else:
      output = reduce(lambda prevResult, f: f.evaluate(prevResult, original),
                      self._queue,
                      original)
      return output
Project: PyPPL    Author: pwwang    | project source | file source
def reduce(func, vec):
    """
    Python2 and Python3 compatible reduce
    @params:
        `func`: The reduce function
        `vec`: The list to be reduced
    @returns:
        The reduced value
    """
    return moves.reduce(func, vec)
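
The wrapper simply forwards to six.moves.reduce, so it behaves like the usual left fold:

assert reduce(lambda acc, x: acc * x, [1, 2, 3, 4]) == 24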
Project: InplusTrader_Linux    Author: zhengwsh    | project source | file source
def get_realtime_quotes(code_list, open_only=False):
    import tushare as ts

    max_len = 800
    loop_cnt = int(math.ceil(float(len(code_list)) / max_len))

    total_df = reduce(lambda df1, df2: df1.append(df2),
                      [ts.get_realtime_quotes([code for code in code_list[i::loop_cnt]])
                       for i in range(loop_cnt)])
    total_df["is_index"] = False

    index_symbol = ["sh", "sz", "hs300", "sz50", "zxb", "cyb"]
    index_df = ts.get_realtime_quotes(index_symbol)
    index_df["code"] = index_symbol
    index_df["is_index"] = True
    total_df = total_df.append(index_df)

    columns = set(total_df.columns) - set(["name", "time", "date", "code"])
    # columns = filter(lambda x: "_v" not in x, columns)
    for label in columns:
        total_df[label] = total_df[label].map(lambda x: 0 if str(x).strip() == "" else x)
        total_df[label] = total_df[label].astype(float)

    total_df["chg"] = total_df["price"] / total_df["pre_close"] - 1

    total_df["order_book_id"] = total_df["code"]
    total_df["order_book_id"] = total_df["order_book_id"].apply(tushare_code_2_order_book_id)

    total_df = total_df.set_index("order_book_id").sort_index()

    total_df["datetime"] = total_df["date"] + " " + total_df["time"]
    total_df["datetime"] = total_df["datetime"].apply(lambda x: convert_dt_to_int(datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")))

    total_df["close"] = total_df["price"]
    total_df["last"] = total_df["price"]

    total_df["limit_up"] = total_df.apply(lambda row: row.pre_close * (1.1 if "ST" not in row["name"] else 1.05), axis=1).round(2)
    total_df["limit_down"] = total_df.apply(lambda row: row.pre_close * (0.9 if "ST" not in row["name"] else 0.95), axis=1).round(2)

    if open_only:
        total_df = total_df[total_df.open > 0]

    return total_df
Project: QDREN    Author: andreamad8    | project source | file source
def get_train_test(which_task='data/tasks_1-20_v1-2/en/',task_num=1):
    train, val, test = load_task(which_task,task_num)
    data = train + test + val

    vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([ len(s) for s, _, _ in data ]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    if (task_num==3):
        max_story_size = min(130, max_story_size)
    else:
        max_story_size = min(70, max_story_size)


    vocab_size = len(word_idx) + 1  # +1 for nil word
    sentence_size = max(query_size, sentence_size)  # for the position
    sentence_size += 1
    print("Longest sentence length", sentence_size)
    print("Longest story length", max_story_size)
    print("Average story length", mean_story_size)
    print("Training sample",len(train))
    print("Validation sample",len(val))
    print("Test sample",len(test))
    print("Vocab size ",vocab_size)


    # embeddings_mat = get_emb_matrix(vocab_size,word_idx,embed_size = embedding_size ,emb_file='data/glove.6B.{}d.txt'.format(embedding_size))
    # embeddings_mat = pickle.load( open( "emb_task1.p", "rb" ) )
    # train/validation/test sets
    S, Q, A = vectorize_data(train, word_idx, sentence_size, max_story_size)
    valS, valQ, valA = vectorize_data(val, word_idx, sentence_size, max_story_size)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, max_story_size)
    return {'train':{'S':S, 'Q':Q, 'A':A},
            'val':{'S':valS, 'Q':valQ, 'A':valA},
            'test':{'S':testS, 'Q':testQ, 'A':testA},
            'vocab':vocab,
            'vocab_size':vocab_size,
            'sent_len':sentence_size,
            'sent_numb':max_story_size,
            'word_idx':word_idx,
            'len_training':len(train)}
Project: wub    Author: nanoporetech    | project source | file source
def stats_from_aligned_read(read, with_clipps=False):
    """Create summary information for an aligned read (modified from tang.util.bio).

    :param read: :class:`pysam.AlignedSegment` object
    :param with_clipps:
    """
    tags = dict(read.tags)
    if 'NM' not in tags:
        raise IOError(
            "Read is missing required 'NM' tag. Try running 'samtools fillmd -S - ref.fa'.")
    name = read.qname
    if read.flag == 4:
        return None
    match = reduce(lambda x, y: x + y[1] if y[0] == 0 else x, read.cigar, 0)
    ins = reduce(lambda x, y: x + y[1] if y[0] == 1 else x, read.cigar, 0)
    delt = reduce(lambda x, y: x + y[1] if y[0] == 2 else x, read.cigar, 0)
    # NM is edit distance: NM = INS + DEL + SUB
    sub = tags['NM'] - ins - delt
    length = match + ins + delt

    # Count clips:
    clipps = reduce(
        lambda x, y: x + y[1] if (y[0] == 4 or y[0] == 5) else x, read.cigar, 0)
    if with_clipps:
        length += clipps

    iden = float(match - sub) / match
    if with_clipps:
        acc = 1.0 - (float(tags['NM'] + clipps) / length)
    else:
        acc = 1.0 - (float(tags['NM']) / length)
    coverage = float(read.query_alignment_length) / read.infer_query_length()
    direction = '-' if read.is_reverse else '+'
    results = OrderedDict([
        ("name", name),
        ("ref", read.reference_name),
        ("coverage", coverage),
        ("direction", direction),
        ("aln_length", length),
        ("insertion", ins),
        ("deletion", delt),
        ("mismatch", sub),
        ("match", match - sub),
        ("identity", iden),
        ("accuracy", acc),
        ("clipps", clipps),
    ])
    return results
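
The reduce calls over read.cigar above fold pysam-style (operation, length) tuples into per-operation base counts; a stand-alone sketch with a made-up CIGAR (50M 2I 48M 5S):

from six.moves import reduce

cigar = [(0, 50), (1, 2), (0, 48), (4, 5)]   # hypothetical 50M 2I 48M 5S
match = reduce(lambda x, y: x + y[1] if y[0] == 0 else x, cigar, 0)
ins = reduce(lambda x, y: x + y[1] if y[0] == 1 else x, cigar, 0)
clipps = reduce(lambda x, y: x + y[1] if (y[0] == 4 or y[0] == 5) else x, cigar, 0)
assert (match, ins, clipps) == (98, 2, 5)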
Project: rqalpha    Author: ricequant    | project source | file source
def get_realtime_quotes(order_book_id_list, open_only=False, include_limit=False):
    import tushare as ts

    code_list = [order_book_id_2_tushare_code(code) for code in order_book_id_list]

    max_len = 800
    loop_cnt = int(math.ceil(float(len(code_list)) / max_len))

    total_df = reduce(lambda df1, df2: df1.append(df2),
                      [ts.get_realtime_quotes([code for code in code_list[i::loop_cnt]])
                       for i in range(loop_cnt)])
    total_df["is_index"] = False

    index_symbol = ["sh", "sz", "hs300", "sz50", "zxb", "cyb"]
    index_df = ts.get_realtime_quotes(index_symbol)
    index_df["code"] = index_symbol
    index_df["is_index"] = True
    total_df = total_df.append(index_df)

    columns = set(total_df.columns) - set(["name", "time", "date", "code"])
    # columns = filter(lambda x: "_v" not in x, columns)
    for label in columns:
        total_df[label] = total_df[label].map(lambda x: 0 if str(x).strip() == "" else x)
        total_df[label] = total_df[label].astype(float)

    total_df["chg"] = total_df["price"] / total_df["pre_close"] - 1

    total_df["order_book_id"] = total_df["code"]
    total_df["order_book_id"] = total_df["order_book_id"].apply(tushare_code_2_order_book_id)

    total_df = total_df.set_index("order_book_id").sort_index()
    total_df["order_book_id"] = total_df.index

    total_df["datetime"] = total_df["date"] + " " + total_df["time"]
    # total_df["datetime"] = total_df["datetime"].apply(
    #     lambda x: convert_dt_to_int(datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")))

    total_df["close"] = total_df["price"]
    total_df["last"] = total_df["price"]

    total_df = total_df.rename(columns={
        "{}{}_p".format(base_name, i): "{}{}".format(base_name, i)
        for i in range(1, 6) for base_name in ["a", "b"]
    })
    total_df = total_df.rename(columns={"pre_close": "prev_close"})

    del total_df["code"]
    del total_df["is_index"]
    del total_df["date"]
    del total_df["time"]

    if include_limit:
        total_df["limit_up"] = total_df.apply(
            lambda row: row.prev_close * (1.1 if "ST" not in row["name"] else 1.05), axis=1).round(2)
        total_df["limit_down"] = total_df.apply(
            lambda row: row.prev_close * (0.9 if "ST" not in row["name"] else 0.95), axis=1).round(2)

    if open_only:
        total_df = total_df[total_df.open > 0]

    return total_df
Project: memory-networks    Author: suriyadeepan    | project source | file source
def fetch(task_id=1, batch_size=32):

    # task data
    train, test = load_task(datadir, task_id)
    data = train + test

    # metadata
    vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    # sizes
    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([ len(s) for s, _, _ in data ]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    memory_size = min(50, max_story_size)
    vocab_size = len(word_idx) + 1 # +1 for nil word
    sentence_size = max(query_size, sentence_size) # for the position

    # train/validation/test sets
    S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
    trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split(S, Q, A, test_size=.1, random_state=None)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, memory_size)

    # params
    n_train = trainS.shape[0]
    n_test = testS.shape[0]
    n_val = valS.shape[0]

    batches = zip(range(0, n_train-batch_size, batch_size), range(batch_size, n_train, batch_size))
    batches = [(start, end) for start, end in batches]

    data = {
        'trS' : trainS,
        'trQ' : trainQ,
        'trA' : trainA,
        'teS' : testS,
        'teQ' : testQ,
        'teA' : testA,
        'vaS' : valS,
        'vaQ' : valQ,
        'vaA' : valA,
        'batches' : batches
        }


    metadata = {
            'vocab_size' : vocab_size,
            'vocab' : vocab,
            'word_idx' : word_idx,
            'sentence_size' : sentence_size,
            'memory_size' : memory_size
            }

    return data, metadata