Python nltk module: ConditionalFreqDist() example source code

We extracted the following 5 code examples from open-source Python projects to illustrate how to use nltk.ConditionalFreqDist().
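
Before the project code, here is a minimal self-contained sketch of the API all of these examples rely on: nltk.ConditionalFreqDist takes (condition, sample) pairs and maintains one frequency distribution per condition.

import nltk

# Minimal sketch: count samples grouped by condition.
pairs = [('NOUN', 'dog'), ('NOUN', 'dog'), ('NOUN', 'cat'), ('VERB', 'run')]
cfd = nltk.ConditionalFreqDist(pairs)

print(cfd.conditions())           # the observed conditions, e.g. ['NOUN', 'VERB']
print(cfd['NOUN'].most_common())  # [('dog', 2), ('cat', 1)]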

Project: OpinionMining728    Author: stasi009
def statistics_by_aspect():
    """Count word frequencies per aspect and report each aspect's sample share."""
    filename = "aspects_train.csv"
    words_dist = nltk.ConditionalFreqDist()  # condition: aspect, event: word
    sample_sizes = nltk.FreqDist()

    # get_samples_stream is a project-local helper yielding (aspect, words) pairs
    samples_stream = get_samples_stream(filename)
    for aspect, words in samples_stream:
        sample_sizes[aspect] += 1
        for word in words:
            words_dist[aspect][word] += 1

    for category, dist in words_dist.items():
        print("\n------- Category: {}".format(category))
        print(dist.most_common(20))

    total_samples = sample_sizes.N()
    print("\n{} samples in total".format(total_samples))
    for aspect, count in sample_sizes.items():
        print("aspect[{}] has {} samples, {:.2f}%".format(aspect, count, count * 100.0 / total_samples))
Project: wntf    Author: tonybaloney
def findtags(self, tag_prefix, tagged_text):
    '''
    Find all words that match a 'tag' (word type) prefix

    :param tag_prefix: The tag prefix
    :type  tag_prefix: ``str``

    :param tagged_text: The text to search
    :type  tagged_text: ``list`` of ``tuple`` (word, tag)
    '''
    cfd = nltk.ConditionalFreqDist((tag, word) for (word, tag) in tagged_text
                                   if tag.startswith(tag_prefix))
    return dict((tag, cfd[tag].most_common(50)) for tag in cfd.conditions())
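
Since self is never used, the method can be exercised standalone against NLTK's tagged Brown corpus, which already yields (word, tag) tuples:

import nltk
nltk.download('brown')  # one-time corpus download

tagged = nltk.corpus.brown.tagged_words(categories='news')
nouns = findtags(None, 'NN', tagged)   # None stands in for the unused self
print(nouns['NN'][:5])                 # five most frequent words tagged 'NN'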
Project: Neural-Learner-for-English-Language-Test    Author: taineleau
def ngram_baseline(text):
    # ngrams comes from nltk (from nltk import ngrams); text appears to be
    # a sequence of (token, flag) pairs, judging by the indexing below
    ngs = ngrams(text, 2)
    # Keep only bigrams whose second element carries flag == 1,
    # reducing each pair to the raw token strings
    refine = []
    for (first, second) in ngs:
        if second[1] == 1:
            refine.append((first[0], second[0]))
    cfdist = nltk.ConditionalFreqDist(refine)
    return cfdist
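
Whatever the flag in second[1] encodes (the snippet does not say), the returned object is an ordinary ConditionalFreqDist keyed by the first word; a hypothetical call:

# Hypothetical input: (token, flag) pairs, with the flag semantics assumed
text = [('the', 0), ('cat', 1), ('the', 0), ('dog', 1)]
cfdist = ngram_baseline(text)
print(cfdist['the'].most_common())  # [('cat', 1), ('dog', 1)]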
Project: ircbot    Author: pbzweihander
def calc_cfd(doc):
    # Calculate the conditional frequency distribution of word bigrams.
    # Mecab is a Korean POS tagger (konlpy-style API); .pos() yields
    # (word, tag) pairs, of which only the words are kept.
    words = [w for w, t in Mecab().pos(doc)]
    bigrams = nltk.bigrams(words)
    return nltk.ConditionalFreqDist(bigrams)
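
The .pos() call matches konlpy's Mecab wrapper for Korean; assuming that is the tagger in play, usage would look like this (konlpy and a local MeCab installation are required):

# Hypothetical usage, assuming konlpy's Mecab
from konlpy.tag import Mecab

cfd = calc_cfd('아버지가 방에 들어가신다')
for word in cfd.conditions():
    print(word, cfd[word].most_common())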
Project: facebook-message-analysis    Author: szheng17
def generate_from_trigrams(lm, start_words, n_words):
    """
    Backoff model: prefer trigram continuations, fall back to bigrams,
    then to unigrams.

    lm: language model; lm.lowercase_tokens must be nonempty
    start_words: list of two strings
    n_words: integer >= 0, number of words to generate, not including start_words
    """
    # Create probability maps (the *_prob_map functions are project helpers)
    trigram_counter = Counter(ngrams(lm.lowercase_tokens, 3))
    trigram_prob = trigram_prob_map(trigram_counter)
    bigram_cfd = nltk.ConditionalFreqDist(ngrams(lm.lowercase_tokens, 2))
    bigram_prob = bigram_prob_map(bigram_cfd)
    unigram_counter = Counter(lm.lowercase_tokens)
    unigram_prob = unigram_prob_map(unigram_counter)

    # Build sentence
    w1, w2 = start_words[0], start_words[1]
    words = [w1, w2]
    for _ in range(n_words):
        # Back off: trigram -> bigram -> unigram
        if (w1, w2) in trigram_prob:
            prob_map = trigram_prob[(w1, w2)]
        elif w2 in bigram_prob:
            prob_map = bigram_prob[w2]
        else:
            prob_map = unigram_prob
        # choice (presumably numpy.random.choice) needs a list, not a dict view
        next_words = list(prob_map.keys())
        next_word = choice(next_words, p=[prob_map[w] for w in next_words])

        # Slide the bigram context and record the new word
        w1 = w2
        w2 = next_word
        words.append(w2)
    return ' '.join(words)
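
A hypothetical driver, with a stubbed-in lm; note that trigram_prob_map, bigram_prob_map, and unigram_prob_map are project helpers that must also be in scope:

from collections import Counter, namedtuple
from numpy.random import choice
from nltk import ngrams
import nltk

# Hypothetical stub: the real project builds lm elsewhere
LM = namedtuple('LM', 'lowercase_tokens')
lm = LM(lowercase_tokens='the cat sat on the mat and the cat ran'.split())

print(generate_from_trigrams(lm, ['the', 'cat'], 5))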