Java 类org.apache.lucene.search.spell.DirectSpellChecker 实例源码

项目:elasticsearch_my    文件:DirectSpellcheckerSettings.java   
/**
 * Builds a new {@link DirectSpellChecker} configured from this settings object.
 *
 * <p>Each accessor on this object (accuracy, sort, string distance, max edits, ...) is read
 * once and pushed into the corresponding setter of the freshly created spell checker.
 * Lower-casing of terms is always switched off.
 *
 * @return a newly created, fully configured spell checker
 * @throws IllegalArgumentException if {@code sort()} is neither {@code SCORE} nor {@code FREQUENCY}
 */
public DirectSpellChecker createDirectSpellChecker() {
        final DirectSpellChecker checker = new DirectSpellChecker();
        checker.setAccuracy(accuracy());

        // Map the configured sort order onto the matching comparator constant.
        switch (sort()) {
            case SCORE:
                checker.setComparator(SCORE_COMPARATOR);
                break;
            case FREQUENCY:
                checker.setComparator(LUCENE_FREQUENCY);
                break;
            default:
                throw new IllegalArgumentException("Illegal suggest sort: " + sort());
        }

        checker.setDistance(stringDistance());
        checker.setMaxEdits(maxEdits());
        checker.setMaxInspections(maxInspections());
        checker.setMaxQueryFrequency(maxTermFreq());
        checker.setMinPrefix(prefixLength());
        checker.setMinQueryLength(minWordLength());
        checker.setThresholdFrequency(minDocFreq());
        checker.setLowerCaseTerms(false);
        return checker;
    }
项目:elasticsearch_my    文件:TermSuggester.java   
/**
 * Runs the term suggester: for every token produced by {@code queryTerms}, asks a
 * {@link DirectSpellChecker} for similar terms and records them as options of one
 * {@link TermSuggestion.Entry} per token.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying size, shard size and spell-checker settings
 * @param searcher   searcher whose index reader is queried for similar terms
 * @param spare      scratch builder handed through to {@code queryTerms}
 * @return the suggestion holding one entry per token
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare)
        throws IOException {
    final DirectSpellChecker checker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion result =
            new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    for (Token current : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] candidates = checker.suggestSimilar(
                current.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        final Text entryText = new Text(new BytesArray(current.term.bytes()));
        final int length = current.endOffset - current.startOffset;
        final TermSuggestion.Entry entry = new TermSuggestion.Entry(entryText, current.startOffset, length);

        // One option per suggested word, carrying its frequency and score.
        for (int i = 0; i < candidates.length; i++) {
            final SuggestWord candidate = candidates[i];
            entry.addOption(new TermSuggestion.Entry.Option(new Text(candidate.string), candidate.freq, candidate.score));
        }
        result.addTerm(entry);
    }
    return result;
}
项目:elasticsearch_my    文件:DirectCandidateGeneratorBuilder.java   
/**
 * Resolves the textual name of a string-distance option to a {@link StringDistance} instance.
 *
 * <p>Matching is case-insensitive: the input is lower-cased before comparison. Accepted names
 * are {@code internal}, {@code damerau_levenshtein} (also {@code damerauLevenshtein}),
 * {@code levenstein}, {@code jarowinkler} and {@code ngram}.
 *
 * @param distanceVal name of the distance implementation; must not be {@code null}
 * @return the matching string-distance implementation
 * @throws IllegalArgumentException if the name is not one of the accepted options
 */
private static StringDistance resolveDistance(String distanceVal) {
    // The value is lower-cased here, so every literal compared below must itself be lower-case.
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "dameraulevenshtein".equals(distanceVal)) {
        // The camelCase alias "damerauLevenshtein" arrives here as "dameraulevenshtein";
        // comparing against the mixed-case spelling could never match after lower-casing.
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        // TODO Jaro and Winkler are 2 people - so apply same naming logic
        // as damerau_levenshtein
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}
项目:elasticsearch_my    文件:DirectCandidateGenerator.java   
/**
 * Creates a candidate generator backed by the given spell checker and the terms of one field.
 *
 * @param spellchecker       spell checker used by this generator; its threshold frequency is read here
 * @param field              name of the field the generator works on
 * @param suggestMode        suggest mode stored for later use
 * @param reader             index reader; its {@code maxDoc()} is the dictionary-size fallback
 * @param nonErrorLikelihood value stored for later use
 * @param numCandidates      value stored for later use
 * @param preFilter          analyzer stored as pre-filter (callers in this file may pass {@code null})
 * @param postFilter         analyzer stored as post-filter (callers in this file may pass {@code null})
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException declared by this constructor; propagated from the index access calls
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }

    // Plain wiring of constructor arguments.
    this.spellchecker = spellchecker;
    this.field = field;
    this.suggestMode = suggestMode;
    this.reader = reader;
    this.numCandidates = numCandidates;
    this.nonErrorLikelihood = nonErrorLikelihood;
    this.preFilter = preFilter;
    this.postFilter = postFilter;

    // -1 means the statistic is unavailable; fall back to the document count in that case.
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    final boolean statisticAvailable = sumTotalTermFreq != -1;
    this.useTotalTermFrequency = statisticAvailable;
    if (statisticAvailable) {
        this.dictSize = sumTotalTermFreq;
    } else {
        this.dictSize = reader.maxDoc();
    }

    // A threshold >= 1 is taken as an absolute value, anything else as a fraction.
    // NOTE(review): the fraction is applied to the raw statistic (which may be -1), not to
    // this.dictSize with its maxDoc fallback - confirm this is intended.
    final float threshold = spellchecker.getThresholdFrequency();
    if (threshold >= 1.0f) {
        this.frequencyPlateau = (int) threshold;
    } else {
        this.frequencyPlateau = (int) (sumTotalTermFreq * threshold);
    }

    termsEnum = terms.iterator();
}
项目:Elasticsearch    文件:TermSuggester.java   
/**
 * Produces term suggestions for every token returned by {@code queryTerms}.
 *
 * <p>A {@link DirectSpellChecker} obtained from {@code SuggestUtils} is asked for terms similar
 * to each token; the answers become the options of that token's {@link TermSuggestion.Entry}.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying size, shard size and spell-checker settings
 * @param searcher   searcher whose index reader is queried for similar terms
 * @param spare      scratch builder handed through to {@code queryTerms}
 * @return the suggestion holding one entry per token
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final DirectSpellChecker checker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion suggestions =
            new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    final List<Token> queryTokens = queryTerms(suggestion, spare);
    for (Token queryToken : queryTokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] similar = checker.suggestSimilar(
                queryToken.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        final Text tokenText = new Text(new BytesArray(queryToken.term.bytes()));
        final int tokenLength = queryToken.endOffset - queryToken.startOffset;
        final TermSuggestion.Entry tokenEntry = new TermSuggestion.Entry(tokenText, queryToken.startOffset, tokenLength);

        // Copy each suggested word, with its frequency and score, into the entry.
        for (int idx = 0; idx < similar.length; idx++) {
            final Text optionText = new Text(similar[idx].string);
            tokenEntry.addOption(new TermSuggestion.Entry.Option(optionText, similar[idx].freq, similar[idx].score));
        }
        suggestions.addTerm(tokenEntry);
    }
    return suggestions;
}
项目:Elasticsearch    文件:DirectCandidateGenerator.java   
/**
 * Sets up a candidate generator for one field, using the supplied spell checker and terms.
 *
 * @param spellchecker       spell checker used by this generator; its threshold frequency is read here
 * @param field              name of the field the generator works on
 * @param suggestMode        suggest mode stored for later use
 * @param reader             index reader; its {@code maxDoc()} is the dictionary-size fallback
 * @param nonErrorLikelihood value stored for later use
 * @param numCandidates      value stored for later use
 * @param preFilter          analyzer stored as pre-filter
 * @param postFilter         analyzer stored as post-filter
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException declared by this constructor; propagated from the index access calls
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood,  int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }

    // Store the collaborators and tuning values as given.
    this.spellchecker = spellchecker;
    this.reader = reader;
    this.field = field;
    this.suggestMode = suggestMode;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.numCandidates = numCandidates;
    this.nonErrorLikelihood = nonErrorLikelihood;

    // A value of -1 signals that the statistic is missing; use the document count instead.
    final long totalTermFreq = terms.getSumTotalTermFreq();
    final boolean totalTermFreqKnown = totalTermFreq != -1;
    this.useTotalTermFrequency = totalTermFreqKnown;
    if (totalTermFreqKnown) {
        this.dictSize = totalTermFreq;
    } else {
        this.dictSize = reader.maxDoc();
    }

    // Thresholds of 1 or more are used as absolute values; smaller ones as a fraction.
    // NOTE(review): the fraction is multiplied with the raw statistic (possibly -1) instead of
    // this.dictSize - confirm this is intended.
    final float plateauThreshold = spellchecker.getThresholdFrequency();
    if (plateauThreshold >= 1.0f) {
        this.frequencyPlateau = (int) plateauThreshold;
    } else {
        this.frequencyPlateau = (int) (totalTermFreq * plateauThreshold);
    }

    termsEnum = terms.iterator();
}
项目:preDict    文件:LuceneWordSearch.java   
/**
 * Finishes indexing: configures the spell checker, opens a reader on the writer, builds the
 * fuzzy suggester from the indexed words, closes the writer and opens a searcher on the directory.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised along the way
 */
@Override
public void indexingDone() {
    try {
        // Spell checker: up to 2 edits, accuracy 0.1, no required common prefix.
        spellChecker = new DirectSpellChecker();
        spellChecker.setMaxEdits(2);
        spellChecker.setAccuracy(0.1f);
        spellChecker.setMinPrefix(0);
        reader = DirectoryReader.open(writer);

        fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());

        // Weight source that needs no scores and supplies no per-leaf values.
        // NOTE(review): getValues returns null - confirm the dictionary tolerates that.
        final LongValuesSource noWeights = new LongValuesSource() {

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
                return null;
            }
        };
        fuzzySuggester.build(new DocumentValueSourceDictionary(reader, WORD_FIELD, noWeights));

        // The writer is closed only after the suggester has been built from its reader.
        writer.close();
        searcher = new IndexSearcher(DirectoryReader.open(directory));
    } catch (IOException ioFailure) {
        throw new RuntimeException(ioFailure);
    }
}
项目:elasticsearch_my    文件:TermSuggestionBuilder.java   
/**
 * Maps this option to its Lucene string distance.
 *
 * @return the {@code DirectSpellChecker.INTERNAL_LEVENSHTEIN} constant
 */
@Override
public StringDistance toLucene() {
    final StringDistance internalDistance = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    return internalDistance;
}
项目:elasticsearch_my    文件:DirectCandidateGenerator.java   
/**
 * Convenience constructor: delegates to the nine-argument constructor, passing {@code null}
 * for both the pre-filter and post-filter analyzers and looking the field's terms up with
 * {@code MultiFields.getTerms(reader, field)}.
 *
 * @param spellchecker       spell checker handed to the delegate constructor
 * @param field              field whose terms are looked up on {@code reader}
 * @param suggestMode        suggest mode handed to the delegate constructor
 * @param reader             index reader used for the terms lookup and handed to the delegate
 * @param nonErrorLikelihood value handed to the delegate constructor
 * @param numCandidates      value handed to the delegate constructor
 * @throws IOException declared by this constructor; propagated from the terms lookup or the delegate
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates) throws IOException {
    this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
项目:Elasticsearch    文件:DirectCandidateGenerator.java   
/**
 * Convenience constructor: forwards to the nine-argument constructor with {@code null}
 * pre-filter and post-filter analyzers, resolving the field's terms through
 * {@code MultiFields.getTerms(reader, field)}.
 *
 * @param spellchecker       spell checker forwarded to the delegate constructor
 * @param field              field whose terms are looked up on {@code reader}
 * @param suggestMode        suggest mode forwarded to the delegate constructor
 * @param reader             index reader used for the terms lookup and forwarded to the delegate
 * @param nonErrorLikelihood value forwarded to the delegate constructor
 * @param numCandidates      value forwarded to the delegate constructor
 * @throws IOException declared by this constructor; propagated from the terms lookup or the delegate
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
    this(spellchecker, field, suggestMode, reader,  nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}