/**
 * Creates a new {@link DirectSpellChecker} and configures it from this object's accessors.
 *
 * <p>The comparator is chosen from {@code sort()}; accuracy, string distance, max edits,
 * max inspections, max query frequency, min prefix, min query length and threshold
 * frequency are copied from the matching accessors. Lower-casing of terms is disabled.
 *
 * @return the newly created and configured spell checker
 * @throws IllegalArgumentException if {@code sort()} is neither {@code SCORE} nor {@code FREQUENCY}
 */
public DirectSpellChecker createDirectSpellChecker() {
    final DirectSpellChecker checker = new DirectSpellChecker();
    checker.setAccuracy(accuracy());

    // Choose the ordering of suggestions according to the configured sort.
    switch (sort()) {
        case SCORE:
            checker.setComparator(SCORE_COMPARATOR);
            break;
        case FREQUENCY:
            checker.setComparator(LUCENE_FREQUENCY);
            break;
        default:
            throw new IllegalArgumentException("Illegal suggest sort: " + sort());
    }

    checker.setDistance(stringDistance());
    checker.setMaxEdits(maxEdits());
    checker.setMaxInspections(maxInspections());
    checker.setMaxQueryFrequency(maxTermFreq());
    checker.setMinPrefix(prefixLength());
    checker.setMinQueryLength(minWordLength());
    checker.setThresholdFrequency(minDocFreq());
    checker.setLowerCaseTerms(false);
    return checker;
}
@Override public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker(); final IndexReader indexReader = searcher.getIndexReader(); TermSuggestion response = new TermSuggestion( name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort() ); List<Token> tokens = queryTerms(suggestion, spare); for (Token token : tokens) { // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar( token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode() ); Text key = new Text(new BytesArray(token.term.bytes())); TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset); for (SuggestWord suggestWord : suggestedWords) { Text word = new Text(suggestWord.string); resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score)); } response.addTerm(resultEntry); } return response; }
/**
 * Looks up spelling candidates for the set's original term and adds them to the set.
 *
 * <p>The original term is run through {@code preFilter}, the spell checker's threshold
 * frequency is set (0 when the suggest mode is {@code SUGGEST_ALWAYS}, otherwise the value
 * of {@code thresholdFrequency(frequency, dictSize)}), and every similar word returned is
 * wrapped in a {@link Candidate} and passed through {@code postFilter}.
 *
 * @param set candidate set whose {@code originalTerm} drives the lookup
 * @return the same {@code set} instance after the candidates were added
 * @throws IOException if thrown by the calls made while drawing candidates
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long frequency = source.frequency;

    spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
    final SuggestWord[] hits = spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> collected = new ArrayList<>(hits.length);
    for (SuggestWord hit : hits) {
        final BytesRef text = new BytesRef(hit.string);
        final Candidate drawn = new Candidate(
            text,
            internalFrequency(text),
            hit.score,
            score(hit.freq, hit.score, dictSize),
            false
        );
        postFilter(drawn, spare, byteSpare, collected);
    }
    set.addCandidates(collected);
    return set;
}
@Override public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings()); final IndexReader indexReader = searcher.getIndexReader(); TermSuggestion response = new TermSuggestion( name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort() ); List<Token> tokens = queryTerms(suggestion, spare); for (Token token : tokens) { // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar( token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode() ); Text key = new Text(new BytesArray(token.term.bytes())); TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset); for (SuggestWord suggestWord : suggestedWords) { Text word = new Text(suggestWord.string); resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score)); } response.addTerm(resultEntry); } return response; }
/**
 * Checks the comparator type exposed by two spell checkers of the "spellcheck" component:
 * the one looked up as "freq" must use a {@code SuggestWordFrequencyComparator}, and the
 * one looked up as "fqcn" must use a {@code SampleComparator}.
 */
@Test
public void testComparator() throws Exception {
    final SpellCheckComponent spellcheck = (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
    assertNotNull(spellcheck);

    // Checker looked up as "freq".
    final AbstractLuceneSpellChecker freqChecker = (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("freq");
    assertNotNull(freqChecker);
    final Comparator<SuggestWord> freqComparator = freqChecker.getSpellChecker().getComparator();
    assertNotNull(freqComparator);
    assertTrue(freqComparator instanceof SuggestWordFrequencyComparator);

    // Checker looked up as "fqcn".
    final AbstractLuceneSpellChecker fqcnChecker = (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("fqcn");
    assertNotNull(fqcnChecker);
    final Comparator<SuggestWord> fqcnComparator = fqcnChecker.getSpellChecker().getComparator();
    assertNotNull(fqcnComparator);
    assertTrue(fqcnComparator instanceof SampleComparator);
}
/**
 * Asks the spell checker for at most two words similar to {@code searchQuery} in
 * {@code WORD_FIELD}, using {@code SuggestMode.SUGGEST_ALWAYS}.
 *
 * @param searchQuery text to find similar words for
 * @return the suggested words' strings, in the order the spell checker returned them
 * @throws IOException if the spell checker lookup throws it
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
    final Term lookup = new Term(WORD_FIELD, searchQuery);
    final SuggestWord[] hits = spellChecker.suggestSimilar(lookup, 2, reader, SuggestMode.SUGGEST_ALWAYS);

    final List<String> words = new ArrayList<>();
    for (int i = 0; i < hits.length; i++) {
        words.add(hits[i].string);
    }
    return words;
}
/**
 * Returns up to {@code count} words similar to {@code term} in {@code field}.
 *
 * <p>A searcher reference is acquired from the manager for the duration of the lookup and
 * is released in a {@code finally} block, whether or not the lookup succeeds.
 *
 * @param term  text to find similar words for
 * @param count maximum number of suggestions requested from the spell checker
 * @param field index field the term belongs to
 * @return the suggestions returned by the spell checker
 * @throws Exception if acquiring the reference, the lookup, or the release fails
 */
public SuggestWord[] suggest(String term, int count, String field) throws Exception {
    final SearcherAndTaxonomy acquired = data.getManager().acquire();
    try {
        final Term lookup = new Term(field, term);
        return spellChecker.suggestSimilar(lookup, count, acquired.searcher.getIndexReader());
    } finally {
        data.getManager().release(acquired);
    }
}
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount > 0 && freq > 0) ? options.alternativeTermCount: options.count; SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. if (options.alternativeTermCount > 0 && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if(suggestions.length==0 && freq==0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; }
/**
 * Orders two suggestions by their {@code string} fields using {@link String#compareTo}.
 *
 * @param suggestWord  first suggestion
 * @param suggestWord1 second suggestion
 * @return a negative, zero or positive value as the first string sorts before, equal to
 *         or after the second
 */
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
    final String left = suggestWord.string;
    final String right = suggestWord1.string;
    return left.compareTo(right);
}
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount != null && freq > 0) ? options.alternativeTermCount: options.count; SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. if (options.alternativeTermCount != null && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if(suggestions.length==0 && freq==0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; }