public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException { if (terms == null) { throw new IllegalArgumentException("generator field [" + field + "] doesn't exist"); } this.spellchecker = spellchecker; this.field = field; this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; final long dictSize = terms.getSumTotalTermFreq(); this.useTotalTermFrequency = dictSize != -1; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.preFilter = preFilter; this.postFilter = postFilter; this.nonErrorLikelihood = nonErrorLikelihood; float thresholdFrequency = spellchecker.getThresholdFrequency(); this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency); termsEnum = terms.iterator(); }
@Override public CandidateSet drawCandidates(CandidateSet set) throws IOException { Candidate original = set.originalTerm; BytesRef term = preFilter(original.term, spare, byteSpare); final long frequency = original.frequency; spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize)); SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode); List<Candidate> candidates = new ArrayList<>(suggestSimilar.length); for (int i = 0; i < suggestSimilar.length; i++) { SuggestWord suggestWord = suggestSimilar[i]; BytesRef candidate = new BytesRef(suggestWord.string); postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates); } set.addCandidates(candidates); return set; }
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException { CharsRef scratch = new CharsRef(); scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count); if (suggestions == null || suggestions.size() == 0) { return null; } return suggestions; }
private static SuggestMode resolveSuggestMode(String suggestMode) { suggestMode = suggestMode.toLowerCase(Locale.US); if ("missing".equals(suggestMode)) { return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; } else if ("popular".equals(suggestMode)) { return SuggestMode.SUGGEST_MORE_POPULAR; } else if ("always".equals(suggestMode)) { return SuggestMode.SUGGEST_ALWAYS; } else { throw new IllegalArgumentException("Illegal suggest mode " + suggestMode); } }
private List<String> getUsingSpellcheck(String searchQuery) throws IOException { SuggestWord[] suggestions = spellChecker.suggestSimilar(new Term(WORD_FIELD, searchQuery), 2, reader, SuggestMode.SUGGEST_ALWAYS); List<String> result = new ArrayList<>(); for(SuggestWord suggestion : suggestions) { result.add(suggestion.string); } return result; }
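A hedged setup sketch for the snippet above: because suggestSimilar here returns SuggestWord[] and accepts a SuggestMode, spellChecker is presumably Lucene's DirectSpellChecker. The index path, the WORD_FIELD value, and the tuning calls below are illustrative assumptions, not taken from the original code (FSDirectory.open(Path) assumes Lucene 5+).

// Hypothetical wiring for getUsingSpellcheck; the field name and index path are placeholders.
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.store.FSDirectory;

public class SpellcheckSetup {
  private static final String WORD_FIELD = "word"; // assumed dictionary field
  private final IndexReader reader;
  private final DirectSpellChecker spellChecker;

  public SpellcheckSetup(String indexPath) throws java.io.IOException {
    this.reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
    this.spellChecker = new DirectSpellChecker();
    this.spellChecker.setMaxEdits(2);    // allow up to two edits per suggestion
    this.spellChecker.setAccuracy(0.5f); // loosen the default similarity threshold
  }
}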
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, SuggestMode suggestMode, boolean extendedResults, float accuracy, SolrParams customParams) { this.tokens = tokens; this.reader = reader; this.count = count; this.suggestMode = suggestMode; this.extendedResults = extendedResults; this.accuracy = accuracy; this.customParams = customParams; }
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, int alternativeTermCount, SuggestMode suggestMode, boolean extendedResults, float accuracy, SolrParams customParams) { this.tokens = tokens; this.reader = reader; this.count = count; this.alternativeTermCount = alternativeTermCount; this.suggestMode = suggestMode; this.extendedResults = extendedResults; this.accuracy = accuracy; this.customParams = customParams; }
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token t : options.tokens) { scratch.chars = t.buffer(); scratch.offset = 0; scratch.length = t.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count); if (suggestions == null) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } for (LookupResult lr : suggestions) { res.add(t, lr.key.toString(), (int)lr.value); } } return res; }
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, Integer alternativeTermCount, SuggestMode suggestMode, boolean extendedResults, float accuracy, SolrParams customParams) { this.tokens = tokens; this.reader = reader; this.count = count; this.alternativeTermCount = alternativeTermCount; this.suggestMode = suggestMode; this.extendedResults = extendedResults; this.accuracy = accuracy; this.customParams = customParams; }
public SuggestMode suggestMode() { return suggestMode; }
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/** * <p> * Generate suggestions by breaking the passed-in term into multiple words. * The scores returned are equal to the number of word breaks needed so a * lower score is generally preferred over a higher score. * </p> * * @param suggestMode * - default = {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX} * @param sortMethod * - default = * {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY} * @return one or more arrays of words formed by breaking up the original term * @throws IOException If there is a low-level I/O error. */ public SuggestWord[][] suggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod) throws IOException { if (maxSuggestions < 1) { return new SuggestWord[0][0]; } if (suggestMode == null) { suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; } if (sortMethod == null) { sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY; } int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions; Comparator<SuggestWordArrayWrapper> queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY ? new LengthThenMaxFreqComparator() : new LengthThenSumFreqComparator(); Queue<SuggestWordArrayWrapper> suggestions = new PriorityQueue<>( queueInitialCapacity, queueComparator); int origFreq = ir.docFreq(term); if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) { return new SuggestWord[0][]; } int useMinSuggestionFrequency = minSuggestionFrequency; if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) { useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq); } generateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0, sortMethod); SuggestWord[][] suggestionArray = new SuggestWord[suggestions.size()][]; for (int i = suggestions.size() - 1; i >= 0; i--) { suggestionArray[i] = suggestions.remove().suggestWords; } return suggestionArray; }
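A minimal usage sketch for suggestWordBreaks, assuming the method above belongs to Lucene's WordBreakSpellChecker and that an IndexReader over the target index is already open; the field name and the run-together term are illustrative.

// Illustrative only: "body" is an assumed text field; per the Javadoc above, the defaults
// (SUGGEST_WHEN_NOT_IN_INDEX, NUM_CHANGES_THEN_MAX_FREQUENCY) would apply if the last two
// arguments were passed as null.
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.WordBreakSpellChecker;

public class WordBreakExample {
  public static void printBreaks(IndexReader reader) throws IOException {
    WordBreakSpellChecker wbsp = new WordBreakSpellChecker();
    SuggestWord[][] breakups = wbsp.suggestWordBreaks(
        new Term("body", "thunderroad"), 5, reader,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX,
        WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);
    // Each inner array is one way to split the term; suggestions needing fewer breaks rank earlier.
    for (SuggestWord[] words : breakups) {
      StringBuilder sb = new StringBuilder();
      for (SuggestWord w : words) {
        if (sb.length() > 0) sb.append(' ');
        sb.append(w.string);
      }
      System.out.println(sb);
    }
  }
}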
@Test public void testExtendedResults() throws Exception { IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File indexDir = createTempDir(); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); RefCounted<SolrIndexSearcher> holder = core.getSearcher(); SolrIndexSearcher searcher = holder.get(); try { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("documemt"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("documemt is null and it shouldn't be", suggestions != null); assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true); assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("document"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); } finally { holder.decref(); } }
@Test public void testAlternateLocation() throws Exception { String[] ALT_DOCS = new String[]{ "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band", "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" }; IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File tmpDir = createTempDir(); File indexDir = new File(tmpDir, "spellingIdx"); //create a standalone index File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime()); Directory dir = newFSDirectory(altIndexDir); IndexWriter iw = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer()) ); for (int i = 0; i < ALT_DOCS.length; i++) { Document doc = new Document(); doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES)); iw.addDocument(doc); } iw.forceMerge(1); iw.close(); dir.close(); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); RefCounted<SolrIndexSearcher> holder = core.getSearcher(); SolrIndexSearcher searcher = holder.get(); try { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("flesh"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("flesh is null and it shouldn't be", suggestions != null); assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true); assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("Caroline"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); } finally { holder.decref(); } }
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return; } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q == null) { // enforce usage of the spellcheck.q parameter - i.e. a query we can tokenize with a regular tokenizer and not // a solr query for the spell checking. Usage of the SolrQueryConverter is buggy and breaks frequently throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The spellcheck.q parameter is required."); } else { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT); Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsInteger == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsInteger.longValue(); } SpellingResult spellingResult = null; if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount != null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest); NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate, isCorrectlySpelled); if (collate) { ModifiableSolrParams modParams = new ModifiableSolrParams(params); // SH: having both spellcheck.q and q set screws up collations for some queries, such as "java develope" modParams.remove(CommonParams.Q); //SH: Note that the collator runs a query against the DF specified field. Ideally it should run the query against the spellchecker field, but that's inaccessible here. addCollationsToResponse(modParams, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap()); } NamedList response = new SimpleOrderedMap(); response.add("suggestions", suggestions); rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token currentToken : options.tokens) { scratch.chars = currentToken.buffer(); scratch.offset = 0; scratch.length = currentToken.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); // get more than the requested suggestions as a lot get collapsed by the corrections List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10); if (suggestions == null || suggestions.size() == 0) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>(); for (LookupResult lr : suggestions) { String suggestion = lr.key.toString(); if(this.suggestionAnalyzer != null) { String correction = getAnalyzerResult(suggestion); // multiple could map to the same, so don't repeat suggestions if(!isStringNullOrEmpty(correction)){ if(lhm.containsKey(correction)){ lhm.put(correction, lhm.get(correction) + (int) lr.value); } else { lhm.put(correction, (int) lr.value); } } } else { lhm.put(suggestion, (int) lr.value); } if(lhm.size() >= options.count){ break; } } // sort by new doc frequency Map<String, Integer> orderedMap = null; if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR){ // retain the sort order from above orderedMap = lhm; } else { orderedMap = new TreeMap<String, Integer>(new Comparator<String>() { @Override public int compare(String s1, String s2) { return lhm.get(s2).compareTo(lhm.get(s1)); } }); orderedMap.putAll(lhm); } for(Map.Entry<String, Integer> entry: orderedMap.entrySet()){ res.add(currentToken, entry.getKey(), entry.getValue()); } } return res; }
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { LOG.debug("getSuggestions: " + options.tokens); if (lookup == null) { LOG.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); for (Token currentToken : options.tokens) { String tokenText = currentToken.toString(); // we need to ensure that we combine matches for different cases, and take the most common // where multiple case versions exist final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>(); final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>(); for(String sToken: generateCaseVariants(tokenText)){ Token newToken = newToken(currentToken, sToken); List<LookupResult> tmpSuggestions = getLookupResults(options, newToken); if(tmpSuggestions != null){ for(LookupResult lu: tmpSuggestions) { final String key = lu.key.toString().toLowerCase(); LookupResult existing = htSuggestions.get(key); if(existing != null) { // replace if more frequent if (lu.value > existing.value) { htSuggestions.put(key, lu); } htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int)lu.value); } else{ htSuggestions.put(key, lu); htSuggestionCounts.put(key, (int)lu.value); } } } } List<String> suggestions = new ArrayList<String>(htSuggestions.keySet()); if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } else{ Collections.sort(suggestions, new Comparator<String>() { public int compare(String sug1, String sug2) { int sug1Count = htSuggestionCounts.get(sug1); int sug2Count = htSuggestionCounts.get(sug2); return sug2Count - sug1Count; } }); } for (String match : suggestions) { LookupResult lr = htSuggestions.get(match); res.add(currentToken, lr.key.toString(), (int)lr.value); } } return res; }
/** * <p> * Generate suggestions by breaking the passed-in term into multiple words. * The scores returned are equal to the number of word breaks needed so a * lower score is generally preferred over a higher score. * </p> * * @param suggestMode * - default = {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX} * @param sortMethod * - default = * {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY} * @return one or more arrays of words formed by breaking up the original term * @throws IOException If there is a low-level I/O error. */ public SuggestWord[][] suggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod) throws IOException { if (maxSuggestions < 1) { return new SuggestWord[0][0]; } if (suggestMode == null) { suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; } if (sortMethod == null) { sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY; } int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions; Comparator<SuggestWordArrayWrapper> queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY ? new LengthThenMaxFreqComparator() : new LengthThenSumFreqComparator(); Queue<SuggestWordArrayWrapper> suggestions = new PriorityQueue<SuggestWordArrayWrapper>( queueInitialCapacity, queueComparator); int origFreq = ir.docFreq(term); if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) { return new SuggestWord[0][]; } int useMinSuggestionFrequency = minSuggestionFrequency; if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) { useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq); } generateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0, sortMethod); SuggestWord[][] suggestionArray = new SuggestWord[suggestions.size()][]; for (int i = suggestions.size() - 1; i >= 0; i--) { suggestionArray[i] = suggestions.remove().suggestWords; } return suggestionArray; }
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return; } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q != null) { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } else { q = rb.getQueryString(); if (q == null) { q = params.get(CommonParams.Q); } tokens = queryConverter.convert(q); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT); Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsInteger == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsInteger.longValue(); } SpellingResult spellingResult = null; if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount != null) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest); NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults, collate, isCorrectlySpelled); if (collate) { addCollationsToResponse(params, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap()); } NamedList response = new SimpleOrderedMap(); response.add("suggestions", suggestions); rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
@Test public void testExtendedResults() throws Exception { IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime()); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); RefCounted<SolrIndexSearcher> holder = core.getSearcher(); SolrIndexSearcher searcher = holder.get(); try { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("documemt"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("documemt is null and it shouldn't be", suggestions != null); assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true); assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("document"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); } finally { holder.decref(); } }
@Test public void testAlternateLocation() throws Exception { String[] ALT_DOCS = new String[]{ "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band", "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" }; IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime()); //create a standalone index File altIndexDir = new File(TEMP_DIR, "alternateIdx" + new Date().getTime()); Directory dir = newFSDirectory(altIndexDir); IndexWriter iw = new IndexWriter( dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) ); for (int i = 0; i < ALT_DOCS.length; i++) { Document doc = new Document(); doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES)); iw.addDocument(doc); } iw.forceMerge(1); iw.close(); dir.close(); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); RefCounted<SolrIndexSearcher> holder = core.getSearcher(); SolrIndexSearcher searcher = holder.get(); try { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("flesh"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("flesh is null and it shouldn't be", suggestions != null); assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true); assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("Caroline"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); } finally { holder.decref(); } }