@Test
public void testEmptyReader() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();
  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());
  ir.close();
  dir.close();
}
@Test
public void testEmptyReader() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new DoubleConstValueSource(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();
  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());
  ir.close();
  dir.close();
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String sourceLocation = (String) params.get(Suggester.LOCATION);
  if (sourceLocation == null) {
    throw new IllegalArgumentException(Suggester.LOCATION + " parameter is mandatory for using FileDictionary");
  }
  String fieldDelimiter = (params.get(FIELD_DELIMITER) != null)
      ? (String) params.get(FIELD_DELIMITER)
      : FileDictionary.DEFAULT_FIELD_DELIMITER;
  try {
    return new FileDictionary(new InputStreamReader(
        core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8), fieldDelimiter);
  } catch (IOException e) {
    // wrap the cause instead of throwing a bare RuntimeException that hides it
    throw new RuntimeException(e);
  }
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String field = (String) params.get(FIELD);
  String weightField = (String) params.get(WEIGHT_FIELD);
  String payloadField = (String) params.get(PAYLOAD_FIELD);
  if (field == null) {
    throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
  }
  if (weightField == null) {
    throw new IllegalArgumentException(WEIGHT_FIELD + " is a mandatory parameter");
  }
  return new DocumentDictionary(searcher.getIndexReader(), field, weightField, payloadField);
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String field = (String) params.get(SolrSpellChecker.FIELD);
  if (field == null) {
    throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter");
  }
  float threshold = params.get(THRESHOLD_TOKEN_FREQUENCY) == null
      ? 0.0f
      : (Float) params.get(THRESHOLD_TOKEN_FREQUENCY);
  return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold);
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String sourceLocation = (String) params.get(Suggester.LOCATION);
  if (sourceLocation == null) {
    throw new IllegalArgumentException(Suggester.LOCATION + " parameter is mandatory for using FileDictionary");
  }
  String fieldDelimiter = (params.get(FIELD_DELIMITER) != null)
      ? (String) params.get(FIELD_DELIMITER)
      : FileDictionary.DEFAULT_FIELD_DELIMITER;
  try {
    return new FileDictionary(new InputStreamReader(
        core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8), fieldDelimiter);
  } catch (IOException e) {
    // wrap the cause instead of throwing a bare RuntimeException that hides it
    throw new RuntimeException(e);
  }
}
@Override
public void indexingDone() {
  try {
    spellChecker = new DirectSpellChecker();
    spellChecker.setMaxEdits(2);
    spellChecker.setAccuracy(0.1f);
    spellChecker.setMinPrefix(0);
    reader = DirectoryReader.open(writer);
    fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());
    Dictionary dict = new DocumentValueSourceDictionary(reader, WORD_FIELD, new LongValuesSource() {
      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
        return null;
      }
    });
    fuzzySuggester.build(dict);
    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(directory));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull final Directory index) throws IOException {
  final Directory spellCheckerDirectory = new RAMDirectory();
  final IndexReader indexReader = DirectoryReader.open(index);
  final Analyzer analyzer = new SimpleAnalyzer();
  final IndexWriterConfig config = new IndexWriterConfig(analyzer);
  final Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
  final SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);
  spellChecker.indexDictionary(dictionary, config, false);
  spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
  return spellChecker;
}
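A minimal usage sketch for the factory above, not taken from the original source: it assumes an existing on-disk index that contains DRUG_TERMS_FIELD, and the path, query term, and method name suggestDrugNames are hypothetical. The lookup call, SpellChecker.suggestSimilar(word, numSuggestions), is the standard API.

// Hypothetical caller in the same class as createIndexSpellchecker(...).
// Assumes imports of java.nio.file.Paths, org.apache.lucene.store.Directory,
// org.apache.lucene.store.FSDirectory and org.apache.lucene.search.spell.SpellChecker.
static void suggestDrugNames() throws IOException {
  // "drug-index" is an assumed location of the source index
  try (Directory drugIndex = FSDirectory.open(Paths.get("drug-index"))) {
    SpellChecker spellChecker = createIndexSpellchecker(drugIndex);
    // suggestSimilar(word, numSuggestions) returns the closest terms from the spell index
    String[] suggestions = spellChecker.suggestSimilar("aspirn", 5);
    for (String suggestion : suggestions) {
      System.out.println(suggestion);
    }
    spellChecker.close();
  }
}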
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
  BytesRefIterator tf = dictionary.getEntryIterator();
  assertNull(tf.getComparator());
  assertNull(tf.next());
  dir.close();
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String field = (String) params.get(FIELD);
  String payloadField = (String) params.get(PAYLOAD_FIELD);
  String weightExpression = (String) params.get(WEIGHT_EXPRESSION);
  Set<SortField> sortFields = new HashSet<>();
  if (field == null) {
    throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
  }
  if (weightExpression == null) {
    throw new IllegalArgumentException(WEIGHT_EXPRESSION + " is a mandatory parameter");
  }
  for (int i = 0; i < params.size(); i++) {
    if (params.getName(i).equals(SORT_FIELD)) {
      String sortFieldName = (String) params.getVal(i);
      SortField.Type sortFieldType = getSortFieldType(core, sortFieldName);
      if (sortFieldType == null) {
        throw new IllegalArgumentException(sortFieldName + " could not be mapped to any appropriate type"
            + " [long, int, float, double]");
      }
      SortField sortField = new SortField(sortFieldName, sortFieldType);
      sortFields.add(sortField);
    }
  }
  return new DocumentValueSourceDictionary(searcher.getIndexReader(), field,
      fromExpression(weightExpression, sortFields), payloadField);
}
/**
 * Indexes the data from the given reader.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException if reading from or writing to the index fails
 */
public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.autoCompleteIndex;
    final Dictionary dict = new LuceneDictionary(reader, field);
    final IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
            .setRAMBufferSizeMB(ramMB));
    IndexSearcher indexSearcher = obtainSearcher();
    final List<IndexReader> readers = new ArrayList<IndexReader>();
    if (searcher.maxDoc() > 0) {
      ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
    }
    // clear the index
    writer.deleteAll();
    try {
      Iterator<String> iter = dict.getWordsIterator();
      while (iter.hasNext()) {
        String word = iter.next();
        // ok index the word
        Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
        writer.addDocument(doc);
      }
    } finally {
      releaseSearcher(indexSearcher);
    }
    // close writer
    if (optimize) writer.optimize();
    writer.close();
    // also re-open the autocomplete index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}
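A short usage sketch for indexDictionary above, not part of the original code: autocompleter stands for an instance of the declaring class, and the index path, field name, and tuning values are illustrative only (the older IndexReader.open API matches the Lucene version used above).

// Hypothetical caller; "source-index", "title", and the mergeFactor/ramMB/optimize values are illustrative.
IndexReader sourceReader = IndexReader.open(FSDirectory.open(new File("source-index")));
try {
  autocompleter.indexDictionary(sourceReader, "title", /* mergeFactor */ 10, /* ramMB */ 32, /* optimize */ true);
} finally {
  sourceReader.close();
}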
private void indexSpellCheck(String id) throws SearchException {
  if (!spellcheck) return;
  IndexReader reader = null;
  FSDirectory spellDir = null;
  Resource dir = _createSpellDirectory(id);
  try {
    File spellFile = FileWrapper.toFile(dir);
    spellDir = FSDirectory.getDirectory(spellFile);
    reader = _getReader(id, false);
    Dictionary dictionary = new LuceneDictionary(reader, "contents");
    SpellChecker spellChecker = new SpellChecker(spellDir);
    spellChecker.indexDictionary(dictionary);
  } catch (IOException ioe) {
    throw new SearchException(ioe);
  } finally {
    flushEL(reader);
    closeEL(reader);
  }
}
/**
 * Like build(), but without flushing the old entries, and *ignores duplicate entries*.
 *
 * @param dict the dictionary whose entries are merged into the existing lookup
 * @throws IOException if reading the dictionary or updating the lookup fails
 */
public void add(Dictionary dict) throws IOException {
  InputIterator iter = dict.getEntryIterator();
  BytesRef text;
  while ((text = iter.next()) != null) {
    // skip entries whose key is already present in the lookup
    if (lookup(text.utf8ToString(), 1, true, false).size() > 0) {
      continue;
    }
    add(text, iter.contexts(), iter.weight(), iter.payload());
  }
}
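A usage sketch for add(Dictionary) above, assuming it is declared on a Lookup-style suggester; suggester, indexReader, and the file path are illustrative names. build() rebuilds the lookup from scratch, while add() merges further entries and skips keys that already exist.

// Hypothetical usage: seed from the index, then merge a curated file-based dictionary on top.
Dictionary mainDict = new LuceneDictionary(indexReader, "title");
Dictionary extraDict = new FileDictionary(new InputStreamReader(
    Files.newInputStream(Paths.get("extra-terms.txt")), StandardCharsets.UTF_8));
suggester.build(mainDict);  // flushes existing entries and rebuilds from mainDict
suggester.add(extraDict);   // adds new entries, skipping keys already present in the lookup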
private void buildSuggesterIndex() throws IOException {
  try (DirectoryReader reader = DirectoryReader.open(indexDir)) {
    final Dictionary dictionary = new DocumentDictionary(reader, "content", null, null, "username");
    suggester.build(dictionary);
    suggester.refresh();
  }
}
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
  BytesRefIterator tf = dictionary.getWordsIterator();
  assertNull(tf.getComparator());
  assertNull(tf.next());
  dir.close();
}
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
  if (params == null) {
    // should not happen; implies setParams was not called
    throw new IllegalStateException("Value of params not set");
  }
  String field = (String) params.get(FIELD);
  String payloadField = (String) params.get(PAYLOAD_FIELD);
  String weightExpression = (String) params.get(WEIGHT_EXPRESSION);
  Set<SortField> sortFields = new HashSet<SortField>();
  if (field == null) {
    throw new IllegalArgumentException(FIELD + " is a mandatory parameter");
  }
  if (weightExpression == null) {
    throw new IllegalArgumentException(WEIGHT_EXPRESSION + " is a mandatory parameter");
  }
  for (int i = 0; i < params.size(); i++) {
    if (params.getName(i).equals(SORT_FIELD)) {
      String sortFieldName = (String) params.getVal(i);
      SortField.Type sortFieldType = getSortFieldType(core, sortFieldName);
      if (sortFieldType == null) {
        throw new IllegalArgumentException(sortFieldName + " could not be mapped to any appropriate type"
            + " [long, int, float, double]");
      }
      SortField sortField = new SortField(sortFieldName, sortFieldType);
      sortFields.add(sortField);
    }
  }
  return new DocumentValueSourceDictionary(searcher.getIndexReader(), field,
      fromExpression(weightExpression, sortFields), payloadField);
}
/** Build lookup from a dictionary. Some implementations may require sorted
 *  or unsorted keys from the dictionary's iterator - use
 *  {@link SortedInputIterator} or
 *  {@link UnsortedInputIterator} in such case.
 */
public void build(Dictionary dict) throws IOException {
  BytesRefIterator it = dict.getWordsIterator();
  InputIterator tfit;
  if (it instanceof InputIterator) {
    tfit = (InputIterator) it;
  } else {
    tfit = new InputIterator.InputIteratorWrapper(it);
  }
  build(tfit);
}
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
  log.info("Observed Wine added/updated event for {1} from Thread {0}",
      Thread.currentThread().getName(), String.valueOf(nDocVer));
  String text = (nDocVer != null) ? nDocVer.getText() : null;
  if (text != null) {
    Dictionary dictionary = null;
    try {
      FullTextEntityManager ftEm = (FullTextEntityManager) entityManager;
      SearchFactory searchFactory = ftEm.getSearchFactory();
      dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
    } catch (IOException ioExc) {
      // pass the values as message arguments instead of concatenating them onto the pattern
      log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}",
          text, nDocVer.getUuid(), ioExc.toString());
    }
    if (dictionary != null) {
      Directory dir = null;
      // only allow one thread to update the index at a time ...
      // the Dictionary is pre-computed, so it should happen quickly ...
      // this synchronized approach only works because this component is application-scoped
      synchronized (this) {
        try {
          dir = FSDirectory.open(new File("lucene_index/spellcheck"));
          SpellChecker spell = new SpellChecker(dir);
          spell.indexDictionary(dictionary);
          spell.close();
          log.info("Successfully updated the spell checker index after Document added/updated.");
        } catch (Exception exc) {
          log.error("Failed to update the spell checker index!", exc);
        } finally {
          if (dir != null) {
            try {
              dir.close();
            } catch (Exception zzz) {
            }
          }
        }
      }
    }
  }
}
/**
 * Creates a new spell-check index based on search-index
 */
public void createSpellIndex() {
  if (isSpellCheckEnabled) {
    IndexReader indexReader = null;
    try {
      log.info("Start generating Spell-Index...");
      long startSpellIndexTime = 0;
      if (log.isDebugEnabled()) {
        startSpellIndexTime = System.currentTimeMillis();
      }
      final Directory indexDir = FSDirectory.open(new File(indexPath));
      indexReader = IndexReader.open(indexDir);
      // 1. Create content spellIndex
      final File spellDictionaryFile = new File(spellDictionaryPath);
      final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH)); // true
      final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
      final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
      contentSpellChecker.indexDictionary(contentDictionary);
      // 2. Create title spellIndex
      final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH)); // true
      final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
      final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
      titleSpellChecker.indexDictionary(titleDictionary);
      // 3. Create description spellIndex
      final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH)); // true
      final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
      final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
      descriptionSpellChecker.indexDictionary(descriptionDictionary);
      // 4. Create author spellIndex
      final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH)); // true
      final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
      final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
      authorSpellChecker.indexDictionary(authorDictionary);
      // Merge all part spell indexes (content, title etc.) to one common spell index
      final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile); // true
      final IndexWriter merger = new IndexWriter(spellIndexDirectory,
          new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
      final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory,
          descriptionSpellIndexDirectory, authorSpellIndexDirectory };
      merger.addIndexesNoOptimize(directories);
      merger.optimize();
      merger.close();
      spellChecker = new SpellChecker(spellIndexDirectory);
      spellChecker.setAccuracy(0.7f);
      if (log.isDebugEnabled()) {
        log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
      }
      log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
      log.warn("Can not create SpellIndex", ioEx);
    } finally {
      if (indexReader != null) {
        try {
          indexReader.close();
        } catch (final IOException e) {
          log.warn("Can not close indexReader properly", e);
        }
      }
    }
  }
}
public void addDictionary(Dictionary dict, long minWeight, long maxWeight, float weight) {
  dicts.add(new WeightedDictionary(dict, minWeight, maxWeight, weight));
}
WeightedDictionary(Dictionary dict, long minWeight, long maxWeight, float weight) {
  this.dict = dict;
  this.minWeight = minWeight;
  this.maxWeight = maxWeight;
  this.weight = weight;
}
/** Build lookup from a dictionary. Some implementations may require sorted
 *  or unsorted keys from the dictionary's iterator - use
 *  {@link SortedInputIterator} or
 *  {@link UnsortedInputIterator} in such case.
 */
public void build(Dictionary dict) throws IOException {
  build(dict.getEntryIterator());
}
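A sketch of how build(Dictionary) above is typically driven, not from the original source: it assumes an open IndexReader and a Directory for the suggester's own index, and AnalyzingInfixSuggester, the field name "title", and the 0.01f threshold are illustrative choices.

// Illustrative only: any Lookup works; AnalyzingInfixSuggester accepts an unsorted iterator.
Dictionary dict = new HighFrequencyDictionary(indexReader, "title", 0.01f);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(suggestDir, new StandardAnalyzer());
suggester.build(dict);  // internally consumes dict.getEntryIterator()
List<Lookup.LookupResult> results = suggester.lookup("luc", 5, true, false);
suggester.close();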
/**
 * Create a Dictionary using options in <code>core</code>, optionally using
 * <code>searcher</code> in the case of index-based dictionaries.
 */
public abstract Dictionary create(SolrCore core, SolrIndexSearcher searcher);