/**
 * Verifies that sorting on a SortedDocValues field returns hits in ascending
 * byte order of the indexed values ("aa", "bb", ... "jj").
 */
public void testSortValues() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        Document document = new Document();
        // Two identical chars per doc: 'a'+i repeated, e.g. "aa", "bb", ...
        String text = new String(new char[]{(char) (97 + i), (char) (97 + i)});
        document.add(new TextField("str", text, Field.Store.YES));
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10,
                new Sort(new SortField("str", SortField.Type.STRING)));
        for (int i = 0; i < 10; i++) {
            FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
            assertThat((BytesRef) fieldDoc.fields[0],
                    equalTo(new BytesRef(new String(new char[]{(char) (97 + i), (char) (97 + i)}))));
        }
    } finally {
        // Fix: the original never closed these, leaking the reader, writer
        // and directory across test runs.
        reader.close();
        indexWriter.close();
        dir.close();
    }
}
/**
 * Verifies that a near-real-time reader remains usable for term iteration
 * after the IndexWriter it was opened from has been closed.
 */
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    try {
        for (int i = 0; i < 100; i++) {
            Document document = new Document();
            TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
            field.setBoost(i);
            document.add(field);
            indexWriter.addDocument(document);
        }
        reader = refreshReader(reader);
        indexWriter.close();
        // Reading terms must still work on the NRT reader after writer close.
        TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
        termDocs.next();
    } finally {
        // Fix: the original leaked the reader and directory.
        reader.close();
        dir.close();
    }
}
/**
 * Produces per-document integer values that reverse the term ordinal of
 * {@code field}: the highest ordinal maps to 0, the lowest to valueCount-1.
 * Ordinals are resolved against the top-level reader, so {@code docBase} is
 * added to segment-relative doc ids.
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    // Flatten the whole index into a single atomic view for a global ord space.
    final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
    final AtomicReader atomic = SlowCompositeReaderWrapper.wrap(top);
    final int docBase = readerContext.docBase;
    final SortedDocValues ordinals = FieldCache.DEFAULT.getTermsIndex(atomic, field);
    final int valueCount = ordinals.getValueCount();
    return new IntDocValues(this) {
        @Override
        public int intVal(int doc) {
            return valueCount - ordinals.getOrd(doc + docBase) - 1;
        }
    };
}
@Override public List<AtomicReader> getMergeReaders() throws IOException { if (unsortedReaders == null) { unsortedReaders = super.getMergeReaders(); final AtomicReader atomicView; if (unsortedReaders.size() == 1) { atomicView = unsortedReaders.get(0); } else { final IndexReader multiReader = new MultiReader(unsortedReaders.toArray(new AtomicReader[unsortedReaders.size()])); atomicView = SlowCompositeReaderWrapper.wrap(multiReader); } docMap = sorter.sort(atomicView); sortedView = SortingAtomicReader.wrap(atomicView, docMap); } // a null doc map means that the readers are already sorted return docMap == null ? unsortedReaders : Collections.singletonList(sortedView); }
/**
 * Trains the classifier on the sample index, asserts the expected class and a
 * positive score for {@code inputDoc}, then updates the index and verifies
 * the classification result (class and score) is unchanged.
 */
protected void checkOnlineClassification(Classifier<T> classifier, String inputDoc, T expectedResult,
        Analyzer analyzer, String textFieldName, String classFieldName, Query query) throws Exception {
    AtomicReader atomicReader = null;
    try {
        populateSampleIndex(analyzer);
        atomicReader = SlowCompositeReaderWrapper.wrap(indexWriter.getReader());
        classifier.train(atomicReader, textFieldName, classFieldName, analyzer, query);
        ClassificationResult<T> firstResult = classifier.assignClass(inputDoc);
        assertNotNull(firstResult.getAssignedClass());
        assertEquals("got an assigned class of " + firstResult.getAssignedClass(), expectedResult, firstResult.getAssignedClass());
        assertTrue("got a not positive score " + firstResult.getScore(), firstResult.getScore() > 0);
        // Adding more documents must not change the outcome for the same input.
        updateSampleIndex(analyzer);
        ClassificationResult<T> secondResult = classifier.assignClass(inputDoc);
        assertEquals(firstResult.getAssignedClass(), secondResult.getAssignedClass());
        assertEquals(Double.valueOf(firstResult.getScore()), Double.valueOf(secondResult.getScore()));
    } finally {
        if (atomicReader != null) {
            atomicReader.close();
        }
    }
}
/**
 * A non-strict builder must swallow the numeric parse error ("NaN" upper
 * term) and produce a filter that simply matches no documents.
 */
public void testGetFilterHandleNumericParseError() throws Exception {
    NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
    filterBuilder.setStrictMode(false);
    String xml = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='NaN'/>";
    Filter filter = filterBuilder.getFilter(getDocumentFromString(xml).getDocumentElement());
    Directory ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(null));
    writer.commit();
    try {
        AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(ramDir));
        try {
            // A parse failure in lenient mode yields a match-nothing filter.
            assertNull(filter.getDocIdSet(atomicReader.getContext(), atomicReader.getLiveDocs()));
        } finally {
            atomicReader.close();
        }
    } finally {
        writer.commit();
        writer.close();
        ramDir.close();
    }
}
public void testCachingWorks() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); writer.close(); IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); AtomicReaderContext context = (AtomicReaderContext) reader.getContext(); MockFilter filter = new MockFilter(); CachingWrapperFilter cacher = new CachingWrapperFilter(filter); // first time, nested filter is called DocIdSet strongRef = cacher.getDocIdSet(context, context.reader().getLiveDocs()); assertTrue("first time", filter.wasCalled()); // make sure no exception if cache is holding the wrong docIdSet cacher.getDocIdSet(context, context.reader().getLiveDocs()); // second time, nested filter should not be called filter.clear(); cacher.getDocIdSet(context, context.reader().getLiveDocs()); assertFalse("second time", filter.wasCalled()); reader.close(); dir.close(); }
/**
 * Checks which kinds of DocIdSets CachingWrapperFilter considers worth
 * caching, via assertDocIdSetCacheable(reader, filter, shouldBeWrapped).
 */
public void testIsCacheAble() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    writer.addDocument(new Document());
    writer.close();
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
    // not cacheable:
    assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false);
    // returns default empty docidset, always cacheable:
    assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true), true);
    // NOTE(review): expected NOT cacheable here (the assertion passes false);
    // the previous "is cacheable" comment contradicted the assertion — confirm
    // which one matches the intended CachingWrapperFilter behavior.
    assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), false);
    // a fixedbitset filter is always cacheable
    assertDocIdSetCacheable(reader, new Filter() {
        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
            return new FixedBitSet(context.reader().maxDoc());
        }
    }, true);
    reader.close();
    dir.close();
}
public void testBasics() throws Exception { // sanity check of norms writer // TODO: generalize AtomicReader slow = SlowCompositeReaderWrapper.wrap(reader); NumericDocValues fooNorms = slow.getNormValues("foo"); NumericDocValues barNorms = slow.getNormValues("bar"); for (int i = 0; i < slow.maxDoc(); i++) { assertFalse(fooNorms.get(i) == barNorms.get(i)); } // sanity check of searching TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10); assertTrue(foodocs.totalHits > 0); TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10); assertTrue(bardocs.totalHits > 0); assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score); }
/**
 * Builds the test index: one document per entry of {@code values}, indexed
 * into {@code FIELD} with DefaultSimilarity, then opens reader and searcher.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
            newIndexWriterConfig(new MockAnalyzer(random()))
                    .setMergePolicy(newLogMergePolicy())
                    .setSimilarity(new DefaultSimilarity()));
    for (int i = 0; i < values.length; i++) {
        Document document = new Document();
        document.add(newTextField(FIELD, values[i], Field.Store.YES));
        writer.addDocument(document);
    }
    indexReader = SlowCompositeReaderWrapper.wrap(writer.getReader());
    writer.close();
    indexSearcher = newSearcher(indexReader);
    // Searcher must use the same similarity the index was written with.
    indexSearcher.setSimilarity(new DefaultSimilarity());
}
/**
 * Initializes LuceneUtils from the Lucene index named by
 * {@link FlagConfig#luceneindexpath()}, opening both a composite and a
 * flattened (slow-wrapped) leaf view of the index, and loading optional
 * stop/start word lists.
 *
 * @param flagConfig Contains all information necessary for configuring LuceneUtils.
 * {@link FlagConfig#luceneindexpath()} must be non-empty.
 * @throws IOException if the index directory cannot be opened or read
 * @throws IllegalArgumentException if no index path was configured
 */
public LuceneUtils(FlagConfig flagConfig) throws IOException {
    if (flagConfig.luceneindexpath().isEmpty()) {
        throw new IllegalArgumentException(
            "-luceneindexpath is a required argument for initializing LuceneUtils instance.");
    }
    this.compositeReader = DirectoryReader.open(
        FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath())));
    this.leafReader = SlowCompositeReaderWrapper.wrap(compositeReader);
    // NOTE(review): return value is discarded — presumably a warm-up or
    // validation call; confirm it is needed, otherwise it looks like dead code.
    MultiFields.getFields(compositeReader);
    this.flagConfig = flagConfig;
    if (!flagConfig.stoplistfile().isEmpty())
        loadStopWords(flagConfig.stoplistfile());
    if (!flagConfig.startlistfile().isEmpty())
        loadStartWords(flagConfig.startlistfile());
    VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
    VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
}
/**
 * Dumps every term of the content field together with its document frequency.
 *
 * @return insertion-ordered map of term text to docFreq; empty when the field
 *         has no terms or when reading the index fails (the failure is logged)
 */
private Map<String, Integer> getTermsFromIndex() {
    Map<String, Integer> indexedTerms = Maps.newLinkedHashMap();
    try {
        DirectoryReader indexReader = indexer.getIndexReader();
        try {
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(Indexer.INDEX_CONTENT);
            if (terms != null) {
                TermsEnum termEnum = terms.iterator(null);
                BytesRef byteRef;
                while ((byteRef = termEnum.next()) != null) {
                    String term = byteRef.utf8ToString();
                    int count = indexReader.docFreq(new Term(Indexer.INDEX_CONTENT, byteRef));
                    indexedTerms.put(term, Integer.valueOf(count));
                }
            }
        } finally {
            // Fix: close in finally — the original leaked the reader on the
            // terms == null early return and on any exception before close().
            indexReader.close();
        }
    } catch (Exception e) {
        logger.error("Failed to dump index", e);
    }
    return indexedTerms;
}
/**
 * Produces per-document integer values that reverse the term ordinal of
 * {@code field}: the highest ordinal maps to 0, the lowest to valueCount-1.
 * Ordinals are resolved against the top-level reader, so {@code docBase} is
 * added to segment-relative doc ids.
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    // SlowCompositeReaderWrapper.wrap already returns an atomic reader
    // unchanged, so the manual instanceof/cast is redundant; this also makes
    // the code consistent with the sibling implementation that uses wrap().
    final AtomicReader r = SlowCompositeReaderWrapper.wrap(topReader);
    final int off = readerContext.docBase;
    final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(r, field);
    final int end = sindex.getValueCount();
    return new IntDocValues(this) {
        @Override
        public int intVal(int doc) {
            return (end - sindex.getOrd(doc + off) - 1);
        }
    };
}
/**
 * A non-strict builder must swallow the numeric parse error ("NaN" upper
 * term) and produce a filter that simply matches no documents.
 */
public void testGetFilterHandleNumericParseError() throws Exception {
    NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
    filterBuilder.setStrictMode(false);
    String xml = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='NaN'/>";
    Document doc = getDocumentFromString(xml);
    Filter filter = filterBuilder.getFilter(doc.getDocumentElement());
    Directory ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
    writer.commit();
    try {
        // Consistency fix: use the wrap() factory (as the sibling test does)
        // instead of the raw SlowCompositeReaderWrapper constructor.
        AtomicReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(ramDir));
        try {
            assertNull(filter.getDocIdSet(reader.getContext(), reader.getLiveDocs()));
        } finally {
            reader.close();
        }
    } finally {
        writer.commit();
        writer.close();
        ramDir.close();
    }
}
public void testIsCacheAble() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); writer.addDocument(new Document()); writer.close(); IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir)); // not cacheable: assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false); // returns default empty docidset, always cacheable: assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", Integer.valueOf(10000), Integer.valueOf(-10000), true, true), true); // is cacheable: assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", Integer.valueOf(10), Integer.valueOf(20), true, true), true); // a fixedbitset filter is always cacheable assertDocIdSetCacheable(reader, new Filter() { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) { return new FixedBitSet(context.reader().maxDoc()); } }, true); reader.close(); dir.close(); }
public void testBasics() throws Exception { // sanity check of norms writer // TODO: generalize AtomicReader slow = new SlowCompositeReaderWrapper(reader); NumericDocValues fooNorms = slow.getNormValues("foo"); NumericDocValues barNorms = slow.getNormValues("bar"); for (int i = 0; i < slow.maxDoc(); i++) { assertFalse(fooNorms.get(i) == barNorms.get(i)); } // sanity check of searching TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10); assertTrue(foodocs.totalHits > 0); TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10); assertTrue(bardocs.totalHits > 0); assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score); }
/**
 * Builds the test index: one document per entry of {@code values}, indexed
 * into {@code FIELD} with DefaultSimilarity, then opens reader and searcher.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setMergePolicy(newLogMergePolicy())
                    .setSimilarity(new DefaultSimilarity()));
    for (int i = 0; i < values.length; i++) {
        Document document = new Document();
        document.add(newTextField(FIELD, values[i], Field.Store.YES));
        writer.addDocument(document);
    }
    indexReader = SlowCompositeReaderWrapper.wrap(writer.getReader());
    writer.close();
    indexSearcher = newSearcher(indexReader);
    // Searcher must use the same similarity the index was written with.
    indexSearcher.setSimilarity(new DefaultSimilarity());
}
/**
 * Computes the total word count of the chapter text field by summing all term
 * frequencies in the index, closing the reader when done.
 */
@Override
public TextMetrics execute(ModuleResultProvider results, ProgressListener progressListener) throws Exception {
    IndexReader reader = results.getResultFor(LuceneResult.class).getIndexReader();
    final int count;
    try {
        Terms terms = SlowCompositeReaderWrapper.wrap(reader)
                .terms(TextRepository.CHAPTER_TEXT_FIELD);
        // Sum of all term occurrences == total number of words in the field.
        count = (int) terms.getSumTotalTermFreq();
    } finally {
        reader.close();
    }
    return new TextMetrics() {
        @Override
        public int getWordCount() {
            return count;
        }
    };
}
/**
 * A non-strict builder must swallow the numeric parse error ("NaN" upper
 * term) and produce a filter that simply matches no documents.
 */
public void testGetFilterHandleNumericParseError() throws Exception {
    NumericRangeFilterBuilder filterBuilder = new NumericRangeFilterBuilder();
    filterBuilder.setStrictMode(false);
    String xml = "<NumericRangeFilter fieldName='AGE' type='int' lowerTerm='-1' upperTerm='NaN'/>";
    Filter filter = filterBuilder.getFilter(getDocumentFromString(xml).getDocumentElement());
    Directory ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
    writer.commit();
    try {
        AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(ramDir));
        try {
            // A parse failure in lenient mode yields a match-nothing filter.
            assertNull(filter.getDocIdSet(atomicReader.getContext(), atomicReader.getLiveDocs()));
        } finally {
            atomicReader.close();
        }
    } finally {
        writer.commit();
        writer.close();
        ramDir.close();
    }
}
/**
 * Reads the version term stored in the morphological dictionary index.
 *
 * @return the BabelMorph API version string
 * @throws IOException if the index cannot be read, or if it contains no
 *         version field / no version term (the original code would have
 *         thrown a NullPointerException in those cases)
 */
public String getBabelMorphVersion() throws IOException {
    Terms terms = SlowCompositeReaderWrapper.wrap(morphologicalDictionary.getIndexReader())
            .terms(BabelMorphIndexField.VERSION.toString());
    if (terms == null) {
        throw new IOException("No version field found in the BabelMorph index.");
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef first = termsEnum.next();
    if (first == null) {
        throw new IOException("Version field of the BabelMorph index is empty.");
    }
    return "BabelMorph API v" + first.utf8ToString();
}
/**
 * Lists all languages indexed in the morphological dictionary.
 *
 * @return the available languages; empty if the language field is absent
 * @throws IOException if the index cannot be read
 */
public Set<Language> getBabelMorphLanguages() throws IOException {
    Set<Language> languages = new HashSet<>();
    Terms terms = SlowCompositeReaderWrapper.wrap(morphologicalDictionary.getIndexReader())
            .terms(BabelMorphIndexField.LANGUAGE.toString());
    // terms() returns null for a missing field; the original would NPE here.
    if (terms == null) {
        return languages;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        languages.add(Language.valueOf(text.utf8ToString()));
    }
    return languages;
}
/**
 * Lists all parts of speech indexed in the morphological dictionary.
 *
 * @return the available parts of speech; empty if the POS field is absent
 * @throws IOException if the index cannot be read
 */
public Set<POS> getBabelMorphPoS() throws IOException {
    Set<POS> pos = new HashSet<>();
    Terms terms = SlowCompositeReaderWrapper.wrap(morphologicalDictionary.getIndexReader())
            .terms(BabelMorphIndexField.POS.toString());
    // terms() returns null for a missing field; the original would NPE here.
    if (terms == null) {
        return pos;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        pos.add(POS.valueOf(text.utf8ToString()));
    }
    return pos;
}
/**
 * Returns a collection of strings used to populate the table with the given
 * name. It is used both by the GUI to display the values and during Segmenter
 * initialization to physically fill the table.
 *
 * @param tableName name of the table
 * @return list of values (trimmed, lower-cased); empty when the table has no
 *         mapped field or on read errors (which are logged)
 */
public Collection<? extends String> getValuesForTable(String tableName) {
    List<String> values = new ArrayList<>();
    // Table names map back to index field names through the inverse view.
    String field = fieldsTable.inverse().get(tableName);
    if (field == null) {
        return values;
    }
    if (reader == null) {
        openIndex();
    }
    try {
        final LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
        final int maxDoc = reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            String raw = leaf.document(docId).get(field);
            if (raw != null) {
                String trimmed = raw.trim();
                if (trimmed.length() > 0) {
                    values.add(trimmed.toLowerCase());
                }
            }
        }
    } catch (Exception e) {
        LogGui.printException(e);
    }
    return values;
}
public void testDocMakerThreadSafety() throws Exception { // 1. alg definition (required in every "logic" test) String algLines[] = { "# ----- properties ", "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource", "doc.term.vector=false", "log.step.AddDoc=10000", "content.source.forever=true", "directory=RAMDirectory", "doc.reuse.fields=false", "doc.stored=false", "doc.tokenized=false", "doc.index.props=true", "# ----- alg ", "CreateIndex", "[ { AddDoc > : 250 ] : 4", "CloseIndex", }; // 2. we test this value later CountingSearchTestTask.numSearches = 0; // 3. execute the algorithm (required in every "logic" test) Benchmark benchmark = execBenchmark(algLines); DirectoryReader r = DirectoryReader.open(benchmark.getRunData().getDirectory()); SortedDocValues idx = FieldCache.DEFAULT.getTermsIndex(SlowCompositeReaderWrapper.wrap(r), "country"); final int maxDoc = r.maxDoc(); assertEquals(1000, maxDoc); for(int i=0;i<1000;i++) { assertTrue("doc " + i + " has null country", idx.getOrd(i) != -1); } r.close(); }
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); //Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags addDoc(writer, "admin guest", "010", "20040101", "Y"); addDoc(writer, "guest", "020", "20040101", "Y"); addDoc(writer, "guest", "020", "20050101", "Y"); addDoc(writer, "admin", "020", "20050101", "Maybe"); addDoc(writer, "admin guest", "030", "20050101", "N"); reader = SlowCompositeReaderWrapper.wrap(writer.getReader()); writer.close(); }
/**
 * A term filter must return null for a missing term or field, and an iterator
 * over exactly the one matching document otherwise.
 */
public void testMissingTermAndField() throws Exception {
    String fieldName = "field1";
    Directory rd = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), rd);
    Document doc = new Document();
    doc.add(newStringField(fieldName, "value1", Field.Store.NO));
    w.addDocument(doc);
    IndexReader reader = SlowCompositeReaderWrapper.wrap(w.getReader());
    assertTrue(reader.getContext() instanceof AtomicReaderContext);
    AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
    w.close();
    // Existing field + existing term: exactly one hit (doc 0).
    DocIdSet idSet = termFilter(fieldName, "value1").getDocIdSet(context, context.reader().getLiveDocs());
    assertNotNull("must not be null", idSet);
    DocIdSetIterator iter = idSet.iterator();
    assertEquals(iter.nextDoc(), 0);
    assertEquals(iter.nextDoc(), DocIdSetIterator.NO_MORE_DOCS);
    // Missing term on an existing field.
    idSet = termFilter(fieldName, "value2").getDocIdSet(context, context.reader().getLiveDocs());
    assertNull("must be null", idSet);
    // Missing field entirely.
    idSet = termFilter("field2", "value1").getDocIdSet(context, context.reader().getLiveDocs());
    assertNull("must be null", idSet);
    reader.close();
    rd.close();
}
/**
 * Builds the shared test index once for the class: at least 20 random docs,
 * then opens a flat (slow-wrapped) reader over it.
 */
@BeforeClass
public static void beforeClassSorterTestBase() throws Exception {
    dir = newDirectory();
    // At least 20 docs so the sorter has something meaningful to reorder.
    final int docCount = atLeast(20);
    createIndex(dir, docCount, random());
    reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
}