public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    FSDirectory dir = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(dir, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
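// Both benchmarks above call an addDocument(int) helper that is not shown in this section.
// A minimal sketch, assuming each document carries a single exact-match "key1" field,
// the field that the TermQuery lookups above search against:
private Document addDocument(int i) {
    Document doc = new Document();
    // StringField indexes the value as a single untokenized term,
    // so TermQuery(new Term("key1", "key" + i)) matches it exactly
    doc.add(new StringField("key1", "key" + i, Field.Store.YES));
    return doc;
}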
private IndexWriterConfig getIndexWriterConfig(boolean create) {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(deletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    MergePolicy mergePolicy = config().getMergePolicy();
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
    iwc.setMergePolicy(mergePolicy);
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return iwc;
}
/**
 * This method removes all lucene files from the given directory. It will first try to delete all commit points / segments
 * files to ensure broken commits or corrupted indices will not be opened in the future. If any of the segment files can't
 * be deleted, this operation fails.
 */
public static void cleanLuceneIndex(Directory directory) throws IOException {
    try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
        for (final String file : directory.listAll()) {
            if (file.startsWith(IndexFileNames.SEGMENTS) || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                directory.deleteFile(file); // remove all segments_N files
            }
        }
    }
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
            .setMergePolicy(NoMergePolicy.INSTANCE) // no merges
            .setCommitOnClose(false) // no commits
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE))) // force creation - don't append...
    {
        // do nothing and close; this will kick off IndexFileDeleter, which will remove all pending files
    }
}
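// A hedged usage sketch for cleanLuceneIndex above, assuming it lives on the same
// Lucene utility class that provides Lucene.STANDARD_ANALYZER; the path is illustrative:
try (Directory dir = FSDirectory.open(Paths.get("/var/data/shard0/index"))) {
    Lucene.cleanLuceneIndex(dir); // deletes segments_N files, then recreates an empty index
}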
private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field textField = new Field(field, "", ft);
    Document doc = new Document();
    doc.add(textField);
    textField.setStringValue(value);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    return ir;
}
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);

    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"),
            "field", 0.9d, BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
public void testSingleValued() throws IOException {
    Directory dir = newDirectory();
    // we need the default codec to check for singletons
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
        doc.add(f);
    }
    w.addDocument(doc);
    final DirectoryReader dirReader = DirectoryReader.open(w);
    LeafReader reader = getOnlyLeafReader(dirReader);
    SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
            reader, "half_float").getDoubleValues();
    assertNotNull(FieldData.unwrapSingleton(values));
    values.setDocument(0);
    assertEquals(1, values.count());
    assertEquals(3f, values.valueAt(0), 0f);
    IOUtils.close(dirReader, w, dir);
}
public static InternalEngine createInternalEngine(@Nullable final IndexWriterFactory indexWriterFactory,
                                                  @Nullable final Supplier<SequenceNumbersService> sequenceNumbersServiceSupplier,
                                                  final EngineConfig config) {
    return new InternalEngine(config) {
        @Override
        IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOException {
            return (indexWriterFactory != null)
                    ? indexWriterFactory.createWriter(directory, iwc)
                    : super.createWriter(directory, iwc);
        }

        @Override
        public SequenceNumbersService seqNoService() {
            return (sequenceNumbersServiceSupplier != null)
                    ? sequenceNumbersServiceSupplier.get()
                    : super.seqNoService();
        }
    };
}
private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(actual);
    IndexWriter iw = new IndexWriter(dir, iwc);
    iw.addDocument(new Document());
    iw.commit();
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
    String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
    assertNotNull(v);
    assertEquals(expected, Mode.valueOf(v));
    ir.close();
    dir.close();
}
public void testCanOpenIndex() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    IndexWriterConfig iwc = newIndexWriterConfig();
    Path tempDir = createTempDir();
    final BaseDirectoryWrapper dir = newFSDirectory(tempDir);
    assertFalse(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    assertTrue(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    DirectoryService directoryService = new DirectoryService(shardId, INDEX_SETTINGS) {
        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    store.markStoreCorrupted(new CorruptIndexException("foo", "bar"));
    assertFalse(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    store.close();
}
public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
public void testVectorHighlighterNoStore() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
public void testVectorHighlighterNoTermVector() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("content", "the big bad dog", Field.Store.YES));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
public void testSortValues() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        Document document = new Document();
        String text = new String(new char[]{(char) (97 + i), (char) (97 + i)});
        document.add(new TextField("str", text, Field.Store.YES));
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING)));
    for (int i = 0; i < 10; i++) {
        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
        assertThat((BytesRef) fieldDoc.fields[0],
                equalTo(new BytesRef(new String(new char[]{(char) (97 + i), (char) (97 + i)}))));
    }
}
public void testSimpleNumericOps() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    BytesRefBuilder bytes = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
    topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
    doc = searcher.doc(topDocs.scoreDocs[0].doc);
    f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    indexWriter.close();
}
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);

    indexWriter.close();

    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
/**
 * test version lookup actually works
 */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    LeafReaderContext segment = reader.leaves().get(0);
    PerThreadIDAndVersionLookup lookup = new PerThreadIDAndVersionLookup(segment.reader());
    // found doc
    DocIdAndVersion result = lookup.lookup(new BytesRef("6"), null, segment);
    assertNotNull(result);
    assertEquals(87, result.version);
    assertEquals(0, result.docId);
    // not found doc
    assertNull(lookup.lookup(new BytesRef("7"), null, segment));
    // deleted doc
    assertNull(lookup.lookup(new BytesRef("6"), new Bits.MatchNoBits(1), segment));
    reader.close();
    writer.close();
    dir.close();
}
/** Test that version map cache works, is evicted on close, etc */
public void testCache() throws Exception {
    int size = Versions.lookupStates.size();

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    // should increase cache size by 1
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());
    // should be cache hit
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());

    reader.close();
    writer.close();
    // core should be evicted from the map
    assertEquals(size, Versions.lookupStates.size());
    dir.close();
}
/** Test that version map cache behaves properly with a filtered reader */
public void testCacheFilterReader() throws Exception {
    int size = Versions.lookupStates.size();

    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());
    // now wrap the reader
    DirectoryReader wrapped = ElasticsearchDirectoryReader.wrap(reader, new ShardId("bogus", "_na_", 5));
    assertEquals(87, Versions.loadVersion(wrapped, new Term(UidFieldMapper.NAME, "6")));
    // same size map: core cache key is shared
    assertEquals(size + 1, Versions.lookupStates.size());

    reader.close();
    writer.close();
    // core should be evicted from the map
    assertEquals(size, Versions.lookupStates.size());
    dir.close();
}
public void testNoTokens() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));

    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "", 2.0f, allFt));
    indexWriter.addDocument(doc);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
    assertThat(docs.totalHits, equalTo(1));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
@Override
public void process(ProcessingContext<Corpus> ctx, Corpus corpus) throws ModuleException {
    try (KeywordAnalyzer kwa = new KeywordAnalyzer()) {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, kwa);
        writerConfig.setOpenMode(append ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
        try (Directory dir = FSDirectory.open(indexDir)) {
            try (IndexWriter writer = new IndexWriter(dir, writerConfig)) {
                AlvisDBIndexerResolvedObjects resObj = getResolvedObjects();
                Logger logger = getLogger(ctx);
                EvaluationContext evalCtx = new EvaluationContext(logger);
                for (ADBElements.Resolved ent : resObj.elements) {
                    ent.indexElements(logger, writer, evalCtx, corpus);
                }
            }
        } catch (IOException e) {
            rethrow(e);
        }
    }
}
/**
 * Main entry point.
 *
 * @param args the command line arguments.
 * @throws IOException in case of I/O failure.
 * @throws ParseException in case of Query parse exception.
 */
public static void main(String[] args) throws IOException, ParseException {
    // 1. Creates a directory reference. This is where index datafiles will be created.
    Directory directory = FSDirectory.open(new File("/tmp").toPath());

    // 2. Creates an IndexWriter
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {
        // 3. Add some data
        indexSomeData(writer);

        // 4. Search
        search(directory);

        writer.deleteAll();
    }
}
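// The indexSomeData and search helpers referenced in main(...) above are not shown.
// Minimal sketches, assuming a single stored "title" field and a StandardAnalyzer
// (the default analyzer of the no-arg IndexWriterConfig used above); bodies are assumptions:
private static void indexSomeData(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("title", "Lucene in a nutshell", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit(); // make the document visible to a reader opened on the directory
}

private static void search(Directory directory) throws IOException, ParseException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("title", new StandardAnalyzer());
        TopDocs hits = searcher.search(parser.parse("lucene"), 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            System.out.println(searcher.doc(sd.doc).get("title"));
        }
    }
}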
@Test
public void test() throws Exception {
    Path path = FileSystems.getDefault().getPath("", "index");
    Directory directory = FSDirectory.open(path);
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer)
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);

    Document document = new Document();
    document.add(new LegacyLongField("id", 5499, Field.Store.YES));
    document.add(new Field("title", "小米6", TYPE_STORED));
    document.add(new Field("sellPoint", "骁龙835,6G内存,双摄!", TYPE_STORED));
    indexWriter.addDocument(document);

    indexWriter.commit();
    indexWriter.close();
}
/**
 * Stores features from a specified feature file to the specified project's Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId the ID of the FeatureFile whose features should be saved
 * @param projectId the project for which to write the index
 * @param entries a list of FeatureIndexEntry objects to write to the index
 * @throws IOException
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
                                       final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (
        StandardAnalyzer analyzer = new StandardAnalyzer();
        Directory index = fileManager.createIndexForProject(projectId);
        IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer).setOpenMode(
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);
            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }
            writer.addDocument(facetsConfig.build(document));
        }
    }
}
/**
 * Deletes features of the specified feature files from a project's index
 *
 * @param projectId the project whose index entries should be deleted
 * @param fileIds pairs of feature type to file ID whose entries should be deleted. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }
    try (
        StandardAnalyzer analyzer = new StandardAnalyzer();
        Directory index = fileManager.getIndexForProject(projectId);
        IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer).setOpenMode(
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    // TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
/**
 * Initializes profanity set.
 *
 * @param dictFilePath
 *            dictionary file path
 */
private void initializeProfanitySet(String dictFilePath) {
    if (dictFilePath != null) {
        File file = new File(dictFilePath);
        if (file.exists() && file.isFile()) {
            try {
                IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
                IndexWriter indexWriter = new IndexWriter(directory, config);
                BufferedReader reader = new BufferedReader(new FileReader(file));
                Set<String> bannedWords = new HashSet<String>();
                String line = null;
                while ((line = reader.readLine()) != null) {
                    bannedWords.add(line.trim());
                    Document doc = new Document();
                    doc.add(new StringField(LUCENE_FIELD_NAME, line, Store.NO));
                    indexWriter.addDocument(doc);
                }
                this.bannedWords = bannedWords;
                indexWriter.close();
                reader.close();
            } catch (Exception ex) {
                LOG.error("Error reading file", ex);
            }
        }
    }
}
public InMemoryIndex(Map<String, String> id2Text) {
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, iwc);
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
private static List<String> search(List<String> contents, String query, int n) throws IOException, ParseException {
    List<String> results = new ArrayList<>();
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer()));
    for (String method : contents) {
        Document document = new Document();
        document.add(new TextField("content", method, Field.Store.YES));
        indexWriter.addDocument(document);
    }
    indexWriter.close();

    QueryParser qp = new QueryParser("content", new EnglishAnalyzer());
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs topDocs = indexSearcher.search(qp.parse(query), n);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        results.add(indexSearcher.doc(scoreDoc.doc).get("content"));
    }
    return results;
}
/**
 * Indexes documents using the provided Analyzer
 *
 * @param create whether to create a new index or append to an existing one
 * @throws IOException
 */
public void index(final Boolean create, List<Document> documents, Analyzer analyzer) throws IOException {
    final Directory dir = FSDirectory.open(Paths.get(pathToIndexFolder));
    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    final IndexWriter w = new IndexWriter(dir, iwc);
    w.addDocuments(documents);
    w.close();
}
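// A hedged usage example for index(...) above; the "indexer" instance, field name,
// and analyzer choice are illustrative assumptions, not part of the original code:
List<Document> docs = new ArrayList<>();
Document doc = new Document();
doc.add(new TextField("body", "hello lucene", Field.Store.YES));
docs.add(doc);
indexer.index(true, docs, new StandardAnalyzer()); // true: start from a fresh index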
@Before
public void setupIndex() throws Exception {
    dir = new RAMDirectory();
    try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        for (int i = 0; i < docs.length; i++) {
            Document doc = new Document();
            doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE));
            doc.add(newTextField("text", docs[i], Field.Store.YES));
            indexWriter.addDocument(doc);
        }
    }
    reader = DirectoryReader.open(dir);
    searcher = new IndexSearcher(reader);
}
public static void createIndexQ(List<CQAResult> QASetList, Directory dir) {
    System.out.println("Creating Questions Index");
    IndexWriterConfig iwc = new IndexWriterConfig(ANALYZER.getVersion(), ANALYZER);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(dir, iwc);
        int id = 0; // XXX seq_id
        for (CQAResult qaSet : QASetList) {
            Document doc = new Document();
            if (qaSet.subject == null) {
                id++;
                continue;
            }
            doc.add(new IntField(QID, id++, Field.Store.YES));
            doc.add(new TextField(BEST_ANSWER_FIELD, qaSet.subject, Field.Store.NO));
            doc.add(new TextField(Q_DESCRIPTION, qaSet.content, Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Get the shared IndexWriter instance.
 *
 * @return the IndexWriter
 * @throws IOException
 */
protected static IndexWriter getIndexWriter() throws IOException {
    if (null != indexWriter) {
        return indexWriter;
    }
    // guard against concurrent initialization
    synchronized (IndexUtil.class) {
        // re-check under the lock so two threads cannot both create a writer
        if (null == indexWriter) {
            // initialize the writer
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            // open the existing index, or create it if it does not exist yet
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, config);
        }
        return indexWriter;
    }
}
public void createWriter(String indexPath) {
    /* The indexPath specifies where to create the index */
    // I can imagine that there are lots of ways to create indexers -
    // we could add some parameters to customize the creation
    try {
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        System.out.println("Indexing to directory '" + indexPath + "'...");
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
public void initializeIndexBuilder() throws Exception {
    // Create a new index directory and writer to index a triples file.
    // Raise an error if an index already exists, so we don't accidentally overwrite it.
    String indexDir = getIndexDirectoryName();
    if ((new File(indexDir)).isDirectory())
        throw new IOException("Index directory already exists, remove it before indexing");

    indexDirectory = FSDirectory.open(Paths.get(indexDir));
    IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer());
    // we always create a new index from scratch:
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED));           // the default
    //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION));   // slower, but better compression
    indexWriter = new IndexWriter(indexDirectory, iwc);
    indexAnalyzer = getIndexAnalyzer();

    if (INDEX_PREDICATES) printlnProg("Indexing individual predicates");
    if (INDEX_TEXT) printlnProg("Indexing combined predicate text values");
    if (INDEX_LANGUAGE) printlnProg("Indexing predicates for language(s): " + supportedLanguages);
}
/**
 * Opens all the needed streams that the engine needs to work properly.
 *
 * @throws IndexException
 */
private void openStreams() throws IndexException {
    try {
        if (_nativeLocking) {
            _storage = FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR));
        } else {
            _storage = FSDirectory.open(new File(INDEX_DIR));
        }
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        conf.setMaxBufferedDocs(_maxDocsBuffer);
        conf.setRAMBufferSizeMB(_maxRAMBufferSize);
        _iWriter = new IndexWriter(_storage, conf);
    } catch (IOException e) {
        closeAll();
        throw new IndexException("Unable to initialize the index", e);
    }
}