public void testSyncedFlush() throws IOException {
    try (Store store = createStore();
         Engine engine = new InternalEngine(config(defaultSettings, store, createTempDir(),
                 new LogByteSizeMergePolicy(), IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, null))) {
        final String syncId = randomUnicodeOfCodepointLengthBetween(10, 20);
        ParsedDocument doc = testParsedDocument("1", "test", null, testDocumentWithTextField(), B_1, null);
        engine.index(indexForDoc(doc));
        Engine.CommitId commitID = engine.flush();
        assertThat(commitID, equalTo(new Engine.CommitId(store.readLastCommittedSegmentsInfo().getId())));
        byte[] wrongBytes = Base64.getDecoder().decode(commitID.toString());
        wrongBytes[0] = (byte) ~wrongBytes[0];
        Engine.CommitId wrongId = new Engine.CommitId(wrongBytes);
        assertEquals("should fail to sync flush with wrong id (but no docs)",
                engine.syncFlush(syncId + "1", wrongId),
                Engine.SyncedFlushResult.COMMIT_MISMATCH);
        engine.index(indexForDoc(doc));
        assertEquals("should fail to sync flush with right id but pending doc",
                engine.syncFlush(syncId + "2", commitID),
                Engine.SyncedFlushResult.PENDING_OPERATIONS);
        commitID = engine.flush();
        assertEquals("should succeed to flush commit with right id and no pending doc",
                engine.syncFlush(syncId, commitID),
                Engine.SyncedFlushResult.SUCCESS);
        assertEquals(store.readLastCommittedSegmentsInfo().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
        assertEquals(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
    }
}
private static IndexWriterConfig standardConfig() {
    IndexWriterConfig writerConfig = new IndexWriterConfig( LuceneDataSource.KEYWORD_ANALYZER );
    writerConfig.setMaxBufferedDocs( 100000 ); // TODO figure out depending on environment?
    writerConfig.setIndexDeletionPolicy( new MultipleBackupDeletionPolicy() );
    writerConfig.setUseCompoundFile( true );

    // TODO: TieredMergePolicy & possibly SortingMergePolicy
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setNoCFSRatio( 1.0 );
    mergePolicy.setMinMergeMB( 0.1 );
    mergePolicy.setMergeFactor( 2 );
    writerConfig.setMergePolicy( mergePolicy );

    return writerConfig;
}
public IndexWriter getIndexWriter(String path, boolean create, Analyzer analyzer) throws IOException {
    // Everything in this method copied from LuceneUtils
    try {
        createDirRobust(path);

        final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_32, analyzer);
        config.setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);

        LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        mergePolicy.setUseCompoundFile(true);
        config.setMergePolicy(mergePolicy);

        return new IndexWriter(getDirectory(path), config);
    } catch (final IOException e) {
        LOG.error("Problem with path " + path + ": " + e.getMessage(), e);
        throw new IOException("Problem with path " + path + ": " + e.getMessage(), e);
    }
}
@Before
public void setup() throws Exception {
    Version version = VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_3_0);
    // we need 2.x so that fielddata is allowed on string fields
    Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
    indexService = createIndex("test", settings);
    mapperService = indexService.mapperService();
    indicesFieldDataCache = getInstanceFromNode(IndicesService.class).getIndicesFieldDataCache();
    ifdService = indexService.fieldData();
    // LogByteSizeMP to preserve doc ID order
    writer = new IndexWriter(new RAMDirectory(),
            new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
}
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    // apply the configured merge policy to the writer config
    iwc.setMergePolicy(mergePolicy);

    if (create) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
public static LogMergePolicy newLogMergePolicy(Random r) {
    LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
    logmp.setCalibrateSizeByDeletes(r.nextBoolean());
    if (rarely(r)) {
        logmp.setMergeFactor(TestUtil.nextInt(r, 2, 9));
    } else {
        logmp.setMergeFactor(TestUtil.nextInt(r, 10, 50));
    }
    configureRandom(r, logmp);
    return logmp;
}
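// Hedged usage sketch (not part of the snippets above): one plausible way the randomized
// newLogMergePolicy(Random) helper above could be wired into a test writer. Assumes it runs
// inside a LuceneTestCase subclass where random(), newDirectory() and MockAnalyzer are
// available; imports are omitted here, as in the surrounding snippets.
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy(random())); // randomized LogDoc/LogByteSize policy
IndexWriter w = new IndexWriter(dir, iwc);
for (int i = 0; i < 100; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
    w.addDocument(doc);
}
w.close();
dir.close();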
public void testLogMergePolicyConfig() throws Exception {
    final Class<? extends LogMergePolicy> mpClass = random().nextBoolean()
            ? LogByteSizeMergePolicy.class : LogDocMergePolicy.class;

    System.setProperty("solr.test.log.merge.policy", mpClass.getName());
    initCore("solrconfig-logmergepolicy.xml", "schema-minimal.xml");
    IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());

    // verify some props set to -1 get lucene internal defaults
    assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);
    assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, iwc.getMaxBufferedDocs());
    assertEquals(-1, solrConfig.indexConfig.maxIndexingThreads);
    assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, iwc.getMaxThreadStates());
    assertEquals(-1, solrConfig.indexConfig.ramBufferSizeMB, 0.0D);
    assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, iwc.getRAMBufferSizeMB(), 0.0D);

    LogMergePolicy logMP = assertAndCast(mpClass, iwc.getMergePolicy());

    // set by legacy <mergeFactor> setting
    assertEquals(11, logMP.getMergeFactor());
    // set by legacy <maxMergeDocs> setting
    assertEquals(456, logMP.getMaxMergeDocs());
}
public LuceneIndexer(String indexName) throws IOException {
    this.indexName = indexName;

    luceneWriterService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(),
            new NamedThreadFactory(threadGroup, indexName + " Lucene writer"));
    luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
            new NamedThreadFactory(threadGroup, indexName + " Lucene future checker"));

    setupRoot();

    File indexDirectoryFile = new File(root.getPath() + "/" + indexName);
    System.out.println("Index: " + indexDirectoryFile);
    Directory indexDirectory = initDirectory(indexDirectoryFile);
    indexDirectory.clearLock("write.lock");

    IndexWriterConfig config = new IndexWriterConfig(luceneVersion, new StandardAnalyzer(luceneVersion));
    MergePolicy mergePolicy = new LogByteSizeMergePolicy();
    config.setMergePolicy(mergePolicy);
    config.setSimilarity(new ShortTextSimilarity());

    IndexWriter indexWriter = new IndexWriter(indexDirectory, config);
    trackingIndexWriter = new NRTManager.TrackingIndexWriter(indexWriter);

    boolean applyAllDeletes = false;
    searcherManager = new NRTManager(trackingIndexWriter, null, applyAllDeletes);

    // Refreshes searcher every 5 seconds when nobody is waiting, and up to 100 msec delay
    // when somebody is waiting:
    reopenThread = new NRTManagerReopenThread(searcherManager, 5.0, 0.1);
    this.startThread();
}
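// Hedged search-side sketch (not part of the constructor above): how code elsewhere in this
// class might use the NRT searcherManager that the constructor sets up. acquire()/release()
// are the standard Lucene 4.x ReferenceManager calls that NRTManager inherits; the field name
// "body" and the query are hypothetical, and the enclosing method is assumed to throw IOException.
IndexSearcher searcher = searcherManager.acquire();
try {
    TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), 10);
    System.out.println("hits: " + hits.totalHits);
} finally {
    searcherManager.release(searcher);
}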
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
    // test openIfChanged() when all index segments were merged - used to be
    // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
    // this test is not random
    Directory dir = newDirectory();

    // hold onto IW to forceMerge
    // note how we don't close it, since DTW will close it.
    final IndexWriter iw = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setMergePolicy(new LogByteSizeMergePolicy()));
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
        @Override
        protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
            return iw;
        }
    };

    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
    assertEquals(1, reader.getSize());
    assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

    // add category and call forceMerge -- this should flush IW and merge segments down to 1
    // in ParentArray.initFromReader, this used to fail assuming there are no parents.
    writer.addCategory(new FacetLabel("1"));
    iw.forceMerge(1);

    // now calling openIfChanged should trip on the bug
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    reader.close();
    writer.close();
    dir.close();
}
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
    // test openIfChanged() when the taxonomy hasn't really changed, but segments
    // were merged. The NRT reader will be reopened, and ParentArray used to assert
    // that the new reader contains more ordinals than were given from the old
    // TaxReader version
    Directory dir = newDirectory();

    // hold onto IW to forceMerge
    // note how we don't close it, since DTW will close it.
    final IndexWriter iw = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setMergePolicy(new LogByteSizeMergePolicy()));
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
        @Override
        protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
            return iw;
        }
    };

    // add a category so that the following DTR open will cause a flush and
    // a new segment will be created
    writer.addCategory(new FacetLabel("a"));

    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    // merge all the segments so that NRT reader thinks there's a change
    iw.forceMerge(1);

    // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    reader.close();
    writer.close();
    dir.close();
}
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
    // test openIfChanged() when all index segments were merged - used to be
    // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
    // this test is not random
    Directory dir = newDirectory();

    // hold onto IW to forceMerge
    // note how we don't close it, since DTW will close it.
    final IndexWriter iw = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setMergePolicy(new LogByteSizeMergePolicy()));
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
        @Override
        protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
            return iw;
        }
    };

    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
    assertEquals(1, reader.getSize());
    assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

    // add category and call forceMerge -- this should flush IW and merge segments down to 1
    // in ParentArray.initFromReader, this used to fail assuming there are no parents.
    writer.addCategory(new CategoryPath("1"));
    iw.forceMerge(1);

    // now calling openIfChanged should trip on the bug
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    reader.close();
    writer.close();
    dir.close();
}
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
    // test openIfChanged() when the taxonomy hasn't really changed, but segments
    // were merged. The NRT reader will be reopened, and ParentArray used to assert
    // that the new reader contains more ordinals than were given from the old
    // TaxReader version
    Directory dir = newDirectory();

    // hold onto IW to forceMerge
    // note how we don't close it, since DTW will close it.
    final IndexWriter iw = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                    .setMergePolicy(new LogByteSizeMergePolicy()));
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
        @Override
        protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config) throws IOException {
            return iw;
        }
    };

    // add a category so that the following DTR open will cause a flush and
    // a new segment will be created
    writer.addCategory(new CategoryPath("a"));

    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    // merge all the segments so that NRT reader thinks there's a change
    iw.forceMerge(1);

    // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    assertEquals(2, reader.getSize());
    assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

    reader.close();
    writer.close();
    dir.close();
}
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM size,
 * etc.<br>
 * <br><b>NOTE:</b> internal docids of the configured index must not be altered.
 * For that reason, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must not merge non-adjacent segments.
 *
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 *
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
    // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
    // The taxonomy has a unique structure, where each term is associated with one document

    // :Post-Release-Update-Version.LUCENE_XY:
    // Make sure we use a MergePolicy which always merges adjacent segments and thus
    // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
    return new IndexWriterConfig(Version.LUCENE_4_10_0, null).setOpenMode(openMode)
            .setMergePolicy(new LogByteSizeMergePolicy());
}
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM size,
 * etc.<br>
 * <br><b>NOTE:</b> internal docids of the configured index must not be altered.
 * For that reason, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must not merge non-adjacent segments.
 *
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 *
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
    // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
    // The taxonomy has a unique structure, where each term is associated with one document

    // Make sure we use a MergePolicy which always merges adjacent segments and thus
    // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
    return new IndexWriterConfig(Version.LUCENE_42, null).setOpenMode(openMode)
            .setMergePolicy(new LogByteSizeMergePolicy());
}
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM size,
 * etc.<br>
 * <br><b>NOTE:</b> internal docids of the configured index must not be altered.
 * For that reason, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must not merge non-adjacent segments.
 *
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 *
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
    // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
    // The taxonomy has a unique structure, where each term is associated with one document

    // :Post-Release-Update-Version.LUCENE_XY:
    // Make sure we use a MergePolicy which always merges adjacent segments and thus
    // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
    return new IndexWriterConfig(Version.LUCENE_47, null).setOpenMode(openMode)
            .setMergePolicy(new LogByteSizeMergePolicy());
}
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM size,
 * etc.<br>
 * <br><b>NOTE:</b> internal docids of the configured index must not be altered.
 * For that reason, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must not merge non-adjacent segments.
 *
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 *
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
    // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
    // The taxonomy has a unique structure, where each term is associated with one document

    // Make sure we use a MergePolicy which always merges adjacent segments and thus
    // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
    return new IndexWriterConfig(Version.LUCENE_43, null).setOpenMode(openMode)
            .setMergePolicy(new LogByteSizeMergePolicy());
}
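// Hedged extension sketch (not taken from any snippet above): the Javadoc above notes that
// extensions may configure the IndexWriter as they see fit, as long as doc IDs stay ordered.
// One plausible way to do that is to reuse the default config and tweak only order-neutral
// settings. The class name CustomTaxonomyWriter and the 64 MB buffer are hypothetical;
// imports are omitted here, as in the surrounding snippets.
public class CustomTaxonomyWriter extends DirectoryTaxonomyWriter {

    public CustomTaxonomyWriter(Directory directory) throws IOException {
        super(directory);
    }

    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
        // Keep the default OpenMode handling and the LogByteSizeMergePolicy (adjacent-segment
        // merges preserve doc ID order), then adjust only the RAM buffer size.
        IndexWriterConfig config = super.createIndexWriterConfig(openMode);
        config.setRAMBufferSizeMB(64.0);
        return config;
    }
}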