@Override
public MergePolicy getInstance(Map<String, String> params) throws IOException {
  String field = params.get(SORT_FIELD); // required: the field to sort merged segments by
  SortField.Type sortFieldType = SortField.Type.DOC;
  if (params.containsKey(SORT_FIELD_TYPE)) {
    sortFieldType = SortField.Type.valueOf(params.get(SORT_FIELD_TYPE).toUpperCase());
  }
  if (sortFieldType == SortField.Type.DOC) {
    throw new IOException(
        "Relying on internal lucene DocIDs is not guaranteed to work, this is only an implementation detail.");
  }
  // Boolean.parseBoolean never throws; any value other than "true" yields false,
  // so no try/catch is needed here.
  boolean desc = true;
  if (params.containsKey(SORT_DESC)) {
    desc = Boolean.parseBoolean(params.get(SORT_DESC));
  }
  SortField sortField = new SortField(field, sortFieldType, desc);
  Sort sort = new Sort(sortField);
  return new SortingMergePolicyDecorator(new TieredMergePolicy(), sort);
}
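A minimal invocation sketch for the factory above. The class name SortingMergePolicyFactory and the literal param keys are assumptions for illustration; the real keys are whatever SORT_FIELD, SORT_FIELD_TYPE, and SORT_DESC resolve to.

import java.util.HashMap;
import java.util.Map;

Map<String, String> params = new HashMap<>();
params.put("sort.field", "timestamp");  // SORT_FIELD (key name assumed)
params.put("sort.field.type", "LONG");  // SORT_FIELD_TYPE; "DOC" would be rejected
params.put("sort.desc", "true");        // SORT_DESC; sort newest first
MergePolicy mp = new SortingMergePolicyFactory().getInstance(params); // hypothetical class name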
static MergePolicy newSortingMergePolicy(Sort sort) {
  // usually create a MP with a low merge factor so that many merges happen
  MergePolicy mp;
  int thingToDo = random().nextInt(3);
  if (thingToDo == 0) {
    TieredMergePolicy tmp = newTieredMergePolicy(random());
    final int numSegs = TestUtil.nextInt(random(), 3, 5);
    tmp.setSegmentsPerTier(numSegs);
    tmp.setMaxMergeAtOnce(TestUtil.nextInt(random(), 2, numSegs));
    mp = tmp;
  } else if (thingToDo == 1) {
    LogMergePolicy lmp = newLogMergePolicy(random());
    lmp.setMergeFactor(TestUtil.nextInt(random(), 3, 5));
    mp = lmp;
  } else {
    // otherwise take whatever random merge policy LuceneTestCase picks
    mp = newMergePolicy();
  }
  // wrap it with a sorting mp
  return new SortingMergePolicy(mp, sort);
}
/** just tries to configure things to keep the open file
 *  count lowish */
public static void reduceOpenFiles(IndexWriter w) {
  // keep number of open files lowish
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
    lmp.setNoCFSRatio(1.0);
  } else if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
    tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
    tmp.setNoCFSRatio(1.0);
  }
  MergeScheduler ms = w.getConfig().getMergeScheduler();
  if (ms instanceof ConcurrentMergeScheduler) {
    // arguably this should be even lower, since the default maxThreadCount is 1
    ((ConcurrentMergeScheduler) ms).setMaxMergesAndThreads(3, 2);
  }
}
public static TieredMergePolicy newTieredMergePolicy(Random r) {
  TieredMergePolicy tmp = new TieredMergePolicy();
  if (rarely(r)) {
    tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 2, 9));
    tmp.setMaxMergeAtOnceExplicit(TestUtil.nextInt(r, 2, 9));
  } else {
    tmp.setMaxMergeAtOnce(TestUtil.nextInt(r, 10, 50));
    tmp.setMaxMergeAtOnceExplicit(TestUtil.nextInt(r, 10, 50));
  }
  if (rarely(r)) {
    tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
  } else {
    tmp.setMaxMergedSegmentMB(r.nextDouble() * 100);
  }
  tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0);
  tmp.setForceMergeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0);
  if (rarely(r)) {
    tmp.setSegmentsPerTier(TestUtil.nextInt(r, 2, 20));
  } else {
    tmp.setSegmentsPerTier(TestUtil.nextInt(r, 10, 50));
  }
  configureRandom(r, tmp);
  tmp.setReclaimDeletesWeight(r.nextDouble() * 4);
  return tmp;
}
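A hedged sketch of wiring this randomized policy into a test writer, assuming a LuceneTestCase subclass where random(), newIndexWriterConfig(Analyzer), newDirectory(), and MockAnalyzer come from the Lucene test framework:

// randomized merge policy keeps tests exercising many merge shapes
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newTieredMergePolicy(random()));
IndexWriter w = new IndexWriter(newDirectory(), iwc);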
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
  SolrConfig solrConfig = new SolrConfig("solr" + File.separator + "collection1",
      "solrconfig-tieredmergepolicy.xml", null);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  assertNotNull(solrIndexConfig);

  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
  IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(indexSchema);

  assertNotNull("null mp", iwc.getMergePolicy());
  assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
  TieredMergePolicy mp = (TieredMergePolicy) iwc.getMergePolicy();
  assertEquals("mp.maxMergeAtOnceExplicit", 19, mp.getMaxMergeAtOnceExplicit());
  assertEquals("mp.segmentsPerTier", 9, (int) mp.getSegmentsPerTier());

  assertNotNull("null ms", iwc.getMergeScheduler());
  assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);
  ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
  assertEquals("ms.maxMergeCount", 987, ms.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, ms.getMaxThreadCount());
}
@Test
public void testDefaults() throws Exception {
  SolrConfig sc = new SolrConfig(new SolrResourceLoader("solr/collection1"), "solrconfig-defaults.xml", null);
  SolrIndexConfig sic = sc.indexConfig;
  assertEquals("default ramBufferSizeMB", 100.0D, sic.ramBufferSizeMB, 0.0D);
  assertEquals("default LockType", SolrIndexConfig.LOCK_TYPE_NATIVE, sic.lockType);
  assertEquals("default useCompoundFile", false, sic.useCompoundFile);

  // build the schema against the locally constructed SolrConfig, not an outer field
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", sc);
  IndexWriterConfig iwc = sic.toIndexWriterConfig(indexSchema);

  assertNotNull("null mp", iwc.getMergePolicy());
  assertTrue("mp is not TMP", iwc.getMergePolicy() instanceof TieredMergePolicy);
  assertNotNull("null ms", iwc.getMergeScheduler());
  assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);
}
public void testLegacyMergePolicyConfig() throws Exception {
  final boolean expectCFS = Boolean.parseBoolean(System.getProperty("useCompoundFile"));

  initCore("solrconfig-mergepolicy-legacy.xml", "schema-minimal.xml");
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
  assertEquals(expectCFS, iwc.getUseCompoundFile());
  assertEquals("termIndexInterval", 256, iwc.getTermIndexInterval());

  TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class, iwc.getMergePolicy());
  assertEquals(7, tieredMP.getMaxMergeAtOnce());
  assertEquals(7.0D, tieredMP.getSegmentsPerTier(), 0.0D);
  assertEquals(expectCFS ? 1.0D : 0.0D, tieredMP.getNoCFSRatio(), 0.0D);

  assertCommitSomeNewDocs();
  assertCompoundSegments(h.getCore(), expectCFS);
}
/** just tries to configure things to keep the open file
 *  count lowish */
public static void reduceOpenFiles(IndexWriter w) {
  // keep number of open files lowish
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
    lmp.setUseCompoundFile(true);
  } else if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
    tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
    tmp.setUseCompoundFile(true);
  }
  MergeScheduler ms = w.getConfig().getMergeScheduler();
  if (ms instanceof ConcurrentMergeScheduler) {
    ((ConcurrentMergeScheduler) ms).setMaxThreadCount(2);
    ((ConcurrentMergeScheduler) ms).setMaxMergeCount(3);
  }
}
public void testTieredMergePolicyConfig() throws Exception {
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getSchema());
  MergePolicy mp = iwc.getMergePolicy();
  assertTrue(mp instanceof TieredMergePolicy);
  TieredMergePolicy tieredMP = (TieredMergePolicy) mp;

  // mp-specific setter
  assertEquals(19, tieredMP.getMaxMergeAtOnceExplicit());

  // make sure we apply compoundFile and mergeFactor
  assertEquals(false, tieredMP.getUseCompoundFile());
  assertEquals(7, tieredMP.getMaxMergeAtOnce());

  // make sure we overrode segmentsPerTier (split from maxMergeAtOnce out of mergeFactor)
  assertEquals(9D, tieredMP.getSegmentsPerTier(), 0.001);

  // make sure we overrode noCFSRatio (useless because we disabled useCompoundFile,
  // but just to make sure it works)
  assertEquals(1.0D, tieredMP.getNoCFSRatio(), 0.001);
}
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
static MergePolicy newSortingMergePolicy(Sorter sorter) {
  // create a MP with a low merge factor so that many merges happen
  MergePolicy mp;
  if (random().nextBoolean()) {
    TieredMergePolicy tmp = newTieredMergePolicy(random());
    final int numSegs = _TestUtil.nextInt(random(), 3, 5);
    tmp.setSegmentsPerTier(numSegs);
    tmp.setMaxMergeAtOnce(_TestUtil.nextInt(random(), 2, numSegs));
    mp = tmp;
  } else {
    LogMergePolicy lmp = newLogMergePolicy(random());
    lmp.setMergeFactor(_TestUtil.nextInt(random(), 3, 5));
    mp = lmp;
  }
  // wrap it with a sorting mp
  return new SortingMergePolicy(mp, sorter);
}
public void testTieredMergePolicySettingsUpdate() throws IOException {
  IndexSettings indexSettings = indexSettings(Settings.EMPTY);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(),
      MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED, 0.0d);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_EXPUNGE_DELETES_ALLOWED_SETTING.getKey(),
          MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED + 1.0d).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(),
      MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED + 1.0d, 0.0d);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getFloorSegmentMB(),
      MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMbFrac(), 0);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_FLOOR_SEGMENT_SETTING.getKey(),
          new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb() + 1, ByteSizeUnit.MB)).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getFloorSegmentMB(),
      new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb() + 1, ByteSizeUnit.MB).getMbFrac(), 0.001);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_SETTING.getKey(),
          MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE - 1).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE - 1);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnceExplicit(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGE_AT_ONCE_EXPLICIT_SETTING.getKey(),
          MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT - 1).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnceExplicit(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT - 1);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(),
      MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getMbFrac(), 0.0001);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_MAX_MERGED_SEGMENT_SETTING.getKey(),
          new ByteSizeValue(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1)).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(),
      new ByteSizeValue(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getReclaimDeletesWeight(),
      MergePolicyConfig.DEFAULT_RECLAIM_DELETES_WEIGHT, 0);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_RECLAIM_DELETES_WEIGHT_SETTING.getKey(),
          MergePolicyConfig.DEFAULT_RECLAIM_DELETES_WEIGHT + 1).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getReclaimDeletesWeight(),
      MergePolicyConfig.DEFAULT_RECLAIM_DELETES_WEIGHT + 1, 0);

  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(),
      MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0);
  indexSettings.updateIndexMetaData(newIndexMeta("index",
      Settings.builder().put(MergePolicyConfig.INDEX_MERGE_POLICY_SEGMENTS_PER_TIER_SETTING.getKey(),
          MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1).build()));
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(),
      MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER + 1, 0);

  indexSettings.updateIndexMetaData(newIndexMeta("index", EMPTY_SETTINGS)); // see if defaults are restored
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getForceMergeDeletesPctAllowed(),
      MergePolicyConfig.DEFAULT_EXPUNGE_DELETES_ALLOWED, 0.0d);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getFloorSegmentMB(),
      new ByteSizeValue(MergePolicyConfig.DEFAULT_FLOOR_SEGMENT.getMb(), ByteSizeUnit.MB).getMbFrac(), 0.00);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnce(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergeAtOnceExplicit(),
      MergePolicyConfig.DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getMaxMergedSegmentMB(),
      new ByteSizeValue(MergePolicyConfig.DEFAULT_MAX_MERGED_SEGMENT.getBytes() + 1).getMbFrac(), 0.0001);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getReclaimDeletesWeight(),
      MergePolicyConfig.DEFAULT_RECLAIM_DELETES_WEIGHT, 0);
  assertEquals(((TieredMergePolicy) indexSettings.getMergePolicy()).getSegmentsPerTier(),
      MergePolicyConfig.DEFAULT_SEGMENTS_PER_TIER, 0);
}
public MergePolicyConfig(ESLogger logger, Settings indexSettings) {
  this.logger = logger;
  this.noCFSRatio = parseNoCFSRatio(indexSettings.get(INDEX_COMPOUND_FORMAT,
      Double.toString(TieredMergePolicy.DEFAULT_NO_CFS_RATIO)));
  double forceMergeDeletesPctAllowed = indexSettings.getAsDouble("index.merge.policy.expunge_deletes_allowed",
      DEFAULT_EXPUNGE_DELETES_ALLOWED); // percentage
  ByteSizeValue floorSegment = indexSettings.getAsBytesSize("index.merge.policy.floor_segment",
      DEFAULT_FLOOR_SEGMENT);
  int maxMergeAtOnce = indexSettings.getAsInt("index.merge.policy.max_merge_at_once", DEFAULT_MAX_MERGE_AT_ONCE);
  int maxMergeAtOnceExplicit = indexSettings.getAsInt("index.merge.policy.max_merge_at_once_explicit",
      DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
  // TODO is this really a good default number for max_merge_segment, what happens for large indices,
  // won't they end up with many segments?
  ByteSizeValue maxMergedSegment = indexSettings.getAsBytesSize("index.merge.policy.max_merged_segment",
      DEFAULT_MAX_MERGED_SEGMENT);
  double segmentsPerTier = indexSettings.getAsDouble("index.merge.policy.segments_per_tier",
      DEFAULT_SEGMENTS_PER_TIER);
  double reclaimDeletesWeight = indexSettings.getAsDouble("index.merge.policy.reclaim_deletes_weight",
      DEFAULT_RECLAIM_DELETES_WEIGHT);
  this.mergesEnabled = indexSettings.getAsBoolean(INDEX_MERGE_ENABLED, true);
  if (mergesEnabled == false) {
    logger.warn("[{}] is set to false, this should only be used in tests and can cause serious problems in production environments",
        INDEX_MERGE_ENABLED);
  }
  maxMergeAtOnce = adjustMaxMergeAtOnceIfNeeded(maxMergeAtOnce, segmentsPerTier);
  // 'mergePolicy' is the class's TieredMergePolicy field
  mergePolicy.setNoCFSRatio(noCFSRatio);
  mergePolicy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed);
  mergePolicy.setFloorSegmentMB(floorSegment.mbFrac());
  mergePolicy.setMaxMergeAtOnce(maxMergeAtOnce);
  mergePolicy.setMaxMergeAtOnceExplicit(maxMergeAtOnceExplicit);
  mergePolicy.setMaxMergedSegmentMB(maxMergedSegment.mbFrac());
  mergePolicy.setSegmentsPerTier(segmentsPerTier);
  mergePolicy.setReclaimDeletesWeight(reclaimDeletesWeight);
  logger.debug("using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
      forceMergeDeletesPctAllowed, floorSegment, maxMergeAtOnce, maxMergeAtOnceExplicit, maxMergedSegment,
      segmentsPerTier, reclaimDeletesWeight);
}
@Override
public MergePolicy getInstance(Map<String, String> config) throws IOException {
  TieredMergePolicy mergePolicy = new TieredMergePolicy();
  if (config.containsKey(SEGMENTS_PER_TIER)) {
    mergePolicy.setSegmentsPerTier(Double.valueOf(config.get(SEGMENTS_PER_TIER)));
  }
  if (config.containsKey(MAX_MERGE_AT_ONCE)) {
    mergePolicy.setMaxMergeAtOnce(Integer.valueOf(config.get(MAX_MERGE_AT_ONCE)));
  }
  return mergePolicy;
}
@BeforeClass
public static void beforeClass() throws Exception {
  // save and set the same property name so an @AfterClass hook can restore it correctly
  savedFactory = System.getProperty("solr.directoryFactory");
  System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
  System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
  System.setProperty("solr.tests.mergePolicy", TieredMergePolicy.class.getName());
  initCore("solrconfig.xml", "schema12.xml");
}
public void testDefaultMergePolicyConfig() throws Exception {
  initCore("solrconfig-mergepolicy-defaults.xml", "schema-minimal.xml");
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
  assertEquals(false, iwc.getUseCompoundFile());

  TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class, iwc.getMergePolicy());
  assertEquals(0.0D, tieredMP.getNoCFSRatio(), 0.0D);

  assertCommitSomeNewDocs();
  assertCompoundSegments(h.getCore(), false);
}
public void testTieredMergePolicyConfig() throws Exception {
  final boolean expectCFS = Boolean.parseBoolean(System.getProperty("useCompoundFile"));

  initCore("solrconfig-tieredmergepolicy.xml", "schema-minimal.xml");
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore().getLatestSchema());
  assertEquals(expectCFS, iwc.getUseCompoundFile());

  TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class, iwc.getMergePolicy());

  // set by legacy <mergeFactor> setting
  assertEquals(7, tieredMP.getMaxMergeAtOnce());

  // mp-specific setters
  assertEquals(19, tieredMP.getMaxMergeAtOnceExplicit());
  assertEquals(0.1D, tieredMP.getNoCFSRatio(), 0.0D);

  // make sure we overrode segmentsPerTier
  // (split from maxMergeAtOnce out of mergeFactor)
  assertEquals(9D, tieredMP.getSegmentsPerTier(), 0.001);

  assertCommitSomeNewDocs();
  // even though we have a single segment (which is 100% of the size of
  // the index, which is higher than our 0.6D threshold) the
  // compound ratio doesn't matter because the segment was never merged
  assertCompoundSegments(h.getCore(), expectCFS);

  assertCommitSomeNewDocs();
  assertNumSegments(h.getCore(), 2);
  assertCompoundSegments(h.getCore(), expectCFS);

  assertU(optimize());
  assertNumSegments(h.getCore(), 1);
  // we've now forced a merge, and the MP ratio should be in play
  assertCompoundSegments(h.getCore(), false);
}
public static void setUseCompoundFile(MergePolicy mp, boolean v) {
  if (mp instanceof TieredMergePolicy) {
    ((TieredMergePolicy) mp).setUseCompoundFile(v);
  } else if (mp instanceof LogMergePolicy) {
    ((LogMergePolicy) mp).setUseCompoundFile(v);
  } else {
    throw new IllegalArgumentException("cannot set compound file for MergePolicy " + mp);
  }
}
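A brief usage sketch for the helper above, assuming the Lucene 4.x APIs used elsewhere in this section (where TieredMergePolicy still exposes setUseCompoundFile); the 'directory' variable stands in for any Directory instance:

IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
// the default merge policy here is a TieredMergePolicy, so the first branch applies
setUseCompoundFile(conf.getMergePolicy(), false);
IndexWriter writer = new IndexWriter(directory, conf); // 'directory' assumed to exist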
@Test
public void testLucene23Upgrades() throws Exception {
  double bufferSize = solrConfig.indexConfig.ramBufferSizeMB;
  assertEquals(100.0D, bufferSize, 0.0D);

  String mergePolicy = solrConfig.indexConfig.mergePolicyInfo.className;
  assertEquals(TieredMergePolicy.class.getName(), mergePolicy);

  String mergeSched = solrConfig.indexConfig.mergeSchedulerInfo.className;
  assertEquals(SolrIndexConfig.DEFAULT_MERGE_SCHEDULER_CLASSNAME, mergeSched);
}
private void setupWriter(Configuration configuration) throws IOException {
  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setName("test-table");
  String uuid = UUID.randomUUID().toString();
  tableDescriptor.setTableUri(new Path(_base, "table-table").toUri().toString());
  tableDescriptor.setShardCount(2);

  TableContext tableContext = TableContext.create(tableDescriptor);
  ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000");

  Path tablePath = new Path(_base, "table-table");
  _shardPath = new Path(tablePath, "shard-00000000");
  String indexDirName = "index_" + uuid;
  _path = new Path(_shardPath, indexDirName + ".commit");
  _fileSystem.mkdirs(_path);
  _badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids");
  _badIndexPath = new Path(_shardPath, indexDirName + ".badindex");
  _inUsePath = new Path(_shardPath, indexDirName + ".inuse");

  Directory commitDirectory = new HdfsDirectory(configuration, _path);
  _mainDirectory = new HdfsDirectory(configuration, _shardPath);
  _fieldManager = tableContext.getFieldManager();
  Analyzer analyzerForIndex = _fieldManager.getAnalyzerForIndex();
  IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, analyzerForIndex);
  // conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);

  _commitWriter = new IndexWriter(commitDirectory, conf.clone());

  // Make sure there's an empty index...
  new IndexWriter(_mainDirectory, conf.clone()).close();
  _mainWriter = new IndexWriter(_mainDirectory, conf.clone());
  BufferStore.initNewBuffer(128, 128 * 128);

  _indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext,
      TimeUnit.MINUTES, 10, 10, null, _mainDirectory);
}
public JsonObject asJson() {
  JsonArrayBuilder strategiesJsonBuilder = Json.createArrayBuilder();
  for (ClusterStrategy strategy : this.clusterConfig.strategies) {
    strategiesJsonBuilder.add(Json.createObjectBuilder()
        .add("clusteringEps", strategy.clusteringEps)
        .add("clusteringMinPoints", strategy.clusteringMinPoints));
  }
  JsonObject json = Json.createObjectBuilder()
      .add("similarity", similarity.toString())
      .add("mergePolicy", this.mergePolicy instanceof TieredMergePolicy
          ? Json.createObjectBuilder()
              .add("type", "TieredMergePolicy")
              .add("maxMergeAtOnce", ((TieredMergePolicy) this.mergePolicy).getMaxMergeAtOnce())
              .add("segmentsPerTier", ((TieredMergePolicy) this.mergePolicy).getSegmentsPerTier())
          : Json.createObjectBuilder()
              .add("type", "LogDocMergePolicy")
              .add("maxMergeDocs", ((LogMergePolicy) this.mergePolicy).getMaxMergeDocs())
              .add("mergeFactor", ((LogMergePolicy) this.mergePolicy).getMergeFactor()))
      .add("lruTaxonomyWriterCacheSize", lruTaxonomyWriterCacheSize)
      .add("numberOfConcurrentTasks", numberOfConcurrentTasks)
      .add("commitCount", commitCount)
      .add("commitTimeout", commitTimeout)
      .add("cacheFacetOrdinals", this.cacheFacetOrdinals)
      .add("clustering", Json.createObjectBuilder()
          .add("clusterMoreRecords", clusterConfig.clusterMoreRecords)
          .add("strategies", strategiesJsonBuilder))
      .build();
  return json;
}
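For readability, this is the shape of the JSON the method builds when the policy is a TieredMergePolicy. Every value here is an illustrative placeholder, not output from a real run:

{
  "similarity": "BM25(k1=1.2,b=0.75)",
  "mergePolicy": { "type": "TieredMergePolicy", "maxMergeAtOnce": 10, "segmentsPerTier": 10.0 },
  "lruTaxonomyWriterCacheSize": 4000,
  "numberOfConcurrentTasks": 6,
  "commitCount": 100000,
  "commitTimeout": 10,
  "cacheFacetOrdinals": true,
  "clustering": {
    "clusterMoreRecords": 100,
    "strategies": [ { "clusteringEps": 0.4, "clusteringMinPoints": 1 } ]
  }
}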
/**
 * Builds a new {@link FSIndex}.
 *
 * @param name        the index name
 * @param mbeanName   the JMX MBean object name
 * @param path        the directory path
 * @param analyzer    the index writer analyzer
 * @param refresh     the index reader refresh frequency in seconds
 * @param ramBufferMB the index writer RAM buffer size in MB
 * @param maxMergeMB  the directory max merge size in MB
 * @param maxCachedMB the directory max cache size in MB
 * @param refreshTask action to be done during refresh
 */
public void init(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB,
    int maxMergeMB, int maxCachedMB, Runnable refreshTask) {
  try {
    this.path = path;
    this.name = name;

    // Open or create directory
    FSDirectory fsDirectory = FSDirectory.open(path);
    this.directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

    // Setup index writer
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setRAMBufferSizeMB(ramBufferMB);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexWriterConfig.setUseCompoundFile(true);
    indexWriterConfig.setMergePolicy(new TieredMergePolicy());
    this.indexWriter = new IndexWriter(this.directory, indexWriterConfig);

    // Setup NRT search
    SearcherFactory searcherFactory = new SearcherFactory() {
      @Override
      public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
        if (refreshTask != null) {
          refreshTask.run();
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new NoIDFSimilarity());
        return searcher;
      }
    };
    TrackingIndexWriter trackingWriter = new TrackingIndexWriter(this.indexWriter);
    this.searcherManager = new SearcherManager(this.indexWriter, true, searcherFactory);
    this.searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, this.searcherManager,
        refresh, refresh);
    this.searcherReopener.start();

    // Register JMX MBean
    // mbean = new ObjectName(mbeanName);
    // ManagementFactory.getPlatformMBeanServer().registerMBean(service, this.mbean);
  } catch (Exception e) {
    throw new FhirIndexException(e, "Error while creating index %s", name);
  }
}
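A hedged call sketch for init above. All argument values are illustrative, and the FSIndex construction is an assumption (the snippet does not show its constructor):

FSIndex index = new FSIndex(); // assumes a no-arg constructor; adjust to the real one
index.init(
    "patients",                             // index name
    "org.fhir:type=FSIndex,name=patients",  // JMX MBean name (illustrative)
    Paths.get("/var/data/index/patients"),  // directory path (illustrative)
    new StandardAnalyzer(),
    60.0,   // refresh the reader every 60 seconds
    64,     // 64 MB writer RAM buffer
    5,      // NRTCachingDirectory max merge size, MB
    60,     // NRTCachingDirectory max cached size, MB
    null);  // no refresh task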
public static TieredMergePolicy newTieredMergePolicy() {
  return newTieredMergePolicy(random());
}
@Test
public void testMultipleCommitsAndReopens() throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setMergeScheduler(new SerialMergeScheduler());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);

  Set<String> fileSet = new TreeSet<String>();
  long seed = new Random().nextLong();
  System.out.println("Seed:" + seed);
  Random random = new Random(seed);
  int docCount = 0;
  int passes = 10;
  byte[] segmentsGenContents = null;
  for (int run = 0; run < passes; run++) {
    final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
        new Path(_path, "test_multiple_commits_reopens"));
    if (segmentsGenContents != null) {
      byte[] segmentsGenContentsCurrent = readSegmentsGen(directory);
      assertTrue(Arrays.equals(segmentsGenContents, segmentsGenContentsCurrent));
    }
    assertFiles(fileSet, run, -1, directory);
    assertEquals(docCount, getDocumentCount(directory));
    IndexWriter writer = new IndexWriter(directory, conf.clone());
    int numberOfCommits = random.nextInt(100);
    for (int i = 0; i < numberOfCommits; i++) {
      assertFiles(fileSet, run, i, directory);
      addDocuments(writer, random.nextInt(100));
      // Before Commit
      writer.commit();
      // After Commit

      // Set files after commit
      {
        fileSet.clear();
        List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
        assertEquals(1, listCommits.size());
        IndexCommit indexCommit = listCommits.get(0);
        fileSet.addAll(indexCommit.getFileNames());
      }
      segmentsGenContents = readSegmentsGen(directory);
    }
    docCount = getDocumentCount(directory);
  }
}
public GenericBlurRecordWriter(Configuration configuration, int attemptId, String tmpDirName) throws IOException {
  _configuration = configuration;
  _documentBufferStrategy = BlurOutputFormat.getDocumentBufferStrategy(_configuration);
  _indexLocally = BlurOutputFormat.isIndexLocally(_configuration);
  _optimizeInFlight = BlurOutputFormat.isOptimizeInFlight(_configuration);

  TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(_configuration);
  int shardCount = tableDescriptor.getShardCount();
  int shardId = attemptId % shardCount;

  Path tableOutput = BlurOutputFormat.getOutputPath(_configuration);
  String shardName = ShardUtil.getShardName(BlurConstants.SHARD_PREFIX, shardId);
  Path indexPath = new Path(tableOutput, shardName);
  _newIndex = new Path(indexPath, tmpDirName);
  _finalDir = new ProgressableDirectory(new HdfsDirectory(_configuration, _newIndex), getProgressable());
  _finalDir.setLockFactory(NoLockFactory.getNoLockFactory());

  TableContext tableContext = TableContext.create(tableDescriptor);
  _fieldManager = tableContext.getFieldManager();
  Analyzer analyzer = _fieldManager.getAnalyzerForIndex();

  _conf = new IndexWriterConfig(LuceneVersionConstant.LUCENE_VERSION, analyzer);
  _conf.setCodec(new Blur024Codec());
  _conf.setSimilarity(tableContext.getSimilarity());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) _conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);

  _overFlowConf = _conf.clone();

  if (_indexLocally) {
    String localDirPath = System.getProperty(JAVA_IO_TMPDIR);
    _localPath = new File(localDirPath, UUID.randomUUID().toString() + ".tmp");
    SimpleFSDirectory directory = new SimpleFSDirectory(_localPath);
    _localDir = new ProgressableDirectory(directory, getProgressable());
    _writer = new IndexWriter(_localDir, _conf.clone());
  } else {
    _localPath = null;
    _localDir = null;
    _writer = new IndexWriter(_finalDir, _conf.clone());
  }
}
private MergePolicy getTieredMergePolicy(double segmentsPerTier, int maxMergeAtOnce) {
  TieredMergePolicy mp = new TieredMergePolicy();
  // in the Lucene version this snippet targets, TieredMergePolicy setters
  // return 'this', so the calls can be chained
  return mp.setMaxMergeAtOnce(maxMergeAtOnce).setSegmentsPerTier(segmentsPerTier);
}
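A one-line usage sketch for the helper above, assuming an enclosing class that owns it and an IndexWriterConfig named iwc (both assumptions):

// illustrative values: 10 segments per tier, merge up to 10 segments at once
iwc.setMergePolicy(getTieredMergePolicy(10.0, 10));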
private void initIndexWriter() throws IOException {
  Analyzer analyzer = new StandardAnalyzer();
  IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  iwc.setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(new LoggingInfoStream(Level.FINER)));
  iwc.setReaderPooling(true);

  // iwc.setMergeScheduler(new SerialMergeScheduler());
  ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
  iwc.setMergeScheduler(mergeScheduler);

  TieredMergePolicy mergePolicy = new TieredMergePolicy();
  mergePolicy.setMaxMergeAtOnce(_maxMergeAtOnce);
  mergePolicy.setSegmentsPerTier(_segmentsPerTier);
  iwc.setMergePolicy(mergePolicy);

  iwc.setInfoStream(new LoggingInfoStream(Level.FINER));
  _writer = new IndexWriter(getDirectory(), iwc);
}