Java class org.apache.lucene.index.IndexWriterConfig code examples
Project: RedisDirectory
File: TestLucene.java
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(open));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
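Both TestLucene benchmarks call an addDocument(int) helper that is not shown in this listing. A minimal sketch of what it presumably builds, inferred from the search loop querying field "key1" for values "key" + i (the field name and value pattern are assumptions drawn from those queries, not the project's actual code):

// Hypothetical reconstruction -- not shown in the original TestLucene.java.
private Document addDocument(int i) {
    Document document = new Document();
    // "key1" matches the field used by the TermQuery in the benchmark's search loop
    document.add(new StringField("key1", "key" + i, Field.Store.YES));
    return document;
}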
Project: RedisDirectory
File: TestLucene.java
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
Project: elasticsearch_my
File: InternalEngine.java
private IndexWriterConfig getIndexWriterConfig(boolean create) {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(deletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    MergePolicy mergePolicy = config().getMergePolicy();
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
    iwc.setMergePolicy(mergePolicy);
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return iwc;
}
Project: elasticsearch_my
File: Lucene.java
/**
 * This method removes all lucene files from the given directory. It will first try to delete all commit points / segments
 * files to ensure broken commits or corrupted indices will not be opened in the future. If any of the segment files can't be deleted
 * this operation fails.
 */
public static void cleanLuceneIndex(Directory directory) throws IOException {
    try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) {
        for (final String file : directory.listAll()) {
            if (file.startsWith(IndexFileNames.SEGMENTS) || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                directory.deleteFile(file); // remove all segments_N files
            }
        }
    }
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
            .setMergePolicy(NoMergePolicy.INSTANCE) // no merges
            .setCommitOnClose(false) // no commits
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE))) // force creation - don't append...
    {
        // do nothing; closing the writer kicks off IndexFileDeleter, which removes all pending files
    }
}
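An illustrative call site (the path is assumed for the example; it is not part of the original file):

// Wipe a possibly-corrupted index directory before re-creating it (illustrative usage).
try (Directory dir = FSDirectory.open(Paths.get("/var/data/shard0/index"))) {
    Lucene.cleanLuceneIndex(dir);
}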
Project: elasticsearch_my
File: CustomUnifiedHighlighterTests.java
private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field textField = new Field(field, "", ft);
    Document doc = new Document();
    doc.add(textField);
    textField.setStringValue(value);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    return ir;
}
Project: elasticsearch_my
File: SmoothingModelTestCase.java
/**
 * Test the WordScorer emitted by the smoothing model
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d,
            BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
Project: elasticsearch_my
File: HalfFloatFielddataTests.java
public void testSingleValued() throws IOException {
    Directory dir = newDirectory();
    // we need the default codec to check for singletons
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
        doc.add(f);
    }
    w.addDocument(doc);
    final DirectoryReader dirReader = DirectoryReader.open(w);
    LeafReader reader = getOnlyLeafReader(dirReader);
    SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
            reader, "half_float").getDoubleValues();
    assertNotNull(FieldData.unwrapSingleton(values));
    values.setDocument(0);
    assertEquals(1, values.count());
    assertEquals(3f, values.valueAt(0), 0f);
    IOUtils.close(dirReader, w, dir);
}
Project: elasticsearch_my
File: InternalEngineTests.java
public static InternalEngine createInternalEngine(@Nullable final IndexWriterFactory indexWriterFactory,
                                                  @Nullable final Supplier<SequenceNumbersService> sequenceNumbersServiceSupplier,
                                                  final EngineConfig config) {
    return new InternalEngine(config) {
        @Override
        IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOException {
            return (indexWriterFactory != null) ?
                    indexWriterFactory.createWriter(directory, iwc) :
                    super.createWriter(directory, iwc);
        }

        @Override
        public SequenceNumbersService seqNoService() {
            return (sequenceNumbersServiceSupplier != null) ? sequenceNumbersServiceSupplier.get() : super.seqNoService();
        }
    };
}
Project: elasticsearch_my
File: CodecTests.java
private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(actual);
    IndexWriter iw = new IndexWriter(dir, iwc);
    iw.addDocument(new Document());
    iw.commit();
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
    String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
    assertNotNull(v);
    assertEquals(expected, Mode.valueOf(v));
    ir.close();
    dir.close();
}
Project: elasticsearch_my
File: StoreTests.java
public void testCanOpenIndex() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    IndexWriterConfig iwc = newIndexWriterConfig();
    Path tempDir = createTempDir();
    final BaseDirectoryWrapper dir = newFSDirectory(tempDir);
    assertFalse(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new StringField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    assertTrue(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    DirectoryService directoryService = new DirectoryService(shardId, INDEX_SETTINGS) {
        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    store.markStoreCorrupted(new CorruptIndexException("foo", "bar"));
    assertFalse(Store.canOpenIndex(logger, tempDir, shardId, (id, l) -> new DummyShardLock(id)));
    store.close();
}
Project: elasticsearch_my
File: VectorHighlighterTests.java
public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
Project: elasticsearch_my
File: VectorHighlighterTests.java
public void testVectorHighlighterNoStore() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
Project: elasticsearch_my
File: VectorHighlighterTests.java
public void testVectorHighlighterNoTermVector() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("content", "the big bad dog", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
Project: elasticsearch_my
File: SimpleLuceneTests.java
public void testSortValues() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        Document document = new Document();
        String text = new String(new char[]{(char) (97 + i), (char) (97 + i)});
        document.add(new TextField("str", text, Field.Store.YES));
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING)));
    for (int i = 0; i < 10; i++) {
        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
        assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(new String(new char[]{(char) (97 + i), (char) (97 + i)}))));
    }
}
Project: elasticsearch_my
File: SimpleLuceneTests.java
public void testSimpleNumericOps() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));
    BytesRefBuilder bytes = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
    topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
    doc = searcher.doc(topDocs.scoreDocs[0].doc);
    f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));
    indexWriter.close();
}
Project: elasticsearch_my
File: SimpleLuceneTests.java
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);
    indexWriter.close();
    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
Project: elasticsearch_my
File: VersionLookupTests.java
/**
 * test version lookup actually works
 */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    LeafReaderContext segment = reader.leaves().get(0);
    PerThreadIDAndVersionLookup lookup = new PerThreadIDAndVersionLookup(segment.reader());
    // found doc
    DocIdAndVersion result = lookup.lookup(new BytesRef("6"), null, segment);
    assertNotNull(result);
    assertEquals(87, result.version);
    assertEquals(0, result.docId);
    // not found doc
    assertNull(lookup.lookup(new BytesRef("7"), null, segment));
    // deleted doc
    assertNull(lookup.lookup(new BytesRef("6"), new Bits.MatchNoBits(1), segment));
    reader.close();
    writer.close();
    dir.close();
}
Project: elasticsearch_my
File: VersionsTests.java
/** Test that version map cache works, is evicted on close, etc */
public void testCache() throws Exception {
    int size = Versions.lookupStates.size();
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    // should increase cache size by 1
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());
    // should be cache hit
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());
    reader.close();
    writer.close();
    // core should be evicted from the map
    assertEquals(size, Versions.lookupStates.size());
    dir.close();
}
Project: elasticsearch_my
File: VersionsTests.java
/** Test that version map cache behaves properly with a filtered reader */
public void testCacheFilterReader() throws Exception {
    int size = Versions.lookupStates.size();
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document doc = new Document();
    doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
    doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
    writer.addDocument(doc);
    DirectoryReader reader = DirectoryReader.open(writer);
    assertEquals(87, Versions.loadVersion(reader, new Term(UidFieldMapper.NAME, "6")));
    assertEquals(size + 1, Versions.lookupStates.size());
    // now wrap the reader
    DirectoryReader wrapped = ElasticsearchDirectoryReader.wrap(reader, new ShardId("bogus", "_na_", 5));
    assertEquals(87, Versions.loadVersion(wrapped, new Term(UidFieldMapper.NAME, "6")));
    // same size map: core cache key is shared
    assertEquals(size + 1, Versions.lookupStates.size());
    reader.close();
    writer.close();
    // core should be evicted from the map
    assertEquals(size, Versions.lookupStates.size());
    dir.close();
}
Project: elasticsearch_my
File: SimpleAllTests.java
public void testNoTokens() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "", 2.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
    assertThat(docs.totalHits, equalTo(1));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
Project: alvisnlp
File: AlvisDBIndexer.java
@Override
public void process(ProcessingContext<Corpus> ctx, Corpus corpus) throws ModuleException {
    try (KeywordAnalyzer kwa = new KeywordAnalyzer()) {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, kwa);
        writerConfig.setOpenMode(append ? OpenMode.CREATE_OR_APPEND : OpenMode.CREATE);
        try (Directory dir = FSDirectory.open(indexDir)) {
            try (IndexWriter writer = new IndexWriter(dir, writerConfig)) {
                AlvisDBIndexerResolvedObjects resObj = getResolvedObjects();
                Logger logger = getLogger(ctx);
                EvaluationContext evalCtx = new EvaluationContext(logger);
                for (ADBElements.Resolved ent : resObj.elements) {
                    ent.indexElements(logger, writer, evalCtx, corpus);
                }
            }
        } catch (IOException e) {
            rethrow(e);
        }
    }
}
Project: as-full-text-search-server
File: LuceneBasicFlowExample.java
/**
 * Main entry point.
 *
 * @param args the command line arguments.
 * @throws IOException in case of I/O failure.
 * @throws ParseException in case of Query parse exception.
 */
public static void main(String[] args) throws IOException, ParseException {
    // 1. Creates a directory reference. This is where index datafiles will be created.
    Directory directory = FSDirectory.open(new File("/tmp").toPath());
    // 2. Creates an IndexWriter
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {
        // 3. Add some data
        indexSomeData(writer);
        // 4. Search
        search(directory);
        writer.deleteAll();
    }
}
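The indexSomeData and search helpers are defined elsewhere in LuceneBasicFlowExample. A minimal sketch of what such helpers could look like; the field name "title" and the query string are assumptions, not the project's actual code:

// Hypothetical helpers, consistent with the flow above but not shown in the original file.
private static void indexSomeData(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("title", "Lucene basic flow example", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit(); // make the document visible to the reader opened below
}

private static void search(Directory directory) throws IOException, ParseException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("title", new StandardAnalyzer());
        TopDocs hits = searcher.search(parser.parse("lucene"), 10);
        System.out.println("Total hits: " + hits.totalHits);
    }
}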
Project: elasticsearch-full
File: CreateIndexDemo.java
@Test
public void test() throws Exception {
    Path path = FileSystems.getDefault().getPath("", "index");
    Directory directory = FSDirectory.open(path);
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
    Document document = new Document();
    document.add(new LegacyLongField("id", 5499, Field.Store.YES));
    document.add(new Field("title", "小米6", TYPE_STORED));
    document.add(new Field("sellPoint", "骁龙835,6G内存,双摄!", TYPE_STORED));
    indexWriter.addDocument(document);
    indexWriter.commit();
    indexWriter.close();
}
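TYPE_STORED is not defined in the snippet; it is plausibly a static import of TextField.TYPE_STORED, or an equivalent custom FieldType along these lines (an assumption, not the project's actual code):

// Hypothetical equivalent of the TYPE_STORED constant used above.
static final FieldType TYPE_STORED = new FieldType(TextField.TYPE_STORED);
static {
    TYPE_STORED.freeze(); // make the field type immutable before use
}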
Project: NGB-master
File: FeatureIndexDao.java
/**
 * Stores features from a specified feature file in the specified project's Lucene index.
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId a FeatureFile, for which features to save
 * @param projectId a project, for which to write an index
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
                                       final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (
        StandardAnalyzer analyzer = new StandardAnalyzer();
        Directory index = fileManager.createIndexForProject(projectId);
        IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer).setOpenMode(
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);
            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }
            writer.addDocument(facetsConfig.build(document));
        }
    }
}
Project: NGB-master
File: FeatureIndexDao.java
/**
 * Deletes features from specified feature files from a project's index.
 *
 * @param projectId a project to delete index entries from
 * @param fileIds a list of Pairs of feature types to file IDs, whose entries to delete. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }
    try (
        StandardAnalyzer analyzer = new StandardAnalyzer();
        Directory index = fileManager.getIndexForProject(projectId);
        IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer).setOpenMode(
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND))
    ) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
Project: historybook
File: LuceneIndex.java
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
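The constructor assigns to fields declared elsewhere in LuceneIndex. A sketch of plausible declarations; the INDEXDIR value is an assumption, the other types follow from the assignments above:

// Hypothetical field declarations; this excerpt of LuceneIndex does not show them.
private static final String INDEXDIR = "index"; // assumed subdirectory name
private Path path;
private Directory dir;
private Analyzer analyzer;
private IndexWriter writer;
private DirectoryReader reader;
private IndexSearcher searcher;
private QueryParser parser;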
Project: related-searches
File: AbstractProfanityRemovingOutputWriter.java
/**
 * Initializes profanity set.
 *
 * @param dictFilePath
 *            dictionary file path
 */
private void initializeProfanitySet(String dictFilePath) {
    if (dictFilePath != null) {
        File file = new File(dictFilePath);
        if (file.exists() && file.isFile()) {
            try {
                IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
                IndexWriter indexWriter = new IndexWriter(directory, config);
                BufferedReader reader = new BufferedReader(new FileReader(file));
                Set<String> bannedWords = new HashSet<String>();
                String line = null;
                while ((line = reader.readLine()) != null) {
                    bannedWords.add(line.trim());
                    Document doc = new Document();
                    doc.add(new StringField(LUCENE_FIELD_NAME, line, Store.NO));
                    indexWriter.addDocument(doc);
                }
                this.bannedWords = bannedWords;
                indexWriter.close();
                reader.close();
            } catch (Exception ex) {
                LOG.error("Error reading file", ex);
            }
        }
    }
}
Project: SnowGraph
File: InMemoryIndex.java
public InMemoryIndex(Map<String, String> id2Text) {
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, iwc);
        for (String id : id2Text.keySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", id, Field.Store.YES));
            doc.add(new TextField("content", id2Text.get(id), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
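The directory field written to here is declared elsewhere in the class; given the class name InMemoryIndex, an in-memory directory is the likely choice (an assumption, not shown in the excerpt):

// Hypothetical field declaration; not shown in this excerpt of InMemoryIndex.
private final Directory directory = new RAMDirectory();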
Project: SnowGraph
File: CodePatternSearcher.java
private static List<String> search(List<String> contents, String query, int n) throws IOException, ParseException {
    List<String> r = new ArrayList<>();
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer()));
    for (String method : contents) {
        Document document = new Document();
        document.add(new TextField("content", method, Field.Store.YES));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    QueryParser qp = new QueryParser("content", new EnglishAnalyzer());
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs topDocs = indexSearcher.search(qp.parse(query), n);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        r.add(indexSearcher.doc(scoreDoc.doc).get("content"));
    }
    return r;
}
Project: lucene-tutorial
File: MessageIndexer.java
/**
 * Indexes documents using the provided Analyzer.
 *
 * @param create whether to create a new index or append to an existing one
 * @throws IOException
 */
public void index(final Boolean create, List<Document> documents, Analyzer analyzer) throws IOException {
    final Directory dir = FSDirectory.open(Paths.get(pathToIndexFolder));
    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    final IndexWriter w = new IndexWriter(dir, iwc);
    w.addDocuments(documents);
    w.close();
}
Project: elasticsearch-learning-to-rank
File: ExplorerQueryTests.java
@Before
public void setupIndex() throws Exception {
    dir = new RAMDirectory();
    try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        for (int i = 0; i < docs.length; i++) {
            Document doc = new Document();
            doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE));
            doc.add(newTextField("text", docs[i], Field.Store.YES));
            indexWriter.addDocument(doc);
        }
    }
    reader = DirectoryReader.open(dir);
    searcher = new IndexSearcher(reader);
}
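The docs array is a fixture defined elsewhere in the test class. A sketch of the kind of fixture it presumably is; the actual strings are assumptions:

// Hypothetical fixture; the real test class defines its own document texts.
private String[] docs = new String[] {
        "how now brown cow",
        "brown is the color of cows",
        "banana cows are yummy"
};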
Project: LiveQA
File: QaPairIndex.java
public static void createIndexQ(List<CQAResult> QASetList, Directory dir) {
    System.out.println("Creating Questions Index");
    IndexWriterConfig iwc = new IndexWriterConfig(ANALYZER.getVersion(), ANALYZER);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(dir, iwc);
        int id = 0; // XXX seq_id
        for (CQAResult qaSet : QASetList) {
            Document doc = new Document();
            if (qaSet.subject == null) {
                id++;
                continue;
            }
            doc.add(new IntField(QID, id++, Field.Store.YES));
            doc.add(new TextField(BEST_ANSWER_FIELD, qaSet.subject, Field.Store.NO));
            doc.add(new TextField(Q_DESCRIPTION, qaSet.content, Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Project: libooc
File: BaseUtil.java
/**
 * Obtain the shared IndexWriter.
 *
 * @return
 * @throws IOException
 */
protected static IndexWriter getIndexWriter() throws IOException {
    if (null != indexWriter) {
        return indexWriter;
    } else {
        // guard against concurrent initialization
        synchronized (IndexUtil.class) {
            // initialize the writer
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            // open an existing index, or create one if none exists
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, config);
        }
        return indexWriter;
    }
}
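The directory and indexWriter fields are declared elsewhere in BaseUtil. Plausible declarations under Lucene 3.5 (assumptions, not shown in the original):

// Hypothetical field declarations; this excerpt of BaseUtil does not show them.
private static volatile IndexWriter indexWriter;
private static Directory directory; // e.g. FSDirectory.open(new File("index")) in Lucene 3.5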
Project: lucene4ir
File: DocumentIndexer.java
public void createWriter(String indexPath) {
    /*
    The indexPath specifies where to create the index.
    */
    // I can imagine that there are lots of ways to create indexers -
    // we could add in some parameters to customize its creation.
    try {
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        System.out.println("Indexing to directory '" + indexPath + "'...");
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
Project: basekb-search
File: FreebaseIndexer.java
public void initializeIndexBuilder() throws Exception {
    // Create a new index directory and writer to index a triples file.
    // Raise an error if an index already exists, so we don't accidentally overwrite it.
    String indexDir = getIndexDirectoryName();
    if ((new File(indexDir)).isDirectory())
        throw new IOException("Index directory already exists, remove it before indexing");
    indexDirectory = FSDirectory.open(Paths.get(indexDir));
    IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer());
    // we always create a new index from scratch:
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default
    //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION)); // slower, but better compression
    indexWriter = new IndexWriter(indexDirectory, iwc);
    indexAnalyzer = getIndexAnalyzer();
    if (INDEX_PREDICATES) printlnProg("Indexing individual predicates");
    if (INDEX_TEXT) printlnProg("Indexing combined predicate text values");
    if (INDEX_LANGUAGE) printlnProg("Indexing predicates for language(s): " + supportedLanguages);
}
Project: drftpd3
File: LuceneEngine.java
/**
 * Opens all the needed streams that the engine needs to work properly.
 *
 * @throws IndexException
 */
private void openStreams() throws IndexException {
    try {
        if (_nativeLocking) {
            _storage = FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR));
        } else {
            _storage = FSDirectory.open(new File(INDEX_DIR));
        }
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
        conf.setMaxBufferedDocs(_maxDocsBuffer);
        conf.setRAMBufferSizeMB(_maxRAMBufferSize);
        _iWriter = new IndexWriter(_storage, conf);
    } catch (IOException e) {
        closeAll();
        throw new IndexException("Unable to initialize the index", e);
    }
}