/**
 * Builds the shared test index: between 1 and 20 matching documents
 * ("foo"="bar"), each preceded by 0-4 empty "hole" documents so that matching
 * doc ids are non-contiguous. Wraps the reader in a ContextIndexSearcher with
 * the (randomized) MAYBE_CACHE_POLICY. dir/reader/searcher are static fixtures,
 * presumably closed in a matching @AfterClass — not visible here.
 */
@BeforeClass
public static void setup() throws IOException {
  dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  final int numDocs = TestUtil.nextInt(random(), 1, 20);
  for (int i = 0; i < numDocs; ++i) {
    // Interleave empty documents to create gaps between matching docs.
    final int numHoles = random().nextInt(5);
    for (int j = 0; j < numHoles; ++j) {
      w.addDocument(new Document());
    }
    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Store.NO));
    w.addDocument(doc);
  }
  reader = w.getReader();
  w.close();
  Engine.Searcher engineSearcher = new Engine.Searcher("test", new IndexSearcher(reader));
  searcher = new ContextIndexSearcher(engineSearcher, IndexSearcher.getDefaultQueryCache(), MAYBE_CACHE_POLICY);
}
/**
 * Adds the given string values to the document as distinct non-analyzed fields
 * under indexFieldName, each truncated to MAX_STRING_LENGTH and stored iff
 * {@code stored} is set. Null values are dropped. Sorted fields accept at most
 * one value and additionally get a SortedDocValuesField for sorting.
 */
void addToDoc(Document doc, String... values) {
  Preconditions.checkArgument(valueType == String.class);
  if (isSorted()) {
    Preconditions.checkArgument(values.length < 2, "sorted fields cannot have multiple values");
  }
  // add distinct elements to doc
  final Iterable<String> nonNull = FluentIterable.from(Arrays.asList(values))
      .filter(new Predicate<String>() {
        @Override
        public boolean apply(@Nullable final String input) {
          return input != null;
        }
      });
  for (final String value : ImmutableSet.copyOf(nonNull)) {
    final String truncatedValue = StringUtils.abbreviate(value, MAX_STRING_LENGTH);
    doc.add(new StringField(indexFieldName, truncatedValue, stored ? Store.YES : Store.NO));
  }
  if (isSorted() && values.length == 1) {
    Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.STRING);
    // NOTE(review): the doc-values entry uses the UNtruncated value, unlike the
    // indexed field above — confirm this asymmetry is intentional.
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}
/**
 * Adds the given byte[] values to the document as distinct non-analyzed fields
 * under indexFieldName, each truncated to MAX_STRING_LENGTH bytes and stored
 * iff {@code stored} is set. Null values are dropped. Sorted fields accept at
 * most one value and additionally get a SortedDocValuesField.
 */
void addToDoc(Document doc, byte[]... values) {
  // NOTE(review): this checks valueType == String.class even though the input
  // is byte[] — looks copy-pasted from the String overload; confirm intended.
  Preconditions.checkArgument(valueType == String.class);
  if (isSorted()) {
    Preconditions.checkArgument(values.length < 2, "sorted fields cannot have multiple values");
  }
  // add distinct elements to doc
  final Iterable<byte[]> nonNull = FluentIterable.from(Arrays.asList(values))
      .filter(new Predicate<byte[]>() {
        @Override
        public boolean apply(@Nullable final byte[] input) {
          return input != null;
        }
      });
  for (final byte[] value : ImmutableSet.copyOf(nonNull)) {
    // Truncate by wrapping a bounded BytesRef view; the array is not copied.
    final BytesRef truncatedValue = new BytesRef(value, 0, Math.min(value.length, MAX_STRING_LENGTH));
    doc.add(new StringField(indexFieldName, truncatedValue, stored ? Store.YES : Store.NO));
  }
  if (isSorted() && values.length == 1) {
    Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.STRING);
    // NOTE(review): the doc-values entry uses the untruncated bytes — confirm intended.
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}
/**
 * Indexes 10000 key/value documents (one stored string field plus a matching
 * sorted doc-values field per doc), mirroring each pair into {@code data},
 * sleeping 1ms between additions. Stops early if the thread is interrupted.
 */
@Override
public void run() {
  try {
    for (int i = 0; i < 10000; ++i) {
      final Document document = new Document();
      final String key = "key" + i;
      final String val = "value" + i;
      document.add(new StringField(key, val, Field.Store.YES));
      // Pin the charset: the bare getBytes() used the platform default, which
      // makes index contents environment-dependent. ASCII values are identical
      // under UTF-8, so this is behavior-preserving on sane platforms.
      document.add(new SortedDocValuesField(key, new BytesRef(val.getBytes(java.nio.charset.StandardCharsets.UTF_8))));
      index.add(document);
      data.put(key, val);
      sleep(1);
    }
  } catch (InterruptedException e) {
    // Restore the interrupt flag instead of swallowing it, so owners of this
    // thread can still observe that it was interrupted.
    Thread.currentThread().interrupt();
  }
}
/**
 * Initializes the profanity set from a dictionary file: each line is trimmed
 * into {@code bannedWords} and indexed as a non-stored keyword field. Silently
 * does nothing if the path is null or not a regular file; read/index errors
 * are logged and leave {@code bannedWords} unchanged.
 *
 * @param dictFilePath dictionary file path, one banned word per line
 */
private void initializeProfanitySet(String dictFilePath) {
  if (dictFilePath == null) {
    return;
  }
  File file = new File(dictFilePath);
  if (!file.exists() || !file.isFile()) {
    return;
  }
  IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
  // try-with-resources: the previous version leaked both the writer and the
  // reader when any exception was thrown before the explicit close() calls.
  try (IndexWriter indexWriter = new IndexWriter(directory, config);
       BufferedReader reader = new BufferedReader(new FileReader(file))) {
    Set<String> bannedWords = new HashSet<String>();
    String line;
    while ((line = reader.readLine()) != null) {
      bannedWords.add(line.trim());
      Document doc = new Document();
      // NOTE(review): the raw (untrimmed) line is indexed while the trimmed
      // form goes into bannedWords — confirm this asymmetry is intentional.
      doc.add(new StringField(LUCENE_FIELD_NAME, line, Store.NO));
      indexWriter.addDocument(doc);
    }
    // Publish only after the whole file was read successfully.
    this.bannedWords = bannedWords;
  } catch (Exception ex) {
    LOG.error("Error reading file", ex);
  }
}
/**
 * Initializes the field metadata for the target class: collects field names,
 * Lucene field types, store flags and multiplicity via initList(), then
 * materializes them into the parallel arrays, the FieldEnum lookup table, and
 * the name-to-index map used for fast field resolution.
 *
 * @param target target class whose fields are mapped
 */
@SuppressWarnings("unchecked")
protected void initFieldTypes(Class<T> target) {
  this.target = target;
  List<String> names = new ArrayList<String>();
  List<Class<? extends Field>> types = new ArrayList<Class<? extends Field>>();
  List<Store> storeFlags = new ArrayList<Store>();
  List<Boolean> multiFlags = new ArrayList<Boolean>();
  initList("", target, names, types, storeFlags, multiFlags, null);
  this.fieldNames = names.toArray(new String[0]);
  this.fieldTypes = types.toArray(new Class[0]);
  this.stores = storeFlags.toArray(new Store[0]);
  this.isMultiples = multiFlags.toArray(new Boolean[0]);
  // Resolve each field type's enum constant by its simple class name.
  int typeCount = types.size();
  this.fieldTypeEnums = new FieldEnum[typeCount];
  for (int i = 0; i < typeCount; i++) {
    this.fieldTypeEnums[i] = FieldEnum.valueOf(types.get(i).getSimpleName());
  }
  // Index map: field name -> position in the parallel arrays.
  this.fieldsIndex = new HashMap<String, Integer>();
  for (int i = 0; i < names.size(); i++) {
    this.fieldsIndex.put(names.get(i), i);
  }
}
/**
 * Builds the example index: two documents with an analyzed "c" text field, a
 * "popularity" numeric doc-values field, and one facet dimension "A" each
 * (values B and C). Facet ordinals are written to a separate taxonomy index.
 */
private void index() throws IOException {
  IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
      new WhitespaceAnalyzer()));
  // Writes facet ords to a separate directory from the main index
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  Document doc = new Document();
  doc.add(new TextField("c", "foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 5L));
  doc.add(new FacetField("A", "B"));
  // config.build() translates the FacetField into index/taxonomy entries.
  indexWriter.addDocument(config.build(taxoWriter, doc));
  doc = new Document();
  doc.add(new TextField("c", "foo foo bar", Store.NO));
  doc.add(new NumericDocValuesField("popularity", 3L));
  doc.add(new FacetField("A", "C"));
  indexWriter.addDocument(config.build(taxoWriter, doc));
  // NOTE(review): writers are not closed if an earlier call throws; acceptable
  // for example code, but a try/finally would be safer.
  indexWriter.close();
  taxoWriter.close();
}
/**
 * Verifies that FieldType.indexed() reflects how a field was actually added:
 * a stored-only field reports indexed() == false after reload, while a
 * StringField (indexed by definition) reports indexed() == true.
 */
public void testIndexedBit() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Custom type: stored but NOT indexed.
  FieldType onlyStored = new FieldType();
  onlyStored.setStored(true);
  doc.add(new Field("field", "value", onlyStored));
  doc.add(new StringField("field2", "value", Field.Store.YES));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  w.close();
  assertFalse(r.document(0).getField("field").fieldType().indexed());
  assertTrue(r.document(0).getField("field2").fieldType().indexed());
  r.close();
  dir.close();
}
/**
 * Regression test for bulk-merging stored fields in the presence of deletes:
 * indexes >=200 docs with merging disabled, deletes a random subset, then
 * force-merges with a fresh writer and checks the index is still consistent.
 */
public void testBulkMergeWithDeletes() throws IOException {
  final int numDocs = atLeast(200);
  Directory dir = newDirectory();
  // NoMergePolicy keeps segments separate so the later forceMerge exercises
  // the bulk-merge path across many small segments.
  RandomIndexWriter w = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(i), Store.YES));
    doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
    w.addDocument(doc);
  }
  // Delete a random subset (duplicate ids possible, so actual deletions may be fewer).
  final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
  for (int i = 0; i < deleteCount; ++i) {
    final int id = random().nextInt(numDocs);
    w.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  w.commit();
  w.close();
  w = new RandomIndexWriter(random(), dir);
  w.forceMerge(TestUtil.nextInt(random(), 1, 3));
  w.commit();
  w.close();
  TestUtil.checkIndex(dir);
  dir.close();
}
/**
 * Verifies that writing with an old read/write codec fails once old-format
 * impersonation is disabled: close() must throw UnsupportedOperationException,
 * after which the writer is rolled back. The flag is restored in finally so
 * other tests are unaffected.
 */
public void testDisableImpersonation() throws Exception {
  Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(), new Lucene41RWCodec(), new Lucene42RWCodec() };
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setCodec(oldCodecs[random().nextInt(oldCodecs.length)]);
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("f", "bar", Store.YES));
  doc.add(new NumericDocValuesField("n", 18L));
  writer.addDocument(doc);
  // Disabling impersonation must make the old-format write path fail on flush.
  OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false;
  try {
    writer.close();
    fail("should not have succeeded to impersonate an old format!");
  } catch (UnsupportedOperationException e) {
    writer.rollback();
  } finally {
    OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true;
  }
  dir.close();
}
/**
 * Updates the same binary doc-values field twice within one commit, against
 * both a flushed and an in-memory document matching the same term; the last
 * update (3) must win for every matching document.
 */
public void testUpdateSameDocMultipleTimes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("key", "doc", Store.NO));
  doc.add(new BinaryDocValuesField("bdv", toBytes(5L)));
  writer.addDocument(doc); // flushed document
  writer.commit();
  writer.addDocument(doc); // in-memory document
  writer.updateBinaryDocValue(new Term("key", "doc"), "bdv", toBytes(17L)); // update existing field
  writer.updateBinaryDocValue(new Term("key", "doc"), "bdv", toBytes(3L)); // update existing field 2nd time in this commit
  writer.close();
  final DirectoryReader reader = DirectoryReader.open(dir);
  final AtomicReader r = SlowCompositeReaderWrapper.wrap(reader);
  BinaryDocValues bdv = r.getBinaryDocValues("bdv");
  for (int i = 0; i < r.maxDoc(); i++) {
    assertEquals(3, getValue(bdv, i));
  }
  reader.close();
  dir.close();
}
/**
 * Regression test: updating a binary doc-values field whose name is also used
 * as a postings (indexed) field must work. This used to fail because
 * FieldInfos.Builder neglected to update the globalFieldMaps.docValueTypes map.
 */
public void testUpdateBinaryDVFieldWithSameNameAsPostingField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  // Same name "f" for both the indexed field and the BDV field.
  doc.add(new StringField("f", "mock-value", Store.NO));
  doc.add(new BinaryDocValuesField("f", toBytes(5L)));
  writer.addDocument(doc);
  writer.commit();
  writer.updateBinaryDocValue(new Term("f", "mock-value"), "f", toBytes(17L));
  writer.close();
  DirectoryReader r = DirectoryReader.open(dir);
  BinaryDocValues bdv = r.leaves().get(0).reader().getBinaryDocValues("f");
  assertEquals(17, getValue(bdv, 0));
  r.close();
  dir.close();
}
/**
 * Verifies that buffered binary doc-values updates are applied in submission
 * order across different terms matching the same document: after the sequence
 * below, f1 must end at 4 (last update) and f2 at 3.
 */
public void testUpdatesOrder() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  // Two terms in the same field, both matching this single document.
  doc.add(new StringField("upd", "t1", Store.NO));
  doc.add(new StringField("upd", "t2", Store.NO));
  doc.add(new BinaryDocValuesField("f1", toBytes(1L)));
  doc.add(new BinaryDocValuesField("f2", toBytes(1L)));
  writer.addDocument(doc);
  writer.updateBinaryDocValue(new Term("upd", "t1"), "f1", toBytes(2L)); // update f1 to 2
  writer.updateBinaryDocValue(new Term("upd", "t1"), "f2", toBytes(2L)); // update f2 to 2
  writer.updateBinaryDocValue(new Term("upd", "t2"), "f1", toBytes(3L)); // update f1 to 3
  writer.updateBinaryDocValue(new Term("upd", "t2"), "f2", toBytes(3L)); // update f2 to 3
  writer.updateBinaryDocValue(new Term("upd", "t1"), "f1", toBytes(4L)); // update f1 to 4 (but not f2)
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(4, getValue(reader.leaves().get(0).reader().getBinaryDocValues("f1"), 0));
  assertEquals(3, getValue(reader.leaves().get(0).reader().getBinaryDocValues("f2"), 0));
  reader.close();
  dir.close();
}
/**
 * Applies a binary doc-values update when the first segment consists entirely
 * of deleted documents: the update must still reach the surviving in-memory
 * document, and the fully-deleted segment must be dropped (one leaf remains).
 */
public void testUpdateAllDeletedSegment() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "doc", Store.NO));
  doc.add(new BinaryDocValuesField("f1", toBytes(1L)));
  writer.addDocument(doc);
  writer.addDocument(doc);
  writer.commit();
  writer.deleteDocuments(new Term("id", "doc")); // delete all docs in the first segment
  writer.addDocument(doc);
  writer.updateBinaryDocValue(new Term("id", "doc"), "f1", toBytes(2L));
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  assertEquals(2L, getValue(reader.leaves().get(0).reader().getBinaryDocValues("f1"), 0));
  reader.close();
  dir.close();
}
/**
 * Issues binary doc-values updates for two terms that do not exist in the
 * index: neither may alter the stored value, and the writer must close
 * cleanly (regression for buffering updates on nonexistent terms).
 */
public void testUpdateTwoNonexistingTerms() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "doc", Store.NO));
  doc.add(new BinaryDocValuesField("f1", toBytes(1L)));
  writer.addDocument(doc);
  // update w/ multiple nonexisting terms in same field
  writer.updateBinaryDocValue(new Term("c", "foo"), "f1", toBytes(2L));
  writer.updateBinaryDocValue(new Term("c", "bar"), "f1", toBytes(2L));
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  // Original value must be untouched.
  assertEquals(1L, getValue(reader.leaves().get(0).reader().getBinaryDocValues("f1"), 0));
  reader.close();
  dir.close();
}
/**
 * Builds a small in-memory index of ten documents, each carrying a single
 * stored keyword field "foo" with values "0" through "9".
 *
 * @return the populated (committed and closed) RAMDirectory
 */
private static Directory createIndex() throws IOException {
  Directory directory = new RAMDirectory();
  IndexWriter writer = getWriter(directory);
  for (int docNum = 0; docNum < 10; docNum++) {
    Document document = new Document();
    document.add(new StringField("foo", Integer.toString(docNum), Store.YES));
    writer.addDocument(document);
  }
  writer.commit();
  writer.close();
  return directory;
}
/**
 * Numeric doc-values twin of the binary variant: updates the same NDV field
 * twice within one commit, against both a flushed and an in-memory document;
 * the last update (3) must win for every matching document.
 */
@Test
public void testUpdateSameDocMultipleTimes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("key", "doc", Store.NO));
  doc.add(new NumericDocValuesField("ndv", 5));
  writer.addDocument(doc); // flushed document
  writer.commit();
  writer.addDocument(doc); // in-memory document
  writer.updateNumericDocValue(new Term("key", "doc"), "ndv", 17L); // update existing field
  writer.updateNumericDocValue(new Term("key", "doc"), "ndv", 3L); // update existing field 2nd time in this commit
  writer.close();
  final DirectoryReader reader = DirectoryReader.open(dir);
  final AtomicReader r = SlowCompositeReaderWrapper.wrap(reader);
  NumericDocValues ndv = r.getNumericDocValues("ndv");
  for (int i = 0; i < r.maxDoc(); i++) {
    assertEquals(3, ndv.get(i));
  }
  reader.close();
  dir.close();
}
/**
 * Regression test: updating a numeric doc-values field whose name is also used
 * as a postings (indexed) field must work. This used to fail because
 * FieldInfos.Builder neglected to update the globalFieldMaps.docValueTypes map.
 */
@Test
public void testUpdateNumericDVFieldWithSameNameAsPostingField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  // Same name "f" for both the indexed field and the NDV field.
  doc.add(new StringField("f", "mock-value", Store.NO));
  doc.add(new NumericDocValuesField("f", 5));
  writer.addDocument(doc);
  writer.commit();
  writer.updateNumericDocValue(new Term("f", "mock-value"), "f", 17L);
  writer.close();
  DirectoryReader r = DirectoryReader.open(dir);
  NumericDocValues ndv = r.leaves().get(0).reader().getNumericDocValues("f");
  assertEquals(17, ndv.get(0));
  r.close();
  dir.close();
}
/**
 * Verifies that buffered numeric doc-values updates are applied in submission
 * order across different terms matching the same document: after the sequence
 * below, f1 must end at 4 (last update) and f2 at 3.
 */
@Test
public void testUpdatesOrder() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  // Two terms in the same field, both matching this single document.
  doc.add(new StringField("upd", "t1", Store.NO));
  doc.add(new StringField("upd", "t2", Store.NO));
  doc.add(new NumericDocValuesField("f1", 1L));
  doc.add(new NumericDocValuesField("f2", 1L));
  writer.addDocument(doc);
  writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 2L); // update f1 to 2
  writer.updateNumericDocValue(new Term("upd", "t1"), "f2", 2L); // update f2 to 2
  writer.updateNumericDocValue(new Term("upd", "t2"), "f1", 3L); // update f1 to 3
  writer.updateNumericDocValue(new Term("upd", "t2"), "f2", 3L); // update f2 to 3
  writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 4L); // update f1 to 4 (but not f2)
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(4, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0));
  assertEquals(3, reader.leaves().get(0).reader().getNumericDocValues("f2").get(0));
  reader.close();
  dir.close();
}
/**
 * Applies a numeric doc-values update when the first segment consists entirely
 * of deleted documents: the update must still reach the surviving in-memory
 * document, and the fully-deleted segment must be dropped (one leaf remains).
 */
@Test
public void testUpdateAllDeletedSegment() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "doc", Store.NO));
  doc.add(new NumericDocValuesField("f1", 1L));
  writer.addDocument(doc);
  writer.addDocument(doc);
  writer.commit();
  writer.deleteDocuments(new Term("id", "doc")); // delete all docs in the first segment
  writer.addDocument(doc);
  writer.updateNumericDocValue(new Term("id", "doc"), "f1", 2L);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  assertEquals(2L, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0));
  reader.close();
  dir.close();
}
/**
 * Issues numeric doc-values updates for two terms that do not exist in the
 * index: neither may alter the stored value, and the writer must close
 * cleanly (regression for buffering updates on nonexistent terms).
 */
@Test
public void testUpdateTwoNonexistingTerms() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "doc", Store.NO));
  doc.add(new NumericDocValuesField("f1", 1L));
  writer.addDocument(doc);
  // update w/ multiple nonexisting terms in same field
  writer.updateNumericDocValue(new Term("c", "foo"), "f1", 2L);
  writer.updateNumericDocValue(new Term("c", "bar"), "f1", 2L);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  // Original value must be untouched.
  assertEquals(1L, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0));
  reader.close();
  dir.close();
}
/**
 * Verifies that reopening an index and adding the same field name with a
 * different doc-values type (SORTED_SET then BINARY) is rejected with
 * IllegalArgumentException, even across writer sessions, and that the index
 * remains usable (forceMerge/close succeed) afterwards.
 */
public void testMixedTypesAfterReopenAppend2() throws IOException {
  assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("foo", new BytesRef("foo")));
  w.addDocument(doc);
  w.close();
  doc = new Document();
  w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  doc.add(new BinaryDocValuesField("foo", new BytesRef("foo")));
  try {
    // NOTE: this case follows a different code path inside
    // DefaultIndexingChain/FieldInfos, because the field (foo)
    // is first added without DocValues:
    w.addDocument(doc);
    fail("did not get expected exception");
  } catch (IllegalArgumentException iae) {
    // expected
  }
  w.forceMerge(1);
  w.close();
  dir.close();
}
/**
 * Verifies FieldCache.getDocsWithField for a doc-values field "dv": both
 * documents carry the NDV field (the second additionally indexes "dv" as
 * text), so the resulting bits must be set for both docs.
 */
public void testDocsWithField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  doc = new Document();
  // Same name used for both a text field and a doc-values field.
  doc.add(new TextField("dv", "some text", Field.Store.NO));
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  DirectoryReader r = writer.getReader();
  writer.close();
  AtomicReader subR = r.leaves().get(0).reader();
  assertEquals(2, subR.numDocs());
  Bits bits = FieldCache.DEFAULT.getDocsWithField(subR, "dv");
  assertTrue(bits.get(0));
  assertTrue(bits.get(1));
  r.close();
  dir.close();
}
/**
 * Regression for LUCENE-5192: FieldInfos.Builder neglected to update the
 * globalFieldNumbers.docValuesType map when the field already existed, which
 * allowed re-adding the same field with a different doc-values type. Adding
 * "f" as BINARY after it exists as NUMERIC must throw IllegalArgumentException.
 */
public void testSameFieldNameForPostingAndDocValue() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("f", "mock-value", Store.NO));
  doc.add(new NumericDocValuesField("f", 5));
  writer.addDocument(doc);
  writer.commit();
  doc = new Document();
  doc.add(new BinaryDocValuesField("f", new BytesRef("mock")));
  try {
    writer.addDocument(doc);
    fail("should not have succeeded to add a field with different DV type than what already exists");
  } catch (IllegalArgumentException e) {
    writer.rollback();
  }
  dir.close();
}
/**
 * Maps a news model onto a Lucene document, storing every property as a
 * non-analyzed keyword field.
 *
 * NOTE(review): StringField is not tokenized, so "title" and "content" will
 * only match exact-value queries — confirm TextField was not intended there.
 *
 * @param news the news detail to convert
 * @return a document with id, url, title, content and time fields
 */
public static Document newsToDocument(NewsDetailModel news) {
  Document document = new Document();
  document.add(new StringField("id", news.getId(), Store.YES));
  document.add(new StringField("url", news.getUrl(), Store.YES));
  document.add(new StringField("title", news.getTitle(), Store.YES));
  document.add(new StringField("content", news.getContent(), Store.YES));
  document.add(new StringField("time", news.getTime(), Store.YES));
  return document;
}
/**
 * Indexes one item: a stored keyword id, the text as a stored/indexed field
 * with term vectors, and the publication time as a stored long. Silently
 * drops the document if the writer has not been opened.
 */
public void index(Item item) throws IOException {
  String id = item.getId();
  String text = item.getText();
  // NOTE: "publicationTIme" (capital I) is the actual stored field name; it is
  // part of the index schema and must not be "fixed" without a reindex.
  long publicationTIme = item.getPublicationTime();
  Document document = new Document();
  Field idField = new StringField("id", id, Store.YES);
  document.add(idField);
  // Custom type: stored, indexed, with term vectors for the text body.
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  fieldType.setIndexed(true);
  fieldType.setStoreTermVectors(true);
  document.add(new Field("text", text, fieldType));
  document.add(new LongField("publicationTIme", publicationTIme, LongField.TYPE_STORED));
  if (iwriter != null) {
    iwriter.addDocument(document);
  }
}
/**
 * Writes one rule to the index as a document: for every attribute declared by
 * the rules source, a "-1" field holds the text-side value and a "-2" field
 * the hypothesis-side value; the probability is stored as a string in the
 * value field. Document additions are serialized on the shared writer.
 */
private void addRule(Annotation at, Annotation ah, double prob) {
  Document ruleDoc = new Document();
  for (String attribute : rulesSource.uses()) {
    String textValue = IndexRulesSource.value(attribute, at);
    String hypoValue = IndexRulesSource.value(attribute, ah);
    ruleDoc.add(new StringField(attribute + "-1", textValue, Store.YES));
    ruleDoc.add(new StringField(attribute + "-2", hypoValue, Store.YES));
  }
  ruleDoc.add(new StringField(IndexRulesSource.VALUE_FIELD, "" + prob, Store.YES));
  // The writer is shared across extraction threads.
  synchronized (writer) {
    try {
      writer.addDocument(ruleDoc);
    } catch (IOException e) {
      log.debug(e);
    }
  }
}
/**
 * Extracts rules from the given entailment pairs into a fresh Lucene index at
 * {@code path}. A leading marker document records the attribute names the
 * rules source uses, then {@code start} drives rule extraction (which writes
 * via the shared {@code writer} field), and finally merges are awaited and the
 * writer closed.
 *
 * @param path filesystem directory for the index (created/overwritten)
 * @param aps  annotated entailment pairs to extract rules from
 */
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
  log.info("Rules extraction started.");
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_47, new WhitespaceAnalyzer(Version.LUCENE_47));
  conf.setOpenMode(OpenMode.CREATE);
  writer = new IndexWriter(FSDirectory.open(new File(path)), conf);
  // Marker document describing the schema: which attributes rule docs carry.
  Document doc = new Document();
  doc.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
  for (String u : rulesSource.uses())
    doc.add(new StringField(IndexRulesSource.USES_FIELD, u, Store.YES));
  writer.addDocument(doc);
  start(aps.iterator());
  writer.waitForMerges();
  writer.close(true);
  log.info(cache.size() + " rules extracted!");
}
/**
 * Indexes one tweet as a document: the analyzed text, username and timestamp
 * are stored for retrieval; tweet id and user id are indexed longs only.
 */
private static void addTweetToIndex(Tweet headline, IndexWriter writer) throws IOException {
  Document doc = new Document();
  doc.add(new TextField(FieldNames.TEXT.name(), headline.getText(), Field.Store.YES));
  doc.add(new LongField(FieldNames.TWEETID.name(), headline.getTweetID(), Field.Store.NO));
  doc.add(new StringField(FieldNames.USERNAME.name(), headline.getUserName(), Field.Store.YES));
  doc.add(new LongField(FieldNames.USERID.name(), headline.getUserID(), Field.Store.NO));
  doc.add(new LongField(FieldNames.TIMESTAMP.name(), headline.getTimestamp().getTime(), Field.Store.YES));
  writer.addDocument(doc);
}
/**
 * Adds one tour document to the search index: the doc-source discriminator,
 * the tour id, the time (with the long field type from createFieldType_Long),
 * and — when present — analyzed title and description text fields.
 *
 * @param indexWriter open writer to add the document to
 * @param tourId      id of the tour being indexed
 * @param title       tour title, may be null (field omitted)
 * @param description tour description, may be null (field omitted)
 * @param time        tour time value for the SEARCH_FIELD_TIME field
 */
private static void createDoc_Tour(
    final IndexWriter indexWriter,
    final long tourId,
    final String title,
    final String description,
    final long time) throws IOException {
  final Document doc = new Document();
  doc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long()));
  if (title != null) {
    doc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text()));
  }
  if (description != null) {
    doc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text()));
  }
  indexWriter.addDocument(doc);
}
/**
 * Indexes one concept term: the term itself goes into an analyzed vector
 * field, while all concept attributes are stored as exact-match keyword
 * fields for later retrieval and filtering.
 *
 * NOTE(review): StringField rejects null values — confirm attributes such as
 * standardConcept are always non-null for indexed concepts.
 *
 * @throws RuntimeException if the index is not open for writing or on any indexing failure
 */
public void addTermToIndex(String term, String termType, Concept concept) {
  if (writer == null)
    throw new RuntimeException("Indexed not open for writing");
  try {
    Document conceptDoc = new Document();
    conceptDoc.add(new StringField("TYPE", CONCEPT_TYPE_STRING, Store.YES));
    conceptDoc.add(new Field("TERM", term, textVectorField));
    conceptDoc.add(new StringField("CONCEPT_ID", Integer.toString(concept.conceptId), Store.YES));
    conceptDoc.add(new StringField("DOMAIN_ID", concept.domainId, Store.YES));
    conceptDoc.add(new StringField("VOCABULARY_ID", concept.vocabularyId, Store.YES));
    conceptDoc.add(new StringField("CONCEPT_CLASS_ID", concept.conceptClassId, Store.YES));
    conceptDoc.add(new StringField("STANDARD_CONCEPT", concept.standardConcept, Store.YES));
    conceptDoc.add(new StringField("TERM_TYPE", termType, Store.YES));
    writer.addDocument(conceptDoc);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
/**
 * Demonstration of how StandardAnalyzer tokenizes text containing an en dash
 * (U+2013): indexes "Russia\u2013United States relations" and runs four query
 * variants (plain terms, exact phrase, hyphen, en dash) to compare matches.
 */
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
  final Directory dir = new RAMDirectory();
  final StandardAnalyzer analyzer = new StandardAnalyzer();
  final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
  final IndexWriter writer = new IndexWriter(dir, conf);
  final Document doc = new Document();
  // \u2013 is EN DASH, not the ASCII hyphen used in some of the queries below.
  doc.add(new TextField("f", "Russia\u2013United States relations", Store.YES));
  writer.addDocument(doc);
  writer.close();
  final DirectoryReader reader = DirectoryReader.open(dir);
  final IndexSearcher searcher = new IndexSearcher(reader);
  final QueryParser qp = new QueryParser("f", analyzer);
  search(searcher, qp, "Russia United States relations");
  search(searcher, qp, "\"Russia United states relations\"");
  search(searcher, qp, "\"Russia-United states relations\"");
  search(searcher, qp, "\"Russia\u2013United states relations\"");
  reader.close();
  dir.close();
}
/**
 * Per-test fixture: indexes four small documents (ids "0".."3") with an id
 * keyword field and a "field" text field, then opens the reader/searcher used
 * by the tests. Teardown of dirUnderTest/indexReaderUnderTest is presumably in
 * a matching @After — not visible here.
 */
@Before
public void setupIndex() throws IOException {
  dirUnderTest = newDirectory();
  indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest);
  String[] docs = new String[] { "how now brown cow", "woc", "nworb", "won woh nworb" };
  for (int i = 0; i < docs.length; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + i, Field.Store.YES));
    doc.add(newField("field", docs[i], Field.Store.NO));
    indexWriterUnderTest.addDocument(doc);
  }
  indexWriterUnderTest.commit();
  indexReaderUnderTest = indexWriterUnderTest.getReader();
  searcherUnderTest = newSearcher(indexReaderUnderTest);
}
/** * 将一个pojo类转换为lucene的document文档<br> * 遵守一一对应原则 * * @param t * @return */ public static <T> Document pojo2Document(T t) { Method[] methods = t.getClass().getDeclaredMethods(); Integer methodsLength = methods.length; String methodName = null; String menberName = null; Integer set = null; Document doc = new Document(); try { for (int i = 0; i < methodsLength; i++) { methodName = methods[i].getName(); if (methodName.startsWith("get") || methodName.startsWith("is")) { set = methodName.startsWith("get") ? 3 : 2; menberName = Introspector.decapitalize(methodName .substring(set, methodName.length())); doc.add(new Field(menberName, String.valueOf(methods[i].invoke(t)), Store.YES, Index.ANALYZED)); } } } catch (Exception e) { e.printStackTrace(); } return doc; }
/**
 * Builds the Lucene document for this entity: always the communication UUID,
 * communication id and sentence UUID fields (via ConcreteLuceneConstants),
 * plus — only when present — a non-stored author-id keyword field and the
 * text content under the shared content field type.
 */
@Override
public final Document getDocument() {
  final Document d = new Document();
  d.add(ConcreteLuceneConstants.getCommunicationUUIDField(this.getUUID()));
  d.add(ConcreteLuceneConstants.getCommunicationIDField(this.getCommunicationID()));
  d.add(ConcreteLuceneConstants.getSentenceUUIDField(this.getSentence().getUUID()));
  // Optional author id: indexed for filtering, not stored.
  this.getAuthorId().ifPresent(aid -> {
    d.add(new StringField(ConcreteLuceneConstants.AUTHOR_ID_FIELD, aid.toString(), Store.NO));
  });
  // Optional text span content, indexed with the shared content field type.
  this.getTextSpan()
      .map(MiscTextSpan::getText)
      .ifPresent(txt -> d.add(new Field(ConcreteLuceneConstants.TEXT_FIELD, txt.getContent(),
          ConcreteLuceneConstants.getContentFieldType())));
  return d;
}
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { int pmid = BlueCasUtil.getHeaderIntDocId(jCas); if (!BlueCasUtil.isEmptyText(jCas)) { // System.out.println("indexing:: " + pmid); Document doc = new Document(); doc.add(new IntField(PMID_FIELD, pmid, Store.YES)); doc.add(new TextField(CONTENT_FIELD, jCas.getDocumentText(), Store.YES)); doc.add(new TextField(TITLE_FIELD, getTitle(jCas), Store.YES)); try { indexWriter.addDocument(doc); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } } }
/**
 * Converts every record of a row into its list of Lucene fields, tagging the
 * first record's field list with the prime-doc marker field.
 *
 * @return one field list per record, or null when the row has no records
 */
public static List<List<Field>> getDocs(Row row, FieldManager fieldManager) throws IOException {
  List<Record> records = row.records;
  if (records == null || records.isEmpty()) {
    return null;
  }
  String rowId = row.id;
  List<List<Field>> docs = new ArrayList<List<Field>>(records.size());
  for (Record record : records) {
    docs.add(getDoc(fieldManager, rowId, record));
  }
  // The first record of a row is marked as the prime doc.
  docs.get(0).add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
  return docs;
}
/**
 * Builds a small two-document in-memory index: a prime doc carrying the
 * prime-doc marker plus "a"/"family" fields, and a second doc with a single
 * "a" field, then returns a reader over it.
 */
private IndexReader getReader() throws CorruptIndexException, LockObtainFailedException, IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
  IndexWriter writer = new IndexWriter(directory, conf);
  Document doc = new Document();
  doc.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
  doc.add(new StringField("a", "b", Store.YES));
  doc.add(new StringField("family", "f1", Store.YES));
  Document doc1 = new Document();
  // BUG FIX: this field was previously added to `doc` instead of `doc1`,
  // leaving doc1 empty and duplicating "a"="b" on the first document.
  doc1.add(new StringField("a", "b", Store.YES));
  writer.addDocument(doc);
  writer.addDocument(doc1);
  writer.close();
  return DirectoryReader.open(directory);
}