/**
 * Indexes a single file as one document.
 *
 * <p>The document holds the file path (stored), the supplied modification time as a
 * {@code LongPoint}, and the file contents read as UTF-8. When the writer's open mode is
 * {@code OpenMode.CREATE} the document is added; otherwise the document whose "path" term
 * equals this file's path is replaced.
 *
 * @param writer index writer that receives the document
 * @param file file to read and index
 * @param lastModified value indexed in the "modified" field
 * @throws IOException if the file cannot be opened or the index cannot be written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        final String pathValue = file.toString();

        Document document = new Document();
        document.add(new StringField("path", pathValue, Field.Store.YES));
        document.add(new LongPoint("modified", lastModified));
        BufferedReader contents = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
        document.add(new TextField("contents", contents));

        final boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (!freshIndex) {
            // An older copy of this file may already be indexed: replace it by path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", pathValue), document);
            return;
        }
        System.out.println("adding " + file);
        writer.addDocument(document);
    }
}
/**
 * Builds the shared test index: between 1 and 20 documents with {@code foo=bar}, each
 * preceded by {@code random().nextInt(5)} empty documents, then opens the reader and
 * searcher used by the tests.
 *
 * @throws IOException if the index cannot be written or opened
 */
@BeforeClass
public static void setup() throws IOException {
    dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);
    final int docCount = TestUtil.nextInt(random(), 1, 20);
    for (int docIndex = 0; docIndex < docCount; ++docIndex) {
        // Empty documents create "holes" between the matching documents.
        for (int holes = random().nextInt(5); holes > 0; --holes) {
            indexWriter.addDocument(new Document());
        }
        Document matching = new Document();
        matching.add(new StringField("foo", "bar", Store.NO));
        indexWriter.addDocument(matching);
    }
    reader = indexWriter.getReader();
    indexWriter.close();

    IndexSearcher plainSearcher = new IndexSearcher(reader);
    Engine.Searcher engineSearcher = new Engine.Searcher("test", plainSearcher);
    searcher = new ContextIndexSearcher(engineSearcher, IndexSearcher.getDefaultQueryCache(), MAYBE_CACHE_POLICY);
}
/**
 * Checks that field data loaded for a field absent from the index reports zero RAM usage
 * and that every load returns a new instance.
 */
public void testEmpty() throws Exception {
    Document indexed = new Document();
    indexed.add(new StringField("field", "value", Field.Store.NO));
    writer.addDocument(indexed);
    refreshReader();

    IndexFieldData fieldData = getForField("non_existing_field");
    final int rounds = randomInt(7);
    for (LeafReaderContext leaf : readerContexts) {
        AtomicFieldData last = null;
        int round = 0;
        while (round < rounds) {
            AtomicFieldData loaded = fieldData.load(leaf);
            assertThat(loaded.ramBytesUsed(), equalTo(0L));
            if (last != null) {
                assertThat(loaded, not(sameInstance(last)));
            }
            last = loaded;
            round++;
        }
    }
}
/**
 * Checks {@code Store.canOpenIndex} in three states: before any commit (false), after a
 * committed document (true), and after the store has been marked corrupted (false).
 */
public void testCanOpenIndex() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    IndexWriterConfig writerConfig = newIndexWriterConfig();
    Path indexPath = createTempDir();
    final BaseDirectoryWrapper dir = newFSDirectory(indexPath);

    // Nothing has been committed yet.
    assertFalse(Store.canOpenIndex(logger, indexPath, shardId, (id, l) -> new DummyShardLock(id)));

    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    Document single = new Document();
    Field.Store storeFlag = random().nextBoolean() ? Field.Store.YES : Field.Store.NO;
    single.add(new StringField("id", "1", storeFlag));
    indexWriter.addDocument(single);
    indexWriter.commit();
    indexWriter.close();

    // A commit point exists now.
    assertTrue(Store.canOpenIndex(logger, indexPath, shardId, (id, l) -> new DummyShardLock(id)));

    DirectoryService directoryService = new DirectoryService(shardId, INDEX_SETTINGS) {
        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    store.markStoreCorrupted(new CorruptIndexException("foo", "bar"));

    // A store marked as corrupted must be reported as not openable.
    assertFalse(Store.canOpenIndex(logger, indexPath, shardId, (id, l) -> new DummyShardLock(id)));
    store.close();
}
/**
 * Adds a full-text index entry for one value and commits.
 *
 * <p>The document holds the serialized id (stored, under {@code CONTENT_ID}) and the text
 * under the given field name (not stored). Adding and committing happen under this
 * object's monitor.
 *
 * @param id id of the owning record, serialized with {@code keySerializer}
 * @param field name of the text field
 * @param text text to index
 * @throws IndexingException if serialization or the index write fails with an IOException
 */
@Override
public void createIndex(NitriteId id, String field, String text) {
    try {
        String serializedId = keySerializer.writeValueAsString(id);

        Document entry = new Document();
        entry.add(new StringField(CONTENT_ID, serializedId, Field.Store.YES));
        entry.add(new TextField(field, text, Field.Store.NO));

        synchronized (this) {
            indexWriter.addDocument(entry);
            commit();
        }
    } catch (IOException ioe) {
        String message = errorMessage("could not write full-text index data for " + text, 0);
        throw new IndexingException(message, ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
/**
 * Replaces the full-text index entry of one record and commits.
 *
 * <p>The document whose {@code CONTENT_ID} term equals the serialized id is replaced with
 * a new document holding that id (stored) and the text under the given field name (not
 * stored). Updating and committing happen under this object's monitor.
 *
 * @param id id of the owning record, serialized with {@code keySerializer}
 * @param field name of the text field
 * @param text new text to index
 * @throws IndexingException if serialization or the index write fails with an IOException
 */
@Override
public void updateIndex(NitriteId id, String field, String text) {
    try {
        String serializedId = keySerializer.writeValueAsString(id);

        Document replacement = new Document();
        replacement.add(new StringField(CONTENT_ID, serializedId, Field.Store.YES));
        replacement.add(new TextField(field, text, Field.Store.NO));

        Term idTerm = new Term(CONTENT_ID, serializedId);
        synchronized (this) {
            indexWriter.updateDocument(idTerm, replacement);
            commit();
        }
    } catch (IOException ioe) {
        String message = errorMessage("could not update full-text index for " + text, 0);
        throw new IndexingException(message, ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
/**
 * SAX start-tag callback.
 *
 * <p>Annotation elements ({@code GGS:SpanAnnotation}, {@code GGS:Annotation}) are written
 * straight to the annotations index, one stored string field per attribute. For any other
 * element, if it is the sentence element ({@code sent}) or an element is already open on
 * the node stack, the element name, the current word offset and a copy of its attributes
 * are pushed and the text buffer is reset.
 */
@Override
public void startElement(String u, String l, String qName, Attributes attributes) throws SAXException {
    final boolean annotationElement =
            qName.equals("GGS:SpanAnnotation") || qName.equals("GGS:Annotation");
    if (annotationElement) {
        Document annotationDoc = new Document();
        final int attributeCount = attributes.getLength();
        for (int idx = 0; idx < attributeCount; idx++) {
            String name = attributes.getLocalName(idx);
            String value = attributes.getValue(idx);
            annotationDoc.add(new StringField(name, value, Field.Store.YES));
        }
        try {
            annotationsWriter.addDocument(annotationDoc);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return;
    }

    // Only track elements that are the sentence element or nested inside a tracked one.
    if (!qName.equals(sent) && nodesStack.size() <= 0) {
        return;
    }

    nodesStack.push(qName);
    segmentStartsStack.push(totalWords);
    final boolean hasAttributes = attributes != null && attributes.getLength() > 0;
    attributesStack.push(hasAttributes ? new AttributesImpl(attributes) : new AttributesImpl());
    sb = new StringBuilder();
}
/**
 * Converts a note into an index document.
 *
 * <p>Fields: {@code id} and {@code project_id} (string fields, stored), {@code project_name}
 * (stored only), {@code title} and {@code title_cs} (both built from the note title),
 * {@code date} (formatted {@code yyyy-MM-dd}, or empty when the note has no date) and
 * {@code body}.
 *
 * @param note note to convert
 * @param noteHtmlContents text indexed in the {@code body} field
 * @return the populated document
 */
private Document noteToDocument(Note note, String noteHtmlContents) {
    Document result = new Document();

    String noteId = note.getId();
    Project owner = note.getProject();
    String ownerId = owner.getID();
    String ownerName = owner.getTitle();
    String noteTitle = note.getTitle();

    // Empty string stands in for a missing date.
    String formattedDate = "";
    if (note.getDate() != null) {
        formattedDate = new SimpleDateFormat("yyyy-MM-dd").format(note.getDate().getDate());
    }

    result.add(new StringField("id", noteId, Field.Store.YES));
    result.add(new StringField("project_id", ownerId, Field.Store.YES));
    result.add(new StoredField("project_name", ownerName));
    result.add(new TextField("title", noteTitle, Field.Store.YES));
    result.add(new TextField("title_cs", noteTitle, Field.Store.YES));
    result.add(new TextField("date", formattedDate, Field.Store.YES));
    result.add(new TextField("body", noteHtmlContents, Field.Store.YES));
    return result;
}
/**
 * Converts an event into an index document.
 *
 * <p>Fields: {@code id} (string field, stored), {@code text} and {@code text_cs} (both built
 * from the event text) and {@code original_start_date} (stored only). The last field holds
 * the event's time string, prefixed with {@code "<yyyy-MM-dd> @ "} when the event has a
 * start date, or an empty string when the result would be null.
 *
 * @param newEvent event to convert
 * @return the populated document
 */
private Document eventToDocument(Event newEvent) {
    Document result = new Document();

    String id = newEvent.getId();
    String text = newEvent.getText();

    String datePart = null;
    if (newEvent.getStartDate() != null) {
        datePart = new SimpleDateFormat("yyyy-MM-dd").format(newEvent.getStartDate().getDate());
    }
    String startLabel = newEvent.getTimeString();
    if (datePart != null) {
        startLabel = datePart + " @ " + startLabel;
    }
    if (startLabel == null) {
        startLabel = "";
    }

    result.add(new StringField("id", id, Field.Store.YES));
    result.add(new TextField("text", text, Field.Store.YES));
    result.add(new TextField("text_cs", text, Field.Store.YES));
    result.add(new StoredField("original_start_date", startLabel));
    return result;
}
void addToDoc(Document doc, String... values){ Preconditions.checkArgument(valueType == String.class); if (isSorted()) { Preconditions.checkArgument(values.length < 2, "sorted fields cannot have multiple values"); } // add distinct elements to doc final Iterable<String> nonNull = FluentIterable.from(Arrays.asList(values)) .filter(new Predicate<String>() { @Override public boolean apply(@Nullable final String input) { return input != null; } }); for (final String value : ImmutableSet.copyOf(nonNull)) { final String truncatedValue = StringUtils.abbreviate(value, MAX_STRING_LENGTH); doc.add(new StringField(indexFieldName, truncatedValue, stored ? Store.YES : Store.NO)); } if (isSorted() && values.length == 1) { Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.STRING); doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0]))); } }
void addToDoc(Document doc, byte[]... values){ Preconditions.checkArgument(valueType == String.class); if (isSorted()) { Preconditions.checkArgument(values.length < 2, "sorted fields cannot have multiple values"); } // add distinct elements to doc final Iterable<byte[]> nonNull = FluentIterable.from(Arrays.asList(values)) .filter(new Predicate<byte[]>() { @Override public boolean apply(@Nullable final byte[] input) { return input != null; } }); for (final byte[] value : ImmutableSet.copyOf(nonNull)) { final BytesRef truncatedValue = new BytesRef(value,0, Math.min(value.length, MAX_STRING_LENGTH)); doc.add(new StringField(indexFieldName, truncatedValue, stored ? Store.YES : Store.NO)); } if (isSorted() && values.length == 1) { Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.STRING); doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0]))); } }
/**
 * Adds 10000 documents to {@code index}, one per iteration, each with a stored string field
 * and a sorted doc-values field named {@code key<i>} holding {@code value<i>}. Each pair is
 * also recorded in {@code data}. The loop sleeps after every document.
 *
 * <p>If the thread is interrupted the loop stops and the interrupt status is restored, so
 * code further up the stack can still observe the interruption.
 */
@Override
public void run() {
    try {
        for (int i = 0; i < 10000; ++i) {
            final Document document = new Document();
            final String key = "key" + i;
            final String val = "value" + i;
            document.add(new StringField(key, val, Field.Store.YES));
            // BytesRef(CharSequence) encodes as UTF-8, independent of the platform charset.
            document.add(new SortedDocValuesField(key, new BytesRef(val)));
            index.add(document);
            data.put(key, val);
            sleep(1);
        }
    } catch (InterruptedException e) {
        // Stop producing, but do not lose the interruption.
        Thread.currentThread().interrupt();
    }
}
/**
 * Indexes one user and closes the writer obtained from {@code getWriter()}.
 *
 * @param user user whose id and username are indexed
 * @throws Exception any failure while building or writing the document; the stack trace is
 *         printed before the exception is rethrown
 */
public void addIndex(UUser user) throws Exception {
    IndexWriter indexWriter = getWriter();
    Document userDoc = new Document();
    /*
     * Store.YES saves the value in the index. Store a field when search results need to
     * display it; fields that are only searched (or only a title is shown) need not be
     * stored, and storing very long content is not recommended.
     * A TextField can be used for querying.
     */
    try {
        String userId = String.valueOf(user.getId());
        userDoc.add(new StringField("userid", userId, Field.Store.YES));
        userDoc.add(new TextField("username", user.getUsername(), Field.Store.YES));
        indexWriter.addDocument(userDoc);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        indexWriter.close();
    }
}
/**
 * Initializes profanity set.
 *
 * <p>Reads the dictionary file line by line. Each line is added (trimmed) to the banned
 * word set and indexed as-is in {@code LUCENE_FIELD_NAME}. Nothing happens when the path is
 * null or does not point to an existing regular file. Failures are logged and not thrown.
 *
 * <p>The reader and the index writer are always released. If reading or indexing fails,
 * the writer is rolled back so that no partial index is committed and its lock is freed.
 *
 * @param dictFilePath
 *            dictionary file path
 */
private void initializeProfanitySet(String dictFilePath) {
    if (dictFilePath == null) {
        return;
    }
    File file = new File(dictFilePath);
    if (!file.exists() || !file.isFile()) {
        return;
    }
    try {
        IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, config);
        boolean completed = false;
        try {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                Set<String> bannedWords = new HashSet<String>();
                String line;
                while ((line = reader.readLine()) != null) {
                    bannedWords.add(line.trim());
                    Document doc = new Document();
                    doc.add(new StringField(LUCENE_FIELD_NAME, line, Store.NO));
                    indexWriter.addDocument(doc);
                }
                this.bannedWords = bannedWords;
                completed = true;
            } finally {
                reader.close();
            }
        } finally {
            if (completed) {
                indexWriter.close();
            } else {
                // Discard the partial index and release the writer.
                indexWriter.rollback();
            }
        }
    } catch (Exception ex) {
        LOG.error("Error reading file", ex);
    }
}
/**
 * Builds the index from an id-to-text map.
 *
 * <p>Each entry becomes one document with a stored {@code id} string field and a stored
 * {@code content} text field analyzed with {@code EnglishAnalyzer}. The index is opened in
 * {@code CREATE} mode. The writer is closed even when indexing fails part-way.
 *
 * <p>An {@code IOException} is printed and not rethrown, so construction still succeeds.
 *
 * @param id2Text document ids mapped to their text
 */
public InMemoryIndex(Map<String,String> id2Text){
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // try-with-resources: the writer (and its lock) is released on every path.
    try (IndexWriter writer = new IndexWriter(directory, iwc)) {
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Converts an {@code Example} into an index document.
 *
 * <p>The id and colour are indexed as stored string-typed fields, title and body as stored
 * text-typed fields. The id additionally gets a sorted doc-values field, the colour a
 * facet field, and the creation time (epoch milliseconds) a numeric doc-values field plus
 * a stored field.
 *
 * @param input example to convert
 * @return the populated document
 * @throws TransformException declared by the interface; not thrown by this code directly
 */
@Override
public Document transform(final Example input) throws TransformException {
    final String idName = ExampleField.ID.getName();
    final String colorName = ExampleField.COLOR.getName();
    final String createDateName = ExampleField.CREATE_DATE.getName();

    final String id = input.getId();
    final String color = input.getColor();

    final Document result = new Document();
    result.add(new Field(idName, id, StringField.TYPE_STORED));
    result.add(new SortedDocValuesField(idName, new BytesRef(id)));
    result.add(new Field(ExampleField.TITLE.getName(), input.getTitle(), TextField.TYPE_STORED));
    result.add(new Field(ExampleField.BODY.getName(), input.getBody(), TextField.TYPE_STORED));
    result.add(new Field(colorName, color, StringField.TYPE_STORED));
    result.add(new SortedSetDocValuesFacetField(colorName, color));

    final long createdMillis = input.getCreateDate().getTime();
    result.add(new NumericDocValuesField(createDateName, createdMillis));
    result.add(new StoredField(createDateName, createdMillis));
    return result;
}
/**
 * Indexes the text of one print element.
 *
 * <p>Nothing is written when the element has no styled text or its text is null or empty.
 * Otherwise a document with the page number, the element's id ({@code uid}) and the text
 * content is added to the writer.
 *
 * @param pageNo page number stored in the {@code pageNo} field
 * @param element text element to index
 * @throws IOException if the document cannot be written
 */
protected void addContentField(int pageNo, JRPrintText element) throws IOException {
    JRStyledText styled = getStyledText(element);
    String content = (styled == null) ? "" : styled.getText();
    if (content == null || content.length() == 0) {
        return;
    }

    Field contentField = new Field(CONTENT_FIELD, content, fieldType);
    Document entry = new Document();
    entry.add(new IntField("pageNo", pageNo, Field.Store.YES));

    PrintElementId elementId = PrintElementId.forElement(element);
    entry.add(new StringField("uid", elementId.toString(), Field.Store.YES));
    displayTokens(content, elementId.toString());

    entry.add(contentField);
    writer.addDocument(entry);
}
/**
 * Creates the field instances with empty values.
 *
 * <p>The document number is a stored string field. Title, content, "all", URL and document
 * header are stored text fields; when {@code indexPositions} is set they are created as
 * {@code TermVectorEnabledTextField} instead of plain {@code TextField}.
 */
private void initFields() {
    docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);

    titleField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
    textField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
    allField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
    urlField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES);
    dochdrField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES);
}
/**
 * Creates the field instances with empty values.
 *
 * <p>Document number and publication date are stored string fields. Title, content, "all"
 * and source are stored text fields; when {@code indexPositions} is set they are created
 * as {@code TermVectorEnabledTextField} instead of plain {@code TextField}.
 */
private void initFields() {
    docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
    pubdateField = new StringField(Lucene4IRConstants.FIELD_PUBDATE, "", Field.Store.YES);

    titleField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
    textField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
    allField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
    sourceField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES);
}
public static void createIndex(List<Map<String, String>> list){ // deleteAll deleteAll(); // addDocument for (Map<String, String> searchDto: list) { Document doc = new Document(); for (Map.Entry<String, String> item: searchDto.entrySet()) { if (ExcelUtil.KEYWORDS.equals(item.getKey())) { doc.add(new TextField(item.getKey(), item.getValue(), Field.Store.YES)); } else { doc.add(new StringField(item.getKey(), item.getValue(), Field.Store.YES)); } } addDocument(doc); } }
/**
 * Splits the content into fragments and builds one document per fragment.
 *
 * <p>The content is cut into {@code content.length() / fragmentSize} pieces of
 * {@code fragmentSize} characters, and the last piece also takes the remainder. Non-empty
 * content shorter than {@code fragmentSize} used to yield no document at all; it now yields
 * a single document holding the whole content. Empty content still yields an empty list.
 *
 * <p>Each document has the title as a stored string field and the fragment, passed through
 * {@code separateWordsWithSpace}, as a stored text field.
 *
 * @param title title stored on every fragment document
 * @param content text to split
 * @param fragmentSize fragment length in characters; expected to be positive
 * @return the fragment documents in content order
 */
List<Document> buildFragmentDocument(String title, String content, int fragmentSize) {
    List<Document> documents = new ArrayList<>();
    int fullFragments = content.length() / fragmentSize;
    // Short but non-empty content must still produce one fragment.
    int fragmentNum = content.isEmpty() ? 0 : Math.max(1, fullFragments);
    for (int i = 0; i < fragmentNum; i++) {
        String fragment;
        if (i == fragmentNum - 1) {
            // last fragment absorbs whatever is left
            fragment = content.substring(i * fragmentSize);
        } else {
            fragment = content.substring(i * fragmentSize, (1 + i) * fragmentSize);
        }
        TextField contentField = new TextField(Const.FIELD_CONTENT,
                separateWordsWithSpace.apply(fragment), Field.Store.YES);
        StringField titleField = new StringField(Const.FIELD_TITLE, title, Field.Store.YES);
        Document document = new Document();
        document.add(titleField);
        document.add(contentField);
        documents.add(document);
    }
    return documents;
}
/**
 * Adds an artist to the artist index.
 *
 * <p>All properties are stored string fields named after the {@code ArtistField} constants.
 * Image, biography and members go through {@code nullIsBlank} first. A failure to write the
 * document is logged together with its cause and is not rethrown.
 *
 * @param artist artist to index
 */
public void addArtist(Artist artist) {
    Document document = new Document();
    document.add(new StringField(ArtistField.ARTISTID.name(), artist.getArtistId(), Field.Store.YES));
    document.add(new StringField(ArtistField.ARTISTNAME.name(), artist.getArtistName(), Field.Store.YES));
    document.add(new StringField(ArtistField.ARTISTIMAGE.name(), nullIsBlank(artist.getArtistImage()),
            Field.Store.YES));
    document.add(new StringField(ArtistField.BIOGRAPHY.name(), nullIsBlank(artist.getBiography()),
            Field.Store.YES));
    document.add(new StringField(ArtistField.MEMBERS.name(), nullIsBlank(artist.getMembers()),
            Field.Store.YES));
    try {
        artistWriter.addDocument(document);
    } catch (Exception e) {
        // Pass the exception as the last argument so the stack trace is logged too.
        log.error("Unable to index artist - {}", artist.getArtistId(), e);
    }
}
/**
 * Builds the index document for a note name or for a paragraph.
 *
 * <p>With a paragraph, the paragraph text is indexed in {@code SEARCH_FIELD} and its start
 * date (or creation date when it has never started) in {@code modified}. Without one, the
 * note name itself is indexed in {@code SEARCH_FIELD}.
 *
 * @param id id of the document, different for Note name and paragraph
 * @param noteName name of the note
 * @param p paragraph, or null to index the note name
 * @return the populated document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
    Document result = new Document();
    result.add(new StringField(ID_FIELD, id, Field.Store.YES));
    result.add(new StringField("title", noteName, Field.Store.YES));

    if (p == null) {
        result.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
        return result;
    }

    result.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
    Date modified = p.getDateStarted();
    if (modified == null) {
        modified = p.getDateCreated();
    }
    result.add(new LongField("modified", modified.getTime(), Field.Store.NO));
    return result;
}
public void indexWFMEntry(String word, long frequency) { // Create the document and fields only once, for no GC if (wfmEntry == null) { wfmEntry = new Document(); wordField = new StringField("key", word, Field.Store.NO); wfmEntry.add(wordField); freqField = new StoredField("frequency", frequency); wfmEntry.add(freqField); } else { wordField.setStringValue(word); freqField.setLongValue(frequency); } try { this.indexWriter.updateDocument(new Term("key", word), wfmEntry); } catch (IOException e) { logger.error("EXCEPTION caught while indexing document for wfm entry " + word + ":" + frequency); e.printStackTrace(); } }
/**
 * Writes one rule to the index.
 *
 * <p>For every name returned by {@code rulesSource.uses()} the document gets two stored
 * string fields, {@code <name>-1} and {@code <name>-2}, with the value extracted from
 * {@code at} and {@code ah} respectively. The probability is stored as text in
 * {@code IndexRulesSource.VALUE_FIELD}. The write happens while holding the writer's
 * monitor; an {@code IOException} is logged at debug level only.
 *
 * @param at first annotation of the rule
 * @param ah second annotation of the rule
 * @param prob rule probability
 */
private void addRule(Annotation at, Annotation ah, double prob) {
    Document rule = new Document();
    for (String use : rulesSource.uses()) {
        String firstValue = IndexRulesSource.value(use, at);
        String secondValue = IndexRulesSource.value(use, ah);
        rule.add(new StringField(use + "-1", firstValue, Store.YES));
        rule.add(new StringField(use + "-2", secondValue, Store.YES));
    }
    rule.add(new StringField(IndexRulesSource.VALUE_FIELD, String.valueOf(prob), Store.YES));

    synchronized (writer) {
        try {
            writer.addDocument(rule);
        } catch (IOException e) {
            log.debug(e);
        }
    }
}
public void testNullAnalyzer() throws IOException { Directory dir = newDirectory(); IndexWriterConfig iwConf = newIndexWriterConfig(null); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); // add 3 good docs for (int i = 0; i < 3; i++) { Document doc = new Document(); doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); iw.addDocument(doc); } // add broken doc try { Document broke = new Document(); broke.add(newTextField("test", "broken", Field.Store.NO)); iw.addDocument(broke); fail(); } catch (NullPointerException expected) {} // ensure good docs are still ok IndexReader ir = iw.getReader(); assertEquals(3, ir.numDocs()); ir.close(); iw.close(); dir.close(); }
/**
 * Adds a parsed comment to the index and commits immediately.
 *
 * <p>The full searchable text is a stored text field; id, product name, comment, URL,
 * source and label are stored string fields. Field names come from
 * {@code ParsedComment.Fields}.
 *
 * @param document comment to index
 * @throws NullPointerException if the index writer has not been initialized
 * @throws RuntimeException wrapping the {@code IOException} if the write or commit fails
 */
public void indexParsedDocument(ParsedComment document) {
    Preconditions.checkNotNull(indexWriter, "The index writer is not initialized");

    final Field.Store stored = Field.Store.YES;
    Document entry = new Document();
    entry.add(new TextField(ParsedComment.Fields.SEARCHABLE_TEXT.name(), document.fullSearchableText(), stored));
    entry.add(new StringField(ParsedComment.Fields.ID.name(), document.getId(), stored));
    entry.add(new StringField(ParsedComment.Fields.PRODUCT_NAME.name(), document.getProductName(), stored));
    entry.add(new StringField(ParsedComment.Fields.COMMENT.name(), document.getComment(), stored));
    entry.add(new StringField(ParsedComment.Fields.URL.name(), document.getCommentUrl(), stored));
    entry.add(new StringField(ParsedComment.Fields.SOURCE.name(), document.getSource().name(), stored));
    entry.add(new StringField(ParsedComment.Fields.LABEL.name(), document.getCommentLabel(), stored));

    try {
        indexWriter.addDocument(entry);
        indexWriter.commit();
    } catch (IOException e) {
        throw new RuntimeException("Could not write new document to the index directory", e);
    }
}
/**
 * Builds the index document for a note name or for a paragraph.
 *
 * <p>With a paragraph, its text is indexed in {@code SEARCH_FIELD_TEXT}, its title (when
 * present) in {@code SEARCH_FIELD_TITLE}, and its start date (or creation date when it has
 * never started) in {@code modified}. Without one, the note name itself is indexed in
 * {@code SEARCH_FIELD_TEXT}.
 *
 * @param id id of the document, different for Note name and paragraph
 * @param noteName name of the note
 * @param p paragraph, or null to index the note name
 * @return the populated document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
    Document result = new Document();
    result.add(new StringField(ID_FIELD, id, Field.Store.YES));
    result.add(new StringField("title", noteName, Field.Store.YES));

    if (p == null) {
        result.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES));
        return result;
    }

    result.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES));
    if (p.getTitle() != null) {
        result.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES));
    }
    Date modified = p.getDateStarted();
    if (modified == null) {
        modified = p.getDateCreated();
    }
    result.add(new LongField("modified", modified.getTime(), Field.Store.NO));
    return result;
}
private void addLuceneIndexFields(String indexField, List<IndexableField> list, JsonNode node, JsonSchema nodeSchema) { JsonNode.Type type = nodeSchema.getSchemaType(); if (type == JsonNode.Type.ARRAY) { for (int i = 0; i < node.getSize(); i++) { addLuceneIndexFields(indexField, list, node.get(i), nodeSchema.getItemSchema()); } } else if (type == JsonNode.Type.OBJECT) { Iterator<String> properties = node.getProperties(); while (properties.hasNext()) { String propName = properties.next(); // Index property key for object nodes list.add(new StringField(indexField, propName, Field.Store.NO)); } } else if (type == JsonNode.Type.STRING) { list.add(new StringField(indexField, node.asString(), Field.Store.NO)); } else if (type == JsonNode.Type.BOOLEAN) { list.add(new StringField(indexField, node.asString(), Field.Store.NO)); } else if (type == JsonNode.Type.INTEGER) { list.add(new LongField(indexField, node.asLong(), Field.Store.NO)); } else if (type == JsonNode.Type.NUMBER) { list.add(new DoubleField(indexField, node.asDouble(), Field.Store.NO)); } else { throw new UnsupportedOperationException("Node type " + type + " not supported for index field " + indexField); } }
/**
 * Loads {@code test.csv} with a four-column document creator (int, string, float, text)
 * and checks the number of hits for one search per column.
 */
@Test
public void simpleTest() throws IOException {
    LuceneValuesDB db = new LuceneValuesDB();
    URL csvUrl = LuceneValuesDB.class.getResource("test.csv");

    @SuppressWarnings("unchecked")
    UserDefineDocumentCreator creator = new UserDefineDocumentCreator(
            new Class[] { IntField.class, StringField.class, FloatField.class, TextField.class },
            new String[] { "docNum", "docType", "score", "text" });

    File csvFile = new File(csvUrl.getFile());
    db.open(csvFile, new CSVParser(), creator);

    assertEquals(1, db.search("docNum", 0).length);
    assertEquals(1, db.search("docType", "a").length);
    assertEquals(2, db.search("score", "0.1").length);
    assertEquals(1, db.search("text", "this is a pen").length);
}
/**
 * Creates the rule index at {@code path} from the given entailment pairs.
 *
 * <p>A new index is opened in {@code CREATE} mode with a whitespace analyzer. A header
 * document is written first, flagged with {@code TERMDOC_FIELD=true} and carrying one
 * {@code USES_FIELD} value per entry of {@code rulesSource.uses()}. Then {@code start} is
 * run over the pairs, pending merges are awaited and the writer is closed.
 *
 * @param path directory of the index
 * @param aps annotated pairs passed to {@code start}
 * @throws Exception if opening, writing or closing the index fails
 */
public void generateIndex(String path, List<AnnotatedEntailmentPair> aps) throws Exception {
    log.info("Rules extraction started.");

    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_47);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(new File(path)), writerConfig);

    // header document listing the "uses" of the rule source
    Document header = new Document();
    header.add(new StringField(IndexRulesSource.TERMDOC_FIELD, "true", Store.YES));
    for (String use : rulesSource.uses()) {
        header.add(new StringField(IndexRulesSource.USES_FIELD, use, Store.YES));
    }
    writer.addDocument(header);

    start(aps.iterator());

    writer.waitForMerges();
    writer.close(true);
    log.info(cache.size() + " rules extracted!");
}
public void update(IndexWriter writer) throws IOException { // Add 10 docs: FieldType customType = new FieldType(StringField.TYPE_NOT_STORED); customType.setStoreTermVectors(true); for(int j=0; j<10; j++) { Document d = new Document(); int n = random().nextInt(); d.add(newField("id", Integer.toString(nextID++), customType)); d.add(newTextField("contents", English.intToEnglish(n), Field.Store.NO)); writer.addDocument(d); } // Delete 5 docs: int deleteID = nextID-1; for(int j=0; j<5; j++) { writer.deleteDocuments(new Term("id", ""+deleteID)); deleteID -= 2; } }
protected List<Document> getDocuments(Iterator<SpatialTestData> sampleData) { List<Document> documents = new ArrayList<>(); while (sampleData.hasNext()) { SpatialTestData data = sampleData.next(); Document document = new Document(); document.add(new StringField("id", data.id, Field.Store.YES)); document.add(new StringField("name", data.name, Field.Store.YES)); Shape shape = data.shape; shape = convertShapeFromGetDocuments(shape); if (shape != null) { for (Field f : strategy.createIndexableFields(shape)) { document.add(f); } if (storeShape)//just for diagnostics document.add(new StoredField(strategy.getFieldName(), shape.toString())); } documents.add(document); } return documents; }
public void testSetAllGroups() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED)); w.addDocument(doc); IndexSearcher indexSearcher = newSearcher(w.getReader()); w.close(); GroupingSearch gs = new GroupingSearch("group"); gs.setAllGroups(true); TopGroups<?> groups = gs.search(indexSearcher, null, new TermQuery(new Term("group", "foo")), 0, 10); assertEquals(1, groups.totalHitCount); //assertEquals(1, groups.totalGroupCount.intValue()); assertEquals(1, groups.totalGroupedHitCount); assertEquals(1, gs.getAllMatchingGroups().size()); indexSearcher.getIndexReader().close(); dir.close(); }
private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException { String textString = text.utf8ToString(); Document doc = new Document(); FieldType ft = getTextFieldType(); doc.add(new Field(TEXT_FIELD_NAME, textString, ft)); doc.add(new Field("textgrams", textString, ft)); doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO)); doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text)); doc.add(new NumericDocValuesField("weight", weight)); if (payload != null) { doc.add(new BinaryDocValuesField("payloads", payload)); } if (contexts != null) { for(BytesRef context : contexts) { // TODO: if we had a BinaryTermField we could fix // this "must be valid ut8f" limitation: doc.add(new StringField(CONTEXTS_FIELD_NAME, context.utf8ToString(), Field.Store.NO)); doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context)); } } return doc; }
private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException { //System.out.println("process SSDV: " + byField); for(Map.Entry<String,List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) { String indexFieldName = ent.getKey(); //System.out.println(" field=" + indexFieldName); for(SortedSetDocValuesFacetField facetField : ent.getValue()) { FacetLabel cp = new FacetLabel(facetField.dim, facetField.label); String fullPath = pathToString(cp.components, cp.length); //System.out.println("add " + fullPath); // For facet counts: doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath))); // For drill-down: doc.add(new StringField(indexFieldName, fullPath, Field.Store.NO)); doc.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO)); } } }
public void testNullDocument() throws IOException { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); // add 3 good docs for (int i = 0; i < 3; i++) { Document doc = new Document(); doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); iw.addDocument(doc); } // add broken doc try { iw.addDocument(null); fail(); } catch (NullPointerException expected) {} // ensure good docs are still ok IndexReader ir = iw.getReader(); assertEquals(3, ir.numDocs()); ir.close(); iw.close(); dir.close(); }
public void testIndexWriter_LUCENE4656() throws IOException { Directory directory = newDirectory(); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null)); TokenStream ts = new EmptyTokenStream(); assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class)); Document doc = new Document(); doc.add(new StringField("id", "0", Field.Store.YES)); doc.add(new TextField("description", ts)); // this should not fail because we have no TermToBytesRefAttribute writer.addDocument(doc); assertEquals(1, writer.numDocs()); writer.close(); directory.close(); }
/**
 * Adds one document per attribute map to the index.
 *
 * <p>Keys containing an underscore have every underscore-plus-digit sequence removed. The
 * entry whose (cleaned) key equals {@code primaryKeyField}, ignoring case, becomes a stored
 * string field; every other entry becomes a stored text field.
 *
 * @param writer writer receiving the documents
 * @param searcher not used by this implementation
 * @throws ModifyKnowledgeBaseException if a document cannot be written
 */
@Override
public void modifyIndex(final IndexWriter writer, final IndexSearcher searcher)
        throws ModifyKnowledgeBaseException {
    for (final HashMap<String, String> row : this.attributes) {
        final Document rowDoc = new Document();
        for (final Map.Entry<String, String> attribute : row.entrySet()) {
            final String rawKey = attribute.getKey();
            final String value = attribute.getValue();
            final String key = rawKey.contains("_") ? rawKey.replaceAll("_[\\d]", "") : rawKey;

            final boolean primaryKey = key.equalsIgnoreCase(primaryKeyField);
            if (!primaryKey) {
                rowDoc.add(new TextField(key, value, Field.Store.YES));
                continue;
            }
            rowDoc.add(new StringField(key, value, Field.Store.YES));
        }
        try {
            writer.addDocument(rowDoc);
        } catch (final IOException e) {
            throw new ModifyKnowledgeBaseException("IndexWriter add document exception", e);
        }
    }
}
private void buildIndexItem(String deweyId, String type, String[] result, String parentPath) { //Create an Index element IndexElement test = new IndexElement(deweyId, type, result[1]); String path = test.epath(); path = StringUtils.replace(path, parentPath, ""); //Parser doesn't like / so paths are saved as name.name.... String luceneParentPath = parentPath.replaceAll("/", "."); if (!type.equals("doc")) { path = path.replaceFirst("/", ":"); } else { luceneParentPath = ""; } //Parser doesn't like / so paths are saved as name.name.... path = path.replaceAll("/", "."); //Add this element to the array (they will be added in reverse order. String fullItem = luceneParentPath + path + "." + test.type(); results.add(new ComplexItem(new StringField("item", fullItem, Field.Store.YES), test.id())); }