/**
 * Indexes a single file: its path (as an exact-match key), last-modified time,
 * and tokenized contents. Adds a fresh document when the writer was opened in
 * CREATE mode, otherwise replaces any existing document with the same path.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        Document doc = new Document();
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        // Contents are streamed through a UTF-8 reader; TextField tokenizes but does not store them.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));
        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name()); if (values.isEmpty()) { return EMPTY_FIELDS; } Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
/**
 * Builds a one-document index over {@code field} with offsets enabled
 * (as required by offset-based highlighters) and returns a reader over it.
 * The caller owns the returned reader and must close it.
 */
private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Document doc = new Document();
    doc.add(new Field(field, value, offsetsType));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();
    return reader;
}
/**
 * Builds an Engine.Index operation for a minimal test document with the given
 * id: one text field plus the _uid, _version and sequence-number metadata
 * fields the engine expects.
 */
private Engine.Index getIndex(final String id) {
    final String type = "test";
    final Field uid = new Field("_uid", Uid.createUid(type, id), UidFieldMapper.Defaults.FIELD_TYPE);
    final Field version = new NumericDocValuesField("_version", Versions.MATCH_ANY);
    final SeqNoFieldMapper.SequenceID seqID = SeqNoFieldMapper.SequenceID.emptySeqID();
    final ParseContext.Document document = new ParseContext.Document();
    document.add(new TextField("test", "test", Field.Store.YES));
    document.add(uid);
    document.add(version);
    document.add(seqID.seqNo);
    document.add(seqID.seqNoDocValue);
    document.add(seqID.primaryTerm);
    // A single-byte source payload is enough for the engine's bookkeeping.
    final BytesReference source = new BytesArray(new byte[] { 1 });
    final ParsedDocument doc = new ParsedDocument(version, seqID, id, type, null,
            Arrays.asList(document), source, XContentType.JSON, null);
    return new Engine.Index(new Term("_uid", doc.uid()), doc);
}
/**
 * Test the WordScorer emitted by the smoothing model.
 * Fix: the IndexWriter and DirectoryReader were previously never closed,
 * leaking index resources; both are now released via try-with-resources.
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    try (IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper))) {
        Document doc = new Document();
        doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
        // NRT reader over the still-open writer; closed before the writer.
        try (DirectoryReader ir = DirectoryReader.open(writer)) {
            WordScorer wordScorer = testModel.buildWordScorerFactory()
                    .newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
            assertWordScorer(wordScorer, testModel);
        }
    }
}
/**
 * Happy path: a stored field with full term vectors yields a highlighted fragment.
 * Fix: the NRT reader, writer and directory were previously leaked; they are now closed.
 */
public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
    // Release resources (reader before the writer it was opened from).
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * With term vectors but no stored field value, the highlighter cannot build a
 * fragment and must return null.
 * Fix: the NRT reader, writer and directory were previously leaked; they are now closed.
 */
public void testVectorHighlighterNoStore() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
    // Release resources (reader before the writer it was opened from).
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Without term vectors the fast vector highlighter has nothing to work from
 * and must return null, even though the field is stored.
 * Fix: the NRT reader, writer and directory were previously leaked; they are now closed.
 */
public void testVectorHighlighterNoTermVector() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("content", "the big bad dog", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
    // Release resources (reader before the writer it was opened from).
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Indexes ten docs "aa".."jj" and verifies string sorting returns the doc
 * values in natural order.
 * Fix: the reader, writer and directory were previously leaked; they are now closed.
 */
public void testSortValues() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        Document document = new Document();
        String text = new String(new char[] {(char) (97 + i), (char) (97 + i)});
        document.add(new TextField("str", text, Field.Store.YES));
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10,
            new Sort(new SortField("str", SortField.Type.STRING)));
    for (int i = 0; i < 10; i++) {
        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
        String expected = new String(new char[] {(char) (97 + i), (char) (97 + i)});
        assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(expected)));
    }
    // Release resources (reader before the writer it was opened from).
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Round-trips a legacy int field: retrieval by _id and by the prefix-coded
 * numeric term both yield the stored value "2".
 * Fix: only the writer was closed before; the NRT reader and directory were
 * leaked. All three are now released in the correct order.
 */
public void testSimpleNumericOps() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));
    // Look the doc up again via the prefix-coded numeric term.
    BytesRefBuilder bytes = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
    topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
    doc = searcher.doc(topDocs.scoreDocs[0].doc);
    f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Verifies an NRT reader stays usable after its writer has been closed:
 * the _id terms must still be iterable.
 * Fix: the reader and directory were previously leaked; they are now closed.
 */
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);
    indexWriter.close();
    // Reading terms must work even though the writer is gone.
    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
    reader.close();
    dir.close();
}
/**
 * Adds a full-text entry for the given id/field/text triple. The id is
 * serialized to JSON and stored so hits can be mapped back; the text itself
 * is indexed but not stored. Writes and commits under the monitor lock.
 */
@Override
public void createIndex(NitriteId id, String field, String text) {
    try {
        String jsonId = keySerializer.writeValueAsString(id);
        Document document = new Document();
        document.add(new StringField(CONTENT_ID, jsonId, Field.Store.YES));
        document.add(new TextField(field, text, Field.Store.NO));
        synchronized (this) {
            indexWriter.addDocument(document);
            commit();
        }
    } catch (IOException ioe) {
        throw new IndexingException(errorMessage(
                "could not write full-text index data for " + text, 0), ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
/**
 * Replaces the full-text entry for the given id (matched on the serialized
 * JSON id term) with a freshly built document, committing under the monitor
 * lock so concurrent writers see a consistent index.
 */
@Override
public void updateIndex(NitriteId id, String field, String text) {
    try {
        String jsonId = keySerializer.writeValueAsString(id);
        Document document = new Document();
        document.add(new StringField(CONTENT_ID, jsonId, Field.Store.YES));
        document.add(new TextField(field, text, Field.Store.NO));
        synchronized (this) {
            indexWriter.updateDocument(new Term(CONTENT_ID, jsonId), document);
            commit();
        }
    } catch (IOException ioe) {
        throw new IndexingException(errorMessage(
                "could not update full-text index for " + text, 0), ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
@Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SourceLookup sourceLookup = searchContext.lookup().source(); sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId); List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName())); if (values.isEmpty()) { return EMPTY_FIELDS; } Field[] fields = new Field[values.size()]; for (int i = 0; i < values.size(); i++) { fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED); } return fields; }
private Document getDocument(File file) throws IOException { Document document = new Document(); // index file contents Field contentField = new Field(LuceneConstants.CONTENTS, new FileReader(file), TextField.TYPE_NOT_STORED); // index file name Field fileNameField = new Field(LuceneConstants.FILE_NAME, file.getName(), TextField.TYPE_STORED); // index file path Field filePathField = new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(), TextField.TYPE_STORED); document.add(contentField); document.add(fileNameField); document.add(filePathField); return document; }
/**
 * Maps a note (plus its rendered HTML body) onto a Lucene document.
 * Exact-match fields (ids) use StringField; searchable text uses TextField;
 * the project name is stored only. A missing note date is indexed as "".
 */
private Document noteToDocument(Note note, String noteHtmlContents) {
    Project project = note.getProject();
    String title = note.getTitle();
    String date = null;
    if (note.getDate() != null) {
        date = new SimpleDateFormat("yyyy-MM-dd").format(note.getDate().getDate());
    }
    Document d = new Document();
    d.add(new StringField("id", note.getId(), Field.Store.YES));
    d.add(new StringField("project_id", project.getID(), Field.Store.YES));
    d.add(new StoredField("project_name", project.getTitle()));
    d.add(new TextField("title", title, Field.Store.YES));
    d.add(new TextField("title_cs", title, Field.Store.YES));
    d.add(new TextField("date", date != null ? date : "", Field.Store.YES));
    d.add(new TextField("body", noteHtmlContents, Field.Store.YES));
    return d;
}
/**
 * Maps an event onto a Lucene document: searchable text (twice, once for the
 * case-sensitive variant) plus a stored "date @ time" display string. When the
 * start date is present it is prefixed onto the time string.
 */
private Document eventToDocument(Event newEvent) {
    String eventText = newEvent.getText();
    String eventStartDate = null;
    if (newEvent.getStartDate() != null) {
        eventStartDate = new SimpleDateFormat("yyyy-MM-dd").format(newEvent.getStartDate().getDate());
    }
    String eventStartTime = newEvent.getTimeString();
    if (eventStartDate != null) {
        eventStartTime = eventStartDate + " @ " + eventStartTime;
    }
    Document d = new Document();
    d.add(new StringField("id", newEvent.getId(), Field.Store.YES));
    d.add(new TextField("text", eventText, Field.Store.YES));
    d.add(new TextField("text_cs", eventText, Field.Store.YES));
    d.add(new StoredField("original_start_date", eventStartTime != null ? eventStartTime : ""));
    return d;
}
/**
 * Adds one user to the index. Store.YES keeps the value retrievable from
 * search hits (needed when results display the record); TextField makes the
 * username searchable, StringField keeps the id as an exact-match key.
 * The writer is always closed, even when indexing fails.
 */
public void addIndex(UUser user) throws Exception {
    IndexWriter writer = getWriter();
    try {
        Document doc = new Document();
        doc.add(new StringField("userid", String.valueOf(user.getId()), Field.Store.YES));
        doc.add(new TextField("username", user.getUsername(), Field.Store.YES));
        writer.addDocument(doc);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        writer.close();
    }
}
/**
 * Builds an in-memory index from id → text, one document per entry, using an
 * English analyzer and a freshly created index.
 * Fix: the writer leaked if addDocument threw; try-with-resources now
 * guarantees it is closed. IOExceptions are still only logged, preserving the
 * original best-effort behavior.
 */
public InMemoryIndex(Map<String, String> id2Text) {
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try (IndexWriter writer = new IndexWriter(directory, iwc)) {
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Indexes each string in {@code contents} into an in-memory index, then
 * returns the contents of the top-n documents matching {@code query}.
 * Fix: the DirectoryReader (and with it the searcher's resources) was never
 * closed; reader and writer are now managed with try-with-resources.
 */
private static List<String> search(List<String> contents, String query, int n)
        throws IOException, ParseException {
    List<String> hits = new ArrayList<>();
    Directory dir = new RAMDirectory();
    try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer()))) {
        for (String method : contents) {
            Document document = new Document();
            document.add(new TextField("content", method, Field.Store.YES));
            indexWriter.addDocument(document);
        }
    }
    QueryParser qp = new QueryParser("content", new EnglishAnalyzer());
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        TopDocs topDocs = indexSearcher.search(qp.parse(query), n);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            hits.add(indexSearcher.doc(scoreDoc.doc).get("content"));
        }
    }
    return hits;
}
/**
 * Transforms an Example into its index document: stored fields for all
 * attributes, plus doc values for sorting on id and create date and a
 * sorted-set facet field for color.
 */
@Override
public Document transform(final Example input) throws TransformException {
    final Document doc = new Document();
    final String id = input.getId();
    final String color = input.getColor();
    doc.add(new Field(ExampleField.ID.getName(), id, StringField.TYPE_STORED));
    doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef(id)));
    doc.add(new Field(ExampleField.TITLE.getName(), input.getTitle(), TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.BODY.getName(), input.getBody(), TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.COLOR.getName(), color, StringField.TYPE_STORED));
    doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), color));
    // Create date is indexed twice: numeric doc values for sorting, stored for retrieval.
    final long createMillis = input.getCreateDate().getTime();
    doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createMillis));
    doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createMillis));
    return doc;
}
public static void createIndexQ(List<CQAResult> QASetList, Directory dir) { System.out.println("Creating Questions Index"); IndexWriterConfig iwc = new IndexWriterConfig(ANALYZER.getVersion(), ANALYZER); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); try { IndexWriter writer = new IndexWriter(dir, iwc); int id = 0; //XXX seq_id for (CQAResult qaSet : QASetList) { Document doc = new Document(); if (qaSet.subject == null) { id++; continue; } doc.add(new IntField(QID, id++, Field.Store.YES)); doc.add(new TextField(BEST_ANSWER_FIELD, qaSet.subject, Field.Store.NO)); doc.add(new TextField(Q_DESCRIPTION, qaSet.content, Field.Store.NO)); writer.addDocument(doc); } writer.close(); } catch (IOException e) { e.printStackTrace(); } }
/**
 * Initializes the reusable per-document field instances. The docnum is always
 * an exact-match StringField; the text fields switch to term-vector-enabled
 * variants when positional indexing is requested.
 */
private void initFields() {
    docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
    titleField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
    textField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
    allField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
    urlField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES);
    dochdrField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES);
}
/**
 * Initializes the reusable per-document field instances. Docnum and pubdate
 * are exact-match StringFields; the text fields switch to term-vector-enabled
 * variants when positional indexing is requested.
 */
private void initFields() {
    docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
    pubdateField = new StringField(Lucene4IRConstants.FIELD_PUBDATE, "", Field.Store.YES);
    titleField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
    textField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
    allField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
    sourceField = indexPositions
            ? new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES)
            : new TextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES);
}
private Document buildDucument(ShopDTO shopDTO){ Document document = new Document(); document.add(new IntField(ShopDTO.ShopParam.SHOP_ID, shopDTO.getShopid(), Field.Store.YES)); document.add(new TextField(ShopDTO.ShopParam.SHOP_NAME, shopDTO.getShopname(), Field.Store.YES)); //document.add(new StringField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid()+"", Field.Store.YES)); document.add(new IntField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid(), Field.Store.YES)); if (shopDTO.getTaglist()!=null && shopDTO.getTaglist().size()>0) { for (int tagid: shopDTO.getTaglist()) { document.add(new IntField(ShopDTO.ShopParam.TAG_ID, tagid, Field.Store.YES)); } } document.add(new IntField(ShopDTO.ShopParam.SCORE, shopDTO.getScore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED)); document.add(new IntField(ShopDTO.ShopParam.HOT_SCORE, shopDTO.getHotscore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED)); return document; }
public static void createIndex(List<Map<String, String>> list){ // deleteAll deleteAll(); // addDocument for (Map<String, String> searchDto: list) { Document doc = new Document(); for (Map.Entry<String, String> item: searchDto.entrySet()) { if (ExcelUtil.KEYWORDS.equals(item.getKey())) { doc.add(new TextField(item.getKey(), item.getValue(), Field.Store.YES)); } else { doc.add(new StringField(item.getKey(), item.getValue(), Field.Store.YES)); } } addDocument(doc); } }
/**
 * Splits {@code content} into fragmentSize-sized chunks, one document per
 * chunk (the last chunk absorbs any remainder). Each document carries the
 * title as an exact-match field and the space-separated fragment as content.
 * Fix: when content was shorter than fragmentSize, fragmentNum was 0 and the
 * content was silently dropped; such content now yields a single document.
 */
List<Document> buildFragmentDocument(String title, String content, int fragmentSize) {
    List<Document> documents = new ArrayList<>();
    // At least one fragment, even for short content (previously 0 → content lost).
    int fragmentNum = Math.max(1, content.length() / fragmentSize);
    for (int i = 0; i < fragmentNum; i++) {
        String fragment;
        if (i == fragmentNum - 1) {
            fragment = content.substring(i * fragmentSize); // last chunk takes the remainder
        } else {
            fragment = content.substring(i * fragmentSize, (1 + i) * fragmentSize);
        }
        StringField titleField = new StringField(Const.FIELD_TITLE, title, Field.Store.YES);
        TextField contentField = new TextField(Const.FIELD_CONTENT,
                separateWordsWithSpace.apply(fragment), Field.Store.YES);
        Document document = new Document();
        document.add(titleField);
        document.add(contentField);
        documents.add(document);
    }
    return documents;
}
/**
 * If paragraph is not null, indexes the paragraph's text and its
 * modification time; otherwise indexes the notebook name itself.
 *
 * @param id id of the document, different for Note name and paragraph
 * @param noteName name of the note
 * @param p paragraph, may be null
 * @return the document to index
 */
private Document newDocument(String id, String noteName, Paragraph p) {
    Document doc = new Document();
    doc.add(new StringField(ID_FIELD, id, Field.Store.YES));
    doc.add(new StringField("title", noteName, Field.Store.YES));
    if (p == null) {
        doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
    } else {
        doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
        // Prefer the start time; fall back to creation time when never started.
        Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
        doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
    }
    return doc;
}
/**
 * Builds the test fixture: 200 docs cycling through the varied and repetitive
 * field values, indexed with a whitespace mock analyzer into a RAM directory,
 * then opens the reader used by the tests.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    dir = new RAMDirectory();
    appAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, appAnalyzer));
    final int numDocs = 200;
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new TextField("variedField",
                variedFieldValues[i % variedFieldValues.length], Field.Store.YES));
        doc.add(new TextField("repetitiveField",
                repetitiveFieldValues[i % repetitiveFieldValues.length], Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();
    reader = DirectoryReader.open(dir);
}
/**
 * Converts a wiki page into its index document: title and contributor as
 * exact-match fields, contents as searchable (unstored) text. Null attributes
 * are simply skipped.
 * Fix: the contributor and contents checks were nested inside the
 * {@code title != null} branch, so pages without a title lost their other
 * fields entirely — this looked like a misplaced-brace bug (NOTE(review):
 * confirm no caller relied on title-less pages producing empty documents).
 */
private Document getDocFromPage(WikiPage page) {
    logger.debug("Converting page to document. Page:\n {}", page.toString());
    Document doc = new Document();
    String title = page.getTitle();
    String contributor = page.getContributor();
    String contents = page.getText();
    if (title != null) {
        doc.add(new StringField("title", title, Field.Store.YES));
    }
    if (contributor != null) {
        doc.add(new StringField("contributor", contributor, Field.Store.YES));
    }
    if (contents != null) {
        doc.add(new TextField("contents", contents, Field.Store.NO));
    }
    return doc;
}
/**
 * Fills {@code dir} with at least 100 line-file docs; each gets an extra
 * field whose single token is a random boost value in [0, 255) that is also
 * applied as the field boost (so the custom similarity can verify it).
 * Commits occasionally mid-stream and once at the end.
 */
public void buildIndex(Directory dir) throws IOException {
    Random random = random();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    config.setSimilarity(new MySimProvider());
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
    int docCount = atLeast(100);
    for (int i = 0; i < docCount; i++) {
        Document doc = docs.nextDoc();
        int boost = random().nextInt(255);
        Field boosted = new TextField(byteTestField, "" + boost, Field.Store.YES);
        boosted.setBoost(boost);
        doc.add(boosted);
        writer.addDocument(doc);
        // The line-file doc is reused, so strip our field before the next round.
        doc.removeField(byteTestField);
        if (rarely()) {
            writer.commit();
        }
    }
    writer.commit();
    writer.close();
    docs.close();
}
/**
 * Smoke test for the Redis-backed directory: upserts a single document keyed
 * on id "thanhnb", commits, and reports the elapsed time. The directory is
 * always destroyed, even on failure.
 */
public static void main(String args[]) throws Exception {
    initLoggers(Level.INFO);
    RedisDirectory DIR = new RedisDirectory(REDIS_HOST, REDIS_PORT, REDIS_PASSWORD);
    DIR.init();
    long startMillis = System.currentTimeMillis();
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(DIR, config);
        Document doc = new Document();
        doc.add(new StringField("id", "thanhnb", Field.Store.YES));
        doc.add(new TextField("name", "Nguyen Ba Thanh", Field.Store.NO));
        // updateDocument = upsert keyed on the id term
        writer.updateDocument(new Term("id", "thanhnb"), doc);
        writer.commit();
        writer.close();
    } finally {
        DIR.destroy();
    }
    long endMillis = System.currentTimeMillis();
    System.out.println("Finished in " + (endMillis - startMillis) / 1000.0 + " sec");
}
/**
 * Writes one parsed comment into the index and commits immediately. The
 * searchable text is analyzed; every other attribute is an exact-match
 * StringField. Requires the writer to have been initialized first.
 */
public void indexParsedDocument(ParsedComment document) {
    Preconditions.checkNotNull(indexWriter, "The index writer is not initialized");
    Document doc = new Document();
    doc.add(new TextField(ParsedComment.Fields.SEARCHABLE_TEXT.name(),
            document.fullSearchableText(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.ID.name(), document.getId(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.PRODUCT_NAME.name(), document.getProductName(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.COMMENT.name(), document.getComment(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.URL.name(), document.getCommentUrl(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.SOURCE.name(), document.getSource().name(), Field.Store.YES));
    doc.add(new StringField(ParsedComment.Fields.LABEL.name(), document.getCommentLabel(), Field.Store.YES));
    try {
        indexWriter.addDocument(doc);
        indexWriter.commit();
    } catch (IOException e) {
        throw new RuntimeException(
                "Could not write new document to the index directory", e);
    }
}
public void testBogusTermVectors() throws IOException { Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); Field field = new Field("foo", "", ft); field.setTokenStream(new CannedTokenStream( new Token("bar", 5, 10), new Token("bar", 1, 4) )); doc.add(field); iw.addDocument(doc); iw.close(); dir.close(); // checkindex }
public void testInfiniteFreq1() throws Exception { String document = "drug druggy drug drug drug"; Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); doc.add(newField("lyrics", document, new FieldType(TextField.TYPE_NOT_STORED))); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher is = newSearcher(ir); PhraseQuery pq = new PhraseQuery(); // "drug the drug"~1 pq.add(new Term("lyrics", "drug"), 1); pq.add(new Term("lyrics", "drug"), 3); pq.setSlop(1); assertSaneScoring(pq, is); ir.close(); dir.close(); }
protected void make1dmfIndexNA( String... values ) throws Exception { IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE)); Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setStoreTermVectors(true); customType.setStoreTermVectorOffsets(true); customType.setStoreTermVectorPositions(true); for( String value: values ) { doc.add( new Field( F, value, customType)); //doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) ); } writer.addDocument( doc ); writer.close(); if (reader != null) reader.close(); reader = DirectoryReader.open(dir); }
/**
 * Indexes the same field with DOCS_AND_FREQS for two documents and then
 * DOCS_ONLY for a third, exercising index-option downgrades within a field.
 */
public void testChangeIndexOptions() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir,
            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    FieldType withFreqs = new FieldType(TextField.TYPE_NOT_STORED);
    withFreqs.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    FieldType withoutFreqs = new FieldType(TextField.TYPE_NOT_STORED);
    withoutFreqs.setIndexOptions(IndexOptions.DOCS_ONLY);
    Document doc = new Document();
    doc.add(new Field("field", "a b c", withFreqs));
    writer.addDocument(doc);
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("field", "a b c", withoutFreqs));
    writer.addDocument(doc);
    writer.close();
    dir.close();
}
/** make sure we can retrieve when norms are disabled */
public void testNoNorms() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    FieldType noNorms = new FieldType(TextField.TYPE_NOT_STORED);
    noNorms.setOmitNorms(true);
    noNorms.freeze();
    Document doc = new Document();
    doc.add(newField("foo", "bar", noNorms));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    // Every similarity under test must find the doc despite missing norms.
    for (Similarity sim : sims) {
        searcher.setSimilarity(sim);
        BooleanQuery query = new BooleanQuery(true);
        query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        assertEquals(1, searcher.search(query, 10).totalHits);
    }
    reader.close();
    dir.close();
}
private void checkTokens(Token[] tokens) throws IOException { Directory dir = newDirectory(); RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc); boolean success = false; try { FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // store some term vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); ft.setStoreTermVectorOffsets(true); Document doc = new Document(); doc.add(new Field("body", new CannedTokenStream(tokens), ft)); riw.addDocument(doc); success = true; } finally { if (success) { IOUtils.close(riw, dir); } else { IOUtils.closeWhileHandlingException(riw, dir); } } }
/**
 * Builds a tiny RAM-backed test index: one document per entry of
 * {@code documents}, all on the single named field. IO failures during
 * setup are fatal for the fixture and surface as an Error.
 */
public SingleFieldTestDb(Random random, String[] documents, String fName) {
    try {
        db = new MockDirectoryWrapper(random, new RAMDirectory());
        docs = documents;
        fieldName = fName;
        IndexWriter writer = new IndexWriter(db, new IndexWriterConfig(
                Version.LUCENE_CURRENT, new MockAnalyzer(random)));
        for (String content : docs) {
            Document d = new Document();
            d.add(new TextField(fieldName, content, Field.Store.NO));
            writer.addDocument(d);
        }
        writer.close();
    } catch (java.io.IOException ioe) {
        throw new Error(ioe);
    }
}