Java 类org.apache.lucene.document.TextField 实例源码
项目:Java-Data-Science-Cookbook
文件:IndexFiles.java
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // Index one file: its path (exact-match key), last-modified time, and
    // UTF-8 decoded contents (tokenized, not stored).
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index: the document cannot exist yet, a plain add suffices.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index: replace any prior document with the same path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
项目:elasticsearch_my
文件:SourceSimpleFragmentsBuilder.java
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
    // The highlighter always hands us a leaf-level reader with a matching
    // docId, so the context cast below is safe.
    SourceLookup sourceLookup = searchContext.lookup().source();
    sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
    List<Object> rawValues = sourceLookup.extractRawValues(mapper.fieldType().name());
    if (rawValues.isEmpty()) {
        return EMPTY_FIELDS;
    }
    // Re-wrap each _source value as an unstored analyzed field for fragment building.
    int count = rawValues.size();
    Field[] result = new Field[count];
    for (int i = 0; i < count; i++) {
        result[i] = new Field(mapper.fieldType().name(), rawValues.get(i).toString(), TextField.TYPE_NOT_STORED);
    }
    return result;
}
项目:elasticsearch_my
文件:CustomUnifiedHighlighterTests.java
private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException {
    // Index a single document whose field records offsets, then return a reader over it.
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    // Stored text field with full postings (offsets included) for highlighting.
    FieldType fieldType = new FieldType(TextField.TYPE_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Field textField = new Field(field, "", fieldType);
    textField.setStringValue(value);
    Document document = new Document();
    document.add(textField);
    writer.addDocument(document);
    IndexReader result = writer.getReader();
    writer.close();
    return result;
}
项目:elasticsearch_my
文件:RecoverySourceHandlerTests.java
private Engine.Index getIndex(final String id) {
    // Build a minimal indexable document: one stored text field, uid/version
    // metadata, empty sequence-number fields and a single-byte JSON source.
    final String type = "test";
    final ParseContext.Document luceneDoc = new ParseContext.Document();
    luceneDoc.add(new TextField("test", "test", Field.Store.YES));
    final Field uidField = new Field("_uid", Uid.createUid(type, id), UidFieldMapper.Defaults.FIELD_TYPE);
    final Field versionField = new NumericDocValuesField("_version", Versions.MATCH_ANY);
    final SeqNoFieldMapper.SequenceID seqID = SeqNoFieldMapper.SequenceID.emptySeqID();
    luceneDoc.add(uidField);
    luceneDoc.add(versionField);
    luceneDoc.add(seqID.seqNo);
    luceneDoc.add(seqID.seqNoDocValue);
    luceneDoc.add(seqID.primaryTerm);
    final BytesReference source = new BytesArray(new byte[] { 1 });
    final ParsedDocument parsedDoc =
        new ParsedDocument(versionField, seqID, id, type, null, Arrays.asList(luceneDoc), source, XContentType.JSON, null);
    // The uid term doubles as the operation's routing/update key.
    return new Engine.Index(new Term("_uid", parsedDoc.uid()), parsedDoc);
}
项目:elasticsearch_my
文件:SmoothingModelTestCase.java
/**
 * Verifies that the smoothing model under test yields a usable WordScorer
 * over a tiny single-document index.
 */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    // "field" is whitespace-analyzed (the wrapper's default is the same analyzer).
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter indexWriter = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(analyzerWrapper));
    Document document = new Document();
    document.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    indexWriter.addDocument(document);
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    WordScorer scorer = testModel.buildWordScorerFactory().newScorer(reader, MultiFields.getTerms(reader, "field"), "field", 0.9d,
        BytesRefs.toBytesRef(" "));
    assertWordScorer(scorer, testModel);
}
项目:elasticsearch_my
文件:VectorHighlighterTests.java
public void testVectorHighlighter() throws Exception {
    // Index one doc whose "content" field stores term vectors with positions
    // and offsets, then check FastVectorHighlighter produces the expected fragment.
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
        reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));
}
项目:elasticsearch_my
文件:VectorHighlighterTests.java
public void testVectorHighlighterNoStore() throws Exception {
    // Same setup as the positive test, but "content" is NOT stored: the
    // highlighter has no source text to build a fragment from and must return null.
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
        reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
项目:elasticsearch_my
文件:VectorHighlighterTests.java
public void testVectorHighlighterNoTermVector() throws Exception {
    // Without term vectors on "content", the fast vector highlighter cannot
    // locate match offsets and must return null even though the text is stored.
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new TextField("content", "the big bad dog", Field.Store.YES));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
        reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
}
项目:elasticsearch_my
文件:SimpleLuceneTests.java
public void testSortValues() throws Exception {
    // Ten docs "aa".."jj", each indexed and doc-valued; a string sort must
    // return them in ascending order with the BytesRef sort value exposed.
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        char letter = (char) (97 + i);
        String text = new String(new char[]{letter, letter});
        Document document = new Document();
        document.add(new TextField("str", text, Field.Store.YES));
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING)));
    for (int i = 0; i < 10; i++) {
        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
        char letter = (char) (97 + i);
        assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(new String(new char[]{letter, letter}))));
    }
}
项目:elasticsearch_my
文件:SimpleLuceneTests.java
public void testSimpleNumericOps() throws Exception {
    // Round-trip a legacy int field: read it back stored, then find the same
    // doc again via its prefix-coded numeric term.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    writer.addDocument(document);
    IndexReader reader = DirectoryReader.open(writer);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document hit = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField storedField = hit.getField("test");
    assertThat(storedField.stringValue(), equalTo("2"));
    // Encode 2 the way LegacyIntField indexes it and query by that term.
    BytesRefBuilder encoded = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, encoded);
    topDocs = searcher.search(new TermQuery(new Term("test", encoded.get())), 1);
    hit = searcher.doc(topDocs.scoreDocs[0].doc);
    storedField = hit.getField("test");
    assertThat(storedField.stringValue(), equalTo("2"));
    writer.close();
}
项目:elasticsearch_my
文件:SimpleLuceneTests.java
public void testNRTSearchOnClosedWriter() throws Exception {
    // Open an NRT reader, index 100 boosted "_id" docs, refresh the reader,
    // close the writer — the refreshed reader must remain usable.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(writer);
    for (int i = 0; i < 100; i++) {
        TextField idField = new TextField("_id", Integer.toString(i), Field.Store.YES);
        idField.setBoost(i);
        Document document = new Document();
        document.add(idField);
        writer.addDocument(document);
    }
    reader = refreshReader(reader);
    writer.close();
    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
项目:nitrite-database
文件:LuceneService.java
@Override
public void createIndex(NitriteId id, String field, String text) {
    // Adds a full-text entry: the serialized NitriteId is stored for exact
    // lookups, the text is analyzed but not stored.
    try {
        String jsonId = keySerializer.writeValueAsString(id);
        Field idField = new StringField(CONTENT_ID, jsonId, Field.Store.YES);
        Field contentField = new TextField(field, text, Field.Store.NO);
        Document document = new Document();
        document.add(idField);
        document.add(contentField);
        // Writer access and commit are serialized across indexing threads.
        synchronized (this) {
            indexWriter.addDocument(document);
            commit();
        }
    } catch (IOException ioe) {
        throw new IndexingException(errorMessage(
            "could not write full-text index data for " + text, 0), ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
项目:nitrite-database
文件:LuceneService.java
@Override
public void updateIndex(NitriteId id, String field, String text) {
    // Replaces (or inserts) the full-text entry for this id; the serialized
    // id term is the update key.
    try {
        String jsonId = keySerializer.writeValueAsString(id);
        Field idField = new StringField(CONTENT_ID, jsonId, Field.Store.YES);
        Field contentField = new TextField(field, text, Field.Store.NO);
        Document document = new Document();
        document.add(idField);
        document.add(contentField);
        // Writer access and commit are serialized across indexing threads.
        synchronized (this) {
            indexWriter.updateDocument(new Term(CONTENT_ID, jsonId), document);
            commit();
        }
    } catch (IOException ioe) {
        throw new IndexingException(errorMessage(
            "could not update full-text index for " + text, 0), ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
项目:Elasticsearch
文件:SourceSimpleFragmentsBuilder.java
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
    // The highlighter always calls us with a leaf-level reader and a matching
    // docId, so the context cast below is safe.
    SourceLookup sourceLookup = searchContext.lookup().source();
    sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
    List<Object> rawValues = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName()));
    if (rawValues.isEmpty()) {
        return EMPTY_FIELDS;
    }
    // Re-wrap each _source value as an unstored analyzed field for fragment building.
    Field[] result = new Field[rawValues.size()];
    for (int i = 0; i < rawValues.size(); i++) {
        result[i] = new Field(mapper.fieldType().names().indexName(), rawValues.get(i).toString(), TextField.TYPE_NOT_STORED);
    }
    return result;
}
项目:Tedyli-Searcher
文件:indexer.java
private Document getDocument(File file) throws IOException {
    // Builds a Lucene document for one file: tokenized (unstored) contents,
    // plus stored name and canonical path for display in search results.
    Document document = new Document();
    // index file contents
    // FIX: FileReader decoded with the platform default charset, so the index
    // depended on the JVM's locale; decode explicitly as UTF-8 instead.
    // (Lucene consumes and closes the reader when the document is indexed.)
    Field contentField = new Field(LuceneConstants.CONTENTS,
            new java.io.InputStreamReader(new java.io.FileInputStream(file),
                    java.nio.charset.StandardCharsets.UTF_8),
            TextField.TYPE_NOT_STORED);
    // index file name
    Field fileNameField = new Field(LuceneConstants.FILE_NAME, file.getName(), TextField.TYPE_STORED);
    // index file path
    Field filePathField = new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(), TextField.TYPE_STORED);
    document.add(contentField);
    document.add(fileNameField);
    document.add(filePathField);
    return document;
}
项目:Reinickendorf_SER316
文件:NoteIndexer.java
private Document noteToDocument(Note note, String noteHtmlContents) {
    // Flattens a note (plus its rendered HTML body) into a Lucene document.
    // "title_cs" duplicates the title — presumably for a case-sensitive
    // analyzer; verify against the index schema.
    Project project = note.getProject();
    String title = note.getTitle();
    String date = note.getDate() != null
            ? new SimpleDateFormat("yyyy-MM-dd").format(note.getDate().getDate())
            : null;
    Document document = new Document();
    document.add(new StringField("id", note.getId(), Field.Store.YES));
    document.add(new StringField("project_id", project.getID(), Field.Store.YES));
    document.add(new StoredField("project_name", project.getTitle()));
    document.add(new TextField("title", title, Field.Store.YES));
    document.add(new TextField("title_cs", title, Field.Store.YES));
    document.add(new TextField("date", date != null ? date : "", Field.Store.YES));
    document.add(new TextField("body", noteHtmlContents, Field.Store.YES));
    return document;
}
项目:Reinickendorf_SER316
文件:EventIndexer.java
private Document eventToDocument(Event newEvent) {
    // Indexes an event's id and text; the start timestamp is kept as a
    // stored-only "date @ time" string for display.
    String text = newEvent.getText();
    String startDate = newEvent.getStartDate() != null
            ? new SimpleDateFormat("yyyy-MM-dd").format(newEvent.getStartDate().getDate())
            : null;
    String startTime = newEvent.getTimeString();
    if (startDate != null) {
        startTime = startDate + " @ " + startTime;
    }
    Document document = new Document();
    document.add(new StringField("id", newEvent.getId(), Field.Store.YES));
    document.add(new TextField("text", text, Field.Store.YES));
    document.add(new TextField("text_cs", text, Field.Store.YES));
    document.add(new StoredField("original_start_date", startTime != null ? startTime : ""));
    return document;
}
项目:cjs_ssms
文件:LuceneIndex.java
public void addIndex(UUser user) throws Exception {
    // Index one user. Store.YES keeps the value in the index so it can be
    // shown in results; avoid storing large content. The username uses
    // TextField so it is analyzed and therefore full-text searchable.
    IndexWriter writer = getWriter();
    try {
        Document doc = new Document();
        doc.add(new StringField("userid", String.valueOf(user.getId()), Field.Store.YES));
        doc.add(new TextField("username", user.getUsername(), Field.Store.YES));
        writer.addDocument(doc);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        writer.close();
    }
}
项目:SnowGraph
文件:InMemoryIndex.java
public InMemoryIndex(Map<String,String> id2Text){
    // Builds a fresh in-memory index mapping each id (exact-match, stored)
    // to its English-analyzed text (also stored for retrieval).
    IndexWriterConfig config = new IndexWriterConfig(new EnglishAnalyzer());
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, config);
        for (Map.Entry<String, String> entry : id2Text.entrySet()) {
            Document doc = new Document();
            doc.add(new StringField("id", entry.getKey(), Field.Store.YES));
            doc.add(new TextField("content", entry.getValue(), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
项目:SnowGraph
文件:CodePatternSearcher.java
private static List<String> search(List<String> contents, String query, int n) throws IOException, ParseException {
    // Index each snippet into a throwaway RAM index, then return the stored
    // text of the top-n matches for the parsed query.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer()));
    for (String method : contents) {
        Document document = new Document();
        document.add(new TextField("content", method, Field.Store.YES));
        writer.addDocument(document);
    }
    writer.close();
    QueryParser parser = new QueryParser("content", new EnglishAnalyzer());
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs topDocs = searcher.search(parser.parse(query), n);
    List<String> results = new ArrayList<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        results.add(searcher.doc(scoreDoc.doc).get("content"));
    }
    return results;
}
项目:tripod
文件:ExampleIndexTransformer.java
@Override
public Document transform(final Example input) throws TransformException {
    // Maps an Example onto its Lucene shape: exact-match id/color (with sort
    // and facet doc-values), analyzed title/body, and a stored create date.
    final Document doc = new Document();
    final String id = input.getId();
    doc.add(new Field(ExampleField.ID.getName(), id, StringField.TYPE_STORED));
    doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef(id)));
    doc.add(new Field(ExampleField.TITLE.getName(), input.getTitle(), TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.BODY.getName(), input.getBody(), TextField.TYPE_STORED));
    final String color = input.getColor();
    doc.add(new Field(ExampleField.COLOR.getName(), color, StringField.TYPE_STORED));
    doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), color));
    // Create date is doc-valued for sorting and stored for display.
    final long createMillis = input.getCreateDate().getTime();
    doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createMillis));
    doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createMillis));
    return doc;
}
项目:LiveQA
文件:QaPairIndex.java
public static void createIndexQ(List<CQAResult> QASetList, Directory dir) {
    // Index question subjects/descriptions. Ids follow list order; entries
    // without a subject still consume an id so ids stay aligned with the list.
    System.out.println("Creating Questions Index");
    IndexWriterConfig iwc = new IndexWriterConfig(ANALYZER.getVersion(), ANALYZER);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(dir, iwc);
        int id = 0; //XXX seq_id
        for (CQAResult qaSet : QASetList) {
            if (qaSet.subject == null) {
                id++;
                continue;
            }
            Document doc = new Document();
            doc.add(new IntField(QID, id++, Field.Store.YES));
            doc.add(new TextField(BEST_ANSWER_FIELD, qaSet.subject, Field.Store.NO));
            doc.add(new TextField(Q_DESCRIPTION, qaSet.content, Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
项目:lucene4ir
文件:TRECWebDocumentIndexer.java
// Pre-creates the reusable per-document fields. The doc number is always an
// exact-match stored field; the text fields switch between term-vector-enabled
// and plain TextField variants depending on the indexPositions flag.
private void initFields() {
docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
if(indexPositions){
// Positions requested: use the variant that records term vectors.
titleField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
textField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
allField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
urlField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES);
dochdrField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES);
}
else {
// Plain analyzed text fields, no term vectors.
titleField = new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
textField = new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
allField = new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
urlField = new TextField(Lucene4IRConstants.FIELD_URL, "", Field.Store.YES);
dochdrField = new TextField(Lucene4IRConstants.FIELD_DOCHDR, "", Field.Store.YES);
}
}
项目:lucene4ir
文件:TRECAquaintDocumentIndexer.java
// Pre-creates the reusable per-document fields for AQUAINT documents. Doc
// number and publication date are exact-match stored fields; the text fields
// switch between term-vector-enabled and plain variants via indexPositions.
private void initFields() {
docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
pubdateField = new StringField(Lucene4IRConstants.FIELD_PUBDATE, "", Field.Store.YES);
if(indexPositions){
// Positions requested: use the variant that records term vectors.
titleField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
textField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
allField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
sourceField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES);
}
else {
// Plain analyzed text fields, no term vectors.
titleField = new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
textField = new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
allField = new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
sourceField = new TextField(Lucene4IRConstants.FIELD_SOURCE, "", Field.Store.YES);
}
}
项目:xxl-search
文件:LuceneSearchServiceImpl.java
// NOTE(review): method name is a typo for "buildDocument" — kept as-is to
// avoid breaking callers.
private Document buildDucument(ShopDTO shopDTO){
    Document document = new Document();
    document.add(new IntField(ShopDTO.ShopParam.SHOP_ID, shopDTO.getShopid(), Field.Store.YES));
    // Shop name is the only analyzed (full-text searchable) field.
    document.add(new TextField(ShopDTO.ShopParam.SHOP_NAME, shopDTO.getShopname(), Field.Store.YES));
    //document.add(new StringField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid()+"", Field.Store.YES));
    document.add(new IntField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid(), Field.Store.YES));
    // One TAG_ID field per tag, if any.
    if (shopDTO.getTaglist() != null && shopDTO.getTaglist().size() > 0) {
        for (int tagid : shopDTO.getTaglist()) {
            document.add(new IntField(ShopDTO.ShopParam.TAG_ID, tagid, Field.Store.YES));
        }
    }
    // Scores use a stored + sorted int type so results can be ranked on them.
    document.add(new IntField(ShopDTO.ShopParam.SCORE, shopDTO.getScore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED));
    document.add(new IntField(ShopDTO.ShopParam.HOT_SCORE, shopDTO.getHotscore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED));
    return document;
}
项目:xxl-search
文件:LuceneUtil.java
public static void createIndex(List<Map<String, String>> list){
    // Rebuild from scratch: wipe the existing index, then add one document per row.
    deleteAll();
    for (Map<String, String> row : list) {
        Document doc = new Document();
        for (Map.Entry<String, String> entry : row.entrySet()) {
            // Only the keywords column is analyzed; every other column is exact-match.
            if (ExcelUtil.KEYWORDS.equals(entry.getKey())) {
                doc.add(new TextField(entry.getKey(), entry.getValue(), Field.Store.YES));
            } else {
                doc.add(new StringField(entry.getKey(), entry.getValue(), Field.Store.YES));
            }
        }
        addDocument(doc);
    }
}
项目:GKHMC
文件:BuildBaikeFragmentIndex.java
List<Document> buildFragmentDocument(String title, String content, int fragmentSize){
    // Splits content into fragmentSize-sized chunks, one Lucene document per
    // chunk (word-separated for analysis), all sharing the same stored title.
    List<Document> documents = new ArrayList<>();
    // FIX: integer division yielded 0 fragments when content was shorter than
    // fragmentSize, silently dropping the whole text — index at least one.
    int fragmentNum = Math.max(1, content.length() / fragmentSize);
    for (int i = 0; i < fragmentNum; i++) {
        String fragment;
        if (i == fragmentNum - 1) {
            // Last fragment absorbs the remainder (up to 2*fragmentSize-1 chars),
            // matching the original chunking for content >= fragmentSize.
            fragment = content.substring(i * fragmentSize);
        } else {
            fragment = content.substring(i * fragmentSize, (1 + i) * fragmentSize);
        }
        TextField contentField = new TextField(Const.FIELD_CONTENT, separateWordsWithSpace.apply(fragment), Field.Store.YES);
        StringField titleField = new StringField(Const.FIELD_TITLE, title, Field.Store.YES);
        Document document = new Document();
        document.add(titleField);
        document.add(contentField);
        documents.add(document);
    }
    return documents;
}
项目:incubator-zeppelin-druid
文件:LuceneSearch.java
/**
 * Builds the search document for either a paragraph or a note title.
 * With a paragraph, its code text is indexed along with its start (or
 * creation) time; otherwise the note name itself is the searchable text.
 *
 * @param id document id, different for Note name and paragraph entries
 * @param noteName name of the note
 * @param p paragraph, may be null
 * @return the populated Lucene document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
    Document doc = new Document();
    doc.add(new StringField(ID_FIELD, id, Field.Store.YES));
    doc.add(new StringField("title", noteName, Field.Store.YES));
    if (p == null) {
        doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
    } else {
        doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
        // Prefer the actual start time; fall back to creation time.
        Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
        doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
    }
    return doc;
}
项目:search
文件:QueryAutoStopWordAnalyzerTest.java
@Override
public void setUp() throws Exception {
    super.setUp();
    dir = new RAMDirectory();
    appAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, appAnalyzer));
    // 200 docs cycling through the varied and repetitive value pools.
    final int numDocs = 200;
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(new TextField("variedField", variedFieldValues[i % variedFieldValues.length], Field.Store.YES));
        doc.add(new TextField("repetitiveField", repetitiveFieldValues[i % repetitiveFieldValues.length], Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();
    reader = DirectoryReader.open(dir);
}
项目:wikisearch
文件:LuceneIndexCommand.java
private Document getDocFromPage(WikiPage page) {
    // Converts a wiki page into a Lucene document: exact-match title and
    // contributor, analyzed (unstored) contents.
    logger.debug("Converting page to document. Page:\n {}",page.toString());
    Document doc = new Document();
    String title = page.getTitle();
    String contributor = page.getContributor();
    String contents = page.getText();
    // FIX: contributor and contents were only added when title was non-null
    // (the null checks were nested inside the title check); each field is now
    // added independently so untitled pages keep their other fields.
    if (title != null) {
        doc.add(new StringField("title", title, Field.Store.YES));
    }
    if (contributor != null) {
        doc.add(new StringField("contributor", contributor, Field.Store.YES));
    }
    if (contents != null) {
        doc.add(new TextField("contents", contents, Field.Store.NO));
    }
    return doc;
}
项目:search
文件:TestNorms.java
public void buildIndex(Directory dir) throws IOException {
    // Index atLeast(100) line-file docs, each carrying a byteTestField whose
    // text equals its own random boost so norms can be verified later.
    Random random = random();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    config.setSimilarity(new MySimProvider());
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
        Document doc = docs.nextDoc();
        int boost = random().nextInt(255);
        Field boostedField = new TextField(byteTestField, "" + boost, Field.Store.YES);
        boostedField.setBoost(boost);
        doc.add(boostedField);
        writer.addDocument(doc);
        // LineFileDocs reuses documents, so strip our field before the next round.
        doc.removeField(byteTestField);
        if (rarely()) {
            writer.commit();
        }
    }
    writer.commit();
    writer.close();
    docs.close();
}
项目:redir
文件:QndRedisDirIndex.java
public static void main(String args[]) throws Exception {
    initLoggers(Level.INFO);
    RedisDirectory DIR = new RedisDirectory(REDIS_HOST, REDIS_PORT, REDIS_PASSWORD);
    DIR.init();
    long t1 = System.currentTimeMillis();
    try {
        // Upsert one document keyed by its id term, then flush and close.
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter iw = new IndexWriter(DIR, iwc);
        Document doc = new Document();
        doc.add(new StringField("id", "thanhnb", Field.Store.YES));
        doc.add(new TextField("name", "Nguyen Ba Thanh", Field.Store.NO));
        iw.updateDocument(new Term("id", "thanhnb"), doc);
        iw.commit();
        iw.close();
    } finally {
        // Always release the Redis-backed directory.
        DIR.destroy();
    }
    long t2 = System.currentTimeMillis();
    System.out.println("Finished in " + (t2 - t1) / 1000.0 + " sec");
}
项目:Review-It
文件:Indexer.java
// Persists one parsed comment to the index and commits immediately so it is
// visible to searchers right away. Only the searchable text is analyzed
// (TextField); every other attribute is an exact-match StringField.
public void indexParsedDocument(ParsedComment document) {
Preconditions.checkNotNull(indexWriter,
"The index writer is not initialized");
Document newDoc = new Document();
newDoc.add(new TextField(ParsedComment.Fields.SEARCHABLE_TEXT.name(),
document.fullSearchableText(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.ID.name(),
document.getId(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.PRODUCT_NAME.name(),
document.getProductName(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.COMMENT.name(),
document.getComment(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.URL.name(),
document.getCommentUrl(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.SOURCE.name(),
document.getSource().name(), Field.Store.YES));
newDoc.add(new StringField(ParsedComment.Fields.LABEL.name(),
document.getCommentLabel(), Field.Store.YES));
try {
// Per-document commit: durable and immediately searchable, at some throughput cost.
indexWriter.addDocument(newDoc);
indexWriter.commit();
} catch (IOException e) {
throw new RuntimeException(
"Could not write new document to the index directory", e);
}
}
项目:search
文件:TestCheckIndex.java
public void testBogusTermVectors() throws IOException {
    // Feed a canned token stream whose offsets go backwards; the index checks
    // triggered on close should cope with the bogus term vectors.
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    Field field = new Field("foo", "", ft);
    field.setTokenStream(new CannedTokenStream(
        new Token("bar", 5, 10), new Token("bar", 1, 4)
    ));
    Document doc = new Document();
    doc.add(field);
    iw.addDocument(doc);
    iw.close();
    dir.close(); // checkindex
}
项目:search
文件:TestSloppyPhraseQuery.java
public void testInfiniteFreq1() throws Exception {
    // A sloppy phrase query repeating the same term must still score sanely.
    String lyricsText = "drug druggy drug drug drug";
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document luceneDoc = new Document();
    luceneDoc.add(newField("lyrics", lyricsText, new FieldType(TextField.TYPE_NOT_STORED)));
    writer.addDocument(luceneDoc);
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    // "drug the drug"~1
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("lyrics", "drug"), 1);
    query.add(new Term("lyrics", "drug"), 3);
    query.setSlop(1);
    assertSaneScoring(query, searcher);
    reader.close();
    dir.close();
}
项目:search
文件:AbstractTestCase.java
protected void make1dmfIndexNA( String... values ) throws Exception {
    // Recreate the index with analyzerK: a single document holding field F
    // once per value, with full term vectors for highlighter tests; then
    // reopen the shared reader.
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    Document doc = new Document();
    for (String value : values) {
        doc.add(new Field(F, value, customType));
    }
    writer.addDocument(doc);
    writer.close();
    if (reader != null) reader.close();
    reader = DirectoryReader.open(dir);
}
项目:search
文件:TestIndexWriter.java
public void testChangeIndexOptions() throws Exception {
    // Index the same field first with DOCS_AND_FREQS, then with DOCS_ONLY,
    // exercising the index-options change path across documents.
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    FieldType docsAndFreqs = new FieldType(TextField.TYPE_NOT_STORED);
    docsAndFreqs.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED);
    docsOnly.setIndexOptions(IndexOptions.DOCS_ONLY);
    Document doc = new Document();
    doc.add(new Field("field", "a b c", docsAndFreqs));
    writer.addDocument(doc);
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("field", "a b c", docsOnly));
    writer.addDocument(doc);
    writer.close();
    dir.close();
}
项目:search
文件:TestSimilarity2.java
/** make sure we can retrieve when norms are disabled */
public void testNoNorms() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    // One doc whose "foo" field has norms omitted (frozen field type).
    FieldType noNormsType = new FieldType(TextField.TYPE_NOT_STORED);
    noNormsType.setOmitNorms(true);
    noNormsType.freeze();
    Document doc = new Document();
    doc.add(newField("foo", "bar", noNormsType));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);
    // Every similarity under test must still find the document.
    for (Similarity sim : sims) {
        searcher.setSimilarity(sim);
        BooleanQuery query = new BooleanQuery(true);
        query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        assertEquals(1, searcher.search(query, 10).totalHits);
    }
    reader.close();
    dir.close();
}
项目:search
文件:TestPostingsOffsets.java
private void checkTokens(Token[] tokens) throws IOException {
    // Index the canned tokens with full postings (offsets) AND term vectors
    // so the checkindex cross-check can validate one against the other.
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    boolean success = false;
    try {
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        // store some term vectors for the checkindex cross-check
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);
        Document doc = new Document();
        doc.add(new Field("body", new CannedTokenStream(tokens), ft));
        writer.addDocument(doc);
        success = true;
    } finally {
        // Close normally on success; suppress secondary exceptions on failure
        // so the original cause propagates.
        if (success) {
            IOUtils.close(writer, dir);
        } else {
            IOUtils.closeWhileHandlingException(writer, dir);
        }
    }
}
项目:search
文件:SingleFieldTestDb.java
public SingleFieldTestDb(Random random, String[] documents, String fName) {
    // Builds a tiny single-field test index: one unstored doc per input string.
    try {
        db = new MockDirectoryWrapper(random, new RAMDirectory());
        docs = documents;
        fieldName = fName;
        IndexWriter writer = new IndexWriter(db, new IndexWriterConfig(
            Version.LUCENE_CURRENT,
            new MockAnalyzer(random)));
        for (String content : docs) {
            Document d = new Document();
            d.add(new TextField(fieldName, content, Field.Store.NO));
            writer.addDocument(d);
        }
        writer.close();
    } catch (java.io.IOException ioe) {
        // Test fixture: index construction failure is unrecoverable.
        throw new Error(ioe);
    }
}