/**
 * Builds a Lucene {@link Document} for either a paragraph or a note name.
 * When {@code p} is non-null the paragraph code is indexed; otherwise the
 * note name itself becomes the searchable text.
 *
 * @param id       id of the document, different for note name and paragraph
 * @param noteName name of the note
 * @param p        paragraph to index, or {@code null} to index the note name
 * @return the populated document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
  Document indexDoc = new Document();
  indexDoc.add(new StringField(ID_FIELD, id, Field.Store.YES));
  indexDoc.add(new StringField("title", noteName, Field.Store.YES));
  if (p == null) {
    // No paragraph: make the note name itself searchable.
    indexDoc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
  } else {
    indexDoc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
    // Prefer the start date; fall back to the creation date.
    Date modified = (p.getDateStarted() != null) ? p.getDateStarted() : p.getDateCreated();
    indexDoc.add(new LongField("modified", modified.getTime(), Field.Store.NO));
  }
  return indexDoc;
}
/**
 * Builds a Lucene {@link Document} for either a paragraph or a note name.
 * When {@code p} is non-null the paragraph text (and its title, if present)
 * is indexed; otherwise the note name itself becomes the searchable text.
 *
 * @param id       id of the document, different for note name and paragraph
 * @param noteName name of the note
 * @param p        paragraph to index, or {@code null} to index the note name
 * @return the populated document
 */
private Document newDocument(String id, String noteName, Paragraph p) {
  Document indexDoc = new Document();
  indexDoc.add(new StringField(ID_FIELD, id, Field.Store.YES));
  indexDoc.add(new StringField("title", noteName, Field.Store.YES));
  if (p == null) {
    // No paragraph: make the note name itself searchable.
    indexDoc.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES));
  } else {
    indexDoc.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES));
    if (p.getTitle() != null) {
      indexDoc.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES));
    }
    // Prefer the start date; fall back to the creation date.
    Date modified = (p.getDateStarted() != null) ? p.getDateStarted() : p.getDateCreated();
    indexDoc.add(new LongField("modified", modified.getTime(), Field.Store.NO));
  }
  return indexDoc;
}
private void addLuceneIndexFields(String indexField, List<IndexableField> list, JsonNode node, JsonSchema nodeSchema) { JsonNode.Type type = nodeSchema.getSchemaType(); if (type == JsonNode.Type.ARRAY) { for (int i = 0; i < node.getSize(); i++) { addLuceneIndexFields(indexField, list, node.get(i), nodeSchema.getItemSchema()); } } else if (type == JsonNode.Type.OBJECT) { Iterator<String> properties = node.getProperties(); while (properties.hasNext()) { String propName = properties.next(); // Index property key for object nodes list.add(new StringField(indexField, propName, Field.Store.NO)); } } else if (type == JsonNode.Type.STRING) { list.add(new StringField(indexField, node.asString(), Field.Store.NO)); } else if (type == JsonNode.Type.BOOLEAN) { list.add(new StringField(indexField, node.asString(), Field.Store.NO)); } else if (type == JsonNode.Type.INTEGER) { list.add(new LongField(indexField, node.asLong(), Field.Store.NO)); } else if (type == JsonNode.Type.NUMBER) { list.add(new DoubleField(indexField, node.asDouble(), Field.Store.NO)); } else { throw new UnsupportedOperationException("Node type " + type + " not supported for index field " + indexField); } }
/**
 * Resolves a Java field type to the Lucene {@link Field} subclass used to
 * index it.
 *
 * Supported primitive types (and their wrappers): byte, short, int, long,
 * float, double, char, boolean. Char, boolean, byte and short are folded
 * into {@link IntField}.
 *
 * @param type field type
 * @return Lucene field class, or {@code null} when the type is unsupported
 */
private Class<? extends Field> resolveField(Type type) {
  if (type == String.class) {
    return StringField.class;
  }
  if (type == Double.class || type == double.class) {
    return DoubleField.class;
  }
  if (type == Float.class || type == float.class) {
    return FloatField.class;
  }
  if (type == Long.class || type == long.class) {
    return LongField.class;
  }
  if (type == Integer.class || type == int.class
      || type == Short.class || type == short.class
      || type == Boolean.class || type == boolean.class
      || type == Byte.class || type == byte.class
      || type == Character.class || type == char.class) {
    return IntField.class;
  }
  return null;
}
/**
 * Creates the per-thread doc state. With {@code reuseFields} enabled, a
 * single {@link Document} and one Field instance per known field name are
 * pre-allocated and reused across documents; otherwise the maps and the
 * document stay null and everything is created fresh per document.
 */
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {
  this.reuseFields = reuseFields;
  if (!reuseFields) {
    numericFields = null;
    fields = null;
    doc = null;
    return;
  }
  fields = new HashMap<>();
  numericFields = new HashMap<>();
  // Pre-populate the maps with the default fields.
  fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
  fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
  fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
  fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
  fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));
  numericFields.put(DATE_MSEC_FIELD, new LongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
  numericFields.put(TIME_SEC_FIELD, new IntField(TIME_SEC_FIELD, 0, Field.Store.NO));
  doc = new Document();
}
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer())); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for(int i=0;i<100;i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongField("timestamp", then, Field.Store.NO)); indexWriter.addDocument(doc); } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true)); indexWriter.close(); }
/** Populates a random index with text, string, numeric and doc-values fields. */
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  int docCount = TestUtil.nextInt(random(), 2049, 4000);
  for (int i = 0; i < docCount; i++) {
    Document doc = new Document();
    doc.add(newTextField("english", English.intToEnglish(i), Field.Store.NO));
    doc.add(newTextField("oddeven", i % 2 == 0 ? "even" : "odd", Field.Store.NO));
    doc.add(newStringField("byte", String.valueOf((byte) random().nextInt()), Field.Store.NO));
    doc.add(newStringField("short", String.valueOf((short) random().nextInt()), Field.Store.NO));
    doc.add(new IntField("int", random().nextInt(), Field.Store.NO));
    doc.add(new LongField("long", random().nextLong(), Field.Store.NO));
    doc.add(new FloatField("float", random().nextFloat(), Field.Store.NO));
    doc.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
    doc.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
    doc.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
}
/** Verifies that the terms dictionary exposes the min/max of indexed longs. */
public void testLongFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  int docCount = atLeast(100);
  long expectedMin = Long.MAX_VALUE;
  long expectedMax = Long.MIN_VALUE;
  for (int i = 0; i < docCount; i++) {
    long num = random().nextLong();
    // Track the extremes we actually indexed.
    expectedMin = Math.min(num, expectedMin);
    expectedMax = Math.max(num, expectedMax);
    Document doc = new Document();
    doc.add(new LongField("field", num, Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  Terms terms = MultiFields.getTerms(reader, "field");
  assertEquals(expectedMin, NumericUtils.getMinLong(terms));
  assertEquals(expectedMax, NumericUtils.getMaxLong(terms));
  reader.close();
  writer.close();
  dir.close();
}
/** * Adds a file to index with {@link IndexWriter}. * @param file will be indexed by the function */ public void indexFile(final File file) { FileInputStream fis = getInputStream(file); Document doc = documentFactory.createDocument(); // Add the path of the file as a field named "path". Use a field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency or positional information: Field pathField = new StringField(fieldName, file.getAbsolutePath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with NumericRangeFilter). doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // If that's not the case searching for special characters will fail. BufferedReader bufferedReader; try { bufferedReader = bufferedReaderFactory.createReader(fis); doc.add(new Field("contents", bufferedReader, TextField.TYPE_NOT_STORED)); addDocument(file, doc); fis.close(); } catch (IOException e) { logger.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
/**
 * Indexes a single item: stored id, a stored/indexed text field with term
 * vectors, and the stored publication time.
 */
public void index(Item item) throws IOException {
  Document document = new Document();
  document.add(new StringField("id", item.getId(), Store.YES));
  // Stored, indexed text field that also keeps term vectors.
  FieldType textType = new FieldType();
  textType.setStored(true);
  textType.setIndexed(true);
  textType.setStoreTermVectors(true);
  document.add(new Field("text", item.getText(), textType));
  // NOTE: field name "publicationTIme" (sic) kept for index compatibility.
  document.add(new LongField("publicationTIme", item.getPublicationTime(), LongField.TYPE_STORED));
  if (iwriter != null) {
    iwriter.addDocument(document);
  }
}
/** Converts a tweet into a Lucene document and adds it to the writer. */
private static void addTweetToIndex(Tweet headline, IndexWriter writer) throws IOException {
  Document doc = new Document();
  doc.add(new TextField(FieldNames.TEXT.name(), headline.getText(), Field.Store.YES));
  doc.add(new LongField(FieldNames.TWEETID.name(), headline.getTweetID(), Field.Store.NO));
  doc.add(new StringField(FieldNames.USERNAME.name(), headline.getUserName(), Store.YES));
  doc.add(new LongField(FieldNames.USERID.name(), headline.getUserID(), Field.Store.NO));
  doc.add(new LongField(FieldNames.TIMESTAMP.name(), headline.getTimestamp().getTime(),
      Field.Store.YES));
  writer.addDocument(doc);
}
/** Creates and stores the Lucene document for one tour. */
private static void createDoc_Tour(final IndexWriter indexWriter,
                                   final long tourId,
                                   final String title,
                                   final String description,
                                   final long time) throws IOException {
  final Document doc = new Document();
  doc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long()));
  // Title and description are optional.
  if (title != null) {
    doc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text()));
  }
  if (description != null) {
    doc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text()));
  }
  indexWriter.addDocument(doc);
}
/**
 * Wraps a {@link Record} into its Lucene document representation. All record
 * metadata plus the index timestamp are stored.
 */
@Nonnull
public static RecordDoc create(@Nonnull Record record, long indexTime) {
  checkNotNull(record);
  Document doc = new Document();
  doc.add(new StringField(RECORD_URI, record.getUri().toString(), Store.YES));
  doc.add(new StringField(RECORD_PATH, record.getPath(), Store.YES));
  doc.add(new StringField(RECORD_PARENT, record.getFolder(), Store.YES));
  doc.add(new StringField(RECORD_NAME, record.getName(), Store.YES));
  doc.add(new LongField(RECORD_TIME, record.getTime(), Store.YES));
  doc.add(new LongField(RECORD_SIZE, record.getSize(), Store.YES));
  // Directory flag is indexed as the string "true"/"false".
  doc.add(new StringField(RECORD_DIR, Boolean.toString(record.isDir()), Store.YES));
  doc.add(new LongField(INDEX_TIME, indexTime, Store.YES));
  return new RecordDoc(record, doc, indexTime);
}
/**
 * Creates a Lucene document for the file with the fields:
 * <ul>
 * <li>path: relative path from the constructor</li>
 * <li>id: the same as path</li>
 * <li>modified: last modified date of the file (minute resolution)</li>
 * <li>filesize: size of the file</li>
 * <li>title: name of the file</li>
 * </ul>
 * @return New Lucene document.
 */
@Override
public Document createDocument() {
  Document doc = new Document();
  doc.add(new StringField("path", path, Field.Store.YES));
  doc.add(new StringField("id", path, Field.Store.YES));
  String modified = DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE);
  doc.add(new StringField("modified", modified, Field.Store.YES));
  doc.add(new LongField("filesize", file.length(), Field.Store.YES));
  doc.add(new TextField("title", file.getName(), Field.Store.YES));
  return doc;
}
/**
 * Creates the per-thread doc state. With {@code reuseFields} enabled, a
 * single {@link Document} and one Field instance per known field name are
 * pre-allocated and reused across documents; otherwise the maps and the
 * document stay null and everything is created fresh per document.
 */
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {
  this.reuseFields = reuseFields;
  if (!reuseFields) {
    numericFields = null;
    fields = null;
    doc = null;
    return;
  }
  fields = new HashMap<String,Field>();
  numericFields = new HashMap<String,Field>();
  // Pre-populate the maps with the default fields.
  fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
  fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
  fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
  fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
  fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));
  numericFields.put(DATE_MSEC_FIELD, new LongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
  numericFields.put(TIME_SEC_FIELD, new IntField(TIME_SEC_FIELD, 0, Field.Store.NO));
  doc = new Document();
}
/**
 * Configures this date field: a date format is required, a time unit and a
 * numeric precision step for the underlying LongField type are optional.
 *
 * @throws RuntimeException when the date-format property is missing
 */
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties,
    Configuration configuration) {
  final String dateFormat = properties.get(DATE_FORMAT);
  if (dateFormat == null) {
    throw new RuntimeException("The property [" + DATE_FORMAT + "] can not be null.");
  }
  final String timeUnitStr = properties.get(TIME_UNIT);
  if (timeUnitStr != null) {
    _timeUnit = TimeUnit.valueOf(timeUnitStr.trim().toUpperCase());
  }
  // SimpleDateFormat is not thread-safe, hence one instance per thread.
  _simpleDateFormat = new ThreadValue<SimpleDateFormat>() {
    @Override
    protected SimpleDateFormat initialValue() {
      return new SimpleDateFormat(dateFormat);
    }
  };
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr == null) {
    // No custom precision step: use the Lucene default type.
    _typeNotStored = LongField.TYPE_NOT_STORED;
  } else {
    _precisionStep = Integer.parseInt(precisionStepStr);
    FieldType notStored = new FieldType(LongField.TYPE_NOT_STORED);
    notStored.setNumericPrecisionStep(_precisionStep);
    notStored.freeze();
    _typeNotStored = notStored;
  }
}
/**
 * Configures the stored and not-stored LongField types, optionally applying
 * a custom numeric precision step from the properties.
 */
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties,
    Configuration configuration) {
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr == null) {
    // No custom precision step: use the Lucene default types.
    _typeStored = LongField.TYPE_STORED;
    _typeNotStored = LongField.TYPE_NOT_STORED;
  } else {
    _precisionStep = Integer.parseInt(precisionStepStr);
    FieldType stored = new FieldType(LongField.TYPE_STORED);
    stored.setNumericPrecisionStep(_precisionStep);
    stored.freeze();
    _typeStored = stored;
    FieldType notStored = new FieldType(LongField.TYPE_NOT_STORED);
    notStored.setNumericPrecisionStep(_precisionStep);
    notStored.freeze();
    _typeNotStored = notStored;
  }
}
public static void indexDocs(IndexWriter writer) throws IOException { Document doc = new Document(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); Field pathField = new StringField("path", "opt", Field.Store.YES); doc.add(pathField); doc.add(new LongField("modified", 123L, Field.Store.NO)); doc.add(new TextField("contents", "content is easy.", Field.Store.YES)); // if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { writer.addDocument(doc); // writer.addDocument(doc, analyzer); // System.out.println("added"); // } else { // // writer.updateDocument(new Term("path", "opt"), doc); System.out.println("updated"); // } }
/**
 * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#getIndexWriter()}.
 */
@Test
public final void testGetIndexWriter() throws Exception {
  RAMDirectory indexDirectory = new RAMDirectory();
  RAMDirectory taxonomyDirectory = new RAMDirectory();
  IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory);
  target.initializeIndex();
  // Adding one document through the managed writer must be visible in numDocs.
  Document doc = new Document();
  doc.add(new LongField("testfield", 1000L, Store.YES));
  target.getIndexWriter().addDocument(doc);
  assertEquals(1, target.getIndexWriter().numDocs());
  target.dispose();
}
/** Builds a tiny two-revision in-memory index for tests. */
private IndexReader buildDummyIndex() throws IOException {
  RAMDirectory indexDirectory = new RAMDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
  iwc.setOpenMode(OpenMode.CREATE);
  IndexWriter writer = new IndexWriter(indexDirectory, iwc);
  // First fake revision: author "foo", revision 50.
  Document first = new Document();
  first.add(new StringField("Author", "foo", Field.Store.YES));
  first.add(new LongField("RevisionNumber", 50L, Field.Store.YES));
  first.add(new StringField("Revision", "50", Field.Store.YES));
  first.add(new TextField("Message", "stuff", Field.Store.YES));
  writer.addDocument(first);
  // Second fake revision: author "bar", revision 5000.
  Document second = new Document();
  second.add(new StringField("Author", "bar", Field.Store.YES));
  second.add(new LongField("RevisionNumber", 5000L, Field.Store.YES));
  second.add(new StringField("Revision", "5000", Field.Store.YES));
  second.add(new TextField("Message", "stuff", Field.Store.YES));
  writer.addDocument(second);
  writer.commit();
  writer.close();
  return DirectoryReader.open(indexDirectory);
}
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER))); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for(int i=0;i<100;i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongField("timestamp", then, Field.Store.NO)); indexWriter.addDocument(doc); } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true)); indexWriter.close(); }
/** Populates a random index with text, string, numeric and doc-values fields. */
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  int docCount = _TestUtil.nextInt(random(), 2049, 4000);
  for (int i = 0; i < docCount; i++) {
    Document doc = new Document();
    doc.add(newTextField("english", English.intToEnglish(i), Field.Store.NO));
    doc.add(newTextField("oddeven", i % 2 == 0 ? "even" : "odd", Field.Store.NO));
    doc.add(newStringField("byte", String.valueOf((byte) random().nextInt()), Field.Store.NO));
    doc.add(newStringField("short", String.valueOf((short) random().nextInt()), Field.Store.NO));
    doc.add(new IntField("int", random().nextInt(), Field.Store.NO));
    doc.add(new LongField("long", random().nextLong(), Field.Store.NO));
    doc.add(new FloatField("float", random().nextFloat(), Field.Store.NO));
    doc.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
    doc.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
    doc.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
}
/**
 * Adds a long value to the document, plus a doc-values field when this
 * field participates in sorting. Null values are silently skipped.
 */
void addToDoc(Document doc, Long value) {
  Preconditions.checkArgument(valueType == Long.class);
  if (value != null) {
    doc.add(new LongField(indexFieldName, value, stored ? Store.YES : Store.NO));
    if (isSorted()) {
      Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.LONG);
      doc.add(new NumericDocValuesField(indexFieldName, value));
    }
  }
}
/**
 * Stores two things per document: the whole line of text and its line number.
 */
private static void addDoc(IndexWriter w, String text, long lineNumber) throws IOException {
  Document lineDoc = new Document();
  lineDoc.add(new TextField("text", text, Field.Store.YES));
  lineDoc.add(new LongField("line", lineNumber, Field.Store.YES));
  w.addDocument(lineDoc);
}
/**
 * Indexes one file (path, modified time, contents), adding or updating
 * depending on the writer's open mode. Files beyond MAX_ITEMS are skipped;
 * the jobs-done counter is bumped for every file that enters the indexing
 * phase.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  long counter = COUNTER.incrementAndGet();
  if (counter > MAX_ITEMS) {
    return;
  }
  System.out.println("Counter: " + counter);
  try (InputStream stream = Files.newInputStream(file)) {
    Document doc = new Document();
    doc.add(new StringField("path", file.toString(), Field.Store.YES));
    doc.add(new LongField("modified", lastModified, Field.Store.NO));
    doc.add(new TextField("contents",
        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      System.out.println("adding " + file);
      writer.addDocument(doc);
    } else {
      System.out.println("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  } finally {
    System.out.println("Jobs done: " + JOBS_DONE.incrementAndGet());
  }
}
/**
 * Converts a value into the Lucene index field matching this field's
 * declared type, or {@code null} when the value is null or the type has no
 * mapping. Fields are never stored here (only the hkey is stored).
 *
 * @param value the source value to convert
 * @return the Lucene field, or {@code null}
 */
public Field getField(ValueSource value) {
    if (value.isNull())
        return null;
    Field.Store store = Field.Store.NO; // Only store hkey.
    switch (fieldType) {
    case INT:
        // Pick the accessor matching the underlying storage width.
        switch (TInstance.underlyingType(value.getType())) {
        case INT_8:
            return new IntField(name, value.getInt8(), store);
        case INT_16:
            return new IntField(name, value.getInt16(), store);
        case UINT_16:
            return new IntField(name, value.getUInt16(), store);
        case INT_32:
        default:
            return new IntField(name, value.getInt32(), store);
        }
    case LONG:
        return new LongField(name, value.getInt64(), store);
    case FLOAT:
        return new FloatField(name, value.getFloat(), store);
    case DOUBLE:
        return new DoubleField(name, value.getDouble(), store);
    case STRING:
        switch (TInstance.underlyingType(value.getType())) {
        case STRING:
            return new StringField(name, value.getString(), store);
        default: {
            // Non-string underlying storage: render through the type's formatter.
            StringBuilder str = new StringBuilder();
            value.getType().format(value, AkibanAppender.of(str));
            return new StringField(name, str.toString(), store);
        }
        }
    case TEXT:
        return new TextField(name, value.getString(), store);
    default:
        return null;
    }
}
/**
 * Adds a Lucene field for the given value to the document.
 *
 * Values annotated NoIndex are stored only (char and boolean are normalized
 * to ints first); TextFieldable values become analyzed TextFields; otherwise
 * the value is mapped onto the numeric/string Lucene field matching its Java
 * type, with char and boolean folded into IntField.
 *
 * Bug fix: the NoIndex char branch cast the boxed value directly with
 * {@code (int) val}, which throws ClassCastException for a Character
 * (Object-to-int casts unbox via Integer). It now unboxes via char first.
 *
 * @param doc document
 * @param name fieldName
 * @param val value
 * @param type field original type
 * @param store store
 * @param textFieldable isTextField marker, may be null
 * @param noIndex stored-only marker, may be null
 * @return true if a field was added to the document
 */
private boolean addField(Document doc, String name, Object val, Type type, Store store,
    TextFieldable textFieldable, NoIndex noIndex) {
  boolean add = true;
  if (noIndex != null) {
    if (type == Character.class || type == char.class) {
      val = (int) (char) val;  // unbox Character before widening to int
    } else if (type == Boolean.class || type == boolean.class) {
      val = (boolean) val ? 1 : 0;
    }
    doc.add(new StoredField(name, val.toString()));
  } else if (textFieldable != null) {
    doc.add(new TextField(name, val.toString(), store));
  } else if (type == String.class) {
    doc.add(new StringField(name, val.toString(), store));
  } else if (type == Double.class || type == double.class) {
    doc.add(new DoubleField(name, (double) val, store));
  } else if (type == Float.class || type == float.class) {
    doc.add(new FloatField(name, (float) val, store));
  } else if (type == Short.class || type == short.class
      || type == Integer.class || type == int.class
      || type == Byte.class || type == byte.class) {
    doc.add(new IntField(name, Integer.valueOf(val.toString()), store));
  } else if (type == Character.class || type == char.class) {
    doc.add(new IntField(name, Integer.valueOf((char) val), store));
  } else if (type == Boolean.class || type == boolean.class) {
    doc.add(new IntField(name, (boolean) val ? 1 : 0, store));
  } else if (type == Long.class || type == long.class) {
    doc.add(new LongField(name, (long) val, store));
  } else {
    add = false;
  }
  return add;
}
@Override public int doLogic() throws Exception { List<IndexableField> fields = doc.getFields(); Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; for(final IndexableField field : fields) { if (!field.fieldType().tokenized() || field instanceof IntField || field instanceof LongField || field instanceof FloatField || field instanceof DoubleField) { continue; } final TokenStream stream = field.tokenStream(analyzer, null); // reset the TokenStream to the first token stream.reset(); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); while(stream.incrementToken()) { termAtt.fillBytesRef(); tokenCount++; } stream.end(); stream.close(); } totalTokenCount += tokenCount; return tokenCount; }
/** Recreates a stored LongField, indexed according to the original field info. */
@Override
public void longField(FieldInfo fieldInfo, long value) {
  FieldType type = new FieldType(LongField.TYPE_NOT_STORED);
  type.setStored(true);
  type.setIndexed(fieldInfo.isIndexed());
  doc.add(new LongField(fieldInfo.name, value, type));
}
private static void createDoc_Marker( final IndexWriter indexWriter, final long markerId, final long tourId, final String title, final String description, final long time) throws IOException { // private static final FieldType _longSearchField = new FieldType(LongField.TYPE_STORED); // private static final FieldType _textSearchField = new FieldType(TextField.TYPE_STORED); // { // _longSearchField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // _textSearchField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // } final Document doc = new Document(); doc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR_MARKER, Store.YES)); doc.add(new LongField(SEARCH_FIELD_MARKER_ID, markerId, Store.YES)); doc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES)); doc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long())); if (title != null) { doc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text())); } if (description != null) { doc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text())); } indexWriter.addDocument(doc); }
/** Creates and stores the Lucene document for one waypoint. */
private static void createDoc_WayPoint(final IndexWriter indexWriter,
                                       final long markerId,
                                       final long tourId,
                                       final String title,
                                       final String description,
                                       final long time) throws IOException {
  final Document doc = new Document();
  doc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_WAY_POINT, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_MARKER_ID, markerId, Store.YES));
  doc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES));
  // Time is optional; 0 means "not set".
  if (time != 0) {
    doc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long()));
  }
  // Title and description are optional.
  if (title != null) {
    doc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text()));
  }
  if (description != null) {
    doc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text()));
  }
  indexWriter.addDocument(doc);
}
/**
 * Creates a Lucene document for the zip file entry with the fields:
 * <ul>
 * <li>path: relative path from the constructor</li>
 * <li>id: relative path + path within the zip file</li>
 * <li>modified: last modified date of the entry (minute resolution)</li>
 * <li>filesize: size of the entry</li>
 * <li>title: file name of the entry</li>
 * <li>archivepath: file name of the entry</li>
 * </ul>
 * @return New Lucene document.
 */
@Override
public Document createDocument() {
  Document doc = new Document();
  doc.add(new StringField("path", path, Field.Store.YES));
  doc.add(new StringField("id", path + File.separator + zipEntry.getName(), Field.Store.YES));
  String modified = DateTools.timeToString(zipEntry.getTime(), DateTools.Resolution.MINUTE);
  doc.add(new StringField("modified", modified, Field.Store.YES));
  doc.add(new LongField("filesize", zipEntry.getSize(), Field.Store.YES));
  doc.add(new TextField("title", zipEntry.getName(), Field.Store.YES));
  doc.add(new StringField("archivepath", zipEntry.getName(), Field.Store.YES));
  return doc;
}
/**
 * Indexes the long extension value of every RefexLongVersionBI version of
 * the given refex member. Other version types are skipped.
 */
@Override
protected void addFields(ComponentChronicleBI chronicle, Document doc) {
  RefexMember member = (RefexMember) chronicle;
  Iterator it = member.getVersions().iterator();
  while (it.hasNext()) {
    RefexMember.Version version = (RefexMember.Version) it.next();
    if (version instanceof RefexLongVersionBI) {
      RefexLongVersionBI longVersion = (RefexLongVersionBI) version;
      doc.add(new LongField(ComponentProperty.LONG_EXTENSION_1.name(),
          longVersion.getLong1(), Field.Store.NO));
    }
  }
}
/** * Creates a Document containing contents and metadata for a specific page of a file * @param writer The writer used to save the metadata * @param file The file that the page belongs to * @param page The index of the page in the file * @param contents The string contents of the file */ public static void Build(IndexWriter writer, File file, int page, String contents) { if(file.canRead()) { try { //Log.i(TAG, "Started Indexing file: " + file.getName() + " " // + page); Document doc = new Document(); doc.add(new StringField("id", file.getPath() + ":" + page, Field.Store.NO)); doc.add(new StringField("path", file.getPath(), Field.Store.YES)); doc.add(new LongField("modified", file.lastModified(), Field.Store.YES)); // for(int i = 0; i < contents.size(); i++){ doc.add(new TextField("text", "" + contents, Field.Store.YES)); doc.add(new IntField("page", page, Field.Store.YES)); // } // TODO - Check what OpenMode.CREATE_OR_APPEND does; I think updateDocument should // always be used with CREATE_OR_APPEND, the if part may need to be removed if(writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { // TODO - Test UpdateDocument writer.updateDocument(new Term("id", file.getPath() + ":" + page), doc); } Log.i(TAG, "Done Indexing file: " + file.getName() + " " + page); } catch(Exception e) { Log.e(TAG, "Error ", e); } } }
/** * Creates the metadata Document for a given file * @param filename The path of the file that the metadata will describe * @param pages The number of pages in the file; -1 if it's contents are not indexed * @return 0 upon successful index creation; -1 on error */ public int buildIndex(String filename, int pages) { try { //Log.i(TAG, "Writing Metadata"); Document doc = new Document(); File file = new File(filename); doc.add(new StringField("id", file.getPath() + ":meta", Field.Store.NO)); doc.add(new LongField("modified", file.lastModified(), Field.Store.YES)); doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES)); if(pages != -1) { doc.add(new IntField("pages", pages, Field.Store.YES)); } if(writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("id", file.getPath() + ":meta"), doc); } //Log.i(TAG, "Done creating metadata for file " + filename); // Must only call ForceMerge and Commit once per document as they are very resource heavy operations writer.commit(); } catch(Exception e) { Log.e(TAG, "Error", e); return -1; } return 0; }
/** {@inheritDoc} */
@Override
public void addFields(Document document, DecoratedKey partitionKey) {
  // The partitioner token value is expected to be a Long here.
  Long tokenValue = (Long) partitionKey.getToken().getTokenValue();
  document.add(new LongField(FIELD_NAME, tokenValue, Store.NO));
}
@Override public int doLogic() throws Exception { List<IndexableField> fields = doc.getFields(); Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; for(final IndexableField field : fields) { if (!field.fieldType().tokenized() || field instanceof IntField || field instanceof LongField || field instanceof FloatField || field instanceof DoubleField) { continue; } final TokenStream stream = field.tokenStream(analyzer); // reset the TokenStream to the first token stream.reset(); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); while(stream.incrementToken()) { termAtt.fillBytesRef(); tokenCount++; } } totalTokenCount += tokenCount; return tokenCount; }