/**
 * Indexes the text content of one print element as a Lucene document for the
 * given page. Elements whose styled text is null or empty are skipped.
 *
 * @param pageNo  page number stored with the document
 * @param element print text element whose content is indexed
 * @throws IOException if the index writer fails
 */
protected void addContentField(int pageNo, JRPrintText element) throws IOException {
    JRStyledText styledText = getStyledText(element);
    String allText = (styledText == null) ? "" : styledText.getText();

    if (allText == null || allText.length() == 0) {
        return; // nothing to index for this element
    }

    Document doc = new Document();
    doc.add(new IntField("pageNo", pageNo, Field.Store.YES));

    PrintElementId peid = PrintElementId.forElement(element);
    doc.add(new StringField("uid", peid.toString(), Field.Store.YES));
    displayTokens(allText, peid.toString());

    doc.add(new Field(CONTENT_FIELD, allText, fieldType));
    writer.addDocument(doc);
}
public static void createIndexQ(List<CQAResult> QASetList, Directory dir) { System.out.println("Creating Questions Index"); IndexWriterConfig iwc = new IndexWriterConfig(ANALYZER.getVersion(), ANALYZER); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); try { IndexWriter writer = new IndexWriter(dir, iwc); int id = 0; //XXX seq_id for (CQAResult qaSet : QASetList) { Document doc = new Document(); if (qaSet.subject == null) { id++; continue; } doc.add(new IntField(QID, id++, Field.Store.YES)); doc.add(new TextField(BEST_ANSWER_FIELD, qaSet.subject, Field.Store.NO)); doc.add(new TextField(Q_DESCRIPTION, qaSet.content, Field.Store.NO)); writer.addDocument(doc); } writer.close(); } catch (IOException e) { e.printStackTrace(); } }
private Document buildDucument(ShopDTO shopDTO){ Document document = new Document(); document.add(new IntField(ShopDTO.ShopParam.SHOP_ID, shopDTO.getShopid(), Field.Store.YES)); document.add(new TextField(ShopDTO.ShopParam.SHOP_NAME, shopDTO.getShopname(), Field.Store.YES)); //document.add(new StringField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid()+"", Field.Store.YES)); document.add(new IntField(ShopDTO.ShopParam.CITY_ID, shopDTO.getCityid(), Field.Store.YES)); if (shopDTO.getTaglist()!=null && shopDTO.getTaglist().size()>0) { for (int tagid: shopDTO.getTaglist()) { document.add(new IntField(ShopDTO.ShopParam.TAG_ID, tagid, Field.Store.YES)); } } document.add(new IntField(ShopDTO.ShopParam.SCORE, shopDTO.getScore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED)); document.add(new IntField(ShopDTO.ShopParam.HOT_SCORE, shopDTO.getHotscore(), LuceneUtil.INT_FIELD_TYPE_STORED_SORTED)); return document; }
/**
 * Resolves a Java type to the Lucene {@link Field} subclass used to index it.
 *
 * Mapping: String -> StringField; double -> DoubleField; float -> FloatField;
 * long -> LongField; all remaining integral-ish primitives and their wrappers
 * (int, short, byte, char, boolean) -> IntField.
 *
 * @param type field type (primitive, wrapper, or String)
 * @return matching Lucene field class, or null if the type is unsupported
 */
private Class<? extends Field> resolveField(Type type) {
    if (type == String.class) {
        return StringField.class;
    }
    if (type == Double.class || type == double.class) {
        return DoubleField.class;
    }
    if (type == Float.class || type == float.class) {
        return FloatField.class;
    }
    if (type == Long.class || type == long.class) {
        return LongField.class;
    }
    // everything that fits in an int is indexed as an IntField
    boolean intLike = type == Integer.class || type == int.class
            || type == Short.class || type == short.class
            || type == Boolean.class || type == boolean.class
            || type == Byte.class || type == byte.class
            || type == Character.class || type == char.class;
    return intLike ? IntField.class : null;
}
/**
 * Smoke test: loads test.csv with typed columns (int, string, float, text)
 * and verifies hit counts for one query per column.
 */
@Test
public void simpleTest() throws IOException {
    LuceneValuesDB db = new LuceneValuesDB();
    URL csvUrl = LuceneValuesDB.class.getResource("test.csv");

    @SuppressWarnings("unchecked")
    UserDefineDocumentCreator docCreator = new UserDefineDocumentCreator(
            new Class[] { IntField.class, StringField.class, FloatField.class, TextField.class },
            new String[] { "docNum", "docType", "score", "text" });

    db.open(new File(csvUrl.getFile()), new CSVParser(), docCreator);

    assertEquals(1, db.search("docNum", 0).length);
    assertEquals(1, db.search("docType", "a").length);
    assertEquals(2, db.search("score", "0.1").length);
    assertEquals(1, db.search("text", "this is a pen").length);
}
/**
 * Creates the per-thread document state. When {@code reuseFields} is true,
 * pre-builds one reusable Field instance per known field name (plus the two
 * numeric fields); otherwise all maps and the document are left null.
 */
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {
    this.reuseFields = reuseFields;

    if (!reuseFields) {
        fields = null;
        numericFields = null;
        doc = null;
        return;
    }

    fields = new HashMap<>();
    numericFields = new HashMap<>();

    // Pre-populate with the default fields so they can be reused across docs.
    fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
    fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
    fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
    fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
    fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));

    numericFields.put(DATE_MSEC_FIELD, new LongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
    numericFields.put(TIME_SEC_FIELD, new IntField(TIME_SEC_FIELD, 0, Field.Store.NO));

    doc = new Document();
}
private Document newSampleDocument(int id, Shape... shapes) { Document doc = new Document(); doc.add(new IntField("id", id, Field.Store.YES)); //Potentially more than one shape in this field is supported by some // strategies; see the javadocs of the SpatialStrategy impl to see. for (Shape shape : shapes) { for (IndexableField f : strategy.createIndexableFields(shape)) { doc.add(f); } //store it too; the format is up to you // (assume point in this example) Point pt = (Point) shape; doc.add(new StoredField(strategy.getFieldName(), pt.getX()+" "+pt.getY())); } return doc; }
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  // at least 2049 docs — presumably to exceed an internal buffering/threshold
  // boundary; TODO confirm which constant this is meant to cross
  int numDocs = TestUtil.nextInt(random(), 2049, 4000);
  for (int i = 0; i < numDocs; i++) {
    Document document = new Document();
    // human-readable text fields derived from the doc index
    document.add(newTextField("english", English.intToEnglish(i), Field.Store.NO));
    document.add(newTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
    // narrow integral types are indexed as strings; wider ones as numeric trie fields
    document.add(newStringField("byte", "" + ((byte) random().nextInt()), Field.Store.NO));
    document.add(newStringField("short", "" + ((short) random().nextInt()), Field.Store.NO));
    document.add(new IntField("int", random().nextInt(), Field.Store.NO));
    document.add(new LongField("long", random().nextLong(), Field.Store.NO));
    document.add(new FloatField("float", random().nextFloat(), Field.Store.NO));
    document.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
    // doc-values variants for sorting/scoring tests
    document.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
    document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
    iw.addDocument(document);
  }
  reader = iw.getReader();
  iw.close();
  searcher = newSearcher(reader);
}
// Verifies that NumericUtils.getMinInt/getMaxInt over the indexed terms
// recover the exact min/max of the random int values written as IntFields.
public void testIntFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  // track expected extremes while indexing
  int minValue = Integer.MAX_VALUE;
  int maxValue = Integer.MIN_VALUE;
  for(int i=0;i<numDocs;i++ ){
    Document doc = new Document();
    int num = random().nextInt();
    minValue = Math.min(num, minValue);
    maxValue = Math.max(num, maxValue);
    doc.add(new IntField("field", num, Field.Store.NO));
    w.addDocument(doc);
  }
  IndexReader r = w.getReader();
  Terms terms = MultiFields.getTerms(r, "field");
  assertEquals(minValue, NumericUtils.getMinInt(terms));
  assertEquals(maxValue, NumericUtils.getMaxInt(terms));
  r.close();
  w.close();
  dir.close();
}
/**
 * Adds one tour document to the search index: doc-source marker, tour id,
 * timestamp, and optional title/description text fields.
 */
private static void createDoc_Tour(final IndexWriter indexWriter,
                                   final long tourId,
                                   final String title,
                                   final String description,
                                   final long time) throws IOException {

    final Document tourDoc = new Document();

    tourDoc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR, Store.YES));
    tourDoc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES));
    tourDoc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long()));

    if (title != null) {
        tourDoc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text()));
    }
    if (description != null) {
        tourDoc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text()));
    }

    indexWriter.addDocument(tourDoc);
}
/**
 * Creates the per-thread document state. When {@code reuseFields} is true,
 * pre-builds one reusable Field per known field name plus the two numeric
 * fields; otherwise the maps and document stay null.
 */
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {
    this.reuseFields = reuseFields;

    if (!reuseFields) {
        fields = null;
        numericFields = null;
        doc = null;
        return;
    }

    fields = new HashMap<String,Field>();
    numericFields = new HashMap<String,Field>();

    // Pre-populate with the default fields so they can be reused across docs.
    fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
    fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
    fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
    fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
    fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));

    numericFields.put(DATE_MSEC_FIELD, new LongField(DATE_MSEC_FIELD, 0L, Field.Store.NO));
    numericFields.put(TIME_SEC_FIELD, new IntField(TIME_SEC_FIELD, 0, Field.Store.NO));

    doc = new Document();
}
/**
 * Collects the fields that are indexed verbatim from the IA view: the three
 * text fields (catalogue ref, description, title) and the numeric SOURCE
 * field. Null view properties are simply omitted.
 */
private List<Field> getListOfUnmodifiedFieldsFromIAView(InformationAssetView iaView) {
    final List<Field> unmodified = new ArrayList<Field>();

    final String catDocRef = iaView.getCATDOCREF();
    if (catDocRef != null) {
        unmodified.add(new TextField(InformationAssetViewFields.CATDOCREF.toString(), catDocRef, Field.Store.NO));
    }

    final String description = iaView.getDESCRIPTION();
    if (description != null) {
        unmodified.add(new TextField(InformationAssetViewFields.DESCRIPTION.toString(), description, Field.Store.NO));
    }

    final String title = iaView.getTITLE();
    if (title != null) {
        unmodified.add(new TextField(InformationAssetViewFields.TITLE.toString(), title, Field.Store.NO));
    }

    // SOURCE is held as a string on the view but indexed numerically
    final String source = iaView.getSOURCE();
    if (source != null) {
        unmodified.add(new IntField(InformationAssetViewFields.SOURCE.toString(),
                Integer.parseInt(source), Field.Store.NO));
    }

    return unmodified;
}
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { int pmid = BlueCasUtil.getHeaderIntDocId(jCas); if (!BlueCasUtil.isEmptyText(jCas)) { // System.out.println("indexing:: " + pmid); Document doc = new Document(); doc.add(new IntField(PMID_FIELD, pmid, Store.YES)); doc.add(new TextField(CONTENT_FIELD, jCas.getDocumentText(), Store.YES)); doc.add(new TextField(TITLE_FIELD, getTitle(jCas), Store.YES)); try { indexWriter.addDocument(doc); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } } }
/**
 * Configures the int field types. If a numeric precision step is supplied in
 * the properties, clones the standard IntField types with that step and
 * freezes them; otherwise the shared default types are used as-is.
 */
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
    final String stepProperty = properties.get(NUMERIC_PRECISION_STEP);

    if (stepProperty == null) {
        // no override: share Lucene's default (pre-frozen) field types
        _typeStored = IntField.TYPE_STORED;
        _typeNotStored = IntField.TYPE_NOT_STORED;
        return;
    }

    _precisionStep = Integer.parseInt(stepProperty);

    _typeStored = new FieldType(IntField.TYPE_STORED);
    _typeStored.setNumericPrecisionStep(_precisionStep);
    _typeStored.freeze();

    _typeNotStored = new FieldType(IntField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
}
/**
 * Adds one line to the index: the zero-based line number (stored) and the
 * line text (indexed, not stored). Writes are serialized on this instance.
 *
 * @throws IllegalStateException if this index was loaded read-only
 */
public void addLine(int lineNumber, String text) {
    if (loadExistingIndex) {
        throw new IllegalStateException("Cannot add lines to an already existing index.");
    }

    final Document lineDoc = new Document();
    // stored as zero-based
    lineDoc.add(new IntField(LINE_NUMBER, lineNumber - 1, Field.Store.YES));
    lineDoc.add(new TextField(LINE_TEXT, text, Field.Store.NO));

    try {
        synchronized (this) {
            luceneIndexWriter.addDocument(lineDoc);
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  // at least 2049 docs — presumably to exceed an internal buffering/threshold
  // boundary; TODO confirm which constant this is meant to cross
  int numDocs = _TestUtil.nextInt(random(), 2049, 4000);
  for (int i = 0; i < numDocs; i++) {
    Document document = new Document();
    // human-readable text fields derived from the doc index
    document.add(newTextField("english", English.intToEnglish(i), Field.Store.NO));
    document.add(newTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
    // narrow integral types are indexed as strings; wider ones as numeric trie fields
    document.add(newStringField("byte", "" + ((byte) random().nextInt()), Field.Store.NO));
    document.add(newStringField("short", "" + ((short) random().nextInt()), Field.Store.NO));
    document.add(new IntField("int", random().nextInt(), Field.Store.NO));
    document.add(new LongField("long", random().nextLong(), Field.Store.NO));
    document.add(new FloatField("float", random().nextFloat(), Field.Store.NO));
    document.add(new DoubleField("double", random().nextDouble(), Field.Store.NO));
    // doc-values variants for sorting/scoring tests
    document.add(new NumericDocValuesField("intdocvalues", random().nextInt()));
    document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
    iw.addDocument(document);
  }
  reader = iw.getReader();
  iw.close();
  searcher = newSearcher(reader);
}
/**
 * Adds an Integer value to the document (no-op for null). When the field is
 * sorted, a NumericDocValuesField with the same name is added as well.
 */
void addToDoc(Document doc, Integer value) {
    Preconditions.checkArgument(valueType == Integer.class);
    if (value == null) {
        return;
    }

    final Store storeMode = stored ? Store.YES : Store.NO;
    doc.add(new IntField(indexFieldName, value, storeMode));

    if (isSorted()) {
        Preconditions.checkArgument(sortedValueType == SearchFieldSorting.FieldType.INTEGER);
        // doc-values are long-based, so widen the int
        doc.add(new NumericDocValuesField(indexFieldName, (long) value));
    }
}
public Field getField(ValueSource value) { if (value.isNull()) return null; Field.Store store = Field.Store.NO; // Only store hkey. switch (fieldType) { case INT: switch (TInstance.underlyingType(value.getType())) { case INT_8: return new IntField(name, value.getInt8(), store); case INT_16: return new IntField(name, value.getInt16(), store); case UINT_16: return new IntField(name, value.getUInt16(), store); case INT_32: default: return new IntField(name, value.getInt32(), store); } case LONG: return new LongField(name, value.getInt64(), store); case FLOAT: return new FloatField(name, value.getFloat(), store); case DOUBLE: return new DoubleField(name, value.getDouble(), store); case STRING: switch (TInstance.underlyingType(value.getType())) { case STRING: return new StringField(name, value.getString(), store); default: { StringBuilder str = new StringBuilder(); value.getType().format(value, AkibanAppender.of(str)); return new StringField(name, str.toString(), store); } } case TEXT: return new TextField(name, value.getString(), store); default: return null; } }
/**
 * Adds one value to the document as the appropriate Lucene field.
 *
 * @param doc            target document
 * @param name           field name
 * @param val            value (boxed primitive or String)
 * @param type           the value's original declared type
 * @param store          whether to store the value
 * @param textFieldable  non-null forces a TextField
 * @param noIndex        non-null forces a stored-only (unindexed) field
 * @return true if a field was added, false if the type is unsupported
 */
private boolean addField(Document doc, String name, Object val, Type type, Store store,
        TextFieldable textFieldable, NoIndex noIndex) {
    boolean add = true;
    if (noIndex != null) {
        if (type == Character.class || type == char.class) {
            // BUG FIX: (int) val on an Object holding a Character throws
            // ClassCastException (Object->int unboxes via Integer); unbox
            // through (char) first, then widen.
            val = (int) (char) val;
        } else if (type == Boolean.class || type == boolean.class) {
            val = (boolean) val ? 1 : 0;
        }
        doc.add(new StoredField(name, val.toString()));
    } else if (textFieldable != null) {
        doc.add(new TextField(name, val.toString(), store));
    } else if (type == String.class) {
        doc.add(new StringField(name, val.toString(), store));
    } else if (type == Double.class || type == double.class) {
        doc.add(new DoubleField(name, (double) val, store));
    } else if (type == Float.class || type == float.class) {
        doc.add(new FloatField(name, (float) val, store));
    } else if (type == Short.class || type == short.class
            || type == Integer.class || type == int.class
            || type == Byte.class || type == byte.class) {
        // parse via toString so Short/Byte boxes widen safely to int
        doc.add(new IntField(name, Integer.valueOf(val.toString()), store));
    } else if (type == Character.class || type == char.class) {
        doc.add(new IntField(name, Integer.valueOf((char) val), store));
    } else if (type == Boolean.class || type == boolean.class) {
        // booleans are indexed as 1/0
        if ((boolean) val) {
            doc.add(new IntField(name, 1, store));
        } else {
            doc.add(new IntField(name, 0, store));
        }
    } else if (type == Long.class || type == long.class) {
        doc.add(new LongField(name, (long) val, store));
    } else {
        add = false;
    }
    return add;
}
/**
 * Builds the highlighter test fixture: indexes the text corpus plus four
 * numeric docs (values 1,3,5,7), each carrying an indexed IntField for range
 * queries and a StoredField so the value is retrievable for highlighting.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    dir = newDirectory();
    ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir,
        newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    for (String text : texts) {
        addDoc(writer, text);
    }
    // the original repeated this block four times verbatim; loop instead
    for (int value : new int[] { 1, 3, 5, 7 }) {
        Document doc = new Document();
        doc.add(new IntField(NUMERIC_FIELD_NAME, value, Field.Store.NO));
        doc.add(new StoredField(NUMERIC_FIELD_NAME, value));
        writer.addDocument(doc, analyzer);
    }
    writer.forceMerge(1);
    writer.close();
    reader = DirectoryReader.open(ramDir);
    numHighlights = 0;
}
@Override public int doLogic() throws Exception { List<IndexableField> fields = doc.getFields(); Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; for(final IndexableField field : fields) { if (!field.fieldType().tokenized() || field instanceof IntField || field instanceof LongField || field instanceof FloatField || field instanceof DoubleField) { continue; } final TokenStream stream = field.tokenStream(analyzer, null); // reset the TokenStream to the first token stream.reset(); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); while(stream.incrementToken()) { termAtt.fillBytesRef(); tokenCount++; } stream.end(); stream.close(); } totalTokenCount += tokenCount; return tokenCount; }
@BeforeClass public static void beforeClass() throws Exception { // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT): Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); //initialize the parser builder = new CorePlusExtensionsParser("contents", analyzer); BufferedReader d = new BufferedReader(new InputStreamReader( TestParser.class.getResourceAsStream("reuters21578.txt"), StandardCharsets.US_ASCII)); dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer)); String line = d.readLine(); while (line != null) { int endOfDate = line.indexOf('\t'); String date = line.substring(0, endOfDate).trim(); String content = line.substring(endOfDate).trim(); Document doc = new Document(); doc.add(newTextField("date", date, Field.Store.YES)); doc.add(newTextField("contents", content, Field.Store.YES)); doc.add(new IntField("date2", Integer.valueOf(date), Field.Store.NO)); writer.addDocument(doc); line = d.readLine(); } d.close(); writer.close(); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); }
/** Tests NumericRangeQuery on a multi-valued field (multiple numeric values per document). * This test ensures, that a classical TermRangeQuery returns exactly the same document numbers as * NumericRangeQuery (see SOLR-1322 for discussion) and the multiple precision terms per numeric value * do not interfere with multiple numeric values. */
public void testMultiValuedNRQ() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
  // zero-padded decimal rendering makes lexicographic term order match numeric order
  DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
  int num = atLeast(500);
  for (int l = 0; l < num; l++) {
    Document doc = new Document();
    // 1..10 random values per doc; each value indexed both as a padded
    // string ("asc") and as a trie-encoded IntField ("trie")
    for (int m=0, c=random().nextInt(10); m<=c; m++) {
      int value = random().nextInt(Integer.MAX_VALUE);
      doc.add(newStringField("asc", format.format(value), Field.Store.NO));
      doc.add(new IntField("trie", value, Field.Store.NO));
    }
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher=newSearcher(reader);
  num = atLeast(50);
  for (int i = 0; i < num; i++) {
    int lower=random().nextInt(Integer.MAX_VALUE);
    int upper=random().nextInt(Integer.MAX_VALUE);
    // normalize so lower <= upper
    if (lower>upper) {
      int a=lower; lower=upper; upper=a;
    }
    TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
    NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true);
    TopDocs trTopDocs = searcher.search(cq, 1);
    TopDocs nrTopDocs = searcher.search(tq, 1);
    // both encodings must agree on the total hit count
    assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal",
        trTopDocs.totalHits, nrTopDocs.totalHits );
  }
  reader.close();
  directory.close();
}
/**
 * Indexes one document with the given id and the accumulated term set, records
 * each term's owning doc id in {@code termToID}, then clears the term buffer.
 */
private void addDoc(RandomIndexWriter w, Collection<String> terms, Map<BytesRef,Integer> termToID, int id) throws IOException {
    final Document termDoc = new Document();
    termDoc.add(new IntField("id", id, Field.Store.NO));

    if (VERBOSE) {
        System.out.println("TEST: addDoc id:" + id + " terms=" + terms);
    }

    for (final String term : terms) {
        termDoc.add(newStringField("f", term, Field.Store.NO));
        termToID.put(new BytesRef(term), id);
    }

    w.addDocument(termDoc);
    terms.clear(); // buffer is reused by the caller
}
/**
 * Rebuilds a stored int field: clones the standard IntField type, forces
 * storage, and mirrors the original field's indexed flag.
 */
@Override
public void intField(FieldInfo fieldInfo, int value) {
    final FieldType storedIntType = new FieldType(IntField.TYPE_NOT_STORED);
    storedIntType.setStored(true);
    storedIntType.setIndexed(fieldInfo.isIndexed());
    doc.add(new IntField(fieldInfo.name, value, storedIntType));
}
private static void createDoc_Marker( final IndexWriter indexWriter, final long markerId, final long tourId, final String title, final String description, final long time) throws IOException { // private static final FieldType _longSearchField = new FieldType(LongField.TYPE_STORED); // private static final FieldType _textSearchField = new FieldType(TextField.TYPE_STORED); // { // _longSearchField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // _textSearchField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // } final Document doc = new Document(); doc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_TOUR_MARKER, Store.YES)); doc.add(new LongField(SEARCH_FIELD_MARKER_ID, markerId, Store.YES)); doc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES)); doc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long())); if (title != null) { doc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text())); } if (description != null) { doc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text())); } indexWriter.addDocument(doc); }
/**
 * Adds one waypoint document to the search index: doc-source marker,
 * marker/tour ids, an optional timestamp (omitted when 0), and optional
 * title/description text fields.
 */
private static void createDoc_WayPoint(final IndexWriter indexWriter,
                                       final long markerId,
                                       final long tourId,
                                       final String title,
                                       final String description,
                                       final long time) throws IOException {

    final Document wayPointDoc = new Document();

    wayPointDoc.add(new IntField(SEARCH_FIELD_DOC_SOURCE, DOC_SOURCE_WAY_POINT, Store.YES));
    wayPointDoc.add(new LongField(SEARCH_FIELD_MARKER_ID, markerId, Store.YES));
    wayPointDoc.add(new LongField(SEARCH_FIELD_TOUR_ID, tourId, Store.YES));

    // 0 means "no time available" — field is omitted entirely
    if (time != 0) {
        wayPointDoc.add(new LongField(SEARCH_FIELD_TIME, time, createFieldType_Long()));
    }
    if (title != null) {
        wayPointDoc.add(new Field(SEARCH_FIELD_TITLE, title, createFieldType_Text()));
    }
    if (description != null) {
        wayPointDoc.add(new Field(SEARCH_FIELD_DESCRIPTION, description, createFieldType_Text()));
    }

    indexWriter.addDocument(wayPointDoc);
}
@Override public Long call() throws Exception { IndexedGenerationCallable latch = componentNidLatch.remove(chronicle.getNid()); Document doc = new Document(); doc.add(new IntField(ComponentProperty.COMPONENT_ID.name(), chronicle.getNid(), LuceneIndexer.indexedComponentNidType)); addFields(chronicle, doc); // Note that the addDocument operation could cause duplicate documents to be // added to the index if a new luceneVersion is added after initial index // creation. It does this to avoid the performance penalty of // finding and deleting documents prior to inserting a new one. // // At this point, the number of duplicates should be // small, and we are willing to accept a small number of duplicates // because the new versions are additive (we don't allow deletion of content) // so the search results will be the same. Duplicates can be removed // by regenerating the index. long indexGeneration = trackingIndexWriter.addDocument(doc); if (latch != null) { latch.setIndexGeneration(indexGeneration); } return indexGeneration; }
/**
 * Indexes every Medline citation in the given XML file (pmid, title,
 * abstract) and commits once at the end.
 *
 * @throws JDOMException if the citation XML cannot be parsed
 * @throws IOException   if indexing or the commit fails
 */
public void indexDocs(File file) throws JDOMException, IOException {
    final MedlineCitationSetReader citationReader = new MedlineCitationSetReader(file);

    while (citationReader.hasNext()) {
        final MedlineCitation citation = citationReader.next();

        final Document citationDoc = new Document();
        citationDoc.add(new IntField(PMID_FIELD, citation.getPmid(), Field.Store.YES));
        citationDoc.add(new TextField(ARTICLE_TITLE_FIELD, citation.getArticleTitle(), Field.Store.YES));
        citationDoc.add(new TextField(ABSTRACT_TEXT_FIELD, citation.getAbstractText(), Field.Store.YES));

        writer.addDocument(citationDoc);
    }

    writer.commit(); // single commit after the whole file
}
/** * Creates a Document containing contents and metadata for a specific page of a file * @param writer The writer used to save the metadata * @param file The file that the page belongs to * @param page The index of the page in the file * @param contents The string contents of the file */ public static void Build(IndexWriter writer, File file, int page, String contents) { if(file.canRead()) { try { //Log.i(TAG, "Started Indexing file: " + file.getName() + " " // + page); Document doc = new Document(); doc.add(new StringField("id", file.getPath() + ":" + page, Field.Store.NO)); doc.add(new StringField("path", file.getPath(), Field.Store.YES)); doc.add(new LongField("modified", file.lastModified(), Field.Store.YES)); // for(int i = 0; i < contents.size(); i++){ doc.add(new TextField("text", "" + contents, Field.Store.YES)); doc.add(new IntField("page", page, Field.Store.YES)); // } // TODO - Check what OpenMode.CREATE_OR_APPEND does; I think updateDocument should // always be used with CREATE_OR_APPEND, the if part may need to be removed if(writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { // TODO - Test UpdateDocument writer.updateDocument(new Term("id", file.getPath() + ":" + page), doc); } Log.i(TAG, "Done Indexing file: " + file.getName() + " " + page); } catch(Exception e) { Log.e(TAG, "Error ", e); } } }
/** * Creates the metadata Document for a given file * @param filename The path of the file that the metadata will describe * @param pages The number of pages in the file; -1 if it's contents are not indexed * @return 0 upon successful index creation; -1 on error */ public int buildIndex(String filename, int pages) { try { //Log.i(TAG, "Writing Metadata"); Document doc = new Document(); File file = new File(filename); doc.add(new StringField("id", file.getPath() + ":meta", Field.Store.NO)); doc.add(new LongField("modified", file.lastModified(), Field.Store.YES)); doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES)); if(pages != -1) { doc.add(new IntField("pages", pages, Field.Store.YES)); } if(writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("id", file.getPath() + ":meta"), doc); } //Log.i(TAG, "Done creating metadata for file " + filename); // Must only call ForceMerge and Commit once per document as they are very resource heavy operations writer.commit(); } catch(Exception e) { Log.e(TAG, "Error", e); return -1; } return 0; }
/**
 * Builds the highlighter test fixture: indexes the text corpus plus four
 * numeric docs (values 1,3,5,7), each carrying an indexed IntField for range
 * queries and a StoredField so the value is retrievable for highlighting.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
    dir = newDirectory();
    ramDir = newDirectory();
    IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
    for (String text : texts) {
        addDoc(writer, text);
    }
    // the original repeated this block four times verbatim; loop instead
    for (int value : new int[] { 1, 3, 5, 7 }) {
        Document doc = new Document();
        doc.add(new IntField(NUMERIC_FIELD_NAME, value, Field.Store.NO));
        doc.add(new StoredField(NUMERIC_FIELD_NAME, value));
        writer.addDocument(doc, analyzer);
    }
    writer.forceMerge(1);
    writer.close();
    reader = DirectoryReader.open(ramDir);
    numHighlights = 0;
}
@Override public int doLogic() throws Exception { List<IndexableField> fields = doc.getFields(); Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; for(final IndexableField field : fields) { if (!field.fieldType().tokenized() || field instanceof IntField || field instanceof LongField || field instanceof FloatField || field instanceof DoubleField) { continue; } final TokenStream stream = field.tokenStream(analyzer); // reset the TokenStream to the first token stream.reset(); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); while(stream.incrementToken()) { termAtt.fillBytesRef(); tokenCount++; } } totalTokenCount += tokenCount; return tokenCount; }
private Document newSampleDocument(int id, Shape... shapes) { Document doc = new Document(); doc.add(new IntField("id", id, Field.Store.YES)); //Potentially more than one shape in this field is supported by some // strategies; see the javadocs of the SpatialStrategy impl to see. for (Shape shape : shapes) { for (IndexableField f : strategy.createIndexableFields(shape)) { doc.add(f); } //store it too; the format is up to you doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape))); } return doc; }
@BeforeClass public static void beforeClass() throws Exception { // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT): Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false); //initialize the parser builder = new CorePlusExtensionsParser("contents", analyzer); BufferedReader d = new BufferedReader(new InputStreamReader( TestParser.class.getResourceAsStream("reuters21578.txt"), "US-ASCII")); dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(Version.LUCENE_40, analyzer)); String line = d.readLine(); while (line != null) { int endOfDate = line.indexOf('\t'); String date = line.substring(0, endOfDate).trim(); String content = line.substring(endOfDate).trim(); Document doc = new Document(); doc.add(newTextField("date", date, Field.Store.YES)); doc.add(newTextField("contents", content, Field.Store.YES)); doc.add(new IntField("date2", Integer.valueOf(date), Field.Store.NO)); writer.addDocument(doc); line = d.readLine(); } d.close(); writer.close(); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); }
// Verifies that when a stored-fields write aborts mid-document, the partially
// written .fdt/.fdx files from the failed segment are deleted, leaving only
// the files of the earlier, successfully committed segment.
@Test(expected=IllegalArgumentException.class)
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iw.close();
  iw = new RandomIndexWriter(random(), dir, iwConf);
  // first, commit one valid document so one .fdt/.fdx pair exists
  final Document validDoc = new Document();
  validDoc.add(new IntField("id", 0, Store.YES));
  iw.addDocument(validDoc);
  iw.commit();
  // make sure that #writeField will fail to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  // anonymous Field whose stringValue() is null — presumably what makes
  // writeField throw the expected IllegalArgumentException
  invalidDoc.add(new Field("invalid", fieldType) {
    @Override
    public String stringValue() {
      return null;
    }
  });
  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } finally {
    // counted in finally because the expected exception propagates out
    int counter = 0;
    for (String fileName : dir.listAll()) {
      if (fileName.endsWith(".fdt") || fileName.endsWith(".fdx")) {
        counter++;
      }
    }
    // Only one .fdt and one .fdx files must have been found
    assertEquals(2, counter);
  }
}
/** Tests NumericRangeQuery on a multi-valued field (multiple numeric values per document). * This test ensures, that a classical TermRangeQuery returns exactly the same document numbers as * NumericRangeQuery (see SOLR-1322 for discussion) and the multiple precision terms per numeric value * do not interfere with multiple numeric values. */
public void testMultiValuedNRQ() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
      .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
  // zero-padded decimal rendering makes lexicographic term order match numeric order
  DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
  int num = atLeast(500);
  for (int l = 0; l < num; l++) {
    Document doc = new Document();
    // 1..10 random values per doc; each value indexed both as a padded
    // string ("asc") and as a trie-encoded IntField ("trie")
    for (int m=0, c=random().nextInt(10); m<=c; m++) {
      int value = random().nextInt(Integer.MAX_VALUE);
      doc.add(newStringField("asc", format.format(value), Field.Store.NO));
      doc.add(new IntField("trie", value, Field.Store.NO));
    }
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher=newSearcher(reader);
  num = atLeast(50);
  for (int i = 0; i < num; i++) {
    int lower=random().nextInt(Integer.MAX_VALUE);
    int upper=random().nextInt(Integer.MAX_VALUE);
    // normalize so lower <= upper
    if (lower>upper) {
      int a=lower; lower=upper; upper=a;
    }
    TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
    NumericRangeQuery<Integer> tq=NumericRangeQuery.newIntRange("trie", lower, upper, true, true);
    TopDocs trTopDocs = searcher.search(cq, 1);
    TopDocs nrTopDocs = searcher.search(tq, 1);
    // both encodings must agree on the total hit count
    assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal",
        trTopDocs.totalHits, nrTopDocs.totalHits );
  }
  reader.close();
  directory.close();
}