/**
 * Creates the handler, registers the listeners that build each field's
 * configuration and installs the default value for every standard
 * configuration key.
 */
public StandardQueryConfigHandler() {
  // Listeners that populate a FieldConfig when one is built.
  addFieldConfigListener(new FieldBoostMapFCListener(this));
  addFieldConfigListener(new FieldDateResolutionFCListener(this));
  addFieldConfigListener(new NumericFieldConfigListener(this));

  // --- Defaults: term handling -------------------------------------------
  set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, false); // default in 2.9
  set(ConfigurationKeys.LOWERCASE_EXPANDED_TERMS, true); // default value 2.4
  set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, false); // default value 2.4
  set(ConfigurationKeys.PHRASE_SLOP, 0); // default value 2.4

  // --- Defaults: parsing and analysis ------------------------------------
  set(ConfigurationKeys.ANALYZER, null); // default value 2.4
  set(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
  set(ConfigurationKeys.LOCALE, Locale.getDefault());

  // --- Defaults: query construction --------------------------------------
  set(ConfigurationKeys.FUZZY_CONFIG, new FuzzyConfig());
  set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD,
      MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

  // --- Defaults: per-field maps (start out empty) -------------------------
  set(ConfigurationKeys.FIELD_BOOST_MAP, new LinkedHashMap<String, Float>());
  set(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP,
      new HashMap<CharSequence, DateTools.Resolution>());
}
/**
 * Stores the applicable date resolution on the given field configuration.
 * <p>
 * The per-field resolution map is consulted first; when it is absent or has
 * no entry for the field, the handler-wide {@code DATE_RESOLUTION} value is
 * used. If neither yields a value the field configuration is left untouched.
 *
 * @param fieldConfig the field configuration being built
 */
@Override
public void buildFieldConfig(FieldConfig fieldConfig) {
  Map<CharSequence, DateTools.Resolution> perField =
      this.config.get(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP);

  // Field-specific setting wins when a map is configured.
  DateTools.Resolution resolved =
      (perField == null) ? null : perField.get(fieldConfig.getField());

  // Otherwise fall back to the global default.
  if (resolved == null) {
    resolved = this.config.get(ConfigurationKeys.DATE_RESOLUTION);
  }

  if (resolved == null) {
    return;
  }
  fieldConfig.set(ConfigurationKeys.DATE_RESOLUTION, resolved);
}
/** * Returns the date resolution that is used by RangeQueries for the given field. * Returns null, if no default or field specific date resolution has been set * for the given field. * */ public DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); if (resolution == null) { // no date resolutions set for the given field; return default date resolution instead resolution = this.dateResolution; } return resolution; }
/** * Returns the date resolution that is used by RangeQueries for the given field. Returns null, if no default or field specific date * resolution has been set for the given field. * */ public DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); if (resolution == null) { // no date resolutions set for the given field; return default date resolution instead resolution = this.dateResolution; } return resolution; }
public static Document Document(File f) throws java.io.FileNotFoundException { Document doc = new Document(); doc.add(new StoredField("path", f.getPath())); doc.add(new StoredField("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE))); //create new FieldType to store term positions (TextField is not sufficiently configurable) FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); ft.setTokenized(true); ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); Field contentsField = new Field("contents", new FileReader(f), ft); doc.add(contentsField); return doc; }
/**
 * Converts a date value into the day-resolution string form used in the
 * Lucene index.
 * <p>
 * DateTools formats in GMT, so the date is first shifted by the local
 * zone's offset from GMT to keep the local calendar day. The offset is the
 * one in effect <em>at the date being converted</em> (zone offset plus DST
 * offset), not the one in effect now; using the current offset would move
 * dates from a different daylight-saving period to the wrong day.
 * See http://www.gossamer-threads.com/lists/lucene/java-user/39303?search_string=DateTools;#39303
 *
 * @param value the value to convert; expected to be a {@link Date}
 * @return the day-resolution date string, or {@code null} if {@code value}
 *         is {@code null} or is not a {@link Date}
 */
public static String getLuceneDateValue(Object value) {
  if (value == null) {
    return null;
  }

  Date dateValue;
  try {
    dateValue = (Date) value;
  } catch (ClassCastException e) {
    // A cast can only fail with ClassCastException; report and give up.
    LOGGER.error("The type of the lucene value is " + value.getClass().getName() + ". Casting it to Date failed with " + e.getMessage());
    LOGGER.debug(ExceptionUtils.getStackTrace(e));
    return null;
  }

  Calendar cal = new GregorianCalendar();
  cal.setTime(dateValue);
  // Read the offset only after setTime so that it reflects the DST rules
  // that apply to dateValue itself.
  int minutesOffset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) / (60 * 1000);
  cal.add(Calendar.MINUTE, minutesOffset);
  return DateTools.dateToString(cal.getTime(), DateTools.Resolution.DAY);
}
/** * Transforms the user entered date string to lucene date string format * by trying to reconstruct the Date object either by local or by ISO (yyy-MM-dd) * DateTools calculates in GMT (comparing to the server TimeZone), so it should be adjusted * @param originalFieldValue * @param locale * @return */ private static String transformDateFields(String originalFieldValue, Locale locale) { String dateString = originalFieldValue; Date date; //DateTimeUtils dtu = new DateTimeUtils(locale); date = DateTimeUtils.getInstance().parseGUIDate(originalFieldValue, locale); if (date==null) { date = DateTimeUtils.getInstance().parseShortDate(originalFieldValue, locale); } //set the date according to offset from the GMT //see http://www.gossamer-threads.com/lists/lucene/java-user/39303?search_string=DateTools;#39303 Calendar cal = new GregorianCalendar(); int minutesOffset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) / (60 * 1000); if (date!=null) { cal.setTime(date); cal.add(Calendar.MINUTE, minutesOffset); return DateTools.dateToString(cal.getTime(), Resolution.DAY); } date = DateTimeUtils.getInstance().parseISODate(originalFieldValue); if (date!=null) { cal.setTime(date); cal.add(Calendar.MINUTE, minutesOffset); return DateTools.dateToString(cal.getTime(), Resolution.DAY); } return dateString; }
/**
 * Builds the Lucene document for this file. All fields are stored.
 * <ul>
 * <li>path: the {@code path} value held by this object</li>
 * <li>id: the same value as path</li>
 * <li>modified: last modified time of the file, at minute resolution</li>
 * <li>filesize: length of the file</li>
 * <li>title: name of the file</li>
 * </ul>
 *
 * @return a new Lucene document
 */
@Override
public Document createDocument() {
  Document luceneDoc = new Document();

  luceneDoc.add(new StringField("path", path, Field.Store.YES));
  luceneDoc.add(new StringField("id", path, Field.Store.YES));

  String modifiedStamp =
      DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE);
  luceneDoc.add(new StringField("modified", modifiedStamp, Field.Store.YES));

  long sizeInBytes = file.length();
  luceneDoc.add(new LongField("filesize", sizeInBytes, Field.Store.YES));

  String fileName = file.getName();
  luceneDoc.add(new TextField("title", fileName, Field.Store.YES));

  return luceneDoc;
}
/** * Returns the date resolution that is used by RangeQueries for the given * field. Returns null, if no default or field specific date resolution has * been set for the given field. * */ public DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("Field must not be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date // resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); if (resolution == null) { // no date resolutions set for the given field; return default date // resolution instead resolution = this.dateResolution; } return resolution; }
/**
 * Adds one conversation to the index.
 * <p>
 * The conversation ID, the external flag and the start date (day
 * resolution) are stored as untokenized fields; every JID is stored as a
 * tokenized {@code jid} field; the text is tokenized but not stored.
 *
 * @param writer the index modifier.
 * @param conversationID the ID of the conversation to index.
 * @param external true if the conversation has a participant from an external server.
 * @param date the date the conversation was started.
 * @param jids the JIDs of the users in the conversation.
 * @param text the full text of the conversation.
 * @throws IOException if an IOException occurs.
 */
private void indexDocument(IndexModifier writer, long conversationID, boolean external,
    long date, Set<String> jids, String text) throws IOException {
  String idValue = String.valueOf(conversationID);
  String externalValue = String.valueOf(external);
  String dayStamp = DateTools.timeToString(date, DateTools.Resolution.DAY);

  Document conversationDoc = new Document();
  conversationDoc.add(
      new Field("conversationID", idValue, Field.Store.YES, Field.Index.UN_TOKENIZED));
  conversationDoc.add(
      new Field("external", externalValue, Field.Store.YES, Field.Index.UN_TOKENIZED));
  conversationDoc.add(
      new Field("date", dayStamp, Field.Store.YES, Field.Index.UN_TOKENIZED));

  // One "jid" field per participant.
  for (String participant : jids) {
    conversationDoc.add(
        new Field("jid", participant, Field.Store.YES, Field.Index.TOKENIZED));
  }

  conversationDoc.add(new Field("text", text, Field.Store.NO, Field.Index.TOKENIZED));

  writer.addDocument(conversationDoc);
}
/**
 * Builds a search result from an index document and its hit metadata.
 *
 * @param doc the index document to read field values from
 * @param score the score of the hit
 * @param hitId the identifier of the hit
 * @param totalHits the total number of hits for the search
 * @return the populated search result
 * @throws ParseException if the stored date field cannot be parsed
 */
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits)
    throws ParseException {
  SearchResult hit = new SearchResult();

  // Hit metadata supplied by the caller.
  hit.hitId = hitId;
  hit.totalHits = totalHits;
  hit.score = score;

  // Values read back from the document.
  hit.date = DateTools.stringToDate(doc.get(FIELD_DATE));
  hit.summary = doc.get(FIELD_SUMMARY);
  hit.author = doc.get(FIELD_AUTHOR);
  hit.committer = doc.get(FIELD_COMMITTER);
  hit.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
  hit.branch = doc.get(FIELD_BRANCH);
  hit.commitId = doc.get(FIELD_COMMIT);
  hit.path = doc.get(FIELD_PATH);

  // Tags are optional; leave the field untouched when none are present.
  String tagValue = doc.get(FIELD_TAG);
  if (tagValue != null) {
    hit.tags = StringUtils.getStringsFromValue(tagValue);
  }
  return hit;
}
/** * Converts a string produced by {@link #timeToString} or {@link #dateToString} back to a time, represented * as a Date object. Is also able to parse dates encoded in the old Lucene 1.x DateField format, for * compatibility with old indexes (this functionality will go away in a future release). * * @param dateString A string produced by timeToString or dateToString * @return The parsed time as a Date object * @exception ParseException If parse error */ public final static Date stringToDate(String dateString) throws ParseException { try { return DateTools.stringToDate(dateString); } catch (ParseException pe) { // Handle dates encoded in the Lucene 1.x format, for compatibility with old indexes try { // This method will go away in a future release of Lucene... return DateField.stringToDate(dateString); } catch (Throwable t) { throw new ParseException("Unable to parse date string: " + t.getMessage(), 0); } } }
/** * Converts a string produced by {@link #timeToString} or {@link #dateToString} back to a time, represented * as the number of milliseconds since January 1, 1970, 00:00:00 GMT. Is also able to parse dates encoded in * the old Lucene 1.x DateField format, for compatibility with old indexes (this functionality will go away * in a future release). * * @param dateString A string produced by timeToString or dateToString * @return The number of milliseconds since January 1, 1970, 00:00:00 GMT * @exception ParseException If parse error */ public final static long stringToTime(String dateString) throws ParseException { try { return DateTools.stringToTime(dateString); } catch (ParseException pe) { // Handle dates encoded in the Lucene 1.x format, for compatibility with old indexes try { // This method will go away in a future release of Lucene... return DateField.stringToTime(dateString); } catch (Throwable t) { throw new ParseException("Unable to parse date string: " + t.getMessage(), 0); } } }
/** * Sets the date resolution used by RangeQueries for a specific field. * * @param fieldName field for which the date resolution is to be set * @param dateResolution date resolution to set */ public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { if (fieldName == null) { throw new IllegalArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // lazily initialize HashMap fieldToDateResolution = new HashMap<>(); } fieldToDateResolution.put(fieldName, dateResolution); }
/** * Add a row to the index. * * @param row the row * @param commitIndex whether to commit the changes to the Lucene index */ protected void insert(Object[] row, boolean commitIndex) throws SQLException { String query = getQuery(row); Document doc = new Document(); doc.add(new Field(LUCENE_FIELD_QUERY, query, Field.Store.YES, Field.Index.NOT_ANALYZED)); long time = System.currentTimeMillis(); doc.add(new Field(LUCENE_FIELD_MODIFIED, DateTools.timeToString(time, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED)); StatementBuilder buff = new StatementBuilder(); for (int index : indexColumns) { String columnName = columns[index]; String data = asString(row[index], columnTypes[index]); // column names that start with _ // must be escaped to avoid conflicts // with internal field names (_DATA, _QUERY, _modified) if (columnName.startsWith(LUCENE_FIELD_COLUMN_PREFIX)) { columnName = LUCENE_FIELD_COLUMN_PREFIX + columnName; } doc.add(new Field(columnName, data, Field.Store.NO, Field.Index.ANALYZED)); buff.appendExceptFirst(" "); buff.append(data); } Field.Store storeText = STORE_DOCUMENT_TEXT_IN_INDEX ? Field.Store.YES : Field.Store.NO; doc.add(new Field(LUCENE_FIELD_DATA, buff.toString(), storeText, Field.Index.ANALYZED)); try { indexAccess.writer.addDocument(doc); if (commitIndex) { commitIndex(); } } catch (IOException e) { throw convertException(e); } }
public static String uid(File f) { // Append path and date into a string in such a way that lexicographic // sorting gives the same results as a walk of the file hierarchy. Thus // null (\u0000) is used both to separate directory components and to // separate the path from the date. return f.getPath().replace(dirSep, '\u0000') + "\u0000" + DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND); }
/** * Sets the date resolution used by RangeQueries for a specific field. * * @param fieldName field for which the date resolution is to be set * @param dateResolution date resolution to set */ public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { if (fieldName == null) { throw new IllegalArgumentException("Field cannot be null."); } if (fieldToDateResolution == null) { // lazily initialize HashMap fieldToDateResolution = new HashMap<String, DateTools.Resolution>(); } fieldToDateResolution.put(fieldName, dateResolution); }
/** * @exception ParseException throw in overridden method to disallow */ protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { if (lowercaseExpandedTerms) { part1 = part1.toLowerCase(); part2 = part2.toLowerCase(); } try { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); Date d1 = df.parse(part1); Date d2 = df.parse(part2); if (inclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: Calendar cal = Calendar.getInstance(locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); cal.set(Calendar.MILLISECOND, 999); d2 = cal.getTime(); } DateTools.Resolution resolution = getDateResolution(field); if (resolution == null) { // no default or field specific date resolution has been set, // use deprecated DateField to maintain compatibility with // pre-1.9 Lucene versions. part1 = DateField.dateToString(d1); part2 = DateField.dateToString(d2); } else { part1 = DateTools.dateToString(d1, resolution); part2 = DateTools.dateToString(d2, resolution); } } catch (Exception e) { } return newRangeQuery(field, part1, part2, inclusive); }
/** * Makes a document for a File. * <p> * The document has three fields: * <ul> * <li><code>path</code> containing the pathname of the file, as a * stored, tokenized field; * <li><code>modified</code> containing the last modified date of the * file as a keyword field as encoded by <a * href="lucene.document.DateField.html">DateField</a>; and * <li><code>contents</code> containing the full contents of the file, as * a Reader field; * </ul> */ public static Document Document( File f, String notebookLabel, String conceptLabel, String conceptUri) throws java.io.FileNotFoundException { // make a new, empty lucene document Document doc = new Document(); // no assemble the document from fields - some of them will be searchable, // others will be available in the result (as document attributes) i.e. stored in the index Field field; // concept URI as attribute - used to delete the document field = new Field("uri", conceptUri, Field.Store.YES, Field.Index.UN_TOKENIZED); doc.add(field); // path as attribute field = new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO); doc.add(field); // SEARCHABLE concept label field = new Field("conceptLabel", conceptLabel, Field.Store.YES, Field.Index.TOKENIZED); doc.add(field); // notebook label attribute field = new Field("outlineLabel",notebookLabel,Field.Store.YES, Field.Index.NO); doc.add(field); // timestamp as attribute field = new Field("modified",DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND),Field.Store.YES, Field.Index.NO); doc.add(field); // concept annotation - the most important FileInputStream is = new FileInputStream(f); Reader reader = new BufferedReader(new InputStreamReader(is)); field = new Field("contents", reader); doc.add(field); // return the document return doc; }