/** * Converts a string produced by {@link #timeToString} or {@link #dateToString} back to a time, represented * as a Date object. Is also able to parse dates encoded in the old Lucene 1.x DateField format, for * compatibility with old indexes (this functionality will go away in a future release). * * @param dateString A string produced by timeToString or dateToString * @return The parsed time as a Date object * @exception ParseException If parse error */ public final static Date stringToDate(String dateString) throws ParseException { try { return DateTools.stringToDate(dateString); } catch (ParseException pe) { // Handle dates encoded in the Lucene 1.x format, for compatibility with old indexes try { // This method will go away in a future release of Lucene... return DateField.stringToDate(dateString); } catch (Throwable t) { throw new ParseException("Unable to parse date string: " + t.getMessage(), 0); } } }
/** * Converts a string produced by {@link #timeToString} or {@link #dateToString} back to a time, represented * as the number of milliseconds since January 1, 1970, 00:00:00 GMT. Is also able to parse dates encoded in * the old Lucene 1.x DateField format, for compatibility with old indexes (this functionality will go away * in a future release). * * @param dateString A string produced by timeToString or dateToString * @return The number of milliseconds since January 1, 1970, 00:00:00 GMT * @exception ParseException If parse error */ public final static long stringToTime(String dateString) throws ParseException { try { return DateTools.stringToTime(dateString); } catch (ParseException pe) { // Handle dates encoded in the Lucene 1.x format, for compatibility with old indexes try { // This method will go away in a future release of Lucene... return DateField.stringToTime(dateString); } catch (Throwable t) { throw new ParseException("Unable to parse date string: " + t.getMessage(), 0); } } }
/** Makes a document for a File. <p> The document has three fields: <ul> <li><code>path</code>--containing the pathname of the file, as a stored, tokenized field; <li><code>modified</code>--containing the last modified date of the file as a keyword field as encoded by <a href="lucene.document.DateField.html">DateField</a>; and <li><code>contents</code>--containing the full contents of the file, as a Reader field; */ public static Document Document(File f) throws java.io.FileNotFoundException { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a Text field, so // that the index stores the path, and so that the path is searchable doc.add(Field.Text("path", f.getPath())); // Add the last modified date of the file a field named "modified". Use a // Keyword field, so that it's searchable, but so that no attempt is made // to tokenize the field into words. doc.add(Field.Keyword("modified", DateField.timeToString(f.lastModified()))); // Add the contents of the file a field named "contents". Use a Text // field, specifying a Reader, so that the text of the file is tokenized. // ?? why doesn't FileReader work here ?? FileInputStream is = new FileInputStream(f); Reader reader = new BufferedReader(new InputStreamReader(is)); doc.add(Field.Text("contents", reader)); // return the document return doc; }
/** * @exception ParseException throw in overridden method to disallow */ protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { if (lowercaseExpandedTerms) { part1 = part1.toLowerCase(); part2 = part2.toLowerCase(); } try { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); Date d1 = df.parse(part1); Date d2 = df.parse(part2); if (inclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: Calendar cal = Calendar.getInstance(locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); cal.set(Calendar.MILLISECOND, 999); d2 = cal.getTime(); } DateTools.Resolution resolution = getDateResolution(field); if (resolution == null) { // no default or field specific date resolution has been set, // use deprecated DateField to maintain compatibility with // pre-1.9 Lucene versions. part1 = DateField.dateToString(d1); part2 = DateField.dateToString(d2); } else { part1 = DateTools.dateToString(d1, resolution); part2 = DateTools.dateToString(d2, resolution); } } catch (Exception e) { } return newRangeQuery(field, part1, part2, inclusive); }
/** * This will get a lucene document from a PDF file. * @param res The file to get the document for. * @return The lucene document. * @throws IOException If there is an error parsing or indexing the document. */ public static Document getDocument( Resource res ) { Document document = new Document(); FieldUtil.setMimeType(document, "application/pdf"); //document.add(FieldUtil.UnIndexed("mime-type", "application/pdf")); document.add( FieldUtil.UnIndexed("path", res.getPath() ) ); String uid = res.getPath().replace(FILE_SEPARATOR, '\u0000') + "\u0000" + DateField.timeToString(res.lastModified() ); document.add(FieldUtil.Text("uid", uid, false)); // Add the uid as a field, so that index can be incrementally maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. //document.add(new Field("uid", uid, Field.Store.NO,Field.Index.UN_TOKENIZED)); //document.add(new Field("uid", uid, false, true, false)); InputStream is = null; try { is = IOUtil.toBufferedInputStream(res.getInputStream()); addContent(null, document, is); } catch(IOException ioe) { } finally { IOUtil.closeEL(is); } // return the document return document; }
/**
 * Builds the uid string for a resource: its path with every file separator replaced by a NUL
 * character, followed by a NUL character and the last-modified time as encoded by
 * <code>DateField.timeToString</code>.
 *
 * @param f the resource to build the uid for
 * @return the uid string
 */
public static String uid(Resource f) {
    final String pathKey = f.getPath().replace(FILE_SEPARATOR, '\u0000');
    final String modifiedStamp = DateField.timeToString(f.lastModified());
    return pathKey + "\u0000" + modifiedStamp;
}
/** * translate the file to a Document Object * @param file * @return * @throws InterruptedException * @throws IOException */ public static Document toDocument(Resource file,String url,String charset) throws IOException { String ext = ResourceUtil.getExtension(file,null); Document doc=null; if(ext!=null) { ext=ext.toLowerCase(); //String mimeType=new MimetypesFileTypeMap().getContentType(f); // HTML if(ext.equals("htm") || ext.equals("html") || ext.equals("cfm") || ext.equals("cfml") || ext.equals("php") || ext.equals("asp") || ext.equals("aspx")) { doc= HTMLDocument.getDocument(file,charset); } // PDF else if(ext.equals("pdf")) { doc= PDFDocument.getDocument(file); } // DOC else if(ext.equals("doc")) { doc= WordDocument.getDocument(file); } } else { ContentTypeImpl ct = (ContentTypeImpl) ResourceUtil.getContentType(file); String type = ct.getMimeType(); String c=ct.getCharset(); if(c!=null) charset=c; //String type=ResourceUtil.getMimeType(file,""); if(type==null) {} // HTML else if(type.equals("text/html")) { doc= HTMLDocument.getDocument(file,charset); } // PDF else if(type.equals("application/pdf")) { doc= PDFDocument.getDocument(file); } // DOC else if(type.equals("application/msword")) { doc= WordDocument.getDocument(file); } } if(doc==null) doc= FileDocument.getDocument(file,charset); String strPath=file.getPath().replace('\\', '/'); String strName=strPath.substring(strPath.lastIndexOf('/')); doc.add(FieldUtil.UnIndexed("url", strName)); doc.add(FieldUtil.UnIndexed("key", strPath)); doc.add(FieldUtil.UnIndexed("path", file.getPath())); doc.add(FieldUtil.UnIndexed("size", Caster.toString(file.length()))); doc.add(FieldUtil.UnIndexed("modified",DateField.timeToString(file.lastModified()))); return doc; }