/**
 * Builds a string term-range filter from the attributes of an XML element.
 *
 * <p>The field name is read through
 * {@code DOMUtils.getAttributeWithInheritance}; the two bounds are read
 * directly from the element's {@code lowerTerm} and {@code upperTerm}
 * attributes. The inclusiveness flags are looked up with {@code true}
 * passed as the fallback argument.
 *
 * @param e the element describing the range
 * @return the filter produced by {@code TermRangeFilter.newStringRange}
 * @throws ParserException declared by the overridden method
 */
@Override
public Filter getFilter(Element e) throws ParserException {
    final String field = DOMUtils.getAttributeWithInheritance(e, "fieldName");

    final String lowerBound = e.getAttribute("lowerTerm");
    final String upperBound = e.getAttribute("upperTerm");

    final boolean lowerInclusive = DOMUtils.getAttribute(e, "includeLower", true);
    final boolean upperInclusive = DOMUtils.getAttribute(e, "includeUpper", true);

    return TermRangeFilter.newStringRange(
            field, lowerBound, upperBound, lowerInclusive, upperInclusive);
}
/**
 * Creates a term range filter over a date field.
 *
 * <p>A missing bound is replaced by {@code LuceneFormatter.MIN_DATE} or
 * {@code LuceneFormatter.MAX_DATE} respectively; a present bound is rendered
 * with {@code LuceneFormatter.format}.
 *
 * @param field the field to filter on
 * @param min the lower bound, or {@code null} for none
 * @param max the upper bound, or {@code null} for none
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when both bounds are {@code null}
 */
public static TermRangeFilter range(final String field, final Calendar min, final Calendar max, final boolean includeMin, final boolean includeMax) {
    final boolean unbounded = (min == null) && (max == null);
    if (unbounded) {
        // Nothing to restrict: callers get no filter at all.
        return null;
    }

    final String lowerText;
    if (min != null) {
        lowerText = LuceneFormatter.format(min);
    } else {
        lowerText = LuceneFormatter.MIN_DATE;
    }

    final String upperText;
    if (max != null) {
        upperText = LuceneFormatter.format(max);
    } else {
        upperText = LuceneFormatter.MAX_DATE;
    }

    final BytesRef lowerBytes = new BytesRef(lowerText);
    final BytesRef upperBytes = new BytesRef(upperText);
    return new TermRangeFilter(field, lowerBytes, upperBytes, includeMin, includeMax);
}
/**
 * Creates a term range filter over a numeric field.
 *
 * <p>A missing bound is replaced by {@code LuceneFormatter.MIN_DECIMAL} or
 * {@code LuceneFormatter.MAX_DECIMAL} respectively; a present bound is
 * rendered with {@code LuceneFormatter.format}.
 *
 * @param field the field to filter on
 * @param min the lower bound, or {@code null} for none
 * @param max the upper bound, or {@code null} for none
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when both bounds are {@code null}
 */
public static TermRangeFilter range(final String field, final Number min, final Number max, final boolean includeMin, final boolean includeMax) {
    final boolean unbounded = (min == null) && (max == null);
    if (unbounded) {
        // Nothing to restrict: callers get no filter at all.
        return null;
    }

    final String lowerText;
    if (min != null) {
        lowerText = LuceneFormatter.format(min);
    } else {
        lowerText = LuceneFormatter.MIN_DECIMAL;
    }

    final String upperText;
    if (max != null) {
        upperText = LuceneFormatter.format(max);
    } else {
        upperText = LuceneFormatter.MAX_DECIMAL;
    }

    final BytesRef lowerBytes = new BytesRef(lowerText);
    final BytesRef upperBytes = new BytesRef(upperText);
    return new TermRangeFilter(field, lowerBytes, upperBytes, includeMin, includeMax);
}
/**
 * Creates a term range filter over a string field.
 *
 * <p>Both bounds are required: if either one is empty according to
 * {@code StringUtils.isEmpty}, no filter is produced.
 *
 * @param field the field to filter on
 * @param min the lower bound
 * @param max the upper bound
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when either bound is empty
 */
public static TermRangeFilter range(final String field, final String min, final String max, final boolean includeMin, final boolean includeMax) {
    final boolean bothBoundsGiven = !StringUtils.isEmpty(min) && !StringUtils.isEmpty(max);
    if (!bothBoundsGiven) {
        return null;
    }

    final BytesRef lowerBytes = new BytesRef(min);
    final BytesRef upperBytes = new BytesRef(max);
    return new TermRangeFilter(field, lowerBytes, upperBytes, includeMin, includeMax);
}
public void testFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) throws Exception { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES)); doc.add(new StringField("body", "body", Field.Store.YES)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("body","body")); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi searcher not // supported). ScoreDoc[] result = searcher.search (query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); result = searcher.search (query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs; assertEquals("The index Term should be included.", 1, result.length); reader.close(); dir.close(); }
/**
 * Creates a term range filter over a date field.
 *
 * <p>A missing bound is replaced by {@code LuceneFormatter.MIN_DATE} or
 * {@code LuceneFormatter.MAX_DATE} respectively; a present bound is rendered
 * with {@code LuceneFormatter.format}.
 *
 * @param field the field to filter on
 * @param min the lower bound, or {@code null} for none
 * @param max the upper bound, or {@code null} for none
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when both bounds are {@code null}
 */
public static TermRangeFilter range(final String field, final Calendar min, final Calendar max, final boolean includeMin, final boolean includeMax) {
    final boolean unbounded = (min == null) && (max == null);
    if (unbounded) {
        // Nothing to restrict: callers get no filter at all.
        return null;
    }

    final String lowerText;
    if (min != null) {
        lowerText = LuceneFormatter.format(min);
    } else {
        lowerText = LuceneFormatter.MIN_DATE;
    }

    final String upperText;
    if (max != null) {
        upperText = LuceneFormatter.format(max);
    } else {
        upperText = LuceneFormatter.MAX_DATE;
    }

    return new TermRangeFilter(field, lowerText, upperText, includeMin, includeMax);
}
/**
 * Creates a term range filter over a numeric field.
 *
 * <p>A missing bound is replaced by {@code LuceneFormatter.MIN_DECIMAL} or
 * {@code LuceneFormatter.MAX_DECIMAL} respectively; a present bound is
 * rendered with {@code LuceneFormatter.format}.
 *
 * @param field the field to filter on
 * @param min the lower bound, or {@code null} for none
 * @param max the upper bound, or {@code null} for none
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when both bounds are {@code null}
 */
public static TermRangeFilter range(final String field, final Number min, final Number max, final boolean includeMin, final boolean includeMax) {
    final boolean unbounded = (min == null) && (max == null);
    if (unbounded) {
        // Nothing to restrict: callers get no filter at all.
        return null;
    }

    final String lowerText;
    if (min != null) {
        lowerText = LuceneFormatter.format(min);
    } else {
        lowerText = LuceneFormatter.MIN_DECIMAL;
    }

    final String upperText;
    if (max != null) {
        upperText = LuceneFormatter.format(max);
    } else {
        upperText = LuceneFormatter.MAX_DECIMAL;
    }

    return new TermRangeFilter(field, lowerText, upperText, includeMin, includeMax);
}
/**
 * Creates a term range filter over a string field.
 *
 * <p>Both bounds are required: if either one is empty according to
 * {@code StringUtils.isEmpty}, no filter is produced.
 *
 * @param field the field to filter on
 * @param min the lower bound
 * @param max the upper bound
 * @param includeMin whether the lower bound itself matches
 * @param includeMax whether the upper bound itself matches
 * @return the filter, or {@code null} when either bound is empty
 */
public static TermRangeFilter range(final String field, final String min, final String max, final boolean includeMin, final boolean includeMax) {
    final boolean bothBoundsGiven = !StringUtils.isEmpty(min) && !StringUtils.isEmpty(max);
    if (bothBoundsGiven) {
        return new TermRangeFilter(field, min, max, includeMin, includeMax);
    }
    return null;
}
public void testFarsiRangeFilterCollating(Analyzer analyzer, BytesRef firstBeg, BytesRef firstEnd, BytesRef secondBeg, BytesRef secondEnd) throws Exception { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES)); doc.add(new StringField("body", "body", Field.Store.YES)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("body", "body")); // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeFilter with a Farsi // Collator (or an Arabic one for the case when Farsi searcher not // supported). ScoreDoc[] result = searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1).scoreDocs; assertEquals("The index Term should not be included.", 0, result.length); result = searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1).scoreDocs; assertEquals("The index Term should be included.", 1, result.length); reader.close(); dir.close(); }
/**
 * Creates a date range filter with both bounds inclusive.
 *
 * <p>Delegates to the five-argument overload, passing {@code true} for both
 * inclusiveness flags.
 *
 * @param field the field to filter on
 * @param min the lower bound
 * @param max the upper bound
 * @return whatever the five-argument overload returns for these arguments
 */
public static TermRangeFilter range(final String field, final Calendar min, final Calendar max) {
    final boolean inclusive = true;
    return range(field, min, max, inclusive, inclusive);
}
/**
 * Creates a number range filter with both bounds inclusive.
 *
 * <p>Delegates to the five-argument overload, passing {@code true} for both
 * inclusiveness flags.
 *
 * @param field the field to filter on
 * @param min the lower bound
 * @param max the upper bound
 * @return whatever the five-argument overload returns for these arguments
 */
public static TermRangeFilter range(final String field, final Number min, final Number max) {
    final boolean inclusive = true;
    return range(field, min, max, inclusive, inclusive);
}
/**
 * Builds a string range filter over {@code field} with both bounds inclusive.
 *
 * @param field the field to filter on
 * @param lowerPrice the lower bound, passed through as a string term
 * @param upperPrice the upper bound, passed through as a string term
 * @return the filter produced by {@code TermRangeFilter.newStringRange}
 */
private Filter getRangeFilter(String field, String lowerPrice, String upperPrice) {
    return TermRangeFilter.newStringRange(field, lowerPrice, upperPrice, true, true);
}
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); // we use the default Locale/TZ since LuceneTestCase randomizes it Calendar cal = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); cal.clear(); cal.setTimeInMillis(1041397200000L); // 2003 January 01 for (int i = 0; i < MAX; i++) { Document doc = new Document(); doc.add(newStringField("key", "" + (i + 1), Field.Store.YES)); doc.add(newStringField("owner", (i < MAX / 2) ? "bob" : "sue", Field.Store.YES)); doc.add(newStringField("date", cal.getTime().toString(), Field.Store.YES)); writer.addDocument(doc); cal.add(Calendar.DATE, 1); } reader = writer.getReader(); writer.close(); searcher = newSearcher(reader); // query for everything to make life easier BooleanQuery bq = new BooleanQuery(); bq.add(new TermQuery(new Term("owner", "bob")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term("owner", "sue")), BooleanClause.Occur.SHOULD); query = bq; // date filter matches everything too //Date pastTheEnd = parseDate("2099 Jan 1"); // dateFilter = DateFilter.Before("date", pastTheEnd); // just treat dates as strings and select the whole range for now... dateFilter = TermRangeFilter.newStringRange("date","","ZZZZ",true,true); bobFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "bob"))); sueFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "sue"))); }
/**
 * Creates a splitter that divides {@code input} around {@code midTerm}.
 *
 * <p>Delegates to the filter-based constructor with a {@code TermRangeFilter}
 * on {@code midTerm.field()}. The filter's lower term is {@code null}, its
 * upper term is {@code midTerm.bytes()}, and the two flags passed are
 * {@code true} and {@code false}, so the upper term itself is presumably
 * excluded from the range.
 *
 * <p>NOTE(review): which of {@code dir1}/{@code dir2} receives the documents
 * matching the filter is decided by the delegated constructor, which is not
 * shown here. Confirm there.
 *
 * @param input the directory holding the index to split
 * @param dir1 first target directory
 * @param dir2 second target directory
 * @param midTerm the term whose field and bytes define the split point
 * @param config1 writer configuration passed along with {@code dir1}
 * @param config2 writer configuration passed along with {@code dir2}
 */
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) { this(input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2); }
/**
 * Builds a block-join query from a child term.
 *
 * <p>The child side is a {@code TermQuery} for {@code childTerm} on the
 * {@code child} field. The parent side is a {@code TermRangeFilter} on the
 * {@code parent} field with both bounds {@code null} and both inclusiveness
 * flags {@code false}. Scoring uses {@code ScoreMode.None}.
 *
 * @param childTerm the term to look for on the child field
 * @return the assembled join query
 */
protected ToParentBlockJoinQuery join(final String childTerm) {
    final TermQuery childQuery = new TermQuery(new Term(child, childTerm));
    final TermRangeFilter parentFilter = new TermRangeFilter(parent, null, null, false, false);
    return new ToParentBlockJoinQuery(childQuery, parentFilter, ScoreMode.None);
}
/** * Lucene Lookup. * It now uses the new Lucene API used in release 3.6 * @param uri A URI to look for. * @return The file and offset where that URI can be found, or null if it * doesn't exist. * TODO Does TermRangeFilter needs to be modified to memory efficient enough. * The the optimizations in the previous used SparseRangeFilter may or may not * relevant for Lucene 3.6+ */ private ARCKey luceneLookUp(String uri) { // SparseRangeFilter + ConstantScoreQuery means we ignore norms, // bitsets, and other memory-eating things we don't need that TermQuery // or RangeFilter would imply. //Query query = new ConstantScoreQuery(new SparseRangeFilter( // DigestIndexer.FIELD_URL, uri, uri, true, true)); BytesRef uriRef = new BytesRef(uri.getBytes()); // Should we decide which charset? Query query = new ConstantScoreQuery(new TermRangeFilter( DigestIndexer.FIELD_URL, uriRef, uriRef, true, true)); try { AllDocsCollector allResultsCollector = new AllDocsCollector(); luceneSearcher.search(query, allResultsCollector); Document doc = null; List<ScoreDoc> hits = allResultsCollector.getHits(); if (hits != null) { log.debug("Found " + hits.size() + " hits for uri: " + uri); int i = 0; for (ScoreDoc hit: hits) { int docId = hit.doc; doc = luceneSearcher.doc(docId); String origin = doc.get(DigestIndexer.FIELD_ORIGIN); // Here is where we will handle multiple hits in the future if (origin == null) { log.debug("No origin for URL '" + uri + "' hit " + i++); continue; } String[] originParts = origin.split(","); if (originParts.length != 2) { throw new IllegalState("Bad origin for URL '" + uri + "': '" + origin + "'"); } log.debug("Found document with origin: " + origin); return new ARCKey(originParts[0], Long.parseLong(originParts[1])); } } } catch (IOException e) { throw new IOFailure("Fatal error looking up '" + uri + "'", e); } return null; }
/**
 * Creates a string range filter with both bounds inclusive.
 *
 * <p>Delegates to the five-argument overload, passing {@code true} for both
 * inclusiveness flags. Both bounds are required, or no filter will be
 * applied.
 *
 * @param field the field to filter on
 * @param min the lower bound
 * @param max the upper bound
 * @return whatever the five-argument overload returns for these arguments
 */
public static TermRangeFilter range(final String field, final String min, final String max) {
    final boolean inclusive = true;
    return range(field, min, max, inclusive, inclusive);
}
/**
 * Checks that an inclusive range filter from "d" to "j" on {@code title2},
 * applied to the {@code allBooks} query, yields exactly three hits.
 *
 * @throws Exception if the search fails
 */
public void testTermRangeFilter() throws Exception {
    assertEquals(3, TestUtil.hitCount(
            searcher,
            allBooks,
            new TermRangeFilter("title2", "d", "j", true, true)));
}
@Override public void setUp() throws Exception { directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(Version.LUCENE_41), IndexWriter.MaxFieldLength.UNLIMITED); Calendar cal = Calendar.getInstance(); cal.set(2009, 1, 1, 0, 0); for (int i = 0; i < MAX; i++) { Document doc = new Document(); doc.add(new Field("key", "" + (i + 1), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("owner", (i < MAX / 2) ? "bob" : "sue", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("date", DateTools.timeToString(cal.getTimeInMillis(), DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc); cal.add(Calendar.DATE, 1); } writer.close(); searcher = new IndexSearcher(directory); BooleanQuery bq = new BooleanQuery(); bq.add(new TermQuery(new Term("owner", "bob")), BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term("owner", "sue")), BooleanClause.Occur.SHOULD); query = bq; cal.set(2099, 1, 1, 0, 0); dateFilter = TermRangeFilter.Less("date", DateTools.timeToString( cal.getTimeInMillis(), DateTools.Resolution.DAY));// C bobFilter = new CachingWrapperFilter( new QueryWrapperFilter( new TermQuery(new Term("owner", "bob")))); sueFilter = new CachingWrapperFilter( new QueryWrapperFilter( new TermQuery(new Term("owner", "sue")))); }
/**
 * Creates a filter on field {@code f} that matches dates from {@code from}
 * to {@code to}, both ends inclusive.
 *
 * <p>Both dates are converted to terms at second resolution via
 * {@code DateTools.dateToString}.
 *
 * @param f The field name
 * @param from From Date
 * @param to To Date
 */
public DateRangeFilter(String f, Date from, Date to) {
    final DateTools.Resolution resolution = DateTools.Resolution.SECOND;
    _myFilter = new TermRangeFilter(
            f,
            DateTools.dateToString(from, resolution),
            DateTools.dateToString(to, resolution),
            true,
            true);
}
/**
 * Creates a filter on {@code field} that matches dates on or before
 * {@code date}.
 *
 * <p>The date is converted to a term at second resolution and handed to
 * {@code TermRangeFilter.Less}; the result is wrapped in a
 * {@code DateRangeFilter}.
 *
 * @param field Field name
 * @param date The Date
 * @return The Filter
 */
public static DateRangeFilter Before(String field, Date date) {
    final String upperTerm = DateTools.dateToString(date, DateTools.Resolution.SECOND);
    final Filter upTo = TermRangeFilter.Less(field, upperTerm);
    return new DateRangeFilter(upTo);
}