/**
 * Serializes this alternation into a span query.
 *
 * @return {@code null} if the wrapper is flagged as null or holds no
 *         alternatives; the fragment query of the only alternative if
 *         there is exactly one; otherwise a {@link SpanOrQuery} over the
 *         fragment queries of all alternatives.
 * @throws QueryException if an alternative cannot be serialized
 */
@Override
public SpanQuery toFragmentQuery () throws QueryException {
    // Check the null flag first so the alternatives are not touched at all
    if (this.isNull) {
        return null;
    }

    final int count = this.alternatives.size();
    if (count == 0) {
        return null;
    }

    // A lone alternative needs no surrounding OR
    if (count == 1) {
        return this.alternatives.get(0)
                .retrieveNode(this.retrieveNode)
                .toFragmentQuery();
    }

    // Seed the OR with the first fragment, then append the remaining ones
    SpanOrQuery disjunction = null;
    for (SpanQueryWrapper alternative : this.alternatives) {
        SpanQuery fragment = alternative
                .retrieveNode(this.retrieveNode)
                .toFragmentQuery();
        if (disjunction == null) {
            disjunction = new SpanOrQuery(fragment);
        }
        else {
            disjunction.addClause(fragment);
        }
    }
    return disjunction;
}
/**
 * Searches for (s:a OR s:b) combined with s:c through a SpanNextQuery
 * and checks the positions of the first two matches.
 */
@Test
public void indexExample9 () throws IOException {
    KrillIndex ki = new KrillIndex();
    ki.addDoc(createFieldDoc1());
    ki.commit();

    // Left operand: either "a" or "b"
    SpanOrQuery aOrB = new SpanOrQuery(
            new SpanTermQuery(new Term("base", "s:a")),
            new SpanTermQuery(new Term("base", "s:b")));
    // Right operand: "c"
    SpanTermQuery c = new SpanTermQuery(new Term("base", "s:c"));

    SpanQuery sq = new SpanNextQuery(aOrB, c);
    Result kr = ki.search(sq, (short) 10);

    // First match
    assertEquals(0, kr.getMatch(0).getStartPos());
    assertEquals(2, kr.getMatch(0).getEndPos());

    // Second match
    assertEquals(3, kr.getMatch(1).getStartPos());
    assertEquals(5, kr.getMatch(1).getEndPos());
}
/** OR */ @Test public void testCase3 () throws IOException { ki = new KrillIndex(); ki.addDoc(createFieldDoc0()); ki.commit(); SpanQuery sq, sq2; // ec{1,2} sq = new SpanNextQuery(new SpanTermQuery(new Term("base", "s:e")), new SpanOrQuery(new SpanRepetitionQuery( new SpanTermQuery(new Term("base", "s:c")), 1, 1, true), new SpanRepetitionQuery( new SpanTermQuery(new Term("base", "s:b")), 1, 1, true))); kr = ki.search(sq, (short) 10); assertEquals((long) 3, kr.getTotalResults()); assertEquals(1, kr.getMatch(0).startPos); assertEquals(3, kr.getMatch(0).endPos); assertEquals(4, kr.getMatch(1).startPos); assertEquals(6, kr.getMatch(1).endPos); assertEquals(7, kr.getMatch(2).startPos); assertEquals(9, kr.getMatch(2).endPos); }
@Override public SpanOrQuery build(QueryNode node) throws QueryNodeException { // validates node BooleanQueryNode booleanNode = (BooleanQueryNode) node; List<QueryNode> children = booleanNode.getChildren(); SpanQuery[] spanQueries = new SpanQuery[children.size()]; int i = 0; for (QueryNode child : children) { spanQueries[i++] = (SpanQuery) child .getTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID); } return new SpanOrQuery(spanQueries); }
/**
 * Builds a {@link SpanOrQuery} with one {@link SpanTermQuery} per token
 * produced by analyzing the element's text for the given field.
 *
 * @param e element carrying a (possibly inherited) "fieldName" attribute,
 *          non-blank text content and an optional "boost" attribute
 *          (defaults to 1.0)
 * @return the span OR over all analyzed terms
 * @throws ParserException if the attribute or text is missing, or if
 *         analyzing the text fails with an {@link IOException}
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    String value = DOMUtils.getNonBlankTextOrFail(e);

    try {
        List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
        TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(value));
        try {
            TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
            BytesRef bytes = termAtt.getBytesRef();
            ts.reset();
            while (ts.incrementToken()) {
                termAtt.fillBytesRef();
                // The attribute's BytesRef is reused between tokens, so copy it
                SpanTermQuery stq = new SpanTermQuery(
                        new Term(fieldName, BytesRef.deepCopyOf(bytes)));
                clausesList.add(stq);
            }
            ts.end();
        } finally {
            // Always release the stream, including when analysis fails midway;
            // previously it leaked on any exception thrown before close().
            ts.close();
        }

        SpanOrQuery soq = new SpanOrQuery(
                clausesList.toArray(new SpanQuery[clausesList.size()]));
        soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
        return soq;
    } catch (IOException ioe) {
        throw new ParserException("IOException parsing value:" + value);
    }
}
/** * Replace span OR queries with more efficient plain OR, unless the parent * query is another span query. */ protected Query rewrite(SpanOrQuery oq) { if (suppressRewrite()) return oq; // Rewrite each term, and add to a plain boolean query. BooleanQuery newQuery = new BooleanQuery(); SpanQuery[] clauses = oq.getClauses(); for (int i = 0; i < clauses.length; i++) newQuery.add(rewriteQuery(clauses[i]), BooleanClause.Occur.SHOULD); // Retain the original boost, if any. return copyBoost(oq, newQuery); }
/** * Rewrite a span-based OR query. The procedure in this case is simple: * remove all stop words, with no bi-gramming performed. * * @param q The query to rewrite * @return Rewritten version, or 'q' unchanged if no changed needed. */ protected Query rewrite(final SpanOrQuery q) { // Rewrite each clause. Allow single clauses to be promoted, and // avoid bi-gramming. // return rewriteClauses(q, q.getClauses(), true, false, 0, new SpanClauseJoiner() { public SpanQuery join(SpanQuery[] clauses) { return new SpanOrQuery(clauses); } }); }
/**
 * Type assertion that treats span queries as acceptable stand-ins for
 * their non-span counterparts: SpanTermQuery for TermQuery,
 * SpanNearQuery for PhraseQuery and SpanOrQuery for BooleanQuery.
 * A SpanMultiTermQueryWrapper is unwrapped before delegating to the
 * superclass check.
 */
@SuppressWarnings("rawtypes")
@Override
public void assertInstanceOf(Query q, Class other) {
    // Multi-term wrappers are checked by the type of the query they wrap
    if (q instanceof SpanMultiTermQueryWrapper) {
        Query unwrapped = ((SpanMultiTermQueryWrapper) q).getWrappedQuery();
        super.assertInstanceOf(unwrapped, other);
        return;
    }

    if (q instanceof SpanTermQuery && other.equals(TermQuery.class)) {
        assertTrue("termquery", true);
        return;
    }

    if (q instanceof SpanNearQuery && other.equals(PhraseQuery.class)) {
        assertTrue("spannear/phrase", true);
        return;
    }

    if (q instanceof SpanOrQuery && other.equals(BooleanQuery.class)) {
        assertTrue("spanor/boolean", true);
        return;
    }

    // No span equivalence applies: fall back to the strict check
    super.assertInstanceOf(q, other);
}
/**
 * Tells whether a span query is null or empty. A {@link SpanOrQuery}
 * counts as empty when every one of its clauses is itself empty
 * (which includes having no clauses at all).
 *
 * @param q query to test
 * @return whether the query is null or empty
 */
private boolean isEmptyQuery(SpanQuery q) {
    if (q == null) {
        return true;
    }

    // Only OR queries can be empty; anything else has content
    if (!(q instanceof SpanOrQuery)) {
        return false;
    }

    for (SpanQuery clause : ((SpanOrQuery) q).getClauses()) {
        if (!isEmptyQuery(clause)) {
            return false;
        }
    }
    return true;
}
/**
 * Checks how queries are built when some or all terms are stop words
 * ("the" and "foo" are configured as stop words for this test).
 */
public void testStopwords() throws Exception {
    CharacterRunAutomaton stopSet =
            new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    CommonQueryParserConfiguration qp = getParserConfig(
            new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));

    // Both terms are stop words
    Query result = getQuery("field:the OR field:foo", qp);
    assertNotNull("result is null and it shouldn't be", result);
    // Debug output is only printed in verbose mode, like the print below
    if (VERBOSE) {
        System.out.println(result.getClass());
    }
    assertTrue("result is not a SpanOrQuery, BooleanQuery or MatchNoDocsQuery",
            result instanceof SpanOrQuery
                    || result instanceof BooleanQuery
                    || result instanceof MatchNoDocsQuery);
    if (result instanceof BooleanQuery) {
        assertEquals(0, ((BooleanQuery) result).clauses().size());
    }

    // One real term plus one stop word is expected to yield a TermQuery
    result = getQuery("field:woo OR field:the", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result instanceof TermQuery);

    // Boosted group AND a group consisting only of stop words
    result = getQuery(
            "(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
    result = ((BoostQuery) result).getQuery();
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    if (VERBOSE) {
        System.out.println("Result: " + result);
    }
    // assertEquals reports expected and actual values itself on failure
    assertEquals(2, ((BooleanQuery) result).clauses().size());
}
@Override protected SpanQuery convertUnknownQuery(String field, Query query) { if (query instanceof CommonTermsQuery) { // specialized since rewriting would change the result query // this query is TermContext sensitive. CommonTermsQuery ctq = (CommonTermsQuery) query; Set<Term> terms = new HashSet<>(); try { Weight w = ctq.createWeight(searcher, false, 1.0f); w.extractTerms(terms); } catch (IOException e) { throw new RuntimeException("IOException on searcher!!!", e); } List<SpanQuery> spanQs = new LinkedList<>(); for (Term term : terms) { if (term.field().equals(field)) { spanQs.add(new SpanTermQuery(term)); } } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } super.convertUnknownQuery(field, query); return null; }
/**
 * Combines two nested span-near queries with a {@link SpanOrQuery} and
 * checks which documents each query matches.
 */
public void testSpanOrQuery() throws Exception {
    // Adjacent-pair building blocks
    SpanNearQuery quickFox =
            new SpanNearQuery(new SpanQuery[] { quick, fox }, 1, true);
    SpanNearQuery lazyDog =
            new SpanNearQuery(new SpanQuery[] { lazy, dog }, 0, true);
    SpanNearQuery sleepyCat =
            new SpanNearQuery(new SpanQuery[] { sleepy, cat }, 0, true);

    // "quick fox" near "lazy dog"
    SpanNearQuery quickFoxNearLazyDog =
            new SpanNearQuery(new SpanQuery[] { quickFox, lazyDog }, 3, true);
    assertOnlyBrownFox(quickFoxNearLazyDog);
    dumpSpans(quickFoxNearLazyDog);

    // "quick fox" near "sleepy cat"
    SpanNearQuery quickFoxNearSleepyCat =
            new SpanNearQuery(new SpanQuery[] { quickFox, sleepyCat }, 3, true);
    dumpSpans(quickFoxNearSleepyCat);

    // Either of the two near queries
    SpanOrQuery either = new SpanOrQuery(
            new SpanQuery[] { quickFoxNearLazyDog, quickFoxNearSleepyCat });
    assertBothFoxes(either);
    dumpSpans(either);
}
/**
 * Scratch test that dumps the spans of several span query flavours:
 * OR, first, plain term and unordered near.
 */
public void testPlay() throws Exception {
    SpanQuery[] quickAndFox = new SpanQuery[] { quick, fox };

    // quick OR fox
    dumpSpans(new SpanOrQuery(quickAndFox));

    // "quick ... fox" (slop 1, in order), restricted by SpanFirstQuery with end 4
    SpanNearQuery quickFox =
            new SpanNearQuery(new SpanQuery[] { quick, fox }, 1, true);
    dumpSpans(new SpanFirstQuery(quickFox, 4));

    // Every occurrence of "the"
    dumpSpans(new SpanTermQuery(new Term("f", "the")));

    // quick and brown adjacent, in any order
    SpanNearQuery quickBrown =
            new SpanNearQuery(new SpanQuery[] { quick, brown }, 0, false);
    dumpSpans(quickBrown);
}
/**
 * Analyzing a span OR of two terms must yield an unverified result
 * that contains both terms.
 */
public void testExtractQueryMetadata_spanOrQuery() {
    Term shortTerm = new Term("_field", "_short_term");
    Term longTerm = new Term("_field", "_very_long_term");

    SpanOrQuery spanOrQuery = new SpanOrQuery(
            new SpanTermQuery(shortTerm),
            new SpanTermQuery(longTerm));

    Result result = analyze(spanOrQuery);

    assertThat(result.verified, is(false));
    assertTermsEqual(result.terms, shortTerm, longTerm);
}
/**
 * Applies a slop to a span query.
 *
 * @param query the span query to adjust
 * @param slop  the slop to apply
 * @return for a {@link SpanNearQuery}, a new near query with the same
 *         clauses and ordering but the given slop; for a
 *         {@link SpanOrQuery}, a new OR whose clauses were processed
 *         recursively; any other query is returned unchanged
 */
private Query addSlopToSpan(SpanQuery query, int slop) {
    if (query instanceof SpanNearQuery) {
        SpanNearQuery near = (SpanNearQuery) query;
        return new SpanNearQuery(near.getClauses(), slop, near.isInOrder());
    }

    if (query instanceof SpanOrQuery) {
        SpanQuery[] originals = ((SpanOrQuery) query).getClauses();
        SpanQuery[] adjusted = new SpanQuery[originals.length];
        for (int i = 0; i < originals.length; i++) {
            adjusted[i] = (SpanQuery) addSlopToSpan(originals[i], slop);
        }
        return new SpanOrQuery(adjusted);
    }

    // Slop is not applicable to other span query types
    return query;
}
/**
 * Verifies that the generated Lucene query is a {@link SpanOrQuery}
 * whose clauses equal, one by one and in order, the queries produced by
 * the builder's clauses.
 */
@Override
protected void doAssertLuceneQuery(SpanOrQueryBuilder queryBuilder, Query query, SearchContext context)
        throws IOException {
    assertThat(query, instanceOf(SpanOrQuery.class));

    SpanQuery[] actualClauses = ((SpanOrQuery) query).getClauses();
    assertThat(actualClauses.length, equalTo(queryBuilder.clauses().size()));

    // Walk builder clauses and Lucene clauses in lock-step
    Iterator<SpanQueryBuilder> expectedBuilders = queryBuilder.clauses().iterator();
    for (int i = 0; i < actualClauses.length; i++) {
        Query expected = expectedBuilders.next().toQuery(context.getQueryShardContext());
        assertThat(actualClauses[i], equalTo(expected));
    }
}
/**
 * Builds a {@link SpanOrQuery} from the child elements of {@code e}:
 * each child element is turned into a span query by the factory.
 *
 * @param e parent element; an optional "boost" attribute (default 1.0)
 *          is applied to the resulting query
 * @return the span OR over all child span queries, in document order
 * @throws ParserException if a child cannot be converted
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    List<SpanQuery> clausesList = new ArrayList<>();

    Node kid = e.getFirstChild();
    while (kid != null) {
        // Text, comments etc. are skipped; only elements are clauses
        if (kid.getNodeType() == Node.ELEMENT_NODE) {
            clausesList.add(factory.getSpanQuery((Element) kid));
        }
        kid = kid.getNextSibling();
    }

    SpanOrQuery soq = new SpanOrQuery(
            clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
}
/**
 * Analyzes the element's text for the given field and ORs together one
 * {@link SpanTermQuery} per produced token.
 *
 * @param e element with a (possibly inherited) "fieldName" attribute,
 *          non-blank text and an optional "boost" attribute (default 1.0)
 * @return the span OR over all analyzed terms
 * @throws ParserException if required input is missing or analysis
 *         fails with an {@link IOException}
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    final String value = DOMUtils.getNonBlankTextOrFail(e);

    List<SpanQuery> termClauses = new ArrayList<>();
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(fieldName, value);
        TermToBytesRefAttribute termAttribute = stream.addAttribute(TermToBytesRefAttribute.class);
        BytesRef termBytes = termAttribute.getBytesRef();

        stream.reset();
        while (stream.incrementToken()) {
            termAttribute.fillBytesRef();
            // Deep copy: the same BytesRef instance is refilled for every token
            Term term = new Term(fieldName, BytesRef.deepCopyOf(termBytes));
            termClauses.add(new SpanTermQuery(term));
        }
        stream.end();

        SpanQuery[] clauseArray = termClauses.toArray(new SpanQuery[termClauses.size()]);
        SpanOrQuery soq = new SpanOrQuery(clauseArray);
        soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
        return soq;
    } catch (IOException ioe) {
        throw new ParserException("IOException parsing value:" + value);
    } finally {
        // Release the stream on every path
        IOUtils.closeWhileHandlingException(stream);
    }
}
/**
 * Turns the collected weighted span queries into a single span clause.
 * Each query gets its recorded weight as boost.
 *
 * @return the query itself if exactly one was collected, otherwise a
 *         {@link SpanOrQuery} over all collected queries
 */
public SpanQuery makeSpanClause() {
    SpanQuery[] spanQueries = new SpanQuery[size()];

    int next = 0;
    for (SpanQuery sq : weightBySpanQuery.keySet()) {
        sq.setBoost(weightBySpanQuery.get(sq).floatValue());
        spanQueries[next++] = sq;
    }

    // A single query needs no OR around it
    return (spanQueries.length == 1)
            ? spanQueries[0]
            : new SpanOrQuery(spanQueries);
}
/**
 * Converts a span query into a non-span filter query, dispatching on
 * the concrete span query type.
 *
 * @param query the span query to convert
 * @return the converted query; unknown span types are wrapped in a
 *         {@link QueryWrapperFilter}
 */
private Query spanFilter(SpanQuery query) {
    // Composite span queries have dedicated converters
    if (query instanceof SpanNearQuery) {
        return spanNearFilter((SpanNearQuery) query);
    }
    if (query instanceof SpanNotQuery) {
        return spanNotFilter((SpanNotQuery) query);
    }
    if (query instanceof SpanOrQuery) {
        return spanOrFilter((SpanOrQuery) query);
    }

    // Leaf span queries map directly onto their plain counterparts
    if (query instanceof SpanTermQuery) {
        return new TermQuery(((SpanTermQuery) query).getTerm());
    }
    if (query instanceof SpanMultiTermQueryWrapper) {
        return ((SpanMultiTermQueryWrapper) query).getWrappedQuery();
    }

    // Anything else is used as-is behind a filter wrapper
    return new QueryWrapperFilter(query);
}
/**
 * Converts every clause of a span OR query with {@code spanFilter} and
 * combines the converted clauses via {@code any}.
 *
 * @param query the span OR query to convert
 * @return the combination of the converted clauses
 */
private Query spanOrFilter(SpanOrQuery query) {
    SpanQuery[] clauses = query.getClauses();
    List<Query> converted = new ArrayList<>(clauses.length);
    for (int i = 0; i < clauses.length; i++) {
        converted.add(spanFilter(clauses[i]));
    }
    return any(converted);
}
/**
 * Combines the fragment queries of all wrappers in the list.
 *
 * @param list the wrappers to combine; the code assumes it is not empty
 *             (an empty list fails when the first element is fetched)
 * @return the fragment query of the only wrapper if the list has one
 *         element, otherwise a {@link SpanOrQuery} over all fragments
 * @throws QueryException if a wrapper cannot be serialized
 */
private SpanQuery _listToOrQuery (ArrayList<SpanQueryWrapper> list)
        throws QueryException {
    // A single element is returned without an OR around it
    if (list.size() == 1) {
        return list.get(0).toFragmentQuery();
    }

    Iterator<SpanQueryWrapper> remaining = list.iterator();

    // The first fragment seeds the OR, the rest is appended
    SpanOrQuery disjunction = new SpanOrQuery(remaining.next().toFragmentQuery());
    for (; remaining.hasNext();) {
        disjunction.addClause(remaining.next().toFragmentQuery());
    }
    return disjunction;
}
public void testPassesIfWrapped() throws IOException { RegexpQuery regex = new RegexpQuery(new Term("field", "worl.")); SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<>(regex)); MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024); mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there")); // This passes though assertEquals(0, mindex.search(wrappedquery), 0.00001f); }
/**
 * Both the implicit ("term1 term2") and the explicit ("term1 OR term2")
 * disjunction must be parsed into a {@link SpanOrQuery}.
 */
public void testOrSpans() throws Exception {
    // Expected value goes first so failure messages read correctly
    assertEquals("spanOr([term1, term2])",
            getSpanQuery("term1 term2").toString());
    assertEquals("spanOr([term1, term2])",
            getSpanQuery("term1 OR term2").toString());

    assertTrue(getSpanQuery("term1 term2") instanceof SpanOrQuery);
    // This assertion used to repeat the implicit form, leaving the
    // explicit OR form without a type check
    assertTrue(getSpanQuery("term1 OR term2") instanceof SpanOrQuery);
}
/** make sure all sims work with spanOR(termX, termY) where termY does not exist */ public void testCrazySpans() throws Exception { // The problem: "normal" lucene queries create scorers, returning null if terms dont exist // This means they never score a term that does not exist. // however with spans, there is only one scorer for the whole hierarchy: // inner queries are not real queries, their boosts are ignored, etc. Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); doc.add(newField("foo", "bar", ft)); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher is = newSearcher(ir); for (Similarity sim : sims) { is.setSimilarity(sim); SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar")); SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz")); Query query = new SpanOrQuery(s1, s2); TopDocs td = is.search(query, 10); assertEquals(1, td.totalHits); float score = td.scoreDocs[0].score; assertTrue(score >= 0.0f); assertFalse("inf score for " + sim, Float.isInfinite(score)); } ir.close(); dir.close(); }
@Test public void testFlatQueryShouldBeGeneratedFromSequentiallyShiftedTokens() throws Exception { // prepare test data LinkedList<Token> tokenSequenceWithRepeatedGroup = new LinkedList<Token>(); tokenSequenceWithRepeatedGroup.add(new Token(TEST_QUERY.substring(0, 4), 0, 4, null)); tokenSequenceWithRepeatedGroup.add(new Token(TEST_QUERY.substring(5, 6), 5, 6, null)); tokenSequenceWithRepeatedGroup.add(new Token(TEST_QUERY.substring(6, 10), 6, 10, null)); tokenSequenceWithRepeatedGroup.add(new Token(TEST_QUERY.substring(10, 11), 10, 11, null)); assertTrue("All tokens in test data must be sequentially shifted", parser.isAllTokensSequentiallyShifted(tokenSequenceWithRepeatedGroup)); assertTrue(parser.getEnablePositionIncrements()); LinkedList<LinkedList<Token>> fixedTokenSequences = new LinkedList<LinkedList<Token>>(); fixedTokenSequences.add(tokenSequenceWithRepeatedGroup); // call method to test SpanOrQuery q = parser.generateSpanOrQuery(TEST_FIELD, fixedTokenSequences); // check results assertNotNull(q); SpanQuery[] spanQuery = q.getClauses(); assertEquals("Flat query must be generated", 1, spanQuery.length); assertTrue(spanQuery[0] instanceof SpanNearQuery); SpanNearQuery spanNearQuery = (SpanNearQuery) spanQuery[0]; assertEquals("Slop between term must be 0", 0, spanNearQuery.getSlop()); assertTrue("Terms must be in order", spanNearQuery.isInOrder()); SpanQuery[] termClauses = spanNearQuery.getClauses(); assertEquals("Flat query must be generated (Query: " + q + ")", tokenSequenceWithRepeatedGroup.size(), termClauses.length); for (int i = 0; i < termClauses.length; i++) { assertTrue(termClauses[i] instanceof SpanTermQuery); assertEquals("All tokens must become spanQuery terms", tokenSequenceWithRepeatedGroup.get(i).toString(), ((SpanTermQuery) termClauses[i]).getTerm().text()); } }
public void testPassesIfWrapped() throws IOException { RegexpQuery regex = new RegexpQuery(new Term("field", "worl.")); SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex)); MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024); mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there"))); // This passes though assertEquals(0, mindex.search(wrappedquery), 0.00001f); }
/**
 * Builds a {@link SpanOrQuery} from the child elements of {@code e}:
 * each child element is turned into a span query by the factory.
 *
 * @param e parent element; an optional "boost" attribute (default 1.0)
 *          is applied to the resulting query
 * @return the span OR over all child span queries, in document order
 * @throws ParserException if a child cannot be converted
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    List<SpanQuery> clausesList = new ArrayList<SpanQuery>();

    Node child = e.getFirstChild();
    while (child != null) {
        // Only element children contribute clauses
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            clausesList.add(factory.getSpanQuery((Element) child));
        }
        child = child.getNextSibling();
    }

    SpanOrQuery soq = new SpanOrQuery(
            clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
}
/** * Rewrite a span-based OR query. * * @param q The query to rewrite * @return Rewritten version, or 'oq' unchanged if no changed needed. */ protected Query rewrite(final SpanOrQuery q) { // Rewrite each clause. Allow single clauses to be promoted. return rewriteClauses(q, q.getClauses(), true, new SpanClauseJoiner() { public SpanQuery join(SpanQuery[] clauses) { return new SpanOrQuery(clauses); } }); }
/** * Gloms the term onto each clause within an OR query. * * @param oq Query to glom into * @param term Term to glom on * @param before true to prepend the term, false to append. * @return A new glommed query. */ protected SpanQuery glomInside(SpanOrQuery oq, SpanTermQuery term, boolean before) { SpanQuery[] clauses = oq.getClauses(); boolean anyChanges = false; for (int i = 0; i < clauses.length; i++) { if (clauses[i] instanceof SpanTermQuery) { String ctText = extractTermText(clauses[i]); String newText = before ? (extractTermText(term) + "~" + ctText) : (ctText + "~" + extractTermText(term)); SpanQuery oldClause = clauses[i]; int termLength = isBigram(stopSet, newText) ? 2 : 1; clauses[i] = new SpanTermQuery(newTerm(term.getTerm().field(), newText), termLength); copyBoost(oldClause, term, clauses[i]); anyChanges = true; } else if (clauses[i] instanceof SpanOrQuery) { SpanQuery newq = glomInside((SpanOrQuery)clauses[i], term, before); if (newq != oq) { clauses[i] = newq; anyChanges = true; } } else assert false : "case not handled"; } // for i // No changes? Return the unaltered original query. if (!anyChanges) return oq; // All done! return (SpanQuery)copyBoost(oq, new SpanOrQuery(clauses)); }
/**
 * Analyzes the element's text for the given field and ORs together one
 * {@link SpanTermQuery} per produced token.
 *
 * @param e element with a (possibly inherited) "fieldName" attribute,
 *          non-blank text and an optional "boost" attribute (default 1.0)
 * @return the span OR over all analyzed terms
 * @throws ParserException if required input is missing or analysis
 *         fails with an {@link IOException}
 */
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    final String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    final String value = DOMUtils.getNonBlankTextOrFail(e);

    List<SpanQuery> collected = new ArrayList<SpanQuery>();
    TokenStream tokens = null;
    try {
        tokens = analyzer.tokenStream(fieldName, value);
        TermToBytesRefAttribute termAttr = tokens.addAttribute(TermToBytesRefAttribute.class);
        BytesRef shared = termAttr.getBytesRef();

        tokens.reset();
        while (tokens.incrementToken()) {
            termAttr.fillBytesRef();
            // Copy the bytes: the attribute refills the same BytesRef each time
            BytesRef ownCopy = BytesRef.deepCopyOf(shared);
            collected.add(new SpanTermQuery(new Term(fieldName, ownCopy)));
        }
        tokens.end();

        SpanOrQuery soq = new SpanOrQuery(
                collected.toArray(new SpanQuery[collected.size()]));
        soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
        return soq;
    } catch (IOException ioe) {
        throw new ParserException("IOException parsing value:" + value);
    } finally {
        // Release the stream on every path
        IOUtils.closeWhileHandlingException(tokens);
    }
}