@Override protected QueryNode postProcessNode(QueryNode node) { // set setMultiTermRewriteMethod for WildcardQueryNode and // PrefixWildcardQueryNode if (node instanceof WildcardQueryNode || node instanceof AbstractRangeQueryNode || node instanceof RegexpQueryNode) { MultiTermQuery.RewriteMethod rewriteMethod = getQueryConfigHandler().get(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD); if (rewriteMethod == null) { // This should not happen, this configuration is set in the // StandardQueryConfigHandler throw new IllegalArgumentException( "StandardQueryConfigHandler.ConfigurationKeys.MULTI_TERM_REWRITE_METHOD should be set on the QueryConfigHandler"); } // use a TAG to take the value to the Builder node.setTag(MultiTermRewriteMethodProcessor.TAG_ID, rewriteMethod); } return node; }
public StandardQueryConfigHandler() { // Add listener that will build the FieldConfig. addFieldConfigListener(new FieldBoostMapFCListener(this)); addFieldConfigListener(new FieldDateResolutionFCListener(this)); addFieldConfigListener(new NumericFieldConfigListener(this)); // Default Values set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, false); // default in 2.9 set(ConfigurationKeys.ANALYZER, null); //default value 2.4 set(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR); set(ConfigurationKeys.PHRASE_SLOP, 0); //default value 2.4 set(ConfigurationKeys.LOWERCASE_EXPANDED_TERMS, true); //default value 2.4 set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, false); //default value 2.4 set(ConfigurationKeys.FIELD_BOOST_MAP, new LinkedHashMap<String, Float>()); set(ConfigurationKeys.FUZZY_CONFIG, new FuzzyConfig()); set(ConfigurationKeys.LOCALE, Locale.getDefault()); set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); set(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP, new HashMap<CharSequence, DateTools.Resolution>()); }
/**
 * Parses a spanMultiTerm clause: a {@code MATCH_NAME} field whose value is an
 * object holding a multi term query, and wraps that query in a
 * {@link SpanMultiTermQueryWrapper}.
 *
 * @param parseContext context supplying the parser and the inner-query parsing
 * @return the span wrapper around the parsed multi term query
 * @throws IOException if the underlying parser fails
 * @throws QueryParsingException if the {@code MATCH_NAME} field is missing, its
 *         value is not an object, or the inner query is not a
 *         {@code MultiTermQuery}
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
  final XContentParser parser = parseContext.parser();
  final String missingClause = "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause";

  // Expect the MATCH_NAME field name first ...
  Token current = parser.nextToken();
  if (!MATCH_NAME.equals(parser.currentName()) || current != XContentParser.Token.FIELD_NAME) {
    throw new QueryParsingException(parseContext, missingClause);
  }

  // ... followed by the start of the object that holds the inner query.
  current = parser.nextToken();
  if (current != XContentParser.Token.START_OBJECT) {
    throw new QueryParsingException(parseContext, missingClause);
  }

  final Query inner = parseContext.parseInnerQuery();
  if (inner instanceof MultiTermQuery) {
    // Advance once more after the inner query before returning.
    parser.nextToken();
    return new SpanMultiTermQueryWrapper<>((MultiTermQuery) inner);
  }
  throw new QueryParsingException(parseContext, "spanMultiTerm [" + MATCH_NAME + "] must be of type multi term query");
}
private Optional<Query> limitingFilter(Query query, boolean isNegated) { if (query instanceof SpanQuery) { return limitingFilterForSpan((SpanQuery) query, isNegated); } else if (query instanceof Filter) { return Optional.of(query); } else if (query instanceof BooleanQuery) { return boolQuery((BooleanQuery) query, isNegated); } else if (query instanceof TermQuery) { return Optional.of(query); } else if (query instanceof PhraseQuery) { return phraseFilter((PhraseQuery) query, isNegated); } else if (query instanceof MultiTermQuery) { return Optional.of(query); } else if (query instanceof WildcardPhraseQuery) { return wildcardPhraseFilter((WildcardPhraseQuery) query, isNegated); } else if (query instanceof ToParentBlockJoinQuery) { //This can be really bad for performance, if the nested query contains expensive operations (phrases/spans) //On the other hand, it is only slow if the field actually has any data, and we currently do not have // any data in the only nested text field (enrichments.sentences) return Optional.of(query); } else { //This should never happen, but if it does, it might be really bad for performance //logger.warn("failed to limit query, this should never happen. Query : [{}]", query.toString()); return Optional.of(query); } }
/**
 * Adds the term texts of every flattened query to the term set that
 * {@code getTermSet(query)} returns for it.
 *
 * @param flatQueries the queries to collect terms from
 * @param reader index reader used to rewrite multi term queries; when
 *        {@code null}, multi term queries are rejected like any other
 *        unsupported query
 * @throws IOException if rewriting a multi term query fails
 * @throws RuntimeException if a query is not a term query, a phrase query, or
 *         (with a reader) a multi term query
 */
void saveTerms( Collection<Query> flatQueries, IndexReader reader ) throws IOException{
  for( Query flat : flatQueries ){
    // Look the set up first, before the query type is inspected.
    final Set<String> terms = getTermSet( flat );

    if( flat instanceof TermQuery ){
      final Term single = ((TermQuery)flat).getTerm();
      terms.add( single.text() );
    }
    else if( flat instanceof PhraseQuery ){
      final Term[] phraseTerms = ((PhraseQuery)flat).getTerms();
      for( Term phraseTerm : phraseTerms ){
        terms.add( phraseTerm.text() );
      }
    }
    else if( flat instanceof MultiTermQuery && reader != null ){
      // The rewrite is expected to yield a BooleanQuery of TermQuery clauses;
      // the casts fail otherwise.
      final BooleanQuery expanded = (BooleanQuery) flat.rewrite( reader );
      for( BooleanClause clause : expanded.getClauses() ){
        final TermQuery expandedTerm = (TermQuery) clause.getQuery();
        terms.add( expandedTerm.getTerm().text() );
      }
    }
    else{
      throw new RuntimeException( "query \"" + flat.toString() + "\" must be flatten first." );
    }
  }
}
/** * Create a new SlowFuzzyQuery that will match terms with a similarity * of at least <code>minimumSimilarity</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix * of that length is also required. * * @param term the term to search for * @param minimumSimilarity a value between 0 and 1 to set the required similarity * between the query term and the matching terms. For example, for a * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length * as the query term is considered similar to the query term if the edit distance * between both terms is less than <code>length(term)*0.5</code> * <p> * Alternatively, if <code>minimumSimilarity</code> is >= 1f, it is interpreted * as a pure Levenshtein edit distance. For example, a value of <code>2f</code> * will match all terms within an edit distance of <code>2</code> from the * query term. Edit distances specified in this way may not be fractional. * * @param prefixLength length of common (non-fuzzy) prefix * @param maxExpansions the maximum number of terms to match. If this number is * greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, * then the maxClauseCount will be used instead. 
* @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0 * or if prefixLength < 0 */ public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength, int maxExpansions) { super(term.field()); this.term = term; if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity) throw new IllegalArgumentException("fractional edit distances are not allowed"); if (minimumSimilarity < 0.0f) throw new IllegalArgumentException("minimumSimilarity < 0"); if (prefixLength < 0) throw new IllegalArgumentException("prefixLength < 0"); if (maxExpansions < 0) throw new IllegalArgumentException("maxExpansions < 0"); setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions)); String text = term.text(); int len = text.codePointCount(0, text.length()); if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) { this.termLongEnough = true; } this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; }
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */ public void testBoostOnlyRewrite() throws Exception { Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); addDoc("Lucene", writer); addDoc("Lucene", writer); addDoc("Lucenne", writer); IndexReader reader = writer.getReader(); IndexSearcher searcher = newSearcher(reader); writer.close(); SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene")); query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50)); ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(3, hits.length); // normally, 'Lucenne' would be the first result as IDF will skew the score. assertEquals("Lucene", reader.document(hits[0].doc).get("field")); assertEquals("Lucene", reader.document(hits[1].doc).get("field")); assertEquals("Lucenne", reader.document(hits[2].doc).get("field")); reader.close(); directory.close(); }
@Override public RegexpQuery build(QueryNode queryNode) throws QueryNodeException { RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode; // TODO: make the maxStates configurable w/ a reasonable default (QueryParserBase uses 10000) RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(), regexpNode.textToBytesRef())); MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod) queryNode .getTag(MultiTermRewriteMethodProcessor.TAG_ID); if (method != null) { q.setRewriteMethod(method); } return q; }
/**
 * Creates the query for a wildcard term.
 * <ul>
 *   <li>A bare {@code *} becomes a boolean query with two SHOULD term clauses
 *       that look the term's field name up in {@code FIELD_FIELDS} and
 *       {@code FIELD_PROPERTIES}.</li>
 *   <li>A pattern containing a backslash is converted to a regular expression
 *       and returned as a {@link RegexpQuery}.</li>
 *   <li>Anything else becomes a wildcard query using a top-terms scoring
 *       rewrite limited to {@code topTermSpanRewriteLimit}.</li>
 * </ul>
 *
 * @param t the wildcard term
 * @return the query built for the term
 */
@Override
protected Query newWildcardQuery(Term t) {
  final String pattern = t.text();
  final String field = t.field();

  if (pattern.equals("*")) {
    final BooleanQuery anyValue = new BooleanQuery();
    anyValue.add(createTermQuery(FIELD_FIELDS, field), Occur.SHOULD);
    anyValue.add(createTermQuery(FIELD_PROPERTIES, field), Occur.SHOULD);
    return anyValue;
  }

  if (pattern.contains("\\")) {
    final String regexp = SearchLanguageConversion.convert(
        SearchLanguageConversion.DEF_LUCENE, SearchLanguageConversion.DEF_REGEX, pattern);
    return new RegexpQuery(new Term(field, regexp));
  }

  final org.apache.lucene.search.WildcardQuery wildcard =
      new org.apache.lucene.search.WildcardQuery(t);
  wildcard.setRewriteMethod(
      new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(topTermSpanRewriteLimit));
  return wildcard;
}
/**
 * Resolves a textual rewrite-method name to a {@link MultiTermQuery.RewriteMethod}.
 * <p>
 * The fixed names (constant score, scoring boolean, constant score boolean) are
 * matched first. Otherwise the value is split at its first digit into a name
 * and a numeric size, and the name is matched against the top-terms variants.
 *
 * @param rewriteMethod the textual rewrite method, or {@code null}
 * @param defaultRewriteMethod returned when {@code rewriteMethod} is {@code null}
 * @return the matching rewrite method, or {@code defaultRewriteMethod} when
 *         {@code rewriteMethod} is {@code null}
 * @throws IllegalArgumentException if the value matches no known rewrite
 *         method, or its numeric size suffix is not a valid integer
 */
public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
  if (rewriteMethod == null) {
    return defaultRewriteMethod;
  }
  if (CONSTANT_SCORE.match(rewriteMethod)) {
    return MultiTermQuery.CONSTANT_SCORE_REWRITE;
  }
  if (SCORING_BOOLEAN.match(rewriteMethod)) {
    return MultiTermQuery.SCORING_BOOLEAN_REWRITE;
  }
  if (CONSTANT_SCORE_BOOLEAN.match(rewriteMethod)) {
    return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE;
  }

  // Everything from the first digit onwards is the size of a top-terms rewrite.
  int firstDigit = -1;
  for (int i = 0; i < rewriteMethod.length(); ++i) {
    if (Character.isDigit(rewriteMethod.charAt(i))) {
      firstDigit = i;
      break;
    }
  }

  if (firstDigit >= 0) {
    final int size;
    try {
      size = Integer.parseInt(rewriteMethod.substring(firstDigit));
    } catch (NumberFormatException e) {
      // A malformed or overflowing suffix (e.g. "top_terms_10x") is reported
      // with the same message as any other unparsable value; the cause is kept.
      throw new IllegalArgumentException("Failed to parse rewrite_method [" + rewriteMethod + "]", e);
    }

    String rewriteMethodName = rewriteMethod.substring(0, firstDigit);
    if (TOP_TERMS.match(rewriteMethodName)) {
      return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
    }
    if (TOP_TERMS_BOOST.match(rewriteMethodName)) {
      return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
    }
    if (TOP_TERMS_BLENDED_FREQS.match(rewriteMethodName)) {
      return new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(size);
    }
  }

  throw new IllegalArgumentException("Failed to parse rewrite_method [" + rewriteMethod + "]");
}
/**
 * Builds a {@link PrefixQuery} on this field for the given value.
 *
 * @param value the prefix, converted with {@code indexedValueForSearch}
 * @param method rewrite method to apply, or {@code null} to keep the query's own
 * @param context not used by this implementation
 * @return the prefix query
 */
@Override
public final Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
  failIfNotIndexed();

  final Term prefixTerm = new Term(name(), indexedValueForSearch(value));
  final PrefixQuery result = new PrefixQuery(prefixTerm);
  if (method == null) {
    return result;
  }
  result.setRewriteMethod(method);
  return result;
}
/**
 * Builds a {@link RegexpQuery} on this field for the given expression.
 *
 * @param value the regular expression, converted with {@code indexedValueForSearch}
 * @param flags passed through to the {@code RegexpQuery} constructor
 * @param maxDeterminizedStates passed through to the {@code RegexpQuery} constructor
 * @param method rewrite method to apply, or {@code null} to keep the query's own
 * @param context not used by this implementation
 * @return the regexp query
 */
@Override
public final Query regexpQuery(String value, int flags, int maxDeterminizedStates,
    MultiTermQuery.RewriteMethod method, QueryShardContext context) {
  failIfNotIndexed();

  final Term regexpTerm = new Term(name(), indexedValueForSearch(value));
  final RegexpQuery result = new RegexpQuery(regexpTerm, flags, maxDeterminizedStates);
  if (method == null) {
    return result;
  }
  result.setRewriteMethod(method);
  return result;
}
/**
 * Parses "first foo-bar-foobar* last" with wildcard analysis enabled, once per
 * default operator, and checks the result: the hyphenated wildcard token must
 * become a nested boolean query of two term queries and a trailing prefix
 * query.
 */
public void testToQueryWildcarQuery() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final String queryString = "first foo-bar-foobar* last";
  for (Operator op : Operator.values()) {
    final BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();

    MapperQueryParser queryParser = new MapperQueryParser(createShardContext());
    QueryParserSettings settings = new QueryParserSettings(queryString);
    settings.defaultField(STRING_FIELD_NAME);
    settings.fieldsAndWeights(Collections.emptyMap());
    settings.analyzeWildcard(true);
    settings.fuzziness(Fuzziness.AUTO);
    settings.rewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    settings.defaultOperator(op.toQueryParserOperator());
    queryParser.reset(settings);

    final Query actual = queryParser.parse(queryString);

    // "foo-bar-foobar*" -> foo, bar, foobar* (prefix)
    final Query hyphenated = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(STRING_FIELD_NAME, "foo")), defaultOp)
        .add(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), defaultOp)
        .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), defaultOp)
        .build();
    final Query expected = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(STRING_FIELD_NAME, "first")), defaultOp)
        .add(hyphenated, defaultOp)
        .add(new TermQuery(new Term(STRING_FIELD_NAME, "last")), defaultOp)
        .build();

    assertThat(actual, Matchers.equalTo(expected));
  }
}
/**
 * Same scenario as the plain wildcard test, but with a {@code MockRepeatAnalyzer}
 * forced on the parser: every plain term is expected as a {@link SynonymQuery}
 * of the term with itself, and the wildcard token as a coord-disabled boolean
 * query of two identical SHOULD prefix queries.
 */
public void testToQueryWilcardQueryWithSynonyms() throws Exception {
  assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);

  final String queryString = "first foo-bar-foobar* last";
  for (Operator op : Operator.values()) {
    final BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();

    MapperQueryParser queryParser = new MapperQueryParser(createShardContext());
    QueryParserSettings settings = new QueryParserSettings(queryString);
    settings.defaultField(STRING_FIELD_NAME);
    settings.fieldsAndWeights(Collections.emptyMap());
    settings.analyzeWildcard(true);
    settings.fuzziness(Fuzziness.AUTO);
    settings.rewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    settings.defaultOperator(op.toQueryParserOperator());
    settings.forceAnalyzer(new MockRepeatAnalyzer());
    queryParser.reset(settings);

    final Query actual = queryParser.parse(queryString);

    // foobar* repeated by the analyzer -> two SHOULD prefix clauses, coord disabled
    final Query repeatedPrefix = new BooleanQuery.Builder()
        .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), BooleanClause.Occur.SHOULD)
        .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), BooleanClause.Occur.SHOULD)
        .setDisableCoord(true)
        .build();
    final Query hyphenated = new BooleanQuery.Builder()
        .add(new SynonymQuery(new Term(STRING_FIELD_NAME, "foo"),
            new Term(STRING_FIELD_NAME, "foo")), defaultOp)
        .add(new SynonymQuery(new Term(STRING_FIELD_NAME, "bar"),
            new Term(STRING_FIELD_NAME, "bar")), defaultOp)
        .add(repeatedPrefix, defaultOp)
        .build();
    final Query expected = new BooleanQuery.Builder()
        .add(new SynonymQuery(new Term(STRING_FIELD_NAME, "first"),
            new Term(STRING_FIELD_NAME, "first")), defaultOp)
        .add(hyphenated, defaultOp)
        .add(new SynonymQuery(new Term(STRING_FIELD_NAME, "last"),
            new Term(STRING_FIELD_NAME, "last")), defaultOp)
        .build();

    assertThat(actual, Matchers.equalTo(expected));
  }
}
/**
 * Converts a FreeTextDateQuery to a BooleanClause.
 * <p>
 * The date range becomes a {@link TermRangeQuery} over the query's field,
 * using the constant-score filter rewrite, and is wrapped as a SHOULD clause.
 *
 * @param query the free-text date query to convert
 * @return a SHOULD clause holding the equivalent term range query
 */
private BooleanClause convertDate(FreeTextDateQuery query) {
  final TermRangeQuery dateRange = new TermRangeQuery(
      query.getField(),
      convertDate(query.getStart(), query),
      convertDate(query.getEnd(), query),
      query.isIncludeStart(),
      query.isIncludeEnd());
  dateRange.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

  final BooleanClause optionalClause = new BooleanClause(dateRange, Occur.SHOULD);
  return optionalClause;
}
/**
 * Create a new SpanMultiTermQueryWrapper.
 * <p>
 * If the wrapped query currently uses a top-terms rewrite, a span top-terms
 * rewrite of the same size is installed; otherwise the scoring span rewrite
 * is used.
 *
 * @param query Query to wrap.
 * <p>
 * NOTE: This will call {@link MultiTermQuery#setRewriteMethod(MultiTermQuery.RewriteMethod)}
 * on the wrapped <code>query</code>, changing its rewrite method to a suitable one for spans.
 * Be sure to not change the rewrite method on the wrapped query afterwards! Doing so will
 * throw {@link UnsupportedOperationException} on rewriting this query!
 */
@SuppressWarnings({"rawtypes","unchecked"})
public SpanMultiTermQueryWrapper(Q query) {
  this.query = query;

  final MultiTermQuery.RewriteMethod wrappedMethod = query.getRewriteMethod();
  if (!(wrappedMethod instanceof TopTermsRewrite)) {
    setRewriteMethod(SCORING_SPAN_QUERY_REWRITE);
  } else {
    // Carry the original queue size over to the span rewrite.
    final TopTermsRewrite topTerms = (TopTermsRewrite) wrappedMethod;
    final int pqsize = topTerms.getSize();
    setRewriteMethod(new TopTermsSpanBooleanQueryRewrite(pqsize));
  }
}
/**
 * Expert: returns the rewriteMethod of the wrapped query.
 *
 * @return the wrapped query's rewrite method as a {@code SpanRewriteMethod}
 * @throws UnsupportedOperationException if the wrapped query's rewrite method
 *         is not a {@code SpanRewriteMethod}
 */
public final SpanRewriteMethod getRewriteMethod() {
  final MultiTermQuery.RewriteMethod current = query.getRewriteMethod();
  if (current instanceof SpanRewriteMethod) {
    return (SpanRewriteMethod) current;
  }
  throw new UnsupportedOperationException("You can only use SpanMultiTermQueryWrapper with a suitable SpanRewriteMethod.");
}
/**
 * Builds a {@link TermRangeQuery} from a {@link TermRangeQueryNode}.
 * An empty bound text is passed on as {@code null}. The rewrite method tagged
 * on the node under {@code MultiTermRewriteMethodProcessor.TAG_ID} is applied
 * when one is present.
 *
 * @param queryNode the node to convert; must be a {@code TermRangeQueryNode}
 * @return the string range query for the node's field and bounds
 */
@Override
public TermRangeQuery build(QueryNode queryNode) throws QueryNodeException {
  final TermRangeQueryNode rangeNode = (TermRangeQueryNode) queryNode;
  final FieldQueryNode upperBound = rangeNode.getUpperBound();
  final FieldQueryNode lowerBound = rangeNode.getLowerBound();
  final String field = StringUtils.toString(rangeNode.getField());

  // Empty bound text is replaced by null.
  final String lowerRaw = lowerBound.getTextAsString();
  final String upperRaw = upperBound.getTextAsString();
  final String lowerText = lowerRaw.isEmpty() ? null : lowerRaw;
  final String upperText = upperRaw.isEmpty() ? null : upperRaw;

  final TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(
      field, lowerText, upperText,
      rangeNode.isLowerInclusive(), rangeNode.isUpperInclusive());

  final MultiTermQuery.RewriteMethod taggedMethod =
      (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
  if (taggedMethod == null) {
    return rangeQuery;
  }
  rangeQuery.setRewriteMethod(taggedMethod);
  return rangeQuery;
}
/**
 * Builds a {@link WildcardQuery} from a {@link WildcardQueryNode}, applying
 * the rewrite method tagged on the node under
 * {@code MultiTermRewriteMethodProcessor.TAG_ID} when one is present.
 *
 * @param queryNode the node to convert; must be a {@code WildcardQueryNode}
 * @return the wildcard query for the node's field and text
 */
@Override
public WildcardQuery build(QueryNode queryNode) throws QueryNodeException {
  final WildcardQueryNode wildcardNode = (WildcardQueryNode) queryNode;
  final Term pattern =
      new Term(wildcardNode.getFieldAsString(), wildcardNode.getTextAsString());
  final WildcardQuery wildcardQuery = new WildcardQuery(pattern);

  final MultiTermQuery.RewriteMethod taggedMethod =
      (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
  if (taggedMethod == null) {
    return wildcardQuery;
  }
  wildcardQuery.setRewriteMethod(taggedMethod);
  return wildcardQuery;
}
/**
 * Builds a {@link PrefixQuery} from a {@link PrefixWildcardQueryNode}.
 * The prefix is the node's text with its last character removed. The rewrite
 * method tagged on the node under
 * {@code MultiTermRewriteMethodProcessor.TAG_ID} is applied when present.
 *
 * @param queryNode the node to convert; must be a {@code PrefixWildcardQueryNode}
 * @return the prefix query for the node's field and truncated text
 */
@Override
public PrefixQuery build(QueryNode queryNode) throws QueryNodeException {
  final PrefixWildcardQueryNode prefixNode = (PrefixWildcardQueryNode) queryNode;

  // Drop the final character (presumably the trailing wildcard; confirm against the parser).
  final CharSequence rawText = prefixNode.getText();
  final String prefix = rawText.subSequence(0, rawText.length() - 1).toString();

  final PrefixQuery prefixQuery = new PrefixQuery(new Term(prefixNode.getFieldAsString(), prefix));

  final MultiTermQuery.RewriteMethod taggedMethod =
      (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
  if (taggedMethod == null) {
    return prefixQuery;
  }
  prefixQuery.setRewriteMethod(taggedMethod);
  return prefixQuery;
}
/**
 * Converts a {@link RegexpQueryNode} into a {@link RegexpQuery}.
 * If the node carries a rewrite method under
 * {@code MultiTermRewriteMethodProcessor.TAG_ID}, it is set on the query.
 *
 * @param queryNode the node to convert; must be a {@code RegexpQueryNode}
 * @return the regexp query for the node's field and text
 */
@Override
public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
  final RegexpQueryNode source = (RegexpQueryNode) queryNode;
  final RegexpQuery built = new RegexpQuery(
      new Term(source.getFieldAsString(), source.textToBytesRef()));

  final MultiTermQuery.RewriteMethod rewrite =
      (MultiTermQuery.RewriteMethod) queryNode.getTag(MultiTermRewriteMethodProcessor.TAG_ID);
  if (rewrite != null) {
    built.setRewriteMethod(rewrite);
  }

  return built;
}
@Override protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { if (isPass2ResolvingPhrases) { // Must use old-style RangeQuery in order to produce a BooleanQuery // that can be turned into SpanOr clause TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive); rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); return rangeQuery; } return super.newRangeQuery(field, part1, part2, startInclusive, endInclusive); }