@Override
public Query rewrite(IndexReader reader) throws IOException {
    Query rewritten = super.rewrite(reader);
    if (rewritten != this) {
        return rewritten;
    }
    IndexReaderContext context = reader.getContext();
    TermContext[] ctx = new TermContext[terms.length];
    int[] docFreqs = new int[ctx.length];
    for (int i = 0; i < terms.length; i++) {
        ctx[i] = TermContext.build(context, terms[i]);
        docFreqs[i] = ctx[i].docFreq();
    }
    final int maxDoc = reader.maxDoc();
    blend(ctx, maxDoc, reader);
    return topLevelQuery(terms, ctx, docFreqs, maxDoc);
}
private TermContext adjustTTF(IndexReaderContext readerContext, TermContext termContext, long sumTTF) {
    assert termContext.wasBuiltFor(readerContext);
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    TermContext newTermContext = new TermContext(readerContext);
    List<LeafReaderContext> leaves = readerContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    int df = termContext.docFreq();
    long ttf = sumTTF;
    // Register the adjusted statistics on the first leaf that has a state;
    // later leaves contribute 0 so the totals are not double-counted.
    for (int i = 0; i < len; i++) {
        TermState termState = termContext.get(i);
        if (termState == null) {
            continue;
        }
        newTermContext.register(termState, i, df, ttf);
        df = 0;
        ttf = 0;
    }
    return newTermContext;
}
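// A minimal sketch (not the original blend implementation) of how a
// blend(...) counterpart could use adjustTTF: sum the total term frequency
// over all blended terms and hand every term the same aggregate, so that
// their scores become comparable. The method name blendSketch and this
// blending policy are assumptions for illustration only.
private void blendSketch(IndexReaderContext readerContext, TermContext[] contexts) {
    long sumTTF = 0;
    for (TermContext ctx : contexts) {
        if (ctx.totalTermFreq() == -1) {
            sumTTF = -1; // the codec does not expose totalTermFreq
            break;
        }
        sumTTF += ctx.totalTermFreq();
    }
    for (int i = 0; i < contexts.length; i++) {
        contexts[i] = adjustTTF(readerContext, contexts[i], sumTTF);
    }
}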
protected SeqSpanWeight(SeqSpanQuery query, IndexSearcher searcher) throws IOException {
    super(query);
    this.selfQuery = query;
    this.similarity = searcher.getSimilarity(needsScores);
    this.positions = selfQuery.getPositions();
    this.terms = selfQuery.getTerms();
    this.field = terms[0].field();
    if (positions.length < 2) {
        throw new IllegalStateException("PhraseWeight does not support less than 2 terms, call rewrite first");
    } else if (positions[0] != 0) {
        throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first");
    }
    final IndexReaderContext context = searcher.getTopReaderContext();
    states = new TermContext[terms.length];
    TermStatistics[] termStats = new TermStatistics[terms.length];
    for (int i = 0; i < terms.length; i++) {
        final Term term = terms[i];
        states[i] = TermContext.build(context, term);
        termStats[i] = searcher.termStatistics(term, states[i]);
    }
    stats = similarity.computeWeight(searcher.collectionStatistics(field), termStats);
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term);
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = this.perReaderTermState;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1) {
        termState.setDocFreq(docFreq);
    }
    return new TermWeight(searcher, termState);
}
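// Usage sketch: the PRTS branch above is taken when the query was built with
// a pre-computed per-reader term state, e.g. via Lucene's
// TermQuery(Term, TermContext) constructor. The method name and field/term
// values below are illustrative.
static Query prebuiltTermQueryExample(IndexSearcher searcher) throws IOException {
    Term term = new Term("body", "lucene");
    TermContext states = TermContext.build(searcher.getTopReaderContext(), term);
    return new TermQuery(term, states); // createWeight reuses the cached term state
}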
/**
 * Create a TopTermsSpanBooleanQueryRewrite for
 * at most <code>size</code> terms.
 */
public TopTermsSpanBooleanQueryRewrite(int size) {
    delegate = new TopTermsRewrite<SpanOrQuery>(size) {
        @Override
        protected int getMaxSize() {
            return Integer.MAX_VALUE;
        }

        @Override
        protected SpanOrQuery getTopLevelQuery() {
            return new SpanOrQuery();
        }

        @Override
        protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, TermContext states) {
            final SpanTermQuery q = new SpanTermQuery(term);
            q.setBoost(boost);
            topLevel.addClause(q);
        }
    };
}
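// Usage sketch (Lucene 4.x API): this rewrite is typically installed on a
// SpanMultiTermQueryWrapper so that a multi-term query expands to only the
// highest-scoring terms. Field and term values are illustrative.
static SpanQuery topTermsWildcardExample() {
    SpanMultiTermQueryWrapper<WildcardQuery> wrapper =
            new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "luc*")));
    wrapper.setRewriteMethod(
            new SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite(10));
    return wrapper;
}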
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs,
        Map<Term,TermContext> termContexts, boolean collectPayloads) throws IOException {
    if (spanNearQuery.getClauses().length < 2) {
        throw new IllegalArgumentException("Less than 2 clauses: " + spanNearQuery);
    }
    this.collectPayloads = collectPayloads;
    allowedSlop = spanNearQuery.getSlop();
    SpanQuery[] clauses = spanNearQuery.getClauses();
    subSpans = new Spans[clauses.length];
    matchPayload = new LinkedList<>();
    subSpansByDoc = new Spans[clauses.length];
    for (int i = 0; i < clauses.length; i++) {
        subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
        subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
    }
    query = spanNearQuery; // kept for toString() only.
}
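// Usage sketch (Lucene 4.x API): ordered near-spans are normally obtained
// through SpanNearQuery rather than by constructing this class directly.
// The leaf context and term-context map are assumed to come from the caller.
static Spans orderedNearSpansExample(AtomicReaderContext leaf,
        Map<Term,TermContext> termContexts) throws IOException {
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {
            new SpanTermQuery(new Term("body", "quick")),
            new SpanTermQuery(new Term("body", "fox")) },
        1,     // slop
        true); // in order -> NearSpansOrdered
    return snq.getSpans(leaf, leaf.reader().getLiveDocs(), termContexts);
}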
public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException {
    this.similarity = searcher.getSimilarity();
    this.query = query;
    termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermStatistics[] termStats = new TermStatistics[terms.size()];
    int i = 0;
    for (Term term : terms) {
        TermContext state = TermContext.build(context, term);
        termStats[i] = searcher.termStatistics(term, state);
        termContexts.put(term, state);
        i++;
    }
    final String field = query.getField();
    if (field != null) {
        stats = similarity.computeWeight(query.getBoost(), searcher.collectionStatistics(field), termStats);
    }
}
public MultiPhraseWeight(IndexSearcher searcher) throws IOException {
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();
    // compute idf
    ArrayList<TermStatistics> allTermStats = new ArrayList<>();
    for (final Term[] terms : termArrays) {
        for (Term term : terms) {
            TermContext termContext = termContexts.get(term);
            if (termContext == null) {
                termContext = TermContext.build(context, term);
                termContexts.put(term, termContext);
            }
            allTermStats.add(searcher.termStatistics(term, termContext));
        }
    }
    stats = similarity.computeWeight(getBoost(),
        searcher.collectionStatistics(field),
        allTermStats.toArray(new TermStatistics[allTermStats.size()]));
}
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
        Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
    List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
    for (int i = 0; i < terms.length; i++) {
        final Term term = terms[i];
        TermState termState = termContexts.get(term).get(context.ord);
        if (termState == null) {
            // Term doesn't exist in reader
            continue;
        }
        termsEnum.seekExact(term.bytes(), termState);
        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
        if (postings == null) {
            // term does exist, but has no positions
            throw new IllegalStateException("field \"" + term.field()
                + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
        }
        cost += postings.cost();
        docsEnums.add(postings);
    }
    _queue = new DocsQueue(docsEnums);
    _posList = new IntQueue();
}
private void getPayloads(Collection<byte[]> payloads, SpanQuery query) throws IOException {
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
        termContexts.put(term, TermContext.build(context, term));
    }
    for (AtomicReaderContext atomicReaderContext : context.leaves()) {
        final Spans spans = query.getSpans(atomicReaderContext,
            atomicReaderContext.reader().getLiveDocs(), termContexts);
        while (spans.next()) {
            if (spans.isPayloadAvailable()) {
                Collection<byte[]> payload = spans.getPayload();
                for (byte[] bytes : payload) {
                    payloads.add(bytes);
                }
            }
        }
    }
}
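// Usage sketch: this private helper mirrors Lucene's PayloadSpanUtil, whose
// public entry point collects every payload a span query touches. The method
// name and the field/term values below are illustrative.
static Collection<byte[]> payloadsExample(IndexSearcher searcher) throws IOException {
    PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
    return psu.getPayloadsForQuery(new SpanTermQuery(new Term("body", "fox")));
}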
@Override
public Query rewrite(IndexReader reader) throws IOException {
    IndexReaderContext context = reader.getContext();
    TermContext[] ctx = new TermContext[terms.length];
    int[] docFreqs = new int[ctx.length];
    for (int i = 0; i < terms.length; i++) {
        ctx[i] = TermContext.build(context, terms[i]);
        docFreqs[i] = ctx[i].docFreq();
    }
    final int maxDoc = reader.maxDoc();
    blend(ctx, maxDoc, reader);
    Query query = topLevelQuery(terms, ctx, docFreqs, maxDoc);
    query.setBoost(getBoost());
    return query;
}
private TermContext adjustTTF(TermContext termContext, long sumTTF) {
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    TermContext newTermContext = new TermContext(termContext.topReaderContext);
    List<LeafReaderContext> leaves = termContext.topReaderContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    int df = termContext.docFreq();
    long ttf = sumTTF;
    for (int i = 0; i < len; i++) {
        TermState termState = termContext.get(i);
        if (termState == null) {
            continue;
        }
        newTermContext.register(termState, i, df, ttf);
        df = 0;
        ttf = 0;
    }
    return newTermContext;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    IndexReaderContext context = searcher.getTopReaderContext();
    TermContext mainTermState = null;
    TermContext[] similarStates = new TermContext[similarTerms.length];
    if (needsScores) {
        //
        // get the term contexts, for the main term + for each similar term
        //
        mainTermState = TermContext.build(context, mainTerm);
        for (int i = 0; i < similarTerms.length; i++) {
            similarStates[i] = TermContext.build(context, similarTerms[i].term);
        }
    }
    // else: do not compute the term states, this will help save seeks in the terms
    // dict on segments that have a cache entry for this query
    return new AugmentedTermWeight(searcher, needsScores, mainTermState, similarStates);
}
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if ((perReaderTermS == null) || (perReaderTermS.topReaderContext != context)) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term);
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = perReaderTermS;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1) {
        termState.setDocFreq(docFreq);
    }
    return new TermWeight(searcher, termState);
}
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if ((perReaderTermS == null) || (perReaderTermS.topReaderContext != context)) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term); // cache term lookups!
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = perReaderTermS;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1) {
        termState.setDocFreq(docFreq);
    }
    return new TermWeight(searcher, termState);
}
/**
 * Constructs ElementDistanceExclusionSpans from the specified
 * {@link SpanDistanceQuery}.
 *
 * @param query
 *            a SpanDistanceQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public ElementDistanceExclusionSpans (SpanDistanceQuery query,
        LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    super(query, context, acceptDocs, termContexts);
    elements = query.getElementQuery().getSpans(context, acceptDocs, termContexts);
    hasMoreElements = elements.next();
    hasMoreSpans = firstSpans.next() && hasMoreElements;
    hasMoreSecondSpans = secondSpans.next();
    elementPosition = 0;
    this.isOrdered = query.isOrdered();
    candidateList = new ArrayList<CandidateSpan>();
    targetList = new ArrayList<CandidateSpan>();
    currentDocNum = firstSpans.doc();
    minDistance = query.getMinDistance();
    maxDistance = query.getMaxDistance();
}
public SimpleSpans (SimpleSpanQuery simpleSpanQuery, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    this();
    query = simpleSpanQuery;
    collectPayloads = query.isCollectPayloads();
    // Get the enumeration of the two spans to match
    SpanQuery sq;
    if ((sq = simpleSpanQuery.getFirstClause()) != null)
        firstSpans = sq.getSpans(context, acceptDocs, termContexts);
    if ((sq = simpleSpanQuery.getSecondClause()) != null)
        secondSpans = sq.getSpans(context, acceptDocs, termContexts);
}
/**
 * Constructs SubSpans for the given {@link SpanSubspanQuery}
 * specifying the start offset and the length of the subspans.
 *
 * @param subspanQuery
 *            a SpanSubspanQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public SubSpans (SpanSubspanQuery subspanQuery, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    super(subspanQuery, context, acceptDocs, termContexts);
    this.startOffset = subspanQuery.getStartOffset();
    this.length = subspanQuery.getLength();
    this.matchPayload = new ArrayList<byte[]>(6);
    this.windowSize = subspanQuery.getWindowSize();
    candidates = new PriorityQueue<>(windowSize, comparator);
    if (DEBUG) {
        log.trace("Init SubSpan at {} with length {}", this.startOffset, this.length);
    }
    hasMoreSpans = firstSpans.next();
}
/**
 * Constructs FocusSpans for the given {@link SpanFocusQuery}.
 *
 * @param query
 *            A {@link SpanFocusQuery}.
 * @param context
 *            The {@link LeafReaderContext}.
 * @param acceptDocs
 *            Bit vector representing the documents
 *            to be searched in.
 * @param termContexts
 *            A map managing {@link TermState TermStates}.
 * @throws IOException
 */
public FocusSpans (SpanFocusQuery query, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    super(query, context, acceptDocs, termContexts);
    if (query.getClassNumbers() == null) {
        throw new IllegalArgumentException(
                "At least one class number must be specified.");
    }
    classNumbers = query.getClassNumbers();
    windowSize = query.getWindowSize();
    isSorted = query.isSorted();
    matchTemporaryClass = query.matchTemporaryClass();
    removeTemporaryClasses = query.removeTemporaryClasses();
    candidates = new PriorityQueue<>(windowSize, comparator);
    hasMoreSpans = firstSpans.next();
    currentDoc = firstSpans.doc();
    this.query = query;
}
/**
 * Constructs OrderedDistanceSpans based on the given
 * SpanDistanceQuery.
 *
 * @param query
 *            a SpanDistanceQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public OrderedDistanceSpans (SpanDistanceQuery query,
        LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    super(query, context, acceptDocs, termContexts);
    minDistance = query.getMinDistance();
    maxDistance = query.getMaxDistance();
    hasMoreFirstSpans = firstSpans.next();
    candidateList = new ArrayList<>();
    candidateListIndex = -1;
    if (hasMoreFirstSpans) {
        candidateListDocNum = firstSpans.doc();
    }
}
/**
 * Constructs RelationSpans from the given
 * {@link SpanRelationQuery}.
 *
 * @param relationSpanQuery
 *            a SpanRelationQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public RelationSpans (SpanRelationQuery relationSpanQuery,
        LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    super(relationSpanQuery, context, acceptDocs, termContexts);
    direction = relationSpanQuery.getDirection();
    tempSourceNum = relationSpanQuery.getTempSourceNum();
    tempTargetNum = relationSpanQuery.getTempTargetNum();
    sourceClass = relationSpanQuery.getSourceClass();
    targetClass = relationSpanQuery.getTargetClass();
    candidateList = new ArrayList<>();
    relationTermSpan = firstSpans;
    hasMoreSpans = relationTermSpan.next();
}
/**
 * Constructs SpansWithAttribute from the given
 * {@link SpanWithAttributeQuery} and referent {@link SimpleSpans},
 * such as elementSpans and relationSpans.
 *
 * @param spanWithAttributeQuery
 *            a SpanWithAttributeQuery
 * @param referentSpans
 *            the SimpleSpans the attributes refer to
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public SpansWithAttribute (SpanWithAttributeQuery spanWithAttributeQuery,
        SimpleSpans referentSpans, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    super(spanWithAttributeQuery, context, acceptDocs, termContexts);
    // if (!referentSpans.hasSpanId) {
    //     throw new IllegalArgumentException("Referent spans must have ids.");
    // }
    this.referentSpans = referentSpans;
    hasMoreSpans = referentSpans.next();
    setAttributeList(spanWithAttributeQuery, context, acceptDocs, termContexts);
}
public SpansWithAttribute (SpanWithAttributeQuery spanWithAttributeQuery,
        LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    super(spanWithAttributeQuery, context, acceptDocs, termContexts);
    hasMoreSpans = true;
    setAttributeList(spanWithAttributeQuery, context, acceptDocs, termContexts);
    if (attributeList.size() == 0) {
        throw new IllegalArgumentException(
                "No (positive) attribute is defined.");
    } else if (attributeList.size() > 1) {
        referentSpans = attributeList.get(0);
        attributeList.remove(0);
    }
}
/**
 * Adds the given {@link SpanAttributeQuery} to the attributeList or the
 * notAttributeList, depending on whether the query is a negation.
 *
 * @param sq
 *            a SpanAttributeQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
private void addAttributes (SpanAttributeQuery sq, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts)
        throws IOException {
    AttributeSpans as = (AttributeSpans) sq.getSpans(context, acceptDocs, termContexts);
    if (sq.isNegation()) {
        notAttributeList.add(as);
        as.next();
    } else {
        attributeList.add(as);
        hasMoreSpans &= as.next();
    }
}
/**
 * Construct a new ClassSpans object.
 *
 * @param operand
 *            An arbitrary nested {@link SpanQuery}.
 * @param context
 *            The {@link LeafReaderContext}.
 * @param acceptDocs
 *            Bit vector representing the documents
 *            to be searched in.
 * @param termContexts
 *            A map managing {@link TermState TermStates}.
 * @param number
 *            The identifying class number.
 * @throws IOException
 */
public ClassSpans (SpanQuery operand, LeafReaderContext context,
        Bits acceptDocs, Map<Term, TermContext> termContexts, byte number)
        throws IOException {
    spans = operand.getSpans(context, acceptDocs, termContexts);
    // The number of the class
    this.number = number;
    // The current operand
    this.operand = operand;
    // The highlighted payload
    this.classedPayload = new ArrayList<byte[]>(3);
}
/**
 * Constructs UnorderedDistanceSpans for the given
 * {@link SpanDistanceQuery}.
 *
 * @param query
 *            a SpanDistanceQuery
 * @param context
 *            the {@link LeafReaderContext}
 * @param acceptDocs
 *            bit vector representing the documents to be searched in
 * @param termContexts
 *            a map managing {@link TermState TermStates}
 * @throws IOException
 */
public UnorderedDistanceSpans (SpanDistanceQuery query,
        LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    super(query, context, acceptDocs, termContexts);
    minDistance = query.getMinDistance();
    maxDistance = query.getMaxDistance();
    firstSpanList = new ArrayList<CandidateSpan>();
    secondSpanList = new ArrayList<CandidateSpan>();
    matchList = new ArrayList<CandidateSpan>();
    hasMoreFirstSpans = firstSpans.next();
    hasMoreSecondSpans = secondSpans.next();
    hasMoreSpans = hasMoreFirstSpans && hasMoreSecondSpans;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (this.terms.isEmpty()) {
        return new BooleanQuery();
    } else if (this.terms.size() == 1) {
        final Query tq = newTermQuery(this.terms.get(0), null);
        tq.setBoost(getBoost());
        return tq;
    }
    final List<AtomicReaderContext> leaves = reader.leaves();
    final int maxDoc = reader.maxDoc();
    final TermContext[] contextArray = new TermContext[terms.size()];
    final Term[] queryTerms = this.terms.toArray(new Term[0]);
    collectTermContext(reader, leaves, contextArray, queryTerms);
    return buildQuery(maxDoc, contextArray, queryTerms);
}
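// collectTermContext is not shown above; a minimal sketch in the spirit of
// Lucene's CommonTermsQuery follows. It walks every leaf once and registers
// the per-segment TermState plus document/term frequencies for each query
// term, so the rewrite can partition terms by document frequency without a
// second pass over the terms dictionary.
public void collectTermContext(IndexReader reader, List<AtomicReaderContext> leaves,
        TermContext[] contextArray, Term[] queryTerms) throws IOException {
    TermsEnum termsEnum = null;
    for (AtomicReaderContext context : leaves) {
        final Fields fields = context.reader().fields();
        if (fields == null) {
            continue; // reader has no fields
        }
        for (int i = 0; i < queryTerms.length; i++) {
            Term term = queryTerms[i];
            TermContext termContext = contextArray[i];
            final Terms terms = fields.terms(term.field());
            if (terms == null) {
                continue; // field does not exist in this segment
            }
            termsEnum = terms.iterator(termsEnum);
            if (termsEnum == TermsEnum.EMPTY) {
                continue;
            }
            if (termsEnum.seekExact(term.bytes())) {
                if (termContext == null) {
                    contextArray[i] = new TermContext(reader.getContext(),
                            termsEnum.termState(), context.ord,
                            termsEnum.docFreq(), termsEnum.totalTermFreq());
                } else {
                    termContext.register(termsEnum.termState(), context.ord,
                            termsEnum.docFreq(), termsEnum.totalTermFreq());
                }
            }
        }
    }
}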
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher,
        Map<Integer,TermContext> termStates) throws IOException {
    this.automaton = automaton;
    this.searcher = searcher;
    this.termStates = termStates;
    this.similarity = searcher.getSimilarity();
    List<TermStatistics> allTermStats = new ArrayList<>();
    for (Map.Entry<Integer,BytesRef> ent : idToTerm.entrySet()) {
        Integer termID = ent.getKey();
        if (ent.getValue() != null) {
            allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()),
                termStates.get(termID)));
        }
    }
    stats = similarity.computeWeight(getBoost(),
        searcher.collectionStatistics(field),
        allTermStats.toArray(new TermStatistics[allTermStats.size()]));
}
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
    final NodeState node = nodes[nodeID];
    final Map<Term,TermStatistics> stats = new HashMap<>();
    final IndexSearcher s = node.searchers.acquire(version);
    if (s == null) {
        throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
    }
    try {
        for (Term term : terms) {
            final TermContext termContext = TermContext.build(s.getIndexReader().getContext(), term);
            stats.put(term, s.termStatistics(term, termContext));
        }
    } finally {
        node.searchers.release(s);
    }
    return stats;
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term, true); // cache term lookups!
    } else {
        // PRTS was pre-built for this IndexSearcher
        termState = this.perReaderTermState;
    }
    // we must not ignore the given docFreq - if set, use the given value (lie)
    if (docFreq != -1) {
        termState.setDocFreq(docFreq);
    }
    return new TermWeight(searcher, termState);
}