/**
 * Recursively walks the given spans and hands the payloads of every
 * {@code NearSpansOrdered} / {@code NearSpansUnordered} entry to
 * {@code processPayloads}, then descends into that entry's sub-spans.
 * Entries of any other type are skipped.
 *
 * @param subSpans the spans to inspect
 * @throws IOException propagated from the span accessors
 */
public void getPayloads(Spans[] subSpans) throws IOException {
  for (Spans current : subSpans) {
    if (current instanceof NearSpansOrdered) {
      NearSpansOrdered ordered = (NearSpansOrdered) current;
      if (ordered.isPayloadAvailable()) {
        processPayloads(ordered.getPayload(), current.start(), current.end());
      }
      // Descend regardless of whether this level carried a payload.
      getPayloads(ordered.getSubSpans());
    } else if (current instanceof NearSpansUnordered) {
      NearSpansUnordered unordered = (NearSpansUnordered) current;
      if (unordered.isPayloadAvailable()) {
        processPayloads(unordered.getPayload(), current.start(), current.end());
      }
      getPayloads(unordered.getSubSpans());
    }
  }
}
/**
 * Accumulates the sloppy frequency for the document the spans are
 * currently positioned on, and feeds each match to {@code getPayloads}.
 * Resets {@code freq}, {@code payloadScore} and {@code payloadsSeen}
 * before iterating.
 *
 * @return {@code false} if there are no more spans, {@code true} otherwise
 * @throws IOException propagated from the spans
 */
@Override
protected boolean setFreqCurrentDoc() throws IOException {
  if (!more) {
    return false;
  }
  doc = spans.doc();
  freq = 0.0f;
  payloadScore = 0;
  payloadsSeen = 0;

  boolean sameDoc = true;
  while (sameDoc) {
    final int matchLength = spans.end() - spans.start();
    freq += docScorer.computeSlopFactor(matchLength);
    getPayloads(new Spans[] { spans });
    more = spans.next();
    // Continue only while the spans stay on the document we started with.
    sameDoc = more && (doc == spans.doc());
  }
  return true;
}
/**
 * Collects into {@code payloads} every payload exposed by the spans of
 * {@code query} across all leaves of {@code context}.
 *
 * @param payloads receives the payload byte arrays
 * @param query the span query whose matches are scanned
 * @throws IOException propagated from the index access
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query) throws IOException {
  TreeSet<Term> queryTerms = new TreeSet<>();
  query.extractTerms(queryTerms);

  // One TermContext per query term, built against the top-level context.
  Map<Term,TermContext> contextsByTerm = new HashMap<>();
  for (Term queryTerm : queryTerms) {
    contextsByTerm.put(queryTerm, TermContext.build(context, queryTerm));
  }

  for (AtomicReaderContext leaf : context.leaves()) {
    final Spans leafSpans = query.getSpans(leaf, leaf.reader().getLiveDocs(), contextsByTerm);
    while (leafSpans.next()) {
      if (!leafSpans.isPayloadAvailable()) {
        continue;
      }
      for (byte[] data : leafSpans.getPayload()) {
        payloads.add(data);
      }
    }
  }
}
/**
 * Brings {@code x} and {@code y} onto the same document. While their
 * document numbers differ, the span with the smaller document number is
 * skipped to the document number of the other one. When a skip fails,
 * {@code hasMoreSpans} is cleared.
 *
 * @param x the first spans
 * @param y the second spans
 * @return true iff both spans end up in the same document
 * @throws IOException propagated from the spans
 */
protected boolean ensureSameDoc (Spans x, Spans y) throws IOException {
    while (x.doc() != y.doc()) {
        // Always move the side that is behind.
        boolean advanced = (x.doc() < y.doc())
                ? x.skipTo(y.doc())
                : y.skipTo(x.doc());
        if (!advanced) {
            hasMoreSpans = false;
            return false;
        }
    }
    return true;
}
/**
 * Creates a CandidateSpan that snapshots the current state of the given
 * span: document number, start, end, cost, payloads (when available)
 * and, for {@code SimpleSpans} or {@code ClassSpans}, the span id.
 *
 * @param span
 *            the span to copy from
 * @throws IOException
 *             propagated from the span accessors
 */
public CandidateSpan (Spans span) throws IOException {
    this.doc = span.doc();
    this.start = span.start();
    this.end = span.end();
    this.cost = span.cost();

    this.payloads = new ArrayList<>();
    if (span.isPayloadAvailable()) {
        setPayloads(span.getPayload());
    }

    // SimpleSpans is tested first; ClassSpans is only consulted otherwise.
    if (span instanceof SimpleSpans) {
        SimpleSpans simple = (SimpleSpans) span;
        this.spanId = simple.getSpanId();
        this.hasSpanId = simple.hasSpanId;
    }
    else if (span instanceof ClassSpans) {
        ClassSpans classSpans = (ClassSpans) span;
        this.spanId = classSpans.getNumber();
        this.hasSpanId = true;
    }
}
/** * Advances the element spans until encountering the given span. * * @param span * a span * @return <code>true</code> if such an element is found, * <code>false</code> * if the span is not in an element. * @throws IOException */ private boolean advanceElementTo (Spans span) throws IOException { while (hasMoreElements && elements.doc() == currentDocNum && elements.start() < span.end()) { if (span.start() >= elements.start() && span.end() <= elements.end()) { return true; } elementList.add(new CandidateSpan(elements, elementPosition)); hasMoreElements = elements.next(); elementPosition++; } return false; // invalid }
/**
 * Fills {@code candidateList} with candidates from the same document as
 * the first target that lie inside an element and within the maximum
 * distance of that target. Stops at the first located candidate that is
 * too far away.
 *
 * @return whether the candidate spans still have more matches
 * @throws IOException
 *             propagated from the spans
 */
@Override
protected boolean setCandidateList (List<CandidateSpan> candidateList,
        Spans candidate, boolean hasMoreCandidates,
        List<CandidateSpan> targetList) throws IOException {

    if (targetList.isEmpty()) {
        return hasMoreCandidates;
    }

    CandidateSpan target = targetList.get(0);
    while (hasMoreCandidates && candidate.doc() == target.getDoc()) {
        int position = findElementPosition(candidate);
        // -1 means the candidate is not inside any element; skip it.
        if (position != -1) {
            CandidateSpan located = new CandidateSpan(candidate, position);
            if (!isWithinMaxDistance(target, located)) {
                break;
            }
            candidateList.add(located);
        }
        hasMoreCandidates = candidate.next();
    }
    return hasMoreCandidates;
}
/** * Advances the <em>not-attributes</em> to be in the same or * greater * document number than referentSpans' document number. If a * <em>not-attribute</em> is in the same document, it is advanced * to be in * the same as or greater start position than the current * referentSpan. * * @throws IOException */ private void advanceNotAttributes (Spans referentSpans) throws IOException { for (AttributeSpans a : notAttributeList) { // advance the doc# of not AttributeSpans // logger.info("a "+a.start()); while (!a.isFinish() && a.doc() <= referentSpans.doc()) { if (a.doc() == referentSpans.doc() && a.start() >= referentSpans.start()) break; if (!a.next()) a.setFinish(true); } } }
public void test() throws IOException { PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"), new MaxPayloadFunction()); TopDocs hits = searcher.search(query, null, 100); assertTrue("hits is null and it shouldn't be", hits != null); assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); //they should all have the exact same score, because they all contain seventy once, and we set //all the other similarity factors to be 1 assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1); for (int i = 0; i < hits.scoreDocs.length; i++) { ScoreDoc doc = hits.scoreDocs[i]; assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); } CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query); assertTrue("spans is null and it shouldn't be", spans != null); /*float score = hits.score(0); for (int i =1; i < hits.length(); i++) { assertTrue("scores are not equal and they should be", score == hits.score(i)); }*/ }
@Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { Terms terms = context.reader().terms(field); if (terms == null) { return null; // field does not exist } ArrayList<Spans> subSpans = new ArrayList<>(clauses.size()); for (CustomSpanWeight w : subWeights) { Spans subSpan = w.getSpans(context, requiredPostings); if (subSpan != null) { subSpans.add(subSpan); } else { return null; // all required } } // all NearSpans require at least two subSpans return (!inOrder) ? new CustomNearSpansUnordered(slop, subSpans) : new NearSpansOrdered(slop, subSpans); }
@Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException { assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); final TermState state = termContext.get(context.ord); if (state == null) { // term is not present in that reader assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term; return null; } final Terms terms = context.reader().terms(term.field()); if (terms == null) return null; if (terms.hasPositions() == false) throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run CustomSpanTermQuery (term=" + term.text() + ")"); final TermsEnum termsEnum = terms.iterator(); termsEnum.seekExact(term.bytes(), state); final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings()); float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST; return new TermSpans(getSimScorer(context), postings, term, positionsCost); }
/**
 * Collects into {@code payloads} every payload exposed by the spans of
 * {@code query} across all leaves of {@code context}.
 *
 * @param payloads receives the payload byte arrays
 * @param query the span query whose matches are scanned
 * @throws IOException propagated from the index access
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query) throws IOException {
  TreeSet<Term> queryTerms = new TreeSet<Term>();
  query.extractTerms(queryTerms);

  // One TermContext per query term, built against the top-level context.
  Map<Term,TermContext> contextsByTerm = new HashMap<Term,TermContext>();
  for (Term queryTerm : queryTerms) {
    contextsByTerm.put(queryTerm, TermContext.build(context, queryTerm, true));
  }

  for (AtomicReaderContext leaf : context.leaves()) {
    final Spans leafSpans = query.getSpans(leaf, leaf.reader().getLiveDocs(), contextsByTerm);
    while (leafSpans.next()) {
      if (!leafSpans.isPayloadAvailable()) {
        continue;
      }
      for (byte[] data : leafSpans.getPayload()) {
        payloads.add(data);
      }
    }
  }
}
/**
 * Collects into {@code payloads} every payload exposed by the spans of
 * {@code query} across all leaves of {@code context}.
 *
 * @param payloads receives the payload byte arrays
 * @param query the span query whose matches are scanned
 * @throws IOException propagated from the index access
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query) throws IOException {
  TreeSet<Term> queryTerms = new TreeSet<Term>();
  query.extractTerms(queryTerms);

  // One TermContext per query term, built against the top-level context.
  Map<Term,TermContext> contextsByTerm = new HashMap<Term,TermContext>();
  for (Term queryTerm : queryTerms) {
    contextsByTerm.put(queryTerm, TermContext.build(context, queryTerm));
  }

  for (AtomicReaderContext leaf : context.leaves()) {
    final Spans leafSpans = query.getSpans(leaf, leaf.reader().getLiveDocs(), contextsByTerm);
    while (leafSpans.next()) {
      if (!leafSpans.isPayloadAvailable()) {
        continue;
      }
      for (byte[] data : leafSpans.getPayload()) {
        payloads.add(data);
      }
    }
  }
}
/**
 * Counts every span position the converted form of {@code q} produces
 * in the reader's only leaf.
 *
 * @param field the field passed to {@code convert}
 * @param q the query to convert into a span query
 * @return the total number of span start positions across all documents
 */
long countSpans(String field, Query q) throws Exception {
  List<LeafReaderContext> leaves = reader.leaves();
  assert (leaves.size() == 1);
  LeafReaderContext onlyLeaf = leaves.get(0);

  SpanQuery spanQuery = (SpanQuery) convert(field, q).rewrite(reader);
  float boost = getBoost(q);
  SpanWeight weight = spanQuery.createWeight(searcher, false, boost);
  final Spans spans = weight.getSpans(onlyLeaf, SpanWeight.Postings.POSITIONS);

  long spanCount = 0;
  if (spans == null) {
    return spanCount;
  }
  while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      spanCount++;
    }
  }
  return spanCount;
}
long countDocs(String field, Query q) throws Exception { BitSet docs = new BitSet(); List<LeafReaderContext> ctxs = reader.leaves(); assert (ctxs.size() == 1); LeafReaderContext leafReaderContext = ctxs.get(0); SpanQuery sq = convert(field, q); sq = (SpanQuery) sq.rewrite(reader); SpanWeight sw = sq.createWeight(searcher, false, getBoost(q)); final Spans spans = sw.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS); if (spans != null) { while (spans.nextDoc() != Spans.NO_MORE_DOCS) { while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { docs.set(spans.docID()); } } } long spanDocHits = docs.cardinality(); // double check with a regular searcher and original query TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(q, coll); assertEquals(coll.getTotalHits(), spanDocHits); return spanDocHits; }
/**
 * Creates the {@code SpansWithAttribute} for this query. When the query
 * type is "spanWithAttribute" no first-clause spans are supplied;
 * otherwise the spans of the first clause are passed along.
 */
@Override
public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {
    if (!type.equals("spanWithAttribute")) {
        Spans firstClauseSpans = this.getFirstClause().getSpans(context,
                acceptDocs, termContexts);
        // The constructor below needs SimpleSpans specifically.
        SimpleSpans spans = (SimpleSpans) firstClauseSpans;
        return new SpansWithAttribute(this, spans, context, acceptDocs,
                termContexts);
    }
    return new SpansWithAttribute(this, context, acceptDocs, termContexts);
}
/**
 * Filters the span matches of each constraint, returning only the
 * matches meeting all the constraints.
 *
 * A {@code SpanDistanceQuery} is built for every constraint. The spans
 * of the first one are combined, one constraint at a time, with the
 * spans of each following one through {@code MultipleDistanceSpans}.
 * When there is exactly one constraint, the spans of that single
 * distance query are returned instead of {@code null}.
 *
 * @return only the span matches meeting all the constraints.
 * @throws IOException
 *             propagated from the underlying span queries
 */
@Override
public Spans getSpans (LeafReaderContext context, Bits acceptDocs,
        Map<Term, TermContext> termContexts) throws IOException {

    SpanDistanceQuery firstQuery = new SpanDistanceQuery(firstClause,
            secondClause, constraints.get(0), collectPayloads);
    Spans combined = firstQuery.getSpans(context, acceptDocs,
            termContexts);

    for (int i = 1; i < constraints.size(); i++) {
        SpanDistanceQuery nextQuery = new SpanDistanceQuery(firstClause,
                secondClause, constraints.get(i), collectPayloads);
        Spans nextSpans = nextQuery.getSpans(context, acceptDocs,
                termContexts);

        // Exclusion is always judged against the first constraint.
        boolean exclusion = firstQuery.isExclusion()
                && nextQuery.isExclusion();
        combined = new MultipleDistanceSpans(this, context, acceptDocs,
                termContexts, combined, nextSpans, isOrdered, exclusion);
    }

    // With two or more constraints this is the last MultipleDistanceSpans;
    // with one constraint it is the single distance spans.
    return combined;
}
@Override public Spans getSpans (LeafReaderContext context, Bits acceptDocs, Map<Term, TermContext> termContexts) throws IOException { // Temporary: if (isExclusion) return new ExpandedExclusionSpans(this, context, acceptDocs, termContexts); else return new ExpandedSpans(this, context, acceptDocs, termContexts); }
/**
 * Tells whether the given span lies inside an element distance unit,
 * advancing the element spans (and {@code elementPosition}) towards the
 * span's position while searching.
 *
 * @param span
 *            the span to locate
 * @return <code>true</code> if the current element encloses the span,
 *         <code>false</code> otherwise.
 * @throws IOException
 *             propagated from the spans
 */
private boolean advanceElementTo (Spans span) throws IOException {
    while (hasMoreElements && elements.doc() == currentDocNum) {
        // Elements starting at or after the span's end cannot enclose it.
        if (elements.start() >= span.end()) {
            break;
        }
        boolean enclosed = span.start() >= elements.start()
                && span.end() <= elements.end();
        if (enclosed) {
            return true;
        }
        hasMoreElements = elements.next();
        elementPosition++;
    }
    return false;
}
/**
 * Looks for a document shared by {@code x}, {@code y} and the element
 * spans {@code e}, repeating while {@code hasMoreSpans} is set.
 *
 * @param x the first spans
 * @param y the second spans
 * @param e the element spans
 * @return true iff such a doc is found.
 * @throws IOException propagated from the spans
 */
protected boolean findSameDoc (Spans x, Spans y, Spans e)
        throws IOException {
    while (hasMoreSpans) {
        boolean allAligned = ensureSameDoc(x, y) && e.doc() == x.doc();
        if (allAligned) {
            return true;
        }
        // Not aligned yet: bring the element and y together, then retry.
        if (!ensureSameDoc(e, y)) {
            break;
        }
    }
    return false;
}
/**
 * Advances the element spans, within the document of the candidate
 * list, until an element enclosing the given span is reached.
 * {@code elementPosition} is incremented for each element passed over.
 *
 * @param span the span to locate
 * @return true iff an element containing the span is found.
 * @throws IOException propagated from the spans
 */
private boolean advanceElementTo (Spans span) throws IOException {
    while (hasMoreElements && elements.doc() == candidateListDocNum) {
        // Elements starting at or after the span's end cannot enclose it.
        if (elements.start() >= span.end()) {
            break;
        }
        boolean enclosed = span.start() >= elements.start()
                && span.end() <= elements.end();
        if (enclosed) {
            return true;
        }
        hasMoreElements = elements.next();
        elementPosition++;
    }
    return false;
}
/** * Finds the element position of the specified span in the element * list or by advancing the element spans until encountering the * span. * * @param span * a Span * @return the element position * @throws IOException */ private int findElementPosition (Spans span) throws IOException { // Check in the element list if (!elementList.isEmpty() && span.end() <= elementList .get(elementList.size() - 1).getEnd()) { for (CandidateSpan e : elementList) if (e.getEnd() >= span.end() && e.getStart() <= span.start()) { return e.getPosition(); } return -1; // The span is not in an element. } return (advanceElementTo(span) ? elementPosition : -1); }