@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
  final Bits docsWithValue;
  if (valuesSource != null) {
    docsWithValue = valuesSource.docsWithValue(ctx);
  } else {
    docsWithValue = new Bits.MatchNoBits(ctx.reader().maxDoc());
  }
  return new LeafBucketCollectorBase(sub, docsWithValue) {
    @Override
    public void collect(int doc, long bucket) throws IOException {
      // collect only documents that have no value for the field
      if (docsWithValue != null && !docsWithValue.get(doc)) {
        collectBucket(sub, doc, bucket);
      }
    }
  };
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
  // no need to provide deleted docs to the filter
  final Bits[] bits = new Bits[filters.length];
  for (int i = 0; i < filters.length; ++i) {
    bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorer(ctx));
  }
  return new LeafBucketCollectorBase(sub, null) {
    @Override
    public void collect(int doc, long bucket) throws IOException {
      boolean matched = false;
      for (int i = 0; i < bits.length; i++) {
        if (bits[i].get(doc)) {
          collectBucket(sub, doc, bucketOrd(bucket, i));
          matched = true;
        }
      }
      if (showOtherBucket && !matched) {
        collectBucket(sub, doc, bucketOrd(bucket, bits.length));
      }
    }
  };
}
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs,
    Map<Term,TermContext> termContexts, boolean collectPayloads) throws IOException {
  if (spanNearQuery.getClauses().length < 2) {
    throw new IllegalArgumentException("Less than 2 clauses: " + spanNearQuery);
  }
  this.collectPayloads = collectPayloads;
  allowedSlop = spanNearQuery.getSlop();
  SpanQuery[] clauses = spanNearQuery.getClauses();
  subSpans = new Spans[clauses.length];
  matchPayload = new LinkedList<>();
  subSpansByDoc = new Spans[clauses.length];
  for (int i = 0; i < clauses.length; i++) {
    subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
    subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
  }
  query = spanNearQuery; // kept for toString() only.
}
/**
 * Check whether one or more documents match the provided query.
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
  final Weight weight = searcher.createNormalizedWeight(query, false);
  // the scorer API should be more efficient at stopping after the first
  // match than the bulk scorer API
  for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
    final Scorer scorer = weight.scorer(context);
    if (scorer == null) {
      continue;
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    final DocIdSetIterator iterator = scorer.iterator();
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
      if (liveDocs == null || liveDocs.get(doc)) {
        return true;
      }
    }
  }
  return false;
}
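// A hedged usage sketch (not from the original source): one way exists(...) might be
// called, assuming an already-open IndexSearcher. The field name and value are made up;
// TermQuery and Term are the standard org.apache.lucene.search / org.apache.lucene.index classes.
static boolean anyMatch(IndexSearcher searcher) throws IOException {
  return exists(searcher, new TermQuery(new Term("user", "kimchy")));
}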
private Bits getMissingBits(final long offset) throws IOException {
  if (offset == -1) {
    return new Bits.MatchAllBits(maxDoc);
  } else {
    final IndexInput in = data.clone();
    return new Bits() {
      @Override
      public boolean get(int index) {
        try {
          // seek to the byte holding this doc's bit, then test the bit within it
          in.seek(offset + (index >> 3));
          return (in.readByte() & (1 << (index & 7))) != 0;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      @Override
      public int length() {
        return maxDoc;
      }
    };
  }
}
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
  switch(field.getDocValuesType()) {
    case SORTED_SET:
      return DocValues.docsWithValue(getSortedSet(field), maxDoc);
    case SORTED:
      return DocValues.docsWithValue(getSorted(field), maxDoc);
    case BINARY:
      BinaryEntry be = binaries.get(field.number);
      return getMissingBits(be.missingOffset);
    case NUMERIC:
      NumericEntry ne = numerics.get(field.number);
      return getMissingBits(ne.missingOffset);
    default:
      throw new AssertionError();
  }
}
private Bits getMissingBits(final long offset) throws IOException {
  if (offset == -1) {
    return new Bits.MatchAllBits(maxDoc);
  } else {
    int length = (int) ((maxDoc + 7L) >>> 3);
    final RandomAccessInput in = data.randomAccessSlice(offset, length);
    return new Bits() {
      @Override
      public boolean get(int index) {
        try {
          return (in.readByte(index >> 3) & (1 << (index & 7))) != 0;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      @Override
      public int length() {
        return maxDoc;
      }
    };
  }
}
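// A standalone sketch (not from the original source) of the byte/bit addressing both
// getMissingBits variants above rely on: doc bits are packed eight per byte, so the byte
// index is doc >> 3 and the bit within that byte is doc & 7. ((maxDoc + 7) >>> 3) rounds
// the byte count up so a trailing partial byte is included. All values here are made up.
public class PackedBitsDemo {
  public static void main(String[] args) {
    int maxDoc = 10;
    byte[] packed = new byte[(maxDoc + 7) >>> 3]; // 2 bytes cover 10 docs
    int[] docsWithValue = {0, 3, 9};
    for (int doc : docsWithValue) {
      packed[doc >> 3] |= 1 << (doc & 7);         // set this doc's bit
    }
    for (int doc = 0; doc < maxDoc; doc++) {
      boolean hasValue = (packed[doc >> 3] & (1 << (doc & 7))) != 0;
      System.out.println("doc " + doc + " has value: " + hasValue);
    }
  }
}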
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
  switch(field.getDocValuesType()) {
    case SORTED_SET:
      return DocValues.docsWithValue(getSortedSet(field), maxDoc);
    case SORTED_NUMERIC:
      return DocValues.docsWithValue(getSortedNumeric(field), maxDoc);
    case SORTED:
      return DocValues.docsWithValue(getSorted(field), maxDoc);
    case BINARY:
      BinaryEntry be = binaries.get(field.number);
      return getMissingBits(be.missingOffset);
    case NUMERIC:
      NumericEntry ne = numerics.get(field.number);
      return getMissingBits(ne.missingOffset);
    default:
      throw new AssertionError();
  }
}
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
    Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
  List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
  for (int i = 0; i < terms.length; i++) {
    final Term term = terms[i];
    TermState termState = termContexts.get(term).get(context.ord);
    if (termState == null) {
      // Term doesn't exist in reader
      continue;
    }
    termsEnum.seekExact(term.bytes(), termState);
    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
    if (postings == null) {
      // term does exist, but has no positions
      throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
    }
    cost += postings.cost();
    docsEnums.add(postings);
  }
  _queue = new DocsQueue(docsEnums);
  _posList = new IntQueue();
}
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
@Override
public Bits docsWithValue(LeafReaderContext context) throws IOException {
  final SortedBinaryDocValues bytes = bytesValues(context);
  if (org.elasticsearch.index.fielddata.FieldData.unwrapSingleton(bytes) != null) {
    return org.elasticsearch.index.fielddata.FieldData.unwrapSingletonBits(bytes);
  } else {
    return org.elasticsearch.index.fielddata.FieldData.docsWithValue(bytes, context.reader().maxDoc());
  }
}
@Override
public Bits docsWithValue(LeafReaderContext context) {
  final MultiGeoPointValues geoPoints = geoPointValues(context);
  if (org.elasticsearch.index.fielddata.FieldData.unwrapSingleton(geoPoints) != null) {
    return org.elasticsearch.index.fielddata.FieldData.unwrapSingletonBits(geoPoints);
  } else {
    return org.elasticsearch.index.fielddata.FieldData.docsWithValue(geoPoints, context.reader().maxDoc());
  }
}
@Override
public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
  if (filter != null) {
    assert query == null;
    return super.bulkScorer(context, scoreDocsInOrder, acceptDocs);
  } else {
    assert query != null && innerWeight != null;
    BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs);
    if (bulkScorer == null) {
      return null;
    }
    return new ConstantBulkScorer(bulkScorer, this, queryWeight);
  }
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  return new RandomAccessWeight(this) {
    @Override
    protected Bits getMatchingDocs(final LeafReaderContext context) throws IOException {
      final LeafSearchScript leafScript = searchScript.getLeafSearchScript(context);
      return new Bits() {
        @Override
        public boolean get(int doc) {
          leafScript.setDocument(doc);
          Object val = leafScript.run();
          if (val == null) {
            return false;
          }
          if (val instanceof Boolean) {
            return (Boolean) val;
          }
          if (val instanceof Number) {
            return ((Number) val).longValue() != 0;
          }
          throw new IllegalArgumentException("Can't handle type [" + val + "] in script filter");
        }
        @Override
        public int length() {
          return context.reader().maxDoc();
        }
      };
    }
  };
}
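// A standalone sketch (not from the original source) of the script-result coercion the
// Bits above applies: null is no-match, a Boolean passes through, and any Number matches
// when its long value is non-zero; anything else is rejected. The class name is made up.
public class ScriptResultCoercionDemo {
  static boolean matches(Object val) {
    if (val == null) {
      return false;
    }
    if (val instanceof Boolean) {
      return (Boolean) val;
    }
    if (val instanceof Number) {
      return ((Number) val).longValue() != 0;
    }
    throw new IllegalArgumentException("Can't handle type [" + val + "] in script filter");
  }

  public static void main(String[] args) {
    System.out.println(matches(Boolean.TRUE)); // true
    System.out.println(matches(0L));           // false
    System.out.println(matches(2.5));          // true: longValue() is 2
    System.out.println(matches(null));         // false
  }
}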
public synchronized Bits getReadOnlyLiveDocs() {
  //System.out.println("getROLiveDocs seg=" + info);
  assert Thread.holdsLock(writer);
  liveDocsShared = true;
  //if (liveDocs != null) {
  //  System.out.println("  liveCount=" + liveDocs.count());
  //}
  return liveDocs;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final Scorer filterScorer = filter.scorer(context);
  final LeafCollector in = collector.getLeafCollector(context);
  final Bits bits = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);
  return new FilterLeafCollector(in) {
    @Override
    public void collect(int doc) throws IOException {
      if (bits.get(doc)) {
        in.collect(doc);
      }
    }
  };
}
private FiltersFunctionFactorScorer(CustomBoostFactorWeight w, Scorer scorer, ScoreMode scoreMode,
    FilterFunction[] filterFunctions, float maxBoost, LeafScoreFunction[] functions, Bits[] docSets,
    CombineFunction scoreCombiner, boolean needsScores) throws IOException {
  super(scorer, w);
  this.scoreMode = scoreMode;
  this.filterFunctions = filterFunctions;
  this.functions = functions;
  this.docSets = docSets;
  this.scoreCombiner = scoreCombiner;
  this.maxBoost = maxBoost;
  this.needsScores = needsScores;
}
static DocMap build(final int maxDoc, final Bits liveDocs) {
  assert liveDocs != null;
  final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int del = 0;
  for (int i = 0; i < maxDoc; ++i) {
    docMapBuilder.add(i - del);
    if (!liveDocs.get(i)) {
      ++del;
    }
  }
  final PackedLongValues docMap = docMapBuilder.build();
  final int numDeletedDocs = del;
  assert docMap.size() == maxDoc;
  return new DocMap() {
    @Override
    public int get(int docID) {
      if (!liveDocs.get(docID)) {
        return -1;
      }
      return (int) docMap.get(docID);
    }
    @Override
    public int maxDoc() {
      return maxDoc;
    }
    @Override
    public int numDeletedDocs() {
      return numDeletedDocs;
    }
  };
}
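// A self-contained sketch (not from the original source) of the remapping that
// DocMap.build computes: a live doc's new ID is its old ID minus the number of deleted
// docs that precede it, and deleted docs map to -1. The liveDocs pattern is made up.
public class DocMapDemo {
  public static void main(String[] args) {
    boolean[] live = {true, false, true, true, false, true};
    int del = 0;
    for (int i = 0; i < live.length; i++) {
      int newId = live[i] ? i - del : -1;
      if (!live[i]) {
        del++;
      }
      System.out.println("old doc " + i + " -> new doc " + newId);
    }
    // prints: 0->0, 1->-1, 2->1, 3->2, 4->-1, 5->3
  }
}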
@Override
public long ramBytesUsed() {
  long base = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
  if (bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) {
    return base;
  } else {
    return base + (bits.length() >>> 3);
  }
}
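// Note (added, not from the original source): bits.length() >>> 3 converts the bit count
// to an approximate byte count (one bit per document, eight bits per byte); e.g. a
// 1,000,000-doc segment accounts for 1,000,000 >>> 3 = 125,000 bytes. MatchAllBits and
// MatchNoBits store no per-document data, so only the object reference is counted.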
/**
 * test version lookup with two documents matching the ID
 */
public void testTwoDocuments() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
  Document doc = new Document();
  doc.add(new Field(UidFieldMapper.NAME, "6", UidFieldMapper.Defaults.FIELD_TYPE));
  doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, 87));
  writer.addDocument(doc);
  writer.addDocument(doc);
  DirectoryReader reader = DirectoryReader.open(writer);
  LeafReaderContext segment = reader.leaves().get(0);
  PerThreadIDAndVersionLookup lookup = new PerThreadIDAndVersionLookup(segment.reader());

  // return the last doc when there are duplicates
  DocIdAndVersion result = lookup.lookup(new BytesRef("6"), null, segment);
  assertNotNull(result);
  assertEquals(87, result.version);
  assertEquals(1, result.docId);

  // delete the first doc only
  FixedBitSet live = new FixedBitSet(2);
  live.set(1);
  result = lookup.lookup(new BytesRef("6"), live, segment);
  assertNotNull(result);
  assertEquals(87, result.version);
  assertEquals(1, result.docId);

  // delete the second doc only
  live.clear(1);
  live.set(0);
  result = lookup.lookup(new BytesRef("6"), live, segment);
  assertNotNull(result);
  assertEquals(87, result.version);
  assertEquals(0, result.docId);

  // delete both docs
  assertNull(lookup.lookup(new BytesRef("6"), new Bits.MatchNoBits(2), segment));

  reader.close();
  writer.close();
  dir.close();
}
/** Creates a {@link DocMap} instance appropriate for
 *  this reader. */
public static DocMap build(AtomicReader reader) {
  final int maxDoc = reader.maxDoc();
  if (!reader.hasDeletions()) {
    return new NoDelDocMap(maxDoc);
  }
  final Bits liveDocs = reader.getLiveDocs();
  return build(maxDoc, liveDocs);
}
private boolean canReuse(DocsEnum reuse, Bits liveDocs) {
  if (reuse != null && (reuse instanceof SegmentDocsEnumBase)) {
    SegmentDocsEnumBase docsEnum = (SegmentDocsEnumBase) reuse;
    // If you are using ParallelReader, and pass in a
    // reused DocsEnum, it could have come from another
    // reader also using standard codec
    if (docsEnum.startFreqIn == freqIn) {
      // we only reuse if the incoming enum has the same liveDocs as the given liveDocs
      return liveDocs == docsEnum.liveDocs;
    }
  }
  return false;
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
  // no need to provide deleted docs to the filter
  final Bits bits = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filter.scorer(ctx));
  return new LeafBucketCollectorBase(sub, null) {
    @Override
    public void collect(int doc, long bucket) throws IOException {
      if (bits.get(doc)) {
        collectBucket(sub, doc, bucket);
      }
    }
  };
}
@Override
protected final int linearScan(int scanTo) throws IOException {
  final int[] docs = this.docs;
  final int upTo = count;
  final Bits liveDocs = this.liveDocs;
  for (int i = start; i < upTo; i++) {
    int d = docs[i];
    if (scanTo <= d && liveDocs.get(d)) {
      start = i;
      freq = freqs[i];
      return doc = docs[i];
    }
  }
  return doc = refill();
}
@Override
protected int scanTo(int target) throws IOException {
  int docAcc = accum;
  int frq = 1;
  final IndexInput freqIn = this.freqIn;
  final boolean omitTF = indexOmitsTF;
  final int loopLimit = limit;
  final Bits liveDocs = this.liveDocs;
  for (int i = ord; i < loopLimit; i++) {
    int code = freqIn.readVInt();
    if (omitTF) {
      docAcc += code;
    } else {
      docAcc += code >>> 1; // shift off low bit
      frq = readFreq(freqIn, code);
    }
    if (docAcc >= target && liveDocs.get(docAcc)) {
      freq = frq;
      ord = i + 1;
      return accum = docAcc;
    }
  }
  ord = limit;
  freq = frq;
  accum = docAcc;
  return NO_MORE_DOCS;
}
@Override
protected final int nextUnreadDoc() throws IOException {
  int docAcc = accum;
  int frq = 1;
  final IndexInput freqIn = this.freqIn;
  final boolean omitTF = indexOmitsTF;
  final int loopLimit = limit;
  final Bits liveDocs = this.liveDocs;
  for (int i = ord; i < loopLimit; i++) {
    int code = freqIn.readVInt();
    if (omitTF) {
      docAcc += code;
    } else {
      docAcc += code >>> 1; // shift off low bit
      frq = readFreq(freqIn, code);
    }
    if (liveDocs.get(docAcc)) {
      freq = frq;
      ord = i + 1;
      return accum = docAcc;
    }
  }
  ord = limit;
  freq = frq;
  accum = docAcc;
  return NO_MORE_DOCS;
}
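// A standalone sketch (not from the original source) of the posting encoding the two
// scans above decode: each entry is a doc-ID delta, and when term frequencies are kept
// the delta is shifted left one bit, with the low bit flagging freq == 1 (readFreq reads
// an explicit freq only when that flag is clear). All values here are made up.
public class PostingDecodeDemo {
  public static void main(String[] args) {
    // encoded as (delta << 1 | lowBit): docs 3, 7, 8 with freqs 1, 2, 1
    int[] codes = {(3 << 1) | 1, (4 << 1), (1 << 1) | 1};
    int[] explicitFreqs = {2}; // one entry per clear low bit
    int docAcc = 0, freqIdx = 0;
    for (int code : codes) {
      docAcc += code >>> 1; // accumulate the delta, shifting off the freq flag
      int freq = ((code & 1) != 0) ? 1 : explicitFreqs[freqIdx++];
      System.out.println("doc " + docAcc + " freq " + freq);
    }
  }
}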
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
  assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  assert !fieldInfo.hasPayloads();

  this.liveDocs = liveDocs;

  // TODO: for full enum case (eg segment merging) this
  // seek is unnecessary; maybe we can avoid in such
  // cases
  freqIn.seek(termState.freqOffset);
  lazyProxPointer = termState.proxOffset;
  limit = termState.docFreq;
  assert limit > 0;
  ord = 0;
  doc = -1;
  accum = 0;
  position = 0;
  skipped = false;
  posPendingCount = 0;

  freqOffset = termState.freqOffset;
  proxOffset = termState.proxOffset;
  skipOffset = termState.skipOffset;
  // if (DEBUG) System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
  return this;
}
@Override
public Bits getDocsWithField(FieldInfo field) throws IOException {
  if (VersionFieldMapper.NAME.equals(field.name)) {
    return new Bits.MatchAllBits(reader.maxDoc());
  } else {
    return in.getDocsWithField(field);
  }
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags /* ignored */) throws IOException {
  TVDocsEnum docsEnum;
  if (reuse != null && reuse instanceof TVDocsEnum) {
    docsEnum = (TVDocsEnum) reuse;
  } else {
    docsEnum = new TVDocsEnum();
  }
  docsEnum.reset(liveDocs, freq);
  return docsEnum;
}
@Override
protected void doPostCollection() throws IOException {
  IndexReader indexReader = context().searchContext().searcher().getIndexReader();
  for (LeafReaderContext ctx : indexReader.leaves()) {
    Scorer childDocsScorer = childFilter.scorer(ctx);
    if (childDocsScorer == null) {
      continue;
    }
    DocIdSetIterator childDocsIter = childDocsScorer.iterator();

    final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx);
    final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx);

    // Set the scorer, since we now replay only the child docIds
    sub.setScorer(ConstantScorer.create(childDocsIter, null, 1f));

    final Bits liveDocs = ctx.reader().getLiveDocs();
    for (int docId = childDocsIter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = childDocsIter.nextDoc()) {
      if (liveDocs != null && liveDocs.get(docId) == false) {
        continue;
      }
      long globalOrdinal = globalOrdinals.getOrd(docId);
      if (globalOrdinal != -1) {
        long bucketOrd = parentOrdToBuckets.get(globalOrdinal);
        if (bucketOrd != -1) {
          collectBucket(sub, docId, bucketOrd);
          if (multipleBucketsPerParentOrd) {
            long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal);
            if (otherBucketOrds != null) {
              for (long otherBucketOrd : otherBucketOrds) {
                collectBucket(sub, docId, otherBucketOrd);
              }
            }
          }
        }
      }
    }
  }
}
/**
 * Returns a Bits representing all documents from <code>dv</code> that have a value.
 */
public static Bits docsWithValue(final SortedDocValues dv, final int maxDoc) {
  return new Bits() {
    @Override
    public boolean get(int index) {
      return dv.getOrd(index) >= 0;
    }
    @Override
    public int length() {
      return maxDoc;
    }
  };
}
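// A hedged illustration (not from the original source): SortedDocValues.getOrd returns
// the ordinal of a doc's value, or -1 when the doc has no value, so the Bits view above
// is simply "ord is non-negative". This miniature stand-in (SimpleBits and the ords
// array are made up) mimics that contract without depending on Lucene.
public class DocsWithValueDemo {
  interface SimpleBits {
    boolean get(int index);
    int length();
  }

  public static void main(String[] args) {
    final int[] ords = {0, -1, 2, -1}; // per-doc ordinals; -1 means "missing"
    final int maxDoc = ords.length;
    SimpleBits docsWithValue = new SimpleBits() {
      public boolean get(int index) { return ords[index] >= 0; }
      public int length() { return maxDoc; }
    };
    for (int doc = 0; doc < docsWithValue.length(); doc++) {
      System.out.println("doc " + doc + " has value: " + docsWithValue.get(doc));
    }
  }
}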
/**
 * Test the term index.
 * @lucene.experimental
 */
public static Status.TermIndexStatus testPostings(AtomicReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException {
  // TODO: we should go and verify term vectors match, if
  // crossCheckTermVectors is on...
  Status.TermIndexStatus status;
  final int maxDoc = reader.maxDoc();
  final Bits liveDocs = reader.getLiveDocs();

  try {
    if (infoStream != null) {
      infoStream.print(" test: terms, freq, prox...");
    }
    final Fields fields = reader.fields();
    final FieldInfos fieldInfos = reader.getFieldInfos();
    status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose);
    if (liveDocs != null) {
      if (infoStream != null) {
        infoStream.print(" test (ignoring deletes): terms, freq, prox...");
      }
      checkFields(fields, null, maxDoc, fieldInfos, true, false, infoStream, verbose);
    }
  } catch (Throwable e) {
    if (failFast) {
      IOUtils.reThrow(e);
    }
    msg(infoStream, "ERROR: " + e);
    status = new Status.TermIndexStatus();
    status.error = e;
    if (infoStream != null) {
      e.printStackTrace(infoStream);
    }
  }

  return status;
}
@Override
public FieldComparator<T> setNextReader(AtomicReaderContext context) throws IOException {
  if (missingValue != null) {
    docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
    // optimization to remove unneeded checks on the bits interface:
    if (docsWithField instanceof Bits.MatchAllBits) {
      docsWithField = null;
    }
  } else {
    docsWithField = null;
  }
  return this;
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
  assert !eof;
  //if (DEBUG) {
  //  System.out.println("BTTR.docs seg=" + segment);
  //}
  currentFrame.decodeMetaData();
  //if (DEBUG) {
  //  System.out.println("  state=" + currentFrame.state);
  //}
  return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
  DocValuesConsumer consumer = codec.normsFormat().normsConsumer(segmentWriteState);
  boolean success = false;
  try {
    for (FieldInfo field : mergeState.fieldInfos) {
      if (field.hasNorms()) {
        List<NumericDocValues> toMerge = new ArrayList<>();
        List<Bits> docsWithField = new ArrayList<>();
        for (AtomicReader reader : mergeState.readers) {
          NumericDocValues norms = reader.getNormValues(field.name);
          if (norms == null) {
            norms = DocValues.emptyNumeric();
          }
          toMerge.add(norms);
          // norms are dense: every doc has a norm value, hence MatchAllBits
          docsWithField.add(new Bits.MatchAllBits(reader.maxDoc()));
        }
        consumer.mergeNumericField(field, mergeState, toMerge, docsWithField);
      }
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(consumer);
    } else {
      IOUtils.closeWhileHandlingException(consumer);
    }
  }
}
private FiltersFunctionFactorScorer functionScorer(LeafReaderContext context) throws IOException {
  Scorer subQueryScorer = subQueryWeight.scorer(context);
  if (subQueryScorer == null) {
    return null;
  }
  final LeafScoreFunction[] functions = new LeafScoreFunction[filterFunctions.length];
  final Bits[] docSets = new Bits[filterFunctions.length];
  for (int i = 0; i < filterFunctions.length; i++) {
    FilterFunction filterFunction = filterFunctions[i];
    functions[i] = filterFunction.function.getLeafScoreFunction(context);
    Scorer filterScorer = filterWeights[i].scorer(context);
    docSets[i] = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);
  }
  return new FiltersFunctionFactorScorer(this, subQueryScorer, scoreMode, filterFunctions, maxBoost, functions, docSets, combineFunction, needsScores);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
  if (positions == null && startOffsets == null) {
    return null;
  }
  // TODO: slightly sheisty
  return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags);
}