@Override public RandomAccessOrds getOrdinalsValues() { final BytesRef term = new BytesRef(index); final SortedDocValues sortedValues = new SortedDocValues() { @Override public BytesRef lookupOrd(int ord) { return term; } @Override public int getValueCount() { return 1; } @Override public int getOrd(int docID) { return 0; } }; return (RandomAccessOrds) DocValues.singleton(sortedValues); }
private void verify(RandomAccessOrds values, int maxDoc) { for (MultiValueMode mode : new MultiValueMode[] {MultiValueMode.MIN, MultiValueMode.MAX}) { final SortedDocValues selected = mode.select(values); for (int i = 0; i < maxDoc; ++i) { final long actual = selected.getOrd(i); int expected = -1; values.setDocument(i); for (int j = 0; j < values.cardinality(); ++j) { if (expected == -1) { expected = (int) values.ordAt(j); } else { if (mode == MultiValueMode.MIN) { expected = Math.min(expected, (int)values.ordAt(j)); } else if (mode == MultiValueMode.MAX) { expected = Math.max(expected, (int)values.ordAt(j)); } } } assertEquals(mode.toString() + " docId=" + i, expected, actual); } } }
public void testSvValues() throws IOException { int numDocs = 1000000; int numOrdinals = numDocs / 4; Map<Integer, Long> controlDocToOrdinal = new HashMap<>(); OrdinalsBuilder builder = new OrdinalsBuilder(numDocs); long ordinal = builder.currentOrdinal(); for (int doc = 0; doc < numDocs; doc++) { if (doc % numOrdinals == 0) { ordinal = builder.nextOrdinal(); } controlDocToOrdinal.put(doc, ordinal); builder.addDoc(doc); } Ordinals ords = builder.build(); assertThat(ords, instanceOf(SinglePackedOrdinals.class)); RandomAccessOrds docs = ords.ordinals(); final SortedDocValues singleOrds = DocValues.unwrapSingleton(docs); assertNotNull(singleOrds); for (Map.Entry<Integer, Long> entry : controlDocToOrdinal.entrySet()) { assertThat(entry.getValue(), equalTo((long) singleOrds.getOrd(entry.getKey()))); } }
private SortedDocValues correctBuggyOrds(final SortedDocValues in) { final int maxDoc = state.segmentInfo.getDocCount(); for (int i = 0; i < maxDoc; i++) { if (in.getOrd(i) == 0) { return in; // ok } } // we had ord holes, return an ord-shifting-impl that corrects the bug return new SortedDocValues() { @Override public int getOrd(int docID) { return in.getOrd(docID) - 1; } @Override public BytesRef lookupOrd(int ord) { return in.lookupOrd(ord+1); } @Override public int getValueCount() { return in.getValueCount() - 1; } }; }
public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { SortedDocValues valuesIn = reader.getSortedDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySorted(); } else if (info.hasDocValues()) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return DocValues.emptySorted(); } SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); return impl.iterator(); } }
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException { SortedSetDocValues dv = reader.getSortedSetDocValues(field); if (dv != null) { return dv; } SortedDocValues sdv = reader.getSortedDocValues(field); if (sdv != null) { return DocValues.singleton(sdv); } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySortedSet(); } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return DocValues.emptySortedSet(); } DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false); return dto.iterator(reader); }
/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { segCounts[1+si.getOrd(doc)]++; } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
private SortedDocValues newSortedInstance(final NumericDocValues docToOrd, final BinaryDocValues values, final int count) { return new SortedDocValues() { @Override public int getOrd(int docID) { return (int) docToOrd.get(docID); } @Override public BytesRef lookupOrd(int ord) { return values.get(ord); } @Override public int getValueCount() { return count; } // Leave lookupTerm to super's binary search // Leave termsEnum to super }; }
@Override public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader(); final AtomicReader r = SlowCompositeReaderWrapper.wrap(topReader); final int off = readerContext.docBase; final SortedDocValues sindex = FieldCache.DEFAULT.getTermsIndex(r, field); final int end = sindex.getValueCount(); return new IntDocValues(this) { @Override public int intVal(int doc) { return (end - sindex.getOrd(doc+off) - 1); } }; }
private void accumIntervalsSingle(SortedDocValues sdv, DocIdSetIterator disi, Bits bits) throws IOException { // First update the ordinals in the intervals to this segment for (FacetInterval interval : intervals) { interval.updateContext(sdv); } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (bits != null && bits.get(doc) == false) { continue; } int ord = sdv.getOrd(doc); if (ord >= 0) { accumInterval(ord); } } }
public GroupExpandCollector(SortedDocValues docValues, FixedBitSet groupBits, IntOpenHashSet collapsedSet, int limit, Sort sort) throws IOException { int numGroups = collapsedSet.size(); groups = new IntObjectOpenHashMap<>(numGroups * 2); collectors = new ArrayList<>(); DocIdSetIterator iterator = groupBits.iterator(); int group; while ((group = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { Collector collector = (sort == null) ? TopScoreDocCollector.create(limit, true) : TopFieldCollector.create(sort, limit, false, false, false, true); groups.put(group, collector); collectors.add(collector); } this.collapsedSet = collapsedSet; this.groupBits = groupBits; this.docValues = docValues; }
@Override public SortedDocValues getSorted(FieldInfo field) throws IOException { final int valueCount = (int) binaries.get(field.number).count; final BinaryDocValues binary = getBinary(field); final NumericDocValues ordinals = getNumeric(field, ords.get(field.number)); return new SortedDocValues() { @Override public int getOrd(int docID) { return (int) ordinals.get(docID); } @Override public void lookupOrd(int ord, BytesRef result) { binary.get(ord, result); } @Override public int getValueCount() { return valueCount; } }; }
private SortedDocValues correctBuggyOrds(final SortedDocValues in) { final int maxDoc = state.segmentInfo.getDocCount(); for (int i = 0; i < maxDoc; i++) { if (in.getOrd(i) == 0) { return in; // ok } } // we had ord holes, return an ord-shifting-impl that corrects the bug return new SortedDocValues() { @Override public int getOrd(int docID) { return in.getOrd(docID) - 1; } @Override public void lookupOrd(int ord, BytesRef result) { in.lookupOrd(ord+1, result); } @Override public int getValueCount() { return in.getValueCount() - 1; } }; }
public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { SortedDocValues valuesIn = reader.getSortedDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return EMPTY_TERMSINDEX; } else if (info.hasDocValues()) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return EMPTY_TERMSINDEX; } return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); } }
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException { SortedSetDocValues dv = reader.getSortedSetDocValues(field); if (dv != null) { return dv; } SortedDocValues sdv = reader.getSortedDocValues(field); if (sdv != null) { return new SingletonSortedSetDocValues(sdv); } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return SortedSetDocValues.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return SortedSetDocValues.EMPTY; } DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false); return dto.iterator(reader); }
@Test public void testDocValuesFormat() throws IOException { RAMDirectory directory = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43)); conf.setCodec(new Blur024Codec()); IndexWriter writer = new IndexWriter(directory, conf); Document doc = new Document(); doc.add(new StringField("f", "v", Store.YES)); doc.add(new SortedDocValuesField("f", new BytesRef("v"))); writer.addDocument(doc); writer.close(); DirectoryReader reader = DirectoryReader.open(directory); AtomicReaderContext context = reader.leaves().get(0); AtomicReader atomicReader = context.reader(); SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f"); assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName())); reader.close(); }
public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { SortedDocValues valuesIn = reader.getSortedDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return SortedDocValues.EMPTY; } else if (info.hasDocValues()) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return SortedDocValues.EMPTY; } return (SortedDocValues) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); } }
public long globalMaxOrd(IndexSearcher indexSearcher, String type) { DirectoryReader indexReader = (DirectoryReader) indexSearcher.getIndexReader(); if (indexReader.leaves().isEmpty()) { return 0; } else { LeafReaderContext atomicReaderContext = indexReader.leaves().get(0); IndexParentChildFieldData globalFieldData = indexFieldData.loadGlobal(indexReader); AtomicParentChildFieldData afd = globalFieldData.load(atomicReaderContext); SortedDocValues values = afd.getOrdinalsValues(type); return values.getValueCount(); } }
@Override protected void doPostCollection() throws IOException { IndexReader indexReader = context().searcher().getIndexReader(); for (LeafReaderContext ctx : indexReader.leaves()) { Scorer childDocsScorer = childFilter.scorer(ctx); if (childDocsScorer == null) { continue; } DocIdSetIterator childDocsIter = childDocsScorer.iterator(); final LeafBucketCollector sub = collectableSubAggregators.getLeafCollector(ctx); final SortedDocValues globalOrdinals = valuesSource.globalOrdinalsValues(parentType, ctx); // Set the scorer, since we now replay only the child docIds sub.setScorer(new ConstantScoreScorer(null, 1f,childDocsIter)); final Bits liveDocs = ctx.reader().getLiveDocs(); for (int docId = childDocsIter.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = childDocsIter.nextDoc()) { if (liveDocs != null && liveDocs.get(docId) == false) { continue; } long globalOrdinal = globalOrdinals.getOrd(docId); if (globalOrdinal != -1) { long bucketOrd = parentOrdToBuckets.get(globalOrdinal); if (bucketOrd != -1) { collectBucket(sub, docId, bucketOrd); if (multipleBucketsPerParentOrd) { long[] otherBucketOrds = parentOrdToOtherBuckets.get(globalOrdinal); if (otherBucketOrds != null) { for (long otherBucketOrd : otherBucketOrds) { collectBucket(sub, docId, otherBucketOrd); } } } } } } } }
public static String getParentId(ParentFieldMapper fieldMapper, LeafReader reader, int docId) { try { SortedDocValues docValues = reader.getSortedDocValues(fieldMapper.name()); if (docValues == null) { // hit has no _parent field. return null; } BytesRef parentId = docValues.get(docId); return parentId.length > 0 ? parentId.utf8ToString() : null; } catch (IOException e) { throw ExceptionsHelper.convertToElastic(e); } }
ReplaceMissing(SortedDocValues in, BytesRef term) { this.in = in; this.substituteTerm = term; int sub = in.lookupTerm(term); if (sub < 0) { substituteOrd = -sub-1; exists = false; } else { substituteOrd = sub; exists = true; } }
@Override public AtomicParentChildFieldData load(LeafReaderContext context) { final LeafReader reader = context.reader(); return new AbstractAtomicParentChildFieldData() { public Set<String> types() { return parentTypes; } @Override public SortedDocValues getOrdinalsValues(String type) { try { return DocValues.getSorted(reader, ParentFieldMapper.joinField(type)); } catch (IOException e) { throw new IllegalStateException("cannot load join doc values field for type [" + type + "]", e); } } @Override public long ramBytesUsed() { // unknown return 0; } @Override public Collection<Accountable> getChildResources() { return Collections.emptyList(); } @Override public void close() throws ElasticsearchException { } }; }
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException { final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length]; for (int i = 0; i < ordinals.length; ++i) { ordinals[i] = atomicFD[i].getOrdinalsValues(parentType); } return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT); }