/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { segCounts[1+si.getOrd(doc)]++; } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) ordMap.get(term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space */ static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
/** accumulates per-segment multi-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); // strange do-while to collect the missing count (first ord is NO_MORE_ORDS) int term = (int) si.nextOrd(); if (term < 0) { if (startTermIndex == -1) { counts[0]++; // missing count } continue; } do { if (map != null) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } while ((term = (int) si.nextOrd()) >= 0); } }
InternalGlobalOrdinalsIndexFieldData(IndexSettings indexSettings, String fieldName, AtomicOrdinalsFieldData[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes, Function<RandomAccessOrds, ScriptDocValues<?>> scriptFunction) { super(indexSettings, fieldName, memorySizeInBytes); this.atomicReaders = new Atomic[segmentAfd.length]; for (int i = 0; i < segmentAfd.length; i++) { atomicReaders[i] = new Atomic(segmentAfd[i], ordinalMap, i); } this.scriptFunction = scriptFunction; }
GlobalOrdinalMapping(OrdinalMap ordinalMap, RandomAccessOrds[] bytesValues, int segmentIndex) { super(); this.values = bytesValues[segmentIndex]; this.bytesValues = bytesValues; this.ordinalMap = ordinalMap; this.mapping = ordinalMap.getGlobalOrds(segmentIndex); }
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException { final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length]; for (int i = 0; i < ordinals.length; ++i) { ordinals[i] = atomicFD[i].getOrdinalsValues(parentType); } return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT); }
@Override public IndexParentChildFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception { final long startTime = System.nanoTime(); long ramBytesUsed = 0; final Map<String, OrdinalMapAndAtomicFieldData> perType = new HashMap<>(); for (String type : parentTypes) { final AtomicParentChildFieldData[] fieldData = new AtomicParentChildFieldData[indexReader.leaves().size()]; for (LeafReaderContext context : indexReader.leaves()) { fieldData[context.ord] = load(context); } final OrdinalMap ordMap = buildOrdinalMap(fieldData, type); ramBytesUsed += ordMap.ramBytesUsed(); perType.put(type, new OrdinalMapAndAtomicFieldData(ordMap, fieldData)); } final AtomicParentChildFieldData[] fielddata = new AtomicParentChildFieldData[indexReader.leaves().size()]; for (int i = 0; i < fielddata.length; ++i) { fielddata[i] = new GlobalAtomicFieldData(parentTypes, perType, i); } breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(ramBytesUsed); if (logger.isDebugEnabled()) { logger.debug( "global-ordinals [_parent] took [{}]", new TimeValue(System.nanoTime() - startTime, TimeUnit.NANOSECONDS) ); } return new GlobalFieldData(indexReader, fielddata, ramBytesUsed, perType); }
/** * Returns the global ordinal map for the specified type */ // TODO: OrdinalMap isn't expose in the field data framework, because it is an implementation detail. // However the JoinUtil works directly with OrdinalMap, so this is a hack to get access to OrdinalMap // I don't think we should expose OrdinalMap in IndexFieldData, because only parent/child relies on it and for the // rest of the code OrdinalMap is an implementation detail, but maybe we can expose it in IndexParentChildFieldData interface? public static MultiDocValues.OrdinalMap getOrdinalMap(IndexParentChildFieldData indexParentChildFieldData, String type) { if (indexParentChildFieldData instanceof ParentChildIndexFieldData.GlobalFieldData) { return ((GlobalFieldData) indexParentChildFieldData).ordinalMapPerType.get(type).ordMap; } else { // one segment, local ordinals are global return null; } }
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues)dv).mapping; if (map.owner == getCoreCacheKey()) { cachedOrdMaps.put(field, map); } } return dv; } } // cached ordinal map if (getFieldInfos().fieldInfo(field).getDocValuesType() != DocValuesType.SORTED) { return null; } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { AtomicReaderContext context = in.leaves().get(i); SortedDocValues v = context.reader().getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map); }
InternalGlobalOrdinalsIndexFieldData(Index index, Settings settings, MappedFieldType.Names fieldNames, FieldDataType fieldDataType, AtomicOrdinalsFieldData[] segmentAfd, OrdinalMap ordinalMap, long memorySizeInBytes) { super(index, settings, fieldNames, fieldDataType, memorySizeInBytes); this.atomicReaders = new Atomic[segmentAfd.length]; for (int i = 0; i < segmentAfd.length; i++) { atomicReaders[i] = new Atomic(segmentAfd[i], ordinalMap, i); } }
@Override public IndexParentChildFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception { final long startTime = System.nanoTime(); final Set<String> parentTypes; if (Version.indexCreated(indexSettings()).before(Version.V_2_0_0_beta1)) { synchronized (lock) { parentTypes = ImmutableSet.copyOf(this.parentTypes); } } else { parentTypes = this.parentTypes; } long ramBytesUsed = 0; final Map<String, OrdinalMapAndAtomicFieldData> perType = new HashMap<>(); for (String type : parentTypes) { final AtomicParentChildFieldData[] fieldData = new AtomicParentChildFieldData[indexReader.leaves().size()]; for (LeafReaderContext context : indexReader.leaves()) { fieldData[context.ord] = load(context); } final OrdinalMap ordMap = buildOrdinalMap(fieldData, type); ramBytesUsed += ordMap.ramBytesUsed(); perType.put(type, new OrdinalMapAndAtomicFieldData(ordMap, fieldData)); } final AtomicParentChildFieldData[] fielddata = new AtomicParentChildFieldData[indexReader.leaves().size()]; for (int i = 0; i < fielddata.length; ++i) { fielddata[i] = new GlobalAtomicFieldData(parentTypes, perType, i); } breakerService.getBreaker(CircuitBreaker.FIELDDATA).addWithoutBreaking(ramBytesUsed); if (logger.isDebugEnabled()) { logger.debug( "Global-ordinals[_parent] took {}", new TimeValue(System.nanoTime() - startTime, TimeUnit.NANOSECONDS) ); } return new GlobalFieldData(indexReader, fielddata, ramBytesUsed, perType); }
/** accumulates per-segment single-valued facet counts */ static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): // collect separately per-segment, then map to global ords accumSingleSeg(counts, si, disi, subIndex, map); } else { // otherwise: do collect+map on the fly accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map); } }
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordmap = map == null ? null : map.getGlobalOrds(subIndex); int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null && term >= 0) { term = (int) ordmap.get(term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } }
/** accumulates per-segment multi-valued facet counts */ static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): // collect separately per-segment, then map to global ords accumMultiSeg(counts, si, disi, subIndex, map); } else { // otherwise: do collect+map on the fly accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map); } }
/** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+(int)si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); int term = (int) si.nextOrd(); if (term < 0) { counts[0]++; // missing } else { do { segCounts[1+term]++; } while ((term = (int)si.nextOrd()) >= 0); } } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
/** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) */ static void migrateGlobal(int counts[], int segCounts[], int subIndex, OrdinalMap map) { final LongValues ordMap = map.getGlobalOrds(subIndex); // missing count counts[0] += segCounts[0]; // migrate actual ordinals for (int ord = 1; ord < segCounts.length; ord++) { int count = segCounts[ord]; if (count != 0) { counts[1+(int) ordMap.get(ord-1)] += count; } } }
public long accumulateObject(Object o, long shallowSize, java.util.Map<Field,Object> fieldValues, java.util.Collection<Object> queue) { if (o == LongValues.IDENTITY) { return 0L; } if (o instanceof OrdinalMap) { fieldValues = new HashMap<>(fieldValues); fieldValues.remove(ORDINAL_MAP_OWNER_FIELD); } return super.accumulateObject(o, shallowSize, fieldValues, queue); }
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues)dv).mapping; if (map.owner == getCoreCacheKey()) { cachedOrdMaps.put(field, map); } } return dv; } } // cached ordinal map if (getFieldInfos().fieldInfo(field).getDocValuesType() != DocValuesType.SORTED) { return null; } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size+1]; for (int i = 0; i < size; i++) { AtomicReaderContext context = in.leaves().get(i); SortedDocValues v = context.reader().getSortedDocValues(field); if (v == null) { v = SortedDocValues.EMPTY; } values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map); }
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */ // specialized since the single-valued case is simpler: you don't have to deal with missing count, etc static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } }
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */ static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); if (map != null && term >= 0) { term = (int) map.getGlobalOrd(subIndex, term); } int arrIdx = term-startTermIndex; if (arrIdx>=0 && arrIdx<counts.length) counts[arrIdx]++; } }