public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
  SortedSetDocValues dv = reader.getSortedSetDocValues(field);
  if (dv != null) {
    return dv;
  }

  SortedDocValues sdv = reader.getSortedDocValues(field);
  if (sdv != null) {
    return DocValues.singleton(sdv);
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptySortedSet();
  } else if (info.hasDocValues()) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (!info.isIndexed()) {
    return DocValues.emptySortedSet();
  }

  DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
  return dto.iterator(reader);
}
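// A minimal consumption sketch (not part of the original source): iterating the
// SortedSetDocValues returned by getDocTermOrds above. The method name
// countFieldValues and the per-document loop are hypothetical; the
// setDocument/nextOrd/lookupOrd calls follow the same iterator API used by the
// other snippets here (older Lucene versions take a BytesRef out-param instead).
void countFieldValues(AtomicReader reader, String field) throws IOException {
  SortedSetDocValues dv = getDocTermOrds(reader, field);
  for (int doc = 0; doc < reader.maxDoc(); doc++) {
    dv.setDocument(doc);                   // position the iterator on this doc
    long ord;
    while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
      BytesRef value = dv.lookupOrd(ord);  // resolve the ordinal to term bytes
      // ... consume value ...
    }
  }
}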
public LocalEnv(int offset, int limit, int startTermIndex, int adjust, int targetIdx, int nTerms,
    Predicate<BytesRef> termFilter, int mincount, int[] counts, CharsRefBuilder charsRef,
    boolean extend, SortedSetDocValues si, SolrIndexSearcher searcher,
    List<Entry<LeafReader, Bits>> leaves, String fieldName, T ft, NamedList res) {
  super(offset, limit, targetIdx, mincount, fieldName, ft, res);
  if (startTermIndex == -1) {
    // weird case where missing is counted at counts[0]
    this.startTermOrd = 0;
    this.endTermOrd = nTerms - 1;
  } else if (startTermIndex >= 0) {
    this.startTermOrd = startTermIndex;
    this.endTermOrd = startTermIndex + nTerms;
  } else {
    throw new IllegalStateException();
  }
  this.startTermIndex = startTermIndex;
  this.adjust = adjust;
  this.nTerms = nTerms;
  this.termFilter = termFilter;
  this.counts = counts;
  this.charsRef = charsRef;
  this.extend = extend;
  this.si = si;
  this.searcher = searcher;
  this.leaves = leaves;
}
/** Accumulates per-segment multi-valued facet counts, mapping to global ordinal space on the fly. */
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex);
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    si.setDocument(doc);
    // strange do-while to collect the missing count (the first ord is NO_MORE_ORDS
    // only when the doc has no values)
    int term = (int) si.nextOrd();
    if (term < 0) {
      if (startTermIndex == -1) {
        counts[0]++; // missing count
      }
      continue;
    }
    do {
      if (map != null) {
        term = (int) ordMap.get(term);
      }
      int arrIdx = term - startTermIndex;
      if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++;
    } while ((term = (int) si.nextOrd()) >= 0);
  }
}
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    int termID = collectedTerms.add(fromDocTermOrds.lookupOrd(ord));
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        scoreSums = ArrayUtil.grow(scoreSums);
      }
    }
    switch (scoreMode) {
      case Total:
        scoreSums[termID] += scorer.score();
        break;
      case Max:
        scoreSums[termID] = Math.max(scoreSums[termID], scorer.score());
    }
  }
}
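// The negative return value handled above is BytesRefHash's encoding for "term
// already present": add() returns the new id on first insertion and
// (-existingId - 1) afterwards, so collectors can recover the id without a
// second lookup. A small standalone illustration (hypothetical, not from the
// original source) of that contract:
BytesRefHash hash = new BytesRefHash();
int first = hash.add(new BytesRef("term"));  // >= 0: newly inserted id
int again = hash.add(new BytesRef("term"));  // < 0: encodes the existing id
assert again == -first - 1;                  // hence termID = -termID - 1 above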
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    int termID = collectedTerms.add(fromDocTermOrds.lookupOrd(ord));
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        scoreSums = ArrayUtil.grow(scoreSums);
        scoreCounts = ArrayUtil.grow(scoreCounts);
      }
    }
    scoreSums[termID] += scorer.score();
    scoreCounts[termID]++;
  }
}
public void testDocValues() throws IOException {
  assertU(adoc("id", "1", "floatdv", "4.5", "intdv", "-1", "intdv", "3",
      "stringdv", "value1", "stringdv", "value2"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
    final SolrIndexSearcher searcher = searcherRef.get();
    try {
      final AtomicReader reader = searcher.getAtomicReader();
      assertEquals(1, reader.numDocs());
      final FieldInfos infos = reader.getFieldInfos();
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("stringdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("floatdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("intdv").getDocValuesType());
      SortedSetDocValues dv = reader.getSortedSetDocValues("stringdv");
      dv.setDocument(0);
      assertEquals(0, dv.nextOrd());
      assertEquals(1, dv.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    } finally {
      searcherRef.decref();
    }
  }
}
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    fromDocTermOrds.lookupOrd(ord, scratch);
    int termID = collectedTerms.add(scratch);
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        scoreSums = ArrayUtil.grow(scoreSums);
      }
    }
    switch (scoreMode) {
      case Total:
        scoreSums[termID] += scorer.score();
        break;
      case Max:
        scoreSums[termID] = Math.max(scoreSums[termID], scorer.score());
    }
  }
}
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    fromDocTermOrds.lookupOrd(ord, scratch);
    int termID = collectedTerms.add(scratch);
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        // grow both arrays in lockstep; growing only scoreSums would let the
        // scoreCounts increment below run out of bounds
        scoreSums = ArrayUtil.grow(scoreSums);
        scoreCounts = ArrayUtil.grow(scoreCounts);
      }
    }
    scoreSums[termID] += scorer.score();
    scoreCounts[termID]++;
  }
}
public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
  SortedSetDocValues dv = reader.getSortedSetDocValues(field);
  if (dv != null) {
    return dv;
  }

  SortedDocValues sdv = reader.getSortedDocValues(field);
  if (sdv != null) {
    return new SingletonSortedSetDocValues(sdv);
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return SortedSetDocValues.EMPTY;
  } else if (info.hasDocValues()) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (!info.isIndexed()) {
    return SortedSetDocValues.EMPTY;
  }

  DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
  return dto.iterator(reader);
}
/** Accumulates per-segment multi-valued facet counts, mapping to global ordinal space. */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    si.setDocument(doc);
    // strange do-while to collect the missing count (the first ord is NO_MORE_ORDS
    // only when the doc has no values)
    int term = (int) si.nextOrd();
    if (term < 0) {
      if (startTermIndex == -1) {
        counts[0]++; // missing count
      }
      continue;
    }
    do {
      if (map != null) {
        term = (int) map.getGlobalOrd(subIndex, term);
      }
      int arrIdx = term - startTermIndex;
      if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++;
    } while ((term = (int) si.nextOrd()) >= 0);
  }
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
  SortedSetDocValues sortedSetDocValues = _sortedSetDocValuesCache.get(field.number);
  if (sortedSetDocValues != null) {
    return sortedSetDocValues;
  }
  synchronized (_sortedSetDocValuesCache) {
    // double-checked: another thread may have populated the cache while we
    // waited for the lock
    sortedSetDocValues = _sortedSetDocValuesCache.get(field.number);
    if (sortedSetDocValues != null) {
      return sortedSetDocValues;
    }
    sortedSetDocValues = newSortedSetDocValues(field);
    if (_cache && sortedSetDocValues != null) {
      _sortedSetDocValuesCache.put(field.number, sortedSetDocValues);
    }
    return sortedSetDocValues;
  }
}
/** Accumulates per-segment multi-valued facet counts, mapping to global ordinal space on the fly. */
static void accumMultiGeneric(int counts[], int startTermIndex, SortedSetDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    si.setDocument(doc);
    // strange do-while to collect the missing count (the first ord is NO_MORE_ORDS
    // only when the doc has no values)
    int term = (int) si.nextOrd();
    if (term < 0) {
      if (startTermIndex == -1) {
        counts[0]++; // missing count
      }
      continue;
    }
    do {
      if (map != null) {
        term = (int) map.getGlobalOrd(subIndex, term);
      }
      int arrIdx = term - startTermIndex;
      if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++;
    } while ((term = (int) si.nextOrd()) >= 0);
  }
}
@Test
public void testSortedSetDocValuesField() throws Exception {
  assumeTrue("default codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
  SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
  int maxDoc = reader.maxDoc();
  BytesRef bytes = new BytesRef();
  for (int i = 0; i < maxDoc; i++) {
    dv.setDocument(i);
    dv.lookupOrd(dv.nextOrd(), bytes);
    int value = sortedValues[i].intValue();
    assertEquals("incorrect sorted-set DocValues for doc " + i,
        Integer.valueOf(value).toString(), bytes.utf8ToString());
    dv.lookupOrd(dv.nextOrd(), bytes);
    assertEquals("incorrect sorted-set DocValues for doc " + i,
        Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
  }
}
SortedSetRangeLeafCollector(SortedSetDocValues values, Range[] ranges, LeafBucketCollector sub) {
  super(sub, values);
  for (int i = 1; i < ranges.length; ++i) {
    if (RANGE_COMPARATOR.compare(ranges[i - 1], ranges[i]) > 0) {
      throw new IllegalArgumentException("Ranges must be sorted");
    }
  }
  this.values = values;
  this.sub = sub;
  froms = new long[ranges.length];
  tos = new long[ranges.length]; // inclusive
  maxTos = new long[ranges.length];
  for (int i = 0; i < ranges.length; ++i) {
    if (ranges[i].from == null) {
      froms[i] = 0;
    } else {
      // lookupTerm returns the ord on an exact match, or -1-insertionPoint on a miss
      froms[i] = values.lookupTerm(ranges[i].from);
      if (froms[i] < 0) {
        froms[i] = -1 - froms[i];
      }
    }
    if (ranges[i].to == null) {
      tos[i] = values.getValueCount() - 1;
    } else {
      long ord = values.lookupTerm(ranges[i].to);
      if (ord < 0) {
        tos[i] = -2 - ord;
      } else {
        tos[i] = ord - 1; // the "to" bound is exclusive
      }
    }
  }
  maxTos[0] = tos[0];
  for (int i = 1; i < tos.length; ++i) {
    maxTos[i] = Math.max(maxTos[i - 1], tos[i]);
  }
}
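// A worked example (illustrative only) of the lookupTerm arithmetic above.
// Suppose a segment's value dictionary is {"b" -> 0, "d" -> 1, "f" -> 2}:
//   lookupTerm("d") ==  1   exact hit: use the ord directly
//   lookupTerm("c") == -2   miss: encodes insertion point 1 as -1 - 1
// For a "from" bound of "c": froms[i] = -1 - (-2) = 1, the first ord >= "c" ("d").
// For a "to" bound of "c":   tos[i]   = -2 - (-2) = 0, the last ord < "c" ("b"),
// matching the exclusive-"to" convention in the exact-hit branch (ord - 1).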
@Override
public void collect(int doc, long bucket) throws IOException {
  values.setDocument(doc);
  int lo = 0;
  for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
    // ords come back in increasing order, so the range search can resume from
    // the previous match instead of restarting at 0
    lo = collect(doc, ord, bucket, lo);
  }
}
public LocalDocEnv(int offset, int limit, int startTermIndex, int adjust, int targetIdx,
    String targetDoc, int nTerms, Predicate<BytesRef> termFilter, int mincount, int[] counts,
    CharsRefBuilder charsRef, boolean extend, SortedSetDocValues si, SolrIndexSearcher searcher,
    DocSet docs, List<Entry<LeafReader, Bits>> leaves, String fieldName, T ft, NamedList res,
    Set<String> fl) {
  super(offset, limit, startTermIndex, adjust, targetIdx, nTerms, termFilter, mincount, counts,
      charsRef, extend, si, searcher, leaves, fieldName, ft, res);
  SchemaField uniqueKeyField = searcher.getSchema().getUniqueKeyField();
  this.targetDoc = new BytesRef(targetDoc);
  this.idField = uniqueKeyField.getName();
  this.sortField = uniqueKeyField.getSortField(false);
  this.idFieldComparator = this.sortField.getBytesComparator();
  this.sort = new Sort(sortField);
  this.docs = docs;
  this.fl = fl;
}
/** Accumulates per-segment multi-valued facet counts. */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si,
    DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost() * 10)) {
    // no prefixing, and not too many unique values relative to matching docs
    // (the lucene/facets heuristic): collect separately per segment, then map
    // to global ords in one pass
    accumMultiSeg(counts, si, disi, subIndex, map);
  } else {
    // otherwise: collect and map to global ords on the fly
    accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
  }
}
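// A worked instance of the heuristic above (numbers invented for illustration):
// with si.getValueCount() == 1000 unique values and disi.cost() == 500 matching
// docs, 1000 < 500 * 10 holds, so the per-segment path (accumMultiSeg) is taken
// and ords are remapped to global space in one batch afterwards. With only 50
// matching docs, 1000 < 500 fails, and accumMultiGeneric maps each ord on the
// fly rather than allocating and scanning a per-segment counts array that would
// dwarf the number of hits.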
/** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+(int)si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); int term = (int) si.nextOrd(); if (term < 0) { counts[0]++; // missing } else { do { segCounts[1+term]++; } while ((term = (int)si.nextOrd()) >= 0); } } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
private void buildFieldMap(ResponseBuilder rb) throws IOException {
  Log.debug("buildFieldMap");
  SolrIndexSearcher searcher = rb.req.getSearcher();
  // build a synonym map from the SortedSetDocValues:
  // for each field value: lower-case, stem, look up synonyms from synonyms.txt,
  // and map back to the field value
  SynonymMap.Builder fieldBuilder = new SynonymMap.Builder(true);
  SynonymMap.Builder termBuilder = new SynonymMap.Builder(true);
  ArrayList<String> searchFields = getStringFields(searcher);
  for (String searchField : searchFields) {
    Log.debug("adding searchField " + searchField);
    CharsRef fieldChars = new CharsRef(searchField);
    SortedSetDocValues sdv = FieldCache.DEFAULT.getDocTermOrds(searcher.getAtomicReader(), searchField);
    if (sdv == null) continue;
    Log.debug("got SortedSetDocValues for " + searchField);
    TermsEnum te = sdv.termsEnum();
    while (te.next() != null) {
      BytesRef term = te.term();
      String fieldValue = term.utf8ToString();
      addTerm(fieldChars, fieldValue, fieldBuilder, termBuilder);
    }
  }
  addDistributedTerms(rb, fieldBuilder, termBuilder, searchFields);
  fieldMap = fieldBuilder.build();
  termMap = termBuilder.build();
}
@Override
public void collect(int doc) throws IOException {
  int groupOrd = groupFieldTermsIndex.getOrd(doc);
  if (facetFieldNumTerms == 0) {
    int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1);
    if (facetPrefix != null || segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
      return;
    }
    segmentTotalCount++;
    segmentFacetCounts[facetFieldNumTerms]++;
    segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
    BytesRef groupKey;
    if (groupOrd == -1) {
      groupKey = null;
    } else {
      groupKey = BytesRef.deepCopyOf(groupFieldTermsIndex.lookupOrd(groupOrd));
    }
    groupedFacetHits.add(new GroupedFacetHit(groupKey, null));
    return;
  }

  facetFieldDocTermOrds.setDocument(doc);
  long ord;
  boolean empty = true;
  while ((ord = facetFieldDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    process(groupOrd, (int) ord);
    empty = false;
  }
  if (empty) {
    // this facet ord is reserved for docs that contain no value in the facet field
    process(groupOrd, facetFieldNumTerms);
  }
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
  SortedSetDocValues sortedSetDV = in.getSortedSetDocValues(field);
  if (sortedSetDV == null) {
    return null;
  } else {
    return new SortingSortedSetDocValues(sortedSetDV, docMap);
  }
}
@Test
public void testSortedSetDocValuesField() throws Exception {
  assumeTrue("default codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
  SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
  int maxDoc = reader.maxDoc();
  for (int i = 0; i < maxDoc; i++) {
    dv.setDocument(i);
    BytesRef bytes = dv.lookupOrd(dv.nextOrd());
    int value = sortedValues[i].intValue();
    assertEquals("incorrect sorted-set DocValues for doc " + i,
        Integer.valueOf(value).toString(), bytes.utf8ToString());
    bytes = dv.lookupOrd(dv.nextOrd());
    assertEquals("incorrect sorted-set DocValues for doc " + i,
        Integer.valueOf(value + 1).toString(), bytes.utf8ToString());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
  }
}
@Override
public void collect(int doc) throws IOException {
  docTermOrds.setDocument(doc);
  long ord;
  while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    final BytesRef term = docTermOrds.lookupOrd(ord);
    collectorTerms.add(term);
  }
}
@Override
public FieldComparator<?> getComparator(int numHits, int sortPos) throws IOException {
  return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {
    @Override
    protected SortedDocValues getSortedDocValues(AtomicReaderContext context, String field) throws IOException {
      SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
      if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE - 1)
            + " unique terms are unsupported");
      }

      SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
      if (singleton != null) {
        // the field is single-valued in practice even though it was indexed as
        // multi-valued, so sort on the underlying single-valued dv directly.
        // this optimization is safe regardless of the selector type!
        return singleton;
      } else if (selector == Selector.MIN) {
        return new MinValue(sortedSet);
      } else {
        if (sortedSet instanceof RandomAccessOrds == false) {
          throw new UnsupportedOperationException("codec does not support random access ordinals, cannot use selector: " + selector);
        }
        RandomAccessOrds randomOrds = (RandomAccessOrds) sortedSet;
        switch (selector) {
          case MAX:
            return new MaxValue(randomOrds);
          case MIDDLE_MIN:
            return new MiddleMinValue(randomOrds);
          case MIDDLE_MAX:
            return new MiddleMaxValue(randomOrds);
          case MIN:
          default:
            throw new AssertionError();
        }
      }
    }
  };
}
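// MinValue above is referenced but not shown in these snippets. A minimal
// sketch of the idea, under the same API assumptions as the surrounding code
// (exact abstract methods of SortedDocValues vary by Lucene version): it
// presents a multi-valued SortedSetDocValues as a SortedDocValues by taking
// each document's first ordinal, which is the smallest because nextOrd()
// returns ords in increasing order.
static class MinValue extends SortedDocValues {
  final SortedSetDocValues in;

  MinValue(SortedSetDocValues in) { this.in = in; }

  @Override
  public int getOrd(int doc) {
    in.setDocument(doc);
    // first ord is the minimum; NO_MORE_ORDS (-1) doubles as the
    // SortedDocValues "missing" sentinel for docs with no values
    return (int) in.nextOrd();
  }

  @Override
  public BytesRef lookupOrd(int ord) { return in.lookupOrd(ord); }

  @Override
  public int getValueCount() { return (int) in.getValueCount(); }
}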