// Returns a per-document numeric key used to de-duplicate/diversify sampled docs,
// derived from a bytes-valued field: the key is the hashCode of the document's single
// BytesRef value, or 0 when the document has no value. Throws IllegalArgumentException
// if a document carries more than one value for the diversifying field.
// NOTE(review): `values` is an enclosing-instance field reassigned here and captured by
// the anonymous NumericDocValues, so two getKeys calls share state — presumably the
// aggregation framework uses one leaf at a time; confirm single-threaded use.
// NOTE(review): missing docs map to key 0 here but to Long.MIN_VALUE in the long-valued
// variant — verify the asymmetry is intentional (0 could collide with a real hash).
@Override protected NumericDocValues getKeys(LeafReaderContext context) { try { values = valuesSource.bytesValues(context); } catch (IOException e) { throw new ElasticsearchException("Error reading values", e); } return new NumericDocValues() { @Override public long get(int doc) { values.setDocument(doc); final int valuesCount = values.count(); if (valuesCount > 1) { throw new IllegalArgumentException("Sample diversifying key must be a single valued-field"); } if (valuesCount == 1) { final BytesRef bytes = values.valueAt(0); return bytes.hashCode(); } return 0; } }; }
// Returns a per-document numeric key used to de-duplicate/diversify sampled docs,
// taken directly from a long-valued field; documents with no value map to
// Long.MIN_VALUE. Throws IllegalArgumentException if a document carries more than one
// value for the diversifying field.
// NOTE(review): `values` is an enclosing-instance field reassigned here and captured by
// the anonymous NumericDocValues, so concurrent getKeys calls would clobber each other —
// confirm the framework drives this single-threaded, one leaf at a time.
@Override protected NumericDocValues getKeys(LeafReaderContext context) { try { values = valuesSource.longValues(context); } catch (IOException e) { throw new ElasticsearchException("Error reading values", e); } return new NumericDocValues() { @Override public long get(int doc) { values.setDocument(doc); final int valuesCount = values.count(); if (valuesCount > 1) { throw new IllegalArgumentException("Sample diversifying key must be a single valued-field"); } if (valuesCount == 1) { return values.valueAt(0); } return Long.MIN_VALUE; } }; }
// Randomized test for single-valued long doc values: builds an array of random longs
// with an optional docsWithValue bitset (null means "every doc has a value"), wraps it
// as a singleton SortedNumericDocValues, and verifies both the flat case and the
// nested (root/inner docs) case via the two verify overloads.
// Left byte-identical: the exact order of randomBoolean()/randomLong() calls determines
// the reproducible random stream, so statements must not be reordered.
public void testSingleValuedLongs() throws Exception { final int numDocs = scaledRandomIntBetween(1, 100); final long[] array = new long[numDocs]; final FixedBitSet docsWithValue = randomBoolean() ? null : new FixedBitSet(numDocs); for (int i = 0; i < array.length; ++i) { if (randomBoolean()) { array[i] = randomLong(); if (docsWithValue != null) { docsWithValue.set(i); } } else if (docsWithValue != null && randomBoolean()) { docsWithValue.set(i); } } final NumericDocValues singleValues = new NumericDocValues() { @Override public long get(int docID) { return array[docID]; } }; final SortedNumericDocValues multiValues = DocValues.singleton(singleValues, docsWithValue); verify(multiValues, numDocs); final FixedBitSet rootDocs = randomRootDocs(numDocs); final FixedBitSet innerDocs = randomInnerDocs(rootDocs); verify(multiValues, numDocs, rootDocs, innerDocs); }
private static FixedBitSet getSeqNosSet(final IndexReader reader, final long highestSeqNo) throws IOException { // _seq_no are stored as doc values for the time being, so this is how we get them // (as opposed to using an IndexSearcher or IndexReader) final FixedBitSet bitSet = new FixedBitSet((int) highestSeqNo + 1); final List<LeafReaderContext> leaves = reader.leaves(); if (leaves.isEmpty()) { return bitSet; } for (int i = 0; i < leaves.size(); i++) { final LeafReader leaf = leaves.get(i).reader(); final NumericDocValues values = leaf.getNumericDocValues(SeqNoFieldMapper.NAME); if (values == null) { continue; } final Bits bits = leaf.getLiveDocs(); for (int docID = 0; docID < leaf.maxDoc(); docID++) { if (bits == null || bits.get(docID)) { final long seqNo = values.get(docID); assertFalse("should not have more than one document with the same seq_no[" + seqNo + "]", bitSet.get((int) seqNo)); bitSet.set((int) seqNo); } } } return bitSet; }
/**
 * Loads a fixed-width (1 byte per value) numeric doc-values field fully into memory
 * and exposes it as a {@link NumericDocValues}.
 */
private NumericDocValues loadByteField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 1) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    final byte[] data = new byte[maxDoc];
    // Bytes can be read in one bulk call; wider types need per-value reads.
    input.readBytes(data, 0, data.length);
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(data));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return data[docID];
        }
    };
}
/**
 * Loads a fixed-width (2 bytes per value) numeric doc-values field fully into memory
 * and exposes it as a {@link NumericDocValues}.
 */
private NumericDocValues loadShortField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 2) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    final short[] data = new short[maxDoc];
    for (int doc = 0; doc < maxDoc; doc++) {
        data[doc] = input.readShort();
    }
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(data));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return data[docID];
        }
    };
}
/**
 * Loads a fixed-width (4 bytes per value) numeric doc-values field fully into memory
 * and exposes it as a {@link NumericDocValues}.
 */
private NumericDocValues loadIntField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 4) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    final int[] data = new int[maxDoc];
    for (int doc = 0; doc < maxDoc; doc++) {
        data[doc] = input.readInt();
    }
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(data));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return data[docID];
        }
    };
}
/**
 * Loads a fixed-width (8 bytes per value) numeric doc-values field fully into memory
 * and exposes it as a {@link NumericDocValues}.
 */
private NumericDocValues loadLongField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
        Lucene40DocValuesFormat.INTS_VERSION_START, Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 8) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    final long[] data = new long[maxDoc];
    for (int doc = 0; doc < maxDoc; doc++) {
        data[doc] = input.readLong();
    }
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(data));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return data[docID];
        }
    };
}
/**
 * Loads a float doc-values field (stored as 4-byte raw int bits) fully into memory
 * and exposes the raw bits as a {@link NumericDocValues}; callers decode with
 * Float.intBitsToFloat.
 */
private NumericDocValues loadFloatField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
        Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 4) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    // Kept as raw int bits; no float decoding happens at load time.
    final int[] rawBits = new int[maxDoc];
    for (int doc = 0; doc < maxDoc; doc++) {
        rawBits[doc] = input.readInt();
    }
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(rawBits));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return rawBits[docID];
        }
    };
}
/**
 * Loads a double doc-values field (stored as 8-byte raw long bits) fully into memory
 * and exposes the raw bits as a {@link NumericDocValues}; callers decode with
 * Double.longBitsToDouble.
 */
private NumericDocValues loadDoubleField(FieldInfo field, IndexInput input) throws IOException {
    // Verify codec header and the declared per-value width before touching the payload.
    CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
        Lucene40DocValuesFormat.FLOATS_VERSION_START, Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
    final int valueSize = input.readInt();
    if (valueSize != 8) {
        throw new CorruptIndexException("invalid valueSize: " + valueSize);
    }
    final int maxDoc = state.segmentInfo.getDocCount();
    // Kept as raw long bits; no double decoding happens at load time.
    final long[] rawBits = new long[maxDoc];
    for (int doc = 0; doc < maxDoc; doc++) {
        rawBits[doc] = input.readLong();
    }
    ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(rawBits));
    return new NumericDocValues() {
        @Override
        public long get(int docID) {
            return rawBits[docID];
        }
    };
}
@Override public Bytes getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Bytes() { @Override public byte get(int docID) { return (byte) valuesIn.get(docID); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Bytes.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Bytes.EMPTY; } return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
public Shorts getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Shorts() { @Override public short get(int docID) { return (short) valuesIn.get(docID); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Shorts.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Shorts.EMPTY; } return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
@Override public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Ints() { @Override public int get(int docID) { return (int) valuesIn.get(docID); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Ints.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Ints.EMPTY; } return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
@Override public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Floats() { @Override public float get(int docID) { return Float.intBitsToFloat((int) valuesIn.get(docID)); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Floats.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Floats.EMPTY; } return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
@Override public Longs getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Longs() { @Override public long get(int docID) { return valuesIn.get(docID); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Longs.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Longs.EMPTY; } return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
@Override public Doubles getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField) throws IOException { final NumericDocValues valuesIn = reader.getNumericDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return new Doubles() { @Override public double get(int docID) { return Double.longBitsToDouble(valuesIn.get(docID)); } }; } else { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return Doubles.EMPTY; } else if (info.hasDocValues()) { throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (!info.isIndexed()) { return Doubles.EMPTY; } return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); } }
// Per-segment BM25 scorer: precomputes the weighted (k1 + 1) numerator factor and
// keeps the norm source for length normalization. When the field has no norms, a
// constant-1 NumericDocValues stand-in is installed and multK1_b_InvAvgdl is set to
// k1 * b, which (see inline comment) reduces the denominator to plain k1 — i.e. the
// b == 0 behavior.
BM25DocScorer(BM25StatsFixed stats, NumericDocValues norms) throws IOException { this.stats = stats; this.weightValue = stats.weight * (k1 + 1); this.multK1minusB = stats.multK1minusB; if (norms != null) { this.multK1_b_InvAvgdl = stats.multK1_b_InvAvgdl; this.norms = norms; } else { /* * This is equivalent to setting b to 0 (which mimics behavior of the old BM25 implementation): * Because the norm function below will always return 1, * the value of the variable denom in the function score below * would be: * k1*(1-b) + k1*b*1 = k1 - b*k1 + k1*b = k1 * */ this.multK1_b_InvAvgdl = k1 * b; this.norms = new NumericDocValues () { @Override public long get(int docID) { return 1; } }; } }
/**
 * Builds the Explanation for the BM25 tf-norm factor of one document: with norms the
 * full length-normalized formula is used; without norms the computation degenerates
 * to b == 0.
 */
private Explanation explainTFNorm(int doc, Explanation freq, BM25StatsFixed stats, NumericDocValues norms) {
    List<Explanation> details = new ArrayList<>();
    details.add(freq);
    details.add(Explanation.match(k1, "parameter k1"));
    if (norms == null) {
        // No norms: length normalization is disabled (b effectively 0).
        details.add(Explanation.match(0, "parameter b (norms omitted for field)"));
        return Explanation.match(
            (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
            "tfNorm, computed from:", details);
    }
    float doclen = norms.get(doc);
    details.add(Explanation.match(b, "parameter b"));
    details.add(Explanation.match(stats.avgdl, "avgFieldLength"));
    details.add(Explanation.match(doclen, "fieldLength"));
    return Explanation.match(
        (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
        "tfNorm, computed from:", details);
}
/**
 * Builds the Explanation for the BM25 tf-norm factor of one document. Unlike the
 * BM25StatsFixed variant, the stored norm is a single encoded byte that must go
 * through decodeNormValue to recover the field length.
 */
private Explanation explainTFNorm(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms) {
    List<Explanation> details = new ArrayList<>();
    details.add(freq);
    details.add(Explanation.match(k1, "parameter k1"));
    if (norms == null) {
        // No norms: length normalization is disabled (b effectively 0).
        details.add(Explanation.match(0, "parameter b (norms omitted for field)"));
        return Explanation.match(
            (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
            "tfNorm, computed from:", details);
    }
    float doclen = decodeNormValue((byte)norms.get(doc));
    details.add(Explanation.match(b, "parameter b"));
    details.add(Explanation.match(stats.avgdl, "avgFieldLength"));
    details.add(Explanation.match(doclen, "fieldLength"));
    return Explanation.match(
        (freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
        "tfNorm, computed from:", details);
}
// Returns (and caches) the norm value for a field in this single-document in-memory
// index. Returns null when the field is unknown or omits norms. The one-entry cache
// is keyed by (field name, Similarity instance); a cache miss recomputes the norm from
// the field's token statistics via Similarity.computeNorm.
// NOTE(review): on a cache hit the previously cached `norms` object is returned
// without recomputation — statement order (read cache, compare, maybe rebuild, then
// update all three cache fields together) matters, so the code is left untouched.
@Override public NumericDocValues getNormValues(String field) { FieldInfo fieldInfo = fieldInfos.get(field); if (fieldInfo == null || fieldInfo.omitsNorms()) return null; NumericDocValues norms = cachedNormValues; Similarity sim = getSimilarity(); if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached? Info info = getInfo(field); int numTokens = info != null ? info.numTokens : 0; int numOverlapTokens = info != null ? info.numOverlapTokens : 0; float boost = info != null ? info.getBoost() : 1.0f; FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost); long value = sim.computeNorm(invertState); norms = new MemoryIndexNormDocValues(value); // cache it for future reuse cachedNormValues = norms; cachedFieldName = field; cachedSimilarity = sim; if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens); } return norms; }
private SortedDocValues newSortedInstance(final NumericDocValues docToOrd, final BinaryDocValues values, final int count) { return new SortedDocValues() { @Override public int getOrd(int docID) { return (int) docToOrd.get(docID); } @Override public BytesRef lookupOrd(int ord) { return values.get(ord); } @Override public int getValueCount() { return count; } // Leave lookupTerm to super's binary search // Leave termsEnum to super }; }
/**
 * Produces per-document decoded norm values for {@code field}. Requires a
 * TFIDFSimilarity on the searcher (needed for decodeNormValue); when the field has no
 * norms, a constant 0.0 source is returned instead.
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field);
    if (similarity == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }
    final NumericDocValues norms = readerContext.reader().getNormValues(field);
    if (norms == null) {
        // No norms for this field in this segment: expose a constant zero.
        return new ConstDoubleDocValues(0.0, this);
    }
    return new FloatDocValues(this) {
        @Override
        public float floatVal(int doc) {
            return similarity.decodeNormValue(norms.get(doc));
        }
    };
}
// Asserts that norms agree between two readers: for every field of the left reader,
// either both sides have norms (compared per-doc via assertDocValuesEquals) or
// neither does.
// NOTE(review): only leftFields is iterated — a field present solely on the right
// would go unchecked here; presumably a companion fields-equality assertion covers
// that case. Confirm before relying on this alone.
/** * checks that norms are the same across all fields */ public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { Fields leftFields = MultiFields.getFields(leftReader); Fields rightFields = MultiFields.getFields(rightReader); // Fields could be null if there are no postings, // but then it must be null for both if (leftFields == null || rightFields == null) { assertNull(info, leftFields); assertNull(info, rightFields); return; } for (String field : leftFields) { NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field); NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field); if (leftNorms != null && rightNorms != null) { assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms); } else { assertNull(info, leftNorms); assertNull(info, rightNorms); } } }
public void testBasics() throws Exception { // sanity check of norms writer // TODO: generalize AtomicReader slow = SlowCompositeReaderWrapper.wrap(reader); NumericDocValues fooNorms = slow.getNormValues("foo"); NumericDocValues barNorms = slow.getNormValues("bar"); for (int i = 0; i < slow.maxDoc(); i++) { assertFalse(fooNorms.get(i) == barNorms.get(i)); } // sanity check of searching TopDocs foodocs = searcher.search(new TermQuery(new Term("foo", "brown")), 10); assertTrue(foodocs.totalHits > 0); TopDocs bardocs = searcher.search(new TermQuery(new Term("bar", "brown")), 10); assertTrue(bardocs.totalHits > 0); assertTrue(foodocs.scoreDocs[0].score < bardocs.scoreDocs[0].score); }
/** simple encode/decode */
public void testSimple() throws Exception {
    Directory dir = newDirectory();
    int bitsPerValue = DirectWriter.bitsRequired(2);

    // Encode five small values at the minimal bit width.
    long[] expected = {1, 0, 2, 1, 2};
    IndexOutput output = dir.createOutput("foo", IOContext.DEFAULT);
    DirectWriter writer = DirectWriter.getInstance(output, 5, bitsPerValue);
    for (long value : expected) {
        writer.add(value);
    }
    writer.finish();
    output.close();

    // Decode through a random-access slice and verify the round trip.
    IndexInput input = dir.openInput("foo", IOContext.DEFAULT);
    NumericDocValues reader = DirectReader.getInstance(input.randomAccessSlice(0, input.length()), bitsPerValue);
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], reader.get(i));
    }
    input.close();
    dir.close();
}
// Round-trip test for DirectWriter/DirectReader at a given bits-per-value: writes 100
// random arrays and reads each back, asserting exact equality.
// NOTE(review): bitsRequired(1L << (bpv-1)) yields bpv for bpv < 64; bpv == 64 is
// special-cased because 1L << 63 is negative and bitsRequired would reject it. The
// shift arithmetic and the custom MyRandom stream are order-sensitive, so the code is
// left byte-identical.
private void doTestBpv(Directory directory, int bpv) throws Exception { MyRandom random = new MyRandom(random().nextLong()); for (int i = 0; i < 100; i++) { long original[] = randomLongs(random, bpv); int bitsRequired = bpv == 64 ? 64 : DirectWriter.bitsRequired(1L<<(bpv-1)); String name = "bpv" + bpv + "_" + i; IndexOutput output = directory.createOutput(name, IOContext.DEFAULT); DirectWriter writer = DirectWriter.getInstance(output, original.length, bitsRequired); for (int j = 0; j < original.length; j++) { writer.add(original[j]); } writer.finish(); output.close(); IndexInput input = directory.openInput(name, IOContext.DEFAULT); NumericDocValues reader = DirectReader.getInstance(input.randomAccessSlice(0, input.length()), bitsRequired); for (int j = 0; j < original.length; j++) { assertEquals("bpv=" + bpv, original[j], reader.get(j)); } input.close(); } }