public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
  this.docState = docWriter.docState;
  this.consumer = consumer;
  this.trackAllocations = trackAllocations;
  this.nextTermsHash = nextTermsHash;
  this.bytesUsed = trackAllocations ? docWriter.bytesUsed : Counter.newCounter();
  intPool = new IntBlockPool(docWriter.intBlockAllocator);
  bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);

  if (nextTermsHash != null) {
    // We are primary
    primary = true;
    termBytePool = bytePool;
    nextTermsHash.termBytePool = bytePool;
  } else {
    primary = false;
  }
}
TermsHash(final DocumentsWriterPerThread docWriter, boolean trackAllocations, TermsHash nextTermsHash) {
  this.docState = docWriter.docState;
  this.trackAllocations = trackAllocations;
  this.nextTermsHash = nextTermsHash;
  this.bytesUsed = trackAllocations ? docWriter.bytesUsed : Counter.newCounter();
  intPool = new IntBlockPool(docWriter.intBlockAllocator);
  bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);

  if (nextTermsHash != null) {
    // We are primary
    termBytePool = bytePool;
    nextTermsHash.termBytePool = bytePool;
  }
}
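Read against the null check above, the nextTermsHash argument decides which instance acts as primary: the downstream hash is constructed first with null, then handed to the primary, which shares its bytePool as the common termBytePool. A minimal sketch of that wiring, assuming a DocumentsWriterPerThread named docWriter is in scope and using the consumer-less constructor; the variable names and trackAllocations flags are illustrative only:

// Downstream (secondary) hash: no next link, so it is not primary.
TermsHash secondary = new TermsHash(docWriter, false, null);

// Primary hash: pushes its bytePool into the secondary's termBytePool
// so both hash term bytes against the same pool.
TermsHash primary = new TermsHash(docWriter, true, secondary);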
public void add(int textStart) throws IOException {
  int termID = bytesHash.addByPoolOffset(textStart);
  if (termID >= 0) { // New posting
    // First time we are seeing this token since we last
    // flushed the hash.
    // Init stream slices
    if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
      intPool.nextBuffer();
    }

    if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
      bytePool.nextBuffer();
    }

    intUptos = intPool.buffer;
    intUptoStart = intPool.intUpto;
    intPool.intUpto += streamCount;

    postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;

    // Allocate one first-level byte slice per stream for this term.
    for (int i = 0; i < streamCount; i++) {
      final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
    }
    postingsArray.byteStarts[termID] = intUptos[intUptoStart];

    newTerm(termID);
  } else {
    // Term already seen since the last flush; decode the existing
    // termID and restore its int slice positions.
    termID = (-termID) - 1;
    int intStart = postingsArray.intStarts[termID];
    intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
    intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
    addTerm(termID);
  }
}
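In the else branch above, postingsArray.intStarts[termID] holds a global offset that is split back into a buffer index and an in-buffer offset with a shift and a mask. A tiny standalone illustration of that arithmetic, with IntBlockPool's constants restated here as assumptions (INT_BLOCK_SHIFT = 13, hence INT_BLOCK_SIZE = 8192):

public class IntStartDemo {
  // Constants assumed to mirror org.apache.lucene.util.IntBlockPool.
  static final int INT_BLOCK_SHIFT = 13;
  static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; // 8192
  static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;

  public static void main(String[] args) {
    // A global offset as stored in postingsArray.intStarts[termID]:
    int intStart = 3 * INT_BLOCK_SIZE + 42;         // third buffer, position 42
    int bufferIndex = intStart >> INT_BLOCK_SHIFT;  // -> 3
    int offsetInBuffer = intStart & INT_BLOCK_MASK; // -> 42
    System.out.println(bufferIndex + " " + offsetInBuffer);
  }
}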
@Override
public int[] getIntBlock() {
  int[] b = new int[IntBlockPool.INT_BLOCK_SIZE];
  bytesUsed.addAndGet(IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT);
  return b;
}
public void testSingleWriterReader() {
  Counter bytesUsed = Counter.newCounter();
  IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));

  for (int j = 0; j < 2; j++) {
    IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
    int start = writer.startNewSlice();
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      writer.writeInt(i);
    }

    int upto = writer.getCurrentOffset();
    IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
    reader.reset(start, upto);
    for (int i = 0; i < num; i++) {
      assertEquals(i, reader.readInt());
    }
    assertTrue(reader.endOfSlice());
    if (random().nextBoolean()) {
      pool.reset(true, false);
      assertEquals(0, bytesUsed.get());
    } else {
      pool.reset(true, true);
      assertEquals(IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT, bytesUsed.get());
    }
  }
}
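A standalone sketch of the same writer/reader round trip without the test-framework helpers (atLeast, random(), assertEquals); it assumes the no-argument IntBlockPool constructor with its default allocator and the org.apache.lucene.util package location:

import org.apache.lucene.util.IntBlockPool;

public class SliceRoundTrip {
  public static void main(String[] args) {
    IntBlockPool pool = new IntBlockPool(); // default, non-tracking allocator (assumed)

    // Write one slice of ints into the pool.
    IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
    int start = writer.startNewSlice();
    for (int i = 0; i < 100; i++) {
      writer.writeInt(i);
    }
    int end = writer.getCurrentOffset();

    // Read the same slice back until its end offset is reached.
    IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
    reader.reset(start, end);
    while (!reader.endOfSlice()) {
      System.out.println(reader.readInt());
    }
  }
}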
private void assertReader(IntBlockPool.SliceReader reader, StartEndAndValues values) {
  reader.reset(values.start, values.end);
  for (int i = 0; i < values.valueCount; i++) {
    assertEquals(values.valueOffset + i, reader.readInt());
  }
  assertTrue(reader.endOfSlice());
}
public void add(int textStart) throws IOException {
  int termID = bytesHash.addByPoolOffset(textStart);
  if (termID >= 0) { // New posting
    // First time we are seeing this token since we last
    // flushed the hash.
    // Init stream slices
    if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
      intPool.nextBuffer();
    }

    if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
      bytePool.nextBuffer();
    }

    intUptos = intPool.buffer;
    intUptoStart = intPool.intUpto;
    intPool.intUpto += streamCount;

    postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;

    for (int i = 0; i < streamCount; i++) {
      final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
    }
    postingsArray.byteStarts[termID] = intUptos[intUptoStart];

    consumer.newTerm(termID);
  } else {
    termID = (-termID) - 1;
    int intStart = postingsArray.intStarts[termID];
    intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
    intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
    consumer.addTerm(termID);
  }
}
/** Called once per inverted token. This is the primary
 *  entry point (for first TermsHash); postings use this
 *  API. */
void add() throws IOException {
  termAtt.fillBytesRef();

  // We are first in the chain so we must "intern" the
  // term text into textStart address
  // Get the text & hash of this term.
  int termID = bytesHash.add(termBytesRef);

  //System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID);

  if (termID >= 0) { // New posting
    bytesHash.byteStart(termID);
    // Init stream slices
    if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
      intPool.nextBuffer();
    }

    if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
      bytePool.nextBuffer();
    }

    intUptos = intPool.buffer;
    intUptoStart = intPool.intUpto;
    intPool.intUpto += streamCount;

    postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;

    for (int i = 0; i < streamCount; i++) {
      final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
    }
    postingsArray.byteStarts[termID] = intUptos[intUptoStart];

    newTerm(termID);
  } else {
    termID = (-termID) - 1;
    int intStart = postingsArray.intStarts[termID];
    intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
    intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
    addTerm(termID);
  }

  if (doNextCall) {
    // Forward the interned term (by its textStart offset) to the next
    // field in the chain, e.g. the term vectors consumer.
    nextPerField.add(postingsArray.textStarts[termID]);
  }
}
public IntBlockAllocator(Counter bytesUsed) {
  super(IntBlockPool.INT_BLOCK_SIZE);
  this.bytesUsed = bytesUsed;
}
@Override
public void recycleIntBlocks(int[][] blocks, int offset, int length) {
  bytesUsed.addAndGet(-(length * (IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT)));
}
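The constructor and the two overrides above plausibly belong to one allocator class that charges and credits block memory against a shared Counter; a sketch of that shape, assuming it extends IntBlockPool.Allocator (the class name and modifiers are assumptions):

private static class IntBlockAllocator extends IntBlockPool.Allocator {
  private final Counter bytesUsed;

  public IntBlockAllocator(Counter bytesUsed) {
    super(IntBlockPool.INT_BLOCK_SIZE);
    this.bytesUsed = bytesUsed;
  }

  // Allocating a block charges its size, in bytes, to the shared counter.
  @Override
  public int[] getIntBlock() {
    int[] b = new int[IntBlockPool.INT_BLOCK_SIZE];
    bytesUsed.addAndGet(IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT);
    return b;
  }

  // Recycling credits the same amount back; the blocks themselves are
  // simply dropped and left to the garbage collector.
  @Override
  public void recycleIntBlocks(int[][] blocks, int offset, int length) {
    bytesUsed.addAndGet(-(length * (IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT)));
  }
}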
public ByteTrackingAllocator(Counter bytesUsed) {
  this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
}
@Override
void add() throws IOException {
  // We are first in the chain so we must "intern" the
  // term text into textStart address
  // Get the text & hash of this term.
  int termID;
  try {
    termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
  } catch (MaxBytesLengthExceededException e) {
    // Not enough room in current block
    // Just skip this term, to remain as robust as
    // possible during indexing.  A TokenFilter
    // can be inserted into the analyzer chain if
    // other behavior is wanted (pruning the term
    // to a prefix, throwing an exception, etc).
    if (docState.maxTermPrefix == null) {
      final int saved = termBytesRef.length;
      try {
        termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8);
        docState.maxTermPrefix = termBytesRef.toString();
      } finally {
        termBytesRef.length = saved;
      }
    }
    consumer.skippingLongTerm();
    return;
  }

  if (termID >= 0) { // New posting
    bytesHash.byteStart(termID);
    // Init stream slices
    if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
      intPool.nextBuffer();
    }

    if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
      bytePool.nextBuffer();
    }

    intUptos = intPool.buffer;
    intUptoStart = intPool.intUpto;
    intPool.intUpto += streamCount;

    postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;

    for (int i = 0; i < streamCount; i++) {
      final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
      intUptos[intUptoStart + i] = upto + bytePool.byteOffset;
    }
    postingsArray.byteStarts[termID] = intUptos[intUptoStart];

    consumer.newTerm(termID);
  } else {
    termID = (-termID) - 1;
    final int intStart = postingsArray.intStarts[termID];
    intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT];
    intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK;
    consumer.addTerm(termID);
  }

  if (doNextCall) {
    nextPerField.add(postingsArray.textStarts[termID]);
  }
}