/** * Turns the given BytesReference into a ByteBuf. Note: the returned ByteBuf will reference the internal * pages of the BytesReference. Don't free the bytes of reference before the ByteBuf goes out of scope. */ public static ByteBuf toByteBuf(final BytesReference reference) { if (reference.length() == 0) { return Unpooled.EMPTY_BUFFER; } if (reference instanceof ByteBufBytesReference) { return ((ByteBufBytesReference) reference).toByteBuf(); } else { final BytesRefIterator iterator = reference.iterator(); // usually we have one, two, or three components from the header, the message, and a buffer final List<ByteBuf> buffers = new ArrayList<>(3); try { BytesRef slice; while ((slice = iterator.next()) != null) { buffers.add(Unpooled.wrappedBuffer(slice.bytes, slice.offset, slice.length)); } final CompositeByteBuf composite = Unpooled.compositeBuffer(buffers.size()); composite.addComponents(true, buffers); return composite; } catch (IOException ex) { throw new AssertionError("no IO happens here", ex); } } }
/**
 * Checks that concatenating every chunk returned by the iterator reproduces the bytes of the reference,
 * for a reference that is randomly sliced and/or randomly converted into a {@code BytesArray}.
 */
public void testIteratorRandom() throws IOException {
    final int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    BytesReference pbr = newBytesReference(length);

    // optionally narrow the reference down to a random slice
    if (randomBoolean()) {
        final int sliceOffset = randomIntBetween(0, pbr.length());
        final int sliceLength = randomIntBetween(0, pbr.length() - sliceOffset);
        pbr = pbr.slice(sliceOffset, sliceLength);
    }

    // optionally re-wrap the bytes as a BytesArray
    if (randomBoolean()) {
        pbr = new BytesArray(pbr.toBytesRef());
    }

    final BytesRefBuilder collected = new BytesRefBuilder();
    final BytesRefIterator chunks = pbr.iterator();
    for (BytesRef chunk = chunks.next(); chunk != null; chunk = chunks.next()) {
        collected.append(chunk);
    }

    final byte[] expected = BytesReference.toBytes(pbr);
    final byte[] actual = BytesRef.deepCopyOf(collected.toBytesRef()).bytes;
    assertArrayEquals(expected, actual);
}
/**
 * Walks all terms of the given {@link TermsEnum} and associates each term's
 * ordinal with the documents of that term. The caller must exhaust the
 * returned {@link BytesRefIterator}; it returns every term, the first
 * returned value being associated with the ordinal {@code 1}, and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values it should
 * be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. Without
 * that wrapping the returned {@link BytesRefIterator} will contain partial
 * precision terms rather than only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
  return new BytesRefIterator() {
    // postings instance handed back to the terms enum on every call
    private PostingsEnum reusedPostings = null;

    @Override
    public BytesRef next() throws IOException {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        return null;
      }
      reusedPostings = termsEnum.postings(reusedPostings, PostingsEnum.NONE);
      nextOrdinal();
      for (int doc = reusedPostings.nextDoc();
           doc != DocIdSetIterator.NO_MORE_DOCS;
           doc = reusedPostings.nextDoc()) {
        addDoc(doc);
      }
      return term;
    }
  };
}
/**
 * Returns a hash over every byte exposed by {@link #iterator()}, using the
 * {@code 31 * h + b} recurrence seeded with {@code 1}. The value is stored in
 * {@code hash} on first computation and returned from there afterwards.
 */
@Override
public int hashCode() {
    if (hash != null) {
        return hash.intValue();
    }
    int computed = 1;
    final BytesRefIterator chunks = iterator();
    try {
        for (BytesRef chunk = chunks.next(); chunk != null; chunk = chunks.next()) {
            for (int i = 0; i < chunk.length; i++) {
                computed = 31 * computed + chunk.bytes[chunk.offset + i];
            }
        }
    } catch (IOException ex) {
        throw new AssertionError("wont happen", ex);
    }
    hash = computed;
    return computed;
}
/**
 * <p>
 * Returns a {@link BytesRefIterator} with point in time semantics: it gives
 * access to all {@link BytesRef} instances appended so far.
 * </p>
 * <p>
 * When a non <code>null</code> {@link Comparator} is given, values are
 * returned in the order defined by that comparator. Otherwise they are
 * returned in the order in which they were appended.
 * </p>
 * <p>
 * This is a non-destructive operation.
 * </p>
 */
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
  final BytesRefBuilder spare = new BytesRefBuilder();
  final int limit = size();
  final int[] order;
  if (comp == null) {
    order = null;
  } else {
    order = sort(comp);
  }
  return new BytesRefIterator() {
    int cursor = 0;

    @Override
    public BytesRef next() {
      if (cursor >= limit) {
        return null;
      }
      final int current = cursor++;
      final int index = (order == null) ? current : order[current];
      return get(spare, index);
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return comp;
    }
  };
}
/**
 * Iterates all terms in the given {@link TermsEnum} and associates each
 * term's ordinal with the term's documents. The caller must exhaust the
 * returned {@link BytesRefIterator}, which returns all values; the first
 * returned value is associated with the ordinal <tt>1</tt>, and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
  return new BytesRefIterator() {
    private PostingsEnum postings = null;

    @Override
    public BytesRef next() throws IOException {
      final BytesRef current = termsEnum.next();
      if (current != null) {
        postings = termsEnum.postings(postings, PostingsEnum.NONE);
        nextOrdinal();
        // record every document of the current term under the ordinal just started
        int docId = postings.nextDoc();
        while (docId != DocIdSetIterator.NO_MORE_DOCS) {
          addDoc(docId);
          docId = postings.nextDoc();
        }
      }
      return current;
    }
  };
}
/** * Builds the final automaton from a list of entries. */ private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException { // Build the automaton. final Outputs<Object> outputs = NoOutputs.getSingleton(); final Object empty = outputs.getNoOutput(); final Builder<Object> builder = new Builder<>( FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, outputs, false, PackedInts.DEFAULT, true, 15); BytesRefBuilder scratch = new BytesRefBuilder(); BytesRef entry; final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); int count = 0; BytesRefIterator iter = sorter.iterator(); while((entry = iter.next()) != null) { count++; if (scratch.get().compareTo(entry) != 0) { builder.add(Util.toIntsRef(entry, scratchIntsRef), empty); scratch.copyBytes(entry); } } return count == 0 ? null : builder.finish(); }
/**
 * Returns an iterator over the sorted file. On the first call the writer is
 * closed, the input is sorted into a new temporary file, and the input file
 * is deleted; later calls reuse that sorted file.
 */
@Override
public BytesRefIterator iterator() throws IOException {
  final boolean needsSorting = (sorted == null);
  if (needsSorting) {
    closeWriter();
    sorted = File.createTempFile("RefSorter-", ".sorted", OfflineSorter.defaultTempDir());
    sort.sort(input, sorted);

    // the unsorted input is no longer needed
    input.delete();
    input = null;
  }

  final OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
  return new ByteSequenceIterator(reader, sort.getComparator());
}
private void check(BytesRefSorter sorter) throws Exception { for (int i = 0; i < 100; i++) { byte [] current = new byte [random().nextInt(256)]; random().nextBytes(current); sorter.add(new BytesRef(current)); } // Create two iterators and check that they're aligned with each other. BytesRefIterator i1 = sorter.iterator(); BytesRefIterator i2 = sorter.iterator(); // Verify sorter contract. try { sorter.add(new BytesRef(new byte [1])); fail("expected contract violation."); } catch (IllegalStateException e) { // Expected. } BytesRef spare1; BytesRef spare2; while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) { assertEquals(spare1, spare2); } assertNull(i1.next()); assertNull(i2.next()); }
/** * Builds the final automaton from a list of entries. */ private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException { // Build the automaton. final Outputs<Object> outputs = NoOutputs.getSingleton(); final Object empty = outputs.getNoOutput(); final Builder<Object> builder = new Builder<Object>( FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, outputs, null, false, PackedInts.DEFAULT, true, 15); BytesRef scratch = new BytesRef(); BytesRef entry; final IntsRef scratchIntsRef = new IntsRef(); int count = 0; BytesRefIterator iter = sorter.iterator(); while((entry = iter.next()) != null) { count++; if (scratch.compareTo(entry) != 0) { builder.add(Util.toIntsRef(entry, scratchIntsRef), empty); scratch.copyBytes(entry); } } return count == 0 ? null : builder.finish(); }
/**
 * Returns an iterator reading from the sorted file. If no sorted file exists
 * yet, the writer is closed, the input is sorted into a fresh temporary file
 * and the input file is deleted before the iterator is created.
 */
@Override
public BytesRefIterator iterator() throws IOException {
  if (sorted != null) {
    return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted), sort.getComparator());
  }

  closeWriter();
  sorted = File.createTempFile("RefSorter-", ".sorted", Sort.defaultTempDir());
  sort.sort(input, sorted);

  // drop the unsorted input now that the sorted copy exists
  input.delete();
  input = null;

  return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted), sort.getComparator());
}
/**
 * <p>
 * Returns a {@link BytesRefIterator} with point in time semantics. The
 * iterator exposes every {@link BytesRef} instance appended so far.
 * </p>
 * <p>
 * If the given {@link Comparator} is not <code>null</code>, the byte values
 * are iterated in the order it defines; otherwise they are iterated in the
 * order in which they were appended.
 * </p>
 * <p>
 * This is a non-destructive operation.
 * </p>
 */
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
  final BytesRef scratch = new BytesRef();
  final int count = size();
  final int[] sortedIndices = (comp != null) ? sort(comp) : null;
  return new BytesRefIterator() {
    int next = 0;

    @Override
    public BytesRef next() {
      if (next < count) {
        final int slot = next;
        next++;
        if (sortedIndices == null) {
          return get(scratch, slot);
        }
        return get(scratch, sortedIndices[slot]);
      }
      return null;
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return comp;
    }
  };
}
/**
 * Checks that appending every chunk returned by the iterator yields exactly the bytes of the reference.
 */
public void testIterator() throws IOException {
    final int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    final BytesReference pbr = newBytesReference(length);

    final BytesRefIterator chunks = pbr.iterator();
    final BytesRefBuilder collected = new BytesRefBuilder();
    for (BytesRef chunk = chunks.next(); chunk != null; chunk = chunks.next()) {
        collected.append(chunk);
    }

    final byte[] expected = BytesReference.toBytes(pbr);
    final byte[] actual = BytesRef.deepCopyOf(collected.toBytesRef()).bytes;
    assertArrayEquals(expected, actual);
}
/**
 * Checks that iterating a random slice of a reference yields exactly the bytes of that slice.
 */
public void testSliceIterator() throws IOException {
    final int length = randomIntBetween(10, PAGE_SIZE * randomIntBetween(2, 8));
    final BytesReference pbr = newBytesReference(length);

    final int sliceOffset = randomIntBetween(0, pbr.length());
    final int sliceLength = randomIntBetween(0, pbr.length() - sliceOffset);
    final BytesReference slice = pbr.slice(sliceOffset, sliceLength);

    final BytesRefIterator chunks = slice.iterator();
    final BytesRefBuilder collected = new BytesRefBuilder();
    while (true) {
        final BytesRef chunk = chunks.next();
        if (chunk == null) {
            break;
        }
        collected.append(chunk);
    }

    final byte[] expected = BytesReference.toBytes(slice);
    final byte[] actual = BytesRef.deepCopyOf(collected.toBytesRef()).bytes;
    assertArrayEquals(expected, actual);
}
/**
 * Returns a clone of the first chunk of the reference when its iterator yields exactly one chunk.
 *
 * @param ref the reference to inspect
 * @return a new empty {@link BytesRef} if the reference has no bytes, a clone of the only chunk if the
 *         iterator yields exactly one, or {@code null} if it yields more than one
 */
public static BytesRef getSinglePageOrNull(BytesReference ref) throws IOException {
    if (ref.length() <= 0) {
        return new BytesRef();
    }
    final BytesRefIterator pages = ref.iterator();
    final BytesRef firstPage = pages.next().clone();
    // a second chunk means the reference spans more than one page
    return pages.next() == null ? firstPage : null;
}
/**
 * Counts the chunks returned by the iterator of the given reference.
 *
 * @param ref the reference to inspect
 * @return the number of chunks, or {@code 0} if the reference has no bytes
 */
public static int getNumPages(BytesReference ref) throws IOException {
    if (ref.length() <= 0) {
        return 0;
    }
    int pages = 0;
    for (BytesRefIterator it = ref.iterator(); it.next() != null; ) {
        pages++;
    }
    return pages;
}
/**
 * Creates a stream input over the given iterator. The first chunk is fetched from the iterator
 * immediately, and both the stream offset and the offset within the current chunk start at zero.
 *
 * @param iterator source of the chunks to read
 * @param length   length recorded for this stream
 * @throws IOException if fetching the first chunk fails
 */
BytesReferenceStreamInput(BytesRefIterator iterator, final int length) throws IOException {
    this.length = length;
    this.offset = 0;
    this.sliceOffset = 0;
    this.iterator = iterator;
    // eagerly position on the first chunk
    this.slice = iterator.next();
}
/**
 * Writes the bytes of this reference directly to the given output stream, one iterator chunk at a time.
 *
 * @param os the stream to write to
 * @throws IOException if iterating or writing fails
 */
public void writeTo(OutputStream os) throws IOException {
    final BytesRefIterator chunks = iterator();
    for (BytesRef chunk = chunks.next(); chunk != null; chunk = chunks.next()) {
        os.write(chunk.bytes, chunk.offset, chunk.length);
    }
}
/** * Returns a BytesRefIterator for this BytesReference. This method allows * access to the internal pages of this reference without copying them. Use with care! * @see BytesRefIterator */ public BytesRefIterator iterator() { return new BytesRefIterator() { BytesRef ref = length() == 0 ? null : toBytesRef(); @Override public BytesRef next() throws IOException { BytesRef r = ref; ref = null; // only return it once... return r; } }; }
@Override public final BytesRefIterator iterator() { final int offset = this.offset; final int length = this.length; // this iteration is page aligned to ensure we do NOT materialize the pages from the ByteArray // we calculate the initial fragment size here to ensure that if this reference is a slice we are still page aligned // across the entire iteration. The first page is smaller if our offset != 0 then we start in the middle of the page // otherwise we iterate full pages until we reach the last chunk which also might end within a page. final int initialFragmentSize = offset != 0 ? PAGE_SIZE - (offset % PAGE_SIZE) : PAGE_SIZE; return new BytesRefIterator() { int position = 0; int nextFragmentSize = Math.min(length, initialFragmentSize); // this BytesRef is reused across the iteration on purpose - BytesRefIterator interface was designed for this final BytesRef slice = new BytesRef(); @Override public BytesRef next() throws IOException { if (nextFragmentSize != 0) { final boolean materialized = byteArray.get(offset + position, nextFragmentSize, slice); assert materialized == false : "iteration should be page aligned but array got materialized"; position += nextFragmentSize; final int remaining = length - position; nextFragmentSize = Math.min(remaining, PAGE_SIZE); return slice; } else { assert nextFragmentSize == 0 : "fragmentSize expected [0] but was: [" + nextFragmentSize + "]"; return null; // we are done with this iteration } } }; }
@Override public BytesRef toBytesRef() { BytesRefBuilder builder = new BytesRefBuilder(); builder.grow(length()); BytesRef spare; BytesRefIterator iterator = iterator(); try { while ((spare = iterator.next()) != null) { builder.append(spare); } } catch (IOException ex) { throw new AssertionError("won't happen", ex); // this is really an error since we don't do IO in our bytesreferences } return builder.toBytesRef(); }
/**
 * Checks that a {@link HighFrequencyDictionary} over an index to which no
 * documents were added returns an entry iterator that has no comparator and
 * no entries.
 */
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
  BytesRefIterator tf = dictionary.getEntryIterator();
  assertNull(tf.getComparator());
  assertNull(tf.next());
  // release the reader before its directory; it was previously left open
  ir.close();
  dir.close();
}
/**
 * Adds an iterator to the list. If the current position is still
 * <code>-1</code>, it is moved to <code>0</code>.
 *
 * @param wrapped the iterator to append
 */
public void add(final BytesRefIterator wrapped) {
  iters.add(wrapped);
  final boolean positionUnset = (curr == -1);
  if (positionUnset) {
    curr = 0;
  }
}
/**
 * Returns an iterator over the terms of the configured field, or
 * {@link BytesRefIterator#EMPTY} when the reader has no terms for it.
 */
@Override
public final BytesRefIterator getWordsIterator() throws IOException {
  final Terms fieldTerms = MultiFields.getTerms(reader, field);
  if (fieldTerms == null) {
    return BytesRefIterator.EMPTY;
  }
  return fieldTerms.iterator(null);
}
/**
 * Checks that a {@link HighFrequencyDictionary} over an index to which no
 * documents were added returns a words iterator that has no comparator and
 * no entries.
 */
public void testEmpty() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
  BytesRefIterator tf = dictionary.getWordsIterator();
  assertNull(tf.getComparator());
  assertNull(tf.next());
  // release the reader before its directory; it was previously left open
  ir.close();
  dir.close();
}