/************* methods *****************************/

/**
 * Positions this object on the column node that begins at the given offset of the block and
 * decodes the node's header fields.
 * <p>
 * The token length is read with {@link UVIntTool} at the node start; the token bytes begin right
 * after that length field, and the parent start position is read with {@link UFIntTool} from the
 * bytes that follow the token. The width of the parent field comes from the block meta: the
 * family width for FAMILY nodes, the qualifier width for QUALIFIER nodes, and the tags width for
 * any other node type.
 * </p>
 *
 * @param offsetIntoBlock offset within the block at which the node starts
 */
public void positionAt(int offsetIntoBlock) {
  this.offsetIntoBlock = offsetIntoBlock;

  // Node layout: [token length][token bytes][parent start position].
  tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
  tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
  final int parentFieldStart = tokenOffsetIntoBlock + tokenLength;

  // Anything that is neither FAMILY nor QUALIFIER falls through to the tags width.
  final int parentFieldWidth = (nodeType == ColumnNodeType.FAMILY)
      ? blockMeta.getFamilyOffsetWidth()
      : (nodeType == ColumnNodeType.QUALIFIER)
          ? blockMeta.getQualifierOffsetWidth()
          : blockMeta.getTagsOffsetWidth();

  parentStartPosition = (int) UFIntTool.fromBytes(block, parentFieldStart, parentFieldWidth);
}
/**
 * Writes one family offset per cell of this node.
 * <p>
 * This is one of the per-cell sections of a row, which mostly consist of indexes or offsets into
 * the timestamp/column data structures that are written in the middle of the block. They are
 * encoded with {@link UFIntTool} to allow random access during a binary search of a particular
 * column/timestamp combination. Branch nodes will not have any data in these sections.
 * </p>
 * <p>
 * For each cell, the insertion index is mapped to a sorted index by the family sorter, and the
 * family writer's output array offset for that sorted index is written using the block's family
 * offset width. Nothing is written when that width is not positive.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
  if (blockMeta.getFamilyOffsetWidth() <= 0) {
    return;
  }
  for (int cell = 0; cell < numCells; ++cell) {
    // When multiple families are not possible, every cell uses insertion index 0.
    final int insertionId;
    if (PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE) {
      insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
    } else {
      insertionId = 0;
    }
    final int sortedPosition =
        prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(insertionId);
    final int familyOffset =
        prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(sortedPosition);
    UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), familyOffset, os);
  }
}
/** * Now that all the cells have been added, do the work to reduce them to a series of byte[] * fragments that are ready to be written to the output stream. */ protected void compile(){ blockMeta.setNumKeyValueBytes(totalUnencodedBytes); int lastValueOffset = valueOffsets[totalCells]; blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); blockMeta.setNumValueBytes(totalValueBytes); totalBytes += totalTagBytes + totalValueBytes; //these compile methods will add to totalBytes compileTypes(); compileMvccVersions(); compileTimestamps(); compileTags(); compileQualifiers(); compileFamilies(); compileRows(); int numMetaBytes = blockMeta.calculateNumMetaBytes(); blockMeta.setNumMetaBytes(numMetaBytes); totalBytes += numMetaBytes; }
/**
 * Emits the family offset of every cell held by this node.
 * <p>
 * The per-cell sections of a row mostly consist of indexes or offsets into the timestamp/column
 * data structures that are written in the middle of the block. {@link UFIntTool} is used to
 * encode these indexes/offsets to allow random access during a binary search of a particular
 * column/timestamp combination.
 * </p>
 * <p>
 * Branch nodes will not have any data in these sections. The section is also skipped entirely
 * when the block's family offset width is zero or negative.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
  if (blockMeta.getFamilyOffsetWidth() > 0) {
    int cellNumber = 0;
    while (cellNumber < numCells) {
      // Insertion index stays 0 unless multiple families are possible.
      int insertionId = 0;
      if (PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE) {
        insertionId = tokenizerNode.getFirstInsertionIndex() + cellNumber;
      }
      int sortedSlot =
          prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(insertionId);
      int offsetInFamilyOutput =
          prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(sortedSlot);
      UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), offsetInFamilyOutput, os);
      ++cellNumber;
    }
  }
}
/** * Now that all the cells have been added, do the work to reduce them to a series of byte[] * fragments that are ready to be written to the output stream. */ protected void compile(){ blockMeta.setNumKeyValueBytes(totalUnencodedBytes); int lastValueOffset = valueOffsets[totalCells]; blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); blockMeta.setNumValueBytes(totalValueBytes); totalBytes += totalValueBytes; //these compile methods will add to totalBytes compileTypes(); compileMvccVersions(); compileTimestamps(); compileQualifiers(); compileFamilies(); compileRows(); int numMetaBytes = blockMeta.calculateNumMetaBytes(); blockMeta.setNumMetaBytes(numMetaBytes); totalBytes += numMetaBytes; }
/************** methods *************************/ public long getMvccVersion(int index) { if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical return blockMeta.getMinMvccVersion(); } int startIndex = blockMeta.getAbsoluteMvccVersionOffset() + blockMeta.getMvccVersionDeltaWidth() * index; long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth()); return blockMeta.getMinMvccVersion() + delta; }
/************** methods *************************/ public long getLong(int index) { if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical return blockMeta.getMinTimestamp(); } int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth() * index; long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth()); return blockMeta.getMinTimestamp() + delta; }
/**
 * Derives the sorted value table and the encoding widths from the collected unique values.
 * <p>
 * With exactly one unique value, only {@code min} and a one-element {@code sortedUniqueValues}
 * are set and the method returns immediately; the remaining fields are left untouched.
 * Otherwise the unique values are copied into an array and sorted, {@code min}, {@code max} and
 * {@code maxDelta} are taken from it, and the per-delta width, per-index width and total
 * compressed size are computed.
 * </p>
 *
 * @return this encoder, to allow call chaining
 */
public LongEncoder compile() {
  final int uniqueCount = uniqueValues.size();

  // Single-value shortcut: no deltas or indexes are computed.
  if (uniqueCount == 1) {
    min = CollectionUtils.getFirst(uniqueValues);
    sortedUniqueValues = new long[] { min };
    return this;
  }

  // Copy the unique values into a primitive array, then order them.
  sortedUniqueValues = new long[uniqueCount];
  int nextSlot = 0;
  for (long uniqueValue : uniqueValues) {
    sortedUniqueValues[nextSlot++] = uniqueValue;
  }
  Arrays.sort(sortedUniqueValues);

  min = ArrayUtils.getFirst(sortedUniqueValues);
  max = ArrayUtils.getLast(sortedUniqueValues);
  maxDelta = max - min;

  // A non-positive spread needs no delta bytes at all.
  bytesPerDelta = (maxDelta > 0) ? UFIntTool.numBytes(maxDelta) : 0;

  // Index width is sized from the highest index, i.e. uniqueCount - 1.
  bytesPerIndex = UFIntTool.numBytes(uniqueCount - 1);
  totalCompressedBytes = uniqueCount * bytesPerDelta;
  return this;
}
/**
 * Writes one qualifier offset per cell of this node.
 * <p>
 * For each cell, the insertion index (the node's first insertion index plus the cell number) is
 * mapped to a sorted index by the qualifier sorter, and the qualifier writer's output array
 * offset for that sorted index is written using the block's qualifier offset width. Nothing is
 * written when that width is not positive.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
  if (blockMeta.getQualifierOffsetWidth() > 0) {
    for (int cell = 0; cell < numCells; ++cell) {
      final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
      final int sortedPosition =
          prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(insertionId);
      final int qualifierOffset =
          prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(sortedPosition);
      UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), qualifierOffset, os);
    }
  }
}
/**
 * Writes one tag offset per cell of this node.
 * <p>
 * For each cell, the insertion index (the node's first insertion index plus the cell number) is
 * mapped to a sorted index by the tag sorter, and the tag writer's output array offset for that
 * sorted index is written using the block's tags offset width. Nothing is written when that
 * width is not positive.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeTagNodeOffsets(OutputStream os) throws IOException {
  if (blockMeta.getTagsOffsetWidth() > 0) {
    int cell = 0;
    while (cell < numCells) {
      final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
      final int sortedPosition =
          prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(insertionId);
      final int tagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(sortedPosition);
      UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), tagOffset, os);
      ++cell;
    }
  }
}
/**
 * Writes one timestamp index per cell of this node.
 * <p>
 * Each cell's timestamp is looked up in the encoder's timestamp array by insertion index (the
 * node's first insertion index plus the cell number), converted to an index by the timestamp
 * encoder, and written using the block's timestamp index width. Nothing is written when that
 * width is not positive.
 * </p>
 *
 * @param os stream that receives the encoded indexes
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeTimestampIndexes(OutputStream os) throws IOException {
  if (blockMeta.getTimestampIndexWidth() > 0) {
    for (int cell = 0; cell < numCells; ++cell) {
      final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
      final long cellTimestamp = prefixTreeEncoder.getTimestamps()[insertionId];
      final int indexOfTimestamp = prefixTreeEncoder.getTimestampEncoder().getIndex(cellTimestamp);
      UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), indexOfTimestamp, os);
    }
  }
}
/**
 * Writes one mvcc version index per cell of this node.
 * <p>
 * Each cell's mvcc version is looked up in the encoder's mvcc version array by insertion index
 * (the node's first insertion index plus the cell number), converted to an index by the mvcc
 * version encoder, and written using the block's mvcc version index width. Nothing is written
 * when that width is not positive.
 * </p>
 *
 * @param os stream that receives the encoded indexes
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
  if (blockMeta.getMvccVersionIndexWidth() > 0) {
    int cell = 0;
    while (cell < numCells) {
      final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
      final long cellMvccVersion = prefixTreeEncoder.getMvccVersions()[insertionId];
      final int indexOfVersion =
          prefixTreeEncoder.getMvccVersionEncoder().getIndex(cellMvccVersion);
      UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), indexOfVersion, os);
      ++cell;
    }
  }
}
/**
 * Writes one value offset per cell of this node.
 * <p>
 * Each offset is obtained from the encoder by insertion index (the node's first insertion index
 * plus the cell number) and written using the block's value offset width. Unlike the column and
 * timestamp sections, this method has no width guard.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeValueOffsets(OutputStream os) throws IOException {
  int cell = 0;
  while (cell < numCells) {
    final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
    final long valueStart = prefixTreeEncoder.getValueOffset(insertionId);
    UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStart, os);
    ++cell;
  }
}
/**
 * Writes one value length per cell of this node.
 * <p>
 * Each length is obtained from the encoder by insertion index (the node's first insertion index
 * plus the cell number) and written using the block's value length width. This method has no
 * width guard.
 * </p>
 *
 * @param os stream that receives the encoded lengths
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeValueLengths(OutputStream os) throws IOException {
  int cell = 0;
  while (cell < numCells) {
    final int insertionId = tokenizerNode.getFirstInsertionIndex() + cell;
    final int lengthOfValue = prefixTreeEncoder.getValueLength(insertionId);
    UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), lengthOfValue, os);
    ++cell;
  }
}
/**
 * If a branch or a nub, the last thing appended are the UFInt offsets to the child row nodes.
 * <p>
 * One value is written per child, in child order, using the block's next-node offset width. Each
 * value is this node's negative index minus the child's negative index. A node without children
 * writes nothing.
 * </p>
 *
 * @param os stream that receives the encoded offsets
 * @throws IOException propagated from writing to {@code os}
 */
protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
  for (TokenizerNode childNode : tokenizerNode.getChildren()) {
    final int gapToChild = tokenizerNode.getNegativeIndex() - childNode.getNegativeIndex();
    UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), gapToChild, os);
  }
}
/**
 * Serializes this column node to the given stream.
 * <p>
 * The output is, in order: the token length written with {@link UVIntTool}, the raw token bytes,
 * and the parent start position written with {@link UFIntTool}. The width of the parent field
 * comes from the block meta: the family width for FAMILY nodes, the qualifier width for
 * QUALIFIER nodes, and the tags width for any other node type.
 * </p>
 *
 * @param os stream that receives the node bytes
 * @throws IOException propagated from writing to {@code os}
 */
public void writeBytes(OutputStream os) throws IOException {
  // The width is resolved before anything is written, as in the decode path's field layout.
  final int widthOfParentField = (this.nodeType == ColumnNodeType.FAMILY)
      ? blockMeta.getFamilyOffsetWidth()
      : (this.nodeType == ColumnNodeType.QUALIFIER)
          ? blockMeta.getQualifierOffsetWidth()
          : blockMeta.getTagsOffsetWidth();

  UVIntTool.writeBytes(tokenLength, os);
  os.write(token);
  UFIntTool.writeBytes(widthOfParentField, parentStartPosition, os);
}