public void finishTerm(long defaultWeight) throws IOException {
  ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
  int deduplicator = 0;
  analyzed.append((byte) 0);
  analyzed.setLength(analyzed.length() + 1);
  analyzed.grow(analyzed.length());
  for (int i = 0; i < count; i++) {
    analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
    Util.toIntsRef(analyzed.get(), scratchInts);
    SurfaceFormAndPayload candidate = surfaceFormsAndPayload[i];
    long cost = candidate.weight == -1
        ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight))
        : candidate.weight;
    builder.add(scratchInts.get(), outputs.newPair(cost, candidate.payload));
  }
  seenSurfaceForms.clear();
  count = 0;
}
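// The trailing byte rewritten on each loop iteration above is what keeps
// duplicate analyzed forms distinct in the FST. A minimal sketch of the same
// trick as a standalone (hypothetical) helper:
static byte[] withDedupByte(byte[] analyzedForm, int ordinal) {
  // duplicate analyzed forms become distinct FST keys once a one-byte ordinal
  // is appended, so builder.add() never sees the same input twice (this
  // assumes fewer than 256 surface forms per analyzed form)
  byte[] key = java.util.Arrays.copyOf(analyzedForm, analyzedForm.length + 1);
  key[key.length - 1] = (byte) ordinal;
  return key;
}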
/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 */
@Override
public void resize(long newSize) {
  final int numPages = numPages(newSize);
  if (numPages > pages.length) {
    pages = Arrays.copyOf(pages, ArrayUtil.oversize(numPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
  }
  for (int i = numPages - 1; i >= 0 && pages[i] == null; --i) {
    pages[i] = newBytePage(i);
  }
  for (int i = numPages; i < pages.length && pages[i] != null; ++i) {
    pages[i] = null;
    releasePage(i);
  }
  this.size = newSize;
}
/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 */
@Override
public void resize(long newSize) {
  final int numPages = numPages(newSize);
  if (numPages > pages.length) {
    pages = Arrays.copyOf(pages, ArrayUtil.oversize(numPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
  }
  for (int i = numPages - 1; i >= 0 && pages[i] == null; --i) {
    pages[i] = newObjectPage(i);
  }
  for (int i = numPages; i < pages.length && pages[i] != null; ++i) {
    pages[i] = null;
    releasePage(i);
  }
  this.size = newSize;
}
/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 */
@Override
public void resize(long newSize) {
  final int numPages = numPages(newSize);
  if (numPages > pages.length) {
    pages = Arrays.copyOf(pages, ArrayUtil.oversize(numPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
  }
  for (int i = numPages - 1; i >= 0 && pages[i] == null; --i) {
    pages[i] = newIntPage(i);
  }
  for (int i = numPages; i < pages.length && pages[i] != null; ++i) {
    pages[i] = null;
    releasePage(i);
  }
  this.size = newSize;
}
/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 */
@Override
public void resize(long newSize) {
  final int numPages = numPages(newSize);
  if (numPages > pages.length) {
    pages = Arrays.copyOf(pages, ArrayUtil.oversize(numPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
  }
  for (int i = numPages - 1; i >= 0 && pages[i] == null; --i) {
    pages[i] = newLongPage(i);
  }
  for (int i = numPages; i < pages.length && pages[i] != null; ++i) {
    pages[i] = null;
    releasePage(i);
  }
  this.size = newSize;
}
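// All four resize variants above share the same page arithmetic. A sketch of
// what numPages(long) plausibly does, assuming a fixed power-of-two page size
// as in Elasticsearch's BigArrays (names and signature here are illustrative):
static int numPages(long size, int pageSize) {
  final long numPages = (size + pageSize - 1) / pageSize; // ceil(size / pageSize)
  if (numPages > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("size " + size + " needs too many pages");
  }
  return (int) numPages;
}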
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  final int prefix = StringHelper.bytesDifference(lastTerm.get(), term);
  final int suffix = term.length - prefix;
  tvf.writeVInt(prefix);
  tvf.writeVInt(suffix);
  tvf.writeBytes(term.bytes, term.offset + prefix, suffix);
  tvf.writeVInt(freq);
  lastTerm.copyBytes(term);
  lastPosition = lastOffset = 0;
  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.clear();
}
void startBlock(SegmentTermsEnumFrame frame, boolean isFloor) {
  totalBlockCount++;
  if (isFloor) {
    if (frame.fp == frame.fpOrig) {
      floorBlockCount++;
    }
    floorSubBlockCount++;
  } else {
    nonFloorBlockCount++;
  }
  if (blockCountByPrefixLen.length <= frame.prefix) {
    blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1 + frame.prefix);
  }
  blockCountByPrefixLen[frame.prefix]++;
  startBlockCount++;
  totalBlockSuffixBytes += frame.suffixesReader.length();
  totalBlockStatsBytes += frame.statsReader.length();
}
void addPosition(int position, int startOffset, int length, int payloadLength) {
  if (hasPositions) {
    if (posStart + totalPositions == positionsBuf.length) {
      positionsBuf = ArrayUtil.grow(positionsBuf);
    }
    positionsBuf[posStart + totalPositions] = position;
  }
  if (hasOffsets) {
    if (offStart + totalPositions == startOffsetsBuf.length) {
      final int newLength = ArrayUtil.oversize(offStart + totalPositions, 4);
      startOffsetsBuf = Arrays.copyOf(startOffsetsBuf, newLength);
      lengthsBuf = Arrays.copyOf(lengthsBuf, newLength);
    }
    startOffsetsBuf[offStart + totalPositions] = startOffset;
    lengthsBuf[offStart + totalPositions] = length;
  }
  if (hasPayloads) {
    if (payStart + totalPositions == payloadLengthsBuf.length) {
      payloadLengthsBuf = ArrayUtil.grow(payloadLengthsBuf);
    }
    payloadLengthsBuf[payStart + totalPositions] = payloadLength;
  }
  ++totalPositions;
}
/**
 * Decompress the chunk.
 */
void decompress() throws IOException {
  // decompress data
  final int chunkSize = chunkSize();
  if (version >= VERSION_BIG_CHUNKS && chunkSize >= 2 * CompressingStoredFieldsReader.this.chunkSize) {
    bytes.offset = bytes.length = 0;
    for (int decompressed = 0; decompressed < chunkSize; ) {
      final int toDecompress = Math.min(chunkSize - decompressed, CompressingStoredFieldsReader.this.chunkSize);
      decompressor.decompress(fieldsStream, toDecompress, 0, toDecompress, spare);
      bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + spare.length);
      System.arraycopy(spare.bytes, spare.offset, bytes.bytes, bytes.length, spare.length);
      bytes.length += spare.length;
      decompressed += toDecompress;
    }
  } else {
    decompressor.decompress(fieldsStream, chunkSize, 0, chunkSize, bytes);
  }
  if (bytes.length != chunkSize) {
    throw new CorruptIndexException("Corrupted: expected chunk size = " + chunkSize()
        + ", got " + bytes.length + " (resource=" + fieldsStream + ")");
  }
}
ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) {
  super(weight);
  this.coord = coord;
  this.docsAndFreqs = new DocsAndFreqs[scorers.length];
  for (int i = 0; i < scorers.length; i++) {
    docsAndFreqs[i] = new DocsAndFreqs(scorers[i]);
  }
  // Sort the array the first time to allow the least frequent DocsEnum to
  // lead the matching.
  ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
    @Override
    public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
      return Long.compare(o1.cost, o2.cost);
    }
  });
  lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
}
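// Why the least-frequent scorer leads: the conjunction repeatedly advances the
// lead and asks the others to catch up, so a sparse lead minimizes advance()
// calls. Roughly what Lucene 4.x's ConjunctionScorer.doNext does (simplified
// sketch, not a drop-in implementation):
private int doNext(int doc) throws IOException {
  for (;;) {
    advanceHead:
    for (;;) {
      // invariant: every iterator is at or before 'doc' here
      for (int i = 1; i < docsAndFreqs.length; i++) {
        if (docsAndFreqs[i].doc < doc) {
          docsAndFreqs[i].doc = docsAndFreqs[i].scorer.advance(doc);
          if (docsAndFreqs[i].doc > doc) {
            // overshot: the highest doc becomes the new candidate
            doc = docsAndFreqs[i].doc;
            break advanceHead;
          }
        }
      }
      return doc; // every scorer is positioned on 'doc': a match
    }
    doc = lead.doc = lead.scorer.advance(doc); // catch the lead up
  }
}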
private void addOneValue(BytesRef value) {
  int termID = hash.add(value);
  if (termID < 0) {
    termID = -termID - 1;
  } else {
    // reserve additional space for each unique value:
    // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
    //    TODO: can this same OOM happen in THPF?
    // 2. when flushing, we need 1 int per value (slot in the ordMap).
    iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT);
  }
  if (currentUpto == currentValues.length) {
    currentValues = ArrayUtil.grow(currentValues, currentValues.length + 1);
    // reserve additional space for max # values per-doc
    // when flushing, we need an int[] to sort the mapped-ords within the doc
    iwBytesUsed.addAndGet((currentValues.length - currentUpto) * 2 * RamUsageEstimator.NUM_BYTES_INT);
  }
  currentValues[currentUpto] = termID;
  currentUpto++;
}
/**
 * Saves the existing attribute states
 */
private void saveState() {
  // otherwise, we have delimiters, save state
  savedStartOffset = offsetAttribute.startOffset();
  savedEndOffset = offsetAttribute.endOffset();
  // if length by start + end offsets doesn't match the term text then assume
  // this is a synonym and don't adjust the offsets.
  hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
  savedType = typeAttribute.type();
  if (savedBuffer.length < termAttribute.length()) {
    savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
  }
  System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
  iterator.text = savedBuffer;
  hasSavedState = true;
}
/**
 * Build a minimal, deterministic automaton from a sorted list of {@link BytesRef}
 * representing strings in UTF-8. These strings must be binary-sorted.
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();
  char[] chars = new char[0];
  CharsRef ref = new CharsRef();
  for (BytesRef b : input) {
    chars = ArrayUtil.grow(chars, b.length);
    final int len = UnicodeUtil.UTF8toUTF16(b, chars);
    ref.chars = chars;
    ref.length = len;
    builder.add(ref);
  }
  Automaton.Builder a = new Automaton.Builder();
  convert(a, builder.complete(), new IdentityHashMap<State, Integer>());
  return a.finish();
}
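// Hypothetical usage of the builder above; the input must already be
// binary-sorted, as the javadoc requires:
List<BytesRef> terms = Arrays.asList(
    new BytesRef("bar"), new BytesRef("baz"), new BytesRef("foo"));
Automaton a = DaciukMihovAutomatonBuilder.build(terms);
// 'a' is minimal and deterministic, accepting exactly {"bar", "baz", "foo"}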
private void incr() {
  upto++;
  grow();
  if (arcs.length <= upto) {
    @SuppressWarnings({"rawtypes", "unchecked"})
    final FST.Arc<T>[] newArcs = new FST.Arc[ArrayUtil.oversize(1 + upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(arcs, 0, newArcs, 0, arcs.length);
    arcs = newArcs;
  }
  if (output.length <= upto) {
    @SuppressWarnings({"rawtypes", "unchecked"})
    final T[] newOutput = (T[]) new Object[ArrayUtil.oversize(1 + upto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(output, 0, newOutput, 0, output.length);
    output = newOutput;
  }
}
/** Add a new element to this builder. */
public Builder add(long l) {
  if (pending == null) {
    throw new IllegalStateException("Cannot be reused after build()");
  }
  if (pendingOff == pending.length) {
    // check size
    if (values.length == valuesOff) {
      final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
      grow(newLength);
    }
    pack();
  }
  pending[pendingOff++] = l;
  size += 1;
  return this;
}
protected int replace_s(int c_bra, int c_ket, CharSequence s) {
  final int adjustment = s.length() - (c_ket - c_bra);
  final int newLength = limit + adjustment;
  // resize if necessary
  if (newLength > current.length) {
    char[] newBuffer = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
    System.arraycopy(current, 0, newBuffer, 0, limit);
    current = newBuffer;
  }
  // if the substring being replaced is longer or shorter than the
  // replacement, need to shift things around
  if (adjustment != 0 && c_ket < limit) {
    System.arraycopy(current, c_ket, current, c_bra + s.length(), limit - c_ket);
  }
  // insert the replacement text
  // Note: s.getChars(0, s.length(), current, c_bra) would be faster, but we
  // would have to duplicate this method for both String and StringBuilder.
  for (int i = 0; i < s.length(); i++) {
    current[c_bra + i] = s.charAt(i);
  }
  limit += adjustment;
  if (cursor >= c_ket) {
    cursor += adjustment;
  } else if (cursor > c_bra) {
    cursor = c_bra;
  }
  return adjustment;
}
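// On the [0, limit) window, replace_s behaves like StringBuilder.replace plus
// cursor maintenance. A hypothetical walk-through:
StringBuilder sb = new StringBuilder("foobar");
sb.replace(0, 3, "ba"); // "babar" -- the same buffer mutation that
                        // replace_s(0, 3, "ba") performs with limit == 6,
                        // which would also return the adjustment -1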
@Override
public void setDocument(int docId) {
  bytes = values.get(docId);
  in.reset(bytes.bytes, bytes.offset, bytes.length);
  if (!in.eof()) {
    // first value uses vLong on top of zig-zag encoding, then deltas are encoded using vLong
    long previousValue = longs[0] = ByteUtils.zigZagDecode(ByteUtils.readVLong(in));
    count = 1;
    while (!in.eof()) {
      longs = ArrayUtil.grow(longs, count + 1);
      previousValue = longs[count++] = previousValue + ByteUtils.readVLong(in);
    }
  } else {
    count = 0;
  }
}
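// The first value above is zig-zag decoded so that small negative numbers stay
// small on the wire (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...). The standard
// decode, assumed equivalent to ByteUtils.zigZagDecode:
static long zigZagDecode(long n) {
  return (n >>> 1) ^ -(n & 1);
}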
ConjunctionScorer(final Weight weight, final Scorer[] scorers, final float coord,
    final LearnToRankClause[] ltrclauses, final int docBase) {
  super(weight);
  this.coord = coord;
  this.docBase = docBase;
  clauses = ltrclauses;
  docsAndFreqs = new DocsAndFreqs[scorers.length];
  for (int i = 0; i < scorers.length; i++) {
    docsAndFreqs[i] = new DocsAndFreqs(scorers[i]);
  }
  // Sort the array the first time to allow the least frequent DocsEnum to
  // lead the matching.
  ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
    @Override
    public int compare(final DocsAndFreqs obj1, final DocsAndFreqs obj2) {
      // Long.compare avoids the overflow risk of subtraction-based comparators
      return Long.compare(obj1.cost, obj2.cost);
    }
  });
  lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
}
/**
 * Removes accents from a string.
 *
 * @param text the string to clean
 * @return the string without accents
 * @see org.apache.lucene.analysis.ASCIIFoldingFilter
 */
public static String removeAccents(String text) {
  if (text == null) {
    return text;
  }
  int length = text.length();
  char[] input = text.toCharArray();
  char[] output = new char[256];
  // Worst-case length required:
  final int maxSizeNeeded = 4 * length;
  if (output.length < maxSizeNeeded) {
    output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
  }
  int outputPos = ASCIIFoldingFilter.foldToASCII(input, 0, output, 0, length);
  return new String(output, 0, outputPos);
}
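// Example usage; ASCIIFoldingFilter folds accented Latin characters to their
// ASCII equivalents:
String folded = removeAccents("déjà vu"); // -> "deja vu"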
@Override
public BytesRef next() {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  int count = counts.next().intValue();
  int maxSize = count * 9; // worst case: a vLong takes at most 9 bytes
  if (maxSize > buffer.length) {
    buffer = ArrayUtil.grow(buffer, maxSize);
  }
  try {
    encodeValues(count);
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
  ref.bytes = buffer;
  ref.offset = 0;
  ref.length = out.getPosition();
  return ref;
}
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if ((obj == null) || (obj.getClass() != this.getClass())) {
    return false;
  }
  TermsFilter test = (TermsFilter) obj;
  // first check the fields before even comparing the bytes
  if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) {
    int lastOffset = termsAndFields[termsAndFields.length - 1].end;
    // compare the offsets; since we sort, they must be identical
    if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) {
      // straight byte comparison; since we sort, the bytes must be identical
      return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
    }
  }
  return false;
}
/** Creates a new iterator, buffering entries from the specified iterator */
public BufferedInputIterator(InputIterator source) throws IOException {
  BytesRef spare;
  int freqIndex = 0;
  hasPayloads = source.hasPayloads();
  hasContexts = source.hasContexts();
  while ((spare = source.next()) != null) {
    entries.append(spare);
    if (hasPayloads) {
      payloads.append(source.payload());
    }
    if (hasContexts) {
      contextSets.add(source.contexts());
    }
    if (freqIndex >= freqs.length) {
      freqs = ArrayUtil.grow(freqs, freqs.length + 1);
    }
    freqs[freqIndex++] = source.weight();
  }
  comp = source.getComparator();
}
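// Note on the grow-by-one call above: ArrayUtil.grow(array, minSize) allocates
// ArrayUtil.oversize(minSize, ...) elements -- at least minSize, usually more --
// so appending in a loop reallocates only O(log n) times instead of n times:
long[] weights = new long[1];
for (int i = 0; i < 1000; i++) {
  if (i >= weights.length) {
    weights = ArrayUtil.grow(weights, weights.length + 1); // exponential oversizing
  }
  weights[i] = i;
}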
@Override
public int nextPosition() throws IOException {
  final int token = postingInput.readVInt();
  pos += token >>> 1;
  if (storeOffsets) {
    startOffset = endOffset + postingInput.readVInt();
    endOffset = startOffset + postingInput.readVInt();
  }
  if ((token & 1) != 0) {
    payload.offset = 0;
    payload.length = postingInput.readVInt();
    if (payload.length > payload.bytes.length) {
      payload.bytes = new byte[ArrayUtil.oversize(payload.length, 1)];
    }
    postingInput.readBytes(payload.bytes, 0, payload.length);
  } else {
    payload.length = 0;
  }
  return pos;
}
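// The reader above implies a token layout of (positionDelta << 1) | hasPayload.
// A hypothetical writer-side sketch of that encoding (offsets omitted; this is
// not the actual Lucene writer):
static void writePosition(DataOutput out, int posDelta, BytesRef payload) throws IOException {
  final boolean hasPayload = payload != null && payload.length > 0;
  out.writeVInt((posDelta << 1) | (hasPayload ? 1 : 0));
  if (hasPayload) {
    out.writeVInt(payload.length);
    out.writeBytes(payload.bytes, payload.offset, payload.length);
  }
}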
@Override
public void collect(int doc) throws IOException {
  int ord = collectedTerms.add(fromDocTerms.get(doc));
  if (ord < 0) {
    ord = -ord - 1;
  } else {
    if (ord >= scoreSums.length) {
      scoreSums = ArrayUtil.grow(scoreSums);
      scoreCounts = ArrayUtil.grow(scoreCounts);
    }
  }
  float current = scorer.score();
  float existing = scoreSums[ord];
  if (Float.compare(existing, 0.0f) == 0) {
    scoreSums[ord] = current;
    scoreCounts[ord] = 1;
  } else {
    scoreSums[ord] = scoreSums[ord] + current;
    scoreCounts[ord]++;
  }
}
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    int termID = collectedTerms.add(fromDocTermOrds.lookupOrd(ord));
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        scoreSums = ArrayUtil.grow(scoreSums);
      }
    }
    switch (scoreMode) {
      case Total:
        scoreSums[termID] += scorer.score();
        break;
      case Max:
        scoreSums[termID] = Math.max(scoreSums[termID], scorer.score());
        break;
    }
  }
}
@Override
public void collect(int doc) throws IOException {
  fromDocTermOrds.setDocument(doc);
  long ord;
  while ((ord = fromDocTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    int termID = collectedTerms.add(fromDocTermOrds.lookupOrd(ord));
    if (termID < 0) {
      termID = -termID - 1;
    } else {
      if (termID >= scoreSums.length) {
        scoreSums = ArrayUtil.grow(scoreSums);
        scoreCounts = ArrayUtil.grow(scoreCounts);
      }
    }
    scoreSums[termID] += scorer.score();
    scoreCounts[termID]++;
  }
}