/** Merges in the stored fields from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument()},
 *  {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc.). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (AtomicReader reader : mergeState.readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; i++) {
      if (liveDocs != null && !liveDocs.get(i)) {
        // skip deleted docs
        continue;
      }
      // TODO: this could be more efficient using
      // FieldVisitor instead of loading/writing entire
      // doc; ie we just have to renumber the field number
      // on the fly?
      // NOTE: it's very important to first assign to doc then pass it to
      // fieldsWriter.addDocument; see LUCENE-1282
      Document doc = reader.document(i);
      addDocument(doc, mergeState.fieldInfos);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
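As a point of reference for the loop above, here is a minimal, self-contained sketch of the liveDocs filtering that the default merge performs. LiveBits is a hypothetical stand-in for Lucene's Bits interface, not the real class; only the skip-deleted-docs pattern is the point.

// Hypothetical stand-in for org.apache.lucene.util.Bits.
interface LiveBits {
  boolean get(int index); // true if the document is live (not deleted)
}

class LiveDocLoop {
  // Visits every live document in [0, maxDoc); a null liveDocs means "no deletions".
  static int countLiveDocs(LiveBits liveDocs, int maxDoc) {
    int docCount = 0;
    for (int i = 0; i < maxDoc; i++) {
      if (liveDocs != null && !liveDocs.get(i)) {
        continue; // skip deleted docs, exactly as the default merge does
      }
      docCount++;
    }
    return docCount;
  }
}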
private int copyFieldsNoDeletions(MergeState mergeState, final AtomicReader reader,
                                  final Lucene40StoredFieldsReader matchingFieldsReader,
                                  int[] rawDocLengths) throws IOException {
  final int maxDoc = reader.maxDoc();
  int docCount = 0;
  if (matchingFieldsReader != null) {
    // We can bulk-copy because the fieldInfos are "congruent"
    while (docCount < maxDoc) {
      int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
      IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, docCount, len);
      addRawDocuments(stream, rawDocLengths, len);
      docCount += len;
      mergeState.checkAbort.work(300 * len);
    }
  } else {
    for (; docCount < maxDoc; docCount++) {
      // NOTE: it's very important to first assign to doc then pass it to
      // fieldsWriter.addDocument; see LUCENE-1282
      Document doc = reader.document(docCount);
      addDocument(doc, mergeState.fieldInfos);
      mergeState.checkAbort.work(300);
    }
  }
  return docCount;
}
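The bulk-copy branch above moves documents in bounded chunks so the abort check still runs regularly. Here is a self-contained sketch of that chunking arithmetic, with a hypothetical ChunkConsumer callback; the constant value is illustrative (see the real MAX_RAW_MERGE_DOCS in the Lucene40 writer).

class ChunkedCopy {
  // Illustrative bound; the real constant lives in the Lucene40 writer.
  static final int MAX_RAW_MERGE_DOCS = 4192;

  interface ChunkConsumer {
    void copyChunk(int startDoc, int len); // hypothetical bulk-copy callback
  }

  // Copies [0, maxDoc) in chunks of at most MAX_RAW_MERGE_DOCS documents, so a
  // caller can interleave abort checks the way copyFieldsNoDeletions does.
  static int copyAll(int maxDoc, ChunkConsumer consumer) {
    int docCount = 0;
    while (docCount < maxDoc) {
      int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
      consumer.copyChunk(docCount, len);
      docCount += len;
    }
    return docCount;
  }
}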
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc.). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
@Override
public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
  if (unsortedReaders == null) {
    throw new IllegalStateException();
  }
  if (docMap == null) {
    return super.getDocMap(mergeState);
  }
  assert mergeState.docMaps.length == 1; // we returned a singleton reader
  final PackedLongValues deletes = getDeletes(unsortedReaders);
  return new MergePolicy.DocMap() {
    @Override
    public int map(int old) {
      final int oldWithDeletes = old + (int) deletes.get(old);
      final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
      return mergeState.docMaps[0].get(newWithDeletes);
    }
  };
}
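The anonymous map(int) above composes three mappings: re-add deleted documents to the incoming doc id, apply the sort permutation, then apply the merge's own doc map. A self-contained sketch of that composition, with plain int arrays standing in for deletes, docMap.oldToNew, and mergeState.docMaps[0]:

class DocMapComposition {
  // deletes[i]: number of deleted docs preceding live doc i, so old + deletes[old]
  // recovers the doc id in the original space with deletions still present;
  // oldToNew: the sort permutation; mergeDocMap: the merge's old-to-new doc id map.
  static int map(int old, int[] deletes, int[] oldToNew, int[] mergeDocMap) {
    int oldWithDeletes = old + deletes[old];       // re-add the deleted documents
    int newWithDeletes = oldToNew[oldWithDeletes]; // apply the sort permutation
    return mergeDocMap[newWithDeletes];            // apply the merge's doc map
  }
}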
/** Merges in the stored fields from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc.). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (AtomicReader reader : mergeState.readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; i++) {
      if (liveDocs != null && !liveDocs.get(i)) {
        // skip deleted docs
        continue;
      }
      // TODO: this could be more efficient using
      // FieldVisitor instead of loading/writing entire
      // doc; ie we just have to renumber the field number
      // on the fly?
      // NOTE: it's very important to first assign to doc then pass it to
      // fieldsWriter.addDocument; see LUCENE-1282
      Document doc = reader.document(i);
      addDocument(doc, mergeState.fieldInfos);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
@Override
public MergePolicy.DocMap getDocMap(final MergeState mergeState) {
  if (unsortedReaders == null) {
    throw new IllegalStateException();
  }
  if (docMap == null) {
    return super.getDocMap(mergeState);
  }
  assert mergeState.docMaps.length == 1; // we returned a singleton reader
  final MonotonicAppendingLongBuffer deletes = getDeletes(unsortedReaders);
  return new MergePolicy.DocMap() {
    @Override
    public int map(int old) {
      final int oldWithDeletes = old + (int) deletes.get(old);
      final int newWithDeletes = docMap.oldToNew(oldWithDeletes);
      return mergeState.docMaps[0].get(newWithDeletes);
    }
  };
}
@Override
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  // Used for bulk-reading raw bytes for stored fields
  int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
  int idx = 0;
  for (AtomicReader reader : mergeState.readers) {
    final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
    Lucene40StoredFieldsReader matchingFieldsReader = null;
    if (matchingSegmentReader != null) {
      final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
      // we can only bulk-copy if the matching reader is also a Lucene40StoredFieldsReader
      if (fieldsReader != null && fieldsReader instanceof Lucene40StoredFieldsReader) {
        matchingFieldsReader = (Lucene40StoredFieldsReader) fieldsReader;
      }
    }
    if (reader.getLiveDocs() != null) {
      docCount += copyFieldsWithDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths);
    } else {
      docCount += copyFieldsNoDeletions(mergeState, reader, matchingFieldsReader, rawDocLengths);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
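Bulk copying is only attempted when the matching reader is the same concrete format, because the raw on-disk bytes must be directly compatible. A hedged, self-contained sketch of that gate; Reader and RawCapableReader are hypothetical stand-ins, not Lucene types.

interface Reader {}

interface RawCapableReader extends Reader {
  void copyRawDocsTo(Object output, int start, int len); // hypothetical bulk path
}

class MergeDispatch {
  // Returns the reader downcast to the raw-capable type, or null when the concrete
  // formats differ and the merge must fall back to document-at-a-time copying.
  // instanceof is false for null, so a missing matching reader is handled too.
  static RawCapableReader bulkCandidate(Reader matching) {
    return (matching instanceof RawCapableReader) ? (RawCapableReader) matching : null;
  }
}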
@Override
public final int merge(MergeState mergeState) throws IOException {
  // Used for bulk-reading raw bytes for term vectors
  int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
  int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
  int idx = 0;
  int numDocs = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
    Lucene40TermVectorsReader matchingVectorsReader = null;
    if (matchingSegmentReader != null) {
      TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
      if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
        matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
      }
    }
    if (reader.getLiveDocs() != null) {
      numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
    } else {
      numDocs += copyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
    }
  }
  finish(mergeState.fieldInfos, numDocs);
  return numDocs;
}
private int copyVectorsNoDeletions(MergeState mergeState,
                                   final Lucene40TermVectorsReader matchingVectorsReader,
                                   final AtomicReader reader,
                                   int[] rawDocLengths, int[] rawDocLengths2) throws IOException {
  final int maxDoc = reader.maxDoc();
  if (matchingVectorsReader != null) {
    // We can bulk-copy because the fieldInfos are "congruent"
    int docCount = 0;
    while (docCount < maxDoc) {
      int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
      matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, docCount, len);
      addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
      docCount += len;
      mergeState.checkAbort.work(300 * len);
    }
  } else {
    for (int docNum = 0; docNum < maxDoc; docNum++) {
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docNum);
      addAllDocVectors(vectors, mergeState);
      mergeState.checkAbort.work(300);
    }
  }
  return maxDoc;
}
/** Called during merging to merge all {@link Fields} from
 *  sub-readers. This must recurse to merge all postings
 *  (terms, docs, positions, etc.). A {@link
 *  PostingsFormat} can override this default
 *  implementation to do its own merging. */
public void merge(MergeState mergeState, Fields fields) throws IOException {
  for (String field : fields) {
    FieldInfo info = mergeState.fieldInfos.fieldInfo(field);
    assert info != null : "FieldInfo for field is null: " + field;
    Terms terms = fields.terms(field);
    if (terms != null) {
      final TermsConsumer termsConsumer = addField(info);
      termsConsumer.merge(mergeState, info.getIndexOptions(), terms.iterator(null));
    }
  }
}
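The default implementation above visits each field, looks up its FieldInfo, and hands the field's terms to a per-field consumer, which recurses into terms and postings. Below is a simplified, hypothetical sketch of that dispatch shape; the Simple* interfaces are stand-ins, not Lucene's real consumer API.

import java.io.IOException;
import java.util.Map;

// Hypothetical, simplified stand-ins for the FieldsConsumer/TermsConsumer chain.
interface SimpleTermsConsumer {
  void mergeTerms(Iterable<String> terms) throws IOException; // the real API recurses into postings here
}

interface SimpleFieldsConsumer {
  SimpleTermsConsumer addField(String field) throws IOException;

  // Mirrors the loop above: fields with no terms are skipped, the rest are delegated.
  default void mergeAll(Map<String, Iterable<String>> fieldToTerms) throws IOException {
    for (Map.Entry<String, Iterable<String>> e : fieldToTerms.entrySet()) {
      if (e.getValue() != null) {
        addField(e.getKey()).mergeTerms(e.getValue());
      }
    }
  }
}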
@Override
public int merge(MergeState mergeState) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from TermVectorsWriter.merge()");
  }
  return super.merge(mergeState);
}
@Override
public void merge(MergeState mergeState, Fields fields) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from FieldsConsumer.merge()");
  }
  super.merge(mergeState, fields);
}
@Override
public void merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from TermsConsumer.merge()");
  }
  super.merge(mergeState, indexOptions, termsEnum);
}
@Override
public TermStats merge(MergeState mergeState, IndexOptions indexOptions,
                       DocsEnum postings, FixedBitSet visitedDocs) throws IOException {
  if (random.nextInt(10000) == 0) {
    throw new IOException("Fake IOException from PostingsConsumer.merge()");
  }
  return super.merge(mergeState, indexOptions, postings, visitedDocs);
}
@Override
public int merge(MergeState mergeState) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from StoredFieldsWriter.merge()");
  }
  return super.merge(mergeState);
}
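All five overrides above share one fault-injection pattern: a shared java.util.Random makes roughly one call in N throw before delegating to the real implementation. A minimal, self-contained sketch of the pattern; FlakyOperation is a hypothetical helper, not a Lucene class.

import java.io.IOException;
import java.util.Random;

class FlakyOperation {
  private final Random random;
  private final int failOneIn;

  FlakyOperation(Random random, int failOneIn) {
    this.random = random;
    this.failOneIn = failOneIn;
  }

  // On average, throws once per failOneIn invocations; call this before the real work.
  void maybeFail(String where) throws IOException {
    if (random.nextInt(failOneIn) == 0) {
      throw new IOException("Fake IOException from " + where);
    }
  }
}

Note the odds in the overrides: 1 in 100 everywhere except PostingsConsumer.merge(), which uses 1 in 10,000, presumably because it runs once per term rather than once per segment and would otherwise fail almost every merge.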