private void readSortedField(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sorted = binary + numeric
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene45DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n = readNumericEntry(meta);
  ords.put(fieldNumber, n);
}
private void readSortedField(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sorted = binary + numeric
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene49DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene49DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n = readNumericEntry(meta);
  ords.put(fieldNumber, n);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
  int fieldNumber = meta.readVInt();
  while (fieldNumber != -1) {
    FieldInfo info = infos.fieldInfo(fieldNumber);
    if (info == null) {
      throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
    } else if (!info.hasNorms()) {
      throw new CorruptIndexException("Invalid field: " + info.name + " (resource=" + meta + ")");
    }
    NormsEntry entry = new NormsEntry();
    entry.format = meta.readByte();
    entry.offset = meta.readLong();
    switch (entry.format) {
      case CONST_COMPRESSED:
      case UNCOMPRESSED:
      case TABLE_COMPRESSED:
      case DELTA_COMPRESSED:
        break;
      default:
        throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
    }
    norms.put(fieldNumber, entry);
    fieldNumber = meta.readVInt();
  }
}
public void read(IndexInput input, FieldInfos fieldInfos) throws IOException {
  this.term = null; // invalidate cache
  newSuffixStart = input.readVInt();
  int length = input.readVInt();
  int totalLength = newSuffixStart + length;
  assert totalLength <= BYTE_BLOCK_SIZE - 2 : "termLength=" + totalLength + ",resource=" + input;
  bytes.grow(totalLength);
  bytes.setLength(totalLength);
  input.readBytes(bytes.bytes(), newSuffixStart, length);
  final int fieldNumber = input.readVInt();
  if (fieldNumber != currentFieldNumber) {
    currentFieldNumber = fieldNumber;
    // NOTE: too much sneakiness here, seriously this is a negative vint?!
    if (currentFieldNumber == -1) {
      field = "";
    } else {
      assert fieldInfos.fieldInfo(currentFieldNumber) != null : currentFieldNumber;
      field = fieldInfos.fieldInfo(currentFieldNumber).name.intern();
    }
  } else {
    // NOTE: the null-check ternary must be parenthesized; without the parentheses,
    // string concatenation binds tighter than ==, the comparison is always false,
    // and the message expression can NPE instead of printing "null".
    assert field.equals(fieldInfos.fieldInfo(fieldNumber).name)
        : "currentFieldNumber=" + currentFieldNumber + " field=" + field + " vs "
            + (fieldInfos.fieldInfo(fieldNumber) == null ? "null" : fieldInfos.fieldInfo(fieldNumber).name);
  }
}
private void readSortedField(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sorted = binary + numeric
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene410DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene410DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n = readNumericEntry(meta);
  ords.put(fieldNumber, n);
}
static CodecReader wrap(CodecReader reader) throws IOException {
  final FieldInfos fieldInfos = reader.getFieldInfos();
  final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
  if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) {
    // the reader is a recent one: it has versions and they are stored
    // in a numeric doc values field
    return reader;
  }
  // The segment is an old one, look at the _uid field
  final Terms terms = reader.terms(UidFieldMapper.NAME);
  if (terms == null || !terms.hasPayloads()) {
    // The segment doesn't have an _uid field or doesn't have payloads;
    // don't try to do anything clever. If any other segment has versions,
    // all versions of this segment will be initialized to 0.
    return reader;
  }
  // convert _uid payloads -> _version docvalues
  return new VersionFieldUpgrader(reader);
}
public void testDocValues() throws IOException {
  assertU(adoc("id", "1", "floatdv", "4.5", "intdv", "-1", "intdv", "3", "stringdv", "value1", "stringdv", "value2"));
  assertU(commit());
  try (SolrCore core = h.getCoreInc()) {
    final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
    final SolrIndexSearcher searcher = searcherRef.get();
    try {
      final AtomicReader reader = searcher.getAtomicReader();
      assertEquals(1, reader.numDocs());
      final FieldInfos infos = reader.getFieldInfos();
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("stringdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("floatdv").getDocValuesType());
      assertEquals(DocValuesType.SORTED_SET, infos.fieldInfo("intdv").getDocValuesType());

      SortedSetDocValues dv = reader.getSortedSetDocValues("stringdv");
      dv.setDocument(0);
      assertEquals(0, dv.nextOrd());
      assertEquals(1, dv.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
    } finally {
      searcherRef.decref();
    }
  }
}
TermInfosWriter(Directory directory, String segment, FieldInfos fis, int interval) throws IOException {
  initialize(directory, segment, fis, interval, false);
  boolean success = false;
  try {
    other = new TermInfosWriter(directory, segment, fis, interval, true);
    other.other = this;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      try {
        directory.deleteFile(IndexFileNames.segmentFileName(segment, "",
            (isIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION : Lucene3xPostingsFormat.TERMS_EXTENSION)));
      } catch (IOException ignored) {
      }
    }
  }
}
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context) {
    @Override
    protected boolean sortTermsByUnicode() {
      // We carefully peek into the stack trace above us: if
      // we are part of a "merge", we must sort by UTF16:
      boolean unicodeSortOrder = true;
      StackTraceElement[] trace = new Exception().getStackTrace();
      for (int i = 0; i < trace.length; i++) {
        //System.out.println(trace[i].getClassName());
        if ("merge".equals(trace[i].getMethodName())) {
          unicodeSortOrder = false;
          if (LuceneTestCase.VERBOSE) {
            System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order");
          }
          break;
        }
      }
      return unicodeSortOrder;
    }
  };
}
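// The override above uses stack-trace sniffing: walk the current call stack and
// change behavior when some caller's method name matches. A minimal standalone
// sketch of the same trick, outside of Lucene (class and method names here are
// hypothetical, for illustration only):
class StackSniffExample {
  /** Returns true if any frame on the current call stack is a method named "merge". */
  static boolean calledFromMerge() {
    for (StackTraceElement frame : new Exception().getStackTrace()) {
      if ("merge".equals(frame.getMethodName())) {
        return true;
      }
    }
    return false;
  }
}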
/** sugar method for startDocument() + writeField() for every stored field in the document */
protected final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
  startDocument();
  for (IndexableField field : doc) {
    if (field.fieldType().stored()) {
      writeField(fieldInfos.fieldInfo(field.name()), field);
    }
  }
  finishDocument();
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
  si.addFile(fileName);

  final IndexOutput output = dir.createOutput(fileName, ioContext);
  boolean success = false;
  try {
    CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
    // Write the Lucene version that created this segment, since 3.1
    output.writeString(si.getVersion().toString());
    output.writeInt(si.getDocCount());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeStringStringMap(si.getDiagnostics());
    output.writeStringStringMap(Collections.<String,String>emptyMap());
    output.writeStringSet(si.files());
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      // TODO: why must we do this? do we not get tracking dir wrapper?
      IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
    } else {
      output.close();
    }
  }
}
/** Used only by clone. */
private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) {
  this.fieldInfos = fieldInfos;
  this.numTotalDocs = numTotalDocs;
  this.size = size;
  this.fieldsStream = fieldsStream;
  this.indexStream = indexStream;
}
/** Sole constructor. */
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
  final String segment = si.name;
  boolean success = false;
  fieldInfos = fn;
  try {
    fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
    indexStream = d.openInput(indexStreamFN, context);

    CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
    CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
    assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
    assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
    final long indexSize = indexStream.length() - HEADER_LENGTH_IDX;
    this.size = (int) (indexSize >> 3);
    // Verify two sources of "maxDoc" agree:
    if (this.size != si.getDocCount()) {
      throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.getDocCount());
    }
    numTotalDocs = (int) (indexSize >> 3);
    success = true;
  } finally {
    // With lock-less commits, it's entirely possible (and
    // fine) to hit a FileNotFound exception above. In
    // this case, we want to explicitly close any subset
    // of things that were opened so that we don't have to
    // wait for a GC to do so.
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
}
@Override
public void finish(FieldInfos fis, int numDocs) {
  long indexFP = indexStream.getFilePointer();
  if (HEADER_LENGTH_IDX + ((long) numDocs) * 8 != indexFP) {
    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
    // we detect that the bug has struck, here, and
    // throw an exception to prevent the corruption from
    // entering the index. See LUCENE-1282 for
    // details.
    throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexFP
        + " (wrote numDocs=" + ((indexFP - HEADER_LENGTH_IDX) / 8.0) + " file=" + indexStream.toString()
        + "); now aborting this merge to prevent index corruption");
  }
}
/** Sole constructor. */
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
  boolean success = false;
  IndexInput freqIn = null;
  IndexInput proxIn = null;
  try {
    freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext);
    CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
    // TODO: hasProx should (somehow!) become codec private,
    // but it's tricky because 1) FIS.hasProx is global (it
    // could be all fields that have prox are written by a
    // different codec), 2) the field may have had prox in
    // the past but all docs w/ that field were deleted.
    // Really we'd need to init prxOut lazily on write, and
    // then somewhere record that we actually wrote it so we
    // know whether to open on read:
    if (fieldInfos.hasProx()) {
      proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext);
      CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
    } else {
      proxIn = null;
    }
    this.freqIn = freqIn;
    this.proxIn = proxIn;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(freqIn, proxIn);
    }
  }
}
@Override
public void finish(FieldInfos fis, int numDocs) {
  long indexFP = tvx.getFilePointer();
  if (HEADER_LENGTH_INDEX + ((long) numDocs) * 16 != indexFP) {
    // This is most likely a bug in Sun JRE 1.6.0_04/_05;
    // we detect that the bug has struck, here, and
    // throw an exception to prevent the corruption from
    // entering the index. See LUCENE-1282 for
    // details.
    throw new RuntimeException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + indexFP
        + " (wrote numDocs=" + ((indexFP - HEADER_LENGTH_INDEX) / 16.0) + " file=" + tvx.toString()
        + "); now aborting this merge to prevent index corruption");
  }
}
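// Both finish() checks above rely on the same arithmetic invariant: every
// document appends a fixed-width record to the index file (one 8-byte long per
// doc in .fdx, two longs = 16 bytes per doc in .tvx), so after numDocs
// documents the file pointer must sit exactly at header + numDocs * recordSize.
// A minimal sketch of that check in isolation (hypothetical names):
class FixedWidthIndexCheck {
  static void checkSize(long headerLength, long numDocs, long bytesPerDoc, long filePointer) {
    long expected = headerLength + numDocs * bytesPerDoc;
    if (expected != filePointer) {
      throw new RuntimeException("index size mismatch: expected " + expected + " bytes but file pointer is " + filePointer);
    }
  }
}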
/** Used by clone. */
Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) {
  this.fieldInfos = fieldInfos;
  this.tvx = tvx;
  this.tvd = tvd;
  this.tvf = tvf;
  this.size = size;
  this.numTotalDocs = numTotalDocs;
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
  si.addFile(fileName);

  final IndexOutput output = dir.createOutput(fileName, ioContext);
  boolean success = false;
  try {
    CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
    Version version = si.getVersion();
    if (version.major < 3 || version.major > 4) {
      throw new IllegalArgumentException("invalid major version: should be 3 or 4 but got: " + version.major + " segment=" + si);
    }
    // Write the Lucene version that created this segment, since 3.1
    output.writeString(version.toString());
    output.writeInt(si.getDocCount());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeStringStringMap(si.getDiagnostics());
    output.writeStringSet(si.files());
    CodecUtil.writeFooter(output);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      // TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
      IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
    } else {
      output.close();
    }
  }
}
private void readSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sortedset = binary + numeric (addresses) + ordIndex
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene45DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n1 = readNumericEntry(meta);
  ords.put(fieldNumber, n1);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene45DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n2 = readNumericEntry(meta);
  ordIndexes.put(fieldNumber, n2);
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
  int fieldNumber = meta.readVInt();
  while (fieldNumber != -1) {
    if ((lenientFieldInfoCheck && fieldNumber < 0) || (!lenientFieldInfoCheck && infos.fieldInfo(fieldNumber) == null)) {
      // trickier to validate more: because we re-use for norms, because we use multiple entries
      // for "composite" types like sortedset, etc.
      throw new CorruptIndexException("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
    }
    byte type = meta.readByte();
    if (type == Lucene45DocValuesFormat.NUMERIC) {
      numerics.put(fieldNumber, readNumericEntry(meta));
    } else if (type == Lucene45DocValuesFormat.BINARY) {
      BinaryEntry b = readBinaryEntry(meta);
      binaries.put(fieldNumber, b);
    } else if (type == Lucene45DocValuesFormat.SORTED) {
      readSortedField(fieldNumber, meta, infos);
    } else if (type == Lucene45DocValuesFormat.SORTED_SET) {
      SortedSetEntry ss = readSortedSetEntry(meta);
      sortedSets.put(fieldNumber, ss);
      if (ss.format == SORTED_SET_WITH_ADDRESSES) {
        readSortedSetFieldWithAddresses(fieldNumber, meta, infos);
      } else if (ss.format == SORTED_SET_SINGLE_VALUED_SORTED) {
        if (meta.readVInt() != fieldNumber) {
          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
        }
        if (meta.readByte() != Lucene45DocValuesFormat.SORTED) {
          throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
        }
        readSortedField(fieldNumber, meta, infos);
      } else {
        throw new AssertionError();
      }
    } else {
      throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
    }
    fieldNumber = meta.readVInt();
  }
}
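// readFields above consumes a stream of (fieldNumber, type, entry...) records
// terminated by a sentinel field number of -1. A minimal standalone sketch of
// that framing pattern using plain java.io (hypothetical names; Lucene's real
// encoding uses vints and per-type entry bodies rather than fixed-width ints):
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

class SentinelFramedMeta {
  static void write(DataOutputStream out, int[] fieldNumbers, byte[] types) throws IOException {
    for (int i = 0; i < fieldNumbers.length; i++) {
      out.writeInt(fieldNumbers[i]); // which field this record describes
      out.writeByte(types[i]);       // dispatch byte (NUMERIC, BINARY, SORTED, ...)
    }
    out.writeInt(-1);                // sentinel: no more records
  }

  static void read(DataInputStream in) throws IOException {
    int fieldNumber = in.readInt();
    while (fieldNumber != -1) {      // same termination test as readFields above
      byte type = in.readByte();
      // dispatch on type and read the per-type entry body here
      fieldNumber = in.readInt();
    }
  }
}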
private void readSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sortedset = binary + numeric (addresses) + ordIndex
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene49DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene49DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n1 = readNumericEntry(meta);
  ords.put(fieldNumber, n1);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene49DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n2 = readNumericEntry(meta);
  ordIndexes.put(fieldNumber, n2);
}
@Override
public final TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix, fieldInfos, context, formatName, compressionMode);
}
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
  if (!pendingDocs.isEmpty()) {
    flush();
  }
  if (numDocs != this.numDocs) {
    throw new RuntimeException("Wrote " + this.numDocs + " docs, finish called with numDocs=" + numDocs);
  }
  indexWriter.finish(numDocs, vectorsStream.getFilePointer());
  CodecUtil.writeFooter(vectorsStream);
}
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
  if (numBufferedDocs > 0) {
    flush();
  } else {
    assert bufferedDocs.length == 0;
  }
  if (docBase != numDocs) {
    throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
  }
  indexWriter.finish(numDocs, fieldsStream.getFilePointer());
  CodecUtil.writeFooter(fieldsStream);
  assert bufferedDocs.length == 0;
}
Lucene3xTermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int docStoreOffset, int format) {
  this.fieldInfos = fieldInfos;
  this.tvx = tvx;
  this.tvd = tvd;
  this.tvf = tvf;
  this.size = size;
  this.numTotalDocs = numTotalDocs;
  this.docStoreOffset = docStoreOffset;
  this.format = format;
  this.storeCFSReader = null;
}
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) {
  this.freqStream = freqStream.clone();
  this.tis = tis;
  this.fieldInfos = fieldInfos;
  skipInterval = tis.getSkipInterval();
  maxSkipLevels = tis.getMaxSkipLevels();
}
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi) throws CorruptIndexException, IOException {
  input = i;
  fieldInfos = fis;
  isIndex = isi;
  maxSkipLevels = 1; // use single-level skip lists for formats > -3

  int firstInt = input.readInt();
  if (firstInt >= 0) {
    // original-format file, without explicit format version number
    format = 0;
    size = firstInt;

    // back-compatible settings
    indexInterval = 128;
    skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
  } else {
    // we have a format version number
    format = firstInt;

    // check that it is a format we can understand
    if (format > FORMAT_MINIMUM) {
      throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
    }
    if (format < FORMAT_CURRENT) {
      throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
    }

    size = readSize(input); // read the size

    indexInterval = input.readInt();
    skipInterval = input.readInt();
    maxSkipLevels = input.readInt();
    assert indexInterval > 0 : "indexInterval=" + indexInterval + " is negative; must be > 0";
    assert skipInterval > 0 : "skipInterval=" + skipInterval + " is negative; must be > 0";
  }
}
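// SegmentTermEnum above sniffs the on-disk format from the sign of the first
// int: pre-versioned files began with a non-negative size, so later formats
// store a negative version number in that slot (more negative = newer). A
// minimal standalone sketch of the trick with hypothetical constants and
// plain java.io, not Lucene's actual codec API:
import java.io.DataInputStream;
import java.io.IOException;

class SignSniffedHeader {
  static final int FORMAT_MINIMUM = -2; // oldest version we still read
  static final int FORMAT_CURRENT = -4; // newest version we understand

  /** Returns 0 for a legacy un-versioned file, else the (negative) format version. */
  static int readFormat(DataInputStream in) throws IOException {
    int firstInt = in.readInt();
    if (firstInt >= 0) {
      return 0; // legacy file: firstInt was a record count, not a version
    }
    if (firstInt > FORMAT_MINIMUM) {
      throw new IOException("format too old: " + firstInt);
    }
    if (firstInt < FORMAT_CURRENT) {
      throw new IOException("format too new: " + firstInt);
    }
    return firstInt;
  }
}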
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION);

  // Unfortunately, for 3.x indices, each segment's
  // FieldInfos can lie about hasVectors (claim it's true
  // when really it's false).... so we have to carefully
  // check if the files really exist before trying to open
  // them (4.x has fixed this):
  final boolean exists;
  if (Lucene3xSegmentInfoFormat.getDocStoreOffset(segmentInfo) != -1 && Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(segmentInfo)) {
    String cfxFileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION);
    if (segmentInfo.dir.fileExists(cfxFileName)) {
      Directory cfsDir = new CompoundFileDirectory(segmentInfo.dir, cfxFileName, context, false);
      try {
        exists = cfsDir.fileExists(fileName);
      } finally {
        cfsDir.close();
      }
    } else {
      exists = false;
    }
  } else {
    exists = directory.fileExists(fileName);
  }

  if (!exists) {
    // 3x's FieldInfos sometimes lies and claims a segment
    // has vectors when it doesn't:
    return null;
  } else {
    return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context);
  }
}
private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, IndexInput fieldsStream, IndexInput indexStream) {
  this.fieldInfos = fieldInfos;
  this.numTotalDocs = numTotalDocs;
  this.size = size;
  this.format = format;
  this.docStoreOffset = docStoreOffset;
  this.fieldsStream = fieldsStream;
  this.indexStream = indexStream;
  this.storeCFSReader = null;
}
private void readSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta, FieldInfos infos) throws IOException {
  // sortedset = binary + numeric (addresses) + ordIndex
  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene410DocValuesFormat.BINARY) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  BinaryEntry b = readBinaryEntry(meta);
  binaries.put(fieldNumber, b);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene410DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n1 = readNumericEntry(meta);
  ords.put(fieldNumber, n1);

  if (meta.readVInt() != fieldNumber) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  if (meta.readByte() != Lucene410DocValuesFormat.NUMERIC) {
    throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
  }
  NumericEntry n2 = readNumericEntry(meta);
  ordIndexes.put(fieldNumber, n2);
}
VersionFieldUpgrader(CodecReader in) {
  super(in);

  // Find a free field number
  int fieldNumber = 0;
  for (FieldInfo fi : in.getFieldInfos()) {
    fieldNumber = Math.max(fieldNumber, fi.number + 1);
  }

  // TODO: lots of things can go wrong here...
  FieldInfo newInfo = new FieldInfo(VersionFieldMapper.NAME,              // field name
                                    fieldNumber,                          // field number
                                    false,                                // store term vectors
                                    false,                                // omit norms
                                    false,                                // store payloads
                                    IndexOptions.NONE,                    // index options
                                    DocValuesType.NUMERIC,                // docvalues
                                    -1,                                   // docvalues generation
                                    Collections.<String, String>emptyMap() // attributes
                                    );
  newInfo.checkConsistency(); // fail merge immediately if above code is wrong

  final ArrayList<FieldInfo> fieldInfoList = new ArrayList<>();
  for (FieldInfo info : in.getFieldInfos()) {
    if (!info.name.equals(VersionFieldMapper.NAME)) {
      fieldInfoList.add(info);
    }
  }
  fieldInfoList.add(newInfo);
  infos = new FieldInfos(fieldInfoList.toArray(new FieldInfo[fieldInfoList.size()]));
}
EmptyFilterLeafReader(final LeafReader delegate) {
  super(delegate);
  final FieldInfos infos = delegate.getFieldInfos();
  final List<FieldInfo> lfi = new ArrayList<FieldInfo>(metaFields.size());
  for (String metaField : metaFields) {
    final FieldInfo _fi = infos.fieldInfo(metaField);
    if (_fi != null) {
      lfi.add(_fi);
    }
  }
  fi = lfi.toArray(new FieldInfo[0]);
}
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  boolean success = false;
  try {
    version = readHeader(in);
    if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
      CodecUtil.checksumEntireFile(in);
    }
    this.postingsReader.init(in);
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = in.readVInt();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      long numTerms = in.readVLong();
      long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
      long sumDocFreq = in.readVLong();
      int docCount = in.readVInt();
      int longsSize = in.readVInt();
      TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
      TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }
}
public TermsReader(FieldInfos fieldInfos, IndexInput in, int termCount) throws IOException {
  this.termCount = termCount;
  final int fieldNumber = in.readVInt();
  field = fieldInfos.fieldInfo(fieldNumber);
  if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
    sumTotalTermFreq = in.readVLong();
  } else {
    sumTotalTermFreq = -1;
  }
  sumDocFreq = in.readVLong();
  docCount = in.readVInt();
  fst = new FST<>(in, outputs);
}
public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
  this.fieldInfos = fn;
  boolean success = false;
  try {
    in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION), context);
    success = true;
  } finally {
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
  readIndex(si.getDocCount());
}