@Override
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
  // NOTE: this is NOT how 3.x is really written...
  String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION);

  boolean success = false;

  IndexInput input = directory.openInput(fileName, context);

  try {
    SegmentInfo si = readUpgradedSegmentInfo(segmentName, directory, input);
    success = true;
    return si;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(input);
    } else {
      input.close();
    }
  }
}
private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException {
  CodecUtil.checkHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME,
                        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START,
                        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
  final Version version;
  try {
    version = Version.parse(input.readString());
  } catch (ParseException pe) {
    throw new CorruptIndexException("unable to parse version string (input: " + input + "): " + pe.getMessage(), pe);
  }

  final int docCount = input.readInt();
  final Map<String,String> attributes = input.readStringStringMap();
  final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
  final Map<String,String> diagnostics = input.readStringStringMap();
  final Set<String> files = input.readStringSet();

  SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile,
                                     null, diagnostics, Collections.unmodifiableMap(attributes));
  info.setFiles(files);
  return info;
}
/** Returns file names for shared doc stores, if any, else
 *  null. */
public static Set<String> getDocStoreFiles(SegmentInfo info) {
  if (Lucene3xSegmentInfoFormat.getDocStoreOffset(info) != -1) {
    final String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info);
    Set<String> files = new HashSet<>();
    if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(info)) {
      files.add(IndexFileNames.segmentFileName(dsName, "", COMPOUND_FILE_STORE_EXTENSION));
    } else {
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
    }
    return files;
  } else {
    return null;
  }
}
private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
  // #docs with field must be <= #docs
  if (field.docCount < 0 || field.docCount > info.getDocCount()) {
    throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
  }
  // #postings must be >= #docs with field
  if (field.sumDocFreq < field.docCount) {
    throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + in + ")");
  }
  // #positions must be >= #postings
  if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
    throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + in + ")");
  }
  if (previous != null) {
    throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + in + ")");
  }
}
private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
  // #docs with field must be <= #docs
  if (field.docCount < 0 || field.docCount > info.getDocCount()) {
    throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  // #postings must be >= #docs with field
  if (field.sumDocFreq < field.docCount) {
    throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  // #positions must be >= #postings
  if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
    throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  if (previous != null) {
    throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + indexIn + ", " + blockIn + ")");
  }
}
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context) {
    @Override
    protected boolean sortTermsByUnicode() {
      // We carefully peek into the stack trace above us: if
      // we are part of a "merge", we must sort by UTF16:
      boolean unicodeSortOrder = true;

      StackTraceElement[] trace = new Exception().getStackTrace();
      for (int i = 0; i < trace.length; i++) {
        //System.out.println(trace[i].getClassName());
        if ("merge".equals(trace[i].getMethodName())) {
          unicodeSortOrder = false;
          if (LuceneTestCase.VERBOSE) {
            System.out.println("NOTE: PreFlexRW codec: forcing legacy UTF16 vector term sort order");
          }
          break;
        }
      }

      return unicodeSortOrder;
    }
  };
}
private SegmentInfo readUpgradedSegmentInfo(String name, Directory dir, IndexInput input) throws IOException {
  CodecUtil.checkHeader(input, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME,
                        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_START,
                        Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
  final String version = input.readString();
  final int docCount = input.readInt();
  final Map<String,String> attributes = input.readStringStringMap();
  final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
  final Map<String,String> diagnostics = input.readStringStringMap();
  final Set<String> files = input.readStringSet();

  SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile,
                                     null, diagnostics, Collections.unmodifiableMap(attributes));
  info.setFiles(files);
  return info;
}
private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
  // #docs with field must be <= #docs
  if (field.docCount < 0 || field.docCount > info.getDocCount()) {
    throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  // #postings must be >= #docs with field
  if (field.sumDocFreq < field.docCount) {
    throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  // #positions must be >= #postings
  if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) {
    throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (resource=" + indexIn + ", " + blockIn + ")");
  }
  if (previous != null) {
    throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (resource=" + indexIn + ", " + blockIn + ")");
  }
}
/** Returns file names for shared doc stores, if any, else
 *  null. */
public static Set<String> getDocStoreFiles(SegmentInfo info) {
  if (Lucene3xSegmentInfoFormat.getDocStoreOffset(info) != -1) {
    final String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info);
    Set<String> files = new HashSet<String>();
    if (Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(info)) {
      files.add(IndexFileNames.segmentFileName(dsName, "", COMPOUND_FILE_STORE_EXTENSION));
    } else {
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
      files.add(IndexFileNames.segmentFileName(dsName, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
    }
    return files;
  } else {
    return null;
  }
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
  si.addFile(fileName);

  final IndexOutput output = dir.createOutput(fileName, ioContext);
  boolean success = false;
  try {
    CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
    // Write the Lucene version that created this segment, since 3.1
    output.writeString(si.getVersion().toString());
    output.writeInt(si.getDocCount());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeStringStringMap(si.getDiagnostics());
    output.writeStringStringMap(Collections.<String,String>emptyMap());
    output.writeStringSet(si.files());

    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      // TODO: why must we do this? do we not get tracking dir wrapper?
      IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
    } else {
      output.close();
    }
  }
}
/** Sole constructor. */
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
  final String segment = si.name;
  boolean success = false;
  fieldInfos = fn;
  try {
    fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
    indexStream = d.openInput(indexStreamFN, context);

    CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
    CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
    assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
    assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
    final long indexSize = indexStream.length() - HEADER_LENGTH_IDX;
    this.size = (int) (indexSize >> 3);
    // Verify two sources of "maxDoc" agree:
    if (this.size != si.getDocCount()) {
      throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.getDocCount());
    }
    numTotalDocs = (int) (indexSize >> 3);
    success = true;
  } finally {
    // With lock-less commits, it's entirely possible (and
    // fine) to hit a FileNotFound exception above. In
    // this case, we want to explicitly close any subset
    // of things that were opened so that we don't have to
    // wait for a GC to do so.
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
}
/** Sole constructor. */
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
  boolean success = false;
  IndexInput freqIn = null;
  IndexInput proxIn = null;
  try {
    freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
                           ioContext);
    CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
    // TODO: hasProx should (somehow!) become codec private,
    // but it's tricky because 1) FIS.hasProx is global (it
    // could be all fields that have prox are written by a
    // different codec), 2) the field may have had prox in
    // the past but all docs w/ that field were deleted.
    // Really we'd need to init prxOut lazily on write, and
    // then somewhere record that we actually wrote it so we
    // know whether to open on read:
    if (fieldInfos.hasProx()) {
      proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION),
                             ioContext);
      CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
    } else {
      proxIn = null;
    }
    this.freqIn = freqIn;
    this.proxIn = proxIn;
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(freqIn, proxIn);
    }
  }
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
  si.addFile(fileName);

  final IndexOutput output = dir.createOutput(fileName, ioContext);
  boolean success = false;
  try {
    CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
    Version version = si.getVersion();
    if (version.major < 3 || version.major > 4) {
      throw new IllegalArgumentException("invalid major version: should be 3 or 4 but got: " + version.major + " segment=" + si);
    }
    // Write the Lucene version that created this segment, since 3.1
    output.writeString(version.toString());
    output.writeInt(si.getDocCount());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeStringStringMap(si.getDiagnostics());
    output.writeStringSet(si.files());
    CodecUtil.writeFooter(output);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      // TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
      IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
    } else {
      output.close();
    }
  }
}
@Override
public final TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  return new CompressingTermVectorsReader(directory, segmentInfo, segmentSuffix, fieldInfos, context, formatName, compressionMode);
}
private static String getNormFilename(SegmentInfo info, int number) {
  if (hasSeparateNorms(info, number)) {
    long gen = Long.parseLong(info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number));
    return IndexFileNames.fileNameFromGeneration(info.name, SEPARATE_NORMS_EXTENSION + number, gen);
  } else {
    // single file for all norms
    return IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION);
  }
}
private static boolean hasSeparateNorms(SegmentInfo info, int number) {
  String v = info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number);
  if (v == null) {
    return false;
  } else {
    assert Long.parseLong(v) != SegmentInfo.NO;
    return true;
  }
}
@Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION);

  // Unfortunately, for 3.x indices, each segment's
  // FieldInfos can lie about hasVectors (claim it's true
  // when really it's false).... so we have to carefully
  // check if the files really exist before trying to open
  // them (4.x has fixed this):
  final boolean exists;
  if (Lucene3xSegmentInfoFormat.getDocStoreOffset(segmentInfo) != -1 && Lucene3xSegmentInfoFormat.getDocStoreIsCompoundFile(segmentInfo)) {
    String cfxFileName = IndexFileNames.segmentFileName(Lucene3xSegmentInfoFormat.getDocStoreSegment(segmentInfo), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION);
    if (segmentInfo.dir.fileExists(cfxFileName)) {
      Directory cfsDir = new CompoundFileDirectory(segmentInfo.dir, cfxFileName, context, false);
      try {
        exists = cfsDir.fileExists(fileName);
      } finally {
        cfsDir.close();
      }
    } else {
      exists = false;
    }
  } else {
    exists = directory.fileExists(fileName);
  }

  if (!exists) {
    // 3x's FieldInfos sometimes lies and claims a segment
    // has vectors when it doesn't:
    return null;
  } else {
    return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context);
  }
}
public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
  this.fieldInfos = fn;
  boolean success = false;
  try {
    in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION), context);
    success = true;
  } finally {
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
  readIndex(si.getDocCount());
}
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
  boolean success = false;
  try {
    in = directory.openInput(IndexFileNames.segmentFileName(si.name, "", VECTORS_EXTENSION), context);
    success = true;
  } finally {
    if (!success) {
      try {
        close();
      } catch (Throwable t) {} // ensure we throw our original exception
    }
  }
  readIndex(si.getDocCount());
}
public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, String segmentSuffix) throws IOException {
  boolean success = false;
  try {
    final String docFileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION);
    docIn = intFactory.openInput(dir, docFileName, context);

    skipIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context);

    if (fieldInfos.hasFreq()) {
      freqIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context);
    } else {
      freqIn = null;
    }
    if (fieldInfos.hasProx()) {
      posIn = intFactory.openInput(dir, IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context);
      payloadIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context);
    } else {
      posIn = null;
      payloadIn = null;
    }
    success = true;
  } finally {
    if (!success) {
      close();
    }
  }
}
@Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from TermVectorsFormat.vectorsWriter()");
  }
  return new CrankyTermVectorsWriter(delegate.vectorsWriter(directory, segmentInfo, context), random);
}
@Override
public void write(Directory dir, SegmentInfo info, FieldInfos fis, IOContext ioContext) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from SegmentInfoWriter.write()");
  }
  delegate.write(dir, info, fis, ioContext);
}
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
  if (random.nextInt(100) == 0) {
    throw new IOException("Fake IOException from StoredFieldsFormat.fieldsWriter()");
  }
  return new CrankyStoredFieldsWriter(delegate.fieldsWriter(directory, si, context), random);
}