Example source code for the Java class org.apache.lucene.index.FieldInfo.IndexOptions
Project: lams
File: SegmentTermDocs.java
void seek(TermInfo ti, Term term) throws IOException {
count = 0;
FieldInfo fi = fieldInfos.fieldInfo(term.field());
this.indexOptions = (fi != null) ? fi.getIndexOptions() : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
currentFieldStoresPayloads = (fi != null) ? fi.hasPayloads() : false;
if (ti == null) {
df = 0;
} else {
df = ti.docFreq;
doc = 0;
freqBasePointer = ti.freqPointer;
proxBasePointer = ti.proxPointer;
skipPointer = freqBasePointer + ti.skipOffset;
freqStream.seek(freqBasePointer);
haveSkipped = false;
}
}
Project: lams
File: Lucene41PostingsWriter.java
@Override
public int setField(FieldInfo fieldInfo) {
IndexOptions indexOptions = fieldInfo.getIndexOptions();
fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
fieldHasPayloads = fieldInfo.hasPayloads();
skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
lastState = emptyState;
if (fieldHasPositions) {
if (fieldHasPayloads || fieldHasOffsets) {
return 3; // doc + pos + pay FP
} else {
return 2; // doc + pos FP
}
} else {
return 1; // doc FP
}
}
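
The three compareTo checks rely on IndexOptions being an ordered enum: each constant indexes strictly more data than the one before it, so opts.compareTo(X) >= 0 reads as "indexes at least X". A minimal sketch of that ordering (assuming Lucene 4.x, where IndexOptions is nested in FieldInfo; the class name is illustrative):

import org.apache.lucene.index.FieldInfo.IndexOptions;

public class IndexOptionsOrderDemo {
  public static void main(String[] args) {
    for (IndexOptions opts : IndexOptions.values()) {
      // Declaration order encodes capability: DOCS_ONLY < DOCS_AND_FREQS
      // < DOCS_AND_FREQS_AND_POSITIONS < DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
      System.out.printf("%s: freqs=%b positions=%b offsets=%b%n",
          opts,
          opts.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0,
          opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0,
          opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0);
    }
  }
}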
Project: search
File: TestSimilarity2.java
/** make sure all sims work if TF is omitted */
public void testOmitTF() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_ONLY);
ft.freeze();
Field f = newField("foo", "bar", ft);
doc.add(f);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher is = newSearcher(ir);
for (Similarity sim : sims) {
is.setSimilarity(sim);
BooleanQuery query = new BooleanQuery(true);
query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
assertEquals(1, is.search(query, 10).totalHits);
}
ir.close();
dir.close();
}
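
newDirectory, RandomIndexWriter and newField above are Lucene test-framework helpers. With only the public API, the equivalent DOCS_ONLY indexing looks roughly like this (directory, analyzer and version choices are assumptions, not part of the original test):

Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir,
    new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)));

FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_ONLY); // doc IDs only: no freqs, positions or offsets
ft.freeze();

Document doc = new Document();
doc.add(new Field("foo", "bar", ft));
writer.addDocument(doc);
writer.close();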
Project: search
File: TestIndexWriter.java
public void testChangeIndexOptions() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
FieldType docsAndFreqs = new FieldType(TextField.TYPE_NOT_STORED);
docsAndFreqs.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED);
docsOnly.setIndexOptions(IndexOptions.DOCS_ONLY);
Document doc = new Document();
doc.add(new Field("field", "a b c", docsAndFreqs));
w.addDocument(doc);
w.addDocument(doc);
doc = new Document();
doc.add(new Field("field", "a b c", docsOnly));
w.addDocument(doc);
w.close();
dir.close();
}
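
Mixing options for one field is legal; Lucene resolves the conflict by keeping the weakest setting seen, so "field" ends up DOCS_ONLY here. A hedged way to verify (this would run before dir.close() above; getMergedFieldInfos is a real Lucene 4.x helper):

DirectoryReader reader = DirectoryReader.open(dir);
// Expected to print DOCS_ONLY: freqs were dropped once the docsOnly document arrived
System.out.println(MultiFields.getMergedFieldInfos(reader)
    .fieldInfo("field").getIndexOptions());
reader.close();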
Project: search
File: BasePostingsFormatTestCase.java
public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, IndexOptions options) {
random = new Random(seed);
docRandom = new Random(random.nextLong());
docFreq = TestUtil.nextInt(random, minDocFreq, maxDocFreq);
this.liveDocs = liveDocs;
// TODO: more realistic to inversely tie this to numDocs:
maxDocSpacing = TestUtil.nextInt(random, 1, 100);
if (random.nextInt(10) == 7) {
// 10% of the time create big payloads:
payloadSize = 1 + random.nextInt(3);
} else {
payloadSize = 1 + random.nextInt(1);
}
fixedPayloads = random.nextBoolean();
byte[] payloadBytes = new byte[payloadSize];
payload = new BytesRef(payloadBytes);
this.options = options;
doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
}
Project: search
File: BasePostingsFormatTestCase.java
private static SeedPostings getSeedPostings(String term, long seed, boolean withLiveDocs, IndexOptions options) {
int minDocFreq, maxDocFreq;
if (term.startsWith("big_")) {
minDocFreq = RANDOM_MULTIPLIER * 50000;
maxDocFreq = RANDOM_MULTIPLIER * 70000;
} else if (term.startsWith("medium_")) {
minDocFreq = RANDOM_MULTIPLIER * 3000;
maxDocFreq = RANDOM_MULTIPLIER * 6000;
} else if (term.startsWith("low_")) {
minDocFreq = RANDOM_MULTIPLIER;
maxDocFreq = RANDOM_MULTIPLIER * 40;
} else {
minDocFreq = 1;
maxDocFreq = 3;
}
return new SeedPostings(seed, minDocFreq, maxDocFreq, withLiveDocs ? globalLiveDocs : null, options);
}
Project: search
File: PreAnalyzedField.java
/**
* Utility method to create a {@link org.apache.lucene.document.FieldType}
* based on the {@link SchemaField}
*/
public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
if (!field.indexed() && !field.stored()) {
if (log.isTraceEnabled())
log.trace("Ignoring unindexed/unstored field: " + field);
return null;
}
org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
newType.setIndexed(field.indexed());
newType.setTokenized(field.isTokenized());
newType.setStored(field.stored());
newType.setOmitNorms(field.omitNorms());
IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
if (field.omitTermFreqAndPositions()) {
options = IndexOptions.DOCS_ONLY;
} else if (field.omitPositions()) {
options = IndexOptions.DOCS_AND_FREQS;
} else if (field.storeOffsetsWithPositions()) {
options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
newType.setIndexOptions(options);
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
return newType;
}
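
Note the precedence in the chain above: omitTermFreqAndPositions wins over omitPositions, and offsets are only added when neither omit flag is set. The same mapping in isolation, as a hypothetical helper (not part of PreAnalyzedField):

static IndexOptions optionsFor(boolean omitTf, boolean omitPositions, boolean storeOffsets) {
  if (omitTf) return IndexOptions.DOCS_ONLY;             // strongest omit wins
  if (omitPositions) return IndexOptions.DOCS_AND_FREQS;
  if (storeOffsets) return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;      // the default
}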
Project: search
File: BasePostingsFormatTestCase.java
private void testTerms(final Fields fieldsSource, final EnumSet<Option> options,
final IndexOptions maxTestOptions,
final IndexOptions maxIndexOptions,
final boolean alwaysTestMax) throws Exception {
if (options.contains(Option.THREADS)) {
int numThreads = TestUtil.nextInt(random(), 2, 5);
Thread[] threads = new Thread[numThreads];
for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
threads[threadUpto] = new TestThread(this, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
threads[threadUpto].start();
}
for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
threads[threadUpto].join();
}
} else {
testTermsOneThread(fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
}
}
Project: search
File: TestOmitPositions.java
public void testBasic() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
Field f = newField("foo", "this is a test test", ft);
doc.add(f);
for (int i = 0; i < 100; i++) {
w.addDocument(doc);
}
IndexReader reader = w.getReader();
w.close();
assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
DocsEnum de = TestUtil.docs(random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(2, de.freq());
}
reader.close();
dir.close();
}
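
TestUtil.docs is another test helper; with the public Lucene 4.x API the same freq-only iteration can be written as follows (MultiFields.getTermDocsEnum and getLiveDocs are real 4.x methods, the rest is illustrative):

DocsEnum de = MultiFields.getTermDocsEnum(reader, MultiFields.getLiveDocs(reader),
    "foo", new BytesRef("test"), DocsEnum.FLAG_FREQS);
if (de != null) { // null when the field or term does not exist
  while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    System.out.println("doc=" + de.docID() + " freq=" + de.freq());
  }
}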
Project: search
File: MemoryPostingsFormat.java
private void decodeMetaData() {
if (!didDecode) {
buffer.reset(current.output.bytes, current.output.offset, current.output.length);
docFreq = buffer.readVInt();
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong();
} else {
totalTermFreq = -1;
}
postingsSpare.bytes = current.output.bytes;
postingsSpare.offset = buffer.getPosition();
postingsSpare.length = current.output.length - (buffer.getPosition() - current.output.offset);
//System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
didDecode = true;
}
}
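
The readVLong delta works because totalTermFreq is always >= docFreq, so only the difference is stored and the common totalTermFreq == docFreq case costs a single byte. The mirror write side would look roughly like this (a sketch against an assumed DataOutput named out, not the actual MemoryPostingsFormat writer):

out.writeVInt(docFreq);
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
  out.writeVLong(totalTermFreq - docFreq); // always >= 0, usually 0
}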
Project: search
File: MemoryPostingsFormat.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
decodeMetaData();
FSTDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
} else {
docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
}
}
//System.out.println("D&P reset this=" + this);
return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
}
Project: search
File: TestOmitTf.java
/** test that when freqs are omitted, totalTermFreq and sumTotalTermFreq are -1 */
public void testStats() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_ONLY);
ft.freeze();
Field f = newField("foo", "bar", ft);
doc.add(f);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
assertEquals(-1, ir.getSumTotalTermFreq("foo"));
ir.close();
dir.close();
}
Project: search
File: SimpleTextFieldsWriter.java
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (!wroteTerm) {
// we lazily do this, in case the term had zero docs
write(TERM);
write(term);
newline();
wroteTerm = true;
}
write(DOC);
write(Integer.toString(docID));
newline();
if (indexOptions != IndexOptions.DOCS_ONLY) {
write(FREQ);
write(Integer.toString(termDocFreq));
newline();
}
lastStartOffset = 0;
}
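
For a DOCS_AND_FREQS field this writer produces human-readable postings, roughly of the following shape (illustrative only, not byte-exact SimpleTextCodec output):

  term test
    doc 0
      freq 2
    doc 1
      freq 2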
Project: search
File: SimpleTextFieldsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed
return null;
}
SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
} else {
docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
}
return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
}
Project: search
File: TestPostingsOffsets.java
private void checkTokens(Token[] tokens) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
boolean success = false;
try {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
// store some term vectors for the checkindex cross-check
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
Document doc = new Document();
doc.add(new Field("body", new CannedTokenStream(tokens), ft));
riw.addDocument(doc);
success = true;
} finally {
if (success) {
IOUtils.close(riw, dir);
} else {
IOUtils.closeWhileHandlingException(riw, dir);
}
}
}
Project: search
File: SepPostingsWriter.java
@Override
public void startTerm() throws IOException {
docIndex.mark();
//System.out.println("SEPW: startTerm docIndex=" + docIndex);
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.mark();
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.mark();
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
Project: search
File: SepPostingsWriter.java
/** Adds a new doc in this term. */
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
final int delta = docID - lastDocID;
//System.out.println("SEPW: startDoc: write doc=" + docID + " delta=" + delta + " out.fp=" + docOut);
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
}
if ((++df % skipInterval) == 0) {
// TODO: -- awkward we have to make these two
// separate calls to skipper
//System.out.println(" buffer skip lastDocID=" + lastDocID);
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
lastDocID = docID;
docOut.write(delta);
if (indexOptions != IndexOptions.DOCS_ONLY) {
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq);
}
}
Project: search
File: TestPostingsOffsets.java
public void testLegalbutVeryLargeOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
if (random().nextBoolean()) {
t1.setPayload(new BytesRef("test"));
}
Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
TokenStream tokenStream = new CannedTokenStream(
new Token[] { t1, t2 }
);
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
// store some term vectors for the checkindex cross-check
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
Field field = new Field("foo", tokenStream, ft);
doc.add(field);
iw.addDocument(doc);
iw.close();
dir.close();
}
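
What this test probes is the upper bound: offsets as large as Integer.MAX_VALUE are legal. The illegal case is an offset that goes backwards; a hedged sketch of a token IndexWriter is expected to reject with an IllegalArgumentException at addDocument time:

Token bad = new Token("foo", 1, 0); // endOffset < startOffset
doc.add(new Field("foo", new CannedTokenStream(new Token[] { bad }), ft));
// iw.addDocument(doc) should now fail instead of writing corrupt offsets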
Project: search
File: SortingAtomicReader.java
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
final DocsEnum inReuse;
final SortingDocsEnum wrapReuse;
if (reuse != null && reuse instanceof SortingDocsEnum) {
// if we're asked to reuse the given DocsEnum and it is Sorting, return
// the wrapped one, since some Codecs expect it.
wrapReuse = (SortingDocsEnum) reuse;
inReuse = wrapReuse.getWrapped();
} else {
wrapReuse = null;
inReuse = reuse;
}
final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 && (flags & DocsEnum.FLAG_FREQS) != 0;
return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
}
Project: lams
File: IntersectTermsEnum.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
currentFrame.decodeMetaData();
return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
Project: lams
File: SegmentTermDocs.java
/** Optimized implementation. */
public int read(final int[] docs, final int[] freqs)
throws IOException {
final int length = docs.length;
if (indexOptions == IndexOptions.DOCS_ONLY) {
return readNoTf(docs, freqs, length);
} else {
int i = 0;
while (i < length && count < df) {
// manually inlined call to next() for speed
final int docCode = freqStream.readVInt();
doc += docCode >>> 1; // shift off low bit
if ((docCode & 1) != 0) // if low bit is set
freq = 1; // freq is one
else
freq = freqStream.readVInt(); // else read freq
count++;
if (liveDocs == null || liveDocs.get(doc)) {
docs[i] = doc;
freqs[i] = freq;
++i;
}
}
return i;
}
}
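
The inlined decode matches the legacy .frq encoding: each posting stores (delta << 1 | lowBit), and a set low bit means freq == 1 with no second VInt following. A hypothetical encoder for a single posting:

static void writePosting(IndexOutput out, int delta, int freq) throws IOException {
  if (freq == 1) {
    out.writeVInt((delta << 1) | 1); // low bit set: freq is implicitly 1
  } else {
    out.writeVInt(delta << 1);       // low bit clear: an explicit freq VInt follows
    out.writeVInt(freq);
  }
}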
Project: lams
File: Lucene3xFields.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
PreDocsAndPositionsEnum docsPosEnum;
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
return null;
} else if (reuse == null || !(reuse instanceof PreDocsAndPositionsEnum)) {
docsPosEnum = new PreDocsAndPositionsEnum();
} else {
docsPosEnum = (PreDocsAndPositionsEnum) reuse;
if (docsPosEnum.getFreqStream() != freqStream) {
docsPosEnum = new PreDocsAndPositionsEnum();
}
}
return docsPosEnum.reset(termEnum, liveDocs);
}
Project: lams
File: SegmentTermPositions.java
public final int nextPosition() throws IOException {
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
// This field does not store positions, payloads
return 0;
// perform lazy skips if necessary
lazySkip();
proxCount--;
return position += readDeltaPosition();
}
Project: lams
File: Lucene41PostingsReader.java
private void _decodeTerm(DataInput in, FieldInfo fieldInfo, IntBlockTermState termState) throws IOException {
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
final boolean fieldHasPayloads = fieldInfo.hasPayloads();
if (termState.docFreq == 1) {
termState.singletonDocID = in.readVInt();
} else {
termState.singletonDocID = -1;
termState.docStartFP += in.readVLong();
}
if (fieldHasPositions) {
termState.posStartFP += in.readVLong();
if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVLong();
} else {
termState.lastPosBlockOffset = -1;
}
if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) {
termState.payStartFP += in.readVLong();
}
}
if (termState.docFreq > BLOCK_SIZE) {
termState.skipOffset = in.readVLong();
} else {
termState.skipOffset = -1;
}
}
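
The += on docStartFP, posStartFP and payStartFP works because Lucene41 term metadata stores file pointers as deltas from the previous term in the same metadata block. A hedged sketch of the matching write side for the doc-FP branch (field names assumed):

if (state.docFreq == 1) {
  out.writeVInt(state.singletonDocID);               // single-doc term: inline the doc ID
} else {
  out.writeVLong(state.docStartFP - lastDocStartFP); // delta-code the .doc file pointer
}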
Project: lams
File: Lucene41PostingsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
DocsAndPositionsEnum reuse, int flags)
throws IOException {
boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
boolean indexHasPayloads = fieldInfo.hasPayloads();
if ((!indexHasOffsets || (flags & DocsAndPositionsEnum.FLAG_OFFSETS) == 0) &&
(!indexHasPayloads || (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) == 0)) {
BlockDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse instanceof BlockDocsAndPositionsEnum) {
docsAndPositionsEnum = (BlockDocsAndPositionsEnum) reuse;
if (!docsAndPositionsEnum.canReuse(docIn, fieldInfo)) {
docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo);
}
} else {
docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo);
}
return docsAndPositionsEnum.reset(liveDocs, (IntBlockTermState) termState);
} else {
EverythingEnum everythingEnum;
if (reuse instanceof EverythingEnum) {
everythingEnum = (EverythingEnum) reuse;
if (!everythingEnum.canReuse(docIn, fieldInfo)) {
everythingEnum = new EverythingEnum(fieldInfo);
}
} else {
everythingEnum = new EverythingEnum(fieldInfo);
}
return everythingEnum.reset(liveDocs, (IntBlockTermState) termState, flags);
}
}
Project: lams
File: Lucene41PostingsReader.java
public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene41PostingsReader.this.docIn;
this.docIn = null;
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
encoded = new byte[MAX_ENCODED_SIZE];
}
Project: lams
File: Lucene41PostingsReader.java
public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene41PostingsReader.this.docIn;
this.docIn = null;
this.posIn = Lucene41PostingsReader.this.posIn.clone();
encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
}
Project: lams
File: Lucene41PostingsReader.java
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene41PostingsReader.this.docIn;
this.docIn = null;
this.posIn = Lucene41PostingsReader.this.posIn.clone();
this.payIn = Lucene41PostingsReader.this.payIn.clone();
encoded = new byte[MAX_ENCODED_SIZE];
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) {
offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
offsetLengthBuffer = new int[MAX_DATA_SIZE];
} else {
offsetStartDeltaBuffer = null;
offsetLengthBuffer = null;
startOffset = -1;
endOffset = -1;
}
indexHasPayloads = fieldInfo.hasPayloads();
if (indexHasPayloads) {
payloadLengthBuffer = new int[MAX_DATA_SIZE];
payloadBytes = new byte[128];
payload = new BytesRef();
} else {
payloadLengthBuffer = null;
payloadBytes = null;
payload = null;
}
}
Project: lams
File: FieldInfos.java
private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
boolean storeTermVector,
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
// This field wasn't yet added to this in-RAM
// segment's FieldInfo, so now we get a global
// number for this field. If the field was seen
// before then we'll get the same name and number,
// else we'll allocate a new one:
final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues);
fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, -1, null);
assert !byName.containsKey(fi.name);
globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType());
byName.put(fi.name, fi);
} else {
fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
if (docValues != null) {
// Only pay the synchronization cost if fi does not already have a DVType
boolean updateGlobal = !fi.hasDocValues();
if (updateGlobal) {
// Must also update docValuesType map so it's
// aware of this field's DocValueType. This will throw IllegalArgumentException if
// an illegal type change was attempted.
globalFieldNumbers.setDocValuesType(fi.number, name, docValues);
}
fi.setDocValuesType(docValues); // this will also perform the consistency check.
}
if (!fi.omitsNorms() && normType != null) {
fi.setNormValueType(normType);
}
}
return fi;
}
Project: search
File: TestIndexWriterExceptions.java
@Nightly
public void testTooManyTokens() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
doc.add(new Field("foo", new TokenStream() {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
long num = 0;
@Override
public boolean incrementToken() throws IOException {
if (num == Integer.MAX_VALUE + 1L) { // 1L: long arithmetic, so this guard can actually fire
return false;
}
clearAttributes();
if (num == 0) {
posIncAtt.setPositionIncrement(1);
} else {
posIncAtt.setPositionIncrement(0);
}
termAtt.append("a");
num++;
if (VERBOSE && num % 1000000 == 0) {
System.out.println("indexed: " + num);
}
return true;
}
}, ft));
try {
iw.addDocument(doc);
fail("didn't hit exception");
} catch (IllegalArgumentException expected) {
assertTrue(expected.getMessage().contains("too many tokens"));
}
iw.close();
dir.close();
}
Project: search
File: TestPostingsHighlighter.java
public void testOneSentence() throws Exception {
Directory dir = newDirectory();
// use simpleanalyzer for more natural tokenization (else "test." is a token)
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test.");
iw.addDocument(doc);
body.setStringValue("Test a one sentence document.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
ir.close();
dir.close();
}
Project: search
File: TestPostingsHighlighter.java
public void testMultipleFields() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
Field title = new Field("title", "", offsetsType);
Document doc = new Document();
doc.add(body);
doc.add(title);
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
title.setStringValue("I am hoping for the best.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
title.setStringValue("But best may not be good enough.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter();
BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
Map<String,String[]> snippets = highlighter.highlightFields(new String [] { "body", "title" }, query, searcher, topDocs);
assertEquals(2, snippets.size());
assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
ir.close();
dir.close();
}
Project: search
File: TestPostingsHighlighter.java
public void testMultiplePassages() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("This test is another test. Not a good sentence. Test test test test.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
ir.close();
dir.close();
}
Project: search
File: TestPostingsHighlighter.java
public void testBuddhism() throws Exception {
String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
"range of academic disciplines published over the last forty years. With a new introduction " +
"by the editor, this collection is a unique and unrivalled research resource for both " +
"student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
"South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
"- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
"and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
"Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
"Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
FieldType positionsType = new FieldType(TextField.TYPE_STORED);
positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", text, positionsType);
Document document = new Document();
document.add(body);
iw.addDocument(document);
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
PhraseQuery query = new PhraseQuery();
query.add(new Term("body", "buddhist"));
query.add(new Term("body", "origins"));
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits);
PostingsHighlighter highlighter = new PostingsHighlighter();
String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
assertEquals(1, snippets.length);
assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
ir.close();
dir.close();
}
Project: search
File: TestFieldsReader.java
public void test() throws IOException {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
IndexReader reader = DirectoryReader.open(dir);
Document doc = reader.document(0);
assertTrue(doc != null);
assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);
Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
assertTrue(field != null);
assertTrue(field.fieldType().storeTermVectors());
assertFalse(field.fieldType().omitNorms());
assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
assertFalse(field.fieldType().storeTermVectors());
assertTrue(field.fieldType().omitNorms());
assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = (Field) doc.getField(DocHelper.NO_TF_KEY);
assertTrue(field != null);
assertFalse(field.fieldType().storeTermVectors());
assertFalse(field.fieldType().omitNorms());
assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_ONLY);
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
reader.document(0, visitor);
final List<IndexableField> fields = visitor.getDocument().getFields();
assertEquals(1, fields.size());
assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
reader.close();
}