Java 类org.apache.lucene.index.DocsEnum 实例源码
项目:lams
文件:MultiPhraseQuery.java
/**
 * Builds a disjunction over the positional postings of all given terms in this
 * segment. Terms absent from the reader are skipped; a term that exists but
 * was indexed without positions is a hard error for phrase matching.
 */
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
  List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
  for (final Term term : terms) {
    final TermState state = termContexts.get(term).get(context.ord);
    if (state == null) {
      // Term doesn't exist in this reader segment.
      continue;
    }
    termsEnum.seekExact(term.bytes(), state);
    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
    if (postings == null) {
      // term does exist, but has no positions
      throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
    }
    cost += postings.cost();
    docsEnums.add(postings);
  }
  _queue = new DocsQueue(docsEnums);
  _posList = new IntQueue();
}
项目:fangorn
文件:IndexTestCase.java
/**
 * Returns a positions enum positioned on {@code docid} for term {@code t},
 * failing the test if no leaf contains that term/doc combination.
 *
 * @throws IOException if postings cannot be read
 */
protected DocsAndPositionsEnum getPosEnum(IndexReader r, int docid, Term t)
    throws IOException {
  List<AtomicReaderContext> leaves = r.getContext().leaves();
  for (AtomicReaderContext context : leaves) {
    AtomicReader reader = context.reader();
    DocsAndPositionsEnum termPositions = reader.termPositionsEnum(t);
    if (termPositions == null) {
      // Term absent from this leaf (or field has no positions);
      // the original code would NPE on nextDoc() here.
      continue;
    }
    // Scan forward until we land on the requested doc or run out.
    int doc;
    while ((doc = termPositions.nextDoc()) != DocsEnum.NO_MORE_DOCS
        && doc != docid) {
    }
    if (doc != DocsEnum.NO_MORE_DOCS) {
      return termPositions;
    }
  }
  assertFalse("Expected positions enum for doc " + docid, true);
  return null; // unreachable: the assertion above always fails
}
项目:search
文件:MemoryIndexTest.java
/**
 * Verifies that a freshly obtained DocsEnum (and a reused one) is
 * unpositioned (docID() == -1) before the first nextDoc() call.
 */
public void testDocsEnumStart() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
memory.addField("foo", "bar", analyzer);
AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
DocsEnum disi = TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
int docid = disi.docID();
assertEquals(-1, docid);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
TermsEnum te = reader.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar")));
disi = te.docs(null, disi, DocsEnum.FLAG_NONE);
docid = disi.docID();
assertEquals(-1, docid);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
reader.close();
}
项目:search
文件:SepPostingsReader.java
/**
 * Returns a DocsEnum over the current term, reusing {@code reuse} when it is
 * a compatible SepDocsEnum created from the same underlying doc input.
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  SepDocsEnum docsEnum;
  // instanceof is null-safe, so the explicit null check was redundant.
  if (!(reuse instanceof SepDocsEnum)) {
    docsEnum = new SepDocsEnum();
  } else {
    docsEnum = (SepDocsEnum) reuse;
    if (docsEnum.startDocIn != docIn) {
      // If you are using ParallelReader and pass in a reused
      // DocsAndPositionsEnum, it could have come from another
      // reader that also uses the sep codec.
      docsEnum = new SepDocsEnum();
    }
  }
  return docsEnum.init(fieldInfo, termState, liveDocs);
}
项目:search
文件:TermFilter.java
/**
 * Returns the set of docs containing the filter's term, or null when the
 * field or term is absent from this segment.
 */
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
  // No terms indexed for this field in the segment -> nothing can match.
  final Terms fieldTerms = context.reader().terms(term.field());
  if (fieldTerms == null) {
    return null;
  }
  final TermsEnum termsEnum = fieldTerms.iterator(null);
  if (!termsEnum.seekExact(term.bytes())) {
    // Term not present in this segment.
    return null;
  }
  // Lazily materialize the postings only when the DocIdSet is iterated.
  return new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      return termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
    }
  };
}
项目:search
文件:ContainsPrefixTreeFilter.java
// Collects all docs matching the current term of the (instance-field)
// termsEnum into a SmallDocSet, honoring the acceptContains filter.
// The instance-field docsEnum is reused across calls.
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
SmallDocSet set = null;
docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
// Lazily allocate, sized by docFreq when available (fallback 16).
if (set == null) {
int size = termsEnum.docFreq();
if (size <= 0)
size = 16;
set = new SmallDocSet(size);
}
set.set(docid);
}
return set; // null when no docs matched
}
项目:search
文件:SortingAtomicReader.java
/**
 * Returns a DocsEnum whose docs are remapped to the sorted doc order.
 * Unwraps a reused SortingDocsEnum so the inner codec sees its own enum.
 */
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, final int flags) throws IOException {
  final DocsEnum inReuse;
  final SortingDocsEnum wrapReuse;
  // instanceof is null-safe; the extra null check was redundant.
  if (reuse instanceof SortingDocsEnum) {
    // if we're asked to reuse the given DocsEnum and it is Sorting, return
    // the wrapped one, since some Codecs expect it.
    wrapReuse = (SortingDocsEnum) reuse;
    inReuse = wrapReuse.getWrapped();
  } else {
    wrapReuse = null;
    inReuse = reuse;
  }
  final DocsEnum inDocs = in.docs(newToOld(liveDocs), inReuse, flags);
  // Freqs are delivered only when the field indexed them AND the caller asked.
  final boolean withFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >=0 && (flags & DocsEnum.FLAG_FREQS) != 0;
  return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
}
项目:search
文件:TermsIncludingScoreQuery.java
// Scores documents out of doc-id order up to (exclusive) max.
// Returns true while more docs remain (doc != NO_MORE_DOCS).
@Override
public boolean score(Collector collector, int max) throws IOException {
FakeScorer fakeScorer = new FakeScorer();
collector.setScorer(fakeScorer);
if (doc == -1) {
// First call: advance to the first matching doc.
doc = nextDocOutOfOrder();
}
while(doc < max) {
fakeScorer.doc = doc;
// scoreUpto tracks the term whose postings produced the current doc.
fakeScorer.score = scores[ords[scoreUpto]];
collector.collect(doc);
doc = nextDocOutOfOrder();
}
return doc != DocsEnum.NO_MORE_DOCS;
}
项目:search
文件:TermsIncludingScoreQuery.java
// Advances to the next matching doc with no doc-id ordering guarantee:
// drains the current term's postings, then seeks the next seed term.
int nextDocOutOfOrder() throws IOException {
while (true) {
if (docsEnum != null) {
int docId = docsEnumNextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
// Current term exhausted; fall through to pick the next term.
docsEnum = null;
} else {
return doc = docId;
}
}
if (upto == terms.size()) {
// All terms consumed.
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
scoreUpto = upto;
// Only terms actually present in this segment yield a postings enum.
if (termsEnum.seekExact(terms.get(ords[upto++], spare))) {
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, DocsEnum.FLAG_NONE);
}
}
}
项目:search
文件:TermsIncludingScoreQuery.java
// Marks every doc containing any collected term and records the term's
// score for that doc. Later terms overwrite earlier scores for the same
// doc ("last wins") -- see the many-to-many note below.
protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
BytesRef spare = new BytesRef();
DocsEnum docsEnum = null;
for (int i = 0; i < terms.size(); i++) {
if (termsEnum.seekExact(terms.get(ords[i], spare))) {
// Reuse the same enum across terms to avoid reallocations.
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
float score = TermsIncludingScoreQuery.this.scores[ords[i]];
for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
matchingDocs.set(doc);
// In the case the same doc is also related to a another doc, a score might be overwritten. I think this
// can only happen in a many-to-many relation
scores[doc] = score;
}
}
}
}
项目:search
文件:TermsIncludingScoreQuery.java
// Variant with "first wins" semantics: once a doc is marked, later terms
// do not overwrite its score (matches MVInnerScorer behavior, see below).
@Override
protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
BytesRef spare = new BytesRef();
DocsEnum docsEnum = null;
for (int i = 0; i < terms.size(); i++) {
if (termsEnum.seekExact(terms.get(ords[i], spare))) {
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
float score = TermsIncludingScoreQuery.this.scores[ords[i]];
for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
// I prefer this:
/*if (scores[doc] < score) {
scores[doc] = score;
matchingDocs.set(doc);
}*/
// But this behaves the same as MVInnerScorer and only then the tests will pass:
if (!matchingDocs.get(doc)) {
scores[doc] = score;
matchingDocs.set(doc);
}
}
}
}
}
项目:search
文件:TestIDVersionPostingsFormat.java
/**
 * Looks up the id across all segments, requiring at least the given
 * version. Returns the global docID (segment base + local id) if found,
 * else -1. Also records the matched segment's version in lastVersion.
 */
public int lookup(BytesRef id, long version) throws IOException {
for(int seg=0;seg<numSegs;seg++) {
if (((IDVersionSegmentTermsEnum) termsEnums[seg]).seekExact(id, version)) {
if (VERBOSE) {
System.out.println(" found in seg=" + termsEnums[seg]);
}
docsEnums[seg] = termsEnums[seg].docs(liveDocs[seg], docsEnums[seg], 0);
int docID = docsEnums[seg].nextDoc();
if (docID != DocsEnum.NO_MORE_DOCS) {
lastVersion = ((IDVersionSegmentTermsEnum) termsEnums[seg]).getVersion();
// Convert the segment-local docID to an index-wide docID.
return docBases[seg] + docID;
}
// Term matched but all its docs are deleted.
assert hasDeletions;
}
}
return -1;
}
项目:search
文件:TestUtil.java
/**
 * Randomly obtains a DocsEnum for the current term: sometimes upgrades to a
 * DocsAndPositionsEnum (with random offset/payload flags) to exercise more
 * code paths, sometimes forces FLAG_FREQS onto the plain docs enum.
 */
public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (random.nextBoolean()) {
if (random.nextBoolean()) {
final int posFlags;
switch (random.nextInt(4)) {
case 0: posFlags = 0; break;
case 1: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS; break;
case 2: posFlags = DocsAndPositionsEnum.FLAG_PAYLOADS; break;
default: posFlags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break;
}
// TODO: cast to DocsAndPositionsEnum?
DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, posFlags);
if (docsAndPositions != null) {
// Positions available: a DocsAndPositionsEnum is also a DocsEnum.
return docsAndPositions;
}
}
flags |= DocsEnum.FLAG_FREQS;
}
return termsEnum.docs(liveDocs, reuse, flags);
}
项目:search
文件:LuceneTestCase.java
/**
 * Asserts two DocsEnums return identical doc ids (and freqs, when
 * {@code hasFreqs}) when stepped sequentially via nextDoc(). Both enums
 * must be unpositioned (docID() == -1) on entry and exhaust together.
 */
public void assertDocsEnumEquals(String info, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws IOException {
if (leftDocs == null) {
assertNull(rightDocs);
return;
}
assertEquals(info, -1, leftDocs.docID());
assertEquals(info, -1, rightDocs.docID());
int docid;
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(info, docid, rightDocs.nextDoc());
if (hasFreqs) {
assertEquals(info, leftDocs.freq(), rightDocs.freq());
}
}
// Right side must be exhausted at exactly the same point as the left.
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
项目:search
文件:MultiPhraseQuery.java
/**
 * Builds the union of positional postings for all given terms in this
 * segment. Terms absent from the reader are skipped; a term indexed
 * without positions is a hard error for phrase matching.
 */
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term doesn't exist in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
// Accumulate the cost estimate across all sub-enums.
cost += postings.cost();
docsEnums.add(postings);
}
_queue = new DocsQueue(docsEnums);
_posList = new IntQueue();
}
项目:search
文件:TestReuseDocsEnum.java
/**
 * Verifies that passing reuse=null always yields a distinct DocsEnum
 * instance per term: the identity map must end up with one entry per term.
 */
public void testReuseDocsEnumNoReuse() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
DirectoryReader open = DirectoryReader.open(dir);
for (AtomicReaderContext ctx : open.leaves()) {
AtomicReader indexReader = ctx.reader();
Terms terms = indexReader.terms("body");
TermsEnum iterator = terms.iterator(null);
// Identity semantics: distinct objects count separately even if equal.
IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<>();
MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
while ((iterator.next()) != null) {
DocsEnum docs = iterator.docs(random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()), null, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
enums.put(docs, true);
}
assertEquals(terms.size(), enums.size());
}
IOUtils.close(writer, open, dir);
}
项目:search
文件:SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>,
 * or -1 if no document was found. This method is primarily intended for
 * clients that fetch documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  final Fields fields = atomicReader.fields();
  if (fields == null) {
    return -1;
  }
  final Terms terms = fields.terms(t.field());
  if (terms == null) {
    return -1;
  }
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(t.bytes())) {
    return -1;
  }
  // FLAG_NONE: only doc ids are needed, not freqs.
  final DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) {
    return -1;
  }
  final int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:search
文件:TestRTGBase.java
// Returns the single (live) doc id matching t, or -1 if none. Also asserts
// the unique-key invariant: the term matches at most one document.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
Fields fields = MultiFields.getFields(r);
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes)) {
return -1;
}
// NOTE(review): no null check on docs here, unlike SolrIndexSearcher's
// variant; TermsEnum.docs is documented to never return null, so OK.
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
int id = docs.nextDoc();
if (id != DocIdSetIterator.NO_MORE_DOCS) {
int next = docs.nextDoc();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
}
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:Palmetto
文件:LuceneCorpusAdapter.java
/**
 * Adds to {@code documents} the index-wide ids of all documents containing
 * {@code word} in the adapter's field, across every sub-reader. Segment-local
 * doc ids are rebased with each leaf's docBase. I/O failures are logged and
 * the set may be left partially filled (best-effort, as before).
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
  Term term = new Term(fieldName, word);
  try {
    for (int i = 0; i < reader.length; i++) {
      // Scope per-leaf state to the iteration instead of method scope.
      DocsEnum docs = reader[i].termDocsEnum(term);
      if (docs != null) {
        int baseDocId = contexts[i].docBase;
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          documents.add(baseDocId + docs.docID());
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
项目:NYBC
文件:MemoryIndexTest.java
/**
 * Verifies that a freshly obtained DocsEnum (and a reused one) starts
 * unpositioned before the first nextDoc() call (older-API variant:
 * docID() may be -1 or NO_MORE_DOCS here).
 */
public void testDocsEnumStart() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
memory.addField("foo", "bar", analyzer);
AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
DocsEnum disi = _TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
TermsEnum te = reader.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docs(null, disi, DocsEnum.FLAG_NONE);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
reader.close();
}
项目:read-open-source-code
文件:ContainsPrefixTreeFilter.java
// Collects all docs matching the current term of the (instance-field)
// termsEnum into a SmallDocSet, honoring the acceptContains filter.
// The instance-field docsEnum is reused across calls.
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
SmallDocSet set = null;
docsEnum = termsEnum.docs(acceptContains, docsEnum, DocsEnum.FLAG_NONE);
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
// Lazily allocate, sized by docFreq when available (fallback 16).
if (set == null) {
int size = termsEnum.docFreq();
if (size <= 0)
size = 16;
set = new SmallDocSet(size);
}
set.set(docid);
}
return set; // null when no docs matched
}
项目:read-open-source-code
文件:SepPostingsReader.java
/**
 * Returns a DocsEnum over the current term, reusing {@code reuse} when it is
 * a compatible SepDocsEnum created from the same underlying doc input.
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  SepDocsEnum docsEnum;
  // instanceof is null-safe, so the explicit null check was redundant.
  if (!(reuse instanceof SepDocsEnum)) {
    docsEnum = new SepDocsEnum();
  } else {
    docsEnum = (SepDocsEnum) reuse;
    if (docsEnum.startDocIn != docIn) {
      // If you are using ParallelReader and pass in a reused
      // DocsAndPositionsEnum, it could have come from another
      // reader that also uses the sep codec.
      docsEnum = new SepDocsEnum();
    }
  }
  return docsEnum.init(fieldInfo, termState, liveDocs);
}
项目:read-open-source-code
文件:TermsIncludingScoreQuery.java
// Advances to the next matching doc with no doc-id ordering guarantee:
// drains the current term's postings, then seeks the next seed term.
int nextDocOutOfOrder() throws IOException {
while (true) {
if (docsEnum != null) {
int docId = docsEnumNextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
// Current term exhausted; fall through to pick the next term.
docsEnum = null;
} else {
return doc = docId;
}
}
if (upto == terms.size()) {
// All terms consumed.
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
scoreUpto = upto;
// Only terms actually present in this segment yield a postings enum.
if (termsEnum.seekExact(terms.get(ords[upto++], spare))) {
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, DocsEnum.FLAG_NONE);
}
}
}
项目:read-open-source-code
文件:SepPostingsReader.java
/**
 * Returns a DocsEnum over the current term, reusing {@code reuse} when it is
 * a compatible SepDocsEnum created from the same underlying doc input.
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  SepDocsEnum docsEnum;
  // instanceof is null-safe, so the explicit null check was redundant.
  if (!(reuse instanceof SepDocsEnum)) {
    docsEnum = new SepDocsEnum();
  } else {
    docsEnum = (SepDocsEnum) reuse;
    if (docsEnum.startDocIn != docIn) {
      // If you are using ParallelReader and pass in a reused
      // DocsAndPositionsEnum, it could have come from another
      // reader that also uses the sep codec.
      docsEnum = new SepDocsEnum();
    }
  }
  return docsEnum.init(fieldInfo, termState, liveDocs);
}
项目:NYBC
文件:SepPostingsReader.java
/**
 * Returns a DocsEnum over the current term, reusing {@code reuse} when it is
 * a compatible SepDocsEnum created from the same underlying doc input.
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  SepDocsEnum docsEnum;
  // instanceof is null-safe, so the explicit null check was redundant.
  if (!(reuse instanceof SepDocsEnum)) {
    docsEnum = new SepDocsEnum();
  } else {
    docsEnum = (SepDocsEnum) reuse;
    if (docsEnum.startDocIn != docIn) {
      // If you are using ParallelReader and pass in a reused
      // DocsAndPositionsEnum, it could have come from another
      // reader that also uses the sep codec.
      docsEnum = new SepDocsEnum();
    }
  }
  return docsEnum.init(fieldInfo, termState, liveDocs);
}
项目:read-open-source-code
文件:SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
Fields fields = atomicReader.fields();
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes)) {
return -1;
}
// FLAG_NONE: only doc ids are needed, not freqs.
DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
if (docs == null) return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:read-open-source-code
文件:TermsIncludingScoreQuery.java
// Advances to the next matching doc with no doc-id ordering guarantee:
// drains the current term's postings, then seeks the next seed term.
int nextDocOutOfOrder() throws IOException {
while (true) {
if (docsEnum != null) {
int docId = docsEnumNextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
// Current term exhausted; fall through to pick the next term.
docsEnum = null;
} else {
return doc = docId;
}
}
if (upto == terms.size()) {
// All terms consumed.
return doc = DocIdSetIterator.NO_MORE_DOCS;
}
scoreUpto = upto;
// Only terms actually present in this segment yield a postings enum.
if (termsEnum.seekExact(terms.get(ords[upto++], spare))) {
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, DocsEnum.FLAG_NONE);
}
}
}
项目:NYBC
文件:HighFreqTerms.java
/**
 * Returns the total number of occurrences of {@code term} across all
 * segments of {@code reader}.
 * <p>For segments without deletions the stored totalTermFreq statistic is
 * used directly; otherwise the postings are walked and freqs summed. If the
 * field omits freqs, each matching doc counts as 1.
 *
 * @throws IOException if postings cannot be read
 */
public static long getTotalTermFreq(IndexReader reader, Term term) throws IOException {
  long totalTF = 0L;
  for (final AtomicReaderContext ctx : reader.leaves()) {
    AtomicReader r = ctx.reader();
    if (!r.hasDeletions()) {
      // TODO: we could do this up front, during the scan
      // (next()), instead of after-the-fact here w/ seek,
      // if the codec supports it and there are no del
      // docs...
      final long totTF = r.totalTermFreq(term);
      if (totTF != -1) {
        totalTF += totTF;
        continue;
      } // otherwise we fall-through
    }
    // note: what should we do if field omits freqs? currently it counts as 1...
    DocsEnum de = r.termDocsEnum(term);
    if (de != null) {
      while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTF += de.freq();
      }
    }
  }
  return totalTF;
}
项目:read-open-source-code
文件:TermsIncludingScoreQuery.java
// Marks every doc containing any collected term and records the term's
// score for that doc. Later terms overwrite earlier scores for the same
// doc ("last wins") -- see the many-to-many note below.
protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
BytesRef spare = new BytesRef();
DocsEnum docsEnum = null;
for (int i = 0; i < terms.size(); i++) {
if (termsEnum.seekExact(terms.get(ords[i], spare))) {
// Reuse the same enum across terms to avoid reallocations.
docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
float score = TermsIncludingScoreQuery.this.scores[ords[i]];
for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
matchingDocs.set(doc);
// In the case the same doc is also related to a another doc, a score might be overwritten. I think this
// can only happen in a many-to-many relation
scores[doc] = score;
}
}
}
}
项目:NYBC
文件:TestReuseDocsEnum.java
/**
 * Verifies that passing reuse=null always yields a distinct DocsEnum
 * instance per term: the identity map must end up with one entry per term.
 */
public void testReuseDocsEnumNoReuse() throws IOException {
Directory dir = newDirectory();
Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
DirectoryReader open = DirectoryReader.open(dir);
for (AtomicReaderContext ctx : open.leaves()) {
AtomicReader indexReader = ctx.reader();
Terms terms = indexReader.terms("body");
TermsEnum iterator = terms.iterator(null);
// Identity semantics: distinct objects count separately even if equal.
IdentityHashMap<DocsEnum, Boolean> enums = new IdentityHashMap<DocsEnum, Boolean>();
MatchNoBits bits = new Bits.MatchNoBits(indexReader.maxDoc());
while ((iterator.next()) != null) {
DocsEnum docs = iterator.docs(random().nextBoolean() ? bits : new Bits.MatchNoBits(indexReader.maxDoc()), null, random().nextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
enums.put(docs, true);
}
assertEquals(terms.size(), enums.size());
}
IOUtils.close(writer, open, dir);
}
项目:read-open-source-code
文件:TermFilter.java
// Returns the set of docs containing the filter's term, or null when the
// field or term is absent from this segment.
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(term.field());
if (terms == null) {
return null;
}
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(term.bytes())) {
return null;
}
// Lazily materialize the postings only when the DocIdSet is iterated.
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return termsEnum.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
}
};
}
项目:NYBC
文件:SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
Fields fields = atomicReader.fields();
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
// Older seekExact(bytes, useCache) API variant.
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
// FLAG_NONE: only doc ids are needed, not freqs.
DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
if (docs == null) return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:NYBC
文件:TestRTGBase.java
// Returns the single (live) doc id matching t, or -1 if none. Also asserts
// the unique-key invariant: the term matches at most one document.
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
Fields fields = MultiFields.getFields(r);
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
// Older seekExact(bytes, useCache) API variant.
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
int id = docs.nextDoc();
if (id != DocIdSetIterator.NO_MORE_DOCS) {
int next = docs.nextDoc();
assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
}
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:incubator-blur
文件:TermDocIterableTest.java
// Iterates all docs for each block id via TermDocIterable and checks the
// per-block doc count and the sequential "field" values; prints timing.
@Test
public void testTermDocIterable() throws IOException {
// Single pass kept (pass < 1) -- presumably left for ad-hoc benchmarking.
for (int pass = 0; pass < 1; pass++) {
for (int id = 0; id < BLOCKS; id++) {
// NOTE(review): termDocsEnum returns null when the term is absent --
// assumes every block id exists in the fixture index; verify setup.
DocsEnum termDocs = reader.termDocsEnum(new Term("id", Integer.toString(id)));
TermDocIterable iterable = new TermDocIterable(termDocs, reader);
int count = 0;
int i = 0;
long s = System.nanoTime();
for (Document document : iterable) {
count++;
assertEquals(i, Integer.parseInt(document.get("field")));
i++;
}
long time = System.nanoTime() - s;
System.out.println(time / 1000000.0 + " " + id + " " + pass);
assertEquals(COUNT_PER_BLOCK, count);
}
}
}
项目:search-core
文件:SolrIndexSearcher.java
/**
* Returns the first document number containing the term <code>t</code> Returns -1 if no
* document was found. This method is primarily intended for clients that want to fetch
* documents using a unique identifier."
*
* @return the first document number containing the term
*/
public int getFirstMatch(Term t) throws IOException {
Fields fields = atomicReader.fields();
if(fields == null)
return -1;
Terms terms = fields.terms(t.field());
if(terms == null)
return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
if(!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
if(docs == null)
return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
项目:read-open-source-code
文件:SepPostingsReader.java
/**
 * Returns a DocsEnum over the current term, reusing {@code reuse} when it is
 * a compatible SepDocsEnum created from the same underlying doc input.
 */
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
  final SepTermState termState = (SepTermState) _termState;
  SepDocsEnum docsEnum;
  // instanceof is null-safe, so the explicit null check was redundant.
  if (!(reuse instanceof SepDocsEnum)) {
    docsEnum = new SepDocsEnum();
  } else {
    docsEnum = (SepDocsEnum) reuse;
    if (docsEnum.startDocIn != docIn) {
      // If you are using ParallelReader and pass in a reused
      // DocsAndPositionsEnum, it could have come from another
      // reader that also uses the sep codec.
      docsEnum = new SepDocsEnum();
    }
  }
  return docsEnum.init(fieldInfo, termState, liveDocs);
}
项目:lams
文件:Lucene40PostingsReader.java
// Returns a DocsEnum for the term state, reusing the given enum when it is
// compatible (same codec, same freq stream, same liveDocs) -- see canReuse.
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (canReuse(reuse, liveDocs)) {
// if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
}
return newDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
}
项目:lams
文件:Lucene40PostingsReader.java
// Decides whether the passed-in enum can be reset instead of allocating a
// new one: it must be our enum type, built over the same freq stream, and
// constructed with the exact same liveDocs instance.
private boolean canReuse(DocsEnum reuse, Bits liveDocs) {
if (reuse != null && (reuse instanceof SegmentDocsEnumBase)) {
SegmentDocsEnumBase docsEnum = (SegmentDocsEnumBase) reuse;
// If you are using ParallelReader and pass in a reused DocsEnum,
// it could have come from another reader also using the standard codec.
if (docsEnum.startFreqIn == freqIn) {
// We only reuse if the incoming enum has the same liveDocs as the given liveDocs.
return liveDocs == docsEnum.liveDocs;
}
}
return false;
}
项目:lams
文件:Lucene40PostingsReader.java
// Creates a fresh (non-reused) DocsEnum; the liveDocs-aware variant is only
// needed when deleted documents must be filtered out.
private DocsEnum newDocsEnum(Bits liveDocs, FieldInfo fieldInfo, StandardTermState termState) throws IOException {
  return liveDocs == null
      ? new AllDocsSegmentDocsEnum(freqIn).reset(fieldInfo, termState)
      : new LiveDocsSegmentDocsEnum(freqIn, liveDocs).reset(fieldInfo, termState);
}
项目:lams
文件:Lucene40PostingsReader.java
// Re-initializes this reused enum to iterate a new term's postings.
DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
indexOmitsTF = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.hasPayloads();
storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
freqOffset = termState.freqOffset;
skipOffset = termState.skipOffset;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
assert limit > 0;
// Reset iteration state to "before first doc".
ord = 0;
doc = -1;
accum = 0;
// if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
start = -1;
count = 0;
// When the index omits term freqs, every doc implicitly has freq == 1.
freq = 1;
if (indexOmitsTF) {
Arrays.fill(freqs, 1);
}
maxBufferedDocId = -1;
return this;
}