Code Examples for the Java Class org.apache.lucene.index.Fields
Project: elasticsearch_my
File: PagedBytesIndexFieldData.java
/**
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        Terms terms = reader.terms(getFieldName());
        Fields fields = reader.fields();
        final Terms fieldTerms = fields.terms(getFieldName());
        if (fieldTerms instanceof FieldReader) {
            final Stats stats = ((FieldReader) fieldTerms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                    totalTermBytes, terms.size(), terms.getSumDocFreq());
            }
            long totalBytes = totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
            return totalBytes;
        }
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
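The estimate charges the raw bytes of all term text, plus roughly 2 bytes of per-term overhead and 4 bytes per term-document pair. A minimal sketch of the same arithmetic with made-up statistics (all numbers below are hypothetical, for illustration only):

public class EstimateDemo {
    public static void main(String[] args) {
        // Hypothetical statistics standing in for the Terms stats.
        long totalTermBytes = 1_000_000L; // bytes of all term text in the field
        long termCount = 50_000L;         // terms.size()
        long sumDocFreq = 400_000L;       // terms.getSumDocFreq()
        // Same formula as estimateStringFieldData() above.
        long estimate = totalTermBytes + (2 * termCount) + (4 * sumDocFreq);
        System.out.println(estimate);     // 2700000 bytes, about 2.7 MB
    }
}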
Project: elasticsearch_my
File: MoreLikeThisQueryBuilderTests.java
@Override
protected MultiTermVectorsResponse executeMultiTermVectors(MultiTermVectorsRequest mtvRequest) {
    try {
        MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[mtvRequest.size()];
        int i = 0;
        for (TermVectorsRequest request : mtvRequest) {
            TermVectorsResponse response = new TermVectorsResponse(request.index(), request.type(), request.id());
            response.setExists(true);
            Fields generatedFields;
            if (request.doc() != null) {
                generatedFields = generateFields(randomFields, request.doc().utf8ToString());
            } else {
                generatedFields = generateFields(request.selectedFields().toArray(new String[request.selectedFields().size()]), request.id());
            }
            EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
            response.setFields(generatedFields, request.selectedFields(), flags, generatedFields);
            responses[i++] = new MultiTermVectorsItemResponse(response, null);
        }
        return new MultiTermVectorsResponse(responses);
    } catch (IOException ex) {
        throw new ElasticsearchException("boom", ex);
    }
}
Project: elasticsearch_my
File: GetTermVectorsIT.java
public void testDuelESLucene() throws Exception {
    TestFieldSetting[] testFieldSettings = getFieldSettings();
    createIndexBasedOnFieldSettings("test", "alias", testFieldSettings);
    // we generate as many docs as we have shards
    TestDoc[] testDocs = generateTestDocs("test", testFieldSettings);
    DirectoryReader directoryReader = indexDocsWithLucene(testDocs);
    TestConfig[] testConfigs = generateTestConfigs(20, testDocs, testFieldSettings);
    for (TestConfig test : testConfigs) {
        TermVectorsRequestBuilder request = getRequestForConfig(test);
        if (test.expectedException != null) {
            assertThrows(request, test.expectedException);
            continue;
        }
        TermVectorsResponse response = request.get();
        Fields luceneTermVectors = getTermVectorsFromLucene(directoryReader, test.doc);
        validateResponse(response, luceneTermVectors, test);
    }
}
Project: lams
File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
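Callers reach this codec method through IndexReader.getTermVectors(int), which exposes the same contract: a Fields over the document's vectored fields, or null when the document stored none. A minimal sketch of null-safe consumption, assuming only an open IndexReader and a valid document id:

static void printVectoredFields(IndexReader reader, int docID) throws IOException {
    Fields vectors = reader.getTermVectors(docID);
    if (vectors == null) {
        return; // this document stored no term vectors
    }
    for (String field : vectors) { // Fields is Iterable over field names
        Terms terms = vectors.terms(field);
        System.out.println(field + ": " + terms.size() + " unique terms");
    }
}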
Project: lams
File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: lams
File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    for (int i = 0; i < mergeState.readers.size(); i++) {
        final AtomicReader reader = mergeState.readers.get(i);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int docID = 0; docID < maxDoc; docID++) {
            if (liveDocs != null && !liveDocs.get(docID)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.getTermVectors(docID);
            addAllDocVectors(vectors, mergeState);
            docCount++;
            mergeState.checkAbort.work(300);
        }
    }
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
Project: Elasticsearch
File: DfsOnlyRequest.java
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);
    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
Project: search
File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
    return new FilterFields(super.fields()) {
        @Override
        public Terms terms(String field) throws IOException {
            return super.terms(DelegatingAtomicReader.FIELD_NAME);
        }

        @Override
        public Iterator<String> iterator() {
            return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
        }

        @Override
        public int size() {
            return 1;
        }
    };
}
Project: search
File: TokenSources.java
/**
 * A convenience method that tries to first get a TermPositionVector for the
 * specified docId, then, falls back to using the passed in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 *
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
        String field, Document doc, Analyzer analyzer) throws IOException {
    TokenStream ts = null;
    Fields vectors = reader.getTermVectors(docId);
    if (vectors != null) {
        Terms vector = vectors.terms(field);
        if (vector != null) {
            ts = getTokenStream(vector);
        }
    }
    // No token info stored so fall back to analyzing raw content
    if (ts == null) {
        ts = getTokenStream(doc, field, analyzer);
    }
    return ts;
}
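A typical caller is a highlighter that prefers stored term vectors but can re-analyze stored text when no vector exists. A minimal usage sketch; the field name "body", the helper name, and the surrounding query, reader, and analyzer are assumptions, not part of the original:

static String highlightBody(IndexReader reader, int docId, Query query, Analyzer analyzer)
        throws IOException, InvalidTokenOffsetsException {
    Document doc = reader.document(docId);
    // Prefer the stored term vector; fall back to re-analyzing the stored value.
    TokenStream ts = TokenSources.getAnyTokenStream(reader, docId, "body", doc, analyzer);
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    return highlighter.getBestFragment(ts, doc.get("body"));
}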
Project: search
File: TokenSources.java
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of finding there are no termVectors in the index is
 * minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?)
 * approach to coding is probably acceptable
 *
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
        String field, Analyzer analyzer) throws IOException {
    TokenStream ts = null;
    Fields vectors = reader.getTermVectors(docId);
    if (vectors != null) {
        Terms vector = vectors.terms(field);
        if (vector != null) {
            ts = getTokenStream(vector);
        }
    }
    // No token info stored so fall back to analyzing raw content
    if (ts == null) {
        ts = getTokenStream(reader, docId, field, analyzer);
    }
    return ts;
}
Project: search
File: TokenSources.java
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors. If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
        String field) throws IOException {
    Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        return null;
    }
    Terms vector = vectors.terms(field);
    if (vector == null) {
        return null;
    }
    if (!vector.hasPositions() || !vector.hasOffsets()) {
        return null;
    }
    return getTokenStream(vector);
}
Project: search
File: SumTotalTermFreqValueSource.java
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    long sumTotalTermFreq = 0;
    for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
        Fields fields = readerContext.reader().fields();
        if (fields == null) continue;
        Terms terms = fields.terms(indexedField);
        if (terms == null) continue;
        long v = terms.getSumTotalTermFreq();
        if (v == -1) {
            sumTotalTermFreq = -1;
            break;
        } else {
            sumTotalTermFreq += v;
        }
    }
    final long ttf = sumTotalTermFreq;
    context.put(this, new LongDocValues(this) {
        @Override
        public long longVal(int doc) {
            return ttf;
        }
    });
}
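The loop honors the contract that getSumTotalTermFreq() returns -1 when the codec does not track the statistic, so a single missing leaf makes the whole sum unavailable. The same top-level number can also be read through MultiFields; a minimal sketch, assuming only an open IndexReader:

static long sumTotalTermFreq(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    // null means the field has no terms; -1 mirrors the "unavailable" case above
    return terms == null ? 0 : terms.getSumTotalTermFreq();
}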
Project: search
File: LuceneTestCase.java
/**
 * Fields API equivalency. Both sides must iterate field names in the same
 * (sorted) order for the pairwise comparison below to hold.
 */
public void assertFieldsEquals(String info, IndexReader leftReader, Fields leftFields, Fields rightFields, boolean deep) throws IOException {
    // Fields could be null if there are no postings,
    // but then it must be null for both
    if (leftFields == null || rightFields == null) {
        assertNull(info, leftFields);
        assertNull(info, rightFields);
        return;
    }
    assertFieldStatisticsEquals(info, leftFields, rightFields);
    Iterator<String> leftEnum = leftFields.iterator();
    Iterator<String> rightEnum = rightFields.iterator();
    while (leftEnum.hasNext()) {
        String field = leftEnum.next();
        assertEquals(info, field, rightEnum.next());
        assertTermsEquals(info, leftReader, leftFields.terms(field), rightFields.terms(field), deep);
    }
    assertFalse(rightEnum.hasNext());
}
Project: search
File: LuceneTestCase.java
/**
 * Checks that norms are the same for every field in both readers.
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
    Fields leftFields = MultiFields.getFields(leftReader);
    Fields rightFields = MultiFields.getFields(rightReader);
    // Fields could be null if there are no postings,
    // but then it must be null for both
    if (leftFields == null || rightFields == null) {
        assertNull(info, leftFields);
        assertNull(info, rightFields);
        return;
    }
    for (String field : leftFields) {
        NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
        NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
        if (leftNorms != null && rightNorms != null) {
            assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
        } else {
            assertNull(info, leftNorms);
            assertNull(info, rightNorms);
        }
    }
}
Project: search
File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: search
File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: search
File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    for (int i = 0; i < mergeState.readers.size(); i++) {
        final AtomicReader reader = mergeState.readers.get(i);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int docID = 0; docID < maxDoc; docID++) {
            if (liveDocs != null && !liveDocs.get(docID)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.getTermVectors(docID);
            addAllDocVectors(vectors, mergeState);
            docCount++;
            mergeState.checkAbort.work(300);
        }
    }
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
Project: search
File: TestMultiThreadTermVectors.java
private void testTermVectors() throws Exception {
    // check:
    int numDocs = reader.numDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++) {
        start = System.currentTimeMillis();
        Fields vectors = reader.getTermVectors(docId);
        timeElapsed += System.currentTimeMillis() - start;
        // verify vectors result
        verifyVectors(vectors, docId);
        start = System.currentTimeMillis();
        Terms vector = reader.getTermVectors(docId).terms("field");
        timeElapsed += System.currentTimeMillis() - start;
        verifyVector(vector.iterator(null), docId);
    }
}
Project: search
File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
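The usual caller resolves a document by its unique key field. A minimal usage sketch; the field name "id" and the helper are assumptions for illustration:

static Document findByUniqueKey(SolrIndexSearcher searcher, String id) throws IOException {
    int docId = searcher.getFirstMatch(new Term("id", id));
    return docId == -1 ? null : searcher.doc(docId); // null when no document matches
}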
Project: search
File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: eswc-2015-semantic-typing
File: TfIdfSearcher.java
/**
 * @param reader the index reader to collect statistics from
 * @return map of each term to its inverse document frequency
 * @throws IOException if the index cannot be read
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException {
    Fields fields = MultiFields.getFields(reader); // get the fields of the index
    for (String field : fields) {
        TermsEnum termEnum = MultiFields.getTerms(reader, field).iterator(null);
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null) {
            if (termEnum.seekExact(bytesRef)) {
                String term = bytesRef.utf8ToString();
                float idf = tfidfSIM.idf(termEnum.docFreq(), reader.numDocs());
                inverseDocFreq.put(term, idf);
                System.out.println(term + " idf= " + idf);
            }
        }
    }
    return inverseDocFreq;
}
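With the idf map built, a tf-idf weight is just the product of the two factors. A minimal sketch using Lucene's DefaultSimilarity; the helper, its arguments, and the assumption that the map came from getIdfs(reader) are all illustrative:

static float tfIdf(Map<String, Float> idfs, String term, int freqInDoc) {
    TFIDFSimilarity sim = new DefaultSimilarity();
    float tf = sim.tf(freqInDoc);  // DefaultSimilarity uses sqrt(freq)
    Float idf = idfs.get(term);    // as produced by getIdfs(reader)
    return idf == null ? 0f : tf * idf;
}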
Project: NYBC
File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
    return new FilterFields(super.fields()) {
        @Override
        public Terms terms(String field) throws IOException {
            return super.terms(DelegatingAtomicReader.FIELD_NAME);
        }

        @Override
        public Iterator<String> iterator() {
            return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
        }

        @Override
        public int size() {
            return 1;
        }
    };
}
Project: NYBC
File: TokenSources.java
/**
 * A convenience method that tries to first get a TermPositionVector for the
 * specified docId, then, falls back to using the passed in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 *
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
        String field, Document doc, Analyzer analyzer) throws IOException {
    TokenStream ts = null;
    Fields vectors = reader.getTermVectors(docId);
    if (vectors != null) {
        Terms vector = vectors.terms(field);
        if (vector != null) {
            ts = getTokenStream(vector);
        }
    }
    // No token info stored so fall back to analyzing raw content
    if (ts == null) {
        ts = getTokenStream(doc, field, analyzer);
    }
    return ts;
}
Project: NYBC
File: TokenSources.java
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of finding there are no termVectors in the index is
 * minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?)
 * approach to coding is probably acceptable
 *
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
        String field, Analyzer analyzer) throws IOException {
    TokenStream ts = null;
    Fields vectors = reader.getTermVectors(docId);
    if (vectors != null) {
        Terms vector = vectors.terms(field);
        if (vector != null) {
            ts = getTokenStream(vector);
        }
    }
    // No token info stored so fall back to analyzing raw content
    if (ts == null) {
        ts = getTokenStream(reader, docId, field, analyzer);
    }
    return ts;
}
Project: NYBC
File: TokenSources.java
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors. If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
        String field) throws IOException {
    Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        return null;
    }
    Terms vector = vectors.terms(field);
    if (vector == null) {
        return null;
    }
    if (!vector.hasPositions() || !vector.hasOffsets()) {
        return null;
    }
    return getTokenStream(vector);
}
Project: NYBC
File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: NYBC
File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    for (int i = 0; i < mergeState.readers.size(); i++) {
        final AtomicReader reader = mergeState.readers.get(i);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int docID = 0; docID < maxDoc; docID++) {
            if (liveDocs != null && !liveDocs.get(docID)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.getTermVectors(docID);
            addAllDocVectors(vectors, mergeState);
            docCount++;
            mergeState.checkAbort.work(300);
        }
    }
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
Project: NYBC
File: TestMultiThreadTermVectors.java
private void testTermVectors() throws Exception {
    // check:
    int numDocs = reader.numDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++) {
        start = System.currentTimeMillis();
        Fields vectors = reader.getTermVectors(docId);
        timeElapsed += System.currentTimeMillis() - start;
        // verify vectors result
        verifyVectors(vectors, docId);
        start = System.currentTimeMillis();
        Terms vector = reader.getTermVectors(docId).terms("field");
        timeElapsed += System.currentTimeMillis() - start;
        verifyVector(vector.iterator(null), docId);
    }
}
Project: NYBC
File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: NYBC
File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: incubator-blur
File: IndexImporter.java
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
        BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
        AtomicReader atomicReader) throws IOException {
    MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
    Fields fields = atomicReader.fields();
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator(null);
        BytesRef ref = null;
        while ((ref = termsEnum.next()) != null) {
            key.set(ref.bytes, ref.offset, ref.length);
            int partition = blurPartitioner.getPartition(key, null, numberOfShards);
            if (shardId != partition) {
                throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
                    + "] does not match shard [" + shardId + "], this can happen when rows are not hashed correctly.");
            }
            if (emitDeletes) {
                lookup.lookup(ref, action);
            }
        }
    }
}
Project: incubator-blur
File: MutatableAction.java
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
    IndexReader indexReader = searcher.getIndexReader();
    BytesRef rowIdRef = new BytesRef(rowId);
    List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
    for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
        AtomicReader atomicReader = atomicReaderContext.reader();
        Fields fields = atomicReader.fields();
        if (fields == null) {
            continue;
        }
        Terms terms = fields.terms(BlurConstants.ROW_ID);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(rowIdRef, true)) {
            continue;
        }
        // need atomic read as well...
        possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
    }
    if (possibleRowIds.isEmpty()) {
        return null;
    }
    return new IterableRow(rowId, getRecords(possibleRowIds));
}
Project: search-core
File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found. This method is primarily intended
 * for clients that want to fetch documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: search-core
File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: read-open-source-code
File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: read-open-source-code
File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
    if (tvx != null) {
        Fields fields = new TVFields(docID);
        if (fields.size() == 0) {
            // TODO: we can improve writer here, eg write 0 into
            // tvx file, so we know on first read from tvx that
            // this doc has no TVs
            return null;
        } else {
            return fields;
        }
    } else {
        return null;
    }
}
Project: read-open-source-code
File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    for (int i = 0; i < mergeState.readers.size(); i++) {
        final AtomicReader reader = mergeState.readers.get(i);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int docID = 0; docID < maxDoc; docID++) {
            if (liveDocs != null && !liveDocs.get(docID)) {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.getTermVectors(docID);
            addAllDocVectors(vectors, mergeState);
            docCount++;
            mergeState.checkAbort.work(300);
        }
    }
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
Project: read-open-source-code
File: SumTotalTermFreqValueSource.java
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    long sumTotalTermFreq = 0;
    for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
        Fields fields = readerContext.reader().fields();
        if (fields == null) continue;
        Terms terms = fields.terms(indexedField);
        if (terms == null) continue;
        long v = terms.getSumTotalTermFreq();
        if (v == -1) {
            sumTotalTermFreq = -1;
            break;
        } else {
            sumTotalTermFreq += v;
        }
    }
    final long ttf = sumTotalTermFreq;
    context.put(this, new LongDocValues(this) {
        @Override
        public long longVal(int doc) {
            return ttf;
        }
    });
}
Project: read-open-source-code
File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
    return new FilterFields(super.fields()) {
        @Override
        public Terms terms(String field) throws IOException {
            return super.terms(DelegatingAtomicReader.FIELD_NAME);
        }

        @Override
        public Iterator<String> iterator() {
            return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
        }

        @Override
        public int size() {
            return 1;
        }
    };
}
Project: read-open-source-code
File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}