Example source code for Java class org.apache.lucene.index.TermsEnum

Project: elasticsearch_my    File: OrdinalsBuilder.java
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each term's ordinal with the term's documents. The caller must
 * exhaust the returned {@link BytesRefIterator}; the first value it returns
 * is associated with ordinal <tt>1</tt>, the second with ordinal <tt>2</tt>,
 * and so on.
 * <p>
 * If the {@link TermsEnum} contains prefix-coded numerical values, it
 * should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped, the returned
 * {@link BytesRefIterator} will contain partial-precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
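A minimal usage sketch: the caller must drain the iterator completely, because each next() call is what assigns the next ordinal and records the term's documents. The builder and termsEnum variables below are assumptions for illustration.

// Hypothetical caller; builder is an OrdinalsBuilder, termsEnum a field's TermsEnum.
BytesRefIterator values = builder.buildFromTerms(termsEnum);
BytesRef value;
while ((value = values.next()) != null) {
    // the first value returned is associated with ordinal 1, the next with 2, ...
}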
Project: elasticsearch_my    File: AbstractIndexOrdinalsFieldData.java
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }
    int docCount = terms.getDocCount();
    if (docCount == -1) {
        docCount = reader.maxDoc();
    }
    if (docCount >= minSegmentSize) {
        final int minFreq = minFrequency > 1.0
                ? (int) minFrequency
                : (int)(docCount * minFrequency);
        final int maxFreq = maxFrequency > 1.0
                ? (int) maxFrequency
                : (int)(docCount * maxFrequency);
        if (minFreq > 1 || maxFreq < docCount) {
            iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
        }
    }
    return iterator;
}
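To make the threshold rule concrete: a frequency setting greater than 1.0 is used as an absolute document count, anything else as a fraction of the segment's docCount. For example, with docCount = 1000, minFrequency = 0.001 and maxFrequency = 0.5, the cutoffs become minFreq = 1 and maxFreq = 500, so only the upper bound actually filters; with minFrequency = 5.0 the cutoff is simply 5 documents.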
Project: elasticsearch_my    File: TermVectorsWriter.java
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
                                                     boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);
    // for each term (iterator next) in this field (field)
    // iterate over the docs (should only be one)
    int nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);
    for (int j = 0; j < freq; j++) {
        int curPos = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(curPos);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }
    nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
Project: elasticsearch_my    File: TermVectorsResponse.java
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
Project: elasticsearch_my    File: SimpleLuceneTests.java
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);

    indexWriter.close();

    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
Project: lams    File: MultiPhraseQuery.java
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
  List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
  for (int i = 0; i < terms.length; i++) {
    final Term term = terms[i];
    TermState termState = termContexts.get(term).get(context.ord);
    if (termState == null) {
      // Term doesn't exist in reader
      continue;
    }
    termsEnum.seekExact(term.bytes(), termState);
    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
    if (postings == null) {
      // term does exist, but has no positions
      throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
    }
    cost += postings.cost();
    docsEnums.add(postings);
  }

  _queue = new DocsQueue(docsEnums);
  _posList = new IntQueue();
}
Project: lams    File: TermRangeTermsEnum.java
/**
 * Enumerates all terms greater than or equal to <code>lowerTerm</code>
 * and less than or equal to <code>upperTerm</code>.
 * 
 * If an endpoint is null, it is said to be "open". Either or both 
 * endpoints may be open. Open endpoints may not be exclusive 
 * (you can't select all but the first or last term without 
 * explicitly specifying the term to exclude).
 * 
 * @param tenum
 *          TermsEnum to filter
 * @param lowerTerm
 *          The term text at the lower end of the range
 * @param upperTerm
 *          The term text at the upper end of the range
 * @param includeLower
 *          If true, the <code>lowerTerm</code> is included in the range.
 * @param includeUpper
 *          If true, the <code>upperTerm</code> is included in the range.
 */
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm, 
  boolean includeLower, boolean includeUpper) {
  super(tenum);

  // do a little bit of normalization...
  // open ended range queries should always be inclusive.
  if (lowerTerm == null) {
    this.lowerBytesRef = new BytesRef();
    this.includeLower = true;
  } else {
    this.lowerBytesRef = lowerTerm;
    this.includeLower = includeLower;
  }

  if (upperTerm == null) {
    this.includeUpper = true;
    upperBytesRef = null;
  } else {
    this.includeUpper = includeUpper;
    upperBytesRef = upperTerm;
  }

  setInitialSeekTerm(lowerBytesRef);
  termComp = getComparator();
}
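A hedged usage sketch, enumerating the inclusive range ["bar", "foo"] over a field; reader and the field name are placeholders, and the Lucene 4.x iterator(TermsEnum) API used elsewhere in this listing is assumed:

Terms terms = MultiFields.getTerms(reader, "body");
if (terms != null) {
  TermsEnum ranged = new TermRangeTermsEnum(terms.iterator(null),
      new BytesRef("bar"), new BytesRef("foo"), true, true);
  BytesRef t;
  while ((t = ranged.next()) != null) {
    // t lies between "bar" and "foo", both endpoints inclusive
  }
}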
Project: lams    File: QueryAutoStopWordAnalyzer.java
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields, from terms with a document frequency greater
 * than the given maxDocFreq.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq The document frequency above which a term is treated as a stopword
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
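A construction sketch, assuming a Lucene version with the no-argument StandardAnalyzer constructor; directory and the 200-document threshold are illustrative values. Terms of "body" occurring in more than 200 documents are then dropped from query-time token streams.

IndexReader reader = DirectoryReader.open(directory);
Analyzer wrapped = new QueryAutoStopWordAnalyzer(
    new StandardAnalyzer(), reader, Collections.singleton("body"), 200);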
Project: lams    File: CompiledAutomaton.java
/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
 *  with the terms accepted by this automaton. */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
  switch(type) {
  case NONE:
    return TermsEnum.EMPTY;
  case ALL:
    return terms.iterator(null);
  case SINGLE:
    return new SingleTermsEnum(terms.iterator(null), term);
  case PREFIX:
    // TODO: this is very likely faster than .intersect,
    // but we should test and maybe cutover
    return new PrefixTermsEnum(terms.iterator(null), term);
  case NORMAL:
    return terms.intersect(this, null);
  default:
    // unreachable
    throw new RuntimeException("unhandled case");
  }
}
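A usage sketch for the NORMAL case, assuming a regular-expression automaton; the reader, field name, and pattern are placeholders:

Terms terms = MultiFields.getTerms(reader, "title");
if (terms != null) {
  CompiledAutomaton ca = new CompiledAutomaton(new RegExp("net.*").toAutomaton());
  TermsEnum matching = ca.getTermsEnum(terms);
  BytesRef t;
  while ((t = matching.next()) != null) {
    // t is a term accepted by the automaton
  }
}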
Project: Elasticsearch    File: DfsOnlyRequest.java
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
Project: lams    File: SrndTermQuery.java
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
Project: elasticsearch_my    File: PercolatorFieldMapper.java
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));

    return new BooleanQuery.Builder()
            .add(extractionSuccess, Occur.SHOULD)
            .add(extractionFailure, Occur.SHOULD)
            .build();
}
Project: elasticsearch_my    File: TermsSliceQuery.java
/**
 * Returns a per-segment DocIdSet containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
Project: elasticsearch_my    File: RamAccountingTermsEnum.java
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker, AbstractIndexFieldData.PerValueEstimator estimator,
                              String fieldName) {
    super(termsEnum);
    this.breaker = breaker;
    this.termsEnum = termsEnum;
    this.estimator = estimator;
    this.fieldName = fieldName;
    this.totalBytes = 0;
    this.flushBuffer = 0;
}
Project: elasticsearch_my    File: OrdinalsBuilder.java
/**
 * A {@link TermsEnum} that iterates only the highest-resolution geo prefix-coded terms.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapGeoPointTerms(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // accept only the max resolution terms
            // todo is this necessary?
            return GeoPointField.getPrefixCodedShift(term) == GeoPointField.PRECISION_STEP * 4 ?
                AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
Project: Elasticsearch    File: TermVectorsResponse.java
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
Project: elasticsearch_my    File: PagedBytesIndexFieldData.java
/**
 * Determine whether the BlockTreeTermsReader.FieldReader can be used
 * for estimating the field data, adding the estimate to the circuit
 * breaker if it can, otherwise wrapping the terms in a
 * RamAccountingTermsEnum to be estimated on a per-term basis.
 *
 * @param terms terms to be estimated
 * @return A possibly wrapped TermsEnum for the terms
 */
@Override
public TermsEnum beforeLoad(Terms terms) throws IOException {
    LeafReader reader = context.reader();

    TermsEnum iterator = terms.iterator();
    TermsEnum filteredIterator = filter(terms, iterator, reader);
    final boolean filtered = iterator != filteredIterator;
    iterator = filteredIterator;

    if (filtered) {
        if (logger.isTraceEnabled()) {
            logger.trace("Filter exists, can't circuit break normally, using RamAccountingTermsEnum");
        }
        return new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
    } else {
        estimatedBytes = this.estimateStringFieldData();
        // If we weren't able to estimate, wrap in the RamAccountingTermsEnum
        if (estimatedBytes == 0) {
            iterator = new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
        } else {
            breaker.addEstimateBytesAndMaybeBreak(estimatedBytes, fieldName);
        }

        return iterator;
    }
}
Project: elasticsearch_my    File: XMoreLikeThis.java
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms, used when checking skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        if (isSkipTerm(fieldName, term)) {
            continue;
        }

        final PostingsEnum docs = termsEnum.postings(null);
        int freq = 0;
        while(docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            freq += docs.freq();
        }

        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
Project: elasticsearch_my    File: MultiPhrasePrefixQuery.java
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
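The seekCeil-then-scan loop above is the standard prefix-expansion idiom. A standalone sketch, with the reader, field name, and prefix as placeholders and the modern no-argument iterator() API assumed:

Terms terms = leafReader.terms("title");
if (terms != null) {
    TermsEnum te = terms.iterator();
    BytesRef prefix = new BytesRef("net");
    if (te.seekCeil(prefix) != TermsEnum.SeekStatus.END) {
        for (BytesRef t = te.term(); t != null && StringHelper.startsWith(t, prefix); t = te.next()) {
            // t is a term starting with "net"
        }
    }
}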
Project: elasticsearch_my    File: TermVectorsWriter.java
private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException {
    docsEnum = iterator.postings(docsEnum);
    int nextDoc = docsEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    writeFreq(docsEnum.freq());
    nextDoc = docsEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsEnum;
}
Project: elasticsearch_my    File: TermVectorsWriter.java
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
    int docFreq = topLevelIterator.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);
    long ttf = topLevelIterator.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
Project: Elasticsearch    File: OrdinalsBuilder.java
/**
 * A {@link TermsEnum} that iterates only full-precision prefix-coded 64-bit values.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // we stop accepting terms once we moved across the prefix codec terms - redundant values!
            return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
Project: elasticsearch_my    File: AbstractStringFieldDataTestCase.java
public void testTermsEnum() throws Exception {
    fillExtendedMvSet();
    writer.forceMerge(1);
    List<LeafReaderContext> atomicReaderContexts = refreshReader();

    IndexOrdinalsFieldData ifd = getForField("value");
    for (LeafReaderContext atomicReaderContext : atomicReaderContexts) {
        AtomicOrdinalsFieldData afd = ifd.load(atomicReaderContext);

        TermsEnum termsEnum = afd.getOrdinalsValues().termsEnum();
        int size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(12));

        assertThat(termsEnum.seekExact(new BytesRef("10")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("10"));
        assertThat(termsEnum.next(), nullValue());

        assertThat(termsEnum.seekExact(new BytesRef("08")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("08"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(2));

        termsEnum.seekExact(8);
        assertThat(termsEnum.term().utf8ToString(), equalTo("07"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(3));
    }
}
Project: elasticsearch_my    File: TextFieldMapperTests.java
public void testDefaultPositionIncrementGap() throws IOException {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", "text").endObject().endObject()
            .endObject().endObject().string();

    DocumentMapper mapper = indexService.mapperService().merge("type",
            new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);

    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .array("field", new String[] {"a", "b"})
            .endObject()
            .bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);

    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid() ), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition());
    }
}
Project: elasticsearch_my    File: TextFieldMapperTests.java
public void testPositionIncrementGap() throws IOException {
    final int positionIncrementGap = randomIntBetween(1, 1000);
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
                .field("type", "text")
                .field("position_increment_gap", positionIncrementGap)
            .endObject().endObject()
            .endObject().endObject().string();

    DocumentMapper mapper = indexService.mapperService().merge("type",
            new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);

    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .array("field", new String[] {"a", "b"})
            .endObject()
            .bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);

    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        assertEquals(positionIncrementGap + 1, postings.nextPosition());
    }
}
Project: elasticsearch_my    File: GetTermVectorsIT.java
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}
Project: elasticsearch_my    File: GetTermVectorsIT.java
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    Terms terms0 = fields0.terms(fieldName);
    Terms terms1 = fields1.terms(fieldName);
    assertThat(terms0, notNullValue());
    assertThat(terms1, notNullValue());
    assertThat(terms0.size(), equalTo(terms1.size()));

    TermsEnum iter0 = terms0.iterator();
    TermsEnum iter1 = terms1.iterator();
    for (int i = 0; i < terms0.size(); i++) {
        BytesRef next0 = iter0.next();
        assertThat(next0, notNullValue());
        BytesRef next1 = iter1.next();
        assertThat(next1, notNullValue());

        // compare field value
        String string0 = next0.utf8ToString();
        String string1 = next1.utf8ToString();
        assertThat("expected: " + string0, string0, equalTo(string1));

        // compare df and ttf
        assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
        assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));

        // compare freq and docs
        PostingsEnum docsAndPositions0 = iter0.postings(null, PostingsEnum.ALL);
        PostingsEnum docsAndPositions1 = iter1.postings(null, PostingsEnum.ALL);
        assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
        assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));

        // compare position, start offsets and end offsets
        for (int j = 0; j < docsAndPositions0.freq(); j++) {
            assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition()));
            assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset()));
            assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset()));
        }
    }
    assertThat(iter0.next(), nullValue());
    assertThat(iter1.next(), nullValue());
}
Project: Elasticsearch    File: OrdinalsBuilder.java
/**
 * A {@link TermsEnum} that iterates only full-precision prefix-coded 32-bit values.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {

        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // we stop accepting terms once we moved across the prefix codec terms - redundant values!
            return NumericUtils.getPrefixCodedIntShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
Project: elasticsearch_my    File: GetTermVectorsIT.java
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
    final TermsEnum termsEnum = terms.iterator();
    List<String> bestTerms = new ArrayList<>();
    BytesRef text;
    while((text = termsEnum.next()) != null) {
        bestTerms.add(text.utf8ToString());
    }
    Collections.sort(expectedTerms);
    Collections.sort(bestTerms);
    assertArrayEquals(expectedTerms.toArray(), bestTerms.toArray());
}
Project: lams    File: Lucene40TermVectorsReader.java
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
  TVTermsEnum termsEnum;
  if (reuse instanceof TVTermsEnum) {
    termsEnum = (TVTermsEnum) reuse;
    if (!termsEnum.canReuse(tvf)) {
      termsEnum = new TVTermsEnum();
    }
  } else {
    termsEnum = new TVTermsEnum();
  }
  termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets, storePayloads);
  return termsEnum;
}
Project: lams    File: Lucene45DocValuesProducer.java
TermsEnum getTermsEnum() {
  try {
    return getTermsEnum(data.clone());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Project: lams    File: Lucene49DocValuesProducer.java
TermsEnum getTermsEnum() {
  try {
    return getTermsEnum(data.clone());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Project: ir-generalized-translation-models    File: AugmentedTermQuery.java
/**
 * Returns a {@link TermsEnum} positioned at this weight's term, or null if
 * the mainTerm does not exist in the given context
 */
private TermsEnum getTermsEnum(LeafReaderContext context, TermContext termContext, Term term) throws IOException {
    if (termContext != null) {
        // TermQuery either used as a Query or the mainTerm states have been provided at construction time
        assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
        final TermState state = termContext.get(context.ord);
        if (state == null) { // mainTerm is not present in that reader
            return null;
        }
        // The snippet is truncated in the source from here on; the tail below is a
        // reconstruction that mirrors TermQuery#getTermsEnum later in this listing.
        final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
        termsEnum.seekExact(term.bytes(), state);
        return termsEnum;
    }
    return null;
}
Project: lams    File: CompressingTermVectorsReader.java
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
  final TVTermsEnum termsEnum;
  if (reuse != null && reuse instanceof TVTermsEnum) {
    termsEnum = (TVTermsEnum) reuse;
  } else {
    termsEnum = new TVTermsEnum();
  }
  termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths,
      payloadIndex, payloadBytes,
      new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
  return termsEnum;
}
Project: lams    File: TermQuery.java
/**
 * Returns a {@link TermsEnum} positioned at this weight's Term, or null if
 * the term does not exist in the given context
 */
private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
  final TermState state = termStates.get(context.ord);
  if (state == null) { // term is not present in that reader
    assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
    return null;
  }
  //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
  final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
  termsEnum.seekExact(term.bytes(), state);
  return termsEnum;
}
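The two-argument seekExact replays a TermState captured earlier, skipping the term-dictionary walk a plain seek would repeat for every leaf. A minimal capture-and-replay sketch; the field and term are placeholders, and the field is assumed to exist:

TermsEnum te = context.reader().terms("f").iterator(null);
if (te.seekExact(new BytesRef("lucene"))) {
  TermState cached = te.termState();            // capture the position once
  te.seekExact(new BytesRef("lucene"), cached); // reposition later without a lookup
}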
Project: lams    File: NumericRangeQuery.java
@Override @SuppressWarnings("unchecked")
protected TermsEnum getTermsEnum(final Terms terms, AttributeSource atts) throws IOException {
  // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are
  if (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) {
    return TermsEnum.EMPTY;
  }
  return new NumericRangeTermsEnum(terms.iterator(null));
}
Project: lams    File: TermRangeQuery.java
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
    return TermsEnum.EMPTY;
  }

  TermsEnum tenum = terms.iterator(null);

  if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
    return tenum;
  }
  return new TermRangeTermsEnum(tenum,
      lowerTerm, upperTerm, includeLower, includeUpper);
}