/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each term's ordinal with the term's documents. The caller must
 * exhaust the returned {@link BytesRefIterator}, which returns all values
 * such that the first returned value is associated with ordinal <tt>1</tt>,
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix-coded numerical values, the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped, the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
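// Hedged usage sketch (not part of the original source): driving buildFromTerms
// for a 64-bit numeric field. The leaf reader, the "my_long_field" name, and the
// placement of this helper inside the same builder class are assumptions;
// wrapNumeric64Bit (further below) keeps only the full-precision terms, as the
// javadoc above requires.
void consumeOrdinalsExample(LeafReader leafReader) throws IOException {
    Terms terms = leafReader.terms("my_long_field"); // hypothetical field
    if (terms != null) {
        TermsEnum wrapped = wrapNumeric64Bit(terms.iterator());
        BytesRefIterator values = buildFromTerms(wrapped);
        BytesRef value;
        while ((value = values.next()) != null) {
            // the iterator must be exhausted; the first value maps to ordinal 1
        }
    }
}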
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }
    int docCount = terms.getDocCount();
    if (docCount == -1) {
        docCount = reader.maxDoc();
    }
    if (docCount >= minSegmentSize) {
        final int minFreq = minFrequency > 1.0 ? (int) minFrequency : (int) (docCount * minFrequency);
        final int maxFreq = maxFrequency > 1.0 ? (int) maxFrequency : (int) (docCount * maxFrequency);
        if (minFreq > 1 || maxFreq < docCount) {
            iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
        }
    }
    return iterator;
}
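// Hedged illustration (not from the original source) of the threshold arithmetic
// above: values greater than 1.0 are taken as absolute document counts, values at
// or below 1.0 as fractions of the segment's doc count. The helper name is
// hypothetical and only mirrors the expression used in filter().
static int resolveFrequency(double frequency, int docCount) {
    return frequency > 1.0 ? (int) frequency : (int) (docCount * frequency);
}
// e.g. resolveFrequency(0.01, 1000) == 10 (fractional), resolveFrequency(5.0, 1000) == 5 (absolute)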
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
                                             boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);
    // for each term (iterator next) in this field (field)
    // iterate over the docs (should only be one)
    int nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);
    for (int j = 0; j < freq; j++) {
        int curPos = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(curPos);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }
    nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter,
                       BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);

    indexWriter.close();

    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms,
                                 Map<Term, TermContext> termContexts, TermsEnum termsEnum) throws IOException {
    List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
    for (int i = 0; i < terms.length; i++) {
        final Term term = terms[i];
        TermState termState = termContexts.get(term).get(context.ord);
        if (termState == null) {
            // Term doesn't exist in reader
            continue;
        }
        termsEnum.seekExact(term.bytes(), termState);
        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
        if (postings == null) {
            // term does exist, but has no positions
            throw new IllegalStateException("field \"" + term.field()
                + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
        }
        cost += postings.cost();
        docsEnums.add(postings);
    }

    _queue = new DocsQueue(docsEnums);
    _posList = new IntQueue();
}
/**
 * Enumerates all terms greater than or equal to <code>lowerTerm</code>
 * and less than or equal to <code>upperTerm</code>.
 *
 * If an endpoint is null, it is said to be "open". Either or both
 * endpoints may be open. Open endpoints may not be exclusive
 * (you can't select all but the first or last term without
 * explicitly specifying the term to exclude.)
 *
 * @param tenum
 *          TermsEnum to filter
 * @param lowerTerm
 *          The term text at the lower end of the range
 * @param upperTerm
 *          The term text at the upper end of the range
 * @param includeLower
 *          If true, the <code>lowerTerm</code> is included in the range.
 * @param includeUpper
 *          If true, the <code>upperTerm</code> is included in the range.
 */
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
                          boolean includeLower, boolean includeUpper) {
    super(tenum);

    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (lowerTerm == null) {
        this.lowerBytesRef = new BytesRef();
        this.includeLower = true;
    } else {
        this.lowerBytesRef = lowerTerm;
        this.includeLower = includeLower;
    }

    if (upperTerm == null) {
        this.includeUpper = true;
        upperBytesRef = null;
    } else {
        this.includeUpper = includeUpper;
        upperBytesRef = upperTerm;
    }

    setInitialSeekTerm(lowerBytesRef);
    termComp = getComparator();
}
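// Hedged usage sketch (not part of the original source): enumerating all terms in
// ["bar", "foo"], both endpoints included. The wrapped enum is assumed to come from
// a Terms instance of some indexed field.
void rangeExample(TermsEnum tenum) throws IOException {
    TermsEnum rangeEnum = new TermRangeTermsEnum(tenum, new BytesRef("bar"), new BytesRef("foo"), true, true);
    BytesRef term;
    while ((term = rangeEnum.next()) != null) {
        // terms arrive in sorted order, starting at the first accepted term >= "bar"
    }
}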
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency a term must exceed to be considered a stopword
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
    this.delegate = delegate;

    for (String field : fields) {
        Set<String> stopWords = new HashSet<>();
        Terms terms = MultiFields.getTerms(indexReader, field);
        CharsRefBuilder spare = new CharsRefBuilder();
        if (terms != null) {
            TermsEnum te = terms.iterator(null);
            BytesRef text;
            while ((text = te.next()) != null) {
                if (te.docFreq() > maxDocFreq) {
                    spare.copyUTF8Bytes(text);
                    stopWords.add(spare.toString());
                }
            }
        }
        stopWordsPerField.put(field, stopWords);
    }
}
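// Hedged usage sketch (not from the original source): wrapping a StandardAnalyzer so
// that terms occurring in more than 200 documents of the hypothetical "body" field
// are dropped from query-time token streams.
Analyzer buildStopWordAwareAnalyzer(IndexReader indexReader) throws IOException {
    return new QueryAutoStopWordAnalyzer(new StandardAnalyzer(), indexReader,
            Collections.singleton("body"), 200);
}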
/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
 *  with the terms accepted by this automaton. */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
    switch (type) {
        case NONE:
            return TermsEnum.EMPTY;
        case ALL:
            return terms.iterator(null);
        case SINGLE:
            return new SingleTermsEnum(terms.iterator(null), term);
        case PREFIX:
            // TODO: this is very likely faster than .intersect,
            // but we should test and maybe cutover
            return new PrefixTermsEnum(terms.iterator(null), term);
        case NORMAL:
            return terms.intersect(this, null);
        default:
            // unreachable
            throw new RuntimeException("unhandled case");
    }
}
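// Hedged usage sketch (not part of the original source): intersecting one leaf's
// terms with a compiled regular-expression automaton. The field name and pattern
// are hypothetical; the NORMAL case above is what delegates to Terms.intersect.
void intersectExample(LeafReader reader) throws IOException {
    CompiledAutomaton compiled = new CompiledAutomaton(new RegExp("qu.*").toAutomaton());
    Terms terms = reader.terms("body"); // hypothetical field
    if (terms != null) {
        TermsEnum matching = compiled.getTermsEnum(terms);
        while (matching.next() != null) {
            // every returned term is accepted by the automaton
        }
    }
}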
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields)
        throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
@Override
public void visitMatchingTerms(
        IndexReader reader,
        String fieldName,
        MatchingTermVisitor mtv) throws IOException {
    /* check term presence in index here for symmetry with other SimpleTerm's */
    Terms terms = MultiFields.getTerms(reader, fieldName);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator(null);
        TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
        if (status == TermsEnum.SeekStatus.FOUND) {
            mtv.visitMatchingTerm(getLuceneTerm(fieldName));
        }
    }
}
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));

    return new BooleanQuery.Builder()
        .add(extractionSuccess, Occur.SHOULD)
        .add(extractionFailure, Occur.SHOULD)
        .build();
}
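// Hedged illustration (not from the original source) of the candidate-term encoding
// produced by the loop above: for the term "quick" in field "body", the disjunction
// ends up matching queryTermsField against "body" + FIELD_VALUE_SEPARATOR + "quick".
// The field and term values here are hypothetical.
BytesRef encodeCandidateExample() {
    BytesRefBuilder builder = new BytesRefBuilder();
    builder.append(new BytesRef("body"));   // field name
    builder.append(FIELD_VALUE_SEPARATOR);  // separator constant from the enclosing class
    builder.append(new BytesRef("quick"));  // indexed term
    return builder.toBytesRef();
}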
/**
 * Builds a DocIdSet for the given segment, containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker,
                              AbstractIndexFieldData.PerValueEstimator estimator, String fieldName) {
    super(termsEnum);
    this.breaker = breaker;
    this.termsEnum = termsEnum;
    this.estimator = estimator;
    this.fieldName = fieldName;
    this.totalBytes = 0;
    this.flushBuffer = 0;
}
/**
 * A {@link TermsEnum} that iterates only highest resolution geo prefix coded terms.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapGeoPointTerms(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // accept only the max resolution terms
            // todo is this necessary?
            return GeoPointField.getPrefixCodedShift(term) == GeoPointField.PRECISION_STEP * 4
                ? AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields,
                        Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
/**
 * Determine whether the BlockTreeTermsReader.FieldReader can be used
 * for estimating the field data, adding the estimate to the circuit
 * breaker if it can, otherwise wrapping the terms in a
 * RamAccountingTermsEnum to be estimated on a per-term basis.
 *
 * @param terms terms to be estimated
 * @return A possibly wrapped TermsEnum for the terms
 */
@Override
public TermsEnum beforeLoad(Terms terms) throws IOException {
    LeafReader reader = context.reader();

    TermsEnum iterator = terms.iterator();
    TermsEnum filteredIterator = filter(terms, iterator, reader);
    final boolean filtered = iterator != filteredIterator;
    iterator = filteredIterator;

    if (filtered) {
        if (logger.isTraceEnabled()) {
            logger.trace("Filter exists, can't circuit break normally, using RamAccountingTermsEnum");
        }
        return new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
    } else {
        estimatedBytes = this.estimateStringFieldData();
        // If we weren't able to estimate, wrap in the RamAccountingTermsEnum
        if (estimatedBytes == 0) {
            iterator = new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
        } else {
            breaker.addEstimateBytesAndMaybeBreak(estimatedBytes, fieldName);
        }
        return iterator;
    }
}
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms, used when checking for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName)
        throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        if (isSkipTerm(fieldName, term)) {
            continue;
        }
        final PostingsEnum docs = termsEnum.postings(null);
        int freq = 0;
        while (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            freq += docs.freq();
        }
        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
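// Hedged usage sketch (not part of the original source): feeding one document's
// term vector for a hypothetical "text" field into the frequency map built above.
// Int is the same mutable counter type used by addTermFrequencies.
void collectFrequenciesExample(IndexReader reader, int docId) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    Terms vector = reader.getTermVector(docId, "text");
    if (vector != null) {
        addTermFrequencies(termFreqMap, vector, "text");
    }
}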
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
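// Hedged usage sketch (not part of the original source): expanding a prefix into
// concrete index terms, stopping at maxExpansions. The "qu" prefix is hypothetical;
// ObjectHashSet is the same HPPC collection used in the method signature above.
void expandPrefixExample(IndexReader reader) throws IOException {
    ObjectHashSet<Term> expanded = new ObjectHashSet<>();
    getPrefixTerms(expanded, new Term(field, "qu"), reader);
    // expanded now holds up to maxExpansions terms starting with "qu"
}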
private PostingsEnum writeTermWithDocsOnly(TermsEnum iterator, PostingsEnum docsEnum) throws IOException {
    docsEnum = iterator.postings(docsEnum);
    int nextDoc = docsEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    writeFreq(docsEnum.freq());
    nextDoc = docsEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsEnum;
}
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
    int docFreq = topLevelIterator.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);

    long ttf = topLevelIterator.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
/**
 * A {@link TermsEnum} that iterates only full precision prefix coded 64 bit values.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // we stop accepting terms once we moved across the prefix codec terms - redundant values!
            return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
public void testTermsEnum() throws Exception {
    fillExtendedMvSet();
    writer.forceMerge(1);
    List<LeafReaderContext> atomicReaderContexts = refreshReader();

    IndexOrdinalsFieldData ifd = getForField("value");
    for (LeafReaderContext atomicReaderContext : atomicReaderContexts) {
        AtomicOrdinalsFieldData afd = ifd.load(atomicReaderContext);

        TermsEnum termsEnum = afd.getOrdinalsValues().termsEnum();
        int size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(12));

        assertThat(termsEnum.seekExact(new BytesRef("10")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("10"));
        assertThat(termsEnum.next(), nullValue());

        assertThat(termsEnum.seekExact(new BytesRef("08")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("08"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(2));

        termsEnum.seekExact(8);
        assertThat(termsEnum.term().utf8ToString(), equalTo("07"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(3));
    }
}
public void testDefaultPositionIncrementGap() throws IOException {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", "text").endObject().endObject()
            .endObject().endObject().string();

    DocumentMapper mapper = indexService.mapperService().merge("type",
            new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);

    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .array("field", new String[] {"a", "b"})
            .endObject()
            .bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);
    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition());
    }
}
public void testPositionIncrementGap() throws IOException {
    final int positionIncrementGap = randomIntBetween(1, 1000);
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
                .field("type", "text")
                .field("position_increment_gap", positionIncrementGap)
            .endObject().endObject()
            .endObject().endObject().string();

    DocumentMapper mapper = indexService.mapperService().merge("type",
            new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false);

    assertEquals(mapping, mapper.mappingSource().toString());

    ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .array("field", new String[] {"a", "b"})
            .endObject()
            .bytes());

    IndexableField[] fields = doc.rootDoc().getFields("field");
    assertEquals(2, fields.length);
    assertEquals("a", fields[0].stringValue());
    assertEquals("b", fields[1].stringValue());

    IndexShard shard = indexService.getShard(0);
    shard.index(new Engine.Index(new Term("_uid", doc.uid()), doc));
    shard.refresh("test");
    try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
        LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum terms = leaf.terms("field").iterator();
        assertTrue(terms.seekExact(new BytesRef("b")));
        PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, postings.nextDoc());
        assertEquals(positionIncrementGap + 1, postings.nextPosition());
    }
}
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    Terms terms0 = fields0.terms(fieldName);
    Terms terms1 = fields1.terms(fieldName);
    assertThat(terms0, notNullValue());
    assertThat(terms1, notNullValue());
    assertThat(terms0.size(), equalTo(terms1.size()));

    TermsEnum iter0 = terms0.iterator();
    TermsEnum iter1 = terms1.iterator();
    for (int i = 0; i < terms0.size(); i++) {
        BytesRef next0 = iter0.next();
        assertThat(next0, notNullValue());
        BytesRef next1 = iter1.next();
        assertThat(next1, notNullValue());

        // compare field value
        String string0 = next0.utf8ToString();
        String string1 = next1.utf8ToString();
        assertThat("expected: " + string0, string0, equalTo(string1));

        // compare df and ttf
        assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
        assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));

        // compare freq and docs
        PostingsEnum docsAndPositions0 = iter0.postings(null, PostingsEnum.ALL);
        PostingsEnum docsAndPositions1 = iter1.postings(null, PostingsEnum.ALL);
        assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
        assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));

        // compare position, start offsets and end offsets
        for (int j = 0; j < docsAndPositions0.freq(); j++) {
            assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition()));
            assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset()));
            assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset()));
        }
    }
    assertThat(iter0.next(), nullValue());
    assertThat(iter1.next(), nullValue());
}
/**
 * A {@link TermsEnum} that iterates only full precision prefix coded 32 bit values.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // we stop accepting terms once we moved across the prefix codec terms - redundant values!
            return NumericUtils.getPrefixCodedIntShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
    final TermsEnum termsEnum = terms.iterator();
    List<String> bestTerms = new ArrayList<>();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        bestTerms.add(text.utf8ToString());
    }
    Collections.sort(expectedTerms);
    Collections.sort(bestTerms);
    assertArrayEquals(expectedTerms.toArray(), bestTerms.toArray());
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
    TVTermsEnum termsEnum;
    if (reuse instanceof TVTermsEnum) {
        termsEnum = (TVTermsEnum) reuse;
        if (!termsEnum.canReuse(tvf)) {
            termsEnum = new TVTermsEnum();
        }
    } else {
        termsEnum = new TVTermsEnum();
    }
    termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets, storePayloads);
    return termsEnum;
}
TermsEnum getTermsEnum() {
    try {
        return getTermsEnum(data.clone());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Returns a {@link TermsEnum} positioned at this weight's Term, or null if
 * the mainTerm does not exist in the given context.
 */
private TermsEnum getTermsEnum(LeafReaderContext context, TermContext termContext, Term term) throws IOException {
    if (termContext != null) {
        // TermQuery either used as a Query or the mainTerm states have been provided at construction time
        assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context))
                : "The top-reader used to create Weight is not the same as the current reader's top-reader ("
                    + ReaderUtil.getTopLevelContext(context);
        final TermState state = termContext.get(context.ord);
        if (state == null) {
            // mainTerm is not present in that reader
            return null;
        }
        final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
        termsEnum.seekExact(term.bytes(), state);
        return termsEnum;
    }
    // no cached term states: seek the mainTerm directly on the current leaf
    // (assumed fallback, mirroring the pattern of Lucene's TermQuery)
    Terms terms = context.reader().terms(term.field());
    if (terms == null) {
        return null;
    }
    final TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekExact(term.bytes())) {
        return termsEnum;
    }
    return null;
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
    final TVTermsEnum termsEnum;
    if (reuse != null && reuse instanceof TVTermsEnum) {
        termsEnum = (TVTermsEnum) reuse;
    } else {
        termsEnum = new TVTermsEnum();
    }
    termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex,
            positions, startOffsets, lengths, payloadIndex, payloadBytes,
            new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
    return termsEnum;
}
/**
 * Returns a {@link TermsEnum} positioned at this weight's Term, or null if
 * the term does not exist in the given context.
 */
private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
    final TermState state = termStates.get(context.ord);
    if (state == null) {
        // term is not present in that reader
        assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
        return null;
    }
    //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
    final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
    termsEnum.seekExact(term.bytes(), state);
    return termsEnum;
}
@Override
@SuppressWarnings("unchecked")
protected TermsEnum getTermsEnum(final Terms terms, AttributeSource atts) throws IOException {
    // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are
    if (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) {
        return TermsEnum.EMPTY;
    }
    return new NumericRangeTermsEnum(terms.iterator(null));
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
    if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
        return TermsEnum.EMPTY;
    }

    TermsEnum tenum = terms.iterator(null);

    if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
        return tenum;
    }
    return new TermRangeTermsEnum(tenum, lowerTerm, upperTerm, includeLower, includeUpper);
}