Example source code for the Java class org.apache.lucene.index.memory.MemoryIndex
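
All of the snippets below revolve around the same small API surface: a MemoryIndex holds exactly one analyzed document entirely in RAM, and either scores a query against it via search(Query) or exposes it as a regular IndexReader via createSearcher(). A minimal, self-contained sketch of that round trip (class name and field values are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexSketch {
    public static void main(String[] args) {
        MemoryIndex index = new MemoryIndex();
        // Analyze and add a single field; a MemoryIndex holds exactly one document.
        index.addField("content", "the quick brown fox", new StandardAnalyzer());

        // search() returns a relevance score, or 0.0f when the document does not match.
        float score = index.search(new TermQuery(new Term("content", "fox")));
        System.out.println(score > 0.0f ? "match, score=" + score : "no match");

        // reset() clears the document so the instance can be reused.
        index.reset();
    }
}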

Project: elasticsearch_my    File: CandidateQueryTests.java
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
    boolean requireScore = randomBoolean();
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    Query percolateQuery = fieldType.percolateQuery("type", queryStore, new BytesArray("{}"), percolateSearcher);
    Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
    TopDocs topDocs = shardSearcher.search(query, 10);

    Query controlQuery = new ControlQuery(memoryIndex, queryStore);
    controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
    TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
    assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
    assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
    for (int j = 0; j < topDocs.scoreDocs.length; j++) {
        assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
        assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
        if (requireScore) {
            Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
            Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
            assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
            assertThat(explain1.getValue(), equalTo(explain2.getValue()));
        }
    }
}
Project: Elasticsearch    File: ShardTermVectorsService.java
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, e.g. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
Project: Elasticsearch    File: MultiDocumentPercolatorIndex.java
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
Project: Elasticsearch    File: SingleDocumentPercolatorIndex.java
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
Project: Test_Projects    File: TestLuceneQueryInMap.java
public static void main(String[] args) throws ParseException {
    Analyzer analyzer = new StandardAnalyzer();
    MemoryIndex index = new MemoryIndex();
    Map<String, String> event = new HashMap<String, String>();
    event.put("content", "Readings about Salmons and other select Alaska fishing Manuals");
    event.put("author", "Tales of James");

    for (Entry<String, String> entry : event.entrySet()) {
        index.addField(entry.getKey(), entry.getValue(), analyzer);
    }

    QueryParser parser = new QueryParser("content", analyzer);
    Query query = parser.parse("+author:james +salmon~ +fish* manual~");

    float score = index.search(query);
    if (score > 0.0f) {
        System.out.println("it's a match");
    } else {
        System.out.println("no match found");
    }
    System.out.println("indexData=" + index.toString());
}
Project: opensearchserver    File: Classifier.java
private void bestScoreClassification(Client client, IndexDocument document, LanguageEnum lang, MemoryIndex index)
        throws ParseException, SearchLibException, SyntaxError, IOException {
    ClassifierItem selectedItem = null;
    float maxScore = 0;
    for (ClassifierItem item : valueSet) {
        float score = item.score(client, lang, index);
        if (score > maxScore) {
            selectedItem = item;
            maxScore = score;
        }
    }
    if (selectedItem != null) {
        document.add(getFieldName(), selectedItem.getValue(), selectedItem.getBoost());
        if (scoreFieldName != null && scoreFieldName.length() > 0) {
            document.addString(scoreFieldName, scoreFormat.format(maxScore));
        }
    } else if (defaultValue != null && defaultValue.length() > 0) {
        document.add(fieldName, defaultValue, 1.0F);
    }
}
Project: opensearchserver    File: Classifier.java
public void classification(Client client, IndexDocument document)
        throws SearchLibException, ParseException, SyntaxError, IOException {
    rwl.r.lock();
    try {
        MemoryIndex index = new MemoryIndex();
        LanguageEnum lang = document.getLang();
        Analyzer analyzer = client.getSchema().getIndexPerFieldAnalyzer(lang);
        for (FieldContent fieldContent : document) {
            String fieldName = fieldContent.getField();
            String concatValues = fieldContent.getMergedValues(" ");
            index.addField(fieldName, concatValues, analyzer);
        }
        if (method == ClassificationMethodEnum.MULTIVALUED)
            multivaluedClassification(client, document, lang, index);
        else if (method == ClassificationMethodEnum.BESTSCORE)
            bestScoreClassification(client, document, lang, index);

    } finally {
        rwl.r.unlock();
    }
}
Project: elasticsearch_my    File: PercolatorFieldMapperTests.java
public void testCreateCandidateQuery() throws Exception {
    addQueryMapping();

    MemoryIndex memoryIndex = new MemoryIndex(false);
    memoryIndex.addField("field1", "the quick brown fox jumps over the lazy dog", new WhitespaceAnalyzer());
    memoryIndex.addField("field2", "some more text", new WhitespaceAnalyzer());
    memoryIndex.addField("_field3", "unhide me", new WhitespaceAnalyzer());
    memoryIndex.addField("field4", "123", new WhitespaceAnalyzer());
    memoryIndex.addField(new LongPoint("number_field", 10L), new WhitespaceAnalyzer());

    IndexReader indexReader = memoryIndex.createSearcher().getIndexReader();

    BooleanQuery candidateQuery = (BooleanQuery) fieldType.createCandidateQuery(indexReader);
    assertEquals(2, candidateQuery.clauses().size());
    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(0).getOccur());
    TermInSetQuery termsQuery = (TermInSetQuery) candidateQuery.clauses().get(0).getQuery();

    PrefixCodedTerms terms = termsQuery.getTermData();
    assertThat(terms.size(), equalTo(14L));
    PrefixCodedTerms.TermIterator termIterator = terms.iterator();
    assertTermIterator(termIterator, "_field3\u0000me", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "_field3\u0000unhide", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000brown", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000dog", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000fox", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000jumps", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000lazy", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000over", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000quick", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field1\u0000the", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2\u0000more", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2\u0000some", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field2\u0000text", fieldType.queryTermsField.name());
    assertTermIterator(termIterator, "field4\u0000123", fieldType.queryTermsField.name());

    assertEquals(Occur.SHOULD, candidateQuery.clauses().get(1).getOccur());
    assertEquals(new TermQuery(new Term(fieldType.extractionResultField.name(), EXTRACTION_FAILED)),
            candidateQuery.clauses().get(1).getQuery());
}
Project: elasticsearch_my    File: TemplateQueryBuilderTests.java
@Override
protected Query rewrite(Query query) throws IOException {
    // TemplateQueryBuilder applies an optimization when the template or query builder carries a boost / query name: it wraps
    // the actual QueryBuilder that comes from the template in a BooleanQueryBuilder to give it an outer boost / name.
    // This leaves some queries not exactly equal but equivalent, so we need to rewrite them before comparing.
    if (query != null) {
        MemoryIndex idx = new MemoryIndex();
        return idx.createSearcher().rewrite(query);
    }
    return new MatchAllDocsQuery(); // null == *:*
}
Project: elasticsearch_my    File: WrapperQueryBuilderTests.java
@Override
protected Query rewrite(Query query) throws IOException {
    // WrapperQueryBuilder applies an optimization when the wrapper or query builder carries a boost / query name: it wraps
    // the actual QueryBuilder that comes from the binary blob in a BooleanQueryBuilder to give it an outer boost / name.
    // This leaves some queries not exactly equal but equivalent, so we need to rewrite them before comparing.
    if (query != null) {
        MemoryIndex idx = new MemoryIndex();
        return idx.createSearcher().rewrite(query);
    }
    return new MatchAllDocsQuery(); // null == *:*
}
Project: elasticsearch_my    File: MoreLikeThisQueryBuilderTests.java
/**
 * Here we could go overboard and use a pre-generated indexed random document for a given Item,
 * but for now we'd prefer to simply return the id as the content of the document, and to do that
 * for every field.
 */
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
    MemoryIndex index = new MemoryIndex();
    for (String fieldName : fieldNames) {
        index.addField(fieldName, text, new WhitespaceAnalyzer());
    }
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
Project: Elasticsearch    File: PercolatorService.java
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
                         PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
                         HighlightPhase highlightPhase, ClusterService clusterService,
                         AggregationPhase aggregationPhase, ScriptService scriptService,
                         MappingUpdatedAction mappingUpdatedAction) {
    super(settings);
    this.indexNameExpressionResolver = indexNameExpressionResolver;
    this.parseFieldMatcher = new ParseFieldMatcher(settings);
    this.indicesService = indicesService;
    this.pageCacheRecycler = pageCacheRecycler;
    this.bigArrays = bigArrays;
    this.clusterService = clusterService;
    this.highlightPhase = highlightPhase;
    this.aggregationPhase = aggregationPhase;
    this.scriptService = scriptService;
    this.mappingUpdatedAction = mappingUpdatedAction;
    this.sortParseElement = new SortParseElement();

    final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
    cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            // TODO: should we expose payloads as an option? should offsets be turned on always?
            return new ExtendedMemoryIndex(true, false, maxReuseBytes);
        }
    };
    single = new SingleDocumentPercolatorIndex(cache);
    multi = new MultiDocumentPercolatorIndex(cache);

    percolatorTypes = new IntObjectHashMap<>(6);
    percolatorTypes.put(countPercolator.id(), countPercolator);
    percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
    percolatorTypes.put(matchPercolator.id(), matchPercolator);
    percolatorTypes.put(queryPercolator.id(), queryPercolator);
    percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
    percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
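
The constructor above keeps one reusable MemoryIndex per thread and resets it between percolated documents. A minimal sketch of that per-thread reuse pattern, using the stock MemoryIndex in place of Elasticsearch's internal ExtendedMemoryIndex (class, field, and query names are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.CloseableThreadLocal;

public class PerThreadMemoryIndexSketch {
    private final CloseableThreadLocal<MemoryIndex> cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            return new MemoryIndex(true); // store offsets, as the percolator does
        }
    };

    public float scoreDocument(String text) {
        MemoryIndex mi = cache.get();
        try {
            mi.addField("field", text, new StandardAnalyzer());
            return mi.search(new TermQuery(new Term("field", "text")));
        } finally {
            mi.reset(); // hand the instance back in a clean state for the next document
        }
    }
}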
Project: lucene4ir    File: ExampleStatsApp.java
public void buildTermVector(int docid) throws IOException {
    /* Rebuild term statistics for a stored document by re-analyzing its "content" field into a MemoryIndex. */

    Set<String> fieldList = new HashSet<>();
    fieldList.add("content");

    Document doc = reader.document(docid, fieldList);
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    Terms t = mr.leaves().get(0).reader().terms("content");

    if (t != null && t.size() > 0) {
        TermsEnum te = t.iterator();
        BytesRef term = null;

        System.out.println(t.size());

        while ((term = te.next()) != null) {
            System.out.println("BytesRef: " + term.utf8ToString());
            System.out.println("docFreq: " + te.docFreq());
            System.out.println("totalTermFreq: " + te.totalTermFreq());
        }
    }
}
Project: solr-redis    File: TestTaggedQuery.java
@Test
public void testRewrite() throws IOException {
  MemoryIndex memoryIndex = new MemoryIndex();

  TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
  Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());

  assertTrue(rewrittenQuery instanceof TermQuery);
  assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
  assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
Project: Test_Projects    File: TestLuceneIndexThenSearchOptions.java
protected static void search(MemoryIndex index, Query query) {
    float score = index.search(query);
    if (score > 0.0f) {
        System.out.println("it's a match for " + query);
    } else {
        System.out.println("no match found for " + query);
    }
}
Project: Test_Projects    File: TestLuceneIndexThenSearch.java
public static void main(String[] args) throws ParseException, IOException {
    MemoryIndex index = new MemoryIndex();
    Analyzer analyzer = new StandardAnalyzer();
    StringField field3 = new StringField(AUTHOR, FULL_NAME, Store.YES);
    index.addField(field3, analyzer);

    Query query = new TermQuery(new Term(AUTHOR, FULL_NAME));
    search(index, query);

    query = new TermQuery(new Term(AUTHOR, FIRST_NAME));
    search(index, query);

    query = new TermQuery(new Term(AUTHOR, LAST_NAME));
    search(index, query);
}
Project: Test_Projects    File: TestLuceneIndexThenSearch.java
protected static void search(MemoryIndex index, Query query) {
    float score = index.search(query);
    if (score > 0.0f) {
        System.out.println("it's a match for " + query);
    } else {
        System.out.println("no match found for " + query);
    }
}
Project: opensearchserver    File: Classifier.java
private void multivaluedClassification(Client client, IndexDocument document, LanguageEnum lang, MemoryIndex index)
        throws ParseException, SearchLibException, SyntaxError, IOException {
    boolean setDefaultValue = defaultValue != null && defaultValue.length() > 0;
    for (ClassifierItem item : valueSet) {
        float score = item.score(client, lang, index);
        if (score > 0.0f) {
            document.add(fieldName, item.getValue(), item.getBoost());
            if (scoreFieldName != null && scoreFieldName.length() > 0)
                document.addString(scoreFieldName, Float.toString(score));
            setDefaultValue = false;
        }
    }
    if (setDefaultValue)
        document.add(fieldName, defaultValue, 1.0F);
}
Project: opensearchserver    File: ClassifierItem.java
protected final float score(Client client, LanguageEnum lang,
        MemoryIndex index) throws ParseException, SearchLibException,
        SyntaxError, IOException {
    Query qry = queryMap.get(lang);
    if (qry == null) {
        AbstractSearchRequest searchRequest = getSearchRequest(client, lang);
        qry = searchRequest.getQuery();
        queryMap.put(lang, qry);
    }
    return index.search(qry);
}
Project: opensearchserver    File: Fragment.java
public final double searchScore(final String fieldName,
        final CompiledAnalyzer analyzer, final Query query) {
    searchScore = 0;
    if (query == null || analyzer == null)
        return 0;
    MemoryIndex index = new MemoryIndex();
    index.addField(fieldName, originalText, analyzer);
    searchScore = index.search(query);
    return searchScore;
}
Project: elasticsearch_my    File: CandidateQueryTests.java
public void testDuelSpecificQueries() throws Exception {
    List<ParseContext.Document> documents = new ArrayList<>();

    CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
    commonTermsQuery.add(new Term("field", "quick"));
    commonTermsQuery.add(new Term("field", "brown"));
    commonTermsQuery.add(new Term("field", "fox"));
    addQuery(commonTermsQuery, documents);

    BlendedTermQuery blendedTermQuery = BlendedTermQuery.booleanBlendedQuery(new Term[]{new Term("field", "quick"),
            new Term("field", "brown"), new Term("field", "fox")}, false);
    addQuery(blendedTermQuery, documents);

    SpanNearQuery spanNearQuery = new SpanNearQuery.Builder("field", true)
            .addClause(new SpanTermQuery(new Term("field", "quick")))
            .addClause(new SpanTermQuery(new Term("field", "brown")))
            .addClause(new SpanTermQuery(new Term("field", "fox")))
            .build();
    addQuery(spanNearQuery, documents);

    SpanNearQuery spanNearQuery2 = new SpanNearQuery.Builder("field", true)
            .addClause(new SpanTermQuery(new Term("field", "the")))
            .addClause(new SpanTermQuery(new Term("field", "lazy")))
            .addClause(new SpanTermQuery(new Term("field", "doc")))
            .build();
    SpanOrQuery spanOrQuery = new SpanOrQuery(
            spanNearQuery,
            spanNearQuery2
    );
    addQuery(spanOrQuery, documents);

    SpanNotQuery spanNotQuery = new SpanNotQuery(spanNearQuery, spanNearQuery);
    addQuery(spanNotQuery, documents);

    long lowerLong = randomIntBetween(0, 256);
    long upperLong = lowerLong + randomIntBetween(0, 32);
    addQuery(LongPoint.newRangeQuery("long_field", lowerLong, upperLong), documents);

    indexWriter.addDocuments(documents);
    indexWriter.close();
    directoryReader = DirectoryReader.open(directory);
    IndexSearcher shardSearcher = newSearcher(directoryReader);
    // Disable query cache, because ControlQuery cannot be cached...
    shardSearcher.setQueryCache(null);

    Document document = new Document();
    document.add(new TextField("field", "the quick brown fox jumps over the lazy dog", Field.Store.NO));
    long randomLong = randomIntBetween((int) lowerLong, (int) upperLong);
    document.add(new LongPoint("long_field", randomLong));
    MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
    duelRun(queryStore, memoryIndex, shardSearcher);
}
Project: elasticsearch_my    File: CandidateQueryTests.java
private ControlQuery(MemoryIndex memoryIndex, PercolateQuery.QueryStore queryStore) {
    this.memoryIndex = memoryIndex;
    this.queryStore = queryStore;
}
Project: Elasticsearch    File: MultiDocumentPercolatorIndex.java
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Project: Elasticsearch    File: MultiDocumentPercolatorIndex.java
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
    super("percolate", searcher);
    this.rootDocMemoryIndex = rootDocMemoryIndex;
}
Project: Elasticsearch    File: SingleDocumentPercolatorIndex.java
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
Project: Elasticsearch    File: SingleDocumentPercolatorIndex.java
public DocEngineSearcher(MemoryIndex memoryIndex) {
    super("percolate", memoryIndex.createSearcher());
    this.memoryIndex = memoryIndex;
}
Project: lucene4ir    File: ExampleStatsApp.java
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {
    Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();

    Document doc = reader.document(docid, fields);

    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    for (LeafReaderContext leafContext : mr.leaves()) {
        LeafReader leaf = leafContext.reader();

        for (String field : fields) {
            Map<String, List<Integer>> termToPositions = new HashMap<>();

            Terms t = leaf.terms(field);

            if (t != null) {
                fieldToTermVector.put(field, termToPositions);
                TermsEnum tenum = t.iterator();

                BytesRef termBytes = null;
                PostingsEnum postings = null;
                while ((termBytes = tenum.next()) != null) {
                    List<Integer> positions = new ArrayList<>();
                    termToPositions.put(termBytes.utf8ToString(), positions);
                    // request positions explicitly; the no-flags postings() overload only guarantees frequencies
                    postings = tenum.postings(postings, PostingsEnum.POSITIONS);
                    postings.advance(0);

                    for (int i = 0; i < postings.freq(); i++) {
                        positions.add(postings.nextPosition());
                    }
                }
            }
        }
    }
    return fieldToTermVector;
}
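
Since buildTermVectorWithPosition depends on the surrounding app's reader field, a self-contained sketch of the same MemoryIndex position walk may be easier to experiment with (document content and field name are illustrative):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.util.BytesRef;

public class MemoryIndexPositionsSketch {
    public static void main(String[] args) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("content", "to be or not to be", Field.Store.NO));

        MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
        // A MemoryIndex reader always has exactly one leaf holding docid 0.
        LeafReader leaf = mi.createSearcher().getIndexReader().leaves().get(0).reader();

        Terms terms = leaf.terms("content");
        TermsEnum te = terms.iterator();
        BytesRef term;
        PostingsEnum postings = null;
        while ((term = te.next()) != null) {
            // Request positions explicitly; the no-flags overload only guarantees frequencies.
            postings = te.postings(postings, PostingsEnum.POSITIONS);
            postings.advance(0);
            List<Integer> positions = new ArrayList<>();
            for (int i = 0; i < postings.freq(); i++) {
                positions.add(postings.nextPosition());
            }
            System.out.println(term.utf8ToString() + " -> " + positions);
        }
    }
}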