/**
 * Benchmarks indexing 10M synthetic documents into an {@link FSDirectory} (memory-mapped on
 * 64-bit JVMs) and then running 10M term lookups against it. Timings are logged via the
 * class logger; this is a manual benchmark, not a correctness test.
 *
 * @throws IOException if the index cannot be written or read.
 */
public void testMMapDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // NOTE(review): hard-coded Windows path — consider a temp directory for portability.
    FSDirectory open = FSDirectory.open(Paths.get("E:/testlucene"));
    IndexWriter indexWriter = new IndexWriter(open, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("MMapDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    // Fix: the reader and directory were never closed, leaking file handles / mappings.
    try (DirectoryReader reader = DirectoryReader.open(open)) {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        int total = 0;
        for (int i = 0; i < 10000000; i++) {
            TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
            TopDocs search = indexSearcher.search(key1, 10);
            total += search.totalHits;
        }
        System.out.println(total);
    } finally {
        open.close();
    }
    end = System.currentTimeMillis();
    log.error("MMapDirectory search consumes {}ms!", (end - start));
}
/**
 * "Duels" the real percolate query against a brute-force {@code ControlQuery} over the same
 * query store and memory index: both must return identical totals, doc ids, scores and
 * (when scoring) explanations on the shard searcher.
 */
private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryIndex, IndexSearcher shardSearcher) throws IOException {
    // Randomly exercise both the scoring path and the constant-score (filter) path.
    boolean requireScore = randomBoolean();
    IndexSearcher percolateSearcher = memoryIndex.createSearcher();
    Query percolateQuery = fieldType.percolateQuery("type", queryStore, new BytesArray("{}"), percolateSearcher);
    Query query = requireScore ? percolateQuery : new ConstantScoreQuery(percolateQuery);
    TopDocs topDocs = shardSearcher.search(query, 10);
    // The control query evaluates every stored query the slow-but-obviously-correct way.
    Query controlQuery = new ControlQuery(memoryIndex, queryStore);
    controlQuery = requireScore ? controlQuery : new ConstantScoreQuery(controlQuery);
    TopDocs controlTopDocs = shardSearcher.search(controlQuery, 10);
    assertThat(topDocs.totalHits, equalTo(controlTopDocs.totalHits));
    assertThat(topDocs.scoreDocs.length, equalTo(controlTopDocs.scoreDocs.length));
    for (int j = 0; j < topDocs.scoreDocs.length; j++) {
        assertThat(topDocs.scoreDocs[j].doc, equalTo(controlTopDocs.scoreDocs[j].doc));
        assertThat(topDocs.scoreDocs[j].score, equalTo(controlTopDocs.scoreDocs[j].score));
        if (requireScore) {
            // Explanations are only meaningful when scores are requested.
            Explanation explain1 = shardSearcher.explain(query, topDocs.scoreDocs[j].doc);
            Explanation explain2 = shardSearcher.explain(controlQuery, controlTopDocs.scoreDocs[j].doc);
            assertThat(explain1.isMatch(), equalTo(explain2.isMatch()));
            assertThat(explain1.getValue(), equalTo(explain2.getValue()));
        }
    }
}
/**
 * Returns Lucene's internal document id for the element with the given id within the
 * given {@link IIndexTypeConf}.
 *
 * @param typeConf the index type configuration to search in.
 * @param id the id to find.
 * @return the Lucene document id of the (first) matching document.
 * @throws IllegalStateException if no document with the given id exists in the index.
 */
public int getDocIdForId(final IIndexTypeConf typeConf, final String id) {
    final SearchOptions params = new SearchOptions();
    params.setMaxResults(1); // only the first hit is needed
    final BooleanQuery query = new BooleanQuery();
    QueryUtil.addTypeConf(query, typeConf);
    QueryUtil.addId(query, id);
    final TopDocs topDocs = IndexSearch.getInstance().getTopDocs(query, params);
    if(topDocs.totalHits == 0) {
        throw new IllegalStateException("Can't find news with id " + id + " in news index.");
    } else if(topDocs.totalHits > 1) {
        // Ids should be unique; more than one hit indicates an inconsistent index.
        LOGGER.warn("Found more than one result for news with id " + id + " in news index. "
                + "This is an invalid state. Using the first found document.");
    }
    return topDocs.scoreDocs[0].doc;
}
/** * Returns the amount of {@link News} which are assigned to news groups (news group id > 0) * for the given {@link Query}. */ private int getAmountOfNewsInNewsGroups(final Query filterQuery) { final BooleanQuery query = new BooleanQuery(); query.add(filterQuery, Occur.MUST); // get only news that are in real groups (newsGroupId > 0) final NumericRangeQuery<Long> newsGroupFilterQuery = NumericRangeQuery.newLongRange( NewsIndexType.FIELD_NEWSGROUPID, 0l, null, false, true); query.add(newsGroupFilterQuery, Occur.MUST); final SearchOptions options = new SearchOptions(); options.setMaxResults(0); // we only want the totalHits, not the results. final TopDocs topDocs = IndexSearch.getInstance().getTopDocs(query, options); return topDocs.totalHits; }
/**
 * Asserts that exactly {@code expectedAmount} documents with the given field content exist
 * in the index and, when at least one is expected, that the top hit carries the expected
 * element id and index type.
 *
 * @param elementId the expected value of the id field of the first hit.
 * @param fieldContent the term searched in {@code FIELDNAME}.
 * @param expectedAmount the exact number of documents expected to match.
 * @throws IOException if the index cannot be read.
 */
private void checkIndexContent(final String elementId, final String fieldContent,
        final int expectedAmount) throws IOException {
    final IndexReader reader = IndexManager.getInstance().getIndex().getIndexReader();
    final IndexSearcher searcher = new IndexSearcher(reader);
    // Request a few more than expected so surplus matches are detected rather than truncated.
    final TopDocs topDocs = searcher.search(new TermQuery(new Term(FIELDNAME, fieldContent)), expectedAmount + 10);
    assertNotNull(topDocs);
    // Fix: assertEquals reports expected vs. actual on failure, unlike the old
    // assertTrue(topDocs.totalHits == expectedAmount) which only reported "false".
    assertEquals(expectedAmount, topDocs.totalHits);
    if (expectedAmount > 0) {
        final ScoreDoc scoreDoc = topDocs.scoreDocs[0];
        assertNotNull(scoreDoc);
        final Document doc = reader.document(scoreDoc.doc);
        assertNotNull(doc);
        assertEquals(fieldContent, doc.get(FIELDNAME));
        assertEquals(elementId, doc.get(IIndexElement.FIELD_ID));
        assertEquals(INDEX_TYPE, doc.get(IIndexElement.FIELD_INDEX_TYPE));
    }
}
/**
 * Prefix search for quick-tips suggestions: combines name-prefix matches with the
 * highest-download-rank records, merges the two result sets and de-duplicates doc ids.
 *
 * @param q the keyword prefix to search for.
 * @return merged, de-duplicated hits, or {@code null} when q is empty/over-length
 *         or the merge produced no result.
 * @throws IOException if the index cannot be read.
 */
@Override
public ScoreDoc[] prefixSearch(String q) throws IOException {
    if (StringUtils.isEmpty(q) || q.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", q);
        return null;
    }
    final TopDocs[] rstTopDocs = new TopDocs[2];
    // First leg: documents whose name field starts with the query.
    final Query nameFldQuery = new PrefixQuery(new Term(NAME.getName(), q));
    rstTopDocs[0] = indexSearcher.search(nameFldQuery, appConfig.getQuickTipsNum() * 2, sort);
    // Second leg: documents above the minimum download rank.
    final Query downLoadRankQuery = NumericRangeQuery.newIntRange(DOWNOLOAD_RANK.getName(),
            MIN_DOWNLOAD_RANK, Integer.MAX_VALUE, true, false);
    // From the MAX_TOP records with the highest download counts, keep those matching the keyword.
    rstTopDocs[1] = indexSearcher.search(downLoadRankQuery, MAX_TOP, sort);
    // Merge both legs by sort order, then drop duplicate doc ids.
    TopDocs rst = TopDocsUtil.mergeDuplicateDocId(
            TopDocs.merge(sort, MAX_TOP + appConfig.getQuickTipsNum() * 2, rstTopDocs));
    if(rst != null) {
        return rst.scoreDocs;
    }
    return null;
}
/** Modifies incoming TopDocs (in) by replacing the top hits with resorted's hits, and then resorting all hits. */ private TopDocs combine(TopDocs in, TopDocs resorted, QueryRescoreContext ctx) { System.arraycopy(resorted.scoreDocs, 0, in.scoreDocs, 0, resorted.scoreDocs.length); if (in.scoreDocs.length > resorted.scoreDocs.length) { // These hits were not rescored (beyond the rescore window), so we treat them the same as a hit that did get rescored but did // not match the 2nd pass query: for(int i=resorted.scoreDocs.length;i<in.scoreDocs.length;i++) { // TODO: shouldn't this be up to the ScoreMode? I.e., we should just invoke ScoreMode.combine, passing 0.0f for the // secondary score? in.scoreDocs[i].score *= ctx.queryWeight(); } // TODO: this is wrong, i.e. we are comparing apples and oranges at this point. It would be better if we always rescored all // incoming first pass hits, instead of allowing recoring of just the top subset: Arrays.sort(in.scoreDocs, SCORE_DOC_COMPARATOR); } // update the max score after the resort in.setMaxScore(in.scoreDocs[0].score); return in; }
/** Highlighting a {@code MultiPhrasePrefixQuery} must wrap each matched term, including the prefix match. */
public void testMultiPhrasePrefixQuery() throws Exception {
    final Analyzer analyzer = new StandardAnalyzer();
    final Directory directory = newDirectory();
    final String content = "The quick brown fox.";
    final IndexReader reader = indexOneDoc(directory, "text", content, analyzer);

    final MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery();
    for (String term : new String[] {"quick", "brown", "fo"}) {
        phrasePrefix.add(new Term("text", term));
    }

    final IndexSearcher searcher = newSearcher(reader);
    final TopDocs hits = searcher.search(phrasePrefix, 10, Sort.INDEXORDER);
    assertThat(hits.totalHits, equalTo(1));

    final int docId = hits.scoreDocs[0].doc;
    final CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    final CustomUnifiedHighlighter highlighter =
            new CustomUnifiedHighlighter(searcher, analyzer, formatter, null, content, false);
    final Snippet[] snippets = highlighter.highlightField("text", phrasePrefix, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));

    reader.close();
    directory.close();
}
/** Highlighting an {@code AllTermQuery} must wrap only the queried term. */
public void testAllTermQuery() throws IOException {
    final Directory directory = newDirectory();
    final String content = "The quick brown fox.";
    final Analyzer analyzer = new StandardAnalyzer();
    final IndexReader reader = indexOneDoc(directory, "all", content, analyzer);

    final AllTermQuery allQuery = new AllTermQuery(new Term("all", "fox"));
    final IndexSearcher searcher = newSearcher(reader);
    final TopDocs hits = searcher.search(allQuery, 10, Sort.INDEXORDER);
    assertThat(hits.totalHits, equalTo(1));

    final int docId = hits.scoreDocs[0].doc;
    final CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    final CustomUnifiedHighlighter highlighter =
            new CustomUnifiedHighlighter(searcher, analyzer, formatter, null, content, false);
    final Snippet[] snippets = highlighter.highlightField("all", allQuery, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The quick brown <b>fox</b>."));

    reader.close();
    directory.close();
}
/** Highlighting a {@code CommonTermsQuery} must wrap each of its terms. */
public void testCommonTermsQuery() throws IOException {
    final Directory directory = newDirectory();
    final String content = "The quick brown fox.";
    final Analyzer analyzer = new StandardAnalyzer();
    final IndexReader reader = indexOneDoc(directory, "text", content, analyzer);

    final CommonTermsQuery commonTerms = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128);
    for (String term : new String[] {"quick", "brown", "fox"}) {
        commonTerms.add(new Term("text", term));
    }

    final IndexSearcher searcher = newSearcher(reader);
    final TopDocs hits = searcher.search(commonTerms, 10, Sort.INDEXORDER);
    assertThat(hits.totalHits, equalTo(1));

    final int docId = hits.scoreDocs[0].doc;
    final CustomPassageFormatter formatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
    final CustomUnifiedHighlighter highlighter =
            new CustomUnifiedHighlighter(searcher, analyzer, formatter, null, content, false);
    final Snippet[] snippets = highlighter.highlightField("text", commonTerms, docId, 5);
    assertThat(snippets.length, equalTo(1));
    assertThat(snippets[0].getText(), equalTo("The <b>quick</b> <b>brown</b> <b>fox</b>."));

    reader.close();
    directory.close();
}
/**
 * Searches {@code field} for {@code word} and returns a map from each found verb to its
 * stored frequency. Parse/IO problems are logged and yield a (possibly empty) partial result.
 *
 * @param word the query string, parsed with the configured analyzer.
 * @param field the index field to query.
 * @param maxSearch the maximum number of hits to examine.
 * @return verb-to-frequency map; empty when nothing matched or an error occurred.
 */
public Map<String, Integer> search(String word, String field, int maxSearch) {
    if (indexSearcher == null) {
        initialize(index);
    }
    final Map<String, Integer> verbFreqs = new HashMap<>();
    final QueryParser queryParser = new QueryParser(Version.LUCENE_36, field, analyzer);
    try {
        final TopDocs topDocs = indexSearcher.search(queryParser.parse(word), maxSearch);
        final ScoreDoc[] hits = topDocs.scoreDocs;
        final int limit = Math.min(maxSearch, hits.length);
        for (int i = 0; i < limit; i++) {
            final Document document = indexSearcher.doc(hits[i].doc);
            verbFreqs.put(document.get(VERB), Integer.parseInt(document.get(FREQ)));
        }
    } catch (ParseException | IOException e) {
        log.warn("Error searching Lucene index.", e);
    }
    return verbFreqs;
}
/**
 * Verifies AVG sort mode over nested child docs: sorting parents by the average of their
 * children's "field2" values must yield the exact parent doc ids and averaged values below.
 */
@Override
protected void assertAvgScoreMode(Query parentFilter, IndexSearcher searcher) throws IOException {
    MultiValueMode sortMode = MultiValueMode.AVG;
    // Children are everything that is not a parent.
    Query childFilter = Queries.not(parentFilter);
    // -127 is the missing-value placeholder for docs without "field2".
    XFieldComparatorSource nestedComparatorSource = createFieldComparator("field2", sortMode, -127, createNested(searcher, parentFilter, childFilter));
    Query query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("field2", nestedComparatorSource));
    TopDocs topDocs = searcher.search(query, 5, sort);
    assertThat(topDocs.totalHits, equalTo(7));
    assertThat(topDocs.scoreDocs.length, equalTo(5));
    // Expected order: averaged child values ascending, ties broken by doc id.
    assertThat(topDocs.scoreDocs[0].doc, equalTo(11));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).intValue(), equalTo(2));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(7));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).intValue(), equalTo(2));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[3].doc, equalTo(15));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[4].doc, equalTo(19));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).intValue(), equalTo(3));
}
/**
 * Verifies AVG sort mode over nested child docs (variant taking the inner field comparator,
 * currently unused by the body). Expected parent doc ids and averaged values are fixed by
 * the shared test fixture.
 */
protected void assertAvgScoreMode(Query parentFilter, IndexSearcher searcher,
        IndexFieldData.XFieldComparatorSource innerFieldComparator) throws IOException {
    MultiValueMode sortMode = MultiValueMode.AVG;
    // Children are everything that is not a parent.
    Query childFilter = Queries.not(parentFilter);
    // -127 is the missing-value placeholder for docs without "field2".
    XFieldComparatorSource nestedComparatorSource = createFieldComparator("field2", sortMode, -127, createNested(searcher, parentFilter, childFilter));
    Query query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("field2", nestedComparatorSource));
    TopDocs topDocs = searcher.search(query, 5, sort);
    assertThat(topDocs.totalHits, equalTo(7));
    assertThat(topDocs.scoreDocs.length, equalTo(5));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(11));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).intValue(), equalTo(2));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(7));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).intValue(), equalTo(2));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[3].doc, equalTo(15));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[4].doc, equalTo(19));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).intValue(), equalTo(3));
}
/**
 * Verifies AVG sort mode over nested child docs for this fixture's data set. Note the
 * expected order differs from the sibling tests (doc 3 before doc 7, last value 4) because
 * this class indexes different child values.
 */
protected void assertAvgScoreMode(Query parentFilter, IndexSearcher searcher) throws IOException {
    MultiValueMode sortMode = MultiValueMode.AVG;
    // Children are everything that is not a parent.
    Query childFilter = Queries.not(parentFilter);
    // -127 is the missing-value placeholder for docs without "field2".
    XFieldComparatorSource nestedComparatorSource = createFieldComparator("field2", sortMode, -127, createNested(searcher, parentFilter, childFilter));
    Query query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("field2", nestedComparatorSource));
    TopDocs topDocs = searcher.search(query, 5, sort);
    assertThat(topDocs.totalHits, equalTo(7));
    assertThat(topDocs.scoreDocs.length, equalTo(5));
    assertThat(topDocs.scoreDocs[0].doc, equalTo(11));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).intValue(), equalTo(2));
    assertThat(topDocs.scoreDocs[1].doc, equalTo(3));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[2].doc, equalTo(7));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[3].doc, equalTo(15));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).intValue(), equalTo(3));
    assertThat(topDocs.scoreDocs[4].doc, equalTo(19));
    assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).intValue(), equalTo(4));
}
/**
 * FastVectorHighlighter must produce a fragment when the content field is stored with
 * term vectors (positions + offsets).
 *
 * @throws Exception never, otherwise the test fails.
 */
public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    assertThat(fragment, equalTo("the big <b>bad</b> dog"));

    // Fix: release the reader, writer and directory — previously all three leaked.
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * FastVectorHighlighter cannot highlight a field whose value is not stored, even with term
 * vectors present — the best fragment must be null.
 *
 * @throws Exception never, otherwise the test fails.
 */
public void testVectorHighlighterNoStore() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    // TYPE_NOT_STORED: term vectors only, no stored value to build a fragment from.
    FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());

    // Fix: release the reader, writer and directory — previously all three leaked.
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Full search entry point: validates paging arguments, normalizes the keyword, maps the
 * noAds/official flags and delegates to the two-phase totalhits/paging search.
 *
 * @return a pager of matching apps; the shared empty pager for blank/over-length keywords.
 */
@Override
public Pager<SearchApp> search(String q, int page, int rows, Integer noAds, Integer official) throws Exception {
    Assert.isTrue(page > 0, "invalid page " + page);
    Assert.isTrue(rows > 0 && rows <= appConfig.getSearchMaxNum(), "invalid rows " + rows);

    // Normalize: trim, lower-case, then collapse CJK/ASCII whitespace runs.
    if (q != null) {
        q = q.trim().toLowerCase();
    }
    if (q == null || q.isEmpty() || q.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", q);
        return emptyResultPager;
    }
    q = q.replaceAll(chSpacesRegex, space);

    final Boolean bNoAds = (noAds != null && noAds.intValue() == 1) ? Boolean.TRUE : null;
    final Boolean bOfficial = (official != null && official.intValue() == 1) ? Boolean.TRUE : null;

    final TopDocs topDocs = getSearchTotalhits(q, bNoAds, bOfficial);
    return searchInTotalhitsByQuery(topDocs, page, rows);
}
/**
 * Round-trips a legacy int field: the stored value must read back as "2" both via the id
 * term and via the prefix-coded numeric term.
 *
 * @throws Exception never, otherwise the test fails.
 */
public void testSimpleNumericOps() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));

    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    document.add(new LegacyIntField("test", 2, LegacyIntField.TYPE_STORED));
    indexWriter.addDocument(document);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    Document doc = searcher.doc(topDocs.scoreDocs[0].doc);
    IndexableField f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    // Look the value up by its prefix-coded numeric term representation.
    BytesRefBuilder bytes = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(2, 0, bytes);
    topDocs = searcher.search(new TermQuery(new Term("test", bytes.get())), 1);
    doc = searcher.doc(topDocs.scoreDocs[0].doc);
    f = doc.getField("test");
    assertThat(f.stringValue(), equalTo("2"));

    // Fix: close the reader and directory as well — only the writer was closed before.
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * An empty {@code _all} field must still result in an indexed document: a match-all search
 * finds exactly one doc with Lucene doc id 0.
 *
 * @throws Exception never, otherwise the test fails.
 */
public void testNoTokens() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));
    FieldType allFt = getAllFieldType();

    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "", 2.0f, allFt)); // empty value: produces no tokens
    indexWriter.addDocument(doc);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
    assertThat(docs.totalHits, equalTo(1));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));

    // Fix: the writer, reader and directory were never closed, leaking resources.
    reader.close();
    indexWriter.close();
    dir.close();
}
/**
 * Exercises SearchPhaseController.fillTopDocs over random shard counts, randomly nulling
 * out some shard results: null providers must be filled with the EMPTY_TOP_DOCS sentinel,
 * non-null providers with their own topDocs instance.
 */
public void testFillTopDocs() {
    final int maxIters = randomIntBetween(5, 15);
    for (int iters = 0; iters < maxIters; iters++) {
        TopDocs[] topDocs = new TopDocs[randomIntBetween(2, 100)];
        int numShards = topDocs.length;
        AtomicArray<QuerySearchResultProvider> resultProviderAtomicArray = generateQueryResults(numShards,
                Collections.emptyList(), 2, randomBoolean());
        if (randomBoolean()) {
            // Null out up to length-1 random slots to simulate shards without results.
            int maxNull = randomIntBetween(1, topDocs.length - 1);
            for (int i = 0; i < maxNull; i++) {
                resultProviderAtomicArray.set(randomIntBetween(0, resultProviderAtomicArray.length() - 1), null);
            }
        }
        SearchPhaseController.fillTopDocs(topDocs, resultProviderAtomicArray.asList(), Lucene.EMPTY_TOP_DOCS);
        for (int i = 0; i < topDocs.length; i++) {
            assertNotNull(topDocs[i]);
            if (topDocs[i] == Lucene.EMPTY_TOP_DOCS) {
                // Sentinel slots must correspond to shards whose provider was nulled.
                assertNull(resultProviderAtomicArray.get(i));
            } else {
                assertNotNull(resultProviderAtomicArray.get(i));
                assertNotNull(resultProviderAtomicArray.get(i).queryResult());
                // Identity check: the exact TopDocs instance from the shard result is used.
                assertSame(resultProviderAtomicArray.get(i).queryResult().topDocs(), topDocs[i]);
            }
        }
    }
}
/**
 * Demo entry point: opens the index at {@code INDEX_DIRECTORY}, runs a query for
 * "shakespeare" over {@code FIELD_CONTENTS} and prints up to five hits with path and score.
 *
 * @param args unused.
 * @throws Exception on any index or query-parsing failure.
 */
public static void main(String[] args) throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_DIRECTORY)));
    // Fix: close the reader (and with it the directory) when done — it leaked before.
    try {
        IndexSearcher indexSearcher = new IndexSearcher(reader);

        Analyzer analyzer = new StandardAnalyzer();
        QueryParser queryParser = new QueryParser(FIELD_CONTENTS, analyzer);
        String searchString = "shakespeare";
        Query query = queryParser.parse(searchString);

        TopDocs results = indexSearcher.search(query, 5);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = indexSearcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
        }
    } finally {
        reader.close();
    }
}
private void synTokenQuery(String search, final int numbOfResults, final double minLuceneScore, Map<String, Float> result, IndexSearcher searcher) throws ParseException, IOException { QueryParser parser = new QueryParser(Version.LUCENE_46, "surfaceFormTokens", new StandardAnalyzer(Version.LUCENE_46)); search = QueryParser.escape(search); Query q = parser.parse(search); /* * Works only in String field!! */ // Query q = new FuzzyQuery(new Term("surfaceFormTokens", // QueryParser.escape(search)), 2); TopDocs top = searcher.search(q, numbOfResults); for (ScoreDoc doc : top.scoreDocs) { if (doc.score >= minLuceneScore) { final String key = searcher.doc(doc.doc).get("conceptID"); if (result.getOrDefault(key, 0f) < doc.score) { result.put(key, doc.score); } } } }
/** Modifies incoming TopDocs (in) by replacing the top hits with resorted's hits, and then resorting all hits. */
private TopDocs combine(TopDocs in, TopDocs resorted, QueryRescoreContext ctx) {
    // Overwrite the top of the first-pass results with the rescored subset.
    System.arraycopy(resorted.scoreDocs, 0, in.scoreDocs, 0, resorted.scoreDocs.length);
    if (in.scoreDocs.length > resorted.scoreDocs.length) {
        // These hits were not rescored (beyond the rescore window), so we treat them the same
        // as a hit that did get rescored but did not match the 2nd pass query:
        for (int i = resorted.scoreDocs.length; i < in.scoreDocs.length; i++) {
            // TODO: shouldn't this be up to the ScoreMode? I.e., we should just invoke
            // ScoreMode.combine, passing 0.0f for the secondary score?
            in.scoreDocs[i].score *= ctx.queryWeight();
        }
        // TODO: this is wrong, i.e. we are comparing apples and oranges at this point. It would
        // be better if we always rescored all incoming first pass hits, instead of allowing
        // recoring of just the top subset:
        Arrays.sort(in.scoreDocs, SCORE_DOC_COMPARATOR);
    }
    // NOTE(review): unlike the sibling variant, this one does not refresh in.maxScore after
    // the resort — confirm whether callers rely on the stale value.
    return in;
}
/**
 * Releases shard targets that are not used in the docsIdsToLoad.
 *
 * @param queryResults per-shard query-phase results.
 * @param docIdsToLoad per-shard doc ids selected for the fetch phase; a null entry means
 *        no doc from that shard made the global top docs.
 */
protected void releaseIrrelevantSearchContexts(AtomicArray<? extends QuerySearchResultProvider> queryResults,
        AtomicArray<IntArrayList> docIdsToLoad) {
    if (docIdsToLoad == null) {
        return;
    }
    // we only release search context that we did not fetch from if we are not scrolling
    if (request.scroll() == null) {
        for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : queryResults.asList()) {
            final TopDocs topDocs = entry.value.queryResult().queryResult().topDocs();
            if (topDocs != null && topDocs.scoreDocs.length > 0 // the shard had matches
                    && docIdsToLoad.get(entry.index) == null) { // but none of them made it to the global top docs
                try {
                    DiscoveryNode node = nodes.get(entry.value.queryResult().shardTarget().nodeId());
                    sendReleaseSearchContext(entry.value.queryResult().id(), node);
                } catch (Throwable t1) {
                    // Best-effort cleanup: a failed release must not fail the search request.
                    logger.trace("failed to release context", t1);
                }
            }
        }
    }
}
/**
 * Runs the given query against the index directory and returns a printable result list:
 * a summary line (hit count + elapsed time) followed by FILE_PATH/FILE_NAME lines per hit.
 *
 * @param searchQuery the raw query string.
 * @return the formatted result lines.
 * @throws IOException if the index cannot be read.
 * @throws ParseException if the query cannot be parsed.
 */
public ArrayList<String> search(String searchQuery) throws IOException, ParseException {
    ArrayList<String> retList = new ArrayList<String>();
    searcher = new Searcher(indexDir);
    // Fix: close the searcher even when search/getDocument throws (previously it leaked
    // on any exception because close() was only reached on the success path).
    try {
        long startTime = System.currentTimeMillis();
        TopDocs hits = searcher.search(searchQuery);
        long endTime = System.currentTimeMillis();

        retList.add(hits.totalHits + " documents found. Time :" + (endTime - startTime));
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = searcher.getDocument(scoreDoc);
            retList.add("FILE_PATH: " + doc.get(LuceneConstants.FILE_PATH));
            retList.add("FILE_NAME: " + doc.get(LuceneConstants.FILE_NAME));
        }
    } finally {
        searcher.close();
    }
    return retList;
}
/**
 * Writes one batch of search results to disk: optionally a PMID file and/or an XML file,
 * each produced from the corresponding stored index field of every hit in the batch.
 *
 * @param indexReader reader to load stored documents from.
 * @param topDocs the full, already-collected result set being batched.
 * @param batch zero-based batch number; selects hits [batch*batchSize, ...).
 * @param outputBaseFormat format string (one %d for the batch) for the output directory.
 * @param pmidOutputFormat file-name format for the PMID output, or null to skip it.
 * @param xmlOutputFormat file-name format for the XML output, or null to skip it.
 */
private void outputBatch(IndexReader indexReader, TopDocs topDocs, int batch, String outputBaseFormat,
        String pmidOutputFormat, String xmlOutputFormat) throws CorruptIndexException, IOException {
    String outputBasePath = String.format(outputBaseFormat, batch);
    OutputDirectory outputBaseDir = new OutputDirectory(outputBasePath);
    // Half-open hit range [start, end) belonging to this batch; clamped to the total hits.
    int start = batch * batchSize;
    int end = Math.min(start + batchSize, topDocs.totalHits);
    if (pmidOutputFormat != null) {
        try (PrintStream out = open(batch, outputBaseDir, pmidOutputFormat)) {
            for (int d = start; d < end; ++d) {
                outputBatchDocument(indexReader, topDocs, out, PubMedIndexField.PMID, d);
            }
        }
    }
    if (xmlOutputFormat != null) {
        try (PrintStream out = open(batch, outputBaseDir, xmlOutputFormat)) {
            // Wrap the documents so each batch file is a well-formed XML document.
            out.println(XML_HEADER);
            for (int d = start; d < end; ++d) {
                outputBatchDocument(indexReader, topDocs, out, PubMedIndexField.XML, d);
            }
            out.println(XML_FOOTER);
        }
    }
}
/**
 * Search sample.
 *
 * @param directory the index directory.
 * @throws IOException in case of I/O failure.
 * @throws ParseException in case of Query parse exception.
 */
public static void search(Directory directory) throws IOException, ParseException {
    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    final Query query = new QueryParser("title", new StandardAnalyzer()).parse("title:Solr");
    final TopDocs matches = searcher.search(query, 10);

    System.out.println("Search returned " + matches.totalHits + " matches.");
    // Print each hit's stored fields, one banner-separated record per document.
    for (final ScoreDoc scoreDoc : matches.scoreDocs) {
        final Document doc = luceneDoc(scoreDoc, searcher);
        System.out.println("-------------------------------------");
        System.out.println("ID:\t" + doc.get("id"));
        System.out.println("TITLE:\t" + doc.get("title"));
        System.out.println("AUTHOR:\t" + doc.get("author"));
        System.out.println("SCORE:\t" + doc.get("score"));
    }
}
/**
 * Executes a query for all documents in the index.
 *
 * @throws Exception never, otherwise the test fails.
 */
@Test
public void findAll() throws Exception {
    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    final Query query = new QueryParser("title", new StandardAnalyzer()).parse("Solr");
    final TopDocs matches = searcher.search(query, 10);

    assertEquals(3, matches.totalHits);

    // All three indexed documents must be found, regardless of score order.
    final Set<String> expected = new HashSet<String>();
    expected.add("1");
    expected.add("2");
    expected.add("3");

    final Set<String> result = new HashSet<String>();
    for (final ScoreDoc scoreDoc : matches.scoreDocs) {
        result.add(luceneDoc(scoreDoc.doc, searcher).get("id"));
    }
    assertEquals(expected, result);
}
/**
 * Search all books of a given author.
 *
 * @throws Exception never, otherwise the test fails.
 */
@Test
public void findByAuthorSurname() throws Exception {
    final IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    final Query query = new QueryParser("author", new StandardAnalyzer()).parse("Gazzarini");
    final TopDocs matches = searcher.search(query, 10);

    assertEquals(1, matches.totalHits);

    // Exactly one hit: take the first score doc and resolve its stored id.
    final String id = Arrays.stream(matches.scoreDocs)
            .map(scoreDoc -> luceneDoc(scoreDoc.doc, searcher))
            .map(doc -> doc.get("id"))
            .findFirst()
            .get();
    assertEquals("1", id);
}
/**
 * Removes adjacent duplicate doc ids from an already-sorted {@link TopDocs}, keeping the
 * first occurrence of each run and decrementing totalHits per dropped duplicate.
 *
 * @param srcTopDocs sorted source top docs; may be null.
 * @return a new TopDocs without adjacent duplicates, or null when the input is null.
 * @throws IOException declared for interface compatibility.
 */
public static TopDocs mergeDuplicateDocId(TopDocs srcTopDocs) throws IOException {
    if (srcTopDocs == null) {
        return null;
    }
    final ScoreDoc[] src = srcTopDocs.scoreDocs;
    final List<ScoreDoc> unique = new ArrayList<ScoreDoc>(src.length);
    int totalHits = srcTopDocs.totalHits;

    ScoreDoc lastKept = null;
    for (final ScoreDoc current : src) {
        if (lastKept != null && lastKept.doc == current.doc) {
            totalHits--; // collapse duplicate of the previously kept hit
            continue;
        }
        unique.add(current);
        lastKept = current;
    }

    final ScoreDoc[] hits = unique.toArray(new ScoreDoc[unique.size()]);
    return new TopDocs(totalHits, hits, srcTopDocs.getMaxScore());
}
/**
 * Quick-tips prefix search: finds apps whose name field starts with the given keywords,
 * restricted to the SOFT..GAME catalog range and sorted by download rank descending.
 *
 * @param keywords the keyword prefix.
 * @return the matching score docs, or null for empty/over-length keywords.
 * @throws IOException if the index cannot be read.
 */
@Override
public ScoreDoc[] prefixSearch(String keywords) throws IOException {
    if (StringUtils.isEmpty(keywords) || keywords.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", keywords);
        return null;
    }

    // Highest download rank first.
    final Sort rankSort = new Sort(new SortField("downloadRank", SortField.INT, true));
    final PrefixQuery namePrefix = new PrefixQuery(new Term(fieldName, keywords));
    final NumericRangeQuery<Integer> catalogRange = NumericRangeQuery.newIntRange("catalog",
            (int) EnumCatalog.SOFT.getCatalog(), (int) EnumCatalog.GAME.getCatalog(), true, true);

    final BooleanQuery combined = new BooleanQuery();
    combined.add(catalogRange, Occur.MUST);
    combined.add(namePrefix, Occur.MUST);

    final TopDocs topDocs = quickTipsSearcher.search(combined, appConfig.getQuickTipsNum() * 2, rankSort);
    return topDocs.scoreDocs;
}
/**
 * Mobile search entry point: validates the keyword and row count, maps the noAds/official
 * flags and delegates to the two-phase totalhits/paging search.
 *
 * @return a pager of matching apps; a fresh empty pager for blank/over-length keywords.
 */
@Override
public Pager<MobileSearchApp> search(final String q, Keyword keywordModel, Integer page, Integer rows,
        Integer noAds, Integer official) throws Exception {
    if (StringUtils.isEmpty(q) || q.length() > appConfig.getKeywordMaxLength()) {
        logger.error("empty keywords or over-length! {}", q);
        return new Pager<MobileSearchApp>();
    }
    Assert.isTrue(rows > 0 && rows <= 30, "invalid rows " + rows);

    final Boolean bNoAds = (noAds != null && noAds.intValue() == 1) ? Boolean.TRUE : null;
    final Boolean bOfficial = (official != null && official.intValue() == 1) ? Boolean.TRUE : null;

    final TopDocs topDocs = getSearchTotalhits(q, keywordModel, null, bNoAds, bOfficial);
    return searchInTotalhitsByQuery(q, page, rows, this.indexSearcher, topDocs);
}
/**
 * Removes adjacent duplicate doc ids from an already-sorted {@link TopDocs}, keeping the
 * first occurrence of each run and decrementing totalHits per dropped duplicate.
 *
 * @param srcTopDocs sorted source top docs; may be null.
 * @return a new TopDocs without adjacent duplicates, or null when the input is null.
 * @throws IOException declared for interface compatibility.
 */
public static TopDocs mergeDuplicateDocId(TopDocs srcTopDocs) throws IOException {
    if (srcTopDocs == null) {
        return null;
    }
    final ScoreDoc[] scoreDocs = srcTopDocs.scoreDocs;
    int totalHits = srcTopDocs.totalHits;
    // Fix: presize to the source length (consistent with the sibling implementation)
    // to avoid repeated ArrayList growth.
    List<ScoreDoc> scoreDocList = new ArrayList<ScoreDoc>(scoreDocs.length);
    ScoreDoc preScoreDoc = null;
    int scoreDocSize = 0;
    for (int i = 0; i < scoreDocs.length; i++) {
        if (i > 0) {
            // Compare against the last kept hit; the input is sorted, so duplicates are adjacent.
            preScoreDoc = scoreDocList.get(scoreDocSize - 1);
            if (preScoreDoc.doc == scoreDocs[i].doc) {
                totalHits--;
                continue;
            }
        }
        scoreDocList.add(scoreDocs[i]);
        scoreDocSize++;
    }
    final ScoreDoc[] hits = new ScoreDoc[scoreDocSize];
    scoreDocList.toArray(hits);
    return new TopDocs(totalHits, hits, srcTopDocs.getMaxScore());
}
/**
 * Executes the given {@link Query} and returns a {@link DocumentsSearchResult} with
 * the found documents and meta information about them.
 *
 * @param query the query to execute
 * @param options the additional options to execute the query.
 * @return {@link DocumentsSearchResult} with the found {@link Document}.
 */
public DocumentsSearchResult search(final Query query, final SearchOptions options) {
    final DocumentsSearchResult result = new DocumentsSearchResult();
    final TopDocs topDocs = getTopDocs(query, options);
    if(topDocs != null) {
        result.setTotalHits(topDocs.totalHits);
        final Index index = IndexManager.getInstance().getIndex();
        final IndexReader reader = index.getIndexReader();
        try {
            LOGGER.debug("Found these documents (total = {}) for query '{}':", topDocs.totalHits, query);
            int counter = 0;
            for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
                final Document document = reader.document(scoreDoc.doc);
                if(document != null) {
                    LOGGER.debug("- Score: " + StringUtils.rightPad(Float.toString(scoreDoc.score), 8)
                            + " Doc: " + document.get(IIndexElement.FIELD_ID));
                    result.addDocument(document, scoreDoc.score);
                }
                // if it's the last document -> set ScoreDoc to result (used as the
                // continuation point for paging via searchAfter-style queries)
                if(++counter == topDocs.scoreDocs.length) {
                    result.setLastScoreDoc(scoreDoc);
                }
            }
        } catch(final IOException e) {
            // Best-effort: partial results already added are kept; the failure is logged.
            LOGGER.error("Can't get documents for topdocs.", e);
        }
    }
    return result;
}
/**
 * Indexes documents into a remote Redis instance through a {@link JedisPool}-backed
 * {@link RedisDirectory}, then searches the same remote index and logs the timings.
 *
 * @throws IOException if indexing or searching fails
 */
public void testRedisDirectoryWithRemoteJedisPool() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "10.97.19.55", 6379, Constants.TIME_OUT);
    try {
        RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool));
        IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
        for (int i = 0; i < 5000000; i++) {
            indexWriter.addDocument(addDocument(i));
        }
        indexWriter.commit();
        indexWriter.close();
        redisDirectory.close();
        long end = System.currentTimeMillis();
        log.error("RedisDirectoryWithRemoteJedisPool consumes {}s!", (end - start) / 1000);
        start = System.currentTimeMillis();
        // BUG FIX: search the same remote Redis the index was written to
        // (was "localhost", which would never see the documents indexed above),
        // and close the search directory/reader when done.
        try (RedisDirectory searchDirectory = new RedisDirectory(new JedisStream("10.97.19.55", 6379));
             DirectoryReader reader = DirectoryReader.open(searchDirectory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            int total = 0;
            for (int i = 0; i < 1000000; i++) {
                TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
                TopDocs search = indexSearcher.search(key1, 10);
                total += search.totalHits;
            }
            System.out.println(total);
        }
        end = System.currentTimeMillis();
        log.error("RedisDirectoryWithRemoteJedisPool search consumes {}ms!", (end - start));
    } finally {
        // The pool owns the underlying connections; release them even on failure.
        jedisPool.close();
    }
}
public void testRedisDirectoryWithJedisPool() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); //indexWriterConfig.setInfoStream(System.out); //indexWriterConfig.setRAMBufferSizeMB(2048); //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); //logByteSizeMergePolicy.setMinMergeMB(1); //logByteSizeMergePolicy.setMaxMergeMB(64); //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64); //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false); //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig(); //获取连接等待时间 //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s超时时间 JedisPool jedisPool = new JedisPool(new JedisPoolConfig(), "localhost", 6379, Constants.TIME_OUT); RedisDirectory redisDirectory = new RedisDirectory(new JedisPoolStream(jedisPool)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(new RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search = indexSearcher.search(key1, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedisPool search consumes {}ms!", (end - start)); }
public void testRedisDirectoryWithJedis() throws IOException { long start = System.currentTimeMillis(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig .OpenMode.CREATE); //indexWriterConfig.setInfoStream(System.out); //indexWriterConfig.setRAMBufferSizeMB(2048); //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy(); //logByteSizeMergePolicy.setMinMergeMB(1); //logByteSizeMergePolicy.setMaxMergeMB(64); //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64); //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false); //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig(); //获取连接等待时间 //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s超时时间 RedisDirectory redisDirectory = new RedisDirectory(new JedisStream("localhost", 6379)); IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig); for (int i = 0; i < 10000000; i++) { indexWriter.addDocument(addDocument(i)); } indexWriter.commit(); indexWriter.close(); redisDirectory.close(); long end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis consumes {}s!", (end - start) / 1000); start = System.currentTimeMillis(); IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(new RedisDirectory(new JedisStream("localhost", 6379)))); int total = 0; for (int i = 0; i < 10000000; i++) { TermQuery key1 = new TermQuery(new Term("key1", "key" + i)); TopDocs search = indexSearcher.search(key1, 10); total += search.totalHits; } System.out.println(total); end = System.currentTimeMillis(); log.error("RedisDirectoryWithJedis search consumes {}ms!", (end - start)); }
/**
 * Creates a new instance holding the hit window ({@code from}/{@code size})
 * together with the raw Lucene {@link TopDocs} and the corresponding
 * {@link SearchHits}.
 *
 * @param name                the name of this aggregation result
 * @param from                offset of the first hit in the window
 * @param size                maximum number of hits in the window
 * @param topDocs             the raw Lucene hits backing this result
 * @param searchHits          the fetched hits corresponding to {@code topDocs}
 * @param pipelineAggregators pipeline aggregators attached to this result
 * @param metaData            arbitrary metadata carried along with the result
 */
public InternalTopHits(String name, int from, int size, TopDocs topDocs, SearchHits searchHits,
        List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
    super(name, pipelineAggregators, metaData);
    this.from = from;
    this.size = size;
    this.topDocs = topDocs;
    this.searchHits = searchHits;
}
public void getMatches(List<ScoreDoc> allDocs) { TopDocs topDocs = tdc.topDocs(); ScoreDoc[] sd = topDocs.scoreDocs; matchedDocs = sd.length; for (ScoreDoc scoreDoc : sd) { // A bit of a hack to (ab)use shardIndex property here to // hold a bucket ID but avoids allocating extra data structures // and users should have bigger concerns if bucket IDs // exceed int capacity.. scoreDoc.shardIndex = (int) parentBucket; } allDocs.addAll(Arrays.asList(sd)); }
/**
 * Collects the top child (nested) documents that match this inner-hits query
 * for the hit currently being fetched.
 *
 * <p>The query is restricted to the children of the current hit by combining
 * the inner query with a {@code NestedChildrenQuery} built from a parent
 * filter (either the generic non-nested filter or the parent object's nested
 * type filter) and the child object's nested type filter.</p>
 *
 * @param context    the current search context (supplies searcher and caches)
 * @param hitContext the hit whose nested children are being fetched
 * @return the top matching child docs; when {@code size() == 0} only the
 *         total count is computed and no score docs are returned
 * @throws IOException if the underlying search fails
 */
@Override
public TopDocs topDocs(SearchContext context, FetchSubPhase.HitContext hitContext) throws IOException {
    // Parent filter: when there is no explicit parent object mapper, fall back
    // to the filter matching all non-nested (root) documents.
    Query rawParentFilter;
    if (parentObjectMapper == null) {
        rawParentFilter = Queries.newNonNestedFilter();
    } else {
        rawParentFilter = parentObjectMapper.nestedTypeFilter();
    }
    BitSetProducer parentFilter = context.bitsetFilterCache().getBitSetProducer(rawParentFilter);
    Query childFilter = childObjectMapper.nestedTypeFilter();
    // Restrict the inner query to children of the hit being fetched.
    Query q = Queries.filtered(query(), new NestedChildrenQuery(parentFilter, childFilter, hitContext));
    if (size() == 0) {
        // Count-only mode: no hits requested, so skip collection entirely.
        return new TopDocs(context.searcher().count(q), Lucene.EMPTY_SCORE_DOCS, 0);
    } else {
        // Never collect more than the index holds.
        int topN = Math.min(from() + size(), context.searcher().getIndexReader().maxDoc());
        TopDocsCollector topDocsCollector;
        if (sort() != null) {
            try {
                topDocsCollector = TopFieldCollector.create(sort().sort, topN, true, trackScores(), trackScores());
            } catch (IOException e) {
                throw ExceptionsHelper.convertToElastic(e);
            }
        } else {
            topDocsCollector = TopScoreDocCollector.create(topN);
        }
        try {
            context.searcher().search(q, topDocsCollector);
        } finally {
            // Release per-collection resources regardless of search outcome.
            clearReleasables(Lifetime.COLLECTION);
        }
        // Slice out the requested [from, from+size) window.
        return topDocsCollector.topDocs(from(), size());
    }
}