/**
 * Checks that {@code FastVectorHighlighter} returns a highlighted fragment for a field that is
 * both stored and indexed with term vectors (including positions and offsets).
 *
 * <p>A single document is indexed, looked up by its {@code _id} term, and the {@code content}
 * field is highlighted for the term {@code bad}; the expected fragment wraps the match in
 * {@code <b>} tags.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testVectorHighlighter() throws Exception {
    // try-with-resources releases the reader, then the writer, then the directory,
    // even when one of the assertions below throws.
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));

        // Stored text field that additionally records term vectors with positions and offsets.
        FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
        vectorsType.setStoreTermVectors(true);
        vectorsType.setStoreTermVectorPositions(true);
        vectorsType.setStoreTermVectorOffsets(true);
        document.add(new Field("content", "the big bad dog", vectorsType));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
                    reader, topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, notNullValue());
            assertThat(fragment, equalTo("the big <b>bad</b> dog"));
        }
    }
}
/**
 * Checks that {@code FastVectorHighlighter} returns no fragment when the highlighted field has
 * term vectors (with positions and offsets) but is not stored.
 *
 * <p>A single document is indexed, looked up by its {@code _id} term, and the {@code content}
 * field is highlighted for the term {@code bad}; the result is expected to be {@code null}.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testVectorHighlighterNoStore() throws Exception {
    // try-with-resources releases the reader, then the writer, then the directory,
    // even when one of the assertions below throws.
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));

        // Not-stored text field that still records term vectors with positions and offsets.
        FieldType vectorsType = new FieldType(TextField.TYPE_NOT_STORED);
        vectorsType.setStoreTermVectors(true);
        vectorsType.setStoreTermVectorPositions(true);
        vectorsType.setStoreTermVectorOffsets(true);
        document.add(new Field("content", "the big bad dog", vectorsType));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
                    reader, topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}
/**
 * Checks that {@code FastVectorHighlighter} returns no fragment when the highlighted field is a
 * plain stored {@code TextField} with no term-vector settings applied.
 *
 * <p>A single document is indexed, looked up by its {@code _id} term, and the {@code content}
 * field is highlighted for the term {@code bad}; the result is expected to be {@code null}.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testVectorHighlighterNoTermVector() throws Exception {
    // try-with-resources releases the reader, then the writer, then the directory,
    // even when one of the assertions below throws.
    try (Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        Document document = new Document();
        document.add(new TextField("_id", "1", Field.Store.YES));
        document.add(new TextField("content", "the big bad dog", Field.Store.YES));
        indexWriter.addDocument(document);

        try (IndexReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
            assertThat(topDocs.totalHits, equalTo(1));

            FastVectorHighlighter highlighter = new FastVectorHighlighter();
            String fragment = highlighter.getBestFragment(
                    highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
                    reader, topDocs.scoreDocs[0].doc, "content", 30);
            assertThat(fragment, nullValue());
        }
    }
}
static void searchIndex(String filename) throws Exception { QueryParser parser = new QueryParser(Version.LUCENE_41, F, analyzer); Query query = parser.parse(QUERY); FastVectorHighlighter highlighter = getHighlighter(); // #C FieldQuery fieldQuery = highlighter.getFieldQuery(query); // #D IndexSearcher searcher = new IndexSearcher(dir); TopDocs docs = searcher.search(query, 10); FileWriter writer = new FileWriter(filename); writer.write("<html>"); writer.write("<body>"); writer.write("<p>QUERY : " + QUERY + "</p>"); for (ScoreDoc scoreDoc : docs.scoreDocs) { String snippet = highlighter.getBestFragment( // #E fieldQuery, searcher.getIndexReader(), // #E scoreDoc.doc, F, 100); // #E if (snippet != null) { writer.write(scoreDoc.doc + " : " + snippet + "<br/>"); } } writer.write("</body></html>"); writer.close(); }
/**
 * Replaces the {@code highlighter} field with a fresh {@code FastVectorHighlighter} (both
 * constructor flags set to {@code false}) and returns a {@code BenchmarkHighlighter} that
 * delegates to it.
 *
 * @param q the query whose matches are highlighted
 * @return a benchmark highlighter whose {@code doHighlight} reports the number of fragments
 *         produced, or 0 when the underlying highlighter returns {@code null}
 */
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new FastVectorHighlighter(false, false);
    // Final alias so the anonymous class below can capture the query.
    final Query capturedQuery = q;
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(IndexReader reader, int doc, String field, Document document,
                               Analyzer analyzer, String text) throws Exception {
            // The field query is rebuilt on every call, against the reader passed in.
            final String[] bestFragments = highlighter.getBestFragments(
                    highlighter.getFieldQuery(capturedQuery, reader),
                    reader, doc, field, fragSize, maxFrags);
            if (bestFragments == null) {
                return 0;
            }
            return bestFragments.length;
        }
    };
}
/**
 * Highlights one field of one document with the given {@code FastVectorHighlighter} and records
 * the result in {@code docSummaries}.
 *
 * <p>Fragment size and snippet count are read per-field from the request parameters (defaults
 * 100 and 1). When no snippet is produced, {@code alternateField} is invoked instead.
 *
 * @param highlighter  the highlighter to use
 * @param fieldQuery   the field query to highlight with
 * @param req          the current request; supplies the parameters and the index reader
 * @param docSummaries receives {@code fieldName -> snippets} when snippets are found
 * @param docId        id of the document passed to the highlighter
 * @param doc          the loaded document, handed to {@code alternateField} on fallback
 * @param fieldName    the field to highlight
 * @throws IOException if the highlighter fails to read from the index
 */
private void doHighlightingByFastVectorHighlighter(FastVectorHighlighter highlighter, FieldQuery fieldQuery,
        SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName)
        throws IOException {
    final SolrParams params = req.getParams();
    final SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
    final int fragSize = params.getFieldInt(fieldName, HighlightParams.FRAGSIZE, 100);
    final int maxSnippets = params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1);

    final String[] snippets = highlighter.getBestFragments(
            fieldQuery,
            req.getSearcher().getIndexReader(),
            docId,
            fieldName,
            fragSize,
            maxSnippets,
            getFragListBuilder(fieldName, params),
            getFragmentsBuilder(fieldName, params),
            solrFb.getPreTags(params, fieldName),
            solrFb.getPostTags(params, fieldName),
            getEncoder(fieldName, params));

    // Nothing highlighted: fall back to the alternate-field handling.
    if (snippets == null || snippets.length == 0) {
        alternateField(docSummaries, params, doc, fieldName, fieldName, req);
        return;
    }
    docSummaries.add(fieldName, snippets);
}
static FastVectorHighlighter getHighlighter() { FragListBuilder fragListBuilder = new SimpleFragListBuilder(); // #F FragmentsBuilder fragmentBuilder = // #F new ScoreOrderFragmentsBuilder( // #F BaseFragmentsBuilder.COLORED_PRE_TAGS, // #F BaseFragmentsBuilder.COLORED_POST_TAGS); // #F return new FastVectorHighlighter(true, true, // #F fragListBuilder, fragmentBuilder); // #F }
/** * Generates a list of Highlighted query fragments for each item in a list of documents, or returns null if * highlighting is disabled. * * @param docs * query results * @param query * the query * @param req * the current request * @param defaultFields * default list of fields to summarize * @return NamedList containing a NamedList for each document, which in turns contains sets (field, summary) pairs. */ @Override @SuppressWarnings("unchecked") public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { SolrParams params = req.getParams(); if (!isHighlightingEnabled(params)) return null; SolrIndexSearcher searcher = req.getSearcher(); IndexSchema schema = searcher.getSchema(); NamedList fragments = new SimpleOrderedMap(); String[] fieldNames = getHighlightFields(query, req, defaultFields); Set<String> fset = new HashSet<>(); { // pre-fetch documents using the Searcher's doc cache for (String f : fieldNames) { fset.add(f); } // fetch unique key if one exists. 
SchemaField keyField = schema.getUniqueKeyField(); if (null != keyField) fset.add(keyField.getName()); } // get FastVectorHighlighter instance out of the processing loop FastVectorHighlighter fvh = new FastVectorHighlighter( // FVH cannot process hl.usePhraseHighlighter parameter per-field basis params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool(HighlightParams.FIELD_MATCH, false)); fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader()); // Highlight each document DocIterator iterator = docs.iterator(); for (int i = 0; i < docs.size(); i++) { int docId = iterator.nextDoc(); Document doc = getDocument(searcher.doc(docId, fset), req); NamedList docSummaries = new SimpleOrderedMap(); for (String fieldName : fieldNames) { fieldName = fieldName.trim(); if (useFastVectorHighlighter(params, schema, fieldName)) doHighlightingByFastVectorHighlighter(fvh, fieldQuery, req, docSummaries, docId, doc, fieldName); else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName); } String printId = schema.printableUniqueKey(doc); fragments.add(printId == null ? null : printId, docSummaries); } return fragments; }