/** * ���� * @param analyzer * @param searcher * @throws IOException * @throws InvalidTokenOffsetsException */ public void searToHighlighterCss(Analyzer analyzer,IndexSearcher searcher) throws IOException, InvalidTokenOffsetsException{ Term term =new Term("Content", new String("免费".getBytes(),"GBK"));//��ѯ��������˼����Ҫ�����Ա�Ϊ��������� TermQuery query =new TermQuery(term); TopDocs docs =searcher.search(query, 10);//���� /**�Զ����ע�����ı���ǩ*/ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hightlighterCss\">","</span>"); /**����QueryScorer*/ QueryScorer scorer=new QueryScorer(query); /**����Fragmenter*/ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); Highlighter highlight=new Highlighter(formatter,scorer); highlight.setTextFragmenter(fragmenter); for(ScoreDoc doc:docs.scoreDocs){//��ȡ���ҵ��ĵ���������� Document document =searcher.doc(doc.doc); String value = document.getField("Content").toString(); TokenStream tokenStream = analyzer.tokenStream("Content", new StringReader(value)); String str1 = highlight.getBestFragment(tokenStream, value); System.out.println(str1); } }
/**
 * Builds one {@code LumongoHighlighter} per highlight request for the given query.
 *
 * @param highlightRequests the highlight requests to build highlighters for
 * @param q the query whose matches should be highlighted
 * @return {@code Collections.emptyList()} when there are no requests, otherwise a new
 *         list holding one configured highlighter per request, in request order
 */
private List<LumongoHighlighter> getHighlighterList(List<HighlightRequest> highlightRequests, Query q) {
    if (highlightRequests.isEmpty()) {
        return Collections.emptyList();
    }

    List<LumongoHighlighter> result = new ArrayList<>();
    for (HighlightRequest request : highlightRequests) {
        // Score against the requested field only, with multi-term query expansion enabled.
        QueryScorer scorer = new QueryScorer(q, request.getField());
        scorer.setExpandMultiTermQuery(true);

        Fragmenter spanFragmenter = new SimpleSpanFragmenter(scorer, request.getFragmentLength());
        LumongoHighlighter requestHighlighter = new LumongoHighlighter(
                new SimpleHTMLFormatter(request.getPreTag(), request.getPostTag()),
                scorer,
                request);
        requestHighlighter.setTextFragmenter(spanFragmenter);

        result.add(requestHighlighter);
    }
    return result;
}
/**
 * Searches the book index for "action" in the "title" field and logs the best
 * highlighted title fragment of each of the top 10 hits.
 *
 * @throws Exception if searching, tokenizing or highlighting fails
 */
public void testHits() throws Exception {
    IndexSearcher indexSearcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery titleQuery = new TermQuery(new Term("title", "action"));
    TopDocs topDocs = indexSearcher.search(titleQuery, 10);

    // The same scorer drives both fragment scoring and fragment boundaries.
    QueryScorer titleScorer = new QueryScorer(titleQuery, "title");
    Highlighter titleHighlighter = new Highlighter(titleScorer);
    titleHighlighter.setTextFragmenter(new SimpleSpanFragmenter(titleScorer));

    Analyzer simpleAnalyzer = new SimpleAnalyzer();
    ScoreDoc[] matches = topDocs.scoreDocs;
    for (int i = 0; i < matches.length; i++) {
        int docId = matches[i].doc;
        StoredDocument stored = indexSearcher.doc(docId);
        String titleText = stored.get("title");
        TokenStream titleTokens = TokenSources.getAnyTokenStream(
                indexSearcher.getIndexReader(), docId, "title", stored, simpleAnalyzer);
        String bestFragment = titleHighlighter.getBestFragment(titleTokens, titleText);
        LOGGER.info(bestFragment);
    }
}
public static void search(String indexDir, String q) throws Exception { Directory dir = FSDirectory.open(Paths.get(indexDir)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is = new IndexSearcher(reader); // Analyzer analyzer=new StandardAnalyzer(); // 标准分词器 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("desc", analyzer); Query query = parser.parse(q); long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); long end = System.currentTimeMillis(); System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录"); QueryScorer scorer = new QueryScorer(query); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer); highlighter.setTextFragmenter(fragmenter); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("city")); System.out.println(doc.get("desc")); String desc = doc.get("desc"); if (desc != null) { TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc)); System.out.println(highlighter.getBestFragment(tokenStream, desc)); } } reader.close(); }
/**
 * Creates a helper that highlights matches of {@code query}.
 *
 * <p>The highlighter is built from a default {@code SimpleHTMLFormatter}, a
 * {@code MinimalHTMLEncoder} and a {@code QueryScorer} for the query, and starts
 * out with a span fragmenter of {@code DEFAULT_FRAGMENT_LENGTH}.
 *
 * @param query the query whose matches are to be highlighted
 * @param analyzer the analyzer stored on this helper
 */
HighlightingHelper(Query query, Analyzer analyzer) {
    this.analyzer = analyzer;
    this.scorer = new QueryScorer(query);
    this.fragmentLength = DEFAULT_FRAGMENT_LENGTH;

    this.highlighter = new Highlighter(new SimpleHTMLFormatter(), new MinimalHTMLEncoder(), this.scorer);
    this.highlighter.setTextFragmenter(new SimpleSpanFragmenter(this.scorer, this.fragmentLength));
}
void setFragmentLength(int length) { if (length < 1) { throw new AssertionError("length must be at least 1"); } // Create a new fragmenter if the length is different. if (fragmentLength != length) { Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, length); highlighter.setTextFragmenter(fragmenter); fragmentLength = length; } }
/**
 * Highlights the configured keywords inside a field value.
 *
 * <p>The keywords are parsed into a query with {@code fieldName} as the default
 * field, the value is tokenized with the index analyzer, and up to
 * {@code maxFragments} best fragments are joined with {@code "..."}.
 *
 * @param fieldName name of the field the value belongs to
 * @param fieldValue the text to highlight
 * @return the highlighted fragments as produced by {@code Highlighter.getBestFragments}
 * @throws Exception if parsing the keywords, tokenizing or highlighting fails
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex buguIndex = BuguIndex.getInstance();

    Query parsedQuery = new QueryParser(buguIndex.getVersion(), fieldName, buguIndex.getAnalyzer())
            .parse(keywords);
    TokenStream valueTokens = buguIndex.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));

    // One scorer feeds both the highlighter and its span fragmenter.
    QueryScorer fieldScorer = new QueryScorer(parsedQuery, fieldName);
    Highlighter fragmentHighlighter = new Highlighter(formatter, fieldScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleSpanFragmenter(fieldScorer));

    String highlighted = fragmentHighlighter.getBestFragments(valueTokens, fieldValue, maxFragments, "...");
    return highlighted;
}
/**
 * Verifies that highlighting the term "fox" in a short sentence, using the
 * highlighter's default formatter, wraps the match in {@code <B>} tags.
 *
 * @throws Exception if tokenizing or highlighting fails
 */
public void testHighlighting() throws Exception {
    final String sentence = "The quick brown fox jumps over the lazy dog";
    final String expected = "The quick brown <B>fox</B> jumps over the lazy dog";

    TermQuery foxQuery = new TermQuery(new Term("field", "fox"));
    TokenStream sentenceTokens = new SimpleAnalyzer().tokenStream("field", new StringReader(sentence));

    QueryScorer foxScorer = new QueryScorer(foxQuery, "field");
    Highlighter foxHighlighter = new Highlighter(foxScorer);
    foxHighlighter.setTextFragmenter(new SimpleSpanFragmenter(foxScorer));

    String actual = foxHighlighter.getBestFragment(sentenceTokens, sentence);
    assertEquals(expected, actual);
}
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException { Directory directory = FSDirectory.open(new File(filePath)); IndexSearcher indexSearcher = new IndexSearcher(directory); QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods", new SmartChineseAnalyzer(Version.LUCENE_31, true)); //queryParser.setDefaultOperator(Operator.AND); Query query = queryParser.parse(key); TopDocs docs = indexSearcher.search(query, 10); QueryScorer queryScorer = new QueryScorer(query, "foods"); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer)); List<SearchResult> searchResults = new ArrayList<SearchResult>(); if (docs != null) { for (ScoreDoc scoreDoc : docs.scoreDocs) { Document doc = indexSearcher.doc(scoreDoc.doc); TokenStream tokenStream = TokenSources.getAnyTokenStream( indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc, new SmartChineseAnalyzer(Version.LUCENE_31, true)); SearchResult searchResult = new SearchResult(); searchResult.setRestaurantId(Long.valueOf(doc.get("id"))); searchResult.setRestaurantName(doc.get("restaurant_name")); searchResult.setKey(key); searchResult.setFoods(Arrays.asList(highlighter. getBestFragment(tokenStream, doc.get("foods")).split(" "))); searchResults.add(searchResult); } } else { searchResults = null; } indexSearcher.close(); directory.close(); return new Gson().toJson(searchResults); }
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1); } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_41, // "f", // new StandardAnalyzer(Version.LUCENE_41));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_41) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
/**
 * Highlights the matches of a query across the whole of a field's text.
 *
 * <p>Both the fragment size and the number of characters analyzed are set to
 * {@code Integer.MAX_VALUE}, so the text is neither split into small fragments
 * nor truncated before analysis.
 *
 * @param query the query whose matches are to be highlighted
 * @param analyzer the analyzer used to tokenize {@code fulltext}
 * @param fieldName the field name passed to the analyzer
 * @param fulltext the text to highlight
 * @param startDelimiter markup inserted before each highlighted term
 * @param stopDelimiter markup inserted after each highlighted term
 * @return the result of {@code Highlighter.getBestFragment} for the text
 *
 * @throws IOException if tokenizing the text fails
 * @throws InvalidTokenOffsetsException if a token offset lies outside the text
 */
public static String getHighlightedField(Query query, Analyzer analyzer, String fieldName, String fulltext, final String startDelimiter, final String stopDelimiter) throws IOException, InvalidTokenOffsetsException {
    QueryScorer scorer = new QueryScorer(query);
    Highlighter wholeTextHighlighter =
            new Highlighter(new SimpleHTMLFormatter(startDelimiter, stopDelimiter), scorer);

    // Lift both limits so the entire text is considered as one fragment.
    wholeTextHighlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    wholeTextHighlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, Integer.MAX_VALUE));

    String highlighted = wholeTextHighlighter.getBestFragment(analyzer, fieldName, fulltext);
    return highlighted;
}