public static String getHighlightString(String text, String keyword) throws IOException, InvalidTokenOffsetsException {
    TermQuery query = new TermQuery(new Term("f", keyword));
    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    Fragmenter fragmenter = new SimpleFragmenter(50);
    highlighter.setTextFragmenter(fragmenter);
    TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_20).tokenStream("f", new StringReader(text));
    String result = highlighter.getBestFragments(tokenStream, text, 30, "...");
    StringBuilder writer = new StringBuilder();
    writer.append("<html>");
    writer.append("<style>\n" +
            ".highlight {\n" +
            " background: yellow;\n" +
            "}\n" +
            "</style>");
    writer.append("<body>");
    writer.append(result); // append the highlighted fragments (was an empty string in the original)
    writer.append("</body></html>");
    return writer.toString();
}
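A minimal caller for the method above might look like the following sketch; the sample text and keyword are illustrative values, not from the original snippet:

// Hypothetical usage of getHighlightString; StandardAnalyzer lower-cases
// tokens, so the keyword "lucene" matches "Lucene" in the text.
String html = getHighlightString("Lucene highlights matching terms", "lucene");
System.out.println(html); // an HTML page with <span class="highlight"> wrappers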
/**
 * Searches the "Content" field and highlights matches with a CSS class.
 * @param analyzer
 * @param searcher
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
public void searToHighlighterCss(Analyzer analyzer, IndexSearcher searcher) throws IOException, InvalidTokenOffsetsException {
    // search term "免费"; the byte round-trip works around a source-encoding mismatch
    Term term = new Term("Content", new String("免费".getBytes(), "GBK"));
    TermQuery query = new TermQuery(term);
    TopDocs docs = searcher.search(query, 10); // run the search
    // custom tags wrapped around each highlighted term
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hightlighterCss\">", "</span>");
    // create the QueryScorer
    QueryScorer scorer = new QueryScorer(query);
    // create the Fragmenter
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    Highlighter highlight = new Highlighter(formatter, scorer);
    highlight.setTextFragmenter(fragmenter);
    for (ScoreDoc doc : docs.scoreDocs) { // iterate over the matching documents
        Document document = searcher.doc(doc.doc);
        // use get() for the stored value; getField().toString() would include field metadata
        String value = document.get("Content");
        TokenStream tokenStream = analyzer.tokenStream("Content", new StringReader(value));
        String str1 = highlight.getBestFragment(tokenStream, value);
        System.out.println(str1);
    }
}
private List<LumongoHighlighter> getHighlighterList(List<HighlightRequest> highlightRequests, Query q) {
    if (highlightRequests.isEmpty()) {
        return Collections.emptyList();
    }
    List<LumongoHighlighter> highlighterList = new ArrayList<>();
    for (HighlightRequest highlight : highlightRequests) {
        QueryScorer queryScorer = new QueryScorer(q, highlight.getField());
        queryScorer.setExpandMultiTermQuery(true);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer, highlight.getFragmentLength());
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(highlight.getPreTag(), highlight.getPostTag());
        LumongoHighlighter highlighter = new LumongoHighlighter(simpleHTMLFormatter, queryScorer, highlight);
        highlighter.setTextFragmenter(fragmenter);
        highlighterList.add(highlighter);
    }
    return highlighterList;
}
public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);
    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    Analyzer analyzer = new SimpleAnalyzer();
    for (ScoreDoc sd : hits.scoreDocs) {
        StoredDocument doc = searcher.doc(sd.doc);
        String title = doc.get("title");
        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);
        String fragment = highlighter.getBestFragment(stream, title);
        LOGGER.info(fragment);
    }
}
public void testHighlightPhrase() throws Exception {
    Query query = new PhraseQuery.Builder()
            .add(new Term("field", "foo"))
            .add(new Term("field", "bar"))
            .build();
    QueryScorer queryScorer = new CustomQueryScorer(query);
    org.apache.lucene.search.highlight.Highlighter highlighter =
            new org.apache.lucene.search.highlight.Highlighter(queryScorer);
    String[] frags = highlighter.getBestFragments(new MockAnalyzer(random()), "field", "bar foo bar foo", 10);
    assertArrayEquals(new String[] {"bar <B>foo</B> <B>bar</B> foo"}, frags);
}
static String displayHtmlHighlight(Query query, Analyzer analyzer, String fieldName, String fieldContent,
        int fragmentSize) throws IOException, InvalidTokenOffsetsException {
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color='red'>", "</font>"),
            new QueryScorer(query));
    Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter.getBestFragment(analyzer, fieldName, fieldContent);
}
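A hedged usage sketch for displayHtmlHighlight; the query, field name, analyzer choice, and sample text are assumptions for illustration:

// Illustrative call; "content", the analyzer, and the text are not part of
// the original snippet.
Query query = new TermQuery(new Term("content", "lucene"));
String html = displayHtmlHighlight(query, new StandardAnalyzer(), "content",
        "Lucene is a full-text search library.", 100);
System.out.println(html); // e.g. "<font color='red'>Lucene</font> is a full-text search library."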
public static void search(String indexDir, String q) throws Exception {
    Directory dir = FSDirectory.open(Paths.get(indexDir));
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher is = new IndexSearcher(reader);
    // Analyzer analyzer = new StandardAnalyzer(); // standard analyzer
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
    QueryParser parser = new QueryParser("desc", analyzer);
    Query query = parser.parse(q);
    long start = System.currentTimeMillis();
    TopDocs hits = is.search(query, 10);
    long end = System.currentTimeMillis();
    System.out.println("Matched " + q + ", took " + (end - start) + " ms, found " + hits.totalHits + " record(s)");
    QueryScorer scorer = new QueryScorer(query);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    highlighter.setTextFragmenter(fragmenter);
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("city"));
        System.out.println(doc.get("desc"));
        String desc = doc.get("desc");
        if (desc != null) {
            TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
            System.out.println(highlighter.getBestFragment(tokenStream, desc));
        }
    }
    reader.close();
}
@Override
@SuppressWarnings("unchecked")
public List<Post> search(Paging paging, String q) throws Exception {
    FullTextSession fullTextSession = Search.getFullTextSession(super.session());
    SearchFactory sf = fullTextSession.getSearchFactory();
    QueryBuilder qb = sf.buildQueryBuilder().forEntity(PostPO.class).get();
    org.apache.lucene.search.Query luceneQuery = qb.keyword().onFields("title", "summary", "tags").matching(q).createQuery();
    FullTextQuery query = fullTextSession.createFullTextQuery(luceneQuery);
    query.setFirstResult(paging.getFirstResult());
    query.setMaxResults(paging.getMaxResults());
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    List<PostPO> list = query.list();
    List<Post> rets = new ArrayList<>(list.size());
    for (PostPO po : list) {
        Post m = BeanMapUtils.copy(po, 0);
        // apply highlighting
        String title = highlighter.getBestFragment(standardAnalyzer, "title", m.getTitle());
        String summary = highlighter.getBestFragment(standardAnalyzer, "summary", m.getSummary());
        if (StringUtils.isNotEmpty(title)) {
            m.setTitle(title);
        }
        if (StringUtils.isNotEmpty(summary)) {
            m.setSummary(summary);
        }
        rets.add(m);
    }
    paging.setTotalCount(query.getResultSize());
    return rets;
}
HighlightingHelper(Query query, Analyzer analyzer) {
    this.analyzer = analyzer;
    Formatter formatter = new SimpleHTMLFormatter();
    Encoder encoder = new MinimalHTMLEncoder();
    scorer = new QueryScorer(query);
    highlighter = new Highlighter(formatter, encoder, scorer);
    fragmentLength = DEFAULT_FRAGMENT_LENGTH;
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
    highlighter.setTextFragmenter(fragmenter);
}
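The constructor above only wires the formatter, encoder, scorer, and fragmenter together; a sketch of the kind of fragment-extraction method such a helper might expose (the method name is hypothetical, not part of the original class):

// Hypothetical companion method for HighlightingHelper, assuming the
// analyzer and highlighter fields initialized in the constructor.
String bestFragment(String fieldName, String text) throws IOException, InvalidTokenOffsetsException {
    TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(text));
    return highlighter.getBestFragment(stream, text);
}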
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer,
                String text) throws Exception {
            TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
            return frag != null ? frag.length : 0;
        }
    };
}
@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer,
                String text) throws Exception {
            TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
            numHighlightedResults += frag != null ? frag.length : 0;
            return frag != null ? frag.length : 0;
        }
    };
}
public static Object createHighlighter(Query query, String highlightBegin, String highlightEnd) {
    return new Highlighter(
            // new SimpleHTMLFormatter("<span class=\"matching-term\">", "</span>"),
            new SimpleHTMLFormatter(highlightBegin, highlightEnd),
            new QueryScorer(query));
}
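A possible call site for createHighlighter; the cast back to Highlighter reflects the Object return type, and the field name and term are illustrative assumptions:

// Illustrative usage; "body" and "lucene" are not from the original snippet.
Query query = new TermQuery(new Term("body", "lucene"));
Highlighter highlighter = (Highlighter) createHighlighter(query, "<em>", "</em>");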
/**
 * Searches the current corpus using the search terms in the search field.
 */
private void searchCorpus() {
    if (search.getText().trim().equals("")) return;
    try {
        indexSearcher = guess.getSelected() != null
                ? getIndex(getDiffCorpus(gold.getSelected(), guess.getSelected()))
                : getIndex(gold.getSelected());
        //System.out.println("Searching...");
        QueryParser parser = new QueryParser("Word", analyzer);
        Query query = parser.parse(search.getText());
        Hits hits = indexSearcher.search(query);
        Highlighter highlighter = new Highlighter(new QueryScorer(query));
        DefaultListModel model = new DefaultListModel();
        for (int i = 0; i < hits.length(); i++) {
            Document hitDoc = hits.doc(i);
            int nr = Integer.parseInt(hitDoc.get("<nr>"));
            //System.out.println(hitDoc.get("<nr>"));
            String best = null;
            for (Object field : hitDoc.getFields()) {
                Field f = (Field) field;
                best = highlighter.getBestFragment(analyzer, f.name(), hitDoc.get(f.name()));
                if (best != null) break;
            }
            if (best != null)
                model.addElement(new Result(nr, "<html>" + nr + ":" + best + "</html>"));
            //System.out.println(highlighter.getBestFragment(analyzer, "Word", hitDoc.get("Word")));
            //assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
        }
        results.setModel(model);
        repaint();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
protected Highlighter createHighlighter(org.apache.lucene.search.Query luceneQuery) {
    SimpleHTMLFormatter format = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(format, new QueryScorer(luceneQuery)); // highlighting
    // highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    highlighter.setTextFragmenter(new SimpleFragmenter(200));
    return highlighter;
}
/**
 * Highlight (bold, color) query words in a result document. Sets the HighlightResult for content or description.
 *
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc,
        final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG),
            new QueryScorer(query));
    // Get the 3 best fragments of the content, separated by "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);
        // if there is no highlight result in the content => look in the description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);
        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex index = BuguIndex.getInstance();
    QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
    Query query = parser.parse(keywords);
    TokenStream tokens = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    return highlighter.getBestFragments(tokens, fieldValue, maxFragments, "...");
}
public void testHighlighting() throws Exception {
    String text = "The quick brown fox jumps over the lazy dog";
    TermQuery query = new TermQuery(new Term("field", "fox"));
    TokenStream tokenStream = new SimpleAnalyzer().tokenStream("field", new StringReader(text));
    QueryScorer scorer = new QueryScorer(query, "field");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog",
            highlighter.getBestFragment(tokenStream, text));
}
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException {
    Directory directory = FSDirectory.open(new File(filePath));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods",
            new SmartChineseAnalyzer(Version.LUCENE_31, true));
    //queryParser.setDefaultOperator(Operator.AND);
    Query query = queryParser.parse(key);
    TopDocs docs = indexSearcher.search(query, 10);
    QueryScorer queryScorer = new QueryScorer(query, "foods");
    Highlighter highlighter = new Highlighter(queryScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
    List<SearchResult> searchResults = new ArrayList<SearchResult>();
    if (docs != null) {
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            TokenStream tokenStream = TokenSources.getAnyTokenStream(
                    indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc,
                    new SmartChineseAnalyzer(Version.LUCENE_31, true));
            SearchResult searchResult = new SearchResult();
            searchResult.setRestaurantId(Long.valueOf(doc.get("id")));
            searchResult.setRestaurantName(doc.get("restaurant_name"));
            searchResult.setKey(key);
            searchResult.setFoods(Arrays.asList(highlighter
                    .getBestFragment(tokenStream, doc.get("foods")).split(" ")));
            searchResults.add(searchResult);
        }
    } else {
        searchResults = null;
    }
    indexSearcher.close();
    directory.close();
    return new Gson().toJson(searchResults);
}
/**
 * NOTE: This method will not preserve the correct field types.
 *
 * @param preTag
 * @param postTag
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
        IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
    String fieldLessFieldName = fieldManager.getFieldLessFieldName();
    Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);
    Analyzer analyzer = fieldManager.getAnalyzerForQuery();
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
    Document result = new Document();
    for (IndexableField f : document) {
        String name = f.name();
        if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
            result.add(f);
            continue;
        }
        String text = f.stringValue();
        Number numericValue = f.numericValue();
        Query fieldFixedQuery;
        if (fieldManager.isFieldLessIndexed(name)) {
            fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
        } else {
            fieldFixedQuery = fixedQuery;
        }
        if (numericValue != null) {
            if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
                String numberHighlight = preTag + text + postTag;
                result.add(new StringField(name, numberHighlight, Store.YES));
            }
        } else {
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
            TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            for (int j = 0; j < frag.length; j++) {
                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    result.add(new StringField(name, frag[j].toString(), Store.YES));
                }
            }
        }
    }
    return result;
}
public static void main(String[] args) throws Exception {
    ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
    SessionFactory sessionFactory = applicationContext.getBean("hibernate4sessionFactory", SessionFactory.class);
    FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.openSession());
    // Query via the Hibernate Search API, matching multiple fields: name, description, authors.name
    // QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class).get();
    // Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();
    // Query via the Lucene API, matching multiple fields: name, description, authors.name
    // using the Paoding analyzer
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
            new String[]{"name", "description", "authors.name"}, new PaodingAnalyzer());
    Query luceneQuery = queryParser.parse("实战");
    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery, Book.class);
    // page size
    fullTextQuery.setMaxResults(5);
    // first result (current page offset)
    fullTextQuery.setFirstResult(0);
    // highlighting setup; the original closing tag "<font/></b>" was malformed HTML
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    @SuppressWarnings("unchecked")
    List<Book> resultList = fullTextQuery.list();
    System.out.println("Found [" + resultList.size() + "] record(s)");
    for (Book book : resultList) {
        String highlighterString = null;
        Analyzer analyzer = new PaodingAnalyzer();
        try {
            // highlight name
            highlighterString = highlighter.getBestFragment(analyzer, "name", book.getName());
            if (highlighterString != null) {
                book.setName(highlighterString);
            }
            // highlight authors.name
            Set<Author> authors = book.getAuthors();
            for (Author author : authors) {
                highlighterString = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                if (highlighterString != null) {
                    author.setName(highlighterString);
                }
            }
            // highlight description
            highlighterString = highlighter.getBestFragment(analyzer, "description", book.getDescription());
            if (highlighterString != null) {
                book.setDescription(highlighterString);
            }
        } catch (Exception e) {
        }
        System.out.println("Title: " + book.getName() + "\nDescription: " + book.getDescription()
                + "\nPublication date: " + book.getPublicationDate());
        System.out.println("----------------------------------------------------------");
    }
    fullTextSession.close();
    sessionFactory.close();
}
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize, Analyzer analyzer, String... field) throws Exception {
    QueryResult<Book> queryResult = new QueryResult<Book>();
    List<Book> books = new ArrayList<Book>();
    FullTextSession fullTextSession = Search.getFullTextSession(getSession());
    // Query via the Hibernate Search API, matching multiple fields: name, description, authors.name
    // QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class).get();
    // Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();
    // Query via the Lucene API, matching multiple fields: name, description, authors.name
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
            new String[]{"name", "description", "authors.name"}, analyzer);
    Query luceneQuery = queryParser.parse(keyword);
    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery);
    int searchresultsize = fullTextQuery.getResultSize();
    queryResult.setSearchresultsize(searchresultsize);
    System.out.println("Found [" + searchresultsize + "] record(s)");
    fullTextQuery.setFirstResult(start);
    fullTextQuery.setMaxResults(pagesize);
    // sort by id
    fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT, true)));
    // highlighting setup
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    @SuppressWarnings("unchecked")
    List<Book> tempresult = fullTextQuery.list();
    for (Book book : tempresult) {
        String highlighterString = null;
        try {
            // highlight name
            highlighterString = highlighter.getBestFragment(analyzer, "name", book.getName());
            if (highlighterString != null) {
                book.setName(highlighterString);
            }
            // highlight authors.name
            Set<Author> authors = book.getAuthors();
            for (Author author : authors) {
                highlighterString = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                if (highlighterString != null) {
                    author.setName(highlighterString);
                }
            }
            // highlight description
            highlighterString = highlighter.getBestFragment(analyzer, "description", book.getDescription());
            if (highlighterString != null) {
                book.setDescription(highlighterString);
            }
        } catch (Exception e) {
        }
        books.add(book);
        System.out.println("Title: " + book.getName() + "\nDescription: " + book.getDescription()
                + "\nPublication date: " + book.getPublicationDate());
        System.out.println("----------------------------------------------------------");
    }
    queryResult.setSearchresult(books);
    return queryResult;
}
public static void main(String[] args) throws Exception {
    ApplicationContext applicationContext = new ClassPathXmlApplicationContext("applicationContext.xml");
    EntityManagerFactory entityManagerFactory = applicationContext.getBean("entityManagerFactory", EntityManagerFactory.class);
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManagerFactory.createEntityManager());
    // Query via the Hibernate Search API, matching multiple fields: name, description, authors.name
    // QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class).get();
    // Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();
    // Query via the Lucene API, matching multiple fields: name, description, authors.name
    // using the Paoding analyzer
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
            new String[]{"name", "description", "authors.name"}, new PaodingAnalyzer());
    Query luceneQuery = queryParser.parse("实战");
    FullTextQuery fullTextQuery = fullTextEntityManager.createFullTextQuery(luceneQuery, Book.class);
    // page size
    fullTextQuery.setMaxResults(5);
    // first result (current page offset)
    fullTextQuery.setFirstResult(0);
    // highlighting setup; the original closing tag "<font/></b>" was malformed HTML
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    @SuppressWarnings("unchecked")
    List<Book> resultList = fullTextQuery.getResultList();
    for (Book book : resultList) {
        String highlighterString = null;
        Analyzer analyzer = new PaodingAnalyzer();
        try {
            // highlight name
            highlighterString = highlighter.getBestFragment(analyzer, "name", book.getName());
            if (highlighterString != null) {
                book.setName(highlighterString);
            }
            // highlight authors.name
            Set<Author> authors = book.getAuthors();
            for (Author author : authors) {
                highlighterString = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                if (highlighterString != null) {
                    author.setName(highlighterString);
                }
            }
            // highlight description
            highlighterString = highlighter.getBestFragment(analyzer, "description", book.getDescription());
            if (highlighterString != null) {
                book.setDescription(highlighterString);
            }
        } catch (Exception e) {
        }
    }
    fullTextEntityManager.close();
    entityManagerFactory.close();
}
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize, Analyzer analyzer, String... field) throws Exception {
    QueryResult<Book> queryResult = new QueryResult<Book>();
    List<Book> books = new ArrayList<Book>();
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(em);
    // Query via the Hibernate Search API, matching multiple fields: name, description, authors.name
    // QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class).get();
    // Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();
    // Query via the Lucene API, matching multiple fields: name, description, authors.name
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,
            new String[]{"name", "description", "authors.name"}, analyzer);
    Query luceneQuery = queryParser.parse(keyword);
    FullTextQuery fullTextQuery = fullTextEntityManager.createFullTextQuery(luceneQuery);
    int searchresultsize = fullTextQuery.getResultSize();
    queryResult.setSearchresultsize(searchresultsize);
    fullTextQuery.setFirstResult(start);
    fullTextQuery.setMaxResults(pagesize);
    // sort by id
    fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT, true)));
    // highlighting setup
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    QueryScorer queryScorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    @SuppressWarnings("unchecked")
    List<Book> tempresult = fullTextQuery.getResultList();
    for (Book book : tempresult) {
        String highlighterString = null;
        try {
            // highlight name
            highlighterString = highlighter.getBestFragment(analyzer, "name", book.getName());
            if (highlighterString != null) {
                book.setName(highlighterString);
            }
            // highlight authors.name
            Set<Author> authors = book.getAuthors();
            for (Author author : authors) {
                highlighterString = highlighter.getBestFragment(analyzer, "authors.name", author.getName());
                if (highlighterString != null) {
                    author.setName(highlighterString);
                }
            }
            // highlight description
            highlighterString = highlighter.getBestFragment(analyzer, "description", book.getDescription());
            if (highlighterString != null) {
                book.setDescription(highlighterString);
            }
        } catch (Exception e) {
        }
        books.add(book);
    }
    queryResult.setSearchresult(books);
    return queryResult;
}
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // wrap Paoding as an Analyzer conforming to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // read the text.txt file located next to this class
    String content = ContentReader.readText(English.class);
    // standard Lucene indexing and search code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // wrap Paoding as an Analyzer conforming to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();
    // read the text.txt file located next to this class
    String content = ContentReader.readText(Chinese.class);
    // standard Lucene indexing and search code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);
    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}