Java class org.apache.lucene.search.highlight.TokenSources: example source code
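The examples below come from open-source projects and span several Lucene versions, from the 2.4-era TermPositionVector API up to the Fields-based overloads of 4.x/5.x. As a minimal, self-contained starting point, here is a sketch using the re-analysis overload TokenSources.getTokenStream(field, content, analyzer), which needs no index at all. It assumes a Lucene 5.x-era classpath (lucene-core, lucene-analyzers-common, lucene-queryparser, lucene-highlighter); the field name "body", the query text, and the sample content are illustrative.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

public class TokenSourcesDemo {
    public static void main(String[] args) throws Exception {
        // Assumes a Lucene 5.x-era classpath; field name "body" is illustrative.
        Analyzer analyzer = new StandardAnalyzer();
        Query query = new QueryParser("body", analyzer).parse("search");
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<b>", "</b>"),
                new QueryScorer(query, "body"));

        String content = "Apache Lucene is a full-text search library written in Java.";
        // This overload re-analyzes the raw text with the given analyzer;
        // no stored index or term vectors are required.
        TokenStream tokens = TokenSources.getTokenStream("body", content, analyzer);
        System.out.println(highlighter.getBestFragment(tokens, content));
    }
}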

Project: miru    File: LuceneBackedQueryParser.java
@Override
public String highlight(String locale, boolean useStopWords, String query, String content, String pre, String post, int preview) {
    Analyzer analyzer = termAnalyzers.findAnalyzer(locale, useStopWords);
    QueryParser parser = new QueryParser(defaultField, analyzer);

    String summary = null;
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(pre, post);
        Highlighter hg = new Highlighter(formatter, new QueryTermScorer(parser.parse(query)));
        hg.setMaxDocCharsToAnalyze(preview);
        hg.setTextFragmenter(new SimpleFragmenter(100));

        TokenStream tokens = TokenSources.getTokenStream(defaultField, content, analyzer);
        summary = hg.getBestFragments(tokens, content, 4, " ... ");
    } catch (InvalidTokenOffsetsException | IOException | ParseException ex) {
        LOG.error("Failed to highlight", ex);
    }

    return StringUtils.isBlank(summary) ? null : summary;
}
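Note that this example scores fragments with QueryTermScorer, which weights fragments by individual query terms and ignores position information, while most of the examples below use QueryScorer, which rewrites the query and respects phrase and span queries when scoring fragments. Choosing between the two is a relevance/cost trade-off, not an API requirement.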
Project: t4f-data    File: HighlightTest.java
public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        StoredDocument doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);
        String fragment = highlighter.getBestFragment(stream, title);

        LOGGER.info(fragment);
    }
}
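TokenSources.getAnyTokenStream, used here and in the benchmark tasks below, first tries to rebuild the TokenStream from the document's stored term vectors for the given field, and only falls back to re-analyzing the stored field value with the supplied Analyzer when no vectors are available.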
Project: tripod    File: LuceneService.java
/**
 * Performs highlighting for a given query and a given document.
 *
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an I/O error occurs reading stored fields or term vectors
 * @throws InvalidTokenOffsetsException if the token stream yields offsets inconsistent with the stored text
 */
protected void performHighlighting(final IndexSearcher indexSearcher, final Query query, final ScoreDoc scoreDoc,
                                   final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {

    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {
        return;
    }

    final List<Highlight> highlights = new ArrayList<>();
    final List<String> hlFieldNames = getHighlightFieldNames(query, doc);

    // process each field to highlight on
    for (String hlField : hlFieldNames) {
        final String text = doc.get(hlField);
        if (StringUtils.isEmpty(text)) {
            continue;
        }

        final List<String> snippets = new ArrayList<>();
        final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;

        // get the snippets for the given field
        final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer, maxStartOffset);
        final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (TextFragment textFragment : textFragments) {
            if (textFragment != null && textFragment.getScore() > 0) {
                snippets.add(textFragment.toString());
            }
        }

        // if we have snippets then add a highlight result to the QueryResult
        if (!snippets.isEmpty()) {
            highlights.add(new Highlight(hlField, snippets));
        }
    }

    result.setHighlights(highlights);
}
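The term-vector overload used above only works if the field was indexed with term vectors that include positions and offsets. A sketch of the index-time setup, assuming Lucene 5.x-era APIs (the field name "body" and the sample text are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

// Assumes Lucene 5.x-era APIs; the field name "body" is illustrative.
// Store the text plus term vectors with positions and offsets, so that
// TokenSources.getTokenStream(...) can rebuild a TokenStream from the
// vectors instead of re-analyzing the stored value.
FieldType fieldType = new FieldType(TextField.TYPE_STORED);
fieldType.setStoreTermVectors(true);
fieldType.setStoreTermVectorPositions(true);
fieldType.setStoreTermVectorOffsets(true);

Document doc = new Document();
doc.add(new Field("body", "Apache Lucene is a full-text search library.", fieldType));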
Project: search    File: SearchTravRetHighlightTask.java
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
  return new BenchmarkHighlighter(){
    @Override
    public int doHighlight(IndexReader reader, int doc, String field,
        Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      return frag != null ? frag.length : 0;
    }
  };
}
Project: search    File: CountingHighlighterTestTask.java
@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  return new BenchmarkHighlighter() {
    @Override
    public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      numHighlightedResults += frag != null ? frag.length : 0;
      return frag != null ? frag.length : 0;
    }
  };
}
Project: NYBC    File: SearchTravRetHighlightTask.java
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
  return new BenchmarkHighlighter(){
    @Override
    public int doHighlight(IndexReader reader, int doc, String field,
        Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      return frag != null ? frag.length : 0;
    }
  };
}
Project: NYBC    File: CountingHighlighterTestTask.java
@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  return new BenchmarkHighlighter() {
    @Override
    public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      numHighlightedResults += frag != null ? frag.length : 0;
      return frag != null ? frag.length : 0;
    }
  };
}
Project: read-open-source-code    File: SearchTravRetHighlightTask.java
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
  return new BenchmarkHighlighter(){
    @Override
    public int doHighlight(IndexReader reader, int doc, String field,
        Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      return frag != null ? frag.length : 0;
    }
  };
}
Project: Maskana-Gestor-de-Conocimiento    File: SearchTravRetHighlightTask.java
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
  return new BenchmarkHighlighter(){
    @Override
    public int doHighlight(IndexReader reader, int doc, String field,
        Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      return frag != null ? frag.length : 0;
    }
  };
}
Project: Maskana-Gestor-de-Conocimiento    File: CountingHighlighterTestTask.java
@Override
public BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
  highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  return new BenchmarkHighlighter() {
    @Override
    public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception {
      TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
      numHighlightedResults += frag != null ? frag.length : 0;
      return frag != null ? frag.length : 0;
    }
  };
}
Project: rrs    File: SearchController.java
private String searchData(String key) throws IOException, ParseException, InvalidTokenOffsetsException {
    Directory directory = FSDirectory.open(new File(filePath));
    IndexSearcher indexSearcher = new IndexSearcher(directory);

    QueryParser queryParser = new QueryParser(Version.LUCENE_31, "foods",
            new SmartChineseAnalyzer(Version.LUCENE_31, true));
    //queryParser.setDefaultOperator(Operator.AND);

    Query query = queryParser.parse(key);
    TopDocs docs = indexSearcher.search(query, 10);

    QueryScorer queryScorer = new QueryScorer(query, "foods");
    Highlighter highlighter = new Highlighter(queryScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));


    List<SearchResult> searchResults = new ArrayList<SearchResult>();

    if (docs != null) {
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            TokenStream tokenStream = TokenSources.getAnyTokenStream(
                    indexSearcher.getIndexReader(), scoreDoc.doc, "foods", doc, 
                    new SmartChineseAnalyzer(Version.LUCENE_31, true));
            SearchResult searchResult = new SearchResult();
            searchResult.setRestaurantId(Long.valueOf(doc.get("id")));
            searchResult.setRestaurantName(doc.get("restaurant_name"));
            searchResult.setKey(key);
            searchResult.setFoods(Arrays.asList(highlighter.
                    getBestFragment(tokenStream, doc.get("foods")).split(" ")));
            searchResults.add(searchResult);
        }
    } else {
        searchResults = null;
    }

    indexSearcher.close();
    directory.close();

    return new Gson().toJson(searchResults);
}
Project: incubator-blur    File: HighlightHelper.java
/**
 * NOTE: This method will not preserve the correct field types.
 * 
 * @param preTag the opening tag inserted before each highlighted term
 * @param postTag the closing tag inserted after each highlighted term
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
    IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

  String fieldLessFieldName = fieldManager.getFieldLessFieldName();

  Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

  Analyzer analyzer = fieldManager.getAnalyzerForQuery();

  SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
  Document result = new Document();
  for (IndexableField f : document) {
    String name = f.name();
    if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
      result.add(f);
      continue;
    }
    String text = f.stringValue();
    Number numericValue = f.numericValue();

    Query fieldFixedQuery;
    if (fieldManager.isFieldLessIndexed(name)) {
      fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
    } else {
      fieldFixedQuery = fixedQuery;
    }

    if (numericValue != null) {
      if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
        String numberHighlight = preTag + text + postTag;
        result.add(new StringField(name, numberHighlight, Store.YES));
      }
    } else {
      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
      TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
      for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
          result.add(new StringField(name, frag[j].toString(), Store.YES));
        }
      }
    }
  }
  return result;
}
Project: paoding-for-lucene-2.4    File: English.java
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap the Paoding segmenter in an Analyzer that conforms to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();

    // Read the text.txt file located in this class's directory
    String content = ContentReader.readText(English.class);

    // Standard Lucene indexing and searching code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
            query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
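This example targets the Lucene 2.4-era API: a TermPositionVector is fetched with IndexReader.getTermFreqVector and passed to TokenSources.getTokenStream(tpv), which rebuilds the TokenStream from the stored vector. That requires the field to have been indexed with Field.TermVector.WITH_POSITIONS_OFFSETS, as it is here. The same pattern repeats in the Chinese.java and CadalWorkspace variants below.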
Project: paoding-for-lucene-2.4    File: Chinese.java
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap the Paoding segmenter in an Analyzer that conforms to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();

    // Read the text.txt file located in this class's directory
    String content = ContentReader.readText(Chinese.class);

    // Standard Lucene indexing and searching code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
            query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
Project: CadalWorkspace    File: English.java
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap the Paoding segmenter in an Analyzer that conforms to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();

    // Read the text.txt file located in this class's directory
    String content = ContentReader.readText(English.class);

    // Standard Lucene indexing and searching code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
            query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}
Project: CadalWorkspace    File: Chinese.java
public static void main(String[] args) throws Exception {
    if (args.length != 0) {
        QUERY = args[0];
    }
    // Wrap the Paoding segmenter in an Analyzer that conforms to Lucene's contract
    Analyzer analyzer = new PaodingAnalyzer();

    // Read the text.txt file located in this class's directory
    String content = ContentReader.readText(Chinese.class);

    // Standard Lucene indexing and searching code follows
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer);
    Document doc = new Document();
    Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
            Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(fd);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    IndexReader reader = IndexReader.open(ramDir);
    String queryString = QUERY;
    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
    Query query = parser.parse(queryString);
    Searcher searcher = new IndexSearcher(ramDir);
    query = query.rewrite(reader);
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    Hits hits = searcher.search(query);

    BoldFormatter formatter = new BoldFormatter();
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
            query));
    highlighter.setTextFragmenter(new SimpleFragmenter(50));
    for (int i = 0; i < hits.length(); i++) {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector) reader
                .getTermFreqVector(hits.id(i), FIELD_NAME);
        TokenStream tokenStream = TokenSources.getTokenStream(tpv);
        String result = highlighter.getBestFragments(tokenStream, text,
                maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
    }
    reader.close();
}