Java 类org.apache.lucene.search.BoostAttribute 实例源码

项目:elasticsearch_my    文件:TermVectorsResponse.java   
/**
 * Writes the next term of {@code termIter} as an object keyed by the term text, containing
 * its statistics, term frequency, term vector values and (when enabled) its score.
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Advance the enum and decode the term bytes through the shared scratch buffer.
    final BytesRef currentTerm = termIter.next();
    spare.copyUTF8Bytes(currentTerm);
    final String termText = spare.toString();
    builder.startObject(termText);

    buildTermStatistics(builder, termIter);

    // The term frequency sizes the buffers that are filled and then written below.
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int freq = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, freq);
    initMemory(curTerms, freq);
    initValues(curTerms, postings, freq);
    buildValues(builder, curTerms, freq);

    buildScore(builder, boostAtt);
    builder.endObject();
}
项目:Elasticsearch    文件:TermVectorsResponse.java   
/**
 * Emits a single term entry: pulls the next term from {@code termIter}, opens an object named
 * after it, and writes statistics, term frequency, term vector values and the optional score.
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Move to the next term; its UTF-8 bytes are converted to chars via the scratch builder.
    final BytesRef nextTerm = termIter.next();
    spare.copyUTF8Bytes(nextTerm);
    final String objectName = spare.toString();
    builder.startObject(objectName);

    buildTermStatistics(builder, termIter);

    // Request all postings data and report how often the term occurs.
    final PostingsEnum allPostings = termIter.postings(null, PostingsEnum.ALL);
    final int occurrences = allPostings.freq();
    builder.field(FieldStrings.TERM_FREQ, occurrences);

    // Prepare, populate and then serialize the term vector values.
    initMemory(curTerms, occurrences);
    initValues(curTerms, allPostings, occurrences);
    buildValues(builder, curTerms, occurrences);

    buildScore(builder, boostAtt);
    builder.endObject();
}
项目:elasticsearch_my    文件:TermVectorsResponse.java   
/**
 * Writes the next field named by {@code fieldIter} as an object holding the field statistics
 * and a nested terms object with one entry per term.
 */
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    final String name = fieldIter.next();
    builder.startObject(name);

    final Terms fieldTerms = theFields.terms(name);
    // Field-level statistics precede the per-term entries.
    buildFieldStatistics(builder, fieldTerms);

    builder.startObject(FieldStrings.TERMS);
    final TermsEnum termsEnum = fieldTerms.iterator();
    final BoostAttribute boost = termsEnum.attributes().addAttribute(BoostAttribute.class);
    // buildTerm advances the enum itself, so this loop only counts how many terms to emit.
    int emitted = 0;
    while (emitted < fieldTerms.size()) {
        buildTerm(builder, spare, fieldTerms, termsEnum, boost);
        emitted++;
    }
    builder.endObject(); // terms
    builder.endObject(); // field
}
项目:Elasticsearch    文件:TermVectorsResponse.java   
/**
 * Serializes one field: takes the next field name from {@code fieldIter}, writes the field
 * statistics, then writes every term of that field inside a nested terms object.
 */
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    final String currentField = fieldIter.next();
    builder.startObject(currentField);

    final Terms termsOfField = theFields.terms(currentField);
    buildFieldStatistics(builder, termsOfField);

    // Open the terms section and register the boost attribute on the enum that buildTerm reads from.
    builder.startObject(FieldStrings.TERMS);
    final TermsEnum cursor = termsOfField.iterator();
    final BoostAttribute boostAttribute = cursor.attributes().addAttribute(BoostAttribute.class);

    // Each buildTerm call consumes exactly one term from the cursor.
    for (int remaining = 0; remaining < termsOfField.size(); remaining++) {
        buildTerm(builder, spare, termsOfField, cursor, boostAttribute);
    }

    // Close the terms section, then the field object.
    builder.endObject();
    builder.endObject();
}
项目:elasticsearch_my    文件:TermVectorsResponse.java   
/**
 * Writes the boost held by {@code boostAtt} as the score field, but only when scores are enabled.
 */
private void buildScore(XContentBuilder builder, BoostAttribute boostAtt) throws IOException {
    if (!hasScores) {
        // Scores are not part of this response; emit nothing.
        return;
    }
    final float score = boostAtt.getBoost();
    builder.field(FieldStrings.SCORE, score);
}
项目:Elasticsearch    文件:TermVectorsResponse.java   
/**
 * Adds the score field, taken from the current boost of {@code boostAtt}, when {@code hasScores} is set.
 */
private void buildScore(XContentBuilder builder, BoostAttribute boostAtt) throws IOException {
    final boolean skipScore = !hasScores;
    if (skipScore) {
        return;
    }
    builder.field(FieldStrings.SCORE, boostAtt.getBoost());
}
项目:search    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRefBuilder spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:NYBC    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRef spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRef spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRef spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRefBuilder spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:Maskana-Gestor-de-Conocimiento    文件:DirectSpellChecker.java   
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Fuzzy matches of {@code term} are enumerated from the index. A candidate is skipped when it
 * is byte-identical to the query term, when its document frequency is less than or equal to
 * {@code docfreq}, or when its score is below {@code accuracy}. At most {@code numSug}
 * candidates are retained.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; zero or a negative value yields an empty result
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: a candidate is only included when its document
 *                frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return the retained spelling corrections, held in a priority queue ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating the collection is not guaranteed to
 *         visit them in sorted order), or an empty collection when the field has no terms or
 *         {@code numSug} is not positive
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRef spare) throws IOException {

  // Nothing can be retained for a non-positive limit. Without this guard the competitiveness
  // check below would dereference peek() on an empty queue and throw a NullPointerException.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; replaced after every offer.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required (the threshold itself is excluded)
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; an evicted entry is recycled as the next scratch instance
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // publish the queue head's boost once the queue is full, negative infinity until then
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}