Java 类org.apache.lucene.search.BoostAttribute 实例源码
项目:elasticsearch_my
文件:TermVectorsResponse.java
/**
 * Writes the next term of {@code termIter} as an object keyed by the term text.
 *
 * <p>The object receives, in order: whatever {@code buildTermStatistics} emits, the
 * {@code FieldStrings.TERM_FREQ} field, whatever {@code buildValues} emits, and
 * whatever {@code buildScore} emits.
 *
 * @param builder  the content builder to write to
 * @param spare    scratch buffer used to decode the UTF-8 term bytes
 * @param curTerms the terms of the field currently being written
 * @param termIter enum over {@code curTerms}; advanced by exactly one term here
 * @param boostAtt boost attribute handed on to {@code buildScore}
 * @throws IOException if one of the underlying calls fails
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Advance the enum and decode the term bytes through the shared scratch buffer.
    spare.copyUTF8Bytes(termIter.next());
    final String termText = spare.toString();
    builder.startObject(termText);
    buildTermStatistics(builder, termIter);

    // Term vector data for this term.
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int freq = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, freq);
    initMemory(curTerms, freq);
    initValues(curTerms, postings, freq);
    buildValues(builder, curTerms, freq);
    buildScore(builder, boostAtt);

    builder.endObject();
}
项目:Elasticsearch
文件:TermVectorsResponse.java
/**
 * Serializes a single term: advances {@code termIter} by one, opens an object named
 * after the term, and fills it with statistics, term frequency, values and score.
 *
 * @param builder  the content builder to write to
 * @param spare    scratch buffer used to turn the UTF-8 term bytes into a string
 * @param curTerms the terms of the field currently being written
 * @param termIter enum over {@code curTerms}; {@code next()} is called once
 * @param boostAtt boost attribute passed through to {@code buildScore}
 * @throws IOException if one of the underlying calls fails
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Open the per-term object, keyed by the decoded term text.
    final BytesRef termBytes = termIter.next();
    spare.copyUTF8Bytes(termBytes);
    builder.startObject(spare.toString());

    buildTermStatistics(builder, termIter);

    // Emit the term frequency followed by the term vector values.
    final PostingsEnum postingsEnum = termIter.postings(null, PostingsEnum.ALL);
    final int frequency = postingsEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, frequency);
    initMemory(curTerms, frequency);
    initValues(curTerms, postingsEnum, frequency);
    buildValues(builder, curTerms, frequency);

    buildScore(builder, boostAtt);
    builder.endObject();
}
项目:elasticsearch_my
文件:TermVectorsResponse.java
/**
 * Writes the next field from {@code fieldIter} as an object keyed by the field name.
 *
 * <p>The object holds whatever {@code buildFieldStatistics} emits, followed by a
 * nested {@code FieldStrings.TERMS} object with one {@code buildTerm} entry per term.
 *
 * @param builder   the content builder to write to
 * @param spare     scratch buffer forwarded to {@code buildTerm}
 * @param theFields source of the {@link Terms} for the field
 * @param fieldIter iterator over field names; advanced by exactly one element here
 * @throws IOException if one of the underlying calls fails
 */
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    final String name = fieldIter.next();
    builder.startObject(name);
    final Terms curTerms = theFields.terms(name);

    // Field-level statistics come first.
    buildFieldStatistics(builder, curTerms);

    builder.startObject(FieldStrings.TERMS);
    final TermsEnum termsEnum = curTerms.iterator();
    final BoostAttribute boost = termsEnum.attributes().addAttribute(BoostAttribute.class);
    // buildTerm advances the enum itself, so it is invoked once per reported term.
    int written = 0;
    while (written < curTerms.size()) {
        buildTerm(builder, spare, curTerms, termsEnum, boost);
        written++;
    }
    builder.endObject(); // closes FieldStrings.TERMS
    builder.endObject(); // closes the field object
}
项目:Elasticsearch
文件:TermVectorsResponse.java
/**
 * Serializes one field: takes the next name from {@code fieldIter}, opens an object
 * with that name, writes the field statistics and then a {@code FieldStrings.TERMS}
 * object populated by repeated calls to {@code buildTerm}.
 *
 * @param builder   the content builder to write to
 * @param spare     scratch buffer forwarded to {@code buildTerm}
 * @param theFields source of the {@link Terms} for the field
 * @param fieldIter iterator over field names; {@code next()} is called once
 * @throws IOException if one of the underlying calls fails
 */
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    final String currentField = fieldIter.next();
    builder.startObject(currentField);

    final Terms curTerms = theFields.terms(currentField);
    buildFieldStatistics(builder, curTerms);

    builder.startObject(FieldStrings.TERMS);
    final TermsEnum iterator = curTerms.iterator();
    final BoostAttribute boostAttribute = iterator.attributes().addAttribute(BoostAttribute.class);
    // One buildTerm call per term; each call moves the enum forward by one.
    int termIndex = 0;
    while (termIndex < curTerms.size()) {
        buildTerm(builder, spare, curTerms, iterator, boostAttribute);
        termIndex++;
    }
    builder.endObject(); // end of FieldStrings.TERMS
    builder.endObject(); // end of the field object
}
项目:elasticsearch_my
文件:TermVectorsResponse.java
/**
 * Writes the {@code FieldStrings.SCORE} field from the boost attribute, but only
 * when {@code hasScores} is set; otherwise nothing is written.
 *
 * @param builder  the content builder to write to
 * @param boostAtt attribute whose current boost is emitted as the score
 * @throws IOException if writing to the builder fails
 */
private void buildScore(XContentBuilder builder, BoostAttribute boostAtt) throws IOException {
    if (!hasScores) {
        return;
    }
    final float score = boostAtt.getBoost();
    builder.field(FieldStrings.SCORE, score);
}
项目:Elasticsearch
文件:TermVectorsResponse.java
/**
 * Emits the score of the current term, taken from {@code boostAtt}, under
 * {@code FieldStrings.SCORE}. Does nothing unless {@code hasScores} is true.
 *
 * @param builder  the content builder to write to
 * @param boostAtt attribute supplying the boost value
 * @throws IOException if writing to the builder fails
 */
private void buildScore(XContentBuilder builder, BoostAttribute boostAtt) throws IOException {
    // Scores are optional in the response.
    if (!hasScores) {
        return;
    }
    builder.field(FieldStrings.SCORE, boostAtt.getBoost());
}
项目:search
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRefBuilder spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:NYBC
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRef spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRef spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRef spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:read-open-source-code
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRefBuilder spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
项目:Maskana-Gestor-de-Conocimiento
文件:DirectSpellChecker.java
/**
 * Provide spelling corrections based on several parameters.
 *
 * <p>Candidates are enumerated with a {@code FuzzyTermsEnum} over the terms of
 * {@code term.field()} and collected in a priority queue that never retains more
 * than {@code numSug} entries.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections; if not positive, an empty
 *        collection is returned
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The document frequency threshold: candidates whose document frequency is
 *        less than or equal to this value are skipped
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections, held in a {@link PriorityQueue} ordered by
 *         <code>ScoreTerm</code>'s natural order (iterating it is not guaranteed to be sorted);
 *         empty if the field has no terms
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
    float accuracy, final CharsRef spare) throws IOException {
  // A non-positive limit can never produce a suggestion. Returning early also avoids
  // dereferencing peek() on an empty queue in the competitiveness check below.
  if (numSug <= 0) {
    return Collections.emptyList();
  }

  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
      atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // Scratch entry that is filled in and offered to the queue; see the recycling step below.
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
      e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost) {
      continue;
    }

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq) {
      continue;
    }

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy) {
      continue;
    }

    // add new entry in PQ; the candidate bytes are deep-copied before being stored
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue: the evicted head is recycled as the next scratch entry
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, publish the head's boost as the non-competitive bound
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}