Java 类org.apache.lucene.search.CollectionStatistics 实例源码
项目:elasticsearch_my
文件:DfsSearchResult.java
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
final int numFieldStatistics = in.readVInt();
if (fieldStatistics == null) {
fieldStatistics = HppcMaps.newNoNullKeysMap(numFieldStatistics);
}
for (int i = 0; i < numFieldStatistics; i++) {
final String field = in.readString();
assert field != null;
final long maxDoc = in.readVLong();
final long docCount = subOne(in.readVLong());
final long sumTotalTermFreq = subOne(in.readVLong());
final long sumDocFreq = subOne(in.readVLong());
CollectionStatistics stats = new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
fieldStatistics.put(field, stats);
}
return fieldStatistics;
}
项目:Elasticsearch
文件:DfsSearchResult.java
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
final int numFieldStatistics = in.readVInt();
if (fieldStatistics == null) {
fieldStatistics = HppcMaps.newNoNullKeysMap(numFieldStatistics);
}
for (int i = 0; i < numFieldStatistics; i++) {
final String field = in.readString();
assert field != null;
final long maxDoc = in.readVLong();
final long docCount = subOne(in.readVLong());
final long sumTotalTermFreq = subOne(in.readVLong());
final long sumDocFreq = subOne(in.readVLong());
CollectionStatistics stats = new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
fieldStatistics.put(field, stats);
}
return fieldStatistics;
}
项目:lucene4ir
文件:SMARTBNNBNNSimilarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
TermStatistics... termStats)
{
float N, n, idf, adl;
idf = 1.0f;
N = collectionStats.maxDoc();
adl = collectionStats.sumTotalTermFreq() / N;
if (termStats.length == 1) {
n = termStats[0].docFreq();
idf = log(N/n);
}
else {
for (final TermStatistics stat : termStats) {
n = stat.docFreq();
idf += log(N/n);
}
}
return new TFIDFWeight(collectionStats.field(), idf, adl);
}
项目:DoSeR-Disambiguation
文件:FuzzyLabelSimilarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for each term in the
* phrase.
*
* @param collectionStats
* collection-level statistics
* @param termStats
* term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf score factor for the
* phrase and an explanation for each term.
*/
public Explanation idfExplain(final CollectionStatistics collectionStats,
final TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats) {
final long docFreq = stat.docFreq();
final float termIdf = idf(docFreq, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
+ ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:DoSeR
文件:FuzzyLabelSimilarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for each term in the
* phrase.
*
* @param collectionStats
* collection-level statistics
* @param termStats
* term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf score factor for the
* phrase and an explanation for each term.
*/
public Explanation idfExplain(final CollectionStatistics collectionStats,
final TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats) {
final long docFreq = stat.docFreq();
final float termIdf = idf(docFreq, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
+ ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:elasticsearch_my
文件:DfsSearchResult.java
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
out.writeVInt(fieldStatistics.size());
for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
out.writeString(c.key);
CollectionStatistics statistics = c.value;
assert statistics.maxDoc() >= 0;
out.writeVLong(statistics.maxDoc());
out.writeVLong(addOne(statistics.docCount()));
out.writeVLong(addOne(statistics.sumTotalTermFreq()));
out.writeVLong(addOne(statistics.sumDocFreq()));
}
}
项目:elasticsearch_my
文件:TermVectorsWriter.java
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
long sttf = fieldStats.sumTotalTermFreq();
assert (sttf >= -1);
writePotentiallyNegativeVLong(sttf);
long sdf = fieldStats.sumDocFreq();
assert (sdf >= -1);
writePotentiallyNegativeVLong(sdf);
int dc = (int) fieldStats.docCount();
assert (dc >= -1);
writePotentiallyNegativeVInt(dc);
}
项目:lams
文件:SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
BasicStats stats[] = new BasicStats[termStats.length];
for (int i = 0; i < termStats.length; i++) {
stats[i] = newStats(collectionStats.field(), queryBoost);
fillBasicStats(stats[i], collectionStats, termStats[i]);
}
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
项目:lams
文件:SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
// #positions(field) must be >= #positions(term)
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
long numberOfDocuments = collectionStats.maxDoc();
long docFreq = termStats.docFreq();
long totalTermFreq = termStats.totalTermFreq();
// codec does not supply totalTermFreq: substitute docFreq
if (totalTermFreq == -1) {
totalTermFreq = docFreq;
}
final long numberOfFieldTokens;
final float avgFieldLength;
long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
// field does not exist;
// We have to provide something if codec doesnt supply these measures,
// or if someone omitted frequencies for the field... negative values cause
// NaN/Inf for some scorers.
numberOfFieldTokens = docFreq;
avgFieldLength = 1;
} else {
numberOfFieldTokens = sumTotalTermFreq;
avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
}
// TODO: add sumDocFreq for field (numberOfFieldPostings)
stats.setNumberOfDocuments(numberOfDocuments);
stats.setNumberOfFieldTokens(numberOfFieldTokens);
stats.setAvgFieldLength(avgFieldLength);
stats.setDocFreq(docFreq);
stats.setTotalTermFreq(totalTermFreq);
}
项目:lams
文件:BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
* or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
* or any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
return 1f; // field does not exist, or stat is unsupported
} else {
return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
}
}
项目:lams
文件:BM25Similarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for
* each term in the phrase.
*
* @param collectionStats collection-level statistics
* @param termStats term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf
* score factor for the phrase and an explanation
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:lams
文件:BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats);
// compute freq-independent part of bm25 equation across all norm values
float cache[] = new float[256];
for (int i = 0; i < cache.length; i++) {
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
}
return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
项目:lams
文件:PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
PerFieldSimWeight weight = new PerFieldSimWeight();
weight.delegate = get(collectionStats.field());
weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
return weight;
}
项目:lams
文件:MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
SimWeight subStats[] = new SimWeight[sims.length];
for (int i = 0; i < subStats.length; i++) {
subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
}
return new MultiStats(subStats);
}
项目:lams
文件:LMSimilarity.java
/**
* Computes the collection probability of the current term in addition to the
* usual statistics.
*/
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
super.fillBasicStats(stats, collectionStats, termStats);
LMStats lmStats = (LMStats) stats;
lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
项目:lams
文件:TFIDFSimilarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for
* each term in the phrase.
*
* @param collectionStats collection-level statistics
* @param termStats term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf
* score factor for the phrase and an explanation
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:lams
文件:TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
final Explanation idf = termStats.length == 1
? idfExplain(collectionStats, termStats[0])
: idfExplain(collectionStats, termStats);
return new IDFStats(collectionStats.field(), idf, queryBoost);
}
项目:Elasticsearch
文件:DfsSearchResult.java
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
out.writeVInt(fieldStatistics.size());
for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
out.writeString(c.key);
CollectionStatistics statistics = c.value;
assert statistics.maxDoc() >= 0;
out.writeVLong(statistics.maxDoc());
out.writeVLong(addOne(statistics.docCount()));
out.writeVLong(addOne(statistics.sumTotalTermFreq()));
out.writeVLong(addOne(statistics.sumDocFreq()));
}
}
项目:Elasticsearch
文件:TermVectorsWriter.java
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
long sttf = fieldStats.sumTotalTermFreq();
assert (sttf >= -1);
writePotentiallyNegativeVLong(sttf);
long sdf = fieldStats.sumDocFreq();
assert (sdf >= -1);
writePotentiallyNegativeVLong(sdf);
int dc = (int) fieldStats.docCount();
assert (dc >= -1);
writePotentiallyNegativeVInt(dc);
}
项目:ir-generalized-translation-models
文件:BM25SimilarityLossless.java
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
* or returns <code>1</code> if the index does not store sumTotalTermFreq:
* any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
return 1f; // field does not exist, or stat is unsupported
} else {
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
return (float) (sumTotalTermFreq / (double) docCount);
}
}
项目:ir-generalized-translation-models
文件:BM25SimilarityLossless.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats);
return new BM25StatsFixed(collectionStats.field(), k1, b, idf, avgdl);
}
项目:linden
文件:LindenSimilarity.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
final long df = termStats.docFreq();
final long max = collectionStats.maxDoc();
final float idf = idfManager.getIDF(termStats.term().utf8ToString());
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
项目:lucene4ir
文件:BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code>,
* or returns <code>1</code> if the index does not store sumTotalTermFreq:
* any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
return 1f; // field does not exist, or stat is unsupported
} else {
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
return (float) (sumTotalTermFreq / (double) docCount);
}
}
项目:lucene4ir
文件:BM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats);
// compute freq-independent part of bm25 equation across all norm values
float cache[] = new float[256];
for (int i = 0; i < cache.length; i++) {
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
}
return new BM25Stats(collectionStats.field(), idf, avgdl, cache);
}
项目:lucene4ir
文件:OKAPIBM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
TermStatistics... termStats)
{
long N, n;
float idf_, avdl;
idf_ = 1.0f;
N = collectionStats.docCount();
if (N == -1)
N = collectionStats.maxDoc();
avdl = collectionStats.sumTotalTermFreq() / N;
if (termStats.length == 1) {
n = termStats[0].docFreq();
idf_ = idf(n, N);
}
else { /* computation for a phrase */
for (final TermStatistics stat : termStats) {
n = stat.docFreq();
idf_ += idf(n, N);
}
}
return new TFIDFWeight(collectionStats.field(), idf_, avdl);
}
项目:lucene4ir
文件:ExampleStatsApp.java
public void reportCollectionStatistics()throws IOException {
IndexSearcher searcher = new IndexSearcher(reader);
CollectionStatistics collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_ALL);
long token_count = collectionStats.sumTotalTermFreq();
long doc_count = collectionStats.docCount();
long sum_doc_count = collectionStats.sumDocFreq();
long avg_doc_length = token_count / doc_count;
System.out.println("ALL: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);
collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_TITLE);
token_count = collectionStats.sumTotalTermFreq();
doc_count = collectionStats.docCount();
sum_doc_count = collectionStats.sumDocFreq();
avg_doc_length = token_count / doc_count;
System.out.println("TITLE: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);
collectionStats = searcher.collectionStatistics(Lucene4IRConstants.FIELD_CONTENT);
token_count = collectionStats.sumTotalTermFreq();
doc_count = collectionStats.docCount();
sum_doc_count = collectionStats.sumDocFreq();
avg_doc_length = token_count / doc_count;
System.out.println("CONTENT: Token count: " + token_count+ " Doc Count: " + doc_count + " sum doc: " + sum_doc_count + " avg doc len: " + avg_doc_length);
}
项目:DoSeR-Disambiguation
文件:FuzzyLabelSimilarity.java
@Override
public final SimWeight computeWeight(final float queryBoost,
final CollectionStatistics collectionStats,
final TermStatistics... termStats) {
final Explanation idf = termStats.length == 1 ? this.idfExplain(
collectionStats, termStats[0]) : this.idfExplain(
collectionStats, termStats);
return new IDFStats(collectionStats.field(), idf, queryBoost);
}
项目:elasticsearch-simple-similarity
文件:SimpleSimilarity.java
public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
if (termStats.length == 1) {
return new SimpleScore(boost, collectionStats, termStats[0]);
} else {
return new SimpleScore(boost, collectionStats, termStats);
}
}
项目:elasticsearch-simple-similarity
文件:SimpleSimilarity.java
SimpleScore(float boost, CollectionStatistics collectionStats, TermStatistics termStats[]) {
float total = 0.0f;
List<Explanation> scores = new ArrayList<>();
for (final TermStatistics stat : termStats) {
String description = String.format("simple score for (%s:%s)", collectionStats.field(), stat.term().utf8ToString());
scores.add(Explanation.match(1.0f, description));
total += 1.0f;
}
this.score = Explanation.match(total, "total score, sum of:", scores);
this.boost = Explanation.match(boost, "boost");
}
项目:LuceneDB
文件:DummySimilarity.java
@Override
public SimWeight computeWeight(float queryBoost,
CollectionStatistics collectionStats, TermStatistics... termStats) {
return new SimWeight() {
@Override
public void normalize(float queryNorm, float topLevelBoost) {
}
@Override
public float getValueForNormalization() {
return 0;
}
};
}
项目:search
文件:SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
BasicStats stats[] = new BasicStats[termStats.length];
for (int i = 0; i < termStats.length; i++) {
stats[i] = newStats(collectionStats.field(), queryBoost);
fillBasicStats(stats[i], collectionStats, termStats[i]);
}
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
项目:search
文件:SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
* Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
// #positions(field) must be >= #positions(term)
assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
long numberOfDocuments = collectionStats.maxDoc();
long docFreq = termStats.docFreq();
long totalTermFreq = termStats.totalTermFreq();
// codec does not supply totalTermFreq: substitute docFreq
if (totalTermFreq == -1) {
totalTermFreq = docFreq;
}
final long numberOfFieldTokens;
final float avgFieldLength;
long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
// field does not exist;
// We have to provide something if codec doesnt supply these measures,
// or if someone omitted frequencies for the field... negative values cause
// NaN/Inf for some scorers.
numberOfFieldTokens = docFreq;
avgFieldLength = 1;
} else {
numberOfFieldTokens = sumTotalTermFreq;
avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
}
// TODO: add sumDocFreq for field (numberOfFieldPostings)
stats.setNumberOfDocuments(numberOfDocuments);
stats.setNumberOfFieldTokens(numberOfFieldTokens);
stats.setAvgFieldLength(avgFieldLength);
stats.setDocFreq(docFreq);
stats.setTotalTermFreq(totalTermFreq);
}
项目:search
文件:BM25Similarity.java
/** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
* or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
* or any field that omits frequency information). */
protected float avgFieldLength(CollectionStatistics collectionStats) {
final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
if (sumTotalTermFreq <= 0) {
return 1f; // field does not exist, or stat is unsupported
} else {
return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
}
}
项目:search
文件:BM25Similarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for
* each term in the phrase.
*
* @param collectionStats collection-level statistics
* @param termStats term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf
* score factor for the phrase and an explanation
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:search
文件:BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
float avgdl = avgFieldLength(collectionStats);
// compute freq-independent part of bm25 equation across all norm values
float cache[] = new float[256];
for (int i = 0; i < cache.length; i++) {
cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
}
return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
项目:search
文件:PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
PerFieldSimWeight weight = new PerFieldSimWeight();
weight.delegate = get(collectionStats.field());
weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
return weight;
}
项目:search
文件:MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
SimWeight subStats[] = new SimWeight[sims.length];
for (int i = 0; i < subStats.length; i++) {
subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
}
return new MultiStats(subStats);
}
项目:search
文件:LMSimilarity.java
/**
* Computes the collection probability of the current term in addition to the
* usual statistics.
*/
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
super.fillBasicStats(stats, collectionStats, termStats);
LMStats lmStats = (LMStats) stats;
lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
项目:search
文件:TFIDFSimilarity.java
/**
* Computes a score factor for a phrase.
*
* <p>
* The default implementation sums the idf factor for
* each term in the phrase.
*
* @param collectionStats collection-level statistics
* @param termStats term-level statistics for the terms in the phrase
* @return an Explain object that includes both an idf
* score factor for the phrase and an explanation
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
final long max = collectionStats.maxDoc();
float idf = 0.0f;
final Explanation exp = new Explanation();
exp.setDescription("idf(), sum of:");
for (final TermStatistics stat : termStats ) {
final long df = stat.docFreq();
final float termIdf = idf(df, max);
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
idf += termIdf;
}
exp.setValue(idf);
return exp;
}
项目:search
文件:TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
final Explanation idf = termStats.length == 1
? idfExplain(collectionStats, termStats[0])
: idfExplain(collectionStats, termStats);
return new IDFStats(collectionStats.field(), idf, queryBoost);
}