Java 类org.apache.lucene.search.similarities.Similarity 实例源码
项目:lucene-custom-query
文件:SeqSpanScorer.java
/**
 * Creates a scorer over the conjunction of every postings iterator supplied for the sequence.
 *
 * @param weight      weight handed to the superclass and also kept in {@code selfWeight}
 * @param postings    per-term postings; each entry contributes its iterator to the conjunction
 *                    and is wrapped, together with its position, in a {@code PostingsAndPosition}
 * @param docScorer   similarity scorer, stored as-is
 * @param needsScores stored as-is
 * @param matchCost   stored as-is
 * @throws IOException declared by the signature
 */
SeqSpanScorer(SeqSpanWeight weight, PostingsAndFreq[] postings,
        Similarity.SimScorer docScorer, boolean needsScores,
        float matchCost) throws IOException {
    super(weight);
    this.selfWeight = weight;
    this.docScorer = docScorer;
    this.needsScores = needsScores;
    this.matchCost = matchCost;

    final int termCount = postings.length;
    final List<DocIdSetIterator> docIterators = new ArrayList<>(termCount);
    final PostingsAndPosition[] positioned = new PostingsAndPosition[termCount];
    for (int i = 0; i < termCount; i++) {
        final PostingsAndFreq entry = postings[i];
        docIterators.add(entry.postings);
        positioned[i] = new PostingsAndPosition(entry.postings, entry.position);
    }
    conjunction = ConjunctionDISI.intersectIterators(docIterators);
    this.postings = positioned;
}
项目:lams
文件:ExactPhraseScorer.java
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
chunkStates = new ChunkState[postings.length];
endMinus1 = postings.length-1;
lead = postings[0].postings;
// min(cost)
cost = lead.cost();
for(int i=0;i<postings.length;i++) {
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
}
}
项目:lams
文件:PayloadTermQuery.java
/**
 * Folds the payload of the current span position into {@code payloadScore} and bumps
 * {@code payloadsSeen}. Does nothing when {@code termSpans} reports no payload available.
 *
 * @param similarity not referenced by this implementation
 * @throws IOException declared by the signature
 */
protected void processPayload(Similarity similarity) throws IOException {
    if (!termSpans.isPayloadAvailable()) {
        // zero out the payload?
        return;
    }
    payload = termSpans.getPostings().getPayload();
    if (payload == null) {
        // No payload bytes at this position: a factor of 1 is passed instead.
        payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
    } else {
        payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore,
                docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
    }
    payloadsSeen++;
}
项目:search
文件:MemoryIndex.java
/**
 * Returns the norm values for {@code field}, or {@code null} when the field is unknown or omits norms.
 * The most recently computed result is cached and reused while both the field name and the
 * similarity instance stay the same.
 */
@Override
public NumericDocValues getNormValues(String field) {
    final FieldInfo fieldInfo = fieldInfos.get(field);
    if (fieldInfo == null || fieldInfo.omitsNorms()) {
        return null;
    }

    final NumericDocValues cached = cachedNormValues;
    final Similarity sim = getSimilarity();
    if (field.equals(cachedFieldName) && sim == cachedSimilarity) {
        return cached;
    }

    // Not cached: derive the invert state from the field's Info, falling back to defaults when absent.
    final Info info = getInfo(field);
    final int numTokens;
    final int numOverlapTokens;
    final float boost;
    if (info == null) {
        numTokens = 0;
        numOverlapTokens = 0;
        boost = 1.0f;
    } else {
        numTokens = info.numTokens;
        numOverlapTokens = info.numOverlapTokens;
        boost = info.getBoost();
    }
    final FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
    final long value = sim.computeNorm(invertState);
    final NumericDocValues computed = new MemoryIndexNormDocValues(value);

    // cache it for future reuse
    cachedNormValues = computed;
    cachedFieldName = field;
    cachedSimilarity = sim;
    if (DEBUG) {
        System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
    }
    return computed;
}
项目:search
文件:TestTaxonomyFacetCounts.java
/**
 * Indexes one document with a text field and a facet field, using a per-field similarity
 * that asserts it is only ever asked for the field named {@code "field"}.
 */
public void testReallyNoNormsForDrillDown() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxonomyDir = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));

    // Any similarity lookup for a name other than "field" fails the test.
    PerFieldSimilarityWrapper fieldOnlySimilarity = new PerFieldSimilarityWrapper() {
        final Similarity sim = new DefaultSimilarity();

        @Override
        public Similarity get(String name) {
            assertEquals("field", name);
            return sim;
        }
    };
    writerConfig.setSimilarity(fieldOnlySimilarity);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
    FacetsConfig facetsConfig = new FacetsConfig();

    Document document = new Document();
    document.add(newTextField("field", "text", Field.Store.NO));
    document.add(new FacetField("a", "path"));
    indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

    IOUtils.close(indexWriter, taxonomyWriter, indexDir, taxonomyDir);
}
项目:search
文件:ExactPhraseScorer.java
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer) throws IOException {
super(weight);
this.docScorer = docScorer;
chunkStates = new ChunkState[postings.length];
endMinus1 = postings.length-1;
lead = postings[0].postings;
// min(cost)
cost = lead.cost();
for(int i=0;i<postings.length;i++) {
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
}
}
项目:search
文件:PayloadTermQuery.java
/**
 * Updates {@code payloadScore} from the payload at the current span position and increments
 * {@code payloadsSeen}. Returns without changes when {@code termSpans} has no payload available.
 *
 * @param similarity not referenced by this implementation
 * @throws IOException declared by the signature
 */
protected void processPayload(Similarity similarity) throws IOException {
    if (!termSpans.isPayloadAvailable()) {
        // zero out the payload?
        return;
    }
    payload = termSpans.getPostings().getPayload();
    if (payload == null) {
        // Nothing stored at this position: score with a factor of 1.
        payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
    } else {
        payloadScore = function.currentScore(doc, term.field(),
                spans.start(), spans.end(), payloadsSeen, payloadScore,
                docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
    }
    payloadsSeen++;
}
项目:search
文件:PayloadHelper.java
/**
 * Builds an in-memory index of {@code numDocs} documents and returns a searcher over it.
 * Every document carries three stored text fields ({@code FIELD}, {@code MULTI_FIELD} and
 * {@code NO_PAYLOAD_FIELD}) filled from {@code English.intToEnglish(i)}; {@code MULTI_FIELD}
 * holds the text twice, separated by a space. Documents are analyzed with a
 * {@code PayloadAnalyzer}. The opened reader is kept in the {@code reader} field.
 *
 * @param random     randomness source passed to the {@code MockDirectoryWrapper}
 * @param similarity the Similarity used both by the writer and by the returned searcher
 * @param numDocs    the number of documents to add
 * @return an IndexSearcher over the freshly written index
 * @throws IOException declared by the signature
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    // TODO randomize this
    IndexWriterConfig writerConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity);
    IndexWriter writer = new IndexWriter(directory, writerConfig);

    for (int docNum = 0; docNum < numDocs; docNum++) {
        final String words = English.intToEnglish(docNum);
        Document doc = new Document();
        doc.add(new TextField(FIELD, words, Field.Store.YES));
        doc.add(new TextField(MULTI_FIELD, words + " " + words, Field.Store.YES));
        doc.add(new TextField(NO_PAYLOAD_FIELD, words, Field.Store.YES));
        writer.addDocument(doc);
    }

    reader = DirectoryReader.open(writer, true);
    writer.close();

    IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
    searcher.setSimilarity(similarity);
    return searcher;
}
项目:search
文件:TestBooleanMinShouldMatch.java
/**
 * Runs a one-clause SHOULD query with and without {@code minimumNumberShouldMatch(1)} under a
 * custom coord factor and compares the results through {@code assertSubsetOfSameScores}.
 * The searcher's previous similarity is restored afterwards.
 */
public void testRewriteCoord1() throws Exception {
    final Similarity previous = s.getSimilarity();
    try {
        final Similarity customCoord = new DefaultSimilarity() {
            @Override
            public float coord(int overlap, int maxOverlap) {
                return overlap / (maxOverlap + 1f);
            }
        };
        s.setSimilarity(customCoord);

        BooleanQuery plain = new BooleanQuery();
        plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);

        BooleanQuery withMinMatch = new BooleanQuery();
        withMinMatch.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        withMinMatch.setMinimumNumberShouldMatch(1);

        TopDocs plainHits = s.search(plain, null, 100);
        TopDocs minMatchHits = s.search(withMinMatch, null, 100);
        assertSubsetOfSameScores(withMinMatch, plainHits, minMatchHits);
    } finally {
        s.setSimilarity(previous);
    }
}
项目:search
文件:TestBooleanMinShouldMatch.java
/**
 * Runs a one-clause SHOULD query with and without an extra MUST_NOT clause under a custom
 * coord factor and compares the results through {@code assertSubsetOfSameScores}.
 * The searcher's previous similarity is restored afterwards.
 */
public void testRewriteNegate() throws Exception {
    final Similarity previous = s.getSimilarity();
    try {
        final Similarity customCoord = new DefaultSimilarity() {
            @Override
            public float coord(int overlap, int maxOverlap) {
                return overlap / (maxOverlap + 1f);
            }
        };
        s.setSimilarity(customCoord);

        BooleanQuery plain = new BooleanQuery();
        plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);

        BooleanQuery withNegation = new BooleanQuery();
        withNegation.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
        withNegation.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);

        TopDocs plainHits = s.search(plain, null, 100);
        TopDocs negatedHits = s.search(withNegation, null, 100);
        assertSubsetOfSameScores(withNegation, plainHits, negatedHits);
    } finally {
        s.setSimilarity(previous);
    }
}
项目:search
文件:TestBoolean2.java
/**
 * Checks a query of three MUST clauses plus one SHOULD clause against the expected documents
 * {2, 3} while a custom coord factor is installed; the previous similarity is restored afterwards.
 */
@Test
public void testQueries10() throws Exception {
    final BooleanQuery query = new BooleanQuery();
    for (String required : new String[] {"w3", "xx", "w2"}) {
        query.add(new TermQuery(new Term(field, required)), BooleanClause.Occur.MUST);
    }
    query.add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.SHOULD);

    final int[] expDocNrs = {2, 3};

    final Similarity previous = searcher.getSimilarity();
    try {
        final Similarity customCoord = new DefaultSimilarity() {
            @Override
            public float coord(int overlap, int maxOverlap) {
                return overlap / (maxOverlap - 1f);
            }
        };
        searcher.setSimilarity(customCoord);
        queriesTest(query, expDocNrs);
    } finally {
        searcher.setSimilarity(previous);
    }
}
项目:search
文件:TestNorms.java
/**
 * Writes at least 100 line-file documents into {@code dir}. Each document additionally gets a
 * stored text field named {@code byteTestField} whose value and boost are the same random
 * integer drawn from {@code nextInt(255)}. The writer uses a {@code MySimProvider} similarity.
 *
 * @param dir directory the index is written to
 * @throws IOException declared by the signature
 */
public void buildIndex(Directory dir) throws IOException {
    final Random rnd = random();

    final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    final IndexWriterConfig writerConfig = newIndexWriterConfig(mockAnalyzer);
    final Similarity simProvider = new MySimProvider();
    writerConfig.setSimilarity(simProvider);

    final RandomIndexWriter indexWriter = new RandomIndexWriter(rnd, dir, writerConfig);
    final LineFileDocs lineDocs = new LineFileDocs(rnd, defaultCodecSupportsDocValues());

    final int docCount = atLeast(100);
    for (int n = 0; n < docCount; n++) {
        final Document doc = lineDocs.nextDoc();
        final int boost = random().nextInt(255);

        final Field boostField = new TextField(byteTestField, Integer.toString(boost), Field.Store.YES);
        boostField.setBoost(boost);
        doc.add(boostField);
        indexWriter.addDocument(doc);
        // The field is taken off again right after the document has been added.
        doc.removeField(byteTestField);

        if (rarely()) {
            indexWriter.commit();
        }
    }

    indexWriter.commit();
    indexWriter.close();
    lineDocs.close();
}
项目:search
文件:IndexSchema.java
/**
 * Creates a {@code SimilarityFactory} from a schema node.
 * The class named by the node's {@code SimilarityFactory.CLASS_NAME} attribute is instantiated;
 * if it is itself a factory it is initialised with the node's child parameters, otherwise it is
 * wrapped in a factory that casts it to {@code Similarity} when asked.
 *
 * @param loader resource loader used to instantiate the configured class
 * @param node   the schema node, may be {@code null}
 * @return the factory, or {@code null} when {@code node} is {@code null}
 */
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) {
    if (node == null) {
        return null;
    }

    final String classArg = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME);
    final Object obj = loader.newInstance(classArg, Object.class, "search.similarities.");

    if (!(obj instanceof SimilarityFactory)) {
        // just like always, assume it's a Similarity and get a ClassCastException - reasonable error handling
        return new SimilarityFactory() {
            @Override
            public Similarity getSimilarity() {
                return (Similarity) obj;
            }
        };
    }

    // configure a factory, get a similarity back
    final NamedList<Object> factoryArgs = DOMUtil.childNodesToNamedList(node);
    factoryArgs.add(SimilarityFactory.CLASS_NAME, classArg);
    final SolrParams params = SolrParams.toSolrParams(factoryArgs);
    final SimilarityFactory configuredFactory = (SimilarityFactory) obj;
    configuredFactory.init(params);
    return configuredFactory;
}
项目:biospectra
文件:Classifier.java
/**
 * Stores the classifier settings and opens the index located at {@code indexPath}.
 *
 * @param indexPath               existing directory that holds the index
 * @param kmerSize                k-mer size handed to the query analyzer
 * @param kmerSkips               k-mer skip count handed to the query analyzer
 * @param minStrandKmer           flag handed to the query analyzer
 * @param minShouldMatch          stored as-is
 * @param queryGenerationAlgorithm stored as-is
 * @param similarity              similarity installed on the searcher; when {@code null} the
 *                                searcher keeps whatever similarity it was created with
 * @throws IllegalArgumentException if {@code indexPath} does not exist or is not a directory
 * @throws Exception                if the index cannot be opened
 */
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer, double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm, Similarity similarity) throws Exception {
    if(!indexPath.exists() || !indexPath.isDirectory()) {
        throw new IllegalArgumentException("indexPath is not a directory or does not exist");
    }

    this.indexPath = indexPath;
    this.kmerSize = kmerSize;
    this.kmerSkips = kmerSkips;
    this.minStrandKmer = minStrandKmer;
    this.queryAnalyzer = new KmerQueryAnalyzer(this.kmerSize, this.kmerSkips, this.minStrandKmer);

    Directory dir = new MMapDirectory(this.indexPath.toPath());
    try {
        this.indexReader = DirectoryReader.open(dir);
    } catch (Exception openFailure) {
        // Opening the reader failed (e.g. no index in the directory): release the directory
        // instead of leaking it, then report the original failure.
        try {
            dir.close();
        } catch (Exception closeFailure) {
            openFailure.addSuppressed(closeFailure);
        }
        throw openFailure;
    }
    this.indexSearcher = new IndexSearcher(this.indexReader);
    if(similarity != null) {
        this.indexSearcher.setSimilarity(similarity);
    }

    this.minShouldMatch = minShouldMatch;
    this.queryGenerationAlgorithm = queryGenerationAlgorithm;

    // NOTE(review): this changes a static, process-wide BooleanQuery setting.
    BooleanQuery.setMaxClauseCount(10000);
}
项目:elasticsearch_my
文件:HasChildQueryBuilder.java
/**
 * Captures everything needed to build the join query later; all arguments are stored unchanged.
 *
 * @param toQuery                   stored as-is
 * @param innerQuery                stored as-is
 * @param minChildren               stored as-is
 * @param maxChildren               stored as-is
 * @param parentType                stored as-is
 * @param scoreMode                 stored as-is
 * @param parentChildIndexFieldData stored as-is
 * @param similarity                stored as-is
 */
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren,
        String parentType, ScoreMode scoreMode, ParentChildIndexFieldData parentChildIndexFieldData,
        Similarity similarity) {
    // queries
    this.toQuery = toQuery;
    this.innerQuery = innerQuery;

    // child-count bounds
    this.minChildren = minChildren;
    this.maxChildren = maxChildren;

    // join context
    this.parentType = parentType;
    this.parentChildIndexFieldData = parentChildIndexFieldData;

    // scoring
    this.scoreMode = scoreMode;
    this.similarity = similarity;
}
项目:elasticsearch_my
文件:EngineConfig.java
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}.
 * <p>
 * {@code openMode} must not be {@code null}. A {@code null} {@code warmer} is replaced by a
 * no-op warmer. The codec name is read from {@code indexSettings}, and the indexing buffer is
 * fixed at 256 MB. All remaining arguments are stored unchanged.
 *
 * @throws IllegalArgumentException if {@code openMode} is {@code null}
 */
public EngineConfig(OpenMode openMode, ShardId shardId, ThreadPool threadPool,
                    IndexSettings indexSettings, Engine.Warmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy,
                    MergePolicy mergePolicy, Analyzer analyzer,
                    Similarity similarity, CodecService codecService, Engine.EventListener eventListener,
                    TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache, QueryCachingPolicy queryCachingPolicy,
                    TranslogConfig translogConfig, TimeValue flushMergesAfter, ReferenceManager.RefreshListener refreshListeners,
                    long maxUnsafeAutoIdTimestamp) {
    // --- argument checks ---
    if (openMode == null) {
        throw new IllegalArgumentException("openMode must not be null");
    }
    assert maxUnsafeAutoIdTimestamp >= IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP :
        "maxUnsafeAutoIdTimestamp must be >= -1 but was " + maxUnsafeAutoIdTimestamp;

    this.shardId = shardId;
    this.indexSettings = indexSettings;
    this.threadPool = threadPool;
    if (warmer == null) {
        // No warmer supplied: fall back to one that does nothing.
        this.warmer = (a) -> {};
    } else {
        this.warmer = warmer;
    }
    this.store = store;
    this.deletionPolicy = deletionPolicy;
    this.mergePolicy = mergePolicy;
    this.analyzer = analyzer;
    this.similarity = similarity;
    this.codecService = codecService;
    this.eventListener = eventListener;
    codecName = indexSettings.getValue(INDEX_CODEC_SETTING);

    // We give IndexWriter a "huge" (256 MB) buffer, so it won't flush on its own unless the ES indexing buffer is also huge and/or
    // there are not too many shards allocated to this node. Instead, IndexingMemoryController periodically checks
    // and refreshes the most heap-consuming shards when total indexing heap usage across all shards is too high:
    indexingBufferSize = new ByteSizeValue(256, ByteSizeUnit.MB);

    this.translogRecoveryPerformer = translogRecoveryPerformer;
    this.queryCache = queryCache;
    this.queryCachingPolicy = queryCachingPolicy;
    this.translogConfig = translogConfig;
    this.flushMergesAfter = flushMergesAfter;
    this.openMode = openMode;
    this.refreshListeners = refreshListeners;
    this.maxUnsafeAutoIdTimestamp = maxUnsafeAutoIdTimestamp;
}
项目:elasticsearch_my
文件:IndexModuleTests.java
/**
 * Registers a custom similarity type on an {@code IndexModule} and checks that the resulting
 * index service exposes it under the configured name, with the configured key.
 */
public void testAddSimilarity() throws IOException {
    final Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.similarity.my_similarity.type", "test_similarity")
            .put("index.similarity.my_similarity.key", "there is a key")
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();

    final IndexModule indexModule = new IndexModule(
            IndexSettingsModule.newIndexSettings("foo", settings),
            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));

    // Factory for the "test_similarity" type: echoes the name it is given and builds a
    // TestSimilarity from the provider's "key" setting.
    indexModule.addSimilarity("test_similarity", (providerName, providerSettings, indexLevelSettings) -> new SimilarityProvider() {
        @Override
        public String name() {
            return providerName;
        }

        @Override
        public Similarity get() {
            return new TestSimilarity(providerSettings.get("key"));
        }
    });

    final IndexService service = newIndexService(indexModule);
    final SimilarityService similarities = service.similarityService();

    assertNotNull(similarities.getSimilarity("my_similarity"));
    assertTrue(similarities.getSimilarity("my_similarity").get() instanceof TestSimilarity);
    assertEquals("my_similarity", similarities.getSimilarity("my_similarity").name());
    assertEquals("there is a key", ((TestSimilarity) similarities.getSimilarity("my_similarity").get()).key);

    service.close("simon says", false);
}
项目:elasticsearch_my
文件:HasChildQueryBuilderTests.java
/**
 * Checks that the late-parsing query produced for a has-child query reports, for the field
 * {@code "custom_string"}, a similarity of the same class as the built-in one named by the
 * {@code similarity} field.
 */
public void testNonDefaultSimilarity() throws Exception {
    QueryShardContext context = createShardContext();
    HasChildQueryBuilder builder = QueryBuilders.hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
    HasChildQueryBuilder.LateParsingQuery lateQuery = (HasChildQueryBuilder.LateParsingQuery) builder.toQuery(context);

    Settings versionSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
    Similarity expected = SimilarityService.BUILT_IN.get(similarity)
            .apply(similarity, Settings.EMPTY, versionSettings)
            .get();

    Similarity actual = ((PerFieldSimilarityWrapper) lateQuery.getSimilarity()).get("custom_string");
    assertThat(actual, instanceOf(expected.getClass()));
}
项目:lams
文件:SpanScorer.java
/**
 * Creates the scorer and advances {@code spans} once, remembering in {@code more} what that
 * first {@code next()} call returned. The current document starts out as {@code -1}.
 *
 * @param spans     the spans to score
 * @param weight    weight handed to the superclass
 * @param docScorer similarity scorer, stored as-is
 * @throws IOException declared by the signature
 */
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
        throws IOException {
    super(weight);
    this.spans = spans;
    this.docScorer = docScorer;
    this.doc = -1;
    this.more = spans.next();
}
项目:lams
文件:SloppyPhraseScorer.java
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
// min(cost)
cost = postings[0].postings.cost();
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;
}
max.next = min; // make it cyclic for easier manipulation
}
}
项目:lams
文件:IndexWriterConfig.java
/**
 * Expert: sets the {@link Similarity} implementation this IndexWriter will use.
 * <p>
 * <b>NOTE:</b> {@code null} is rejected.
 *
 * <p>Only takes effect when IndexWriter is first created.
 *
 * @param similarity the similarity to use; must not be {@code null}
 * @return this config, to allow call chaining
 * @throws IllegalArgumentException if {@code similarity} is {@code null}
 */
public IndexWriterConfig setSimilarity(Similarity similarity) {
    if (similarity != null) {
        this.similarity = similarity;
        return this;
    }
    throw new IllegalArgumentException("similarity must not be null");
}
项目:Elasticsearch
文件:SimilarityService.java
/**
 * Resolves the default and "base" similarities from the lookup service and picks the similarity
 * exposed as {@code perFieldSimilarity}: a {@code PerFieldSimilarity} when a mapper service is
 * present, otherwise the default similarity itself.
 */
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService, final MapperService mapperService) {
    super(index, indexSettingsService.getSettings());
    this.similarityLookupService = similarityLookupService;
    this.mapperService = mapperService;

    final Similarity defaultSimilarity =
            similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();

    // Expert users can configure the base type as being different to default, but out-of-box we use default.
    final Similarity baseSimilarity;
    if (similarityLookupService.similarity("base") == null) {
        baseSimilarity = defaultSimilarity;
    } else {
        baseSimilarity = similarityLookupService.similarity("base").get();
    }

    if (mapperService == null) {
        this.perFieldSimilarity = defaultSimilarity;
    } else {
        this.perFieldSimilarity = new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService);
    }
}
项目:Elasticsearch
文件:HasChildQueryParser.java
/**
 * Creates a {@code LateParsingQuery} after translating {@code scoreType} into the corresponding
 * {@code ScoreMode} and treating a {@code maxChildren} of 0 as unbounded.
 *
 * @throws UnsupportedOperationException if {@code scoreType} has no matching score mode
 * @throws IOException declared by the signature
 */
public static Query joinUtilHelper(String parentType, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity, Query toQuery, ScoreType scoreType, Query innerQuery, int minChildren, int maxChildren) throws IOException {
    final ScoreMode scoreMode = luceneScoreModeFor(scoreType);
    // 0 in pre 2.x p/c impl means unbounded
    final int effectiveMaxChildren = (maxChildren == 0) ? Integer.MAX_VALUE : maxChildren;
    return new LateParsingQuery(toQuery, innerQuery, minChildren, effectiveMaxChildren, parentType, scoreMode, parentChildIndexFieldData, similarity);
}

/** Maps a legacy {@code ScoreType} onto the join module's {@code ScoreMode}. */
// TODO: move entirely over from ScoreType to org.apache.lucene.join.ScoreMode, when we drop the 1.x parent child code.
private static ScoreMode luceneScoreModeFor(ScoreType scoreType) {
    switch (scoreType) {
        case NONE:
            return ScoreMode.None;
        case MIN:
            return ScoreMode.Min;
        case MAX:
            return ScoreMode.Max;
        case SUM:
            return ScoreMode.Total;
        case AVG:
            return ScoreMode.Avg;
        default:
            throw new UnsupportedOperationException("score type [" + scoreType + "] not supported");
    }
}
项目:Elasticsearch
文件:HasChildQueryParser.java
/**
 * Holds the pieces needed to build the join query later; every argument is stored unchanged.
 *
 * @param toQuery                   stored as-is
 * @param innerQuery                stored as-is
 * @param minChildren               stored as-is
 * @param maxChildren               stored as-is
 * @param parentType                stored as-is
 * @param scoreMode                 stored as-is
 * @param parentChildIndexFieldData stored as-is
 * @param similarity                stored as-is
 */
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren, String parentType, ScoreMode scoreMode, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity) {
    // queries
    this.toQuery = toQuery;
    this.innerQuery = innerQuery;

    // child-count bounds
    this.minChildren = minChildren;
    this.maxChildren = maxChildren;

    // join context
    this.parentType = parentType;
    this.parentChildIndexFieldData = parentChildIndexFieldData;

    // scoring
    this.scoreMode = scoreMode;
    this.similarity = similarity;
}
项目:Elasticsearch
文件:EngineConfig.java
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}.
 * <p>
 * Most arguments are stored unchanged in the field of the same name. Several further values
 * are read from {@code indexSettings}: the auto-generated-id optimisation flag, the
 * compound-on-flush flag, the codec name, the GC-deletes interval, the version map size
 * setting and the force-new-translog flag.
 */
public EngineConfig(ShardId shardId, ThreadPool threadPool, ShardIndexingService indexingService,
Settings indexSettings, IndicesWarmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy,
MergePolicy mergePolicy, MergeSchedulerConfig mergeSchedulerConfig, Analyzer analyzer,
Similarity similarity, CodecService codecService, Engine.FailedEngineListener failedEngineListener,
TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache, QueryCachingPolicy queryCachingPolicy, IndexSearcherWrappingService wrappingService, TranslogConfig translogConfig) {
this.shardId = shardId;
this.indexSettings = indexSettings;
this.threadPool = threadPool;
this.indexingService = indexingService;
this.warmer = warmer;
this.store = store;
this.deletionPolicy = deletionPolicy;
this.mergePolicy = mergePolicy;
this.mergeSchedulerConfig = mergeSchedulerConfig;
this.analyzer = analyzer;
this.similarity = similarity;
this.codecService = codecService;
this.failedEngineListener = failedEngineListener;
this.wrappingService = wrappingService;
// Falls back to false when the setting is absent.
this.optimizeAutoGenerateId = indexSettings.getAsBoolean(EngineConfig.INDEX_OPTIMIZE_AUTOGENERATED_ID_SETTING, false);
// The fallback here is the current value of the compoundOnFlush field itself.
this.compoundOnFlush = indexSettings.getAsBoolean(EngineConfig.INDEX_COMPOUND_ON_FLUSH, compoundOnFlush);
codecName = indexSettings.get(EngineConfig.INDEX_CODEC_SETTING, EngineConfig.DEFAULT_CODEC_NAME);
// We start up inactive and rely on IndexingMemoryController to give us our fair share once we start indexing:
indexingBufferSize = IndexingMemoryController.INACTIVE_SHARD_INDEXING_BUFFER;
gcDeletesInMillis = indexSettings.getAsTime(INDEX_GC_DELETES_SETTING, EngineConfig.DEFAULT_GC_DELETES).millis();
versionMapSizeSetting = indexSettings.get(INDEX_VERSION_MAP_SIZE, DEFAULT_VERSION_MAP_SIZE);
// NOTE(review): called right after versionMapSizeSetting is assigned; presumably it derives
// the effective size from that setting - confirm before reordering these statements.
updateVersionMapSize();
this.translogRecoveryPerformer = translogRecoveryPerformer;
// Falls back to false when the setting is absent.
this.forceNewTranslog = indexSettings.getAsBoolean(INDEX_FORCE_NEW_TRANSLOG, false);
this.queryCache = queryCache;
this.queryCachingPolicy = queryCachingPolicy;
this.translogConfig = translogConfig;
}
项目:ir-generalized-translation-models
文件:AugmentedTermScorer.java
/**
* Construct an <code>query.{@link AugmentedTermScorer}</code>.
*
* @param weight
* The weight of the <code>Term</code> in the query.
* @param mainTerm
* An iterator over the documents matching the main <code>Term</code>.
* @param similarPostings
* A list of <code>PostingsEnumWeightTuple</code>: term iterator, weight pairs
* @param docScorer
* The <code>Similarity.SimScorer</code> implementation
* to be used for score computations.
*/
public AugmentedTermScorer(Weight weight, PostingsEnum mainTerm, List<PostingsEnumWeightTuple> similarPostings, Similarity.SimScorer docScorer) {
super(weight);
this.postings = new PostingsEnumWeightTuple[similarPostings.size() + 1];
this.postings[0] = new PostingsEnumWeightTuple(mainTerm,1f);
for (int i = 0; i < similarPostings.size(); i++) {
this.postings[i + 1] = similarPostings.get(i);
}
this.iterator = new MultiDocIdSetIterator(this.postings);
this.docScorer = docScorer;
}
项目:linden
文件:LindenSimilarityFactory.java
/**
 * Creates a {@code LindenSimilarity} from the {@code "dict"} and {@code "norm"} parameters.
 * When {@code "norm"} is missing or empty the single-argument constructor is used; otherwise
 * the value is parsed as a float and passed as the second constructor argument.
 *
 * @param params factory parameters
 * @return the configured similarity
 * @throws IOException declared by the signature
 */
@Override
public Similarity getInstance(Map<String, String> params) throws IOException {
    final String dict = params.get("dict");
    final String normLowerBound = params.get("norm");

    if (Strings.isNullOrEmpty(normLowerBound)) {
        return new LindenSimilarity(IDFManager.createInstance(dict));
    }
    return new LindenSimilarity(IDFManager.createInstance(dict), Float.parseFloat(normLowerBound));
}
项目:linden
文件:TermDocsEnum.java
/**
 * Bundles a flexible term with its postings and scoring inputs; all arguments are stored unchanged.
 *
 * @param term      the term this enum belongs to
 * @param docFreq   document frequency value to keep
 * @param postings  postings for the term
 * @param docScorer similarity scorer
 * @param termPos   term position value to keep
 * @throws IOException declared by the signature
 */
public TermDocsEnum(FlexibleQuery.FlexibleTerm term, int docFreq, DocsAndPositionsEnum postings, Similarity.SimScorer docScorer, int termPos) throws IOException {
    // term identity
    this.term = term;
    this.termPos = termPos;
    this.docFreq = docFreq;

    // iteration and scoring
    this.postings = postings;
    this.docScorer = docScorer;
}
项目:linden
文件:TermDocsEnum.java
/**
 * Explains the score of the current document for this term.
 *
 * @param similarity similarity whose simple class name appears in the description
 * @param query      query that appears in the description
 * @return a matching explanation wrapping the scorer's own explanation, or {@code null} when
 *         {@code isMatched()} is false
 */
public Explanation explain(Similarity similarity, Query query) {
    if (!isMatched()) {
        return null;
    }

    final Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
    final String description =
            "weight("+query+" in "+ doc +") [" + similarity.getClass().getSimpleName() + "], result of:";

    final ComplexExplanation explanation = new ComplexExplanation();
    explanation.setDescription(description);
    explanation.addDetail(scoreExplanation);
    explanation.setValue(scoreExplanation.getValue());
    explanation.setMatch(true);
    return explanation;
}
项目:elasticsearch-learning-to-rank
文件:LtrQueryTests.java
/**
 * Picks one similarity at random from a fixed list, indexes every entry of {@code docs} with
 * it (one stored {@code id} field plus one {@code field} field per document), force-merges to
 * a single segment and opens a searcher that uses the same similarity.
 */
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    final List<Similarity> candidates = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelBE(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
    );
    similarity = candidates.get(random().nextInt(candidates.size()));

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
    for (int docId = 0; docId < docs.length; docId++) {
        final Document document = new Document();
        document.add(newStringField("id", Integer.toString(docId), Field.Store.YES));
        document.add(newField("field", docs[docId], Store.YES));
        indexWriterUnderTest.addDocument(document);
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
项目:xltsearch
文件:Config.java
/**
 * Resolves the configuration step by step: hash algorithm, Lucene version, analyzer,
 * similarity and directory, in that order. The method returns at the first step whose value is
 * missing, leaving {@code resolved} false; values resolved before that point stay assigned.
 * Nothing is done when the config is already resolved or the index has been invalidated.
 */
void resolve() {
    if (resolved || getLastUpdated() == INDEX_INVALIDATED) {
        return;
    }

    // hashAlgorithm
    hashAlgorithm = get("hash.algorithm");
    if (hashAlgorithm == null) {
        return;
    }

    // version
    version = get("lucene.version");
    if (version == null) {
        return;
    }

    // analyzer
    final Function<Version,Analyzer> analyzerFactory = get("lucene.analyzer");
    if (analyzerFactory == null) {
        return;
    }
    analyzer = analyzerFactory.apply(version);

    // similarity
    final Supplier<Similarity> similarityFactory = get("scoring.model");
    if (similarityFactory == null) {
        return;
    }
    similarity = similarityFactory.get();

    // directory
    final Function<File,Directory> directoryFactory = get("directory.type");
    if (directoryFactory == null) {
        return;
    }
    final File indexLocation = new File(configDir.getPath() + File.separator + INDEX_DIR);
    directory = directoryFactory.apply(indexLocation);
    if (directory == null) {
        return;
    }

    // we made it: config is properly resolved
    resolved = true;
}
项目:DoSeR-Disambiguation
文件:LearnToRankFuzzyQuery.java
/**
* Create a new FuzzyQuery that will match terms with an edit distance of at
* most <code>maxEdits</code> to <code>term</code>. If a
* <code>prefixLength</code> > 0 is specified, a common prefix of that
* length is also required.
*
* @param term
* the term to search for
* @param maxEdits
* must be >= 0 and <=
* {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
* @param prefixLength
* length of common (non-fuzzy) prefix
* @param maxExpansions
* the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the
* query is rewritten, then the maxClauseCount will be used
* instead.
* @param transpositions
* true if transpositions should be treated as a primitive edit
* operation. If this is false, comparisons will implement the
* classic Levenshtein algorithm.
*/
public LearnToRankFuzzyQuery(final Term term, final int maxEdits,
final int prefixLength, final int maxExpansions,
final boolean transpositions, final Similarity sim) {
super(term.field());
if ((maxEdits < 0)
|| (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
throw new IllegalArgumentException(
"maxEdits must be between 0 and "
+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
}
if (prefixLength < 0) {
throw new IllegalArgumentException(
"prefixLength cannot be negative.");
}
if (maxExpansions < 0) {
throw new IllegalArgumentException(
"maxExpansions cannot be negative.");
}
this.term = term;
this.maxEdits = maxEdits;
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
maxExpansions, sim));
// setRewriteMethod(new
// LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
// maxExpansions));
}
项目:DoSeR-Disambiguation
文件:LearnToRankTermQuery.java
/**
 * Expert: constructs a term query that uses the supplied docFreq instead of looking the
 * docFreq up against the searcher. No per-reader term state is kept.
 *
 * @param term    the term to query
 * @param docFreq document frequency to use
 * @param sim     similarity, stored as-is
 */
public LearnToRankTermQuery(final Term term, final int docFreq,
        final Similarity sim) {
    this.term = term;
    this.sim = sim;
    this.docFreq = docFreq;
    this.perReaderTermS = null;
}
项目:DoSeR-Disambiguation
文件:LearnToRankTermQuery.java
/**
 * Expert: constructs a term query from an already resolved term context. The docFreq is taken
 * from {@code states} and the context itself is kept as the per-reader term state.
 *
 * @param term   the term to query
 * @param states term context; must not be {@code null} (checked by an assertion)
 * @param sim    similarity, stored as-is
 */
public LearnToRankTermQuery(final Term term, final TermContext states,
        final Similarity sim) {
    assert states != null;
    this.term = term;
    this.sim = sim;
    this.perReaderTermS = states;
    this.docFreq = states.docFreq();
}
项目:DoSeR-Disambiguation
文件:LuceneFeatures.java
/**
 * Builds a boolean query with one {@code LearnToRankTermQuery} clause per space-separated
 * token of {@code str}. Tokens are lower-cased with {@link Locale#US}.
 * <p>
 * Empty tokens, which {@code String.split(" ")} produces for leading or consecutive spaces
 * (and for an empty input string), are skipped so that no clause is created for an empty term.
 *
 * @param str       text to split into terms
 * @param field     field the terms are searched in
 * @param sim       similarity handed to every term query
 * @param occ       occurrence type applied to every clause
 * @param maxclause currently unused
 * @return the assembled boolean query; it has no clauses when {@code str} contains no tokens
 */
public static Query queryStringTerm(String str, String field,
        Similarity sim, Occur occ, int maxclause) {
    final LTRBooleanQuery bquery = new LTRBooleanQuery();
    for (final String element : str.split(" ")) {
        if (element.isEmpty()) {
            // Artifact of splitting on a single space; an empty term is not a real token.
            continue;
        }
        final LearnToRankTermQuery tquery = new LearnToRankTermQuery(
                new Term(field, element.toLowerCase(Locale.US)), sim);
        bquery.add(tquery, occ);
    }
    return bquery;
}
项目:DoSeR-Disambiguation
文件:LuceneFeatures.java
/**
 * Builds a boolean query with one {@code LearnToRankFuzzyQuery} clause per space-separated
 * token of {@code str}. Tokens are lower-cased with {@link Locale#US}.
 * <p>
 * Empty tokens, which {@code String.split(" ")} produces for leading or consecutive spaces
 * (and for an empty input string), are skipped so that no clause is created for an empty term.
 *
 * @param str       text to split into terms
 * @param field     field the terms are searched in
 * @param sim       similarity handed to every fuzzy query
 * @param occ       occurrence type applied to every clause
 * @param maxclause currently unused
 * @return the assembled boolean query; it has no clauses when {@code str} contains no tokens
 */
public static Query queryStringFuzzy(String str, String field,
        Similarity sim, Occur occ, int maxclause) {
    final LTRBooleanQuery bquery = new LTRBooleanQuery();
    for (final String element : str.split(" ")) {
        if (element.isEmpty()) {
            // Artifact of splitting on a single space; an empty term is not a real token.
            continue;
        }
        final LearnToRankFuzzyQuery tquery = new LearnToRankFuzzyQuery(
                new Term(field, element.toLowerCase(Locale.US)), sim);
        bquery.add(tquery, occ);
    }
    return bquery;
}
项目:DoSeR-Disambiguation
文件:AbstractKnowledgeBase.java
/**
 * Remembers the index location and the {@code dynamic} flag, then tries to open a
 * {@code SearcherManager} over the index directory.
 * <p>
 * An {@code IOException} while opening is logged and not rethrown; {@code manager} is then
 * left unassigned by this constructor.
 *
 * @param uri     file-system location of the index
 * @param dynamic stored as-is
 * @param sim     NOTE(review): not referenced anywhere in this constructor - confirm whether
 *                it is meant to be applied to the searchers
 */
AbstractKnowledgeBase(String uri, boolean dynamic, Similarity sim) {
    super();
    this.indexUri = uri;
    this.dynamic = dynamic;

    final File indexDir = new File(indexUri);
    try {
        final Directory dir = FSDirectory.open(indexDir);
        this.manager = new SearcherManager(dir, new SearcherFactory());
    } catch (IOException e) {
        logger.error("IOException in "+AbstractKnowledgeBase.class.getName(), e);
    }
}
项目:search
文件:IDFValueSource.java
/**
 * Unwraps any {@code PerFieldSimilarityWrapper} layers for {@code field} and returns the
 * result as a {@code TFIDFSimilarity}.
 *
 * @param sim   similarity to inspect
 * @param field field used to resolve per-field wrappers
 * @return the unwrapped similarity if it is a {@code TFIDFSimilarity}, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
    Similarity unwrapped = sim;
    while (unwrapped instanceof PerFieldSimilarityWrapper) {
        unwrapped = ((PerFieldSimilarityWrapper) unwrapped).get(field);
    }
    return (unwrapped instanceof TFIDFSimilarity) ? (TFIDFSimilarity) unwrapped : null;
}
项目:search
文件:TestValueSources.java
/**
 * Checks the hits of a function query over an {@code IDFValueSource} while a
 * {@code DefaultSimilarity} is installed; the previous similarity is restored afterwards.
 */
public void testIDF() throws Exception {
    final Similarity previous = searcher.getSimilarity();
    try {
        searcher.setSimilarity(new DefaultSimilarity());
        final FunctionQuery idfQuery = new FunctionQuery(
                new IDFValueSource("bogus", "bogus", "text", new BytesRef("test")));
        final float[] expectedScores = { 0.5945349f, 0.5945349f };
        assertHits(idfQuery, expectedScores);
    } finally {
        searcher.setSimilarity(previous);
    }
}
项目:search
文件:TestValueSources.java
/**
 * Checks the hits of a function query over a {@code NormValueSource} for the field
 * {@code "byte"} while a {@code DefaultSimilarity} is installed; the previous similarity is
 * restored afterwards.
 */
public void testNorm() throws Exception {
    final Similarity previous = searcher.getSimilarity();
    try {
        // no norm field (so agnostic to indexed similarity)
        searcher.setSimilarity(new DefaultSimilarity());
        final FunctionQuery normQuery = new FunctionQuery(new NormValueSource("byte"));
        final float[] expectedScores = { 0f, 0f };
        assertHits(normQuery, expectedScores);
    } finally {
        searcher.setSimilarity(previous);
    }
}