static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
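/*
 * Hypothetical usage sketch, not part of the original sources: one way indexDoc(...)
 * could be driven over a directory tree. The method name indexDocs and the use of
 * SimpleFileVisitor are illustrative assumptions; only standard JDK (java.nio.file)
 * and the indexDoc method above are used. Assumes the usual java.nio.file imports.
 */
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
    if (Files.isDirectory(path)) {
        Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                // index each regular file with its last-modified timestamp
                indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                return FileVisitResult.CONTINUE;
            }
        });
    } else {
        indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
    }
}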
private int copyFieldsNoDeletions(MergeState mergeState, final AtomicReader reader,
        final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[]) throws IOException {
    final int maxDoc = reader.maxDoc();
    int docCount = 0;
    if (matchingFieldsReader != null) {
        // We can bulk-copy because the fieldInfos are "congruent"
        while (docCount < maxDoc) {
            int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, docCount, len);
            addRawDocuments(stream, rawDocLengths, len);
            docCount += len;
            mergeState.checkAbort.work(300 * len);
        }
    } else {
        for (; docCount < maxDoc; docCount++) {
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.document(docCount);
            addDocument(doc, mergeState.fieldInfos);
            mergeState.checkAbort.work(300);
        }
    }
    return docCount;
}
protected boolean locateContainer(String nodeRef, IndexReader reader) {
    boolean found = false;
    try {
        TermDocs td = reader.termDocs(new Term("ID", nodeRef));
        while (td.next()) {
            int doc = td.doc();
            Document document = reader.document(doc);
            if (document.getField("ISCONTAINER") != null) {
                found = true;
                break;
            }
        }
        td.close();
    } catch (IOException e) {
        throw new LuceneIndexException("Failed to delete container and below for " + nodeRef, e);
    }
    return found;
}
public void testGetParentIdNoParentField() throws Exception {
    ParentFieldMapper fieldMapper = createParentFieldMapper();
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig());
    Document document = new Document();
    document.add(new SortedDocValuesField("different_field", new BytesRef("1")));
    indexWriter.addDocument(document);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    String id = ParentFieldSubFetchPhase.getParentId(fieldMapper, indexReader.leaves().get(0).reader(), 0);
    assertNull(id);

    indexReader.close();
    directory.close();
}
@Test
public void testKeywords() throws IOException {
    LuceneIndex index = new LuceneIndex();
    try (Reference<IndexWriter> writer = index.provideWriter()) {
        Document doc1 = new Document();
        LuceneFields.Keyword.add(doc1, "name", "John", LuceneFields.FieldOptions.STORE_INDEX);
        writer.use().addDocument(doc1);
        Document doc2 = new Document();
        LuceneFields.Keyword.add(doc2, "name", "James", LuceneFields.FieldOptions.STORE_INDEX);
        writer.use().addDocument(doc2);
    }
    LuceneSearchResults results = index.search(
            LuceneSearch.builder().query(new TermQuery(new Term("name", "James"))).build());
    assertTrue(results.hasCount());
    assertEquals(1, (int) results.count());
    assertEquals("James", results.toList().get(0).getField("name").stringValue());
}
@Override
public void addDocucmentToIndexer(List<App> apps) {
    Document doc = null;
    synchronized (lock) {
        if (CollectionUtils.isEmpty(apps)) {
            return;
        }
        for (App app : apps) {
            try {
                doc = newDocument(app, allTags4AppHashMap);
                indexWriter.addDocument(doc);
            } catch (Exception e) {
                logger.error("Exception", e);
            }
        }
    }
}
/**
 * Returns a set of source files containing reference(s) to given type element.
 * @param element the {@link ElementHandle} of a {@link TypeElement} for which usages should be found
 * @param searchKind type of reference, {@link SearchKind}
 * @param scope to search in, {@link SearchScope}
 * @return set of {@link FileObject}s containing the reference(s).
 * It may return null when the caller is a {@code CancellableTask<CompilationInfo>} and is cancelled
 * inside the call of this method.
 */
public @NullUnknown Set<FileObject> getResources (
        final @NonNull ElementHandle<TypeElement> element,
        final @NonNull Set<SearchKind> searchKind,
        final @NonNull Set<? extends SearchScopeType> scope) {
    return searchImpl(
        element,
        searchKind,
        scope,
        new Convertor<ClassIndexImpl, Convertor<Document,FileObject>>() {
            @NonNull
            @Override
            public Convertor<Document, FileObject> convert(@NonNull final ClassIndexImpl p) {
                return DocumentUtil.fileObjectConvertor(ClassIndex.ResourceType.SOURCE, p.getSourceRoots());
            }
        });
}
/**
 * Returns a set of source files containing reference(s) to given package element.
 * @param element the {@link ElementHandle} of a {@link PackageElement} for which usages should be found
 * @param searchKind type of reference, {@link SearchKind}
 * @param scope to search in, {@link SearchScope}
 * @return set of {@link FileObject}s containing the reference(s).
 * It may return null when the caller is a {@code CancellableTask<CompilationInfo>} and is cancelled
 * inside the call of this method.
 * @since 0.89
 */
public @NullUnknown Set<FileObject> getResourcesForPackage (
        final @NonNull ElementHandle<PackageElement> element,
        final @NonNull Set<SearchKind> searchKind,
        final @NonNull Set<? extends SearchScopeType> scope) {
    return searchImpl(
        element,
        searchKind,
        scope,
        new Convertor<ClassIndexImpl, Convertor<Document,FileObject>>() {
            @NonNull
            @Override
            public Convertor<Document, FileObject> convert(@NonNull final ClassIndexImpl p) {
                return DocumentUtil.fileObjectConvertor(ClassIndex.ResourceType.SOURCE, p.getSourceRoots());
            }
        });
}
private void addAppsToIndexerWriter(List<App> list, IndexWriter indexerWriter) {
    Field name = new Field(fieldName, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
    NumericField catalog = new NumericField("catalog", Field.Store.NO, true);
    NumericField downloadRank = new NumericField("downloadRank", Field.Store.NO, true);
    for (App a : list) {
        try {
            Document doc = new Document();
            name.setValue(a.getName().toLowerCase());
            doc.add(name);
            downloadRank.setIntValue(a.getDownloadRank());
            doc.add(downloadRank);
            catalog.setIntValue(a.getCatalog());
            doc.add(catalog);
            indexerWriter.addDocument(doc);
        } catch (Exception e) {
            logger.error("Exception", e);
        }
    }
}
public void testSingleValued() throws IOException {
    Directory dir = newDirectory();
    // we need the default codec to check for singletons
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
    Document doc = new Document();
    for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
        doc.add(f);
    }
    w.addDocument(doc);
    final DirectoryReader dirReader = DirectoryReader.open(w);
    LeafReader reader = getOnlyLeafReader(dirReader);
    SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
            reader, "half_float").getDoubleValues();
    assertNotNull(FieldData.unwrapSingleton(values));
    values.setDocument(0);
    assertEquals(1, values.count());
    assertEquals(3f, values.valueAt(0), 0f);
    IOUtils.close(dirReader, w, dir);
}
@Override
public void run() {
    int i = 0;
    while (i < 10000) {
        try {
            if (data.size() <= i) {
                sleep(1);
                continue;
            }
            final String key = "key" + i;
            final String val = "value" + i;
            final List<Document> documents = index.searchForDocuments(
                    new TermQuery(new Term(key, val)), 10,
                    new Sort(new SortField(key, SortField.Type.STRING)));
            if (documents.size() != 1) {
                throw new RuntimeException("Invalid number of matching documents for " + key
                        + ", found " + documents);
            }
            ++i;
        } catch (IOException ioe) {
            error = ioe;
            break;
        } catch (InterruptedException e) {
            // ignored; retry the same iteration
        } catch (AlreadyClosedException ace) {
            error = ace;
            break;
        }
    }
}
@Override
public Token getToken(int index) {
    Token ret = cachedTokens.get(index);
    if (ret == null) {
        ret = new Token();
        try {
            Document doc = tokenSearcher.doc(index);
            for (IndexableField f : doc.getFields()) {
                if (!f.name().startsWith("GGS:")) {
                    ret.getFeatures().put(f.name(), f.stringValue());
                } else if (f.name().equals("GGS:SpanAnnotation")) {
                    ret.parentAnnotations.add(getAnnotation(f.numericValue().intValue()));
                } else if (f.name().equals("GGS:Sentence")) {
                    ret.parentSentence = getSentence(f.numericValue().intValue());
                }
            }
            ret.indexInSentence = index - ret.parentSentence.getFirstTokenIndexInCorpus();
        } catch (IOException e) {
            e.printStackTrace();
        }
        cachedTokens.put(index, ret);
    }
    return ret;
}
@Override
public Document convert(String p) {
    if (signal != null) {
        signal.countDown();
    }
    if (slot != null) {
        try {
            this.slot.await();
        } catch (InterruptedException ex) {
            throw new RuntimeException(ex);
        }
    }
    final Document doc = new Document();
    doc.add(new Field(FLD_KEY, p, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); //NOI18N
    return doc;
}
public void testNoTokens() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));

    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "", 2.0f, allFt));
    indexWriter.addDocument(doc);

    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
    assertThat(docs.totalHits, equalTo(1));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] {7, 3, -10, -6, 5, 50}) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }

        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
                .field("field")
                .interval(10)
                .minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
public Map<String, Integer> search(String word, String field, int maxSearch) {
    if (indexSearcher == null) {
        initialize(index);
    }
    Map<String, Integer> verbFreqs = new HashMap<>();
    QueryParser queryParser = new QueryParser(Version.LUCENE_36, field, analyzer);
    try {
        Query query = queryParser.parse(word);
        TopDocs topDocs = indexSearcher.search(query, maxSearch);
        ScoreDoc[] doc = topDocs.scoreDocs;
        for (int i = 0; i < maxSearch && i < doc.length; ++i) {
            int documentId = doc[i].doc;
            Document document = indexSearcher.doc(documentId);
            String verb = document.get(VERB);
            String frequency = document.get(FREQ);
            verbFreqs.put(verb, Integer.parseInt(frequency));
        }
    } catch (ParseException | IOException e) {
        log.warn("Error searching Lucene index.", e);
    }
    return verbFreqs;
}
@Override
public void createIndex(NitriteId id, String field, String text) {
    try {
        Document document = new Document();
        String jsonId = keySerializer.writeValueAsString(id);
        Field contentField = new TextField(field, text, Field.Store.NO);
        Field idField = new StringField(CONTENT_ID, jsonId, Field.Store.YES);

        document.add(idField);
        document.add(contentField);

        synchronized (this) {
            indexWriter.addDocument(document);
            commit();
        }
    } catch (IOException ioe) {
        throw new IndexingException(errorMessage(
                "could not write full-text index data for " + text, 0), ioe);
    } catch (VirtualMachineError vme) {
        handleVirtualMachineError(vme);
    }
}
/**
 * Builds {@link Provider}s for the given {@link Document}s.
 * @param docsResult the {@link Document}s to convert.
 * @return a new {@link ISearchResult} containing the {@link Provider}s and the
 *         totalHits from the given {@link DocumentsSearchResult}.
 */
private ISearchResult<Provider> docsToProviders(final DocumentsSearchResult docsResult) {
    final List<Provider> providers = new ArrayList<>();
    for (final Document doc : docsResult.getResults()) {
        final String providerId = doc.get(IIndexElement.FIELD_ID);
        if (NumberUtils.isNumber(providerId)) {
            providers.add(getProvider(Integer.parseInt(providerId)));
        } else {
            LOGGER.error("Non-numeric provider id from index {}.", providerId);
        }
    }
    return new SimpleSearchResult<>(providers, docsResult.getTotalHits());
}
public void testMissingShard() throws IOException {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(new Document());
        try (IndexReader reader = w.getReader()) {
            ShardCoreKeyMap map = new ShardCoreKeyMap();
            for (LeafReaderContext ctx : reader.leaves()) {
                try {
                    map.add(ctx.reader());
                    fail();
                } catch (IllegalArgumentException expected) {
                    // ok
                }
            }
        }
    }
}
private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(actual);
    IndexWriter iw = new IndexWriter(dir, iwc);
    iw.addDocument(new Document());
    iw.commit();
    iw.close();
    DirectoryReader ir = DirectoryReader.open(dir);
    SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
    String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
    assertNotNull(v);
    assertEquals(expected, Mode.valueOf(v));
    ir.close();
    dir.close();
}
@Override
public void create() throws IOException {
    GlobalDocumentBuilder docBuilder = lire.createDocumentBuilder();
    addFeaturesToDocumentBuilder(features, docBuilder);
    IndexWriter indexWriter = lire.createIndexWriter(indexDir);
    for (int i = 0; i < paths.size(); i++) {
        String path = paths.get(i);
        callback.beforeAddImageToIndex(i + 1, paths.size(), path);
        BufferedImage img = lire.getBufferedImage(path);
        Document document = docBuilder.createDocument(img, path);
        indexWriter.addDocument(document);
        callback.afterAddImageToIndex(i + 1, paths.size(), path);
    }
    lire.closeIndexWriter(indexWriter);
    callback.afterIndexAllImages(paths.size());
}
/** Merges in the stored fields from the readers in
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument()},
 *  {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
    int docCount = 0;
    for (AtomicReader reader : mergeState.readers) {
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        for (int i = 0; i < maxDoc; i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                // skip deleted docs
                continue;
            }
            // TODO: this could be more efficient using
            // FieldVisitor instead of loading/writing entire
            // doc; ie we just have to renumber the field number
            // on the fly?
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.document(i);
            addDocument(doc, mergeState.fieldInfos);
            docCount++;
            mergeState.checkAbort.work(300);
        }
    }
    finish(mergeState.fieldInfos, docCount);
    return docCount;
}
public void addIndex(UUser user) throws Exception {
    IndexWriter writer = getWriter();
    Document doc = new Document();
    /*
     * Field.Store.YES stores the value in the index; store it when the query results
     * need to display the record itself. If the results only show something like a
     * title, the value does not have to be stored, and storing very long content is
     * not recommended. Using TextField makes the field searchable.
     */
    try {
        doc.add(new StringField("userid", String.valueOf(user.getId()), Field.Store.YES));
        doc.add(new TextField("username", user.getUsername(), Field.Store.YES));
        writer.addDocument(doc);
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        writer.close();
    }
}
/**
 * Searches the index.
 *
 * @param typeId
 * @param keywords
 * @return
 * @throws Exception
 */
public List<Document> searchIndex(Integer typeId, String keywords) throws Exception {
    // 1. init searcher
    Analyzer analyzer = new PaodingAnalyzer();
    IndexReader reader = IndexReader.open(typeId == appConfig.getGameTypeId()
            ? appConfig.getGameIndexDir()
            : appConfig.getSoftIndexDir());
    BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
            BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
    Query query = MultiFieldQueryParser.parse(keywords, appConfig.getQueryFields(), flags, analyzer);
    query = query.rewrite(reader);

    // 2. search
    List<Document> docs = new ArrayList<Document>();
    Hits hits = (typeId == appConfig.getGameTypeId()
            ? gameSearcher.search(query, Sort.RELEVANCE)
            : softSearcher.search(query, Sort.RELEVANCE));
    // searcher.search(query, Sort.RELEVANCE);
    for (int i = 0; i < hits.length(); i++) {
        docs.add(hits.doc(i));
    }

    // 3. return
    reader.close();
    return docs;
}
@Override
public JavaTypeDescription convert(Document p) {
    final String binName = DocumentUtil.getSimpleBinaryName(p);
    // The regexp is still needed for class files older than 1.5, which have no enclosingMethod attr
    if (binName == null || ANONYMOUS.matcher(binName).matches() || DocumentUtil.isLocal(p)) {
        return null;
    }
    final ElementHandle<TypeElement> eh = HANDLE_CONVERTOR.convert(p);
    final String sourceName = SOURCE_CONVERTOR.convert(p);
    return eh == null
            ? null
            : new JavaTypeDescription(ci, eh, DocumentUtil.getSimpleName(p), sourceName);
}
@NonNull
public static Convertor<Document,FileObject> fileObjectConvertor (
        @NonNull final ClassIndex.ResourceType resourceType,
        @NonNull final FileObject... roots) {
    assert resourceType != null;
    assert roots != null;
    return new FileObjectConvertor (resourceType, roots);
}
public static void main(String[] args) throws IOException, ParseException {
    Pattern quit = Pattern.compile("quit|exit|q|bye", Pattern.CASE_INSENSITIVE);
    FbEntitySearcher searcher = new FbEntitySearcher("lib/lucene/4.4/inexact", 1000, "inexact");
    BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
    StopWatch watch = new StopWatch();
    while (true) {
        System.out.print("Search> ");
        String question = is.readLine().trim();
        if (quit.matcher(question).matches()) {
            System.out.println("Quitting.");
            break;
        }
        if (question.equals(""))
            continue;

        watch.reset();
        watch.start();
        List<Document> docs = searcher.searchDocs(question);
        watch.stop();
        for (Document doc : docs) {
            // if (! doc.get(FbIndexField.ID.fieldName()).contains("democrat_party"))
            //     continue;
            if (Double.parseDouble(doc.get(FbIndexField.POPULARITY.fieldName())) < 0.0001)
                continue;
            LogInfo.log(
                    "Mid: " + doc.get(FbIndexField.MID.fieldName()) + "\t"
                    + "id: " + doc.get(FbIndexField.ID.fieldName()) + "\t"
                    + "types: " + doc.get(FbIndexField.TYPES.fieldName()) + "\t"
                    + "Name: " + doc.get(FbIndexField.TEXT.fieldName()) + "\t"
                    + "Popularity: " + doc.get(FbIndexField.POPULARITY.fieldName()));
        }
        LogInfo.logs("Number of docs: %s, Time: %s", docs.size(), watch);
    }
}
public static String getSimpleBinaryName (final Document doc) {
    assert doc != null;
    Fieldable field = doc.getFieldable(FIELD_BINARY_NAME);
    if (field == null) {
        return null;
    } else {
        final String binName = field.stringValue();
        return binName.substring(0, binName.length() - 1);
    }
}
protected static Set<String> deletePrimary(Collection<String> nodeRefs, IndexReader reader, boolean delete)
        throws LuceneIndexException {
    Set<String> refs = new LinkedHashSet<String>();
    for (String nodeRef : nodeRefs) {
        try {
            TermDocs td = reader.termDocs(new Term("PRIMARYPARENT", nodeRef));
            while (td.next()) {
                int doc = td.doc();
                Document document = reader.document(doc);
                String[] ids = document.getValues("ID");
                refs.add(ids[ids.length - 1]);
                if (delete) {
                    reader.deleteDocument(doc);
                }
            }
            td.close();
        } catch (IOException e) {
            throw new LuceneIndexException("Failed to delete node by primary parent for " + nodeRef, e);
        }
    }
    return refs;
}
public void testOffset() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] {9.3, 3.2, -5, -6.5, 5.3}) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }

        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
                .field("field")
                .interval(5)
                .offset(Math.PI);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(3, histogram.getBuckets().size());
            assertEquals(-10 + Math.PI, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(Math.PI, histogram.getBuckets().get(1).getKey());
            assertEquals(2, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5 + Math.PI, histogram.getBuckets().get(2).getKey());
            assertEquals(1, histogram.getBuckets().get(2).getDocCount());
        }
    }
}
@Override
public FileObject convert (final Document doc) {
    final String binaryName = getBinaryName(doc, kindHolder);
    return binaryName == null
            ? null
            : kindHolder[0] == ElementKind.MODULE
                    ? resolveFile(FileObjects.MODULE_INFO)
                    : convertType(binaryName);
}
/**
 * Returns a native Lucene Document.
 *
 * @param match the Document reference with the Lucene internal ID.
 * @param searcher the {@link IndexSearcher}, which we'll use for executing searches.
 * @return a native Lucene Document.
 */
public static Document luceneDoc(ScoreDoc match, IndexSearcher searcher) {
    try {
        return searcher.doc(match.doc);
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
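/*
 * Hypothetical usage sketch, not part of the original sources: resolve the stored
 * document for each hit of a query via luceneDoc(...). Assumes an existing
 * IndexSearcher named "searcher", a "path" field, and the standard
 * org.apache.lucene.search imports.
 */
TopDocs hits = searcher.search(new TermQuery(new Term("path", "some/file.txt")), 10);
for (ScoreDoc match : hits.scoreDocs) {
    // map the internal docId back to its stored fields
    Document stored = luceneDoc(match, searcher);
    System.out.println(stored.get("path"));
}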
/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring documents in a search.
 */
public void testSampler() throws IOException {
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
    indexWriterConfig.setMaxBufferedDocs(100);
    indexWriterConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory dir = newDirectory();
            IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
        for (long value : new long[] {7, 3, -10, -6, 5, 50}) {
            Document doc = new Document();
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            w.addDocument(doc);
        }

        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
                .shardSize(3)
                .subAggregation(new MinAggregationBuilder("min")
                        .field("int"));
        try (IndexReader reader = DirectoryReader.open(w)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")),
                    aggBuilder, textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}
public abstract <T> void getDeclaredElements (
        @NonNull String name,
        @NonNull ClassIndex.NameKind kind,
        @NonNull Set<? extends ClassIndex.SearchScopeType> scope,
        @NonNull FieldSelector selector,
        @NonNull Convertor<? super Document, T> convertor,
        @NonNull Collection<? super T> result) throws IOException, InterruptedException;
private void countTestCase(boolean withDeletions) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    final int numDocs = scaledRandomIntBetween(100, 200);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (randomBoolean()) {
            doc.add(new StringField("foo", "bar", Store.NO));
        }
        if (randomBoolean()) {
            doc.add(new StringField("foo", "baz", Store.NO));
        }
        if (withDeletions && (rarely() || i == 0)) {
            doc.add(new StringField("delete", "yes", Store.NO));
        }
        w.addDocument(doc);
    }
    if (withDeletions) {
        w.deleteDocuments(new Term("delete", "yes"));
    }
    final IndexReader reader = w.getReader();
    Query matchAll = new MatchAllDocsQuery();
    Query matchAllCsq = new ConstantScoreQuery(matchAll);
    Query tq = new TermQuery(new Term("foo", "bar"));
    Query tCsq = new ConstantScoreQuery(tq);
    BooleanQuery bq = new BooleanQuery.Builder()
            .add(matchAll, Occur.SHOULD)
            .add(tq, Occur.MUST)
            .build();

    countTestCase(matchAll, reader, false);
    countTestCase(matchAllCsq, reader, false);
    countTestCase(tq, reader, withDeletions);
    countTestCase(tCsq, reader, withDeletions);
    countTestCase(bq, reader, true);
    reader.close();
    w.close();
    dir.close();
}
protected static Set<String> deleteReference(Collection<String> nodeRefs, IndexReader reader, boolean delete)
        throws LuceneIndexException {
    Set<String> refs = new LinkedHashSet<String>();
    for (String nodeRef : nodeRefs) {
        try {
            TermDocs td = reader.termDocs(new Term("PARENT", nodeRef));
            while (td.next()) {
                int doc = td.doc();
                Document document = reader.document(doc);
                String[] ids = document.getValues("ID");
                refs.add(ids[ids.length - 1]);
                if (delete) {
                    reader.deleteDocument(doc);
                }
            }
            td.close();
        } catch (IOException e) {
            throw new LuceneIndexException("Failed to delete node by parent for " + nodeRef, e);
        }
    }
    return refs;
}
public void testOverruningClassIndexScopes() throws IOException, InterruptedException {
    final ClassIndexImpl index = ClassIndexManager.getDefault().getUsagesQuery(src.toURL(), true);
    assertNotNull(index);
    final List<Document> res = new ArrayList<>(PKG_COUNT * CLZ_IN_PKG_COUNT);
    Set<ClassIndex.SearchScopeType> scopes = new HashSet<>();
    scopes.add(ClassIndex.SearchScope.SOURCE);
    index.getDeclaredElements(
        "", //NOI18N
        ClassIndex.NameKind.PREFIX,
        scopes,
        null,
        Identity.<Document>getInstance(),
        res);
    assertEquals(PKG_COUNT * CLZ_IN_PKG_COUNT, res.size());
    res.clear();
    scopes.clear();
    final Set<String> pkgs = new HashSet<>();
    index.getPackageNames("", true, pkgs);
    assertEquals(PKG_COUNT, pkgs.size());
    scopes.add(ClassIndex.createPackageSearchScope(
        ClassIndex.SearchScope.SOURCE,
        pkgs.toArray(new String[pkgs.size()])));
    index.getDeclaredElements(
        "", //NOI18N
        ClassIndex.NameKind.PREFIX,
        scopes,
        null,
        Identity.<Document>getInstance(),
        res);
    assertEquals(PKG_COUNT * CLZ_IN_PKG_COUNT, res.size());
}
/**
 * Adds documents made of subnodes' title, descriptions and uid, then descends deeper into descendants.
 *
 * @param node
 *            its subnodes will be parsed
 * @return documents made of all of node's descendants
 */
private Set<Document> extractSubnodeDocuments(PedagogicalPlannerSequenceNode node) {
    Set<Document> docs = new HashSet<Document>();
    if ((node != null) && (node.getSubnodes() != null)) {
        for (PedagogicalPlannerSequenceNode subnode : node.getSubnodes()) {
            Document doc = new Document();
            Field titleField = new TextField(PedagogicalPlannerAction.FIELD_NAME_TITLE, subnode.getTitle(),
                    Field.Store.NO);
            titleField.setBoost(10);
            doc.add(titleField);

            String briefDesc = WebUtil.removeHTMLtags(subnode.getBriefDescription());
            if (briefDesc != null) {
                Field briefDescField = new TextField(PedagogicalPlannerAction.FIELD_NAME_BRIEF_DESCRIPTION,
                        briefDesc, Field.Store.NO);
                doc.add(briefDescField);
            }
            String fullDesc = WebUtil.removeHTMLtags(subnode.getFullDescription());
            if (fullDesc != null) {
                Field fullDescField = new TextField(PedagogicalPlannerAction.FIELD_NAME_FULL_DESCRIPTION,
                        fullDesc, Field.Store.NO);
                doc.add(fullDescField);
            }
            Field uidField = new StringField(PedagogicalPlannerAction.FIELD_NAME_ANCESTOR_UID,
                    subnode.getUid().toString(), Field.Store.YES);
            doc.add(uidField);
            docs.add(doc);

            Set<Document> subnodeDocs = extractSubnodeDocuments(subnode);
            docs.addAll(subnodeDocs);
        }
    }
    return docs;
}
@Override
public IndexableField getField(String name) throws IOException {
    Document document = getDocument(Collections.singleton(name));
    if (document != null) {
        for (IndexableField field : document) {
            if (field != null && Objects.equals(field.name(), name)) {
                return field;
            }
        }
    }
    return null;
}
private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);

    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    d.add(new Field("title", "the1 quick brown fox jumps over the1 lazy dog", type));
    d.add(new Field("desc", "the1 quick brown fox jumps over the1 lazy dog", type));

    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields termVectors = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(termVectors, null, flags, termVectors);
    dr.close();
    dir.close();
}