/** * Groups variations from specified {@link List} of {@link VcfFile}s by specified field * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param groupBy a field to perform grouping * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value * @throws IOException if something goes wrong with the file system */ public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException { List<Group> res = new ArrayList<>(); if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } IndexSearcher searcher = new IndexSearcher(reader); AbstractGroupFacetCollector groupedFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(FeatureIndexFields.UID.fieldName, getGroupByField(files, groupBy), false, null, GROUP_INITIAL_SIZE); searcher.search(query, groupedFacetCollector); // Computing the grouped facet counts TermGroupFacetCollector.GroupedFacetResult groupedResult = groupedFacetCollector.mergeSegmentResults( reader.numDocs(), 1, false); List<AbstractGroupFacetCollector.FacetEntry> facetEntries = groupedResult.getFacetEntries(0, reader.numDocs()); for (AbstractGroupFacetCollector.FacetEntry facetEntry : facetEntries) { res.add(new Group(facetEntry.getValue().utf8ToString(), facetEntry.getCount())); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return res; }
/** * Groups variations from specified {@link List} of {@link VcfFile}s by specified field * @param files a {@link List} of {@link FeatureFile}, which indexes to search * @param query a query to search in index * @param groupBy a field to perform grouping * @return a {@link List} of {@link Group}s, mapping field value to number of variations, having this value * @throws IOException if something goes wrong with the file system */ public List<Group> groupVariations(List<VcfFile> files, Query query, String groupBy) throws IOException { List<Group> res = new ArrayList<>(); if (CollectionUtils.isEmpty(files)) { return Collections.emptyList(); } SimpleFSDirectory[] indexes = fileManager.getIndexesForFiles(files); long totalIndexSize = getTotalIndexSize(indexes); if (totalIndexSize > luceneIndexMaxSizeForGrouping) { throw new IllegalArgumentException(getMessage(MessagesConstants.ERROR_FEATURE_INEDX_TOO_LARGE)); } try (MultiReader reader = openMultiReader(indexes)) { if (reader.numDocs() == 0) { return Collections.emptyList(); } IndexSearcher searcher = new IndexSearcher(reader); AbstractGroupFacetCollector groupedFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(FeatureIndexFields.UID.fieldName, getGroupByField(files, groupBy), false, null, GROUP_INITIAL_SIZE); searcher.search(query, groupedFacetCollector); // Computing the grouped facet counts TermGroupFacetCollector.GroupedFacetResult groupedResult = groupedFacetCollector.mergeSegmentResults( reader.numDocs(), 1, false); List<AbstractGroupFacetCollector.FacetEntry> facetEntries = groupedResult.getFacetEntries(0, reader.numDocs()); for (AbstractGroupFacetCollector.FacetEntry facetEntry : facetEntries) { res.add(new Group(facetEntry.getValue().utf8ToString(), facetEntry.getCount())); } } finally { for (SimpleFSDirectory index : indexes) { IOUtils.closeQuietly(index); } } return res; }
/**
 * Computes facet counts for {@code field} over the documents in {@code base}, using the
 * first field of the request's grouping specification as the group field.
 *
 * @param searcher   searcher used to run the collection and to look up the schema
 * @param base       documents to facet over; applied as a filter on a match-all query
 * @param field      the field to facet on
 * @param multiToken passed through to the facet collector factory
 * @param offset     number of leading facet entries to skip
 * @param limit      maximum number of facet entries to return; a negative value means no limit
 * @param mincount   minimum count passed to the segment merge
 * @param missing    whether to append the total missing count under a {@code null} key
 * @param sort       facet sort mode; the count modes order entries by count
 * @param prefix     optional term prefix, may be {@code null}
 * @return readable facet values mapped to their counts
 * @throws IOException   if the search fails
 * @throws SolrException with {@code BAD_REQUEST} if no group field is specified
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
  GroupingSpecification groupingSpecification = rb.getGroupingSpec();
  String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null;
  if (groupField == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Specify the group.field as parameter or local parameter"
    );
  }

  BytesRef prefixBR = prefix != null ? new BytesRef(prefix) : null;
  TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBR, 128);
  searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), collector);

  boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);

  // A negative limit means "no limit". Passing it through unchanged would request a
  // negative merge size (offset + limit) and a negative page size, so both are widened
  // to Integer.MAX_VALUE instead.
  boolean unlimited = limit < 0;
  int mergeSize = unlimited ? Integer.MAX_VALUE : (offset + limit);
  int pageSize = unlimited ? Integer.MAX_VALUE : limit;

  TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(mergeSize, mincount, orderByCount);

  CharsRef charsRef = new CharsRef();
  FieldType facetFieldType = searcher.getSchema().getFieldType(field);
  NamedList<Integer> facetCounts = new NamedList<Integer>();
  List<TermGroupFacetCollector.FacetEntry> scopedEntries = result.getFacetEntries(offset, pageSize);
  for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
    // Convert the indexed term into its readable form before using it as the key.
    facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
    facetCounts.add(charsRef.toString(), facetEntry.getCount());
  }

  if (missing) {
    facetCounts.add(null, result.getTotalMissingCount());
  }

  return facetCounts;
}
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter) throws IOException { GroupingSpecification groupingSpecification = rb.getGroupingSpec(); final String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null; if (groupField == null) { throw new SolrException ( SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter" ); } BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null; final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytesRef, 128); Collector groupWrapper = getInsanityWrapper(groupField, collector); Collector fieldWrapper = getInsanityWrapper(field, groupWrapper); // When GroupedFacetCollector can handle numerics we can remove the wrapped collectors searcher.search(base.getTopFilter(), fieldWrapper); boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY); TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(limit < 0 ? Integer.MAX_VALUE : (offset + limit), mincount, orderByCount); CharsRefBuilder charsRef = new CharsRefBuilder(); FieldType facetFieldType = searcher.getSchema().getFieldType(field); NamedList<Integer> facetCounts = new NamedList<>(); List<TermGroupFacetCollector.FacetEntry> scopedEntries = result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit); for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) { //:TODO:can we filter earlier than this to make it more efficient? 
if (termFilter != null && !termFilter.test(facetEntry.getValue())) { continue; } facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef); facetCounts.add(charsRef.toString(), facetEntry.getCount()); } if (missing) { facetCounts.add(null, result.getTotalMissingCount()); } return facetCounts; }
/**
 * Creates a result from already computed totals and entries.
 * The entry list is stored by reference; no copy is made.
 *
 * @param totalCount        value stored as the total count
 * @param totalMissingCount value stored as the total missing count
 * @param facetEntries      the facet entries backing this result
 */
private GroupedFacetResult(int totalCount,
                           int totalMissingCount,
                           List<TermGroupFacetCollector.FacetEntry> facetEntries) {
  this.facetEntries = facetEntries;
  this.totalMissingCount = totalMissingCount;
  this.totalCount = totalCount;
}
/**
 * Returns the facet entries held by this result.
 * The internal list itself is returned, not a copy.
 *
 * @return the facet entries
 */
public List<TermGroupFacetCollector.FacetEntry> getFacetEntries() {
  return this.facetEntries;
}
/**
 * Computes facet counts for {@code field} over the documents in {@code base}, using the
 * first field of the request's grouping specification as the group field.
 *
 * @param searcher   searcher used to run the collection and to look up the schema
 * @param base       documents to facet over; applied as a filter on a match-all query
 * @param field      the field to facet on
 * @param multiToken passed through to the facet collector factory
 * @param offset     number of leading facet entries to skip
 * @param limit      maximum number of facet entries to return; a negative value means no limit
 * @param mincount   minimum count passed to the segment merge
 * @param missing    whether to append the total missing count under a {@code null} key
 * @param sort       facet sort mode; the count modes order entries by count
 * @param prefix     optional term prefix, may be {@code null}
 * @return readable facet values mapped to their counts
 * @throws IOException   if the search fails
 * @throws SolrException with {@code BAD_REQUEST} if no group field is specified
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
  GroupingSpecification spec = rb.getGroupingSpec();
  String groupField = null;
  if (spec != null) {
    groupField = spec.getFields()[0];
  }
  if (groupField == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Specify the group.field as parameter or local parameter"
    );
  }

  BytesRef prefixBytes = null;
  if (prefix != null) {
    prefixBytes = new BytesRef(prefix);
  }
  TermGroupFacetCollector facetCollector =
      TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytes, 128);
  searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), facetCollector);

  boolean orderByCount =
      sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);

  // A negative limit is widened to "everything" for both the merge and the page.
  final boolean unlimited = limit < 0;
  final int mergeSize = unlimited ? Integer.MAX_VALUE : (offset + limit);
  final int pageSize = unlimited ? Integer.MAX_VALUE : limit;
  TermGroupFacetCollector.GroupedFacetResult merged =
      facetCollector.mergeSegmentResults(mergeSize, mincount, orderByCount);

  CharsRef readable = new CharsRef();
  FieldType fieldType = searcher.getSchema().getFieldType(field);
  NamedList<Integer> counts = new NamedList<>();
  for (TermGroupFacetCollector.FacetEntry entry : merged.getFacetEntries(offset, pageSize)) {
    // Convert the indexed term into its readable form before using it as the key.
    fieldType.indexedToReadable(entry.getValue(), readable);
    counts.add(readable.toString(), entry.getCount());
  }

  if (missing) {
    counts.add(null, merged.getTotalMissingCount());
  }
  return counts;
}
/**
 * Builds the grouped facet counts for {@code field}, restricted to the documents in
 * {@code base}. The group field is the first field of the request's grouping specification.
 *
 * @param searcher   searcher used to run the collection and to look up the schema
 * @param base       documents to facet over; applied as a filter on a match-all query
 * @param field      the field to facet on
 * @param multiToken passed through to the facet collector factory
 * @param offset     number of leading facet entries to skip
 * @param limit      maximum number of facet entries to return; a negative value means no limit
 * @param mincount   minimum count passed to the segment merge
 * @param missing    whether to append the total missing count under a {@code null} key
 * @param sort       facet sort mode; the count modes order entries by count
 * @param prefix     optional term prefix, may be {@code null}
 * @return readable facet values mapped to their counts
 * @throws IOException   if the search fails
 * @throws SolrException with {@code BAD_REQUEST} if no group field is specified
 */
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field, boolean multiToken, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
  GroupingSpecification groupingSpec = rb.getGroupingSpec();
  String groupField = (groupingSpec == null) ? null : groupingSpec.getFields()[0];
  if (groupField == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Specify the group.field as parameter or local parameter"
    );
  }

  BytesRef termPrefix = (prefix == null) ? null : new BytesRef(prefix);
  TermGroupFacetCollector groupedCollector =
      TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, termPrefix, 128);
  searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), groupedCollector);

  boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT);
  if (!orderByCount) {
    orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
  }

  // A negative limit is widened to "everything" for both the merge and the page.
  int mergeSize = Integer.MAX_VALUE;
  int pageSize = Integer.MAX_VALUE;
  if (limit >= 0) {
    mergeSize = offset + limit;
    pageSize = limit;
  }
  TermGroupFacetCollector.GroupedFacetResult groupedResult =
      groupedCollector.mergeSegmentResults(mergeSize, mincount, orderByCount);

  CharsRef scratch = new CharsRef();
  FieldType facetType = searcher.getSchema().getFieldType(field);
  NamedList<Integer> countsByValue = new NamedList<Integer>();
  List<TermGroupFacetCollector.FacetEntry> page = groupedResult.getFacetEntries(offset, pageSize);
  for (TermGroupFacetCollector.FacetEntry entry : page) {
    // Convert the indexed term into its readable form before using it as the key.
    facetType.indexedToReadable(entry.getValue(), scratch);
    countsByValue.add(scratch.toString(), entry.getCount());
  }

  if (missing) {
    countsByValue.add(null, groupedResult.getTotalMissingCount());
  }
  return countsByValue;
}