/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert mapper.fieldType().names().indexName().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.fieldType().indexAnalyzer()))) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using soley the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, String[] preTags, String[] postTags, Encoder encoder ){ StringBuilder fragment = new StringBuilder(); final int s = fragInfo.getStartOffset(); int[] modifiedStartOffset = { s }; String src = getFragmentSourceMSO( buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset ); int srcIndex = 0; for( SubInfo subInfo : fragInfo.getSubInfos() ){ for( Toffs to : subInfo.getTermsOffsets() ){ fragment .append( encoder.encodeText( src.substring( srcIndex, to.getStartOffset() - modifiedStartOffset[0] ) ) ) .append( getPreTag( preTags, subInfo.getSeqnum() ) ) .append( encoder.encodeText( src.substring( to.getStartOffset() - modifiedStartOffset[0], to.getEndOffset() - modifiedStartOffset[0] ) ) ) .append( getPostTag( postTags, subInfo.getSeqnum() ) ); srcIndex = to.getEndOffset() - modifiedStartOffset[0]; } } fragment.append( encoder.encodeText( src.substring( srcIndex ) ) ); return fragment.toString(); }
private void testCase( Query query, int fragCharSize, String expectedFragInfo, double expectedTotalSubInfoBoost ) throws Exception { makeIndexLongMV(); FieldQuery fq = new FieldQuery( query, true, true ); FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); FieldPhraseList fpl = new FieldPhraseList( stack, fq ); WeightedFragListBuilder wflb = new WeightedFragListBuilder(); FieldFragList ffl = wflb.createFieldFragList( fpl, fragCharSize ); assertEquals( 1, ffl.getFragInfos().size() ); assertEquals( expectedFragInfo, ffl.getFragInfos().get( 0 ).toString() ); float totalSubInfoBoost = 0; for ( WeightedFragInfo info : ffl.getFragInfos() ) { for ( SubInfo subInfo : info.getSubInfos() ) { totalSubInfoBoost += subInfo.getBoost(); } } assertEquals( expectedTotalSubInfoBoost, totalSubInfoBoost, .0000001 ); }
@Override public String[] createFragments( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments, String[] preTags, String[] postTags, Encoder encoder ) throws IOException { if( maxNumFragments < 0 ) { throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." ); } List<WeightedFragInfo> fragInfos = fieldFragList.getFragInfos(); Field[] values = getFields( reader, docId, fieldName ); if( values.length == 0 ) { return null; } if (discreteMultiValueHighlighting && values.length > 1) { fragInfos = discreteMultiValueHighlighting(fragInfos, values); } fragInfos = getWeightedFragInfoList(fragInfos); int limitFragments = maxNumFragments < fragInfos.size() ? maxNumFragments : fragInfos.size(); List<String> fragments = new ArrayList<>( limitFragments ); StringBuilder buffer = new StringBuilder(); int[] nextValueIndex = { 0 }; for( int n = 0; n < limitFragments; n++ ){ WeightedFragInfo fragInfo = fragInfos.get( n ); fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) ); } return fragments.toArray( new String[fragments.size()] ); }
@Override public int compare( WeightedFragInfo o1, WeightedFragInfo o2 ) { if( o1.getTotalBoost() > o2.getTotalBoost() ) return -1; else if( o1.getTotalBoost() < o2.getTotalBoost() ) return 1; // if same score then check startOffset else{ if( o1.getStartOffset() < o2.getStartOffset() ) return -1; else if( o1.getStartOffset() > o2.getStartOffset() ) return 1; } return 0; }
@Override public String[] createFragments( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments, String[] preTags, String[] postTags, Encoder encoder ) throws IOException { if( maxNumFragments < 0 ) { throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." ); } List<WeightedFragInfo> fragInfos = fieldFragList.getFragInfos(); Field[] values = getFields( reader, docId, fieldName ); if( values.length == 0 ) { return null; } if (discreteMultiValueHighlighting && values.length > 1) { fragInfos = discreteMultiValueHighlighting(fragInfos, values); } fragInfos = getWeightedFragInfoList(fragInfos); int limitFragments = maxNumFragments < fragInfos.size() ? maxNumFragments : fragInfos.size(); List<String> fragments = new ArrayList<String>( limitFragments ); StringBuilder buffer = new StringBuilder(); int[] nextValueIndex = { 0 }; for( int n = 0; n < limitFragments; n++ ){ WeightedFragInfo fragInfo = fragInfos.get( n ); fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) ); } return fragments.toArray( new String[fragments.size()] ); }
@Override protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, String[] preTags, String[] postTags, Encoder encoder ){ return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder); }
/** * do nothing. return the source list. */ @Override public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) { return src; }
/** * Sort by score the list of WeightedFragInfo */ @Override public List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src ) { Collections.sort( src, new ScoreComparator() ); return src; }
public abstract List<WeightedFragInfo> getWeightedFragInfoList( List<WeightedFragInfo> src );