Java 类org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo 实例源码

项目:search    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:search    文件:FieldPhraseListTest.java   
/**
 * Checks the comparison helpers against a fixed set of phrase infos:
 * pairs expected to compare as equal and pairs expected to compare as strictly ordered.
 */
public void testWeightedPhraseInfoComparisonConsistency() {
  // Creation order is kept as-is so the sequence of newInfo calls does not change.
  final WeightedPhraseInfo a = newInfo( 0, 0, 1 );
  final WeightedPhraseInfo b = newInfo( 1, 2, 1 );
  final WeightedPhraseInfo c = newInfo( 2, 3, 1 );
  final WeightedPhraseInfo d = newInfo( 0, 0, 1 );
  final WeightedPhraseInfo e = newInfo( 0, 0, 2 );

  // Each instance against itself, then a against d (same offsets and boost).
  final WeightedPhraseInfo[][] equalPairs = {
    { a, a }, { b, b }, { c, c }, { d, d }, { e, e }, { a, d }
  };
  for( WeightedPhraseInfo[] pair : equalPairs ){
    assertConsistentEquals( pair[0], pair[1] );
  }

  // { smaller, larger } pairs, checked in the listed order.
  final WeightedPhraseInfo[][] orderedPairs = {
    { a, b }, { b, c }, { a, c },
    { a, e }, { e, b }, { e, c },
    { d, b }, { d, c }, { d, e }
  };
  for( WeightedPhraseInfo[] pair : orderedPairs ){
    assertConsistentLessThan( pair[0], pair[1] );
  }
}
项目:NYBC    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<WeightedPhraseInfo>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:read-open-source-code    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<WeightedPhraseInfo>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:read-open-source-code    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<WeightedPhraseInfo>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:read-open-source-code    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:Maskana-Gestor-de-Conocimiento    文件:SingleFragListBuilder.java   
/**
 * Builds a frag list holding at most one fragment that spans the whole field.
 * Phrases are collected in list order up to (not including) the first null entry.
 *
 * @param fieldPhraseList source of the phrases to collect
 * @param fragCharSize size handed to the {@code SimpleFieldFragList} constructor
 * @return the frag list; it receives a fragment only when a phrase was collected
 */
@Override
public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList,
    int fragCharSize) {

  final FieldFragList fragList = new SimpleFieldFragList( fragCharSize );

  final List<WeightedPhraseInfo> collected = new ArrayList<WeightedPhraseInfo>();
  for( WeightedPhraseInfo candidate : fieldPhraseList.phraseList ){
    // a null entry ends the collection
    if( candidate == null ) break;
    collected.add( candidate );
  }

  // one fragment covering offsets 0..Integer.MAX_VALUE, only if something matched
  if( !collected.isEmpty() )
    fragList.add( 0, Integer.MAX_VALUE, collected );
  return fragList;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldPhraseListTest.java   
/**
 * Checks the comparison helpers against a fixed set of phrase infos:
 * pairs expected to compare as equal and pairs expected to compare as strictly ordered.
 */
public void testWeightedPhraseInfoComparisonConsistency() {
  // Creation order is kept as-is so the sequence of newInfo calls does not change.
  final WeightedPhraseInfo a = newInfo( 0, 0, 1 );
  final WeightedPhraseInfo b = newInfo( 1, 2, 1 );
  final WeightedPhraseInfo c = newInfo( 2, 3, 1 );
  final WeightedPhraseInfo d = newInfo( 0, 0, 1 );
  final WeightedPhraseInfo e = newInfo( 0, 0, 2 );

  // Each instance against itself, then a against d (same offsets and boost).
  final WeightedPhraseInfo[][] equalPairs = {
    { a, a }, { b, b }, { c, c }, { d, d }, { e, e }, { a, d }
  };
  for( WeightedPhraseInfo[] pair : equalPairs ){
    assertConsistentEquals( pair[0], pair[1] );
  }

  // { smaller, larger } pairs, checked in the listed order.
  final WeightedPhraseInfo[][] orderedPairs = {
    { a, b }, { b, c }, { a, c },
    { a, e }, { e, b }, { e, c },
    { d, b }, { d, c }, { d, e }
  };
  for( WeightedPhraseInfo[] pair : orderedPairs ){
    assertConsistentLessThan( pair[0], pair[1] );
  }
}
项目:search    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo} carrying its own boost, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:search    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * A phrase's raw boost sums {@code termWeight * phraseBoost} over its terms whose text
 * has not been seen earlier in this fragment. Every raw boost is then scaled by a
 * length factor derived from the total number of terms (repeats included), and the
 * fragment boost is the sum of the scaled phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final HashSet<String> seenTermTexts = new HashSet<>();
  final List<SubInfo> unscaled = new ArrayList<>();
  int termCount = 0;

  // Pass 1: raw boost per phrase; a term text only scores on its first occurrence.
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    float rawBoost = 0;
    for( final TermInfo term : phrase.getTermsInfos() ){
      final boolean firstOccurrence = seenTermTexts.add( term.getText() );
      if( firstOccurrence ){
        rawBoost += term.getWeight() * phrase.getBoost();
      }
      termCount++;
    }
    unscaled.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(),
      phrase.getSeqnum(), rawBoost ) );
  }

  // The number of terms in the fragment feeds into the weight so that a one-word query
  // does not score every fragment the same; the square root dampens the effect so long
  // fragments do not outrank more relevant ones. Algebraically this is sqrt(termCount);
  // the expression form is kept so float rounding stays the same.
  final float norm = termCount * ( 1 / (float)Math.sqrt( termCount ) );

  // Pass 2: rebuild each SubInfo with its scaled boost and accumulate the fragment total.
  final List<SubInfo> scaled = new ArrayList<>();
  float fragmentBoost = 0;
  for( final SubInfo raw : unscaled ){
    final float scaledBoost = raw.getBoost() * norm;
    scaled.add( new SubInfo( raw.getText(), raw.getTermsOffsets(),
      raw.getSeqnum(), scaledBoost ) );
    fragmentBoost += scaledBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, scaled, fragmentBoost ) );
}
项目:NYBC    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo}, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<SubInfo>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:NYBC    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * The fragment boost sums {@code termWeight * phraseBoost} over every term whose text
 * has not been seen earlier in this fragment, then scales that sum by a length factor
 * derived from the total number of terms (repeats included).
 * <p>
 * When the phrases contribute no terms at all the boost stays {@code 0}; without the
 * guard the length factor would evaluate to {@code 0 * (1 / 0.0)}, i.e. NaN.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {

  float totalBoost = 0;

  List<SubInfo> subInfos = new ArrayList<SubInfo>();

  HashSet<String> distinctTerms = new HashSet<String>();

  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){

    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );

    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // a term text only contributes to the boost the first time it is seen
      if ( distinctTerms.add( ti.getText() ) )
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Skipped for length == 0, where the factor would be NaN and poison the stored boost.
  if( length > 0 )
    totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo} carrying its own boost, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<SubInfo>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * A phrase's raw boost sums {@code termWeight * phraseBoost} over its terms whose text
 * has not been seen earlier in this fragment. Every raw boost is then scaled by a
 * length factor derived from the total number of terms (repeats included), and the
 * fragment boost is the sum of the scaled phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final HashSet<String> seenTermTexts = new HashSet<String>();
  final List<SubInfo> unscaled = new ArrayList<SubInfo>();
  int termCount = 0;

  // Pass 1: raw boost per phrase; a term text only scores on its first occurrence.
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    float rawBoost = 0;
    for( final TermInfo term : phrase.getTermsInfos() ){
      final boolean firstOccurrence = seenTermTexts.add( term.getText() );
      if( firstOccurrence ){
        rawBoost += term.getWeight() * phrase.getBoost();
      }
      termCount++;
    }
    unscaled.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(),
      phrase.getSeqnum(), rawBoost ) );
  }

  // The number of terms in the fragment feeds into the weight so that a one-word query
  // does not score every fragment the same; the square root dampens the effect so long
  // fragments do not outrank more relevant ones. Algebraically this is sqrt(termCount);
  // the expression form is kept so float rounding stays the same.
  final float norm = termCount * ( 1 / (float)Math.sqrt( termCount ) );

  // Pass 2: rebuild each SubInfo with its scaled boost and accumulate the fragment total.
  final List<SubInfo> scaled = new ArrayList<SubInfo>();
  float fragmentBoost = 0;
  for( final SubInfo raw : unscaled ){
    final float scaledBoost = raw.getBoost() * norm;
    scaled.add( new SubInfo( raw.getText(), raw.getTermsOffsets(),
      raw.getSeqnum(), scaledBoost ) );
    fragmentBoost += scaledBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, scaled, fragmentBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo} carrying its own boost, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<SubInfo>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * A phrase's raw boost sums {@code termWeight * phraseBoost} over its terms whose text
 * has not been seen earlier in this fragment. Every raw boost is then scaled by a
 * length factor derived from the total number of terms (repeats included), and the
 * fragment boost is the sum of the scaled phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final HashSet<String> seenTermTexts = new HashSet<String>();
  final List<SubInfo> unscaled = new ArrayList<SubInfo>();
  int termCount = 0;

  // Pass 1: raw boost per phrase; a term text only scores on its first occurrence.
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    float rawBoost = 0;
    for( final TermInfo term : phrase.getTermsInfos() ){
      final boolean firstOccurrence = seenTermTexts.add( term.getText() );
      if( firstOccurrence ){
        rawBoost += term.getWeight() * phrase.getBoost();
      }
      termCount++;
    }
    unscaled.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(),
      phrase.getSeqnum(), rawBoost ) );
  }

  // The number of terms in the fragment feeds into the weight so that a one-word query
  // does not score every fragment the same; the square root dampens the effect so long
  // fragments do not outrank more relevant ones. Algebraically this is sqrt(termCount);
  // the expression form is kept so float rounding stays the same.
  final float norm = termCount * ( 1 / (float)Math.sqrt( termCount ) );

  // Pass 2: rebuild each SubInfo with its scaled boost and accumulate the fragment total.
  final List<SubInfo> scaled = new ArrayList<SubInfo>();
  float fragmentBoost = 0;
  for( final SubInfo raw : unscaled ){
    final float scaledBoost = raw.getBoost() * norm;
    scaled.add( new SubInfo( raw.getText(), raw.getTermsOffsets(),
      raw.getSeqnum(), scaledBoost ) );
    fragmentBoost += scaledBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, scaled, fragmentBoost ) );
}
项目:read-open-source-code    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo} carrying its own boost, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum(), phrase.getBoost() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:read-open-source-code    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * A phrase's raw boost sums {@code termWeight * phraseBoost} over its terms whose text
 * has not been seen earlier in this fragment. Every raw boost is then scaled by a
 * length factor derived from the total number of terms (repeats included), and the
 * fragment boost is the sum of the scaled phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final HashSet<String> seenTermTexts = new HashSet<>();
  final List<SubInfo> unscaled = new ArrayList<>();
  int termCount = 0;

  // Pass 1: raw boost per phrase; a term text only scores on its first occurrence.
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    float rawBoost = 0;
    for( final TermInfo term : phrase.getTermsInfos() ){
      final boolean firstOccurrence = seenTermTexts.add( term.getText() );
      if( firstOccurrence ){
        rawBoost += term.getWeight() * phrase.getBoost();
      }
      termCount++;
    }
    unscaled.add( new SubInfo( phrase.getText(), phrase.getTermsOffsets(),
      phrase.getSeqnum(), rawBoost ) );
  }

  // The number of terms in the fragment feeds into the weight so that a one-word query
  // does not score every fragment the same; the square root dampens the effect so long
  // fragments do not outrank more relevant ones. Algebraically this is sqrt(termCount);
  // the expression form is kept so float rounding stays the same.
  final float norm = termCount * ( 1 / (float)Math.sqrt( termCount ) );

  // Pass 2: rebuild each SubInfo with its scaled boost and accumulate the fragment total.
  final List<SubInfo> scaled = new ArrayList<>();
  float fragmentBoost = 0;
  for( final SubInfo raw : unscaled ){
    final float scaledBoost = raw.getBoost() * norm;
    scaled.add( new SubInfo( raw.getText(), raw.getTermsOffsets(),
      raw.getSeqnum(), scaledBoost ) );
    fragmentBoost += scaledBoost;
  }

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, scaled, fragmentBoost ) );
}
项目:Maskana-Gestor-de-Conocimiento    文件:SimpleFieldFragList.java   
/**
 * Appends one fragment built from the given phrases: each phrase becomes a
 * {@code SubInfo}, and the fragment boost is the sum of the phrase boosts.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {
  final List<SubInfo> fragmentSubInfos = new ArrayList<SubInfo>();
  float boostSum = 0;
  for( final WeightedPhraseInfo phrase : phraseInfoList ){
    final SubInfo entry = new SubInfo( phrase.getText(), phrase.getTermsOffsets(), phrase.getSeqnum() );
    fragmentSubInfos.add( entry );
    boostSum = boostSum + phrase.getBoost();
  }
  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, fragmentSubInfos, boostSum ) );
}
项目:Maskana-Gestor-de-Conocimiento    文件:WeightedFieldFragList.java   
/**
 * Appends one weighted fragment built from the given phrases.
 * <p>
 * The fragment boost sums {@code termWeight * phraseBoost} over every term whose text
 * has not been seen earlier in this fragment, then scales that sum by a length factor
 * derived from the total number of terms (repeats included).
 * <p>
 * When the phrases contribute no terms at all the boost stays {@code 0}; without the
 * guard the length factor would evaluate to {@code 0 * (1 / 0.0)}, i.e. NaN.
 *
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList phrases that belong to the fragment
 */
@Override
public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) {

  float totalBoost = 0;

  List<SubInfo> subInfos = new ArrayList<SubInfo>();

  HashSet<String> distinctTerms = new HashSet<String>();

  int length = 0;

  for( WeightedPhraseInfo phraseInfo : phraseInfoList ){

    subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum() ) );

    for ( TermInfo ti :  phraseInfo.getTermsInfos()) {
      // a term text only contributes to the boost the first time it is seen
      if ( distinctTerms.add( ti.getText() ) )
        totalBoost += ti.getWeight() * phraseInfo.getBoost();
      length++;
    }
  }

  // We want that terms per fragment (length) is included into the weight. Otherwise a one-word-query
  // would cause an equal weight for all fragments regardless of how much words they contain.
  // To avoid that fragments containing a high number of words possibly "outrank" more relevant fragments
  // we "bend" the length with a standard-normalization a little bit.
  // Skipped for length == 0, where the factor would be NaN and poison the stored boost.
  if( length > 0 )
    totalBoost *= length * ( 1 / Math.sqrt( length ) );

  getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) );
}
项目:search    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is dropped.
 * Otherwise it opens a window; following phrases are pulled in while their end offset
 * stays inside the window, each one passing through {@code acceptPhrase}. If at least
 * one phrase was accepted, the fragment is re-centered around the match and emitted.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  final List<WeightedPhraseInfo> accepted = new ArrayList<>();
  final IteratorQueue<WeightedPhraseInfo> pending = new IteratorQueue<>(fieldPhraseList.getPhraseList().iterator());
  // end of the last emitted fragment; no new fragment may start before it
  int fragmentFloor = 0;

  for( WeightedPhraseInfo head = pending.top(); head != null; head = pending.top() ){
    // the phrase crosses into the previous fragment: discard it and look at the next one
    if( head.getStartOffset() < fragmentFloor ){
      pending.removeTop();
      continue;
    }

    accepted.clear();
    final int matchStart = head.getStartOffset();
    int matchEnd = head.getEndOffset();
    final int windowStart = Math.max( matchStart - margin, fragmentFloor );
    final int windowEnd = Math.max( matchEnd, windowStart + fragCharSize );
    if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
      accepted.add( head );
    }

    // consume every following phrase that still ends inside the window
    for( WeightedPhraseInfo next = pending.top();
         next != null && next.getEndOffset() <= windowEnd;
         next = pending.top() ){
      matchEnd = next.getEndOffset();
      if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
        accepted.add( next );
      }
    }

    if( accepted.isEmpty() ){
      continue;
    }

    final int matchLen = matchEnd - matchStart;
    // center the match; matchLen may exceed fragCharSize, so the margin is clamped at 0
    final int centerMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    final int fragStart = Math.max( matchStart - centerMargin, fragmentFloor );
    // the fragment is as long as the larger of the match and the requested size
    final int fragEnd = fragStart + Math.max( matchLen, fragCharSize );
    fragmentFloor = fragEnd;
    fieldFragList.add( fragStart, fragEnd, accepted );
  }
  return fieldFragList;
}
项目:search    文件:FieldPhraseListTest.java   
/**
 * Creates a phrase info made of a single term with random text.
 *
 * @param startOffset start offset given to the term
 * @param endOffset end offset given to the term
 * @param boost boost given to the phrase info
 * @return the new phrase info
 */
private WeightedPhraseInfo newInfo( int startOffset, int endOffset, float boost ) {
  // the two trailing TermInfo arguments are always passed as 0
  final TermInfo onlyTerm = new TermInfo( TestUtil.randomUnicodeString(random()), startOffset, endOffset, 0, 0 );
  final LinkedList< TermInfo > terms = new LinkedList<>();
  terms.add( onlyTerm );
  return new WeightedPhraseInfo( terms, boost );
}
项目:NYBC    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is skipped.
 * Otherwise it opens a window; following phrases are added while their end offset stays
 * inside the window. The fragment is then re-centered around the match and emitted.
 * A match longer than {@code fragCharSize} yields a fragment that starts at the match
 * (or at the previous fragment's end) and is long enough to cover the whole match.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){

  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  List<WeightedPhraseInfo> wpil = new ArrayList<WeightedPhraseInfo>();
  Iterator<WeightedPhraseInfo> ite = fieldPhraseList.getPhraseList().iterator();
  WeightedPhraseInfo phraseInfo = null;
  int startOffset = 0;
  // true when the inner loop already fetched a phrase that the outer loop must examine next
  boolean taken = false;
  while( true ){
    if( !taken ){
      if( !ite.hasNext() ) break;
      phraseInfo = ite.next();
    }
    taken = false;
    if( phraseInfo == null ) break;

    // if the phrase violates the border of previous fragment, discard it and try next phrase
    if( phraseInfo.getStartOffset() < startOffset ) continue;

    wpil.clear();
    wpil.add( phraseInfo );
    int firstOffset = phraseInfo.getStartOffset();
    int st = phraseInfo.getStartOffset() - margin < startOffset ?
        startOffset : phraseInfo.getStartOffset() - margin;
    int en = st + fragCharSize;
    if( phraseInfo.getEndOffset() > en )
      en = phraseInfo.getEndOffset();

    int lastEndOffset = phraseInfo.getEndOffset();
    // pull following phrases until one ends beyond the current window
    while( true ){
      if( ite.hasNext() ){
        phraseInfo = ite.next();
        taken = true;
        if( phraseInfo == null ) break;
      }
      else
        break;
      if( phraseInfo.getEndOffset() <= en ){
        wpil.add( phraseInfo );
        lastEndOffset = phraseInfo.getEndOffset();
      }
      else
        break;
    }
    int matchLen = lastEndOffset - firstOffset;
    //now recalculate the start and end position to "center" the result
    // matchLen can exceed fragCharSize (a single long phrase); clamp the margin at 0 so
    // the fragment never starts after the first phrase
    int newMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    st = firstOffset - newMargin;
    if(st<startOffset){
      st = startOffset;
    }
    // grow the fragment to the match length when the match is longer than fragCharSize,
    // so every collected phrase lies inside [st, en)
    en = st + Math.max( matchLen, fragCharSize );
    startOffset = en;
    fieldFragList.add( st, en, wpil );
  }
  return fieldFragList;
}
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is dropped.
 * Otherwise it opens a window; following phrases are pulled in while their end offset
 * stays inside the window, each one passing through {@code acceptPhrase}. If at least
 * one phrase was accepted, the fragment is re-centered around the match and emitted.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  final List<WeightedPhraseInfo> accepted = new ArrayList<WeightedPhraseInfo>();
  final IteratorQueue<WeightedPhraseInfo> pending = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
  // end of the last emitted fragment; no new fragment may start before it
  int fragmentFloor = 0;

  for( WeightedPhraseInfo head = pending.top(); head != null; head = pending.top() ){
    // the phrase crosses into the previous fragment: discard it and look at the next one
    if( head.getStartOffset() < fragmentFloor ){
      pending.removeTop();
      continue;
    }

    accepted.clear();
    final int matchStart = head.getStartOffset();
    int matchEnd = head.getEndOffset();
    final int windowStart = Math.max( matchStart - margin, fragmentFloor );
    final int windowEnd = Math.max( matchEnd, windowStart + fragCharSize );
    if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
      accepted.add( head );
    }

    // consume every following phrase that still ends inside the window
    for( WeightedPhraseInfo next = pending.top();
         next != null && next.getEndOffset() <= windowEnd;
         next = pending.top() ){
      matchEnd = next.getEndOffset();
      if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
        accepted.add( next );
      }
    }

    if( accepted.isEmpty() ){
      continue;
    }

    final int matchLen = matchEnd - matchStart;
    // center the match; matchLen may exceed fragCharSize, so the margin is clamped at 0
    final int centerMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    final int fragStart = Math.max( matchStart - centerMargin, fragmentFloor );
    // the fragment is as long as the larger of the match and the requested size
    final int fragEnd = fragStart + Math.max( matchLen, fragCharSize );
    fragmentFloor = fragEnd;
    fieldFragList.add( fragStart, fragEnd, accepted );
  }
  return fieldFragList;
}
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is dropped.
 * Otherwise it opens a window; following phrases are pulled in while their end offset
 * stays inside the window, each one passing through {@code acceptPhrase}. If at least
 * one phrase was accepted, the fragment is re-centered around the match and emitted.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  final List<WeightedPhraseInfo> accepted = new ArrayList<WeightedPhraseInfo>();
  final IteratorQueue<WeightedPhraseInfo> pending = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
  // end of the last emitted fragment; no new fragment may start before it
  int fragmentFloor = 0;

  for( WeightedPhraseInfo head = pending.top(); head != null; head = pending.top() ){
    // the phrase crosses into the previous fragment: discard it and look at the next one
    if( head.getStartOffset() < fragmentFloor ){
      pending.removeTop();
      continue;
    }

    accepted.clear();
    final int matchStart = head.getStartOffset();
    int matchEnd = head.getEndOffset();
    final int windowStart = Math.max( matchStart - margin, fragmentFloor );
    final int windowEnd = Math.max( matchEnd, windowStart + fragCharSize );
    if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
      accepted.add( head );
    }

    // consume every following phrase that still ends inside the window
    for( WeightedPhraseInfo next = pending.top();
         next != null && next.getEndOffset() <= windowEnd;
         next = pending.top() ){
      matchEnd = next.getEndOffset();
      if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
        accepted.add( next );
      }
    }

    if( accepted.isEmpty() ){
      continue;
    }

    final int matchLen = matchEnd - matchStart;
    // center the match; matchLen may exceed fragCharSize, so the margin is clamped at 0
    final int centerMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    final int fragStart = Math.max( matchStart - centerMargin, fragmentFloor );
    // the fragment is as long as the larger of the match and the requested size
    final int fragEnd = fragStart + Math.max( matchLen, fragCharSize );
    fragmentFloor = fragEnd;
    fieldFragList.add( fragStart, fragEnd, accepted );
  }
  return fieldFragList;
}
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is dropped.
 * Otherwise it opens a window; following phrases are pulled in while their end offset
 * stays inside the window, each one passing through {@code acceptPhrase}. If at least
 * one phrase was accepted, the fragment is re-centered around the match and emitted.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  final List<WeightedPhraseInfo> accepted = new ArrayList<>();
  final IteratorQueue<WeightedPhraseInfo> pending = new IteratorQueue<>(fieldPhraseList.getPhraseList().iterator());
  // end of the last emitted fragment; no new fragment may start before it
  int fragmentFloor = 0;

  for( WeightedPhraseInfo head = pending.top(); head != null; head = pending.top() ){
    // the phrase crosses into the previous fragment: discard it and look at the next one
    if( head.getStartOffset() < fragmentFloor ){
      pending.removeTop();
      continue;
    }

    accepted.clear();
    final int matchStart = head.getStartOffset();
    int matchEnd = head.getEndOffset();
    final int windowStart = Math.max( matchStart - margin, fragmentFloor );
    final int windowEnd = Math.max( matchEnd, windowStart + fragCharSize );
    if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
      accepted.add( head );
    }

    // consume every following phrase that still ends inside the window
    for( WeightedPhraseInfo next = pending.top();
         next != null && next.getEndOffset() <= windowEnd;
         next = pending.top() ){
      matchEnd = next.getEndOffset();
      if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
        accepted.add( next );
      }
    }

    if( accepted.isEmpty() ){
      continue;
    }

    final int matchLen = matchEnd - matchStart;
    // center the match; matchLen may exceed fragCharSize, so the margin is clamped at 0
    final int centerMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    final int fragStart = Math.max( matchStart - centerMargin, fragmentFloor );
    // the fragment is as long as the larger of the match and the requested size
    final int fragEnd = fragStart + Math.max( matchLen, fragCharSize );
    fragmentFloor = fragEnd;
    fieldFragList.add( fragStart, fragEnd, accepted );
  }
  return fieldFragList;
}
项目:Maskana-Gestor-de-Conocimiento    文件:BaseFragListBuilder.java   
/**
 * Groups the phrases of {@code fieldPhraseList} into consecutive, non-overlapping
 * fragments and adds each fragment to {@code fieldFragList}.
 * <p>
 * A phrase that starts before the end of the previously emitted fragment is dropped.
 * Otherwise it opens a window; following phrases are pulled in while their end offset
 * stays inside the window, each one passing through {@code acceptPhrase}. If at least
 * one phrase was accepted, the fragment is re-centered around the match and emitted.
 *
 * @param fieldPhraseList source of the phrases
 * @param fieldFragList receiver of the generated fragments
 * @param fragCharSize requested fragment size in characters
 * @return {@code fieldFragList}
 * @throws IllegalArgumentException if {@code fragCharSize} is below {@code minFragCharSize}
 */
protected FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize ){
  if( fragCharSize < minFragCharSize )
    throw new IllegalArgumentException( "fragCharSize(" + fragCharSize + ") is too small. It must be " + minFragCharSize + " or higher." );

  final List<WeightedPhraseInfo> accepted = new ArrayList<WeightedPhraseInfo>();
  final IteratorQueue<WeightedPhraseInfo> pending = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.getPhraseList().iterator());
  // end of the last emitted fragment; no new fragment may start before it
  int fragmentFloor = 0;

  for( WeightedPhraseInfo head = pending.top(); head != null; head = pending.top() ){
    // the phrase crosses into the previous fragment: discard it and look at the next one
    if( head.getStartOffset() < fragmentFloor ){
      pending.removeTop();
      continue;
    }

    accepted.clear();
    final int matchStart = head.getStartOffset();
    int matchEnd = head.getEndOffset();
    final int windowStart = Math.max( matchStart - margin, fragmentFloor );
    final int windowEnd = Math.max( matchEnd, windowStart + fragCharSize );
    if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
      accepted.add( head );
    }

    // consume every following phrase that still ends inside the window
    for( WeightedPhraseInfo next = pending.top();
         next != null && next.getEndOffset() <= windowEnd;
         next = pending.top() ){
      matchEnd = next.getEndOffset();
      if( acceptPhrase( pending.removeTop(), matchEnd - matchStart, fragCharSize ) ){
        accepted.add( next );
      }
    }

    if( accepted.isEmpty() ){
      continue;
    }

    final int matchLen = matchEnd - matchStart;
    // center the match; matchLen may exceed fragCharSize, so the margin is clamped at 0
    final int centerMargin = Math.max( 0, ( fragCharSize - matchLen ) / 2 );
    final int fragStart = Math.max( matchStart - centerMargin, fragmentFloor );
    // the fragment is as long as the larger of the match and the requested size
    final int fragEnd = fragStart + Math.max( matchLen, fragCharSize );
    fragmentFloor = fragEnd;
    fieldFragList.add( fragStart, fragEnd, accepted );
  }
  return fieldFragList;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldPhraseListTest.java   
// Test helper: wraps a single TermInfo carrying a random unicode text and the
// given offsets into a WeightedPhraseInfo with the given boost.
private WeightedPhraseInfo newInfo( int startOffset, int endOffset, float boost ) {
  final String randomText = _TestUtil.randomUnicodeString( random() );
  // the two trailing TermInfo arguments are fixed at 0 for every generated phrase
  final TermInfo singleTerm = new TermInfo( randomText, startOffset, endOffset, 0, 0 );

  final LinkedList< TermInfo > termInfos = new LinkedList< TermInfo >();
  termInfos.add( singleTerm );
  return new WeightedPhraseInfo( termInfos, boost );
}
项目:search    文件:BaseFragListBuilder.java   
/**
 * Decides whether the given {@link WeightedPhraseInfo} is kept as a highlighted
 * phrase or dropped.
 * <p>
 * This default implementation keeps every phrase that has at most one terms-offset
 * entry. A phrase with more entries is kept only while the match length does not
 * exceed the fragment character size.
 *
 * @param info the phrase info to check
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  // phrases with at most one terms-offset entry are accepted regardless of length
  if (info.getTermsOffsets().size() <= 1) {
    return true;
  }
  // otherwise the match has to fit into the fragment
  return matchLength <= fragCharSize;
}
项目:search    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
项目:NYBC    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Predicate that tells whether a {@link WeightedPhraseInfo} should be highlighted
 * or discarded.
 * <p>
 * By default a phrase is rejected only when both conditions hold: it has more than
 * one terms-offset entry, and the match length is larger than the fragment
 * character size.
 *
 * @param info the phrase info to check
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  final int offsetCount = info.getTermsOffsets().size();
  final boolean multiTerm = offsetCount > 1;
  final boolean tooLong = matchLength > fragCharSize;
  return !(multiTerm && tooLong);
}
项目:read-open-source-code    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Decides whether the given {@link WeightedPhraseInfo} is kept as a highlighted
 * phrase or dropped.
 * <p>
 * This default implementation keeps every phrase that has at most one terms-offset
 * entry. A phrase with more entries is kept only while the match length does not
 * exceed the fragment character size.
 *
 * @param info the phrase info to check
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  // phrases with at most one terms-offset entry are accepted regardless of length
  if (info.getTermsOffsets().size() <= 1) {
    return true;
  }
  // otherwise the match has to fit into the fragment
  return matchLength <= fragCharSize;
}
项目:read-open-source-code    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
项目:read-open-source-code    文件:BaseFragListBuilder.java   
/**
 * Predicate that tells whether a {@link WeightedPhraseInfo} should be highlighted
 * or discarded.
 * <p>
 * By default a phrase is rejected only when both conditions hold: it has more than
 * one terms-offset entry, and the match length is larger than the fragment
 * character size.
 *
 * @param info the phrase info to check
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  final int offsetCount = info.getTermsOffsets().size();
  final boolean multiTerm = offsetCount > 1;
  final boolean tooLong = matchLength > fragCharSize;
  return !(multiTerm && tooLong);
}
项目:read-open-source-code    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );
项目:Maskana-Gestor-de-Conocimiento    文件:BaseFragListBuilder.java   
/**
 * Decides whether the given {@link WeightedPhraseInfo} is kept as a highlighted
 * phrase or dropped.
 * <p>
 * This default implementation keeps every phrase that has at most one terms-offset
 * entry. A phrase with more entries is kept only while the match length does not
 * exceed the fragment character size.
 *
 * @param info the phrase info to check
 * @param matchLength the match length of the current phrase
 * @param fragCharSize the configured fragment character size
 * @return <code>true</code> if this phrase info should be accepted as a highlight phrase
 */
protected boolean acceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize) {
  // phrases with at most one terms-offset entry are accepted regardless of length
  if (info.getTermsOffsets().size() <= 1) {
    return true;
  }
  // otherwise the match has to fit into the fragment
  return matchLength <= fragCharSize;
}
项目:Maskana-Gestor-de-Conocimiento    文件:FieldFragList.java   
/**
 * Converts the given list of {@link WeightedPhraseInfo} to a WeightedFragInfo and
 * then adds it to the fragInfos.
 * <p>
 * Abstract: the conversion is supplied by the concrete subclass.
 * 
 * @param startOffset start offset of the fragment
 * @param endOffset end offset of the fragment
 * @param phraseInfoList list of WeightedPhraseInfo objects to convert
 */
public abstract void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList );