Java 类org.apache.lucene.util.StringHelper 实例源码

项目:lams    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(previous term, term)} as a VInt, the
 * term length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm.get(), term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.clear();
}
项目:search    文件:SimpleTextFieldsReader.java   
/**
 * Reads lines from {@code in} (wrapped in a checksumming input) until the
 * {@code END} line is seen, recording every line that starts with
 * {@code FIELD}.
 *
 * <p>For each such line the bytes after the {@code FIELD} prefix are decoded
 * as UTF-8 and used as the key; the value is the file pointer right after
 * that line was read. When {@code END} is reached the footer is verified via
 * {@code SimpleTextUtil.checkFooter}.
 *
 * @param in the input to read from
 * @return sorted map of field name to file pointer
 * @throws IOException if reading or footer verification fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> fieldPointers = new TreeMap<>();

  for (;;) {
    SimpleTextUtil.readLine(checksumIn, line);
    if (line.get().equals(END)) {
      break;
    }
    if (StringHelper.startsWith(line.get(), FIELD)) {
      final int nameLength = line.length() - FIELD.length;
      final String name = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      fieldPointers.put(name, checksumIn.getFilePointer());
    }
  }

  SimpleTextUtil.checkFooter(checksumIn);
  return fieldPointers;
}
项目:search    文件:AbstractVisitingPrefixTreeFilter.java   
/**
 * Advances through terms ({@code termsEnum.next()}) for as long as the
 * current term starts with {@code curVNodeTerm}. For each such term the
 * matching cell is resolved and {@link
 * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)} is called when
 * either the cell is above {@code scanDetailLevel} and is a leaf, or the
 * cell is exactly at {@code scanDetailLevel} and is not a leaf.
 *
 * @param scanDetailLevel the level against which each cell's level is compared
 * @throws IOException if the terms enum or the visitor fails
 */
protected void scan(int scanDetailLevel) throws IOException {
  //TODO refactor to use method on curVNode.cell
  while (thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm)) {
    scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);

    final int cellLevel = scanCell.getLevel();
    final boolean shouldVisit;
    if (cellLevel < scanDetailLevel) {
      shouldVisit = scanCell.isLeaf();
    } else if (cellLevel == scanDetailLevel) {
      shouldVisit = !scanCell.isLeaf(); // LUCENE-5529
    } else {
      shouldVisit = false;
    }

    if (shouldVisit) {
      visitScanned(scanCell);
    }
    thisTerm = termsEnum.next();
  }
}
项目:search    文件:PreFlexRWTermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term position/offset state.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(previous term, term)} as a VInt, the
 * term length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm.get(), term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
    offsetIndex = 0;
    offsetFreq = freq;
  }
}
项目:search    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(previous term, term)} as a VInt, the
 * term length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm.get(), term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.clear();
}
项目:NYBC    文件:AbstractVisitingPrefixTreeFilter.java   
/**
 * Advances through terms ({@code termsEnum.next()}) for as long as the
 * current term starts with {@code curVNodeTerm}. Cells deeper than
 * {@code scanDetailLevel} are ignored; a cell that is at
 * {@code scanDetailLevel} or is a leaf is passed to {@link
 * #visitScanned(org.apache.lucene.spatial.prefix.tree.Node,
 * com.spatial4j.core.shape.Shape)}.
 *
 * @param scanDetailLevel the deepest level that is still visited
 * @throws IOException if the terms enum or the visitor fails
 */
protected void scan(int scanDetailLevel) throws IOException {
  while (thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm)) {
    scanCell = grid.getNode(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);

    final int cellLevel = scanCell.getLevel();
    if (cellLevel <= scanDetailLevel
        && (cellLevel == scanDetailLevel || scanCell.isLeaf())) {
      // A non-leaf cell at the maximum level is treated as a point, so its
      // center is used instead of its box (points never have isLeaf()).
      final Shape cellShape;
      if (cellLevel == grid.getMaxLevels() && !scanCell.isLeaf()) {
        cellShape = scanCell.getCenter();
      } else {
        cellShape = scanCell.getShape();
      }
      visitScanned(scanCell, cellShape);
    }

    thisTerm = termsEnum.next();
  }
}
项目:NYBC    文件:PreFlexRWTermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term position/offset state.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
    offsetIndex = 0;
    offsetFreq = freq;
  }
}
项目:NYBC    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.length = 0;
}
项目:read-open-source-code    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.length = 0;
}
项目:read-open-source-code    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.length = 0;
}
项目:read-open-source-code    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(previous term, term)} as a VInt, the
 * term length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm.get(), term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.clear();
}
项目:read-open-source-code    文件:SimpleTextFieldsReader.java   
/**
 * Reads lines from {@code in} (wrapped in a checksumming input) until the
 * {@code END} line is seen, recording every line that starts with
 * {@code FIELD}.
 *
 * <p>For each such line the bytes after the {@code FIELD} prefix are decoded
 * as UTF-8 and used as the key; the value is the file pointer right after
 * that line was read. When {@code END} is reached the footer is verified via
 * {@code SimpleTextUtil.checkFooter}.
 *
 * @param in the input to read from
 * @return sorted map of field name to file pointer
 * @throws IOException if reading or footer verification fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput checksumIn = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> fieldPointers = new TreeMap<>();

  for (;;) {
    SimpleTextUtil.readLine(checksumIn, line);
    if (line.get().equals(END)) {
      break;
    }
    if (StringHelper.startsWith(line.get(), FIELD)) {
      final int nameLength = line.length() - FIELD.length;
      final String name = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      fieldPointers.put(name, checksumIn.getFilePointer());
    }
  }

  SimpleTextUtil.checkFooter(checksumIn);
  return fieldPointers;
}
项目:read-open-source-code    文件:AbstractVisitingPrefixTreeFilter.java   
/**
 * Advances through terms ({@code termsEnum.next()}) for as long as the
 * current term starts with {@code curVNodeTerm}. For each such term the
 * matching cell is resolved and {@link
 * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)} is called when
 * either the cell is above {@code scanDetailLevel} and is a leaf, or the
 * cell is exactly at {@code scanDetailLevel} and is not a leaf.
 *
 * @param scanDetailLevel the level against which each cell's level is compared
 * @throws IOException if the terms enum or the visitor fails
 */
protected void scan(int scanDetailLevel) throws IOException {
  //TODO refactor to use method on curVNode.cell
  while (thisTerm != null && StringHelper.startsWith(thisTerm, curVNodeTerm)) {
    scanCell = grid.getCell(thisTerm.bytes, thisTerm.offset, thisTerm.length, scanCell);

    final int cellLevel = scanCell.getLevel();
    final boolean shouldVisit;
    if (cellLevel < scanDetailLevel) {
      shouldVisit = scanCell.isLeaf();
    } else if (cellLevel == scanDetailLevel) {
      shouldVisit = !scanCell.isLeaf(); // LUCENE-5529
    } else {
      shouldVisit = false;
    }

    if (shouldVisit) {
      visitScanned(scanCell);
    }
    thisTerm = termsEnum.next();
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:PreFlexRWTermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term position/offset state.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
    offsetIndex = 0;
    offsetFreq = freq;
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:Lucene40TermVectorsWriter.java   
/**
 * Begins a new term: writes its header to {@code tvf} and resets the
 * per-term bookkeeping fields.
 *
 * <p>Written in order: the value returned by
 * {@code StringHelper.bytesDifference(lastTerm, term)} as a VInt, the term
 * length minus that value as a VInt, the term bytes starting that many
 * bytes past {@code term.offset}, and finally {@code freq} as a VInt.
 *
 * @param term the term being started; copied into {@code lastTerm}
 * @param freq the frequency written after the term bytes
 * @throws IOException if writing to {@code tvf} fails
 */
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
  // Leading bytes that are skipped instead of being written again.
  final int sharedLength = StringHelper.bytesDifference(lastTerm, term);
  final int tailLength = term.length - sharedLength;
  final int tailStart = term.offset + sharedLength;

  tvf.writeVInt(sharedLength);
  tvf.writeVInt(tailLength);
  tvf.writeBytes(term.bytes, tailStart, tailLength);
  tvf.writeVInt(freq);

  // Remember this term so the next call can diff against it.
  lastTerm.copyBytes(term);
  lastOffset = 0;
  lastPosition = 0;

  if (offsets && positions) {
    // we might need to buffer if it's a non-bulk merge
    offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
    offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
  }

  // Reset the buffering state for the new term.
  bufferedIndex = 0;
  bufferedFreq = freq;
  payloadData.length = 0;
}
项目:elasticsearch_my    文件:MultiPhrasePrefixQuery.java   
/**
 * Collects into {@code terms} every term of {@code field} that starts with
 * the bytes of {@code prefix}, visiting each leaf of {@code reader} in turn.
 * Returns as soon as {@code terms} holds at least {@code maxExpansions}
 * entries.
 *
 * @param terms  output set that receives deep copies of the matching terms
 * @param prefix term whose bytes are the prefix to match
 * @param reader reader whose leaves are scanned
 * @throws IOException if reading terms from a leaf fails
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore it is better to iterate over each leaf individually.
    for (LeafReaderContext leafContext : reader.leaves()) {
        final Terms leafTerms = leafContext.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum termsEnum = leafTerms.iterator();
        if (termsEnum.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        BytesRef current = termsEnum.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = termsEnum.next();
        }
    }
}
项目:lams    文件:Lucene410DocValuesConsumer.java   
/**
 * Builds an index over a sample of {@code values} and writes it out.
 *
 * <p>Every value whose running index has {@code REVERSE_INTERVAL_MASK} bits
 * all zero is truncated to {@code StringHelper.sortKeyLength(priorTerm, value)}
 * bytes, copied length-prefixed into a {@link PagedBytes}, and its address is
 * added to a monotonic packed writer over {@code data}. The value just before
 * each sampled one (index bits equal to the mask) is remembered as the prior
 * term. Afterwards the start file pointer goes to {@code meta}, and the byte
 * count followed by the paged bytes go to {@code data}.
 *
 * @param field     not read by this method
 * @param values    the terms to index, in iteration order
 * @param maxLength initial capacity requested for the prior-term buffer
 * @throws IOException if writing to {@code data} or {@code meta} fails
 */
private void addReverseTermIndex(FieldInfo field, final Iterable<BytesRef> values, int maxLength) throws IOException {
  long ordinal = 0;
  final BytesRefBuilder previous = new BytesRefBuilder();
  previous.grow(maxLength);
  // Reused view over the current value's bytes, shortened to the sort-key length.
  final BytesRef truncated = new BytesRef();
  final long startPointer = data.getFilePointer();
  final PagedBytes termBytes = new PagedBytes(15);
  final MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);

  for (BytesRef value : values) {
    final int slot = (int) (ordinal & REVERSE_INTERVAL_MASK);
    if (slot == 0) {
      truncated.bytes = value.bytes;
      truncated.offset = value.offset;
      truncated.length = StringHelper.sortKeyLength(previous.get(), value);
      addressWriter.add(termBytes.copyUsingLengthPrefix(truncated));
    } else if (slot == REVERSE_INTERVAL_MASK) {
      previous.copyBytes(value);
    }
    ordinal++;
  }

  addressWriter.finish();
  final long byteCount = termBytes.getPointer();
  termBytes.freeze(true);
  final PagedBytesDataInput termBytesIn = termBytes.getDataInput();
  meta.writeLong(startPointer);
  data.writeVLong(byteCount);
  data.copyBytes(termBytesIn, byteCount);
}
项目:lams    文件:PrefixTermsEnum.java   
/**
 * Accepts {@code term} when it starts with {@code prefixRef}; otherwise
 * returns {@code AcceptStatus.END}.
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  return StringHelper.startsWith(term, prefixRef) ? AcceptStatus.YES : AcceptStatus.END;
}
项目:lams    文件:SortField.java   
/**
 * Returns true if <code>o</code> is a {@link SortField} with the same field,
 * type, reverse flag and comparator source as this one. If a
 * {@link FieldComparatorSource} or {@link FieldCache.Parser} was provided,
 * it must properly implement equals (unless a singleton is always used).
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof SortField)) {
    return false;
  }
  final SortField that = (SortField) o;
  if (!StringHelper.equals(that.field, this.field)) {
    return false;
  }
  if (that.type != this.type) {
    return false;
  }
  if (that.reverse != this.reverse) {
    return false;
  }
  // Null-safe comparison of the comparator sources.
  if (that.comparatorSource == null) {
    return this.comparatorSource == null;
  }
  return that.comparatorSource.equals(this.comparatorSource);
}
项目:lams    文件:AutomatonTermsEnum.java   
/**
 * Decides whether {@code term} is accepted. The term is accepted when it
 * ends with {@code commonSuffixRef} (or no common suffix is set) and
 * {@code runAutomaton} accepts its bytes. The {@code *_AND_SEEK} statuses
 * are returned when not in linear mode; a rejected term additionally gets
 * {@code NO_AND_SEEK} once it is no longer below {@code linearUpperBound}.
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixMatches =
      commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);

  if (suffixMatches && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }

  // Rejected, either by the suffix check or by the automaton.
  final boolean stillLinear = linear && termComp.compare(term, linearUpperBound) < 0;
  return stillLinear ? AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
}
项目:lams    文件:DocTermOrds.java   
/**
 * Loads the enum's current term into the {@code term} field. If a
 * {@code prefix} is set and the term does not start with it, the field is
 * cleared to {@code null} instead.
 *
 * @return the new value of {@code term}, possibly {@code null}
 */
private BytesRef setTerm() throws IOException {
  term = termsEnum.term();
  if (prefix == null || StringHelper.startsWith(term, prefix)) {
    return term;
  }
  term = null;
  return null;
}
项目:lams    文件:SrndTruncQuery.java   
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef}
 * and whose remainder (after {@code prefix.length()} characters) matches
 * {@code pattern}. Enumeration starts at the ceiling of {@code prefixRef}
 * and stops at the first term that does not carry the prefix.
 *
 * @param reader    reader to take the field's terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor called once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator(null);
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Starting term: the prefix itself on an exact hit, the enum's current
    // term on an inexact hit, nothing otherwise.
    BytesRef current = null;
    if (status == TermsEnum.SeekStatus.FOUND) {
      current = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      current = termsEnum.term();
    }

    for (; current != null && StringHelper.startsWith(current, prefixRef); current = termsEnum.next()) {
      final String termText = current.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
    }
  } finally {
    matcher.reset();
  }
}
项目:lams    文件:SrndPrefixQuery.java   
/**
 * Visits every term of {@code fieldName} that starts with the prefix.
 * Seeks to the ceiling of the prefix, visits that first term if it
 * qualifies, then keeps visiting following terms until one does not start
 * with {@code prefixRef} or the enum is exhausted.
 *
 * @param reader    reader to take the field's terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor called once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final TermsEnum termsEnum = terms.iterator(null);
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));

  if (status == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // EOF, or the first term at/after the prefix does not carry it.
    return;
  }

  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
项目:Elasticsearch    文件:ModuloBucketBuilder.java   
/**
 * Hash of {@code value}: 0 for {@code null}, a murmurhash3 with seed 1 for
 * a {@link BytesRef}, and {@code value.hashCode()} for anything else.
 */
private static int hashCode(@Nullable Object value) {
    if (value instanceof BytesRef) {
        // Per the original author's note: since lucene 4.8 BytesRef.hashCode()
        // uses a random seed that differs across jvms, which makes the
        // hashCode / routing differ on each node and breaks the group by
        // redistribution logic - so a fixed seed is used here to be consistent.
        return StringHelper.murmurhash3_x86_32((BytesRef) value, 1);
    }
    return value == null ? 0 : value.hashCode();
}
项目:Elasticsearch    文件:MultiPhrasePrefixQuery.java   
/**
 * Collects into {@code terms} every term of {@code field} that starts with
 * the bytes of {@code prefix}, visiting each leaf of {@code reader} in turn.
 * Returns as soon as {@code terms} holds at least {@code maxExpansions}
 * entries.
 *
 * @param terms  output set that receives deep copies of the matching terms
 * @param prefix term whose bytes are the prefix to match
 * @param reader reader whose leaves are scanned
 * @throws IOException if reading terms from a leaf fails
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore it is better to iterate over each leaf individually.
    for (LeafReaderContext leafContext : reader.leaves()) {
        final Terms leafTerms = leafContext.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum termsEnum = leafTerms.iterator();
        if (termsEnum.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        BytesRef current = termsEnum.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = termsEnum.next();
        }
    }
}
项目:search    文件:FixedGapTermsIndexWriter.java   
/**
 * Returns how many leading bytes of {@code indexedTerm} are kept, as
 * computed by {@code StringHelper.sortKeyLength(priorTerm, indexedTerm)}.
 *
 * <p>NOTE: if your codec does not sort in unicode code point order, you
 * must override this method, to simply return indexedTerm.length.
 */
protected int indexedTermPrefixLength(final BytesRef priorTerm, final BytesRef indexedTerm) {
  // While the codec sorts terms in unicode codepoint order, the
  // non-distinguishing suffix can safely be stripped to save RAM in the
  // loaded terms index.
  final int keptLength = StringHelper.sortKeyLength(priorTerm, indexedTerm);
  return keptLength;
}
项目:search    文件:SimpleTextUtil.java   
/**
 * Verifies the checksum footer of {@code input}.
 *
 * <p>The checksum accumulated so far is formatted as a 20-digit zero-padded
 * decimal, then the next line is read. That line must start with
 * {@code CHECKSUM}, the text after that prefix must equal the formatted
 * checksum, and the file pointer must then be at the end of the input.
 *
 * @param input the checksumming input positioned at the footer line
 * @throws CorruptIndexException if any of the three checks fails
 * @throws IOException if reading the line fails
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  // Must be captured before the footer line itself is read.
  final String expected = String.format(Locale.ROOT, "%020d", input.getChecksum());
  SimpleTextUtil.readLine(input, line);

  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString() + " (resource=" + input + ")");
  }

  final BytesRef checksumBytes = new BytesRef(line.bytes(), CHECKSUM.length, line.length() - CHECKSUM.length);
  final String actual = checksumBytes.utf8ToString();
  if (!expected.equals(actual)) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actual + " != " + expected + " (resource=" + input + ")");
  }

  if (input.length() != input.getFilePointer()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor! (resource=" + input + ")");
  }
}
项目:search    文件:RegexTermsEnum.java   
/**
 * Accepts {@code term} only when it starts with {@code prefixRef} and
 * {@code regexImpl} matches it; every other term gets
 * {@code AcceptStatus.NO}.
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  // TODO: set BoostAttr based on distance of
  // searchTerm.text() and term().text()
  final boolean matches = StringHelper.startsWith(term, prefixRef) && regexImpl.match(term);
  return matches ? AcceptStatus.YES : AcceptStatus.NO;
}
项目:search    文件:SlowFuzzyTermsEnum.java   
/**
 * Accepts or rejects {@code term} based on its Levenshtein distance to the
 * search text.
 *
 * <p>Terms that do not start with {@code prefixBytesRef} end the
 * enumeration. Otherwise the distance is computed on the part after the
 * prefix; in raw mode the term is accepted when the distance does not
 * exceed {@code maxEdits}, and in non-raw mode when the computed similarity
 * is greater than {@code minSimilarity}. As described by the original
 * documentation, similarity follows
 * <pre>
 *   similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
 * </pre>
 * On acceptance the boost is set to
 * {@code (similarity - minSimilarity) * scale_factor}.
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    return AcceptStatus.END;
  }

  utf32.copyUTF8Bytes(term);
  final int distance = calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);

  // Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
  if (distance == Integer.MIN_VALUE) {
    return AcceptStatus.NO;
  }
  // no need to calc similarity, if raw is true and distance > maxEdits
  if (raw && distance > maxEdits) {
    return AcceptStatus.NO;
  }

  final float similarity = calcSimilarity(distance, (utf32.length() - realPrefixLength), text.length);

  // In raw mode the distance is already known to be <= maxEdits here, so
  // only the non-raw case still needs the similarity threshold.
  if (!raw && !(similarity > minSimilarity)) {
    return AcceptStatus.NO;
  }

  boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
  return AcceptStatus.YES;
}
项目:search    文件:SrndTruncQuery.java   
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef}
 * and whose remainder (after {@code prefix.length()} characters) matches
 * {@code pattern}. Enumeration starts at the ceiling of {@code prefixRef}
 * and stops at the first term that does not carry the prefix.
 *
 * @param reader    reader to take the field's terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor called once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator(null);
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Starting term: the prefix itself on an exact hit, the enum's current
    // term on an inexact hit, nothing otherwise.
    BytesRef current = null;
    if (status == TermsEnum.SeekStatus.FOUND) {
      current = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      current = termsEnum.term();
    }

    for (; current != null && StringHelper.startsWith(current, prefixRef); current = termsEnum.next()) {
      final String termText = current.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
    }
  } finally {
    matcher.reset();
  }
}
项目:search    文件:SrndPrefixQuery.java   
/**
 * Visits every term of {@code fieldName} that starts with the prefix.
 * Seeks to the ceiling of the prefix, visits that first term if it
 * qualifies, then keeps visiting following terms until one does not start
 * with {@code prefixRef} or the enum is exhausted.
 *
 * @param reader    reader to take the field's terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor called once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final TermsEnum termsEnum = terms.iterator(null);
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));

  if (status == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // EOF, or the first term at/after the prefix does not carry it.
    return;
  }

  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
项目:search    文件:Lucene410DocValuesConsumer.java   
/**
 * Builds an index over a sample of {@code values} and writes it out.
 *
 * <p>Every value whose running index has {@code REVERSE_INTERVAL_MASK} bits
 * all zero is truncated to {@code StringHelper.sortKeyLength(priorTerm, value)}
 * bytes, copied length-prefixed into a {@link PagedBytes}, and its address is
 * added to a monotonic packed writer over {@code data}. The value just before
 * each sampled one (index bits equal to the mask) is remembered as the prior
 * term. Afterwards the start file pointer goes to {@code meta}, and the byte
 * count followed by the paged bytes go to {@code data}.
 *
 * @param field     not read by this method
 * @param values    the terms to index, in iteration order
 * @param maxLength initial capacity requested for the prior-term buffer
 * @throws IOException if writing to {@code data} or {@code meta} fails
 */
private void addReverseTermIndex(FieldInfo field, final Iterable<BytesRef> values, int maxLength) throws IOException {
  long ordinal = 0;
  final BytesRefBuilder previous = new BytesRefBuilder();
  previous.grow(maxLength);
  // Reused view over the current value's bytes, shortened to the sort-key length.
  final BytesRef truncated = new BytesRef();
  final long startPointer = data.getFilePointer();
  final PagedBytes termBytes = new PagedBytes(15);
  final MonotonicBlockPackedWriter addressWriter = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);

  for (BytesRef value : values) {
    final int slot = (int) (ordinal & REVERSE_INTERVAL_MASK);
    if (slot == 0) {
      truncated.bytes = value.bytes;
      truncated.offset = value.offset;
      truncated.length = StringHelper.sortKeyLength(previous.get(), value);
      addressWriter.add(termBytes.copyUsingLengthPrefix(truncated));
    } else if (slot == REVERSE_INTERVAL_MASK) {
      previous.copyBytes(value);
    }
    ordinal++;
  }

  addressWriter.finish();
  final long byteCount = termBytes.getPointer();
  termBytes.freeze(true);
  final PagedBytesDataInput termBytesIn = termBytes.getDataInput();
  meta.writeLong(startPointer);
  data.writeVLong(byteCount);
  data.copyBytes(termBytesIn, byteCount);
}
项目:search    文件:PrefixTermsEnum.java   
/**
 * Accepts {@code term} when it starts with {@code prefixRef}; otherwise
 * returns {@code AcceptStatus.END}.
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  return StringHelper.startsWith(term, prefixRef) ? AcceptStatus.YES : AcceptStatus.END;
}
项目:search    文件:SortField.java   
/**
 * Returns true if <code>o</code> is a {@link SortField} with the same field,
 * type, reverse flag and comparator source as this one. If a
 * {@link FieldComparatorSource} or {@link FieldCache.Parser} was provided,
 * it must properly implement equals (unless a singleton is always used).
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof SortField)) {
    return false;
  }
  final SortField that = (SortField) o;
  if (!StringHelper.equals(that.field, this.field)) {
    return false;
  }
  if (that.type != this.type) {
    return false;
  }
  if (that.reverse != this.reverse) {
    return false;
  }
  // Null-safe comparison of the comparator sources.
  if (that.comparatorSource == null) {
    return this.comparatorSource == null;
  }
  return that.comparatorSource.equals(this.comparatorSource);
}
项目:search    文件:AutomatonTermsEnum.java   
/**
 * Decides whether {@code term} is accepted. The term is accepted when it
 * ends with {@code commonSuffixRef} (or no common suffix is set) and
 * {@code runAutomaton} accepts its bytes. The {@code *_AND_SEEK} statuses
 * are returned when not in linear mode; a rejected term additionally gets
 * {@code NO_AND_SEEK} once it is no longer below {@code linearUpperBound}.
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixMatches =
      commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);

  if (suffixMatches && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }

  // Rejected, either by the suffix check or by the automaton.
  final boolean stillLinear = linear && termComp.compare(term, linearUpperBound) < 0;
  return stillLinear ? AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
}
项目:search    文件:DocTermOrds.java   
/**
 * Loads the enum's current term into the {@code term} field. If a
 * {@code prefix} is set and the term does not start with it, the field is
 * cleared to {@code null} instead.
 *
 * @return the new value of {@code term}, possibly {@code null}
 */
private BytesRef setTerm() throws IOException {
  term = termsEnum.term();
  if (prefix == null || StringHelper.startsWith(term, prefix)) {
    return term;
  }
  term = null;
  return null;
}
项目:NYBC    文件:SimpleTextFieldsReader.java   
/**
 * Reads lines from {@code in} until the {@code END} line is seen, recording
 * every line that starts with {@code FIELD}.
 *
 * <p>For each such line the bytes after the {@code FIELD} prefix are decoded
 * as UTF-8 and used as the key; the value is the file pointer right after
 * that line was read.
 *
 * @param in the input to read from
 * @return sorted map of field name to file pointer
 * @throws IOException if reading fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final BytesRef line = new BytesRef(10);
  final TreeMap<String,Long> fieldPointers = new TreeMap<String,Long>();

  SimpleTextUtil.readLine(in, line);
  while (!line.equals(END)) {
    if (StringHelper.startsWith(line, FIELD)) {
      final int nameStart = line.offset + FIELD.length;
      final int nameLength = line.length - FIELD.length;
      final String name = new String(line.bytes, nameStart, nameLength, "UTF-8");
      fieldPointers.put(name, in.getFilePointer());
    }
    SimpleTextUtil.readLine(in, line);
  }
  return fieldPointers;
}
项目:NYBC    文件:RegexTermsEnum.java   
/**
 * Accepts {@code term} only when it starts with {@code prefixRef} and
 * {@code regexImpl} matches it; every other term gets
 * {@code AcceptStatus.NO}.
 */
@Override
protected AcceptStatus accept(BytesRef term) {
  // TODO: set BoostAttr based on distance of
  // searchTerm.text() and term().text()
  final boolean matches = StringHelper.startsWith(term, prefixRef) && regexImpl.match(term);
  return matches ? AcceptStatus.YES : AcceptStatus.NO;
}
项目:NYBC    文件:SlowFuzzyTermsEnum.java   
/**
 * Accepts or rejects {@code term} by its similarity to the search text.
 * Terms that do not start with {@code prefixBytesRef} end the enumeration.
 * Otherwise the term is decoded to UTF-32, its similarity is computed on the
 * part after the prefix, and the term is accepted (with boost
 * {@code (similarity - minSimilarity) * scale_factor}) when that similarity
 * exceeds {@code minSimilarity}. Per the original documentation the
 * comparison is based on Levenshtein distance.
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  if (!StringHelper.startsWith(term, prefixBytesRef)) {
    return AcceptStatus.END;
  }

  UnicodeUtil.UTF8toUTF32(term, utf32);
  final float score = similarity(utf32.ints, realPrefixLength, utf32.length - realPrefixLength);
  if (score > minSimilarity) {
    boostAtt.setBoost((score - minSimilarity) * scale_factor);
    return AcceptStatus.YES;
  }
  return AcceptStatus.NO;
}
项目:NYBC    文件:SrndTruncQuery.java   
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef}
 * and whose remainder (after {@code prefix.length()} characters) matches
 * {@code pattern}. Enumeration starts at the ceiling of {@code prefixRef}
 * and stops at the first term that does not carry the prefix.
 *
 * @param reader    reader to take the field's terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor called once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator(null);
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Starting term: the prefix itself on an exact hit, the enum's current
    // term on an inexact hit, nothing otherwise.
    BytesRef current = null;
    if (status == TermsEnum.SeekStatus.FOUND) {
      current = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      current = termsEnum.term();
    }

    for (; current != null && StringHelper.startsWith(current, prefixRef); current = termsEnum.next()) {
      final String termText = current.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
    }
  } finally {
    matcher.reset();
  }
}