Java class org.apache.lucene.util.AttributeSource: example source code

The snippets below, collected from open-source projects, show the common AttributeSource patterns: capturing and restoring token state, cloning attributes, sink filters, and custom attribute factories.

Project: lams    File: TeeSinkTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    // capture state lazily - maybe no SinkFilter accepts this state
    AttributeSource.State state = null;
    for (WeakReference<SinkTokenStream> ref : sinks) {
      final SinkTokenStream sink = ref.get();
      if (sink != null) {
        if (sink.accept(this)) {
          if (state == null) {
            state = this.captureState();
          }
          sink.addState(state);
        }
      }
    }
    return true;
  }

  return false;
}
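The tee captures each token's state at most once and hands the same state to every sink that accepts it. A minimal wiring sketch, assuming the Lucene 4.x sinks API these snippets come from (source stands in for any tokenizer chain that produces a CharTermAttribute):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

static void teeExample(TokenStream source) throws IOException {
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(source);
  TokenStream sink = tee.newSinkTokenStream(); // default sink accepts every state

  // Consume the tee first; this drives incrementToken() above and fills the
  // sink with captured states.
  tee.reset();
  while (tee.incrementToken()) { /* main consumer */ }
  tee.end();

  // The sink then replays the captured states.
  CharTermAttribute term = sink.addAttribute(CharTermAttribute.class);
  sink.reset();
  while (sink.incrementToken()) {
    System.out.println(term.toString());
  }
  sink.end();
}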
Project: lams    File: DateRecognizerSinkFilter.java
@Override
public boolean accept(AttributeSource source) {
  if (termAtt == null) {
    termAtt = source.addAttribute(CharTermAttribute.class);
  }
  try {
    // We don't care about the actual date, only whether the term parses as one.
    Date date = dateFormat.parse(termAtt.toString());
    if (date != null) {
      return true;
    }
  } catch (ParseException e) {
    // Not a parsable date: fall through and reject this token.
  }

  return false;
}
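A sketch of how a SinkFilter like this is attached; the date pattern is an arbitrary assumption, and the tee must still be fully consumed before the sink is read:

import java.text.SimpleDateFormat;
import java.util.Locale;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.sinks.DateRecognizerSinkFilter;
import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;

static TokenStream dateSink(TokenStream source) {
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(source);
  // This sink only receives states for which accept() above returned true.
  return tee.newSinkTokenStream(
      new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT)));
}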
Project: fastcatsearch3    File: CachingTokenFilter.java
@Override
public final boolean incrementToken() throws IOException {
  if (cache == null) {
    // fill cache lazily
    cache = new LinkedList<AttributeSource.State>();
    fillCache();
    iterator = cache.iterator();
  }

  if (!iterator.hasNext()) {
    // the cache is exhausted, return false
    return false;
  }
  // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
  restoreState(iterator.next());
  return true;
}
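Because every state is cached, the stream can be replayed. Note that in the 4.x-era code above, reset() only rewinds the cache iterator, so the caller typically resets the underlying input itself. A sketch:

import java.io.IOException;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;

static int consumeTwice(TokenStream input) throws IOException {
  input.reset(); // the caching filter above does not reset its input itself
  CachingTokenFilter cached = new CachingTokenFilter(input);
  int count = 0;
  while (cached.incrementToken()) count++; // first pass fills the cache
  cached.reset();                          // rewinds the cache iterator only
  while (cached.incrementToken()) count++; // second pass replays cached states
  cached.end();
  return count; // twice the number of tokens
}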
Project: elasticsearch-analysis-opennlp    File: OpenNLPTokenFilter.java
private String[] walkTokens() throws IOException {
    List<String> wordList = new ArrayList<>();
    while (input.incrementToken()) {
        CharTermAttribute textAtt = input.getAttribute(CharTermAttribute.class);
        // Read the term by its own length: the offset span (endOffset - startOffset)
        // can disagree with the term length once filters have altered the text.
        String word = new String(textAtt.buffer(), 0, textAtt.length());
        wordList.add(word);
        AttributeSource attrs = input.cloneAttributes();
        tokenAttrs.add(attrs);
    }
    return wordList.toArray(new String[wordList.size()]);
}
Project: elasticsearch-analysis-ja    File: ReloadableKuromojiTokenizerFactory.java
TokenizerWrapper() {
    super();

    tokenizerTimestamp = dictionaryTimestamp;
    tokenizer = new JapaneseTokenizer(userDictionary,
            discartPunctuation, mode);

    try {
        final Field attributesField = getAccessibleField(AttributeSource.class, "attributes");
        final Object attributesObj = attributesField.get(tokenizer);
        attributesField.set(this, attributesObj);

        final Field attributeImplsField = getAccessibleField(AttributeSource.class, "attributeImpls");
        final Object attributeImplsObj = attributeImplsField.get(tokenizer);
        attributeImplsField.set(this, attributeImplsObj);

        final Field currentStateField = getAccessibleField(AttributeSource.class, "currentState");
        final Object currentStateObj = currentStateField.get(tokenizer);
        currentStateField.set(this, currentStateObj);
    } catch (final Exception e) {
        throw new IllegalStateException(
                "Failed to update the tokenizer.", e);
    }
}
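The getAccessibleField helper is not part of this excerpt; a minimal sketch of what it presumably looks like (an assumption, not the plugin's actual code):

import java.lang.reflect.Field;

static Field getAccessibleField(Class<?> clazz, String name) {
  try {
    Field field = clazz.getDeclaredField(name);
    field.setAccessible(true); // the AttributeSource fields are private
    return field;
  } catch (NoSuchFieldException e) {
    throw new IllegalStateException("No field '" + name + "' in " + clazz.getName(), e);
  }
}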
Project: search    File: AnalysisRequestHandlerBase.java
/**
 * Analyzes the given TokenStream, collecting the Tokens it produces.
 *
 * @param tokenStream TokenStream to analyze
 *
 * @return List of tokens produced from the TokenStream
 */
private List<AttributeSource> analyzeTokenStream(TokenStream tokenStream) {
  final List<AttributeSource> tokens = new ArrayList<>();
  final PositionIncrementAttribute posIncrAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  final TokenTrackingAttribute trackerAtt = tokenStream.addAttribute(TokenTrackingAttribute.class);
  // for backwards compatibility, add all "common" attributes
  tokenStream.addAttribute(OffsetAttribute.class);
  tokenStream.addAttribute(TypeAttribute.class);
  try {
    tokenStream.reset();
    int position = 0;
    while (tokenStream.incrementToken()) {
      position += posIncrAtt.getPositionIncrement();
      trackerAtt.setActPosition(position);
      tokens.add(tokenStream.cloneAttributes());
    }
    tokenStream.end();
  } catch (IOException ioe) {
    throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
  } finally {
    IOUtils.closeWhileHandlingException(tokenStream);
  }

  return tokens;
}
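The cloned AttributeSources are independent snapshots, so they remain usable after the stream is closed. A sketch of reading them back, assuming the underlying stream also produced a CharTermAttribute:

import java.util.List;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeSource;

static void dumpTokens(List<AttributeSource> tokens) {
  for (AttributeSource token : tokens) {
    CharTermAttribute term = token.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsets = token.getAttribute(OffsetAttribute.class);
    System.out.println(term + " [" + offsets.startOffset() + ".." + offsets.endOffset() + "]");
  }
}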
Project: NYBC    File: TestRandomChains.java
static Object[] newTokenizerArgs(Random random, Reader reader, Class<?>[] paramTypes) {
  Object[] args = new Object[paramTypes.length];
  for (int i = 0; i < args.length; i++) {
    Class<?> paramType = paramTypes[i];
    if (paramType == Reader.class) {
      args[i] = reader;
    } else if (paramType == AttributeFactory.class) {
      // TODO: maybe the collator one...???
      args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
    } else if (paramType == AttributeSource.class) {
      // TODO: args[i] = new AttributeSource();
      // this is currently too scary to deal with!
      args[i] = null; // force IAE
    } else {
      args[i] = newRandomArg(random, paramType);
    }
  }
  return args;
}
Project: lams    File: NumericRangeQuery.java
@Override @SuppressWarnings("unchecked")
protected TermsEnum getTermsEnum(final Terms terms, AttributeSource atts) throws IOException {
  // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are
  if (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) {
    return TermsEnum.EMPTY;
  }
  return new NumericRangeTermsEnum(terms.iterator(null));
}
Project: lams    File: TermRangeQuery.java
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
    return TermsEnum.EMPTY;
  }

  TermsEnum tenum = terms.iterator(null);

  if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
    return tenum;
  }
  return new TermRangeTermsEnum(tenum,
      lowerTerm, upperTerm, includeLower, includeUpper);
}
Project: lams    File: PrefixQuery.java
@Override  
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  TermsEnum tenum = terms.iterator(null);

  if (prefix.bytes().length == 0) {
    // no prefix -- match all terms for this field:
    return tenum;
  }
  return new PrefixTermsEnum(tenum, prefix.bytes());
}
Project: lams    File: FuzzyQuery.java
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (maxEdits == 0 || prefixLength >= term.text().length()) {  // can only match if it's exact
    return new SingleTermsEnum(terms.iterator(null), term.bytes());
  }
  return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
}
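All four overrides above implement the single extension point of MultiTermQuery: map a field's Terms to a (usually filtered) TermsEnum. A minimal custom subclass, as a sketch against the Lucene 4.x API used in these snippets (terms.iterator(null)):

import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.AttributeSource;

public class MatchAllTermsQuery extends MultiTermQuery {

  public MatchAllTermsQuery(String field) {
    super(field);
  }

  @Override
  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
    // Enumerate every term of the field; real implementations return
    // TermsEnum.EMPTY for degenerate ranges or wrap the iterator in a
    // FilteredTermsEnum, as the snippets above do.
    return terms.iterator(null);
  }

  @Override
  public String toString(String field) {
    return "MatchAllTermsQuery(field=" + field + ")";
  }
}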
Project: lams    File: FieldInvertState.java
/**
 * Sets attributeSource to a new instance.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
Project: lams    File: SlowSynonymFilter.java
private AttributeSource nextTok() throws IOException {
  if (buffer != null && !buffer.isEmpty()) {
    return buffer.removeFirst();
  } else {
    if (!exhausted && input.incrementToken()) {
      return this;
    } else {
      exhausted = true;
      return null;
    }
  }
}
Project: lams    File: SlowSynonymFilter.java
private SlowSynonymMap match(SlowSynonymMap map) throws IOException {
  SlowSynonymMap result = null;

  if (map.submap != null) {
    AttributeSource tok = nextTok();
    if (tok != null) {
      // clone ourselves.
      if (tok == this)
        tok = cloneAttributes();
      // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
      CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
      SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());

      if (subMap != null) {
        // recurse
        result = match(subMap);
      }

      if (result != null) {
        matched.addFirst(tok);
      } else {
        // push back unmatched token
        pushTok(tok);
      }
    }
  }

  // no longer sequence matched, so if this node has synonyms, it's the match
  if (result == null && map.synonyms != null) {
    result = map;
  }

  return result;
}
Project: lams    File: TokenRangeSinkFilter.java
@Override
public boolean accept(AttributeSource source) {
  try {
    return count >= lower && count < upper;
  } finally {
    count++;
  }
}
Project: lams    File: TokenTypeSinkFilter.java
@Override
public boolean accept(AttributeSource source) {
  if (typeAtt == null) {
    typeAtt = source.addAttribute(TypeAttribute.class);
  }

  // accept only tokens whose type matches the configured type
  return typeToMatch.equals(typeAtt.type());
}
Project: lams    File: TeeSinkTokenFilter.java
@Override
public final void end() throws IOException {
  super.end();
  AttributeSource.State finalState = captureState();
  for (WeakReference<SinkTokenStream> ref : sinks) {
    final SinkTokenStream sink = ref.get();
    if (sink != null) {
      sink.setFinalState(finalState);
    }
  }
}
Project: lams    File: TeeSinkTokenFilter.java
@Override
public final boolean incrementToken() {
  // lazy init the iterator
  if (it == null) {
    it = cachedStates.iterator();
  }

  if (!it.hasNext()) {
    return false;
  }

  AttributeSource.State state = it.next();
  restoreState(state);
  return true;
}
Project: lams    File: WikipediaTokenizer.java
@Override
public final boolean incrementToken() throws IOException {
  if (tokens != null && tokens.hasNext()){
    AttributeSource.State state = tokens.next();
    restoreState(state);
    return true;
  }
  clearAttributes();
  int tokenType = scanner.getNextToken();

  if (tokenType == WikipediaTokenizerImpl.YYEOF) {
    return false;
  }
  String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
  if (tokenOutput == TOKENS_ONLY || !untokenizedTypes.contains(type)) {
    setupToken();
  } else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.contains(type)) {
    collapseTokens(tokenType);
  } else if (tokenOutput == BOTH) {
    //collapse into a single token, add it to tokens AND output the individual tokens
    //output the untokenized Token first
    collapseAndSaveTokens(tokenType, type);
  }
  int posinc = scanner.getPositionIncrement();
  if (first && posinc == 0) {
    posinc = 1; // don't emit posinc=0 for the first token!
  }
  posIncrAtt.setPositionIncrement(posinc);
  typeAtt.setType(type);
  first = false;
  return true;
}
Project: lams    File: WikipediaTokenizer.java
private void collapseAndSaveTokens(int tokenType, String type) throws IOException {
  //collapse
  StringBuilder buffer = new StringBuilder(32);
  int numAdded = scanner.setText(buffer);
  //TODO: how to know how much whitespace to add
  int theStart = scanner.yychar();
  int lastPos = theStart + numAdded;
  int tmpTokType;
  int numSeen = 0;
  List<AttributeSource.State> tmp = new ArrayList<>();
  setupSavedToken(0, type);
  tmp.add(captureState());
  //while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
  while ((tmpTokType = scanner.getNextToken()) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen){
    int currPos = scanner.yychar();
    //append whitespace
    for (int i = 0; i < (currPos - lastPos); i++){
      buffer.append(' ');
    }
    numAdded = scanner.setText(buffer);
    setupSavedToken(scanner.getPositionIncrement(), type);
    tmp.add(captureState());
    numSeen++;
    lastPos = currPos + numAdded;
  }
  //trim the buffer
  // TODO: this is inefficient
  String s = buffer.toString().trim();
  termAtt.setEmpty().append(s);
  offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
  flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
  //The way the loop is written, we will have proceeded to the next token.  We need to pushback the scanner to lastPos
  if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
    scanner.yypushback(scanner.yylength());
  }
  tokens = tmp.iterator();
}
Project: lams    File: WordDelimiterFilter.java
@Override
protected void swap(int i, int j) {
  AttributeSource.State tmp = buffered[i];
  buffered[i] = buffered[j];
  buffered[j] = tmp;

  int tmp2 = startOff[i];
  startOff[i] = startOff[j];
  startOff[j] = tmp2;

  tmp2 = posInc[i];
  posInc[i] = posInc[j];
  posInc[j] = tmp2;
}
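This swap() is one half of Lucene's sorter contract; the filter pairs it with a compare() inside an in-place sorter so the three parallel arrays stay aligned. A standalone sketch of the surrounding pattern (field names follow the snippet; the offset-based compare is an assumption):

import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;

class BufferedTokenSorter extends InPlaceMergeSorter {
  AttributeSource.State[] buffered;
  int[] startOff;
  int[] posInc;

  @Override
  protected int compare(int i, int j) {
    return Integer.compare(startOff[i], startOff[j]); // order by start offset
  }

  @Override
  protected void swap(int i, int j) {
    // every parallel array must be swapped together, as in the snippet above
    AttributeSource.State tmp = buffered[i]; buffered[i] = buffered[j]; buffered[j] = tmp;
    int t = startOff[i]; startOff[i] = startOff[j]; startOff[j] = t;
    t = posInc[i]; posInc[i] = posInc[j]; posInc[j] = t;
  }
}
// usage: sorter.sort(0, n) where n is the number of buffered states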
Project: Elasticsearch    File: NumericTokenizer.java
/** Make this tokenizer get attributes from the delegate token stream. */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
    return new AttributeFactory() {
        @Override
        public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
            return (AttributeImpl) source.addAttribute(attClass);
        }
    };
}
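Any tokenizer built with this factory shares attribute instances with source, so values written by the tokenizer are visible through the delegate. A usage sketch; KeywordTokenizer's (AttributeFactory, int) constructor is the Lucene 5.x-era signature and an assumption here:

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;

static void sharedAttributes() {
  // assumes this sketch lives where delegatingAttributeFactory is visible
  AttributeSource source = new AttributeSource();
  KeywordTokenizer tokenizer =
      new KeywordTokenizer(delegatingAttributeFactory(source), 256);
  // The tokenizer's CharTermAttribute was created through the factory above,
  // so the very same instance is now registered in source as well.
  CharTermAttribute shared = source.getAttribute(CharTermAttribute.class);
}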
Project: fastcatsearch3    File: KeywordTokenizer.java
public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
  super(source, input);
  if (bufferSize <= 0) {
    throw new IllegalArgumentException("bufferSize must be > 0");
  }
  termAtt.resizeBuffer(bufferSize);
}
Project: elasticsearch-analysis-opennlp    File: OpenNLPTokenFilter.java
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();
    if (first) {
        String[] words = walkTokens();
        if (words.length == 0) {
            return false;
        }
        createTags(words);
        first = false;
        indexToken = 0;
    }
    if (indexToken == tokenAttrs.size()) {
        return false;
    }
    AttributeSource as = tokenAttrs.get(indexToken);
    Iterator<? extends Class<? extends Attribute>> it = as.getAttributeClassesIterator();
    while (it.hasNext()) {
        Class<? extends Attribute> attrClass = it.next();
        if (!hasAttribute(attrClass)) {
            addAttribute(attrClass);
        }
    }
    as.copyTo(this);
    indexToken++;
    return true;
}
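The attribute-registration loop exists because AttributeSource.copyTo requires the target to already contain every attribute of the source (with identical implementation classes). A minimal illustration:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;

static void copyToExample() {
  AttributeSource src = new AttributeSource();
  src.addAttribute(CharTermAttribute.class).append("token");

  AttributeSource dst = new AttributeSource();
  dst.addAttribute(CharTermAttribute.class); // without this, copyTo would fail
  src.copyTo(dst);
}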
Project: elasticsearch-analysis-opennlp    File: OpenNLPTokenFilter.java
private void appendPayloads(String[] tags, int length) {
    for (int i = 0; i < length; i++) {
        AttributeSource attrs = tokenAttrs.get(i);
        if (tags[i] != null) {
            PayloadAttribute payloadAtt = attrs.hasAttribute(PayloadAttribute.class) ? attrs.getAttribute(PayloadAttribute.class) : attrs.addAttribute(PayloadAttribute.class);
            // StandardCharsets.UTF_8 avoids the checked UnsupportedEncodingException
            // that the string-based getBytes("UTF-8") forces callers to catch
            BytesRef bytesRef = new BytesRef(tags[i].toUpperCase(Locale.getDefault()).getBytes(StandardCharsets.UTF_8));
            payloadAtt.setPayload(bytesRef);
        }
    }
}
Project: DoSeR-Disambiguation    File: LearnToRankFuzzyQuery.java
@Override
protected TermsEnum getTermsEnum(final Terms terms,
        final AttributeSource atts) throws IOException {
    // can only match if it's exact
    if ((maxEdits == 0) || (prefixLength >= term.text().length())) {
        return new SingleTermsEnum(terms.iterator(null), term.bytes());
    }
    return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits,
            prefixLength, transpositions);
}
Project: moar    File: MoarQuery.java
@Override
protected TermsEnum getTermsEnum(
        Terms terms, AttributeSource atts) throws IOException {
    MoaMatcher matcher = this.moaPattern.matcher( "" );
    TermsEnum termsEnum = terms.iterator();
    return new MoarTermsEnum( matcher, termsEnum );
}
Project: search    File: PulsingPostingsReader.java
/** For a DocsEnum, gets the 'other' reused enum.
 * Example: Pulsing(Standard).
 * When doing a term range query you are switching back and forth
 * between Pulsing and Standard.
 *
 * The way the reuse works is that Pulsing.other = Standard and
 * Standard.other = Pulsing.
 */
private DocsEnum getOther(DocsEnum de) {
  if (de == null) {
    return null;
  } else {
    final AttributeSource atts = de.attributes();
    return atts.addAttribute(PulsingEnumAttribute.class).enums().get(this);
  }
}
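PulsingEnumAttribute is a custom attribute used purely as a per-enum scratchpad. The pattern generalizes: declare an interface extending Attribute plus an Impl class, and addAttribute() finds the implementation by naming convention. A generic sketch (TagAttribute is hypothetical; in practice the two types live in separate files):

import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

public interface TagAttribute extends Attribute {
  void setTag(String tag);
  String getTag();
}

public class TagAttributeImpl extends AttributeImpl implements TagAttribute {
  private String tag;

  @Override public void setTag(String tag) { this.tag = tag; }
  @Override public String getTag() { return tag; }

  @Override public void clear() { tag = null; }

  @Override public void copyTo(AttributeImpl target) {
    ((TagAttribute) target).setTag(tag);
  }

  @Override public void reflectWith(AttributeReflector reflector) {
    reflector.reflect(TagAttribute.class, "tag", tag);
  }
}
// usage: source.addAttribute(TagAttribute.class) instantiates TagAttributeImpl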
Project: search    File: TermsQuery.java
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (this.terms.size() == 0) {
    return TermsEnum.EMPTY;
  }

  return new SeekingTermSetTermsEnum(terms.iterator(null), this.terms, ords);
}