Java 类org.apache.lucene.util.IntsRef 实例源码

项目:lams    文件:IntSequenceOutputs.java   
@Override
public IntsRef subtract(IntsRef output, IntsRef inc) {
  assert output != null;
  assert inc != null;
  if (inc == NO_OUTPUT) {
    // no prefix removed
    return output;
  } else if (inc.length == output.length) {
    // entire output removed
    return NO_OUTPUT;
  } else {
    assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
    assert inc.length > 0;
    return new IntsRef(output.ints, output.offset + inc.length, output.length-inc.length);
  }
}
项目:lams    文件:IntSequenceOutputs.java   
@Override
public IntsRef add(IntsRef prefix, IntsRef output) {
  assert prefix != null;
  assert output != null;
  if (prefix == NO_OUTPUT) {
    return output;
  } else if (output == NO_OUTPUT) {
    return prefix;
  } else {
    assert prefix.length > 0;
    assert output.length > 0;
    IntsRef result = new IntsRef(prefix.length + output.length);
    System.arraycopy(prefix.ints, prefix.offset, result.ints, 0, prefix.length);
    System.arraycopy(output.ints, output.offset, result.ints, prefix.length, output.length);
    result.length = prefix.length + output.length;
    return result;
  }
}
项目:lams    文件:Util.java   
/** Looks up the output for this input, or null if the
 *  input is not accepted. */
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  final BytesReader fstReader = fst.getBytesReader();

  // Accumulate output as we go
  T output = fst.outputs.getNoOutput();
  for(int i=0;i<input.length;i++) {
    if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
      return null;
    }
    output = fst.outputs.add(output, arc.output);
  }

  if (arc.isFinal()) {
    return fst.outputs.add(output, arc.nextFinalOutput);
  } else {
    return null;
  }
}
项目:search    文件:TestFSTsMisc.java   
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
  Random random = new Random(random().nextLong());
  for(int iter=0;iter<numIter;iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter " + iter);
    }
    for(int inputMode=0;inputMode<2;inputMode++) {
      final int numWords = random.nextInt(maxNumWords+1);
      Set<IntsRef> termsSet = new HashSet<>();
      IntsRef[] terms = new IntsRef[numWords];
      while(termsSet.size() < numWords) {
        final String term = getRandomString(random);
        termsSet.add(toIntsRef(term, inputMode));
      }
      doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
    }
  }
}
项目:search    文件:TaxonomyFacetCounts.java   
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
  IntsRef scratch  = new IntsRef();
  for(MatchingDocs hits : matchingDocs) {
    OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
    DocIdSetIterator docs = hits.bits.iterator();

    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      ords.get(doc, scratch);
      for(int i=0;i<scratch.length;i++) {
        values[scratch.ints[scratch.offset+i]]++;
      }
    }
  }

  rollup();
}
项目:search    文件:DocValuesOrdinalsReader.java   
@Override
public OrdinalsSegmentReader getReader(AtomicReaderContext context) throws IOException {
  BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
  if (values0 == null) {
    values0 = DocValues.emptyBinary();
  }

  final BinaryDocValues values = values0;

  return new OrdinalsSegmentReader() {
    @Override
    public void get(int docID, IntsRef ordinals) throws IOException {
      final BytesRef bytes = values.get(docID);
      decode(bytes, ordinals);
    }
  };
}
项目:search    文件:TestDictionary.java   
public void testSimpleDictionary() throws Exception {
  InputStream affixStream = getClass().getResourceAsStream("simple.aff");
  InputStream dictStream = getClass().getResourceAsStream("simple.dic");

  Dictionary dictionary = new Dictionary(affixStream, dictStream);
  assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
  assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
  IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
  assertNotNull(ordList);
  assertEquals(1, ordList.length);

  BytesRef ref = new BytesRef();
  dictionary.flagLookup.get(ordList.ints[0], ref);
  char flags[] = Dictionary.decodeFlags(ref);
  assertEquals(1, flags.length);

  ordList = dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5);
  assertNotNull(ordList);
  assertEquals(1, ordList.length);
  dictionary.flagLookup.get(ordList.ints[0], ref);
  flags = Dictionary.decodeFlags(ref);
  assertEquals(1, flags.length);

  affixStream.close();
  dictStream.close();
}
项目:search    文件:TestDictionary.java   
public void testCompressedDictionary() throws Exception {
  InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
  InputStream dictStream = getClass().getResourceAsStream("compressed.dic");

  Dictionary dictionary = new Dictionary(affixStream, dictStream);
  assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
  assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
  IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
  BytesRef ref = new BytesRef();
  dictionary.flagLookup.get(ordList.ints[0], ref);
  char flags[] = Dictionary.decodeFlags(ref);
  assertEquals(1, flags.length);

  affixStream.close();
  dictStream.close();
}
项目:search    文件:TestDictionary.java   
public void testCompressedBeforeSetDictionary() throws Exception {
  InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
  InputStream dictStream = getClass().getResourceAsStream("compressed.dic");

  Dictionary dictionary = new Dictionary(affixStream, dictStream);
  assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
  assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
  IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
  BytesRef ref = new BytesRef();
  dictionary.flagLookup.get(ordList.ints[0], ref);
  char flags[] = Dictionary.decodeFlags(ref);
  assertEquals(1, flags.length);

  affixStream.close();
  dictStream.close();
}
项目:search    文件:TestDictionary.java   
public void testCompressedEmptyAliasDictionary() throws Exception {
  InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
  InputStream dictStream = getClass().getResourceAsStream("compressed.dic");

  Dictionary dictionary = new Dictionary(affixStream, dictStream);
  assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
  assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
  IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
  BytesRef ref = new BytesRef();
  dictionary.flagLookup.get(ordList.ints[0], ref);
  char flags[] = Dictionary.decodeFlags(ref);
  assertEquals(1, flags.length);

  affixStream.close();
  dictStream.close();
}
项目:search    文件:IntSequenceOutputs.java   
@Override
public IntsRef subtract(IntsRef output, IntsRef inc) {
  assert output != null;
  assert inc != null;
  if (inc == NO_OUTPUT) {
    // no prefix removed
    return output;
  } else if (inc.length == output.length) {
    // entire output removed
    return NO_OUTPUT;
  } else {
    assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
    assert inc.length > 0;
    return new IntsRef(output.ints, output.offset + inc.length, output.length-inc.length);
  }
}
项目:search    文件:IntSequenceOutputs.java   
@Override
public IntsRef add(IntsRef prefix, IntsRef output) {
  assert prefix != null;
  assert output != null;
  if (prefix == NO_OUTPUT) {
    return output;
  } else if (output == NO_OUTPUT) {
    return prefix;
  } else {
    assert prefix.length > 0;
    assert output.length > 0;
    IntsRef result = new IntsRef(prefix.length + output.length);
    System.arraycopy(prefix.ints, prefix.offset, result.ints, 0, prefix.length);
    System.arraycopy(output.ints, output.offset, result.ints, prefix.length, output.length);
    result.length = prefix.length + output.length;
    return result;
  }
}
项目:search    文件:Util.java   
/** Looks up the output for this input, or null if the
 *  input is not accepted. */
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  final BytesReader fstReader = fst.getBytesReader();

  // Accumulate output as we go
  T output = fst.outputs.getNoOutput();
  for(int i=0;i<input.length;i++) {
    if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
      return null;
    }
    output = fst.outputs.add(output, arc.output);
  }

  if (arc.isFinal()) {
    return fst.outputs.add(output, arc.nextFinalOutput);
  } else {
    return null;
  }
}
项目:search    文件:TestFSTs.java   
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
  Random random = new Random(random().nextLong());
  for(int iter=0;iter<numIter;iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter " + iter);
    }
    for(int inputMode=0;inputMode<2;inputMode++) {
      final int numWords = random.nextInt(maxNumWords+1);
      Set<IntsRef> termsSet = new HashSet<>();
      IntsRef[] terms = new IntsRef[numWords];
      while(termsSet.size() < numWords) {
        final String term = getRandomString(random);
        termsSet.add(toIntsRef(term, inputMode));
      }
      doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
    }
  }
}
项目:read-open-source-code    文件:Util.java   
/** Adds all leaving arcs, including 'finished' arc, if
 *  the node is final, from this node into the queue.  */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRef input) throws IOException {

  // De-dup NO_OUTPUT since it must be a singleton:
  if (startOutput.equals(fst.outputs.getNoOutput())) {
    startOutput = fst.outputs.getNoOutput();
  }

  FSTPath<T> path = new FSTPath<T>(startOutput, node, input);
  fst.readFirstTargetArc(node, path.arc, bytesReader);

  //System.out.println("add start paths");

  // Bootstrap: find the min starting arc
  while (true) {
    if (allowEmptyString || path.arc.label != FST.END_LABEL) {
      addIfCompetitive(path);
    }
    if (path.arc.isLast()) {
      break;
    }
    fst.readNextArc(path.arc, bytesReader);
  }
}
项目:NYBC    文件:TestFSTsMisc.java   
private void testRandomWords(int maxNumWords, int numIter) throws IOException {
  Random random = new Random(random().nextLong());
  for(int iter=0;iter<numIter;iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter " + iter);
    }
    for(int inputMode=0;inputMode<2;inputMode++) {
      final int numWords = random.nextInt(maxNumWords+1);
      Set<IntsRef> termsSet = new HashSet<IntsRef>();
      IntsRef[] terms = new IntsRef[numWords];
      while(termsSet.size() < numWords) {
        final String term = getRandomString(random);
        termsSet.add(toIntsRef(term, inputMode));
      }
      doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
    }
  }
}
项目:read-open-source-code    文件:TaxonomyFacetCounts.java   
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
  IntsRef scratch  = new IntsRef();
  for(MatchingDocs hits : matchingDocs) {
    OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
    DocIdSetIterator docs = hits.bits.iterator();

    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      ords.get(doc, scratch);
      for(int i=0;i<scratch.length;i++) {
        values[scratch.ints[scratch.offset+i]]++;
      }
    }
  }

  rollup();
}
项目:read-open-source-code    文件:FSTCompletionBuilder.java   
/**
 * Builds the final automaton from a list of entries.
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  // Build the automaton.
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object empty = outputs.getNoOutput();
  final Builder<Object> builder = new Builder<Object>(
      FST.INPUT_TYPE.BYTE1, 0, 0, true, true, 
      shareMaxTailLength, outputs, null, false, 
      PackedInts.DEFAULT, true, 15);

  BytesRef scratch = new BytesRef();
  BytesRef entry;
  final IntsRef scratchIntsRef = new IntsRef();
  int count = 0;
  BytesRefIterator iter = sorter.iterator();
  while((entry = iter.next()) != null) {
    count++;
    if (scratch.compareTo(entry) != 0) {
      builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
      scratch.copyBytes(entry);
    }
  }

  return count == 0 ? null : builder.finish();
}
项目:NYBC    文件:EightFlagsIntEncoder.java   
@Override
public void encode(IntsRef values, BytesRef buf) {
  buf.offset = buf.length = 0;
  int upto = values.offset + values.length;
  for (int i = values.offset; i < upto; i++) {
    int value = values.ints[i];
    if (value == 1) {
      indicator |= ENCODE_TABLE[ordinal];
    } else {
      encodeQueue.ints[encodeQueue.length++] = value - 2;
    }
    ++ordinal;

    // encode the chunk and the indicator
    if (ordinal == 8) {
      encodeChunk(buf);
    }
  }

  // encode remaining values
  if (ordinal != 0) {
    encodeChunk(buf);
  }
}
项目:read-open-source-code    文件:IntSequenceOutputs.java   
@Override
public IntsRef add(IntsRef prefix, IntsRef output) {
  assert prefix != null;
  assert output != null;
  if (prefix == NO_OUTPUT) {
    return output;
  } else if (output == NO_OUTPUT) {
    return prefix;
  } else {
    assert prefix.length > 0;
    assert output.length > 0;
    IntsRef result = new IntsRef(prefix.length + output.length);
    System.arraycopy(prefix.ints, prefix.offset, result.ints, 0, prefix.length);
    System.arraycopy(output.ints, output.offset, result.ints, prefix.length, output.length);
    result.length = prefix.length + output.length;
    return result;
  }
}
项目:NYBC    文件:EncodingTest.java   
@BeforeClass
public static void beforeClassEncodingTest() throws Exception {
  int capacity = atLeast(10000);
  data = new IntsRef(capacity);
  for (int i = 0; i < 10; i++) {
    data.ints[i] = i + 1; // small values
  }
  for (int i = 10; i < data.ints.length; i++) {
    data.ints[i] = random().nextInt(Integer.MAX_VALUE - 1) + 1; // some encoders don't allow 0
  }
  data.length = data.ints.length;

  uniqueSortedData = IntsRef.deepCopyOf(data);
  Arrays.sort(uniqueSortedData.ints);
  uniqueSortedData.length = 0;
  int prev = -1;
  for (int i = 0; i < uniqueSortedData.ints.length; i++) {
    if (uniqueSortedData.ints[i] != prev) {
      uniqueSortedData.ints[uniqueSortedData.length++] = uniqueSortedData.ints[i];
      prev = uniqueSortedData.ints[i];
    }
  }
}
项目:NYBC    文件:EncodingTest.java   
private static void encoderTest(IntEncoder encoder, IntsRef data, IntsRef expected) throws IOException {
  // ensure toString is implemented
  String toString = encoder.toString();
  assertFalse(toString.startsWith(encoder.getClass().getName() + "@"));
  IntDecoder decoder = encoder.createMatchingDecoder();
  toString = decoder.toString();
  assertFalse(toString.startsWith(decoder.getClass().getName() + "@"));

  BytesRef bytes = new BytesRef(100); // some initial capacity - encoders should grow the byte[]
  IntsRef values = new IntsRef(100); // some initial capacity - decoders should grow the int[]
  for (int i = 0; i < 2; i++) {
    // run 2 iterations to catch encoders/decoders which don't reset properly
    encoding(encoder, data, bytes);
    decoding(bytes, values, encoder.createMatchingDecoder());
    assertTrue(expected.intsEquals(values));
  }
}
项目:NYBC    文件:NormalizeCharMap.java   
/** Builds the NormalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRef scratch = new IntsRef();
    for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
      builder.add(Util.toUTF16(ent.getKey(), scratch),
                  new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(map);
}
项目:NYBC    文件:TestFSTs.java   
public void testInternalFinalState() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  final boolean willRewrite = random().nextBoolean();
  final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
  builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
  builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
  final FST<Long> fst = builder.finish();
  StringWriter w = new StringWriter();
  //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
  Util.toDot(fst, w, false, false);
  w.close();
  //System.out.println(w.toString());

  // check for accept state at label t
  assertTrue(w.toString().indexOf("[label=\"t\" style=\"bold\"") != -1);
  // check for accept state at label n
  assertTrue(w.toString().indexOf("[label=\"n\" style=\"bold\"") != -1);
}
项目:read-open-source-code    文件:NormalizeCharMap.java   
/** Builds the NormalizeCharMap; call this once you
 *  are done calling {@link #add}. */
public NormalizeCharMap build() {

  final FST<CharsRef> map;
  try {
    final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
    final IntsRef scratch = new IntsRef();
    for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
      builder.add(Util.toUTF16(ent.getKey(), scratch),
                  new CharsRef(ent.getValue()));
    }
    map = builder.finish();
    pendingPairs.clear();
  } catch (IOException ioe) {
    // Bogus FST IOExceptions!!  (will never happen)
    throw new RuntimeException(ioe);
  }

  return new NormalizeCharMap(map);
}
项目:NYBC    文件:Lucene42DocValuesConsumer.java   
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());
  PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
  Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
  IntsRef scratch = new IntsRef();
  long ord = 0;
  for (BytesRef v : values) {
    builder.add(Util.toIntsRef(v, scratch), ord);
    ord++;
  }
  FST<Long> fst = builder.finish();
  if (fst != null) {
    fst.save(data);
  }
  meta.writeVLong(ord);
}
项目:NYBC    文件:IntSequenceOutputs.java   
@Override
public IntsRef subtract(IntsRef output, IntsRef inc) {
  assert output != null;
  assert inc != null;
  if (inc == NO_OUTPUT) {
    // no prefix removed
    return output;
  } else if (inc.length == output.length) {
    // entire output removed
    return NO_OUTPUT;
  } else {
    assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
    assert inc.length > 0;
    return new IntsRef(output.ints, output.offset + inc.length, output.length-inc.length);
  }
}
项目:NYBC    文件:Util.java   
/** Looks up the output for this input, or null if the
 *  input is not accepted. */
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {

  // TODO: would be nice not to alloc this on every lookup
  final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

  final BytesReader fstReader = fst.getBytesReader();

  // Accumulate output as we go
  T output = fst.outputs.getNoOutput();
  for(int i=0;i<input.length;i++) {
    if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
      return null;
    }
    output = fst.outputs.add(output, arc.output);
  }

  if (arc.isFinal()) {
    return fst.outputs.add(output, arc.nextFinalOutput);
  } else {
    return null;
  }
}
项目:read-open-source-code    文件:StemmerOverrideFilter.java   
/**
 * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs;
 */
public StemmerOverrideMap build() throws IOException {
  ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
  org.apache.lucene.util.fst.Builder<BytesRef> builder = new org.apache.lucene.util.fst.Builder<BytesRef>(
      FST.INPUT_TYPE.BYTE4, outputs);
  final int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  IntsRef intsSpare = new IntsRef();
  final int size = hash.size();
  for (int i = 0; i < size; i++) {
    int id = sort[i];
    BytesRef bytesRef = hash.get(id, spare);
    UnicodeUtil.UTF8toUTF32(bytesRef, intsSpare);
    builder.add(intsSpare, new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(builder.finish(), ignoreCase);
}
项目:elasticsearch_my    文件:XAnalyzingSuggester.java   
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
    final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
    Automaton automaton;
    try (TokenStream ts = stream) {
        automaton = toAutomaton(ts, ts2a);
    }
    LimitedFiniteStringsIterator finiteStrings =
            new LimitedFiniteStringsIterator(automaton, maxGraphExpansions);
    Set<IntsRef> set = new HashSet<>();
    for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) {
        set.add(IntsRef.deepCopyOf(string));
    }
    return Collections.unmodifiableSet(set);
}
项目:lams    文件:Lucene42DocValuesProducer.java   
@Override
public void seekExact(long ord) throws IOException {
  // TODO: would be better to make this simpler and faster.
  // but we dont want to introduce a bug that corrupts our enum state!
  bytesReader.setPosition(0);
  fst.getFirstArc(firstArc);
  IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
  BytesRefBuilder scratchBytes = new BytesRefBuilder();
  scratchBytes.clear();
  Util.toBytesRef(output, scratchBytes);
  // TODO: we could do this lazily, better to try to push into FSTEnum though?
  in.seekExact(scratchBytes.get());
}
项目:lams    文件:Stemmer.java   
private List<CharsRef> doStem(char word[], int length, boolean caseVariant) {
  List<CharsRef> stems = new ArrayList<>();
  IntsRef forms = dictionary.lookupWord(word, 0, length);
  if (forms != null) {
    for (int i = 0; i < forms.length; i += formStep) {
      boolean checkKeepCase = caseVariant && dictionary.keepcase != -1;
      boolean checkNeedAffix = dictionary.needaffix != -1;
      boolean checkOnlyInCompound = dictionary.onlyincompound != -1;
      if (checkKeepCase || checkNeedAffix || checkOnlyInCompound) {
        dictionary.flagLookup.get(forms.ints[forms.offset+i], scratch);
        char wordFlags[] = Dictionary.decodeFlags(scratch);
        // we are looking for a case variant, but this word does not allow it
        if (checkKeepCase && Dictionary.hasFlag(wordFlags, (char)dictionary.keepcase)) {
          continue;
        }
        // we can't add this form, its a pseudostem requiring an affix
        if (checkNeedAffix && Dictionary.hasFlag(wordFlags, (char)dictionary.needaffix)) {
          continue;
        }
        // we can't add this form, it only belongs inside a compound word
        if (checkOnlyInCompound && Dictionary.hasFlag(wordFlags, (char)dictionary.onlyincompound)) {
          continue;
        }
      }
      stems.add(newStem(word, length, forms, i));
    }
  }
  try {
    boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
  return stems;
}
项目:lams    文件:Stemmer.java   
private CharsRef newStem(char buffer[], int length, IntsRef forms, int formID) {
  final String exception;
  if (dictionary.hasStemExceptions) {
    int exceptionID = forms.ints[forms.offset + formID + 1];
    if (exceptionID > 0) {
      exception = dictionary.getStemException(exceptionID);
    } else {
      exception = null;
    }
  } else {
    exception = null;
  }

  if (dictionary.needsOutputCleaning) {
    scratchSegment.setLength(0);
    if (exception != null) {
      scratchSegment.append(exception);
    } else {
      scratchSegment.append(buffer, 0, length);
    }
    try {
      Dictionary.applyMappings(dictionary.oconv, scratchSegment);
    } catch (IOException bogus) {
      throw new RuntimeException(bogus);
    }
    char cleaned[] = new char[scratchSegment.length()];
    scratchSegment.getChars(0, cleaned.length, cleaned, 0);
    return new CharsRef(cleaned, 0, cleaned.length);
  } else {
    if (exception != null) {
      return new CharsRef(exception);
    } else {
      return new CharsRef(buffer, 0, length);
    }
  }
}
项目:lams    文件:Dictionary.java   
IntsRef lookup(FST<IntsRef> fst, char word[], int offset, int length) {
  if (fst == null) {
    return null;
  }
  final FST.BytesReader bytesReader = fst.getBytesReader();
  final FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
  // Accumulate output as we go
  final IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
  IntsRef output = NO_OUTPUT;

  int l = offset + length;
  try {
    for (int i = offset, cp = 0; i < l; i += Character.charCount(cp)) {
      cp = Character.codePointAt(word, i, l);
      if (fst.findTargetArc(cp, arc, arc, bytesReader) == null) {
        return null;
      } else if (arc.output != NO_OUTPUT) {
        output = fst.outputs.add(output, arc.output);
      }
    }
    if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null) {
      return null;
    } else if (arc.output != NO_OUTPUT) {
      return fst.outputs.add(output, arc.output);
    } else {
      return output;
    }
  } catch (IOException bogus) {
    throw new RuntimeException(bogus);
  }
}
项目:lams    文件:Dictionary.java   
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
  Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (Map.Entry<String,List<Integer>> entry : affixes.entrySet()) {
    Util.toUTF32(entry.getKey(), scratch);
    List<Integer> entries = entry.getValue();
    IntsRef output = new IntsRef(entries.size());
    for (Integer c : entries) {
      output.ints[output.length++] = c;
    }
    builder.add(scratch.get(), output);
  }
  return builder.finish();
}
项目:lams    文件:Operations.java   
/**
 * Returns true if the given string (expressed as unicode codepoints) is accepted by the automaton.  The input must be deterministic.
 * <p>
 * Complexity: linear in the length of the string.
 * <p>
 * <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
 */
public static boolean run(Automaton a, IntsRef s) {
  assert a.isDeterministic();
  int state = 0;
  for (int i=0;i<s.length;i++) {
    int nextState = a.step(state, s.ints[s.offset+i]);
    if (nextState == -1) {
      return false;
    }
    state = nextState;
  }
  return a.isAccept(state);
}
项目:lams    文件:IntSequenceOutputs.java   
@Override
public IntsRef common(IntsRef output1, IntsRef output2) {
  assert output1 != null;
  assert output2 != null;

  int pos1 = output1.offset;
  int pos2 = output2.offset;
  int stopAt1 = pos1 + Math.min(output1.length, output2.length);
  while(pos1 < stopAt1) {
    if (output1.ints[pos1] != output2.ints[pos2]) {
      break;
    }
    pos1++;
    pos2++;
  }

  if (pos1 == output1.offset) {
    // no common prefix
    return NO_OUTPUT;
  } else if (pos1 == output1.offset + output1.length) {
    // output1 is a prefix of output2
    return output1;
  } else if (pos2 == output2.offset + output2.length) {
    // output2 is a prefix of output1
    return output2;
  } else {
    return new IntsRef(output1.ints, output1.offset, pos1-output1.offset);
  }
}
项目:lams    文件:IntSequenceOutputs.java   
@Override
public void write(IntsRef prefix, DataOutput out) throws IOException {
  assert prefix != null;
  out.writeVInt(prefix.length);
  for(int idx=0;idx<prefix.length;idx++) {
    out.writeVInt(prefix.ints[prefix.offset+idx]);
  }
}
项目:lams    文件:IntSequenceOutputs.java   
@Override
public IntsRef read(DataInput in) throws IOException {
  final int len = in.readVInt();
  if (len == 0) {
    return NO_OUTPUT;
  } else {
    final IntsRef output = new IntsRef(len);
    for(int idx=0;idx<len;idx++) {
      output.ints[idx] = in.readVInt();
    }
    output.length = len;
    return output;
  }
}
项目:lams    文件:Util.java   
/** Just maps each UTF16 unit (char) to the ints in an
 *  IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) {
  final int charLimit = s.length();
  scratch.setLength(charLimit);
  scratch.grow(charLimit);
  for (int idx = 0; idx < charLimit; idx++) {
    scratch.setIntAt(idx, (int) s.charAt(idx));
  }
  return scratch.get();
}