@Override public void build(TermFreqIterator tfit) throws IOException { if (tfit.getComparator() != null) { // make sure it's unsorted // WTF - this could result in yet another sorted iteration.... tfit = new UnsortedTermFreqIteratorWrapper(tfit); } trie = new JaspellTernarySearchTrie(); trie.setMatchAlmostDiff(editDistance); BytesRef spare; final CharsRef charsSpare = new CharsRef(); while ((spare = tfit.next()) != null) { final long weight = tfit.weight(); if (spare.length == 0) { continue; } charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); trie.put(charsSpare.toString(), Long.valueOf(weight)); } }
@Override public void build(TermFreqIterator iterator) throws IOException { BytesRef scratch = new BytesRef(); TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); while ((scratch = iter.next()) != null) { long cost = iter.weight(); if (previous == null) { previous = new BytesRef(); } else if (scratch.equals(previous)) { continue; // for duplicate suggestions, the best weight is actually // added } Util.toIntsRef(scratch, scratchInts); builder.add(scratchInts, cost); previous.copyBytes(scratch); } fst = builder.finish(); }
@Override public void build(TermFreqIterator tfit) throws IOException { root = new TernaryTreeNode(); // buffer first if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { // make sure it's sorted and the comparator uses UTF16 sort order tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); } ArrayList<String> tokens = new ArrayList<String>(); ArrayList<Number> vals = new ArrayList<Number>(); BytesRef spare; CharsRef charsSpare = new CharsRef(); while ((spare = tfit.next()) != null) { charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); tokens.add(charsSpare.toString()); vals.add(Long.valueOf(tfit.weight())); } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); }
/**
 * Creates a new iterator, buffering entries from the specified iterator.
 * <p>
 * The source's comparator is recorded, then the source is drained: each term
 * is appended to {@code entries} and its weight is stored at the matching
 * index of {@code freqs}, which is grown whenever it is full.
 *
 * @param source iterator to drain
 * @throws IOException propagated from the source iterator
 */
public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
  this.comp = source.getComparator();

  int count = 0;
  for (BytesRef term = source.next(); term != null; term = source.next()) {
    entries.append(term);
    if (freqs.length <= count) {
      // No free slot left for this entry's weight.
      freqs = ArrayUtil.grow(freqs, freqs.length + 1);
    }
    freqs[count] = source.weight();
    count++;
  }
}
/**
 * Creates a new iterator, wrapping the specified iterator and
 * returning elements in a random order.
 * <p>
 * The superclass constructor is invoked with {@code source} first; {@code ords}
 * is then filled with the indices {@code 0..entries.size()-1} and shuffled.
 *
 * @param source iterator whose entries are to be returned in random order
 * @throws IOException propagated from the superclass constructor
 */
public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
  super(source);
  ords = new int[entries.size()];
  Random random = new Random();
  for (int i = 0; i < ords.length; i++) {
    ords[i] = i;
  }
  // Fisher-Yates shuffle: walk backwards and swap slot i with a slot drawn
  // uniformly from [0, i]. Drawing from the whole array on every step (the
  // naive variant) does not make all permutations equally likely.
  for (int i = ords.length - 1; i > 0; i--) {
    int randomPosition = random.nextInt(i + 1);
    int temp = ords[i];
    ords[i] = ords[randomPosition];
    ords[randomPosition] = temp;
  }
}
/**
 * Checks that both the sorted and the unsorted wrapper return {@code null}
 * from their first {@code next()} call when the wrapped iterator has no entries.
 */
public void testEmpty() throws Exception {
  final TermFreq[] noEntries = new TermFreq[0];
  TermFreqArrayIterator iterator = new TermFreqArrayIterator(noEntries);

  TermFreqIterator sortedWrapper =
      new SortedTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
  assertNull(sortedWrapper.next());

  // The same (empty) source iterator is deliberately reused for the second wrapper.
  TermFreqIterator unsortedWrapper = new UnsortedTermFreqIteratorWrapper(iterator);
  assertNull(unsortedWrapper.next());
}
/**
 * Creates a new sorted wrapper, sorting by BytesRef
 * (ascending) then cost (ascending).
 * <p>
 * Sorting is triggered from this constructor: the result of {@code sort()}
 * is stored in {@code reader}.
 *
 * @param source iterator supplying the entries to sort
 * @param comparator comparator stored for use by this wrapper
 * @throws IOException propagated from {@code sort()}
 */
public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
  this.source = source;
  this.comparator = comparator;
  // sort() is called last, after both fields are assigned; presumably it reads
  // them, so keep this ordering (verify against sort()).
  this.reader = sort();
}
/**
 * Creates the wrapper by passing {@code source} straight to the superclass
 * constructor; this constructor sets up no state of its own.
 *
 * @param source iterator to wrap
 * @throws IOException propagated from the superclass constructor
 */
WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
  super(source);
}
/**
 * Returns an iterator over this object's words.
 *
 * @return a newly created {@code FileIterator}; a fresh instance is
 *         constructed on every call
 */
@Override
public TermFreqIterator getWordsIterator() {
  return new FileIterator();
}
/**
 * Creates a new sorted wrapper, using {@link
 * BytesRef#getUTF8SortedAsUnicodeComparator} for
 * sorting.
 * <p>
 * Delegates to the two-argument constructor with that comparator.
 *
 * @param source iterator supplying the entries to sort
 * @throws IOException propagated from the two-argument constructor
 */
public SortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
  this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
 * The implementation might re-sort the data internally.
 *
 * @param tfit iterator supplying the terms, and their weights, to build from
 * @throws IOException if an implementation fails while reading the iterator
 *         or building its representation
 */
public abstract void build(TermFreqIterator tfit) throws IOException;