public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"),
        MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();

    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

    // "only", the 4th word of the multi-word synonym "and indubitably single only",
    // is not emitted, since its position is greater than 3.
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably",
            "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for (int i = 0; i < output2.length; i++) {
      if (output2.bytes[output2.offset + i] == payloadSep) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex + 1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }
  return result;
}
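// Sketch of the byte layout the method above decodes (a reconstruction from the
// code, not taken from the source): when hasPayloads is set, each stored output is
//
//   output2 = <surface form, UTF-8 bytes> <payloadSep, one byte> <payload bytes>
//
// so everything before the first payloadSep byte becomes the LookupResult key
// (copied into spare), everything after it becomes the payload, and
// decodeWeight(output1) recovers the suggestion weight from the first output.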
/**
 * Wraps a {@link CompletionQuery} with context queries
 *
 * @param query base completion query to wrap
 * @param queryContexts a map of context mapping name and collected query contexts
 * @return a context-enabled query
 */
public ContextQuery toContextQuery(CompletionQuery query,
                                   Map<String, List<ContextMapping.InternalQueryContext>> queryContexts) {
  ContextQuery typedContextQuery = new ContextQuery(query);
  if (queryContexts.isEmpty() == false) {
    CharsRefBuilder scratch = new CharsRefBuilder();
    scratch.grow(1);
    for (int typeId = 0; typeId < contextMappings.size(); typeId++) {
      scratch.setCharAt(0, (char) typeId);
      scratch.setLength(1);
      ContextMapping mapping = contextMappings.get(typeId);
      List<ContextMapping.InternalQueryContext> internalQueryContext = queryContexts.get(mapping.name());
      if (internalQueryContext != null) {
        for (ContextMapping.InternalQueryContext context : internalQueryContext) {
          scratch.append(context.context);
          typedContextQuery.addContext(scratch.toCharsRef(), context.boost, !context.isPrefix);
          scratch.setLength(1);
        }
      }
    }
  }
  return typedContextQuery;
}
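// Encoding note (inferred from the loop above, not stated in the source): every
// query context is prefixed with a single char carrying the numeric id of its
// ContextMapping type, so a context "ctx" for the mapping at index 2 is
// registered as "\u0002ctx". The negation in addContext(..., !context.isPrefix)
// requests exact matching unless the caller supplied the context as a prefix.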
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion,
                                   IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
  DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
  final IndexReader indexReader = searcher.getIndexReader();
  TermSuggestion response = new TermSuggestion(
      name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
  );
  List<Token> tokens = queryTerms(suggestion, spare);
  for (Token token : tokens) {
    // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
    SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
        token.term, suggestion.getShardSize(), indexReader,
        suggestion.getDirectSpellCheckerSettings().suggestMode()
    );
    Text key = new Text(new BytesArray(token.term.bytes()));
    TermSuggestion.Entry resultEntry =
        new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
    for (SuggestWord suggestWord : suggestedWords) {
      Text word = new Text(suggestWord.string);
      resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
    }
    response.addTerm(resultEntry);
  }
  return response;
}
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms,
                       TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
  // start term, optimized writing
  BytesRef term = termIter.next();
  spare.copyUTF8Bytes(term);
  builder.startObject(spare.toString());

  buildTermStatistics(builder, termIter);

  // finally write the term vectors
  PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
  int termFreq = posEnum.freq();
  builder.field(FieldStrings.TERM_FREQ, termFreq);
  initMemory(curTerms, termFreq);
  initValues(curTerms, posEnum, termFreq);
  buildValues(builder, curTerms, termFreq);
  buildScore(builder, boostAtt);
  builder.endObject();
}
@Override
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
    innerExecute(String name, CustomSuggestionsContext suggestion, IndexSearcher searcher,
                 CharsRefBuilder spare) throws IOException {
  // Get the suggestion context
  String text = suggestion.getText().utf8ToString();

  // create two suggestions with 12 and 123 appended
  Suggest.Suggestion<Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option>> response =
      new Suggest.Suggestion<>(name, suggestion.getSize());

  String firstSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text,
      suggestion.getField(), suggestion.options.get("suffix"), "12");
  Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry12 =
      new Suggest.Suggestion.Entry<>(new Text(firstSuggestion), 0, text.length() + 2);
  response.addTerm(resultEntry12);

  String secondSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text,
      suggestion.getField(), suggestion.options.get("suffix"), "123");
  Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry123 =
      new Suggest.Suggestion.Entry<>(new Text(secondSuggestion), 0, text.length() + 3);
  response.addTerm(resultEntry123);

  return response;
}
/** Sugar: just joins the provided terms with {@link
 *  SynonymMap#WORD_SEPARATOR}. reuse and its chars
 *  must not be null. */
public static CharsRef join(String[] words, CharsRefBuilder reuse) {
  int upto = 0;
  char[] buffer = reuse.chars();
  for (String word : words) {
    final int wordLen = word.length();
    final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
    if (needed > buffer.length) {
      reuse.grow(needed);
      buffer = reuse.chars();
    }
    if (upto > 0) {
      buffer[upto++] = SynonymMap.WORD_SEPARATOR;
    }
    word.getChars(0, wordLen, buffer, upto);
    upto += wordLen;
  }
  reuse.setLength(upto);
  return reuse.get();
}
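// Minimal usage sketch (illustrative, not from the source): building a
// multi-word synonym with join() before registering it, mirroring what the
// testMaxPosition3WithSynomyms test above does.
CharsRefBuilder scratch = new CharsRefBuilder();
SynonymMap.Builder builder = new SynonymMap.Builder(true);
builder.add(new CharsRef("wifi"),
    SynonymMap.Builder.join(new String[]{"wi", "fi"}, scratch), true);
SynonymMap map = builder.build(); // "wifi" now also matches the phrase "wi fi"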
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency above which a term is treated as a stopword
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
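// Usage sketch (hedged: "reader" stands for an already-open IndexReader, and the
// analyzer and field name are illustrative): treat any term occurring in more
// than 100 documents of the "body" field as a query-time stopword.
Analyzer wrapped = new QueryAutoStopWordAnalyzer(
    new StandardAnalyzer(), reader, Collections.singleton("body"), 100);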
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion,
                                   IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
  DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
  final IndexReader indexReader = searcher.getIndexReader();
  TermSuggestion response = new TermSuggestion(
      name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
  );
  List<Token> tokens = queryTerms(suggestion, spare);
  for (Token token : tokens) {
    // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
    SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
        token.term, suggestion.getShardSize(), indexReader,
        suggestion.getDirectSpellCheckerSettings().suggestMode()
    );
    Text key = new Text(new BytesArray(token.term.bytes()));
    TermSuggestion.Entry resultEntry =
        new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
    for (SuggestWord suggestWord : suggestedWords) {
      Text word = new Text(suggestWord.string);
      resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
    }
    response.addTerm(resultEntry);
  }
  return response;
}
public LocalEnv(int offset, int limit, int startTermIndex, int adjust, int targetIdx, int nTerms,
                Predicate<BytesRef> termFilter, int mincount, int[] counts, CharsRefBuilder charsRef,
                boolean extend, SortedSetDocValues si, SolrIndexSearcher searcher,
                List<Entry<LeafReader, Bits>> leaves, String fieldName, T ft, NamedList res) {
  super(offset, limit, targetIdx, mincount, fieldName, ft, res);
  if (startTermIndex == -1) {
    // weird case where missing is counted at counts[0].
    this.startTermOrd = 0;
    this.endTermOrd = nTerms - 1;
  } else if (startTermIndex >= 0) {
    this.startTermOrd = startTermIndex;
    this.endTermOrd = startTermIndex + nTerms;
  } else {
    throw new IllegalStateException();
  }
  this.startTermIndex = startTermIndex;
  this.adjust = adjust;
  this.nTerms = nTerms;
  this.termFilter = termFilter;
  this.counts = counts;
  this.charsRef = charsRef;
  this.extend = extend;
  this.si = si;
  this.searcher = searcher;
  this.leaves = leaves;
}
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum termsEnum = vector.iterator();
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    final int freq = (int) termsEnum.totalTermFreq();
    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    } else {
      cnt.x += freq;
    }
  }
}
protected static void getTerms(AllTermsShardRequest request, List<String> terms,
                               List<LeafReaderContext> leaves) {
  List<TermsEnum> termIters = getTermsEnums(request, leaves);
  CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef lastTerm = null;
  int[] exhausted = new int[termIters.size()];
  for (int i = 0; i < exhausted.length; i++) {
    exhausted[i] = 0;
  }
  try {
    lastTerm = findSmallestTermAfter(request, termIters, lastTerm, exhausted);
    if (lastTerm == null) {
      return;
    }
    findNMoreTerms(request, terms, termIters, spare, lastTerm, exhausted);
  } catch (IOException e) {
    // swallowed: return whatever terms were collected before the failure
  }
}
protected static void findNMoreTerms(AllTermsShardRequest request, List<String> terms,
                                     List<TermsEnum> termIters, CharsRefBuilder spare,
                                     BytesRef lastTerm, int[] exhausted) {
  if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
    spare.copyUTF8Bytes(lastTerm);
    terms.add(spare.toString());
  }
  // deep-copy the term: the enums may reuse the BytesRef instance they returned
  BytesRef bytesRef = new BytesRef(lastTerm.utf8ToString());
  lastTerm = bytesRef;
  while (terms.size() < request.size() && lastTerm != null) {
    moveIterators(exhausted, termIters, lastTerm);
    lastTerm = findMinimum(exhausted, termIters);
    if (lastTerm != null) {
      if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
        spare.copyUTF8Bytes(lastTerm);
        terms.add(spare.toString());
      }
    }
  }
}
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum termsEnum = vector.iterator(null);
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    final int freq = (int) termsEnum.totalTermFreq();
    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    } else {
      cnt.x += freq;
    }
  }
}
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for (int i = 0; i < output2.length; i++) {
      if (output2.bytes[output2.offset + i] == PAYLOAD_SEP) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    spare.grow(sepIndex);
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex + 1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.grow(output2.length);
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }
  return result;
}
@Override
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts,
                                 boolean higherWeightsFirst, int num) {
  if (contexts != null) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }
  final List<Completion> completions;
  if (higherWeightsFirst) {
    completions = higherWeightsCompletion.lookup(key, num);
  } else {
    completions = normalCompletion.lookup(key, num);
  }
  final ArrayList<LookupResult> results = new ArrayList<>(completions.size());
  CharsRefBuilder spare = new CharsRefBuilder();
  for (Completion c : completions) {
    spare.copyUTF8Bytes(c.utf8);
    results.add(new LookupResult(spare.toString(), c.bucket));
  }
  return results;
}
public void testRandomUnicodeStrings() throws Throwable {
  char[] buffer = new char[20];
  char[] expected = new char[20];

  CharsRefBuilder utf16 = new CharsRefBuilder();

  int num = atLeast(100000);
  for (int iter = 0; iter < num; iter++) {
    boolean hasIllegal = fillUnicode(buffer, expected, 0, 20);

    BytesRef utf8 = new BytesRef(CharBuffer.wrap(buffer, 0, 20));
    if (!hasIllegal) {
      byte[] b = new String(buffer, 0, 20).getBytes(StandardCharsets.UTF_8);
      assertEquals(b.length, utf8.length);
      for (int i = 0; i < b.length; i++) {
        assertEquals(b[i], utf8.bytes[i]);
      }
    }

    utf16.copyUTF8Bytes(utf8.bytes, 0, utf8.length);
    assertEquals(utf16.length(), 20);
    for (int i = 0; i < 20; i++) {
      assertEquals(expected[i], utf16.charAt(i));
    }
  }
}
private CharsRef analyze(Analyzer analyzer, String text) throws IOException {
  CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      charsRefBuilder.grow(charsRefBuilder.length() + length + 1); /* current + word + separator */
      if (charsRefBuilder.length() > 0) {
        charsRefBuilder.append(CcWordSet.WORD_SEPARATOR);
      }
      charsRefBuilder.append(termAtt);
    }
    ts.end();
  }
  if (charsRefBuilder.length() == 0) {
    return null;
  }
  charsRefBuilder.append(CcWordSet.WORD_END);
  return charsRefBuilder.get();
}
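// Shape of the result (derived from the loop above): the analyzed tokens joined
// by CcWordSet.WORD_SEPARATOR and terminated with CcWordSet.WORD_END, e.g.
// analyze(analyzer, "big apple") -> "big<SEP>apple<END>"; returns null when the
// analyzer eliminates every token.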
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
    innerExecute(String name, final CompletionSuggestionContext suggestionContext,
                 final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
  if (suggestionContext.getFieldType() != null) {
    final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
    CompletionSuggestion completionSuggestion =
        new CompletionSuggestion(name, suggestionContext.getSize());
    spare.copyUTF8Bytes(suggestionContext.getText());
    CompletionSuggestion.Entry completionSuggestEntry =
        new CompletionSuggestion.Entry(new Text(spare.toString()), 0, spare.length());
    completionSuggestion.addTerm(completionSuggestEntry);
    TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
    suggest(searcher, suggestionContext.toQuery(), collector);
    int numResult = 0;
    for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
      TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
      // collect contexts
      Map<String, Set<CharSequence>> contexts = Collections.emptyMap();
      if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
        contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
      }
      if (numResult++ < suggestionContext.getSize()) {
        CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(
            suggestDoc.doc, new Text(suggestDoc.key.toString()), suggestDoc.score, contexts);
        completionSuggestEntry.addOption(option);
      } else {
        break;
      }
    }
    return completionSuggestion;
  }
  return null;
}
private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
  final List<Token> result = new ArrayList<>();
  final String field = suggestion.getField();
  DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
      new DirectCandidateGenerator.TokenConsumer() {
        @Override
        public void nextToken() {
          Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
          result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
        }
      }, spare);
  return result;
}
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare,
                             final BytesRefBuilder byteSpare) throws IOException {
  if (preFilter == null) {
    return term;
  }
  final BytesRefBuilder result = byteSpare;
  analyze(preFilter, term, field, new TokenConsumer() {
    @Override
    public void nextToken() throws IOException {
      this.fillBytesRef(result);
    }
  }, spare);
  return result.get();
}
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field,
                          TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
  spare.copyUTF8Bytes(toAnalyze);
  CharsRef charsRef = spare.get();
  try (TokenStream ts = analyzer.tokenStream(
      field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
    return analyze(ts, consumer);
  }
}
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
    execute(String name, T suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
  // #3469 We want to ignore empty shards
  if (searcher.getIndexReader().numDocs() == 0) {
    return null;
  }
  return innerExecute(name, suggestion, searcher, spare);
}
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms, used to check the skip-terms list
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector,
                                @Nullable String fieldName) throws IOException {
  final TermsEnum termsEnum = vector.iterator();
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    if (isSkipTerm(fieldName, term)) {
      continue;
    }
    final PostingsEnum docs = termsEnum.postings(null);
    int freq = 0;
    while (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      freq += docs.freq();
    }
    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    } else {
      cnt.x += freq;
    }
  }
}
private void buildField(XContentBuilder builder, final CharsRefBuilder spare,
                        Fields theFields, Iterator<String> fieldIter) throws IOException {
  String fieldName = fieldIter.next();
  builder.startObject(fieldName);
  Terms curTerms = theFields.terms(fieldName);
  // write field statistics
  buildFieldStatistics(builder, curTerms);
  builder.startObject(FieldStrings.TERMS);
  TermsEnum termIter = curTerms.iterator();
  BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
  for (int i = 0; i < curTerms.size(); i++) {
    buildTerm(builder, spare, curTerms, termIter, boostAtt);
  }
  builder.endObject();
  builder.endObject();
}
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords,
                     CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  setVersion(matchVersion);
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRefBuilder spare = new CharsRefBuilder();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare.get(), iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
/** Sugar: analyzes the text with the analyzer and
 *  separates by {@link SynonymMap#WORD_SEPARATOR}.
 *  reuse and its chars must not be null. */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
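// Output shape (a note derived from the code above, assuming Lucene's
// SynonymMap.WORD_SEPARATOR of '\u0000'): the analyzed words come back joined
// by that separator, so with a whitespace analyzer, analyze("big apple", reuse)
// yields "big\u0000apple" — the same form join() produces and
// SynonymMap.Builder.add() expects.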
@Override
public void parse(Reader in) throws IOException, ParseException {
  LineNumberReader br = new LineNumberReader(in);
  try {
    String line = null;
    String lastSynSetID = "";
    CharsRef[] synset = new CharsRef[8];
    int synsetSize = 0;

    while ((line = br.readLine()) != null) {
      String synSetID = line.substring(2, 11);

      if (!synSetID.equals(lastSynSetID)) {
        addInternal(synset, synsetSize);
        synsetSize = 0;
      }

      if (synset.length <= synsetSize + 1) {
        synset = Arrays.copyOf(synset, synset.length * 2);
      }

      synset[synsetSize] = parseSynonym(line, new CharsRefBuilder());
      synsetSize++;
      lastSynSetID = synSetID;
    }

    // final synset in the file
    addInternal(synset, synsetSize);
  } catch (IllegalArgumentException e) {
    ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
    ex.initCause(e);
    throw ex;
  } finally {
    br.close();
  }
}
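// Expected input (the standard WordNet prolog wn_s.pl format), e.g.:
//   s(100002137,1,'abstraction',n,6,0).
// line.substring(2, 11) extracts the nine-digit synset id, so consecutive
// lines sharing that id are accumulated into one synset before addInternal()
// flushes it.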
private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
  final List<Token> result = new ArrayList<>();
  final String field = suggestion.getField();
  SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
      new SuggestUtils.TokenConsumer() {
        @Override
        public void nextToken() {
          Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
          result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
        }
      }, spare);
  return result;
}
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare,
                             final BytesRefBuilder byteSpare) throws IOException {
  if (preFilter == null) {
    return term;
  }
  final BytesRefBuilder result = byteSpare;
  SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
    @Override
    public void nextToken() throws IOException {
      this.fillBytesRef(result);
    }
  }, spare);
  return result.get();
}