Example source code for the Java class org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute
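The snippets below, collected from several open-source projects, show two generations of this API side by side. In the older style (Lucene 4.x through early 5.x), getBytesRef() hands out a shared, reusable BytesRef, and fillBytesRef() must be called after every successful incrementToken() to copy the current term into that buffer; in the earliest 4.x releases fillBytesRef() also returned the term's hash code, which is why the NYBC and Maskana tests assert on it. A caller that keeps a term must take BytesRef.deepCopyOf(...), because the buffer is overwritten by the next token. Later versions removed fillBytesRef() entirely, so getBytesRef() simply returns the current term, as in the resource-query-parser and pyramid snippets; the returned reference is still only valid until the next incrementToken(). A second difference visible below: older AttributeSource.getAttribute(...) threw an IllegalArgumentException when the attribute was missing, while newer versions return null, hence the null checks in the newer code.

A minimal consumption loop in the older style (a sketch, not taken from any project below; it assumes only an Analyzer in scope plus the standard java.util, java.io, and Lucene imports for TokenStream, TermToBytesRefAttribute, and BytesRef):

static List<BytesRef> collectTerms(Analyzer analyzer, String field, String text) throws IOException {
  List<BytesRef> terms = new ArrayList<>();
  try (TokenStream ts = analyzer.tokenStream(field, text)) {
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef scratch = termAtt.getBytesRef(); // shared buffer, reused for every token
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();                  // old API only; later versions drop this call
      terms.add(BytesRef.deepCopyOf(scratch)); // deep copy: scratch is overwritten next token
    }
    ts.end();
  }
  return terms;
}

On the newer API the loop body shrinks to terms.add(BytesRef.deepCopyOf(termAtt.getBytesRef())).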
Project: lams
File: TermsFilterBuilder.java
@Override
public Filter getFilter(Element e) throws ParserException {
List<BytesRef> terms = new ArrayList<>();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = null;
try {
ts = analyzer.tokenStream(fieldName, text);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
terms.add(BytesRef.deepCopyOf(bytes));
}
ts.end();
}
catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
return new TermsFilter(fieldName, terms);
}
Project: lams
File: QueryParserBase.java
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
if (analyzerIn == null) analyzerIn = getAnalyzer();
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
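This helper is how the query parser normalizes the text of multi-term pieces such as range endpoints and lowercased wildcard or prefix fragments: the fragment is run through the analyzer and must come out as exactly one token, since the result has to correspond to a single indexed term. With a lowercasing analyzer, for instance, the input "FOO" yields the single term bytes "foo", while input that tokenizes into two words trips the "too many terms" check.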
Project: search
File: AbstractTestCase.java
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
List<BytesRef> bytesRefs = new ArrayList<>();
TokenStream tokenStream = analyzer.tokenStream(field, text);
try {
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytesRef = termAttribute.getBytesRef();
tokenStream.reset();
while (tokenStream.incrementToken()) {
termAttribute.fillBytesRef();
bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
}
tokenStream.end();
} finally {
IOUtils.closeWhileHandlingException(tokenStream);
}
return bytesRefs;
}
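A usage sketch for this helper (hypothetical call from within the test class, assuming some Analyzer in scope; the bytes are decoded only for display):

List<BytesRef> refs = analyze("Foo Bar", "field", analyzer);
for (BytesRef ref : refs) {
  System.out.println(ref.utf8ToString()); // e.g. "foo", "bar" with a lowercasing analyzer
}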
Project: search
File: TestPerfTasksLogic.java
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
throws Exception {
TokenStream ts1 = a1.tokenStream("bogus", text);
TokenStream ts2 = a2.tokenStream("bogus", text);
ts1.reset();
ts2.reset();
TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
assertTrue(ts1.incrementToken());
assertTrue(ts2.incrementToken());
BytesRef bytes1 = termAtt1.getBytesRef();
BytesRef bytes2 = termAtt2.getBytesRef();
termAtt1.fillBytesRef();
termAtt2.fillBytesRef();
assertEquals(bytes1, bytes2);
assertFalse(ts1.incrementToken());
assertFalse(ts2.incrementToken());
ts1.close();
ts2.close();
}
Project: search
File: TestEmptyTokenStream.java
public void testIndexWriter_LUCENE4656() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));
TokenStream ts = new EmptyTokenStream();
assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new TextField("description", ts));
// this should not fail because we have no TermToBytesRefAttribute
writer.addDocument(doc);
assertEquals(1, writer.numDocs());
writer.close();
directory.close();
}
Project: search
File: TermsFilterBuilder.java
@Override
public Filter getFilter(Element e) throws ParserException {
List<BytesRef> terms = new ArrayList<>();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = null;
try {
ts = analyzer.tokenStream(fieldName, text);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
terms.add(BytesRef.deepCopyOf(bytes));
}
ts.end();
}
catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
return new TermsFilter(fieldName, terms);
}
Project: search
File: QueryParserBase.java
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
if (analyzerIn == null) analyzerIn = getAnalyzer();
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
Project: search
File: TestNumericTokenStream.java
public void testLongStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
assertNotNull(bytesAtt);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
assertNotNull(typeAtt);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
assertNotNull(numericAtt);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(64, numericAtt.getValueSize());
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
bytesAtt.fillBytesRef();
assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
Project: search
File: TestNumericTokenStream.java
public void testIntStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
assertNotNull(bytesAtt);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
assertNotNull(typeAtt);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
assertNotNull(numericAtt);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(32, numericAtt.getValueSize());
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
bytesAtt.fillBytesRef();
assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
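Both tests rely on NumericTokenStream emitting one token per precision step, where the token for a given shift encodes the value with its low `shift` bits zeroed; that is exactly what the masking in the assertions computes. A pure-Java illustration of the arithmetic, independent of Lucene (the value and the precision step of 16 are arbitrary choices for the example):

long lvalue = 0x0123456789ABCDEFL;
for (int shift = 0; shift < 64; shift += 16) {
  long masked = lvalue & ~((1L << shift) - 1L); // zero out the low `shift` bits
  System.out.printf("shift=%2d -> %016X%n", shift, masked);
}
// shift= 0 -> 0123456789ABCDEF
// shift=16 -> 0123456789AB0000
// shift=32 -> 0123456700000000
// shift=48 -> 0123000000000000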
Project: search
File: TextField.java
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
if (part == null || analyzerIn == null) return null;
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
Project: resource-query-parser
File: QueryBuilder.java
/**
* Creates a span query from the tokenstream. In the case of a single token,
* a simple <code>SpanTermQuery</code> is returned. When multiple tokens, an
* ordered <code>SpanNearQuery</code> with slop of 0 is returned.
*/
protected final SpanQuery createSpanQuery(TokenStream in, String field) throws IOException {
TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
if (termAtt == null) {
return null;
}
List<SpanTermQuery> terms = new ArrayList<>();
while (in.incrementToken()) {
terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
}
if (terms.isEmpty()) {
return null;
} else if (terms.size() == 1) {
return terms.get(0);
} else {
return new SpanNearQuery(terms.toArray(new SpanTermQuery[0]), 0, true);
}
}
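createSpanQuery is protected and does not reset the stream itself, so a caller is typically a QueryBuilder subclass that prepares the stream first. A hedged usage sketch, assuming this QueryBuilder mirrors Lucene's, with its single-Analyzer constructor and getAnalyzer() accessor (SpanQueryFactory and spanFor are invented names for illustration):

class SpanQueryFactory extends QueryBuilder {
  SpanQueryFactory(Analyzer analyzer) { super(analyzer); }

  SpanQuery spanFor(String field, String text) throws IOException {
    try (TokenStream ts = getAnalyzer().tokenStream(field, text)) {
      ts.reset();                              // createSpanQuery consumes but never resets
      SpanQuery q = createSpanQuery(ts, field);
      ts.end();
      return q; // one token -> SpanTermQuery; several -> ordered SpanNearQuery, slop 0
    }
  }
}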
Project: resource-query-parser
File: QueryBuilder.java
/**
* Creates complex boolean query from the cached tokenstream contents
*/
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
throws IOException {
BooleanQuery.Builder q = newBooleanQuery();
List<Term> currentQuery = new ArrayList<>();
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
stream.reset();
while (stream.incrementToken()) {
if (posIncrAtt.getPositionIncrement() != 0) {
add(q, currentQuery, operator);
currentQuery.clear();
}
currentQuery.add(new Term(field, termAtt.getBytesRef()));
}
add(q, currentQuery, operator);
return q.build();
}
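The position-increment check is what groups stacked tokens: a token with increment 0 occupies the same position as its predecessor (typically a synonym injected by a filter), so it joins the current group instead of opening a new clause. If a synonym filter turns "wifi network" into the tokens wifi, wireless (increment 0), network, the built query has two clauses combined with the given operator: one over {wifi, wireless} and one over {network}.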
Project: resource-query-parser
File: QueryBuilder.java
/**
* Creates simple phrase query from the cached tokenstream contents
*/
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.setSlop(slop);
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
int position = -1;
stream.reset();
while (stream.incrementToken()) {
if (enablePositionIncrements) {
position += posIncrAtt.getPositionIncrement();
} else {
position += 1;
}
builder.add(new Term(field, termAtt.getBytesRef()), position);
}
return builder.build();
}
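Position accounting starts at -1 and advances by each token's increment, so when enablePositionIncrements is on, a removed stopword leaves a gap the PhraseQuery must respect; when it is off, every token lands one past the previous one. For example, if a stop filter drops "of" from "heart of gold", the phrase is built with heart at position 0 and gold at position 2 when increments are honored, but at position 1 otherwise.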
Project: pyramid
File: PhraseCountQueryBuilder.java
protected Query doToQuery(QueryShardContext context) throws IOException {
// Analyzer analyzer = context.getMapperService().searchAnalyzer();
Analyzer analyzer = new WhitespaceAnalyzer();
try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
if (termAtt == null) {
return null;
}
List<CustomSpanTermQuery> clauses = new ArrayList<>();
stream.reset();
while (stream.incrementToken()) {
Term term = new Term(fieldName, termAtt.getBytesRef());
clauses.add(new CustomSpanTermQuery(term));
}
return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
} catch (IOException e) {
throw new RuntimeException("Error analyzing query text", e);
}
}
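Three things stand out in this variant: the mapper's search analyzer is commented out in favor of a hard-coded WhitespaceAnalyzer wrapped in a LowerCaseFilter; the null check after getAttribute implies a Lucene version where a missing attribute yields null rather than an exception; and each token is wrapped immediately in a Term, whose constructor in recent Lucene versions deep-copies the bytes it is given, which would explain why no explicit BytesRef.deepCopyOf appears here.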
Project: NYBC
File: AbstractTestCase.java
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytesRef = termAttribute.getBytesRef();
tokenStream.reset();
while (tokenStream.incrementToken()) {
termAttribute.fillBytesRef();
bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
}
tokenStream.end();
tokenStream.close();
return bytesRefs;
}
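The NYBC copies are from the Lucene 4.0 era: the code passes a StringReader explicitly instead of the later tokenStream(String, String) convenience overload, fillBytesRef() still returns the term's hash (see the NYBC numeric tests below), and end()/close() run in straight-line code rather than in a finally block, so a stream leaks if analysis throws. The search and Maskana copies of the same helpers show the later form, with IOUtils.closeWhileHandlingException in a finally block.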
Project: NYBC
File: TestPerfTasksLogic.java
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
throws Exception {
TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
ts1.reset();
ts2.reset();
TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
assertTrue(ts1.incrementToken());
assertTrue(ts2.incrementToken());
BytesRef bytes1 = termAtt1.getBytesRef();
BytesRef bytes2 = termAtt2.getBytesRef();
termAtt1.fillBytesRef();
termAtt2.fillBytesRef();
assertEquals(bytes1, bytes2);
assertFalse(ts1.incrementToken());
assertFalse(ts2.incrementToken());
ts1.close();
ts2.close();
}
Project: NYBC
File: TestEmptyTokenStream.java
public void testIndexWriter_LUCENE4656() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
TEST_VERSION_CURRENT, null));
TokenStream ts = new EmptyTokenStream();
assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new TextField("description", ts));
// this should not fail because we have no TermToBytesRefAttribute
writer.addDocument(doc);
assertEquals(1, writer.numDocs());
writer.close();
directory.close();
}
Project: NYBC
File: TermsFilterBuilder.java
@Override
public Filter getFilter(Element e) throws ParserException {
List<BytesRef> terms = new ArrayList<BytesRef>();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
try {
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
terms.add(BytesRef.deepCopyOf(bytes));
}
ts.end();
ts.close();
}
catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
}
return new TermsFilter(fieldName, terms);
}
Project: NYBC
File: SpanOrTermsBuilder.java
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
String value = DOMUtils.getNonBlankTextOrFail(e);
try {
List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(value));
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
clausesList.add(stq);
}
ts.end();
ts.close();
SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
return soq;
}
catch (IOException ioe) {
throw new ParserException("IOException parsing value:" + value);
}
}
Project: NYBC
File: TestNumericTokenStream.java
public void testLongStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
// use getAttribute to test whether the attributes really exist; if not, an IAE will be thrown
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(64, numericAtt.getValueSize());
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
final int hash = bytesAtt.fillBytesRef();
assertEquals("Hash incorrect", bytes.hashCode(), hash);
assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
Project: NYBC
File: TestNumericTokenStream.java
public void testIntStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
// use getAttribute to test whether the attributes really exist; if not, an IAE will be thrown
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(32, numericAtt.getValueSize());
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
final int hash = bytesAtt.fillBytesRef();
assertEquals("Hash incorrect", bytes.hashCode(), hash);
assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
Project: read-open-source-code
File: TermsFilterBuilder.java
@Override
public Filter getFilter(Element e) throws ParserException {
List<BytesRef> terms = new ArrayList<BytesRef>();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = null;
try {
ts = analyzer.tokenStream(fieldName, text);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
terms.add(BytesRef.deepCopyOf(bytes));
}
ts.end();
}
catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
return new TermsFilter(fieldName, terms);
}
Project: read-open-source-code
File: QueryParserBase.java
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
if (analyzerIn == null) analyzerIn = getAnalyzer();
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
Project: read-open-source-code
File: TextField.java
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
if (part == null || analyzerIn == null) return null;
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
Project: SolrTextTagger
File: Tagger.java
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
TagClusterReducer tagClusterReducer, boolean skipAltTokens,
boolean ignoreStopWords) throws IOException {
this.terms = terms;
this.liveDocs = liveDocs;
this.tokenStream = tokenStream;
this.skipAltTokens = skipAltTokens;
this.ignoreStopWords = ignoreStopWords;
// termAtt = tokenStream.addAttribute(CharTermAttribute.class);
byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
lookupAtt = tokenStream.addAttribute(TaggingAttribute.class);
tokenStream.reset();
this.tagClusterReducer = tagClusterReducer;
}
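The constructor registers every attribute the tagger consumes and calls tokenStream.reset() itself, so the caller must hand in a fresh, unconsumed stream and must not reset it again. TaggingAttribute is SolrTextTagger's own attribute, used to control which tokens are looked up during tagging.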
Project: Maskana-Gestor-de-Conocimiento
File: AbstractTestCase.java
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
TokenStream tokenStream = analyzer.tokenStream(field, text);
try {
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytesRef = termAttribute.getBytesRef();
tokenStream.reset();
while (tokenStream.incrementToken()) {
termAttribute.fillBytesRef();
bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
}
tokenStream.end();
} finally {
IOUtils.closeWhileHandlingException(tokenStream);
}
return bytesRefs;
}
Project: Maskana-Gestor-de-Conocimiento
File: TestPerfTasksLogic.java
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
throws Exception {
TokenStream ts1 = a1.tokenStream("bogus", text);
TokenStream ts2 = a2.tokenStream("bogus", text);
ts1.reset();
ts2.reset();
TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
assertTrue(ts1.incrementToken());
assertTrue(ts2.incrementToken());
BytesRef bytes1 = termAtt1.getBytesRef();
BytesRef bytes2 = termAtt2.getBytesRef();
termAtt1.fillBytesRef();
termAtt2.fillBytesRef();
assertEquals(bytes1, bytes2);
assertFalse(ts1.incrementToken());
assertFalse(ts2.incrementToken());
ts1.close();
ts2.close();
}
Project: Maskana-Gestor-de-Conocimiento
File: TestEmptyTokenStream.java
public void testIndexWriter_LUCENE4656() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
TEST_VERSION_CURRENT, null));
TokenStream ts = new EmptyTokenStream();
assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
Document doc = new Document();
doc.add(new StringField("id", "0", Field.Store.YES));
doc.add(new TextField("description", ts));
// this should not fail because we have no TermToBytesRefAttribute
writer.addDocument(doc);
assertEquals(1, writer.numDocs());
writer.close();
directory.close();
}
Project: Maskana-Gestor-de-Conocimiento
File: TermsFilterBuilder.java
@Override
public Filter getFilter(Element e) throws ParserException {
List<BytesRef> terms = new ArrayList<BytesRef>();
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = null;
try {
ts = analyzer.tokenStream(fieldName, text);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
terms.add(BytesRef.deepCopyOf(bytes));
}
ts.end();
}
catch (IOException ioe) {
throw new RuntimeException("Error constructing terms from index:" + ioe);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
return new TermsFilter(fieldName, terms);
}
Project: Maskana-Gestor-de-Conocimiento
File: QueryParserBase.java
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
if (analyzerIn == null) analyzerIn = getAnalyzer();
TokenStream source = null;
try {
source = analyzerIn.tokenStream(field, part);
source.reset();
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
if (!source.incrementToken())
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
termAtt.fillBytesRef();
if (source.incrementToken())
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
source.end();
return BytesRef.deepCopyOf(bytes);
} catch (IOException e) {
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
} finally {
IOUtils.closeWhileHandlingException(source);
}
}
Project: Maskana-Gestor-de-Conocimiento
File: TestNumericTokenStream.java
public void testLongStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
// use getAttribute to test whether the attributes really exist; if not, an IAE will be thrown
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(64, numericAtt.getValueSize());
for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
final int hash = bytesAtt.fillBytesRef();
assertEquals("Hash incorrect", bytes.hashCode(), hash);
assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
Project: Maskana-Gestor-de-Conocimiento
File: TestNumericTokenStream.java
public void testIntStream() throws Exception {
final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
// use getAttribute to test whether the attributes really exist; if not, an IAE will be thrown
final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
final BytesRef bytes = bytesAtt.getBytesRef();
stream.reset();
assertEquals(32, numericAtt.getValueSize());
for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
assertTrue("New token is available", stream.incrementToken());
assertEquals("Shift value wrong", shift, numericAtt.getShift());
final int hash = bytesAtt.fillBytesRef();
assertEquals("Hash incorrect", bytes.hashCode(), hash);
assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
}
assertFalse("More tokens available", stream.incrementToken());
stream.end();
stream.close();
}
Project: lams
File: FieldInvertState.java
/**
* Sets attributeSource to a new instance.
*/
void setAttributeSource(AttributeSource attributeSource) {
if (this.attributeSource != attributeSource) {
this.attributeSource = attributeSource;
termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
}
}
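Note the deliberate mix of lookups here: getAttribute returns null for the term and payload attributes when the stream does not supply them (the EmptyTokenStream case tested above), while addAttribute creates the position-increment and offset attributes on demand, since the indexing chain always consumes those.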
Project: lams
File: NumericTokenStream.java
@Override
public void reflectWith(AttributeReflector reflector) {
fillBytesRef();
reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
reflector.reflect(NumericTermAttribute.class, "shift", shift);
reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
}
Project: lams
File: SpanOrTermsBuilder.java
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
String value = DOMUtils.getNonBlankTextOrFail(e);
List<SpanQuery> clausesList = new ArrayList<>();
TokenStream ts = null;
try {
ts = analyzer.tokenStream(fieldName, value);
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
BytesRef bytes = termAtt.getBytesRef();
ts.reset();
while (ts.incrementToken()) {
termAtt.fillBytesRef();
SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
clausesList.add(stq);
}
ts.end();
SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
return soq;
}
catch (IOException ioe) {
throw new ParserException("IOException parsing value:" + value);
} finally {
IOUtils.closeWhileHandlingException(ts);
}
}