private int copy(BreakIterator line, Text text, Text columnValue, int offset) { // Deceive the BreakIterator to ensure no line breaks after '-' character line.setText(text.plainString().replace("-", "\u00ff")); int done = 0; for (int start = line.first(), end = line.next(); end != BreakIterator.DONE; start = end, end = line.next()) { Text word = text.substring(start, end); //.replace("\u00ff", "-"); // not needed if (columnValue.maxLength >= offset + done + length(word)) { done += copy(word, columnValue, offset + done); // TODO localized length } else { break; } } if (done == 0 && length(text) > columnValue.maxLength) { // The value is a single word that is too big to be written to the column. Write as much as we can. done = copy(text, columnValue, offset); } return done; }
/**
 * Walks {@code bi} backwards from {@code last()} using {@code previous()} and collects
 * the substrings of {@code text} between consecutive boundaries, in forward order.
 * Inconsistencies (wrong last boundary, non-decreasing positions, DONE before offset 0)
 * are reported through {@code errln}.
 *
 * @param bi   the iterator under test
 * @param text the string whose substrings are collected; the checks compare the
 *             iterator's boundaries against its length and offset 0
 * @return the collected pieces, first piece of the text first
 */
private Vector testLastAndPrevious(BreakIterator bi, String text) {
    Vector<String> pieces = new Vector<String>();

    int upper = bi.last();
    if (upper != text.length()) {
        errln("last() returned " + upper + " instead of " + text.length());
    }

    int lower = upper;
    while (lower != BreakIterator.DONE) {
        lower = bi.previous();
        if (lower == BreakIterator.DONE) {
            if (upper != 0) {
                errln("previous() returned DONE prematurely: offset was " + upper + " instead of 0");
            }
        } else {
            if (lower >= upper) {
                errln("previous() failed to move backward: previous() on position " + upper + " yielded " + lower);
            }
            // Prepend, because the text is traversed back to front.
            pieces.insertElementAt(text.substring(lower, upper), 0);
        }
        upper = lower;
    }
    return pieces;
}
/**
 * Extracts the first sentence of the given text, drops one trailing '.' if present
 * and removes every "\n".
 * The text is only cut down to its first sentence (US-locale sentence iterator, then
 * trimmed) when it contains a '.'; otherwise it is processed whole.
 *
 * @param fullSentence the text to shorten
 * @return the shortened text, or "" when {@code isNotEmpty} rejects the intermediate result
 */
@NotNull
public static String getFirstSentenceWithoutDot(String fullSentence) {
    String sentence = fullSentence;
    if (containsChar(sentence, '.')) {
        BreakIterator sentences = getSentenceInstance(Locale.US);
        sentences.setText(sentence);
        int from = sentences.first();
        int to = sentences.next();
        sentence = sentence.substring(from, to).trim();
    }

    if (!isNotEmpty(sentence)) {
        return "";
    }
    if (endsWithChar(sentence, '.')) {
        sentence = sentence.substring(0, sentence.length() - 1);
    }
    return replace(sentence, "\n", "");
}
/**
 * Regression test for bug 4153072: on a word iterator whose text is a
 * {@link StringCharacterIterator} restricted to a sub-range, {@code isBoundary(index)}
 * must throw {@link IllegalArgumentException} for offsets before the range's begin
 * index and must not throw for the begin index itself.
 * Failures are reported through {@code errln}.
 */
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;

    // Only the "Hello, World!" part is visible to the iterator.
    iter.setText(new StringCharacterIterator(str, begin, end, begin));

    // Probe every offset from -1 up to and including the begin index.
    for (int index = -1; index < begin + 1; ++index) {
        try {
            iter.isBoundary(index); // result is irrelevant, only the exception behaviour matters
            if (index < begin) {
                errln("Didn't get exception with offset = " + index + " and begin index = " + begin);
            }
        } catch (IllegalArgumentException e) {
            if (index >= begin) {
                errln("Got exception with offset = " + index + " and begin index = " + begin);
            }
        }
    }
}
/**
 * Splits {@code text} at the boundaries reported by the {@code boundary} iterator.
 *
 * @param text the text to split
 * @return one trimmed string per segment between consecutive boundaries, in text order
 */
@Override
public String[] split(String text) {
    boundary.setText(text);

    ArrayList<String> trimmed = new ArrayList<>();
    int from = boundary.first();
    int to = boundary.next();
    while (to != BreakIterator.DONE) {
        trimmed.add(text.substring(from, to).trim());
        from = to;
        to = boundary.next();
    }
    return trimmed.toArray(new String[trimmed.size()]);
}
/**
 * Builds style spans that mark with the "underlined" style every word-iterator segment
 * of {@code text} that starts with a letter or digit and whose lower-cased form is not
 * contained in {@code dictionary}. All other text is covered by spans with an empty style.
 *
 * @param text the text to check
 * @return spans covering the whole text
 */
private StyleSpans<Collection<String>> highlightMisspelled(String text) {
    StyleSpansBuilder<Collection<String>> spans = new StyleSpansBuilder<>();
    BreakIterator segments = BreakIterator.getWordInstance();
    segments.setText(text);

    int styledUpTo = 0; // end of the last underlined segment
    int segmentStart = segments.first();
    for (int segmentEnd = segments.next();
         segmentEnd != BreakIterator.DONE;
         segmentStart = segmentEnd, segmentEnd = segments.next()) {
        if (!Character.isLetterOrDigit(text.charAt(segmentStart))) {
            continue; // whitespace / punctuation segment
        }
        String word = text.substring(segmentStart, segmentEnd).toLowerCase();
        if (dictionary.contains(word)) {
            continue;
        }
        // Unstyled gap since the previous hit, then the hit itself.
        spans.add(Collections.emptyList(), segmentStart - styledUpTo);
        spans.add(Collections.singleton("underlined"), segmentEnd - segmentStart);
        styledUpTo = segmentEnd;
    }

    // Whatever follows the last hit stays unstyled.
    spans.add(Collections.emptyList(), text.length() - styledUpTo);
    return spans.create();
}
/**
 * Feeds each line-break segment of {@code line} (US locale) to {@code processNextWord}
 * and returns the pieces accumulated in the process.
 *
 * @param line  the line to break up
 * @param width passed through to {@code processNextWord}
 * @return the pieces collected by {@code processNextWord}, plus the final non-empty
 *         buffer it handed back, if any
 */
private List<String> piecesOfEmbeddedLine( String line, int width ) {
    BreakIterator breaks = BreakIterator.getLineInstance( Locale.US );
    breaks.setText( line );

    List<String> pieces = new ArrayList<String>();
    StringBuilder pending = new StringBuilder();

    int from = breaks.first();
    int to = breaks.next();
    while ( to != DONE ) {
        pending = processNextWord( line, pending, from, to, width, pieces );
        from = to;
        to = breaks.next();
    }

    // Flush what is left in the buffer after the last segment.
    if ( pending.length() > 0 )
        pieces.add( pending.toString() );

    return pieces;
}
/**
 * Chooses the boundary scanner for a field from its options.
 * <ul>
 *   <li>SENTENCE / WORD: a {@link BreakIterator}-backed scanner for the configured locale,
 *       or the shared default scanner when no locale is configured.</li>
 *   <li>any other type: a {@code SimpleBoundaryScanner} built from the configured max-scan
 *       and boundary chars when either differs from the defaults, otherwise the shared
 *       default simple scanner.</li>
 * </ul>
 *
 * @param field the field whose options are consulted
 * @return the scanner to use
 */
private static BoundaryScanner getBoundaryScanner(Field field) {
    final FieldOptions options = field.fieldOptions();
    final Locale locale = options.boundaryScannerLocale();

    switch (options.boundaryScannerType()) {
        case SENTENCE:
            if (locale == null) {
                return DEFAULT_SENTENCE_BOUNDARY_SCANNER;
            }
            return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(locale));

        case WORD:
            if (locale == null) {
                return DEFAULT_WORD_BOUNDARY_SCANNER;
            }
            return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(locale));

        default:
            // Comparison operators are kept exactly as they were (no equals()).
            if (options.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN
                    || options.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) {
                return new SimpleBoundaryScanner(options.boundaryMaxScan(), options.boundaryChars());
            }
            return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
    }
}
/** * Needed to unify forward and backward searching. * The method assumes that s is the text assigned to words. */ private int findWordLimit(int index, BreakIterator words, boolean direction, String s) { // Fix for 4256660 and 4256661. // Words iterator is different from character and sentence iterators // in that end of one word is not necessarily start of another word. // Please see java.text.BreakIterator JavaDoc. The code below is // based on nextWordStartAfter example from BreakIterator.java. int last = (direction == NEXT) ? words.following(index) : words.preceding(index); int current = (direction == NEXT) ? words.next() : words.previous(); while (current != BreakIterator.DONE) { for (int p = Math.min(last, current); p < Math.max(last, current); p++) { if (Character.isLetter(s.charAt(p))) { return last; } } last = current; current = (direction == NEXT) ? words.next() : words.previous(); } return BreakIterator.DONE; }
@Override protected boolean incrementWord() { int start = wordBreaker.current(); if (start == BreakIterator.DONE) { return false; // BreakIterator exhausted } // find the next set of boundaries, skipping over non-tokens int end = wordBreaker.next(); while (end != BreakIterator.DONE && !Character.isLetterOrDigit(Character.codePointAt(buffer, sentenceStart + start, sentenceEnd))) { start = end; end = wordBreaker.next(); } if (end == BreakIterator.DONE) { return false; // BreakIterator exhausted } clearAttributes(); termAtt.copyBuffer(buffer, sentenceStart + start, end - start); offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end)); return true; }
/** * return true if there is a token from the buffer, or null if it is * exhausted. */ private boolean incrementSentence() throws IOException { if (length == 0) // we must refill the buffer return false; while (true) { int start = iterator.current(); if (start == BreakIterator.DONE) return false; // BreakIterator exhausted // find the next set of boundaries int end = iterator.next(); if (end == BreakIterator.DONE) return false; // BreakIterator exhausted setNextSentence(start, end); if (incrementWord()) { return true; } } }
/** * Creates a new DocumentWordTokenizer to work on a document * @param document The document to spell check */ public DocumentWordTokenizer(Document document) { this.document = document; //Create a text segment over the entire document text = new Segment(); sentenceIterator = BreakIterator.getSentenceInstance(); try { document.getText(0, document.getLength(), text); sentenceIterator.setText(text); // robert: use text.getBeginIndex(), not 0, for segment's first offset currentWordPos = getNextWordStart(text, text.getBeginIndex()); //If the current word pos is -1 then the string was all white space if (currentWordPos != -1) { currentWordEnd = getNextWordEnd(text, currentWordPos); nextWordPos = getNextWordStart(text, currentWordEnd); } else { moreTokens = false; } } catch (BadLocationException ex) { moreTokens = false; } }
/**
 * For every sample in {@code given}, checks that each position returned by
 * {@code next()} before the end of the text is also accepted by {@code isBoundary()}.
 * Failures are reported through {@code errln}.
 */
void TestIsBoundary() {
    iter = BreakIterator.getWordInstance(Locale.US);
    for (int i = 0; i < given.length; i++) {
        final String sample = given[i];
        iter.setText(sample);
        start = iter.first();
        for (end = iter.next(); end < sample.length(); end = iter.next()) {
            if (!iter.isBoundary(end)) {
                errln("Word break failure: isBoundary() This should be a boundary. Index=" + end + " for " + sample);
            }
        }
    }
}
/**
 * Creates a parser for one documentation comment.
 * A sentence {@link BreakIterator} is created only when the "breakIterator" option is
 * set or the effective locale's language is not English; otherwise this constructor
 * does not assign {@code sentenceBreaker}.
 *
 * @param fac        factory supplying names, tree maker, locale and options
 * @param diagSource stored for later use
 * @param comment    the comment to parse
 */
DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
    this.fac = fac;
    this.diagSource = diagSource;
    this.comment = comment;
    names = fac.names;
    m = fac.docTreeMaker;

    // Fall back to the JVM default when the factory has no locale.
    Locale locale = fac.locale;
    if (locale == null) {
        locale = Locale.getDefault();
    }

    Options options = fac.options;
    if (options.isSet("breakIterator")
            || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) {
        sentenceBreaker = BreakIterator.getSentenceInstance(locale);
    }

    initTagParsers();
}
/**
 * For every sample in {@code given} and each probe offset (visited last to first),
 * takes the segment between {@code following(offset)} and the boundary before it and
 * compares it with the matching entry of {@code expected}.
 * Mismatches are reported through {@code errln}.
 */
void TestPrintAt_1() {
    iter = BreakIterator.getWordInstance(Locale.US);

    // Probe offsets, one row per sample in `given`.
    int[][] index = {
        {2, 8, 10, 15, 17},
        {1, 8, 10, 12, 15, 17, 20},
        {3, 8, 10, 13, 16, 18, 20},
        {4, 6, 9, 10, 16},
    };

    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);
        int[] probes = index[i];
        int j = probes.length;
        while (--j >= 0) {
            end = iter.following(probes[j]);
            start = iter.previous();
            String want = expected[i][j];
            String got = given[i].substring(start, end);
            if (!want.equals(got)) {
                errln("Word break failure: printAt_1() expected:<" + want + ">, got:<" + got
                        + "> start=" + start + " end=" + end);
            }
        }
    }
}
/**
 * Creates a new DocumentWordTokenizer to work on a document.
 * The whole document text is loaded into a {@link Segment} and the first word
 * (and the start of the one after it) is located straight away.
 *
 * @param document The document to spell check
 */
public DocumentWordTokenizer(Document document) {
    this.document = document;
    //Create a text segment over the entire document
    text = new Segment();
    sentenceIterator = BreakIterator.getSentenceInstance();
    try {
        document.getText(0, document.getLength(), text);
        sentenceIterator.setText(text);
        // A Segment's valid indices start at getBeginIndex(), which is not necessarily 0
        // (the segment may be a window into a larger backing array), so the first
        // search must start there rather than at a literal 0.
        // NOTE(review): assumes getNextWordStart expects an index in the Segment's own
        // index space - confirm against its implementation.
        currentWordPos = getNextWordStart(text, text.getBeginIndex());
        //If the current word pos is -1 then the string was all white space
        if (currentWordPos != -1) {
            currentWordEnd = getNextWordEnd(text, currentWordPos);
            nextWordPos = getNextWordStart(text, currentWordEnd);
        } else {
            moreTokens = false;
        }
    } catch (BadLocationException ex) {
        moreTokens = false;
    }
}
/**
 * Passes the available-locale list of each locale-sensitive service class
 * (and of {@link Locale} itself) to {@code diffLocale}, one class after another.
 *
 * @param args unused
 */
public static void main(String[] args) {
    diffLocale(BreakIterator.class, Arrays.asList(BreakIterator.getAvailableLocales()));
    diffLocale(Collator.class, Arrays.asList(Collator.getAvailableLocales()));
    diffLocale(DateFormat.class, Arrays.asList(DateFormat.getAvailableLocales()));
    diffLocale(DateFormatSymbols.class, Arrays.asList(DateFormatSymbols.getAvailableLocales()));
    diffLocale(DecimalFormatSymbols.class, Arrays.asList(DecimalFormatSymbols.getAvailableLocales()));
    diffLocale(NumberFormat.class, Arrays.asList(NumberFormat.getAvailableLocales()));
    diffLocale(Locale.class, Arrays.asList(Locale.getAvailableLocales()));
}
private static String[] splitBySentence(String text) { List<String> sentences = new ArrayList<String>(); // Use Locale.US since the customizer is setting the default (US) locale text only: BreakIterator it = BreakIterator.getSentenceInstance(Locale.US); it.setText(text); int start = it.first(); int end; while ((end = it.next()) != BreakIterator.DONE) { sentences.add(text.substring(start, end)); start = end; } return sentences.toArray(new String[sentences.size()]); }
/** * Constructs a {@code LineBreakMeasurer} for the specified text. * * @param text the text for which this {@code LineBreakMeasurer} * produces {@code TextLayout} objects; the text must contain * at least one character; if the text available through * {@code iter} changes, further calls to this * {@code LineBreakMeasurer} instance are undefined (except, * in some cases, when {@code insertChar} or * {@code deleteChar} are invoked afterward - see below) * @param breakIter the {@link BreakIterator} which defines line * breaks * @param frc contains information about a graphics device which is * needed to measure the text correctly; * text measurements can vary slightly depending on the * device resolution, and attributes such as antialiasing; this * parameter does not specify a translation between the * {@code LineBreakMeasurer} and user space * @throws IllegalArgumentException if the text has less than one character * @see LineBreakMeasurer#insertChar * @see LineBreakMeasurer#deleteChar */ public LineBreakMeasurer(AttributedCharacterIterator text, BreakIterator breakIter, FontRenderContext frc) { if (text.getEndIndex() - text.getBeginIndex() < 1) { throw new IllegalArgumentException("Text must contain at least one character."); } this.breakIter = breakIter; this.measurer = new TextMeasurer(text, frc); this.limit = text.getEndIndex(); this.pos = this.start = text.getBeginIndex(); charIter = new CharArrayIterator(measurer.getChars(), this.start); this.breakIter.setText(charIter); }
/** Wrap multi-line strings.
 * Delegates the wrapping to {@code wrapStringToArray} and joins the resulting lines;
 * every line, including the last one, is followed by a newline character.
 * @param original the original string to wrap
 * @param width the maximum width of lines
 * @param breakIterator algorithm for breaking lines
 * @param removeNewLines if <code>true</code>, any newlines in the original string are ignored
 * @return the whole string with embedded newlines
 */
public static String wrapString(String original, int width, BreakIterator breakIterator, boolean removeNewLines) {
    StringBuilder wrapped = new StringBuilder();
    for (String line : wrapStringToArray(original, width, breakIterator, removeNewLines)) {
        wrapped.append(line).append('\n');
    }
    return wrapped.toString();
}
public void TestGetAvailableLocales() { Locale[] locList = BreakIterator.getAvailableLocales(); if (locList.length == 0) errln("getAvailableLocales() returned an empty list!"); // I have no idea how to test this function... }
@Override protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { if (returnNonHighlightedSnippets) { //we want to return the first sentence of the first snippet only return super.getEmptyHighlight(fieldName, bi, 1); } return EMPTY_PASSAGE; }
/**
 * Returns the cached line-break iterator, creating a new one when none exists yet or
 * when {@code locale} differs from the locale the cached iterator was created for.
 *
 * @return the line-break iterator for the current {@code locale}
 */
private BreakIterator createBreakIterator()
{
    // Compare by value: two equal Locale objects need not be the same instance, and an
    // identity check would rebuild the iterator for no reason.
    boolean localeChanged = (locale == null)
        ? (breakIteratorLocale != null)
        : !locale.equals(breakIteratorLocale);

    if (breakIterator == null || localeChanged)
    {
        breakIterator = BreakIterator.getLineInstance(locale);
        breakIteratorLocale = locale;
    }

    return breakIterator;
}
/**
 * Compares the custom-separator iterator with the ROOT-locale sentence iterator on
 * short inputs ("a", "ab", "abc" and the empty string) via {@code assertSameBreaks}.
 */
public void testSingleSentences() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());

    final String[] inputs = {"a", "ab", "abc", ""};
    for (String input : inputs) {
        assertSameBreaks(input, expected, actual);
    }
}
/**
 * Compares the custom-separator iterator with the ROOT-locale sentence iterator on
 * slices that start at offset 0 and leave trailing "000" outside the slice.
 * <p>
 * The slice length now grows with the leading letters (1, 2, 3, 0), following the
 * same pattern as {@code testSliceMiddle}; previously the length was 1 for "ab000"
 * and "abc000" as well, so only "a" was ever exercised.
 * NOTE(review): assumes the two int arguments of {@code assertSameBreaks} are
 * (offset, length) - confirm against its definition.
 */
public void testSliceEnd() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
    assertSameBreaks("a000", 0, 1, expected, actual);
    assertSameBreaks("ab000", 0, 2, expected, actual);
    assertSameBreaks("abc000", 0, 3, expected, actual);
    assertSameBreaks("000", 0, 0, expected, actual);
}
/**
 * Snapshots every boundary of {@code bi}, from {@code first()} until {@code next()}
 * returns DONE, into an unmodifiable list. {@code charIndex} is initialised to the
 * first boundary.
 *
 * @param bi the iterator to read the boundaries from; it is advanced to its end
 */
MirroredBreakIterator(BreakIterator bi) {
    List<Integer> collected = new ArrayList<Integer>();

    int boundary = bi.first();
    charIndex = boundary;
    while (boundary != DONE) {
        collected.add(boundary);
        boundary = bi.next();
    }

    boundaries = Collections.unmodifiableList(collected);
}
/**
 * Compares the custom-separator iterator with the ROOT-locale sentence iterator via
 * {@code assertSameBreaks}, passing 3 as the first int argument and 1, 2, 3 and 0 as
 * the second for texts that have "000" on both sides of the letters.
 */
public void testSliceMiddle() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());

    final String[] texts = {"000a000", "000ab000", "000abc000", "000000"};
    final int[] lengths = {1, 2, 3, 0};
    for (int i = 0; i < texts.length; i++) {
        assertSameBreaks(texts[i], 3, lengths[i], expected, actual);
    }
}
/**
 * Constructor.
 * Resolves the locale through {@code getLocale()}, makes it the JVM-wide default when
 * one is found (otherwise calls {@code docenv.exit()}), and creates the collator and
 * sentence breaker for it.
 *
 * @param docenv           environment; {@code exit()} is invoked on it when no locale is found
 * @param localeName       stored in the field of the same name
 * @param useBreakIterator stored in the field of the same name
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    locale = getLocale();
    if (locale != null) {
        Locale.setDefault(locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
private int copy(BreakIterator line, Text text, Text columnValue, int offset) { // Deceive the BreakIterator to ensure no line breaks after '-' character line.setText(text.plainString().replace("-", "\u00ff")); int done = 0; for (int start = line.first(), end = line.next(); end != BreakIterator.DONE; start = end, end = line.next()) { Text word = text.substring(start, end); //.replace("\u00ff", "-"); // not needed if (columnValue.maxLength >= offset + done + length(word)) { done += copy(word, columnValue, offset + done); // TODO localized length } else { break; } } return done; }
@Override public boolean onDoubleTap(MotionEvent e) { int x = (int) e.getX(); int y = (int) e.getY(); // find the position int offset = getOffsetForPosition(x, y); // select word BreakIterator iterator = BreakIterator.getWordInstance(); iterator.setText(getText().toString()); // start and end are the word boundaries; int start; if (iterator.isBoundary(offset)) { start = offset; } else { start = iterator.preceding(offset); } int end = iterator.following(offset); // handle tapping at the very beginning or end. if (end == BreakIterator.DONE) { end = start; start = iterator.preceding(offset); if (start == BreakIterator.DONE) start = end; } setSelection(start, end); return super.onDoubleTap(e); }
/**
 * Prints every sentence of {@code source} (US-locale sentence iterator) on its own
 * line of standard output. Sentences are printed untrimmed.
 *
 * @param source the text to split
 */
public void useSentenceIterator(String source) {
    BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
    sentences.setText(source);

    int from = sentences.first();
    int to = sentences.next();
    while (to != BreakIterator.DONE) {
        System.out.println(source.substring(from, to));
        from = to;
        to = sentences.next();
    }
}
/**
 * Prints a "Break Iterator" header followed by every word-iterator segment of
 * {@code input} (default locale), one per line of standard output. Segments between
 * words, such as whitespace and punctuation, are printed as well.
 *
 * @param input the text to tokenize
 */
public void useBreakIterator(String input) {
    System.out.println("Break Iterator");

    BreakIterator segments = BreakIterator.getWordInstance();
    segments.setText(input);

    int from = segments.first();
    int to = segments.next();
    while (to != BreakIterator.DONE) {
        System.out.println(input.substring(from, to));
        from = to;
        to = segments.next();
    }
}
/**
 * Writes {@code textString} line by line: each '\n'-separated line is passed to
 * {@code _writeTextLineWithBreaks}, and a single newline character is written to the
 * response writer between consecutive lines.
 *
 * @param context    the faces context whose response writer receives the output
 * @param breaks     passed through to {@code _writeTextLineWithBreaks}
 * @param textString the text to write; may contain '\n' line separators
 * @param columns    passed through to {@code _writeTextLineWithBreaks}
 * @throws IOException if writing fails
 */
private void _writeTextWithBreaks(
  FacesContext  context,
  BreakIterator breaks,
  String        textString,
  int           columns
  ) throws IOException
{
  int start = 0;
  while (true)
  {
    int nextLineBreak = textString.indexOf('\n', start);
    String substring;
    if (nextLineBreak >= 0)
      substring = textString.substring(start, nextLineBreak);
    else
      substring = textString.substring(start);

    _writeTextLineWithBreaks(context, breaks, substring, columns);

    if (nextLineBreak < 0)
      break;

    start = nextLineBreak + 1;
    // An array holding the newline itself. The former `new char['\n']` allocated a
    // zero-filled array of length 10, so a NUL character was emitted instead.
    char[] chars = {'\n'};
    context.getResponseWriter().write(chars, 0, 1);
  }
}
/**
 * Sets the iterator to refer to the first boundary position following
 * the specified position.
 * The result is also remembered in {@code cachedLastKnownBreak}, which later calls
 * use as a starting point when it lies before the requested offset.
 *
 * @param offset The position from which to begin searching for a break position.
 *        It is validated by {@code checkOffset} before anything else happens.
 * @return The position of the first break after the specified position, or
 *         {@link BreakIterator#DONE} if the search ends without finding one.
 */
@Override
public int following(int offset) {
    CharacterIterator text = getText();
    checkOffset(offset, text);

    // Set our internal iteration position (temporarily)
    // to the position passed in. If this is the _beginning_ position,
    // then we can just use next() to get our return value
    text.setIndex(offset);
    if (offset == text.getBeginIndex()) {
        cachedLastKnownBreak = handleNext();
        return cachedLastKnownBreak;
    }

    // otherwise, we have to sync up first. Use handlePrevious() to back
    // us up to a known break position before the specified position (if
    // we can determine that the specified position is a break position,
    // we don't back up at all). This may or may not be the last break
    // position at or before our starting position. Advance forward
    // from here until we've passed the starting position. The position
    // we stop on will be the first break position after the specified one.
    int result = cachedLastKnownBreak;
    // The cached break is only usable when it is a real position strictly before offset.
    if (result >= offset || result <= BreakIterator.DONE) {
        result = handlePrevious();
    } else {
        //it might be better to check if handlePrevious() give us closer
        //safe value but handlePrevious() is slow too
        //So, this has to be done carefully
        text.setIndex(result);
    }
    // Step forward until we are past offset (or run out of boundaries).
    while (result != BreakIterator.DONE && result <= offset) {
        result = handleNext();
    }
    cachedLastKnownBreak = result;
    return result;
}
/**
 * Truncates the given string after the requested number of words.
 * A "word" is a word-iterator segment (default locale) whose first code point is a
 * letter; segments starting with anything else are skipped without being counted.
 *
 * @param n number of words required
 * @param s input string, may be {@code null}
 * @return {@code null} for a {@code null} input; the prefix of {@code s} ending right
 *         after the n-th word; or {@code s} itself when the cut would fall at or past
 *         the end of the string
 */
public static String truncateAfterWords(int n, String s) {
    if (s == null) {
        return null;
    }

    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(s);

    final int length = s.length();
    int cut = 0;
    int wordsSeen = 0;
    while (wordsSeen < n && cut != BreakIterator.DONE && cut < length) {
        if (Character.isLetter(s.codePointAt(cut))) {
            wordsSeen++;
        }
        cut = boundaries.next();
    }

    boolean keepWhole = (cut == BreakIterator.DONE) || (cut >= length);
    return keepWhole ? s : s.substring(0, cut);
}
/**
 * Returns the {@code breakIterator} held by this instance when it is set; otherwise
 * defers to the superclass for the given field.
 */
@Override
protected BreakIterator getBreakIterator(String field) {
    if (breakIterator != null) {
        return breakIterator;
    }
    return super.getBreakIterator(field);
}
/** * Constructs a <code>LineBreakMeasurer</code> for the specified text. * * @param text the text for which this <code>LineBreakMeasurer</code> * produces <code>TextLayout</code> objects; the text must contain * at least one character; if the text available through * <code>iter</code> changes, further calls to this * <code>LineBreakMeasurer</code> instance are undefined (except, * in some cases, when <code>insertChar</code> or * <code>deleteChar</code> are invoked afterward - see below) * @param breakIter the {@link BreakIterator} which defines line * breaks * @param frc contains information about a graphics device which is * needed to measure the text correctly; * text measurements can vary slightly depending on the * device resolution, and attributes such as antialiasing; this * parameter does not specify a translation between the * <code>LineBreakMeasurer</code> and user space * @throws IllegalArgumentException if the text has less than one character * @see LineBreakMeasurer#insertChar * @see LineBreakMeasurer#deleteChar */ public LineBreakMeasurer(AttributedCharacterIterator text, BreakIterator breakIter, FontRenderContext frc) { if (text.getEndIndex() - text.getBeginIndex() < 1) { throw new IllegalArgumentException("Text must contain at least one character."); } this.breakIter = breakIter; this.measurer = new TextMeasurer(text, frc); this.limit = text.getEndIndex(); this.pos = this.start = text.getBeginIndex(); charIter = new CharArrayIterator(measurer.getChars(), this.start); this.breakIter.setText(charIter); }